backend-api/crawler_ai_project_files/schema.yaml

1 line
10 KiB
YAML

{
  "openapi": "3.1.0",
  "info": {
    "title": "Data Integration Pipeline API",
    "version": "0.1.0"
  },
  "paths": {
    "/pipelines": {
      "get": {
        "summary": "List all pipelines",
        "description": "Retrieve all registered pipelines.",
        "operationId": "list_pipelines_pipelines_get",
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "items": {
                    "$ref": "#/components/schemas/Pipeline"
                  },
                  "type": "array",
                  "title": "Response List Pipelines Pipelines Get"
                }
              }
            }
          }
        }
      },
      "post": {
        "summary": "Create a new pipeline",
        "description": "Register a new pipeline with sources configuration.",
        "operationId": "create_pipeline_pipelines_post",
        "requestBody": {
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/PipelineCreate"
              }
            }
          },
          "required": true
        },
        "responses": {
          "201": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/Pipeline"
                }
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/pipelines/{pipeline_id}": {
      "get": {
        "summary": "Get a pipeline by ID",
        "description": "Fetch details of a specific pipeline.",
        "operationId": "get_pipeline_pipelines__pipeline_id__get",
        "parameters": [
          {
            "name": "pipeline_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "format": "uuid",
              "title": "Pipeline Id"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/Pipeline"
                }
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/pipelines/{pipeline_id}/run": {
      "post": {
        "summary": "Trigger a pipeline run",
        "description": "Start a new run for the given pipeline. Runs asynchronously.",
        "operationId": "run_pipeline_pipelines__pipeline_id__run_post",
        "parameters": [
          {
            "name": "pipeline_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "format": "uuid",
              "title": "Pipeline Id"
            }
          }
        ],
        "responses": {
          "201": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/Run"
                }
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/pipelines/{pipeline_id}/runs": {
      "get": {
        "summary": "List runs for a pipeline",
        "description": "List all runs associated with a pipeline.",
        "operationId": "list_runs_pipelines__pipeline_id__runs_get",
        "parameters": [
          {
            "name": "pipeline_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "format": "uuid",
              "title": "Pipeline Id"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "type": "array",
                  "items": {
                    "$ref": "#/components/schemas/Run"
                  },
                  "title": "Response List Runs Pipelines Pipeline Id Runs Get"
                }
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/pipelines/{pipeline_id}/runs/{run_id}": {
      "get": {
        "summary": "Get run status",
        "description": "Retrieve the status of a specific run.",
        "operationId": "get_run_pipelines__pipeline_id__runs__run_id__get",
        "parameters": [
          {
            "name": "pipeline_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "format": "uuid",
              "title": "Pipeline Id"
            }
          },
          {
            "name": "run_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "format": "uuid",
              "title": "Run Id"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/Run"
                }
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/pipelines/{pipeline_id}/runs/{run_id}/results": {
      "get": {
        "summary": "Get run results",
        "description": "Retrieve normalized results of a completed run.",
        "operationId": "get_run_results_pipelines__pipeline_id__runs__run_id__results_get",
        "parameters": [
          {
            "name": "pipeline_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "format": "uuid",
              "title": "Pipeline Id"
            }
          },
          {
            "name": "run_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "format": "uuid",
              "title": "Run Id"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "type": "array",
                  "items": {
                    "type": "object",
                    "additionalProperties": true
                  },
                  "title": "Response Get Run Results Pipelines Pipeline Id Runs Run Id Results Get"
                }
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            }
          }
        }
      }
    },
    "/pipelines/{pipeline_id}/runs/{run_id}/error": {
      "get": {
        "summary": "Get run error message",
        "description": "Retrieve the error message for a run that failed.",
        "operationId": "get_run_error_pipelines__pipeline_id__runs__run_id__error_get",
        "parameters": [
          {
            "name": "pipeline_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "format": "uuid",
              "title": "Pipeline Id"
            }
          },
          {
            "name": "run_id",
            "in": "path",
            "required": true,
            "schema": {
              "type": "string",
              "format": "uuid",
              "title": "Run Id"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful Response",
            "content": {
              "application/json": {
                "schema": {
                  "type": "string",
                  "title": "Response Get Run Error Pipelines Pipeline Id Runs Run Id Error Get"
                }
              }
            }
          },
          "422": {
            "description": "Validation Error",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HTTPValidationError"
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "ApiConfig": {
        "properties": {
          "url": {
            "type": "string",
            "maxLength": 2083,
            "minLength": 1,
            "format": "uri",
            "title": "Url",
            "description": "API endpoint URL",
            "example": "https://api.example.com/data"
          },
          "token": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "title": "Token",
            "description": "Optional bearer token for API authentication",
            "example": "abcdef123456"
          }
        },
        "type": "object",
        "required": ["url"],
        "title": "ApiConfig",
        "description": "Configuration for an API source."
      },
      "ApiSource": {
        "properties": {
          "type": {
            "type": "string",
            "const": "api",
            "title": "Type",
            "description": "Discriminator for API source",
            "default": "api"
          },
          "config": {
            "$ref": "#/components/schemas/ApiConfig"
          }
        },
        "type": "object",
        "required": ["type", "config"],
        "title": "ApiSource",
        "description": "An API-based data source."
      },
      "FileConfig": {
        "properties": {
          "path": {
            "type": "string",
            "title": "Path",
            "description": "Path to the input file",
            "example": "/data/myfile.json"
          },
          "format": {
            "type": "string",
            "enum": ["csv", "json", "sqlite"],
            "title": "Format",
            "description": "Format of the file",
            "default": "json",
            "example": "csv"
          }
        },
        "type": "object",
        "required": ["path"],
        "title": "FileConfig",
        "description": "Configuration for a file-based source."
      },
      "FileSource": {
        "properties": {
          "type": {
            "type": "string",
            "const": "file",
            "title": "Type",
            "description": "Discriminator for file source",
            "default": "file"
          },
          "config": {
            "$ref": "#/components/schemas/FileConfig"
          }
        },
        "type": "object",
        "required": ["type", "config"],
        "title": "FileSource",
        "description": "A file-based data source."
      },
      "HTTPValidationError": {
        "properties": {
          "detail": {
            "items": {
              "$ref": "#/components/schemas/ValidationError"
            },
            "type": "array",
            "title": "Detail"
          }
        },
        "type": "object",
        "title": "HTTPValidationError"
      },
      "Pipeline": {
        "properties": {
          "id": {
            "type": "string",
            "format": "uuid",
            "title": "Id",
            "description": "Unique identifier for the pipeline"
          },
          "name": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "title": "Name",
            "description": "Optional human-readable name for the pipeline"
          },
          "sources": {
            "items": {
              "oneOf": [
                {
                  "$ref": "#/components/schemas/ApiSource"
                },
                {
                  "$ref": "#/components/schemas/ScrapeSource"
                },
                {
                  "$ref": "#/components/schemas/FileSource"
                }
              ],
              "description": "Union of all source types",
              "discriminator": {
                "propertyName": "type",
                "mapping": {
                  "api": "#/components/schemas/ApiSource",
                  "file": "#/components/schemas/FileSource",
                  "scrape": "#/components/schemas/ScrapeSource"
                }
              }
            },
            "type": "array",
            "title": "Sources",
            "description": "List of configured data sources"
          },
          "created_at": {
            "type": "string",
            "format": "date-time",
            "title": "Created At",
            "description": "UTC timestamp when the pipeline was created"
          }
        },
        "type": "object",
        "required": ["id", "sources", "created_at"],
        "title": "Pipeline",
        "description": "Representation of a pipeline."
      },
      "PipelineCreate": {
        "properties": {
          "name": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "title": "Name",
            "description": "Optional human-readable name for the pipeline",
            "example": "My Data Pipeline"
          },
          "sources": {
            "items": {
              "oneOf": [
                {
                  "$ref": "#/components/schemas/ApiSource"
                },
                {
                  "$ref": "#/components/schemas/ScrapeSource"
                },
                {
                  "$ref": "#/components/schemas/FileSource"
                }
              ],
              "description": "Union of all source types",
              "discriminator": {
                "propertyName": "type",
                "mapping": {
                  "api": "#/components/schemas/ApiSource",
                  "file": "#/components/schemas/FileSource",
                  "scrape": "#/components/schemas/ScrapeSource"
                }
              }
            },
            "type": "array",
            "title": "Sources",
            "description": "List of data sources for this pipeline"
          }
        },
        "type": "object",
        "required": ["sources"],
        "title": "PipelineCreate",
        "description": "Payload for creating a new pipeline."
      },
      "Run": {
        "properties": {
          "id": {
            "type": "string",
            "format": "uuid",
            "title": "Id"
          },
          "pipeline_id": {
            "type": "string",
            "format": "uuid",
            "title": "Pipeline Id"
          },
          "status": {
            "type": "string",
            "enum": ["PENDING", "RUNNING", "COMPLETED", "FAILED"],
            "title": "Status"
          },
          "started_at": {
            "type": "string",
            "format": "date-time",
            "title": "Started At"
          },
          "finished_at": {
            "anyOf": [
              {
                "type": "string",
                "format": "date-time"
              },
              {
                "type": "null"
              }
            ],
            "title": "Finished At"
          }
        },
        "type": "object",
        "required": ["id", "pipeline_id", "status", "started_at"],
        "title": "Run",
        "description": "Status of a pipeline run."
      },
      "ScrapeConfig": {
        "properties": {
          "urls": {
            "items": {
              "type": "string",
              "maxLength": 2083,
              "minLength": 1,
              "format": "uri"
            },
            "type": "array",
            "title": "Urls",
            "description": "List of URLs to scrape",
            "example": [
              "https://example.com/page1",
              "https://example.com/page2"
            ]
          },
          "schema_file": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "title": "Schema File",
            "description": "Path to a JSON file containing CSS extraction schema",
            "example": "schemas/page_schema.json"
          },
          "prompt": {
            "anyOf": [
              {
                "type": "string"
              },
              {
                "type": "null"
              }
            ],
            "title": "Prompt",
            "description": "Prompt string for LLM-based extraction",
            "example": "Extract product titles and prices"
          }
        },
        "type": "object",
        "required": ["urls"],
        "title": "ScrapeConfig",
        "description": "Configuration for a web-scraping source."
      },
      "ScrapeSource": {
        "properties": {
          "type": {
            "type": "string",
            "const": "scrape",
            "title": "Type",
            "description": "Discriminator for scrape source",
            "default": "scrape"
          },
          "config": {
            "$ref": "#/components/schemas/ScrapeConfig"
          }
        },
        "type": "object",
        "required": ["type", "config"],
        "title": "ScrapeSource",
        "description": "A web-scraping data source."
      },
      "ValidationError": {
        "properties": {
          "loc": {
            "items": {
              "anyOf": [
                {
                  "type": "string"
                },
                {
                  "type": "integer"
                }
              ]
            },
            "type": "array",
            "title": "Location"
          },
          "msg": {
            "type": "string",
            "title": "Message"
          },
          "type": {
            "type": "string",
            "title": "Error Type"
          }
        },
        "type": "object",
        "required": ["loc", "msg", "type"],
        "title": "ValidationError"
      }
    }
  }
}