""" Pydantic models for pipelines and runs. """ from typing import List, Union, Annotated, Optional, Literal, Dict, Any from uuid import UUID from datetime import datetime from pydantic import BaseModel, Field, HttpUrl, field_validator class RunCreate(BaseModel): """ Model for creating a new run. (Empty) """ pass class Run(BaseModel): """ Status of a pipeline run. """ id: UUID pipeline_id: UUID status: Literal['PENDING', 'RUNNING', 'COMPLETED', 'FAILED'] started_at: datetime finished_at: Optional[datetime] = None class RunResult(Run): """ Extended run model including results or error. """ results: Optional[List[Dict[str, Any]]] = None error: Optional[str] = None class ApiConfig(BaseModel): """ Configuration for an API source. """ url: HttpUrl = Field( ..., description="API endpoint URL", example="https://api.example.com/data" ) token: Optional[str] = Field( None, description="Optional bearer token for API authentication", example="abcdef123456" ) class ScrapeConfig(BaseModel): """ Configuration for a web-scraping source. """ urls: List[HttpUrl] = Field( ..., description="List of URLs to scrape", example=["https://example.com/page1", "https://example.com/page2"] ) schema_file: Optional[str] = Field( None, description="Path to a JSON file containing CSS extraction schema", example="schemas/page_schema.json" ) prompt: Optional[str] = Field( None, description="Prompt string for LLM-based extraction", example="Extract product titles and prices" ) class FileConfig(BaseModel): """ Configuration for a file-based source. """ path: str = Field( ..., description="Path to the input file", example="/data/myfile.json" ) format: Literal["csv", "json", "sqlite"] = Field( "json", description="Format of the file", example="csv" ) @field_validator("path") def path_extension_matches_format(cls, v: str, values): fmt = values.get("format") if fmt and not v.lower().endswith(f".{fmt}"): raise ValueError(f"File extension must match format '{fmt}'") return v class ApiSource(BaseModel): """ An API-based data source. """ type: Literal["api"] = Field( "api", description="Discriminator for API source" # Removed const=True ) config: ApiConfig class ScrapeSource(BaseModel): """ A web-scraping data source. """ type: Literal["scrape"] = Field( "scrape", description="Discriminator for scrape source" # Removed const=True ) config: ScrapeConfig class FileSource(BaseModel): """ A file-based data source. """ type: Literal["file"] = Field( "file", description="Discriminator for file source" # Removed const=True ) config: FileConfig Source = Annotated[ Union[ApiSource, ScrapeSource, FileSource], Field(discriminator="type", description="Union of all source types") ] class PipelineCreate(BaseModel): """ Payload for creating a new pipeline. """ name: Optional[str] = Field( None, description="Optional human-readable name for the pipeline", example="My Data Pipeline" ) sources: List[Source] = Field( ..., description="List of data sources for this pipeline" ) class Pipeline(BaseModel): """ Representation of a pipeline. """ id: UUID = Field( ..., description="Unique identifier for the pipeline" ) name: Optional[str] = Field( None, description="Optional human-readable name for the pipeline" ) sources: List[Source] = Field( ..., description="List of configured data sources" ) created_at: datetime = Field( ..., description="UTC timestamp when the pipeline was created" )