backend-api/pipeline/services/pipeline_service.py

from datetime import datetime, timezone
from uuid import UUID, uuid4

from loguru import logger

from models.pipeline import (
    Pipeline,
    PipelineCreate,
    PipelineConfig,
    RunFrequency,
    PipelineStatus,
)
from models.ingestion import IngestorInput
from stores.base import PipelineStore


class PipelineService:
    def __init__(self, store: PipelineStore):
        self.store = store
        logger.info(f"PipelineService initialized with store: {type(store).__name__}")

    async def create_pipeline(
        self,
        name: str,
        description: str,
        ingestor_config: IngestorInput,
        run_frequency: RunFrequency,
    ) -> Pipeline:
        """Create a new pipeline and persist it via the store."""
        logger.info(
            f"Creating pipeline: name={name}, description={description}, "
            f"ingestor_config=..., run_frequency={run_frequency}"
        )
        try:
            pipeline_id = uuid4()
            now = datetime.now(timezone.utc)
            pipeline = Pipeline(
                id=pipeline_id,
                name=name,
                description=description,
                config=PipelineConfig(
                    ingestor_config=ingestor_config,
                    run_frequency=run_frequency,
                    # last_run/next_run will be set by the scheduler
                    last_run=None,
                    next_run=None,
                ),
                status=PipelineStatus.INACTIVE,
                created_at=now,
                updated_at=now,
            )
            await self.store.save(pipeline)
            logger.info(f"Pipeline created and saved: id={pipeline.id}")
            return pipeline
        except Exception as e:
            logger.error(f"Failed to create pipeline: {e}")
            raise
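
    # Usage sketch (untested; the store instance and argument values are
    # assumptions, and RunFrequency.DAILY is an assumed enum member name):
    #     service = PipelineService(store=pipeline_store)
    #     pipeline = await service.create_pipeline(
    #         name="news-ingest",
    #         description="Ingest news articles daily",
    #         ingestor_config=ingestor_input,
    #         run_frequency=RunFrequency.DAILY,
    #     )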

    async def update_pipeline(
        self, pipeline_id: UUID, pipeline_in: PipelineCreate
    ) -> Pipeline | None:
        """Update an existing pipeline using the store."""
        logger.info(f"Updating pipeline: id={pipeline_id}, update_data=...")
        existing_pipeline = await self.store.get(pipeline_id)
        if not existing_pipeline:
            logger.warning(f"Pipeline not found for update: id={pipeline_id}")
            return None
        try:
            # Build an updated pipeline from the existing one plus the input.
            # exclude_unset=True limits the dump to fields the caller provided.
            update_data = pipeline_in.model_dump(exclude_unset=True)
            # TODO: review this merge. It replaces top-level fields wholesale
            # and may need refinement for partial updates of nested
            # structures like 'config'.
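            # Untested alternative sketch: Pydantic v2's model_copy(update=...)
            # applies a shallow update without re-validation, e.g.
            #     updated_pipeline = existing_pipeline.model_copy(update=update_data)
            # but nested models like `config` would still be replaced
            # wholesale, so the explicit merge below is kept.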
            updated_pipeline_data = existing_pipeline.model_dump()
            if "name" in update_data:
                updated_pipeline_data["name"] = update_data["name"]
            if "description" in update_data:
                updated_pipeline_data["description"] = update_data["description"]
            # Merge config: replace the whole config if provided in the input
            if "config" in update_data:
                # Ensure the input config is a PipelineConfig object before assignment
                if pipeline_in.config:
                    updated_pipeline_data["config"] = pipeline_in.config.model_dump()
                else:
                    # Handle cases where config might be passed differently if needed
                    logger.error("Invalid config format for update")
                    raise ValueError("Invalid config format for update")
            # Re-validate the merged data into a Pipeline object. The original
            # id and created_at are preserved because they come from the
            # existing pipeline's dump; updated_at is handled by the store's
            # save method.
            updated_pipeline = Pipeline.model_validate(updated_pipeline_data)
            await self.store.save(updated_pipeline)
            logger.info(f"Pipeline updated: id={updated_pipeline.id}")
            return updated_pipeline
        except Exception as e:
            logger.error(f"Failed to update pipeline id={pipeline_id}: {e}")
            raise

    async def delete_pipeline(self, pipeline_id: UUID) -> bool:
        """Delete an existing pipeline using the store."""
        logger.info(f"Deleting pipeline: id={pipeline_id}")
        deleted = await self.store.delete(pipeline_id)
        if not deleted:
            logger.warning(f"Pipeline not found for deletion: id={pipeline_id}")
        return deleted

    async def get_pipeline(self, pipeline_id: UUID) -> Pipeline | None:
        """Get a single pipeline by ID."""
        logger.info(f"Getting pipeline: id={pipeline_id}")
        return await self.store.get(pipeline_id)

    async def list_pipelines(self) -> list[Pipeline]:
        """Get all pipelines."""
        logger.info("Listing all pipelines")
        return await self.store.get_all()

    async def run_pipeline(self, pipeline_id: UUID) -> None:
        """Run an existing pipeline."""
        # 1. Fetch the pipeline and check that it is runnable
        pipeline = await self.store.get(pipeline_id)
        if not pipeline:
            logger.error(f"Cannot run pipeline: Pipeline not found (id={pipeline_id})")
            return
        if pipeline.status == PipelineStatus.ACTIVE:
            logger.warning(
                f"Pipeline id={pipeline_id} is already active/running "
                f"(status={pipeline.status}). Skipping run."
            )
            return
        logger.info(
            f"Attempting to run pipeline: id={pipeline_id}, name='{pipeline.name}'"
        )
        try:
            # 2. Update status to ACTIVE (optional, depends on desired state mgmt)
            # pipeline.status = PipelineStatus.ACTIVE
            # pipeline.config.last_run = datetime.now(timezone.utc)  # Mark start time
            # await self.store.save(pipeline)

            # 3. Execute the actual pipeline logic (ingestion, processing, etc.).
            # This depends heavily on the pipeline runner implementation; it
            # would use pipeline.config.ingestor_config, etc.
            logger.info(f"Executing pipeline logic for id={pipeline_id}...")
            # ...
            # result = await actual_pipeline_runner(pipeline.config)
            # ...
            logger.info(f"Pipeline logic finished for id={pipeline_id}.")

            # 4. Update status (e.g., back to INACTIVE) and run times on
            # completion. Fetch the latest state in case it changed during the run.
            current_pipeline_state = await self.store.get(pipeline_id)
            if current_pipeline_state:
                # Or COMPLETED, if such a state exists
                current_pipeline_state.status = PipelineStatus.INACTIVE
                # Mark end time
                current_pipeline_state.config.last_run = datetime.now(timezone.utc)
                # TODO: Calculate next_run from config.run_frequency; see the
                # calculate_next_run sketch at the bottom of this module.
                # current_pipeline_state.config.next_run = calculate_next_run(...)
                await self.store.save(current_pipeline_state)
                logger.info(
                    f"Pipeline run finished and state updated for id={pipeline_id}"
                )
            else:
                logger.warning(
                    f"Pipeline id={pipeline_id} disappeared during run. "
                    f"Cannot update final state."
                )
        except Exception as e:
            logger.error(f"Failed during pipeline run id={pipeline_id}: {e}")
            # Optionally update status to FAILED
            current_pipeline_state = await self.store.get(pipeline_id)
            if current_pipeline_state:
                # current_pipeline_state.status = PipelineStatus.FAILED  # requires a FAILED state
                await self.store.save(current_pipeline_state)
            # Handle/log the error appropriately
            # raise  # Optionally re-raise
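

# The TODO in run_pipeline needs a next_run calculation. Below is an untested
# sketch of such a helper; the RunFrequency member names (HOURLY, DAILY,
# WEEKLY) are assumptions, not confirmed by the actual enum, so the code is
# left commented out until the mapping is checked against models.pipeline.
#
# from datetime import timedelta
#
# _FREQUENCY_INTERVALS = {
#     RunFrequency.HOURLY: timedelta(hours=1),   # assumed member name
#     RunFrequency.DAILY: timedelta(days=1),     # assumed member name
#     RunFrequency.WEEKLY: timedelta(weeks=1),   # assumed member name
# }
#
#
# def calculate_next_run(config: PipelineConfig) -> datetime:
#     """Next scheduled run: last_run (or now) plus the frequency interval."""
#     base = config.last_run or datetime.now(timezone.utc)
#     return base + _FREQUENCY_INTERVALS[config.run_frequency]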