backend-api/pipeline/services/pipeline_service.py

from datetime import datetime, timezone
from uuid import UUID, uuid4

from loguru import logger

from models.pipeline import (
    Pipeline,
    PipelineCreate,
    PipelineConfig,
    RunFrequency,
    PipelineStatus,
)
from models.ingestion import IngestorInput
from stores.base import PipelineStore


class PipelineService:
    def __init__(self, store: PipelineStore):
        self.store = store
        logger.info(f"PipelineService initialized with store: {type(store).__name__}")

    async def create_pipeline(
        self,
        name: str,
        description: str,
        ingestor_config: IngestorInput,
        run_frequency: RunFrequency,
    ) -> Pipeline:
        """Create a new pipeline and persist it via the store."""
        logger.info(
            f"Creating pipeline: name={name}, description={description}, "
            f"ingestor_config=..., run_frequency={run_frequency}"
        )
        try:
            pipeline_id = uuid4()
            now = datetime.now(timezone.utc)
            pipeline = Pipeline(
                id=pipeline_id,
                name=name,
                description=description,
                config=PipelineConfig(
                    ingestor_config=ingestor_config,
                    run_frequency=run_frequency,
                    # last_run/next_run will be set by the scheduler
                    last_run=None,
                    next_run=None,
                ),
                status=PipelineStatus.INACTIVE,
                created_at=now,
                updated_at=now,
            )
            await self.store.save(pipeline)
            logger.info(f"Pipeline created and saved: id={pipeline.id}")
            return pipeline
        except Exception as e:
            logger.error(f"Failed to create pipeline: {e}")
            raise
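
    # Usage sketch (untested; the store instance and argument values are
    # assumptions, and RunFrequency.DAILY is an assumed enum member name):
    #     service = PipelineService(store=pipeline_store)
    #     pipeline = await service.create_pipeline(
    #         name="news-ingest",
    #         description="Ingest news articles daily",
    #         ingestor_config=ingestor_input,
    #         run_frequency=RunFrequency.DAILY,
    #     )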

    async def update_pipeline(
        self, pipeline_id: UUID, pipeline_in: PipelineCreate
    ) -> Pipeline | None:
        """Update an existing pipeline using the store."""
        logger.info(f"Updating pipeline: id={pipeline_id}, update_data=...")
        existing_pipeline = await self.store.get(pipeline_id)
        if not existing_pipeline:
            logger.warning(f"Pipeline not found for update: id={pipeline_id}")
            return None
        try:
            # Build an updated pipeline from the existing one plus the input.
            # exclude_unset=True limits the dump to fields the caller provided.
            update_data = pipeline_in.model_dump(exclude_unset=True)
            # TODO: review this merge. It replaces top-level fields wholesale
            # and may need refinement for partial updates of nested
            # structures like 'config'.
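            # Untested alternative sketch: Pydantic v2's model_copy(update=...)
            # applies a shallow update without re-validation, e.g.
            #     updated_pipeline = existing_pipeline.model_copy(update=update_data)
            # but nested models like `config` would still be replaced
            # wholesale, so the explicit merge below is kept.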
            updated_pipeline_data = existing_pipeline.model_dump()
            if "name" in update_data:
                updated_pipeline_data["name"] = update_data["name"]
            if "description" in update_data:
                updated_pipeline_data["description"] = update_data["description"]
            # Merge config: replace the whole config if provided in the input
            if "config" in update_data:
                # Ensure the input config is a PipelineConfig object before assignment
                if pipeline_in.config:
                    updated_pipeline_data["config"] = pipeline_in.config.model_dump()
                else:
                    # Handle cases where config might be passed differently if needed
                    logger.error("Invalid config format for update")
                    raise ValueError("Invalid config format for update")
            # Re-validate the merged data into a Pipeline object. The original
            # id and created_at are preserved because they come from the
            # existing pipeline's dump; updated_at is handled by the store's
            # save method.
            updated_pipeline = Pipeline.model_validate(updated_pipeline_data)
            await self.store.save(updated_pipeline)
            logger.info(f"Pipeline updated: id={updated_pipeline.id}")
            return updated_pipeline
        except Exception as e:
            logger.error(f"Failed to update pipeline id={pipeline_id}: {e}")
            raise

    async def delete_pipeline(self, pipeline_id: UUID) -> bool:
        """Delete an existing pipeline using the store."""
        logger.info(f"Deleting pipeline: id={pipeline_id}")
        deleted = await self.store.delete(pipeline_id)
        if not deleted:
            logger.warning(f"Pipeline not found for deletion: id={pipeline_id}")
        return deleted

    async def get_pipeline(self, pipeline_id: UUID) -> Pipeline | None:
        """Get a single pipeline by ID."""
        logger.info(f"Getting pipeline: id={pipeline_id}")
        return await self.store.get(pipeline_id)

    async def list_pipelines(self) -> list[Pipeline]:
        """Get all pipelines."""
        logger.info("Listing all pipelines")
        return await self.store.get_all()

    async def run_pipeline(self, pipeline_id: UUID) -> None:
        """Run an existing pipeline."""
        # 1. Fetch the pipeline and check that it is runnable
        pipeline = await self.store.get(pipeline_id)
        if not pipeline:
            logger.error(f"Cannot run pipeline: Pipeline not found (id={pipeline_id})")
            return
        if pipeline.status == PipelineStatus.ACTIVE:
            logger.warning(
                f"Pipeline id={pipeline_id} is already active/running "
                f"(status={pipeline.status}). Skipping run."
            )
            return
        logger.info(
            f"Attempting to run pipeline: id={pipeline_id}, name='{pipeline.name}'"
        )
        try:
            # 2. Update status to ACTIVE (optional, depends on desired state mgmt)
            # pipeline.status = PipelineStatus.ACTIVE
            # pipeline.config.last_run = datetime.now(timezone.utc)  # Mark start time
            # await self.store.save(pipeline)

            # 3. Execute the actual pipeline logic (ingestion, processing, etc.).
            # This depends heavily on the pipeline runner implementation; it
            # would use pipeline.config.ingestor_config, etc.
            logger.info(f"Executing pipeline logic for id={pipeline_id}...")
            # ...
            # result = await actual_pipeline_runner(pipeline.config)
            # ...
            logger.info(f"Pipeline logic finished for id={pipeline_id}.")

            # 4. Update status (e.g., back to INACTIVE) and run times on
            # completion. Fetch the latest state in case it changed during the run.
            current_pipeline_state = await self.store.get(pipeline_id)
            if current_pipeline_state:
                # Or COMPLETED, if such a state exists
                current_pipeline_state.status = PipelineStatus.INACTIVE
                # Mark end time
                current_pipeline_state.config.last_run = datetime.now(timezone.utc)
                # TODO: Calculate next_run from config.run_frequency; see the
                # calculate_next_run sketch at the bottom of this module.
                # current_pipeline_state.config.next_run = calculate_next_run(...)
                await self.store.save(current_pipeline_state)
                logger.info(
                    f"Pipeline run finished and state updated for id={pipeline_id}"
                )
            else:
                logger.warning(
                    f"Pipeline id={pipeline_id} disappeared during run. "
                    f"Cannot update final state."
                )
        except Exception as e:
            logger.error(f"Failed during pipeline run id={pipeline_id}: {e}")
            # Optionally update status to FAILED
            current_pipeline_state = await self.store.get(pipeline_id)
            if current_pipeline_state:
                # current_pipeline_state.status = PipelineStatus.FAILED  # requires a FAILED state
                await self.store.save(current_pipeline_state)
            # Handle/log the error appropriately
            # raise  # Optionally re-raise
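

# The TODO in run_pipeline needs a next_run calculation. Below is an untested
# sketch of such a helper; the RunFrequency member names (HOURLY, DAILY,
# WEEKLY) are assumptions, not confirmed by the actual enum, so the code is
# left commented out until the mapping is checked against models.pipeline.
#
# from datetime import timedelta
#
# _FREQUENCY_INTERVALS = {
#     RunFrequency.HOURLY: timedelta(hours=1),   # assumed member name
#     RunFrequency.DAILY: timedelta(days=1),     # assumed member name
#     RunFrequency.WEEKLY: timedelta(weeks=1),   # assumed member name
# }
#
#
# def calculate_next_run(config: PipelineConfig) -> datetime:
#     """Next scheduled run: last_run (or now) plus the frequency interval."""
#     base = config.last_run or datetime.now(timezone.utc)
#     return base + _FREQUENCY_INTERVALS[config.run_frequency]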