from datetime import datetime, timezone
from uuid import UUID, uuid4

from loguru import logger

from models.pipeline import (
    Pipeline,
    PipelineCreate,
    PipelineConfig,
    RunFrequency,
    PipelineStatus,
)
from models.ingestion import IngestorInput
from stores.base import PipelineStore

class PipelineService:
    def __init__(self, store: PipelineStore):
        self.store = store
        logger.info(f"PipelineService initialized with store: {type(store).__name__}")

    async def create_pipeline(
        self,
        name: str,
        description: str,
        ingestor_config: IngestorInput,
        run_frequency: RunFrequency,
    ) -> Pipeline:
        """Create a new pipeline using the store."""
        logger.info(
            f"Creating pipeline: name={name}, description={description}, ingestor_config=..., run_frequency={run_frequency}"
        )
        try:
            pipeline_id = uuid4()
            now = datetime.now(timezone.utc)
            pipeline = Pipeline(
                id=pipeline_id,
                name=name,
                description=description,
                config=PipelineConfig(
                    ingestor_config=ingestor_config,
                    run_frequency=run_frequency,
                    # last_run/next_run will be set by the scheduler
                    last_run=None,
                    next_run=None,
                ),
                status=PipelineStatus.INACTIVE,
                created_at=now,
                updated_at=now,
            )
            await self.store.save(pipeline)
            logger.info(f"Pipeline created and saved: id={pipeline.id}")
            return pipeline
        except Exception as e:
            logger.error(f"Failed to create pipeline: {e}")
            raise
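
    # Usage sketch (hedged): how a caller might exercise create_pipeline from
    # async code. The store instance and ingestor fields are hypothetical
    # placeholders, not names defined in this module:
    #
    #   service = PipelineService(store=some_pipeline_store)
    #   pipeline = await service.create_pipeline(
    #       name="example",
    #       description="An example pipeline",
    #       ingestor_config=IngestorInput(...),  # fill in real ingestor fields
    #       run_frequency=some_run_frequency,    # a RunFrequency member
    #   )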
    async def update_pipeline(
        self, pipeline_id: UUID, pipeline_in: PipelineCreate
    ) -> Pipeline | None:
        """Update an existing pipeline using the store."""
        logger.info(f"Updating pipeline: id={pipeline_id}, update_data=...")
        existing_pipeline = await self.store.get(pipeline_id)
        if not existing_pipeline:
            logger.warning(f"Pipeline not found for update: id={pipeline_id}")
            return None

        try:
            # TODO: review code here
            # Build an updated pipeline object from the existing one and the
            # input. Be careful about mutable defaults if not using Pydantic
            # properly.
            update_data = pipeline_in.model_dump(
                exclude_unset=True
            )  # Get only the fields the caller actually provided

            # Merge update_data into a dict representation of the existing
            # pipeline. Note: this simplistic merge may need refinement
            # depending on how partial updates should behave, especially for
            # nested structures like 'config'; Pydantic v2's model_copy could
            # be useful here (hedged sketch below).
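            # Hedged sketch, not what runs here: if Pipeline is a Pydantic v2
            # model, the top-level merge could be written as
            #
            #   updated_pipeline = existing_pipeline.model_copy(update=update_data)
            #
            # but model_copy does not re-validate the merged data, so the
            # explicit merge plus Pipeline.model_validate below is kept instead.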
            updated_pipeline_data = existing_pipeline.model_dump()

            if "name" in update_data:
                updated_pipeline_data["name"] = update_data["name"]
            if "description" in update_data:
                updated_pipeline_data["description"] = update_data["description"]

            # Merge config: replace the whole config if provided in the input
            if "config" in update_data:
                # Ensure the input config is a PipelineConfig object before assignment
                if pipeline_in.config:
                    updated_pipeline_data["config"] = pipeline_in.config.model_dump()
                else:
                    # Handle cases where config might be passed differently if needed
                    logger.error("Invalid config format for update")
                    raise ValueError("Invalid config format for update")

            # Re-validate the merged data into a Pipeline object. The original
            # id and created_at are preserved because they come from the dump
            # of existing_pipeline; updated_at is handled by the store's save
            # method.
            updated_pipeline = Pipeline.model_validate(updated_pipeline_data)

            await self.store.save(updated_pipeline)
            logger.info(f"Pipeline updated: id={updated_pipeline.id}")
            return updated_pipeline
        except Exception as e:
            logger.error(f"Failed to update pipeline id={pipeline_id}: {e}")
            raise

    async def delete_pipeline(self, pipeline_id: UUID) -> bool:
        """Delete an existing pipeline using the store."""
        logger.info(f"Deleting pipeline: id={pipeline_id}")
        deleted = await self.store.delete(pipeline_id)
        if not deleted:
            logger.warning(f"Pipeline not found for deletion: id={pipeline_id}")
        return deleted

    async def get_pipeline(self, pipeline_id: UUID) -> Pipeline | None:
        """Get a single pipeline by ID."""
        logger.info(f"Getting pipeline: id={pipeline_id}")
        return await self.store.get(pipeline_id)

    async def list_pipelines(self) -> list[Pipeline]:
        """Get all pipelines."""
        logger.info("Listing all pipelines")
        return await self.store.get_all()

    async def run_pipeline(self, pipeline_id: UUID) -> None:
        """Run an existing pipeline."""
        # 1. Fetch the pipeline and make sure it is in a runnable state
        pipeline = await self.store.get(pipeline_id)
        if not pipeline:
            logger.error(f"Cannot run pipeline: Pipeline not found (id={pipeline_id})")
            return
        if pipeline.status == PipelineStatus.ACTIVE:
            logger.warning(
                f"Pipeline id={pipeline_id} is already active/running (status={pipeline.status}). Skipping run."
            )
            return

        logger.info(
            f"Attempting to run pipeline: id={pipeline_id}, name='{pipeline.name}'"
        )

        try:
            # 2. Update status to ACTIVE (optional, depends on desired state mgmt)
            # pipeline.status = PipelineStatus.ACTIVE
            # pipeline.config.last_run = datetime.now(timezone.utc)  # Mark start time
            # await self.store.save(pipeline)

            # 3. Execute the actual pipeline logic (ingestion, processing, etc.).
            # This part depends heavily on the pipeline runner implementation;
            # it would use pipeline.config.ingestor_config, etc.
            logger.info(f"Executing pipeline logic for id={pipeline_id}...")
            # ...
            # result = await actual_pipeline_runner(pipeline.config)
            # ...
            logger.info(f"Pipeline logic finished for id={pipeline_id}.")

            # 4. Update status (e.g., back to INACTIVE) and run times on
            # completion. Fetch the latest state in case it changed during the run.
            current_pipeline_state = await self.store.get(pipeline_id)
            if current_pipeline_state:
                current_pipeline_state.status = (
                    PipelineStatus.INACTIVE
                )  # Or COMPLETED if you have that state
                current_pipeline_state.config.last_run = datetime.now(
                    timezone.utc
                )  # Mark end time
                # TODO: Calculate next_run based on
                # current_pipeline_state.config.run_frequency
                # (hedged sketch at the bottom of this file)
                # current_pipeline_state.config.next_run = calculate_next_run(...)
                await self.store.save(current_pipeline_state)
                logger.info(
                    f"Pipeline run finished and state updated for id={pipeline_id}"
                )
            else:
                logger.warning(
                    f"Pipeline id={pipeline_id} disappeared during run. Cannot update final state."
                )

        except Exception as e:
            logger.error(f"Failed during pipeline run id={pipeline_id}: {e}")
            # Optionally update status to FAILED
            current_pipeline_state = await self.store.get(pipeline_id)
            if current_pipeline_state:
                # current_pipeline_state.status = PipelineStatus.FAILED  # Add a FAILED state if desired
                await self.store.save(current_pipeline_state)
            # Handle/log the error appropriately
            # raise  # Optionally re-raise
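
# Hedged sketch for the TODO in run_pipeline above: derive next_run from the
# completed run's timestamp and the pipeline's RunFrequency. The enum member
# names (HOURLY, DAILY, WEEKLY) are assumptions about
# models.pipeline.RunFrequency, so this stays commented out rather than wired
# in; adjust the mapping to the real enum before using it:
#
#   from datetime import timedelta
#
#   _FREQUENCY_INTERVALS = {
#       RunFrequency.HOURLY: timedelta(hours=1),
#       RunFrequency.DAILY: timedelta(days=1),
#       RunFrequency.WEEKLY: timedelta(weeks=1),
#   }
#
#   def calculate_next_run(last_run: datetime, frequency: RunFrequency) -> datetime:
#       """Return the next scheduled run time after a completed run."""
#       return last_run + _FREQUENCY_INTERVALS[frequency]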