feat: add pipeline result data endpoint

This commit is contained in:
Sosokker 2025-05-12 23:21:25 +07:00
parent e1cd04a07e
commit f99e49974a
3 changed files with 72 additions and 8 deletions

View File

@ -3,7 +3,7 @@ import enum
from uuid import UUID from uuid import UUID
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from models.ingestion import IngestorInput from models.ingestion import IngestorInput, OutputData
class PipelineStatus(str, enum.Enum): class PipelineStatus(str, enum.Enum):
@ -33,6 +33,9 @@ class Pipeline(BaseModel):
status: PipelineStatus = Field(default=PipelineStatus.INACTIVE) status: PipelineStatus = Field(default=PipelineStatus.INACTIVE)
created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
latest_run_output: OutputData | None = Field(
default=None, description="Output of the last successful run"
)
class PipelineCreate(BaseModel): class PipelineCreate(BaseModel):

View File

@ -9,6 +9,7 @@ from fastapi import (
) )
from models.pipeline import Pipeline, PipelineCreate, PipelineStatus from models.pipeline import Pipeline, PipelineCreate, PipelineStatus
from models.ingestion import OutputData
from services.pipeline_service import PipelineService from services.pipeline_service import PipelineService
from dependencies import ( from dependencies import (
get_pipeline_service, get_pipeline_service,
@ -198,3 +199,31 @@ async def run_pipeline_manually(
background_tasks.add_task(service.run_pipeline, pipeline_id=pipeline_id) background_tasks.add_task(service.run_pipeline, pipeline_id=pipeline_id)
return {"detail": f"Pipeline run triggered for {pipeline_id}"} return {"detail": f"Pipeline run triggered for {pipeline_id}"}
@router.get(
    "/{pipeline_id}/results",
    response_model=OutputData | None,
    summary="Get latest run results for a pipeline",
    description="Retrieves the aggregated data output from the last successful run of the specified pipeline.",
)
async def get_pipeline_results(
    pipeline_id: UUID,
    service: PipelineService = Depends(get_pipeline_service),
) -> OutputData | None:
    """
    Return the output of the last successful run for *pipeline_id*.

    A pipeline that exists but has never produced output yields ``None``;
    an unknown pipeline id raises a 404.
    """
    latest = await service.get_pipeline_latest_results(pipeline_id)
    if latest is not None:
        return latest
    # No stored output — distinguish "pipeline missing" from "no results yet".
    if await service.get_pipeline(pipeline_id):
        return None
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Pipeline with id {pipeline_id} not found.",
    )

View File

@ -17,7 +17,7 @@ from models.pipeline import (
RunFrequency, RunFrequency,
PipelineStatus, PipelineStatus,
) )
from models.ingestion import IngestorInput from models.ingestion import IngestorInput, OutputData
from stores.base import PipelineStore from stores.base import PipelineStore
from scheduler.utils import calculate_next_run, UTC from scheduler.utils import calculate_next_run, UTC
@ -279,10 +279,13 @@ class PipelineService:
# --- Execute Pipeline Logic --- # --- Execute Pipeline Logic ---
run_successful = False run_successful = False
ingestion_output: OutputData | None = None
try: try:
logger.info("Executing core logic...") logger.info("Executing core logic...")
# This call and anything within it will inherit the pipeline_id context # This call and anything within it will inherit the pipeline_id context
await self._execute_ingestion(pipeline.config.ingestor_config) ingestion_output = await self._execute_ingestion(
pipeline.config.ingestor_config
)
logger.info("Core logic finished successfully.") logger.info("Core logic finished successfully.")
run_successful = True run_successful = True
except Exception as e: except Exception as e:
@ -317,6 +320,15 @@ class PipelineService:
if run_successful: if run_successful:
final_pipeline_state.config.last_run = now final_pipeline_state.config.last_run = now
if ingestion_output:
final_pipeline_state.latest_run_output = ingestion_output
else:
logger.warning(
"Run was successful but no ingestion output captured."
)
final_pipeline_state.latest_run_output = None
else:
logger.warning("Run failed.")
current_last_run = final_pipeline_state.config.last_run current_last_run = final_pipeline_state.config.last_run
final_pipeline_state.config.next_run = calculate_next_run( final_pipeline_state.config.next_run = calculate_next_run(
@ -346,17 +358,16 @@ class PipelineService:
) )
# Pipeline might be left ACTIVE or FAILED state might not be saved. Needs monitoring. # Pipeline might be left ACTIVE or FAILED state might not be saved. Needs monitoring.
async def _execute_ingestion(self, config: IngestorInput): async def _execute_ingestion(self, config: IngestorInput) -> OutputData | None:
""" """
Executes the ingestion process for a pipeline using the provided IngestorInput config. Executes the ingestion process for a pipeline using the provided IngestorInput config.
Returns the ingestion results or raises an exception on failure. Returns the ingestion results or raises an exception on failure.
""" """
try: try:
# from ..ingestion import Ingestor
logger.info(f"Executing ingestion with config: {config}") logger.info(f"Executing ingestion with config: {config}")
results = Ingestor.run(config.sources) results: OutputData = Ingestor.run(config.sources)
logger.info( logger.info(
f"Ingestion completed successfully. Results count: {len(results.records)}" f"Ingestion completed successfully. Records count: {len(results.records)}"
) )
return results return results
except ImportError: except ImportError:
@ -364,4 +375,25 @@ class PipelineService:
raise RuntimeError("Ingestion module not found") raise RuntimeError("Ingestion module not found")
except Exception as e: except Exception as e:
logger.error(f"Ingestion execution failed: {e}", exc_info=True) logger.error(f"Ingestion execution failed: {e}", exc_info=True)
raise raise
async def get_pipeline_latest_results(
    self, pipeline_id: UUID
) -> OutputData | None:
    """
    Return the output stored from the latest successful run of a pipeline.

    Args:
        pipeline_id: Identifier of the pipeline to look up.

    Returns:
        The ``OutputData`` captured by the last successful run, or ``None``
        when the pipeline does not exist, has never run, or its last run
        left no stored output (e.g. the run failed).
    """
    logger.debug(f"Getting latest results for pipeline: id={pipeline_id}")
    pipeline = await self.store.get(pipeline_id)
    if not pipeline:
        logger.warning(f"Pipeline {pipeline_id} not found when retrieving results.")
        return None
    # Output is only meaningful when a run has actually been recorded.
    if pipeline.latest_run_output and pipeline.config.last_run:
        # NOTE: can use PipelineRunResult
        return pipeline.latest_run_output
    if pipeline.config.last_run:
        logger.info(
            f"Pipeline {pipeline_id} ran at {pipeline.config.last_run} but has no stored output (or run failed)."
        )
    else:
        logger.info(f"Pipeline {pipeline_id} has no recorded run or output.")
    return None