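"""DUMMY Celery tasks simulating the pipeline run workflow.

run_pipeline_task orchestrates a run: it creates a PipelineRun record, fans out one
dummy_source_task per data source (chord header), and has dummy_aggregate_task (chord
callback) aggregate the results. No real ingestion, processing, or file output happens
here; source results, failures, and DB updates are all simulated.
"""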
import asyncio
import logging
import random
import time
from datetime import datetime

from celery import chord, group, shared_task

from app import crud, models, schemas  # Keep imports for structure
from app.core.db import AsyncSessionFactory  # Use session factory directly in tasks
from app.services.processing_service import ProcessingService  # Import dummy service

logger = logging.getLogger(__name__)


# --- Helper to run async code from sync Celery tasks ---
def async_to_sync(awaitable):
    """Runs an awaitable in a new event loop."""
    return asyncio.run(awaitable)
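
# NOTE: asyncio.run() starts and closes a fresh event loop on each call and raises
# RuntimeError if a loop is already running in the current thread. That is acceptable
# for Celery's default prefork workers, where each task body runs synchronously.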


# --- Dummy Sub-Tasks ---
@shared_task(bind=True, max_retries=1, default_retry_delay=5)
def dummy_source_task(self, source_id: int, source_type: str):
    """DUMMY: Simulates processing any data source type."""
    task_id = self.request.id
    logger.info(f"DUMMY TASK dummy_source_task[ID:{task_id}]: Start DS:{source_id} Type:{source_type}")
    sleep_time = random.uniform(0.05, 0.2)
    time.sleep(sleep_time)  # Simulate work

    # Simulate occasional failure
    if random.random() < 0.08:
        error_msg = f"Simulated failure processing source {source_id}"
        logger.warning(f"DUMMY TASK dummy_source_task[ID:{task_id}]: {error_msg}")
        # A plain raise marks the task as FAILED; Celery only retries if self.retry() is
        # called (or autoretry_for is configured), despite max_retries being set above.
        raise ValueError(error_msg)

    # Simulate successful result (list of dicts)
    num_records = random.randint(1, 3)
    result = [{f"data_{source_id}_{i}": random.random(), "source_type": source_type} for i in range(num_records)]
    logger.info(f"DUMMY TASK dummy_source_task[ID:{task_id}]: Finish DS:{source_id}, generated {num_records} records.")
    return result


# --- Dummy Aggregation Task (Callback) ---
@shared_task(bind=True)
def dummy_aggregate_task(self, results: list, pipeline_id: int, run_id: int):
    """DUMMY: Simulates aggregating results and saving."""
    task_id = self.request.id
    logger.info(
        f"DUMMY TASK dummy_aggregate_task[ID:{task_id}]: Start Aggregation for RunID:{run_id}, PipelineID:{pipeline_id}. Received {len(results)} results."
    )
    log_messages = [f"Aggregation simulation started at {datetime.utcnow()}"]
    final_status = schemas.PipelineStatus.COMPLETED
    output_location = None
    errors_encountered = sum(1 for r in results if isinstance(r, Exception))
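    # NOTE: In a real Celery chord, a failed header task normally errors the callback
    # (ChordError) instead of invoking it with partial results, so the isinstance check
    # above only counts errors if they are delivered to the callback as values. That is
    # an assumption of this dummy workflow, not default chord behaviour.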

    # Instantiate dummy service
    service = ProcessingService()

    async def process_and_save():
        nonlocal output_location, final_status  # Allow modification
        processed_df = None  # Guard so the error check below never hits an unbound name
        try:
            # Call dummy processing service
            processed_df = await service.process_pipeline_results(results, {"dummy_pipeline_cfg": True})

            if not processed_df.empty:
                # Simulate saving (no actual file handler needed here for dummy)
                await asyncio.sleep(0.1)  # Simulate save time
                output_location = f"dummy_outputs/run_{run_id}_output_{datetime.utcnow().strftime('%Y%m%d%H%M%S')}.csv"
                log_messages.append(f"Simulated saving results to {output_location}, shape: {processed_df.shape}")
                logger.info(f"DUMMY AGGREGATION: Simulated save complete to {output_location}")
            else:
                log_messages.append("No data processed after aggregation/filtering.")
                # Keep COMPLETED status if no errors; FAILED is set below when needed

        except Exception as e:
            logger.error(f"DUMMY AGGREGATION: Error during dummy processing: {e}", exc_info=True)
            log_messages.append(f"ERROR during processing: {e}")
            final_status = schemas.PipelineStatus.FAILED
            output_location = None

        if errors_encountered > 0 and final_status != schemas.PipelineStatus.FAILED:
            log_messages.append("Pipeline simulation completed with source task errors.")
            # Optional: Set a specific status like COMPLETED_WITH_ERRORS if needed
            if processed_df is None or processed_df.empty:
                final_status = schemas.PipelineStatus.FAILED  # Fail if there were errors and no data

        # Simulate DB Update
        final_log = "\n".join(log_messages)
        logger.info(f"DUMMY AGGREGATION: Simulating final DB update for RunID:{run_id} to status {final_status}")
        if AsyncSessionFactory:  # Check if DB is configured
            async with AsyncSessionFactory() as session:
                try:
                    # Call dummy CRUD functions
                    await crud.pipeline_run.update_run_status(
                        db=session,
                        run_id=run_id,
                        status=final_status,
                        output_location=output_location,
                        run_log=final_log,
                    )
                    await crud.pipeline.update(  # Use generic update for status
                        db=session,
                        db_obj=models.Pipeline(id=pipeline_id),  # Need a dummy obj for update
                        obj_in={"status": schemas.PipelineStatus.IDLE},
                    )
                    logger.info(f"DUMMY AGGREGATION: DB update simulation successful for RunID:{run_id}.")
                except Exception as db_exc:
                    logger.error(
                        f"DUMMY AGGREGATION: Failed DB update simulation for RunID:{run_id}: {db_exc}", exc_info=True
                    )
        else:
            logger.warning("DUMMY AGGREGATION: Skipping DB update simulation as DB is not configured.")

    async_to_sync(process_and_save())
    logger.info(f"DUMMY TASK dummy_aggregate_task[ID:{task_id}]: Finish Aggregation Simulation for RunID:{run_id}")


# --- Dummy Pipeline Orchestrator Task ---
@shared_task(bind=True)
def run_pipeline_task(self, pipeline_id: int):
    """DUMMY: Simulates fetching pipeline details and scheduling sub-tasks."""
    task_id = self.request.id
    logger.info(
        f"DUMMY TASK run_pipeline_task[ID:{task_id}]: Start Orchestration Simulation for PipelineID:{pipeline_id}"
    )
    run_id = None

    async def setup_and_dispatch():
        nonlocal run_id
        if not AsyncSessionFactory:
            logger.error("Cannot simulate pipeline run: Database not configured.")
            return None, "Database not configured"

        async with AsyncSessionFactory() as session:
            # 1. Get Pipeline (dummy)
            pipeline = await crud.pipeline.get_with_details(session, id=pipeline_id)
            if not pipeline:
                logger.error(f"Pipeline {pipeline_id} not found (simulated).")
                return None, "Pipeline not found"
            if pipeline.status != schemas.PipelineStatus.IDLE:
                logger.warning(f"Pipeline {pipeline_id} not idle (status: {pipeline.status}), skipping run simulation.")
                return None, f"Pipeline status is {pipeline.status}"

            # 2. Create Run Record (dummy)
            run = await crud.pipeline_run.create(
                session, pipeline_id=pipeline_id, celery_task_id=task_id, status=schemas.PipelineStatus.RUNNING
            )
            run_id = run.id
            logger.info(f"Created dummy PipelineRun record with RunID:{run_id}")

            # 3. Update Pipeline Status (dummy)
            await crud.pipeline.update(session, db_obj=pipeline, obj_in={"status": schemas.PipelineStatus.RUNNING})
            logger.info(f"Set dummy Pipeline {pipeline_id} status to RUNNING")

            # 4. Prepare sub-tasks (using dummy sources from get_with_details)
            if not pipeline.data_sources:
                logger.warning(f"No data sources found for pipeline {pipeline_id}. Finishing run.")
                await crud.pipeline_run.update_run_status(
                    session, run_id=run_id, status=schemas.PipelineStatus.COMPLETED, run_log="No data sources found."
                )
                await crud.pipeline.update(session, db_obj=pipeline, obj_in={"status": schemas.PipelineStatus.IDLE})
                return [], None  # No tasks to run

            sub_tasks = [dummy_source_task.s(ds.id, ds.type.value) for ds in pipeline.data_sources]
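            # NOTE: .s(...) builds a Celery signature per source: a serializable, deferred
            # description of the task call that can be composed into the group/chord the
            # orchestrator schedules below.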
            logger.info(f"Prepared {len(sub_tasks)} dummy sub-tasks for RunID:{run_id}")
            return sub_tasks, None

    async def fail_run(error_message: str):
        """Helper to mark run as failed if setup simulation fails."""
        if run_id and AsyncSessionFactory:
            logger.error(f"Simulating run failure for RunID:{run_id} - {error_message}")
            async with AsyncSessionFactory() as session:
                await crud.pipeline_run.update_run_status(
                    db=session,
                    run_id=run_id,
                    status=schemas.PipelineStatus.FAILED,
                    run_log=f"Orchestration failed: {error_message}",
                )
                await crud.pipeline.update(
                    db=session, db_obj=models.Pipeline(id=pipeline_id), obj_in={"status": schemas.PipelineStatus.IDLE}
                )

    try:
        sub_task_signatures, error = async_to_sync(setup_and_dispatch())

        if error:
            logger.error(f"Orchestration setup simulation failed: {error}")
            # Mark the run as failed if a run record was already created (no-op otherwise)
            async_to_sync(fail_run(error))
            return

        if not sub_task_signatures:
            logger.info("No sub-tasks to execute.")
            return  # Setup already marked the run as completed/failed

        # Define the workflow chord
        workflow = chord(
            header=group(sub_task_signatures),
            body=dummy_aggregate_task.s(pipeline_id=pipeline_id, run_id=run_id),  # Ensure run_id is passed
        )
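        # NOTE: In a non-dummy run this is where the chord would be dispatched, e.g. via
        # workflow.apply_async(): the group header tasks run in parallel and the list of
        # their results is passed as the first argument to dummy_aggregate_task. Here the
        # orchestrator only logs the intent, so no worker execution is triggered.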

        # Simulate applying the workflow
        logger.info(
            f"DUMMY TASK run_pipeline_task[ID:{task_id}]: Simulating Celery chord apply_async() for RunID:{run_id}"
        )
        # In a real test you might call workflow() directly to execute synchronously.
        # For this dummy structure, just log the intent.
        logger.info(f"DUMMY TASK run_pipeline_task[ID:{task_id}]: Workflow simulation scheduled for RunID:{run_id}")

    except Exception as exc:
        logger.error(
            f"DUMMY TASK run_pipeline_task[ID:{task_id}]: Orchestration Simulation FAILED: {exc}", exc_info=True
        )
        async_to_sync(fail_run(f"Orchestration simulation exception: {type(exc).__name__}"))