backend-api/crawler_ai_project_files/log/logging_utils.py

70 lines
2.5 KiB
Python

from loguru import logger
from queue import Queue
from typing import Dict, Optional
import json
# Per-run log queues (thread-safe)
RUN_LOG_QUEUES: Dict[str, Queue] = {}
RUN_LOG_HANDLERS: Dict[str, int] = {}
# Structured log format
def make_log_record(level: str, message: str, pipeline_id: Optional[str], run_id: Optional[str], status: Optional[str] = None, error: Optional[str] = None, extra: Optional[dict] = None) -> dict:
record = {
"level": level,
"message": message,
"pipeline_id": pipeline_id,
"run_id": run_id,
"status": status,
"error": error,
"extra": extra or {},
}
return record
# Custom loguru sink for per-run logging
def log_sink(message):
record = message.record
run_id = record["extra"].get("run_id")
pipeline_id = record["extra"].get("pipeline_id")
if run_id and run_id in RUN_LOG_QUEUES:
# Structure the log as JSON for frontend parsing
log_entry = make_log_record(
level=record["level"].name,
message=record["message"],
pipeline_id=pipeline_id,
run_id=run_id,
status=record["extra"].get("status"),
error=record["extra"].get("error"),
extra=record["extra"]
)
RUN_LOG_QUEUES[run_id].put(json.dumps(log_entry))
# Setup per-run logging sink
def setup_run_logging(pipeline_id: str, run_id: str):
log_queue = Queue()
RUN_LOG_QUEUES[run_id] = log_queue
handler_id = logger.add(
log_sink,
filter=lambda record: record["extra"].get("run_id") == run_id,
enqueue=True
)
RUN_LOG_HANDLERS[run_id] = handler_id
return log_queue
# Remove per-run logging sink and clean up
def cleanup_run_logging(run_id: str):
if run_id in RUN_LOG_HANDLERS:
logger.remove(RUN_LOG_HANDLERS[run_id])
del RUN_LOG_HANDLERS[run_id]
if run_id in RUN_LOG_QUEUES:
del RUN_LOG_QUEUES[run_id]
# Helper for logging with context
def pipeline_log(level: str, message: str, pipeline_id: str, run_id: str, status: Optional[str] = None, error: Optional[str] = None, extra: Optional[dict] = None):
logger.log(level, message, extra={"pipeline_id": pipeline_id, "run_id": run_id, "status": status, "error": error, **(extra or {})})
# Example usage:
# setup_run_logging(pipeline_id, run_id)
# pipeline_log("INFO", "Pipeline started", pipeline_id, run_id, status="RUNNING")
# pipeline_log("ERROR", "Pipeline failed", pipeline_id, run_id, status="FAILED", error="Some error")
# cleanup_run_logging(run_id)