"""CV parsing pipeline: PDF text -> sections -> structured data -> skill levels."""

import json
import logging
import os
from collections import Counter
from datetime import datetime
from typing import Dict, Any, List

from src.config import load_pdf
from src.agents.cv_agents import CVAgentOrchestrator
from src.agents.scoring_agent import SimpleScoringAgent

logger = logging.getLogger(__name__)


async def parse_cv(pdf_path: str) -> Dict[str, Any]:
    """Parse the CV stored at *pdf_path* into a structured dictionary.

    The text is loaded with ``load_pdf``, split into sections and extracted
    by a ``CVAgentOrchestrator``, then enriched with the output of
    ``SimpleScoringAgent.calculate_scores``.

    The function is declared ``async`` but performs no ``await`` itself; the
    signature is kept so existing callers can keep awaiting it.

    Args:
        pdf_path: Path of the PDF file handed to ``load_pdf``.

    Returns:
        A dictionary with a top-level ``"candidat"`` entry. When no text can
        be loaded, or when the extraction result is empty or fails
        ``_is_valid_extraction``, the placeholder structure built by
        ``_create_fallback_data`` is returned instead.
    """
    # Initialize orchestrator and scoring agent here or pass them as arguments.
    # For simplicity, initializing them here. In a real app, consider
    # dependency injection.
    orchestrator = CVAgentOrchestrator(llm=None)  # LLM will be passed to agents directly
    scoring_agent = SimpleScoringAgent()

    cv_text = load_pdf(pdf_path)
    if not cv_text or not cv_text.strip():
        # Previously this path was silent, which made empty PDFs hard to
        # diagnose from the logs.
        logger.warning("No text extracted from %s, using fallback data", pdf_path)
        return _create_fallback_data()
    logger.info("CV text loaded: %s characters", len(cv_text))

    sections = orchestrator.split_cv_sections(cv_text)
    logger.info("Sections extracted: %s", list(sections.keys()))

    cv_data = orchestrator.extract_all_sections(sections)
    logger.info("CV data extracted: %s", cv_data is not None)

    if not cv_data or not cv_data.get("candidat") or not _is_valid_extraction(cv_data):
        logger.warning("Agent extraction failed or incomplete, using fallback extraction")
        return _create_fallback_data()

    logger.info("Calculating skill levels...")
    scores = scoring_agent.calculate_scores(cv_data["candidat"])

    if scores and scores.get("analyse_competences"):
        # Merge every key returned by the scoring agent into the candidate.
        cv_data["candidat"].update(scores)
        analysed = scores.get("analyse_competences", [])
        logger.info(
            "Skill levels calculated: %s skills - %s",
            len(analysed),
            _get_levels_summary(analysed),
        )
    else:
        logger.warning("No skill levels calculated, adding empty analysis")
        cv_data["candidat"]["analyse_competences"] = []

    return cv_data


def _create_fallback_data() -> Dict[str, Any]:
    """Return a fresh placeholder CV structure with empty/"N/A" values."""
    return {
        "candidat": {
            "informations_personnelles": {
                "nom": "Données non extraites",
                "email": "N/A",
                "numero_de_telephone": "N/A",
                "localisation": "N/A",
            },
            "compétences": {
                "hard_skills": [],
                "soft_skills": [],
            },
            "expériences": [],
            "projets": [],
            "formations": [],
            "reconversion": {
                "is_reconversion": False,
                "analysis": "N/A",
            },
            "analyse_competences": [],
        }
    }


def _get_levels_summary(competences: List[Dict[str, Any]]) -> str:
    """Summarise how many competences fall under each ``"level"`` value.

    Entries without a ``"level"`` key are counted under ``"unknown"``.

    Returns:
        A comma-separated string such as ``"2 expert, 1 unknown"``, with
        levels listed in the order they are first encountered; an empty
        string when *competences* is empty.
    """
    # Counter is a dict subclass, so first-seen ordering is preserved.
    levels_count = Counter(comp.get("level", "unknown") for comp in competences)
    return ", ".join(f"{count} {level}" for level, count in levels_count.items())


def _is_valid_extraction(cv_data: Dict[str, Any]) -> bool:
    """Tell whether *cv_data* holds at least one usable piece of information.

    The extraction is considered valid when the candidate has a non-blank
    name, at least one hard or soft skill, or at least one experience.
    Keys that are present but set to ``None`` (or to an unexpected type) are
    treated as missing instead of raising ``AttributeError``.
    """
    candidat = cv_data.get("candidat") or {}
    if not isinstance(candidat, dict):
        return False

    personal_info = candidat.get("informations_personnelles") or {}
    name = personal_info.get("nom") if isinstance(personal_info, dict) else None
    has_info = isinstance(name, str) and bool(name.strip())

    skills = candidat.get("compétences") or {}
    has_skills = isinstance(skills, dict) and bool(
        skills.get("hard_skills") or skills.get("soft_skills")
    )

    has_experience = bool(candidat.get("expériences"))

    return has_info or has_skills or has_experience