QuentinL52 commited on
Commit
ef9eba2
·
verified ·
1 Parent(s): 19f6793

Upload 7 files

Browse files
main.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import logging
import tempfile

from fastapi import FastAPI, UploadFile, File, HTTPException, Query
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware

from src.services.cv_service import parse_cv

# Module-level logger; basicConfig is a no-op if the root logger was already configured.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="CV Parser API",
    description="API for parsing CVs.",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc"
)

# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# overly permissive (and browsers reject wildcard origins for credentialed
# requests) — confirm the intended set of origins before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
29
+
30
from pydantic import BaseModel


class HealthCheck(BaseModel):
    """Response schema for the liveness probe."""
    # Constant payload: the endpoint only ever reports "ok".
    status: str = "ok"


@app.get("/", response_model=HealthCheck, tags=["Status"])
async def health_check():
    """Liveness probe — always reports the service as up."""
    return HealthCheck(status="ok")
38
+
39
@app.post("/parse-cv/", tags=["CV Parsing"])
async def parse_cv_endpoint(
    file: UploadFile = File(...),
    user_id: str = Query(None, description="ID of the user to link the CV to")
):
    """
    Parses a CV file (PDF) and returns the parsed data.

    Raises:
        HTTPException 400: if the upload is not a PDF.
        HTTPException 500: if no data could be extracted.
    """
    if file.content_type != "application/pdf":
        raise HTTPException(status_code=400, detail="PDF file required")

    contents = await file.read()

    tmp_path = None
    try:
        # Persist the upload to disk because the parser works on a file path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(contents)
            tmp_path = tmp.name

        # BUG FIX: parse_cv is a coroutine function (async def in
        # src.services.cv_service). The original code passed it to
        # run_in_threadpool, which returned an un-awaited coroutine object
        # as `result` instead of the parsed data. Await it directly.
        result = await parse_cv(tmp_path, user_id)
    finally:
        # The finally now also covers failures while writing the temp file.
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)

    if not result:
        raise HTTPException(status_code=500, detail="Failed to extract data from CV.")

    return result
66
+
67
if __name__ == "__main__":
    import uvicorn

    # Honour the PORT environment variable; default to 8001 when unset.
    serve_port = int(os.getenv("PORT", 8001))
    uvicorn.run(app, host="0.0.0.0", port=serve_port)
requirements.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ pydantic
4
+ python-multipart
5
+
6
+ langchain-core
7
+ langchain-community
8
+ langchain-openai
9
+ langchain_groq
10
+ langchain-huggingface
11
+ crewai
12
+ crewai-tools
13
+ sentence_transformers
14
+ torch
15
+ transformers
16
+ sentencepiece
17
+ accelerate
18
+ pypdf
19
+ python-dotenv
20
+ requests
21
+ faiss-cpu
22
+
23
+ httpx==0.28.1
src/agents/cv_agents.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ from typing import Dict, Any, List
4
+ from crewai import Agent, Task, Crew, Process
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+ class CVAgentOrchestrator:
9
+ def __init__(self, llm):
10
+ self.llm = llm
11
+ self._create_agents()
12
+
13
+ def _create_agents(self):
14
+ self.section_splitter = Agent(
15
+ role="Analyseur de Structure de CV",
16
+ goal="Découper intelligemment un CV en sections thématiques",
17
+ backstory="Expert en analyse documentaire spécialisé dans la reconnaissance de structures de CV.",
18
+ verbose=False,
19
+ llm=self.llm
20
+ )
21
+
22
+ self.contact_extractor = Agent(
23
+ role="Extracteur d'informations de contact",
24
+ goal="Extraire les coordonnées du candidat",
25
+ backstory="Expert en extraction d'informations de contact avec précision.",
26
+ verbose=False,
27
+ llm=self.llm
28
+ )
29
+
30
+ self.skills_extractor = Agent(
31
+ role="Extracteur de compétences",
32
+ goal="Identifier hard skills et soft skills",
33
+ backstory="Spécialiste en identification de compétences techniques et comportementales.",
34
+ verbose=False,
35
+ llm=self.llm
36
+ )
37
+
38
+ self.experience_extractor = Agent(
39
+ role="Extracteur d'expériences",
40
+ goal="Extraire les expériences professionnelles",
41
+ backstory="Expert en analyse de parcours professionnels.",
42
+ verbose=False,
43
+ llm=self.llm
44
+ )
45
+
46
+ self.project_extractor = Agent(
47
+ role="Extracteur de projets",
48
+ goal="Identifier projets professionnels et personnels",
49
+ backstory="Spécialiste en identification de projets significatifs.",
50
+ verbose=False,
51
+ llm=self.llm
52
+ )
53
+
54
+ self.education_extractor = Agent(
55
+ role="Extracteur de formations",
56
+ goal="Extraire formations et diplômes",
57
+ backstory="Expert en analyse de parcours académiques.",
58
+ verbose=False,
59
+ llm=self.llm
60
+ )
61
+
62
+ self.reconversion_detector = Agent(
63
+ role="Détecteur de reconversion",
64
+ goal="Analyser les changements de carrière",
65
+ backstory="Conseiller d'orientation expert en transitions de carrière.",
66
+ verbose=False,
67
+ llm=self.llm
68
+ )
69
+
70
+ self.profile_builder = Agent(
71
+ role="Constructeur de profil",
72
+ goal="Assembler le profil candidat final",
73
+ backstory="Expert en structuration de données JSON.",
74
+ verbose=False,
75
+ llm=self.llm
76
+ )
77
+
78
+ def split_cv_sections(self, cv_content: str) -> Dict[str, str]:
79
+ task = Task(
80
+ description=f"Analyser ce CV et l'organiser en sections: {cv_content}",
81
+ expected_output="""JSON avec sections: contact, experiences, projects, education, skills, other""",
82
+ agent=self.section_splitter
83
+ )
84
+
85
+ crew = Crew(
86
+ agents=[self.section_splitter],
87
+ tasks=[task],
88
+ process=Process.sequential,
89
+ verbose=False,
90
+ telemetry=False
91
+ )
92
+
93
+ result = crew.kickoff()
94
+ return self._parse_sections_result(result)
95
+
96
+ def extract_all_sections(self, sections: Dict[str, str]) -> Dict[str, Any]:
97
+ # Créer les tâches avec les sections en input
98
+ tasks = self._create_extraction_tasks(sections)
99
+
100
+ crew = Crew(
101
+ agents=[
102
+ self.contact_extractor,
103
+ self.skills_extractor,
104
+ self.experience_extractor,
105
+ self.project_extractor,
106
+ self.education_extractor,
107
+ self.reconversion_detector,
108
+ self.profile_builder
109
+ ],
110
+ tasks=tasks,
111
+ process=Process.sequential,
112
+ verbose=True, # Activer pour debug
113
+ telemetry=False
114
+ )
115
+
116
+ # Passer les sections comme inputs
117
+ inputs = {
118
+ "contact": sections.get("contact", ""),
119
+ "experiences": sections.get("experiences", ""),
120
+ "projects": sections.get("projects", ""),
121
+ "education": sections.get("education", ""),
122
+ "skills": sections.get("skills", ""),
123
+ "other": sections.get("other", "")
124
+ }
125
+
126
+ logger.info(f"Starting crew with inputs: {list(inputs.keys())}")
127
+ result = crew.kickoff(inputs=inputs)
128
+ logger.info(f"Crew completed. Raw result: {result.raw if hasattr(result, 'raw') else str(result)[:200]}...")
129
+
130
+ return self._parse_final_result(result)
131
+
132
+ def _create_extraction_tasks(self, sections: Dict[str, str]) -> List[Task]:
133
+ contact_task = Task(
134
+ description=(
135
+ "Voici la section contact du CV : {contact}\n"
136
+ "Extraire précisément le nom, email, téléphone et localisation du candidat."
137
+ ),
138
+ expected_output='{"nom": "...", "email": "...", "numero_de_telephone": "...", "localisation": "..."}',
139
+ agent=self.contact_extractor
140
+ )
141
+
142
+ skills_task = Task(
143
+ description=(
144
+ "Voici les sections pertinentes du CV :\n"
145
+ "Expériences: {experiences}\n"
146
+ "Projets: {projects}\n"
147
+ "Compétences: {skills}\n"
148
+ "Extraire toutes les compétences techniques (hard skills) et comportementales (soft skills) mentionnées."
149
+ ),
150
+ expected_output='{"hard_skills": ["compétence1", "compétence2"], "soft_skills": ["compétence1", "compétence2"]}',
151
+ agent=self.skills_extractor
152
+ )
153
+
154
+ experience_task = Task(
155
+ description=(
156
+ "Voici la section expériences du CV : {experiences}\n"
157
+ "Extraire toutes les expériences professionnelles avec poste, entreprise, dates et responsabilités."
158
+ ),
159
+ expected_output='[{"Poste": "titre", "Entreprise": "nom", "start_date": "date", "end_date": "date", "responsabilités": ["resp1", "resp2"]}]',
160
+ agent=self.experience_extractor
161
+ )
162
+
163
+ project_task = Task(
164
+ description=(
165
+ "Voici les sections projets et expériences du CV :\n"
166
+ "Projets: {projects}\n"
167
+ "Identifier et extraire les projets professionnels et personnels distincts des responsabilités générales."
168
+ ),
169
+ expected_output='{"professional": [{"title": "titre", "technologies": ["tech1"], "outcomes": ["résultat1"]}], "personal": []}',
170
+ agent=self.project_extractor
171
+ )
172
+
173
+ education_task = Task(
174
+ description=(
175
+ "Voici la section formations du CV : {education}\n"
176
+ "Extraire toutes les formations, diplômes et certifications avec institution et dates."
177
+ ),
178
+ expected_output='[{"degree": "diplôme", "institution": "établissement", "start_date": "date", "end_date": "date"}]',
179
+ agent=self.education_extractor
180
+ )
181
+
182
+ reconversion_task = Task(
183
+ description=(
184
+ "En analysant les expériences extraites précédemment, déterminer si le candidat est en reconversion professionnelle. "
185
+ "Chercher des changements de secteur, de type de poste ou des transitions significatives."
186
+ ),
187
+ expected_output='{"reconversion_analysis": {"is_reconversion": true, "analysis": "Explication détaillée..."}}',
188
+ agent=self.reconversion_detector,
189
+ context=[experience_task]
190
+ )
191
+
192
+ profile_task = Task(
193
+ description=(
194
+ "Assembler toutes les informations extraites des tâches précédentes en un profil candidat complet. "
195
+ "Créer un JSON valide avec une clé 'candidat' contenant toutes les sections."
196
+ ),
197
+ expected_output=(
198
+ '{"candidat": {'
199
+ '"informations_personnelles": {...}, '
200
+ '"compétences": {...}, '
201
+ '"expériences": [...], '
202
+ '"projets": {...}, '
203
+ '"formations": [...], '
204
+ '"reconversion": {...}'
205
+ '}}'
206
+ ),
207
+ agent=self.profile_builder,
208
+ context=[contact_task, skills_task, experience_task, project_task, education_task, reconversion_task]
209
+ )
210
+
211
+ return [contact_task, skills_task, experience_task, project_task, education_task, reconversion_task, profile_task]
212
+
213
+ def _parse_sections_result(self, result) -> Dict[str, str]:
214
+ result_str = result.raw if hasattr(result, 'raw') else str(result)
215
+
216
+ if '```json' in result_str:
217
+ result_str = result_str.split('```json')[1].split('```')[0].strip()
218
+ elif '```' in result_str:
219
+ parts = result_str.split('```')
220
+ if len(parts) >= 3:
221
+ result_str = parts[1].strip()
222
+
223
+ parsed = json.loads(result_str)
224
+
225
+ # Assurer que toutes les sections nécessaires existent
226
+ default_sections = {
227
+ "contact": "",
228
+ "experiences": "",
229
+ "projects": "",
230
+ "education": "",
231
+ "skills": "",
232
+ "other": ""
233
+ }
234
+
235
+ for key in default_sections:
236
+ if key not in parsed:
237
+ parsed[key] = default_sections[key]
238
+
239
+ return parsed
240
+
241
+ def _parse_final_result(self, result) -> Dict[str, Any]:
242
+ result_str = result.raw if hasattr(result, 'raw') else str(result)
243
+
244
+ if '```json' in result_str:
245
+ result_str = result_str.split('```json')[1].split('```')[0].strip()
246
+ elif '```' in result_str:
247
+ parts = result_str.split('```')
248
+ if len(parts) >= 3:
249
+ result_str = parts[1].strip()
250
+
251
+ return json.loads(result_str)
src/agents/scoring_agent.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import logging
import re
from datetime import datetime
from typing import Any, Dict, List, Optional
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
class SimpleScoringAgent:
    """Rule-based (LLM-free) scorer that assigns a proficiency level to each
    skill found in a parsed CV.

    Levels, weakest to strongest: "debutant", "intermediaire", "avance",
    "expert". They are derived from three heuristics: mention frequency
    across the CV, the longest experience duration in which the skill
    appears, and whether it appears in a professional experience at all.
    """

    def calculate_scores(self, candidat_data: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
        """Return {"analyse_competences": [{"skill": ..., "level": ...}, ...]}.

        Returns an empty analysis for missing/invalid input or when no
        skills could be extracted.
        """
        if not candidat_data or not isinstance(candidat_data, dict):
            return {"analyse_competences": []}

        skills_data = candidat_data.get("compétences", {})
        skills_list = self._extract_skills_list(skills_data)

        if not skills_list:
            return {"analyse_competences": []}

        skill_analysis = []
        for skill in skills_list:
            level = self._determine_skill_level(skill, candidat_data)
            skill_analysis.append({
                "skill": skill,
                "level": level
            })

        return {"analyse_competences": skill_analysis}

    def _extract_skills_list(self, skills_data: Dict[str, Any]) -> List[str]:
        """Flatten the skills payload into a list of non-empty strings.

        Accepts either the dict form ({"hard_skills": [...], "soft_skills":
        [...]}) or a list of {"nom": ...} entries.
        """
        skills_list = []

        if isinstance(skills_data, dict):
            skills_list.extend(skills_data.get("hard_skills", []))
            skills_list.extend(skills_data.get("soft_skills", []))
        elif isinstance(skills_data, list):
            skills_list = [item.get("nom") for item in skills_data if item.get("nom")]

        return [skill for skill in skills_list if skill and isinstance(skill, str) and skill.strip()]

    def _determine_skill_level(self, skill: str, candidat_data: Dict[str, Any]) -> str:
        """Classify one skill using simple threshold rules."""
        frequency = self._count_skill_mentions(skill, candidat_data)
        max_duration = self._get_max_duration_for_skill(skill, candidat_data)
        has_pro_experience = self._has_professional_experience(skill, candidat_data)

        # Heuristic thresholds (years): 3+ professional years -> expert,
        # 1+ professional year -> advanced; otherwise frequency or a short
        # duration decides between intermediate and beginner.
        if has_pro_experience and max_duration >= 3.0:
            return "expert"
        elif has_pro_experience and max_duration >= 1.0:
            return "avance"
        elif frequency >= 3 or max_duration >= 0.5:
            return "intermediaire"
        else:
            return "debutant"

    def _count_skill_mentions(self, skill: str, candidat_data: Dict[str, Any]) -> int:
        """Case-insensitive substring count of the skill across the whole CV.

        (Cleanup: the original initialised the counter twice; the first
        assignment was dead code.)
        """
        all_text = self._get_all_text_content(candidat_data).lower()
        return all_text.count(skill.lower())

    def _get_max_duration_for_skill(self, skill: str, candidat_data: Dict[str, Any]) -> float:
        """Longest duration (in years) of any experience mentioning the skill."""
        skill_lower = skill.lower()
        max_duration = 0.0

        # The extraction pipeline may emit either key depending on the agent run.
        experiences_key = "expériences" if "expériences" in candidat_data else "experiences_professionnelles"
        experiences = candidat_data.get(experiences_key, [])

        if not isinstance(experiences, list):
            return 0.0

        for exp in experiences:
            if not isinstance(exp, dict):
                continue

            # Serialise the whole experience so the search covers every field.
            exp_text = json.dumps(exp, ensure_ascii=False).lower()

            if skill_lower in exp_text:
                duration = self._calculate_experience_duration(exp)
                max_duration = max(max_duration, duration)

        return max_duration

    def _has_professional_experience(self, skill: str, candidat_data: Dict[str, Any]) -> bool:
        """True when the skill appears in any professional experience entry."""
        skill_lower = skill.lower()

        experiences_key = "expériences" if "expériences" in candidat_data else "experiences_professionnelles"
        experiences = candidat_data.get(experiences_key, [])

        if not isinstance(experiences, list):
            return False

        for exp in experiences:
            if not isinstance(exp, dict):
                continue

            exp_text = json.dumps(exp, ensure_ascii=False).lower()
            if skill_lower in exp_text:
                return True

        return False

    def _get_all_text_content(self, candidat_data: Dict[str, Any]) -> str:
        """Concatenate the textual content of experiences, projects and
        educations into a single searchable string."""
        all_content = []

        # Experiences
        experiences_key = "expériences" if "expériences" in candidat_data else "experiences_professionnelles"
        for exp in candidat_data.get(experiences_key, []):
            if isinstance(exp, dict):
                all_content.append(json.dumps(exp, ensure_ascii=False))

        # Projects (both professional and personal lists)
        projects = candidat_data.get("projets", {})
        if isinstance(projects, dict):
            for project_type in ["professional", "personal"]:
                for project in projects.get(project_type, []):
                    if isinstance(project, dict):
                        all_content.append(json.dumps(project, ensure_ascii=False))

        # Educations
        for formation in candidat_data.get("formations", []):
            if isinstance(formation, dict):
                all_content.append(json.dumps(formation, ensure_ascii=False))

        return " ".join(all_content)

    def _calculate_experience_duration(self, exp: Dict[str, Any]) -> float:
        """Duration of one experience in years (0.0 when dates are unusable)."""
        start_date_str = exp.get("date_debut", exp.get("start_date", ""))
        end_date_str = exp.get("date_fin", exp.get("end_date", ""))

        # Dates may arrive as non-strings (e.g. ints) from the LLM output.
        if not isinstance(start_date_str, str):
            start_date_str = str(start_date_str) if start_date_str else ""
        if not isinstance(end_date_str, str):
            end_date_str = str(end_date_str) if end_date_str else ""

        return self._calculate_duration_in_years(start_date_str, end_date_str)

    def _calculate_duration_in_years(self, start_date_str: str, end_date_str: str) -> float:
        """Span between two parseable dates, in (fractional) years."""
        start_date = self._parse_date(start_date_str)
        end_date = self._parse_date(end_date_str)

        if start_date and end_date:
            if end_date < start_date:
                # Inverted ranges are treated as unusable rather than negative.
                return 0.0
            return (end_date - start_date).days / 365.25

        return 0.0

    def _parse_date(self, date_str: str) -> Optional[datetime]:
        """Best-effort date parsing: "present"-style words map to now; else the
        first 4-digit year (1900-2099) maps to January 1st of that year.

        FIX: return type annotated Optional[datetime] — the original claimed
        `datetime` but returns None for unparseable input.
        """
        if not date_str or not isinstance(date_str, str):
            return None

        date_str_lower = date_str.lower().strip()
        if date_str_lower in ["aujourd'hui", "maintenant", "en cours", "current", "présent", "actuellement"]:
            return datetime.now()

        # Simple year extraction (19xx / 20xx).
        year_match = re.search(r'\b(20\d{2}|19\d{2})\b', date_str)
        if year_match:
            year = int(year_match.group(1))
            return datetime(year, 1, 1)

        return None

# Aliases kept for backward compatibility with older import sites.
ScoringAgent = SimpleScoringAgent
ImprovedScoringAgent = SimpleScoringAgent
src/config.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from dotenv import load_dotenv
# Load .env before the heavy imports so any environment variables they (or the
# module-level os.getenv calls below) read are already populated.
load_dotenv()
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI
from typing import Dict, List, Any, Tuple, Optional, Type
from crewai import LLM
9
#########################################################################################################
# JSON formatting

def format_cv(document):
    """Render a parsed-CV dict as an indented, human-readable text outline.

    Dicts become "- Key: value" bullets, lists become "- Élément N:" bullets,
    and nested containers recurse one indent level deeper. Sections are
    separated by blank lines. (Recursive calls pass an empty title, which
    yields a bare ":" line — behaviour preserved from the original.)
    """
    def render(title, value, depth=0):
        pad = " " * depth
        out = [f"{title}:"]
        if isinstance(value, dict):
            for key, item in value.items():
                if isinstance(item, (dict, list)):
                    out.append(f"{pad}- {key.capitalize()}:")
                    out.extend(render("", item, depth + 1))
                else:
                    out.append(f"{pad}- {key.capitalize()}: {item}")
        elif isinstance(value, list):
            for idx, item in enumerate(value, start=1):
                out.append(f"{pad}- Élément {idx}:")
                out.extend(render("", item, depth + 1))
        else:
            out.append(f"{pad}- {value}")
        return out

    rendered = []
    for section_name, body in document.items():
        # "section_name" like "informations_personnelles" -> "Informations personnelles"
        rendered.extend(render(section_name.replace("_", " ").capitalize(), body))
        rendered.append("")
    return "\n".join(rendered)
35
+
36
+
37
def read_system_prompt(file_path):
    """Return the full text content of a UTF-8 prompt file."""
    with open(file_path, 'r', encoding='utf-8') as prompt_file:
        return prompt_file.read()
40
+
41
def load_pdf(pdf_path):
    """Extract plain text from a PDF; pages are separated by blank lines."""
    pages = PyPDFLoader(pdf_path).load_and_split()
    # Each page contributes its content followed by "\n\n", same as the
    # original accumulation loop.
    return "".join(page.page_content + "\n\n" for page in pages)
48
+
49
#########################################################################################################
# models

# Disabled Gemini experiment, kept as a module-level string literal (no-op at runtime):
"""GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
model_google = "gemini/gemma-3-27b-it"
def chat_gemini():
    llm = ChatGoogleGenerativeAI("gemini/gemma-3-27b-it")"""

# Read once at import time (after load_dotenv() above); None if the env var is unset.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# NOTE(review): this constant is never used by the factories below —
# chat_openai() hard-codes "gpt-4o" and crew_openai() uses "gpt-4o-mini".
model_openai = "gpt-4o"
59
+
60
def crew_openai():
    """Factory for the low-temperature gpt-4o-mini client used by CrewAI agents."""
    return ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0.1,
        api_key=OPENAI_API_KEY,
    )
67
+
68
def chat_openai():
    """Factory for the higher-temperature gpt-4o client (chat use cases)."""
    return ChatOpenAI(
        model="gpt-4o",
        temperature=0.6,
        api_key=OPENAI_API_KEY,
    )
src/models.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import Dict, Any, Optional
3
+
4
+ logger = logging.getLogger(__name__)
5
+
6
def load_all_models() -> Dict[str, Any]:
    """Eagerly load the heavyweight components used by the service.

    Each loader failure is logged and leaves its slot as None; "status" is
    True only when every component loaded successfully.
    """
    def _load_analyzer():
        from src.core.deep_learning_analyzer import MultiModelInterviewAnalyzer
        return MultiModelInterviewAnalyzer()

    def _load_rag():
        from src.core.rag_handler import get_rag_handler
        return get_rag_handler()

    def _load_llm():
        from src.config import crew_openai
        return crew_openai()

    models: Dict[str, Any] = {
        "status": False,
        "deep_learning_analyzer": None,
        "rag_handler": None,
        "llm": None,
    }

    # (key, log label, loader) — attempted in the same order as before.
    loaders = [
        ("deep_learning_analyzer", "Deep Learning Analyzer", _load_analyzer),
        ("rag_handler", "RAG Handler", _load_rag),
        ("llm", "LLM", _load_llm),
    ]

    for key, label, loader in loaders:
        try:
            models[key] = loader()
            logger.info(f"✅ {label} chargé")
        except Exception as e:
            logger.error(f"❌ Erreur chargement {label}: {e}")

    models["status"] = all(v is not None for k, v in models.items() if k != "status")

    return models
src/services/cv_service.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import os
4
+ from datetime import datetime
5
+ from typing import Dict, Any, List
6
+ from src.config import load_pdf
7
+ from src.agents.cv_agents import CVAgentOrchestrator
8
+ from src.agents.scoring_agent import SimpleScoringAgent
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
async def parse_cv(pdf_path: str, user_id: str = None) -> Dict[str, Any]:
    """Parse a CV PDF into the structured {"candidat": {...}} payload.

    Args:
        pdf_path: path to the PDF file on disk.
        user_id: optional caller-supplied ID; currently accepted but unused
            by the pipeline (kept for interface stability).

    Returns:
        The extracted profile, enriched with per-skill level analysis, or
        neutral fallback data whenever extraction fails at any stage.
    """
    # NOTE(review): llm=None builds every agent without an explicit LLM —
    # presumably CrewAI falls back to an environment-configured default; confirm.
    orchestrator = CVAgentOrchestrator(llm=None)
    scoring_agent = SimpleScoringAgent()

    cv_text = load_pdf(pdf_path)
    if not cv_text or not cv_text.strip():
        return _create_fallback_data()

    logger.info(f"CV text loaded: {len(cv_text)} characters")
    try:
        sections = orchestrator.split_cv_sections(cv_text)
        logger.info(f"Sections extracted: {list(sections.keys())}")
        cv_data = orchestrator.extract_all_sections(sections)
    except Exception:
        # ROBUSTNESS FIX: the orchestrator json.loads() the LLM output and can
        # raise (e.g. JSONDecodeError on non-JSON output). The module's design
        # is to degrade to fallback data rather than surface a raw 500.
        logger.exception("Agent pipeline raised, using fallback extraction")
        return _create_fallback_data()
    logger.info(f"CV data extracted: {cv_data is not None}")

    if not cv_data or not cv_data.get("candidat") or not _is_valid_extraction(cv_data):
        logger.warning("Agent extraction failed or incomplete, using fallback extraction")
        return _create_fallback_data()

    logger.info("Calculating skill levels...")
    scores = scoring_agent.calculate_scores(cv_data["candidat"])
    if scores and scores.get("analyse_competences"):
        cv_data["candidat"].update(scores)
        skills_count = len(scores.get("analyse_competences", []))
        levels_summary = _get_levels_summary(scores.get("analyse_competences", []))
        logger.info(f"Skill levels calculated: {skills_count} skills - {levels_summary}")
    else:
        logger.warning("No skill levels calculated, adding empty analysis")
        cv_data["candidat"]["analyse_competences"] = []

    return cv_data
44
+
45
+ def _create_fallback_data() -> Dict[str, Any]:
46
+ return {
47
+ "candidat": {
48
+ "informations_personnelles": {
49
+ "nom": "Données non extraites",
50
+ "email": "N/A",
51
+ "numero_de_telephone": "N/A",
52
+ "localisation": "N/A"
53
+ },
54
+ "compétences": {
55
+ "hard_skills": [],
56
+ "soft_skills": []
57
+ },
58
+ "expériences": [],
59
+ "projets": [],
60
+ "formations": [],
61
+ "reconversion": {
62
+ "is_reconversion": False,
63
+ "analysis": "N/A"
64
+ },
65
+ "analyse_competences": []
66
+ }
67
+ }
68
+
69
+ def _get_levels_summary(competences: List[Dict[str, Any]]) -> str:
70
+ levels_count = {}
71
+ for comp in competences:
72
+ level = comp.get("level", "unknown")
73
+ levels_count[level] = levels_count.get(level, 0) + 1
74
+ return ", ".join([f"{count} {level}" for level, count in levels_count.items()])
75
+
76
+ def _is_valid_extraction(cv_data: Dict[str, Any]) -> bool:
77
+ candidat = cv_data.get("candidat", {})
78
+ has_info = bool(candidat.get("informations_personnelles", {}).get("nom", "").strip())
79
+ has_skills = bool(candidat.get("compétences", {}).get("hard_skills", []) or
80
+ candidat.get("compétences", {}).get("soft_skills", []))
81
+ has_experience = bool(candidat.get("expériences", []))
82
+ return has_info or has_skills or has_experience