Spaces:
Sleeping
Sleeping
Upload 7 files
Browse files- main.py +70 -0
- requirements.txt +23 -0
- src/agents/cv_agents.py +251 -0
- src/agents/scoring_agent.py +183 -0
- src/config.py +74 -0
- src/models.py +37 -0
- src/services/cv_service.py +82 -0
main.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import inspect
import logging
import os
import tempfile
from typing import Optional

from fastapi import FastAPI, File, HTTPException, Query, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from src.services.cv_service import parse_cv

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="CV Parser API",
    description="API for parsing CVs.",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# overly permissive — restrict origins before a public deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class HealthCheck(BaseModel):
    # Liveness payload returned by the root endpoint.
    status: str = "ok"


@app.get("/", response_model=HealthCheck, tags=["Status"])
async def health_check():
    """Liveness probe; always answers {"status": "ok"}."""
    return HealthCheck()


@app.post("/parse-cv/", tags=["CV Parsing"])
async def parse_cv_endpoint(
    file: UploadFile = File(...),
    user_id: Optional[str] = Query(None, description="ID of the user to link the CV to"),
):
    """
    Parses a CV file (PDF) and returns the parsed data.

    Raises HTTP 400 for non-PDF uploads and HTTP 500 when the extraction
    pipeline yields no data.
    """
    if file.content_type != "application/pdf":
        raise HTTPException(status_code=400, detail="PDF file required")

    contents = await file.read()

    # The parsing pipeline works on filesystem paths, so persist the upload
    # to a temporary file and always remove it afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(contents)
        tmp_path = tmp.name

    try:
        result = await run_in_threadpool(parse_cv, tmp_path, user_id)
        # Defensive: if parse_cv is (or becomes) an async function,
        # run_in_threadpool returns an un-awaited coroutine instead of the
        # parsed data — await it so a raw coroutine object is never
        # serialized back to the client.
        if inspect.isawaitable(result):
            result = await result
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    if not result:
        raise HTTPException(status_code=500, detail="Failed to extract data from CV.")

    return result


if __name__ == "__main__":
    import uvicorn

    port = int(os.getenv("PORT", 8001))  # Use PORT environment variable, default to 8001
    uvicorn.run(app, host="0.0.0.0", port=port)
|
requirements.txt
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi
|
2 |
+
uvicorn[standard]
|
3 |
+
pydantic
|
4 |
+
python-multipart
|
5 |
+
|
6 |
+
langchain-core
|
7 |
+
langchain-community
|
8 |
+
langchain-openai
|
9 |
+
langchain_groq
|
10 |
+
langchain-huggingface
|
11 |
+
crewai
|
12 |
+
crewai-tools
|
13 |
+
sentence_transformers
|
14 |
+
torch
|
15 |
+
transformers
|
16 |
+
sentencepiece
|
17 |
+
accelerate
|
18 |
+
pypdf
|
19 |
+
python-dotenv
|
20 |
+
requests
|
21 |
+
faiss-cpu
|
22 |
+
|
23 |
+
httpx==0.28.1
|
src/agents/cv_agents.py
ADDED
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import logging
|
3 |
+
from typing import Dict, Any, List
|
4 |
+
from crewai import Agent, Task, Crew, Process
|
5 |
+
|
6 |
+
logger = logging.getLogger(__name__)
|
7 |
+
|
8 |
+
class CVAgentOrchestrator:
|
9 |
+
def __init__(self, llm):
|
10 |
+
self.llm = llm
|
11 |
+
self._create_agents()
|
12 |
+
|
13 |
+
def _create_agents(self):
|
14 |
+
self.section_splitter = Agent(
|
15 |
+
role="Analyseur de Structure de CV",
|
16 |
+
goal="Découper intelligemment un CV en sections thématiques",
|
17 |
+
backstory="Expert en analyse documentaire spécialisé dans la reconnaissance de structures de CV.",
|
18 |
+
verbose=False,
|
19 |
+
llm=self.llm
|
20 |
+
)
|
21 |
+
|
22 |
+
self.contact_extractor = Agent(
|
23 |
+
role="Extracteur d'informations de contact",
|
24 |
+
goal="Extraire les coordonnées du candidat",
|
25 |
+
backstory="Expert en extraction d'informations de contact avec précision.",
|
26 |
+
verbose=False,
|
27 |
+
llm=self.llm
|
28 |
+
)
|
29 |
+
|
30 |
+
self.skills_extractor = Agent(
|
31 |
+
role="Extracteur de compétences",
|
32 |
+
goal="Identifier hard skills et soft skills",
|
33 |
+
backstory="Spécialiste en identification de compétences techniques et comportementales.",
|
34 |
+
verbose=False,
|
35 |
+
llm=self.llm
|
36 |
+
)
|
37 |
+
|
38 |
+
self.experience_extractor = Agent(
|
39 |
+
role="Extracteur d'expériences",
|
40 |
+
goal="Extraire les expériences professionnelles",
|
41 |
+
backstory="Expert en analyse de parcours professionnels.",
|
42 |
+
verbose=False,
|
43 |
+
llm=self.llm
|
44 |
+
)
|
45 |
+
|
46 |
+
self.project_extractor = Agent(
|
47 |
+
role="Extracteur de projets",
|
48 |
+
goal="Identifier projets professionnels et personnels",
|
49 |
+
backstory="Spécialiste en identification de projets significatifs.",
|
50 |
+
verbose=False,
|
51 |
+
llm=self.llm
|
52 |
+
)
|
53 |
+
|
54 |
+
self.education_extractor = Agent(
|
55 |
+
role="Extracteur de formations",
|
56 |
+
goal="Extraire formations et diplômes",
|
57 |
+
backstory="Expert en analyse de parcours académiques.",
|
58 |
+
verbose=False,
|
59 |
+
llm=self.llm
|
60 |
+
)
|
61 |
+
|
62 |
+
self.reconversion_detector = Agent(
|
63 |
+
role="Détecteur de reconversion",
|
64 |
+
goal="Analyser les changements de carrière",
|
65 |
+
backstory="Conseiller d'orientation expert en transitions de carrière.",
|
66 |
+
verbose=False,
|
67 |
+
llm=self.llm
|
68 |
+
)
|
69 |
+
|
70 |
+
self.profile_builder = Agent(
|
71 |
+
role="Constructeur de profil",
|
72 |
+
goal="Assembler le profil candidat final",
|
73 |
+
backstory="Expert en structuration de données JSON.",
|
74 |
+
verbose=False,
|
75 |
+
llm=self.llm
|
76 |
+
)
|
77 |
+
|
78 |
+
def split_cv_sections(self, cv_content: str) -> Dict[str, str]:
|
79 |
+
task = Task(
|
80 |
+
description=f"Analyser ce CV et l'organiser en sections: {cv_content}",
|
81 |
+
expected_output="""JSON avec sections: contact, experiences, projects, education, skills, other""",
|
82 |
+
agent=self.section_splitter
|
83 |
+
)
|
84 |
+
|
85 |
+
crew = Crew(
|
86 |
+
agents=[self.section_splitter],
|
87 |
+
tasks=[task],
|
88 |
+
process=Process.sequential,
|
89 |
+
verbose=False,
|
90 |
+
telemetry=False
|
91 |
+
)
|
92 |
+
|
93 |
+
result = crew.kickoff()
|
94 |
+
return self._parse_sections_result(result)
|
95 |
+
|
96 |
+
def extract_all_sections(self, sections: Dict[str, str]) -> Dict[str, Any]:
|
97 |
+
# Créer les tâches avec les sections en input
|
98 |
+
tasks = self._create_extraction_tasks(sections)
|
99 |
+
|
100 |
+
crew = Crew(
|
101 |
+
agents=[
|
102 |
+
self.contact_extractor,
|
103 |
+
self.skills_extractor,
|
104 |
+
self.experience_extractor,
|
105 |
+
self.project_extractor,
|
106 |
+
self.education_extractor,
|
107 |
+
self.reconversion_detector,
|
108 |
+
self.profile_builder
|
109 |
+
],
|
110 |
+
tasks=tasks,
|
111 |
+
process=Process.sequential,
|
112 |
+
verbose=True, # Activer pour debug
|
113 |
+
telemetry=False
|
114 |
+
)
|
115 |
+
|
116 |
+
# Passer les sections comme inputs
|
117 |
+
inputs = {
|
118 |
+
"contact": sections.get("contact", ""),
|
119 |
+
"experiences": sections.get("experiences", ""),
|
120 |
+
"projects": sections.get("projects", ""),
|
121 |
+
"education": sections.get("education", ""),
|
122 |
+
"skills": sections.get("skills", ""),
|
123 |
+
"other": sections.get("other", "")
|
124 |
+
}
|
125 |
+
|
126 |
+
logger.info(f"Starting crew with inputs: {list(inputs.keys())}")
|
127 |
+
result = crew.kickoff(inputs=inputs)
|
128 |
+
logger.info(f"Crew completed. Raw result: {result.raw if hasattr(result, 'raw') else str(result)[:200]}...")
|
129 |
+
|
130 |
+
return self._parse_final_result(result)
|
131 |
+
|
132 |
+
def _create_extraction_tasks(self, sections: Dict[str, str]) -> List[Task]:
|
133 |
+
contact_task = Task(
|
134 |
+
description=(
|
135 |
+
"Voici la section contact du CV : {contact}\n"
|
136 |
+
"Extraire précisément le nom, email, téléphone et localisation du candidat."
|
137 |
+
),
|
138 |
+
expected_output='{"nom": "...", "email": "...", "numero_de_telephone": "...", "localisation": "..."}',
|
139 |
+
agent=self.contact_extractor
|
140 |
+
)
|
141 |
+
|
142 |
+
skills_task = Task(
|
143 |
+
description=(
|
144 |
+
"Voici les sections pertinentes du CV :\n"
|
145 |
+
"Expériences: {experiences}\n"
|
146 |
+
"Projets: {projects}\n"
|
147 |
+
"Compétences: {skills}\n"
|
148 |
+
"Extraire toutes les compétences techniques (hard skills) et comportementales (soft skills) mentionnées."
|
149 |
+
),
|
150 |
+
expected_output='{"hard_skills": ["compétence1", "compétence2"], "soft_skills": ["compétence1", "compétence2"]}',
|
151 |
+
agent=self.skills_extractor
|
152 |
+
)
|
153 |
+
|
154 |
+
experience_task = Task(
|
155 |
+
description=(
|
156 |
+
"Voici la section expériences du CV : {experiences}\n"
|
157 |
+
"Extraire toutes les expériences professionnelles avec poste, entreprise, dates et responsabilités."
|
158 |
+
),
|
159 |
+
expected_output='[{"Poste": "titre", "Entreprise": "nom", "start_date": "date", "end_date": "date", "responsabilités": ["resp1", "resp2"]}]',
|
160 |
+
agent=self.experience_extractor
|
161 |
+
)
|
162 |
+
|
163 |
+
project_task = Task(
|
164 |
+
description=(
|
165 |
+
"Voici les sections projets et expériences du CV :\n"
|
166 |
+
"Projets: {projects}\n"
|
167 |
+
"Identifier et extraire les projets professionnels et personnels distincts des responsabilités générales."
|
168 |
+
),
|
169 |
+
expected_output='{"professional": [{"title": "titre", "technologies": ["tech1"], "outcomes": ["résultat1"]}], "personal": []}',
|
170 |
+
agent=self.project_extractor
|
171 |
+
)
|
172 |
+
|
173 |
+
education_task = Task(
|
174 |
+
description=(
|
175 |
+
"Voici la section formations du CV : {education}\n"
|
176 |
+
"Extraire toutes les formations, diplômes et certifications avec institution et dates."
|
177 |
+
),
|
178 |
+
expected_output='[{"degree": "diplôme", "institution": "établissement", "start_date": "date", "end_date": "date"}]',
|
179 |
+
agent=self.education_extractor
|
180 |
+
)
|
181 |
+
|
182 |
+
reconversion_task = Task(
|
183 |
+
description=(
|
184 |
+
"En analysant les expériences extraites précédemment, déterminer si le candidat est en reconversion professionnelle. "
|
185 |
+
"Chercher des changements de secteur, de type de poste ou des transitions significatives."
|
186 |
+
),
|
187 |
+
expected_output='{"reconversion_analysis": {"is_reconversion": true, "analysis": "Explication détaillée..."}}',
|
188 |
+
agent=self.reconversion_detector,
|
189 |
+
context=[experience_task]
|
190 |
+
)
|
191 |
+
|
192 |
+
profile_task = Task(
|
193 |
+
description=(
|
194 |
+
"Assembler toutes les informations extraites des tâches précédentes en un profil candidat complet. "
|
195 |
+
"Créer un JSON valide avec une clé 'candidat' contenant toutes les sections."
|
196 |
+
),
|
197 |
+
expected_output=(
|
198 |
+
'{"candidat": {'
|
199 |
+
'"informations_personnelles": {...}, '
|
200 |
+
'"compétences": {...}, '
|
201 |
+
'"expériences": [...], '
|
202 |
+
'"projets": {...}, '
|
203 |
+
'"formations": [...], '
|
204 |
+
'"reconversion": {...}'
|
205 |
+
'}}'
|
206 |
+
),
|
207 |
+
agent=self.profile_builder,
|
208 |
+
context=[contact_task, skills_task, experience_task, project_task, education_task, reconversion_task]
|
209 |
+
)
|
210 |
+
|
211 |
+
return [contact_task, skills_task, experience_task, project_task, education_task, reconversion_task, profile_task]
|
212 |
+
|
213 |
+
def _parse_sections_result(self, result) -> Dict[str, str]:
|
214 |
+
result_str = result.raw if hasattr(result, 'raw') else str(result)
|
215 |
+
|
216 |
+
if '```json' in result_str:
|
217 |
+
result_str = result_str.split('```json')[1].split('```')[0].strip()
|
218 |
+
elif '```' in result_str:
|
219 |
+
parts = result_str.split('```')
|
220 |
+
if len(parts) >= 3:
|
221 |
+
result_str = parts[1].strip()
|
222 |
+
|
223 |
+
parsed = json.loads(result_str)
|
224 |
+
|
225 |
+
# Assurer que toutes les sections nécessaires existent
|
226 |
+
default_sections = {
|
227 |
+
"contact": "",
|
228 |
+
"experiences": "",
|
229 |
+
"projects": "",
|
230 |
+
"education": "",
|
231 |
+
"skills": "",
|
232 |
+
"other": ""
|
233 |
+
}
|
234 |
+
|
235 |
+
for key in default_sections:
|
236 |
+
if key not in parsed:
|
237 |
+
parsed[key] = default_sections[key]
|
238 |
+
|
239 |
+
return parsed
|
240 |
+
|
241 |
+
def _parse_final_result(self, result) -> Dict[str, Any]:
|
242 |
+
result_str = result.raw if hasattr(result, 'raw') else str(result)
|
243 |
+
|
244 |
+
if '```json' in result_str:
|
245 |
+
result_str = result_str.split('```json')[1].split('```')[0].strip()
|
246 |
+
elif '```' in result_str:
|
247 |
+
parts = result_str.split('```')
|
248 |
+
if len(parts) >= 3:
|
249 |
+
result_str = parts[1].strip()
|
250 |
+
|
251 |
+
return json.loads(result_str)
|
src/agents/scoring_agent.py
ADDED
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
import logging
import re
from datetime import datetime
from typing import Any, Dict, List, Optional
|
6 |
+
|
7 |
+
logger = logging.getLogger(__name__)
|
8 |
+
|
9 |
+
class SimpleScoringAgent:
    """Rule-based estimator of a candidate's proficiency level per skill.

    Operates purely on the structured candidate dict produced by the CV
    extraction agents — no LLM calls. A skill's level is derived from how
    often it is mentioned, how long it was used, and whether it appears in
    a professional experience.
    """

    def calculate_scores(self, candidat_data: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
        """Return {"analyse_competences": [{"skill": ..., "level": ...}, ...]}.

        Missing/invalid input or an empty skill list yields an empty analysis.
        """
        if not candidat_data or not isinstance(candidat_data, dict):
            return {"analyse_competences": []}

        skills_list = self._extract_skills_list(candidat_data.get("compétences", {}))
        if not skills_list:
            return {"analyse_competences": []}

        return {
            "analyse_competences": [
                {"skill": skill, "level": self._determine_skill_level(skill, candidat_data)}
                for skill in skills_list
            ]
        }

    def _extract_skills_list(self, skills_data: Any) -> List[str]:
        """Flatten the skills structure into a list of non-empty strings.

        Accepts either the dict form ({"hard_skills": [...], "soft_skills":
        [...]}) or a legacy list form of {"nom": ...} entries.
        """
        skills_list: List[Any] = []

        if isinstance(skills_data, dict):
            skills_list.extend(skills_data.get("hard_skills", []))
            skills_list.extend(skills_data.get("soft_skills", []))
        elif isinstance(skills_data, list):
            # assumes list items are dicts with a "nom" key — TODO confirm
            skills_list = [item.get("nom") for item in skills_data if item.get("nom")]

        return [skill for skill in skills_list if skill and isinstance(skill, str) and skill.strip()]

    @staticmethod
    def _get_experiences(candidat_data: Dict[str, Any]) -> List[Any]:
        """Return the experience list, tolerating both the French key and the
        legacy 'experiences_professionnelles' key; [] when absent/malformed."""
        key = "expériences" if "expériences" in candidat_data else "experiences_professionnelles"
        experiences = candidat_data.get(key, [])
        return experiences if isinstance(experiences, list) else []

    def _determine_skill_level(self, skill: str, candidat_data: Dict[str, Any]) -> str:
        """Classify one skill into debutant / intermediaire / avance / expert.

        Thresholds: pro usage >= 3 years -> expert; pro usage >= 1 year ->
        avance; >= 3 mentions or >= 6 months -> intermediaire; else debutant.
        """
        frequency = self._count_skill_mentions(skill, candidat_data)
        max_duration = self._get_max_duration_for_skill(skill, candidat_data)
        has_pro_experience = self._has_professional_experience(skill, candidat_data)

        if has_pro_experience and max_duration >= 3.0:
            return "expert"
        elif has_pro_experience and max_duration >= 1.0:
            return "avance"
        elif frequency >= 3 or max_duration >= 0.5:
            return "intermediaire"
        else:
            return "debutant"

    def _count_skill_mentions(self, skill: str, candidat_data: Dict[str, Any]) -> int:
        """Count case-insensitive substring occurrences of the skill across
        all serialized CV content."""
        all_text = self._get_all_text_content(candidat_data).lower()
        return all_text.count(skill.lower())

    def _get_max_duration_for_skill(self, skill: str, candidat_data: Dict[str, Any]) -> float:
        """Longest single-experience duration (in years) where the skill is
        mentioned; 0.0 when it never appears or dates are unparseable."""
        skill_lower = skill.lower()
        max_duration = 0.0

        for exp in self._get_experiences(candidat_data):
            if not isinstance(exp, dict):
                continue
            # Substring search over the serialized experience catches the
            # skill wherever it appears (title, responsibilities, ...).
            exp_text = json.dumps(exp, ensure_ascii=False).lower()
            if skill_lower in exp_text:
                max_duration = max(max_duration, self._calculate_experience_duration(exp))

        return max_duration

    def _has_professional_experience(self, skill: str, candidat_data: Dict[str, Any]) -> bool:
        """True when the skill appears in at least one professional experience."""
        skill_lower = skill.lower()

        for exp in self._get_experiences(candidat_data):
            if not isinstance(exp, dict):
                continue
            if skill_lower in json.dumps(exp, ensure_ascii=False).lower():
                return True

        return False

    def _get_all_text_content(self, candidat_data: Dict[str, Any]) -> str:
        """Serialize experiences, projects and education into one search string."""
        all_content = []

        # Experiences
        for exp in self._get_experiences(candidat_data):
            if isinstance(exp, dict):
                all_content.append(json.dumps(exp, ensure_ascii=False))

        # Projects
        projects = candidat_data.get("projets", {})
        if isinstance(projects, dict):
            for project_type in ["professional", "personal"]:
                for project in projects.get(project_type, []):
                    if isinstance(project, dict):
                        all_content.append(json.dumps(project, ensure_ascii=False))

        # Education
        for formation in candidat_data.get("formations", []):
            if isinstance(formation, dict):
                all_content.append(json.dumps(formation, ensure_ascii=False))

        return " ".join(all_content)

    def _calculate_experience_duration(self, exp: Dict[str, Any]) -> float:
        """Duration of one experience in years, tolerating both French
        (date_debut/date_fin) and English (start_date/end_date) keys."""
        start_date_str = exp.get("date_debut", exp.get("start_date", ""))
        end_date_str = exp.get("date_fin", exp.get("end_date", ""))

        # Coerce non-string date values (e.g. bare year ints) to strings.
        if not isinstance(start_date_str, str):
            start_date_str = str(start_date_str) if start_date_str else ""
        if not isinstance(end_date_str, str):
            end_date_str = str(end_date_str) if end_date_str else ""

        return self._calculate_duration_in_years(start_date_str, end_date_str)

    def _calculate_duration_in_years(self, start_date_str: str, end_date_str: str) -> float:
        """Years between two date strings; 0.0 on unparseable or inverted dates."""
        start_date = self._parse_date(start_date_str)
        end_date = self._parse_date(end_date_str)

        if start_date and end_date:
            if end_date < start_date:
                return 0.0
            return (end_date - start_date).days / 365.25

        return 0.0

    def _parse_date(self, date_str: str) -> Optional[datetime]:
        """Best-effort date parsing: 'current job' markers map to now, any
        19xx/20xx year maps to Jan 1st of that year, otherwise None.

        (Annotation fixed: this method returns None on failure.)
        """
        if not date_str or not isinstance(date_str, str):
            return None

        date_str_lower = date_str.lower().strip()
        if date_str_lower in ["aujourd'hui", "maintenant", "en cours", "current", "présent", "actuellement"]:
            return datetime.now()

        year_match = re.search(r'\b(20\d{2}|19\d{2})\b', date_str)
        if year_match:
            year = int(year_match.group(1))
            return datetime(year, 1, 1)

        return None


# Aliases kept for backwards compatibility with older imports.
ScoringAgent = SimpleScoringAgent
ImprovedScoringAgent = SimpleScoringAgent
|
src/config.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
load_dotenv()
|
4 |
+
from langchain_groq import ChatGroq
|
5 |
+
from langchain_community.document_loaders import PyPDFLoader
|
6 |
+
from langchain_openai import ChatOpenAI
|
7 |
+
from typing import Dict, List, Any, Tuple, Optional, Type
|
8 |
+
from crewai import LLM
|
9 |
+
#########################################################################################################
# JSON formatting
def format_cv(document):
    """Render a (possibly nested) CV dict as an indented plain-text outline.

    Each top-level key becomes a "Title:" heading (underscores replaced by
    spaces), followed by its content as "- Key: value" bullet lines, with
    nested dicts/lists indented one level deeper. Sections are separated by
    a blank line.
    """
    def format_section(title, data, indent=0):
        # Recursive worker: returns a list of lines for one value.
        # NOTE(review): nested recursive calls pass title="" which emits a
        # bare ":" line before each nested structure — looks unintended but
        # is preserved as existing output format; confirm before changing.
        prefix = " " * indent
        lines = [f"{title}:"]
        if isinstance(data, dict):
            for k, v in data.items():
                if isinstance(v, (dict, list)):
                    lines.append(f"{prefix}- {k.capitalize()}:")
                    lines.extend(format_section("", v, indent + 1))
                else:
                    lines.append(f"{prefix}- {k.capitalize()}: {v}")
        elif isinstance(data, list):
            for i, item in enumerate(data):
                # List entries are numbered starting at 1 ("Élément 1", ...).
                lines.append(f"{prefix}- Élément {i + 1}:")
                lines.extend(format_section("", item, indent + 1))
        else:
            # Scalar leaf: render the value itself as a bullet.
            lines.append(f"{prefix}- {data}")
        return lines
    sections = []
    for section_name, content in document.items():
        title = section_name.replace("_", " ").capitalize()
        sections.extend(format_section(title, content))
        sections.append("")  # blank separator line between sections
    return "\n".join(sections)
|
35 |
+
|
36 |
+
|
37 |
+
def read_system_prompt(file_path):
    """Return the full contents of a UTF-8 text file at *file_path*."""
    with open(file_path, 'r', encoding='utf-8') as prompt_file:
        contents = prompt_file.read()
    return contents
|
40 |
+
|
41 |
+
def load_pdf(pdf_path):
    """Extract the text of a PDF: page contents concatenated, each followed
    by a blank line."""
    pages = PyPDFLoader(pdf_path).load_and_split()
    return "".join(page.page_content + "\n\n" for page in pages)
|
48 |
+
|
49 |
+
#########################################################################################################
|
50 |
+
# modéles
|
51 |
+
|
52 |
+
"""GEMINI_API_KEY = os.getenv("GOOGLE_API_KEY")
|
53 |
+
model_google = "gemini/gemma-3-27b-it"
|
54 |
+
def chat_gemini():
|
55 |
+
llm = ChatGoogleGenerativeAI("gemini/gemma-3-27b-it")"""
|
56 |
+
|
57 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
58 |
+
model_openai = "gpt-4o"
|
59 |
+
|
60 |
+
def crew_openai():
    """Low-temperature gpt-4o-mini client used by the CrewAI agents."""
    return ChatOpenAI(
        model="gpt-4o-mini",
        temperature=0.1,
        api_key=OPENAI_API_KEY,
    )
|
67 |
+
|
68 |
+
def chat_openai():
    """General-purpose gpt-4o chat client (temperature 0.6)."""
    return ChatOpenAI(
        model="gpt-4o",
        temperature=0.6,
        api_key=OPENAI_API_KEY,
    )
|
src/models.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from typing import Dict, Any, Optional
|
3 |
+
|
4 |
+
logger = logging.getLogger(__name__)
|
5 |
+
|
6 |
+
def load_all_models() -> Dict[str, Any]:
    """Best-effort loader for the heavyweight pipeline components.

    Each component is imported lazily and independently, so a failure in one
    never prevents the others from loading; failures are logged and the slot
    stays None. "status" is True only when every component loaded.
    """
    def _load_analyzer():
        from src.core.deep_learning_analyzer import MultiModelInterviewAnalyzer
        return MultiModelInterviewAnalyzer()

    def _load_rag():
        from src.core.rag_handler import get_rag_handler
        return get_rag_handler()

    def _load_llm():
        from src.config import crew_openai
        return crew_openai()

    loaders = {
        "deep_learning_analyzer": ("Deep Learning Analyzer", _load_analyzer),
        "rag_handler": ("RAG Handler", _load_rag),
        "llm": ("LLM", _load_llm),
    }

    models: Dict[str, Any] = {
        "status": False,
        "deep_learning_analyzer": None,
        "rag_handler": None,
        "llm": None,
    }

    for slot, (label, loader) in loaders.items():
        try:
            models[slot] = loader()
            logger.info(f"✅ {label} chargé")
        except Exception as e:
            logger.error(f"❌ Erreur chargement {label}: {e}")

    models["status"] = all(v is not None for k, v in models.items() if k != "status")

    return models
|
src/services/cv_service.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import logging
|
3 |
+
import os
|
4 |
+
from datetime import datetime
|
5 |
+
from typing import Dict, Any, List
|
6 |
+
from src.config import load_pdf
|
7 |
+
from src.agents.cv_agents import CVAgentOrchestrator
|
8 |
+
from src.agents.scoring_agent import SimpleScoringAgent
|
9 |
+
|
10 |
+
logger = logging.getLogger(__name__)
|
11 |
+
|
12 |
+
def parse_cv(pdf_path: str, user_id: str = None) -> Dict[str, Any]:
    """Parse a PDF CV into a structured candidate profile.

    This is a synchronous, blocking function: callers running on an event
    loop must dispatch it to a worker thread (the API layer already uses
    fastapi.concurrency.run_in_threadpool). It was previously declared
    ``async`` despite never awaiting anything, which made
    ``run_in_threadpool(parse_cv, ...)`` return an un-awaited coroutine
    instead of the parsed data.

    Args:
        pdf_path: Filesystem path of the PDF to parse.
        user_id: Optional user identifier — currently unused by the pipeline.

    Returns:
        A dict with a top-level "candidat" key; a fallback skeleton when
        extraction fails or yields no usable data.
    """
    # Initialize orchestrator and scoring agent here. In a real app,
    # consider dependency injection instead of per-call construction.
    # NOTE(review): llm=None is forwarded to every CrewAI Agent — confirm
    # the agents are expected to fall back to their default model config.
    orchestrator = CVAgentOrchestrator(llm=None)
    scoring_agent = SimpleScoringAgent()

    cv_text = load_pdf(pdf_path)
    if not cv_text or not cv_text.strip():
        return _create_fallback_data()

    logger.info(f"CV text loaded: {len(cv_text)} characters")
    sections = orchestrator.split_cv_sections(cv_text)
    logger.info(f"Sections extracted: {list(sections.keys())}")
    cv_data = orchestrator.extract_all_sections(sections)
    logger.info(f"CV data extracted: {cv_data is not None}")

    if not cv_data or not cv_data.get("candidat") or not _is_valid_extraction(cv_data):
        logger.warning("Agent extraction failed or incomplete, using fallback extraction")
        return _create_fallback_data()

    logger.info("Calculating skill levels...")
    scores = scoring_agent.calculate_scores(cv_data["candidat"])
    if scores and scores.get("analyse_competences"):
        cv_data["candidat"].update(scores)
        skills_count = len(scores.get("analyse_competences", []))
        levels_summary = _get_levels_summary(scores.get("analyse_competences", []))
        logger.info(f"Skill levels calculated: {skills_count} skills - {levels_summary}")
    else:
        logger.warning("No skill levels calculated, adding empty analysis")
        cv_data["candidat"]["analyse_competences"] = []

    return cv_data
|
44 |
+
|
45 |
+
def _create_fallback_data() -> Dict[str, Any]:
|
46 |
+
return {
|
47 |
+
"candidat": {
|
48 |
+
"informations_personnelles": {
|
49 |
+
"nom": "Données non extraites",
|
50 |
+
"email": "N/A",
|
51 |
+
"numero_de_telephone": "N/A",
|
52 |
+
"localisation": "N/A"
|
53 |
+
},
|
54 |
+
"compétences": {
|
55 |
+
"hard_skills": [],
|
56 |
+
"soft_skills": []
|
57 |
+
},
|
58 |
+
"expériences": [],
|
59 |
+
"projets": [],
|
60 |
+
"formations": [],
|
61 |
+
"reconversion": {
|
62 |
+
"is_reconversion": False,
|
63 |
+
"analysis": "N/A"
|
64 |
+
},
|
65 |
+
"analyse_competences": []
|
66 |
+
}
|
67 |
+
}
|
68 |
+
|
69 |
+
def _get_levels_summary(competences: List[Dict[str, Any]]) -> str:
|
70 |
+
levels_count = {}
|
71 |
+
for comp in competences:
|
72 |
+
level = comp.get("level", "unknown")
|
73 |
+
levels_count[level] = levels_count.get(level, 0) + 1
|
74 |
+
return ", ".join([f"{count} {level}" for level, count in levels_count.items()])
|
75 |
+
|
76 |
+
def _is_valid_extraction(cv_data: Dict[str, Any]) -> bool:
|
77 |
+
candidat = cv_data.get("candidat", {})
|
78 |
+
has_info = bool(candidat.get("informations_personnelles", {}).get("nom", "").strip())
|
79 |
+
has_skills = bool(candidat.get("compétences", {}).get("hard_skills", []) or
|
80 |
+
candidat.get("compétences", {}).get("soft_skills", []))
|
81 |
+
has_experience = bool(candidat.get("expériences", []))
|
82 |
+
return has_info or has_skills or has_experience
|