YoussefMorad1 committed on
Commit
d46f9de
·
1 Parent(s): 0c1528f

Add application file

Browse files
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

# Create and switch to an unprivileged user (UID 1000) so the service does
# not run as root.
RUN useradd -m -u 1000 user
USER user
# Put the user's local bin directory on PATH so executables installed under
# the user's home (e.g. by pip) can be found.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy and install the requirements before the sources, so a source-only
# change does not invalidate the dependency layer.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

ENV PORT=7860

# main.py names its composed FastAPI instance `main_app`, so that is the
# attribute uvicorn has to load.
CMD ["uvicorn", "main:main_app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from skills_extraction.skills_extraction import app as skills_app
3
+ from semantic_similarity.semantic_similarity import app as similarity_app
4
+
5
# Root application that hosts the two feature apps as sub-applications.
main_app = FastAPI()

# Mount the two apps under different routes
main_app.mount("/skills", skills_app)
main_app.mount("/similarity", similarity_app)

# Also expose the composed application as ``app`` so that an
# ``uvicorn main:app`` invocation resolves; ``main_app`` stays available
# for existing references.
app = main_app
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers
4
+ torch
5
+ pydantic
6
+ numpy
7
+ sentence_transformers
semantic_similarity/DTOs.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional, List
2
+ from pydantic import BaseModel
3
+ from .models import *
4
+
5
+
6
+ # Needed for testing
7
class SkillPairInput(BaseModel):
    """Two free-text skills whose similarity is to be scored."""

    # First skill of the pair.
    skill1: str
    # Second skill of the pair.
    skill2: str
10
+
11
+
12
+ # Skill Matching
13
class SkillsMatchingRequest(BaseModel):
    """Request payload for matching job skills against user skills."""

    # Skills required by the job.
    jobSkills: List[JobSkill]
    # Skills the user has.
    userSkills: List[UserSkill]
    # Minimum similarity score for a pair to count as a match.
    similarityThreshold: Optional[float] = 0.7
17
+
18
+
19
class MatchedSkill(BaseModel):
    """One job skill paired with a user skill and their similarity score."""

    jobSkill: JobSkill
    userSkill: UserSkill
    # Similarity score of the pair.
    similarity: float
23
+
24
+
25
class MatchingSkillsResponse(BaseModel):
    """Result of a skill-matching request."""

    # Pairs that were matched.
    matchedSkills: List[MatchedSkill]
    # Job skills that ended up in no pair.
    unmatchedJobSkills: List[JobSkill]
    # User skills that ended up in no pair.
    unmatchedUserSkills: List[UserSkill]
29
+
30
+
31
+ # Project Matching
32
+
33
class ProjectsMatchingRequest(BaseModel):
    """Request payload for matching job skills against project skills."""

    # Skills required by the job.
    jobSkills: List[JobSkill]
    # Projects whose skills are to be analysed.
    projects: List[Project]
    # Minimum similarity score for a pair to count as a match.
    similarityThreshold: Optional[float] = 0.7
37
+
38
+
39
class MatchedProjectSkill(BaseModel):
    """One job skill paired with a project skill and their similarity score."""

    jobSkill: JobSkill
    projectSkill: ProjectSkill
    # Similarity score of the pair.
    similarity: float
43
+
44
+
45
class MatchedProject(BaseModel):
    """A project together with the job skills matched against it."""

    project: Project
    # Matched (job skill, project skill) pairs for this project.
    matchedSkills: List[MatchedProjectSkill]
    # Number of matched pairs.
    matchedSkillsCount: int
49
+
50
+
51
class MatchingProjectsResponse(BaseModel):
    """Result of a project-matching request."""

    # One entry per analysed project.
    allAnalyzedProjects: List[MatchedProject]
semantic_similarity/__init__.py ADDED
File without changes
semantic_similarity/__pycache__/DTOs.cpython-39.pyc ADDED
Binary file (2.23 kB). View file
 
semantic_similarity/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (171 Bytes). View file
 
semantic_similarity/__pycache__/models.cpython-39.pyc ADDED
Binary file (1.04 kB). View file
 
semantic_similarity/__pycache__/semantic_similarity.cpython-39.pyc ADDED
Binary file (3.33 kB). View file
 
semantic_similarity/models.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import List
3
+
4
+
5
class BaseSkill(BaseModel):
    """Common shape of a skill: an integer id plus its text."""

    # Integer identifier of the skill.
    id: int
    # Text of the skill.
    skill: str
8
+
9
+
10
class UserSkill(BaseSkill):
    """A skill belonging to a user; adds no fields to BaseSkill."""
12
+
13
+
14
class ProjectSkill(BaseSkill):
    """A skill belonging to a project; adds no fields to BaseSkill."""
16
+
17
+
18
class JobSkill(BaseSkill):
    """A skill belonging to a job; adds no fields to BaseSkill."""
20
+
21
+
22
class Project(BaseModel):
    """A project identified by an integer id, with its list of skills."""

    # Integer identifier of the project.
    id: int
    # Skills attached to the project.
    skills: List[ProjectSkill]
semantic_similarity/semantic_similarity.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uvicorn
2
+ from fastapi import FastAPI
3
+ from sentence_transformers import SentenceTransformer, util
4
+ from .models import *
5
+ from .DTOs import *
6
+
7
app = FastAPI(title="Skill Embedding API")

# The embedding model is created once, when this module is imported, and is
# shared by all endpoints below.
# Possible alternative checkpoint to evaluate:
# https://huggingface.co/burakkececi/bert-software-engineering
_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(_MODEL_NAME)
12
+
13
+
14
+ # --- Endpoints ---
15
+
16
@app.post("/similarity")
def similarity(pair: SkillPairInput):
    """Score how similar two skills are.

    Both skills are embedded with the shared model and compared by cosine
    similarity.

    Args:
        pair: The two skill strings to compare.

    Returns:
        A dict echoing both skills together with their similarity score.
    """
    first, second = pair.skill1, pair.skill2
    first_vec = model.encode(first, convert_to_tensor=True)
    second_vec = model.encode(second, convert_to_tensor=True)
    # cos_sim returns a tensor; .item() extracts the plain Python number.
    score = util.cos_sim(first_vec, second_vec).item()
    return {"skill1": first, "skill2": second, "similarity": score}
26
+
27
+
28
@app.post("/match-skills", response_model=MatchingSkillsResponse)
def match_skills(req: SkillsMatchingRequest):
    """Pair each job skill with its most similar user skill.

    Every job skill is compared with every user skill by cosine similarity
    of their embeddings. A job skill is matched to its best-scoring user
    skill when that score reaches the threshold, so one user skill may be
    matched to several job skills.

    Args:
        req: Job skills, user skills and an optional similarity threshold.

    Returns:
        MatchingSkillsResponse holding the matched pairs plus the job and
        user skills that took part in no pair, both in request order.
    """
    job_skills = req.jobSkills
    user_skills = req.userSkills
    # The field is Optional, so an explicit null must fall back to the
    # default instead of reaching the ``>=`` comparison below.
    threshold = 0.7 if req.similarityThreshold is None else req.similarityThreshold

    # With nothing on one side there is no similarity row to take an argmax
    # over, so report everything as unmatched without invoking the model.
    if not job_skills or not user_skills:
        return MatchingSkillsResponse(
            matchedSkills=[],
            unmatchedJobSkills=list(job_skills),
            unmatchedUserSkills=list(user_skills),
        )

    job_embeddings = model.encode(
        [j.skill for j in job_skills], convert_to_tensor=True
    )
    user_embeddings = model.encode(
        [u.skill for u in user_skills], convert_to_tensor=True
    )

    # One row per job skill, one column per user skill.
    similarity_matrix = util.cos_sim(job_embeddings, user_embeddings)

    matched = []
    matched_job_indices = set()
    matched_user_indices = set()

    for i, job_row in enumerate(similarity_matrix):
        best_idx = int(job_row.argmax())
        best_score = float(job_row[best_idx])

        if best_score >= threshold:
            matched.append(MatchedSkill(
                jobSkill=job_skills[i],
                userSkill=user_skills[best_idx],
                similarity=best_score,
            ))
            matched_job_indices.add(i)
            matched_user_indices.add(best_idx)

    # Filter the original lists so the output order never depends on set
    # iteration order.
    unmatched_jobs = [
        skill for i, skill in enumerate(job_skills)
        if i not in matched_job_indices
    ]
    unmatched_users = [
        skill for i, skill in enumerate(user_skills)
        if i not in matched_user_indices
    ]

    return MatchingSkillsResponse(
        matchedSkills=matched,
        unmatchedJobSkills=unmatched_jobs,
        unmatchedUserSkills=unmatched_users,
    )
67
+
68
+
69
@app.post("/match-projects-skills", response_model=MatchingProjectsResponse)
def match_projects_skills(req: ProjectsMatchingRequest):
    """Match the job skills against the skills of every project.

    For each project, every job skill is compared with the project's skills
    by cosine similarity of their embeddings and is paired with the
    best-scoring project skill when that score reaches the threshold.

    Args:
        req: Job skills, projects and an optional similarity threshold.

    Returns:
        MatchingProjectsResponse with one entry per project in request
        order, each listing its matched pairs and their count.
    """
    job_skills = req.jobSkills
    projects = req.projects
    # The field is Optional, so an explicit null must fall back to the
    # default instead of reaching the ``>=`` comparison below.
    threshold = 0.7 if req.similarityThreshold is None else req.similarityThreshold

    # Job embeddings are the same for every project, so compute them once;
    # skip the model entirely when there are no job skills.
    job_embeddings = None
    if job_skills:
        job_embeddings = model.encode(
            [job.skill for job in job_skills], convert_to_tensor=True
        )

    matched_projects: List[MatchedProject] = []

    for project in projects:
        matched_skills: List[MatchedProjectSkill] = []

        # A project without skills (or a request without job skills) has no
        # similarity row to take an argmax over; it simply has no matches.
        if job_embeddings is not None and project.skills:
            project_embeddings = model.encode(
                [ps.skill for ps in project.skills], convert_to_tensor=True
            )
            # One row per job skill, one column per project skill.
            similarity_matrix = util.cos_sim(job_embeddings, project_embeddings)

            for job_skill, job_row in zip(job_skills, similarity_matrix):
                best_idx = int(job_row.argmax())
                best_score = float(job_row[best_idx])

                if best_score >= threshold:
                    matched_skills.append(MatchedProjectSkill(
                        jobSkill=job_skill,
                        projectSkill=project.skills[best_idx],
                        similarity=best_score,
                    ))

        matched_projects.append(MatchedProject(
            project=project,
            matchedSkills=matched_skills,
            matchedSkillsCount=len(matched_skills),
        ))

    return MatchingProjectsResponse(allAnalyzedProjects=matched_projects)
107
+
108
+
109
+ # uvicorn semantic_similarity:app --host 0.0.0.0 --port 8001
110
+ # if __name__ == "__main__":
111
+ # uvicorn.run(app, host="0.0.0.0", port=8001, reload=False)
skills_extraction/__pycache__/skills_extraction.cpython-39.pyc ADDED
Binary file (1.85 kB). View file
 
skills_extraction/skills_extraction.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ import numpy as np
4
+ from fastapi import FastAPI
5
+ from pydantic import BaseModel
6
+ from transformers import pipeline
7
+
8
# Build both Hugging Face pipelines once, when this module is imported.
_AGGREGATION_STRATEGY = "first"
knowledge_nlp = pipeline(
    model="jjzha/jobbert_knowledge_extraction",
    aggregation_strategy=_AGGREGATION_STRATEGY,
)
skill_nlp = pipeline(
    model="jjzha/jobbert_skill_extraction",
    aggregation_strategy=_AGGREGATION_STRATEGY,
)

app = FastAPI()
13
+
14
+
15
class TextInput(BaseModel):
    """Request body carrying the job description text to analyse."""

    # Text handed to the extraction pipelines.
    jobDescription: str
17
+
18
+
19
def convert_from_numpy(predictions):
    """Replace NumPy numeric scalars in prediction dicts with Python floats.

    Any value that is a NumPy floating-point or integer scalar, of whatever
    width, is converted with ``float()``; all other values are left as they
    are. The dicts are modified in place.

    Args:
        predictions: Iterable of dicts, e.g. the output of a pipeline call.

    Returns:
        The same ``predictions`` object that was passed in.
    """
    for pred in predictions:
        # Only values of existing keys are replaced, so the dict does not
        # change size while it is being iterated.
        for key, value in pred.items():
            # The abstract bases cover every width (float16, int16, uint8,
            # ...), not just float32 / int32 / int64.
            if isinstance(value, (np.floating, np.integer)):
                pred[key] = float(value)
    return predictions
25
+
26
+
27
def merge_BI_and_get_results(predictions):
    """Merge B/I-tagged predictions into whole skills with a mean confidence.

    A prediction whose ``entity_group`` is ``'B'`` starts a new skill; any
    other prediction continues the current one. Words of one skill are
    joined with single spaces and its confidence is the mean of their
    scores. A continuation that arrives while no skill is open starts a
    skill of its own, so its name carries no leading space.

    Args:
        predictions: Iterable of dicts with ``entity_group``, ``word`` and
            ``score`` keys.

    Returns:
        List of ``{"name": str, "confidence": number}`` dicts in input order.
    """
    results = []
    words, score_total = [], 0

    def flush():
        # Emit the skill collected so far, unless it has no text.
        name = " ".join(words)
        if name:
            results.append(
                {"name": name, "confidence": score_total / len(words)}
            )

    for pred in predictions:
        if pred['entity_group'] == 'B':
            # A new skill begins: close the previous one first.
            flush()
            words, score_total = [], 0
        words.append(pred['word'])
        score_total += pred['score']

    flush()
    return results
41
+
42
+
43
@app.post("/predict_knowledge")
def predict_knowledge(input_data: TextInput):
    """Run the knowledge pipeline on a job description.

    Args:
        input_data: Request body holding the job description text.

    Returns:
        A dict whose ``knowledge_predictions`` entry is the merged result
        list.
    """
    raw = knowledge_nlp(input_data.jobDescription)
    # Convert NumPy scalars to plain floats before merging.
    merged = merge_BI_and_get_results(convert_from_numpy(raw))
    return {"knowledge_predictions": merged}
49
+
50
+
51
@app.post("/predict_skills")
def predict_skills(input_data: TextInput):
    """Run the skill pipeline on a job description.

    Args:
        input_data: Request body holding the job description text.

    Returns:
        A dict whose ``skills_predictions`` entry is the merged result list.
    """
    raw = skill_nlp(input_data.jobDescription)
    # Convert NumPy scalars to plain floats before merging.
    merged = merge_BI_and_get_results(convert_from_numpy(raw))
    return {"skills_predictions": merged}
57
+
58
+ # Run with:
59
+ # uvicorn main:app --host 0.0.0.0 --port 8000