Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
d46f9de
1
Parent(s):
0c1528f
Add application file
Browse files- Dockerfile +17 -0
- main.py +9 -0
- requirements.txt +7 -0
- semantic_similarity/DTOs.py +52 -0
- semantic_similarity/__init__.py +0 -0
- semantic_similarity/__pycache__/DTOs.cpython-39.pyc +0 -0
- semantic_similarity/__pycache__/__init__.cpython-39.pyc +0 -0
- semantic_similarity/__pycache__/models.cpython-39.pyc +0 -0
- semantic_similarity/__pycache__/semantic_similarity.cpython-39.pyc +0 -0
- semantic_similarity/models.py +24 -0
- semantic_similarity/semantic_similarity.py +111 -0
- skills_extraction/__pycache__/skills_extraction.cpython-39.pyc +0 -0
- skills_extraction/skills_extraction.py +59 -0
Dockerfile
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Image for the combined skills-extraction / semantic-similarity FastAPI service.
FROM python:3.9

# Run as an unprivileged user; PATH picks up user-level pip installs.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy requirements first so the pip layer is cached across code-only changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

ENV PORT=7860

# BUGFIX: main.py defines the composed application as `main_app`, not `app`;
# `uvicorn main:app` would crash at startup with "Attribute 'app' not found".
CMD ["uvicorn", "main:main_app", "--host", "0.0.0.0", "--port", "7860"]
|
main.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Top-level FastAPI application composing the two sub-services.

The skills-extraction API is served under /skills and the semantic
similarity API under /similarity.
"""

from fastapi import FastAPI

from semantic_similarity.semantic_similarity import app as similarity_app
from skills_extraction.skills_extraction import app as skills_app

main_app = FastAPI()

# Mount the two apps under different routes
main_app.mount("/skills", skills_app)
main_app.mount("/similarity", similarity_app)

# BUGFIX: the Dockerfile starts the server with `uvicorn main:app`, but only
# `main_app` was defined, so the container died at startup. Keep `main_app`
# for existing callers and expose the expected `app` attribute as an alias.
app = main_app
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi
|
2 |
+
uvicorn
|
3 |
+
transformers
|
4 |
+
torch
|
5 |
+
pydantic
|
6 |
+
numpy
|
7 |
+
sentence_transformers
|
semantic_similarity/DTOs.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Optional, List
|
2 |
+
from pydantic import BaseModel
|
3 |
+
from .models import *
|
4 |
+
|
5 |
+
|
6 |
+
# Needed for testing
class SkillPairInput(BaseModel):
    """Request body for the /similarity endpoint: two raw skill strings."""

    # First skill text to embed and compare.
    skill1: str
    # Second skill text to embed and compare.
    skill2: str
|
10 |
+
|
11 |
+
|
12 |
+
# Skill Matching
class SkillsMatchingRequest(BaseModel):
    """Request body for /match-skills: job requirements vs. a user's skills."""

    jobSkills: List[JobSkill]
    userSkills: List[UserSkill]
    # Minimum cosine similarity for a pair to count as a match.
    # NOTE(review): Optional permits an explicit null, which the endpoint then
    # compares against floats — consider tightening to plain `float`.
    similarityThreshold: Optional[float] = 0.7


class MatchedSkill(BaseModel):
    """A job skill paired with its best-scoring user skill."""

    jobSkill: JobSkill
    userSkill: UserSkill
    # Cosine similarity of the two skill embeddings.
    similarity: float


class MatchingSkillsResponse(BaseModel):
    """Response body for /match-skills."""

    matchedSkills: List[MatchedSkill]
    # Job skills whose best candidate fell below the threshold.
    unmatchedJobSkills: List[JobSkill]
    # User skills never selected as a best match above the threshold.
    unmatchedUserSkills: List[UserSkill]
|
29 |
+
|
30 |
+
|
31 |
+
# Project Matching

class ProjectsMatchingRequest(BaseModel):
    """Request body for /match-projects-skills: job requirements vs. projects."""

    jobSkills: List[JobSkill]
    projects: List[Project]
    # Minimum cosine similarity for a pair to count as a match.
    # NOTE(review): Optional permits an explicit null, which the endpoint then
    # compares against floats — consider tightening to plain `float`.
    similarityThreshold: Optional[float] = 0.7


class MatchedProjectSkill(BaseModel):
    """A job skill paired with the project skill it matched."""

    jobSkill: JobSkill
    projectSkill: ProjectSkill
    # Cosine similarity of the two skill embeddings.
    similarity: float


class MatchedProject(BaseModel):
    """Per-project matching summary."""

    project: Project
    matchedSkills: List[MatchedProjectSkill]
    # Convenience count; equals len(matchedSkills).
    matchedSkillsCount: int


class MatchingProjectsResponse(BaseModel):
    """Response body for /match-projects-skills; includes zero-match projects."""

    allAnalyzedProjects: List[MatchedProject]
|
semantic_similarity/__init__.py
ADDED
File without changes
|
semantic_similarity/__pycache__/DTOs.cpython-39.pyc
ADDED
Binary file (2.23 kB). View file
|
|
semantic_similarity/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (171 Bytes). View file
|
|
semantic_similarity/__pycache__/models.cpython-39.pyc
ADDED
Binary file (1.04 kB). View file
|
|
semantic_similarity/__pycache__/semantic_similarity.cpython-39.pyc
ADDED
Binary file (3.33 kB). View file
|
|
semantic_similarity/models.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel
|
2 |
+
from typing import List
|
3 |
+
|
4 |
+
|
5 |
+
class BaseSkill(BaseModel):
    """A named skill with its identifier."""

    id: int
    # Human-readable skill text; this is what gets embedded for similarity.
    skill: str


class UserSkill(BaseSkill):
    """Marker subclass: a skill belonging to a user."""

    pass


class ProjectSkill(BaseSkill):
    """Marker subclass: a skill belonging to a project."""

    pass


class JobSkill(BaseSkill):
    """Marker subclass: a skill required by a job."""

    pass


class Project(BaseModel):
    """A project and the skills associated with it."""

    id: int
    skills: List[ProjectSkill]
|
semantic_similarity/semantic_similarity.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import uvicorn
|
2 |
+
from fastapi import FastAPI
|
3 |
+
from sentence_transformers import SentenceTransformer, util
|
4 |
+
from .models import *
|
5 |
+
from .DTOs import *
|
6 |
+
|
7 |
+
# Sub-application mounted under /similarity by main.py.
app = FastAPI(title="Skill Embedding API")

# Load model once at startup
# You can replace this with, https://huggingface.co/burakkececi/bert-software-engineering ?
# NOTE(review): downloads weights at import time — first cold-start request
# waits on this; every endpoint below shares this single model instance.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


# --- Endpoints ---
|
15 |
+
|
16 |
+
@app.post("/similarity")
def similarity(pair: SkillPairInput):
    """Score the semantic similarity of two skill strings.

    Encodes both skills with the shared sentence-transformer model and
    returns their cosine similarity together with the original inputs.
    """
    first, second = pair.skill1, pair.skill2
    first_vec = model.encode(first, convert_to_tensor=True)
    second_vec = model.encode(second, convert_to_tensor=True)
    score = util.cos_sim(first_vec, second_vec).item()
    return {"skill1": first, "skill2": second, "similarity": score}
|
26 |
+
|
27 |
+
|
28 |
+
@app.post("/match-skills", response_model=MatchingSkillsResponse)
def match_skills(req: SkillsMatchingRequest):
    """Greedily match each job skill to its most similar user skill.

    For every job skill the single best-scoring user skill is selected;
    pairs at or above the similarity threshold are reported as matches and
    everything else is returned in the unmatched lists. A user skill may be
    the best match for several job skills.
    """
    job_skills = req.jobSkills
    user_skills = req.userSkills
    # The DTO declares the threshold Optional: guard against an explicit null
    # instead of letting `float >= None` raise a TypeError below.
    threshold = req.similarityThreshold if req.similarityThreshold is not None else 0.7

    # Nothing to compare — avoid encoding empty lists, which the model /
    # cos_sim pipeline does not handle gracefully.
    if not job_skills or not user_skills:
        return MatchingSkillsResponse(
            matchedSkills=[],
            unmatchedJobSkills=list(job_skills),
            unmatchedUserSkills=list(user_skills),
        )

    job_embeddings = model.encode([j.skill for j in job_skills], convert_to_tensor=True)
    user_embeddings = model.encode([u.skill for u in user_skills], convert_to_tensor=True)

    # similarity_matrix[i][k] = cosine similarity of job skill i vs. user skill k.
    similarity_matrix = util.cos_sim(job_embeddings, user_embeddings)

    matched = []
    unmatched_job_indices = set(range(len(job_skills)))
    unmatched_user_indices = set(range(len(user_skills)))

    for i, job_row in enumerate(similarity_matrix):
        best_idx = int(job_row.argmax())
        best_score = float(job_row[best_idx])

        if best_score >= threshold:
            matched.append(MatchedSkill(
                jobSkill=job_skills[i],
                userSkill=user_skills[best_idx],
                similarity=best_score,
            ))
            unmatched_job_indices.discard(i)
            unmatched_user_indices.discard(best_idx)

    # Sort the leftover indices so the response ordering is deterministic
    # (set iteration order is an implementation detail).
    unmatched_jobs = [job_skills[i] for i in sorted(unmatched_job_indices)]
    unmatched_users = [user_skills[i] for i in sorted(unmatched_user_indices)]

    return MatchingSkillsResponse(
        matchedSkills=matched,
        unmatchedJobSkills=unmatched_jobs,
        unmatchedUserSkills=unmatched_users,
    )
|
67 |
+
|
68 |
+
|
69 |
+
@app.post("/match-projects-skills", response_model=MatchingProjectsResponse)
def match_projects_skills(req: ProjectsMatchingRequest):
    """For each project, match the job skills against that project's skills.

    Every project appears in the response, with the job skills whose best
    match within the project scored at or above the threshold, plus a count
    of those matches.
    """
    job_skills = req.jobSkills
    projects = req.projects
    # The DTO declares the threshold Optional: guard against an explicit null
    # instead of letting `float >= None` raise a TypeError below.
    threshold = req.similarityThreshold if req.similarityThreshold is not None else 0.7

    # No job skills: nothing can match, but still analyze every project.
    if not job_skills:
        return MatchingProjectsResponse(allAnalyzedProjects=[
            MatchedProject(project=p, matchedSkills=[], matchedSkillsCount=0)
            for p in projects
        ])

    job_embeddings = model.encode([job.skill for job in job_skills], convert_to_tensor=True)

    matched_projects: List[MatchedProject] = []

    for project in projects:
        matched_skills: List[MatchedProjectSkill] = []

        # Skip encoding for projects without skills — model.encode([]) /
        # cos_sim on an empty batch would fail, and nothing could match anyway.
        if project.skills:
            project_embeddings = model.encode(
                [ps.skill for ps in project.skills], convert_to_tensor=True
            )
            # rows = job skills, columns = this project's skills.
            similarity_matrix = util.cos_sim(job_embeddings, project_embeddings)

            for i, job_skill in enumerate(job_skills):
                job_row = similarity_matrix[i]
                best_idx = int(job_row.argmax())
                best_score = float(job_row[best_idx])

                if best_score >= threshold:
                    matched_skills.append(MatchedProjectSkill(
                        jobSkill=job_skill,
                        projectSkill=project.skills[best_idx],
                        similarity=best_score,
                    ))

        matched_projects.append(MatchedProject(
            project=project,
            matchedSkills=matched_skills,
            matchedSkillsCount=len(matched_skills),
        ))

    return MatchingProjectsResponse(allAnalyzedProjects=matched_projects)
|
107 |
+
|
108 |
+
|
109 |
+
# uvicorn semantic_similarity:app --host 0.0.0.0 --port 8001
|
110 |
+
# if __name__ == "__main__":
|
111 |
+
# uvicorn.run(app, host="0.0.0.0", port=8001, reload=False)
|
skills_extraction/__pycache__/skills_extraction.cpython-39.pyc
ADDED
Binary file (1.85 kB). View file
|
|
skills_extraction/skills_extraction.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
from fastapi import FastAPI
|
5 |
+
from pydantic import BaseModel
|
6 |
+
from transformers import pipeline
|
7 |
+
|
8 |
+
# Load models and tokenizers
# NOTE(review): both pipelines download/load weights at import time, so the
# first request after a cold start waits on this. aggregation_strategy="first"
# groups word pieces into B/I entity spans consumed by merge_BI_and_get_results.
knowledge_nlp = pipeline(model="jjzha/jobbert_knowledge_extraction", aggregation_strategy="first")
skill_nlp = pipeline(model="jjzha/jobbert_skill_extraction", aggregation_strategy="first")

# Sub-application mounted under /skills by main.py.
app = FastAPI()
|
13 |
+
|
14 |
+
|
15 |
+
class TextInput(BaseModel):
    """Request body shared by both extraction endpoints."""

    # Full job-description text to run the token-classification pipelines on.
    jobDescription: str
|
17 |
+
|
18 |
+
|
19 |
+
def convert_from_numpy(predictions):
    """Convert NumPy scalar values in each prediction dict to Python scalars.

    The transformers pipelines return np.float32 scores (and may return
    NumPy ints for offsets) that the default JSON encoder cannot serialize.

    Args:
        predictions: list of per-entity dicts as returned by a pipeline.

    Returns:
        The same list, mutated in place.
    """
    for pred in predictions:
        for key, value in pred.items():
            # np.generic covers every NumPy scalar (float32/64, int16/32/64, ...);
            # the previous explicit tuple missed np.float64 and friends.
            # .item() also preserves int vs. float instead of coercing to float.
            if isinstance(value, np.generic):
                pred[key] = value.item()
    return predictions
|
25 |
+
|
26 |
+
|
27 |
+
def merge_BI_and_get_results(predictions):
    """Merge B/I-tagged spans into whole skills with averaged confidence.

    Predictions follow a BIO-style scheme: entity_group 'B' opens a new skill
    span and any other group ('I') continues the current one. Each finished
    span is emitted with the mean score of its words.

    Args:
        predictions: list of dicts with 'entity_group', 'word' and 'score'.

    Returns:
        List of {"name": merged skill, "confidence": mean score} dicts.
    """
    results, curSkill, curScore, curNoWords = [], "", 0, 0
    for pred in predictions:
        if pred['entity_group'] == 'B':
            if curSkill:
                results.append({"name": curSkill, "confidence": curScore / curNoWords})  # Average confidence
            curSkill, curScore, curNoWords = pred['word'], pred['score'], 1
        else:
            # BUGFIX: an 'I' token with no preceding 'B' used to produce a
            # skill name with a leading space; start the span cleanly instead.
            curSkill = curSkill + " " + pred['word'] if curSkill else pred['word']
            curScore += pred['score']
            curNoWords += 1
    # Flush the final open span.
    if curSkill:
        results.append({"name": curSkill, "confidence": curScore / curNoWords})
    return results
|
41 |
+
|
42 |
+
|
43 |
+
@app.post("/predict_knowledge")
def predict_knowledge(input_data: TextInput):
    """Extract knowledge entities from a job description.

    Runs the knowledge-extraction pipeline, converts NumPy scalars to
    JSON-safe values, and merges B/I spans into whole entities.
    """
    raw_predictions = knowledge_nlp(input_data.jobDescription)
    cleaned = convert_from_numpy(raw_predictions)
    return {"knowledge_predictions": merge_BI_and_get_results(cleaned)}
|
49 |
+
|
50 |
+
|
51 |
+
@app.post("/predict_skills")
def predict_skills(input_data: TextInput):
    """Extract skill entities from a job description.

    Runs the skill-extraction pipeline, converts NumPy scalars to JSON-safe
    values, and merges B/I spans into whole entities.
    """
    raw_predictions = skill_nlp(input_data.jobDescription)
    cleaned = convert_from_numpy(raw_predictions)
    return {"skills_predictions": merge_BI_and_get_results(cleaned)}
|
57 |
+
|
58 |
+
# Run with:
|
59 |
+
# uvicorn main:app --host 0.0.0.0 --port 8000
|