import os
import json
from datetime import datetime
from uuid import uuid4

import uvicorn
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import aiofiles
import PyPDF2
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage

UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

app = FastAPI()

# Enable CORS (you can restrict origins later)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

llm = ChatOpenAI(
    model_name="gpt-4o-mini",  # Use a valid model name like "gpt-4o" or "gpt-4-turbo"
    temperature=0,
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)


# Helper functions
def extract_date(date_str):
    """Parse a "Month YYYY" string into {"year", "month"}; 'present' or empty maps to today."""
    if not date_str or "present" in str(date_str).lower():
        now = datetime.now()
        return {"year": now.year, "month": now.month}
    try:
        parts = date_str.split()
        return {"year": int(parts[1]), "month": convert_month(parts[0])}
    except (ValueError, IndexError):
        return {"year": None, "month": None}


def convert_month(month_str):
    """Map a month name (or abbreviation) to its number, or None if unrecognized."""
    months = {
        "jan": 1, "feb": 2, "mar": 3, "apr": 4, "may": 5, "jun": 6,
        "jul": 7, "aug": 8, "sep": 9, "oct": 10, "nov": 11, "dec": 12,
    }
    return months.get(month_str.strip().lower()[:3], None)


def calculate_duration(start, end):
    """Return the number of months between two date strings, or None if they cannot be parsed."""
    s = extract_date(start)
    e = extract_date(end)
    if s["year"] and e["year"] and s["month"] and e["month"]:
        months = (e["year"] - s["year"]) * 12 + (e["month"] - s["month"])
        return months if months >= 0 else None
    return None


def parse_resume_text(text: str) -> dict:
    """Ask the LLM to extract structured resume data, then map it into the profile schema."""
    prompt = f"""
Extract structured information from this resume text and return the result as a JSON object with the following keys:
- basics: {{first_name, last_name, gender, emails, phone_numbers, address, total_experience_in_years, profession, summary, skills, has_driving_license}}
- educations
- professional_experiences
- trainings_and_certifications
- languages
- awards
- references

Return only the JSON object, with no markdown fences or extra commentary.

Resume:
{text}
"""
    result = llm.invoke([HumanMessage(content=prompt)])
    extracted = json.loads(result.content)

    # Map the old structure to the new one
    basics = extracted.get("basics", {})
    educations = extracted.get("educations", [])
    professional_experiences = extracted.get("professional_experiences", [])

    new_profile = {
        "profile": {
            "basics": {
                "first_name": basics.get("first_name"),
                "last_name": basics.get("last_name"),
                "gender": basics.get("gender", "male"),  # default or infer
                "emails": basics.get("emails", []),
                "urls": [],  # Populate if available
                "phone_numbers": basics.get("phone_numbers", []),
                "date_of_birth": {"year": None, "month": None, "day": None},
                "address": basics.get("address"),
                "total_experience_in_years": basics.get("total_experience_in_years", 0),
                "profession": basics.get("profession"),
                "summary": basics.get("summary"),
                "skills": basics.get("skills", []),
                "has_driving_license": basics.get("has_driving_license", False),
            },
            "languages": extracted.get("languages", []),
            "educations": [
                {
                    "start_year": None,
                    "is_current": False,
                    "end_year": int(e.get("graduation_date", "").split()[-1]) if "graduation_date" in e else None,
                    "issuing_organization": e.get("institution"),
                    "description": f"{e.get('degree')}, {e.get('country', '')}".strip(),
                }
                for e in educations
            ],
            "trainings_and_certifications": extracted.get("trainings_and_certifications", []),
            "professional_experiences": [
                {
                    "start_date": extract_date(p.get("start_date")),
                    "is_current": str(p.get("end_date") or "").lower() == "present",
                    "end_date": extract_date(p.get("end_date")),
                    "duration_in_months": calculate_duration(p.get("start_date"), p.get("end_date")),
                    "company": p.get("company"),
                    "location": "Hyderabad",  # default or parse if available
                    "title": p.get("job_title"),
                    "description": " ".join(p.get("responsibilities", [])),
                }
                for p in professional_experiences
            ],
            "awards": extracted.get("awards", []),
            "references": extracted.get("references", []),
        },
        "cv_text": text,
        "cv_language": "en",
    }

    return new_profile


# ✅ Save uploaded file asynchronously
async def save_file(file: UploadFile) -> str:
    filename = f"{uuid4()}_{file.filename}"
    file_path = os.path.join(UPLOAD_FOLDER, filename)
    async with aiofiles.open(file_path, "wb") as out_file:
        content = await file.read()
        await out_file.write(content)
    return file_path


# ✅ Extract text from PDF using PyPDF2
def extract_text_from_pdf(pdf_path: str) -> str:
    text = ""
    try:
        with open(pdf_path, "rb") as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                page_text = page.extract_text()
                if page_text:
                    text += page_text + "\n"
        return text.strip()
    except Exception as e:
        return f"Error extracting text: {str(e)}"


@app.post("/parse-resume")
async def parse_resume(file: UploadFile = File(...)):
    try:
        print("🔄 Saving file...")
        path = await save_file(file)
        print(f"✅ File saved at {path}")

        print("📄 Extracting text...")
        text = extract_text_from_pdf(path)
        print("✅ Text extracted.")

        json_result = parse_resume_text(text)
        print("✅ JSON Created.")

        os.remove(path)
        print("🧹 File removed.")

        return json_result
    except Exception as e:
        import traceback
        print("❌ Exception occurred:\n", traceback.format_exc())
        return JSONResponse(status_code=500, content={"error": str(e)})


@app.get("/")
async def root():
    return {"message": "Resume PDF Text Extractor is running 🎯"}


if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)