Resume-EndPoint

Sleeping

File size: 6,564 Bytes

import os
from uuid import uuid4
import uvicorn
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import aiofiles
import PyPDF2
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage
import json
# Directory (relative to the process working directory) where uploaded files
# are written. It is created at import time; exist_ok=True makes a pre-existing
# directory a no-op instead of an error.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# FastAPI application object that the middleware below is attached to.
app = FastAPI()

# Enable CORS (you can restrict origins later)
# NOTE(review): every origin, method and header is allowed while credentials
# are enabled — confirm this permissive policy is intended before exposing the
# service beyond development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Module-level chat-model client, built once at import time.
# The API key is read from the OPENAI_API_KEY environment variable;
# os.getenv returns None when the variable is unset.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",  # Use a valid model name like "gpt-4o" or "gpt-4-turbo"
    temperature=0,
    openai_api_key=os.getenv("OPENAI_API_KEY")
)
# Helper functions
def extract_date(date_str):
    """Parse a ``"<Month> <Year>"`` string into a year/month dict.

    Args:
        date_str: Text such as ``"Jan 2020"`` or ``"Present"``. May be None.

    Returns:
        ``{"year": int, "month": int | None}`` for a parsable value. A falsy
        value, or any value containing "present" (case-insensitive), resolves
        to the current local year and month. Anything that cannot be parsed
        yields ``{"year": None, "month": None}``.
    """
    # Imported locally because the module's import block does not bring in
    # datetime; without this the "present" branch failed with NameError.
    from datetime import datetime

    if not date_str or "present" in str(date_str).lower():
        now = datetime.now()
        return {"year": now.year, "month": now.month}
    try:
        parts = date_str.split()
        # The year is converted first so a missing/invalid year short-circuits
        # before the month lookup is attempted.
        return {"year": int(parts[1]), "month": convert_month(parts[0])}
    except (AttributeError, IndexError, TypeError, ValueError):
        # Non-string input, too few tokens, or a non-numeric year.
        return {"year": None, "month": None}

def convert_month(month_str):
    """Map a month name or abbreviation to its number (1-12).

    Only the first three characters of the trimmed, lower-cased input are
    compared, so "January", "jan" and " JAN " all map to 1. Returns None when
    the prefix is not a recognised month abbreviation.
    """
    abbreviations = (
        "jan", "feb", "mar", "apr", "may", "jun",
        "jul", "aug", "sep", "oct", "nov", "dec",
    )
    prefix = month_str.strip().lower()[:3]
    if prefix not in abbreviations:
        return None
    # Tuple position is zero-based; month numbers start at one.
    return abbreviations.index(prefix) + 1

def calculate_duration(start, end):
    """Return the number of whole months between two date strings.

    Both arguments are parsed with ``extract_date``.

    Returns:
        The non-negative month difference, or None when either date lacks a
        usable year or month, or when ``end`` falls before ``start``.
    """
    s = extract_date(start)
    e = extract_date(end)
    if not (s["year"] and e["year"]):
        return None
    # extract_date can return a valid year with month=None (unrecognised month
    # name); subtracting None would raise TypeError, so treat it as unknown.
    if s["month"] is None or e["month"] is None:
        return None
    months = (e["year"] - s["year"]) * 12 + (e["month"] - s["month"])
    return months if months >= 0 else None
    
def _strip_code_fence(raw):
    """Return ``raw`` without a surrounding Markdown code fence, if present."""
    cleaned = str(raw).strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line, which may carry a tag such as ```json.
        _, newline, remainder = cleaned.partition("\n")
        cleaned = remainder if newline else cleaned[3:]
        cleaned = cleaned.rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    return cleaned.strip()


def _dict_items(value):
    """Return the dict entries of ``value`` when it is a list, else ``[]``."""
    if not isinstance(value, list):
        return []
    return [item for item in value if isinstance(item, dict)]


def _graduation_year(value):
    """Return the trailing year of a value like "May 2020", or None."""
    tokens = str(value or "").split()
    if not tokens:
        return None
    try:
        return int(tokens[-1])
    except ValueError:
        return None


def _education_description(education):
    """Join the degree and country of an education entry, skipping blanks."""
    parts = (education.get("degree"), education.get("country"))
    return ", ".join(str(part).strip() for part in parts if part)


def _join_text(value):
    """Flatten a list of strings (or a single string / None) into one string."""
    if value is None:
        return ""
    if isinstance(value, (list, tuple)):
        return " ".join(str(item) for item in value)
    return str(value)


def _map_experience(experience):
    """Convert one extracted work-experience dict into the output schema."""
    start = experience.get("start_date")
    end = experience.get("end_date")
    return {
        "start_date": extract_date(start),
        # Same "present" detection as extract_date, and safe for JSON null.
        "is_current": "present" in str(end or "").lower(),
        "end_date": extract_date(end),
        "duration_in_months": calculate_duration(start, end),
        "company": experience.get("company"),
        # Prefer a location supplied by the model; otherwise keep the
        # historical default so existing consumers see the same value.
        "location": experience.get("location") or "Hyderabad",
        "title": experience.get("job_title"),
        "description": _join_text(experience.get("responsibilities")),
    }


def parse_resume_text(text: str) -> dict:
    """Ask the LLM to structure resume text and map it to the profile schema.

    Args:
        text: Plain text extracted from a resume.

    Returns:
        A dict with keys ``profile`` (basics, languages, educations,
        trainings_and_certifications, professional_experiences, awards,
        references), ``cv_text`` (the input text) and ``cv_language``.

    Raises:
        ValueError: If the model reply is not valid JSON (json.JSONDecodeError
            is a ValueError subclass) or is not a JSON object.
    """
    # NOTE(review): the prompt names only the top-level keys; the mapping below
    # reads sub-keys such as "graduation_date", "institution", "job_title" and
    # "responsibilities" that the prompt never requests, so those fields rely
    # on the model choosing matching names.
    prompt = f"""
    Extract structured information from this resume text and return the result as a JSON object with the following keys:
    - basics: {{first_name, last_name, gender, emails, phone_numbers, address, total_experience_in_years, profession, summary, skills, has_driving_license}}
    - educations
    - professional_experiences
    - trainings_and_certifications
    - languages
    - awards
    - references
    Resume:
    {text}
    """
    result = llm.invoke([HumanMessage(content=prompt)])
    # Models frequently wrap JSON in a Markdown fence; strip it before parsing.
    extracted = json.loads(_strip_code_fence(result.content))
    if not isinstance(extracted, dict):
        raise ValueError("LLM response is not a JSON object")

    # Map the old structure to the new one
    basics = extracted.get("basics")
    if not isinstance(basics, dict):
        basics = {}

    profile = {
        "basics": {
            "first_name": basics.get("first_name"),
            "last_name": basics.get("last_name"),
            # NOTE(review): a missing gender defaults to "male"; confirm this
            # default is wanted rather than None.
            "gender": basics.get("gender", "male"),
            "emails": basics.get("emails") or [],
            "urls": [],  # Populate if available
            "phone_numbers": basics.get("phone_numbers") or [],
            "date_of_birth": {"year": None, "month": None, "day": None},
            "address": basics.get("address"),
            "total_experience_in_years": basics.get("total_experience_in_years") or 0,
            "profession": basics.get("profession"),
            "summary": basics.get("summary"),
            "skills": basics.get("skills") or [],
            "has_driving_license": basics.get("has_driving_license") or False,
        },
        "languages": extracted.get("languages") or [],
        "educations": [
            {
                "start_year": None,
                "is_current": False,
                "end_year": _graduation_year(education.get("graduation_date")),
                "issuing_organization": education.get("institution"),
                "description": _education_description(education),
            }
            for education in _dict_items(extracted.get("educations"))
        ],
        "trainings_and_certifications": extracted.get("trainings_and_certifications") or [],
        "professional_experiences": [
            _map_experience(experience)
            for experience in _dict_items(extracted.get("professional_experiences"))
        ],
        "awards": extracted.get("awards") or [],
        "references": extracted.get("references") or [],
    }

    return {
        "profile": profile,
        "cv_text": text,
        "cv_language": "en",
    }


# ✅ Save uploaded file asynchronously
async def save_file(file: UploadFile) -> str:
    """Write an uploaded file into UPLOAD_FOLDER and return its path.

    Args:
        file: The upload received by the endpoint.

    Returns:
        Path of the stored file. The name is prefixed with a random UUID so
        uploads with the same client filename do not overwrite each other.
    """
    # The filename comes from the client: keep only its last path component
    # (treating backslashes as separators too) so it cannot point outside
    # UPLOAD_FOLDER, and fall back to a fixed name when it is missing.
    client_name = (file.filename or "upload").replace("\\", "/")
    safe_name = os.path.basename(client_name) or "upload"
    file_path = os.path.join(UPLOAD_FOLDER, f"{uuid4()}_{safe_name}")

    chunk_size = 1024 * 1024
    async with aiofiles.open(file_path, 'wb') as out_file:
        # Copy in fixed-size chunks so a large upload is never held fully in
        # memory at once.
        while True:
            chunk = await file.read(chunk_size)
            if not chunk:
                break
            await out_file.write(chunk)
    return file_path

# ✅ Extract text from PDF using PyPDF2
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the concatenated text of every page in the PDF at ``pdf_path``.

    Pages that yield no text are skipped; each remaining page is followed by a
    newline and the combined result is stripped of surrounding whitespace.
    Failures are not raised: the function returns a string beginning with
    "Error extracting text: " followed by the exception message.
    """
    try:
        with open(pdf_path, "rb") as handle:
            reader = PyPDF2.PdfReader(handle)
            page_texts = (page.extract_text() for page in reader.pages)
            # Joined while the file is still open, since pages are read lazily.
            combined = "".join(
                content + "\n" for content in page_texts if content
            )
    except Exception as e:
        return f"Error extracting text: {str(e)}"
    return combined.strip()
        
@app.post("/parse-resume")
async def parse_resume(file: UploadFile = File(...)):
    """Accept a PDF resume upload and return its structured profile as JSON.

    The upload is stored temporarily, its text is extracted and passed to
    ``parse_resume_text``. The temporary file is removed whether or not
    processing succeeds.

    Returns:
        The parsed profile dict on success; a 422 JSON error when no usable
        text could be extracted; a 500 JSON error for any other failure.
    """
    path = None
    try:
        print("🔄 Saving file...")
        path = await save_file(file)
        print(f"✅ File saved at {path}")

        print("📄 Extracting text...")
        text = extract_text_from_pdf(path)
        # extract_text_from_pdf reports failures in-band as a string with this
        # prefix; that message (or empty text) must not be sent to the LLM as
        # if it were resume content.
        if not text or text.startswith("Error extracting text:"):
            return JSONResponse(
                status_code=422,
                content={"error": text or "No extractable text found in the uploaded PDF."},
            )
        print("✅ Text extracted.")

        # NOTE(review): parse_resume_text is a synchronous call made directly
        # from this coroutine, so the event loop waits for it to finish.
        json_result = parse_resume_text(text)
        print("✅ JSON Created.")

        return json_result

    except Exception as e:
        import traceback
        print("❌ Exception occurred:\n", traceback.format_exc())
        return JSONResponse(status_code=500, content={"error": str(e)})

    finally:
        # Clean up on every exit path; previously a failure during extraction
        # or parsing left the uploaded file on disk.
        if path and os.path.exists(path):
            try:
                os.remove(path)
                print("🧹 File removed.")
            except OSError as cleanup_error:
                print(f"⚠️ Could not remove {path}: {cleanup_error}")


@app.get("/")
async def root():
    """Return a fixed status message showing the service is reachable."""
    status_message = "Resume PDF Text Extractor is running 🎯"
    return {"message": status_message}

if __name__ == "__main__":
    # Start the development server only when this file is executed directly.
    # The app is passed as an import string ("module:attribute"), not as an
    # object.
    server_options = {"host": "0.0.0.0", "port": 7860, "reload": True}
    uvicorn.run("main:app", **server_options)