File size: 4,224 Bytes
c44fa64
 
3baee5b
c44fa64
 
 
 
 
144401d
 
7ead369
d2522df
37e042f
d2522df
c44fa64
 
3baee5b
 
 
c44fa64
3baee5b
 
 
 
 
 
 
 
5e6aa67
144401d
 
 
 
 
c12aa56
144401d
 
f3efdf4
7a967fc
144401d
 
 
 
 
 
cb07a78
 
144401d
 
f3efdf4
144401d
 
496d7e3
 
 
 
4821717
 
7a967fc
5e6aa67
 
 
 
 
 
 
 
 
e0e3a7b
 
 
 
 
 
 
 
 
10995ec
5e6aa67
 
 
 
 
 
 
 
 
 
 
 
 
6c0bb7c
 
c44fa64
6c0bb7c
 
 
 
 
e0e3a7b
 
 
 
 
 
 
 
6c0bb7c
e0e3a7b
28e3c15
144401d
6c0bb7c
 
 
9fe3540
 
905b759
 
 
9fe3540
6c0bb7c
c44fa64
6c0bb7c
 
 
c44fa64
 
 
 
 
3baee5b
 
c44fa64
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import os
from uuid import uuid4
import uvicorn
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import aiofiles
import PyPDF2
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage
import json
from fastapi.responses import FileResponse
from docx import Document

# Directory where uploaded files are written while a request is processed.
# It is created at import time if it does not already exist.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

app = FastAPI()

# Enable CORS for every origin, method and header (you can restrict origins later).
# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# permissive — confirm this is intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Module-level chat-model client used by parse_resume_text.
# The API key is read from the OPENAI_API_KEY environment variable;
# os.getenv returns None when the variable is unset.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",  # Use a valid model name like "gpt-4o" or "gpt-4-turbo"
    temperature=0,
    openai_api_key=os.getenv("OPENAI_API_KEY")
)
    
def _strip_code_fence(raw: str) -> str:
    """Return *raw* without a surrounding Markdown code fence, if one is present."""
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        # Drop the whole opening fence line so that both "```json" and a bare
        # "```" (with or without trailing whitespace) are handled.
        _, _, cleaned = cleaned.partition("\n")
        cleaned = cleaned.rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[: -len("```")]
    return cleaned.strip()


def parse_resume_text(text: str) -> str:
    """Ask the chat model to turn raw resume text into structured JSON.

    Args:
        text: Plain text extracted from a resume.

    Returns:
        The model's answer re-serialised as a JSON string indented by two
        spaces. Note that this is a string, not a parsed ``dict``.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON after any
            surrounding Markdown code fence has been removed.
    """
    prompt = f"""
    Extract structured information from this resume text and return the result in strict JSON format with the following keys:
    - basics: {{first_name, last_name, gender, emails, phone_numbers, address, total_experience_in_years, profession, summary, skills, has_driving_license}}
    - educations
    - professional_experiences
    - trainings_and_certifications
    - languages
    - awards
    - references
    - cv_text: {text}
    - cv_language: "en"
    Resume:
    {text}
    Return ONLY valid JSON, no text, no explanation.
    """
    # `invoke` is the supported entry point; calling the model object directly
    # is deprecated in current LangChain releases.
    result = llm.invoke([HumanMessage(content=prompt)])
    raw_string = _strip_code_fence(str(result.content))
    # Round-trip through json to validate the reply and normalise formatting.
    final_data = json.loads(raw_string)
    return json.dumps(final_data, indent=2)



# ✅ Save uploaded file asynchronously
async def save_file(file: UploadFile) -> str:
    """Persist an uploaded file inside ``UPLOAD_FOLDER`` and return its path.

    The stored name is ``<uuid4>_<original file name>`` so that concurrent
    uploads with the same name do not collide and the original extension is
    preserved for later type detection.

    Args:
        file: The upload received by the endpoint.

    Returns:
        Path of the file written under ``UPLOAD_FOLDER``.
    """
    # The file name comes from the client: keep only its final path component
    # (normalising Windows separators) so it cannot point outside
    # UPLOAD_FOLDER or into a non-existent sub-directory. A missing name
    # becomes an empty string.
    client_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(client_name)
    filename = f"{uuid4()}_{safe_name}"
    file_path = os.path.join(UPLOAD_FOLDER, filename)

    chunk_size = 1024 * 1024
    async with aiofiles.open(file_path, 'wb') as out_file:
        # Copy in fixed-size chunks instead of loading the whole upload into memory.
        while True:
            chunk = await file.read(chunk_size)
            if not chunk:
                break
            await out_file.write(chunk)
    return file_path

# ✅ Extract text from DOCX
def extract_text_from_docx(docx_path: str) -> str:
    """Return the text of every paragraph in a DOCX file, one per line.

    Args:
        docx_path: Path of the document to read.

    Returns:
        The paragraph texts joined with newlines and stripped of surrounding
        whitespace. If anything goes wrong, no exception is raised: a message
        starting with ``"Error extracting text from DOCX: "`` is returned
        instead, so callers must inspect the result to detect failures.
    """
    try:
        document = Document(docx_path)
        paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
        joined = "\n".join(paragraph_texts)
        return joined.strip()
    except Exception as error:
        return f"Error extracting text from DOCX: {str(error)}"

# ✅ Extract text from PDF using PyPDF2
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text of every page of a PDF, read with PyPDF2.

    Args:
        pdf_path: Path of the PDF to read.

    Returns:
        The text of each page that yields any, each followed by a newline,
        concatenated and stripped of surrounding whitespace. If anything goes
        wrong, no exception is raised: a message starting with
        ``"Error extracting text: "`` is returned instead, so callers must
        inspect the result to detect failures.
    """
    try:
        page_chunks = []
        with open(pdf_path, "rb") as handle:
            reader = PyPDF2.PdfReader(handle)
            for page in reader.pages:
                extracted = page.extract_text()
                # Pages without extractable text contribute nothing.
                if not extracted:
                    continue
                page_chunks.append(extracted + "\n")
        return "".join(page_chunks).strip()
    except Exception as error:
        return f"Error extracting text: {str(error)}"
        
@app.post("/parse-resume")
async def parse_resume(file: UploadFile = File(...)):
    """Parse an uploaded resume and return the structured data as a JSON download.

    The upload is saved to disk, its text is extracted according to the file
    extension (``.pdf``, ``.docx`` or ``.doc``), the text is sent to
    ``parse_resume_text``, and the resulting JSON is returned as an attachment
    named ``cleaned_resume.json``.

    Args:
        file: The uploaded resume.

    Returns:
        * 200 with the JSON document on success.
        * 400 if the file extension is not supported.
        * 422 if no usable text could be extracted from the file.
        * 500 with the error message if anything else fails.
    """
    # Function-scope imports, matching the existing local `traceback` import,
    # so this handler does not depend on changes to the module's import block.
    import traceback
    from fastapi.concurrency import run_in_threadpool
    from fastapi.responses import Response

    path = None
    try:
        print("πŸ”„ Saving file...")
        path = await save_file(file)
        print(f"βœ… File saved at {path}")

        print("πŸ“„ Extracting text...")
        ext = os.path.splitext(path)[-1].lower()
        if ext == ".pdf":
            extractor = extract_text_from_pdf
        elif ext in (".docx", ".doc"):
            # NOTE(review): legacy binary .doc files are presumably not readable
            # by the DOCX extractor; they will surface as a 422 below — confirm
            # whether .doc should be accepted at all.
            extractor = extract_text_from_docx
        else:
            return JSONResponse(status_code=400, content={"error": "Unsupported file type"})

        # The extractors do blocking file I/O; run them off the event loop.
        text = await run_in_threadpool(extractor, path)

        # The extractors report failures by *returning* a message that starts
        # with "Error extracting text" rather than raising. Do not forward such
        # a message (or an empty result) to the model as if it were a resume.
        if text.startswith("Error extracting text"):
            return JSONResponse(status_code=422, content={"error": text})
        if not text:
            return JSONResponse(
                status_code=422,
                content={"error": "No text could be extracted from the file"},
            )
        print("βœ… Text extracted.")

        # The model call is synchronous and slow; keep it off the event loop too.
        json_result = await run_in_threadpool(parse_resume_text, text)

        # Return the JSON straight from memory. Writing it to one shared file
        # name let concurrent requests overwrite each other's results and left
        # the file behind on disk.
        filename = "cleaned_resume.json"
        return Response(
            content=json_result,
            media_type="application/json",
            headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        )

    except Exception as e:
        print("❌ Exception occurred:\n", traceback.format_exc())
        return JSONResponse(status_code=500, content={"error": str(e)})

    finally:
        # Always delete the temporary upload, including on error paths.
        if path is not None:
            try:
                os.remove(path)
                print("🧹 File removed.")
            except OSError as cleanup_error:
                print(f"Could not remove {path}: {cleanup_error}")


@app.get("/")
async def root():
    """Liveness endpoint: return a fixed message showing the service is up."""
    status = {"message": "Resume PDF Text Extractor is running 🎯"}
    return status

# Script entry point: when this file is executed directly, serve the app with
# uvicorn on all interfaces (0.0.0.0), port 7860, with auto-reload enabled.
# NOTE(review): the "main:app" import string presumably requires this module
# to be importable as `main` (i.e. saved as main.py) — confirm.
if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)