Resume-EndPoint / main.py
Alexvatti's picture
Update main.py
28e3c15 verified
import json
import os
from uuid import uuid4

import aiofiles
import PyPDF2
import uvicorn
from docx import Document
from fastapi import FastAPI, UploadFile, File
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse, Response
from langchain.schema import HumanMessage
from langchain_openai import ChatOpenAI
# Directory where uploaded files are staged while a request is processed.
# It is created at import time so the handlers can rely on it existing.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

app = FastAPI()

# Enable CORS for every origin (you can restrict origins later).
# Credentials stay disabled while origins are wildcarded: the FastAPI CORS
# documentation states that allow_origins/allow_methods/allow_headers cannot
# be ["*"] when allow_credentials is True. Switch this back to True only
# together with an explicit list of trusted origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Shared chat-model client used by parse_resume_text. The API key is read
# from the OPENAI_API_KEY environment variable.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",  # Use a valid model name like "gpt-4o" or "gpt-4-turbo"
    temperature=0,
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)
def _strip_code_fence(raw: str) -> str:
    """Return *raw* without a surrounding Markdown code fence, if present."""
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        # The opening fence may carry a language tag, e.g. ```json.
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    return cleaned.strip()


def parse_resume_text(text: str) -> str:
    """Ask the LLM to turn raw resume text into structured JSON.

    Args:
        text: Plain text extracted from a resume.

    Returns:
        The model's answer re-serialized as a JSON string indented by two
        spaces (a string, not a dict: the caller sends it out as-is).

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON once an
            optional surrounding code fence has been removed.
    """
    prompt = f"""
Extract structured information from this resume text and return the result in strict JSON format with the following keys:
- basics: {{first_name, last_name, gender, emails, phone_numbers, address, total_experience_in_years, profession, summary, skills, has_driving_license}}
- educations
- professional_experiences
- trainings_and_certifications
- languages
- awards
- references
- cv_text: {text}
- cv_language: "en"
Resume:
{text}
Return ONLY valid JSON, no text, no explanation.
"""
    # `invoke` is the supported entry point; calling the model object
    # directly is deprecated in LangChain.
    result = llm.invoke([HumanMessage(content=prompt)])
    # Only the outer fence is removed, so fence-like text inside JSON string
    # values is left intact.
    final_data = json.loads(_strip_code_fence(str(result.content)))
    return json.dumps(final_data, indent=2)
# ✅ Save uploaded file asynchronously
async def save_file(file: UploadFile) -> str:
    """Persist an uploaded file under UPLOAD_FOLDER and return its path.

    The stored name is ``<uuid4>_<original base name>`` so concurrent uploads
    of the same file never collide and the original extension is preserved.

    Args:
        file: The upload received by the endpoint.

    Returns:
        Path of the file written inside UPLOAD_FOLDER.
    """
    # The filename is client-controlled: keep only its final component so
    # directory parts (including Windows-style backslashes) cannot influence
    # where the file is written. Fall back to a fixed name when it is missing.
    original_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(original_name) or "upload"
    file_path = os.path.join(UPLOAD_FOLDER, f"{uuid4()}_{safe_name}")

    chunk_size = 1024 * 1024
    async with aiofiles.open(file_path, "wb") as out_file:
        # Copy in chunks so a large upload is never held in memory at once.
        while True:
            chunk = await file.read(chunk_size)
            if not chunk:
                break
            await out_file.write(chunk)
    return file_path
# ✅ Extract text from DOCX
def extract_text_from_docx(docx_path: str) -> str:
    """Return the paragraph text of a DOCX file, one paragraph per line.

    Any failure is reported in-band: the function never raises, it returns a
    string starting with ``"Error extracting text from DOCX: "`` instead.
    """
    try:
        paragraphs = Document(docx_path).paragraphs
        joined = "\n".join(paragraph.text for paragraph in paragraphs)
    except Exception as exc:
        return f"Error extracting text from DOCX: {exc!s}"
    return joined.strip()
# ✅ Extract text from PDF using PyPDF2
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text of every page of a PDF, pages separated by newlines.

    Pages for which PyPDF2 yields no text are skipped. Any failure is reported
    in-band: the function never raises, it returns a string starting with
    ``"Error extracting text: "`` instead.
    """
    try:
        with open(pdf_path, "rb") as handle:
            reader = PyPDF2.PdfReader(handle)
            # Pages must be read while the file is still open.
            extracted = (page.extract_text() for page in reader.pages)
            chunks = [page_text + "\n" for page_text in extracted if page_text]
    except Exception as exc:
        return f"Error extracting text: {exc!s}"
    return "".join(chunks).strip()
@app.post("/parse-resume")
async def parse_resume(file: UploadFile = File(...)):
    """Parse an uploaded resume (PDF or DOCX) and return structured JSON.

    The upload is stored temporarily, its text is extracted according to the
    file extension, and the text is sent to the LLM via parse_resume_text.

    Returns:
        200 with the JSON document as a ``cleaned_resume.json`` attachment,
        400 for an unsupported extension,
        422 when no text could be extracted from the file,
        500 with the error message for any other failure.
    """
    path = None
    try:
        print("πŸ”„ Saving file...")
        path = await save_file(file)
        print(f"βœ… File saved at {path}")

        print("πŸ“„ Extracting text...")
        ext = os.path.splitext(path)[-1].lower()
        if ext == ".pdf":
            extractor = extract_text_from_pdf
        elif ext in (".docx", ".doc"):
            extractor = extract_text_from_docx
        else:
            return JSONResponse(status_code=400, content={"error": "Unsupported file type"})

        # Extraction is blocking work; keep it off the event loop so other
        # requests are still served meanwhile.
        text = await run_in_threadpool(extractor, path)

        # The extractors report failures in-band as "Error extracting text..."
        # strings; surface those instead of sending them to the LLM as if they
        # were resume content.
        if text.startswith("Error extracting text"):
            return JSONResponse(status_code=422, content={"error": text})
        if not text:
            return JSONResponse(
                status_code=422,
                content={"error": "No text could be extracted from the file"},
            )
        print("βœ… Text extracted.")

        # The LLM call is synchronous and slow, so it also runs in a worker thread.
        json_result = await run_in_threadpool(parse_resume_text, text)

        # Answer from memory rather than through a shared file on disk, so
        # concurrent requests cannot overwrite each other's result.
        filename = "cleaned_resume.json"
        return Response(
            content=json_result,
            media_type="application/json",
            headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        )
    except Exception as e:
        import traceback
        print("❌ Exception occurred:\n", traceback.format_exc())
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # Always discard the temporary upload, including on error paths.
        if path is not None and os.path.exists(path):
            os.remove(path)
            print("🧹 File removed.")
@app.get("/")
async def root():
    """Landing endpoint: return a fixed message showing the service is up."""
    status = {"message": "Resume PDF Text Extractor is running 🎯"}
    return status
if __name__ == "__main__":
    # Start the server only when this file is executed as a script; the app is
    # passed as an import string ("main:app") together with reload=True.
    server_options = {
        "host": "0.0.0.0",
        "port": 7860,
        "reload": True,
    }
    uvicorn.run("main:app", **server_options)