"""Resume parsing service.

Accepts a PDF or DOCX upload, extracts its text, asks an OpenAI chat model to
turn that text into structured JSON, and returns the JSON as a downloadable
document.
"""

import json
import os
import traceback
from uuid import uuid4

import aiofiles
import PyPDF2
import uvicorn
from docx import Document
from fastapi import FastAPI, File, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse, Response
from langchain.schema import HumanMessage
from langchain_openai import ChatOpenAI

UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# File name suggested to the client for the returned JSON document.
RESULT_FILENAME = "cleaned_resume.json"

# Uploads are copied to disk in pieces of this size instead of being read
# into memory in one go.
_UPLOAD_CHUNK_SIZE = 1024 * 1024

app = FastAPI()

# Enable CORS (you can restrict origins later).
# NOTE(review): wildcard origins together with allow_credentials=True is
# discouraged by the FastAPI CORS documentation; list explicit origins before
# relying on cookies or Authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

llm = ChatOpenAI(
    model_name="gpt-4o-mini",  # Use a valid model name like "gpt-4o" or "gpt-4-turbo"
    temperature=0,
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)


class TextExtractionError(Exception):
    """Raised when the text of an uploaded document cannot be read."""


def _build_prompt(text: str) -> str:
    """Return the extraction prompt with the resume *text* embedded."""
    return f"""
    Extract structured information from this resume text and return the result in strict JSON format with the following keys:
    - basics: {{first_name, last_name, gender, emails, phone_numbers, address, total_experience_in_years, profession, summary, skills, has_driving_license}}
    - educations
    - professional_experiences
    - trainings_and_certifications
    - languages
    - awards
    - references
    - cv_text: {text}
    - cv_language: "en"

    Resume:
    {text}

    Return ONLY valid JSON, no text, no explanation.
    """


def _strip_code_fence(raw: str) -> str:
    """Remove a Markdown code fence wrapped around *raw*, if there is one.

    Only a fence that opens at the very start and closes at the very end is
    removed, so backticks that appear inside the JSON payload are untouched.
    """
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        # Drop the whole opening line so any language tag ("json", "JSON",
        # none at all) goes with it.
        _, _, cleaned = cleaned.partition("\n")
        cleaned = cleaned.rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    return cleaned.strip()


def parse_resume_text(text: str) -> str:
    """Ask the chat model to structure *text* and return it as pretty JSON.

    Args:
        text: Plain text extracted from a resume.

    Returns:
        The model's answer re-serialised as a JSON string with 2-space
        indentation.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON once any
            surrounding code fence has been removed.
    """
    result = llm.invoke([HumanMessage(content=_build_prompt(text))])
    payload = _strip_code_fence(str(result.content))
    # Round-trip through the parser so invalid replies fail here rather than
    # being handed to the client as-is.
    return json.dumps(json.loads(payload), indent=2)


# ✅ Save uploaded file asynchronously
async def save_file(file: UploadFile) -> str:
    """Store the upload under ``UPLOAD_FOLDER`` and return the path written.

    The stored name is a fresh UUID followed by the base name of the
    client-supplied file name, so the original extension is preserved while
    directory components sent by the client are discarded.
    """
    # The file name is client-controlled: keep only its final component,
    # whichever separator style the client used.
    client_name = os.path.basename((file.filename or "").replace("\\", "/"))
    file_path = os.path.join(UPLOAD_FOLDER, f"{uuid4()}_{client_name}")
    async with aiofiles.open(file_path, "wb") as out_file:
        while chunk := await file.read(_UPLOAD_CHUNK_SIZE):
            await out_file.write(chunk)
    return file_path


# ✅ Extract text from DOCX
def extract_text_from_docx(docx_path: str) -> str:
    """Return the paragraph text of the DOCX at *docx_path*, newline-joined.

    Raises:
        TextExtractionError: If the document cannot be opened or read.
    """
    try:
        doc = Document(docx_path)
        text = "\n".join(para.text for para in doc.paragraphs)
    except Exception as exc:
        # Raise instead of returning the message: a returned string would be
        # indistinguishable from real resume text further down the pipeline.
        raise TextExtractionError(
            f"Error extracting text from DOCX: {exc}"
        ) from exc
    return text.strip()


# ✅ Extract text from PDF using PyPDF2
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text of every page of the PDF at *pdf_path*.

    Pages that yield no text are skipped; the rest are joined with newlines.

    Raises:
        TextExtractionError: If the file cannot be opened or parsed.
    """
    try:
        with open(pdf_path, "rb") as file:
            pdf_reader = PyPDF2.PdfReader(file)
            page_texts = [page.extract_text() for page in pdf_reader.pages]
    except Exception as exc:
        raise TextExtractionError(f"Error extracting text: {exc}") from exc
    return "\n".join(chunk for chunk in page_texts if chunk).strip()


@app.post("/parse-resume")
async def parse_resume(file: UploadFile = File(...)):
    """Parse an uploaded resume and return its structured JSON.

    Responses:
        200: JSON document, offered for download as ``cleaned_resume.json``.
        400: The file extension is not ``.pdf``, ``.docx`` or ``.doc``.
        422: The file could not be read, or it contained no extractable text.
        500: Any other failure (for example an invalid reply from the model).
    """
    path = None
    try:
        print("🔄 Saving file...")
        path = await save_file(file)
        print(f"✅ File saved at {path}")

        print("📄 Extracting text...")
        ext = os.path.splitext(path)[-1].lower()
        if ext == ".pdf":
            extractor = extract_text_from_pdf
        elif ext in (".docx", ".doc"):
            # NOTE(review): ".doc" is routed to the DOCX reader; if that
            # reader rejects the file the request ends in the 422 branch.
            extractor = extract_text_from_docx
        else:
            return JSONResponse(
                status_code=400, content={"error": "Unsupported file type"}
            )

        # Extraction and the model call are blocking; run them off the event
        # loop so other requests are still served meanwhile.
        text = await run_in_threadpool(extractor, path)
        if not text:
            return JSONResponse(
                status_code=422,
                content={"error": "No text could be extracted from the file"},
            )
        print("✅ Text extracted.")

        json_result = await run_in_threadpool(parse_resume_text, text)

        # Serve the result from memory: a shared on-disk result file would be
        # overwritten by concurrent requests and never cleaned up.
        return Response(
            content=json_result,
            media_type="application/json",
            headers={
                "Content-Disposition": f'attachment; filename="{RESULT_FILENAME}"'
            },
        )
    except TextExtractionError as exc:
        return JSONResponse(status_code=422, content={"error": str(exc)})
    except Exception as e:
        print("❌ Exception occurred:\n", traceback.format_exc())
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # Remove the stored upload on every exit path, including failures.
        if path is not None:
            try:
                os.remove(path)
            except OSError as cleanup_error:
                print(f"Could not remove {path}: {cleanup_error}")
            else:
                print("🧹 File removed.")


@app.get("/")
async def root():
    """Report that the service is up."""
    return {"message": "Resume PDF Text Extractor is running 🎯"}


if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)