Spaces:
Sleeping
Sleeping
File size: 4,224 Bytes
c44fa64 3baee5b c44fa64 144401d 7ead369 d2522df 37e042f d2522df c44fa64 3baee5b c44fa64 3baee5b 5e6aa67 144401d c12aa56 144401d f3efdf4 7a967fc 144401d cb07a78 144401d f3efdf4 144401d 496d7e3 4821717 7a967fc 5e6aa67 e0e3a7b 10995ec 5e6aa67 6c0bb7c c44fa64 6c0bb7c e0e3a7b 6c0bb7c e0e3a7b 28e3c15 144401d 6c0bb7c 9fe3540 905b759 9fe3540 6c0bb7c c44fa64 6c0bb7c c44fa64 3baee5b c44fa64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import os
from uuid import uuid4
import uvicorn
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import aiofiles
import PyPDF2
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage
import json
from fastapi.responses import FileResponse
from docx import Document
# Directory where uploaded files are staged while a request is processed.
# It is created at import time so handlers can write into it immediately.
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

app = FastAPI()

# Enable CORS for every origin (you can restrict origins later).
# Credentials are disabled on purpose: FastAPI's CORS documentation states
# that wildcard origins/methods/headers must not be combined with
# allow_credentials=True. List explicit origins before turning it back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Chat-model client shared by the whole module. The API key is read from the
# OPENAI_API_KEY environment variable; temperature=0 is requested for the
# structured-extraction prompt.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",  # Use a valid model name like "gpt-4o" or "gpt-4-turbo"
    temperature=0,
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)
def _strip_code_fence(raw: str) -> str:
    """Return *raw* without a surrounding Markdown code fence, if present."""
    cleaned = raw.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        # Drop an optional language tag such as "json" after the opening fence.
        cleaned = cleaned.lstrip()
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
    return cleaned.strip()


def parse_resume_text(text: str) -> str:
    """Ask the LLM to convert resume text into structured JSON.

    Args:
        text: Plain text extracted from a resume.

    Returns:
        The model's answer, parsed and re-serialized as a JSON string
        indented by two spaces.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON once any
            surrounding code fence has been removed.
    """
    # NOTE(review): the resume text is interpolated twice below (under
    # "cv_text" and under "Resume:"), which doubles the prompt size.
    prompt = f"""
Extract structured information from this resume text and return the result in strict JSON format with the following keys:
- basics: {{first_name, last_name, gender, emails, phone_numbers, address, total_experience_in_years, profession, summary, skills, has_driving_license}}
- educations
- professional_experiences
- trainings_and_certifications
- languages
- awards
- references
- cv_text: {text}
- cv_language: "en"
Resume:
{text}
Return ONLY valid JSON, no text, no explanation.
"""
    # `invoke` is the supported entry point; calling the model object directly
    # is deprecated in current LangChain releases.
    result = llm.invoke([HumanMessage(content=prompt)])
    # Models often wrap JSON in a ```json ... ``` fence despite instructions.
    final_data = json.loads(_strip_code_fence(str(result.content)))
    return json.dumps(final_data, indent=2)
# ✅ Save uploaded file asynchronously
async def save_file(file: UploadFile) -> str:
    """Write an uploaded file into UPLOAD_FOLDER and return its path.

    Args:
        file: The upload received by the endpoint.

    Returns:
        Path of the saved copy, named ``<uuid4>_<original file name>``.
    """
    # The file name is supplied by the client: keep only its last path
    # component (treating backslashes as separators too) so it can neither
    # point outside UPLOAD_FOLDER nor into a missing sub-directory.
    client_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(client_name) or "upload"
    file_path = os.path.join(UPLOAD_FOLDER, f"{uuid4()}_{safe_name}")

    chunk_size = 1024 * 1024
    async with aiofiles.open(file_path, "wb") as out_file:
        # Copy in chunks so a large upload is never held in memory at once.
        while True:
            chunk = await file.read(chunk_size)
            if not chunk:
                break
            await out_file.write(chunk)
    return file_path
# ✅ Extract text from DOCX
def extract_text_from_docx(docx_path: str) -> str:
    """Return the paragraph text of a DOCX file, one paragraph per line.

    Failures are never raised: any exception is turned into a string that
    starts with ``"Error extracting text from DOCX: "`` and returned instead.
    """
    try:
        paragraphs = Document(docx_path).paragraphs
        joined = "\n".join(paragraph.text for paragraph in paragraphs)
        return joined.strip()
    except Exception as exc:
        return f"Error extracting text from DOCX: {exc!s}"
# ✅ Extract text from PDF using PyPDF2
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text of every page of a PDF, one page after another.

    Pages that yield no text are skipped. Failures are never raised: any
    exception is turned into a string that starts with
    ``"Error extracting text: "`` and returned instead.
    """
    try:
        with open(pdf_path, "rb") as handle:
            reader = PyPDF2.PdfReader(handle)
            page_texts = [page.extract_text() for page in reader.pages]
        non_empty = [chunk for chunk in page_texts if chunk]
        # The final strip() removes leading/trailing whitespace of the whole
        # document, including any trailing page separator.
        return "\n".join(non_empty).strip()
    except Exception as exc:
        return f"Error extracting text: {exc!s}"
@app.post("/parse-resume")
async def parse_resume(file: UploadFile = File(...)):
    """Parse an uploaded resume (PDF or DOCX) into structured JSON.

    The upload is saved, its text is extracted and passed to
    ``parse_resume_text``, and the resulting JSON is returned as a
    downloadable ``cleaned_resume.json`` attachment.

    Responses:
        200: JSON document produced by the LLM.
        400: The file extension is not .pdf, .docx or .doc.
        422: No text could be extracted from the file.
        500: Any other failure; the body carries the exception message.
    """
    # Imported locally so this handler works with the file's existing imports.
    import traceback
    from fastapi.responses import Response

    path = None
    try:
        print("Saving file...")
        path = await save_file(file)
        print(f"✅ File saved at {path}")

        print("Extracting text...")
        ext = os.path.splitext(path)[-1].lower()
        if ext == ".pdf":
            text = extract_text_from_pdf(path)
        elif ext in (".docx", ".doc"):
            text = extract_text_from_docx(path)
        else:
            return JSONResponse(status_code=400, content={"error": "Unsupported file type"})

        # The extractors report failure by returning a string that starts with
        # "Error extracting text"; that message (or empty text) must not be
        # sent to the LLM as if it were a resume.
        if not text or text.startswith("Error extracting text"):
            return JSONResponse(
                status_code=422,
                content={"error": text or "No text could be extracted from the file"},
            )
        print("✅ Text extracted.")

        json_result = parse_resume_text(text)

        # Return the JSON straight from memory. Writing it to one shared
        # "cleaned_resume.json" on disk let concurrent requests overwrite each
        # other's result and left the file behind after the response.
        filename = "cleaned_resume.json"
        return Response(
            content=json_result,
            media_type="application/json",
            headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        )
    except Exception as e:
        print("❌ Exception occurred:\n", traceback.format_exc())
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # Always delete the staged upload, including on error paths.
        if path is not None and os.path.exists(path):
            os.remove(path)
            print("🧹 File removed.")
@app.get("/")
async def root():
    """Liveness endpoint: return a fixed message showing the service is up."""
    return {"message": "Resume PDF Text Extractor is running 🎯"}
if __name__ == "__main__":
    # Start the server when the file is executed directly. The app is passed
    # as the import string "main:app" -- assumes this file is saved as
    # main.py; TODO confirm. Listens on all interfaces, port 7860.
    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=True)