Alexvatti committed on
Commit
e0e3a7b
·
verified ·
1 Parent(s): cb07a78

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +18 -4
main.py CHANGED
@@ -64,6 +64,15 @@ async def save_file(file: UploadFile) -> str:
64
  await out_file.write(content)
65
  return file_path
66
 
 
 
 
 
 
 
 
 
 
67
  # ✅ Extract text from PDF using PyPDF2
68
  def extract_text_from_pdf(pdf_path: str) -> str:
69
  text = ""
@@ -86,11 +95,16 @@ async def parse_resume(file: UploadFile = File(...)):
86
  print(f"✅ File saved at {path}")
87
 
88
  print("📄 Extracting text...")
89
- text = extract_text_from_pdf(path)
90
- print("✅ Text extracted.")
 
 
 
 
 
 
91
 
92
- json_result = parse_resume_text(text)
93
- print("✅ JSON Created.")
94
 
95
  os.remove(path)
96
  print("🧹 File removed.")
 
64
  await out_file.write(content)
65
  return file_path
66
 
67
+ # ✅ Extract text from DOCX
68
+ def extract_text_from_docx(docx_path: str) -> str:
69
+ try:
70
+ doc = Document(docx_path)
71
+ text = "\n".join([para.text for para in doc.paragraphs])
72
+ return text.strip()
73
+ except Exception as e:
74
+ return f"Error extracting text from DOCX: {str(e)}"
75
+
76
  # ✅ Extract text from PDF using PyPDF2
77
  def extract_text_from_pdf(pdf_path: str) -> str:
78
  text = ""
 
95
  print(f"✅ File saved at {path}")
96
 
97
  print("📄 Extracting text...")
98
+ ext = os.path.splitext(path)[-1].lower()
99
+ if ext == ".pdf":
100
+ text = extract_text_from_pdf(path)
101
+ elif ext in [".docx", ".doc"]:
102
+ text = extract_text_from_docx(path)
103
+ else:
104
+ os.remove(path)
105
+ return JSONResponse(status_code=400, content={"error": "Unsupported file type"})
106
 
107
+ print("✅ Text extracted.")
 
108
 
109
  os.remove(path)
110
  print("🧹 File removed.")