# resumate/functions/gradio.py
"""
gradio.py
Functions for handling Gradio UI interactions and processing user inputs.
"""
import logging
from pathlib import Path
from functions.helper import clean_text_whitespace
from functions.linkedin_resume import extract_text
# from functions.github import get_github_repositories
# from functions.job_call import summarize_job_call
# from functions.writer_agent import write_resume
# pylint: disable=broad-exception-caught
# --- Module-level logging configuration (runs once at import time) ---
# Create the logs directory two levels up from this file (the project root)
# if it doesn't already exist.
logs_dir = Path(__file__).parent.parent / "logs"
logs_dir.mkdir(exist_ok=True)

# Strip any handlers already attached to the root logger (e.g. by a
# previously imported library or an earlier import of this module) so the
# basicConfig call below is guaranteed to take effect.
# Iterate over a copy ([:]) because removeHandler mutates the list.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

# Configure logging to write to file and console.
logging.basicConfig(
    level=logging.INFO,
    format='%(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(logs_dir / "gradio.log", mode='w'),  # Log to file (truncated on each start)
        logging.StreamHandler()  # Also log to console
    ]
)
def process_inputs(
    linkedin_pdf_path: str = None,
    github_url: str = None,
    job_post_text: str = None,
    user_instructions: str = None
):
    """
    Process the input files and URLs from the Gradio interface.

    Args:
        linkedin_pdf_path (str): Path to the uploaded LinkedIn resume export PDF
        github_url (str): GitHub profile URL
        job_post_text (str): Job post text content
        user_instructions (str): Additional instructions from the user

    Returns:
        str: Formatted output with file and URL information (currently empty;
        the status-message sections below are still commented out)
    """

    logger = logging.getLogger(f'{__name__}.process_inputs')
    logger.info("LinkedIn PDF: %s", linkedin_pdf_path)
    logger.info("GitHub URL: %s", github_url)

    # Guard before slicing: job_post_text defaults to None, and None[:100]
    # raises TypeError (same guard style as user_instructions below).
    if job_post_text:
        logger.info(
            "Job post: %s",
            clean_text_whitespace(job_post_text[:100]).replace("\n", " ")
        )
    else:
        logger.info("Job post: %s", "None")

    logger.info("User instructions: %s", user_instructions[:100] if user_instructions else "None")

    result = ""

    # Extract and structure text from the linkedin profile PDF. Skip the
    # extraction entirely when no path was provided instead of handing None
    # to extract_text.
    if linkedin_pdf_path:
        logger.info("Extracting text from LinkedIn PDF: %s", linkedin_pdf_path)
        extraction_result = extract_text(linkedin_pdf_path)

        if extraction_result:
            logger.info("LinkedIn PDF text extraction successful")
        else:
            logger.error("LinkedIn PDF text extraction failed")
    else:
        extraction_result = None
        logger.error("No LinkedIn PDF provided")

    # if extraction_result["status"] == "success":
    #     result += "  βœ… Text extraction successful\n\n"
    #     logger.info("LinkedIn PDF text extraction successful")
    # elif extraction_result["status"] == "warning":
    #     result += f"  ⚠️ Text extraction: {extraction_result['message']}\n\n"
    #     logger.warning("LinkedIn PDF extraction warning: %s", extraction_result['message'])
    # else:
    #     result += f"  ❌ Text extraction failed: {extraction_result['message']}\n\n"
    #     logger.error("LinkedIn PDF extraction failed: %s", extraction_result['message'])

    # # Process GitHub profile
    # if github_url and github_url.strip():
    #     result += "βœ… GitHub Profile URL provided\n"
    #     logger.info("Processing GitHub URL: %s", github_url.strip())

    #     # Retrieve repositories from GitHub
    #     github_result = get_github_repositories(github_url.strip())

    #     if github_result["status"] == "success":
    #         result += "  βœ… GitHub list download successful\n\n"
    #         logger.info(
    #             "GitHub repositories retrieved successfully for %s",
    #             github_result['metadata']['username']
    #         )
    #     else:
    #         result += f"  ❌ GitHub extraction failed: {github_result['message']}\n\n"
    #         logger.error("GitHub extraction failed: %s", github_result['message'])
    # else:
    #     result += "❌ No GitHub profile URL provided\n\n"
    #     logger.info("No GitHub URL provided")

    # # Process job post text
    # if job_post_text and job_post_text.strip():
    #     result += "βœ… Job post text provided\n"
    #     logger.info("Job post text provided (%d characters)", len(job_post_text))
    #     summary = summarize_job_call(job_post_text.strip())
    #     result += "  βœ… Job post summary generated\n"
    #     logger.info("Job post summary generated (%d characters)", len(summary))
    # else:
    #     result += "❌ Job post not provided\n"
    #     logger.info("No job post text provided")
    #     summary = None

    # # Process user instructions
    # if user_instructions and user_instructions.strip():
    #     result += "βœ… Additional instructions provided\n"
    #     logger.info("User instructions provided (%d characters)", len(user_instructions))
    # else:
    #     result += "ℹ️ No additional instructions provided\n"
    #     logger.info("No additional instructions provided")

    # logger.info("Input processing completed")

    # # Generate resume only if we have valid extraction result
    # if extraction_result and extraction_result.get("status") == "success":
    #     try:
    #         _ = write_resume(extraction_result, user_instructions, summary)
    #         result += "\nβœ… Resume generated successfully\n"
    #         logger.info("Resume generation completed successfully")
    #     except Exception as e:
    #         result += f"\n❌ Resume generation failed: {str(e)}\n"
    #         logger.error("Resume generation failed: %s", str(e))
    # else:
    #     result += "\n❌ Cannot generate resume: No valid LinkedIn data extracted\n"
    #     result += "Please ensure you upload a valid LinkedIn PDF export file.\n"
    #     logger.warning("Resume generation skipped - no valid LinkedIn data available")

    return result
# def get_processed_data(linkedin_pdf, github_url, job_post_text, instructions):
# """
# Get structured data from all inputs for further processing.
# Args:
# linkedin_pdf: Uploaded LinkedIn resume export PDF file
# github_url (str): GitHub profile URL
# job_post_text (str): Job post text content
# instructions (str): Additional instructions from the user
# Returns:
# dict: Structured data containing all processed information
# """
# job_post_text = job_post_text.strip() if job_post_text and job_post_text.strip() else None
# instructions = instructions.strip() if instructions and instructions.strip() else None
# processed_data = {
# "linkedin": None,
# "github": None,
# "job_post": job_post_text,
# "user_instructions": instructions,
# "errors": []
# }
# # Process LinkedIn PDF
# if linkedin_pdf is not None:
# file_path = linkedin_pdf.name
#         extraction_result = extract_text(file_path)
# if extraction_result["status"] == "success":
# processed_data["linkedin"] = extraction_result
# else:
# processed_data["errors"].append(f"LinkedIn: {extraction_result['message']}")
# # Process GitHub profile
# if github_url and github_url.strip():
# github_result = get_github_repositories(github_url)
# if github_result["status"] == "success":
# processed_data["github"] = github_result
# else:
# processed_data["errors"].append(f"GitHub: {github_result['message']}")
# return processed_data