# resumate/functions/gradio.py
"""
gradio.py
Functions for handling Gradio UI interactions and processing user inputs.
"""
import logging
from pathlib import Path
from functions.helper import clean_text_whitespace
from functions.linkedin_resume import extract_text
# from functions.github import get_github_repositories
# from functions.job_call import summarize_job_call
# from functions.writer_agent import write_resume
# pylint: disable=broad-exception-caught
# --- Module-level logging configuration (runs once at import time) ---
# Create the logs directory two levels up from this file (the project root)
# if it doesn't already exist.
logs_dir = Path(__file__).parent.parent / "logs"
logs_dir.mkdir(exist_ok=True)

# Strip any handlers already attached to the root logger (e.g. by a
# previously imported library or an earlier import of this module) so the
# basicConfig call below is guaranteed to take effect.
# Iterate over a copy ([:]) because removeHandler mutates the list.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

# Configure logging to write to file and console.
logging.basicConfig(
    level=logging.INFO,
    format='%(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(logs_dir / "gradio.log", mode='w'),  # Log to file (truncated on each start)
        logging.StreamHandler()  # Also log to console
    ]
)
def process_inputs(
    linkedin_pdf_path: str = None,
    github_url: str = None,
    job_post_text: str = None,
    user_instructions: str = None
):
    """
    Process the input files and URLs from the Gradio interface.

    Args:
        linkedin_pdf_path (str): Path to the uploaded LinkedIn resume export PDF
        github_url (str): GitHub profile URL
        job_post_text (str): Job post text content
        user_instructions (str): Additional instructions from the user

    Returns:
        str: Formatted output with file and URL information (currently empty;
        the status-message sections below are still commented out)
    """

    logger = logging.getLogger(f'{__name__}.process_inputs')
    logger.info("LinkedIn PDF: %s", linkedin_pdf_path)
    logger.info("GitHub URL: %s", github_url)

    # Guard before slicing: job_post_text defaults to None, and None[:100]
    # raises TypeError (same guard style as user_instructions below).
    if job_post_text:
        logger.info(
            "Job post: %s",
            clean_text_whitespace(job_post_text[:100]).replace("\n", " ")
        )
    else:
        logger.info("Job post: %s", "None")

    logger.info("User instructions: %s", user_instructions[:100] if user_instructions else "None")

    result = ""

    # Extract and structure text from the linkedin profile PDF. Skip the
    # extraction entirely when no path was provided instead of handing None
    # to extract_text.
    if linkedin_pdf_path:
        logger.info("Extracting text from LinkedIn PDF: %s", linkedin_pdf_path)
        extraction_result = extract_text(linkedin_pdf_path)

        if extraction_result:
            logger.info("LinkedIn PDF text extraction successful")
        else:
            logger.error("LinkedIn PDF text extraction failed")
    else:
        extraction_result = None
        logger.error("No LinkedIn PDF provided")

    # if extraction_result["status"] == "success":
    #     result += "  βœ… Text extraction successful\n\n"
    #     logger.info("LinkedIn PDF text extraction successful")
    # elif extraction_result["status"] == "warning":
    #     result += f"  ⚠️ Text extraction: {extraction_result['message']}\n\n"
    #     logger.warning("LinkedIn PDF extraction warning: %s", extraction_result['message'])
    # else:
    #     result += f"  ❌ Text extraction failed: {extraction_result['message']}\n\n"
    #     logger.error("LinkedIn PDF extraction failed: %s", extraction_result['message'])

    # # Process GitHub profile
    # if github_url and github_url.strip():
    #     result += "βœ… GitHub Profile URL provided\n"
    #     logger.info("Processing GitHub URL: %s", github_url.strip())

    #     # Retrieve repositories from GitHub
    #     github_result = get_github_repositories(github_url.strip())

    #     if github_result["status"] == "success":
    #         result += "  βœ… GitHub list download successful\n\n"
    #         logger.info(
    #             "GitHub repositories retrieved successfully for %s",
    #             github_result['metadata']['username']
    #         )
    #     else:
    #         result += f"  ❌ GitHub extraction failed: {github_result['message']}\n\n"
    #         logger.error("GitHub extraction failed: %s", github_result['message'])
    # else:
    #     result += "❌ No GitHub profile URL provided\n\n"
    #     logger.info("No GitHub URL provided")

    # # Process job post text
    # if job_post_text and job_post_text.strip():
    #     result += "βœ… Job post text provided\n"
    #     logger.info("Job post text provided (%d characters)", len(job_post_text))
    #     summary = summarize_job_call(job_post_text.strip())
    #     result += "  βœ… Job post summary generated\n"
    #     logger.info("Job post summary generated (%d characters)", len(summary))
    # else:
    #     result += "❌ Job post not provided\n"
    #     logger.info("No job post text provided")
    #     summary = None

    # # Process user instructions
    # if user_instructions and user_instructions.strip():
    #     result += "βœ… Additional instructions provided\n"
    #     logger.info("User instructions provided (%d characters)", len(user_instructions))
    # else:
    #     result += "ℹ️ No additional instructions provided\n"
    #     logger.info("No additional instructions provided")

    # logger.info("Input processing completed")

    # # Generate resume only if we have valid extraction result
    # if extraction_result and extraction_result.get("status") == "success":
    #     try:
    #         _ = write_resume(extraction_result, user_instructions, summary)
    #         result += "\nβœ… Resume generated successfully\n"
    #         logger.info("Resume generation completed successfully")
    #     except Exception as e:
    #         result += f"\n❌ Resume generation failed: {str(e)}\n"
    #         logger.error("Resume generation failed: %s", str(e))
    # else:
    #     result += "\n❌ Cannot generate resume: No valid LinkedIn data extracted\n"
    #     result += "Please ensure you upload a valid LinkedIn PDF export file.\n"
    #     logger.warning("Resume generation skipped - no valid LinkedIn data available")

    return result
# def get_processed_data(linkedin_pdf, github_url, job_post_text, instructions):
# """
# Get structured data from all inputs for further processing.
# Args:
# linkedin_pdf: Uploaded LinkedIn resume export PDF file
# github_url (str): GitHub profile URL
# job_post_text (str): Job post text content
# instructions (str): Additional instructions from the user
# Returns:
# dict: Structured data containing all processed information
# """
# job_post_text = job_post_text.strip() if job_post_text and job_post_text.strip() else None
# instructions = instructions.strip() if instructions and instructions.strip() else None
# processed_data = {
# "linkedin": None,
# "github": None,
# "job_post": job_post_text,
# "user_instructions": instructions,
# "errors": []
# }
# # Process LinkedIn PDF
# if linkedin_pdf is not None:
# file_path = linkedin_pdf.name
#         extraction_result = extract_text(file_path)
# if extraction_result["status"] == "success":
# processed_data["linkedin"] = extraction_result
# else:
# processed_data["errors"].append(f"LinkedIn: {extraction_result['message']}")
# # Process GitHub profile
# if github_url and github_url.strip():
# github_result = get_github_repositories(github_url)
# if github_result["status"] == "success":
# processed_data["github"] = github_result
# else:
# processed_data["errors"].append(f"GitHub: {github_result['message']}")
# return processed_data