Spaces:

gperdrizet
/

resumate

Configuration error

App Files Files Community

resumate / functions /job_call.py

gperdrizet

Cleaned up LinkedIn resume PDF text extraction and parsing

f70c1ff verified about 2 months ago

raw

history blame

4.27 kB

	'''Functions for summarizing and formatting job calls.'''

	import os
	import json
	import logging
	from pathlib import Path
	from datetime import datetime
	from openai import OpenAI
	from configuration import (
	INFERENCE_URL,
	SUMMARIZER_MODEL,
	JOB_CALL_EXTRACTION_PROMPT
	)

	# pylint: disable=broad-exception-caught

	# Set up logging. basicConfig runs at import time and configures the root
	# logger at INFO level (it has no effect if the root logger already has
	# handlers). `logger` is the module-level logger used by every function
	# in this file.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)


	def load_default_job_call() -> str:
	    """
	    Read the sample job call bundled with the project, if there is one.

	    The file is looked up at data/sample_job.txt under the parent of the
	    directory that contains this module.

	    Returns:
	        str: The file contents with surrounding whitespace stripped, or an
	            empty string when the file is missing or cannot be read.
	    """
	    try:
	        sample_path = Path(__file__).parent.parent / "data" / "sample_job.txt"

	        # Guard clause: nothing to load when the sample file is absent
	        if not sample_path.exists():
	            logger.info("No default job call file found at: %s", sample_path)
	            return ""

	        contents = sample_path.read_text(encoding='utf-8').strip()

	        logger.info(
	            "Loaded default job call from: %s (%d characters)",
	            sample_path,
	            len(contents)
	        )

	        return contents

	    except Exception as err:
	        # Best effort: any failure falls back to an empty job call
	        logger.warning("Failed to load default job call: %s", str(err))
	        return ""


	def summarize_job_call(job_call: str) -> str:
	    '''Extracts and summarizes key information from job call.

	    Sends the job call text to the summarizer model, tries to parse the
	    reply as JSON, and saves the result to the data directory on a
	    best-effort basis (a failed save is logged, not raised).

	    Args:
	        job_call (str): Job call text to summarize. Must be provided and non-empty.

	    Returns:
	        The parsed JSON summary when the model reply is valid JSON, the raw
	        reply text when it is not, or None if no job call text was given,
	        the API call failed, or the reply contained no content.
	    '''

	    if not job_call or not job_call.strip():
	        logger.warning("No job call text provided for summarization")

	        return None

	    logger.info("Summarizing job call (%d characters)", len(job_call))

	    client = OpenAI(
	        base_url=INFERENCE_URL,
	        api_key=os.environ.get("API_KEY", "dummy-key-for-testing")
	    )

	    # NOTE(review): the job call text is sent twice, appended to the system
	    # prompt and again in the user message. Kept as-is; confirm it is intended.
	    messages = [
	        {
	            'role': 'system',
	            'content': f'{JOB_CALL_EXTRACTION_PROMPT}{job_call}'
	        },
	        {
	            'role': 'user',
	            'content': f'JOB CALL\n{job_call}'
	        }
	    ]

	    completion_args = {
	        'model': SUMMARIZER_MODEL,
	        'messages': messages,
	    }

	    # Log the request at debug level instead of printing the full prompt
	    # (including the job text) to stdout on every call.
	    logger.debug('Job call summarization request: %s', completion_args)

	    try:
	        response = client.chat.completions.create(**completion_args)

	    except Exception as e:
	        logger.error('Error during job summarization API call: %s', e)

	        return None

	    # A reply with no choices, or a choice without text content, would
	    # otherwise crash below: json.loads(None) raises TypeError, which the
	    # JSONDecodeError handler does not catch.
	    try:
	        summary = response.choices[0].message.content

	    except (AttributeError, IndexError, TypeError) as e:
	        logger.error('Job summarization response had no usable choice: %s', e)

	        return None

	    if summary is None:
	        logger.error('Job summarization response contained no content')

	        return None

	    try:
	        summary = json.loads(summary)

	    except json.JSONDecodeError as e:
	        # NOTE(review): on a parse failure the raw reply text is still saved
	        # and returned, as before. Confirm callers expect that fallback.
	        logger.error("Failed to parse job call summary JSON: %s", e)

	    # Save the extracted job call information to data directory
	    try:
	        _save_job_call_data(summary)

	    except Exception as save_error:
	        logger.warning("Failed to save job call data: %s", str(save_error))

	    return summary


	def _save_job_call_data(extracted_summary: str) -> None:
	    """
	    Write an extracted job call summary to a timestamped JSON file.

	    The file goes in data/job_calls under the parent of the directory that
	    contains this module; the directory is created if it is missing. Only
	    the extracted summary is written.

	    Args:
	        extracted_summary (str): The extracted/summarized job call
	            information, serialized with json.dump.

	    Raises:
	        Exception: Any error raised while creating the directory or writing
	            the file is logged and then re-raised to the caller.
	    """

	    try:
	        output_dir = Path(__file__).parent.parent / "data" / "job_calls"
	        output_dir.mkdir(parents=True, exist_ok=True)

	        # One file per call, named by local wall-clock time (second resolution)
	        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	        destination = output_dir / f"job_call_extracted_{stamp}.json"

	        with destination.open('w', encoding='utf-8') as handle:
	            json.dump(extracted_summary, handle)

	        logger.info("Saved job call data to: %s", destination)

	    except Exception as err:
	        logger.error("Error saving job call data: %s", str(err))
	        raise