resumate/functions/job_call.py
gperdrizet's picture
Cleaned up LinkedIn resume PDF text extraction and parsing
f70c1ff verified
raw
history blame
4.27 kB
'''Functions for summarizing and formatting job calls.'''
import os
import json
import logging
from pathlib import Path
from datetime import datetime
from openai import OpenAI
from configuration import (
INFERENCE_URL,
SUMMARIZER_MODEL,
JOB_CALL_EXTRACTION_PROMPT
)
# pylint: disable=broad-exception-caught
# The pragma above applies to the whole module: the functions below catch
# Exception on purpose so that file and API failures are logged instead of
# propagating to the caller.

# Configure logging at import time and create this module's logger.
# NOTE: logging.basicConfig() has no effect if the root logger already has
# handlers, so an application that configures logging first keeps its setup.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def load_default_job_call() -> str:
    """
    Read the sample job call shipped with the project, if there is one.

    The file is looked up at ``data/sample_job.txt`` relative to the directory
    two levels above this module. Surrounding whitespace is stripped from the
    text before it is returned.

    Returns:
        str: The stripped file contents, or an empty string when the file is
            missing or any error occurs while reading it.
    """
    try:
        # Two levels up from this file is the project root.
        sample_path = Path(__file__).parent.parent / "data" / "sample_job.txt"

        # Guard clause: nothing to load when the sample file is absent.
        if not sample_path.exists():
            logger.info("No default job call file found at: %s", sample_path)
            return ""

        contents = sample_path.read_text(encoding='utf-8').strip()
        logger.info(
            "Loaded default job call from: %s (%d characters)",
            sample_path,
            len(contents)
        )
        return contents

    except Exception as read_error:
        # Best effort: a failed read is reported and treated as "no default".
        logger.warning("Failed to load default job call: %s", str(read_error))
        return ""
def summarize_job_call(job_call: str) -> str:
    '''Extracts and summarizes key information from job call.

    Sends the job call text to the chat-completions endpoint at INFERENCE_URL
    using SUMMARIZER_MODEL, tries to parse the reply as JSON, and then saves
    the result with _save_job_call_data on a best-effort basis (a failed save
    is logged and does not affect the return value).

    Args:
        job_call (str): Job call text to summarize. Must be provided and non-empty.

    Returns:
        The parsed JSON value when the model reply is valid JSON, the raw
        reply text when it is not valid JSON, or None if no job call text was
        provided, the API call failed, or the response carried no text.
        NOTE: the ``-> str`` annotation is narrower than this; it is left
        as-is so the signature stays unchanged for callers.
    '''
    if not job_call or not job_call.strip():
        logger.warning("No job call text provided for summarization")
        return None

    logger.info("Summarizing job call (%d characters)", len(job_call))

    client = OpenAI(
        base_url=INFERENCE_URL,
        api_key=os.environ.get("API_KEY", "dummy-key-for-testing")
    )

    # NOTE(review): the job call text is sent twice, appended to the system
    # prompt and again in the user message. Kept as-is; confirm this is
    # intentional.
    messages = [
        {
            'role': 'system',
            'content': f'{JOB_CALL_EXTRACTION_PROMPT}{job_call}'
        },
        {
            'role': 'user',
            'content': f'JOB CALL\n{job_call}'
        }
    ]

    completion_args = {
        'model': SUMMARIZER_MODEL,
        'messages': messages,
    }

    # Log at debug level rather than print(): the request contains the full
    # prompt and should not be written to stdout on every call.
    logger.debug("Job call summarization request: %s", completion_args)

    try:
        response = client.chat.completions.create(**completion_args)
    except Exception as e:
        logger.error('Error during job summarization API call: %s', e)
        return None

    if response is None:
        return None

    # A response with no choices, or a choice without a message, would
    # otherwise raise here; treat it as a failed summarization.
    try:
        reply_text = response.choices[0].message.content
    except (AttributeError, IndexError, TypeError) as e:
        logger.error('Job summarization response had no usable choice: %s', e)
        return None

    # The message content can be None; json.loads(None) would raise TypeError.
    if reply_text is None:
        logger.error('Job summarization response contained no text content')
        return None

    try:
        summary = json.loads(reply_text)
    except json.JSONDecodeError as e:
        logger.error("Failed to parse job call summary JSON: %s", e)
        # Fall back to the raw reply text so the caller still gets the output.
        summary = reply_text

    # Save the extracted job call information to data directory
    try:
        _save_job_call_data(summary)
    except Exception as save_error:
        logger.warning("Failed to save job call data: %s", str(save_error))

    return summary
def _save_job_call_data(extracted_summary: str) -> None:
    """
    Write the extracted job call summary to a timestamped JSON file.

    The file is created as ``data/job_calls/job_call_extracted_<timestamp>.json``
    under the directory two levels above this module; the directory is created
    if needed. Only the value passed in is written.

    Args:
        extracted_summary (str): The extracted/summarized job call information,
            serialized with json.dump.

    Raises:
        Exception: Any error raised while creating the directory or writing
            the file is logged and then re-raised to the caller.
    """
    try:
        # Two levels up from this file is the project root.
        output_dir = Path(__file__).parent.parent / "data" / "job_calls"
        output_dir.mkdir(parents=True, exist_ok=True)

        # Second-resolution timestamp in local time keeps file names sortable.
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        destination = output_dir / f"job_call_extracted_{stamp}.json"

        with open(destination, 'w', encoding='utf-8') as handle:
            json.dump(extracted_summary, handle)

        logger.info("Saved job call data to: %s", destination)

    except Exception as write_error:
        logger.error("Error saving job call data: %s", str(write_error))
        raise