'''Functions for summarizing and formatting job calls.'''

import os
import json
import logging
import unicodedata
from pathlib import Path
from datetime import datetime
from typing import Any, Optional

from openai import OpenAI

from configuration import (
    INFERENCE_URL,
    SUMMARIZER_MODEL,
    JOB_CALL_EXTRACTION_PROMPT
)
from functions.helper import clean_text_whitespace

# pylint: disable=broad-exception-caught


def summarize_job_call(job_call: str) -> Optional[Any]:
    '''Extracts and summarizes key information from job call.

    The job call text is NFKC-normalized and whitespace-cleaned, sent to the
    chat completions endpoint at INFERENCE_URL, and the response text is
    parsed as JSON. The result is also written to disk on a best-effort
    basis via _save_job_call_data.

    Args:
        job_call (str): Job call text to summarize. Must be provided and
            non-empty.

    Returns:
        The parsed JSON value when the model response is valid JSON, the raw
        response text when it is not valid JSON, or None when the API call
        fails or the response carries no text content.
    '''

    logger = logging.getLogger(f'{__name__}.summarize_job_call')

    # Clean up the job call text
    job_call = unicodedata.normalize('NFKC', job_call)
    job_call = clean_text_whitespace(job_call)

    client = OpenAI(
        base_url=INFERENCE_URL,
        api_key=os.environ.get("API_KEY", "dummy-key-for-testing")
    )

    messages = [
        {
            'role': 'system',
            # NOTE(review): the job call text is embedded here AND sent again
            # in the user message below - confirm the duplication is intended.
            'content': f'{JOB_CALL_EXTRACTION_PROMPT}{job_call}'
        },
        {
            'role': 'user',
            'content': f'JOB CALL\n{job_call}'
        }
    ]

    completion_args = {
        'model': SUMMARIZER_MODEL,
        'messages': messages,
    }

    try:
        response = client.chat.completions.create(**completion_args)

    except Exception as e:
        logger.error('Error during job summarization API call: %s', e)
        return None

    if response is None:
        return None

    # A response with no choices or no message would otherwise escape as an
    # uncaught IndexError/AttributeError; treat it as a failed summarization.
    try:
        raw_summary = response.choices[0].message.content

    except (AttributeError, IndexError, TypeError) as e:
        logger.error('Job summarization response had no usable choice: %s', e)
        return None

    # json.loads(None) raises TypeError, which the JSON handler below does
    # not catch, so reject missing content explicitly.
    if not isinstance(raw_summary, str):
        logger.error('Job summarization response contained no text content')
        return None

    logger.debug('Raw job call summary: %s', raw_summary)

    try:
        summary = json.loads(raw_summary)

    except json.JSONDecodeError as e:
        logger.error("Failed to parse job call summary JSON: %s", e)
        # Fall back to the unparsed text so the caller still gets the output
        summary = raw_summary

    else:
        # Valid JSON is not necessarily an object; only dicts have keys
        if isinstance(summary, dict):
            logger.debug('Job call summary keys: %s', list(summary))

    # Save the extracted job call information to data directory
    try:
        _save_job_call_data(summary)

    except Exception as save_error:
        logger.warning("Failed to save job call data: %s", str(save_error))

    return summary


def _save_job_call_data(extracted_summary: Any) -> None:
    """
    Save the extracted job call summary to the data/job_calls directory.

    The summary is written as JSON to a timestamped file under
    <project root>/data/job_calls, where the project root is taken to be the
    parent of the directory containing this module. Failures are logged and
    never raised, so saving stays best-effort for the caller.

    Args:
        extracted_summary: The extracted/summarized job call information;
            any JSON-serializable value.
    """

    logger = logging.getLogger(f'{__name__}._save_job_call_data')

    try:
        # Serialize before touching the filesystem so a serialization error
        # cannot leave a truncated file behind.
        payload = json.dumps(extracted_summary)

        # Get the project root directory and job_calls subdirectory
        project_root = Path(__file__).parent.parent
        job_calls_dir = project_root / "data" / "job_calls"

        # Create job_calls directory if it doesn't exist
        job_calls_dir.mkdir(parents=True, exist_ok=True)

        # Create timestamped filename
        # NOTE(review): the timestamp has one-second resolution, so two saves
        # within the same second write to the same path.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"job_call_extracted_{timestamp}.json"
        file_path = job_calls_dir / filename

        # Save to JSON file
        file_path.write_text(payload, encoding='utf-8')

    except Exception as e:
        logger.error("Error saving job call data: %s", str(e))