File size: 3,252 Bytes
cbec091
 
7482626
0899dd5
b9464fb
ed82b74
5d93a4f
0899dd5
7482626
dfe6c2f
7482626
dfe6c2f
7482626
dfe6c2f
cbec091
ed82b74
 
f9a80bc
 
5d93a4f
7da0e48
5d93a4f
7da0e48
5d93a4f
7da0e48
 
5d93a4f
7da0e48
5d93a4f
 
55cf84d
5d93a4f
ed82b74
 
 
cbec091
7482626
 
 
 
 
cbec091
 
 
bcdc087
7ff29b0
 
 
 
cbec091
 
 
 
dfe6c2f
cbec091
 
 
 
7482626
cbec091
f9a80bc
cbec091
7ff29b0
cbec091
 
 
d949669
 
aa26954
d949669
ed82b74
d949669
 
 
cbec091
0899dd5
 
8a31cf5
7ff29b0
0899dd5
 
 
cbec091
 
 
b9464fb
0899dd5
 
8a31cf5
0899dd5
bef6750
 
0899dd5
 
 
8a31cf5
55cf84d
 
0899dd5
bef6750
0899dd5
bef6750
 
 
 
 
0899dd5
 
 
bef6750
 
0899dd5
8a31cf5
 
bef6750
0899dd5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
'''Functions for summarizing and formatting job calls.'''

import os
import json
import logging
import unicodedata
from pathlib import Path
from datetime import datetime
from openai import OpenAI
from configuration import (
    INFERENCE_URL,
    SUMMARIZER_MODEL,
    JOB_CALL_EXTRACTION_PROMPT
)

from functions.helper import clean_text_whitespace

# pylint: disable=broad-exception-caught


def summarize_job_call(job_call: str) -> str:
    '''Extracts and summarizes key information from a job call.

    The text is Unicode-normalized (NFKC) and whitespace-cleaned, sent to the
    chat completion endpoint configured by INFERENCE_URL / SUMMARIZER_MODEL,
    and the reply is parsed as JSON. The result is then handed to
    _save_job_call_data on a best-effort basis (a failed save never affects
    the return value).

    Args:
        job_call (str): Job call text to summarize. Expected to be a non-empty
            string; no validation is performed here.

    Returns:
        The parsed JSON reply (expected to be a dict) on success; the raw
        reply text if it could not be parsed as JSON; or None if the API call
        fails or the reply carries no message content. The ``-> str``
        annotation is kept unchanged for interface compatibility.
    '''

    logger = logging.getLogger(f'{__name__}.summarize_job_call')

    # Clean up the job call text
    job_call = unicodedata.normalize('NFKC', job_call)
    job_call = clean_text_whitespace(job_call)

    client = OpenAI(
        base_url=INFERENCE_URL,
        # Falls back to a placeholder key when API_KEY is not set
        api_key=os.environ.get("API_KEY", "dummy-key-for-testing")
    )

    # NOTE(review): the job call text is appended to the system prompt AND
    # sent again as the user message - confirm the duplication is intended.
    messages = [
        {
            'role': 'system',
            'content': f'{JOB_CALL_EXTRACTION_PROMPT}{job_call}'
        },
        {
            'role': 'user',
            'content': f'JOB CALL\n{job_call}'
        }
    ]

    completion_args = {
        'model': SUMMARIZER_MODEL,
        'messages': messages,
    }

    try:
        response = client.chat.completions.create(**completion_args)

    except Exception as e:
        logger.error('Error during job summarization API call: %s', e)
        return None

    # A missing response, an empty choices list or a missing message all mean
    # there is nothing to summarize; report it instead of raising.
    try:
        summary = response.choices[0].message.content

    except (AttributeError, IndexError, TypeError) as e:
        logger.error('Job summarization response contained no message: %s', e)
        return None

    if summary is None:
        logger.error('Job summarization response had empty message content')
        return None

    logger.debug('Raw job call summary: %s', summary)

    try:
        summary = json.loads(summary)

    except (json.JSONDecodeError, TypeError) as e:
        # Best effort: keep the unparsed reply so the caller still gets it
        logger.error("Failed to parse job call summary JSON: %s", e)

    else:
        # Only a JSON object has keys; other JSON values are passed through
        if isinstance(summary, dict):
            logger.debug('Job call summary keys: %s', list(summary))

    # Save the extracted job call information to data directory
    try:
        _save_job_call_data(summary)

    except Exception as save_error:
        logger.warning("Failed to save job call data: %s", str(save_error))

    return summary


def _save_job_call_data(extracted_summary: str) -> None:
    """
    Save the extracted job call summary as JSON in the data/job_calls directory.

    The directory is resolved relative to this file (two levels up, then
    data/job_calls) and is created if missing. The file is named
    job_call_extracted_<YYYYmmdd_HHMMSS>.json; if that name is already taken
    (two saves within the same second) a numeric suffix is appended so an
    earlier file is never overwritten.

    Any error is logged and swallowed: saving is best-effort and this function
    does not raise to its caller.

    Args:
        extracted_summary (str): The extracted/summarized job call information.
            Any JSON-serializable value is accepted (the caller passes the
            parsed summary when parsing succeeded).
    """

    logger = logging.getLogger(f'{__name__}._save_job_call_data')

    try:
        # Serialize first so a non-serializable value fails before any file
        # is created, instead of leaving a truncated file behind
        payload = json.dumps(extracted_summary)

        # Get the project root directory and job_calls subdirectory
        project_root = Path(__file__).parent.parent
        job_calls_dir = project_root / "data" / "job_calls"

        # Create job_calls directory if it doesn't exist
        job_calls_dir.mkdir(parents=True, exist_ok=True)

        # Create timestamped filename
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        stem = f"job_call_extracted_{timestamp}"
        file_path = job_calls_dir / f"{stem}.json"

        # The timestamp only has one-second resolution: pick a free name
        # rather than overwriting a file saved earlier in the same second
        duplicate_index = 1
        while file_path.exists():
            file_path = job_calls_dir / f"{stem}_{duplicate_index}.json"
            duplicate_index += 1

        # Save to JSON file
        with open(file_path, 'w', encoding='utf-8') as output_file:
            output_file.write(payload)

    except Exception as e:
        logger.error("Error saving job call data: %s", str(e))