File size: 4,266 Bytes
cbec091
 
7482626
0899dd5
b9464fb
5d93a4f
0899dd5
7482626
dfe6c2f
7482626
dfe6c2f
7482626
dfe6c2f
cbec091
f9a80bc
 
b9464fb
 
 
 
 
5d93a4f
 
 
bef6750
5d93a4f
 
 
 
 
 
 
 
 
 
 
 
7ff29b0
 
 
 
 
 
5d93a4f
7ff29b0
5d93a4f
 
 
 
 
 
 
 
 
7da0e48
5d93a4f
7da0e48
5d93a4f
7da0e48
 
5d93a4f
7da0e48
5d93a4f
 
 
7da0e48
f70c1ff
7da0e48
5d93a4f
 
cbec091
7482626
 
 
 
 
cbec091
 
 
bcdc087
7ff29b0
 
 
 
cbec091
 
 
 
dfe6c2f
cbec091
 
 
7ff29b0
 
cbec091
7482626
cbec091
f9a80bc
cbec091
7ff29b0
cbec091
 
 
d949669
 
 
 
 
 
cbec091
0899dd5
 
8a31cf5
7ff29b0
0899dd5
 
 
cbec091
 
 
b9464fb
0899dd5
 
8a31cf5
0899dd5
bef6750
 
0899dd5
 
 
8a31cf5
0899dd5
bef6750
0899dd5
bef6750
 
 
 
 
0899dd5
 
 
bef6750
 
0899dd5
8a31cf5
 
bef6750
0899dd5
bef6750
0899dd5
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
'''Functions for summarizing and formatting job calls.'''

import os
import json
import logging
from pathlib import Path
from datetime import datetime
from openai import OpenAI
from configuration import (
    INFERENCE_URL,
    SUMMARIZER_MODEL,
    JOB_CALL_EXTRACTION_PROMPT
)

# pylint: disable=broad-exception-caught

# Set up logging: configure the root logger at INFO level and create this
# module's named logger.
# NOTE(review): basicConfig() runs at import time and touches the root logger,
# so importing this module can affect the logging setup of the importing
# application — confirm this is intended rather than leaving configuration to
# the program's entry point.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def load_default_job_call() -> str:
    """
    Read the sample job call shipped in data/sample_job.txt, when present.

    The file is looked up relative to the directory two levels above this
    module. Its contents are returned with leading and trailing whitespace
    removed.

    Returns:
        str: The stripped file contents, or an empty string when the file is
            missing or anything goes wrong while reading it
    """
    try:
        # Two levels up from this file is treated as the project root
        sample_path = Path(__file__).parent.parent / "data" / "sample_job.txt"

        # A missing sample file is an expected situation, not an error
        if not sample_path.exists():
            logger.info("No default job call file found at: %s", sample_path)
            return ""

        contents = sample_path.read_text(encoding='utf-8').strip()

        logger.info(
            "Loaded default job call from: %s (%d characters)",
            sample_path,
            len(contents)
        )

        return contents

    except Exception as err:
        # Best effort: callers always get a string back, never an exception
        logger.warning("Failed to load default job call: %s", str(err))
        return ""


def summarize_job_call(job_call: str) -> str:
    '''Extracts and summarizes key information from job call.

    Sends the job call text to SUMMARIZER_MODEL through an OpenAI client
    configured with INFERENCE_URL (API key read from the API_KEY environment
    variable, with a placeholder fallback), tries to parse the reply as JSON,
    and makes a best-effort attempt to persist the result with
    _save_job_call_data. A failure to save is logged and does not affect the
    return value.

    Args:
        job_call (str): Job call text to summarize. Must be provided and non-empty.

    Returns:
        The value parsed from the model's reply when the reply is valid JSON;
        the raw reply string when it is not valid JSON; None when no job call
        text was given, the API call failed, or the response carried no
        message content.
        NOTE(review): the `-> str` annotation does not reflect the parsed-JSON
        and None cases; it is left as-is to keep the signature stable.
    '''

    if not job_call or not job_call.strip():
        logger.warning("No job call text provided for summarization")

        return None

    logger.info("Summarizing job call (%d characters)", len(job_call))

    client = OpenAI(
        base_url=INFERENCE_URL,
        api_key=os.environ.get("API_KEY", "dummy-key-for-testing")
    )

    # NOTE(review): the job call text is embedded in both the system and the
    # user message, so it is sent twice — confirm this is intentional.
    messages = [
        {
            'role': 'system',
            'content': f'{JOB_CALL_EXTRACTION_PROMPT}{job_call}'
        },
        {
            'role': 'user',
            'content': f'JOB CALL\n{job_call}'
        }
    ]

    completion_args = {
        'model': SUMMARIZER_MODEL,
        'messages': messages,
    }

    # Debug-level log instead of print(): keeps the full prompt off stdout
    # unless debugging is explicitly enabled
    logger.debug("Job summarization request: %s", completion_args)

    try:
        response = client.chat.completions.create(**completion_args)

    except Exception as e:
        logger.error('Error during job summarization API call: %s', e)

        return None

    # A response with no choices would otherwise raise out of this function
    try:
        summary = response.choices[0].message.content

    except (AttributeError, IndexError, TypeError) as e:
        logger.error('Job summarization response had no usable choice: %s', e)

        return None

    # json.loads(None) raises TypeError, which the JSONDecodeError handler
    # below would not catch
    if summary is None:
        logger.error('Job summarization response contained no message content')

        return None

    try:
        summary = json.loads(summary)

    except json.JSONDecodeError as e:
        # Keep the raw reply text so the caller still gets the model output
        logger.error("Failed to parse job call summary JSON: %s", e)

    # Save the extracted job call information to data directory
    try:
        _save_job_call_data(summary)

    except Exception as save_error:
        logger.warning("Failed to save job call data: %s", str(save_error))

    return summary


def _save_job_call_data(extracted_summary: str) -> None:
    """
    Write the extracted job call summary to a timestamped JSON file.

    The file is created as
    data/job_calls/job_call_extracted_<YYYYmmdd_HHMMSS>.json under the
    directory two levels above this module; the job_calls directory is created
    when it does not exist yet. Only the extracted summary is written.

    Args:
        extracted_summary (str): The extracted/summarized job call
            information; it is serialized with json.dump

    Raises:
        Exception: Any error raised while creating the directory or writing
            the file is logged and then re-raised to the caller
    """

    try:
        # Output directory lives under the project root (two levels up)
        target_dir = Path(__file__).parent.parent / "data" / "job_calls"
        target_dir.mkdir(parents=True, exist_ok=True)

        # NOTE(review): the timestamp has one-second resolution, so two saves
        # within the same second target the same file and the second one
        # overwrites the first
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        destination = target_dir / f"job_call_extracted_{stamp}.json"

        with destination.open('w', encoding='utf-8') as handle:
            json.dump(extracted_summary, handle)

        logger.info("Saved job call data to: %s", destination)

    except Exception as err:
        logger.error("Error saving job call data: %s", str(err))
        raise