Added GitHub repository selector

- configuration.py +14 -0
- functions/gradio.py +31 -31
- functions/job_call.py +1 -0
- functions/writer_agent.py +131 -48
- tests/test_resumate.py +20 -2
configuration.py
CHANGED

@@ -20,6 +20,10 @@ SUMMARIZER_MODEL = "claude-3-5-haiku-20241022"
 # - Claude-3-5-Haiku is the best model for this task so far.
 AGENT_MODEL = "claude-3-5-haiku-20241022"

+# Will be used for tasks related to writing the resume, such as selecting
+# the best GitHub repositories, writing the resume content, etc.
+WRITER_MODEL = "claude-3-5-haiku-20241022"
+
 AGENT_INSTRUCTIONS = """
 You are an AI agent responsible for writing a resume based on the provided context. Your task is to generate a well-structured and professional resume that highlights the user's skills, experiences, and achievements.
 You will receive two pieces of JSON structured context: a job call and a LinkedIn profile.

@@ -69,4 +73,14 @@ You are a career support AI agent tasked with extracting key information from a

 Format your response as a JSON object with requested fields. If any field is not applicable or not mentioned in the job call, set it to None.

+"""
+
+REPO_SELECTION_PROMPT = """
+You are an AI agent responsible for selecting the most relevant GitHub repositories from a user's profile based on a job call. Your task is to analyze the provided job call and choose repositories that best match the requirements and skills mentioned in the job description.
+Prioritize more recent and active repositories that demonstrate the user's skills and experience related to the job call. Format your output as a Python list containing only the repository titles like this:
+
+['first-repo', 'second-repo', 'third-repo']
+
+Respond with only this list of repository titles, without any additional text or explanation.
+
 """
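Because REPO_SELECTION_PROMPT forces the reply to be a bare Python list literal, the caller can recover the titles with ast.literal_eval, which is what the new _choose_repositories helper in functions/writer_agent.py does. A minimal sketch of that round trip (the reply string below is made up for illustration):

import ast

# Hypothetical model reply that follows the prompt's required format.
reply = "['first-repo', 'second-repo', 'third-repo']"

titles = ast.literal_eval(reply)
assert titles == ['first-repo', 'second-repo', 'third-repo']
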
functions/gradio.py
CHANGED

@@ -10,7 +10,7 @@ from functions.helper import clean_text_whitespace
 from functions.linkedin_resume import extract_text
 from functions.github import get_github_repositories
 from functions.job_call import summarize_job_call
-
+from functions.writer_agent import write_resume

 # pylint: disable=broad-exception-caught


@@ -60,29 +60,29 @@ def process_inputs(
     logger.info("User instructions: %s", user_instructions[:100] if user_instructions else "None")
     result = ""

+    # ==================================================================== #
+    # Extract and structure text from the linkedin profile PDF
+    logger.info("Extracting text from LinkedIn PDF: %s", linkedin_pdf_path)
+    linkedin_resume = extract_text(linkedin_pdf_path)

+    if linkedin_resume:
+        logger.info("LinkedIn PDF text extraction successful")

+    else:
+        logger.error("LinkedIn PDF text extraction failed")

+    # ==================================================================== #
+    # Process GitHub profile
+    logger.info("Processing GitHub profile: %s", github_username.strip())

+    # Retrieve repositories from GitHub
+    github_repositories = get_github_repositories(github_username.strip())

+    if github_repositories:
+        logger.info("GitHub repositories retrieved successfully")

+    else:
+        logger.error("GitHub repositories retrieval failed")

     # ==================================================================== #
     # Process job post text

@@ -97,6 +97,7 @@ def process_inputs(
     else:
         logger.error("Job post parsing failed")

+    # # ==================================================================== #
     # # Process user instructions
     # if user_instructions and user_instructions.strip():
     #     result += "✅ Additional instructions provided\n"

@@ -108,20 +109,19 @@ def process_inputs(

     # logger.info("Input processing completed")

+    # ==================================================================== #
+    # Generate resume only if we have valid extraction results
+    if linkedin_resume and github_repositories and job_post:
+        logger.info("Generating resume with provided data")
+
+        try:
+            _ = write_resume(linkedin_resume, github_repositories, job_post)

+        except Exception as e:
+            result += f"\n❌ Resume generation failed: {str(e)}\n"
+            logger.error("Resume generation failed: %s", str(e))
+    else:
+        logger.warning("Resume generation skipped - content missing")

     return result
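Taken together, the new process_inputs flow extracts the LinkedIn profile, retrieves the GitHub repositories, summarizes the job post, and only then hands all three results to write_resume. A condensed sketch of that pipeline (the file path, username, and job-call text are placeholders, and the job-post step is assumed to go through summarize_job_call, as the module's imports suggest):

from functions.linkedin_resume import extract_text
from functions.github import get_github_repositories
from functions.job_call import summarize_job_call
from functions.writer_agent import write_resume

# Placeholder inputs; in the Space these come from the Gradio form.
linkedin_resume = extract_text('path/to/linkedin_profile.pdf')
github_repositories = get_github_repositories('some-github-user')
job_post = summarize_job_call('...pasted job call text...')

# Resume generation only runs when all three inputs parsed successfully.
if linkedin_resume and github_repositories and job_post:
    write_resume(linkedin_resume, github_repositories, job_post)
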
functions/job_call.py
CHANGED

@@ -66,6 +66,7 @@ def summarize_job_call(job_call: str) -> str:
     summary = response.choices[0].message.content

     try:
+        print(summary)
         summary = json.loads(summary)
         print(summary.keys())

functions/writer_agent.py
CHANGED

@@ -1,84 +1,167 @@
 '''Agent responsible for writing the resume based on user provided context'''

+import ast
 import json
 import logging
 import os
+from openai import OpenAI
 from smolagents import OpenAIServerModel, CodeAgent
+from configuration import (
+    INFERENCE_URL,
+    AGENT_MODEL,
+    AGENT_INSTRUCTIONS,
+    WRITER_MODEL,
+    REPO_SELECTION_PROMPT
+)
+

 # pylint: disable=broad-exception-caught


+def write_resume(linkedin_resume: dict, github_repositories: list, job_call: dict) -> str:

     """
     Generates a resume based on the provided content.

     Args:
+        linkedin_resume (dict): Resume content extracted from the LinkedIn profile.
+        github_repositories (list): Information about the applicant's GitHub repositories.
+        job_call (dict): Extracted/summarized job call information.

     Returns:
         str: The generated resume.
     """

+    logger = logging.getLogger(f'{__name__}.write_resume')
+
+    logger.info("Selecting relevant GitHub repositories based on job call")
+    project_repos = _choose_repositories(github_repositories, job_call)
+    print("project_repos:", project_repos)
+
+    # model = OpenAIServerModel(
+    #     model_id=AGENT_MODEL,
+    #     api_base=INFERENCE_URL,
+    #     api_key=os.environ.get("API_KEY"),
+    # )
+
+    # agent = CodeAgent(
+    #     model=model,
+    #     tools=[],
+    #     additional_authorized_imports=['json', 'pandas'],
+    #     name="writer_agent",
+    #     verbosity_level=1,
+    #     max_steps=20,
+    #     planning_interval=5
+    # )
+
+    # # Prepare instructions - combine default with user instructions and job summary
+    # instructions = AGENT_INSTRUCTIONS
+
+    # if job_summary is not None:
+    #     instructions += f"\n\nJob Requirements and Details:\n{json.dumps(job_summary)}"
+    #     logger.info("Added job summary to agent prompt")
+
+    # if user_instructions and user_instructions.strip():
+    #     instructions += f"\n\nAdditional user instructions:\n{user_instructions.strip()}"
+    #     logger.info("Added user instructions to agent prompt")
+
+    # submitted_answer = agent.run(
+    #     instructions + '\n' + json.dumps(content['structured_text']),
+    # )
+
+    # logger.info("submitted_answer: %s", submitted_answer)
+
+    # # Create data directory if it doesn't exist
+    # data_dir = 'data'
+
+    # if not os.path.exists(data_dir):
+    #     os.makedirs(data_dir)
+    #     logger.info("Created data directory: %s", data_dir)
+
+    # # Save the resume to resume.md in the data directory
+    # resume_file_path = os.path.join(data_dir, 'resume.md')
+
+    # try:
+    #     with open(resume_file_path, 'w', encoding='utf-8') as f:
+    #         f.write(submitted_answer)
+
+    #     logger.info("Resume saved to: %s", resume_file_path)
+
+    # except Exception as e:
+    #     logger.error("Failed to save resume to file: %s", e)
+
+    return project_repos
+
+
+def _choose_repositories(github_repositories: list, job_call: dict) -> list:
+    """
+    Choose relevant GitHub repositories based on the job call requirements.
+
+    Args:
+        github_repositories (list): Information about the applicant's GitHub repositories.
+        job_call (dict): Extracted/summarized job call information.
+
+    Returns:
+        list: Filtered list of relevant repositories.
+    """

+    logger = logging.getLogger(f'{__name__}._choose_repositories')

+    # Create a new repo list without the full README text - this way we can save on input tokens
+    # by only sending the model the repo metadata, title, description, topics, etc.
+    repo_data = [
+        {k: v for k, v in d.items() if k != 'readme'}
+        for d in github_repositories
+    ]

+    # Let the model select the most relevant repositories based on the job call
+    client = OpenAI(
+        base_url=INFERENCE_URL,
+        api_key=os.environ.get("API_KEY", "dummy-key-for-testing")
+    )

+    messages = [
+        {
+            'role': 'system',
+            'content': f'{REPO_SELECTION_PROMPT}'
+        },
+        {
+            'role': 'user',
+            'content': f'JOB CALL\n{json.dumps(job_call)}\n\nREPOSITORIES\n{json.dumps(repo_data)}'
+        }
+    ]

+    completion_args = {
+        'model': WRITER_MODEL,
+        'messages': messages,
+    }

+    try:
+        response = client.chat.completions.create(**completion_args)

+    except Exception as e:
+        response = None
+        logger.error('Error during repository selection API call: %s', e)

+    # Use the repository selection response to filter the repositories
+    selected_repos = []

+    if response is not None:
+        response = response.choices[0].message.content
+        response = ast.literal_eval(response)
+        print(f'Selected repositories {type(response)}: {response}')

+        for repo in github_repositories:
+            print(repo['name'])

+            if repo['name'] in response:
+                selected_repos.append(repo)

+        # selected_repos = [
+        #     repo for repo in github_repositories if repo['name'] in response
+        # ]

+    return selected_repos
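As a rough usage sketch of the new selection path (the repository and job-call fields below are hypothetical placeholders; the real shapes come from functions.github.get_github_repositories and functions.job_call.summarize_job_call, and the call assumes INFERENCE_URL and API_KEY are configured for the inference endpoint):

from functions.writer_agent import write_resume

# Hypothetical fixtures: each repo dict needs a 'name' (used to filter the model's
# selection) and may carry a 'readme', which is stripped before the selection
# request to save input tokens.
github_repositories = [
    {'name': 'resumate', 'description': 'LLM resume writer', 'topics': ['llm', 'gradio'], 'readme': '...'},
    {'name': 'old-coursework', 'description': 'Class exercises', 'topics': [], 'readme': '...'},
]

job_call = {'title': 'Machine Learning Engineer', 'skills': ['Python', 'LLM APIs']}  # hypothetical fields
linkedin_resume = {}  # not used yet; the resume-writing agent is still commented out

# Currently returns the filtered repository list rather than a finished resume.
selected = write_resume(linkedin_resume, github_repositories, job_call)
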
tests/test_resumate.py
CHANGED

@@ -2,8 +2,10 @@
 Test for resume generation functionality
 """

+import json
 import unittest
 from functions.gradio import process_inputs
+from functions.writer_agent import write_resume

 class TestResumeGeneration(unittest.TestCase):
     """Test to run resume generation on pre-defined inputs."""

@@ -19,9 +21,18 @@ class TestResumeGeneration(unittest.TestCase):

         self.user_instructions = ""

+        with open('tests/test_data/github_repos.json', 'r', encoding='utf-8') as f:
+            self.github_repositories = json.load(f)

+        with open('tests/test_data/job_call.json', 'r', encoding='utf-8') as f:
+            self.job_call = json.load(f)
+
+        with open('tests/test_data/linkedin_resume.json', 'r', encoding='utf-8') as f:
+            self.linkedin_resume = json.load(f)
+
+
+    def test_process_inputs(self):
+        """Test input preprocessing for resume generation with pre-defined inputs."""

         result = process_inputs(
             linkedin_pdf_path=self.linkedin_pdf_path,

@@ -31,3 +42,10 @@ class TestResumeGeneration(unittest.TestCase):
         )

         print(result)
+
+    def test_write_resume(self):
+        """Test resume writing functionality with pre-defined inputs."""
+
+        result = write_resume(self.linkedin_resume, self.github_repositories, self.job_call)
+
+        print(result)
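Both tests assume the fixture files referenced in setUp exist under tests/test_data/ and that the inference endpoint credentials are available. With those in place they should run under the standard unittest runner, e.g. python -m unittest tests.test_resumate -v from the repository root, or programmatically:

import unittest

# Load and run the resume-generation tests from the repository root.
suite = unittest.defaultTestLoader.loadTestsFromName('tests.test_resumate')
unittest.TextTestRunner(verbosity=2).run(suite)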