gperdrizet committed on
Commit
aa26954
·
verified ·
1 Parent(s): 9161371

Added GitHub repository selector

Browse files
configuration.py CHANGED
@@ -20,6 +20,10 @@ SUMMARIZER_MODEL = "claude-3-5-haiku-20241022"
20
  # - Claude-3-5-Haiku is the best model for this task so far.
21
  AGENT_MODEL = "claude-3-5-haiku-20241022"
22
 
 
 
 
 
23
  AGENT_INSTRUCTIONS = """
24
  You are an AI agent responsible for writing a resume based on the provided context. Your task is to generate a well-structured and professional resume that highlights the user's skills, experiences, and achievements.
25
  You will receive two pieces of JSON structured context: a job call and a LinkedIn profile.
@@ -69,4 +73,14 @@ You are a career support AI agent tasked with extracting key information from a
69
 
70
  Format your response as a JSON object with requested fields. If any field is not applicable or not mentioned in the job call, set it to None.
71
 
 
 
 
 
 
 
 
 
 
 
72
  """
 
20
  # - Claude-3-5-Haiku is the best model for this task so far.
21
  AGENT_MODEL = "claude-3-5-haiku-20241022"
22
 
23
+ # Will be used for tasks related to writing the resume, such as selecting
24
+ # the best GitHub repositories, writing the resume content, etc.
25
+ WRITER_MODEL = "claude-3-5-haiku-20241022"
26
+
27
  AGENT_INSTRUCTIONS = """
28
  You are an AI agent responsible for writing a resume based on the provided context. Your task is to generate a well-structured and professional resume that highlights the user's skills, experiences, and achievements.
29
  You will receive two pieces of JSON structured context: a job call and a LinkedIn profile.
 
73
 
74
  Format your response as a JSON object with requested fields. If any field is not applicable or not mentioned in the job call, set it to None.
75
 
76
+ """
77
+
78
+ REPO_SELECTION_PROMPT = """
79
+ You are an AI agent responsible for selecting the most relevant GitHub repositories from a user's profile based on a job call. Your task is to analyze the provided job call and choose repositories that best match the requirements and skills mentioned in the job description.
80
+ Prioritize more recent and active repositories that demonstrate the user's skills and experience related to the job call. Format your output as a Python list containing only the repository titles like this:
81
+
82
+ ['first-repo', 'second-repo', 'third-repo']
83
+
84
+ Respond with only this list of repository titles, without any additional text or explanation.
85
+
86
  """
functions/gradio.py CHANGED
@@ -10,7 +10,7 @@ from functions.helper import clean_text_whitespace
10
  from functions.linkedin_resume import extract_text
11
  from functions.github import get_github_repositories
12
  from functions.job_call import summarize_job_call
13
- # from functions.writer_agent import write_resume
14
 
15
  # pylint: disable=broad-exception-caught
16
 
@@ -60,29 +60,29 @@ def process_inputs(
60
  logger.info("User instructions: %s", user_instructions[:100] if user_instructions else "None")
61
  result = ""
62
 
63
- # # ==================================================================== #
64
- # # Extract and structure text from the linkedin profile PDF
65
- # logger.info("Extracting text from LinkedIn PDF: %s", linkedin_pdf_path)
66
- # linkedin_resume = extract_text(linkedin_pdf_path)
67
 
68
- # if linkedin_resume:
69
- # logger.info("LinkedIn PDF text extraction successful")
70
 
71
- # else:
72
- # logger.error("LinkedIn PDF text extraction failed")
73
 
74
- # # ==================================================================== #
75
- # # Process GitHub profile
76
- # logger.info("Processing GitHub profile: %s", github_username.strip())
77
 
78
- # # Retrieve repositories from GitHub
79
- # github_repositories = get_github_repositories(github_username.strip())
80
 
81
- # if github_repositories:
82
- # logger.info("GitHub repositories retrieved successfully")
83
 
84
- # else:
85
- # logger.error("GitHub repositories retrieval failed")
86
 
87
  # ==================================================================== #
88
  # Process job post text
@@ -97,6 +97,7 @@ def process_inputs(
97
  else:
98
  logger.error("Job post parsing failed")
99
 
 
100
  # # Process user instructions
101
  # if user_instructions and user_instructions.strip():
102
  # result += "✅ Additional instructions provided\n"
@@ -108,20 +109,19 @@ def process_inputs(
108
 
109
  # logger.info("Input processing completed")
110
 
111
- # # Generate resume only if we have valid extraction result
112
- # if extraction_result and extraction_result.get("status") == "success":
113
- # try:
114
- # _ = write_resume(extraction_result, user_instructions, summary)
115
- # result += "\n✅ Resume generated successfully\n"
116
- # logger.info("Resume generation completed successfully")
 
117
 
118
- # except Exception as e:
119
- # result += f"\n❌ Resume generation failed: {str(e)}\n"
120
- # logger.error("Resume generation failed: %s", str(e))
121
- # else:
122
- # result += "\n❌ Cannot generate resume: No valid LinkedIn data extracted\n"
123
- # result += "Please ensure you upload a valid LinkedIn PDF export file.\n"
124
- # logger.warning("Resume generation skipped - no valid LinkedIn data available")
125
 
126
  return result
127
 
 
10
  from functions.linkedin_resume import extract_text
11
  from functions.github import get_github_repositories
12
  from functions.job_call import summarize_job_call
13
+ from functions.writer_agent import write_resume
14
 
15
  # pylint: disable=broad-exception-caught
16
 
 
60
  logger.info("User instructions: %s", user_instructions[:100] if user_instructions else "None")
61
  result = ""
62
 
63
+ # ==================================================================== #
64
+ # Extract and structure text from the linkedin profile PDF
65
+ logger.info("Extracting text from LinkedIn PDF: %s", linkedin_pdf_path)
66
+ linkedin_resume = extract_text(linkedin_pdf_path)
67
 
68
+ if linkedin_resume:
69
+ logger.info("LinkedIn PDF text extraction successful")
70
 
71
+ else:
72
+ logger.error("LinkedIn PDF text extraction failed")
73
 
74
+ # ==================================================================== #
75
+ # Process GitHub profile
76
+ logger.info("Processing GitHub profile: %s", github_username.strip())
77
 
78
+ # Retrieve repositories from GitHub
79
+ github_repositories = get_github_repositories(github_username.strip())
80
 
81
+ if github_repositories:
82
+ logger.info("GitHub repositories retrieved successfully")
83
 
84
+ else:
85
+ logger.error("GitHub repositories retrieval failed")
86
 
87
  # ==================================================================== #
88
  # Process job post text
 
97
  else:
98
  logger.error("Job post parsing failed")
99
 
100
+ # # ==================================================================== #
101
  # # Process user instructions
102
  # if user_instructions and user_instructions.strip():
103
  # result += "✅ Additional instructions provided\n"
 
109
 
110
  # logger.info("Input processing completed")
111
 
112
+ # ==================================================================== #
113
+ # Generate resume only if we have valid extraction result
114
+ if linkedin_resume and github_repositories and job_post:
115
+ logger.info("Generating resume with provided data")
116
+
117
+ try:
118
+ _ = write_resume(linkedin_resume, github_repositories, job_post)
119
 
120
+ except Exception as e:
121
+ result += f"\n❌ Resume generation failed: {str(e)}\n"
122
+ logger.error("Resume generation failed: %s", str(e))
123
+ else:
124
+ logger.warning("Resume generation skipped - content missing")
 
 
125
 
126
  return result
127
 
functions/job_call.py CHANGED
@@ -66,6 +66,7 @@ def summarize_job_call(job_call: str) -> str:
66
  summary = response.choices[0].message.content
67
 
68
  try:
 
69
  summary = json.loads(summary)
70
  print(summary.keys())
71
 
 
66
  summary = response.choices[0].message.content
67
 
68
  try:
69
+ print(summary)
70
  summary = json.loads(summary)
71
  print(summary.keys())
72
 
functions/writer_agent.py CHANGED
@@ -1,84 +1,167 @@
1
  '''Agent responsible for writing the resume based on user provided context'''
2
 
 
3
  import json
4
  import logging
5
  import os
 
6
  from smolagents import OpenAIServerModel, CodeAgent
7
- from configuration import INFERENCE_URL, AGENT_MODEL, AGENT_INSTRUCTIONS
 
 
 
 
 
 
 
8
 
9
  # pylint: disable=broad-exception-caught
10
 
11
- logging.basicConfig(level=logging.INFO)
12
- logger = logging.getLogger(__name__)
13
 
14
- def write_resume(content: str, user_instructions: str = None, job_summary: dict = None) -> str:
15
 
16
  """
17
  Generates a resume based on the provided content.
18
 
19
  Args:
20
- content (str): The content to be used for generating the resume.
21
- user_instructions (str, optional): Additional instructions from the user.
22
- job_summary (dict, optional): Extracted/summarized job call information.
23
 
24
  Returns:
25
  str: The generated resume.
26
  """
27
 
28
- if content['status'] == 'success':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- model = OpenAIServerModel(
31
- model_id=AGENT_MODEL,
32
- api_base=INFERENCE_URL,
33
- api_key=os.environ.get("API_KEY"),
34
- )
35
 
36
- agent = CodeAgent(
37
- model=model,
38
- tools=[],
39
- additional_authorized_imports=['json', 'pandas'],
40
- name="writer_agent",
41
- verbosity_level=1,
42
- max_steps=20,
43
- planning_interval=5
44
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- # Prepare instructions - combine default with user instructions and job summary
47
- instructions = AGENT_INSTRUCTIONS
48
 
49
- if job_summary is not None:
50
- instructions += f"\n\nJob Requirements and Details:\n{json.dumps(job_summary)}"
51
- logger.info("Added job summary to agent prompt")
52
 
53
- if user_instructions and user_instructions.strip():
 
 
 
 
 
54
 
55
- instructions += f"\n\nAdditional user instructions:\n{user_instructions.strip()}"
56
- logger.info("Added user instructions to agent prompt")
 
 
 
57
 
58
- submitted_answer = agent.run(
59
- instructions + '\n' + json.dumps(content['structured_text']),
60
- )
 
 
 
 
 
 
 
61
 
62
- logger.info("submitted_answer: %s", submitted_answer)
 
 
 
63
 
64
- # Create data directory if it doesn't exist
65
- data_dir = 'data'
66
 
67
- if not os.path.exists(data_dir):
 
 
68
 
69
- os.makedirs(data_dir)
70
- logger.info("Created data directory: %s", data_dir)
 
 
71
 
72
- # Save the resume to resume.md in the data directory
73
- resume_file_path = os.path.join(data_dir, 'resume.md')
74
 
75
- try:
76
- with open(resume_file_path, 'w', encoding='utf-8') as f:
77
- f.write(submitted_answer)
78
 
79
- logger.info("Resume saved to: %s", resume_file_path)
 
80
 
81
- except Exception as e:
82
- logger.error("Failed to save resume to file: %s", e)
 
83
 
84
- return submitted_answer
 
1
  '''Agent responsible for writing the resume based on user provided context'''
2
 
3
+ import ast
4
  import json
5
  import logging
6
  import os
7
+ from openai import OpenAI
8
  from smolagents import OpenAIServerModel, CodeAgent
9
+ from configuration import (
10
+ INFERENCE_URL,
11
+ AGENT_MODEL,
12
+ AGENT_INSTRUCTIONS,
13
+ WRITER_MODEL,
14
+ REPO_SELECTION_PROMPT
15
+ )
16
+
17
 
18
  # pylint: disable=broad-exception-caught
19
 
 
 
20
 
21
def write_resume(linkedin_resume: dict, github_repositories: list, job_call: dict) -> list:

    """
    Runs the repository-selection step of resume generation.

    Writing the resume text itself is not implemented in this version: the
    function currently only asks _choose_repositories() which of the
    applicant's repositories are relevant to the job call and returns that
    selection.

    Args:
        linkedin_resume (dict): Resume content extracted from the LinkedIn
            profile. Accepted for the upcoming writing step; not used yet.
        github_repositories (list): Repository records for the applicant,
            passed unchanged to _choose_repositories().
        job_call (dict): Extracted/summarized job call information.

    Returns:
        list: The repositories selected by _choose_repositories().
    """

    logger = logging.getLogger(f'{__name__}.write_resume')

    logger.info("Selecting relevant GitHub repositories based on job call")
    project_repos = _choose_repositories(github_repositories, job_call)

    # Log a count rather than printing the records: each record may carry a
    # full README, which would flood stdout.
    logger.info("Selected %d GitHub repositories", len(project_repos))

    # TODO: re-enable resume writing. The previous smolagents CodeAgent
    # implementation (build the agent, combine AGENT_INSTRUCTIONS with the job
    # summary and user instructions, run it, save the answer to
    # data/resume.md) lives in version control. It must be adapted to this
    # signature before it is restored, because it referenced `content`,
    # `job_summary` and `user_instructions`, which no longer exist here.

    return project_repos
96
+
97
+
98
def _choose_repositories(github_repositories: list, job_call: dict) -> list:
    """
    Choose relevant GitHub repositories based on the job call requirements.

    The job call and the repository metadata (everything except the 'readme'
    field) are sent to the model, which is prompted to answer with a Python
    list of repository names. The repositories whose 'name' appears in that
    list are returned.

    Args:
        github_repositories (list): Repository records (dicts) for the
            applicant. Each record is matched on its 'name' key.
        job_call (dict): Extracted/summarized job call information.

    Returns:
        list: The records from github_repositories that the model selected,
            in their original order. Empty when there are no repositories,
            the API call fails, or the reply cannot be parsed.
    """

    logger = logging.getLogger(f'{__name__}._choose_repositories')

    # Nothing to choose from: skip the API call entirely.
    if not github_repositories:
        logger.warning('No GitHub repositories supplied for selection')
        return []

    # Create a new repo list without the full README text - this way we can save on input tokens
    # by only sending the model the repo metadata, title, description, topics, etc.
    repo_data = [
        {k: v for k, v in d.items() if k != 'readme'}
        for d in github_repositories
    ]

    # Let the model select the most relevant repositories based on the job call
    client = OpenAI(
        base_url=INFERENCE_URL,
        api_key=os.environ.get("API_KEY", "dummy-key-for-testing")
    )

    messages = [
        {
            'role': 'system',
            'content': f'{REPO_SELECTION_PROMPT}'
        },
        {
            'role': 'user',
            'content': f'JOB CALL\n{json.dumps(job_call)}\n\nREPOSITORIES\n{json.dumps(repo_data)}'
        }
    ]

    try:
        response = client.chat.completions.create(
            model=WRITER_MODEL,
            messages=messages,
        )
        reply = response.choices[0].message.content

    except Exception as e:
        # Previously execution continued with response = None and crashed
        # later on the membership test; return an empty selection instead.
        logger.error('Error during repository selection API call: %s', e)
        return []

    selected_names = _parse_repo_names(reply)

    if not selected_names:
        logger.warning('Model reply yielded no repository names: %r', reply)
        return []

    logger.debug('Model selected repositories: %s', sorted(selected_names))

    # Now use the repository selection response to filter the repositories
    return [
        repo for repo in github_repositories
        if repo.get('name') in selected_names
    ]


def _parse_repo_names(reply) -> set:
    """
    Extract repository names from the model's reply.

    The model is asked for a bare Python list literal, but replies are not
    guaranteed to follow the format, so the outermost [...] span is located
    and parsed, which tolerates code fences or surrounding sentences.

    Args:
        reply: Raw message content returned by the model (expected str).

    Returns:
        set: The string items of the parsed list; empty if the reply is not
            a string or does not contain a parseable list/tuple literal.
    """

    if not isinstance(reply, str):
        return set()

    start = reply.find('[')
    end = reply.rfind(']')

    if start == -1 or end <= start:
        return set()

    try:
        # literal_eval only evaluates Python literals, never arbitrary code.
        parsed = ast.literal_eval(reply[start:end + 1])

    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return set()

    # A bare string would turn the later membership test into a substring
    # match, so insist on a real sequence of names.
    if not isinstance(parsed, (list, tuple)):
        return set()

    return {name for name in parsed if isinstance(name, str)}
tests/test_resumate.py CHANGED
@@ -2,8 +2,10 @@
2
  Test for resume generation functionality
3
  """
4
 
 
5
  import unittest
6
  from functions.gradio import process_inputs
 
7
 
8
  class TestResumeGeneration(unittest.TestCase):
9
  """Test to run resume generation on pre-defined inputs."""
@@ -19,9 +21,18 @@ class TestResumeGeneration(unittest.TestCase):
19
 
20
  self.user_instructions = ""
21
 
 
 
22
 
23
- def test_generate_resume(self):
24
- """Test resume generation with pre-defined inputs."""
 
 
 
 
 
 
 
25
 
26
  result = process_inputs(
27
  linkedin_pdf_path=self.linkedin_pdf_path,
@@ -31,3 +42,10 @@ class TestResumeGeneration(unittest.TestCase):
31
  )
32
 
33
  print(result)
 
 
 
 
 
 
 
 
2
  Test for resume generation functionality
3
  """
4
 
5
+ import json
6
  import unittest
7
  from functions.gradio import process_inputs
8
+ from functions.writer_agent import write_resume
9
 
10
  class TestResumeGeneration(unittest.TestCase):
11
  """Test to run resume generation on pre-defined inputs."""
 
21
 
22
  self.user_instructions = ""
23
 
24
+ with open('tests/test_data/github_repos.json', 'r', encoding='utf-8') as f:
25
+ self.github_repositories = json.load(f)
26
 
27
+ with open('tests/test_data/job_call.json', 'r', encoding='utf-8') as f:
28
+ self.job_call = json.load(f)
29
+
30
+ with open('tests/test_data/linkedin_resume.json', 'r', encoding='utf-8') as f:
31
+ self.linkedin_resume = json.load(f)
32
+
33
+
34
+ def test_process_inputs(self):
35
+ """Test input preprocessing for resume generation with pre-defined inputs."""
36
 
37
  result = process_inputs(
38
  linkedin_pdf_path=self.linkedin_pdf_path,
 
42
  )
43
 
44
  print(result)
45
+
46
+ def test_write_resume(self):
47
+ """Test resume writing functionality with pre-defined inputs."""
48
+
49
+ result = write_resume(self.linkedin_resume, self.github_repositories, self.job_call)
50
+
51
+ print(result)