gperdrizet committed on
Commit
b888581
·
verified ·
1 Parent(s): aa26954

Finished writer model

Browse files
Files changed (3) hide show
  1. configuration.py +51 -25
  2. functions/gradio.py +0 -1
  3. functions/writer_agent.py +98 -56
configuration.py CHANGED
@@ -18,43 +18,58 @@ SUMMARIZER_MODEL = "claude-3-5-haiku-20241022"
18
  # - Qwen2.5-Coder-14B-Instruct works OK, but is not great at markdown formatting
19
  # and tends to get some details wrong.
20
  # - Claude-3-5-Haiku is the best model for this task so far.
21
- AGENT_MODEL = "claude-3-5-haiku-20241022"
22
-
23
- # Will be used for tasked related to writing the resume such as selecting
24
- # the best GitHub repositories, writing the resume content, etc.
25
  WRITER_MODEL = "claude-3-5-haiku-20241022"
26
 
27
- AGENT_INSTRUCTIONS = """
28
  You are an AI agent responsible for writing a resume based on the provided context. Your task is to generate a well-structured and professional resume that highlights the user's skills, experiences, and achievements.
29
- You will receive two pieces of JSON structured context: a job call and a LinkedIn profile.
30
-
31
- LINKEDIN PROFILE EXAMPLE
32
-
33
- "structured_text": {
34
- "sections": {
35
- "contact_info": "Contact details",
36
- "summary": "Personal summary statement",
37
- "skills": "Skills list",
38
- "experience": "List of work experiences",
39
- "education": "List of degrees",
40
- "other sections": "Any other relevant sections from LinkedIn profile"
41
- },
42
  }
43
 
44
- JOB CALL EXAMPLE
45
 
46
- 'Job title': 'Position title',
47
- 'Company description': 'Description of employer',
48
- 'Job description': 'Job description summary',
49
- 'Key skills': 'Required skills list',
50
- 'Experience level': 'Required experience',
51
- 'Education requirements': 'Required education level or degree'
 
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  Use this information to create a comprehensive resume that emphasizes the match between the provided linkedin profile and the job call. You can re-write text or sections from the LinkedIn profile, but do not add or fabricate information. Everything in the resume should be based on the provided context. The resume should include the following sections:
55
  - Contact Information
56
  - Summary
57
  - Skills
 
58
  - Work Experience
59
  - Education
60
 
@@ -83,4 +98,15 @@ Prioritize more recent and active repositories that demonstrate the user's skill
83
 
84
  Respond with only this list of repository titles, without any additional text or explanation.
85
 
 
 
 
 
 
 
 
 
 
 
 
86
  """
 
18
  # - Qwen2.5-Coder-14B-Instruct works OK, but is not great at markdown formatting
19
  # and tends to get some details wrong.
20
  # - Claude-3-5-Haiku is the best model for this task so far.
 
 
 
 
21
  WRITER_MODEL = "claude-3-5-haiku-20241022"
22
 
23
+ WRITER_INSTRUCTIONS = """
24
  You are an AI agent responsible for writing a resume based on the provided context. Your task is to generate a well-structured and professional resume that highlights the user's skills, experiences, and achievements.
25
+ You will receive three pieces of JSON structured context: a job call, a LinkedIn resume and a list of relevant projects. Each of these will be formatted as follows:
26
+
27
+ JOB CALL FORMAT
28
+
29
+ {
30
+ "job_title": "Position",
31
+ "company_description": "Company or organization information",
32
+ "job_description": "Description of role and responsibilities",
33
+ "key_skills": "List of required skills",
34
+ "tools_technologies": "List of necessary tools and technologies",
35
+ "experience_level": "Prior experience necessary",
36
+ "education_requirements": "Desired education level"
 
37
  }
38
 
39
+ LINKEDIN RESUME FORMAT
40
 
41
+ {
42
+ "contact_info": "Applicant contact information",
43
+ "certifications": "Licenses and certifications",
44
+ "summary": "Applicant personal statement",
45
+ "experience": "Applicant professional experience",
46
+ "education": "Applicant education and degrees"
47
+ }
48
 
49
+ PROJECT LIST FORMAT
50
+
51
+ {
52
+ "projects": [
53
+ {
54
+ "title": "Repository 1 title",
55
+ "description": "Repository 1 project description",
56
+ "technologies": "List of tools and technologies",
57
+ "link": "URL"
58
+ },
59
+ {
60
+ "title": "Repository 2 title",
61
+ "description": "Repository 2 project description",
62
+ "technologies": "List of tools and technologies",
63
+ "link": "URL"
64
+ },
65
+ ]
66
+ }
67
 
68
  Use this information to create a comprehensive resume that emphasizes the match between the provided linkedin profile and the job call. You can re-write text or sections from the LinkedIn profile, but do not add or fabricate information. Everything in the resume should be based on the provided context. The resume should include the following sections:
69
  - Contact Information
70
  - Summary
71
  - Skills
72
+ - Projects
73
  - Work Experience
74
  - Education
75
 
 
98
 
99
  Respond with only this list of repository titles, without any additional text or explanation.
100
 
101
+ """
102
+
103
+ PROJECTS_SECTION_PROMPT = """
104
+ You are an AI agent responsible for writing the projects section of a resume based on selected GitHub repositories. Your task is to generate a well-structured and professional description of the projects that highlights the user's skills, contributions, and achievements.
105
+ You will receive a list of repository titles and a job call. Use this information to create a comprehensive projects section that emphasizes the match between the provided repositories and the job call. You can re-write text or sections from the repositories, but do not add or fabricate information.
106
+ Everything in the projects section should be based on the provided context. Format your response as a JSON object with the following fields:
107
+ - 'projects': A list of dictionaries, each containing:
108
+ - 'title': The title of the project
109
+ - 'description': A brief description of the project, including the user's role and contributions
110
+ - 'technologies': A list of technologies used in the project
111
+ - 'link': A link to the project repository
112
  """
functions/gradio.py CHANGED
@@ -118,7 +118,6 @@ def process_inputs(
118
  _ = write_resume(linkedin_resume, github_repositories, job_post)
119
 
120
  except Exception as e:
121
- result += f"\n❌ Resume generation failed: {str(e)}\n"
122
  logger.error("Resume generation failed: %s", str(e))
123
  else:
124
  logger.warning("Resume generation skipped - content missing")
 
118
  _ = write_resume(linkedin_resume, github_repositories, job_post)
119
 
120
  except Exception as e:
 
121
  logger.error("Resume generation failed: %s", str(e))
122
  else:
123
  logger.warning("Resume generation skipped - content missing")
functions/writer_agent.py CHANGED
@@ -5,13 +5,12 @@ import json
5
  import logging
6
  import os
7
  from openai import OpenAI
8
- from smolagents import OpenAIServerModel, CodeAgent
9
  from configuration import (
10
  INFERENCE_URL,
11
- AGENT_MODEL,
12
- AGENT_INSTRUCTIONS,
13
  WRITER_MODEL,
14
- REPO_SELECTION_PROMPT
 
15
  )
16
 
17
 
@@ -36,63 +35,67 @@ def write_resume(linkedin_resume: dict, github_repositories: list, job_call: dic
36
 
37
  logger.info("Selecting relevant GitHub repositories based on job call")
38
  project_repos = _choose_repositories(github_repositories, job_call)
39
- print("project_repos:", project_repos)
40
 
41
- # model = OpenAIServerModel(
42
- # model_id=AGENT_MODEL,
43
- # api_base=INFERENCE_URL,
44
- # api_key=os.environ.get("API_KEY"),
45
- # )
46
 
47
- # agent = CodeAgent(
48
- # model=model,
49
- # tools=[],
50
- # additional_authorized_imports=['json', 'pandas'],
51
- # name="writer_agent",
52
- # verbosity_level=1,
53
- # max_steps=20,
54
- # planning_interval=5
55
- # )
56
 
57
- # # Prepare instructions - combine default with user instructions and job summary
58
- # instructions = AGENT_INSTRUCTIONS
 
 
 
59
 
60
- # if job_summary is not None:
61
- # instructions += f"\n\nJob Requirements and Details:\n{json.dumps(job_summary)}"
62
- # logger.info("Added job summary to agent prompt")
63
 
64
- # if user_instructions and user_instructions.strip():
65
 
66
- # instructions += f"\n\nAdditional user instructions:\n{user_instructions.strip()}"
67
- # logger.info("Added user instructions to agent prompt")
 
 
 
 
 
 
 
 
68
 
69
- # submitted_answer = agent.run(
70
- # instructions + '\n' + json.dumps(content['structured_text']),
71
- # )
 
72
 
73
- # logger.info("submitted_answer: %s", submitted_answer)
 
74
 
75
- # # Create data directory if it doesn't exist
76
- # data_dir = 'data'
 
77
 
78
- # if not os.path.exists(data_dir):
 
79
 
80
- # os.makedirs(data_dir)
81
- # logger.info("Created data directory: %s", data_dir)
82
 
83
- # # Save the resume to resume.md in the data directory
84
- # resume_file_path = os.path.join(data_dir, 'resume.md')
85
 
86
- # try:
87
- # with open(resume_file_path, 'w', encoding='utf-8') as f:
88
- # f.write(submitted_answer)
89
 
90
- # logger.info("Resume saved to: %s", resume_file_path)
 
91
 
92
- # except Exception as e:
93
- # logger.error("Failed to save resume to file: %s", e)
 
 
 
 
 
 
94
 
95
- return project_repos
96
 
97
 
98
  def _choose_repositories(github_repositories: list, job_call: dict) -> list:
@@ -109,7 +112,6 @@ def _choose_repositories(github_repositories: list, job_call: dict) -> list:
109
 
110
  logger = logging.getLogger(f'{__name__}._choose_repositories')
111
 
112
-
113
  # Create a new repo list without the full README text - this way we can save on input tokens
114
  # by only sending the model the repo metadata, title, description, topics, etc.
115
  repo_data = [
@@ -149,19 +151,59 @@ def _choose_repositories(github_repositories: list, job_call: dict) -> list:
149
  if response is not None:
150
  response = response.choices[0].message.content
151
  response = ast.literal_eval(response)
152
- print(f'Selected repositories {type(response)}: {response}')
153
 
154
  # Now use the repository selection response to filter the repositories
155
- selected_repos = []
 
 
 
 
156
 
157
- for repo in github_repositories:
158
- print(repo['name'])
159
 
160
- if repo['name'] in response:
161
- selected_repos.append(repo)
 
162
 
163
- # selected_repos = [
164
- # repo for repo in github_repositories if repo['name'] in response
165
- # ]
166
 
167
- return selected_repos
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import logging
6
  import os
7
  from openai import OpenAI
 
8
  from configuration import (
9
  INFERENCE_URL,
10
+ WRITER_INSTRUCTIONS,
 
11
  WRITER_MODEL,
12
+ REPO_SELECTION_PROMPT,
13
+ PROJECTS_SECTION_PROMPT
14
  )
15
 
16
 
 
35
 
36
  logger.info("Selecting relevant GitHub repositories based on job call")
37
  project_repos = _choose_repositories(github_repositories, job_call)
 
38
 
39
+ logger.info("Writing projects section of the resume")
40
+ projects = _write_projects_section(project_repos, job_call)
 
 
 
41
 
 
 
 
 
 
 
 
 
 
42
 
43
+ # Create the inference client used to generate the full resume
44
+ client = OpenAI(
45
+ base_url=INFERENCE_URL,
46
+ api_key=os.environ.get("API_KEY", "dummy-key-for-testing")
47
+ )
48
 
49
+ prompt = f'JOB CALL\n{job_call}\nLINKEDIN RESUME\n{linkedin_resume}\nPROJECTS\n{projects}'
 
 
50
 
 
51
 
52
+ messages = [
53
+ {
54
+ 'role': 'system',
55
+ 'content': WRITER_INSTRUCTIONS
56
+ },
57
+ {
58
+ 'role': 'user',
59
+ 'content': prompt
60
+ }
61
+ ]
62
 
63
+ completion_args = {
64
+ 'model': WRITER_MODEL,
65
+ 'messages': messages,
66
+ }
67
 
68
+ try:
69
+ response = client.chat.completions.create(**completion_args)
70
 
71
+ except Exception as e:
72
+ response = None
73
+ logger.error('Error during job summarization API call: %s', e)
74
 
75
+ if response is not None:
76
+ response = response.choices[0].message.content
77
 
78
+ # Create data directory if it doesn't exist
79
+ data_dir = 'data'
80
 
81
+ if not os.path.exists(data_dir):
 
82
 
83
+ os.makedirs(data_dir)
84
+ logger.info("Created data directory: %s", data_dir)
 
85
 
86
+ # Save the resume to resume.md in the data directory
87
+ resume_file_path = os.path.join(data_dir, 'resume.md')
88
 
89
+ try:
90
+ with open(resume_file_path, 'w', encoding='utf-8') as f:
91
+ f.write(response)
92
+
93
+ logger.info("Resume saved to: %s", resume_file_path)
94
+
95
+ except Exception as e:
96
+ logger.error("Failed to save resume to file: %s", e)
97
 
98
+ return response
99
 
100
 
101
  def _choose_repositories(github_repositories: list, job_call: dict) -> list:
 
112
 
113
  logger = logging.getLogger(f'{__name__}._choose_repositories')
114
 
 
115
  # Create a new repo list without the full README text - this way we can save on input tokens
116
  # by only sending the model the repo metadata, title, description, topics, etc.
117
  repo_data = [
 
151
  if response is not None:
152
  response = response.choices[0].message.content
153
  response = ast.literal_eval(response)
 
154
 
155
  # Now use the repository selection response to filter the repositories
156
+ selected_repos = [
157
+ repo for repo in github_repositories if repo['name'] in response
158
+ ]
159
+
160
+ return selected_repos
161
 
 
 
162
 
163
+ def _write_projects_section(project_repos: list, job_call: dict) -> str:
164
+ """
165
+ Write the projects section of the resume based on selected GitHub repositories.
166
 
167
+ Args:
168
+ project_repos (list): List of relevant GitHub repositories.
169
+ job_call (dict): Extracted/summarized job call information.
170
 
171
+ Returns:
172
+ str: Formatted projects section for the resume.
173
+ """
174
+
175
+ logger = logging.getLogger(f'{__name__}._write_projects_section')
176
+
177
+ # Create the inference client used to write the projects section
178
+ client = OpenAI(
179
+ base_url=INFERENCE_URL,
180
+ api_key=os.environ.get("API_KEY", "dummy-key-for-testing")
181
+ )
182
+
183
+ messages = [
184
+ {
185
+ 'role': 'system',
186
+ 'content': f'{PROJECTS_SECTION_PROMPT}'
187
+ },
188
+ {
189
+ 'role': 'user',
190
+ 'content': f'JOB CALL\n{json.dumps(job_call)}\n\nREPOSITORIES\n{json.dumps(project_repos)}'
191
+ }
192
+ ]
193
+
194
+ completion_args = {
195
+ 'model': WRITER_MODEL,
196
+ 'messages': messages,
197
+ }
198
+
199
+ try:
200
+ response = client.chat.completions.create(**completion_args)
201
+
202
+ except Exception as e:
203
+ response = None
204
+ logger.error('Error during job summarization API call: %s', e)
205
+
206
+ if response is not None:
207
+ response = response.choices[0].message.content
208
+
209
+ return response