Spaces:

gperdrizet
/

resumate

Configuration error

App Files Files Community

gperdrizet committed on Jul 29

Commit

aa26954

verified ·

1 Parent(s): 9161371

Added GitHub repository selector

Browse files

Files changed (5) hide show

configuration.py +14 -0
functions/gradio.py +31 -31
functions/job_call.py +1 -0
functions/writer_agent.py +131 -48
tests/test_resumate.py +20 -2

configuration.py CHANGED Viewed

@@ -20,6 +20,10 @@ SUMMARIZER_MODEL = "claude-3-5-haiku-20241022"
 # - Claude-3-5-Haiku is the best model for this task so far.
 AGENT_MODEL = "claude-3-5-haiku-20241022"
 AGENT_INSTRUCTIONS = """
 You are an AI agent responsible for writing a resume based on the provided context. Your task is to generate a well-structured and professional resume that highlights the user's skills, experiences, and achievements.
 You will receive two pieces of JSON structured context: a job call and a LinkedIn profile.
@@ -69,4 +73,14 @@ You are a career support AI agent tasked with extracting key information from a
 Format your response as a JSON object with requested fields. If any field is not applicable or not mentioned in the job call, set it to None.
 """

 # - Claude-3-5-Haiku is the best model for this task so far.
 AGENT_MODEL = "claude-3-5-haiku-20241022"
+# Will be used for tasks related to writing the resume, such as selecting
+# the best GitHub repositories, writing the resume content, etc.
+WRITER_MODEL = "claude-3-5-haiku-20241022"
 AGENT_INSTRUCTIONS = """
 You are an AI agent responsible for writing a resume based on the provided context. Your task is to generate a well-structured and professional resume that highlights the user's skills, experiences, and achievements.
 You will receive two pieces of JSON structured context: a job call and a LinkedIn profile.
 Format your response as a JSON object with requested fields. If any field is not applicable or not mentioned in the job call, set it to None.
+"""
+REPO_SELECTION_PROMPT = """
+You are an AI agent responsible for selecting the most relevant GitHub repositories from a user's profile based on a job call. Your task is to analyze the provided job call and choose repositories that best match the requirements and skills mentioned in the job description.
+Prioritize more recent and active repositories that demonstrate the user's skills and experience related to the job call. Format your output as a Python list containing only the repository titles like this:
+['first-repo', 'second-repo', 'third-repo']
+Respond with only this list of repository titles, without any additional text or explanation.
 """

functions/gradio.py CHANGED Viewed

@@ -10,7 +10,7 @@ from functions.helper import clean_text_whitespace
 from functions.linkedin_resume import extract_text
 from functions.github import get_github_repositories
 from functions.job_call import summarize_job_call
-# from functions.writer_agent import write_resume
 # pylint: disable=broad-exception-caught
@@ -60,29 +60,29 @@ def process_inputs(
     logger.info("User instructions: %s", user_instructions[:100] if user_instructions else "None")
     result = ""
-    # # ==================================================================== #
-    # # Extract and structure text from the linkedin profile PDF
-    # logger.info("Extracting text from LinkedIn PDF: %s", linkedin_pdf_path)
-    # linkedin_resume = extract_text(linkedin_pdf_path)
-    # if linkedin_resume:
-    #     logger.info("LinkedIn PDF text extraction successful")
-    # else:
-    #     logger.error("LinkedIn PDF text extraction failed")
-    # # ==================================================================== #
-    # # Process GitHub profile
-    # logger.info("Processing GitHub profile: %s", github_username.strip())
-    # # Retrieve repositories from GitHub
-    # github_repositories = get_github_repositories(github_username.strip())
-    # if github_repositories:
-    #     logger.info("GitHub repositories retrieved successfully")
-    # else:
-    #     logger.error("GitHub repositories retrieval failed")
     # ==================================================================== #
     # Process job post text
@@ -97,6 +97,7 @@ def process_inputs(
     else:
         logger.error("Job post parsing failed")
     # # Process user instructions
     # if user_instructions and user_instructions.strip():
     #     result += "✅ Additional instructions provided\n"
@@ -108,20 +109,19 @@ def process_inputs(
     # logger.info("Input processing completed")
-    # # Generate resume only if we have valid extraction result
-    # if extraction_result and extraction_result.get("status") == "success":
-    #     try:
-    #         _ = write_resume(extraction_result, user_instructions, summary)
-    #         result += "\n✅ Resume generated successfully\n"
-    #         logger.info("Resume generation completed successfully")
-    #     except Exception as e:
-    #         result += f"\n❌ Resume generation failed: {str(e)}\n"
-    #         logger.error("Resume generation failed: %s", str(e))
-    # else:
-    #     result += "\n❌ Cannot generate resume: No valid LinkedIn data extracted\n"
-    #     result += "Please ensure you upload a valid LinkedIn PDF export file.\n"
-    #     logger.warning("Resume generation skipped - no valid LinkedIn data available")
     return result

 from functions.linkedin_resume import extract_text
 from functions.github import get_github_repositories
 from functions.job_call import summarize_job_call
+from functions.writer_agent import write_resume
 # pylint: disable=broad-exception-caught
     logger.info("User instructions: %s", user_instructions[:100] if user_instructions else "None")
     result = ""
+    # ==================================================================== #
+    # Extract and structure text from the linkedin profile PDF
+    logger.info("Extracting text from LinkedIn PDF: %s", linkedin_pdf_path)
+    linkedin_resume = extract_text(linkedin_pdf_path)
+    if linkedin_resume:
+        logger.info("LinkedIn PDF text extraction successful")
+    else:
+        logger.error("LinkedIn PDF text extraction failed")
+    # ==================================================================== #
+    # Process GitHub profile
+    logger.info("Processing GitHub profile: %s", github_username.strip())
+    # Retrieve repositories from GitHub
+    github_repositories = get_github_repositories(github_username.strip())
+    if github_repositories:
+        logger.info("GitHub repositories retrieved successfully")
+    else:
+        logger.error("GitHub repositories retrieval failed")
     # ==================================================================== #
     # Process job post text
     else:
         logger.error("Job post parsing failed")
+    # # ==================================================================== #
     # # Process user instructions
     # if user_instructions and user_instructions.strip():
     #     result += "✅ Additional instructions provided\n"
     # logger.info("Input processing completed")
+    # ==================================================================== #
+    # Generate resume only if we have valid extraction result
+    if linkedin_resume and github_repositories and job_post:
+        logger.info("Generating resume with provided data")
+        try:
+            _ = write_resume(linkedin_resume, github_repositories, job_post)
+        except Exception as e:
+            result += f"\n❌ Resume generation failed: {str(e)}\n"
+            logger.error("Resume generation failed: %s", str(e))
+    else:
+        logger.warning("Resume generation skipped - content missing")
     return result

functions/job_call.py CHANGED Viewed

@@ -66,6 +66,7 @@ def summarize_job_call(job_call: str) -> str:
         summary = response.choices[0].message.content
         try:
             summary = json.loads(summary)
             print(summary.keys())

         summary = response.choices[0].message.content
         try:
+            print(summary)
             summary = json.loads(summary)
             print(summary.keys())

functions/writer_agent.py CHANGED Viewed

@@ -1,84 +1,167 @@
 '''Agent responsible for writing the resume based on user provided context'''
 import json
 import logging
 import os
 from smolagents import OpenAIServerModel, CodeAgent
-from configuration import INFERENCE_URL, AGENT_MODEL, AGENT_INSTRUCTIONS
 # pylint: disable=broad-exception-caught
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-def write_resume(content: str, user_instructions: str = None, job_summary: dict = None) -> str:
     """
     Generates a resume based on the provided content.
     Args:
-        content (str): The content to be used for generating the resume.
-        user_instructions (str, optional): Additional instructions from the user.
-        job_summary (dict, optional): Extracted/summarized job call information.
     Returns:
         str: The generated resume.
     """
-    if content['status'] == 'success':
-        model = OpenAIServerModel(
-            model_id=AGENT_MODEL,
-            api_base=INFERENCE_URL,
-            api_key=os.environ.get("API_KEY"),
-        )
-        agent = CodeAgent(
-            model=model,
-            tools=[],
-            additional_authorized_imports=['json', 'pandas'],
-            name="writer_agent",
-            verbosity_level=1,
-            max_steps=20,
-            planning_interval=5
-        )
-        # Prepare instructions - combine default with user instructions and job summary
-        instructions = AGENT_INSTRUCTIONS
-        if job_summary is not None:
-            instructions += f"\n\nJob Requirements and Details:\n{json.dumps(job_summary)}"
-            logger.info("Added job summary to agent prompt")
-        if user_instructions and user_instructions.strip():
-            instructions += f"\n\nAdditional user instructions:\n{user_instructions.strip()}"
-            logger.info("Added user instructions to agent prompt")
-        submitted_answer = agent.run(
-            instructions + '\n' + json.dumps(content['structured_text']),
-        )
-        logger.info("submitted_answer: %s", submitted_answer)
-        # Create data directory if it doesn't exist
-        data_dir = 'data'
-        if not os.path.exists(data_dir):
-            os.makedirs(data_dir)
-            logger.info("Created data directory: %s", data_dir)
-        # Save the resume to resume.md in the data directory
-        resume_file_path = os.path.join(data_dir, 'resume.md')
-        try:
-            with open(resume_file_path, 'w', encoding='utf-8') as f:
-                f.write(submitted_answer)
-            logger.info("Resume saved to: %s", resume_file_path)
-        except Exception as e:
-            logger.error("Failed to save resume to file: %s", e)
-    return submitted_answer

 '''Agent responsible for writing the resume based on user provided context'''
+import ast
 import json
 import logging
 import os
+from openai import OpenAI
 from smolagents import OpenAIServerModel, CodeAgent
+from configuration import (
+    INFERENCE_URL,
+    AGENT_MODEL,
+    AGENT_INSTRUCTIONS,
+    WRITER_MODEL,
+    REPO_SELECTION_PROMPT
+)
 # pylint: disable=broad-exception-caught
+def write_resume(linkedin_resume: dict, github_repositories: list, job_call: dict) -> str:
     """
     Generates a resume based on the provided content.
     Args:
+        linkedin_resume (dict): Resume content extracted from the LinkedIn profile.
+        github_repositories (list): Information about the applicant's GitHub repositories.
+        job_call (dict): Extracted/summarized job call information.
     Returns:
         str: The generated resume.
     """
+    # NOTE(review): the resume-writing agent code below is commented out, so
+    # this function currently returns the repository list produced by
+    # _choose_repositories rather than the resume string promised by the
+    # docstring and the `-> str` annotation. `linkedin_resume` is not used yet.
+    logger = logging.getLogger(f'{__name__}.write_resume')
+    logger.info("Selecting relevant GitHub repositories based on job call")
+    project_repos = _choose_repositories(github_repositories, job_call)
+    print("project_repos:", project_repos)
+        # model = OpenAIServerModel(
+        #     model_id=AGENT_MODEL,
+        #     api_base=INFERENCE_URL,
+        #     api_key=os.environ.get("API_KEY"),
+        # )
+        # agent = CodeAgent(
+        #     model=model,
+        #     tools=[],
+        #     additional_authorized_imports=['json', 'pandas'],
+        #     name="writer_agent",
+        #     verbosity_level=1,
+        #     max_steps=20,
+        #     planning_interval=5
+        # )
+        # # Prepare instructions - combine default with user instructions and job summary
+        # instructions = AGENT_INSTRUCTIONS
+        # if job_summary is not None:
+        #     instructions += f"\n\nJob Requirements and Details:\n{json.dumps(job_summary)}"
+        #     logger.info("Added job summary to agent prompt")
+        # if user_instructions and user_instructions.strip():
+        #     instructions += f"\n\nAdditional user instructions:\n{user_instructions.strip()}"
+        #     logger.info("Added user instructions to agent prompt")
+        # submitted_answer = agent.run(
+        #     instructions + '\n' + json.dumps(content['structured_text']),
+        # )
+        # logger.info("submitted_answer: %s", submitted_answer)
+        # # Create data directory if it doesn't exist
+        # data_dir = 'data'
+        # if not os.path.exists(data_dir):
+        #     os.makedirs(data_dir)
+        #     logger.info("Created data directory: %s", data_dir)
+        # # Save the resume to resume.md in the data directory
+        # resume_file_path = os.path.join(data_dir, 'resume.md')
+        # try:
+        #     with open(resume_file_path, 'w', encoding='utf-8') as f:
+        #         f.write(submitted_answer)
+        #     logger.info("Resume saved to: %s", resume_file_path)
+        # except Exception as e:
+        #     logger.error("Failed to save resume to file: %s", e)
+    return project_repos
+def _choose_repositories(github_repositories: list, job_call: dict) -> list:
+    """
+    Choose relevant GitHub repositories based on the job call requirements.
+    The job call and the repository metadata (everything except the README
+    text) are sent to WRITER_MODEL, which is prompted to reply with a Python
+    list of repository names. Repositories whose 'name' appears in that list
+    are returned.
+    Args:
+        github_repositories (list): One dict per repository; entries are matched on their 'name' key.
+        job_call (dict): Extracted/summarized job call information.
+    Returns:
+        list: The selected repository dicts (README included), in their original
+            order. Empty when no repositories were given, the API call fails,
+            or the model reply cannot be parsed as a list of names.
+    """
+    logger = logging.getLogger(f'{__name__}._choose_repositories')
+    # Nothing to choose from - skip the API call entirely
+    if not github_repositories:
+        logger.warning('No GitHub repositories provided for selection')
+        return []
+    # Create a new repo list without the full README text - this way we can save on input tokens
+    # by only sending the model the repo metadata, title, description, topics, etc.
+    repo_data = [
+        {k: v for k, v in d.items() if k != 'readme'}
+        for d in github_repositories
+    ]
+    # Let the model select the most relevant repositories based on the job call
+    client = OpenAI(
+        base_url=INFERENCE_URL,
+        api_key=os.environ.get("API_KEY", "dummy-key-for-testing")
+    )
+    messages = [
+        {
+            'role': 'system',
+            'content': REPO_SELECTION_PROMPT
+        },
+        {
+            'role': 'user',
+            'content': f'JOB CALL\n{json.dumps(job_call)}\n\nREPOSITORIES\n{json.dumps(repo_data)}'
+        }
+    ]
+    try:
+        response = client.chat.completions.create(model=WRITER_MODEL, messages=messages)
+        reply = response.choices[0].message.content
+    except Exception as e:
+        # Previously execution continued with response=None and crashed on the
+        # membership test below; bail out with an empty selection instead.
+        logger.error('Error during repository selection API call: %s', e)
+        return []
+    # The prompt asks for a Python list literal. literal_eval only evaluates
+    # literals (no code execution) but raises on anything else, e.g. when the
+    # model adds explanatory text or wraps the list in a code fence.
+    try:
+        parsed = ast.literal_eval((reply or '').strip())
+    except (ValueError, SyntaxError, TypeError) as e:
+        logger.error('Could not parse repository selection reply %r: %s', reply, e)
+        return []
+    # Reject a bare string reply: `name in "some string"` would silently do
+    # substring matching instead of list membership.
+    if not isinstance(parsed, (list, tuple, set)):
+        logger.error('Repository selection reply is not a list: %r', parsed)
+        return []
+    selected_names = {name for name in parsed if isinstance(name, str)}
+    logger.info('Selected repositories: %s', sorted(selected_names))
+    # Now use the repository selection response to filter the repositories.
+    # .get() tolerates malformed entries that lack a 'name' key.
+    return [
+        repo for repo in github_repositories
+        if repo.get('name') in selected_names
+    ]

tests/test_resumate.py CHANGED Viewed

@@ -2,8 +2,10 @@
 Test for resume generation functionality
 """
 import unittest
 from functions.gradio import process_inputs
 class TestResumeGeneration(unittest.TestCase):
     """Test to run resume generation on pre-defined inputs."""
@@ -19,9 +21,18 @@ class TestResumeGeneration(unittest.TestCase):
         self.user_instructions = ""
-    def test_generate_resume(self):
-        """Test resume generation with pre-defined inputs."""
         result = process_inputs(
             linkedin_pdf_path=self.linkedin_pdf_path,
@@ -31,3 +42,10 @@ class TestResumeGeneration(unittest.TestCase):
         )
         print(result)

 Test for resume generation functionality
 """
+import json
 import unittest
 from functions.gradio import process_inputs
+from functions.writer_agent import write_resume
 class TestResumeGeneration(unittest.TestCase):
     """Test to run resume generation on pre-defined inputs."""
         self.user_instructions = ""
+        with open('tests/test_data/github_repos.json', 'r', encoding='utf-8') as f:
+            self.github_repositories = json.load(f)
+        with open('tests/test_data/job_call.json', 'r', encoding='utf-8') as f:
+            self.job_call = json.load(f)
+        with open('tests/test_data/linkedin_resume.json', 'r', encoding='utf-8') as f:
+            self.linkedin_resume = json.load(f)
+    def test_process_inputs(self):
+        """Test input preprocessing for resume generation with pre-defined inputs."""
         result = process_inputs(
             linkedin_pdf_path=self.linkedin_pdf_path,
         )
         print(result)
+    def test_write_resume(self):
+        """Test resume writing functionality with pre-defined inputs."""
+        result = write_resume(self.linkedin_resume, self.github_repositories, self.job_call)
+        print(result)