gperdrizet committed
Commit bef6750 · verified · 1 Parent(s): 0899dd5

Cleaned up data directory structure for intermediate results

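The change is the same across all three save paths: intermediate JSON artifacts now land in per-category subdirectories under data/ (github_repos/, job_calls/, linkedin_profile/) instead of one flat data/ folder, and directory creation switches from mkdir(exist_ok=True) to mkdir(parents=True, exist_ok=True) so the nested path is created in one call. A minimal sketch of the shared pattern follows; the save_intermediate helper name is hypothetical (the real code inlines this logic in each function):

import json
from pathlib import Path

def save_intermediate(category: str, filename: str, payload: dict) -> Path:
    """Save an intermediate result under data/<category>/<filename>.

    Hypothetical helper illustrating the directory convention in this
    commit; not part of the diff itself.
    """
    # parents=True creates data/ and data/<category>/ together;
    # exist_ok=True keeps repeated saves from raising FileExistsError.
    out_dir = Path(__file__).parent.parent / "data" / category
    out_dir.mkdir(parents=True, exist_ok=True)

    out_file = out_dir / filename
    with open(out_file, 'w', encoding='utf-8') as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)
    return out_file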
functions/github.py CHANGED
@@ -22,13 +22,13 @@ logger = logging.getLogger(__name__)
 def get_github_repositories(github_url: str) -> Dict:
     """
     Retrieve public repositories from a GitHub profile URL.
-
+
     Args:
         github_url (str): GitHub profile URL (e.g., https://github.com/username)
-
+
     Returns:
         dict: Dictionary containing status, repositories list, and metadata
-
+
     Example:
     {
         "status": "success",
@@ -93,10 +93,10 @@ def get_github_repositories(github_url: str) -> Dict:
 
     # Save results to JSON file
     try:
-        data_dir = Path(__file__).parent.parent / "data"
-        data_dir.mkdir(exist_ok=True)
+        github_repos_dir = Path(__file__).parent.parent / "data" / "github_repos"
+        github_repos_dir.mkdir(parents=True, exist_ok=True)
 
-        output_file = data_dir / "github_repos.json"
+        output_file = github_repos_dir / "github_repos.json"
         with open(output_file, 'w', encoding='utf-8') as f:
             json.dump(result, f, indent=2, ensure_ascii=False)
 
@@ -118,10 +118,10 @@ def get_github_repositories(github_url: str) -> Dict:
 def _extract_github_username(github_url: str) -> Optional[str]:
     """
     Extract username from GitHub URL.
-
+
     Args:
         github_url (str): GitHub profile URL
-
+
     Returns:
         Optional[str]: Username if valid URL, None otherwise
     """
@@ -157,10 +157,10 @@ def _extract_github_username(github_url: str) -> Optional[str]:
 def _get_github_user_info(username: str) -> Dict:
     """
     Get basic user information from GitHub API.
-
+
     Args:
         username (str): GitHub username
-
+
     Returns:
         dict: API response with user information
     """
@@ -193,10 +193,10 @@ def _get_github_user_info(username: str) -> Dict:
 def _get_user_repositories(username: str) -> Dict:
     """
     Get user's public repositories from GitHub API.
-
+
     Args:
         username (str): GitHub username
-
+
     Returns:
         dict: API response with repositories
     """
@@ -254,10 +254,10 @@ def _get_user_repositories(username: str) -> Dict:
 def _process_repository_data(repos: List[Dict]) -> List[Dict]:
     """
     Process and clean repository data for easier consumption.
-
+
     Args:
         repos (List[Dict]): Raw repository data from GitHub API
-
+
     Returns:
         List[Dict]: Processed repository data
     """
@@ -295,10 +295,10 @@ def _process_repository_data(repos: List[Dict]) -> List[Dict]:
 def format_repositories_for_llm(github_result: Dict) -> str:
     """
     Format GitHub repositories data for LLM consumption.
-
+
     Args:
         github_result (dict): Result from get_github_repositories
-
+
     Returns:
         str: Formatted text ready for LLM context
     """
@@ -355,13 +355,13 @@ def format_repositories_for_llm(github_result: Dict) -> str:
 def get_repository_details(repo_url: str) -> Dict:
     """
     Get detailed information about a specific GitHub repository.
-
+
     Args:
         repo_url (str): GitHub repository URL (e.g., https://github.com/user/repo)
-
+
     Returns:
         dict: Dictionary containing comprehensive repository information
-
+
     Example:
     {
         "status": "success",
@@ -404,7 +404,7 @@ def get_repository_details(repo_url: str) -> Dict:
     try:
         # Extract owner and repo name from URL
         owner, repo_name = _extract_repo_info(repo_url)
-
+
         if not owner or not repo_name:
             return {"status": "error", "message": "Invalid GitHub repository URL format"}
 
@@ -419,7 +419,7 @@ def get_repository_details(repo_url: str) -> Dict:
 
         # Get additional repository details
         additional_data = {}
-
+
         # Get languages
         languages_result = _get_repository_languages(owner, repo_name)
         if languages_result["status"] == "success":
@@ -488,10 +488,10 @@ def get_repository_details(repo_url: str) -> Dict:
 
     # Save results to JSON file
     try:
-        data_dir = Path(__file__).parent.parent / "data"
-        data_dir.mkdir(exist_ok=True)
+        github_repos_dir = Path(__file__).parent.parent / "data" / "github_repos"
+        github_repos_dir.mkdir(parents=True, exist_ok=True)
 
-        output_file = data_dir / f"repo_details_{owner}_{repo_name}.json"
+        output_file = github_repos_dir / f"repo_details_{owner}_{repo_name}.json"
         with open(output_file, 'w', encoding='utf-8') as f:
             json.dump(result, f, indent=2, ensure_ascii=False)
 
@@ -512,40 +512,40 @@ def get_repository_details(repo_url: str) -> Dict:
 def _extract_repo_info(repo_url: str) -> tuple:
     """
     Extract owner and repository name from GitHub repository URL.
-
+
     Args:
         repo_url (str): GitHub repository URL
-
+
     Returns:
         tuple: (owner, repo_name) if valid URL, (None, None) otherwise
     """
     try:
         # Clean up the URL
         url = repo_url.strip().rstrip('/')
-
+
         # Handle various GitHub repository URL formats
         patterns = [
             r'github\.com/([^/]+)/([^/]+)/?$',  # https://github.com/owner/repo
            r'github\.com/([^/]+)/([^/]+)/.*',  # https://github.com/owner/repo/anything
         ]
-
+
         for pattern in patterns:
             match = re.search(pattern, url)
             if match:
                 owner = match.group(1)
                 repo_name = match.group(2)
-
+
                 # Remove .git suffix if present
                 if repo_name.endswith('.git'):
                     repo_name = repo_name[:-4]
-
+
                 # Validate format
-                if (re.match(r'^[a-zA-Z0-9\-_\.]+$', owner) and
+                if (re.match(r'^[a-zA-Z0-9\-_\.]+$', owner) and
                         re.match(r'^[a-zA-Z0-9\-_\.]+$', repo_name)):
                     return owner, repo_name
-
+
         return None, None
-
+
     except Exception as e:
         logger.warning("Error extracting repo info from URL %s: %s", repo_url, str(e))
         return None, None
@@ -559,18 +559,18 @@ def _get_repository_info(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 404:
             return {"status": "error", "message": f"Repository '{owner}/{repo_name}' not found"}
         elif response.status_code == 403:
             return {"status": "error", "message": "GitHub API rate limit exceeded"}
         elif response.status_code != 200:
             return {"status": "error", "message": f"GitHub API error: {response.status_code}"}
-
+
         return {"status": "success", "data": response.json()}
-
+
     except requests.RequestException as e:
         logger.error("Network error fetching repository info: %s", str(e))
         return {"status": "error", "message": f"Network error: {str(e)}"}
@@ -584,23 +584,23 @@ def _get_repository_languages(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             # Convert byte counts to percentages
             languages = response.json()
             total_bytes = sum(languages.values())
-
+
             if total_bytes > 0:
                 language_percentages = {
                     lang: round((bytes_count / total_bytes) * 100, 1)
                     for lang, bytes_count in languages.items()
                 }
                 return {"status": "success", "data": language_percentages}
-
+
         return {"status": "error", "message": "Could not retrieve languages"}
-
+
     except Exception as e:
         logger.warning("Error fetching repository languages: %s", str(e))
         return {"status": "error", "message": str(e)}
@@ -614,21 +614,21 @@ def _get_repository_readme(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             readme_data = response.json()
-
+
             # Get the raw content URL and fetch it
             download_url = readme_data.get("download_url")
             if download_url:
                 content_response = requests.get(download_url, timeout=10)
                 if content_response.status_code == 200:
                     return {"status": "success", "data": content_response.text}
-
+
         return {"status": "error", "message": "README not found"}
-
+
     except Exception as e:
         logger.warning("Error fetching README: %s", str(e))
         return {"status": "error", "message": str(e)}
@@ -642,12 +642,12 @@ def _get_repository_contents(owner: str, repo_name: str, path: str = "") -> Dict
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             contents = response.json()
-
+
             # Extract file and directory names
             file_structure = []
             for item in contents:
@@ -655,14 +655,14 @@
                 if item.get("type") == "dir":
                     name += "/"
                 file_structure.append(name)
-
+
             # Sort with directories first
             file_structure.sort(key=lambda x: (not x.endswith("/"), x.lower()))
-
+
             return {"status": "success", "data": file_structure}
-
+
         return {"status": "error", "message": "Could not retrieve file structure"}
-
+
     except Exception as e:
         logger.warning("Error fetching repository contents: %s", str(e))
         return {"status": "error", "message": str(e)}
@@ -676,12 +676,12 @@ def _get_repository_releases(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             releases = response.json()
-
+
             # Extract key release information
             release_info = []
             for release in releases[:10]:  # Limit to 10 most recent
@@ -692,11 +692,11 @@
                     "prerelease": release.get("prerelease", False),
                     "draft": release.get("draft", False)
                 })
-
+
             return {"status": "success", "data": release_info}
-
+
         return {"status": "error", "message": "Could not retrieve releases"}
-
+
     except Exception as e:
         logger.warning("Error fetching repository releases: %s", str(e))
         return {"status": "error", "message": str(e)}
@@ -710,12 +710,12 @@ def _get_repository_contributors(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             contributors = response.json()
-
+
             # Extract key contributor information
             contributor_info = []
             for contributor in contributors[:20]:  # Limit to top 20 contributors
@@ -725,11 +725,11 @@
                     "html_url": contributor.get("html_url", ""),
                     "type": contributor.get("type", "")
                 })
-
+
             return {"status": "success", "data": contributor_info}
-
+
         return {"status": "error", "message": "Could not retrieve contributors"}
-
+
     except Exception as e:
         logger.warning("Error fetching repository contributors: %s", str(e))
         return {"status": "error", "message": str(e)}
functions/gradio.py CHANGED
@@ -46,13 +46,13 @@ def process_with_default_option(
 def process_inputs(linkedin_pdf, github_url, job_post_text, user_instructions):
     """
     Process the input files and URLs from the Gradio interface.
-
+
     Args:
         linkedin_pdf: Uploaded LinkedIn resume export PDF file or mock file object with path
-        github_url (str): GitHub profile URL
+        github_url (str): GitHub profile URL
         job_post_text (str): Job post text content
         user_instructions (str): Additional instructions from the user
-
+
     Returns:
         str: Formatted output with file and URL information
     """
@@ -176,7 +176,7 @@ def process_inputs(linkedin_pdf, github_url, job_post_text, user_instructions):
     # Generate resume only if we have valid extraction result
     if extraction_result and extraction_result.get("status") == "success":
         try:
-            _ = write_resume(extraction_result, user_instructions)
+            _ = write_resume(extraction_result, user_instructions, summary)
             result += "\n✅ Resume generated successfully\n"
             logger.info("Resume generation completed successfully")
 
@@ -194,10 +194,10 @@ def process_inputs(linkedin_pdf, github_url, job_post_text, user_instructions):
 def get_processed_data(linkedin_pdf, github_url, job_post_text, instructions):
     """
     Get structured data from all inputs for further processing.
-
+
     Args:
         linkedin_pdf: Uploaded LinkedIn resume export PDF file
-        github_url (str): GitHub profile URL
+        github_url (str): GitHub profile URL
         job_post_text (str): Job post text content
         instructions (str): Additional instructions from the user
 
functions/job_call.py CHANGED
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
 def load_default_job_call() -> str:
     """
     Load default job call text from data/sample_job.txt if it exists.
-
+
     Returns:
         str: The default job call text, or empty string if file doesn't exist
     """
@@ -45,11 +45,11 @@ def load_default_job_call() -> str:
 
 def summarize_job_call(job_call: str = None) -> str:
     '''Extracts and summarizes key information from job call.
-
+
     Args:
         job_call (str, optional): Job call text to summarize. If None or empty,
                                   attempts to load default from data/sample_job.txt
-
+
     Returns:
         str: Summarized job call information, or None if no job call available
     '''
@@ -119,25 +119,25 @@ def summarize_job_call(job_call: str = None) -> str:
 
 def _save_job_call_data(original_job_call: str, extracted_summary: str) -> None:
     """
-    Save job call data (original and extracted summary) to the data directory.
-
+    Save job call data (original and extracted summary) to the data/job_calls directory.
+
     Args:
         original_job_call (str): The original job call text
         extracted_summary (str): The extracted/summarized job call information
     """
     try:
-        # Get the project root directory and data directory
+        # Get the project root directory and job_calls subdirectory
         project_root = Path(__file__).parent.parent
-        data_dir = project_root / "data"
-
-        # Create data directory if it doesn't exist
-        data_dir.mkdir(exist_ok=True)
-
+        job_calls_dir = project_root / "data" / "job_calls"
+
+        # Create job_calls directory if it doesn't exist
+        job_calls_dir.mkdir(parents=True, exist_ok=True)
+
         # Create timestamped filename
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         filename = f"job_call_extracted_{timestamp}.json"
-        file_path = data_dir / filename
-
+        file_path = job_calls_dir / filename
+
         # Prepare data to save
         job_call_data = {
             "timestamp": datetime.now().isoformat(),
@@ -149,13 +149,13 @@ def _save_job_call_data(original_job_call: str, extracted_summary: str) -> None:
                 "extraction_successful": extracted_summary is not None
             }
         }
-
+
         # Save to JSON file
         with open(file_path, 'w', encoding='utf-8') as f:
             json.dump(job_call_data, f, indent=2, ensure_ascii=False)
-
+
         logger.info("Saved job call data to: %s", file_path)
-
+
     except Exception as e:
         logger.error("Error saving job call data: %s", str(e))
         raise
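Alongside the new data/job_calls/ location, saved job calls keep the timestamped filenames that were already in place, so successive runs never overwrite each other. A small illustration of the naming scheme, using the same strftime format as the code above (the example timestamp is made up):

from datetime import datetime
from pathlib import Path

# Format string matches the diff, e.g. "20250614_103055"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
file_path = Path("data") / "job_calls" / f"job_call_extracted_{timestamp}.json"
# e.g. data/job_calls/job_call_extracted_20250614_103055.json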
functions/linkedin_resume.py CHANGED
@@ -11,6 +11,7 @@ import io
 import os
 import json
 from pathlib import Path
+from datetime import datetime
 import PyPDF2
 
 # pylint: disable=broad-exception-caught
@@ -39,13 +40,13 @@ def extract_text_from_linkedin_pdf(pdf_file) -> dict:
     """
     Extract and structure text content from an uploaded LinkedIn resume export PDF file
     for optimal LLM processing.
-
+
     Args:
         pdf_file: The file path string to the uploaded PDF file
-
+
     Returns:
         dict: Dictionary containing extraction status, structured text content, and metadata
-
+
     Example:
     {
         "status": "success",
@@ -123,10 +124,13 @@ def extract_text_from_linkedin_pdf(pdf_file) -> dict:
 
     # Save results to JSON file
     try:
-        data_dir = Path(__file__).parent.parent / "data"
-        data_dir.mkdir(exist_ok=True)
-
-        output_file = data_dir / "linkedin_resume.json"
+        linkedin_profile_dir = Path(__file__).parent.parent / "data" / "linkedin_profile"
+        linkedin_profile_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create timestamped filename
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        output_file = linkedin_profile_dir / f"linkedin_resume_{timestamp}.json"
 
         with open(output_file, 'w', encoding='utf-8') as f:
             json.dump(result, f, indent=2, ensure_ascii=False)
 
@@ -149,10 +153,10 @@ def extract_text_from_linkedin_pdf(pdf_file) -> dict:
 def _structure_resume_text(text: str) -> dict:
     """
     Structure resume text into logical sections for optimal LLM processing.
-
+
     Args:
         text (str): Raw extracted text from PDF
-
+
     Returns:
         dict: Structured text with sections, full text, and summary
     """
@@ -253,11 +257,11 @@ def _structure_resume_text(text: str) -> dict:
 def _format_for_llm(sections: dict) -> str:
     """
     Format the resume sections in an optimal way for LLM processing.
-
+
     Args:
         sections (dict): Structured sections
         full_text (str): Full cleaned text
-
+
     Returns:
         str: LLM-optimized formatted text
     """
@@ -297,10 +301,10 @@ def _format_for_llm(sections: dict) -> str:
 def _clean_extracted_text(text: str) -> str:
     """
     Clean and normalize extracted text from PDF for better LLM processing.
-
+
     Args:
         text (str): Raw extracted text
-
+
     Returns:
         str: Cleaned text optimized for LLM consumption
     """
@@ -346,10 +350,10 @@ def _clean_extracted_text(text: str) -> str:
 def get_llm_context_from_resume(extraction_result: dict) -> str:
     """
     Extract the best formatted text for LLM context from the extraction result.
-
+
     Args:
         extraction_result (dict): Result from extract_text_from_linkedin_pdf
-
+
     Returns:
         str: Formatted text ready for LLM context
     """
functions/writer_agent.py CHANGED
@@ -11,7 +11,7 @@ from configuration import AGENT_MODEL, INSTRUCTIONS
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-def write_resume(content: str, user_instructions: str = None) -> str:
+def write_resume(content: str, user_instructions: str = None, job_summary: str = None) -> str:
 
     """
     Generates a resume based on the provided content.
@@ -19,6 +19,7 @@ def write_resume(content: str, user_instructions: str = None) -> str:
     Args:
         content (str): The content to be used for generating the resume.
         user_instructions (str, optional): Additional instructions from the user.
+        job_summary (str, optional): Extracted/summarized job call information.
 
     Returns:
         str: The generated resume.
@@ -36,9 +37,13 @@ def write_resume(content: str, user_instructions: str = None) -> str:
         planning_interval=5
     )
 
-    # Prepare instructions - combine default with user instructions
+    # Prepare instructions - combine default with user instructions and job summary
     instructions = INSTRUCTIONS
 
+    if job_summary and job_summary.strip():
+        instructions += f"\n\nJob Requirements and Details:\n{job_summary.strip()}"
+        logger.info("Added job summary to agent prompt (%d characters)", len(job_summary))
+
     if user_instructions and user_instructions.strip():
 
         instructions += f"\n\nAdditional user instructions:\n{user_instructions.strip()}"
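Callers now pass the extracted job summary as a third positional argument (see the gradio.py change above, where process_inputs forwards summary). A short sketch of how the final agent prompt is assembled under the new signature; the literal values below are placeholders, and INSTRUCTIONS stands in for the default prompt imported from configuration:

INSTRUCTIONS = "Write a resume from the provided content."  # placeholder default
job_summary = "Senior Python role; FastAPI and AWS required."  # placeholder
user_instructions = "Please emphasize technical skills"  # placeholder

# Mirrors the logic added in this diff: each optional section is
# appended to the default instructions only when non-empty.
instructions = INSTRUCTIONS
if job_summary and job_summary.strip():
    instructions += f"\n\nJob Requirements and Details:\n{job_summary.strip()}"
if user_instructions and user_instructions.strip():
    instructions += f"\n\nAdditional user instructions:\n{user_instructions.strip()}"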
tests/test_gradio.py CHANGED
@@ -72,10 +72,11 @@ class TestProcessInputs(unittest.TestCase):
         self.assertIn("✅ Additional instructions provided", result)
         self.assertIn("✅ Resume generated successfully", result)
 
-        # Verify write_resume was called with user instructions
+        # Verify write_resume was called with user instructions and job summary
         mock_write_resume.assert_called_with(
             mock_linkedin_result,
-            "Please emphasize technical skills"
+            "Please emphasize technical skills",
+            "Job summary content\n"
         )
 
     @patch('functions.gradio.extract_text_from_linkedin_pdf')
@@ -391,7 +392,7 @@ class TestGetProcessedData(unittest.TestCase):
         result = gradio.get_processed_data(None, "", " ", "")
         self.assertEqual(result["job_post"], "Default job content")
 
-        # Test with empty string - should load default
+        # Test with empty string - should load default
         result = gradio.get_processed_data(None, "", "", "")
         self.assertEqual(result["job_post"], "Default job content")
 