gperdrizet committed
Commit bef6750 · verified · 1 Parent(s): 0899dd5

Cleaned up data directory structure for intermediate results

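The change is the same across all three save paths: intermediate JSON artifacts now land in per-category subdirectories under data/ (github_repos/, job_calls/, linkedin_profile/) instead of one flat data/ folder, and directory creation switches from mkdir(exist_ok=True) to mkdir(parents=True, exist_ok=True) so the nested path is created in one call. A minimal sketch of the shared pattern follows; the save_intermediate helper name is hypothetical (the real code inlines this logic in each function):

import json
from pathlib import Path

def save_intermediate(category: str, filename: str, payload: dict) -> Path:
    """Save an intermediate result under data/<category>/<filename>.

    Hypothetical helper illustrating the directory convention in this
    commit; not part of the diff itself.
    """
    # parents=True creates data/ and data/<category>/ together;
    # exist_ok=True keeps repeated saves from raising FileExistsError.
    out_dir = Path(__file__).parent.parent / "data" / category
    out_dir.mkdir(parents=True, exist_ok=True)

    out_file = out_dir / filename
    with open(out_file, 'w', encoding='utf-8') as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)
    return out_file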
functions/github.py CHANGED
@@ -22,13 +22,13 @@ logger = logging.getLogger(__name__)
 def get_github_repositories(github_url: str) -> Dict:
     """
     Retrieve public repositories from a GitHub profile URL.
-
+
     Args:
         github_url (str): GitHub profile URL (e.g., https://github.com/username)
-
+
     Returns:
         dict: Dictionary containing status, repositories list, and metadata
-
+
     Example:
     {
         "status": "success",
@@ -93,10 +93,10 @@ def get_github_repositories(github_url: str) -> Dict:
 
     # Save results to JSON file
     try:
-        data_dir = Path(__file__).parent.parent / "data"
-        data_dir.mkdir(exist_ok=True)
+        github_repos_dir = Path(__file__).parent.parent / "data" / "github_repos"
+        github_repos_dir.mkdir(parents=True, exist_ok=True)
 
-        output_file = data_dir / "github_repos.json"
+        output_file = github_repos_dir / "github_repos.json"
         with open(output_file, 'w', encoding='utf-8') as f:
             json.dump(result, f, indent=2, ensure_ascii=False)
 
@@ -118,10 +118,10 @@ def get_github_repositories(github_url: str) -> Dict:
 def _extract_github_username(github_url: str) -> Optional[str]:
     """
     Extract username from GitHub URL.
-
+
     Args:
         github_url (str): GitHub profile URL
-
+
     Returns:
         Optional[str]: Username if valid URL, None otherwise
     """
@@ -157,10 +157,10 @@ def _extract_github_username(github_url: str) -> Optional[str]:
 def _get_github_user_info(username: str) -> Dict:
     """
     Get basic user information from GitHub API.
-
+
     Args:
         username (str): GitHub username
-
+
     Returns:
         dict: API response with user information
     """
@@ -193,10 +193,10 @@ def _get_github_user_info(username: str) -> Dict:
 def _get_user_repositories(username: str) -> Dict:
     """
     Get user's public repositories from GitHub API.
-
+
     Args:
         username (str): GitHub username
-
+
     Returns:
         dict: API response with repositories
     """
@@ -254,10 +254,10 @@ def _get_user_repositories(username: str) -> Dict:
 def _process_repository_data(repos: List[Dict]) -> List[Dict]:
     """
     Process and clean repository data for easier consumption.
-
+
     Args:
         repos (List[Dict]): Raw repository data from GitHub API
-
+
     Returns:
         List[Dict]: Processed repository data
     """
@@ -295,10 +295,10 @@ def _process_repository_data(repos: List[Dict]) -> List[Dict]:
 def format_repositories_for_llm(github_result: Dict) -> str:
     """
     Format GitHub repositories data for LLM consumption.
-
+
     Args:
         github_result (dict): Result from get_github_repositories
-
+
     Returns:
         str: Formatted text ready for LLM context
     """
@@ -355,13 +355,13 @@ def format_repositories_for_llm(github_result: Dict) -> str:
 def get_repository_details(repo_url: str) -> Dict:
     """
     Get detailed information about a specific GitHub repository.
-
+
     Args:
         repo_url (str): GitHub repository URL (e.g., https://github.com/user/repo)
-
+
     Returns:
         dict: Dictionary containing comprehensive repository information
-
+
     Example:
     {
         "status": "success",
@@ -404,7 +404,7 @@ def get_repository_details(repo_url: str) -> Dict:
     try:
         # Extract owner and repo name from URL
         owner, repo_name = _extract_repo_info(repo_url)
-
+
         if not owner or not repo_name:
             return {"status": "error", "message": "Invalid GitHub repository URL format"}
 
@@ -419,7 +419,7 @@ def get_repository_details(repo_url: str) -> Dict:
 
         # Get additional repository details
         additional_data = {}
-
+
         # Get languages
         languages_result = _get_repository_languages(owner, repo_name)
         if languages_result["status"] == "success":
@@ -488,10 +488,10 @@ def get_repository_details(repo_url: str) -> Dict:
 
     # Save results to JSON file
     try:
-        data_dir = Path(__file__).parent.parent / "data"
-        data_dir.mkdir(exist_ok=True)
+        github_repos_dir = Path(__file__).parent.parent / "data" / "github_repos"
+        github_repos_dir.mkdir(parents=True, exist_ok=True)
 
-        output_file = data_dir / f"repo_details_{owner}_{repo_name}.json"
+        output_file = github_repos_dir / f"repo_details_{owner}_{repo_name}.json"
         with open(output_file, 'w', encoding='utf-8') as f:
             json.dump(result, f, indent=2, ensure_ascii=False)
 
@@ -512,40 +512,40 @@ def get_repository_details(repo_url: str) -> Dict:
 def _extract_repo_info(repo_url: str) -> tuple:
     """
     Extract owner and repository name from GitHub repository URL.
-
+
     Args:
         repo_url (str): GitHub repository URL
-
+
     Returns:
         tuple: (owner, repo_name) if valid URL, (None, None) otherwise
     """
     try:
         # Clean up the URL
         url = repo_url.strip().rstrip('/')
-
+
         # Handle various GitHub repository URL formats
         patterns = [
             r'github\.com/([^/]+)/([^/]+)/?$',  # https://github.com/owner/repo
            r'github\.com/([^/]+)/([^/]+)/.*',  # https://github.com/owner/repo/anything
         ]
-
+
         for pattern in patterns:
             match = re.search(pattern, url)
             if match:
                 owner = match.group(1)
                 repo_name = match.group(2)
-
+
                 # Remove .git suffix if present
                 if repo_name.endswith('.git'):
                     repo_name = repo_name[:-4]
-
+
                 # Validate format
-                if (re.match(r'^[a-zA-Z0-9\-_\.]+$', owner) and
+                if (re.match(r'^[a-zA-Z0-9\-_\.]+$', owner) and
                         re.match(r'^[a-zA-Z0-9\-_\.]+$', repo_name)):
                     return owner, repo_name
-
+
         return None, None
-
+
     except Exception as e:
         logger.warning("Error extracting repo info from URL %s: %s", repo_url, str(e))
         return None, None
@@ -559,18 +559,18 @@ def _get_repository_info(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 404:
             return {"status": "error", "message": f"Repository '{owner}/{repo_name}' not found"}
         elif response.status_code == 403:
             return {"status": "error", "message": "GitHub API rate limit exceeded"}
         elif response.status_code != 200:
             return {"status": "error", "message": f"GitHub API error: {response.status_code}"}
-
+
         return {"status": "success", "data": response.json()}
-
+
     except requests.RequestException as e:
         logger.error("Network error fetching repository info: %s", str(e))
         return {"status": "error", "message": f"Network error: {str(e)}"}
@@ -584,23 +584,23 @@ def _get_repository_languages(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             # Convert byte counts to percentages
             languages = response.json()
             total_bytes = sum(languages.values())
-
+
             if total_bytes > 0:
                 language_percentages = {
                     lang: round((bytes_count / total_bytes) * 100, 1)
                     for lang, bytes_count in languages.items()
                 }
                 return {"status": "success", "data": language_percentages}
-
+
         return {"status": "error", "message": "Could not retrieve languages"}
-
+
     except Exception as e:
         logger.warning("Error fetching repository languages: %s", str(e))
         return {"status": "error", "message": str(e)}
@@ -614,21 +614,21 @@ def _get_repository_readme(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             readme_data = response.json()
-
+
             # Get the raw content URL and fetch it
             download_url = readme_data.get("download_url")
             if download_url:
                 content_response = requests.get(download_url, timeout=10)
                 if content_response.status_code == 200:
                     return {"status": "success", "data": content_response.text}
-
+
         return {"status": "error", "message": "README not found"}
-
+
     except Exception as e:
         logger.warning("Error fetching README: %s", str(e))
         return {"status": "error", "message": str(e)}
@@ -642,12 +642,12 @@ def _get_repository_contents(owner: str, repo_name: str, path: str = "") -> Dict
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             contents = response.json()
-
+
             # Extract file and directory names
             file_structure = []
             for item in contents:
@@ -655,14 +655,14 @@
                 if item.get("type") == "dir":
                     name += "/"
                 file_structure.append(name)
-
+
             # Sort with directories first
             file_structure.sort(key=lambda x: (not x.endswith("/"), x.lower()))
-
+
             return {"status": "success", "data": file_structure}
-
+
         return {"status": "error", "message": "Could not retrieve file structure"}
-
+
     except Exception as e:
         logger.warning("Error fetching repository contents: %s", str(e))
         return {"status": "error", "message": str(e)}
@@ -676,12 +676,12 @@ def _get_repository_releases(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             releases = response.json()
-
+
             # Extract key release information
             release_info = []
             for release in releases[:10]:  # Limit to 10 most recent
@@ -692,11 +692,11 @@
                     "prerelease": release.get("prerelease", False),
                     "draft": release.get("draft", False)
                 })
-
+
             return {"status": "success", "data": release_info}
-
+
         return {"status": "error", "message": "Could not retrieve releases"}
-
+
     except Exception as e:
         logger.warning("Error fetching repository releases: %s", str(e))
         return {"status": "error", "message": str(e)}
@@ -710,12 +710,12 @@ def _get_repository_contributors(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             contributors = response.json()
-
+
             # Extract key contributor information
             contributor_info = []
             for contributor in contributors[:20]:  # Limit to top 20 contributors
@@ -725,11 +725,11 @@
                     "html_url": contributor.get("html_url", ""),
                     "type": contributor.get("type", "")
                 })
-
+
             return {"status": "success", "data": contributor_info}
-
+
         return {"status": "error", "message": "Could not retrieve contributors"}
-
+
     except Exception as e:
         logger.warning("Error fetching repository contributors: %s", str(e))
         return {"status": "error", "message": str(e)}
functions/gradio.py CHANGED
@@ -46,13 +46,13 @@ def process_with_default_option(
 def process_inputs(linkedin_pdf, github_url, job_post_text, user_instructions):
     """
     Process the input files and URLs from the Gradio interface.
-
+
     Args:
         linkedin_pdf: Uploaded LinkedIn resume export PDF file or mock file object with path
-        github_url (str): GitHub profile URL
+        github_url (str): GitHub profile URL
         job_post_text (str): Job post text content
         user_instructions (str): Additional instructions from the user
-
+
     Returns:
         str: Formatted output with file and URL information
     """
@@ -176,7 +176,7 @@ def process_inputs(linkedin_pdf, github_url, job_post_text, user_instructions):
     # Generate resume only if we have valid extraction result
     if extraction_result and extraction_result.get("status") == "success":
         try:
-            _ = write_resume(extraction_result, user_instructions)
+            _ = write_resume(extraction_result, user_instructions, summary)
             result += "\n✅ Resume generated successfully\n"
             logger.info("Resume generation completed successfully")
 
@@ -194,10 +194,10 @@ def process_inputs(linkedin_pdf, github_url, job_post_text, user_instructions):
 def get_processed_data(linkedin_pdf, github_url, job_post_text, instructions):
     """
     Get structured data from all inputs for further processing.
-
+
     Args:
         linkedin_pdf: Uploaded LinkedIn resume export PDF file
-        github_url (str): GitHub profile URL
+        github_url (str): GitHub profile URL
         job_post_text (str): Job post text content
         instructions (str): Additional instructions from the user
 
functions/job_call.py CHANGED
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
 def load_default_job_call() -> str:
     """
     Load default job call text from data/sample_job.txt if it exists.
-
+
     Returns:
         str: The default job call text, or empty string if file doesn't exist
     """
@@ -45,11 +45,11 @@ def load_default_job_call() -> str:
 
 def summarize_job_call(job_call: str = None) -> str:
     '''Extracts and summarizes key information from job call.
-
+
     Args:
         job_call (str, optional): Job call text to summarize. If None or empty,
                                   attempts to load default from data/sample_job.txt
-
+
     Returns:
         str: Summarized job call information, or None if no job call available
     '''
@@ -119,25 +119,25 @@ def summarize_job_call(job_call: str = None) -> str:
 
 def _save_job_call_data(original_job_call: str, extracted_summary: str) -> None:
     """
-    Save job call data (original and extracted summary) to the data directory.
-
+    Save job call data (original and extracted summary) to the data/job_calls directory.
+
     Args:
         original_job_call (str): The original job call text
         extracted_summary (str): The extracted/summarized job call information
     """
     try:
-        # Get the project root directory and data directory
+        # Get the project root directory and job_calls subdirectory
         project_root = Path(__file__).parent.parent
-        data_dir = project_root / "data"
-
-        # Create data directory if it doesn't exist
-        data_dir.mkdir(exist_ok=True)
-
+        job_calls_dir = project_root / "data" / "job_calls"
+
+        # Create job_calls directory if it doesn't exist
+        job_calls_dir.mkdir(parents=True, exist_ok=True)
+
         # Create timestamped filename
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         filename = f"job_call_extracted_{timestamp}.json"
-        file_path = data_dir / filename
-
+        file_path = job_calls_dir / filename
+
         # Prepare data to save
         job_call_data = {
             "timestamp": datetime.now().isoformat(),
@@ -149,13 +149,13 @@ def _save_job_call_data(original_job_call: str, extracted_summary: str) -> None:
                 "extraction_successful": extracted_summary is not None
             }
         }
-
+
         # Save to JSON file
         with open(file_path, 'w', encoding='utf-8') as f:
             json.dump(job_call_data, f, indent=2, ensure_ascii=False)
-
+
         logger.info("Saved job call data to: %s", file_path)
-
+
     except Exception as e:
         logger.error("Error saving job call data: %s", str(e))
         raise
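Alongside the new data/job_calls/ location, saved job calls keep the timestamped filenames that were already in place, so successive runs never overwrite each other. A small illustration of the naming scheme, using the same strftime format as the code above (the example timestamp is made up):

from datetime import datetime
from pathlib import Path

# Format string matches the diff, e.g. "20250614_103055"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
file_path = Path("data") / "job_calls" / f"job_call_extracted_{timestamp}.json"
# e.g. data/job_calls/job_call_extracted_20250614_103055.json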
functions/linkedin_resume.py CHANGED
@@ -11,6 +11,7 @@ import io
 import os
 import json
 from pathlib import Path
+from datetime import datetime
 import PyPDF2
 
 # pylint: disable=broad-exception-caught
@@ -39,13 +40,13 @@ def extract_text_from_linkedin_pdf(pdf_file) -> dict:
     """
     Extract and structure text content from an uploaded LinkedIn resume export PDF file
     for optimal LLM processing.
-
+
     Args:
         pdf_file: The file path string to the uploaded PDF file
-
+
     Returns:
         dict: Dictionary containing extraction status, structured text content, and metadata
-
+
     Example:
     {
         "status": "success",
@@ -123,10 +124,13 @@ def extract_text_from_linkedin_pdf(pdf_file) -> dict:
 
     # Save results to JSON file
     try:
-        data_dir = Path(__file__).parent.parent / "data"
-        data_dir.mkdir(exist_ok=True)
-
-        output_file = data_dir / "linkedin_resume.json"
+        linkedin_profile_dir = Path(__file__).parent.parent / "data" / "linkedin_profile"
+        linkedin_profile_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create timestamped filename
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        output_file = linkedin_profile_dir / f"linkedin_resume_{timestamp}.json"
 
         with open(output_file, 'w', encoding='utf-8') as f:
             json.dump(result, f, indent=2, ensure_ascii=False)
 
@@ -149,10 +153,10 @@ def extract_text_from_linkedin_pdf(pdf_file) -> dict:
 def _structure_resume_text(text: str) -> dict:
     """
     Structure resume text into logical sections for optimal LLM processing.
-
+
     Args:
         text (str): Raw extracted text from PDF
-
+
     Returns:
         dict: Structured text with sections, full text, and summary
     """
@@ -253,11 +257,11 @@ def _structure_resume_text(text: str) -> dict:
 def _format_for_llm(sections: dict) -> str:
     """
     Format the resume sections in an optimal way for LLM processing.
-
+
     Args:
         sections (dict): Structured sections
         full_text (str): Full cleaned text
-
+
     Returns:
         str: LLM-optimized formatted text
     """
@@ -297,10 +301,10 @@ def _format_for_llm(sections: dict) -> str:
 def _clean_extracted_text(text: str) -> str:
     """
     Clean and normalize extracted text from PDF for better LLM processing.
-
+
     Args:
         text (str): Raw extracted text
-
+
     Returns:
         str: Cleaned text optimized for LLM consumption
     """
@@ -346,10 +350,10 @@ def _clean_extracted_text(text: str) -> str:
 def get_llm_context_from_resume(extraction_result: dict) -> str:
     """
     Extract the best formatted text for LLM context from the extraction result.
-
+
     Args:
         extraction_result (dict): Result from extract_text_from_linkedin_pdf
-
+
     Returns:
         str: Formatted text ready for LLM context
     """
functions/writer_agent.py CHANGED
@@ -11,7 +11,7 @@ from configuration import AGENT_MODEL, INSTRUCTIONS
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-def write_resume(content: str, user_instructions: str = None) -> str:
+def write_resume(content: str, user_instructions: str = None, job_summary: str = None) -> str:
 
     """
     Generates a resume based on the provided content.
@@ -19,6 +19,7 @@ def write_resume(content: str, user_instructions: str = None) -> str:
     Args:
         content (str): The content to be used for generating the resume.
         user_instructions (str, optional): Additional instructions from the user.
+        job_summary (str, optional): Extracted/summarized job call information.
 
     Returns:
         str: The generated resume.
@@ -36,9 +37,13 @@ def write_resume(content: str, user_instructions: str = None) -> str:
         planning_interval=5
     )
 
-    # Prepare instructions - combine default with user instructions
+    # Prepare instructions - combine default with user instructions and job summary
     instructions = INSTRUCTIONS
 
+    if job_summary and job_summary.strip():
+        instructions += f"\n\nJob Requirements and Details:\n{job_summary.strip()}"
+        logger.info("Added job summary to agent prompt (%d characters)", len(job_summary))
+
     if user_instructions and user_instructions.strip():
 
         instructions += f"\n\nAdditional user instructions:\n{user_instructions.strip()}"
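Callers now pass the extracted job summary as a third positional argument (see the gradio.py change above, where process_inputs forwards summary). A short sketch of how the final agent prompt is assembled under the new signature; the literal values below are placeholders, and INSTRUCTIONS stands in for the default prompt imported from configuration:

INSTRUCTIONS = "Write a resume from the provided content."  # placeholder default
job_summary = "Senior Python role; FastAPI and AWS required."  # placeholder
user_instructions = "Please emphasize technical skills"  # placeholder

# Mirrors the logic added in this diff: each optional section is
# appended to the default instructions only when non-empty.
instructions = INSTRUCTIONS
if job_summary and job_summary.strip():
    instructions += f"\n\nJob Requirements and Details:\n{job_summary.strip()}"
if user_instructions and user_instructions.strip():
    instructions += f"\n\nAdditional user instructions:\n{user_instructions.strip()}"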
tests/test_gradio.py CHANGED
@@ -72,10 +72,11 @@ class TestProcessInputs(unittest.TestCase):
         self.assertIn("✅ Additional instructions provided", result)
         self.assertIn("✅ Resume generated successfully", result)
 
-        # Verify write_resume was called with user instructions
+        # Verify write_resume was called with user instructions and job summary
         mock_write_resume.assert_called_with(
             mock_linkedin_result,
-            "Please emphasize technical skills"
+            "Please emphasize technical skills",
+            "Job summary content\n"
         )
 
     @patch('functions.gradio.extract_text_from_linkedin_pdf')
@@ -391,7 +392,7 @@ class TestGetProcessedData(unittest.TestCase):
         result = gradio.get_processed_data(None, "", " ", "")
         self.assertEqual(result["job_post"], "Default job content")
 
-        # Test with empty string - should load default
+        # Test with empty string - should load default
         result = gradio.get_processed_data(None, "", "", "")
         self.assertEqual(result["job_post"], "Default job content")
 