Cleaned up data directory structure for intermediate results
- functions/github.py +66 -66
- functions/gradio.py +6 -6
- functions/job_call.py +16 -16
- functions/linkedin_resume.py +18 -14
- functions/writer_agent.py +7 -2
- tests/test_gradio.py +4 -3
functions/github.py
CHANGED
@@ -22,13 +22,13 @@ logger = logging.getLogger(__name__)
 def get_github_repositories(github_url: str) -> Dict:
     """
     Retrieve public repositories from a GitHub profile URL.
-
+
     Args:
         github_url (str): GitHub profile URL (e.g., https://github.com/username)
-
+
     Returns:
         dict: Dictionary containing status, repositories list, and metadata
-
+
     Example:
         {
             "status": "success",
@@ -93,10 +93,10 @@ def get_github_repositories(github_url: str) -> Dict:
 
     # Save results to JSON file
     try:
-
-
+        github_repos_dir = Path(__file__).parent.parent / "data" / "github_repos"
+        github_repos_dir.mkdir(parents=True, exist_ok=True)
 
-        output_file =
+        output_file = github_repos_dir / "github_repos.json"
         with open(output_file, 'w', encoding='utf-8') as f:
             json.dump(result, f, indent=2, ensure_ascii=False)
 
@@ -118,10 +118,10 @@ def get_github_repositories(github_url: str) -> Dict:
 def _extract_github_username(github_url: str) -> Optional[str]:
     """
     Extract username from GitHub URL.
-
+
     Args:
         github_url (str): GitHub profile URL
-
+
     Returns:
         Optional[str]: Username if valid URL, None otherwise
     """
@@ -157,10 +157,10 @@ def _extract_github_username(github_url: str) -> Optional[str]:
 def _get_github_user_info(username: str) -> Dict:
     """
     Get basic user information from GitHub API.
-
+
     Args:
         username (str): GitHub username
-
+
     Returns:
         dict: API response with user information
     """
@@ -193,10 +193,10 @@ def _get_github_user_info(username: str) -> Dict:
 def _get_user_repositories(username: str) -> Dict:
     """
     Get user's public repositories from GitHub API.
-
+
     Args:
         username (str): GitHub username
-
+
     Returns:
         dict: API response with repositories
     """
@@ -254,10 +254,10 @@ def _get_user_repositories(username: str) -> Dict:
 def _process_repository_data(repos: List[Dict]) -> List[Dict]:
     """
     Process and clean repository data for easier consumption.
-
+
     Args:
         repos (List[Dict]): Raw repository data from GitHub API
-
+
     Returns:
         List[Dict]: Processed repository data
     """
@@ -295,10 +295,10 @@ def _process_repository_data(repos: List[Dict]) -> List[Dict]:
 def format_repositories_for_llm(github_result: Dict) -> str:
     """
     Format GitHub repositories data for LLM consumption.
-
+
     Args:
         github_result (dict): Result from get_github_repositories
-
+
     Returns:
         str: Formatted text ready for LLM context
     """
@@ -355,13 +355,13 @@ def format_repositories_for_llm(github_result: Dict) -> str:
 def get_repository_details(repo_url: str) -> Dict:
     """
     Get detailed information about a specific GitHub repository.
-
+
     Args:
         repo_url (str): GitHub repository URL (e.g., https://github.com/user/repo)
-
+
     Returns:
         dict: Dictionary containing comprehensive repository information
-
+
     Example:
         {
             "status": "success",
@@ -404,7 +404,7 @@ def get_repository_details(repo_url: str) -> Dict:
     try:
         # Extract owner and repo name from URL
         owner, repo_name = _extract_repo_info(repo_url)
-
+
         if not owner or not repo_name:
             return {"status": "error", "message": "Invalid GitHub repository URL format"}
 
@@ -419,7 +419,7 @@ def get_repository_details(repo_url: str) -> Dict:
 
         # Get additional repository details
         additional_data = {}
-
+
         # Get languages
         languages_result = _get_repository_languages(owner, repo_name)
         if languages_result["status"] == "success":
@@ -488,10 +488,10 @@ def get_repository_details(repo_url: str) -> Dict:
 
     # Save results to JSON file
     try:
-
-
+        github_repos_dir = Path(__file__).parent.parent / "data" / "github_repos"
+        github_repos_dir.mkdir(parents=True, exist_ok=True)
 
-        output_file =
+        output_file = github_repos_dir / f"repo_details_{owner}_{repo_name}.json"
         with open(output_file, 'w', encoding='utf-8') as f:
             json.dump(result, f, indent=2, ensure_ascii=False)
 
@@ -512,40 +512,40 @@ def get_repository_details(repo_url: str) -> Dict:
 def _extract_repo_info(repo_url: str) -> tuple:
     """
     Extract owner and repository name from GitHub repository URL.
-
+
     Args:
         repo_url (str): GitHub repository URL
-
+
     Returns:
         tuple: (owner, repo_name) if valid URL, (None, None) otherwise
     """
     try:
         # Clean up the URL
         url = repo_url.strip().rstrip('/')
-
+
         # Handle various GitHub repository URL formats
         patterns = [
             r'github\.com/([^/]+)/([^/]+)/?$',  # https://github.com/owner/repo
             r'github\.com/([^/]+)/([^/]+)/.*',  # https://github.com/owner/repo/anything
         ]
-
+
         for pattern in patterns:
             match = re.search(pattern, url)
             if match:
                 owner = match.group(1)
                 repo_name = match.group(2)
-
+
                 # Remove .git suffix if present
                 if repo_name.endswith('.git'):
                     repo_name = repo_name[:-4]
-
+
                 # Validate format
-                if (re.match(r'^[a-zA-Z0-9\-_\.]+$', owner) and
+                if (re.match(r'^[a-zA-Z0-9\-_\.]+$', owner) and
                     re.match(r'^[a-zA-Z0-9\-_\.]+$', repo_name)):
                     return owner, repo_name
-
+
         return None, None
-
+
     except Exception as e:
         logger.warning("Error extracting repo info from URL %s: %s", repo_url, str(e))
         return None, None
@@ -559,18 +559,18 @@ def _get_repository_info(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 404:
             return {"status": "error", "message": f"Repository '{owner}/{repo_name}' not found"}
         elif response.status_code == 403:
             return {"status": "error", "message": "GitHub API rate limit exceeded"}
         elif response.status_code != 200:
             return {"status": "error", "message": f"GitHub API error: {response.status_code}"}
-
+
         return {"status": "success", "data": response.json()}
-
+
     except requests.RequestException as e:
         logger.error("Network error fetching repository info: %s", str(e))
         return {"status": "error", "message": f"Network error: {str(e)}"}
@@ -584,23 +584,23 @@ def _get_repository_languages(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             # Convert byte counts to percentages
             languages = response.json()
             total_bytes = sum(languages.values())
-
+
             if total_bytes > 0:
                 language_percentages = {
                     lang: round((bytes_count / total_bytes) * 100, 1)
                     for lang, bytes_count in languages.items()
                 }
                 return {"status": "success", "data": language_percentages}
-
+
         return {"status": "error", "message": "Could not retrieve languages"}
-
+
     except Exception as e:
         logger.warning("Error fetching repository languages: %s", str(e))
         return {"status": "error", "message": str(e)}
@@ -614,21 +614,21 @@ def _get_repository_readme(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             readme_data = response.json()
-
+
             # Get the raw content URL and fetch it
             download_url = readme_data.get("download_url")
             if download_url:
                 content_response = requests.get(download_url, timeout=10)
                 if content_response.status_code == 200:
                     return {"status": "success", "data": content_response.text}
-
+
         return {"status": "error", "message": "README not found"}
-
+
     except Exception as e:
         logger.warning("Error fetching README: %s", str(e))
         return {"status": "error", "message": str(e)}
@@ -642,12 +642,12 @@ def _get_repository_contents(owner: str, repo_name: str, path: str = "") -> Dict
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             contents = response.json()
-
+
             # Extract file and directory names
             file_structure = []
             for item in contents:
@@ -655,14 +655,14 @@ def _get_repository_contents(owner: str, repo_name: str, path: str = "") -> Dict
                 if item.get("type") == "dir":
                     name += "/"
                 file_structure.append(name)
-
+
             # Sort with directories first
             file_structure.sort(key=lambda x: (not x.endswith("/"), x.lower()))
-
+
             return {"status": "success", "data": file_structure}
-
+
         return {"status": "error", "message": "Could not retrieve file structure"}
-
+
     except Exception as e:
         logger.warning("Error fetching repository contents: %s", str(e))
         return {"status": "error", "message": str(e)}
@@ -676,12 +676,12 @@ def _get_repository_releases(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             releases = response.json()
-
+
             # Extract key release information
             release_info = []
             for release in releases[:10]:  # Limit to 10 most recent
@@ -692,11 +692,11 @@ def _get_repository_releases(owner: str, repo_name: str) -> Dict:
                     "prerelease": release.get("prerelease", False),
                     "draft": release.get("draft", False)
                 })
-
+
             return {"status": "success", "data": release_info}
-
+
         return {"status": "error", "message": "Could not retrieve releases"}
-
+
     except Exception as e:
         logger.warning("Error fetching repository releases: %s", str(e))
         return {"status": "error", "message": str(e)}
@@ -710,12 +710,12 @@ def _get_repository_contributors(owner: str, repo_name: str) -> Dict:
             "Accept": "application/vnd.github.v3+json",
             "User-Agent": "Resumate-App/1.0"
         }
-
+
         response = requests.get(url, headers=headers, timeout=10)
-
+
         if response.status_code == 200:
             contributors = response.json()
-
+
             # Extract key contributor information
             contributor_info = []
             for contributor in contributors[:20]:  # Limit to top 20 contributors
@@ -725,11 +725,11 @@ def _get_repository_contributors(owner: str, repo_name: str) -> Dict:
                     "html_url": contributor.get("html_url", ""),
                     "type": contributor.get("type", "")
                 })
-
+
             return {"status": "success", "data": contributor_info}
-
+
         return {"status": "error", "message": "Could not retrieve contributors"}
-
+
     except Exception as e:
         logger.warning("Error fetching repository contributors: %s", str(e))
         return {"status": "error", "message": str(e)}
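Both save sites in this file now repeat the same three steps: resolve a subdirectory under data/, create it if missing, write the JSON. A minimal sketch of that shared pattern, assuming a hypothetical helper name (this refactor is not part of the commit):

    from pathlib import Path
    import json

    def _save_intermediate_json(result: dict, subdir: str, filename: str) -> Path:
        """Write an intermediate result to data/<subdir>/<filename> and return the path."""
        out_dir = Path(__file__).parent.parent / "data" / subdir
        out_dir.mkdir(parents=True, exist_ok=True)  # idempotent, so safe on every call
        out_file = out_dir / filename
        with open(out_file, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2, ensure_ascii=False)
        return out_file

With such a helper, the two try blocks above would shrink to a single call each, e.g. _save_intermediate_json(result, "github_repos", "github_repos.json").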
functions/gradio.py
CHANGED
@@ -46,13 +46,13 @@ def process_with_default_option(
 def process_inputs(linkedin_pdf, github_url, job_post_text, user_instructions):
     """
     Process the input files and URLs from the Gradio interface.
-
+
     Args:
         linkedin_pdf: Uploaded LinkedIn resume export PDF file or mock file object with path
-        github_url (str): GitHub profile URL
+        github_url (str): GitHub profile URL
         job_post_text (str): Job post text content
         user_instructions (str): Additional instructions from the user
-
+
     Returns:
         str: Formatted output with file and URL information
     """
@@ -176,7 +176,7 @@ def process_inputs(linkedin_pdf, github_url, job_post_text, user_instructions):
     # Generate resume only if we have valid extraction result
     if extraction_result and extraction_result.get("status") == "success":
         try:
-            _ = write_resume(extraction_result, user_instructions)
+            _ = write_resume(extraction_result, user_instructions, summary)
             result += "\n✅ Resume generated successfully\n"
             logger.info("Resume generation completed successfully")
 
@@ -194,10 +194,10 @@ def process_inputs(linkedin_pdf, github_url, job_post_text, user_instructions):
 def get_processed_data(linkedin_pdf, github_url, job_post_text, instructions):
     """
     Get structured data from all inputs for further processing.
-
+
     Args:
         linkedin_pdf: Uploaded LinkedIn resume export PDF file
-        github_url (str): GitHub profile URL
+        github_url (str): GitHub profile URL
         job_post_text (str): Job post text content
         instructions (str): Additional instructions from the user
 
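The substantive change here is the third positional argument to write_resume. The diff shows a `summary` variable already in scope inside process_inputs; assuming it holds the return value of summarize_job_call from earlier in the function (the diff does not show that line), the flow is roughly:

    # Sketch of the relevant part of process_inputs (assumed, not shown in the diff):
    summary = summarize_job_call(job_post_text)  # may be None when no job call is available
    if extraction_result and extraction_result.get("status") == "success":
        _ = write_resume(extraction_result, user_instructions, summary)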
functions/job_call.py
CHANGED
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
 def load_default_job_call() -> str:
     """
     Load default job call text from data/sample_job.txt if it exists.
-
+
     Returns:
         str: The default job call text, or empty string if file doesn't exist
     """
@@ -45,11 +45,11 @@ def load_default_job_call() -> str:
 
 def summarize_job_call(job_call: str = None) -> str:
     '''Extracts and summarizes key information from job call.
-
+
     Args:
         job_call (str, optional): Job call text to summarize. If None or empty,
                                   attempts to load default from data/sample_job.txt
-
+
     Returns:
         str: Summarized job call information, or None if no job call available
     '''
@@ -119,25 +119,25 @@ def summarize_job_call(job_call: str = None) -> str:
 
 def _save_job_call_data(original_job_call: str, extracted_summary: str) -> None:
     """
-    Save job call data (original and extracted summary) to the data directory.
-
+    Save job call data (original and extracted summary) to the data/job_calls directory.
+
     Args:
         original_job_call (str): The original job call text
         extracted_summary (str): The extracted/summarized job call information
     """
     try:
-        # Get the project root directory and
+        # Get the project root directory and job_calls subdirectory
         project_root = Path(__file__).parent.parent
-
-
-        # Create
-
-
+        job_calls_dir = project_root / "data" / "job_calls"
+
+        # Create job_calls directory if it doesn't exist
+        job_calls_dir.mkdir(parents=True, exist_ok=True)
+
         # Create timestamped filename
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         filename = f"job_call_extracted_{timestamp}.json"
-        file_path =
-
+        file_path = job_calls_dir / filename
+
         # Prepare data to save
         job_call_data = {
             "timestamp": datetime.now().isoformat(),
@@ -149,13 +149,13 @@ def _save_job_call_data(original_job_call: str, extracted_summary: str) -> None:
                 "extraction_successful": extracted_summary is not None
             }
         }
-
+
         # Save to JSON file
         with open(file_path, 'w', encoding='utf-8') as f:
             json.dump(job_call_data, f, indent=2, ensure_ascii=False)
-
+
         logger.info("Saved job call data to: %s", file_path)
-
+
     except Exception as e:
         logger.error("Error saving job call data: %s", str(e))
         raise
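The %Y%m%d_%H%M%S timestamp format does more than make filenames unique: it sorts lexicographically in chronological order, so the newest extraction can be located without parsing dates. A small illustration (hypothetical consumer code, not part of this commit):

    from pathlib import Path

    job_calls_dir = Path("data") / "job_calls"
    # Names like job_call_extracted_20250101_120000.json sort chronologically,
    # so max() over the glob returns the most recent extraction, or None if empty.
    latest = max(job_calls_dir.glob("job_call_extracted_*.json"), default=None)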
functions/linkedin_resume.py
CHANGED
@@ -11,6 +11,7 @@ import io
 import os
 import json
 from pathlib import Path
+from datetime import datetime
 import PyPDF2
 
 # pylint: disable=broad-exception-caught
@@ -39,13 +40,13 @@ def extract_text_from_linkedin_pdf(pdf_file) -> dict:
     """
     Extract and structure text content from an uploaded LinkedIn resume export PDF file
     for optimal LLM processing.
-
+
     Args:
         pdf_file: The file path string to the uploaded PDF file
-
+
     Returns:
         dict: Dictionary containing extraction status, structured text content, and metadata
-
+
     Example:
         {
             "status": "success",
@@ -123,10 +124,13 @@ def extract_text_from_linkedin_pdf(pdf_file) -> dict:
 
     # Save results to JSON file
     try:
-
-
+        linkedin_profile_dir = Path(__file__).parent.parent / "data" / "linkedin_profile"
+        linkedin_profile_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create timestamped filename
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
 
-        output_file = data_dir / "linkedin_resume.json"
+        output_file = linkedin_profile_dir / f"linkedin_resume_{timestamp}.json"
         with open(output_file, 'w', encoding='utf-8') as f:
             json.dump(result, f, indent=2, ensure_ascii=False)
 
@@ -149,10 +153,10 @@ def extract_text_from_linkedin_pdf(pdf_file) -> dict:
 def _structure_resume_text(text: str) -> dict:
     """
     Structure resume text into logical sections for optimal LLM processing.
-
+
     Args:
         text (str): Raw extracted text from PDF
-
+
     Returns:
         dict: Structured text with sections, full text, and summary
     """
@@ -253,11 +257,11 @@ def _structure_resume_text(text: str) -> dict:
 def _format_for_llm(sections: dict) -> str:
     """
     Format the resume sections in an optimal way for LLM processing.
-
+
     Args:
         sections (dict): Structured sections
         full_text (str): Full cleaned text
-
+
     Returns:
         str: LLM-optimized formatted text
     """
@@ -297,10 +301,10 @@ def _format_for_llm(sections: dict) -> str:
 def _clean_extracted_text(text: str) -> str:
     """
     Clean and normalize extracted text from PDF for better LLM processing.
-
+
     Args:
         text (str): Raw extracted text
-
+
     Returns:
         str: Cleaned text optimized for LLM consumption
     """
@@ -346,10 +350,10 @@ def _clean_extracted_text(text: str) -> str:
 def get_llm_context_from_resume(extraction_result: dict) -> str:
     """
     Extract the best formatted text for LLM context from the extraction result.
-
+
     Args:
         extraction_result (dict): Result from extract_text_from_linkedin_pdf
-
+
     Returns:
         str: Formatted text ready for LLM context
     """
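Note the behavioral shift buried in the save block: the old code overwrote a single data/linkedin_resume.json on every run, while the new code keeps one timestamped file per extraction under data/linkedin_profile/. Any consumer that read the fixed path would now need to pick a file, for example (a hypothetical reader, not part of this commit):

    from pathlib import Path

    profile_dir = Path(__file__).parent.parent / "data" / "linkedin_profile"
    # Timestamped names sort chronologically, so max() picks the newest extraction.
    latest = max(profile_dir.glob("linkedin_resume_*.json"), default=None)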
functions/writer_agent.py
CHANGED
@@ -11,7 +11,7 @@ from configuration import AGENT_MODEL, INSTRUCTIONS
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-def write_resume(content: str, user_instructions: str = None) -> str:
+def write_resume(content: str, user_instructions: str = None, job_summary: str = None) -> str:
 
     """
     Generates a resume based on the provided content.
@@ -19,6 +19,7 @@ def write_resume(content: str, user_instructions: str = None) -> str:
     Args:
         content (str): The content to be used for generating the resume.
         user_instructions (str, optional): Additional instructions from the user.
+        job_summary (str, optional): Extracted/summarized job call information.
 
     Returns:
         str: The generated resume.
@@ -36,9 +37,13 @@ def write_resume(content: str, user_instructions: str = None) -> str:
         planning_interval=5
     )
 
-    # Prepare instructions - combine default with user instructions
+    # Prepare instructions - combine default with user instructions and job summary
     instructions = INSTRUCTIONS
 
+    if job_summary and job_summary.strip():
+        instructions += f"\n\nJob Requirements and Details:\n{job_summary.strip()}"
+        logger.info("Added job summary to agent prompt (%d characters)", len(job_summary))
+
     if user_instructions and user_instructions.strip():
         instructions += f"\n\nAdditional user instructions:\n{user_instructions.strip()}"
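Because job_summary defaults to None, every existing call site keeps working; only functions/gradio.py was updated to pass the new argument. Both forms are valid after this commit (variable names here are illustrative):

    resume = write_resume(profile_text, "Keep it to one page")           # old two-argument call
    resume = write_resume(profile_text, "Keep it to one page", summary)  # new call with job summary

The job summary is appended to the prompt before the user instructions, so user instructions can still override anything the job call implies.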
tests/test_gradio.py
CHANGED
@@ -72,10 +72,11 @@ class TestProcessInputs(unittest.TestCase):
         self.assertIn("✅ Additional instructions provided", result)
         self.assertIn("✅ Resume generated successfully", result)
 
-        # Verify write_resume was called with user instructions
+        # Verify write_resume was called with user instructions and job summary
         mock_write_resume.assert_called_with(
             mock_linkedin_result,
-            "Please emphasize technical skills"
+            "Please emphasize technical skills",
+            "Job summary content\n"
         )
 
     @patch('functions.gradio.extract_text_from_linkedin_pdf')
@@ -391,7 +392,7 @@ class TestGetProcessedData(unittest.TestCase):
         result = gradio.get_processed_data(None, "", " ", "")
         self.assertEqual(result["job_post"], "Default job content")
 
-        # Test with empty string - should load default
+        # Test with empty string - should load default
         result = gradio.get_processed_data(None, "", "", "")
         self.assertEqual(result["job_post"], "Default job content")
 
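mock.assert_called_with verifies the exact arguments of the most recent call, so this test had to grow a third positional value the moment process_inputs started passing the summary. If the summary text itself were incidental to the assertion, unittest.mock.ANY would decouple the test from it (an alternative, not what this commit does):

    from unittest.mock import ANY

    mock_write_resume.assert_called_with(
        mock_linkedin_result,
        "Please emphasize technical skills",
        ANY,  # accept whatever summary the pipeline produced
    )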
|