Spaces:
Configuration error
Configuration error
Commit
·
da25614
1
Parent(s):
f5b66ec
Added function to retrieve user's public GitHub repository list.
Browse files- functions/github.py +317 -0
- requirements.txt +2 -1
- resumate.py +28 -2
functions/github.py
ADDED
@@ -0,0 +1,317 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
github.py
|
3 |
+
|
4 |
+
Functions for retrieving information from GitHub profiles and repositories.
|
5 |
+
"""
|
6 |
+
|
7 |
+
import re
|
8 |
+
import logging
|
9 |
+
import requests
|
10 |
+
from typing import List, Dict, Optional
|
11 |
+
from urllib.parse import urlparse
|
12 |
+
|
13 |
+
# Set up logging
|
14 |
+
logging.basicConfig(level=logging.INFO)
|
15 |
+
logger = logging.getLogger(__name__)
|
16 |
+
|
17 |
+
|
18 |
+
def get_github_repositories(github_url: str) -> Dict:
    """
    Retrieve public repositories from a GitHub profile URL.

    The username is extracted from the URL, the user's profile is fetched,
    then the repository list is fetched and reduced to a compact structure.
    Any failure is reported through the returned dictionary rather than by
    raising.

    Args:
        github_url (str): GitHub profile URL (e.g., https://github.com/username)

    Returns:
        dict: On success, a dictionary with "status", "repositories",
            "metadata" and "message". On failure, a dictionary with
            "status" set to "error" and a "message" describing the problem.

    Example:
        {
            "status": "success",
            "repositories": [
                {
                    "name": "repo-name",
                    "description": "Repository description",
                    "language": "Python",
                    "stars": 10,
                    "forks": 2,
                    "updated_at": "2024-01-01T00:00:00Z",
                    "html_url": "https://github.com/user/repo",
                    "topics": ["python", "api"]
                }
            ],
            "metadata": {
                "username": "username",
                "total_repos": 25,
                "public_repos": 20,
                "profile_url": "https://github.com/username"
            },
            "message": "Successfully retrieved 20 repositories"
        }
    """
    # Reject missing, blank or non-string input up front so that .strip()
    # can never raise outside the error-handling path below.
    if not isinstance(github_url, str) or not github_url.strip():
        return {"status": "error", "message": "No GitHub URL provided"}

    try:
        username = _extract_github_username(github_url)
        if not username:
            return {"status": "error", "message": "Invalid GitHub URL format"}

        logger.info("Fetching repositories for GitHub user: %s", username)

        # Fetch the profile first: it confirms the user exists and supplies
        # the repository count reported by GitHub.
        user_info = _get_github_user_info(username)
        if user_info["status"] != "success":
            return user_info

        repositories = _get_user_repositories(username)
        if repositories["status"] != "success":
            return repositories

        processed_repos = _process_repository_data(repositories["data"])

        return {
            "status": "success",
            "repositories": processed_repos,
            "metadata": {
                "username": username,
                # Count reported by the GitHub profile.
                "total_repos": user_info["data"].get("public_repos", 0),
                # Count actually returned after processing/filtering.
                "public_repos": len(processed_repos),
                "profile_url": github_url
            },
            "message": f"Successfully retrieved {len(processed_repos)} repositories"
        }

    except Exception as e:
        # Top-level boundary: keep the traceback in the log, but hand the
        # caller a plain error dictionary.
        logger.exception("Error retrieving GitHub repositories: %s", e)
        return {
            "status": "error",
            "message": f"Failed to retrieve GitHub repositories: {str(e)}"
        }
|
93 |
+
|
94 |
+
|
95 |
+
def _extract_github_username(github_url: str) -> Optional[str]:
|
96 |
+
"""
|
97 |
+
Extract username from GitHub URL.
|
98 |
+
|
99 |
+
Args:
|
100 |
+
github_url (str): GitHub profile URL
|
101 |
+
|
102 |
+
Returns:
|
103 |
+
Optional[str]: Username if valid URL, None otherwise
|
104 |
+
"""
|
105 |
+
try:
|
106 |
+
# Clean up the URL
|
107 |
+
url = github_url.strip().rstrip('/')
|
108 |
+
|
109 |
+
# Handle various GitHub URL formats
|
110 |
+
patterns = [
|
111 |
+
r'github\.com/([^/]+)/?$', # https://github.com/username
|
112 |
+
r'github\.com/([^/]+)/.*', # https://github.com/username/anything
|
113 |
+
r'^([a-zA-Z0-9\-_]+)$' # Just username
|
114 |
+
]
|
115 |
+
|
116 |
+
for pattern in patterns:
|
117 |
+
match = re.search(pattern, url)
|
118 |
+
if match:
|
119 |
+
username = match.group(1)
|
120 |
+
# Validate username format
|
121 |
+
if re.match(r'^[a-zA-Z0-9\-_]+$', username) and len(username) <= 39:
|
122 |
+
return username
|
123 |
+
|
124 |
+
return None
|
125 |
+
|
126 |
+
except Exception as e:
|
127 |
+
logger.warning(f"Error extracting username from URL {github_url}: {str(e)}")
|
128 |
+
return None
|
129 |
+
|
130 |
+
|
131 |
+
def _get_github_user_info(username: str) -> Dict:
    """
    Get basic user information from GitHub API.

    Args:
        username (str): GitHub username

    Returns:
        dict: {"status": "success", "data": <decoded JSON>} on HTTP 200,
            otherwise {"status": "error", "message": <reason>}. Network
            failures and undecodable response bodies are reported the same
            way instead of being raised.
    """
    url = f"https://api.github.com/users/{username}"
    headers = {
        "Accept": "application/vnd.github.v3+json",
        "User-Agent": "Resumate-App/1.0"
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as e:
        logger.error("Network error fetching user info: %s", e)
        return {"status": "error", "message": f"Network error: {str(e)}"}

    status_code = response.status_code
    if status_code == 404:
        return {"status": "error", "message": f"GitHub user '{username}' not found"}
    # Both 403 and 429 are reported as rate limiting.
    if status_code in (403, 429):
        return {"status": "error", "message": "GitHub API rate limit exceeded"}
    if status_code != 200:
        return {"status": "error", "message": f"GitHub API error: {status_code}"}

    try:
        data = response.json()
    except ValueError as e:
        # A 200 response whose body is not valid JSON.
        logger.error("Invalid JSON in GitHub user response: %s", e)
        return {"status": "error", "message": "GitHub API returned an invalid response"}

    return {"status": "success", "data": data}
|
162 |
+
|
163 |
+
|
164 |
+
def _get_user_repositories(username: str) -> Dict:
    """
    Get user's public repositories from GitHub API.

    Pages through the user's repository listing, most recently updated
    first, 100 entries per page, for at most 10 pages (1000 repositories).

    Args:
        username (str): GitHub username

    Returns:
        dict: {"status": "success", "data": <list of repository dicts>} when
            every requested page succeeded, otherwise
            {"status": "error", "message": <reason>}.
    """
    per_page = 100  # Maximum allowed by GitHub API
    max_pages = 10  # Safety limit: at most 1000 repos

    # These do not change between pages, so build them once.
    url = f"https://api.github.com/users/{username}/repos"
    headers = {
        "Accept": "application/vnd.github.v3+json",
        "User-Agent": "Resumate-App/1.0"
    }

    all_repos = []

    try:
        for page in range(1, max_pages + 1):
            params = {
                "type": "public",
                "sort": "updated",
                "direction": "desc",
                "per_page": per_page,
                "page": page
            }

            response = requests.get(url, headers=headers, params=params, timeout=10)

            status_code = response.status_code
            if status_code == 404:
                return {"status": "error", "message": f"GitHub user '{username}' not found"}
            if status_code in (403, 429):
                return {"status": "error", "message": "GitHub API rate limit exceeded"}
            if status_code != 200:
                return {"status": "error", "message": f"GitHub API error: {status_code}"}

            try:
                repos = response.json()
            except ValueError as e:
                logger.error("Invalid JSON in GitHub repositories response: %s", e)
                return {"status": "error", "message": "GitHub API returned an invalid response"}

            # The listing is expected to be a JSON array; anything else
            # would otherwise be silently mixed into the result.
            if not isinstance(repos, list):
                return {"status": "error", "message": "GitHub API returned an invalid response"}

            if not repos:  # No more repositories
                break

            all_repos.extend(repos)

            # If we got less than per_page, we've reached the end
            if len(repos) < per_page:
                break

        return {"status": "success", "data": all_repos}

    except requests.RequestException as e:
        logger.error("Network error fetching repositories: %s", e)
        return {"status": "error", "message": f"Network error: {str(e)}"}
|
220 |
+
|
221 |
+
|
222 |
+
def _process_repository_data(repos: List[Dict]) -> List[Dict]:
|
223 |
+
"""
|
224 |
+
Process and clean repository data for easier consumption.
|
225 |
+
|
226 |
+
Args:
|
227 |
+
repos (List[Dict]): Raw repository data from GitHub API
|
228 |
+
|
229 |
+
Returns:
|
230 |
+
List[Dict]: Processed repository data
|
231 |
+
"""
|
232 |
+
processed = []
|
233 |
+
|
234 |
+
for repo in repos:
|
235 |
+
# Skip forks unless they have significant modifications
|
236 |
+
if repo.get("fork", False) and repo.get("stargazers_count", 0) == 0:
|
237 |
+
continue
|
238 |
+
|
239 |
+
processed_repo = {
|
240 |
+
"name": repo.get("name", ""),
|
241 |
+
"description": repo.get("description", ""),
|
242 |
+
"language": repo.get("language", ""),
|
243 |
+
"stars": repo.get("stargazers_count", 0),
|
244 |
+
"forks": repo.get("forks_count", 0),
|
245 |
+
"updated_at": repo.get("updated_at", ""),
|
246 |
+
"created_at": repo.get("created_at", ""),
|
247 |
+
"html_url": repo.get("html_url", ""),
|
248 |
+
"topics": repo.get("topics", []),
|
249 |
+
"size": repo.get("size", 0),
|
250 |
+
"is_fork": repo.get("fork", False),
|
251 |
+
"default_branch": repo.get("default_branch", "main"),
|
252 |
+
"has_issues": repo.get("has_issues", False),
|
253 |
+
"has_wiki": repo.get("has_wiki", False),
|
254 |
+
"has_pages": repo.get("has_pages", False)
|
255 |
+
}
|
256 |
+
|
257 |
+
processed.append(processed_repo)
|
258 |
+
|
259 |
+
return processed
|
260 |
+
|
261 |
+
|
262 |
+
def format_repositories_for_llm(github_result: Dict, max_repos: int = 20) -> str:
    """
    Format GitHub repositories data for LLM consumption.

    Produces a plain-text block with a header (profile URL, username,
    repository count), one section per repository, and a footer. Only the
    first ``max_repos`` repositories are listed; the remainder is
    summarised in a single line. Missing or null repository fields are
    tolerated rather than raising.

    Args:
        github_result (dict): Result from get_github_repositories
        max_repos (int): Maximum number of repositories to list in full.
            Defaults to 20.

    Returns:
        str: Formatted text ready for LLM context, or a one-line message
            when the result is an error or contains no repositories.
    """
    if github_result.get("status") != "success":
        return f"GitHub repositories could not be retrieved: {github_result.get('message', 'Unknown error')}"

    repositories = github_result.get("repositories") or []
    metadata = github_result.get("metadata") or {}

    if not repositories:
        return f"No public repositories found for {metadata.get('username', 'user')}"

    formatted_parts = [
        "=== GITHUB REPOSITORIES ===\n",
        f"Profile: {metadata.get('profile_url', 'N/A')}",
        f"Username: {metadata.get('username', 'N/A')}",
        f"Public Repositories: {len(repositories)}\n"
    ]

    for i, repo in enumerate(repositories[:max_repos], 1):
        repo_info = [
            f"[REPOSITORY {i}]",
            f"Name: {repo.get('name') or 'N/A'}",
            f"URL: {repo.get('html_url') or 'N/A'}"
        ]

        description = repo.get('description')
        if description:
            repo_info.append(f"Description: {description}")

        language = repo.get('language')
        if language:
            repo_info.append(f"Primary Language: {language}")

        topics = repo.get('topics') or []
        if topics:
            # Limit topics to keep each entry short
            repo_info.append(f"Topics: {', '.join(str(topic) for topic in topics[:5])}")

        # Keep only the date part of the timestamp; a missing or null
        # timestamp must not break the slice.
        updated_on = str(repo.get('updated_at') or '')[:10] or 'N/A'

        repo_info.extend([
            f"Stars: {repo.get('stars') or 0} | Forks: {repo.get('forks') or 0}",
            f"Last Updated: {updated_on}",
            ""  # Empty line between repositories
        ])

        formatted_parts.extend(repo_info)

    if len(repositories) > max_repos:
        formatted_parts.append(f"... and {len(repositories) - max_repos} more repositories")

    formatted_parts.append("\n=== END GITHUB REPOSITORIES ===")

    return '\n'.join(formatted_parts)
|
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
gradio==5.35.0
|
2 |
-
PyPDF2==3.0.1
|
|
|
|
1 |
gradio==5.35.0
|
2 |
+
PyPDF2==3.0.1
|
3 |
+
requests==2.31.0
|
resumate.py
CHANGED
@@ -16,6 +16,7 @@ To run:
|
|
16 |
|
17 |
import gradio as gr
|
18 |
from functions.linkedin_resume import extract_text_from_linkedin_pdf
|
|
|
19 |
|
20 |
|
21 |
def process_inputs(linkedin_pdf, github_url, job_post_url):
|
@@ -51,9 +52,34 @@ def process_inputs(linkedin_pdf, github_url, job_post_url):
|
|
51 |
else:
|
52 |
result += "β No LinkedIn resume PDF file uploaded\n\n"
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
# Process other inputs
|
55 |
-
result += f"
|
56 |
-
result += f"Job Post URL: {job_post_url if job_post_url else 'Not provided'}\n"
|
57 |
|
58 |
return result
|
59 |
|
|
|
16 |
|
17 |
import gradio as gr
|
18 |
from functions.linkedin_resume import extract_text_from_linkedin_pdf
|
19 |
+
from functions.github import get_github_repositories, format_repositories_for_llm
|
20 |
|
21 |
|
22 |
def process_inputs(linkedin_pdf, github_url, job_post_url):
|
|
|
52 |
else:
|
53 |
result += "β No LinkedIn resume PDF file uploaded\n\n"
|
54 |
|
55 |
+
# Process GitHub profile
|
56 |
+
if github_url and github_url.strip():
|
57 |
+
result += f"β
GitHub Profile URL provided: {github_url}\n"
|
58 |
+
|
59 |
+
# Retrieve repositories from GitHub
|
60 |
+
github_result = get_github_repositories(github_url)
|
61 |
+
|
62 |
+
if github_result["status"] == "success":
|
63 |
+
metadata = github_result["metadata"]
|
64 |
+
repositories = github_result["repositories"]
|
65 |
+
|
66 |
+
result += f" π GitHub extraction: SUCCESS\n"
|
67 |
+
result += f" π€ Username: {metadata['username']}\n"
|
68 |
+
result += f" π Public repositories found: {len(repositories)}\n\n"
|
69 |
+
|
70 |
+
# Show the formatted repositories for LLM
|
71 |
+
result += "π GITHUB REPOSITORIES (LLM-Ready):\n"
|
72 |
+
result += "=" * 60 + "\n"
|
73 |
+
result += format_repositories_for_llm(github_result) + "\n"
|
74 |
+
result += "=" * 60 + "\n\n"
|
75 |
+
|
76 |
+
else:
|
77 |
+
result += f" β GitHub extraction failed: {github_result['message']}\n\n"
|
78 |
+
else:
|
79 |
+
result += "β No GitHub profile URL provided\n\n"
|
80 |
+
|
81 |
# Process other inputs
|
82 |
+
result += f"Job Post: {job_post_url if job_post_url else 'Not provided'}\n"
|
|
|
83 |
|
84 |
return result
|
85 |
|