Spaces:

gperdrizet
/

resumate

Configuration error

gperdrizet committed on Jul 9

Commit

da25614

1 Parent(s): f5b66ec

Added function to retrieve user's public GitHub repository list.

Browse files

Files changed (3) hide show

functions/github.py +317 -0
requirements.txt +2 -1
resumate.py +28 -2

functions/github.py ADDED Viewed

	@@ -0,0 +1,317 @@

+"""
+github.py
+Functions for retrieving information from GitHub profiles and repositories.
+"""
+import re
+import logging
+import requests
+from typing import List, Dict, Optional
+from urllib.parse import urlparse
+# Set up logging: configure the root logger at INFO level as an import-time
+# side effect, then create the named logger used by the functions in this module.
+# NOTE(review): basicConfig() is called from library code here; it affects the
+# whole process unless the application configured logging first — confirm this
+# is intended.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def get_github_repositories(github_url: str) -> Dict:
+    """
+    Retrieve public repositories from a GitHub profile URL.
+    The input is reduced to a username, the user's profile is fetched, then the
+    repository list is fetched and cleaned up. Failures are reported through the
+    returned dictionary rather than raised.
+    Args:
+        github_url (str): GitHub profile URL (e.g., https://github.com/username).
+            Any form accepted by _extract_github_username may be passed.
+    Returns:
+        dict: On success, "status" is "success" and the dictionary carries
+            "repositories", "metadata" and "message". On failure, "status" is
+            "error" and "message" describes the problem.
+    Example:
+        {
+            "status": "success",
+            "repositories": [
+                {
+                    "name": "repo-name",
+                    "description": "Repository description",
+                    "language": "Python",
+                    "stars": 10,
+                    "forks": 2,
+                    "updated_at": "2024-01-01T00:00:00Z",
+                    "html_url": "https://github.com/user/repo",
+                    "topics": ["python", "api"]
+                }
+            ],
+            "metadata": {
+                "username": "username",
+                "total_repos": 25,
+                "public_repos": 20,
+                "profile_url": "https://github.com/username"
+            },
+            "message": "Successfully retrieved repositories"
+        }
+    """
+    if not github_url or not github_url.strip():
+        return {"status": "error", "message": "No GitHub URL provided"}
+    try:
+        # Extract username from GitHub URL
+        username = _extract_github_username(github_url)
+        if not username:
+            return {"status": "error", "message": "Invalid GitHub URL format"}
+        logger.info("Fetching repositories for GitHub user: %s", username)
+        # Get user info first; its error dictionary is passed straight through
+        user_info = _get_github_user_info(username)
+        if user_info["status"] != "success":
+            return user_info
+        # Get repositories
+        repositories = _get_user_repositories(username)
+        if repositories["status"] != "success":
+            return repositories
+        # Process and structure repository data
+        processed_repos = _process_repository_data(repositories["data"])
+        return {
+            "status": "success",
+            "repositories": processed_repos,
+            "metadata": {
+                "username": username,
+                # Count reported by the user's profile
+                "total_repos": user_info["data"].get("public_repos", 0),
+                # Count actually returned after processing/filtering
+                "public_repos": len(processed_repos),
+                # Built from the resolved username: the raw input may be a bare
+                # username or a repository URL, neither of which is a profile URL
+                "profile_url": f"https://github.com/{username}"
+            },
+            "message": f"Successfully retrieved {len(processed_repos)} repositories"
+        }
+    except Exception as e:
+        # Top-level boundary: callers expect an error dictionary, not an exception
+        logger.error("Error retrieving GitHub repositories: %s", e)
+        return {
+            "status": "error",
+            "message": f"Failed to retrieve GitHub repositories: {str(e)}"
+        }
+def _extract_github_username(github_url: str) -> Optional[str]:
+    """
+    Extract username from GitHub URL.
+    Accepts a bare username or any URL whose host is github.com (optionally
+    with a scheme and/or subdomain such as "www."). Anything after the first
+    path segment, including a query string or fragment, is ignored.
+    Args:
+        github_url (str): GitHub profile URL, repository URL, or bare username
+    Returns:
+        Optional[str]: Username if valid URL, None otherwise
+    """
+    if not isinstance(github_url, str):
+        return None
+    # Allowed username characters; the length limit is checked separately
+    username_pattern = r'[a-zA-Z0-9\-_]+'
+    # Clean up the URL
+    url = github_url.strip().rstrip('/')
+    if re.fullmatch(username_pattern, url):
+        # Just username
+        username = url
+    else:
+        # "github.com" must start the string or follow "/" or "." so look-alike
+        # hosts such as "notgithub.com" are rejected. The capture stops at "/",
+        # "?" or "#" so "github.com/user?tab=repositories" still yields "user".
+        match = re.search(r'(?:^|[/.])github\.com/([^/?#]+)', url, re.IGNORECASE)
+        if not match:
+            return None
+        username = match.group(1)
+    # Validate username format
+    if re.fullmatch(username_pattern, username) and len(username) <= 39:
+        return username
+    return None
+def _get_github_user_info(username: str) -> Dict:
+    """
+    Get basic user information from GitHub API.
+    Args:
+        username (str): GitHub username
+    Returns:
+        dict: {"status": "success", "data": <decoded JSON body>} on HTTP 200,
+            otherwise {"status": "error", "message": <description>}
+    """
+    url = f"https://api.github.com/users/{username}"
+    headers = {
+        "Accept": "application/vnd.github.v3+json",
+        "User-Agent": "Resumate-App/1.0"
+    }
+    try:
+        response = requests.get(url, headers=headers, timeout=10)
+        status_code = response.status_code
+        if status_code == 404:
+            return {"status": "error", "message": f"GitHub user '{username}' not found"}
+        # GitHub signals an exhausted rate limit with either 403 or 429
+        if status_code in (403, 429):
+            return {"status": "error", "message": "GitHub API rate limit exceeded"}
+        if status_code != 200:
+            return {"status": "error", "message": f"GitHub API error: {status_code}"}
+        return {"status": "success", "data": response.json()}
+    except requests.RequestException as e:
+        logger.error("Network error fetching user info: %s", e)
+        return {"status": "error", "message": f"Network error: {str(e)}"}
+def _get_user_repositories(username: str) -> Dict:
+    """
+    Get user's public repositories from GitHub API.
+    Pages through the listing 100 repositories at a time and stops at the first
+    empty or short page, or after 10 pages (1000 repositories).
+    Args:
+        username (str): GitHub username
+    Returns:
+        dict: {"status": "success", "data": <list of raw repository dicts>} or
+            {"status": "error", "message": <description>}. Any failed page
+            request discards the pages already collected.
+    """
+    per_page = 100  # Maximum allowed by GitHub API
+    max_pages = 10  # Safety limit: at most 1000 repos
+    # Request pieces that do not change between pages
+    url = f"https://api.github.com/users/{username}/repos"
+    headers = {
+        "Accept": "application/vnd.github.v3+json",
+        "User-Agent": "Resumate-App/1.0"
+    }
+    all_repos = []
+    try:
+        for page in range(1, max_pages + 1):
+            params = {
+                # NOTE(review): GitHub documents all/owner/member for this
+                # endpoint's "type" parameter — confirm "public" is honored
+                "type": "public",
+                "sort": "updated",
+                "direction": "desc",
+                "per_page": per_page,
+                "page": page
+            }
+            response = requests.get(url, headers=headers, params=params, timeout=10)
+            status_code = response.status_code
+            # Same error messages as the user-info lookup
+            if status_code == 404:
+                return {"status": "error", "message": f"GitHub user '{username}' not found"}
+            if status_code in (403, 429):
+                return {"status": "error", "message": "GitHub API rate limit exceeded"}
+            if status_code != 200:
+                return {"status": "error", "message": f"GitHub API error: {status_code}"}
+            repos = response.json()
+            if not isinstance(repos, list):
+                # Extending with a dict would silently append its keys
+                return {"status": "error", "message": "GitHub API error: unexpected response format"}
+            if not repos:  # No more repositories
+                break
+            all_repos.extend(repos)
+            # If we got less than per_page, we've reached the end
+            if len(repos) < per_page:
+                break
+        return {"status": "success", "data": all_repos}
+    except requests.RequestException as e:
+        logger.error("Network error fetching repositories: %s", e)
+        return {"status": "error", "message": f"Network error: {str(e)}"}
+def _process_repository_data(repos: List[Dict]) -> List[Dict]:
+    """
+    Process and clean repository data for easier consumption.
+    Forks with zero stars are dropped. Missing fields and fields the API
+    returned as null both fall back to a typed default ("" / 0 / [] / False),
+    so consumers never see None.
+    Args:
+        repos (List[Dict]): Raw repository data from GitHub API
+    Returns:
+        List[Dict]: Processed repository data
+    """
+    processed = []
+    for repo in repos or []:
+        is_fork = bool(repo.get("fork"))
+        # "or" rather than a .get() default: the default is not applied when
+        # the key is present with a null value
+        stars = repo.get("stargazers_count") or 0
+        # Skip forks that nobody has starred
+        if is_fork and stars == 0:
+            continue
+        processed.append({
+            "name": repo.get("name") or "",
+            "description": repo.get("description") or "",
+            "language": repo.get("language") or "",
+            "stars": stars,
+            "forks": repo.get("forks_count") or 0,
+            "updated_at": repo.get("updated_at") or "",
+            "created_at": repo.get("created_at") or "",
+            "html_url": repo.get("html_url") or "",
+            "topics": list(repo.get("topics") or []),
+            "size": repo.get("size") or 0,
+            "is_fork": is_fork,
+            "default_branch": repo.get("default_branch") or "main",
+            "has_issues": bool(repo.get("has_issues")),
+            "has_wiki": bool(repo.get("has_wiki")),
+            "has_pages": bool(repo.get("has_pages"))
+        })
+    return processed
+def format_repositories_for_llm(github_result: Dict, max_repos: int = 20, max_topics: int = 5) -> str:
+    """
+    Format GitHub repositories data for LLM consumption.
+    Args:
+        github_result (dict): Result from get_github_repositories
+        max_repos (int): Maximum number of repositories to list; the rest are
+            summarized in a single "... and N more repositories" line
+        max_topics (int): Maximum number of topics shown per repository
+    Returns:
+        str: Formatted text ready for LLM context, or a one-line explanation
+            when the result is an error or contains no repositories
+    """
+    github_result = github_result or {}
+    if github_result.get("status") != "success":
+        return f"GitHub repositories could not be retrieved: {github_result.get('message', 'Unknown error')}"
+    repositories = github_result.get("repositories") or []
+    metadata = github_result.get("metadata") or {}
+    if not repositories:
+        return f"No public repositories found for {metadata.get('username', 'user')}"
+    formatted_parts = [
+        "=== GITHUB REPOSITORIES ===\n",
+        f"Profile: {metadata.get('profile_url', 'N/A')}",
+        f"Username: {metadata.get('username', 'N/A')}",
+        f"Public Repositories: {len(repositories)}\n"
+    ]
+    shown = repositories[:max_repos]
+    for i, repo in enumerate(shown, 1):
+        # .get() with fallbacks: a missing or None field must not abort formatting
+        repo_info = [
+            f"[REPOSITORY {i}]",
+            f"Name: {repo.get('name') or ''}",
+            f"URL: {repo.get('html_url') or ''}"
+        ]
+        description = repo.get('description')
+        if description:
+            repo_info.append(f"Description: {description}")
+        language = repo.get('language')
+        if language:
+            repo_info.append(f"Primary Language: {language}")
+        topics = repo.get('topics') or []
+        if topics:
+            repo_info.append(f"Topics: {', '.join(topics[:max_topics])}")  # Limit topics
+        updated_date = str(repo.get('updated_at') or '')[:10]  # Just the date
+        repo_info.extend([
+            f"Stars: {repo.get('stars') or 0} | Forks: {repo.get('forks') or 0}",
+            f"Last Updated: {updated_date}",
+            ""  # Empty line between repositories
+        ])
+        formatted_parts.extend(repo_info)
+    remaining = len(repositories) - len(shown)
+    if remaining > 0:
+        formatted_parts.append(f"... and {remaining} more repositories")
+    formatted_parts.append("\n=== END GITHUB REPOSITORIES ===")
+    return '\n'.join(formatted_parts)

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 gradio==5.35.0
-PyPDF2==3.0.1

 gradio==5.35.0
+PyPDF2==3.0.1
+requests==2.31.0

resumate.py CHANGED Viewed

@@ -16,6 +16,7 @@ To run:
 import gradio as gr
 from functions.linkedin_resume import extract_text_from_linkedin_pdf
 def process_inputs(linkedin_pdf, github_url, job_post_url):
@@ -51,9 +52,34 @@ def process_inputs(linkedin_pdf, github_url, job_post_url):
     else:
         result += "❌ No LinkedIn resume PDF file uploaded\n\n"
     # Process other inputs
-    result += f"GitHub Profile: {github_url if github_url else 'Not provided'}\n"
-    result += f"Job Post URL: {job_post_url if job_post_url else 'Not provided'}\n"
     return result

 import gradio as gr
 from functions.linkedin_resume import extract_text_from_linkedin_pdf
+from functions.github import get_github_repositories, format_repositories_for_llm
 def process_inputs(linkedin_pdf, github_url, job_post_url):
     else:
         result += "❌ No LinkedIn resume PDF file uploaded\n\n"
+    # Process GitHub profile
+    if github_url and github_url.strip():
+        result += f"✅ GitHub Profile URL provided: {github_url}\n"
+        # Retrieve repositories from GitHub
+        github_result = get_github_repositories(github_url)
+        if github_result["status"] == "success":
+            metadata = github_result["metadata"]
+            repositories = github_result["repositories"]
+            result += f"   📊 GitHub extraction: SUCCESS\n"
+            result += f"   👤 Username: {metadata['username']}\n"
+            result += f"   📁 Public repositories found: {len(repositories)}\n\n"
+            # Show the formatted repositories for LLM
+            result += "📂 GITHUB REPOSITORIES (LLM-Ready):\n"
+            result += "=" * 60 + "\n"
+            result += format_repositories_for_llm(github_result) + "\n"
+            result += "=" * 60 + "\n\n"
+        else:
+            result += f"   ❌ GitHub extraction failed: {github_result['message']}\n\n"
+    else:
+        result += "❌ No GitHub profile URL provided\n\n"
     # Process other inputs
+    result += f"Job Post: {job_post_url if job_post_url else 'Not provided'}\n"
     return result