""" github.py Functions for retrieving information from GitHub profiles and repositories. """ import re import json import logging from typing import List, Dict, Optional from pathlib import Path import requests # Set up logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def get_github_repositories(github_url: str) -> Dict: """ Retrieve public repositories from a GitHub profile URL. Args: github_url (str): GitHub profile URL (e.g., https://github.com/username) Returns: dict: Dictionary containing status, repositories list, and metadata Example: { "status": "success", "repositories": [ { "name": "repo-name", "description": "Repository description", "language": "Python", "stars": 10, "forks": 2, "updated_at": "2024-01-01T00:00:00Z", "html_url": "https://github.com/user/repo", "topics": ["python", "api"] } ], "metadata": { "username": "username", "total_repos": 25, "public_repos": 20 }, "message": "Successfully retrieved repositories" } """ if not github_url or not github_url.strip(): return {"status": "error", "message": "No GitHub URL provided"} try: # Extract username from GitHub URL username = _extract_github_username(github_url) if not username: return {"status": "error", "message": "Invalid GitHub URL format"} logger.info("Fetching repositories for GitHub user: %s", username) # Get user info first user_info = _get_github_user_info(username) if user_info["status"] != "success": return user_info # Get repositories repositories = _get_user_repositories(username) if repositories["status"] != "success": return repositories # Process and structure repository data processed_repos = _process_repository_data(repositories["data"]) result = { "status": "success", "repositories": processed_repos, "metadata": { "username": username, "total_repos": user_info["data"].get("public_repos", 0), "public_repos": len(processed_repos), "profile_url": github_url }, "message": f"Successfully retrieved {len(processed_repos)} repositories" } # Save results to JSON file try: data_dir = Path(__file__).parent.parent / "data" data_dir.mkdir(exist_ok=True) output_file = data_dir / "github_repos.json" with open(output_file, 'w', encoding='utf-8') as f: json.dump(result, f, indent=2, ensure_ascii=False) logger.info("GitHub repositories saved to %s", output_file) except Exception as save_error: # pylint: disable=broad-exception-caught logger.warning("Failed to save GitHub repositories to file: %s", str(save_error)) return result except Exception as e: # pylint: disable=broad-exception-caught logger.error("Error retrieving GitHub repositories: %s", str(e)) return { "status": "error", "message": f"Failed to retrieve GitHub repositories: {str(e)}" } def _extract_github_username(github_url: str) -> Optional[str]: """ Extract username from GitHub URL. 
def _extract_github_username(github_url: str) -> Optional[str]:
    """
    Extract username from GitHub URL.

    Args:
        github_url (str): GitHub profile URL

    Returns:
        Optional[str]: Username if valid URL, None otherwise
    """
    try:
        # Clean up the URL
        url = github_url.strip().rstrip('/')

        # Handle various GitHub URL formats
        patterns = [
            r'github\.com/([^/]+)/?$',    # https://github.com/username
            r'github\.com/([^/]+)/.*',    # https://github.com/username/anything
            r'^([a-zA-Z0-9\-_]+)$'        # Just a username
        ]

        for pattern in patterns:
            match = re.search(pattern, url)
            if match:
                username = match.group(1)
                # Validate username format (GitHub usernames are at most 39 characters)
                if re.match(r'^[a-zA-Z0-9\-_]+$', username) and len(username) <= 39:
                    return username

        return None

    except Exception as e:  # pylint: disable=broad-exception-caught
        logger.warning("Error extracting username from URL %s: %s", github_url, str(e))
        return None


def _get_github_user_info(username: str) -> Dict:
    """
    Get basic user information from the GitHub API.

    Args:
        username (str): GitHub username

    Returns:
        dict: API response with user information
    """
    try:
        url = f"https://api.github.com/users/{username}"
        headers = {
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "Resumate-App/1.0"
        }

        response = requests.get(url, headers=headers, timeout=10)

        if response.status_code == 404:
            return {"status": "error", "message": f"GitHub user '{username}' not found"}
        elif response.status_code == 403:
            return {"status": "error", "message": "GitHub API rate limit exceeded"}
        elif response.status_code != 200:
            return {"status": "error", "message": f"GitHub API error: {response.status_code}"}

        return {"status": "success", "data": response.json()}

    except requests.RequestException as e:
        logger.error("Network error fetching user info: %s", str(e))
        return {"status": "error", "message": f"Network error: {str(e)}"}


def _get_user_repositories(username: str) -> Dict:
    """
    Get a user's public repositories from the GitHub API.

    Args:
        username (str): GitHub username

    Returns:
        dict: API response with repositories
    """
    try:
        # Get repositories with pagination
        all_repos = []
        page = 1
        per_page = 100  # Maximum allowed by the GitHub API

        while True:
            url = f"https://api.github.com/users/{username}/repos"
            params = {
                "type": "public",
                "sort": "updated",
                "direction": "desc",
                "per_page": per_page,
                "page": page
            }
            headers = {
                "Accept": "application/vnd.github.v3+json",
                "User-Agent": "Resumate-App/1.0"
            }

            response = requests.get(url, headers=headers, params=params, timeout=10)

            if response.status_code != 200:
                return {"status": "error", "message": f"GitHub API error: {response.status_code}"}

            repos = response.json()
            if not repos:  # No more repositories
                break

            all_repos.extend(repos)

            # If we got fewer than per_page, we've reached the end
            if len(repos) < per_page:
                break

            page += 1

            # Safety limit to prevent infinite loops
            if page > 10:  # Max 1000 repos
                break

        return {"status": "success", "data": all_repos}

    except requests.RequestException as e:
        logger.error("Network error fetching repositories: %s", str(e))
        return {"status": "error", "message": f"Network error: {str(e)}"}

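# Illustrative mapping performed by the helper below (comment-only sketch; the
# raw object is abbreviated and uses made-up values). A raw GitHub API repository
# object is reduced to the small, resume-relevant subset of fields:
#
#     raw = [{"name": "demo", "fork": False, "stargazers_count": 3,
#             "forks_count": 1, "language": "Python", "topics": ["cli"]}]
#     _process_repository_data(raw)
#     # -> [{"name": "demo", "language": "Python", "stars": 3, "forks": 1, ...}]
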
def _process_repository_data(repos: List[Dict]) -> List[Dict]:
    """
    Process and clean repository data for easier consumption.

    Args:
        repos (List[Dict]): Raw repository data from GitHub API

    Returns:
        List[Dict]: Processed repository data
    """
    processed = []

    for repo in repos:
        # Skip forks unless they have attracted stars (a proxy for significant modifications)
        if repo.get("fork", False) and repo.get("stargazers_count", 0) == 0:
            continue

        processed_repo = {
            "name": repo.get("name", ""),
            "description": repo.get("description", ""),
            "language": repo.get("language", ""),
            "stars": repo.get("stargazers_count", 0),
            "forks": repo.get("forks_count", 0),
            "updated_at": repo.get("updated_at", ""),
            "created_at": repo.get("created_at", ""),
            "html_url": repo.get("html_url", ""),
            "topics": repo.get("topics", []),
            "size": repo.get("size", 0),
            "is_fork": repo.get("fork", False),
            "default_branch": repo.get("default_branch", "main"),
            "has_issues": repo.get("has_issues", False),
            "has_wiki": repo.get("has_wiki", False),
            "has_pages": repo.get("has_pages", False)
        }
        processed.append(processed_repo)

    return processed


def format_repositories_for_llm(github_result: Dict) -> str:
    """
    Format GitHub repositories data for LLM consumption.

    Args:
        github_result (dict): Result from get_github_repositories

    Returns:
        str: Formatted text ready for LLM context
    """
    if github_result.get("status") != "success":
        return "GitHub repositories could not be retrieved: " + \
               f"{github_result.get('message', 'Unknown error')}"

    repositories = github_result.get("repositories", [])
    metadata = github_result.get("metadata", {})

    if not repositories:
        return f"No public repositories found for {metadata.get('username', 'user')}"

    formatted_parts = [
        "=== GITHUB REPOSITORIES ===\n",
        f"Profile: {metadata.get('profile_url', 'N/A')}",
        f"Username: {metadata.get('username', 'N/A')}",
        f"Public Repositories: {len(repositories)}\n"
    ]

    for i, repo in enumerate(repositories[:20], 1):  # Limit to top 20 repos
        repo_info = [
            f"[REPOSITORY {i}]",
            f"Name: {repo['name']}",
            f"URL: {repo['html_url']}"
        ]

        if repo['description']:
            repo_info.append(f"Description: {repo['description']}")

        if repo['language']:
            repo_info.append(f"Primary Language: {repo['language']}")

        if repo['topics']:
            repo_info.append(f"Topics: {', '.join(repo['topics'][:5])}")  # Limit topics

        repo_info.extend([
            f"Stars: {repo['stars']} | Forks: {repo['forks']}",
            f"Last Updated: {repo['updated_at'][:10]}",  # Just the date
            ""  # Empty line between repositories
        ])

        formatted_parts.extend(repo_info)

    if len(repositories) > 20:
        formatted_parts.append(f"... and {len(repositories) - 20} more repositories")

    formatted_parts.append("\n=== END GITHUB REPOSITORIES ===")

    return '\n'.join(formatted_parts)

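# Minimal manual-test sketch (an assumption, not part of the original module's
# public interface): "https://github.com/octocat" is only a placeholder profile
# URL, and running this performs live, unauthenticated GitHub API calls, so it
# is subject to rate limiting.
if __name__ == "__main__":
    demo_result = get_github_repositories("https://github.com/octocat")
    if demo_result["status"] == "success":
        print(format_repositories_for_llm(demo_result))
    else:
        print(f"Lookup failed: {demo_result['message']}")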