# agentic-system / upload_to_hub.py
# Cascade Bot
# Added Groq streaming support and optimizations - clean version
# 1d75522
import os
import sys
import time
import subprocess
from pathlib import Path
import logging
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from typing import Optional
from dotenv import load_dotenv
from huggingface_hub import HfApi, create_repo
# Configure logging: install the default root handler at INFO level and
# create the module-level logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def setup_requests_session(
    retries: int = 5,
    backoff_factor: float = 1.0,
    status_forcelist: Optional[list] = None
) -> requests.Session:
    """Build a ``requests.Session`` whose HTTP and HTTPS adapters retry.

    Args:
        retries: Budget applied to the total, read and connect retry counters.
        backoff_factor: Backoff factor handed to urllib3's ``Retry``.
        status_forcelist: HTTP status codes that trigger a retry. When
            ``None``, 408, 429, 500, 502, 503 and 504 are used.

    Returns:
        A new session with one retrying adapter mounted for both schemes.
    """
    if status_forcelist is None:
        retry_codes = [408, 429, 500, 502, 503, 504]
    else:
        retry_codes = status_forcelist

    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=retry_codes,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    http = requests.Session()
    # The same adapter instance serves both URL schemes.
    for scheme in ("http://", "https://"):
        http.mount(scheme, retrying_adapter)
    return http
def check_network_connectivity(host: str = "8.8.8.8", timeout: int = 5) -> bool:
    """Check whether *host* answers a single ping.

    Runs the system ``ping`` binary once (``-c 1``) with ``-W timeout``.
    No DNS lookup is involved when *host* is an IP address, as the default is.

    Args:
        host: Address to ping.
        timeout: Value passed to ping's ``-W`` option. It is also used, plus a
            small margin, as a hard limit on how long the subprocess may run.

    Returns:
        ``True`` if ping exits with status 0. ``False`` if it exits non-zero,
        cannot be started (for example the binary is not installed), or
        overruns the hard limit.
    """
    # NOTE(review): "-c"/"-W" follow the Linux ping conventions; other
    # platforms may interpret or reject these flags -- confirm if this script
    # must run elsewhere.
    try:
        subprocess.run(
            ["ping", "-c", "1", "-W", str(timeout), host],
            # The output is never inspected, so discard it instead of buffering.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
            # Safety net in case ping itself does not honour -W.
            timeout=timeout + 2,
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError):
        # OSError covers a missing or non-executable ping binary; treating it
        # as "unreachable" lets callers fall through to other probes.
        return False
    return True
def check_huggingface_connectivity(timeout: int = 5) -> bool:
    """Check whether https://huggingface.co answers with HTTP 200.

    Args:
        timeout: Timeout passed to the GET request.

    Returns:
        ``True`` when the response status code is exactly 200, ``False`` when
        it is anything else or when the request raises a ``requests`` error.
    """
    try:
        # The context manager closes the session (and its pooled connections)
        # even when the request fails.
        with setup_requests_session() as session:
            response = session.get("https://huggingface.co", timeout=timeout)
    except requests.RequestException as exc:
        # Only request-level failures mean "not reachable"; KeyboardInterrupt
        # and SystemExit must propagate so the user can abort the wait loop.
        logger.debug(f"Hugging Face connectivity check failed: {exc}")
        return False
    return response.status_code == 200
def wait_for_network(
    max_attempts: int = 5,
    delay: int = 10,
    hosts: Optional[list] = None
) -> bool:
    """Poll for connectivity until a probe succeeds or attempts run out.

    Each attempt pings every entry of *hosts* in order and, if none answers,
    probes Hugging Face over HTTPS. Between attempts (but not after the last
    one) the function sleeps for *delay* seconds.

    Args:
        max_attempts: Number of polling rounds.
        delay: Seconds to sleep between rounds.
        hosts: Addresses to ping. Defaults to ``["8.8.8.8", "1.1.1.1"]``.

    Returns:
        ``True`` as soon as any probe succeeds, ``False`` if every round fails.
    """
    probe_hosts = ["8.8.8.8", "1.1.1.1"] if hosts is None else hosts

    for round_no in range(1, max_attempts + 1):
        logger.info(f"Checking network connectivity (attempt {round_no}/{max_attempts})")

        # First host that answers a ping, if any.
        reachable = next(
            (candidate for candidate in probe_hosts if check_network_connectivity(candidate)),
            None,
        )
        if reachable is not None:
            logger.info(f"Network connectivity established via {reachable}")
            return True

        # Ping may be blocked while HTTPS still works, so probe the target site.
        if check_huggingface_connectivity():
            logger.info("Hugging Face is accessible")
            return True

        if round_no != max_attempts:
            logger.warning(f"Network check failed. Waiting {delay} seconds before retry...")
            time.sleep(delay)

    return False
def _is_excluded(relative_path: str, exclude_patterns: list) -> bool:
    """Return True if the path, or any directory above it, matches a pattern.

    ``Path.match`` only compares against the tail of a path, so a pattern such
    as ``.git`` never matches ``.git/config``. Testing every leading prefix
    (``.git``, then ``.git/config``) makes directory patterns exclude
    everything beneath them.
    """
    parts = Path(relative_path).parts
    return any(
        Path(*parts[:depth]).match(pattern)
        for depth in range(1, len(parts) + 1)
        for pattern in exclude_patterns
    )


def _collect_upload_files(important_files: list, exclude_patterns: list, root_path: Path) -> list:
    """Return repo-relative POSIX paths of all non-empty files to upload.

    Important files come first (missing or empty ones are logged and skipped),
    followed by every other non-empty, non-excluded file under *root_path*.
    """
    selected = []
    seen = set()  # O(1) duplicate check for files already queued

    for name in important_files:
        candidate = Path(name)
        if not candidate.is_file():
            logger.warning(f"Important file not found: {name}")
        elif candidate.stat().st_size == 0:
            logger.warning(f"Skipping empty file: {name}")
        else:
            relative = candidate.as_posix()
            selected.append(relative)
            seen.add(relative)

    for path in root_path.rglob("*"):
        if not path.is_file():
            continue
        # Repo paths use forward slashes regardless of the local OS.
        relative = path.relative_to(root_path).as_posix()
        if relative in seen or _is_excluded(relative, exclude_patterns):
            continue
        if path.stat().st_size > 0:
            selected.append(relative)
            seen.add(relative)

    return selected


def _ensure_space(api, space_id: str, max_attempts: int = 3) -> None:
    """Create the Space (or reuse it) and re-request its hardware.

    Creation is retried up to *max_attempts* times with a growing pause; the
    last failure is re-raised. A failed hardware request is only logged.
    """
    for attempt in range(max_attempts):
        try:
            # HfApi.create_repo names these options space_hardware/space_storage;
            # the previous hardware=/storage= keywords are not accepted.
            # NOTE(review): the old storage={"hf": {"root": "/data"}} value has
            # no equivalent here; pass space_storage explicitly if persistent
            # storage is wanted.
            space_info = api.create_repo(
                repo_id=space_id,
                repo_type="space",
                space_sdk="gradio",
                private=False,
                exist_ok=True,
                space_hardware="t4-medium",
            )
            logger.info(f"Space ready: {space_info.url}")
        except Exception as e:
            if attempt == max_attempts - 1:
                logger.error(f"Error creating/accessing Space after {max_attempts} attempts: {e}")
                raise
            logger.warning(f"Attempt {attempt + 1} failed, retrying...")
            time.sleep(5 * (attempt + 1))
            continue

        # Request the hardware again; presumably this is meant to force a
        # restart into a clean environment. Failure here is not fatal.
        try:
            api.request_space_hardware(
                repo_id=space_id,
                hardware="t4-medium",
                sleep_time=2
            )
            logger.info("Requested hardware restart")
        except Exception as e:
            logger.warning(f"Hardware restart request failed: {e}")
        return


def _upload_with_retries(api, file_path: str, space_id: str, progress: str, max_retries: int = 3) -> bool:
    """Upload one file, retrying with a growing pause; return True on success."""
    for attempt in range(1, max_retries + 1):
        try:
            logger.info(f"[{progress}] Uploading: {file_path}")
            api.upload_file(
                path_or_fileobj=file_path,
                path_in_repo=file_path,
                repo_id=space_id,
                repo_type="space"
            )
        except Exception as e:
            if attempt == max_retries:
                logger.error(f"Failed to upload {file_path} after {max_retries} attempts: {e}")
                return False
            logger.warning(f"Retry {attempt}/{max_retries} for {file_path}: {e}")
            time.sleep(5 * attempt)
        else:
            logger.info(f"✓ Uploaded: {file_path}")
            return True
    return False


def upload_to_huggingface(space_id: str = "nananie143/agentic-system") -> None:
    """Upload the project in the current directory to a Hugging Face Space.

    Steps: load ``HUGGINGFACE_TOKEN`` from the environment (after reading a
    ``.env`` file), wait for network connectivity, create or reuse the Space,
    write a ``.gitattributes`` file into the current directory, then upload
    every selected file one by one with retries.

    Individual upload failures do not abort the run; they are summarised in a
    warning at the end instead of the success message.

    Args:
        space_id: Target Space as ``"owner/name"``.

    Raises:
        ValueError: ``HUGGINGFACE_TOKEN`` is not set.
        ConnectionError: No connectivity after all network checks.
        Exception: Whatever the final Space-creation attempt raised.
    """
    try:
        # Load environment variables
        load_dotenv()
        token = os.getenv("HUGGINGFACE_TOKEN")
        if not token:
            raise ValueError("HUGGINGFACE_TOKEN not found in environment variables")

        # Check network connectivity with more attempts and a longer pause
        # than the defaults.
        if not wait_for_network(max_attempts=10, delay=15):
            raise ConnectionError("Failed to establish network connectivity")

        api = HfApi(token=token, endpoint="https://huggingface.co")
        _ensure_space(api, space_id)

        # Add .gitattributes to ensure proper file handling
        gitattributes_content = "\n".join([
            "*.py text eol=lf",
            "*.sh text eol=lf",
            "*.yml text eol=lf",
            "*.txt text eol=lf",
            "requirements.txt text eol=lf",
        ])
        with open(".gitattributes", "w", encoding="utf-8") as f:
            f.write(gitattributes_content)

        # Files to exclude from upload
        exclude_patterns = [
            "__pycache__",
            "*.pyc",
            ".git",
            ".env",
            ".env.example",
            "models/*",
            "flagged/*",
            ".pytest_cache",
            "*.log",
            "*.gguf",
            ".gitignore",
            "*.backup",
            "*.bak*",
            "*.patch",
            "*.temp",
            ".DS_Store",
        ]

        # Important files to ensure are included (and uploaded first)
        important_files = [
            "app.py",
            "agentic_system.py",
            "requirements.txt",
            "space.yml",
            "download_models_space.py",
            "app_space.sh",
            "orchestrator.py",
            "team_management.py",
            "meta_learning.py",
            "config.py",
            "upload_to_hub.py",
            ".gitattributes",
        ]

        files_to_upload = _collect_upload_files(important_files, exclude_patterns, Path("."))

        logger.info("Starting file upload...")
        total_files = len(files_to_upload)
        failed_files = [
            file_path
            for idx, file_path in enumerate(files_to_upload, 1)
            if not _upload_with_retries(api, file_path, space_id, f"{idx}/{total_files}")
        ]

        if failed_files:
            # Do not claim success when some files never made it.
            logger.warning(
                f"Upload finished with {len(failed_files)} failed file(s): "
                f"{', '.join(failed_files)}"
            )
        else:
            logger.info(f"Space updated successfully! Visit: https://huggingface.co/spaces/{space_id}")
    except Exception as e:
        logger.error(f"Error uploading to Hugging Face: {e}")
        raise
# Run the upload only when this file is executed as a script, not on import.
if __name__ == "__main__":
    upload_to_huggingface()