"""Helpers that sync the simulation database, interaction log and images
with a Hugging Face Hub dataset repository.

All helpers are best-effort: when ``HF_TOKEN`` or ``HF_REPO_ID`` is not set
they do nothing, and Hub/file errors are reported with ``print`` rather than
raised.
"""

import os
import json
import datetime
from typing import Optional

from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.utils import HfFolder, RepositoryNotFoundError

HF_TOKEN = os.environ.get("HF_TOKEN")
HF_REPO_ID = os.environ.get("HF_REPO_ID", "broadfield-dev/chat-ai-db")
DB_FILENAME = "social_media_platform.db"
DATASET_FILENAME = "interactions.jsonl"


def _ensure_repo_exists():
    """Create the Hugging Face dataset repository if it does not already exist.

    Does nothing when the token or repo ID is missing. Any error raised by
    the Hub client is printed and swallowed.
    """
    if not HF_TOKEN or not HF_REPO_ID:
        return
    try:
        api = HfApi()
        # exist_ok=True makes this a no-op when the repo is already there.
        api.create_repo(
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
            exist_ok=True,
        )
    except Exception as e:
        print(f"SYNC ERROR: Could not create or verify repository '{HF_REPO_ID}'. Please check your token and repo ID. Error: {e}")


def log_interaction_to_dataset(event_data: dict):
    """Append ``event_data`` as one JSON line to the local dataset file.

    Does nothing when the token or repo ID is missing. Serialization and
    file errors are printed and swallowed.
    """
    if not HF_TOKEN or not HF_REPO_ID:
        return
    try:
        with open(DATASET_FILENAME, "a", encoding="utf-8") as f:
            f.write(json.dumps(event_data) + "\n")
    except Exception as e:
        print(f"DATASET LOG ERROR: {e}")


def sync_files_to_hub():
    """Upload the local database and interaction log to the Hub repository.

    Each file is uploaded only if it exists locally, so a missing database
    no longer prevents the interaction log from being synced. Upload errors
    are printed and swallowed.
    """
    if not HF_TOKEN or not HF_REPO_ID:
        return
    _ensure_repo_exists()  # Make sure the repo exists before uploading
    print("SYNC: Uploading database and dataset to Hugging Face Hub...")
    uploads = (
        (DB_FILENAME, "Sync latest simulation state"),
        (DATASET_FILENAME, "Sync latest interactions"),
    )
    try:
        api = HfApi()
        for filename, commit_message in uploads:
            if not os.path.exists(filename):
                print(f"SYNC: '{filename}' not found locally. Skipping upload.")
                continue
            api.upload_file(
                path_or_fileobj=filename,
                path_in_repo=filename,
                repo_id=HF_REPO_ID,
                repo_type="dataset",
                token=HF_TOKEN,
                commit_message=commit_message,
            )
        print("SYNC: Upload successful.")
    except Exception as e:
        print(f"SYNC ERROR: Failed to upload files: {e}")


def download_files_from_hub():
    """Download the database and interaction log from the Hub into the CWD.

    The two files are fetched independently: a failure on one (for example
    because it has not been uploaded yet) does not stop the other from being
    attempted. A missing repository ends the attempt immediately.
    """
    if not HF_TOKEN or not HF_REPO_ID:
        print("SYNC: Hub environment variables not set. Skipping download.")
        return
    print(f"SYNC: Attempting to download latest state from '{HF_REPO_ID}'...")
    downloads = (
        (DB_FILENAME, "SYNC: Database download successful."),
        (DATASET_FILENAME, "SYNC: Dataset log download successful."),
    )
    for filename, success_message in downloads:
        try:
            hf_hub_download(
                repo_id=HF_REPO_ID,
                filename=filename,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir=".",
            )
            print(success_message)
        except RepositoryNotFoundError:
            print(f"SYNC INFO: Repository '{HF_REPO_ID}' not found on the Hub. A new one will be created on the first action.")
            # Without a repository there is nothing else to fetch.
            return
        except Exception as e:
            print(f"SYNC INFO: Could not download files (may not exist yet): {e}")


def upload_image_to_hub(image_file, agent_id: int) -> Optional[str]:
    """Upload an image to ``images/`` in the Hub repository.

    Args:
        image_file: Object exposing ``.filename`` and ``.stream.read()``
            (presumably a Werkzeug ``FileStorage`` -- confirm against caller).
        agent_id: Identifier used as the prefix of the stored file name.

    Returns:
        The ``resolve/main`` URL of the uploaded file, or ``None`` when the
        Hub is not configured or the upload fails.
    """
    if not HF_TOKEN or not HF_REPO_ID:
        return None
    _ensure_repo_exists()  # Make sure the repo exists before uploading
    try:
        api = HfApi()
        # Timezone-aware replacement for the deprecated utcnow(); the
        # formatted string is identical.
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d_%H%M%S")
        original_name = image_file.filename or ""
        filename_ext = original_name.rsplit(".", 1)[-1] if "." in original_name else "jpg"
        # The name comes from the uploader: only accept a plain alphanumeric
        # extension so it cannot inject path separators into the repo path.
        if not filename_ext.isalnum():
            filename_ext = "jpg"
        filename = f"images/{agent_id}_{timestamp}.{filename_ext}"
        api.upload_file(
            path_or_fileobj=image_file.stream.read(),
            path_in_repo=filename,
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        return f"https://huggingface.co/datasets/{HF_REPO_ID}/resolve/main/{filename}"
    except Exception as e:
        print(f"IMAGE UPLOAD ERROR: {e}")
        return None