import base64
import datetime
import io
import json
import logging

import numpy as np
from PIL import Image
from datasets import Dataset, Features, Sequence, Value, load_dataset

logger = logging.getLogger(__name__)

HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"  # TODO: Replace with your actual HF username and dataset name
# Custom JSON encoder to handle numpy types (duplicated from app_mcp.py)
class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        # Coerce numpy scalars and arrays to native Python types so that
        # json.dumps can serialize them.
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
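
# Quick illustration (assumed behavior; the value is made up for demonstration):
# plain json.dumps raises TypeError on a numpy scalar, but with the encoder
# above the value is coerced to a native float first.
# >>> json.dumps({"score": np.float32(0.5)}, cls=NumpyEncoder)
# '{"score": 0.5}'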

def _pil_to_base64(image: Image.Image) -> str:
    """Converts a PIL Image to a base64-encoded JPEG string."""
    # Explicitly check that the input is a PIL Image
    if not isinstance(image, Image.Image):
        raise TypeError(f"Expected a PIL Image, but received type: {type(image)}")
    buffered = io.BytesIO()
    # Ensure the image is in RGB mode before saving as JPEG
    if image.mode != 'RGB':
        image = image.convert('RGB')
    image.save(buffered, format="JPEG", quality=85)
    return base64.b64encode(buffered.getvalue()).decode('utf-8')
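
# Illustrative inverse helper (an assumption, not part of the original module):
# decodes a stored base64 string back into a PIL Image, e.g. when reading the
# "image" or "forensic_outputs" columns back out of the dataset.
def _base64_to_pil(b64_string: str) -> Image.Image:
    """Decodes a base64-encoded JPEG string back to a PIL Image."""
    return Image.open(io.BytesIO(base64.b64decode(b64_string)))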

def initialize_dataset():
    """Initializes or loads the Hugging Face dataset."""
    try:
        # Try to load the existing dataset from the Hub
        dataset = load_dataset(HF_DATASET_NAME, split="train")
        logger.info(f"Loaded existing Hugging Face dataset: {HF_DATASET_NAME}")
    except Exception:
        # If the dataset does not exist, create a new one with an empty structure
        logger.info(f"Creating new Hugging Face dataset: {HF_DATASET_NAME}")
        # Define the features explicitly
        features = Features({
            "timestamp": Value('string'),
            "image": Value('string'),                       # base64 string
            "inference_request": Value('string'),           # JSON string
            "model_predictions": Value('string'),           # JSON string
            "ensemble_output": Value('string'),             # JSON string
            "forensic_outputs": Sequence(Value('string')),  # list of base64 image strings
            "agent_monitoring_data": Value('string'),       # JSON string
            "human_feedback": Value('string')               # JSON string
        })
        dataset = Dataset.from_dict({
            "timestamp": [],
            "image": [],
            "inference_request": [],
            "model_predictions": [],
            "ensemble_output": [],
            "forensic_outputs": [],
            "agent_monitoring_data": [],
            "human_feedback": []
        }, features=features)  # Pass the features explicitly
    return dataset
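
# Minimal usage sketch (assumes network access and, for a private dataset, a
# valid Hugging Face token from `huggingface-cli login`):
# >>> ds = initialize_dataset()
# >>> ds.features.keys()  # the eight columns declared above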

def log_inference_data(
    original_image: Image.Image,
    inference_params: dict,
    model_predictions: list[dict],
    ensemble_output: dict,
    forensic_images: list[Image.Image],
    agent_monitoring_data: dict,
    human_feedback: dict | None = None
):
| """Logs a single inference event to the Hugging Face dataset.""" | |
| try: | |
| dataset = initialize_dataset() | |
| # Convert PIL Images to base64 strings for storage | |
| original_image_b64 = _pil_to_base64(original_image) | |
| forensic_images_b64 = [] | |
| for img_item in forensic_images: | |
| if img_item is not None: | |
| if not isinstance(img_item, Image.Image): | |
| try: | |
| img_item = Image.fromarray(img_item) | |
| except Exception as e: | |
| logger.error(f"Error converting forensic image to PIL for base64 encoding: {e}") | |
| continue # Skip this image if conversion fails | |
| # Now img_item should be a PIL Image, safe to pass to _pil_to_base64 | |
| forensic_images_b64.append(_pil_to_base64(img_item)) | |
        new_entry = {
            "timestamp": datetime.datetime.now().isoformat(),
            "image": original_image_b64,
            "inference_request": json.dumps(inference_params, cls=NumpyEncoder),
            "model_predictions": json.dumps(model_predictions, cls=NumpyEncoder),
            "ensemble_output": json.dumps(ensemble_output, cls=NumpyEncoder),
            "forensic_outputs": forensic_images_b64,  # already a list of strings
            "agent_monitoring_data": json.dumps(agent_monitoring_data, cls=NumpyEncoder),
            "human_feedback": json.dumps(human_feedback if human_feedback is not None else {}, cls=NumpyEncoder)
        }
        # Rebuild the dataset with the new entry appended, keeping the schema
        features = dataset.features
        dataset_list = dataset.to_list()
        dataset_list.append(new_entry)
        updated_dataset = Dataset.from_list(dataset_list, features=features)

        # Note: save_to_disk only writes to the local path below; it does not
        # push anything to the Hugging Face Hub.
        updated_dataset.save_to_disk("sherloq-forensics/hf_dataset_cache")
        logger.info("Inference data logged successfully to local cache.")

        # To push to the Hub instead, uncomment the lines below and ensure
        # HF_DATASET_NAME is set correctly and you are logged in.
        # updated_dataset.push_to_hub(HF_DATASET_NAME, private=True)
        # logger.info("Inference data pushed to Hugging Face Hub.")
    except Exception as e:
        logger.error(f"Failed to log inference data to Hugging Face dataset: {e}")