# Scraped page header (not part of the program): "Spaces: Sleeping"
import json
import logging
import os
import tempfile

import gradio as gr
from datasets import load_dataset, Dataset
from huggingface_hub import hf_hub_download, list_repo_files, upload_file, HfApi
# Configure logging: timestamped INFO-level records on the root logger, plus a
# module-level logger used by everything below.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)
# Cricket annotation categories.
# Maps each category label shown in the UI to the list of options offered for
# it. The insertion order matters: the UI and the annotator pair radio values
# with categories positionally.
ANNOTATION_CATEGORIES = {
    "Bowler's Run Up": ["Fast", "Slow"],
    "Delivery Type": ["Yorker", "Bouncer", "Length Ball", "Slower ball", "Googly", "Arm Ball", "Other"],
    "Ball's trajectory": ["In Swing", "Out Swing", "Off spin", "Leg spin"],
    "Shot Played": ["Cover Drive", "Straight Drive", "On Drive", "Pull", "Square Cut", "Defensive Block"],
    "Outcome of the shot": ["Four (4)", "Six (6)", "Wicket", "Single (1)", "Double (2)", "Triple (3)", "Dot (0)"],
    "Shot direction": ["Long On", "Long Off", "Cover", "Point", "Midwicket", "Square Leg", "Third Man", "Fine Leg"],
    "Fielder's Action": ["Catch taken", "Catch dropped", "Misfield", "Run-out attempt", "Fielder fields"],
}

# Hugging Face Hub repository that holds the videos and their annotations.
HF_REPO_ID = "srrthk/CricBench"
HF_REPO_TYPE = "dataset"
class VideoAnnotator:
    """Step through the videos of a Hugging Face dataset and store annotations.

    The annotator holds the dataset loaded by ``load_videos_from_hf``, an index
    into ``video_files`` identifying the clip currently shown, and writes
    annotations into the dataset's ``annotations`` column as JSON strings.
    """

    def __init__(self):
        self.video_files = []        # one entry per dataset row ('video' or 'path' value)
        self.current_video_idx = 0   # index into video_files of the clip being shown
        self.annotations = {}        # kept for compatibility; not read by the methods below
        self.hf_token = os.environ.get("HF_TOKEN")
        self.dataset = None          # set by load_videos_from_hf
        self.dataset_split = None    # the split of self.dataset that is being annotated

    def _split_name(self):
        """Return the name of the first split of the loaded dataset."""
        return next(iter(self.dataset))

    def load_videos_from_hf(self):
        """Load the dataset from the Hub and collect one video entry per row.

        Returns:
            True when at least one video entry was found, False when the
            dataset is empty or could not be loaded (the error is logged).
        """
        try:
            logger.info(f"Loading dataset from HuggingFace: {HF_REPO_ID}")
            self.dataset = load_dataset(HF_REPO_ID, token=self.hf_token)
            # Get the split (usually 'train')
            split = self._split_name()
            self.dataset_split = self.dataset[split]
            # Each row is expected to expose its video under 'video' or 'path'.
            # NOTE(review): assumes the value is a repo-relative file path that
            # hf_hub_download accepts -- confirm against the dataset schema.
            self.video_files = [
                item['video'] if 'video' in item else item['path']
                for item in self.dataset_split
            ]
            logger.info(f"Found {len(self.video_files)} video files")
            return len(self.video_files) > 0
        except Exception as e:
            logger.error(f"Error accessing HuggingFace dataset: {e}")
            return False

    def get_current_video(self):
        """Download the current video and return its local path.

        Returns:
            The local file path, or None when no videos are loaded or the
            download fails (the error is logged).
        """
        if not self.video_files:
            logger.warning("No video files available")
            return None
        video_path = self.video_files[self.current_video_idx]
        logger.info(f"Loading video: {video_path}")
        try:
            # Pass the same token used for load_dataset so that private or
            # gated repositories can be downloaded as well.
            local_path = hf_hub_download(
                repo_id=HF_REPO_ID,
                filename=video_path,
                repo_type=HF_REPO_TYPE,
                token=self.hf_token,
            )
            logger.info(f"Video downloaded to: {local_path}")
            return local_path
        except Exception as e:
            logger.error(f"Error downloading video: {e}")
            return None

    def save_annotation(self, annotations_dict):
        """Store ``annotations_dict`` for the current video and push it to the Hub.

        Args:
            annotations_dict: mapping of category name to the selected option.

        Returns:
            A human-readable status message; failures are reported in the
            message rather than raised.
        """
        if not annotations_dict:
            logger.warning("No annotations to save")
            return "No annotations to save"
        if not self.video_files:
            # Without this guard the index lookup below raises IndexError.
            logger.error("No video loaded, cannot save annotations")
            return "Error: No video loaded"
        video_name = os.path.basename(self.video_files[self.current_video_idx])
        logger.info(f"Saving annotations for {video_name}")
        if self.dataset is None:
            logger.error("Dataset not loaded, cannot save annotations")
            return "Error: Dataset not loaded"
        try:
            split = self._split_name()
            # Work on a pandas copy, set the JSON string for the current row,
            # then convert back to a Hugging Face dataset.
            frame = self.dataset[split].to_pandas()
            frame.loc[self.current_video_idx, 'annotations'] = json.dumps(annotations_dict)
            new_dataset = Dataset.from_pandas(frame)
            if self.hf_token:
                logger.info(f"Uploading updated dataset to Hugging Face: {HF_REPO_ID}")
                new_dataset.push_to_hub(
                    HF_REPO_ID,
                    split=split,
                    token=self.hf_token,
                )
                status = f"Annotations saved for {video_name} and uploaded to Hugging Face dataset"
            else:
                logger.warning("HF_TOKEN not found. Dataset updated locally only.")
                status = f"Annotations saved locally for {video_name} (no HF upload)"
            # Only replace the local copy once the upload (if any) succeeded.
            self.dataset[split] = new_dataset
            self.dataset_split = new_dataset
            return status
        except Exception as e:
            logger.error(f"Error saving annotations: {e}")
            return f"Error saving: {str(e)}"

    def load_existing_annotation(self):
        """Try to load existing annotation for the current video from the dataset.

        Returns:
            The decoded JSON value stored in the row's ``annotations`` field,
            or None when nothing is stored or it cannot be read.
        """
        if not self.dataset or not self.video_files:
            return None
        try:
            split = self._split_name()
            # Fetch the row once; it may or may not carry an 'annotations' field.
            row = self.dataset[split][self.current_video_idx]
            annotation_str = row.get('annotations')
            if annotation_str:
                return json.loads(annotation_str)
            return None
        except Exception as e:
            logger.error(f"Error loading existing annotation: {e}")
            return None

    def _selected_annotations(self, values):
        """Pair positional radio values with category names, dropping empty ones."""
        return {
            category: value
            for category, value in zip(ANNOTATION_CATEGORIES, values)
            if value
        }

    def _save_pending(self, values):
        """Save the given radio values for the current video if they changed."""
        if not self.video_files:
            return
        annotations_dict = self._selected_annotations(values)
        # Skip the save (and the upload it triggers) when the selection is
        # exactly what is already stored for this video.
        if annotations_dict and annotations_dict != self.load_existing_annotation():
            self.save_annotation(annotations_dict)

    def _current_view(self):
        """Return (video path, *radio values) for the current video."""
        stored = self.load_existing_annotation()
        if not isinstance(stored, dict):
            stored = {}
        # Only offer stored values that are still valid choices for the radio.
        values = [
            stored.get(category) if stored.get(category) in options else None
            for category, options in ANNOTATION_CATEGORIES.items()
        ]
        return (self.get_current_video(), *values)

    def next_video(self, *current_annotations):
        """Save the current selection, advance one video, and return the new view.

        Args:
            *current_annotations: radio values in ANNOTATION_CATEGORIES order.

        Returns:
            A tuple of the video path followed by one value per category
            (the stored annotation for that video, or None).
        """
        self._save_pending(current_annotations)
        if self.current_video_idx < len(self.video_files) - 1:
            self.current_video_idx += 1
            logger.info(f"Moving to next video (index: {self.current_video_idx})")
        else:
            logger.info("Already at the last video")
        return self._current_view()

    def prev_video(self, *current_annotations):
        """Save the current selection, go back one video, and return the new view.

        Args:
            *current_annotations: radio values in ANNOTATION_CATEGORIES order.

        Returns:
            A tuple of the video path followed by one value per category
            (the stored annotation for that video, or None).
        """
        self._save_pending(current_annotations)
        if self.current_video_idx > 0:
            self.current_video_idx -= 1
            logger.info(f"Moving to previous video (index: {self.current_video_idx})")
        else:
            logger.info("Already at the first video")
        return self._current_view()
def create_interface():
    """Build the Gradio Blocks UI for annotating cricket videos.

    Creates a ``VideoAnnotator``, loads the dataset, and lays out a video
    player, one radio group per entry of ``ANNOTATION_CATEGORIES`` (split over
    two columns), navigation/save buttons and a status box.

    Returns:
        The ``gr.Blocks`` application, ready for ``launch()``.
    """
    annotator = VideoAnnotator()
    if not annotator.load_videos_from_hf():
        logger.error("Failed to load videos. Using demo mode with sample video.")
        # In real app, you might want to provide a sample video or show an error

    # Resolve the initial state before building components so it can be passed
    # to their constructors instead of being patched onto `.value` afterwards.
    current_video = annotator.get_current_video()
    existing_annotations = annotator.load_existing_annotation() if current_video else None
    if not isinstance(existing_annotations, dict):
        existing_annotations = {}

    categories = list(ANNOTATION_CATEGORIES.items())

    def make_radio(category, options):
        """Create the radio group for one category, pre-filled when possible."""
        stored = existing_annotations.get(category)
        return gr.Radio(
            choices=options,
            label=category,
            info=f"Select {category}",
            value=stored if stored in options else None,
        )

    def save_current(*selected):
        """Turn positional radio values into the dict save_annotation expects."""
        annotations_dict = {
            category: value
            for category, value in zip(ANNOTATION_CATEGORIES, selected)
            if value
        }
        return annotator.save_annotation(annotations_dict)

    with gr.Blocks() as demo:
        gr.Markdown("# Cricket Video Annotation Tool")
        with gr.Row():
            video_player = gr.Video(label="Current Video", value=current_video)
        annotation_components = []
        with gr.Row():
            with gr.Column():
                for category, options in categories[:4]:
                    annotation_components.append(make_radio(category, options))
            with gr.Column():
                for category, options in categories[4:]:
                    annotation_components.append(make_radio(category, options))
        with gr.Row():
            prev_btn = gr.Button("Previous Video")
            save_btn = gr.Button("Save Annotations", variant="primary")
            next_btn = gr.Button("Next Video")
        status_box = gr.Textbox(label="Status")

        # Event handlers. Inputs must be the radio components themselves; a
        # layout container such as gr.Group cannot be used as an input.
        save_btn.click(
            fn=save_current,
            inputs=annotation_components,
            outputs=status_box,
        )
        next_btn.click(
            fn=annotator.next_video,
            inputs=annotation_components,
            outputs=[video_player] + annotation_components,
        )
        prev_btn.click(
            fn=annotator.prev_video,
            inputs=annotation_components,
            outputs=[video_player] + annotation_components,
        )
    return demo
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
# Add a local video for testing if no videos are loaded from Hugging Face |