Spaces:
Sleeping
Sleeping
File size: 3,811 Bytes
5561674 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
"""
Media-specific commit scheduler for handling media files in Hugging Face Spaces.
This module provides a specialized commit scheduler for media files that are stored
in the space's app directory (/home/user/app/media) rather than the persistent
storage directory.
"""
import os
from pathlib import Path
from typing import Optional
try: # absolute imports when installed
from trackio.commit_scheduler import CommitScheduler
from trackio.dummy_commit_scheduler import DummyCommitScheduler
from trackio.utils import get_media_path
except ImportError: # relative imports for local execution on Spaces
from commit_scheduler import CommitScheduler
from dummy_commit_scheduler import DummyCommitScheduler
from utils import get_media_path
class MediaCommitScheduler:
    """
    Holder for the single commit scheduler that uploads media files.

    The scheduler instance is kept in a class-level attribute, so every caller
    in the process shares the same one. Media files are read from the folder
    returned by ``get_media_path()`` and pushed under ``media/`` in the dataset
    repository named by the ``TRACKIO_DATASET_ID`` environment variable.
    """

    # Shared scheduler instance; stays None until initialize() has succeeded.
    _current_scheduler: CommitScheduler | DummyCommitScheduler | None = None

    @staticmethod
    def initialize():
        """
        Create the shared media scheduler unless one already exists.

        A real ``CommitScheduler`` is built only when both ``TRACKIO_DATASET_ID``
        and ``SPACE_REPO_NAME`` are present in the environment; otherwise a
        ``DummyCommitScheduler`` is stored instead. ``HF_TOKEN`` is forwarded to
        the real scheduler as-is and may be ``None``.

        Calling this again after a scheduler has been stored does nothing.
        """
        if MediaCommitScheduler._current_scheduler is not None:
            return

        token = os.environ.get("HF_TOKEN")
        dataset_repo = os.environ.get("TRACKIO_DATASET_ID")
        space_name = os.environ.get("SPACE_REPO_NAME")

        running_in_configured_space = (
            dataset_repo is not None and space_name is not None
        )
        if running_in_configured_space:
            # Full configuration available: sync the media folder to the dataset.
            new_scheduler = CommitScheduler(
                repo_id=dataset_repo,
                repo_type="dataset",
                folder_path=get_media_path(),
                path_in_repo="media",  # media files live under "media/" in the dataset
                private=True,
                allow_patterns=["**/*"],  # no filtering by media file type
                squash_history=True,
                token=token,
                every=5,  # upload interval, in minutes
            )
        else:
            # Local development or incomplete configuration: use the dummy.
            new_scheduler = DummyCommitScheduler()

        MediaCommitScheduler._current_scheduler = new_scheduler

    @staticmethod
    def get_upload_status():
        """
        Report when media was last uploaded.

        Returns:
            dict with the keys ``last_upload`` (the scheduler's
            ``last_push_time`` value, or None), ``minutes_since_upload``
            (whole minutes elapsed since that time, or None), ``is_dummy``
            and ``is_initialized``. Before ``initialize()`` has stored a
            scheduler, ``is_dummy`` is True and ``is_initialized`` is False.
        """
        # Start from the "not initialized" answer and fill in what is known.
        status = {
            "last_upload": None,
            "minutes_since_upload": None,
            "is_dummy": True,
            "is_initialized": False,
        }

        active = MediaCommitScheduler._current_scheduler
        if active is None:
            return status

        status["is_dummy"] = isinstance(active, DummyCommitScheduler)
        status["is_initialized"] = True

        # Not every scheduler object exposes last_push_time, and it may be
        # unset (falsy) when it does; both cases leave the None defaults.
        pushed_at = getattr(active, "last_push_time", None)
        if pushed_at:
            import time

            elapsed_seconds = time.time() - pushed_at
            status["last_upload"] = pushed_at
            status["minutes_since_upload"] = int(elapsed_seconds / 60)

        return status
|