File size: 3,811 Bytes
5561674
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
"""
Media-specific commit scheduler for handling media files in Hugging Face Spaces.

This module provides a specialized commit scheduler for media files that are stored
in the space's app directory (/home/user/app/media) rather than the persistent
storage directory.
"""

import os
from pathlib import Path
from typing import Optional

try:  # absolute imports when installed
    from trackio.commit_scheduler import CommitScheduler
    from trackio.dummy_commit_scheduler import DummyCommitScheduler
    from trackio.utils import get_media_path
except ImportError:  # relative imports for local execution on Spaces
    from commit_scheduler import CommitScheduler
    from dummy_commit_scheduler import DummyCommitScheduler
    from utils import get_media_path


class MediaCommitScheduler:
    """
    Process-wide holder for the commit scheduler that uploads media files.

    The scheduler instance is kept on the class itself (not on instances), so
    every caller shares the same one. Media files live in the folder returned
    by ``get_media_path()`` and are pushed under ``media/`` in the target
    dataset repository.
    """

    _current_scheduler: CommitScheduler | DummyCommitScheduler | None = None

    @staticmethod
    def initialize():
        """
        Create the shared media scheduler on first use.

        Calling this again after a scheduler has been stored does nothing.
        A real ``CommitScheduler`` is built only when both the
        ``TRACKIO_DATASET_ID`` and ``SPACE_REPO_NAME`` environment variables
        are present; otherwise a ``DummyCommitScheduler`` is stored instead.
        """
        if MediaCommitScheduler._current_scheduler is not None:
            return

        env = os.environ
        token = env.get("HF_TOKEN")
        target_dataset = env.get("TRACKIO_DATASET_ID")
        running_in_space = env.get("SPACE_REPO_NAME") is not None

        if target_dataset is not None and running_in_space:
            # Fully configured Space: push media files to the dataset repo.
            MediaCommitScheduler._current_scheduler = CommitScheduler(
                repo_id=target_dataset,
                repo_type="dataset",
                folder_path=get_media_path(),
                path_in_repo="media",  # media files live under "media/" in the dataset
                private=True,
                allow_patterns=["**/*"],  # every media file type is allowed
                squash_history=True,
                token=token,
                every=5,  # upload interval, in minutes
            )
        else:
            # Local development or incomplete configuration: use the dummy.
            MediaCommitScheduler._current_scheduler = DummyCommitScheduler()

    @staticmethod
    def get_upload_status():
        """
        Report when media was last uploaded.

        Returns:
            dict with the keys ``last_upload``, ``minutes_since_upload``,
            ``is_dummy`` and ``is_initialized``. The two timing entries are
            ``None`` when no push time is available.
        """
        active = MediaCommitScheduler._current_scheduler
        if active is None:
            # Nothing has been initialized yet; report it as a dummy.
            return {
                "last_upload": None,
                "minutes_since_upload": None,
                "is_dummy": True,
                "is_initialized": False,
            }

        status = {
            "last_upload": None,
            "minutes_since_upload": None,
            "is_dummy": isinstance(active, DummyCommitScheduler),
            "is_initialized": True,
        }

        # The attribute may be missing or falsy; both mean "no push recorded".
        pushed_at = getattr(active, "last_push_time", None)
        if pushed_at:
            import time

            elapsed_seconds = time.time() - pushed_at
            status["last_upload"] = pushed_at
            status["minutes_since_upload"] = int(elapsed_seconds / 60)

        return status