1Suro1 committed
Commit 55b03ae · 1 Parent(s): 6df6317

maybe fix storage

Files changed (2)
  1. app.py +47 -14
  2. src/envs.py +5 -2
app.py CHANGED
@@ -3,6 +3,9 @@ from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
  import pandas as pd
  from apscheduler.schedulers.background import BackgroundScheduler
  from huggingface_hub import snapshot_download
+ import os
+ import shutil
+ import time

  from src.about import (
      CITATION_BUTTON_LABEL,
@@ -41,20 +44,50 @@ def restart_space():
      API.restart_space(repo_id=REPO_ID)

  ### Space initialisation
- try:
-     print(EVAL_REQUESTS_PATH)
-     snapshot_download(
-         repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-     )
- except Exception:
-     restart_space()
- try:
-     print(EVAL_RESULTS_PATH)
-     snapshot_download(
-         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-     )
- except Exception:
-     restart_space()
+
+ def cleanup_old_cache():
+     """Remove old cache directories to free up space"""
+     cache_dirs = [EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH]
+     for cache_dir in cache_dirs:
+         if os.path.exists(cache_dir):
+             # Check if cache is older than 6 hours
+             cache_age = time.time() - os.path.getctime(cache_dir)
+             if cache_age > 21600:  # 6 hours in seconds
+                 print(f"Removing old cache: {cache_dir}")
+                 shutil.rmtree(cache_dir, ignore_errors=True)
+
+ def safe_download_dataset(repo_id, local_dir, repo_type="dataset"):
+     """Download dataset only if not already cached or cache is stale"""
+     try:
+         # Check if directory exists and has recent data
+         if os.path.exists(local_dir) and os.listdir(local_dir):
+             cache_age = time.time() - os.path.getctime(local_dir)
+             if cache_age < 3600:  # Less than 1 hour old
+                 print(f"Using cached data: {local_dir}")
+                 return
+
+         print(f"Downloading: {repo_id} to {local_dir}")
+         snapshot_download(
+             repo_id=repo_id,
+             local_dir=local_dir,
+             repo_type=repo_type,
+             tqdm_class=None,
+             etag_timeout=30,
+             token=TOKEN,
+             resume_download=True,
+             force_download=False
+         )
+     except Exception as e:
+         print(f"Download failed for {repo_id}: {e}")
+         if not os.path.exists(local_dir) or not os.listdir(local_dir):
+             restart_space()
+
+ # Clean up old cache to free space
+ cleanup_old_cache()
+
+ # Download datasets with caching
+ safe_download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH, "dataset")
+ safe_download_dataset(RESULTS_REPO, EVAL_RESULTS_PATH, "dataset")


  LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
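The caching rule that replaces the old unconditional snapshot_download calls is the core of this change: an existing, non-empty local_dir younger than one hour is reused, anything older than six hours is removed by cleanup_old_cache, and only missing or stale directories trigger a fresh download. A minimal sketch of that reuse check, written here for illustration only (the helper name cache_is_fresh and the constant REUSE_WINDOW_SECONDS are not in the commit; the 3600-second threshold and the os.path.getctime-based age come from safe_download_dataset):

import os
import time

REUSE_WINDOW_SECONDS = 3600  # same 1-hour threshold used in safe_download_dataset

def cache_is_fresh(local_dir):
    """Reuse local_dir only if it exists, is non-empty, and is under an hour old."""
    if not (os.path.exists(local_dir) and os.listdir(local_dir)):
        return False
    cache_age = time.time() - os.path.getctime(local_dir)
    return cache_age < REUSE_WINDOW_SECONDS

# e.g. the request-queue cache directory from src/envs.py
print(cache_is_fresh("/tmp/hf_cache/eval-queue"))

Note that os.path.getctime on a directory reflects its inode change time rather than when the files inside were last written, so the freshness window is approximate; it is presumably enough here to avoid re-downloading both datasets every time the Space restarts.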
src/envs.py CHANGED
@@ -14,12 +14,15 @@ QUEUE_REPO = f"{OWNER}/requests"
  RESULTS_REPO = f"{OWNER}/results"

  # If you setup a cache later, just change HF_HOME
- CACHE_PATH=os.getenv("HF_HOME", ".")
+ CACHE_PATH=os.getenv("HF_HOME", "/tmp/hf_cache")

- # Local caches
+ # Local caches - use /tmp for better cleanup
  EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
  EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
  EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
  EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")

+ # Ensure cache directory exists
+ os.makedirs(CACHE_PATH, exist_ok=True)
+
  API = HfApi(token=TOKEN)
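With HF_HOME unset, both eval caches now resolve under /tmp/hf_cache instead of the Space's working directory, and the makedirs call guarantees the parent directory exists before app.py downloads into it. A small sketch of how the fallback resolves (illustrative only; the /data/hf value is just an example of an externally set HF_HOME):

import os

os.environ.pop("HF_HOME", None)     # unset: fall back to /tmp/hf_cache
print(os.path.join(os.getenv("HF_HOME", "/tmp/hf_cache"), "eval-queue"))
# /tmp/hf_cache/eval-queue

os.environ["HF_HOME"] = "/data/hf"  # set: the environment value wins
print(os.path.join(os.getenv("HF_HOME", "/tmp/hf_cache"), "eval-queue"))
# /data/hf/eval-queue

On Spaces, /tmp typically lives on ephemeral disk that is cleared when the Space restarts, which pairs with the age-based cleanup added in app.py.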