Spaces:
Running
Running
Refactor update_datasets
Browse files
app.py
CHANGED
@@ -7,6 +7,24 @@ import huggingface_hub as hfh
|
|
7 |
from apscheduler.schedulers.background import BackgroundScheduler
|
8 |
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
def greet(name):
    """Return the greeting ``"Hello <name>!!"`` for *name*.

    *name* must be a string: it is joined with ``+``, so any other type
    raises ``TypeError``.
    """
    greeting = "Hello " + name
    return greeting + "!!"
|
12 |
|
@@ -16,17 +34,11 @@ iface = gr.Interface(fn=greet, inputs="text", outputs="text")
|
|
16 |
|
17 |
|
18 |
def update_datasets():
|
|
|
19 |
datasets = hfh.list_datasets()
|
20 |
-
#
|
21 |
-
|
22 |
-
|
23 |
-
local_dir="dataset",
|
24 |
-
clone_from="albertvillanova/datasets-report",
|
25 |
-
repo_type="dataset",
|
26 |
-
use_auth_token=token,
|
27 |
-
)
|
28 |
-
repo.git_pull()
|
29 |
-
os.makedirs("dataset/data", exist_ok=True)
|
30 |
today = datetime.datetime.now(datetime.timezone.utc).date().isoformat()
|
31 |
with repo.commit(f"Add {today} data file"):
|
32 |
with open(f"data/{today}.json", "w") as f:
|
|
|
7 |
from apscheduler.schedulers.background import BackgroundScheduler
|
8 |
|
9 |
|
10 |
+
# Identifier of the Hub dataset repository passed as ``clone_from``.
DATASET_ID = "albertvillanova/datasets-report"
# Local directory used as the repository's ``local_dir``.
DATASET_PATH = "dataset"
# Name of the data sub-directory inside the repository.
DATA_DIR = "data"
# Data directory relative to the working directory. The "/" separator is
# required: without it the value is "datasetdata" instead of "dataset/data".
DATA_PATH = f"{DATASET_PATH}/{DATA_DIR}"
|
14 |
+
|
15 |
+
|
16 |
+
def pull_dataset_repo(repo_id=DATASET_ID, repo_path=DATASET_PATH):
    """Open the dataset repository locally and pull its latest changes.

    Args:
        repo_id: Hub repository identifier, passed as ``clone_from``.
        repo_path: Local directory, passed as ``local_dir``.

    Returns:
        The ``hfh.Repository`` instance, after ``git_pull()`` has been called.
    """
    # ``os.environ.get`` yields None when HUB_TOKEN is unset; the value is
    # forwarded unchanged as ``use_auth_token``.
    token = os.environ.get('HUB_TOKEN')
    repo = hfh.Repository(
        local_dir=repo_path,
        clone_from=repo_id,
        repo_type="dataset",
        use_auth_token=token,
    )
    repo.git_pull()
    return repo
|
26 |
+
|
27 |
+
|
28 |
def greet(name):
    """Return the greeting ``"Hello <name>!!"`` for *name*.

    *name* must be a string: it is joined with ``+``, so any other type
    raises ``TypeError``.
    """
    greeting = "Hello " + name
    return greeting + "!!"
|
30 |
|
|
|
34 |
|
35 |
|
36 |
def update_datasets():
|
37 |
+
# Retrieve datasets
|
38 |
datasets = hfh.list_datasets()
|
39 |
+
# Save dataset IDs
|
40 |
+
repo = pull_dataset_repo()
|
41 |
+
os.makedirs(DATA_PATH, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
today = datetime.datetime.now(datetime.timezone.utc).date().isoformat()
|
43 |
with repo.commit(f"Add {today} data file"):
|
44 |
with open(f"data/{today}.json", "w") as f:
|