albertvillanova HF Staff committed on
Commit
8df3bff
·
verified ·
1 Parent(s): fe04d94

Refactor update_datasets

Browse files
Files changed (1) hide show
  1. app.py +22 -10
app.py CHANGED
@@ -7,6 +7,24 @@ import huggingface_hub as hfh
7
  from apscheduler.schedulers.background import BackgroundScheduler
8
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def greet(name):
11
  return "Hello " + name + "!!"
12
 
@@ -16,17 +34,11 @@ iface = gr.Interface(fn=greet, inputs="text", outputs="text")
16
 
17
 
18
  def update_datasets():
 
19
  datasets = hfh.list_datasets()
20
- #
21
- token = os.environ.get('HUB_TOKEN')
22
- repo = hfh.Repository(
23
- local_dir="dataset",
24
- clone_from="albertvillanova/datasets-report",
25
- repo_type="dataset",
26
- use_auth_token=token,
27
- )
28
- repo.git_pull()
29
- os.makedirs("dataset/data", exist_ok=True)
30
  today = datetime.datetime.now(datetime.timezone.utc).date().isoformat()
31
  with repo.commit(f"Add {today} data file"):
32
  with open(f"data/{today}.json", "w") as f:
 
7
  from apscheduler.schedulers.background import BackgroundScheduler
8
 
9
 
10
# Identifier of the Hub dataset repository cloned by pull_dataset_repo.
DATASET_ID = "albertvillanova/datasets-report"
# Local directory the dataset repository is cloned into.
DATASET_PATH = "dataset"
# Name of the data sub-directory inside the repository.
DATA_DIR = "data"
# Local path of the data directory. The "/" separator is required: without
# it the two names fuse into "datasetdata" and os.makedirs would create a
# directory outside the cloned repository instead of "dataset/data".
DATA_PATH = f"{DATASET_PATH}/{DATA_DIR}"
14
+
15
+
16
def pull_dataset_repo(repo_id=DATASET_ID, repo_path=DATASET_PATH):
    """Open the dataset repository locally, pull it, and return it.

    Args:
        repo_id: Identifier passed to ``hfh.Repository`` as ``clone_from``.
        repo_path: Local directory passed as ``local_dir``.

    Returns:
        The ``hfh.Repository`` instance after ``git_pull()`` has been called.
    """
    # The auth token comes from the environment; ``get`` yields None if unset.
    hub_token = os.environ.get('HUB_TOKEN')
    repo_kwargs = {
        "local_dir": repo_path,
        "clone_from": repo_id,
        "repo_type": "dataset",
        "use_auth_token": hub_token,
    }
    dataset_repo = hfh.Repository(**repo_kwargs)
    dataset_repo.git_pull()
    return dataset_repo
26
+
27
+
28
  def greet(name):
29
  return "Hello " + name + "!!"
30
 
 
34
 
35
 
36
  def update_datasets():
37
+ # Retrieve datasets
38
  datasets = hfh.list_datasets()
39
+ # Save dataset IDs
40
+ repo = pull_dataset_repo()
41
+ os.makedirs(DATA_PATH, exist_ok=True)
 
 
 
 
 
 
 
42
  today = datetime.datetime.now(datetime.timezone.utc).date().isoformat()
43
  with repo.commit(f"Add {today} data file"):
44
  with open(f"data/{today}.json", "w") as f: