Spaces:
Running
Running
File size: 1,391 Bytes
157383b 88295ae 157383b e76d2e2 157383b 6a42136 8df3bff 6a42136 1d8ab2a 6a42136 88295ae 70050ea 88295ae 6a42136 157383b 8df3bff 157383b 8df3bff fe04d94 b814861 157383b f19a729 157383b 88295ae 70050ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import datetime
import json
import os
import gradio as gr
import huggingface_hub as hfh
from apscheduler.schedulers.background import BackgroundScheduler
DATASET_ID = "albertvillanova/datasets-report"
DATASET_PATH = "dataset"  # local directory where the dataset repo is cloned
DATA_DIR = "data"  # subdirectory inside the repo that holds the daily JSON files
# BUG FIX: the original f"{DATASET_PATH}{DATA_DIR}" evaluated to "datasetdata",
# so os.makedirs() created a stray directory instead of ensuring the repo's
# data/ subdirectory exists. A "/" separator is correct here (git paths).
DATA_PATH = f"{DATASET_PATH}/{DATA_DIR}"
def pull_dataset_repo(repo_id=DATASET_ID, repo_path=DATASET_PATH):
    """Clone (or reuse) the report dataset repository and pull latest changes.

    Args:
        repo_id: Hub dataset repository ID to clone from.
        repo_path: Local directory for the working copy.

    Returns:
        The up-to-date ``hfh.Repository`` handle.
    """
    # Authentication token is read from the Space's environment, if present.
    auth_token = os.environ.get("HUB_TOKEN")
    dataset_repo = hfh.Repository(
        clone_from=repo_id,
        local_dir=repo_path,
        repo_type="dataset",
        use_auth_token=auth_token,
    )
    dataset_repo.git_pull()
    return dataset_repo
def greet(name):
    """Return a greeting string for *name*."""
    return f"Hello {name}!!"
# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
# iface.launch()
def update_datasets():
    """Fetch all dataset IDs from the Hub and commit them to the report repo.

    Writes a JSON file named ``<YYYY-MM-DD>.json`` (UTC date) under the repo's
    data directory, containing the sorted list of dataset IDs, and pushes it
    in a single commit.
    """
    # Retrieve the current list of datasets on the Hub
    datasets = hfh.list_datasets()
    # Make sure the local clone exists and is up to date
    repo = pull_dataset_repo()
    # Ensure the repo's data subdirectory exists before writing into it
    os.makedirs(DATA_PATH, exist_ok=True)
    today = datetime.datetime.now(datetime.timezone.utc).date().isoformat()
    # repo.commit() changes into the repo directory, so the open() path is
    # relative to the clone; use the DATA_DIR constant instead of a
    # hard-coded "data" for consistency (same runtime value).
    with repo.commit(f"Add {today} data file"):
        with open(f"{DATA_DIR}/{today}.json", "w") as f:
            # sorted(ds.id ...) is equivalent to sorting items by id and
            # extracting the ids afterwards
            json.dump(sorted(ds.id for ds in datasets), f)
# Schedule the daily snapshot: run update_datasets every day at 00:01 UTC.
scheduler = BackgroundScheduler()
scheduler.add_job(update_datasets, trigger="cron", hour=0, minute=1, timezone=datetime.timezone.utc)
scheduler.start()
# Minimal Gradio UI so the Space stays alive; the real work happens in the
# background scheduler above.
demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()
|