Spaces:
Sleeping
Sleeping
Ben Burtenshaw
committed on
Commit
·
dfd3683
1
Parent(s):
32014a1
lose codeless version
Browse files
- defaults.py +1 -1
- hub.py +23 -1
- pages/3_🌱 Generate Dataset.py +29 -10
defaults.py
CHANGED
|
@@ -3,7 +3,7 @@ import json
|
|
| 3 |
|
| 4 |
SEED_DATA_PATH = "seed_data.json"
|
| 5 |
PIPELINE_PATH = "pipeline.yaml"
|
| 6 |
-
REMOTE_CODE_PATHS = ["
|
| 7 |
DIBT_PARENT_APP_URL = "https://argilla-domain-specific-datasets-welcome.hf.space/"
|
| 8 |
N_PERSPECTIVES = 5
|
| 9 |
N_TOPICS = 5
|
|
|
|
| 3 |
|
| 4 |
SEED_DATA_PATH = "seed_data.json"
|
| 5 |
PIPELINE_PATH = "pipeline.yaml"
|
| 6 |
+
REMOTE_CODE_PATHS = ["requirements.txt"]
|
| 7 |
DIBT_PARENT_APP_URL = "https://argilla-domain-specific-datasets-welcome.hf.space/"
|
| 8 |
N_PERSPECTIVES = 5
|
| 9 |
N_TOPICS = 5
|
hub.py
CHANGED
|
@@ -94,7 +94,7 @@ def push_pipeline_to_hub(
|
|
| 94 |
# upload the pipeline to the hub
|
| 95 |
hf_api.upload_file(
|
| 96 |
path_or_fileobj=pipeline_path,
|
| 97 |
-
path_in_repo="pipeline.
|
| 98 |
token=hub_token,
|
| 99 |
repo_id=repo_id,
|
| 100 |
repo_type="dataset",
|
|
@@ -127,3 +127,25 @@ def push_argilla_dataset_to_hub(
|
|
| 127 |
feedback_dataset = rg.FeedbackDataset.from_argilla(name=name, workspace=workspace)
|
| 128 |
local_dataset = feedback_dataset.pull()
|
| 129 |
local_dataset.push_to_huggingface(repo_id=repo_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
# upload the pipeline to the hub
|
| 95 |
hf_api.upload_file(
|
| 96 |
path_or_fileobj=pipeline_path,
|
| 97 |
+
path_in_repo="pipeline.py",
|
| 98 |
token=hub_token,
|
| 99 |
repo_id=repo_id,
|
| 100 |
repo_type="dataset",
|
|
|
|
| 127 |
feedback_dataset = rg.FeedbackDataset.from_argilla(name=name, workspace=workspace)
|
| 128 |
local_dataset = feedback_dataset.pull()
|
| 129 |
local_dataset.push_to_huggingface(repo_id=repo_id)
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def push_pipeline_params(
|
| 133 |
+
pipeline_params,
|
| 134 |
+
hub_username,
|
| 135 |
+
hub_token: str,
|
| 136 |
+
project_name,
|
| 137 |
+
):
|
| 138 |
+
repo_id = f"{hub_username}/{project_name}"
|
| 139 |
+
temp_path = mktemp()
|
| 140 |
+
with open(temp_path, "w") as f:
|
| 141 |
+
json.dump(pipeline_params, f)
|
| 142 |
+
# upload the pipeline to the hub
|
| 143 |
+
hf_api.upload_file(
|
| 144 |
+
path_or_fileobj=temp_path,
|
| 145 |
+
path_in_repo="pipeline_params.json",
|
| 146 |
+
token=hub_token,
|
| 147 |
+
repo_id=repo_id,
|
| 148 |
+
repo_type="dataset",
|
| 149 |
+
)
|
| 150 |
+
|
| 151 |
+
print(f"Pipeline params uploaded to {repo_id}")
|
pages/3_🌱 Generate Dataset.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
|
| 3 |
from defaults import ARGILLA_URL
|
|
|
|
| 4 |
from utils import project_sidebar
|
| 5 |
|
| 6 |
st.set_page_config(
|
|
@@ -90,6 +91,25 @@ if all(
|
|
| 90 |
argilla_dataset_name,
|
| 91 |
]
|
| 92 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
st.markdown(
|
| 94 |
"To run the pipeline locally, you need to have the `distilabel` library installed. You can install it using the following command:"
|
| 95 |
)
|
|
@@ -106,19 +126,18 @@ if all(
|
|
| 106 |
|
| 107 |
st.code(
|
| 108 |
f"""
|
| 109 |
-
# Clone the project and install the requirements
|
| 110 |
git clone https://huggingface.co/datasets/{hub_username}/{project_name}
|
| 111 |
cd {project_name}
|
| 112 |
pip install -r requirements.txt
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
""",
|
| 122 |
language="bash",
|
| 123 |
)
|
| 124 |
st.markdown(
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
|
| 3 |
from defaults import ARGILLA_URL
|
| 4 |
+
from hub import push_pipeline_params, push_pipeline_to_hub
|
| 5 |
from utils import project_sidebar
|
| 6 |
|
| 7 |
st.set_page_config(
|
|
|
|
| 91 |
argilla_dataset_name,
|
| 92 |
]
|
| 93 |
):
|
| 94 |
+
push_pipeline_params(
|
| 95 |
+
pipeline_params={
|
| 96 |
+
"argilla_api_key": argilla_api_key,
|
| 97 |
+
"argilla_api_url": argilla_url,
|
| 98 |
+
"argilla_dataset_name": argilla_dataset_name,
|
| 99 |
+
"endpoint_base_url": base_url,
|
| 100 |
+
},
|
| 101 |
+
hub_username=hub_username,
|
| 102 |
+
hub_token=hub_token,
|
| 103 |
+
project_name=project_name,
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
push_pipeline_to_hub(
|
| 107 |
+
pipeline_path="pipeline.py",
|
| 108 |
+
hub_username=hub_username,
|
| 109 |
+
hub_token=hub_token,
|
| 110 |
+
project_name=project_name,
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
st.markdown(
|
| 114 |
"To run the pipeline locally, you need to have the `distilabel` library installed. You can install it using the following command:"
|
| 115 |
)
|
|
|
|
| 126 |
|
| 127 |
st.code(
|
| 128 |
f"""
|
|
|
|
| 129 |
git clone https://huggingface.co/datasets/{hub_username}/{project_name}
|
| 130 |
cd {project_name}
|
| 131 |
pip install -r requirements.txt
|
| 132 |
+
"""
|
| 133 |
+
)
|
| 134 |
+
|
| 135 |
+
st.markdown("Finally, you can run the pipeline using the following command:")
|
| 136 |
+
|
| 137 |
+
st.code(
|
| 138 |
+
"""
|
| 139 |
+
huggingface-cli login
|
| 140 |
+
python pipeline.py""",
|
| 141 |
language="bash",
|
| 142 |
)
|
| 143 |
st.markdown(
|