Commit 0dc360b
Parent(s): c4d0aea
Initial version of leaderboard
Files changed:
- .gitignore +1 -0
- README.md +2 -1
- api.py +11 -0
- app.py +37 -0
- dataset_utils.py +89 -0
- leaderboard.py +56 -0
- leaderboard_data.py +71 -0
- requirements.txt +2 -0
- samples.py +73 -0
- samples_data.py +116 -0
.gitignore
ADDED
@@ -0,0 +1 @@
__pycache__
README.md
CHANGED
@@ -11,4 +11,5 @@ license: apache-2.0
 short_description: Objective evaluations for speech generation systems
 ---
 
-
+Fetches data from the `balacoon/speech_gen_baselines` and `balacoon/speech_gen_eval_testsets` datasets on Hugging Face to create a leaderboard for speech generation systems.
+Metrics are computed with the `speech_gen_eval` library. The leaderboard lets you compare metrics and listen to generated samples.
api.py
ADDED
@@ -0,0 +1,11 @@
"""
Copyright 2025 Balacoon

API object reused when interacting with datasets.
"""

from huggingface_hub import HfApi

api = HfApi()
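The module pins a single `HfApi` instance so every dataset query shares one client. A minimal usage sketch, mirroring how `dataset_utils.py` below consumes it:

```python
from api import api

# list the top-level folders of the baselines dataset with the shared client
tree = api.list_repo_tree(
    "balacoon/speech_gen_baselines", repo_type="dataset", recursive=False
)
```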
app.py
ADDED
@@ -0,0 +1,37 @@
"""
Copyright 2025 Balacoon

TTSLeaderboard app.
"""

import gradio as gr

from leaderboard import create_leaderboard_tab
from samples import create_samples_tab


def main():
    with gr.Blocks(css="footer {visibility: hidden}") as demo:
        gr.Markdown(
            """
            # TTSLeaderboard
            ### Objective evaluation of speech generation systems
            Evaluations are done with [speech_gen_eval](https://github.com/balacoon/speech_gen_eval),
            on [speech_gen_eval_testsets](https://huggingface.co/datasets/balacoon/speech_gen_eval_testsets).
            """
        )

        with gr.Tabs():
            with gr.TabItem("📊 Leaderboard"):
                create_leaderboard_tab()

            with gr.TabItem("🔊 Samples"):
                create_samples_tab()
                gr.Markdown("""
                Click on the audio files to play sample generations from each model.
                """)

    demo.launch()


if __name__ == "__main__":
    main()
dataset_utils.py
ADDED
@@ -0,0 +1,89 @@
"""
Copyright 2025 Balacoon

Utils to interact with the `metrics` dataset.
"""

from typing import Optional

from huggingface_hub.hf_api import RepoFolder

from api import api

baselines_repo = "balacoon/speech_gen_baselines"


def get_system_types() -> list[str]:
    """
    Get the types of systems a leaderboard is available for.
    We check the `balacoon/speech_gen_baselines` dataset,
    where syntheses from different models are stored.
    For example, the dataset could have the following structure:
    ```
    speech_gen_baselines/
        zero-tts/
        vocoder/
    ```
    """
    repo_tree = api.list_repo_tree(
        baselines_repo,
        repo_type="dataset",
        recursive=False,
    )
    top_level_dirs = [item.path for item in repo_tree if isinstance(item, RepoFolder)]
    return top_level_dirs


def get_models(system_type: str) -> list[str]:
    """
    Get all models under the given system type.
    For example, for system_type="zero-tts", returns ["xtts", "yourtts"].
    """
    models_tree = api.list_repo_tree(
        baselines_repo,
        repo_type="dataset",
        path_in_repo=system_type,
        recursive=False,
    )
    model_dirs = [item.path for item in models_tree if isinstance(item, RepoFolder)]
    # extract just the model names from the full paths
    model_names = [path.split("/")[-1] for path in model_dirs]
    return model_names


def get_datasets(system_type: str, model_dirs: Optional[list[str]] = None, return_union: bool = True) -> list[str]:
    """
    Get the datasets for which metrics are available for the given system type.
    Goes through all models under the system type, and checks the datasets under each model.
    The dataset could have the following structure:
    ```
    speech_gen_baselines/
        zero-tts/
            xtts/
                vctk/
                daps_celeb/
            yourtts/
                vctk/
                daps_celeb/
    ```
    """
    if model_dirs is None:
        # get all models under the system type
        model_dirs = get_models(system_type)

    # collect the datasets available for each model
    datasets_per_model = []
    for model_dir in model_dirs:
        datasets_tree = api.list_repo_tree(
            baselines_repo,
            repo_type="dataset",
            path_in_repo=system_type + "/" + model_dir,
            recursive=False,
        )
        model_datasets = [item.path.split("/")[-1] for item in datasets_tree if isinstance(item, RepoFolder)]
        datasets_per_model.append(model_datasets)

    if return_union:
        # return all datasets present for at least one model
        return sorted(set().union(*datasets_per_model))
    else:
        # return only datasets present for every model
        return sorted(set.intersection(*map(set, datasets_per_model)))
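For reference, the two modes of `get_datasets` differ only in how the per-model dataset lists are combined. A minimal sketch with hypothetical listings:

```python
# hypothetical per-model listings: first model evaluated on two datasets, second on one
datasets_per_model = [["vctk", "daps_celeb"], ["vctk"]]

# union: datasets evaluated for at least one model
print(sorted(set().union(*datasets_per_model)))                 # ['daps_celeb', 'vctk']

# intersection: datasets evaluated for every model
print(sorted(set.intersection(*map(set, datasets_per_model))))  # ['vctk']
```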
leaderboard.py
ADDED
@@ -0,0 +1,56 @@
"""
Copyright 2025 Balacoon

Leaderboard tab for TTSLeaderboard.
"""

import gradio as gr

from dataset_utils import get_system_types, get_datasets
from leaderboard_data import get_leaderboard_data


def create_leaderboard_tab():
    with gr.Row():
        gr.Markdown("""
        Select a system type and a dataset to see the leaderboard.
        """)
    with gr.Row():
        system_type = gr.Dropdown(choices=get_system_types(), label="System Type", value=None)
        dataset = gr.Dropdown(choices=[], label="Dataset", value=None)
    with gr.Row():
        table = gr.DataFrame(
            value=None,
            row_count=0,
            col_count=0,
        )

    def update_datasets(system_type: str):
        # when the system type changes, repopulate datasets and clear the table
        dataset_choices = get_datasets(system_type) if system_type else []
        return [
            gr.Dropdown(choices=dataset_choices, value=None),
            gr.DataFrame(value=None, row_count=0, col_count=0),
        ]

    def update_table(system_type: str, dataset: str):
        # only populate the table when both selections are made
        if not system_type or not dataset:
            return gr.DataFrame(value=None, row_count=0)

        df, datatypes = get_leaderboard_data(system_type, dataset)
        return gr.DataFrame(
            value=df,
            row_count=(len(df), "fixed"),
            datatype=datatypes,
        )

    system_type.change(
        fn=update_datasets,
        inputs=[system_type],
        outputs=[dataset, table],
    )

    dataset.change(
        fn=update_table,
        inputs=[system_type, dataset],
        outputs=[table],
    )
leaderboard_data.py
ADDED
@@ -0,0 +1,71 @@
"""
Copyright 2025 Balacoon

Utils to get data to populate the leaderboard.
Communicates with the `balacoon/speech_gen_baselines` dataset on Hugging Face,
which contains evaluation results of different speech generation systems.
"""

import yaml
import logging
import requests

import pandas as pd
from huggingface_hub import hf_hub_url
from huggingface_hub.hf_api import RepoFolder

from api import api


def get_leaderboard_data(system_type: str, dataset: str) -> tuple[pd.DataFrame, list[str]]:
    """
    Fetches metrics.yaml for all systems of the given type that have an evaluation for the given dataset.
    Returns a DataFrame with metrics per system, and the datatype of each column.
    """
    # get all models under the system type
    models_tree = api.list_repo_tree(
        "balacoon/speech_gen_baselines",
        repo_type="dataset",
        path_in_repo=system_type,
        recursive=False,
    )
    model_dirs = [item.path for item in models_tree if isinstance(item, RepoFolder)]

    # collect metrics for each model that has the dataset
    metrics_data = []
    for model_dir in model_dirs:
        model_name = model_dir.split("/")[-1]
        metrics_path = f"{model_dir}/{dataset}/metrics.yaml"
        try:
            url = hf_hub_url(
                repo_id="balacoon/speech_gen_baselines",
                filename=metrics_path,
                repo_type="dataset",
            )
            response = requests.get(url)
            metrics_content = yaml.safe_load(response.text)
            if "metrics" not in metrics_content:
                logging.error(f"`metrics` are missing from metrics.yaml ({metrics_path})")
                continue
            # prepare a row for the table
            if "model_name" in metrics_content:
                # overwrite the model name
                model_name = metrics_content["model_name"]
            # add a link to the model if it is provided
            if "link" in metrics_content:
                model_name = f"[{model_name}]({metrics_content['link']})"
            row = {"Model": model_name}
            # round all metric values to 4 decimal places
            rounded_metrics = {k: round(float(v), 4) for k, v in metrics_content["metrics"].items()}
            row.update(rounded_metrics)
            metrics_data.append(row)
        except Exception:
            # skip if metrics.yaml doesn't exist for this model/dataset
            continue
    df = pd.DataFrame(metrics_data)
    # remove the 'aesthetics_' prefix from column names where applicable
    df.columns = [col.removeprefix("aesthetics_") for col in df.columns]
    # compose datatypes for the table: markdown for the model name, number for all other columns
    datatypes = ["markdown"] + ["number"] * (len(df.columns) - 1)
    return df, datatypes
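For reference, `get_leaderboard_data` expects each `metrics.yaml` to carry a required `metrics` mapping plus optional `model_name` and `link` keys. A sketch with made-up metric names and link, showing the row it would produce:

```python
import yaml

# hypothetical metrics.yaml content; only `metrics` is required
example = yaml.safe_load("""
model_name: xtts
link: https://example.com/xtts
metrics:
  utmos: 3.71234
  aesthetics_quality: 7.4
""")

# same row construction as in get_leaderboard_data
model_name = f"[{example['model_name']}]({example['link']})"
row = {"Model": model_name}
row.update({k: round(float(v), 4) for k, v in example["metrics"].items()})
# row == {'Model': '[xtts](https://example.com/xtts)', 'utmos': 3.7123, 'aesthetics_quality': 7.4}
```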
requirements.txt
ADDED
@@ -0,0 +1,2 @@
huggingface_hub
pandas
samples.py
ADDED
@@ -0,0 +1,73 @@
"""
Copyright 2025 Balacoon

Samples tab for TTSLeaderboard.
"""

import gradio as gr

from dataset_utils import get_system_types, get_models, get_datasets
from samples_data import get_samples_data


def create_samples_tab():
    with gr.Row():
        gr.Markdown("""
        Select a system type, the models of interest, and a dataset to see samples.
        """)
    with gr.Row():
        system_type = gr.Dropdown(choices=get_system_types(), label="System Type", value=None)
        models = gr.Dropdown(choices=[], label="Models", value=None, multiselect=True)
        dataset = gr.Dropdown(choices=[], label="Dataset", value=None)
    with gr.Row():
        table = gr.DataFrame(
            value=None,
            row_count=0,
            col_count=0,
        )

    def update_models(system_type: str):
        # when the system type changes, repopulate models and clear dataset and table
        models = get_models(system_type) if system_type else []
        return [
            gr.Dropdown(choices=models, value=None),
            gr.Dropdown(choices=[], value=None),
            gr.DataFrame(value=None, row_count=0, col_count=0),
        ]

    def update_datasets(system_type: str, models: list[str]):
        # offer only datasets for which every selected model has samples;
        # guard against an empty selection
        datasets = get_datasets(system_type, models, return_union=False) if system_type and models else []
        return [
            gr.Dropdown(choices=datasets, value=None),
            gr.DataFrame(value=None, row_count=0, col_count=0),
        ]

    def update_table(system_type: str, models: list[str], dataset: str):
        # only populate the table when all selections are made
        if not system_type or not dataset or not models:
            return gr.DataFrame(value=None, row_count=0)

        df, datatypes = get_samples_data(system_type, models, dataset)
        return gr.DataFrame(
            value=df,
            row_count=(len(df), "fixed"),
            datatype=datatypes,
            wrap=True,
        )

    system_type.change(
        fn=update_models,
        inputs=[system_type],
        outputs=[models, dataset, table],
    )

    models.change(
        fn=update_datasets,
        inputs=[system_type, models],
        outputs=[dataset, table],
    )

    dataset.change(
        fn=update_table,
        inputs=[system_type, models, dataset],
        outputs=[table],
    )
samples_data.py
ADDED
@@ -0,0 +1,116 @@
"""
Copyright 2025 Balacoon

Fetches samples from the `balacoon/speech_gen_baselines` and
`balacoon/speech_gen_eval_testsets` datasets.
"""

import re
import logging
import requests

import pandas as pd

from huggingface_hub import hf_hub_url


def get_samples_data(system_type: str, models: list[str], dataset: str) -> tuple[pd.DataFrame, list[str]]:
    """
    Fetches `demo` and `id_mapping` from `balacoon/speech_gen_eval_testsets` for the given dataset.
    Then fetches reference files according to `id_mapping` from `balacoon/speech_gen_eval_testsets`.
    Finally fetches synthetic samples for the different models from `balacoon/speech_gen_baselines`,
    according to `demo`.
    """
    testsets_repo = "balacoon/speech_gen_eval_testsets"
    # 1. get demo and id_mapping
    demo_path = f"{dataset}/demo"
    id_mapping_path = f"{dataset}/id_mapping"
    try:
        # read demo ids
        url = hf_hub_url(
            repo_id=testsets_repo,
            filename=demo_path,
            repo_type="dataset",
        )
        response = requests.get(url)
        demo = response.text.splitlines()
        demo = [re.split(r"\s+", x.strip(), maxsplit=1) for x in demo]

        if system_type == "vocoder":
            # no need for a mapping, each id maps to itself
            mapping = {name: name for name, _ in demo}
        else:
            # read the id mapping
            url = hf_hub_url(
                repo_id=testsets_repo,
                filename=id_mapping_path,
                repo_type="dataset",
            )
            response = requests.get(url)
            mapping = response.text.splitlines()
            mapping = [x.split() for x in mapping]
            mapping = {k: v for k, v in mapping}
    except Exception as e:
        logging.error(f"Failed to read demo / mapping for {dataset}: {e}")
        return pd.DataFrame(), []

    # 2. get reference files
    if not all(x in mapping for x, _ in demo):
        raise ValueError(f"Failed to fetch demo or mapping for {dataset}, refresh the page.")
    ref_ids = list(set(mapping[x] for x, _ in demo))
    reference_samples = {}
    for id in ref_ids:
        try:
            url = hf_hub_url(
                repo_id=testsets_repo,
                filename=f"{dataset}/wav/{id}.wav",
                repo_type="dataset",
            )
            reference_samples[id] = f"<audio src='{url}' controls></audio>"
        except Exception as e:
            logging.error(f"Failed to read reference {id} for {dataset}: {e}")
            continue

    # 3. get synthetic samples
    systems_samples = {model: {} for model in models}
    baselines_repo = "balacoon/speech_gen_baselines"
    for model in models:
        for id, _ in demo:
            filename = f"{system_type}/{model}/{dataset}/wav/{id}.wav"
            try:
                url = hf_hub_url(
                    repo_id=baselines_repo,
                    filename=filename,
                    repo_type="dataset",
                )
                systems_samples[model][id] = f"<audio src='{url}' controls></audio>"
            except Exception as e:
                logging.error(f"Failed to read sample {id} from {filename} in {dataset}: {e}")
                continue

    # filter the demo ids, keeping only those for which all samples are present
    filtered_demo = []
    for id, txt in demo:
        if id not in mapping:
            continue
        ref_id = mapping[id]
        if ref_id not in reference_samples:
            continue
        if all(id in systems_samples[model] for model in models):
            filtered_demo.append((id, txt))

    # finally create a dataframe
    rows = []
    for id, txt in filtered_demo:
        row = {
            "id": id,
            "text": txt,
            "reference": reference_samples[mapping[id]],
        }
        for model in models:
            row[model] = systems_samples[model][id]
        rows.append(row)
    datatypes = ["text", "text", "markdown"] + ["markdown"] * len(models)
    return pd.DataFrame(rows), datatypes
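For reference, the file formats implied by the parsing above: each `demo` line is an utterance id followed by its transcript (split on the first whitespace run), and each `id_mapping` line pairs a synthesized id with a reference id. A sketch with made-up ids:

```python
import re

# hypothetical file contents
demo_text = "spk1_001 The quick brown fox.\nspk1_002 Jumps over the lazy dog."
mapping_text = "spk1_001 spk1_ref\nspk1_002 spk1_ref"

# same parsing as in get_samples_data
demo = [re.split(r"\s+", x.strip(), maxsplit=1) for x in demo_text.splitlines()]
mapping = dict(x.split() for x in mapping_text.splitlines())

assert demo[0] == ["spk1_001", "The quick brown fox."]
assert mapping["spk1_002"] == "spk1_ref"
```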