Spaces:
Running
Running
update PER placeholder
Browse files
- evaluation/svs_eval.py +3 -3
- interface.py +7 -5
evaluation/svs_eval.py
CHANGED
|
@@ -72,7 +72,7 @@ def compute_dissonance_rate(intervals, dissonant_intervals={1, 2, 6, 10, 11}):
|
|
| 72 |
return np.mean(dissonant) if intervals else np.nan
|
| 73 |
|
| 74 |
|
| 75 |
-
def eval_per(audio_path, model=None):
|
| 76 |
audio_array, sr = librosa.load(audio_path, sr=16000)
|
| 77 |
# TODO: implement PER evaluation
|
| 78 |
return {}
|
|
@@ -99,12 +99,12 @@ def load_evaluators(config):
|
|
| 99 |
return loaded
|
| 100 |
|
| 101 |
|
| 102 |
-
def run_evaluation(audio_path, evaluators):
|
| 103 |
results = {}
|
| 104 |
if "singmos" in evaluators:
|
| 105 |
results.update(eval_singmos(audio_path, evaluators["singmos"]))
|
| 106 |
if "per" in evaluators:
|
| 107 |
-
results.update(eval_per(audio_path, evaluators["per"]))
|
| 108 |
if "melody" in evaluators:
|
| 109 |
results.update(eval_melody_metrics(audio_path, evaluators["melody"]))
|
| 110 |
if "aesthetic" in evaluators:
|
|
|
|
| 72 |
return np.mean(dissonant) if intervals else np.nan
|
| 73 |
|
| 74 |
|
| 75 |
+
def eval_per(audio_path, reference_text=None, model=None):
    """Evaluate phoneme error rate (PER) for a synthesized singing clip.

    Placeholder implementation: the audio is loaded and resampled, but no
    PER is computed yet.

    Args:
        audio_path: Path to the audio file to evaluate.
        reference_text: Reference text (e.g. the LLM response) to score the
            transcription against. Defaults to None so callers using the old
            ``eval_per(audio_path, model=...)`` signature keep working.
        model: Optional pre-loaded PER model (currently unused).

    Returns:
        dict: PER metrics; currently always empty until implemented.
    """
    # 16 kHz is the conventional ASR input rate; resample on load.
    audio_array, sr = librosa.load(audio_path, sr=16000)
    # TODO: implement PER evaluation — transcribe audio_array to phonemes
    # and align against reference_text.
    return {}
|
|
|
|
| 99 |
return loaded
|
| 100 |
|
| 101 |
|
| 102 |
+
def run_evaluation(audio_path, evaluators, **kwargs):
|
| 103 |
results = {}
|
| 104 |
if "singmos" in evaluators:
|
| 105 |
results.update(eval_singmos(audio_path, evaluators["singmos"]))
|
| 106 |
if "per" in evaluators:
|
| 107 |
+
results.update(eval_per(audio_path, kwargs["llm_text"], evaluators["per"]))
|
| 108 |
if "melody" in evaluators:
|
| 109 |
results.update(eval_melody_metrics(audio_path, evaluators["melody"]))
|
| 110 |
if "aesthetic" in evaluators:
|
interface.py
CHANGED
|
@@ -24,6 +24,7 @@ class GradioInterface:
|
|
| 24 |
self.character_info[self.current_character].default_voice
|
| 25 |
]
|
| 26 |
self.pipeline = SingingDialoguePipeline(self.default_config)
|
|
|
|
| 27 |
|
| 28 |
def load_config(self, path: str):
|
| 29 |
with open(path, "r") as f:
|
|
@@ -211,21 +212,22 @@ class GradioInterface:
|
|
| 211 |
if not audio_path:
|
| 212 |
return gr.update(value=""), gr.update(value="")
|
| 213 |
tmp_file = f"audio_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
|
| 214 |
-
results = self.pipeline.run(
|
| 215 |
audio_path,
|
| 216 |
self.svs_model_map[self.current_svs_model]["lang"],
|
| 217 |
self.character_info[self.current_character].prompt,
|
| 218 |
self.current_voice,
|
| 219 |
output_audio_path=tmp_file,
|
| 220 |
)
|
| 221 |
-
formatted_logs = f"ASR: {results['asr_text']}\nLLM: {results['llm_text']}"
|
| 222 |
return gr.update(value=formatted_logs), gr.update(
|
| 223 |
-
value=results["output_audio_path"]
|
| 224 |
)
|
| 225 |
|
| 226 |
def update_metrics(self, audio_path):
|
| 227 |
-
if not audio_path:
|
| 228 |
return gr.update(value="")
|
| 229 |
-
results = self.pipeline.evaluate(audio_path)
|
|
|
|
| 230 |
formatted_metrics = "\n".join([f"{k}: {v}" for k, v in results.items()])
|
| 231 |
return gr.update(value=formatted_metrics)
|
|
|
|
| 24 |
self.character_info[self.current_character].default_voice
|
| 25 |
]
|
| 26 |
self.pipeline = SingingDialoguePipeline(self.default_config)
|
| 27 |
+
self.results = None
|
| 28 |
|
| 29 |
def load_config(self, path: str):
|
| 30 |
with open(path, "r") as f:
|
|
|
|
| 212 |
if not audio_path:
|
| 213 |
return gr.update(value=""), gr.update(value="")
|
| 214 |
tmp_file = f"audio_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
|
| 215 |
+
self.results = self.pipeline.run(
|
| 216 |
audio_path,
|
| 217 |
self.svs_model_map[self.current_svs_model]["lang"],
|
| 218 |
self.character_info[self.current_character].prompt,
|
| 219 |
self.current_voice,
|
| 220 |
output_audio_path=tmp_file,
|
| 221 |
)
|
| 222 |
+
formatted_logs = f"ASR: {self.results['asr_text']}\nLLM: {self.results['llm_text']}"
|
| 223 |
return gr.update(value=formatted_logs), gr.update(
|
| 224 |
+
value=self.results["output_audio_path"]
|
| 225 |
)
|
| 226 |
|
| 227 |
def update_metrics(self, audio_path):
    """Evaluate the last pipeline run and render its metrics for display.

    Returns a Gradio update containing one "key: value" line per metric,
    or an empty update when there is no audio or no cached run results.
    """
    if not (audio_path and self.results):
        return gr.update(value="")
    # Forward the cached run outputs (asr_text, llm_text, ...) as kwargs
    # so evaluators such as PER can use them as references.
    metrics = self.pipeline.evaluate(audio_path, **self.results)
    # Merge any metrics the pipeline already produced during the run.
    metrics.update(self.results.get("metrics", {}))
    lines = [f"{key}: {value}" for key, value in metrics.items()]
    return gr.update(value="\n".join(lines))
|