Spaces:
Sleeping
Sleeping
Update requirements and add audiobox aesthetics evaluation functions
Browse files
- requirements.txt +4 -1
- svs_eval.py +16 -0
requirements.txt
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
git+https://github.com/
|
| 2 |
espnet_model_zoo
|
| 3 |
# pyopenjtalk
|
| 4 |
datasets
|
|
@@ -9,3 +9,6 @@ fastapi
|
|
| 9 |
uvicorn
|
| 10 |
fugashi
|
| 11 |
pykakasi
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
git+https://github.com/espnet/espnet
|
| 2 |
espnet_model_zoo
|
| 3 |
# pyopenjtalk
|
| 4 |
datasets
|
|
|
|
| 9 |
uvicorn
|
| 10 |
fugashi
|
| 11 |
pykakasi
|
| 12 |
+
basic-pitch[onnx]
|
| 13 |
+
audiobox_aesthetics
|
| 14 |
+
git+https://github.com/sea-turt1e/kanjiconv
|
svs_eval.py
CHANGED
|
@@ -18,6 +18,17 @@ def singmos_evaluation(predictor, wav_info, fs):
|
|
| 18 |
return score
|
| 19 |
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
def score_extract_warmpup():
|
| 22 |
from basic_pitch.inference import predict
|
| 23 |
|
|
@@ -76,6 +87,7 @@ if __name__ == "__main__":
|
|
| 76 |
# warmup
|
| 77 |
predictor = singmos_warmup()
|
| 78 |
score_extractor = score_extract_warmpup()
|
|
|
|
| 79 |
|
| 80 |
# evaluate the audio
|
| 81 |
metrics = {}
|
|
@@ -87,6 +99,10 @@ if __name__ == "__main__":
|
|
| 87 |
# score metric evaluation
|
| 88 |
score_results = score_metric_evaluation(score_extractor, args.wav_path)
|
| 89 |
metrics.update(score_results)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
# save results
|
| 92 |
with open(args.results_csv, "a") as f:
|
|
|
|
| 18 |
return score
|
| 19 |
|
| 20 |
|
| 21 |
+
def initialize_audiobox_predictor():
|
| 22 |
+
from audiobox_aesthetics.infer import initialize_predictor
|
| 23 |
+
predictor = initialize_predictor()
|
| 24 |
+
return predictor
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def audiobox_aesthetics_evaluation(predictor, audio_path):
|
| 28 |
+
score = predictor.forward([{"path": str(audio_path)}])
|
| 29 |
+
return score
|
| 30 |
+
|
| 31 |
+
|
| 32 |
def score_extract_warmpup():
|
| 33 |
from basic_pitch.inference import predict
|
| 34 |
|
|
|
|
| 87 |
# warmup
|
| 88 |
predictor = singmos_warmup()
|
| 89 |
score_extractor = score_extract_warmpup()
|
| 90 |
+
aesthetic_predictor = initialize_audiobox_predictor()
|
| 91 |
|
| 92 |
# evaluate the audio
|
| 93 |
metrics = {}
|
|
|
|
| 99 |
# score metric evaluation
|
| 100 |
score_results = score_metric_evaluation(score_extractor, args.wav_path)
|
| 101 |
metrics.update(score_results)
|
| 102 |
+
|
| 103 |
+
# audiobox aesthetics evaluation
|
| 104 |
+
score_results = audiobox_aesthetics_evaluation(aesthetic_predictor, args.wav_path)
|
| 105 |
+
metrics.update(score_results[0])
|
| 106 |
|
| 107 |
# save results
|
| 108 |
with open(args.results_csv, "a") as f:
|