Spaces:

jhansss
/

SingingSDS

Sleeping

jhansss committed on May 21

Commit

1ec2d7e

1 Parent(s): 025d5b1

Update requirements and add audiobox aesthetics evaluation functions

Files changed (2) hide show

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-git+https://github.com/South-Twilight/espnet==202402
 espnet_model_zoo
 # pyopenjtalk
 datasets
@@ -9,3 +9,6 @@ fastapi
 uvicorn
 fugashi
 pykakasi

+git+https://github.com/espnet/espnet
 espnet_model_zoo
 # pyopenjtalk
 datasets
 uvicorn
 fugashi
 pykakasi
+basic-pitch[onnx]
+audiobox_aesthetics
+git+https://github.com/sea-turt1e/kanjiconv

svs_eval.py CHANGED Viewed

@@ -18,6 +18,17 @@ def singmos_evaluation(predictor, wav_info, fs):
     return score
 def score_extract_warmpup():
     from basic_pitch.inference import predict
@@ -76,6 +87,7 @@ if __name__ == "__main__":
     # warmup
     predictor = singmos_warmup()
     score_extractor = score_extract_warmpup()
     # evaluate the audio
     metrics = {}
@@ -87,6 +99,10 @@ if __name__ == "__main__":
     # score metric evaluation
     score_results = score_metric_evaluation(score_extractor, args.wav_path)
     metrics.update(score_results)
     # save results
     with open(args.results_csv, "a") as f:

     return score
+def initialize_audiobox_predictor():
+    from audiobox_aesthetics.infer import initialize_predictor
+    predictor = initialize_predictor()
+    return predictor
+def audiobox_aesthetics_evaluation(predictor, audio_path):
+    score = predictor.forward([{"path": str(audio_path)}])
+    return score
 def score_extract_warmpup():
     from basic_pitch.inference import predict
     # warmup
     predictor = singmos_warmup()
     score_extractor = score_extract_warmpup()
+    aesthetic_predictor = initialize_audiobox_predictor()
     # evaluate the audio
     metrics = {}
     # score metric evaluation
     score_results = score_metric_evaluation(score_extractor, args.wav_path)
     metrics.update(score_results)
+    # audiobox aesthetics evaluation
+    score_results = audiobox_aesthetics_evaluation(aesthetic_predictor, args.wav_path)
+    metrics.update(score_results[0])
     # save results
     with open(args.results_csv, "a") as f: