Spaces:

jhansss
/

SingingSDS

Sleeping

lanlanliu committed on Jun 18

Commit

8606727

1 Parent(s): de623fa

resolve copilot's feedback

Files changed (3) hide show

server.py CHANGED Viewed

@@ -144,15 +144,13 @@ def on_click_metrics():
     # OWSM ctc + PER
     y, sr = librosa.load("tmp/response.wav", sr=16000)
     asr_result = asr_pipeline(y, generate_kwargs={"language": "mandarin"} )['text']
-    # hyp_pinin = lazy_pinyin(asr_result)
-    hyp_pinin = get_pinyin(asr_result)
     with open(f"tmp/llm.txt", "r") as f:
         ref = f.read().replace(' ', '')
-    # ref_pinin = lazy_pinyin(ref)
-    ref_pinin = get_pinyin(ref)
-    per = jiwer.wer(" ".join(ref_pinin), " ".join(hyp_pinin))
     audio = librosa.load(f"tmp/response.wav", sr=sample_rate)[0]
     singmos = singmos_evaluation(

     # OWSM ctc + PER
     y, sr = librosa.load("tmp/response.wav", sr=16000)
     asr_result = asr_pipeline(y, generate_kwargs={"language": "mandarin"} )['text']
+    hyp_pinyin = get_pinyin(asr_result)
     with open(f"tmp/llm.txt", "r") as f:
         ref = f.read().replace(' ', '')
+    ref_pinyin = get_pinyin(ref)
+    per = jiwer.wer(" ".join(ref_pinyin), " ".join(hyp_pinyin))
     audio = librosa.load(f"tmp/response.wav", sr=sample_rate)[0]
     singmos = singmos_evaluation(

test_performance.py CHANGED Viewed

@@ -118,10 +118,10 @@ def on_click_metrics(audio_path, ref):
     asr_result = asr_pipeline(y, generate_kwargs={"language": "mandarin"} )['text']
     # ESPnet embedded g2p, but sometimes it will mispronounce polyphonic characters
-    hyp_pinin = pypinyin_g2p_phone_without_prosody(asr_result)
-    ref_pinin = pypinyin_g2p_phone_without_prosody(ref)
-    per = jiwer.wer(ref_pinin, hyp_pinin)
     audio = librosa.load(audio_path, sr=22050)[0]
     singmos = singmos_evaluation(

     asr_result = asr_pipeline(y, generate_kwargs={"language": "mandarin"} )['text']
     # ESPnet embedded g2p, but sometimes it will mispronounce polyphonic characters
+    hyp_pinyin = pypinyin_g2p_phone_without_prosody(asr_result)
+    ref_pinyin = pypinyin_g2p_phone_without_prosody(ref)
+    per = jiwer.wer(ref_pinyin, hyp_pinyin)
     audio = librosa.load(audio_path, sr=22050)[0]
     singmos = singmos_evaluation(

util.py CHANGED Viewed

@@ -82,14 +82,13 @@ def is_chinese(char):
     return '\u4e00' <= char <= '\u9fff'
-def is_special(char):
-    return re.match(r'^[-——APSP]+$', char) is not None
 def get_pinyin(texts):
     texts = preprocess_input(texts, seg_syb="")
-    pattern = re.compile(r'[\u4e00-\u9fff]|[^\u4e00-\u9fff]+')
-    blocks = pattern.findall(texts)
     characters = [block for block in blocks if is_chinese(block)]
     chinese_text = ''.join(characters)
@@ -105,6 +104,10 @@ def get_pinyin(texts):
             text_list.append(chinese_pinyin[pinyin_idx])
             pinyin_idx += 1
         else:
-            text_list.append(block)
     return text_list

     return '\u4e00' <= char <= '\u9fff'
+def is_special(block):
+    return any(token in block for token in ['-', 'AP', 'SP'])
 def get_pinyin(texts):
     texts = preprocess_input(texts, seg_syb="")
+    blocks = re.compile(r'[\u4e00-\u9fff]|[^\u4e00-\u9fff]+').findall(texts)
     characters = [block for block in blocks if is_chinese(block)]
     chinese_text = ''.join(characters)
             text_list.append(chinese_pinyin[pinyin_idx])
             pinyin_idx += 1
         else:
+            if is_special(block):
+                specials = re.compile(r"-|AP|SP").findall(block)
+                text_list.extend(specials)
+            else:
+                text_list.append(block)
     return text_list