lanlanliu committed on
Commit
8606727
·
1 Parent(s): de623fa

resolve copilot's feedback

Browse files
Files changed (3) hide show
  1. server.py +3 -5
  2. test_performance.py +3 -3
  3. util.py +8 -5
server.py CHANGED
@@ -144,15 +144,13 @@ def on_click_metrics():
144
  # OWSM ctc + PER
145
  y, sr = librosa.load("tmp/response.wav", sr=16000)
146
  asr_result = asr_pipeline(y, generate_kwargs={"language": "mandarin"} )['text']
147
- # hyp_pinin = lazy_pinyin(asr_result)
148
- hyp_pinin = get_pinyin(asr_result)
149
 
150
  with open(f"tmp/llm.txt", "r") as f:
151
  ref = f.read().replace(' ', '')
152
 
153
- # ref_pinin = lazy_pinyin(ref)
154
- ref_pinin = get_pinyin(ref)
155
- per = jiwer.wer(" ".join(ref_pinin), " ".join(hyp_pinin))
156
 
157
  audio = librosa.load(f"tmp/response.wav", sr=sample_rate)[0]
158
  singmos = singmos_evaluation(
 
144
  # OWSM ctc + PER
145
  y, sr = librosa.load("tmp/response.wav", sr=16000)
146
  asr_result = asr_pipeline(y, generate_kwargs={"language": "mandarin"} )['text']
147
+ hyp_pinyin = get_pinyin(asr_result)
 
148
 
149
  with open(f"tmp/llm.txt", "r") as f:
150
  ref = f.read().replace(' ', '')
151
 
152
+ ref_pinyin = get_pinyin(ref)
153
+ per = jiwer.wer(" ".join(ref_pinyin), " ".join(hyp_pinyin))
 
154
 
155
  audio = librosa.load(f"tmp/response.wav", sr=sample_rate)[0]
156
  singmos = singmos_evaluation(
test_performance.py CHANGED
@@ -118,10 +118,10 @@ def on_click_metrics(audio_path, ref):
118
  asr_result = asr_pipeline(y, generate_kwargs={"language": "mandarin"} )['text']
119
 
120
  # Espnet embedded g2p, but sometimes it will mispronounce polyphonic characters
121
- hyp_pinin = pypinyin_g2p_phone_without_prosody(asr_result)
122
 
123
- ref_pinin = pypinyin_g2p_phone_without_prosody(ref)
124
- per = jiwer.wer(ref_pinin, hyp_pinin)
125
 
126
  audio = librosa.load(audio_path, sr=22050)[0]
127
  singmos = singmos_evaluation(
 
118
  asr_result = asr_pipeline(y, generate_kwargs={"language": "mandarin"} )['text']
119
 
120
  # Espnet embedded g2p, but sometimes it will mispronounce polyphonic characters
121
+ hyp_pinyin = pypinyin_g2p_phone_without_prosody(asr_result)
122
 
123
+ ref_pinyin = pypinyin_g2p_phone_without_prosody(ref)
124
+ per = jiwer.wer(ref_pinyin, hyp_pinyin)
125
 
126
  audio = librosa.load(audio_path, sr=22050)[0]
127
  singmos = singmos_evaluation(
util.py CHANGED
@@ -82,14 +82,13 @@ def is_chinese(char):
82
  return '\u4e00' <= char <= '\u9fff'
83
 
84
 
85
- def is_special(char):
86
- return re.match(r'^[-——APSP]+$', char) is not None
87
 
88
 
89
  def get_pinyin(texts):
90
  texts = preprocess_input(texts, seg_syb="")
91
- pattern = re.compile(r'[\u4e00-\u9fff]|[^\u4e00-\u9fff]+')
92
- blocks = pattern.findall(texts)
93
 
94
  characters = [block for block in blocks if is_chinese(block)]
95
  chinese_text = ''.join(characters)
@@ -105,6 +104,10 @@ def get_pinyin(texts):
105
  text_list.append(chinese_pinyin[pinyin_idx])
106
  pinyin_idx += 1
107
  else:
108
- text_list.append(block)
 
 
 
 
109
 
110
  return text_list
 
82
  return '\u4e00' <= char <= '\u9fff'
83
 
84
 
85
+ def is_special(block):
86
+ return any(token in block for token in ['-', 'AP', 'SP'])
87
 
88
 
89
  def get_pinyin(texts):
90
  texts = preprocess_input(texts, seg_syb="")
91
+ blocks = re.compile(r'[\u4e00-\u9fff]|[^\u4e00-\u9fff]+').findall(texts)
 
92
 
93
  characters = [block for block in blocks if is_chinese(block)]
94
  chinese_text = ''.join(characters)
 
104
  text_list.append(chinese_pinyin[pinyin_idx])
105
  pinyin_idx += 1
106
  else:
107
+ if is_special(block):
108
+ specials = re.compile(r"-|AP|SP").findall(block)
109
+ text_list.extend(specials)
110
+ else:
111
+ text_list.append(block)
112
 
113
  return text_list