FanBuCUHK committed on
Commit
d164d7c
·
verified ·
1 Parent(s): c08969e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -30
app.py CHANGED
@@ -1,6 +1,25 @@
 
1
  import gradio as gr
2
  import httpx
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  async def call_api(text: str, audio_path: str):
6
  # 读取音频文件
@@ -20,32 +39,6 @@ async def call_api(text: str, audio_path: str):
20
  return response.json()["result"]
21
 
22
 
23
- def load_examples():
24
- return [
25
- ["Can you turn my English into German?", "./show_case/common_voice_en_19664034.mp3"], # En-De
26
- ["Can you identify the initial word that connects to 'currency_name' in this audio clip?",
27
- "./show_case/audio-1434542201-headset.wav"], # ER
28
- ["What do you think the speaker's message is intended to be in this audio?",
29
- "./show_case/audio-1434542201-headset.wav"], # IC
30
- ["What does the person say?", "./show_case/p225_002.wav"], # DFake
31
- ["Assess whether this speech's pronunciation is Real or Fake.", "./show_case/Fake.wav"], # DFake
32
- [
33
- "What emotional weight does the speaker's tone carry?\nPick one answer from A, B, C, and D.\nA: fear\nB: sadness\nC: joy\nD: neutral",
34
- "./show_case/SER(emotion)_example.wav"], # SER(emotion)
35
- [
36
- "Choose the most suitable answer from options A, B, C, and D to respond the question in next line, you may only choose A or B or C or D.\nThe number of speakers delivering this speech is what?\nA. 4\nB. 2\nC.1\nD. 3",
37
- "./show_case/SNV_example.wav"], # SNV
38
- ["Identify the language of the conversation you just heard.", "./show_case/SLR_example.wav"], # SLR
39
- ["tell the gender of the speaker in this audio.", "./show_case/SGR_018.wav"], # SGR
40
- ["What's the sound we're hearing in this audio from?", "./show_case/Sound_Vocal_example.wav"], # Sound_vocal
41
- ["What is your best guess at the setting of this sound clip?", "./show_case/Scene_example.wav"], # Sound_cochl
42
- [
43
- "Choose the most suitable answer from options A, B, C, and D to respond the question in next line, Please think step by step and you may only choose A or B or C or D.\nRecognize the segment where 'project' is spoken by the speaker.\nA. [5.28, 5.39]\nB. [0.92, 1.39]\nC. [4.75, 5.28]\nD. [3.86, 4.23]",
44
- "./show_case/SG_audio_1.wav"], # SG
45
- ["What type of business does the first person's son have?", "./show_case/SFT_Fisher_example.wav"] # SFT_Fisher
46
- ]
47
-
48
-
49
  iface = gr.Interface(
50
  fn=call_api,
51
  inputs=[
@@ -53,13 +46,11 @@ iface = gr.Interface(
53
  gr.Audio(type="filepath", label="Upload Audio", value="./show_case/p225_002.wav")
54
  ],
55
  outputs=gr.Textbox(label="Model output"),
56
- examples=[], # Initially no examples shown
57
  allow_flagging="never"
58
  )
59
 
60
- # Add a button to load examples
61
- iface.add_button("Show Example", load_examples)
62
  iface.launch()
63
-
64
  if __name__ == '__main__':
 
65
  pass
 
1
+ # frontend.py
2
  import gradio as gr
3
  import httpx
4
 
5
# Showcase prompts for the Gradio demo: each entry is [prompt_text, audio_path],
# matching the (text, audio) inputs of call_api. Trailing tags name the task.
examples = [
    # Speech translation (English -> German)
    ["Can you turn my English into German?",
     "./show_case/common_voice_en_19664034.mp3"],

    # Entity recognition / intent classification on the same clip
    ["Can you identify the initial word that connects to 'currency_name' in this audio clip?",
     "./show_case/audio-1434542201-headset.wav"],
    ["What do you think the speaker's message is intended to be in this audio?",
     "./show_case/audio-1434542201-headset.wav"],

    # Transcription and deepfake detection
    ["What does the person say?",
     "./show_case/p225_002.wav"],
    # ["Assess whether this speech's pronunciation is Real or Fake.", "./show_case/Real.wav"],  # DFake (real sample)
    ["Assess whether this speech's pronunciation is Real or Fake.",
     "./show_case/Fake.wav"],

    # Speech emotion recognition (multiple choice)
    ["What emotional weight does the speaker's tone carry?\nPick one answer from A, B, C, and D.\nA: fear\nB: sadness\nC: joy\nD: neutral",
     "./show_case/SER(emotion)_example.wav"],
    # ["Assess whether this speech's pronunciation is Real or Fake.", "./show_case/SVD_14154_file31512.mp3.wav_16k.wav_norm.wav_mono.wav_silence.wav"],  # SVD

    # Speaker counting (multiple choice)
    ["Choose the most suitable answer from options A, B, C, and D to respond the question in next line, you may only choose A or B or C or D.\nThe number of speakers delivering this speech is what?\nA. 4\nB. 2\nC.1\nD. 3",
     "./show_case/SNV_example.wav"],

    # Language / gender recognition
    ["Identify the language of the conversation you just heard.",
     "./show_case/SLR_example.wav"],
    ["tell the gender of the speaker in this audio.",
     "./show_case/SGR_018.wav"],

    # General sound and acoustic-scene understanding
    ["What's the sound we're hearing in this audio from?",
     "./show_case/Sound_Vocal_example.wav"],
    ["What is your best guess at the setting of this sound clip?",
     "./show_case/Scene_example.wav"],

    # Word-level segment grounding (multiple choice)
    ["Choose the most suitable answer from options A, B, C, and D to respond the question in next line, Please think step by step and you may only choose A or B or C or D.\nRecognize the segment where 'project' is spoken by the speaker.\nA. [5.28, 5.39]\nB. [0.92, 1.39]\nC. [4.75, 5.28]\nD. [3.86, 4.23]",
     "./show_case/SG_audio_1.wav"],

    # Long-form conversational QA (Fisher corpus)
    ["What type of business does the first person's son have?",
     "./show_case/SFT_Fisher_example.wav"],
]
22
+
23
 
24
  async def call_api(text: str, audio_path: str):
25
  # 读取音频文件
 
39
  return response.json()["result"]
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  iface = gr.Interface(
43
  fn=call_api,
44
  inputs=[
 
46
  gr.Audio(type="filepath", label="Upload Audio", value="./show_case/p225_002.wav")
47
  ],
48
  outputs=gr.Textbox(label="Model output"),
49
+ examples=examples,
50
  allow_flagging="never"
51
  )
52
 
 
 
53
# Launch at module level: Hugging Face Spaces executes this file directly,
# so the server starts whether or not the module is run as __main__.
iface.launch()

if __name__ == '__main__':
    # Manual smoke test for the backend (run from a shell, not from Python):
    # curl -X POST -F "text=What does the person say?" -F "audio_file=@./test_audio.wav" http://36.151.70.8:30113/process/
    pass