Spaces:

FreedomIntelligence
/

SoundwaveDemo

Running

App Files Community

FanBuCUHK committed 2 days ago

Commit

d164d7c

verified ·

1 Parent(s): c08969e

Upload app.py

Browse files

Files changed (1) hide show

app.py +21 -30

app.py CHANGED Viewed

@@ -1,6 +1,25 @@
 import gradio as gr
 import httpx
 async def call_api(text: str, audio_path: str):
     # 读取音频文件
@@ -20,32 +39,6 @@ async def call_api(text: str, audio_path: str):
     return response.json()["result"]
-def load_examples():
-    return [
-        ["Can you turn my English into German?", "./show_case/common_voice_en_19664034.mp3"],  # En-De
-        ["Can you identify the initial word that connects to 'currency_name' in this audio clip?",
-         "./show_case/audio-1434542201-headset.wav"],  # ER
-        ["What do you think the speaker's message is intended to be in this audio?",
-         "./show_case/audio-1434542201-headset.wav"],  # IC
-        ["What does the person say?", "./show_case/p225_002.wav"],  # DFake
-        ["Assess whether this speech's pronunciation is Real or Fake.", "./show_case/Fake.wav"],  # DFake
-        [
-            "What emotional weight does the speaker's tone carry?\nPick one answer from A, B, C, and D.\nA: fear\nB: sadness\nC: joy\nD: neutral",
-            "./show_case/SER(emotion)_example.wav"],  # SER(emotion)
-        [
-            "Choose the most suitable answer from options A, B, C, and D to respond the question in next line, you may only choose A or B or C or D.\nThe number of speakers delivering this speech is what?\nA. 4\nB. 2\nC.1\nD. 3",
-            "./show_case/SNV_example.wav"],  # SNV
-        ["Identify the language of the conversation you just heard.", "./show_case/SLR_example.wav"],  # SLR
-        ["tell the gender of the speaker in this audio.", "./show_case/SGR_018.wav"],  # SGR
-        ["What's the sound we're hearing in this audio from?", "./show_case/Sound_Vocal_example.wav"],  # Sound_vocal
-        ["What is your best guess at the setting of this sound clip?", "./show_case/Scene_example.wav"],  # Sound_cochl
-        [
-            "Choose the most suitable answer from options A, B, C, and D to respond the question in next line, Please think step by step and you may only choose A or B or C or D.\nRecognize the segment where 'project' is spoken by the speaker.\nA. [5.28, 5.39]\nB. [0.92, 1.39]\nC. [4.75, 5.28]\nD. [3.86, 4.23]",
-            "./show_case/SG_audio_1.wav"],  # SG
-        ["What type of business does the first person's son have?", "./show_case/SFT_Fisher_example.wav"]  # SFT_Fisher
-    ]
 iface = gr.Interface(
     fn=call_api,
     inputs=[
@@ -53,13 +46,11 @@ iface = gr.Interface(
         gr.Audio(type="filepath", label="Upload Audio", value="./show_case/p225_002.wav")
     ],
     outputs=gr.Textbox(label="Model output"),
-    examples=[],  # Initially no examples shown
     allow_flagging="never"
 )
-# Add a button to load examples
-iface.add_button("Show Example", load_examples)
 iface.launch()
 if __name__ == '__main__':
     pass

+# frontend.py
 import gradio as gr
 import httpx
+examples = [
+        ["Can you turn my English into German?", "./show_case/common_voice_en_19664034.mp3"],  # En-De
+        ["Can you identify the initial word that connects to 'currency_name' in this audio clip?", "./show_case/audio-1434542201-headset.wav"],  # ER
+        ["What do you think the speaker's message is intended to be in this audio?", "./show_case/audio-1434542201-headset.wav"],  # IC
+        ["What does the person say?", "./show_case/p225_002.wav"],  # DFake
+        # ["Assess whether this speech's pronunciation is Real or Fake.", "./show_case/Real.wav"],  # DFake
+        ["Assess whether this speech's pronunciation is Real or Fake.", "./show_case/Fake.wav"],  # DFake
+        ["What emotional weight does the speaker's tone carry?\nPick one answer from A, B, C, and D.\nA: fear\nB: sadness\nC: joy\nD: neutral", "./show_case/SER(emotion)_example.wav"],  #SER(emotion)
+        # ["Assess whether this speech's pronunciation is Real or Fake.", "./show_case/SVD_14154_file31512.mp3.wav_16k.wav_norm.wav_mono.wav_silence.wav"],  # SVD
+        ["Choose the most suitable answer from options A, B, C, and D to respond the question in next line, you may only choose A or B or C or D.\nThe number of speakers delivering this speech is what?\nA. 4\nB. 2\nC.1\nD. 3", "./show_case/SNV_example.wav"],  #SNV
+        ["Identify the language of the conversation you just heard.","./show_case/SLR_example.wav"], #SLR
+        ["tell the gender of the speaker in this audio.","./show_case/SGR_018.wav"], #SGR
+        ["What's the sound we're hearing in this audio from?","./show_case/Sound_Vocal_example.wav"], #Sound_vocal
+        ["What is your best guess at the setting of this sound clip?","./show_case/Scene_example.wav"], #Sound_cochl
+        ["Choose the most suitable answer from options A, B, C, and D to respond the question in next line, Please think step by step and you may only choose A or B or C or D.\nRecognize the segment where 'project' is spoken by the speaker.\nA. [5.28, 5.39]\nB. [0.92, 1.39]\nC. [4.75, 5.28]\nD. [3.86, 4.23]","./show_case/SG_audio_1.wav"], #SG
+        ["What type of business does the first person's son have?","./show_case/SFT_Fisher_example.wav"] #SFT_Fisher
+    ]
 async def call_api(text: str, audio_path: str):
     # 读取音频文件
     return response.json()["result"]
 iface = gr.Interface(
     fn=call_api,
     inputs=[
         gr.Audio(type="filepath", label="Upload Audio", value="./show_case/p225_002.wav")
     ],
     outputs=gr.Textbox(label="Model output"),
+    examples=examples,
     allow_flagging="never"
 )
 iface.launch()
 if __name__ == '__main__':
+    # curl -X POST -F "text=What does the person say?" -F "audio_file=@./test_audio.wav" http://36.151.70.8:30113/process/
     pass