littlebird13 committed on
Commit
14be0b9
·
verified ·
1 Parent(s): b736de0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -0
app.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import datetime
3
+ import base64
4
+ import numpy as np
5
+ import dashscope
6
+ import os
7
+
8
# DashScope credential, read once at import time; a missing API_KEY
# environment variable raises KeyError before the UI is built.
API_KEY = os.environ["API_KEY"]
9
+
10
# Voice dropdown: display label ("English name / Chinese name") -> API voice id.
# Every id is the English part of its label, so the mapping is derived from
# the labels themselves; insertion order is the dropdown order.
VOICE_OPTIONS = {
    label: label.split(" / ", 1)[0]
    for label in (
        "Cherry / 芊悦",
        "Ethan / 晨煦",
        "Jennifer / 詹妮弗",
        "Ryan / 甜茶",
        "Katerina / 卡捷琳娜",
        "Nofish / 不吃鱼",
        "Elias / 墨讲师",
        "Li / 南京-老李",
        "Marcus / 陕西-秦川",
        "Roy / 闽南-阿杰",
        "Peter / 天津-李彼得",
        "Eric / 四川-程川",
        "Rocky / 粤语-阿强",
        "Kiki / 粤语-阿清",
        "Sunny / 四川-晴儿",
        "Jada / 上海-阿珍",
        "Dylan / 北京-晓东",
    )
}
DEFAULT_VOICE = "Cherry / 芊悦"

# Language dropdown labels, in display order.
LANGUAGE_OPTIONS = [
    "Auto / 自动",
    "English / 英文",
    "Chinese / 中文",
    "German / 德语",
    "Italian / 意大利语",
    "Portuguese / 葡萄牙语",
    "Spanish / 西班牙语",
    "Japanese / 日语",
    "Korean / 韩语",
    "French / 法语",
    "Russian / 俄语",
]

# Display label -> value sent to the API as the language parameter
# (again the English part of the label).
LANGUAGE_MAP = {label: label.split(" / ", 1)[0] for label in LANGUAGE_OPTIONS}
58
+
59
def tts_interface(text, voice_display, language_display):
    """Synthesize *text* with the selected voice and return audio for gr.Audio.

    Args:
        text: Text to synthesize.
        voice_display: A key of VOICE_OPTIONS (the dropdown label).
        language_display: A key of LANGUAGE_MAP (the dropdown label).

    Returns:
        ``(sample_rate, samples)`` where ``samples`` is a float32 NumPy array
        scaled by 1/32768, or ``None`` when there is nothing to play (blank
        text, or the service returned no audio data).

    Raises:
        KeyError: If a non-blank request names an unknown voice or language
            label.
    """
    # Blank input: skip the remote call entirely and clear the player.
    if not text or not text.strip():
        return None

    voice_name = VOICE_OPTIONS[voice_display]
    # Translate the displayed language label into the API parameter.
    language = LANGUAGE_MAP[language_display]

    print(f"text: {text}, {voice_name}, {language} time: {datetime.datetime.now()}\n")

    responses = dashscope.MultiModalConversation.call(
        api_key=API_KEY,
        model="qwen3-tts-flash",
        text=text,
        voice=voice_name,
        stream=True,
        language_type=language,
    )

    pcm_parts = []
    for chunk in responses:
        try:
            audio_string = chunk.output.audio.data
        except (AttributeError, KeyError, TypeError):
            # Chunk carries no audio payload (e.g. an error response):
            # log it for diagnosis and move on instead of decoding garbage.
            print(chunk)
            continue
        if not audio_string:
            # Chunks with empty/None data have nothing to decode.
            continue
        pcm_parts.append(base64.b64decode(audio_string))

    # Decode once over the joined stream so a sample split across two chunks
    # is still read correctly; drop a dangling odd byte rather than crash.
    pcm = b"".join(pcm_parts)
    pcm = pcm[: len(pcm) - len(pcm) % 2]
    if not pcm:
        return None

    # NOTE(review): assumes the stream is raw 16-bit PCM at 24 kHz, as the
    # original int16 decoding and hard-coded rate imply — confirm with the API docs.
    sample_rate = 24000
    full_audio = np.frombuffer(pcm, dtype=np.int16).astype(np.float32) / 32768.0
    return (sample_rate, full_audio)
96
+
97
# Page look: Soft theme with a Google font fallback chain, full-width container.
soft_theme = gr.themes.Soft(
    font=[gr.themes.GoogleFont("Source Sans Pro"), "Arial", "sans-serif"],
)
page_css = ".gradio-container {max-width: none !important;}"

# NOTE(review): the indentation of this section was lost in the source this
# was recovered from; the nesting below (inputs and output as two columns of
# one row, examples and click wiring directly under Blocks) is reconstructed
# from statement order — confirm against the deployed layout.
with gr.Blocks(theme=soft_theme, css=page_css) as demo:
    gr.Markdown("# 🎤 Qwen3-TTS Demo")

    with gr.Row():
        with gr.Column():
            # Text to synthesize (labels are English first, then Chinese).
            text_input = gr.Textbox(
                label="Input Text / 输入文本",
                placeholder="Enter text to synthesis here... / 在此输入要合成为语音的文本...",
                lines=4,
                max_lines=8,
            )

            # Voice picker.
            voice_select = gr.Dropdown(
                label="Select Voice / 选择发音人",
                choices=list(VOICE_OPTIONS),
                value=DEFAULT_VOICE,
            )

            # Language picker for the input text.
            language_select = gr.Dropdown(
                label="Select Text Language / 选择文本语言",
                choices=LANGUAGE_OPTIONS,
                value="Auto / 自动",
            )

            # Trigger button.
            generate_btn = gr.Button("Generate Speech / 生成语音", variant="primary")

        with gr.Column():
            # Read-only player for the synthesized audio.
            audio_output = gr.Audio(label="Generated Speech / 生成的语音", interactive=False)

    # Each example row fills (text, voice, language) in that order.
    example_rows = [
        ["你好,我是通义千问,很高兴认识你。", "Cherry / 芊悦", "Chinese / 中文"],
        ["你好,我是通义千问,很高兴认识你。", "Dylan / 北京-晓东", "Chinese / 中文"],
        ["Hello, this is a text-to-speech demo", "Jennifer / 詹妮弗", "English / 英文"],
        ["こんにちは、これはデモです", "Cherry / 芊悦", "Japanese / 日语"],
    ]
    examples = gr.Examples(
        examples=example_rows,
        inputs=[text_input, voice_select, language_select],
        label="Examples / 示例文本",
    )

    # Clicking the button runs synthesis and routes the result to the player.
    generate_btn.click(
        fn=tts_interface,
        inputs=[text_input, voice_select, language_select],
        outputs=audio_output,
    )

if __name__ == "__main__":
    demo.launch()