Mira1sen committed
Commit e569c5f · verified · 1 Parent(s): 9a7fea3

Upload folder using huggingface_hub

Files changed (20)
  1. 1.wav +0 -0
  2. 11LabsTTS.py +168 -0
  3. README.md +2 -8
  4. api.json +12 -0
  5. ensemble_app.py +439 -0
  6. gradio_demo.py +13 -0
  7. guge_api.py +32 -0
  8. install.sh +17 -0
  9. microsoftTTS.py +31 -0
  10. output.mp3 +0 -0
  11. test.mp3 +0 -0
  12. test.py +357 -0
  13. test2.mp3 +1 -0
  14. test3.mp3 +0 -0
  15. test4.mp3 +17 -0
  16. test9.mp3 +17 -0
  17. test99.mp3 +0 -0
  18. try.py +33 -0
  19. try2.py +0 -0
  20. tts_gradio.py +255 -0
1.wav ADDED
Binary file (46.1 kB).
 
11LabsTTS.py ADDED
@@ -0,0 +1,168 @@
"""
import requests
import gradio as gr

CHUNK_SIZE = 1024

headers1 = {
    "Accept": "application/json",
    "xi-api-key": "54f884da3108725f26af02d5907d1eb4"
}

headers2 = {
    "Accept": "audio/mpeg",
    "Content-Type": "application/json",
    "xi-api-key": "54f884da3108725f26af02d5907d1eb4"
}

name_list = []

def elevenlabs_tts(text, voice_name):
    # Fetch the list of voices
    url1 = "https://api.elevenlabs.io/v1/voices"
    response1 = requests.get(url1, headers=headers1)
    voices = response1.json()['voices']

    for voice in voices:
        vid = voice['voice_id']
        vname = voice['name']
        label = voice['labels']
        info = {"voice_id": vid, "name": vname, "labels": label}

        name_list.append(info["name"])

        if info['name'] == voice_name:
            voice_id = info['voice_id']
            url2 = "https://api.elevenlabs.io/v1/text-to-speech/" + voice_id
            # print(infos)
            # return infos

    # Synthesize speech with the selected voice
    data = {
        "text": text,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }

    response2 = requests.post(url2, json=data, headers=headers2)
    with open('output.mp3', 'wb') as f:
        for chunk in response2.iter_content(chunk_size=CHUNK_SIZE):
            if chunk:
                f.write(chunk)

    return 'output.mp3'

demo = gr.Interface(
    fn=elevenlabs_tts,

    # Demo input settings
    inputs=[
        gr.Dropdown(name_list, label="Speaker"),
        gr.Textbox(label="Input text"),
    ],
    # Demo output settings
    outputs=[
        "audio",
        "text",
    ],
    # Other demo settings
    title="Text to Speech Synthesis",
    description="Synthesize speech from text using Elevenlabs",
    examples=[
        ["Rachel", "Hello World!"],
        ["Clyde", "This is a test."],
        ["Domi", "Gradio is awesome!"],
    ]
)

if __name__ == "__main__":
    demo.launch(share=True, server_name='0.0.0.0', server_port=8121)
    #print(name_list)

"""

import requests
import gradio as gr

CHUNK_SIZE = 1024

headers1 = {
    "Accept": "application/json",
    "xi-api-key": "54f884da3108725f26af02d5907d1eb4"
}

headers2 = {
    "Accept": "audio/mpeg",
    "Content-Type": "application/json",
    "xi-api-key": "54f884da3108725f26af02d5907d1eb4"
}

def get_voice_names():
    url1 = "https://api.elevenlabs.io/v1/voices"
    response1 = requests.get(url1, headers=headers1)
    voices = response1.json()['voices']
    names = [voice['name'] for voice in voices]
    return names

name_list = get_voice_names()

def elevenlabs_tts(voice_name, text):
    # Fetch the list of voices
    url1 = "https://api.elevenlabs.io/v1/voices"
    response1 = requests.get(url1, headers=headers1)
    voices = response1.json()['voices']
    #print(voice_name)
    #print(voices)

    # Look up the voice_id for the selected name
    # (assumes voice_name is always one of the names fetched above)
    for voice in voices:
        if voice['name'] == voice_name:
            voice_id = voice['voice_id']
            label = voice['labels']
            url2 = "https://api.elevenlabs.io/v1/text-to-speech/" + voice_id
            #print(voice_id)
            #print(label)
            break

    data = {
        "text": text,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }

    response2 = requests.post(url2, json=data, headers=headers2)
    with open('output.mp3', 'wb') as f:
        for chunk in response2.iter_content(chunk_size=CHUNK_SIZE):
            if chunk:
                f.write(chunk)

    return 'output.mp3', label

demo = gr.Interface(
    fn=elevenlabs_tts,
    inputs=[
        gr.Dropdown(name_list, label="Speaker"),
        gr.Textbox(label="Input text"),
    ],
    outputs=[
        "audio",
        "text",
    ],
    title="Text to Speech Synthesis",
    description="Synthesize speech from text using Elevenlabs",
    examples=[
        ["Rachel", "Hello World!"],
        ["Clyde", "This is a test."],
        ["Domi", "Gradio is awesome!"],
    ]
)

if __name__ == "__main__":
    demo.launch(share=True, server_name='0.0.0.0', server_port=8121)
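Note: the xi-api-key above is committed as a plain string literal, and a non-200 response would be written straight into output.mp3 (compare test2.mp3 below, which is an error body saved with an .mp3 extension). A minimal sketch of the same request with the key read from the environment; the ELEVENLABS_API_KEY variable name is illustrative, not part of this commit:

import os
import requests

API_KEY = os.environ["ELEVENLABS_API_KEY"]  # illustrative name, set outside the repo

def synthesize(text, voice_id, out_path="output.mp3"):
    """POST to the ElevenLabs TTS endpoint and stream the MP3 to disk."""
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": API_KEY,
    }
    data = {
        "text": text,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
    }
    response = requests.post(url, json=data, headers=headers, timeout=60)
    response.raise_for_status()  # fail loudly instead of saving an error body as audio
    with open(out_path, "wb") as f:
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
    return out_path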
README.md CHANGED
@@ -1,12 +1,6 @@
 ---
-title: Gradio
-emoji: 🦀
-colorFrom: blue
-colorTo: purple
+title: gradio
+app_file: gradio_demo.py
 sdk: gradio
 sdk_version: 4.36.1
-app_file: app.py
-pinned: false
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
api.json ADDED
@@ -0,0 +1,12 @@
{
  "type": "service_account",
  "project_id": "gravity-362603",
  "private_key_id": "aa2ae4651df33c1aab0670e07e265992f943c292",
  "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQC9VMCci15hlqhR\nv8ev+K1HNq5c309RsE0yxhRec1QsB1RYShIU0vkukbOXn5pqF0lRHasvtjpAuEVX\nXsUWrParLUdJJBsttAD9WwLBfek1cVfvTktHLyttM92V1R6ueWI0rHa0TQpVc/m0\nDjhuwCDDShRxMOdkmJ98ZT7Rpl9udw3O5tEyYkqkN8AFAtCwKsglxUFgEgHg60qr\nyAtdHm+K/wi4d1xlzvsmsr11227PFz5PYVAdpNMO5/JOopxTUp19KD8xo/pW1p1f\neDsJZc8dvwcwxei0qQ9FiFXm9rhSVkccMjsJf9PSOIRj9Q0q3Pe5HcaKB8kGRxBJ\nYQY+dM0hAgMBAAECggEAIskUjm6Iy7NLf6GVxb4R0Bi61nNs9lxHAsIj8eqMl35N\nP990DQFqKOU6MuxvCXj3zoB9+Ekp0MRKcTAHVBA5TeAGzehlWfNNjz8OdZB1rY0C\nlAQ1joGbH/g8iQ/cEEBbC8R4Fs5kzKh/Q1athaOlIVxV/yn2CaSFAOz1Kr2/oOLL\ntujyykgL69nkT4Namojh69GNIztLEQBjnrjSLXBJXYRov0FEVzA0DuI9ubV+upmS\nHEzgpHsJQCDVD/OryPH1KVN3Y1DCTdfSU3SUzOTiPv80v/H0GsZvfi/8M7rvNwgs\nZ4t+5Vs3zhOQXujT66dL7rzum2zyRcez1c6qU707tQKBgQDtC8e/au+jCKpjszuQ\nrMBhXl5BGowD5ODvLa1bB3r0V6C6uyryzXOTSnWcBiWqbv53BwQk+4LLYO/8g4MV\n3zD3EZK0L9l+UIOJbX2aHNf3fxU05zeuuREa35M93MgG+J5jAjhV3piWik8RcGtq\nQpbdNVVsVfATucGA9tH+r9YF4wKBgQDMeEUGsofjzH+PGkYWv6sxwtevBJAVqaVF\nVirT614ngGEgb1ct0lg7jRmiMTLrYwDNc3Wpel8jkfH7WNpDcrZjOv14qeVomdQa\n/3XUMdlxOxcFkJ14J6Jd4W3o/vQvPFlbUO/qH93WtotyLQW3pcQKGc0LK33UaLw1\nQFlSeJ/wKwKBgFgq4YjZBXjmaDndGHKfTo7owrYEYb+xCdjsbGZHSfxH+4h4xWi5\n8hr+vu/heXEDB7LN8USwJ93tmFWbtM4VzeiXqLFMeuD4oXQkPWts/wcXRZP/zD44\np5wPPHzt+ZheMMsysDGAMdEMcIxT/B/x1JtCkxtQe5NarBaDt2e7jydxAoGAMKh+\nZLWfuQo1YOyzuT4rk0/22+OOFlmkxvdWgdAFPAlFE97rNDBWZvWRIBPtNi79Recw\nHQDOGzzkBRiD+IEX/k1PfKORwG67FyGr/K9QO64AMIbVsn5cGVNIQKZLneISsbR1\nI5YpyKrwTF+CeM2t9Wlmbj4PV7kE8Pc7SnECPrkCgYBjzbcl+vTvfw6iy6z7iL+N\nQ+sgsUMoUymZF5xSH/iInzFDlrIZP+vJlC6eW/0Q2TZZqQU9eLUTTP9cB4XXEv45\nbvK1IlMSBeVHA3tSbpp/tNqb/YqDTyEV9AY2MMIAKyPReTUKZoRQMyOzzK03FYOk\nMbI0XZMpHoo60OECydP7Ng==\n-----END PRIVATE KEY-----\n",
  "client_email": "[email protected]",
  "client_id": "112160328750047841519",
  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
  "token_uri": "https://oauth2.googleapis.com/token",
  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/gcp-tts-gravity%40gravity-362603.iam.gserviceaccount.com"
}
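A service-account file like this is normally exchanged for a short-lived OAuth 2 access token rather than pasted into request headers. A minimal sketch with the google-auth library (assuming pip install google-auth; this helper is not part of the commit). The resulting token is what guge_api.py below hardcodes, which is why that script stops working once the token expires:

from google.oauth2 import service_account
from google.auth.transport.requests import Request

# Load the service-account key and mint a token scoped for Cloud APIs.
credentials = service_account.Credentials.from_service_account_file(
    "api.json",
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
credentials.refresh(Request())
print(credentials.token)  # use as "Authorization: Bearer <token>"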
ensemble_app.py ADDED
@@ -0,0 +1,439 @@
#!/usr/bin/env python3
# -*- coding=utf8 -*-
########################################################################
#
# Copyright (c) 2023 Baidu.com, Inc. All Rights Reserved
#
########################################################################

"""
Author: linxiaolong
"""
import warnings
warnings.filterwarnings("ignore")

# External libraries
import re
import requests
import argparse
import json
import os
import tempfile

import librosa
import numpy as np
import torch
from torch import no_grad, LongTensor
import commons
import gradio as gr
import gradio.utils as gr_utils
import gradio.processing_utils as gr_processing_utils

# Internal (in-house) libraries
from models import SynthesizerTrn
from text import text_to_sequence, text_to_sequence_for_test, _clean_text
from mel_processing import spectrogram_torch
import utils
from text.symbols import symbols

limitation = os.getenv("SYSTEM") == "spaces"  # limit text and audio length in huggingface spaces
punct_regex = re.compile(r"[\.!\?。!?]")
silence_duration = 200


def split_text(text, regex):
    """Split text into sentences by punctuation.

    Args:
        text: long text.
        regex: punctuation regex.

    Returns:
        list of sentences.
    """
    sentences = re.split(regex, text)
    puncts = re.findall(regex, text)

    for i, sentence in enumerate(sentences):
        if sentence == "":
            continue
        if i < len(puncts):
            sentences[i] = sentences[i] + puncts[i]
        else:
            sentences[i] = sentences[i] + "。"
    sentences = [i for i in sentences if i != ""]
    return sentences


def concat_audio(audio_list, sampling_rate=22050, silence_duration=1000):
    """Concatenate audio clips and insert silence between them.

    Args:
        audio_list: list of audio clips.
        sampling_rate: audio sampling rate. Defaults to 22050.
        silence_duration: silence duration in milliseconds. Defaults to 1000.

    Returns:
        concatenated audio.
    """
    silence_samples = int(sampling_rate * silence_duration / 1000)
    silence = np.zeros(silence_samples, dtype=np.float16)

    audio_num = len(audio_list)
    if audio_num < 2:
        return audio_list[0]
    audio_cat = audio_list[0]
    for i in range(1, audio_num):
        audio_cat = np.concatenate((audio_cat, silence, audio_list[i]), axis=0)

    return audio_cat


### Hyperparameters for the external TTS services
microsoft_url = "https://japaneast.tts.speech.microsoft.com/cognitiveservices/v1"
microsoft_headers = {'Content-Type': 'application/json; charset=utf-8',
                     'Ocp-Apim-Subscription-Key': '1f1ef0ce53b84261be94fab81df7e628'}
microsoft_model_list = [
    "ja-JP-NanamiNeural",
    "ja-JP-KeitaNeural",
    "ja-JP-AoiNeural",
    "ja-JP-DaichiNeural",
    "ja-JP-MayuNeural",
    "ja-JP-NaokiNeural",
    "ja-JP-ShioriNeural"
]

google_url = "http://gbu.jp02-a30-apisix-sandbox.baidu-int.com/gbu/rest/v2/tts/voice_gq"
google_headers = {'Content-Type': 'application/json; charset=utf-8',
                  'apikey': 'synclub-2383kjhjksxfv.2341gs'}
google_model_list = [
    "ja-JP-Neural2-B",
    "ja-JP-Neural2-C",
    "ja-JP-Neural2-D",
    "ja-JP-Standard-A",
    "ja-JP-Standard-B",
    "ja-JP-Standard-C",
    "ja-JP-Standard-D",
    "ja-JP-Wavenet-A",
    "ja-JP-Wavenet-B",
    "ja-JP-Wavenet-C",
    "ja-JP-Wavenet-D"
]

coefont_url = "http://gbu.jp02-a30-apisix-sandbox.baidu-int.com/gbu/rest/v2/tts/avatar_coe"
coefont_headers = {'Content-Type': 'application/json; charset=utf-8',
                   'apikey': 'synclub-2383kjhjksxfv.2341gs'}
coefont_id = [
    '3f84b7b1-30fb-4677-a704-fd136515303e',
    '9b826785-bea5-4740-b4cd-e9a286264705',
    '7632cba3-4aca-4cee-9d15-ad1ac31f670c',
    '2c91238a-96f9-4cb6-a69a-461ee66b0e6d',
    '08428dee-65b6-490e-a3a3-60dfcdda889d',
    'c88367bc-5954-426b-a1ba-a683202803c8',
    'fb64a764-91d5-4510-bddd-70df3d62709a',
    '5cfa1f33-bca8-4489-bcbe-701045993162',
    '94cf7792-7c0c-4be4-88e7-c30d26ab6616',
    '81dbd387-6ad6-4b22-93f9-4e2a0091b2fe',
    '931a8568-039a-4cef-add7-bee71629c00e',
    'f91a9d29-c8b4-443f-ba07-82e7e36bd20b',
    '23c76cf0-bee0-47fa-b735-9b7bdba9f26a',
    'cf5fdfb8-85ea-41e1-915b-257936791f17',
    '0f7b53df-3c24-46a5-84d1-cbea39a956c0',
    '3d499385-d331-4cbb-93c0-2057e60eddcf',
    '18ca2f7b-97ca-486d-8f47-858965833642',
    '33e0a2ff-5050-434c-9506-defe97e52f15',
    '516b0f32-8b5f-48c5-b60e-38d508e2b06b',
    'c8720caf-2d2d-4130-8831-92f61f9e25e8',
    '710001f5-e6f5-4cc0-8ba2-e6aa6da8d807',
    'd36f8bb1-8bd8-4e90-964a-9dbd3e374093',
    '2157796c-fe48-4688-b7cc-7ea554edf77d',
    '5cc0dc91-0c6a-4c50-b7d8-f3117cfe44ef',
    'be5c5295-aba2-4055-a9da-8926da7fb5a0',
    '76763239-af14-4c0d-9435-956f096f77dc',
    '10d298ee-ebbf-4838-a6c5-d608f2e3c338',
    '694cb06e-73bd-43c4-94d4-f775ad3dbb26',
    '5cf07e7c-5b1c-4360-a8de-7c928580d4b5',
    '76e2ba06-b23a-4bbe-8148-e30ede9001b9',
    'c25ed97f-78f7-4e8f-b2fa-f8e29633588b',
    'e26382ba-2ae2-4cf7-8c1b-420ab4b845d8',
    '82c4fcf5-d0ee-4fe9-9b0d-89a65d04f290'
]
coefont_model_list = [
    'Canel',
    '胡麻ちゃん',
    'バーチャル悪霊',
    '引寄\u3000法則',
    'にっし~☆',
    '志水 智(Tomo Shimizu)',
    '花撫シア-最高精度-しっかり読み上げ',
    'UNF/UserNotFound',
    'RoBaKu',
    'おにもち',
    '小菅 将太',
    '秋月つむぎ(落ち着いたナレーション)',
    '碧海紘斗_OhmiHiroto',
    'ちくわぶえ',
    'unnamed',
    '今井瑶子(高精度。MC ナレーター 落ち着いたトーンです)',
    '皆のお母さん',
    '後藤邑子',
    '田中和彦',
    'KTNR',
    '天渡\u3000早苗',
    '須戸ゼロ',
    'とり藻々',
    '武田 祐子',
    '【PRO】落ち着きナレーション♯畑耕平',
    '音暖ののん Ver2.0(最高精度)',
    'ろさちゃん-soft-v2[最高精度] ¦ Losa-chan -soft- ∀ -汎用式概念χ',
    'パイナップル秀夫お姉さん',
    'minamo',
    'あさのゆき',
    '聲華 琴音【紡】',
    '黄琴海月【うるとら】',
    '高橋 俊輔']
coefont_id_model_name_dict = dict(zip(coefont_model_list, coefont_id))

all_example = "今日は天気がいいから、一緒にハイキングに行きましょう。"

# def audio_postprocess(self, y):
#     """
#     Override gradio's audio post-processing function
#     :param self:
#     :param y:
#     :return:
#     """
#     if y is None:
#         return None

#     if gr_utils.validate_url(y):
#         file = gr_processing_utils.download_to_file(y, dir=self.temp_dir)
#     elif isinstance(y, tuple):
#         sample_rate, data = y
#         file = tempfile.NamedTemporaryFile(
#             suffix=".wav", dir=self.temp_dir, delete=False
#         )
#         gr_processing_utils.audio_to_file(sample_rate, data, file.name)
#     else:
#         file = gr_processing_utils.create_tmp_copy_of_file(y, dir=self.temp_dir)

#     return gr_processing_utils.encode_url_or_file_to_base64(file.name)

# gr.Audio.postprocess = audio_postprocess

def get_text(text, hps):
    """
    :param text:
    :param hps:
    :return:
    """
    # hps does not include symbols
    text_norm = text_to_sequence(text, hps.data.text_cleaners)
    # hps includes symbols
    # text_norm = text_to_sequence_for_test(text, hps.symbols, hps.data.text_cleaners)
    if hps.data.add_blank:
        text_norm = commons.intersperse(text_norm, 0)
    text_norm = LongTensor(text_norm)
    return text_norm


def create_tts_fn(model, hps):
    """
    :param model:
    :param hps:
    :return:
    """
    def tts_fn(text, speed, noise_scale=.667, noise_scale_w=0.8, volume=1.0):
        """
        :param text:
        :param speed:
        :param volume:
        :return:
        """
        sentences = split_text(text, punct_regex)
        audio_list = []
        for sentence in sentences:
            stn_tst = get_text(sentence, hps)
            with no_grad():
                x_tst = stn_tst.unsqueeze(0).to(device)
                x_tst_lengths = LongTensor([stn_tst.size(0)]).to(device)
                audio = model.infer(x_tst, x_tst_lengths, noise_scale=noise_scale, noise_scale_w=noise_scale_w,
                                    length_scale=1.0 / speed)[0][0, 0].data.cpu().float().numpy()
            audio_list.append(audio)
            del stn_tst, x_tst, x_tst_lengths
        audio = concat_audio(audio_list, hps.data.sampling_rate, silence_duration)
        audio = audio * volume
        return "Success", (hps.data.sampling_rate, audio)
    return tts_fn


def microsoft(text, name, style="Neural"):
    """
    :param text:
    :param name:
    :param style:
    :return:
    """
    headers = {
        'Ocp-Apim-Subscription-Key': '1f1ef0ce53b84261be94fab81df7e628',
        'Content-Type': 'application/ssml+xml',
        'X-Microsoft-OutputFormat': 'audio-16khz-128kbitrate-mono-mp3',
        'User-Agent': 'curl',
    }

    data = ("<speak version='1.0' xml:lang='en-US'>"
            f"<voice xml:lang='en-US' name='{name}'>"  # xml:gender='Female'
            f"{text}"
            "</voice>"
            "</speak>")

    # NOTE: the response of this direct Azure call is never used below.
    response = requests.post(
        'https://japaneast.tts.speech.microsoft.com/cognitiveservices/v1',
        headers=headers,
        data=data,
        proxies={
            'http': 'http://192.168.3.11:80',
            'https': 'http://192.168.3.11:80',
        }
    )
    data = {
        "text": text,
        "name": name,
        "style": style,
        "format": "mp3"}
    audio_url = requests.get(microsoft_url, headers=microsoft_headers, json=data).json()['data']['url']
    return "Success", audio_url


def google(text, name):
    """
    :param text:
    :param name:
    :return:
    """
    data = {
        "text": text,
        "name": name,
        "sample_rate": 16000}
    audio_url = requests.get(google_url, headers=google_headers, json=data).json()['data']['url']
    return "Success", audio_url


def coefont(text, name):
    """
    :param text:
    :param name:
    :return:
    """
    data = {
        "text": text,
        "coefont": coefont_id_model_name_dict[name]
    }
    audio_url = requests.get(coefont_url, headers=coefont_headers, json=data).json()['data']['url']
    return "Success", audio_url


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', type=str, default='cuda')
    parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
    parser.add_argument("--port", type=int, default=8080, help="port")
    parser.add_argument('--model_info_path', type=str, default='/gluster/speech_data/info.json')
    args = parser.parse_args()

    device = torch.device(args.device)
    models_tts = []

    with open(args.model_info_path, "r", encoding="utf-8") as f:
        models_info = json.load(f)
    for i, info in models_info.items():
        model_name = info["model_name"]
        author = info["author"]
        lang = info["lang"]
        example = info["example"]
        config_path = info["config_path"]
        model_path = info["model_path"]
        model_type = info["model_type"]

        hps = utils.get_hparams_from_file(config_path)
        if model_type == "vits":
            emotion_type = None
        elif model_type == "vits-emotion":
            emotion_type = "embedding"
        elif model_type == "vits-emotion-logits":
            emotion_type = "logits"

        model = SynthesizerTrn(
            len(symbols),
            hps.data.filter_length // 2 + 1,
            hps.train.segment_size // hps.data.hop_length,
            emotion_type=emotion_type,
            **hps.model)

        utils.load_checkpoint(model_path, model, None)
        model.eval().to(device)
        if model_type == "vits":
            # Plain TTS
            models_tts.append((model_name, author, lang, example, create_tts_fn(model, hps)))
    app = gr.Blocks()
    with app:
        gr.Markdown("## Japanese TTS Demo")
        with gr.Tabs():
            with gr.TabItem("In-house"):
                with gr.Tabs():
                    for i, (model_name, author, lang, example, tts_fn) in enumerate(models_tts):
                        with gr.TabItem(model_name):
                            with gr.Column():
                                tts_input1 = gr.TextArea(label="Text", value=example)
                                tts_input2 = gr.Slider(label="Speed", value=1.0, minimum=0.4, maximum=3, step=0.1)
                                tts_input3 = gr.Slider(label="noise_scale", value=0.0, minimum=0.0, maximum=2, step=0.1)
                                tts_input4 = gr.Slider(label="noise_scale_w", value=0.0,
                                                       minimum=0.0, maximum=2, step=0.1)
                                tts_input5 = gr.Slider(label="volume", value=1.0, minimum=0.1, maximum=4, step=0.1)
                                tts_submit = gr.Button("Generate", variant="primary")
                                tts_output1 = gr.Textbox(label="Output Message")
                                tts_output2 = gr.Audio(label="Output Audio")
                                tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3, tts_input4, tts_input5],
                                                 [tts_output1, tts_output2])

            with gr.TabItem("Google"):
                tts_input1 = gr.TextArea(label="Text", value=all_example)
                tts_input2 = gr.Dropdown(google_model_list, label="name")
                tts_submit = gr.Button("Generate", variant="primary")
                tts_output1 = gr.Textbox(label="Output Message")
                tts_output2 = gr.Audio(label="Output Audio")
                tts_submit.click(google, [tts_input1, tts_input2],
                                 [tts_output1, tts_output2])

            with gr.TabItem("Microsoft"):
                tts_input1 = gr.TextArea(label="Text", value=all_example)
                tts_input2 = gr.Dropdown(microsoft_model_list, label="name")
                tts_submit = gr.Button("Generate", variant="primary")
                tts_output1 = gr.Textbox(label="Output Message")
                tts_output2 = gr.Audio(label="Output Audio")
                tts_submit.click(microsoft, [tts_input1, tts_input2],
                                 [tts_output1, tts_output2])

            with gr.TabItem("coefont"):
                tts_input1 = gr.TextArea(label="Text", value=all_example)
                tts_input2 = gr.Dropdown(coefont_model_list, label="name")
                tts_submit = gr.Button("Generate", variant="primary")
                tts_output1 = gr.Textbox(label="Output Message")
                tts_output2 = gr.Audio(label="Output Audio")
                tts_submit.click(coefont, [tts_input1, tts_input2],
                                 [tts_output1, tts_output2])

    app.queue(concurrency_count=5).launch(show_api=False,
                                          share=args.share,
                                          server_name='0.0.0.0',
                                          server_port=args.port,
                                          show_error=True)
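A quick illustration of the two helpers at the top of ensemble_app.py (hypothetical inputs; assumes the module and its model dependencies are importable): split_text cuts at the punctuation regex while keeping the punctuation, and concat_audio inserts silence_duration milliseconds of zeros between clips:

import numpy as np
from ensemble_app import split_text, concat_audio, punct_regex

sentences = split_text("今日は晴れ。散歩に行こう!", punct_regex)
# -> ["今日は晴れ。", "散歩に行こう!"]

a = np.zeros(100, dtype=np.float32)
b = np.ones(100, dtype=np.float32)
out = concat_audio([a, b], sampling_rate=22050, silence_duration=200)
# len(out) == 100 + int(22050 * 200 / 1000) + 100 == 4610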
gradio_demo.py ADDED
@@ -0,0 +1,13 @@
import gradio as gr

def greet(name):
    return "Hello " + name + "!"

with gr.Blocks() as demo:
    # Input component
    name = gr.Textbox(label="Name")
    # Output component
    output = gr.Textbox(label="Output Box")
    # Button
    greet_btn = gr.Button("Greet")
    # Button click event; components stack vertically by default
    greet_btn.click(fn=greet, inputs=name, outputs=output)

demo.launch(share=True)
guge_api.py ADDED
@@ -0,0 +1,32 @@
import requests
import base64
import json
import os
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./api.json"

url = "https://texttospeech.googleapis.com/v1/text:synthesize"
headers = {
    'Content-Type': 'application/json; charset=utf-8',
    'Authorization': 'Bearer ya29.c.b0Aaekm1Iv0qkoIurI3LgzvNRqWE9tZRNusy9HCLT_xcpTXvdNsvYQt_9PUIXDQAvHV-b9abi-_n9Totai8fgJ7qkJ5sJszhU8bKTkP9zPJccfTkU4pCvyJxCVO1APIj7p3Hdvl9LWAIGb6aqFdz0a2dSn2mzsGKSsv0UqjlPr4M4s08HWkW8fqdE8UO98xphUTm0bEYmYFn-feWAPG5QToNi62c_4KZxveCCPhZZOtWLSPCGOj-D9z1YIHZZcdWz8mWQ5lO2WpASEroak4ohRchjSEpnJka8F3uwoY36Oms4M32d9uKYbsdNMFfS1CudzvDYXKSBkH337KZqb6o-zhFUm13Ivxlr1Zbtvv6dw0kbg_my1-p69v9tbXIsgxvgYfjMfhxgxfOX3aJxsgnU-jw0d1w5jgs70InjbZRagcrRth24Sm4wzyma40j5UodiIvI0sRxQOBUzaZ93YXZje1S_vtoxcSUl9VSrQY9rWt2uk2Ili-aIsI6rUa06McJcnSB-jQhq61z3xpoqRXVd04SMZlc1YnWxhZVYakZoRIivMwFkjRvb0SIn7x_1M880QF4IYzMOb7eVkJW2t41u5y2rWSc7rerjetWd2vRehn63vJRp0jn3lskyM5YZqqiWs4VYjqdVt7f9jy3qR2Vgn5V2hvSdrjdbkSgc1UB3fl4k-4sbjrd-M3OpO99R0I921-nMc8lBgIFum9qZ-VcWJJ-ecYZtygSXiiR6ljX6p70h6m90vVbc8bwnk7ez99mUOzhkuR8B8wb1aJd8XyzdYuqwWfMyq7U89dUju-m6dji-XeMuSSwSiueaVhsU68FXiJa_-ieZra1Saak7zjq_feg8hjZRujbh0e_YqRO4Bhf7-5F-UrO1n7XwJIzofR_uxJfgxs6MuFxfUJX87azO1I31Zty5ZSc2Q4mynq0mlQWOvhl7fr8pSJMzfQtOJW6wOc0RrW3ouIB-mWRrie46gbqF0FzSylkipZ3JasoIeO-gS_olF-YfYR5i'
}
text = "二月の下旬に差し掛かる頃だった。"

data = {
    "input": {
        "text": text
    },
    "voice": {
        "languageCode": "ja-JP",
        "name": "ja-JP-Neural2-C",
        "ssmlGender": "MALE"
    },
    "audioConfig": {
        "audioEncoding": "MP3"
    }
}
response = requests.post(url, headers=headers, json=data)
print(response)
# response = response.json()
audio = response.content
#audio = base64.b64decode(audio)
# The response's audio_content is binary.
# NOTE: this endpoint actually returns JSON with a base64 "audioContent" field,
# so writing response.content verbatim saves JSON rather than MP3
# (see try.py in this commit for the decoded variant).
with open("test9.mp3", "wb") as out:
    out.write(audio)
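The hardcoded ya29. token above is short-lived, which is why test4.mp3 and test9.mp3 below contain UNAUTHENTICATED errors instead of audio. A sketch that fetches a fresh token at runtime, mirroring the subprocess idea commented out in test.py (assumes an installed and authenticated gcloud CLI):

import subprocess
import requests

# Fetch a short-lived OAuth 2 access token from the gcloud CLI.
token = subprocess.run(
    ["gcloud", "auth", "print-access-token"],
    capture_output=True, text=True, check=True,
).stdout.strip()

headers = {
    "Content-Type": "application/json; charset=utf-8",
    "Authorization": f"Bearer {token}",
}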
install.sh ADDED
@@ -0,0 +1,17 @@
#!/bin/bash

URL=https://dl.google.com/dl/cloudsdk/channels/rapid/install_google_cloud_sdk.bash

function download {
    scratch="$(mktemp -d -t tmp.XXXXXXXXXX)" || exit
    script_file="$scratch/install_google_cloud_sdk.bash"

    echo "Downloading Google Cloud SDK install script: $URL"
    curl -# "$URL" > "$script_file" || exit
    chmod 775 "$script_file"

    echo "Running install script from: $script_file"
    "$script_file" "$@"
}

download "$@"
microsoftTTS.py ADDED
@@ -0,0 +1,31 @@
import os
import azure.cognitiveservices.speech as speechsdk

speech_key = "1f1ef0ce53b84261be94fab81df7e628"
service_region = "japaneast"
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

# This example requires environment variables named "SPEECH_KEY" and "SPEECH_REGION"
#speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'))
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)

# The neural multilingual voice can speak different languages based on the input text.
speech_config.speech_synthesis_voice_name = 'en-US-AvaMultilingualNeural'

speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

# Get text from the console and synthesize to the default speaker.
print("Enter some text that you want to speak >")
text = input()

speech_synthesis_result = speech_synthesizer.speak_text_async(text).get()

if speech_synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Speech synthesized for text [{}]".format(text))
elif speech_synthesis_result.reason == speechsdk.ResultReason.Canceled:
    cancellation_details = speech_synthesis_result.cancellation_details
    print("Speech synthesis canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == speechsdk.CancellationReason.Error:
        if cancellation_details.error_details:
            print("Error details: {}".format(cancellation_details.error_details))
            print("Did you set the speech resource key and region values?")
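microsoftTTS.py plays through the default speaker. If a file is wanted instead, the Speech SDK's AudioOutputConfig also accepts a filename; a small sketch under the same key and region (the output path is illustrative):

import azure.cognitiveservices.speech as speechsdk

speech_key = "1f1ef0ce53b84261be94fab81df7e628"  # same key/region as the script above
service_region = "japaneast"
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
speech_config.speech_synthesis_voice_name = 'en-US-AvaMultilingualNeural'
# Write the synthesized audio to a WAV file instead of the default speaker.
file_config = speechsdk.audio.AudioOutputConfig(filename="synth_output.wav")
synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=file_config)
result = synthesizer.speak_text_async("Hello from a file.").get()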
output.mp3 ADDED
Binary file (36.4 kB).
 
test.mp3 ADDED
Binary file (35.7 kB).
 
test.py ADDED
@@ -0,0 +1,357 @@
import warnings
warnings.filterwarnings("ignore")

# External libraries
import re
import requests
import argparse
import json
import os
import tempfile
import librosa
import numpy as np
# import torch
# from torch import no_grad, LongTensor
# import commons
import gradio as gr
import gradio.utils as gr_utils
import gradio.processing_utils as gr_processing_utils

# Internal (in-house) libraries
# from models import SynthesizerTrn
# from text import text_to_sequence, text_to_sequence_for_test, _clean_text
# from mel_processing import spectrogram_torch
# import utils
# from text.symbols import symbols
all_example = "my voice is my passport verify me."

eleven_voice_id = [
    "21m00Tcm4TlvDq8ikWAM",
    "29vD33N1CtxCmqQRPOHJ",
    "2EiwWnXFnvU5JabPnv8n",
    "5Q0t7uMcjvnagumLfvZi",
    "AZnzlk1XvdvUeBnXmlld",
    "CYw3kZ02Hs0563khs1Fj",
    "D38z5RcWu1voky8WS1ja",
    "EXAVITQu4vr4xnSDxMaL",
    "ErXwobaYiN019PkySvjV",
    "GBv7mTt0atIp3Br8iCZE",
    "IKne3meq5aSn9XLyUdCD",
    "JBFqnCBsd6RMkjVDRZzb",
    "LcfcDJNUP1GQjkzn1xUU",
    "MF3mGyEYCl7XYWbV9V6O",
    "N2lVS1w4EtoT3dr4eOWO",
    "ODq5zmih8GrVes37Dizd",
    "SOYHLrjzK2X1ezoPC6cr",
    "TX3LPaxmHKxFdv7VOQHJ",
    "ThT5KcBeYPX3keUQqHPh",
    "TxGEqnHWrfWFTfGW9XjX",
    "VR6AewLTigWG4xSOukaG",
    "XB0fDUnXU5powFXDhCwa",
    "Xb7hH8MSUJpSbSDYk0k2",
    "XrExE9yKIg1WjnnlVkGX",
    "ZQe5CZNOzWyzPSCn5a3c",
    "Zlb1dXrM653N07WRdFW3",
    "bVMeCyTHy58xNoL34h3p",
    "flq6f7yk4E4fJM5XTYuZ",
    "g5CIjZEefAph4nQFvHAz",
    "iP95p4xoKVk53GoZ742B",
    "jBpfuIE2acCO8z3wKNLl",
    "jsCqWAovK2LkecY7zXl4",
    "nPczCjzI2devNBz1zQrb",
    "oWAxZDx7w5VEj9dCyTzz",
    "onwK4e9ZLuTAKqWW03F9",
    "pFZP5JQG7iQjIQuC4Bku",
    "pMsXgVXv3BLzUgSXRplE",
    "pNInz6obpgDQGcFmaJgB",
    "piTKgcLEGmPE4e6mEKli",
    "pqHfZKP75CvOlQylNhV4",
    "t0jbNlBVZ17f02VDIeMI",
    "yoZ06aMxZJJ28mfd3POQ",
    "z9fAnlkpzviPz146aGWa",
    "zcAOhNBS3c14rBihAFp1",
    "zrHiDhphv9ZnVXBqCLjz",
]

eleven_name = [
    "Rachel",
    "Drew",
    "Clyde",
    "Paul",
    "Domi",
    "Dave",
    "Fin",
    "Sarah",
    "Antoni",
    "Thomas",
    "Charlie",
    "George",
    "Emily",
    "Elli",
    "Callum",
    "Patrick",
    "Harry",
    "Liam",
    "Dorothy",
    "Josh",
    "Arnold",
    "Charlotte",
    "Alice",
    "Matilda",
    "James",
    "Joseph",
    "Jeremy",
    "Michael",
    "Ethan",
    "Chris",
    "Gigi",
    "Freya",
    "Brian",
    "Grace",
    "Daniel",
    "Lily",
    "Serena",
    "Adam",
    "Nicole",
    "Bill",
    "Jessie",
    "Sam",
    "Glinda",
    "Giovanni",
    "Mimi",
]
eleven_id_model_name_dict = dict(zip(eleven_name, eleven_voice_id))

def openai(text, name):

    headers = {
        'Authorization': 'Bearer ' + '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',
        'Content-Type': 'application/json',
    }

    json_data = {
        'model': 'tts-1-hd',
        'input': f'{text}',
        'voice': f'{name}',
    }

    response = requests.post('https://api.openai.com/v1/audio/speech', headers=headers, json=json_data)

    # Note: json_data will not be serialized by requests
    # exactly as it was in the original request.
    #data = '{\n  "model": "tts-1",\n  "input": "The quick brown fox jumped over the lazy dog.",\n  "voice": "alloy"\n  }'
    #response = requests.post('https://api.openai.com/v1/audio/speech', headers=headers, data=data)

    return "Success", response

def elevenlabs(text, name):
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{name}"
    CHUNK_SIZE = 1024
    #url = "https://api.elevenlabs.io/v1/text-to-speech/<voice-id>"

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": "a3391f0e3ff8472b61978dbb70ccc6fe"
    }

    data = {
        "text": f"{text}",
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }

    response = requests.post(url, json=data, headers=headers)
    # with open('output.mp3', 'wb') as f:
    #     for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
    #         if chunk:
    #             f.write(chunk)
    return "Success", response

microsoft_model_list = [
    "en-US-AvaMultilingualNeural"
]

def microsoft(text, name, style="Neural"):
    """
    :param text:
    :param name:
    :param style:
    :return:
    """
    headers = {
        'Ocp-Apim-Subscription-Key': '1f1ef0ce53b84261be94fab81df7e628',
        'Content-Type': 'application/ssml+xml',
        'X-Microsoft-OutputFormat': 'audio-16khz-128kbitrate-mono-mp3',
        'User-Agent': 'curl',
    }

    data = ("<speak version='1.0' xml:lang='en-US'>"
            f"<voice xml:lang='en-US' name='{name}'>"  # xml:gender='Female'
            f"{text}"
            "</voice>"
            "</speak>")

    response = requests.post(
        'https://japaneast.tts.speech.microsoft.com/cognitiveservices/v1',
        headers=headers,
        data=data,
    )
    # data = {
    #     "text": text,
    #     "name": name,
    #     "style": style,
    #     "format": "mp3"}
    # audio_url = requests.get(microsoft_url, headers=microsoft_headers, json=data).json()['data']['url']
    return "Success", response

# def google(text, name):
#     # import subprocess
#     # command1 = subprocess.run('gcloud auth print-access-token', shell=True, capture_output=True, text=True).stdout

#     headers = {
#         'Authorization': 'Bearer ' + "synclub-2383kjhjksxfv.2341gs",
#         'x-goog-user-project': 'PROJECT_ID',
#         'Content-Type': 'application/json; charset=utf-8',
#     }

#     data = {
#         "input": {
#             "text": f"{text}"},
#         "voice": {
#             "languageCode": "en-gb",
#             "name": "en-GB-Standard-A",
#             "ssmlGender": "FEMALE"
#         },
#         "audioConfig": {
#             "audioEncoding": "MP3"
#         }
#     }

#     response = requests.post('https://texttospeech.googleapis.com/v1/text:synthesize', headers=headers, data=data)
#     return "Success", response
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', type=str, default='cuda')
    parser.add_argument("--share", action="store_true", default=True, help="share gradio app")
    parser.add_argument("--port", type=int, default=8081, help="port")
    parser.add_argument('--model_info_path', type=str, default='/gluster/speech_data/info.json')
    args = parser.parse_args()
    # app = gr.Blocks()
    # with app:
    #     gr.Markdown("## Japanese TTS Demo")
    #     with gr.Tabs():
    #         with gr.TabItem("Microsoft"):
    #             tts_input1 = gr.TextArea(label="Text", value=all_example)
    #             tts_input2 = gr.Dropdown(microsoft_model_list, label="name")
    #             tts_submit = gr.Button("Generate", variant="primary")
    #             tts_output1 = gr.Textbox(label="Output Message")
    #             tts_output2 = gr.Audio(label="Output Audio")
    #             tts_submit.click(microsoft, [tts_input1, tts_input2],
    #                              [tts_output1, tts_output2])
    # _, audio = microsoft(all_example, 'en-US-AvaMultilingualNeural')
    # _, audio = google(all_example, 'alloy')
    # print(audio)
    # with open("test4.mp3", "wb") as f:
    #     f.write(audio.content)
    #_, audio = elevenlabs(all_example, "21m00Tcm4TlvDq8ikWAM")
    # print(audio)
    # with open('output.mp3', 'wb') as f:
    #     for chunk in audio.iter_content(chunk_size=1024):
    #         if chunk:
    #             f.write(chunk)

    # device = torch.device(args.device)
    # models_tts = []

    # with open(args.model_info_path, "r", encoding="utf-8") as f:
    #     models_info = json.load(f)
    # for i, info in models_info.items():
    #     model_name = info["model_name"]
    #     author = info["author"]
    #     lang = info["lang"]
    #     example = info["example"]
    #     config_path = info["config_path"]
    #     model_path = info["model_path"]
    #     model_type = info["model_type"]

    #     hps = utils.get_hparams_from_file(config_path)
    #     if model_type == "vits":
    #         emotion_type = None
    #     elif model_type == "vits-emotion":
    #         emotion_type = "embedding"
    #     elif model_type == "vits-emotion-logits":
    #         emotion_type = "logits"

    #     model = SynthesizerTrn(
    #         len(symbols),
    #         hps.data.filter_length // 2 + 1,
    #         hps.train.segment_size // hps.data.hop_length,
    #         emotion_type=emotion_type,
    #         **hps.model)

    #     utils.load_checkpoint(model_path, model, None)
    #     model.eval().to(device)
    #     if model_type == "vits":
    #         # Plain TTS
    #         models_tts.append((model_name, author, lang, example, create_tts_fn(model, hps)))


    app = gr.Blocks()
    with app:
        gr.Markdown("## Japanese TTS Demo")
        with gr.Tabs():
            # with gr.TabItem("In-house"):
            #     with gr.Tabs():
            #         for i, (model_name, author, lang, example, tts_fn) in enumerate(models_tts):
            #             with gr.TabItem(model_name):
            #                 with gr.Column():
            #                     tts_input1 = gr.TextArea(label="Text", value=example)
            #                     tts_input2 = gr.Slider(label="Speed", value=1.0, minimum=0.4, maximum=3, step=0.1)
            #                     tts_input3 = gr.Slider(label="noise_scale", value=0.0, minimum=0.0, maximum=2, step=0.1)
            #                     tts_input4 = gr.Slider(label="noise_scale_w", value=0.0,
            #                                            minimum=0.0, maximum=2, step=0.1)
            #                     tts_input5 = gr.Slider(label="volume", value=1.0, minimum=0.1, maximum=4, step=0.1)
            #                     tts_submit = gr.Button("Generate", variant="primary")
            #                     tts_output1 = gr.Textbox(label="Output Message")
            #                     tts_output2 = gr.Audio(label="Output Audio")
            #                     tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3, tts_input4, tts_input5],
            #                                      [tts_output1, tts_output2])

            # with gr.TabItem("Google"):
            #     tts_input1 = gr.TextArea(label="Text", value=all_example)
            #     tts_input2 = gr.Dropdown(google_model_list, label="name")
            #     tts_submit = gr.Button("Generate", variant="primary")
            #     tts_output1 = gr.Textbox(label="Output Message")
            #     tts_output2 = gr.Audio(label="Output Audio")
            #     tts_submit.click(google, [tts_input1, tts_input2],
            #                      [tts_output1, tts_output2])

            with gr.TabItem("Microsoft"):
                tts_input1 = gr.TextArea(label="Text", value=all_example)
                tts_input2 = gr.Dropdown(microsoft_model_list, label="name")
                tts_submit = gr.Button("Generate", variant="primary")
                tts_output1 = gr.Textbox(label="Output Message")
                tts_output2 = gr.Audio(label="Output Audio")
                tts_submit.click(microsoft, [tts_input1, tts_input2],
                                 [tts_output1, tts_output2])

            # with gr.TabItem("coefont"):
            #     tts_input1 = gr.TextArea(label="Text", value=all_example)
            #     tts_input2 = gr.Dropdown(coefont_model_list, label="name")
            #     tts_submit = gr.Button("Generate", variant="primary")
            #     tts_output1 = gr.Textbox(label="Output Message")
            #     tts_output2 = gr.Audio(label="Output Audio")
            #     tts_submit.click(coefont, [tts_input1, tts_input2],
            #                      [tts_output1, tts_output2])

    app.launch(show_api=False,
               share=args.share,
               server_name='0.0.0.0',
               server_port=args.port,
               show_error=True)
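A side note on the elevenlabs() helper above: it expects a raw voice_id in the URL path, and test2.mp3 below holds the invalid_uid error that the API appears to return when something else (such as a display name or a literal placeholder) lands there. Mapping the display name first avoids it, as tts_gradio.py does:

# Map display name -> voice_id before building the endpoint URL.
voice_id = eleven_id_model_name_dict["Rachel"]  # -> "21m00Tcm4TlvDq8ikWAM"
status, response = elevenlabs(all_example, voice_id)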
test2.mp3 ADDED
@@ -0,0 +1 @@
{"detail":{"status":"invalid_uid","message":"An invalid ID has been received: '<string>'. Make sure to provide a correct one."}}
test3.mp3 ADDED
Binary file (43.2 kB).
 
test4.mp3 ADDED
@@ -0,0 +1,17 @@
{
  "error": {
    "code": 401,
    "message": "Request had invalid authentication credentials. Expected OAuth 2 access token, login cookie or other valid authentication credential. See https://developers.google.com/identity/sign-in/web/devconsole-project.",
    "status": "UNAUTHENTICATED",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
        "reason": "ACCESS_TOKEN_TYPE_UNSUPPORTED",
        "metadata": {
          "method": "google.cloud.texttospeech.v1.TextToSpeech.SynthesizeSpeech",
          "service": "texttospeech.googleapis.com"
        }
      }
    ]
  }
}
test9.mp3 ADDED
@@ -0,0 +1,17 @@
{
  "error": {
    "code": 401,
    "message": "Request had invalid authentication credentials. Expected OAuth 2 access token, login cookie or other valid authentication credential. See https://developers.google.com/identity/sign-in/web/devconsole-project.",
    "status": "UNAUTHENTICATED",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
        "reason": "ACCESS_TOKEN_TYPE_UNSUPPORTED",
        "metadata": {
          "service": "texttospeech.googleapis.com",
          "method": "google.cloud.texttospeech.v1.TextToSpeech.SynthesizeSpeech"
        }
      }
    ]
  }
}
test99.mp3 ADDED
Binary file (43.2 kB).
 
try.py ADDED
@@ -0,0 +1,33 @@
import requests
import os
import base64
import json
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./api.json"

url = "https://texttospeech.googleapis.com/v1/text:synthesize"
headers = {
    'Content-Type': 'application/json; charset=utf-8',
    'X-Goog-Api-Key': 'synclub-2383kjhjksxfv.2341gs'  # to be filled in
}
text = "二月の下旬に差し掛かる頃だった。"

data = {
    "input": {
        "text": text
    },
    "voice": {
        "languageCode": "ja-JP",
        "name": "ja-JP-Neural2-C",
        "ssmlGender": "MALE"
    },
    "audioConfig": {
        "audioEncoding": "MP3"
    }
}
response = requests.post(url, headers=headers, json=data)
response = response.json()
print(response)
audio = response['audioContent']
audio = base64.b64decode(audio)
# The response's audio_content is binary.
with open("test.mp3", "wb") as out:
    out.write(audio)
try2.py ADDED
File without changes
tts_gradio.py ADDED
@@ -0,0 +1,255 @@
import warnings
warnings.filterwarnings("ignore")

# External libraries
import re
import requests
import argparse
import json
import os
import tempfile
import librosa
import numpy as np
# import torch
# from torch import no_grad, LongTensor
# import commons
import gradio as gr
import gradio.utils as gr_utils
import gradio.processing_utils as gr_processing_utils

all_example = "my voice is my passport verify me."

microsoft_model_list = [
    "en-US-AvaMultilingualNeural"
]

openai_model_list = [
    "alloy",
    "echo",
    "fable",
    "onyx",
    "nova",
    "shimmer"
]

eleven_voice_id = [
    "21m00Tcm4TlvDq8ikWAM",
    "29vD33N1CtxCmqQRPOHJ",
    "2EiwWnXFnvU5JabPnv8n",
    "5Q0t7uMcjvnagumLfvZi",
    "AZnzlk1XvdvUeBnXmlld",
    "CYw3kZ02Hs0563khs1Fj",
    "D38z5RcWu1voky8WS1ja",
    "EXAVITQu4vr4xnSDxMaL",
    "ErXwobaYiN019PkySvjV",
    "GBv7mTt0atIp3Br8iCZE",
    "IKne3meq5aSn9XLyUdCD",
    "JBFqnCBsd6RMkjVDRZzb",
    "LcfcDJNUP1GQjkzn1xUU",
    "MF3mGyEYCl7XYWbV9V6O",
    "N2lVS1w4EtoT3dr4eOWO",
    "ODq5zmih8GrVes37Dizd",
    "SOYHLrjzK2X1ezoPC6cr",
    "TX3LPaxmHKxFdv7VOQHJ",
    "ThT5KcBeYPX3keUQqHPh",
    "TxGEqnHWrfWFTfGW9XjX",
    "VR6AewLTigWG4xSOukaG",
    "XB0fDUnXU5powFXDhCwa",
    "Xb7hH8MSUJpSbSDYk0k2",
    "XrExE9yKIg1WjnnlVkGX",
    "ZQe5CZNOzWyzPSCn5a3c",
    "Zlb1dXrM653N07WRdFW3",
    "bVMeCyTHy58xNoL34h3p",
    "flq6f7yk4E4fJM5XTYuZ",
    "g5CIjZEefAph4nQFvHAz",
    "iP95p4xoKVk53GoZ742B",
    "jBpfuIE2acCO8z3wKNLl",
    "jsCqWAovK2LkecY7zXl4",
    "nPczCjzI2devNBz1zQrb",
    "oWAxZDx7w5VEj9dCyTzz",
    "onwK4e9ZLuTAKqWW03F9",
    "pFZP5JQG7iQjIQuC4Bku",
    "pMsXgVXv3BLzUgSXRplE",
    "pNInz6obpgDQGcFmaJgB",
    "piTKgcLEGmPE4e6mEKli",
    "pqHfZKP75CvOlQylNhV4",
    "t0jbNlBVZ17f02VDIeMI",
    "yoZ06aMxZJJ28mfd3POQ",
    "z9fAnlkpzviPz146aGWa",
    "zcAOhNBS3c14rBihAFp1",
    "zrHiDhphv9ZnVXBqCLjz",
]

eleven_name = [
    "Rachel",
    "Drew",
    "Clyde",
    "Paul",
    "Domi",
    "Dave",
    "Fin",
    "Sarah",
    "Antoni",
    "Thomas",
    "Charlie",
    "George",
    "Emily",
    "Elli",
    "Callum",
    "Patrick",
    "Harry",
    "Liam",
    "Dorothy",
    "Josh",
    "Arnold",
    "Charlotte",
    "Alice",
    "Matilda",
    "James",
    "Joseph",
    "Jeremy",
    "Michael",
    "Ethan",
    "Chris",
    "Gigi",
    "Freya",
    "Brian",
    "Grace",
    "Daniel",
    "Lily",
    "Serena",
    "Adam",
    "Nicole",
    "Bill",
    "Jessie",
    "Sam",
    "Glinda",
    "Giovanni",
    "Mimi",
]
eleven_id_model_name_dict = dict(zip(eleven_name, eleven_voice_id))

def openai(text, name):

    headers = {
        'Authorization': 'Bearer ' + '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',
        'Content-Type': 'application/json',
    }

    json_data = {
        'model': 'tts-1-hd',
        'input': f'{text}',
        'voice': f'{name}',
    }

    response = requests.post('https://api.openai.com/v1/audio/speech', headers=headers, json=json_data)

    # Note: json_data will not be serialized by requests
    # exactly as it was in the original request.
    #data = '{\n  "model": "tts-1",\n  "input": "The quick brown fox jumped over the lazy dog.",\n  "voice": "alloy"\n  }'
    #response = requests.post('https://api.openai.com/v1/audio/speech', headers=headers, data=data)

    return "Success", response

def elevenlabs(text, name):
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{eleven_id_model_name_dict[name]}"
    CHUNK_SIZE = 1024
    #url = "https://api.elevenlabs.io/v1/text-to-speech/<voice-id>"

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": "a3391f0e3ff8472b61978dbb70ccc6fe"
    }

    data = {
        "text": f"{text}",
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }

    response = requests.post(url, json=data, headers=headers)
    # with open('output.mp3', 'wb') as f:
    #     for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
    #         if chunk:
    #             f.write(chunk)
    return "Success", response

def microsoft(text, name, style="Neural"):
    """
    :param text:
    :param name:
    :param style:
    :return:
    """
    headers = {
        'Ocp-Apim-Subscription-Key': '1f1ef0ce53b84261be94fab81df7e628',
        'Content-Type': 'application/ssml+xml',
        'X-Microsoft-OutputFormat': 'audio-16khz-128kbitrate-mono-mp3',
        'User-Agent': 'curl',
    }

    data = ("<speak version='1.0' xml:lang='en-US'>"
            f"<voice xml:lang='en-US' name='{name}'>"  # xml:gender='Female'
            f"{text}"
            "</voice>"
            "</speak>")

    response = requests.post(
        'https://japaneast.tts.speech.microsoft.com/cognitiveservices/v1',
        headers=headers,
        data=data,
    )
    return "Success", "sss"

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', type=str, default='cuda')
    parser.add_argument("--share", action="store_true", default=True, help="share gradio app")
    parser.add_argument("--port", type=int, default=8081, help="port")
    parser.add_argument('--model_info_path', type=str, default='/gluster/speech_data/info.json')
    args = parser.parse_args()

    app = gr.Blocks()
    with app:
        gr.Markdown("## Japanese TTS Demo")
        with gr.Tabs():

            with gr.TabItem("11Labs"):
                tts_input1 = gr.TextArea(label="Text", value=all_example)
                tts_input2 = gr.Dropdown(eleven_name, label="name")
                tts_submit = gr.Button("Generate", variant="primary")
                tts_output1 = gr.Textbox(label="Output Message")
                tts_output2 = gr.Audio(label="Output Audio")
                tts_submit.click(elevenlabs, [tts_input1, tts_input2],
                                 [tts_output1, tts_output2])

            with gr.TabItem("Microsoft"):
                tts_input1 = gr.TextArea(label="Text", value=all_example)
                tts_input2 = gr.Dropdown(microsoft_model_list, label="name")
                tts_submit = gr.Button("Generate", variant="primary")
                tts_output1 = gr.Textbox(label="Output Message")
                tts_output2 = gr.Audio(label="Output Audio")
                tts_submit.click(microsoft, [tts_input1, tts_input2],
                                 [tts_output1, tts_output2])

            with gr.TabItem("openai"):
                tts_input1 = gr.TextArea(label="Text", value=all_example)
                tts_input2 = gr.Dropdown(openai_model_list, label="name")
                tts_submit = gr.Button("Generate", variant="primary")
                tts_output1 = gr.Textbox(label="Output Message")
                tts_output2 = gr.Audio(label="Output Audio")
                tts_submit.click(openai, [tts_input1, tts_input2],
                                 [tts_output1, tts_output2])

    app.queue(max_size=10)
    app.launch(share=True)
    # _, audio = openai(all_example, 'alloy')
    # print(audio)
    # with open("test99.mp3", "wb") as f:
    #     f.write(audio.content)
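One caveat on the handlers in tts_gradio.py: microsoft() returns the placeholder string "sss" and elevenlabs()/openai() return raw requests.Response objects, none of which the gr.Audio output can play. A sketch of one adaptation (hypothetical helper; gr.Audio accepts a filepath):

import tempfile

def to_audio_filepath(mp3_bytes):
    # Persist the MP3 bytes to a temp file and hand gr.Audio the path.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        f.write(mp3_bytes)
        return f.name

# e.g. inside microsoft(): return "Success", to_audio_filepath(response.content)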