Spaces:
Running
Running
give newcomer a cached sample pair; changed default voice clone for TTS
Browse files
README.md
CHANGED
|
@@ -9,7 +9,7 @@ emoji: 🤗🏆
|
|
| 9 |
colorFrom: red
|
| 10 |
colorTo: red
|
| 11 |
pinned: false
|
| 12 |
-
short_description:
|
| 13 |
models:
|
| 14 |
- coqui/XTTS-v2
|
| 15 |
- fishaudio/fish-speech-1.4
|
|
|
|
| 9 |
colorFrom: red
|
| 10 |
colorTo: red
|
| 11 |
pinned: false
|
| 12 |
+
short_description: Vote on the top HF TTS models!
|
| 13 |
models:
|
| 14 |
- coqui/XTTS-v2
|
| 15 |
- fishaudio/fish-speech-1.4
|
app.py
CHANGED
|
@@ -44,6 +44,9 @@ with open('harvard_sentences.txt') as f:
|
|
| 44 |
sents += f.read().strip().splitlines()
|
| 45 |
with open('llama3_command-r_sentences.txt') as f:
|
| 46 |
sents += f.read().strip().splitlines()
|
|
|
|
|
|
|
|
|
|
| 47 |
####################################
|
| 48 |
# Constants
|
| 49 |
####################################
|
|
@@ -213,8 +216,8 @@ DEFAULT_VOICE_TRANSCRIPT = "In the first half of the 20th century, science ficti
|
|
| 213 |
OVERRIDE_INPUTS = {
|
| 214 |
'coqui/xtts': {
|
| 215 |
1: 'en',
|
| 216 |
-
2:
|
| 217 |
-
3:
|
| 218 |
4: False, #use_mic
|
| 219 |
5: False, #cleanup_reference
|
| 220 |
6: False, #auto_detect
|
|
@@ -248,7 +251,7 @@ OVERRIDE_INPUTS = {
|
|
| 248 |
1: 'LikeManyWaters', # voice
|
| 249 |
},
|
| 250 |
'LeeSangHoon/HierSpeech_TTS': {
|
| 251 |
-
1:
|
| 252 |
2: 0.333,
|
| 253 |
3: 0.333,
|
| 254 |
4: 1,
|
|
@@ -267,6 +270,13 @@ OVERRIDE_INPUTS = {
|
|
| 267 |
2: 1, # speed
|
| 268 |
3: 'EN', # language
|
| 269 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
'parler-tts/parler_tts': {
|
| 271 |
1: 'Elisabeth. Elisabeth\'s clear sharp voice.', # description/prompt
|
| 272 |
},
|
|
@@ -438,13 +448,13 @@ INSTR = """
|
|
| 438 |
## 🗳️ Vote
|
| 439 |
|
| 440 |
* Press ⚡ to get cached sample pairs you've yet to vote on. (Fast 🐇)
|
| 441 |
-
* Or press 🎲 to randomly use
|
| 442 |
* Or input text (🇺🇸 English only) to synthesize audio. (Slowest 🐌 due to _Toxicity_ test)
|
| 443 |
* Listen to the two audio clips, one after the other.
|
| 444 |
-
*
|
| 445 |
-
*
|
| 446 |
|
| 447 |
-
Note: It may take up to 30 seconds to synthesize audio.
|
| 448 |
""".strip()
|
| 449 |
request = ''
|
| 450 |
if SPACE_ID:
|
|
@@ -1391,12 +1401,17 @@ with gr.Blocks() as vote:
|
|
| 1391 |
# bothbad.click(both_bad, outputs=outputs, inputs=[model1, model2, useridstate])
|
| 1392 |
# bothgood.click(both_good, outputs=outputs, inputs=[model1, model2, useridstate])
|
| 1393 |
|
| 1394 |
-
|
| 1395 |
-
|
| 1396 |
-
|
| 1397 |
-
|
| 1398 |
-
|
| 1399 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1400 |
|
| 1401 |
with gr.Blocks() as about:
|
| 1402 |
gr.Markdown(ABOUT)
|
|
@@ -1407,6 +1422,7 @@ with gr.Blocks() as about:
|
|
| 1407 |
# dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
|
| 1408 |
# ddb = gr.Button("Delete DB")
|
| 1409 |
# ddb.click(del_db, inputs=dbtext, outputs=ddb)
|
|
|
|
| 1410 |
with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none} .blurred-text {filter: blur(0.15em);}", js="cookie.js", title="TTS Arena") as demo:
|
| 1411 |
gr.Markdown(DESCR)
|
| 1412 |
# gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
|
|
|
|
| 44 |
sents += f.read().strip().splitlines()
|
| 45 |
with open('llama3_command-r_sentences.txt') as f:
|
| 46 |
sents += f.read().strip().splitlines()
|
| 47 |
+
|
| 48 |
+
# Credit: llama3_command-r sentences generated by user KingNish
|
| 49 |
+
|
| 50 |
####################################
|
| 51 |
# Constants
|
| 52 |
####################################
|
|
|
|
| 216 |
OVERRIDE_INPUTS = {
|
| 217 |
'coqui/xtts': {
|
| 218 |
1: 'en',
|
| 219 |
+
2: 'https://huggingface.co/spaces/coqui/xtts/resolve/main/examples/female.wav', # voice sample
|
| 220 |
+
3: 'https://huggingface.co/spaces/coqui/xtts/resolve/main/examples/female.wav', # mic voice sample
|
| 221 |
4: False, #use_mic
|
| 222 |
5: False, #cleanup_reference
|
| 223 |
6: False, #auto_detect
|
|
|
|
| 251 |
1: 'LikeManyWaters', # voice
|
| 252 |
},
|
| 253 |
'LeeSangHoon/HierSpeech_TTS': {
|
| 254 |
+
1: file('https://huggingface.co/spaces/LeeSangHoon/HierSpeech_TTS/resolve/main/example/female.wav'), # voice sample
|
| 255 |
2: 0.333,
|
| 256 |
3: 0.333,
|
| 257 |
4: 1,
|
|
|
|
| 270 |
2: 1, # speed
|
| 271 |
3: 'EN', # language
|
| 272 |
},
|
| 273 |
+
'mrfakename/MetaVoice-1B-v0.1': {
|
| 274 |
+
1: 5, # float (numeric value between 0.0 and 10.0) in 'Speech Stability - improves text following for a challenging speaker' Slider component
|
| 275 |
+
2: 5, # float (numeric value between 1.0 and 5.0) in 'Speaker similarity - How closely to match speaker identity and speech style.' Slider component
|
| 276 |
+
3: "Preset voices", # Literal['Preset voices', 'Upload target voice'] in 'Choose voice' Radio component
|
| 277 |
+
4: "Bria", # Literal['Bria', 'Alex', 'Jacob'] in 'Preset voices' Dropdown component
|
| 278 |
+
5: None, # filepath in 'Upload a clean sample to clone. Sample should contain 1 speaker, be between 30-90 seconds and not contain background noise.' Audio component
|
| 279 |
+
},
|
| 280 |
'parler-tts/parler_tts': {
|
| 281 |
1: 'Elisabeth. Elisabeth\'s clear sharp voice.', # description/prompt
|
| 282 |
},
|
|
|
|
| 448 |
## 🗳️ Vote
|
| 449 |
|
| 450 |
* Press ⚡ to get cached sample pairs you've yet to vote on. (Fast 🐇)
|
| 451 |
+
* Or press 🎲 to randomly use a sentence from the list. (Slow 🐢)
|
| 452 |
* Or input text (🇺🇸 English only) to synthesize audio. (Slowest 🐌 due to _Toxicity_ test)
|
| 453 |
* Listen to the two audio clips, one after the other.
|
| 454 |
+
* _Vote on which audio sounds more natural to you._
|
| 455 |
+
* Model names are revealed after the vote is cast.
|
| 456 |
|
| 457 |
+
⚠ Note: It **may take up to 30 seconds** to ***synthesize*** audio.
|
| 458 |
""".strip()
|
| 459 |
request = ''
|
| 460 |
if SPACE_ID:
|
|
|
|
| 1401 |
# bothbad.click(both_bad, outputs=outputs, inputs=[model1, model2, useridstate])
|
| 1402 |
# bothgood.click(both_good, outputs=outputs, inputs=[model1, model2, useridstate])
|
| 1403 |
|
| 1404 |
+
# get session cookie
|
| 1405 |
+
vote\
|
| 1406 |
+
.load(
|
| 1407 |
+
None,
|
| 1408 |
+
None,
|
| 1409 |
+
session_hash,
|
| 1410 |
+
js="() => { return getArenaCookie('session') }",
|
| 1411 |
+
)
|
| 1412 |
+
# give a cached sample pair to voter; .then() did not work here
|
| 1413 |
+
vote\
|
| 1414 |
+
.load(give_cached_sample, inputs=[session_hash], outputs=[*outputs, cachedt])
|
| 1415 |
|
| 1416 |
with gr.Blocks() as about:
|
| 1417 |
gr.Markdown(ABOUT)
|
|
|
|
| 1422 |
# dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
|
| 1423 |
# ddb = gr.Button("Delete DB")
|
| 1424 |
# ddb.click(del_db, inputs=dbtext, outputs=ddb)
|
| 1425 |
+
# Blur cached sample text so the voting user picks up mispronunciations
|
| 1426 |
with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none} .blurred-text {filter: blur(0.15em);}", js="cookie.js", title="TTS Arena") as demo:
|
| 1427 |
gr.Markdown(DESCR)
|
| 1428 |
# gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
|