MohamedRashad committed on
Commit
e452575
·
1 Parent(s): 9801a70

Enhance speech generation examples with additional emotive tags and improve prompt tips

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -150,23 +150,31 @@ def generate_speech(text, voice, temperature, top_p, repetition_penalty, max_new
150
  # Examples for the UI
151
  examples = [
152
  ["Hey there my name is Tara, <chuckle> and I'm a speech generation model that can sound like a person.", "tara", 0.6, 0.95, 1.1, 1200],
153
- ["I've also been taught to understand and produce paralinguistic things like sighing, or chuckling, or yawning!", "dan", 0.7, 0.95, 1.1, 1200],
154
- ["I live in San Francisco, and have, uhm let's see, 3 billion 7 hundred ... well, lets just say a lot of parameters.", "emma", 0.6, 0.9, 1.2, 1200]
 
 
 
 
 
155
  ]
156
 
157
  # Available voices
158
  VOICES = ["tara", "leah", "jess", "leo", "dan", "mia", "zac", "zoe"]
159
 
 
 
 
160
  # Create Gradio interface
161
  with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
162
- gr.Markdown("""
163
  # 🎵 [Orpheus Text-to-Speech](https://github.com/canopyai/Orpheus-TTS)
164
  Enter your text below and hear it converted to natural-sounding speech with the Orpheus TTS model.
165
 
166
  ## Tips for better prompts:
167
- - Add paralinguistic elements like `<chuckle>`, `<sigh>`, or `uhm` for more human-like speech.
168
  - Longer text prompts generally work better than very short phrases
169
- - Adjust the temperature slider for more varied (higher) or consistent (lower) speech patterns
170
  """)
171
  with gr.Row():
172
  with gr.Column(scale=3):
 
150
  # Examples for the UI
151
  examples = [
152
  ["Hey there my name is Tara, <chuckle> and I'm a speech generation model that can sound like a person.", "tara", 0.6, 0.95, 1.1, 1200],
153
+ ["I've also been taught to understand and produce paralinguistic things <sigh> like sighing, or <laugh> laughing, or <yawn> yawning!", "dan", 0.7, 0.95, 1.1, 1200],
154
+ ["I live in San Francisco, and have, uhm let's see, 3 billion 7 hundred ... <gasp> well, lets just say a lot of parameters.", "leah", 0.6, 0.9, 1.2, 1200],
155
+ ["Sometimes when I talk too much, I need to <cough> excuse myself. <sniffle> The weather has been quite cold lately.", "leo", 0.65, 0.9, 1.1, 1200],
156
+ ["Public speaking can be challenging. <groan> But with enough practice, anyone can become better at it.", "jess", 0.7, 0.95, 1.1, 1200],
157
+ ["The hike was exhausting but the view from the top was absolutely breathtaking! <sigh> It was totally worth it.", "mia", 0.65, 0.9, 1.15, 1200],
158
+ ["Did you hear that joke? <laugh> I couldn't stop laughing when I first heard it. <chuckle> It's still funny.", "zac", 0.7, 0.95, 1.1, 1200],
159
+ ["After running the marathon, I was so tired <yawn> and needed a long rest. <sigh> But I felt accomplished.", "zoe", 0.6, 0.95, 1.1, 1200]
160
  ]
161
 
162
  # Available voices
163
  VOICES = ["tara", "leah", "jess", "leo", "dan", "mia", "zac", "zoe"]
164
 
165
+ # Available Emotive Tags
166
+ EMOTIVE_TAGS = ["`<laugh>`", "`<chuckle>`", "`<sigh>`", "`<cough>`", "`<sniffle>`", "`<groan>`", "`<yawn>`", "`<gasp>`"]
167
+
168
  # Create Gradio interface
169
  with gr.Blocks(title="Orpheus Text-to-Speech") as demo:
170
+ gr.Markdown(f"""
171
  # 🎵 [Orpheus Text-to-Speech](https://github.com/canopyai/Orpheus-TTS)
172
  Enter your text below and hear it converted to natural-sounding speech with the Orpheus TTS model.
173
 
174
  ## Tips for better prompts:
175
+ - Add paralinguistic elements like {", ".join(EMOTIVE_TAGS)} or `uhm` for more human-like speech.
176
  - Longer text prompts generally work better than very short phrases
177
+ - Increasing `repetition_penalty` and `temperature` makes the model speak faster.
178
  """)
179
  with gr.Row():
180
  with gr.Column(scale=3):