Spaces:
Sleeping
Sleeping
Another big commit
Browse files
- README.md +8 -1
- data/voice_presets/metadata.json +4 -4
- examples/1.mp4 +0 -0
- examples/2.mp4 +0 -0
- examples/3.mp4 +0 -0
- examples/examples.py +76 -13
- pipeline.py +8 -0
- services.py +2 -1
- ui_client.py +26 -22
- utils.py +3 -0
README.md
CHANGED
|
@@ -45,7 +45,14 @@ export WAVJOURNEY_OPENAI_KEY=your_openai_key_here
|
|
| 45 |
|
| 46 |
6. Set environment variables for using API services
|
| 47 |
```bash
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
```
|
| 50 |
|
| 51 |
|
|
|
|
| 45 |
|
| 46 |
6. Set environment variables for using API services
|
| 47 |
```bash
|
| 48 |
+
# Set the port for the WAVJOURNEY service to 8021
|
| 49 |
+
export WAVJOURNEY_SERVICE_PORT=8021
|
| 50 |
+
|
| 51 |
+
# Set the URL for the WAVJOURNEY service to 127.0.0.1
|
| 52 |
+
export WAVJOURNEY_SERVICE_URL=127.0.0.1
|
| 53 |
+
|
| 54 |
+
# Limit the maximum script lines for WAVJOURNEY to 999
|
| 55 |
+
export WAVJOURNEY_MAX_SCRIPT_LINES=999
|
| 56 |
```
|
| 57 |
|
| 58 |
|
data/voice_presets/metadata.json
CHANGED
|
@@ -34,13 +34,13 @@
|
|
| 34 |
"desc": "a female voice of a off-site news reporter, suitable for news scenario",
|
| 35 |
"npz_path": "data/voice_presets/npz/news_female_speaker_outside.npz"
|
| 36 |
},
|
| 37 |
-
"
|
| 38 |
-
"id": "
|
| 39 |
"desc": "a small young boy voice",
|
| 40 |
"npz_path": "data/voice_presets/npz/child_boy.npz"
|
| 41 |
},
|
| 42 |
-
"
|
| 43 |
-
"id": "
|
| 44 |
"desc": "a voice of an old man",
|
| 45 |
"npz_path": "data/voice_presets/npz/elder_morgen.npz"
|
| 46 |
}
|
|
|
|
| 34 |
"desc": "a female voice of a off-site news reporter, suitable for news scenario",
|
| 35 |
"npz_path": "data/voice_presets/npz/news_female_speaker_outside.npz"
|
| 36 |
},
|
| 37 |
+
"Child": {
|
| 38 |
+
"id": "Child",
|
| 39 |
"desc": "a small young boy voice",
|
| 40 |
"npz_path": "data/voice_presets/npz/child_boy.npz"
|
| 41 |
},
|
| 42 |
+
"Old_man": {
|
| 43 |
+
"id": "Old_man",
|
| 44 |
"desc": "a voice of an old man",
|
| 45 |
"npz_path": "data/voice_presets/npz/elder_morgen.npz"
|
| 46 |
}
|
examples/1.mp4
ADDED
|
Binary file (365 kB). View file
|
|
|
examples/2.mp4
ADDED
|
Binary file (241 kB). View file
|
|
|
examples/3.mp4
ADDED
|
Binary file (346 kB). View file
|
|
|
examples/examples.py
CHANGED
|
@@ -1,24 +1,87 @@
|
|
| 1 |
|
| 2 |
example1 = {
|
| 3 |
-
'text': "
|
| 4 |
-
'
|
| 5 |
-
| Audio Type | Layout | ID
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
""",
|
| 10 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
}
|
| 12 |
|
| 13 |
example2 = {
|
| 14 |
-
'text': "
|
| 15 |
-
'
|
| 16 |
-
| Audio Type | Layout | ID
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
""",
|
| 21 |
-
'wav_file': 'examples/
|
| 22 |
}
|
| 23 |
|
| 24 |
-
|
|
|
|
|
|
|
|
|
| 1 |
|
| 2 |
example1 = {
|
| 3 |
+
'text': "An introduction to AI-assisted audio content creation.",
|
| 4 |
+
'table_script': """
|
| 5 |
+
| Audio Type | Layout | ID | Character | Action | Volume | Description | Length |
|
| 6 |
+
|--------------|------------|----|-----------|--------|--------|------------------------------------------------------------------|--------|
|
| 7 |
+
| music | background | 1 | N/A | begin | -35 | Inspirational technology-themed music | Auto |
|
| 8 |
+
| speech | foreground | N/A| Narrator | N/A | -15 | Welcome to the future of audio content creation. | Auto |
|
| 9 |
+
| sound_effect | foreground | N/A| N/A | N/A | -35 | Digital startup sound | 2 |
|
| 10 |
+
| speech | foreground | N/A| Narrator | N/A | -15 | With evolving technology, we are introducing AI-assisted tools for pristine audio production. | Auto |
|
| 11 |
+
| sound_effect | foreground | N/A| N/A | N/A | -35 | Keyboard typing noise | 3 |
|
| 12 |
+
| speech | foreground | N/A| Narrator | N/A | -15 | Imagine crafting audio content with the power of AI at your fingertips. | Auto |
|
| 13 |
+
| sound_effect | background | 2 | N/A | begin | -35 | Ambiance of a busy control room | Auto |
|
| 14 |
+
| speech | foreground | N/A| Narrator | N/A | -15 | Enhanced quality, efficient production and limitless creativity, all under one roof. | Auto |
|
| 15 |
+
| sound_effect | background | 2 | N/A | end | N/A | N/A | Auto |
|
| 16 |
+
| speech | foreground | N/A| Narrator | N/A | -15 | Unleash your potential with AI-assisted audio content creation. | Auto |
|
| 17 |
+
| music | background | 1 | N/A | end | N/A | N/A | Auto |
|
| 18 |
|
| 19 |
""",
|
| 20 |
+
'table_voice': """
|
| 21 |
+
| Character | Voice |
|
| 22 |
+
|-------------|-----------|
|
| 23 |
+
| Narrator | News_Male |
|
| 24 |
+
|
| 25 |
+
""",
|
| 26 |
+
'wav_file': 'examples/1.mp4',
|
| 27 |
}
|
| 28 |
|
| 29 |
example2 = {
|
| 30 |
+
'text': "A couple dating in a cafe.",
|
| 31 |
+
'table_script': """
|
| 32 |
+
| Audio Type | Layout | ID | Character | Action | Volume | Description | Length |
|
| 33 |
+
|--------------|------------|----|-----------|--------|--------|-----------------------------------------------|--------|
|
| 34 |
+
| sound_effect | background | 1 | N/A | begin | -35 | Soft chattering in a cafe | Auto |
|
| 35 |
+
| sound_effect | background | 2 | N/A | begin | -38 | Coffee brewing noises | Auto |
|
| 36 |
+
| music | background | 3 | N/A | begin | -35 | Soft jazz playing in the background | Auto |
|
| 37 |
+
| speech | foreground | N/A| Man | N/A | -15 | It’s really nice to finally get out and relax a little, isn’t it? | Auto |
|
| 38 |
+
| speech | foreground | N/A| Woman | N/A | -15 | I know, right? We should do this more often. | Auto |
|
| 39 |
+
| sound_effect | background | 2 | N/A | end | N/A | N/A | Auto |
|
| 40 |
+
| speech | foreground | N/A| Man | N/A | -15 | Here’s your coffee, just as you like it. | Auto |
|
| 41 |
+
| speech | foreground | N/A| Woman | N/A | -15 | Thank you, it smells wonderful. | Auto |
|
| 42 |
+
| music | background | 3 | N/A | end | N/A | N/A | Auto |
|
| 43 |
+
| sound_effect | background | 1 | N/A | end | N/A | N/A | Auto |
|
| 44 |
+
|
| 45 |
+
""",
|
| 46 |
+
'table_voice': """
|
| 47 |
+
| Character | Voice |
|
| 48 |
+
|-------------|-----------|
|
| 49 |
+
| Man | Male1 |
|
| 50 |
+
| Woman | Female1 |
|
| 51 |
+
|
| 52 |
+
""",
|
| 53 |
+
'wav_file': 'examples/2.mp4',
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
example3 = {
|
| 58 |
+
'text': "A child is participating in a farting contest.",
|
| 59 |
+
'table_script': """
|
| 60 |
+
| Audio Type | Layout | ID | Character | Action | Volume | Description | Length |
|
| 61 |
+
|--------------|------------|----|-----------|--------|--------|------------------------------------------------------|--------|
|
| 62 |
+
| sound_effect | background | 1 | N/A | begin | -35 | Outdoor park ambiance, people chattering | Auto |
|
| 63 |
+
| music | background | 2 | N/A | begin | -35 | Light comedy theme music, quirky | Auto |
|
| 64 |
+
| speech | foreground | N/A| Host | N/A | -15 | Welcome to the annual Fart Competition. | Auto |
|
| 65 |
+
| speech | foreground | N/A| Host | N/A | -15 | Now, let’s welcome our youngest participant. | Auto |
|
| 66 |
+
| sound_effect | foreground | N/A| N/A | N/A | -35 | Clapping sound | 2 |
|
| 67 |
+
| speech | foreground | N/A| Child | N/A | -15 | Hi, I’m excited to be here. | Auto |
|
| 68 |
+
| sound_effect | foreground | N/A| N/A | N/A | -35 | Short, cartoonish duration of a fart sound | 4 |
|
| 69 |
+
| sound_effect | foreground | N/A| N/A | N/A | -35 | Audience laughing and applauding | 2 |
|
| 70 |
+
| speech | foreground | N/A| Host | N/A | -15 | Wow, that was impressive! Let’s give another round of applause! | Auto |
|
| 71 |
+
| sound_effect | foreground | N/A| N/A | N/A | -35 | Audience clapping and cheering | 3 |
|
| 72 |
+
| music | background | 2 | N/A | end | N/A | N/A | Auto |
|
| 73 |
+
| sound_effect | background | 1 | N/A | end | N/A | N/A | Auto |
|
| 74 |
+
""",
|
| 75 |
+
'table_voice': """
|
| 76 |
+
| Character | Voice |
|
| 77 |
+
|-------------|-----------|
|
| 78 |
+
| Host | Male1 |
|
| 79 |
+
| Child | Child |
|
| 80 |
|
| 81 |
""",
|
| 82 |
+
'wav_file': 'examples/3.mp4',
|
| 83 |
}
|
| 84 |
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
examples = [example1, example2, example3]
|
pipeline.py
CHANGED
|
@@ -194,6 +194,14 @@ def generate_json_file(session_id, input_text, api_key):
|
|
| 194 |
|
| 195 |
# Function call used by Gradio: json to result wav
|
| 196 |
def generate_audio(session_id, json_script, api_key):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
output_path = utils.get_session_path(session_id)
|
| 198 |
output_audio_path = utils.get_session_audio_path(session_id)
|
| 199 |
voices = voice_presets.get_merged_voice_presets(session_id)
|
|
|
|
| 194 |
|
| 195 |
# Function call used by Gradio: json to result wav
|
| 196 |
def generate_audio(session_id, json_script, api_key):
|
| 197 |
+
def count_lines(content):
|
| 198 |
+
# Split the string using the newline character and count the non-empty lines
|
| 199 |
+
return sum(1 for line in content.split('\n') if line.strip())
|
| 200 |
+
|
| 201 |
+
max_lines = utils.get_max_script_lines()
|
| 202 |
+
if count_lines(json_script) > max_lines:
|
| 203 |
+
raise ValueError(f'The number of lines of the JSON script has exceeded {max_lines}!')
|
| 204 |
+
|
| 205 |
output_path = utils.get_session_path(session_id)
|
| 206 |
output_audio_path = utils.get_session_audio_path(session_id)
|
| 207 |
voices = voice_presets.get_merged_voice_presets(session_id)
|
services.py
CHANGED
|
@@ -227,4 +227,5 @@ def parse_voice():
|
|
| 227 |
|
| 228 |
if __name__ == '__main__':
|
| 229 |
service_port = get_service_port()
|
| 230 |
-
|
|
|
|
|
|
| 227 |
|
| 228 |
if __name__ == '__main__':
|
| 229 |
service_port = get_service_port()
|
| 230 |
+
# We disable multithreading to force services to process one request at a time and avoid CUDA OOM
|
| 231 |
+
app.run(debug=False, threaded=False, port=service_port)
|
ui_client.py
CHANGED
|
@@ -54,7 +54,7 @@ def generate_script_fn(instruction, _state: gr.State):
|
|
| 54 |
json_script = generate_json_file(session_id, instruction, api_key)
|
| 55 |
table_text = convert_json_to_md(json_script)
|
| 56 |
except Exception as e:
|
| 57 |
-
gr.Warning(str(e)
|
| 58 |
print(f"Generating script error: {str(e)}")
|
| 59 |
traceback.print_exc()
|
| 60 |
return [
|
|
@@ -99,7 +99,7 @@ def generate_audio_fn(state):
|
|
| 99 |
except Exception as e:
|
| 100 |
print(f"Generation audio error: {str(e)}")
|
| 101 |
traceback.print_exc()
|
| 102 |
-
gr.Warning(str(e)
|
| 103 |
|
| 104 |
return [
|
| 105 |
None,
|
|
@@ -210,7 +210,7 @@ def add_voice_preset(vp_id, vp_desc, file, ui_state, added_voice_preset):
|
|
| 210 |
except Exception as exception:
|
| 211 |
print(exception)
|
| 212 |
traceback.print_exc()
|
| 213 |
-
gr.Warning(str(exception)
|
| 214 |
|
| 215 |
# After added
|
| 216 |
dataframe = get_voice_preset_to_list(ui_state)
|
|
@@ -451,10 +451,29 @@ with gr.Blocks(css=css) as interface:
|
|
| 451 |
loading_icon = gr.HTML(loading_icon_html)
|
| 452 |
share_button = gr.Button(value="Share to community", elem_id="share-btn")
|
| 453 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 454 |
# System Voice Presets
|
| 455 |
gr.Markdown(label='System Voice Presets', value='# System Voice Presets')
|
| 456 |
-
|
| 457 |
-
|
|
|
|
| 458 |
# User Voice Preset Related
|
| 459 |
gr.Markdown('# (Optional) Speaker Customization ')
|
| 460 |
with gr.Accordion("Click to add speakers", open=False):
|
|
@@ -476,22 +495,7 @@ with gr.Blocks(css=css) as interface:
|
|
| 476 |
vp_file = gr.File(label='Wav File', type='file', file_types=['.wav'],
|
| 477 |
interactive=True)
|
| 478 |
vp_submit = gr.Button(label='Upload Voice Preset', value="Upload Voice Preset")
|
| 479 |
-
|
| 480 |
-
from examples.examples import examples as WJExamples
|
| 481 |
-
def example_fn(idx, _text_input):
|
| 482 |
-
print('from example', idx, _text_input)
|
| 483 |
-
example = WJExamples[int(idx)-1]
|
| 484 |
-
return example['table_text'], gr.make_waveform(example['wav_file'])
|
| 485 |
-
|
| 486 |
-
_idx_input = gr.Textbox(label='Example No')
|
| 487 |
-
_idx_input.visible=False
|
| 488 |
-
gr.Examples(
|
| 489 |
-
[[idx+1, x['text']] for idx, x in enumerate(WJExamples)],
|
| 490 |
-
fn=example_fn,
|
| 491 |
-
inputs=[_idx_input, text_input],
|
| 492 |
-
outputs=[char_voice_map_markdown, audio_output],
|
| 493 |
-
cache_examples=True,
|
| 494 |
-
)
|
| 495 |
# clear btn, will re-new a session
|
| 496 |
clear_btn = gr.ClearButton(value='Clear All')
|
| 497 |
|
|
@@ -579,5 +583,5 @@ with gr.Blocks(css=css) as interface:
|
|
| 579 |
# debug only
|
| 580 |
# print_state_btn = gr.Button(value='Print State')
|
| 581 |
# print_state_btn.click(fn=lambda state, state2: print(state, state2), inputs=[ui_state, selected_voice_presets])
|
| 582 |
-
interface.queue(concurrency_count=
|
| 583 |
interface.launch()
|
|
|
|
| 54 |
json_script = generate_json_file(session_id, instruction, api_key)
|
| 55 |
table_text = convert_json_to_md(json_script)
|
| 56 |
except Exception as e:
|
| 57 |
+
gr.Warning(str(e))
|
| 58 |
print(f"Generating script error: {str(e)}")
|
| 59 |
traceback.print_exc()
|
| 60 |
return [
|
|
|
|
| 99 |
except Exception as e:
|
| 100 |
print(f"Generation audio error: {str(e)}")
|
| 101 |
traceback.print_exc()
|
| 102 |
+
gr.Warning(str(e))
|
| 103 |
|
| 104 |
return [
|
| 105 |
None,
|
|
|
|
| 210 |
except Exception as exception:
|
| 211 |
print(exception)
|
| 212 |
traceback.print_exc()
|
| 213 |
+
gr.Warning(str(exception))
|
| 214 |
|
| 215 |
# After added
|
| 216 |
dataframe = get_voice_preset_to_list(ui_state)
|
|
|
|
| 451 |
loading_icon = gr.HTML(loading_icon_html)
|
| 452 |
share_button = gr.Button(value="Share to community", elem_id="share-btn")
|
| 453 |
|
| 454 |
+
# add examples
|
| 455 |
+
from examples.examples import examples as WJExamples
|
| 456 |
+
def example_fn(idx, _text_input):
|
| 457 |
+
print('from example', idx, _text_input)
|
| 458 |
+
example = WJExamples[int(idx)-1]
|
| 459 |
+
print(example['table_script'], example['table_voice'], gr.make_waveform(example['wav_file']))
|
| 460 |
+
return example['table_script'], example['table_voice'], gr.make_waveform(example['wav_file'])
|
| 461 |
+
|
| 462 |
+
_idx_input = gr.Textbox(label='Example No.')
|
| 463 |
+
_idx_input.visible=False
|
| 464 |
+
gr.Examples(
|
| 465 |
+
[[idx+1, x['text']] for idx, x in enumerate(WJExamples)],
|
| 466 |
+
fn=example_fn,
|
| 467 |
+
inputs=[_idx_input, text_input],
|
| 468 |
+
outputs=[audio_script_markdown, char_voice_map_markdown, audio_output],
|
| 469 |
+
cache_examples=True,
|
| 470 |
+
)
|
| 471 |
+
|
| 472 |
# System Voice Presets
|
| 473 |
gr.Markdown(label='System Voice Presets', value='# System Voice Presets')
|
| 474 |
+
with gr.Accordion("Click to see system speakers", open=False):
|
| 475 |
+
system_markdown_voice_presets = gr.Dataframe(label='System Voice Presets', headers=VOICE_PRESETS_HEADERS,
|
| 476 |
+
value=system_voice_presets)
|
| 477 |
# User Voice Preset Related
|
| 478 |
gr.Markdown('# (Optional) Speaker Customization ')
|
| 479 |
with gr.Accordion("Click to add speakers", open=False):
|
|
|
|
| 495 |
vp_file = gr.File(label='Wav File', type='file', file_types=['.wav'],
|
| 496 |
interactive=True)
|
| 497 |
vp_submit = gr.Button(label='Upload Voice Preset', value="Upload Voice Preset")
|
| 498 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 499 |
# clear btn, will re-new a session
|
| 500 |
clear_btn = gr.ClearButton(value='Clear All')
|
| 501 |
|
|
|
|
| 583 |
# debug only
|
| 584 |
# print_state_btn = gr.Button(value='Print State')
|
| 585 |
# print_state_btn.click(fn=lambda state, state2: print(state, state2), inputs=[ui_state, selected_voice_presets])
|
| 586 |
+
interface.queue(concurrency_count=1, max_size=20)
|
| 587 |
interface.launch()
|
utils.py
CHANGED
|
@@ -77,3 +77,6 @@ def get_api_key():
|
|
| 77 |
api_key = os.environ.get('WAVJOURNEY_OPENAI_KEY')
|
| 78 |
return api_key
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
api_key = os.environ.get('WAVJOURNEY_OPENAI_KEY')
|
| 78 |
return api_key
|
| 79 |
|
| 80 |
+
def get_max_script_lines():
|
| 81 |
+
max_lines = int(os.environ.get('WAVJOURNEY_MAX_SCRIPT_LINES', 999))
|
| 82 |
+
return max_lines
|