Spaces:
Running
Running
mrfakename
committed on
Sync from GitHub repo
Browse files
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
app.py
CHANGED
@@ -540,15 +540,19 @@ Have a conversation with an AI using your reference voice!
|
|
540 |
chatbot_interface = gr.Chatbot(label="Conversation")
|
541 |
|
542 |
with gr.Row():
|
543 |
-
with gr.Column():
|
544 |
-
audio_output_chat = gr.Audio(autoplay=True)
|
545 |
with gr.Column():
|
546 |
audio_input_chat = gr.Microphone(
|
547 |
label="Speak your message",
|
548 |
type="filepath",
|
549 |
)
|
550 |
-
|
551 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
552 |
|
553 |
conversation_state = gr.State(
|
554 |
value=[
|
@@ -561,13 +565,14 @@ Have a conversation with an AI using your reference voice!
|
|
561 |
|
562 |
# Modify process_audio_input to use model and tokenizer from state
|
563 |
@gpu_decorator
|
564 |
-
def process_audio_input(audio_path, history, conv_state):
|
565 |
-
"""Handle audio input from user"""
|
566 |
-
|
|
|
567 |
return history, conv_state, ""
|
568 |
|
569 |
-
|
570 |
-
|
571 |
|
572 |
if not text.strip():
|
573 |
return history, conv_state, ""
|
@@ -621,7 +626,7 @@ Have a conversation with an AI using your reference voice!
|
|
621 |
# Handle audio input
|
622 |
audio_input_chat.stop_recording(
|
623 |
process_audio_input,
|
624 |
-
inputs=[audio_input_chat, chatbot_interface, conversation_state],
|
625 |
outputs=[chatbot_interface, conversation_state],
|
626 |
).then(
|
627 |
generate_audio_response,
|
@@ -633,6 +638,36 @@ Have a conversation with an AI using your reference voice!
|
|
633 |
audio_input_chat,
|
634 |
)
|
635 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
636 |
# Handle clear button
|
637 |
clear_btn_chat.click(
|
638 |
clear_conversation,
|
|
|
540 |
chatbot_interface = gr.Chatbot(label="Conversation")
|
541 |
|
542 |
with gr.Row():
|
|
|
|
|
543 |
with gr.Column():
|
544 |
audio_input_chat = gr.Microphone(
|
545 |
label="Speak your message",
|
546 |
type="filepath",
|
547 |
)
|
548 |
+
audio_output_chat = gr.Audio(autoplay=True)
|
549 |
+
with gr.Column():
|
550 |
+
text_input_chat = gr.Textbox(
|
551 |
+
label="Type your message",
|
552 |
+
lines=1,
|
553 |
+
)
|
554 |
+
send_btn_chat = gr.Button("Send")
|
555 |
+
clear_btn_chat = gr.Button("Clear Conversation")
|
556 |
|
557 |
conversation_state = gr.State(
|
558 |
value=[
|
|
|
565 |
|
566 |
# Modify process_audio_input to use model and tokenizer from state
|
567 |
@gpu_decorator
|
568 |
+
def process_audio_input(audio_path, text, history, conv_state):
|
569 |
+
"""Handle audio or text input from user"""
|
570 |
+
|
571 |
+
if not audio_path and not text.strip():
|
572 |
return history, conv_state, ""
|
573 |
|
574 |
+
if audio_path:
|
575 |
+
text = preprocess_ref_audio_text(audio_path, text)[1]
|
576 |
|
577 |
if not text.strip():
|
578 |
return history, conv_state, ""
|
|
|
626 |
# Handle audio input
|
627 |
audio_input_chat.stop_recording(
|
628 |
process_audio_input,
|
629 |
+
inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
|
630 |
outputs=[chatbot_interface, conversation_state],
|
631 |
).then(
|
632 |
generate_audio_response,
|
|
|
638 |
audio_input_chat,
|
639 |
)
|
640 |
|
641 |
+
# Handle text input
|
642 |
+
text_input_chat.submit(
|
643 |
+
process_audio_input,
|
644 |
+
inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
|
645 |
+
outputs=[chatbot_interface, conversation_state],
|
646 |
+
).then(
|
647 |
+
generate_audio_response,
|
648 |
+
inputs=[chatbot_interface, ref_audio_chat, ref_text_chat, model_choice_chat, remove_silence_chat],
|
649 |
+
outputs=[audio_output_chat],
|
650 |
+
).then(
|
651 |
+
lambda: None,
|
652 |
+
None,
|
653 |
+
text_input_chat,
|
654 |
+
)
|
655 |
+
|
656 |
+
# Handle send button
|
657 |
+
send_btn_chat.click(
|
658 |
+
process_audio_input,
|
659 |
+
inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
|
660 |
+
outputs=[chatbot_interface, conversation_state],
|
661 |
+
).then(
|
662 |
+
generate_audio_response,
|
663 |
+
inputs=[chatbot_interface, ref_audio_chat, ref_text_chat, model_choice_chat, remove_silence_chat],
|
664 |
+
outputs=[audio_output_chat],
|
665 |
+
).then(
|
666 |
+
lambda: None,
|
667 |
+
None,
|
668 |
+
text_input_chat,
|
669 |
+
)
|
670 |
+
|
671 |
# Handle clear button
|
672 |
clear_btn_chat.click(
|
673 |
clear_conversation,
|