Spaces:
Paused
Paused
Update chat_ai.py
Browse files- chat_ai.py +24 -15
chat_ai.py
CHANGED
@@ -1,10 +1,9 @@
|
|
1 |
-
# chat_ai.py
|
2 |
-
|
3 |
# ruff: noqa: E402
|
4 |
# Above allows ruff to ignore E402: module level import not at top of file
|
5 |
|
6 |
import re
|
7 |
import tempfile
|
|
|
8 |
|
9 |
import click
|
10 |
import gradio as gr
|
@@ -14,6 +13,7 @@ import torchaudio
|
|
14 |
from cached_path import cached_path
|
15 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
16 |
from num2words import num2words
|
|
|
17 |
|
18 |
try:
|
19 |
import spaces
|
@@ -37,6 +37,13 @@ from f5_tts.infer.utils_infer import (
|
|
37 |
save_spectrogram,
|
38 |
)
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
# Cargar el vocoder
|
41 |
vocoder = load_vocoder()
|
42 |
|
@@ -50,6 +57,9 @@ F5TTS_ema_model = load_model(
|
|
50 |
chat_model_state = None
|
51 |
chat_tokenizer_state = None
|
52 |
|
|
|
|
|
|
|
53 |
@gpu_decorator
|
54 |
def generate_response(messages, model, tokenizer):
|
55 |
"""Genera una respuesta usando el modelo de chat"""
|
@@ -135,7 +145,7 @@ def load_chat_model():
|
|
135 |
if chat_model_state is None:
|
136 |
model_name = "Qwen/Qwen2.5-3B-Instruct"
|
137 |
chat_model_state = AutoModelForCausalLM.from_pretrained(
|
138 |
-
model_name, torch_dtype=
|
139 |
)
|
140 |
chat_tokenizer_state = AutoTokenizer.from_pretrained(model_name)
|
141 |
return chat_model_state, chat_tokenizer_state
|
@@ -154,7 +164,6 @@ with gr.Blocks() as app_chat:
|
|
154 |
|
155 |
if not USING_SPACES:
|
156 |
load_chat_model_btn = gr.Button("Cargar Modelo de Chat", variant="primary")
|
157 |
-
|
158 |
chat_interface_container = gr.Column(visible=False)
|
159 |
|
160 |
@gpu_decorator
|
@@ -220,19 +229,19 @@ with gr.Blocks() as app_chat:
|
|
220 |
)
|
221 |
|
222 |
@gpu_decorator
|
223 |
-
def process_input(audio_path, text, history, conv_state):
|
224 |
"""Procesa la entrada de audio o texto del usuario y genera una respuesta."""
|
225 |
if not audio_path and not text.strip():
|
226 |
return history, conv_state, ""
|
227 |
|
228 |
if audio_path:
|
229 |
-
#
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
|
237 |
if not text.strip():
|
238 |
return history, conv_state, ""
|
@@ -282,7 +291,7 @@ with gr.Blocks() as app_chat:
|
|
282 |
# Manejar la entrada de audio
|
283 |
audio_input_chat.stop_recording(
|
284 |
process_input,
|
285 |
-
inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
|
286 |
outputs=[chatbot_interface, conversation_state, text_input_chat],
|
287 |
).then(
|
288 |
generate_audio_response,
|
@@ -297,7 +306,7 @@ with gr.Blocks() as app_chat:
|
|
297 |
# Manejar la entrada de texto
|
298 |
text_input_chat.submit(
|
299 |
process_input,
|
300 |
-
inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
|
301 |
outputs=[chatbot_interface, conversation_state, text_input_chat],
|
302 |
).then(
|
303 |
generate_audio_response,
|
@@ -312,7 +321,7 @@ with gr.Blocks() as app_chat:
|
|
312 |
# Manejar el botón de enviar
|
313 |
send_btn_chat.click(
|
314 |
process_input,
|
315 |
-
inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state],
|
316 |
outputs=[chatbot_interface, conversation_state, text_input_chat],
|
317 |
).then(
|
318 |
generate_audio_response,
|
|
|
|
|
|
|
1 |
# ruff: noqa: E402
|
2 |
# Above allows ruff to ignore E402: module level import not at top of file
|
3 |
|
4 |
import re
|
5 |
import tempfile
|
6 |
+
import sys
|
7 |
|
8 |
import click
|
9 |
import gradio as gr
|
|
|
13 |
from cached_path import cached_path
|
14 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
15 |
from num2words import num2words
|
16 |
+
import torch
|
17 |
|
18 |
try:
|
19 |
import spaces
|
|
|
37 |
save_spectrogram,
|
38 |
)
|
39 |
|
40 |
+
# Importar Whisper para transcripción
|
41 |
+
try:
|
42 |
+
import whisper
|
43 |
+
except ImportError:
|
44 |
+
print("El paquete 'whisper' no está instalado. Asegúrate de instalarlo con 'pip install openai-whisper'")
|
45 |
+
sys.exit(1)
|
46 |
+
|
47 |
# Cargar el vocoder
|
48 |
vocoder = load_vocoder()
|
49 |
|
|
|
57 |
chat_model_state = None
|
58 |
chat_tokenizer_state = None
|
59 |
|
60 |
+
# Cargar el modelo de transcripción Whisper
|
61 |
+
transcription_model = whisper.load_model("base")  # Puedes elegir otros tamaños como 'small', 'medium', 'large'
|
62 |
+
|
63 |
@gpu_decorator
|
64 |
def generate_response(messages, model, tokenizer):
|
65 |
"""Genera una respuesta usando el modelo de chat"""
|
|
|
145 |
if chat_model_state is None:
|
146 |
model_name = "Qwen/Qwen2.5-3B-Instruct"
|
147 |
chat_model_state = AutoModelForCausalLM.from_pretrained(
|
148 |
+
model_name, torch_dtype=torch.float16, device_map="auto"
|
149 |
)
|
150 |
chat_tokenizer_state = AutoTokenizer.from_pretrained(model_name)
|
151 |
return chat_model_state, chat_tokenizer_state
|
|
|
164 |
|
165 |
if not USING_SPACES:
|
166 |
load_chat_model_btn = gr.Button("Cargar Modelo de Chat", variant="primary")
|
|
|
167 |
chat_interface_container = gr.Column(visible=False)
|
168 |
|
169 |
@gpu_decorator
|
|
|
229 |
)
|
230 |
|
231 |
@gpu_decorator
|
232 |
+
def process_input(audio_path, text, history, conv_state, system_prompt):
|
233 |
"""Procesa la entrada de audio o texto del usuario y genera una respuesta."""
|
234 |
if not audio_path and not text.strip():
|
235 |
return history, conv_state, ""
|
236 |
|
237 |
if audio_path:
|
238 |
+
# Transcribir audio usando Whisper
|
239 |
+
result = transcription_model.transcribe(audio_path)
|
240 |
+
transcribed_text = result["text"].strip()
|
241 |
+
if transcribed_text:
|
242 |
+
text = transcribed_text
|
243 |
+
else:
|
244 |
+
text = text.strip()
|
245 |
|
246 |
if not text.strip():
|
247 |
return history, conv_state, ""
|
|
|
291 |
# Manejar la entrada de audio
|
292 |
audio_input_chat.stop_recording(
|
293 |
process_input,
|
294 |
+
inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state, system_prompt_chat],
|
295 |
outputs=[chatbot_interface, conversation_state, text_input_chat],
|
296 |
).then(
|
297 |
generate_audio_response,
|
|
|
306 |
# Manejar la entrada de texto
|
307 |
text_input_chat.submit(
|
308 |
process_input,
|
309 |
+
inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state, system_prompt_chat],
|
310 |
outputs=[chatbot_interface, conversation_state, text_input_chat],
|
311 |
).then(
|
312 |
generate_audio_response,
|
|
|
321 |
# Manejar el botón de enviar
|
322 |
send_btn_chat.click(
|
323 |
process_input,
|
324 |
+
inputs=[audio_input_chat, text_input_chat, chatbot_interface, conversation_state, system_prompt_chat],
|
325 |
outputs=[chatbot_interface, conversation_state, text_input_chat],
|
326 |
).then(
|
327 |
generate_audio_response,
|