"""A simple interactive web chat demo for Mini-Omni, built with Gradio."""

import time

import gradio as gr
import numpy as np
import spaces
import torch

from inference import OmniInference

# Run inference on the GPU when available, falling back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the checkpoint and warm the model up so the first request does not
# pay the full initialization cost.
omni_client = OmniInference('./checkpoint', device)
omni_client.warm_up()

# Output stream parameters: 16-bit mono PCM at 24 kHz.
OUT_CHUNK = 4096
OUT_RATE = 24000
OUT_CHANNELS = 1
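
# For scale (arithmetic, assuming each streamed chunk is OUT_CHUNK bytes of
# 2-byte int16 samples): 4096 bytes hold 2048 samples, i.e.
# 2048 / 24000 ≈ 85 ms of audio per chunk.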


@spaces.GPU
def process_audio(filepath):
    """Stream the model's spoken reply to the recorded input audio."""
    print(f"filepath: {filepath}")
    if filepath is None:
        return

    cnt = 0
    tik = time.time()
    for chunk in omni_client.run_AT_batch_stream(filepath):
        # Log time-to-first-chunk, the latency the user actually perceives.
        if cnt == 0:
            print(f"first chunk time cost: {time.time() - tik:.3f}s")
        cnt += 1
        # Each chunk is raw little-endian int16 PCM; decode it and yield a
        # (sample_rate, samples) tuple so gr.Audio can stream it to the browser.
        audio_data = np.frombuffer(chunk, dtype=np.int16)
        audio_data = audio_data.reshape(-1, OUT_CHANNELS)
        yield OUT_RATE, audio_data
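

# A minimal offline sketch for debugging the stream outside Gradio: it drains
# run_AT_batch_stream into a WAV file. The helper name save_response_wav is
# illustrative (it is not part of inference.py), and it assumes the chunks are
# raw little-endian int16 mono PCM at OUT_RATE, matching how process_audio
# decodes them above.
def save_response_wav(filepath, out_path="response.wav"):
    import wave

    with wave.open(out_path, "wb") as wav_file:
        wav_file.setnchannels(OUT_CHANNELS)
        wav_file.setsampwidth(2)  # int16 samples are 2 bytes wide
        wav_file.setframerate(OUT_RATE)
        for chunk in omni_client.run_AT_batch_stream(filepath):
            wav_file.writeframes(chunk)
    return out_path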


# live=True runs process_audio as soon as a recording is submitted, and the
# streaming output plays each yielded chunk as it arrives instead of waiting
# for the full response.
demo = gr.Interface(
    process_audio,
    inputs=gr.Audio(type="filepath", label="Microphone"),
    outputs=[gr.Audio(label="Response", streaming=True, autoplay=True)],
    title="Chat Mini-Omni Demo",
    live=True,
)
demo.queue().launch()
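
# launch() accepts the usual Gradio options when the demo must be reachable
# from other machines, e.g. (illustrative, not part of the original script):
#   demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=True)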