# Hugging Face Space status: Build error
import streamlit as st | |
import torch | |
from transformers import WhisperProcessor, WhisperForConditionalGeneration | |
from peft import PeftModel | |
import numpy as np | |
import pyaudio | |
# Load the model
def load_model():
    """Load the Whisper processor and model, applying the PEFT adapter if possible.

    The base checkpoint is loaded first; the adapter is then layered on top.
    If the adapter cannot be loaded, a Streamlit warning is shown and the
    plain base model is used instead.

    Returns:
        A ``(processor, model, device)`` tuple, where ``device`` is ``"cuda"``
        when CUDA is available and ``"cpu"`` otherwise, and ``model`` has
        already been moved to that device.
    """
    base_model_id = "openai/whisper-tiny"
    # NOTE(review): the adapter id mentions "turbo" while the base checkpoint
    # is whisper-tiny -- confirm the adapter was trained on this base model.
    adapter_id = "longhoang2112/whisper-turbo-fine-tuning-adapters"

    processor = WhisperProcessor.from_pretrained(base_model_id)
    model = WhisperForConditionalGeneration.from_pretrained(base_model_id)

    try:
        # PeftModel.from_pretrained both loads and activates the adapter, so
        # no separate activation call is needed afterwards.
        model = PeftModel.from_pretrained(model, adapter_id)
    except Exception as exc:
        # Best-effort fallback: keep the base model, but surface the cause
        # instead of hiding it behind a bare ``except``.
        st.warning(
            f"Adapter loading failed. Using base model. ({type(exc).__name__}: {exc})"
        )

    # NOTE(review): Streamlit re-executes the script on every interaction, so
    # this function runs again each time -- consider caching the result.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    return processor, model, device
# Module-level handles used by the UI code further down the script.
processor, model, device = load_model()
# Record audio
def record_audio(duration=5, sample_rate=16000):
    """Record mono float32 audio from the default input device.

    Args:
        duration: Recording length in seconds.
        sample_rate: Sampling rate in Hz.

    Returns:
        A ``(samples, sample_rate)`` tuple. ``samples`` is a 1-D float32
        NumPy array holding ``int(sample_rate * duration)`` samples; it is
        empty when that count is not positive.
    """
    chunk = 1024
    total_samples = int(sample_rate * duration)
    if total_samples <= 0:
        # Nothing to record; avoid opening the device and concatenating
        # an empty list (which would raise ValueError).
        return np.zeros(0, dtype=np.float32), sample_rate

    # Round up so the full requested duration is captured, then trim below.
    n_chunks = -(-total_samples // chunk)

    p = pyaudio.PyAudio()
    try:
        stream = p.open(
            format=pyaudio.paFloat32,
            channels=1,
            rate=sample_rate,
            input=True,
            frames_per_buffer=chunk,
        )
        try:
            st.write(f"Đang ghi âm... ({duration} giây)")
            frames = [
                np.frombuffer(stream.read(chunk), dtype=np.float32)
                for _ in range(n_chunks)
            ]
        finally:
            # Always release the stream, even if a read fails midway.
            stream.stop_stream()
            stream.close()
    finally:
        # Always release the PyAudio instance, even if open() fails.
        p.terminate()

    return np.concatenate(frames)[:total_samples], sample_rate
# User interface: pick a duration, record on button press, then transcribe.
st.title("Whisper Turbo với Adapter")
duration = st.slider("Thời gian ghi âm (giây):", 1, 10, 5)
if st.button("Ghi âm"):
    audio, sample_rate = record_audio(duration)
    # Convert the raw waveform into the model's input features on the
    # same device as the model.
    input_features = processor(
        audio, sampling_rate=sample_rate, return_tensors="pt"
    ).input_features.to(device)
    with torch.no_grad():
        # Pass the features by keyword: some PEFT wrappers' generate()
        # accept keyword arguments only, and the base Whisper model accepts
        # ``input_features`` by keyword as well.
        predicted_ids = model.generate(input_features=input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    st.write("**Kết quả:**", transcription)