import React, { useState, useEffect, useRef } from 'react';
import MicIcon from '@mui/icons-material/Mic';
import StopIcon from '@mui/icons-material/Stop';
import styles from './page.module.css';
import useSpeechRecognition from './hooks/useSpeechRecognition';
import useAudioManager from './hooks/useAudioManager';
import { useMicVAD } from "@ricky0123/vad-react";
import * as ort from "onnxruntime-web";

// Serve onnxruntime's WASM binaries from Next.js's static chunks directory.
ort.env.wasm.wasmPaths = "/_next/static/chunks/";

// Return the first recording MIME type this browser supports, or null if none are.
const getMimeType = (): string | null => {
  const types = ["audio/webm", "audio/mp4", "audio/ogg", "audio/wav", "audio/aac"];
  for (const type of types) {
    if (MediaRecorder.isTypeSupported(type)) {
      return type;
    }
  }
  return null;
};

interface VoiceInputFormProps {
  handleSubmit: (e: React.FormEvent<HTMLFormElement>) => void;
  input: string;
  setInput: React.Dispatch<React.SetStateAction<string>>;
}

const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, setInput }) => {
  const [isRecording, setIsRecording] = useState(false);
  const { startListening, stopListening, recognizedText } = useSpeechRecognition();
  const { setAudioFromRecording } = useAudioManager();
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const audioChunksRef = useRef<Blob[]>([]);

  // Stop all tracks and drop buffered chunks so the next recording starts clean.
  const cleanupRecording = () => {
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stream.getTracks().forEach((track) => track.stop());
      mediaRecorderRef.current = null;
    }
    audioChunksRef.current = [];
  };

  // Mirror recognized speech into the text input.
  useEffect(() => {
    if (recognizedText) {
      setInput(recognizedText);
    }
  }, [recognizedText, setInput]);

  const startRecording = async () => {
    cleanupRecording(); // Clean up any existing recording resources
    try {
      // Simplified constraints for broader compatibility
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // Use the first supported MIME type, if any (getMimeType already checks
      // MediaRecorder.isTypeSupported, so no second check is needed here).
      let recorderOptions: MediaRecorderOptions = {};
      const mimeType = getMimeType();
      if (mimeType) {
        recorderOptions = { mimeType };
      }

      mediaRecorderRef.current = new MediaRecorder(stream, recorderOptions);
      mediaRecorderRef.current.ondataavailable = (event: BlobEvent) => {
        audioChunksRef.current.push(event.data);
      };
      mediaRecorderRef.current.start();
    } catch (err) {
      console.error("Error accessing media devices:", err);
    }
  };

  // Stop the active recorder and resolve with the assembled audio Blob.
  const stopRecording = (): Promise<Blob> => {
    return new Promise((resolve, reject) => {
      const recorder = mediaRecorderRef.current;
      if (recorder && recorder.state === "recording") {
        recorder.onstop = () => {
          const audioBlob = new Blob(audioChunksRef.current, { type: recorder.mimeType });
          audioChunksRef.current = [];
          resolve(audioBlob);
        };
        recorder.stop();
      } else {
        reject(new Error("MediaRecorder is not recording"));
      }
    });
  };

  // When voice activity detection sees speech end mid-recording, finalize the
  // clip and hand it to speech recognition.
  const vad = useMicVAD({
    modelURL: "/_next/static/chunks/silero_vad.onnx",
    workletURL: "/_next/static/chunks/vad.worklet.bundle.min.js",
    startOnLoad: false,
    onSpeechEnd: async () => {
      if (isRecording) {
        stopListening();
        const recordedBlob = await stopRecording();
        setAudioFromRecording(recordedBlob);
        const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
        startListening(audioBuffer);
        setIsRecording(false);
      }
    },
  });

  // Toggle between idle and recording from the mic button.
  const handleRecording = async () => {
    if (isRecording) {
      stopListening();
      const recordedBlob = await stopRecording();
      if (recordedBlob) {
        setAudioFromRecording(recordedBlob);
        const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
        startListening(audioBuffer);
      }
      cleanupRecording(); // Clean up resources after stopping recording
    } else {
      vad.toggle();
      await startRecording();
    }
    setIsRecording(!isRecording);
  };

  return (
    <div className={styles.container}>
      {/* The original JSX tags were stripped in extraction. The input below is
          recovered from the surviving fragment; the surrounding form, button,
          and class name are reconstructions. */}
      <form onSubmit={handleSubmit}>
        <input
          type="text"
          value={input}
          onChange={(e) => setInput(e.target.value)}
          placeholder="Speak or type..."
        />
        <button type="button" onClick={handleRecording}>
          {isRecording ? <StopIcon /> : <MicIcon />}
        </button>
      </form>
    </div>
  );
};

// Decode a recorded Blob into an AudioBuffer via the Web Audio API.
const convertBlobToAudioBuffer = async (blob: Blob): Promise<AudioBuffer> => {
  const audioContext = new AudioContext();
  const arrayBuffer = await blob.arrayBuffer();
  return await audioContext.decodeAudioData(arrayBuffer);
};

export default VoiceInputForm;
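
// Usage sketch (an assumption, not part of the original file): a parent page
// owns the text state and submit handler and passes both into VoiceInputForm.
// `Page` and its console.log handler are hypothetical placeholders.
//
// const Page = () => {
//   const [input, setInput] = useState('');
//   const handleSubmit = (e: React.FormEvent<HTMLFormElement>) => {
//     e.preventDefault(); // keep the page from reloading on submit
//     console.log('submitted:', input);
//   };
//   return <VoiceInputForm handleSubmit={handleSubmit} input={input} setInput={setInput} />;
// };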