import React, { useState, useEffect, useRef } from 'react';
import MicIcon from '@mui/icons-material/Mic';
import StopIcon from '@mui/icons-material/Stop';
import styles from './page.module.css';

import useSpeechRecognition from './hooks/useSpeechRecognition';
import useAudioManager from './hooks/useAudioManager';
import { useMicVAD } from "@ricky0123/vad-react";
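// onnxruntime-web fetches its .wasm binaries at runtime. The path below assumes
// the ort-wasm*.wasm files (and the VAD assets referenced further down) were
// copied into /_next/static/chunks/ at build time (e.g. via a webpack copy step
// in next.config.js); adjust the path if your static assets live elsewhere.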
import * as ort from "onnxruntime-web";
ort.env.wasm.wasmPaths = "/_next/static/chunks/";
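// Pick the first audio MIME type this browser's MediaRecorder can record to;
// returns null when none of the candidates are supported.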
const getMimeType = (): string | null => {
  const types = ["audio/webm", "audio/mp4", "audio/ogg", "audio/wav", "audio/aac"];
  for (const type of types) {
    if (MediaRecorder.isTypeSupported(type)) {
      return type;
    }
  }
  return null;
};
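// Props supplied by the parent that owns the text input state; `handleSubmit`
// is wired to the form's onSubmit below.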
interface VoiceInputFormProps {
  handleSubmit: (event: React.FormEvent<HTMLFormElement>) => void;
  input: string;
  setInput: React.Dispatch<React.SetStateAction<string>>;
}
const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, setInput }) => {
  const [isRecording, setIsRecording] = useState(false);
  const { startListening, stopListening, recognizedText } = useSpeechRecognition();
  const { setAudioFromRecording } = useAudioManager();

  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const audioChunksRef = useRef<BlobPart[]>([]);
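  // Stop any live audio tracks and drop buffered chunks so the next recording
  // starts from a clean slate.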
  const cleanupRecording = () => {
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stream.getTracks().forEach(track => track.stop());
      mediaRecorderRef.current = null;
    }
    audioChunksRef.current = [];
  };
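  // Mirror newly recognized speech into the controlled text input.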
  useEffect(() => {
    if (recognizedText) {
      setInput(recognizedText);
    }
  }, [recognizedText, setInput]);
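  // Request microphone access and start buffering audio with MediaRecorder.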
  const startRecording = async () => {
    cleanupRecording();

    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // getMimeType already checks isTypeSupported, so use its result directly.
      const mimeType = getMimeType();
      const recorderOptions = mimeType ? { mimeType } : {};

      mediaRecorderRef.current = new MediaRecorder(stream, recorderOptions);

      mediaRecorderRef.current.ondataavailable = (event: BlobEvent) => {
        audioChunksRef.current.push(event.data);
      };

      mediaRecorderRef.current.start();
    } catch (err) {
      console.error("Error accessing media devices:", err);
    }
  };
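  // Resolve with the buffered audio as a single Blob once MediaRecorder has
  // flushed its final chunk; reject if nothing is currently being recorded.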
  const stopRecording = async (): Promise<Blob> => {
    return new Promise((resolve, reject) => {
      const recorder = mediaRecorderRef.current;
      if (recorder && recorder.state === "recording") {
        recorder.onstop = () => {
          const audioBlob = new Blob(audioChunksRef.current, { type: recorder.mimeType });
          audioChunksRef.current = [];
          resolve(audioBlob);
        };
        recorder.stop();
      } else {
        reject(new Error("MediaRecorder is not recording"));
      }
    });
  };
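  // Voice activity detection (Silero VAD running in the browser). When the VAD
  // reports the end of speech, finish the recording and feed the audio to the
  // speech-recognition hook.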
  const vad = useMicVAD({
    modelURL: "/_next/static/chunks/silero_vad.onnx",
    workletURL: "/_next/static/chunks/vad.worklet.bundle.min.js",
    startOnLoad: false,
    onSpeechEnd: async (audio) => {
      if (isRecording) {
        stopListening();
        const recordedBlob = await stopRecording();
        setAudioFromRecording(recordedBlob);
        const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
        startListening(audioBuffer);
        setIsRecording(false);
      }
    },
  });
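  // Mic button handler: either stop an in-progress recording and transcribe it,
  // or arm the VAD and start a new recording.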
  const handleRecording = async () => {
    if (isRecording) {
      stopListening();
      try {
        const recordedBlob = await stopRecording();
        setAudioFromRecording(recordedBlob);
        const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
        startListening(audioBuffer);
      } catch (err) {
        console.error("Failed to stop recording:", err);
      }
      cleanupRecording();
    } else {
      vad.toggle();
      await startRecording();
    }
    setIsRecording(!isRecording);
  };
  return (
    <div>
      <form onSubmit={handleSubmit} className={styles.form}>
        <input
          type="text"
          value={input}
          className={styles.input}
          onChange={(e) => setInput(e.target.value)}
          placeholder="Speak or type..."
        />
      </form>
      <button onClick={handleRecording} className={styles.button}>
        {isRecording ? <StopIcon /> : <MicIcon />}
      </button>
    </div>
  );
};
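// Decode a recorded Blob into an AudioBuffer (raw PCM) so it can be handed to
// the speech-recognition hook.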
const convertBlobToAudioBuffer = async (blob: Blob): Promise<AudioBuffer> => {
  const audioContext = new AudioContext();
  const arrayBuffer = await blob.arrayBuffer();
  const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
  // Close the temporary context so we don't leak one per recording.
  await audioContext.close();
  return audioBuffer;
};
export default VoiceInputForm;