import React, { useState, useEffect, useRef, useCallback } from 'react';

import styles from './page.module.css';

import { useMicVAD } from "@ricky0123/vad-react";
import * as ort from "onnxruntime-web";
import MicIcon from '@mui/icons-material/Mic';
import StopIcon from '@mui/icons-material/Stop';
import { webmFixDuration } from './BlobFix';
import Progress from './progress';
import { useTranscriber } from "./hooks/useTranscriber";
import constants from './constants';

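// onnxruntime-web loads its WASM binaries at runtime; point it at the path
// Next.js serves the bundled static chunks from.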
ort.env.wasm.wasmPaths = "/_next/static/chunks/";

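// Props supplied by the parent form: its submit handler plus the controlled
// value and setter for the text input.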
interface VoiceInputFormProps {
  handleSubmit: (event: React.FormEvent<HTMLFormElement>) => void;
  input: string;
  setInput: React.Dispatch<React.SetStateAction<string>>;
}

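// Pick the first audio container format the current browser's MediaRecorder supports.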
function getMimeType() {
  const types = [
    "audio/webm",
    "audio/mp4",
    "audio/ogg",
    "audio/wav",
    "audio/aac",
  ];
  for (const type of types) {
    if (MediaRecorder.isTypeSupported(type)) {
      return type;
    }
  }
  return undefined;
}

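// Decode a recorded Blob into an AudioBuffer at the project's configured
// sampling rate (constants.SAMPLING_RATE).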
const convertBlobToAudioBuffer = async (blob: Blob): Promise<AudioBuffer> => {
  const audioContext = new AudioContext({
    sampleRate: constants.SAMPLING_RATE,
  });
  const arrayBuffer = await blob.arrayBuffer();
  const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
  // Release the temporary AudioContext once decoding is done.
  await audioContext.close();
  return audioBuffer;
};

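/**
 * VoiceInputForm records microphone audio, stops automatically when the VAD
 * detects the end of speech, transcribes the recording in the browser via
 * useTranscriber, and pushes the recognized text into the parent form.
 */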
const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, setInput }) => {
  const [recording, setRecording] = useState(false);
  const [duration, setDuration] = useState(0);
  const [recordedBlob, setRecordedBlob] = useState<Blob | null>(null);

  const streamRef = useRef<MediaStream | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const [recognizedText, setRecognizedText] = useState('');
  const transcriber = useTranscriber();

  // Feed decoded audio into the transcriber.
  const startListening = useCallback((audioData: AudioBuffer) => {
    transcriber.start(audioData);
  }, [transcriber]);

  // Mirror the transcriber's latest output into local state.
  useEffect(() => {
    if (transcriber.output) {
      setRecognizedText(transcriber.output.text);
    }
  }, [transcriber.output]);

  // Build a minimal form-like event so the parent submit handler receives the
  // recognized text through its usual path.
  const handleTranscriptionComplete = () => {
    const syntheticEvent = {
      preventDefault: () => {},
      target: {
        elements: {
          input: {
            value: recognizedText,
          },
        },
      },
    };

    handleSubmit(syntheticEvent as unknown as React.FormEvent<HTMLFormElement>);
  };

  // Submit automatically once the transcription has finished.
  useEffect(() => {
    if (transcriber.isComplete) {
      handleTranscriptionComplete();
    }
  }, [transcriber.isComplete]);

  // Keep the text input in sync with the recognized speech.
  useEffect(() => {
    if (recognizedText) {
      setInput(recognizedText);
    }
  }, [recognizedText, setInput]);

  // Once a recording is available, decode it and hand it to the transcriber.
  useEffect(() => {
    const processRecording = async () => {
      if (recordedBlob) {
        const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
        startListening(audioBuffer);
        setRecordedBlob(null);
      }
    };

    processRecording();
  }, [recordedBlob, startListening]);

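  // Voice-activity detection: the Silero VAD model and audio worklet are served
  // from Next.js' static chunks. startOnLoad is false, so listening only begins
  // once the user presses the mic button.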
  const vad = useMicVAD({
    modelURL: "/_next/static/chunks/silero_vad.onnx",
    workletURL: "/_next/static/chunks/vad.worklet.bundle.min.js",
    startOnLoad: false,
    onSpeechEnd: () => {
      // Stop the active recording as soon as the VAD detects the end of speech.
      if (recording) {
        stopRecording();
      }
    },
  });

  const stopRecording = () => {
    if (
      mediaRecorderRef.current &&
      mediaRecorderRef.current.state === "recording"
    ) {
      mediaRecorderRef.current.stop(); // triggers the final "dataavailable" event
      setDuration(0);
      setRecording(false);
    }
  };

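  // Request (or reuse) the microphone stream and start a MediaRecorder.
  // Collected chunks are assembled into a Blob when recording stops.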
  const startRecording = async () => {
    setRecordedBlob(null);

    transcriber.start();

    const startTime = Date.now();

    try {
      // Reuse the microphone stream if we already have one.
      if (!streamRef.current) {
        streamRef.current = await navigator.mediaDevices.getUserMedia({
          audio: true,
        });
      }

      const mimeType = getMimeType();
      const mediaRecorder = new MediaRecorder(streamRef.current, {
        mimeType,
      });

      mediaRecorderRef.current = mediaRecorder;

      mediaRecorder.addEventListener("dataavailable", async (event) => {
        if (event.data.size > 0) {
          chunksRef.current.push(event.data);
        }
        if (mediaRecorder.state === "inactive") {
          const recordingDuration = Date.now() - startTime;

          let blob = new Blob(chunksRef.current, { type: mimeType });

          if (mimeType === "audio/webm") {
            // WebM blobs from MediaRecorder lack a duration header; patch it in.
            blob = await webmFixDuration(blob, recordingDuration, blob.type);
          }

          setRecordedBlob(blob);

          chunksRef.current = [];
        }
      });
      mediaRecorder.start();
      setRecording(true);
    } catch (error) {
      console.error("Error accessing microphone:", error);
    }
  };

  // Tick the duration counter once per second while recording.
  useEffect(() => {
    if (recording) {
      const timer = setInterval(() => {
        setDuration((prevDuration) => prevDuration + 1);
      }, 1000);

      return () => {
        clearInterval(timer);
      };
    }
  }, [recording]);

  // Release the microphone when the component unmounts.
  useEffect(() => {
    return () => {
      streamRef.current?.getTracks().forEach((track) => track.stop());
    };
  }, []);

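  // Mic button handler: start the VAD listener and the recorder together, or
  // stop both. (Assumes the useMicVAD hook exposes pause() alongside start().)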
  const handleToggleRecording = () => {
    if (recording) {
      vad.pause();
      stopRecording();
    } else {
      vad.start();
      startRecording();
    }
  };

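  // Render model-download progress (first use only), the text input, and the
  // record/stop toggle button.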
  return (
    <div>
      {transcriber.progressItems.length > 0 && (
        <div>
          <label>
            Loading model files... (this only happens once)
          </label>
          {transcriber.progressItems.map((data) => (
            <div key={data.file}>
              <Progress
                text={data.file}
                percentage={data.progress}
              />
            </div>
          ))}
        </div>
      )}
      <form onSubmit={handleSubmit} className={styles.form}>
        <input
          type="text"
          value={input}
          className={styles.input}
          onChange={(e) => setInput(e.target.value)}
          placeholder="Speak or type..."
        />
      </form>
      <button
        type='button'
        className={styles.button}
        onClick={handleToggleRecording}
      >
        {recording ? <StopIcon /> : <MicIcon />}
      </button>
    </div>
  );
};

export default VoiceInputForm;