matt HOFFNER
add isComplete value to hook for easier access to autosending messages once complete
862ccf9
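
The hook change itself is not shown in this diff. As a rough sketch of what exposing the new flag from useTranscriber might look like (the state handling and the worker's "complete" message below are assumptions; only the isComplete name comes from the commit message), ahead of the component file that consumes it:

import { useCallback, useState } from "react";

// Hypothetical sketch only, not the actual hook source.
export function useTranscriber() {
  const [isComplete, setIsComplete] = useState(false);
  // ...existing output / isBusy / progressItems state...

  const start = useCallback((audioData?: AudioBuffer) => {
    setIsComplete(false); // clear the flag whenever a new transcription begins
    // ...hand the audio off to the Whisper worker as before...
  }, []);

  // In the worker's message handler, flip the flag on the final result, e.g.:
  //   case "complete": setOutput(message.data); setIsComplete(true); break;

  return { start, isComplete /* plus output, isBusy, progressItems, ... */ };
}
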
import React, { useState, useEffect, useRef, useCallback } from 'react';
import styles from './page.module.css';
import { useMicVAD } from "@ricky0123/vad-react";
import * as ort from "onnxruntime-web";
import MicIcon from '@mui/icons-material/Mic';
import StopIcon from '@mui/icons-material/Stop';
import { webmFixDuration } from './BlobFix';
import Progress from './progress';
import { useTranscriber } from "./hooks/useTranscriber";

ort.env.wasm.wasmPaths = "/_next/static/chunks/";
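
// Note (illustrative, not part of this diff): the wasmPaths above, and the
// modelURL/workletURL passed to useMicVAD below, assume the onnxruntime-web
// wasm binaries and the Silero VAD assets have been copied into
// /_next/static/chunks/. One common way to do that is copy-webpack-plugin in
// next.config.js; a sketch, with the node_modules paths as assumptions:
//
//   const CopyPlugin = require("copy-webpack-plugin");
//   module.exports = {
//     webpack: (config) => {
//       config.plugins.push(
//         new CopyPlugin({
//           patterns: [
//             { from: "node_modules/@ricky0123/vad-web/dist/silero_vad.onnx", to: "static/chunks/" },
//             { from: "node_modules/@ricky0123/vad-web/dist/vad.worklet.bundle.min.js", to: "static/chunks/" },
//             { from: "node_modules/onnxruntime-web/dist/*.wasm", to: "static/chunks/[name][ext]" },
//           ],
//         })
//       );
//       return config;
//     },
//   };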

interface VoiceInputFormProps {
  handleSubmit: any;
  input: string;
  setInput: React.Dispatch<React.SetStateAction<string>>;
}

function getMimeType() {
  const types = [
    "audio/webm",
    "audio/mp4",
    "audio/ogg",
    "audio/wav",
    "audio/aac",
  ];
  // Return the first container format this browser's MediaRecorder supports
  for (const type of types) {
    if (MediaRecorder.isTypeSupported(type)) {
      return type;
    }
  }
  return undefined;
}

const convertBlobToAudioBuffer = async (blob: Blob): Promise<AudioBuffer> => {
  const audioContext = new AudioContext();
  const arrayBuffer = await blob.arrayBuffer();
  const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
  // Close the context so repeated recordings don't leak AudioContexts
  await audioContext.close();
  return audioBuffer;
};

const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, setInput }) => {
  const [recording, setRecording] = useState(false);
  const [duration, setDuration] = useState(0);
  const [recordedBlob, setRecordedBlob] = useState<Blob | null>(null);
  const streamRef = useRef<MediaStream | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const [recognizedText, setRecognizedText] = useState('');
  const transcriber = useTranscriber();

  const onFormSubmit = (e: React.FormEvent<HTMLFormElement>) => {
    e.preventDefault();
    handleSubmit(input); // Assuming handleSubmit now takes the input as an argument
  };

  const startListening = useCallback((audioData: any) => {
    transcriber.start(audioData);
  }, [transcriber]);

  useEffect(() => {
    if (transcriber.output) {
      setRecognizedText(transcriber.output.text);
    }
  }, [transcriber.output]);

  const handleTranscriptionComplete = () => {
    // Create a synthetic event object
    const syntheticEvent = {
      preventDefault: () => {},
      target: {
        // Mimic the structure of your form's event.target here
        elements: {
          // Assuming the form has an input field named 'input'
          input: {
            value: recognizedText
          }
        }
      }
    };
    handleSubmit(syntheticEvent);
  };

  useEffect(() => {
    if (transcriber.isComplete) {
      handleTranscriptionComplete();
    }
  }, [transcriber.isComplete]);

  useEffect(() => {
    if (recognizedText) {
      setInput(recognizedText);
    }
  }, [recognizedText, setInput]);

  useEffect(() => {
    const processRecording = async () => {
      if (recordedBlob) {
        // Process the blob for transcription
        const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
        startListening(audioBuffer); // Start the transcription process
        // Reset the blob state to prepare for a new recording
        setRecordedBlob(null);
      }
    };
    processRecording();
  }, [recordedBlob, startListening]);

  const vad = useMicVAD({
    modelURL: "/_next/static/chunks/silero_vad.onnx",
    workletURL: "/_next/static/chunks/vad.worklet.bundle.min.js",
    startOnLoad: false,
    onSpeechEnd: () => {
      // Stop once the VAD detects the end of speech;
      // stopRecording() also resets the recording state itself
      if (recording) {
        stopRecording();
      }
    },
  });

  const stopRecording = () => {
    if (
      mediaRecorderRef.current &&
      mediaRecorderRef.current.state === "recording"
    ) {
      mediaRecorderRef.current.stop(); // set state to inactive
      setDuration(0);
      setRecording(false);
    }
  };

  const startRecording = async () => {
    // Reset recording (if any)
    setRecordedBlob(null);
    // @ts-ignore
    transcriber.start();

    const startTime = Date.now();

    try {
      if (!streamRef.current) {
        streamRef.current = await navigator.mediaDevices.getUserMedia({
          audio: true,
        });
      }

      const mimeType = getMimeType();
      const mediaRecorder = new MediaRecorder(streamRef.current, {
        mimeType,
      });
      mediaRecorderRef.current = mediaRecorder;

      mediaRecorder.addEventListener("dataavailable", async (event) => {
        if (event.data.size > 0) {
          chunksRef.current.push(event.data);
        }
        if (mediaRecorder.state === "inactive") {
          const duration = Date.now() - startTime;
          // Received a stop event
          let blob = new Blob(chunksRef.current, { type: mimeType });
          if (mimeType === "audio/webm") {
            blob = await webmFixDuration(blob, duration, blob.type);
          }
          setRecordedBlob(blob);
          chunksRef.current = [];
        }
      });
      mediaRecorder.start();
      setRecording(true);
    } catch (error) {
      console.error("Error accessing microphone:", error);
    }
  };

  // Tick the duration counter once per second while recording
  useEffect(() => {
    if (recording) {
      const timer = setInterval(() => {
        setDuration((prevDuration) => prevDuration + 1);
      }, 1000);
      return () => {
        clearInterval(timer);
      };
    }
  }, [recording]);

  const handleToggleRecording = () => {
    vad.start(); // make sure the VAD is listening (startOnLoad is false)
    if (recording) {
      stopRecording();
    } else {
      startRecording();
    }
  };

  return (
    <div>
      {transcriber.progressItems.length > 0 && (
        <div>
          <label>
            Loading model files... (only run once)
          </label>
          {transcriber.progressItems.map((data) => (
            <div key={data.file}>
              <Progress
                text={data.file}
                percentage={data.progress}
              />
            </div>
          ))}
        </div>
      )}
      <form onSubmit={onFormSubmit} className={styles.form}>
        <input
          type="text"
          value={input}
          className={styles.input}
          onChange={(e) => setInput(e.target.value)}
          placeholder="Speak or type..."
        />
      </form>
      <button
        type='button'
        className={styles.button}
        onClick={handleToggleRecording}
      >
        {recording ? <StopIcon /> : <MicIcon />}
      </button>
    </div>
  );
};

export default VoiceInputForm;
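
For reference, here is a hypothetical parent wiring (ChatPage and the message-sending step below are illustrative assumptions, not part of this commit). It shows how one handleSubmit can serve both the typed submit, which passes the raw string, and the auto-submit after transcription, which passes a synthetic event:

import React, { useState } from 'react';
import VoiceInputForm from './VoiceInputForm';

// Hypothetical usage sketch; ChatPage and the send step are placeholders.
export default function ChatPage() {
  const [input, setInput] = useState('');

  const handleSubmit = (eventOrText: any) => {
    // VoiceInputForm calls this with a string on typed submit and with a
    // synthetic event after a completed transcription
    if (typeof eventOrText !== 'string') eventOrText?.preventDefault?.();
    const text = typeof eventOrText === 'string' ? eventOrText : input;
    if (text.trim()) {
      // forward `text` to the chat backend here
      setInput('');
    }
  };

  return (
    <VoiceInputForm handleSubmit={handleSubmit} input={input} setInput={setInput} />
  );
}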