import React, { useState, useEffect, useRef, useCallback } from 'react';
import styles from './page.module.css';
import { useMicVAD } from "@ricky0123/vad-react";
import * as ort from "onnxruntime-web";
import MicIcon from '@mui/icons-material/Mic';
import StopIcon from '@mui/icons-material/Stop';
import { webmFixDuration } from './BlobFix';
import Progress from './progress';
import { useTranscriber } from "./hooks/useTranscriber";
import constants from './constants';
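// Point onnxruntime-web at the WASM binaries that Next.js serves from its static chunks directory.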
ort.env.wasm.wasmPaths = "/_next/static/chunks/";
interface VoiceInputFormProps {
  handleSubmit: (event: any) => void; // receives real form events and the synthetic one built in handleTranscriptionComplete
input: string;
setInput: React.Dispatch<React.SetStateAction<string>>;
}
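// Return the first audio container format this browser's MediaRecorder supports, or undefined if none do.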
function getMimeType() {
const types = [
"audio/webm",
"audio/mp4",
"audio/ogg",
"audio/wav",
"audio/aac",
];
  for (const type of types) {
    if (MediaRecorder.isTypeSupported(type)) {
      return type;
    }
  }
return undefined;
}
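// Decode a recorded blob into PCM at the sample rate the Whisper model expects (constants.SAMPLING_RATE).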
const convertBlobToAudioBuffer = async (blob: Blob): Promise<AudioBuffer> => {
  const audioContext = new AudioContext({
    sampleRate: constants.SAMPLING_RATE,
  });
  try {
    const arrayBuffer = await blob.arrayBuffer();
    return await audioContext.decodeAudioData(arrayBuffer);
  } finally {
    audioContext.close(); // each call creates a context; close it to avoid leaking audio handles
  }
};
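// Voice input widget: records microphone audio, uses VAD to detect when the user stops
// speaking, and transcribes the take in-browser before handing the text to the parent form.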
const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, setInput }) => {
const [recording, setRecording] = useState(false);
const [duration, setDuration] = useState(0);
const [recordedBlob, setRecordedBlob] = useState<Blob | null>(null);
const streamRef = useRef<MediaStream | null>(null);
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const chunksRef = useRef<Blob[]>([]);
const [recognizedText, setRecognizedText] = useState('');
const transcriber = useTranscriber();
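  // Feed a decoded AudioBuffer to the transcriber; memoized so effects can depend on it.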
const startListening = useCallback((audioData: any) => {
transcriber.start(audioData);
}, [transcriber]);
  useEffect(() => {
    if (transcriber.output) {
      setRecognizedText(transcriber.output.text);
    }
  }, [transcriber.output]);
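  // Build a minimal stand-in for a form event so the parent's submit handler can consume the transcript.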
const handleTranscriptionComplete = () => {
const syntheticEvent = {
preventDefault: () => {},
target: {
elements: {
input: {
value: recognizedText
}
}
}
};
handleSubmit(syntheticEvent);
};
  useEffect(() => {
    if (transcriber.isComplete) {
      handleTranscriptionComplete();
    }
    // Depend on the completion flag alone so the submit fires once per finished transcription
  }, [transcriber.isComplete]);
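  // Mirror the transcript into the text input so the user can review or edit it.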
useEffect(() => {
if (recognizedText) {
setInput(recognizedText);
}
}, [recognizedText, setInput]);
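  // When a finished blob lands in state, decode it and kick off transcription.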
useEffect(() => {
const processRecording = async () => {
if (recordedBlob) {
const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
startListening(audioBuffer); // Start the transcription process
        setRecordedBlob(null); // Clear the blob so the next recording starts fresh
}
};
processRecording();
}, [recordedBlob, startListening]);
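  // Silero VAD watches the mic in parallel with the recorder; its model and worklet are served as static chunks.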
  const vad = useMicVAD({
    modelURL: "/_next/static/chunks/silero_vad.onnx",
    workletURL: "/_next/static/chunks/vad.worklet.bundle.min.js",
    startOnLoad: false,
    onSpeechEnd: () => {
      // Hands-free stop: stopRecording() checks the recorder's own state, which avoids
      // acting on the potentially stale `recording` value captured by this closure.
      stopRecording();
    },
  });
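  // Stopping the recorder fires a final "dataavailable" event; the handler in startRecording assembles the blob there.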
const stopRecording = () => {
if (
mediaRecorderRef.current &&
mediaRecorderRef.current.state === "recording"
) {
mediaRecorderRef.current.stop(); // set state to inactive
setDuration(0);
setRecording(false);
}
};
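  // Lazily acquire the microphone (the stream is reused across takes), then record until stopped.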
const startRecording = async () => {
    // Clear any previous recording
    setRecordedBlob(null);
    // @ts-ignore -- called without audio to reset the transcriber before a new pass
    transcriber.start();
    const startTime = Date.now();
try {
if (!streamRef.current) {
streamRef.current = await navigator.mediaDevices.getUserMedia({
audio: true,
});
}
const mimeType = getMimeType();
const mediaRecorder = new MediaRecorder(streamRef.current, {
mimeType,
});
mediaRecorderRef.current = mediaRecorder;
mediaRecorder.addEventListener("dataavailable", async (event) => {
if (event.data.size > 0) {
chunksRef.current.push(event.data);
}
        if (mediaRecorder.state === "inactive") {
          // The recorder was stopped: assemble the collected chunks into a single blob
          const duration = Date.now() - startTime;
          let blob = new Blob(chunksRef.current, { type: mimeType });
          if (mimeType === "audio/webm") {
            // MediaRecorder omits the duration header from webm output; patch it in
            blob = await webmFixDuration(blob, duration, blob.type);
          }
          setRecordedBlob(blob);
          chunksRef.current = [];
        }
});
mediaRecorder.start();
setRecording(true);
} catch (error) {
console.error("Error accessing microphone:", error);
}
};
  useEffect(() => {
    if (recording) {
      const timer = setInterval(() => {
        setDuration((prevDuration) => prevDuration + 1);
      }, 1000);
      return () => {
        clearInterval(timer);
      };
    }
  }, [recording]);
  // Stop the microphone tracks on unmount so the browser's recording indicator goes away.
  useEffect(() => {
    return () => {
      streamRef.current?.getTracks().forEach((track) => track.stop());
    };
  }, []);
  const handleToggleRecording = () => {
    if (recording) {
      vad.pause(); // suspend voice-activity detection while idle
      stopRecording();
    } else {
      vad.start(); // VAD and the recorder run together so the end of speech can stop the take
      startRecording();
    }
  };
return (
<div className={styles.inputContainer}>
{transcriber.progressItems.length > 0 && (
<div>
          <label>
            Loading model files... (only runs once)
          </label>
{transcriber.progressItems.map((data) => (
<div key={data.file}>
<Progress
text={data.file}
percentage={data.progress}
/>
</div>
))}
</div>
)}
<form onSubmit={handleSubmit} className={styles.form}>
<input
type="text"
value={input}
className={styles.input}
onChange={(e) => setInput(e.target.value)}
placeholder="Speak or type..."
/>
</form>
<button
type='button'
className={styles.button}
onClick={handleToggleRecording}
>
{recording ? <StopIcon /> : <MicIcon />}
</button>
</div>
);
};
export default VoiceInputForm;