import React, { useState, useEffect, useRef, useCallback } from 'react';
import styles from './page.module.css';
import { useMicVAD } from "@ricky0123/vad-react";
import * as ort from "onnxruntime-web";
import MicIcon from '@mui/icons-material/Mic';
import StopIcon from '@mui/icons-material/Stop';
import { webmFixDuration } from './BlobFix';
import Progress from './progress';
import { useTranscriber } from "./hooks/useTranscriber";

// Tell onnxruntime-web where Next.js serves the bundled WASM files from.
ort.env.wasm.wasmPaths = "/_next/static/chunks/";

interface VoiceInputFormProps {
  handleSubmit: any;
  input: string;
  setInput: React.Dispatch<React.SetStateAction<string>>;
}

// Return the first audio MIME type supported by this browser's MediaRecorder.
function getMimeType() {
  const types = [
    "audio/webm",
    "audio/mp4",
    "audio/ogg",
    "audio/wav",
    "audio/aac",
  ];
  for (let i = 0; i < types.length; i++) {
    if (MediaRecorder.isTypeSupported(types[i])) {
      return types[i];
    }
  }
  return undefined;
}

// Decode a recorded Blob into an AudioBuffer the transcriber can consume.
const convertBlobToAudioBuffer = async (blob: Blob): Promise<AudioBuffer> => {
  const audioContext = new AudioContext();
  const arrayBuffer = await blob.arrayBuffer();
  return await audioContext.decodeAudioData(arrayBuffer);
};

const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, setInput }) => {
  const [recording, setRecording] = useState(false);
  const [duration, setDuration] = useState(0);
  const [recordedBlob, setRecordedBlob] = useState<Blob | null>(null);
  const [recognizedText, setRecognizedText] = useState('');

  const streamRef = useRef<MediaStream | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);

  const transcriber = useTranscriber();

  const onFormSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    handleSubmit(input); // Assuming handleSubmit takes the current input as an argument
  };

  const startListening = useCallback((audioData: AudioBuffer) => {
    transcriber.start(audioData);
  }, [transcriber]);

  // Mirror the transcriber's latest output into local state.
  useEffect(() => {
    if (transcriber.output) {
      setRecognizedText(transcriber.output.text);
    }
  }, [transcriber.output, transcriber.isBusy]);

  const handleTranscriptionComplete = () => {
    // Create a synthetic event object that mimics the structure of the form's
    // event.target, assuming the form has an input field named 'input'.
    const syntheticEvent = {
      preventDefault: () => {},
      target: {
        elements: {
          input: { value: recognizedText },
        },
      },
    };
    handleSubmit(syntheticEvent);
  };

  // Submit automatically once the transcriber reports completion.
  useEffect(() => {
    if (transcriber.isComplete) {
      handleTranscriptionComplete();
    }
  }, [transcriber.isComplete]);

  // Keep the controlled input in sync with the recognized text.
  useEffect(() => {
    if (recognizedText) {
      setInput(recognizedText);
    }
  }, [recognizedText, setInput]);

  // When a recording finishes, decode it and hand it to the transcriber.
  useEffect(() => {
    const processRecording = async () => {
      if (recordedBlob) {
        const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
        startListening(audioBuffer); // Start the transcription process

        // Reset the blob state to prepare for a new recording.
        setRecordedBlob(null);
      }
    };

    processRecording();
  }, [recordedBlob, startListening]);

  const stopRecording = () => {
    if (
      mediaRecorderRef.current &&
      mediaRecorderRef.current.state === "recording"
    ) {
      mediaRecorderRef.current.stop(); // sets the recorder state to "inactive"
      setDuration(0);
      setRecording(false);
    }
  };

  // Voice activity detection: stop the recording automatically when speech ends.
  const vad = useMicVAD({
    modelURL: "/_next/static/chunks/silero_vad.onnx",
    workletURL: "/_next/static/chunks/vad.worklet.bundle.min.js",
    startOnLoad: false,
    onSpeechEnd: async () => {
      if (recording) {
        await stopRecording(); // Stop the recording
        setRecording(false); // Update the recording state
      }
    },
  });

  const startRecording = async () => {
    // Reset any previous recording.
    setRecordedBlob(null);

    // @ts-ignore
    transcriber.start();

    const startTime = Date.now();

    try {
      if (!streamRef.current) {
        streamRef.current = await navigator.mediaDevices.getUserMedia({
          audio: true,
        });
      }

      const mimeType = getMimeType();
      const mediaRecorder = new MediaRecorder(streamRef.current, {
        mimeType,
      });
      mediaRecorderRef.current = mediaRecorder;

      mediaRecorder.addEventListener("dataavailable", async (event) => {
        if (event.data.size > 0) {
          chunksRef.current.push(event.data);
        }
        if (mediaRecorder.state === "inactive") {
          // Received a stop event: assemble the chunks into a single Blob.
          const duration = Date.now() - startTime;
          let blob = new Blob(chunksRef.current, { type: mimeType });

          if (mimeType === "audio/webm") {
            // WebM blobs from MediaRecorder often lack duration metadata; patch it in.
            blob = await webmFixDuration(blob, duration, blob.type);
          }

          setRecordedBlob(blob);
          chunksRef.current = [];
        }
      });

      mediaRecorder.start();
      setRecording(true);
    } catch (error) {
      console.error("Error accessing microphone:", error);
    }
  };

  // Track elapsed recording time in seconds while recording is active.
  useEffect(() => {
    if (recording) {
      const timer = setInterval(() => {
        setDuration((prevDuration) => prevDuration + 1);
      }, 1000);
      return () => clearInterval(timer);
    }
  }, [recording]);

  const handleToggleRecording = () => {
    vad.start();
    if (recording) {
      stopRecording();
    } else {
      startRecording();
    }
  };

  // Class names below refer to page.module.css; the specific keys are placeholders.
  return (
    <div className={styles.voiceInputForm}>
      {transcriber.progressItems.length > 0 && (
        <div>
          {/* Model loading progress; the Progress props assume the local Progress component's API. */}
          {transcriber.progressItems.map((data) => (
            <Progress key={data.file} text={data.file} percentage={data.progress} />
          ))}
        </div>
      )}
      <form onSubmit={onFormSubmit}>
        <input
          type="text"
          value={input}
          onChange={(e) => setInput(e.target.value)}
          placeholder="Speak or type..."
        />
        <button type="button" onClick={handleToggleRecording}>
          {recording ? <StopIcon /> : <MicIcon />}
        </button>
      </form>
    </div>
  );
};

export default VoiceInputForm;