import React, { useState, useEffect, useRef } from 'react';
import MicIcon from '@mui/icons-material/Mic';
import StopIcon from '@mui/icons-material/Stop';
import styles from './page.module.css';

import useSpeechRecognition from './hooks/useSpeechRecognition';
import useAudioManager from './hooks/useAudioManager';
import { useMicVAD } from "@ricky0123/vad-react";

import * as ort from "onnxruntime-web";
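// onnxruntime-web fetches its .wasm binaries at runtime; point it at the same
// static-chunks path the VAD assets below are served from (assumes those files
// are copied there at build time, e.g. by a webpack copy plugin).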
ort.env.wasm.wasmPaths = "/_next/static/chunks/";

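// Return the first audio container this browser's MediaRecorder can encode,
// or null if none of the common types are supported.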
const getMimeType = (): string | null => {
    const types = ["audio/webm", "audio/mp4", "audio/ogg", "audio/wav", "audio/aac"];
    for (const type of types) {
        if (MediaRecorder.isTypeSupported(type)) {
            return type;
        }
    }
    return null;
};

interface VoiceInputFormProps {
    handleSubmit: React.FormEventHandler<HTMLFormElement>;
    input: string;
    setInput: React.Dispatch<React.SetStateAction<string>>;
}

const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, setInput }) => {
    const [isRecording, setIsRecording] = useState(false);
    const { startListening, stopListening, recognizedText } = useSpeechRecognition();
    const { setAudioFromRecording } = useAudioManager();

    const mediaRecorderRef = useRef<MediaRecorder | null>(null);
    const audioChunksRef = useRef<BlobPart[]>([]);

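    // Stop every track on the captured stream so the browser releases the
    // microphone, then drop any buffered chunks.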
    const cleanupRecording = () => {
        if (mediaRecorderRef.current) {
            mediaRecorderRef.current.stream.getTracks().forEach(track => track.stop());
            mediaRecorderRef.current = null;
        }
        audioChunksRef.current = [];
    };


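    // Mirror newly recognized speech into the controlled text input.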
    useEffect(() => {
        if (recognizedText) {
            setInput(recognizedText);
        }
    }, [recognizedText, setInput]);

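    // Request the microphone and start buffering audio chunks until
    // stopRecording() is called.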
    const startRecording = async () => {
        cleanupRecording(); // Clean up any existing recording resources
    
        try {
            // Simplified constraints for broader compatibility
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            // getMimeType() already verified support, so no second isTypeSupported check is needed
            const mimeType = getMimeType();
            const recorderOptions: MediaRecorderOptions = mimeType ? { mimeType } : {};
    
            mediaRecorderRef.current = new MediaRecorder(stream, recorderOptions);
    
            mediaRecorderRef.current.ondataavailable = (event: BlobEvent) => {
                // Ignore the empty chunks some browsers emit when the recorder stops
                if (event.data.size > 0) {
                    audioChunksRef.current.push(event.data);
                }
            };
    
            mediaRecorderRef.current.start();
        } catch (err) {
            console.error("Error accessing media devices:", err);
        }
    };
    

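    // Resolve with the assembled Blob once the recorder fires onstop;
    // reject if no recording is in progress.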
    const stopRecording = async (): Promise<Blob> => {
        return new Promise((resolve, reject) => {
            const recorder = mediaRecorderRef.current;
            if (recorder && recorder.state === "recording") {
                recorder.onstop = () => {
                    const audioBlob = new Blob(audioChunksRef.current, { type: recorder.mimeType });
                    audioChunksRef.current = [];
                    resolve(audioBlob);
                };
                recorder.stop();
            } else {
                reject(new Error("MediaRecorder is not recording"));
            }
        });
    };

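    // Voice-activity detection (Silero VAD via onnxruntime-web). When the
    // user stops speaking, finalize the in-flight recording through the same
    // flow the stop button uses.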
    const vad = useMicVAD({
        modelURL: "/_next/static/chunks/silero_vad.onnx",
        workletURL: "/_next/static/chunks/vad.worklet.bundle.min.js",
        startOnLoad: false,
        // The VAD-provided audio is unused; we finalize our own MediaRecorder capture instead.
        onSpeechEnd: async () => {
            if (isRecording) {
                stopListening();
                const recordedBlob = await stopRecording();
                setAudioFromRecording(recordedBlob);
                const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
                startListening(audioBuffer);
                cleanupRecording();
                setIsRecording(false); // explicit, to avoid acting on a stale closure value
            }
        },
    });


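    // Manual mic/stop toggle; mirrors the VAD-driven stop path above.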
    const handleRecording = async () => {
        if (isRecording) {
            vad.pause(); // stop voice-activity detection along with the recorder
            stopListening();
            const recordedBlob = await stopRecording();
            if (recordedBlob) {
                setAudioFromRecording(recordedBlob);
                const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
                startListening(audioBuffer);
            }
            cleanupRecording(); // Clean up resources after stopping recording
        } else {
            vad.start();
            await startRecording();
        }
        setIsRecording(!isRecording);
    };


    return (
        <div>
            <form onSubmit={handleSubmit} className={styles.form}>
                <input
                    type="text"
                    value={input}
                    className={styles.input}
                    onChange={(e) => setInput(e.target.value)}
                    placeholder="Speak or type..."
                />
            </form>
            <button onClick={handleRecording} className={styles.button}>
                {isRecording ? <StopIcon /> : <MicIcon />}
            </button>
        </div>
    );
};

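// Decode a recorded Blob into an AudioBuffer for the speech-recognition hook.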
const convertBlobToAudioBuffer = async (blob: Blob): Promise<AudioBuffer> => {
    const audioContext = new AudioContext();
    try {
        const arrayBuffer = await blob.arrayBuffer();
        return await audioContext.decodeAudioData(arrayBuffer);
    } finally {
        // Close the throwaway context; the decoded AudioBuffer remains usable afterwards
        await audioContext.close();
    }
};

export default VoiceInputForm;
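
// Example wiring (a sketch; `ChatPage` and `sendMessage` are hypothetical and
// not part of this module):
//
// const ChatPage = () => {
//     const [input, setInput] = useState("");
//     const handleSubmit: React.FormEventHandler<HTMLFormElement> = (e) => {
//         e.preventDefault();
//         sendMessage(input);
//         setInput("");
//     };
//     return <VoiceInputForm handleSubmit={handleSubmit} input={input} setInput={setInput} />;
// };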