import React, { useState, useEffect, useRef } from 'react';
import MicIcon from '@mui/icons-material/Mic';
import StopIcon from '@mui/icons-material/Stop';
import styles from './page.module.css';

import useSpeechRecognition from './hooks/useSpeechRecognition';
import useAudioManager from './hooks/useAudioManager';
import { useMicVAD } from "@ricky0123/vad-react";

import * as ort from "onnxruntime-web";
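// onnxruntime-web fetches its .wasm binaries at runtime; point it at the same
// static-chunks path the VAD assets below are served from (assumes those files
// are copied there at build time, e.g. by a webpack copy plugin).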
ort.env.wasm.wasmPaths = "/_next/static/chunks/";

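// Return the first audio container this browser's MediaRecorder can encode,
// or null if none of the common types are supported.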
const getMimeType = (): string | null => {
    const types = ["audio/webm", "audio/mp4", "audio/ogg", "audio/wav", "audio/aac"];
    for (const type of types) {
        if (MediaRecorder.isTypeSupported(type)) {
            return type;
        }
    }
    return null;
};

interface VoiceInputFormProps {
    handleSubmit: React.FormEventHandler<HTMLFormElement>;
    input: string;
    setInput: React.Dispatch<React.SetStateAction<string>>;
}

const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, setInput }) => {
    const [isRecording, setIsRecording] = useState(false);
    const { startListening, stopListening, recognizedText } = useSpeechRecognition();
    const { setAudioFromRecording } = useAudioManager();

    const mediaRecorderRef = useRef<MediaRecorder | null>(null);
    const audioChunksRef = useRef<BlobPart[]>([]);

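    // Stop every track on the captured stream so the browser releases the
    // microphone, then drop any buffered chunks.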
    const cleanupRecording = () => {
        if (mediaRecorderRef.current) {
            mediaRecorderRef.current.stream.getTracks().forEach(track => track.stop());
            mediaRecorderRef.current = null;
        }
        audioChunksRef.current = [];
    };


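    // Mirror newly recognized speech into the controlled text input.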
    useEffect(() => {
        if (recognizedText) {
            setInput(recognizedText);
        }
    }, [recognizedText, setInput]);

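    // Request the microphone and start buffering audio chunks until
    // stopRecording() is called.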
    const startRecording = async () => {
        cleanupRecording(); // Clean up any existing recording resources
    
        try {
            // Simplified constraints for broader compatibility
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            // getMimeType() already verified support, so no second isTypeSupported check is needed
            const mimeType = getMimeType();
            const recorderOptions: MediaRecorderOptions = mimeType ? { mimeType } : {};
    
            mediaRecorderRef.current = new MediaRecorder(stream, recorderOptions);
    
            mediaRecorderRef.current.ondataavailable = (event: BlobEvent) => {
                // Ignore the empty chunks some browsers emit when the recorder stops
                if (event.data.size > 0) {
                    audioChunksRef.current.push(event.data);
                }
            };
    
            mediaRecorderRef.current.start();
        } catch (err) {
            console.error("Error accessing media devices:", err);
        }
    };
    

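    // Resolve with the assembled Blob once the recorder fires onstop;
    // reject if no recording is in progress.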
    const stopRecording = async (): Promise<Blob> => {
        return new Promise((resolve, reject) => {
            const recorder = mediaRecorderRef.current;
            if (recorder && recorder.state === "recording") {
                recorder.onstop = () => {
                    const audioBlob = new Blob(audioChunksRef.current, { type: recorder.mimeType });
                    audioChunksRef.current = [];
                    resolve(audioBlob);
                };
                recorder.stop();
            } else {
                reject(new Error("MediaRecorder is not recording"));
            }
        });
    };

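    // Voice-activity detection (Silero VAD via onnxruntime-web). When the
    // user stops speaking, finalize the in-flight recording through the same
    // flow the stop button uses.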
    const vad = useMicVAD({
        modelURL: "/_next/static/chunks/silero_vad.onnx",
        workletURL: "/_next/static/chunks/vad.worklet.bundle.min.js",
        startOnLoad: false,
        // The VAD-provided audio is unused; we finalize our own MediaRecorder capture instead.
        onSpeechEnd: async () => {
            if (isRecording) {
                stopListening();
                const recordedBlob = await stopRecording();
                setAudioFromRecording(recordedBlob);
                const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
                startListening(audioBuffer);
                cleanupRecording();
                setIsRecording(false); // explicit, to avoid acting on a stale closure value
            }
        },
    });


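    // Manual mic/stop toggle; mirrors the VAD-driven stop path above.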
    const handleRecording = async () => {
        if (isRecording) {
            vad.pause(); // stop voice-activity detection along with the recorder
            stopListening();
            const recordedBlob = await stopRecording();
            if (recordedBlob) {
                setAudioFromRecording(recordedBlob);
                const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
                startListening(audioBuffer);
            }
            cleanupRecording(); // Clean up resources after stopping recording
        } else {
            vad.start();
            await startRecording();
        }
        setIsRecording(!isRecording);
    };


    return (
        <div>
            <form onSubmit={handleSubmit} className={styles.form}>
                <input
                    type="text"
                    value={input}
                    className={styles.input}
                    onChange={(e) => setInput(e.target.value)}
                    placeholder="Speak or type..."
                />
            </form>
            <button onClick={handleRecording} className={styles.button}>
                {isRecording ? <StopIcon /> : <MicIcon />}
            </button>
        </div>
    );
};

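// Decode a recorded Blob into an AudioBuffer for the speech-recognition hook.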
const convertBlobToAudioBuffer = async (blob: Blob): Promise<AudioBuffer> => {
    const audioContext = new AudioContext();
    try {
        const arrayBuffer = await blob.arrayBuffer();
        return await audioContext.decodeAudioData(arrayBuffer);
    } finally {
        // Close the throwaway context; the decoded AudioBuffer remains usable afterwards
        await audioContext.close();
    }
};

export default VoiceInputForm;
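
// Example wiring (a sketch; `ChatPage` and `sendMessage` are hypothetical and
// not part of this module):
//
// const ChatPage = () => {
//     const [input, setInput] = useState("");
//     const handleSubmit: React.FormEventHandler<HTMLFormElement> = (e) => {
//         e.preventDefault();
//         sendMessage(input);
//         setInput("");
//     };
//     return <VoiceInputForm handleSubmit={handleSubmit} input={input} setInput={setInput} />;
// };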