import React, { useState, useEffect, useRef, useCallback } from 'react';
import styles from './page.module.css';
import { useMicVAD } from "@ricky0123/vad-react";
import * as ort from "onnxruntime-web";
import MicIcon from '@mui/icons-material/Mic';
import StopIcon from '@mui/icons-material/Stop';
import { webmFixDuration } from './BlobFix';
import Progress from './progress';
import { useTranscriber } from "./hooks/useTranscriber";
import constants from './constants';
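// Point onnxruntime-web at the WASM binaries that Next.js serves from its static chunks directory.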
ort.env.wasm.wasmPaths = "/_next/static/chunks/";
interface VoiceInputFormProps {
  handleSubmit: (event: any) => void; // receives real form events and the synthetic one built in handleTranscriptionComplete
input: string;
setInput: React.Dispatch<React.SetStateAction<string>>;
}
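// Return the first audio container format this browser's MediaRecorder supports, or undefined if none do.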
function getMimeType() {
const types = [
"audio/webm",
"audio/mp4",
"audio/ogg",
"audio/wav",
"audio/aac",
];
  for (const type of types) {
    if (MediaRecorder.isTypeSupported(type)) {
      return type;
    }
  }
return undefined;
}
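// Decode a recorded blob into PCM at the sample rate the Whisper model expects (constants.SAMPLING_RATE).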
const convertBlobToAudioBuffer = async (blob: Blob): Promise<AudioBuffer> => {
  const audioContext = new AudioContext({
    sampleRate: constants.SAMPLING_RATE,
  });
  try {
    const arrayBuffer = await blob.arrayBuffer();
    return await audioContext.decodeAudioData(arrayBuffer);
  } finally {
    audioContext.close(); // each call creates a context; close it to avoid leaking audio handles
  }
};
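// Voice input widget: records microphone audio, uses VAD to detect when the user stops
// speaking, and transcribes the take in-browser before handing the text to the parent form.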
const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, setInput }) => {
const [recording, setRecording] = useState(false);
const [duration, setDuration] = useState(0);
const [recordedBlob, setRecordedBlob] = useState<Blob | null>(null);
const streamRef = useRef<MediaStream | null>(null);
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const chunksRef = useRef<Blob[]>([]);
const [recognizedText, setRecognizedText] = useState('');
const transcriber = useTranscriber();
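  // Feed a decoded AudioBuffer to the transcriber; memoized so effects can depend on it.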
const startListening = useCallback((audioData: any) => {
transcriber.start(audioData);
}, [transcriber]);
  useEffect(() => {
    if (transcriber.output) {
      setRecognizedText(transcriber.output.text);
    }
  }, [transcriber.output]);
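  // Build a minimal stand-in for a form event so the parent's submit handler can consume the transcript.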
const handleTranscriptionComplete = () => {
const syntheticEvent = {
preventDefault: () => {},
target: {
elements: {
input: {
value: recognizedText
}
}
}
};
handleSubmit(syntheticEvent);
};
  useEffect(() => {
    if (transcriber.isComplete) {
      handleTranscriptionComplete();
    }
    // Depend on the completion flag alone so the submit fires once per finished transcription
  }, [transcriber.isComplete]);
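  // Mirror the transcript into the text input so the user can review or edit it.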
useEffect(() => {
if (recognizedText) {
setInput(recognizedText);
}
}, [recognizedText, setInput]);
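  // When a finished blob lands in state, decode it and kick off transcription.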
useEffect(() => {
const processRecording = async () => {
if (recordedBlob) {
const audioBuffer = await convertBlobToAudioBuffer(recordedBlob);
startListening(audioBuffer); // Start the transcription process
        setRecordedBlob(null); // Clear the blob so the next recording starts fresh
}
};
processRecording();
}, [recordedBlob, startListening]);
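  // Silero VAD watches the mic in parallel with the recorder; its model and worklet are served as static chunks.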
  const vad = useMicVAD({
    modelURL: "/_next/static/chunks/silero_vad.onnx",
    workletURL: "/_next/static/chunks/vad.worklet.bundle.min.js",
    startOnLoad: false,
    onSpeechEnd: () => {
      // Hands-free stop: stopRecording() checks the recorder's own state, which avoids
      // acting on the potentially stale `recording` value captured by this closure.
      stopRecording();
    },
  });
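  // Stopping the recorder fires a final "dataavailable" event; the handler in startRecording assembles the blob there.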
const stopRecording = () => {
if (
mediaRecorderRef.current &&
mediaRecorderRef.current.state === "recording"
) {
mediaRecorderRef.current.stop(); // set state to inactive
setDuration(0);
setRecording(false);
}
};
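  // Lazily acquire the microphone (the stream is reused across takes), then record until stopped.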
const startRecording = async () => {
    // Clear any previous recording
    setRecordedBlob(null);
    // @ts-ignore -- called without audio to reset the transcriber before a new pass
    transcriber.start();
    const startTime = Date.now();
try {
if (!streamRef.current) {
streamRef.current = await navigator.mediaDevices.getUserMedia({
audio: true,
});
}
const mimeType = getMimeType();
const mediaRecorder = new MediaRecorder(streamRef.current, {
mimeType,
});
mediaRecorderRef.current = mediaRecorder;
mediaRecorder.addEventListener("dataavailable", async (event) => {
if (event.data.size > 0) {
chunksRef.current.push(event.data);
}
        if (mediaRecorder.state === "inactive") {
          // The recorder was stopped: assemble the collected chunks into a single blob
          const duration = Date.now() - startTime;
          let blob = new Blob(chunksRef.current, { type: mimeType });
          if (mimeType === "audio/webm") {
            // MediaRecorder omits the duration header from webm output; patch it in
            blob = await webmFixDuration(blob, duration, blob.type);
          }
          setRecordedBlob(blob);
          chunksRef.current = [];
        }
});
mediaRecorder.start();
setRecording(true);
} catch (error) {
console.error("Error accessing microphone:", error);
}
};
  useEffect(() => {
    if (recording) {
      const timer = setInterval(() => {
        setDuration((prevDuration) => prevDuration + 1);
      }, 1000);
      return () => {
        clearInterval(timer);
      };
    }
  }, [recording]);
  // Stop the microphone tracks on unmount so the browser's recording indicator goes away.
  useEffect(() => {
    return () => {
      streamRef.current?.getTracks().forEach((track) => track.stop());
    };
  }, []);
  const handleToggleRecording = () => {
    if (recording) {
      vad.pause(); // suspend voice-activity detection while idle
      stopRecording();
    } else {
      vad.start(); // VAD and the recorder run together so the end of speech can stop the take
      startRecording();
    }
  };
return (
<div className={styles.inputContainer}>
{transcriber.progressItems.length > 0 && (
<div>
          <label>
            Loading model files... (only runs once)
          </label>
{transcriber.progressItems.map((data) => (
<div key={data.file}>
<Progress
text={data.file}
percentage={data.progress}
/>
</div>
))}
</div>
)}
<form onSubmit={handleSubmit} className={styles.form}>
<input
type="text"
value={input}
className={styles.input}
onChange={(e) => setInput(e.target.value)}
placeholder="Speak or type..."
/>
</form>
<button
type='button'
className={styles.button}
onClick={handleToggleRecording}
>
{recording ? <StopIcon /> : <MicIcon />}
</button>
</div>
);
};
export default VoiceInputForm;