matt HOFFNER
committed on
Commit
·
20635bb
1
Parent(s):
a74fc7e
browser testing
Browse files- app/hooks/useSpeechRecognition.ts +0 -29
- app/input.tsx +36 -12
- app/progress.tsx +19 -0
app/hooks/useSpeechRecognition.ts
DELETED
|
@@ -1,29 +0,0 @@
|
|
| 1 |
-
"use client";
|
| 2 |
-
|
| 3 |
-
import { useState, useEffect, useCallback } from "react";
|
| 4 |
-
import { useTranscriber } from "./useTranscriber";
|
| 5 |
-
|
| 6 |
-
const useSpeechRecognition = () => {
|
| 7 |
-
const [recognizedText, setRecognizedText] = useState('');
|
| 8 |
-
const transcriber = useTranscriber();
|
| 9 |
-
|
| 10 |
-
const startListening = useCallback((audioData: any) => {
|
| 11 |
-
if (!transcriber.isBusy && !transcriber.isModelLoading) {
|
| 12 |
-
transcriber.start(audioData);
|
| 13 |
-
}
|
| 14 |
-
}, [transcriber]);
|
| 15 |
-
|
| 16 |
-
const stopListening = useCallback(() => {
|
| 17 |
-
console.log("Stopped listening...", recognizedText);
|
| 18 |
-
}, [recognizedText]); // Updated dependency array
|
| 19 |
-
|
| 20 |
-
useEffect(() => {
|
| 21 |
-
if (transcriber.output && !transcriber.isBusy) {
|
| 22 |
-
setRecognizedText(transcriber.output.text);
|
| 23 |
-
}
|
| 24 |
-
}, [transcriber.output, transcriber.isBusy]);
|
| 25 |
-
|
| 26 |
-
return { startListening, stopListening, recognizedText };
|
| 27 |
-
};
|
| 28 |
-
|
| 29 |
-
export default useSpeechRecognition;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/input.tsx
CHANGED
|
@@ -1,11 +1,12 @@
|
|
| 1 |
-
import React, { useState, useEffect, useRef } from 'react';
|
| 2 |
import styles from './page.module.css';
|
| 3 |
-
import useSpeechRecognition from './hooks/useSpeechRecognition';
|
| 4 |
import { useMicVAD } from "@ricky0123/vad-react";
|
| 5 |
import * as ort from "onnxruntime-web";
|
| 6 |
import MicIcon from '@mui/icons-material/Mic';
|
| 7 |
import StopIcon from '@mui/icons-material/Stop';
|
| 8 |
import { webmFixDuration } from './BlobFix';
|
|
|
|
|
|
|
| 9 |
|
| 10 |
ort.env.wasm.wasmPaths = "/_next/static/chunks/";
|
| 11 |
|
|
@@ -46,9 +47,20 @@ const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, se
|
|
| 46 |
const streamRef = useRef<MediaStream | null>(null);
|
| 47 |
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
| 48 |
const chunksRef = useRef<Blob[]>([]);
|
| 49 |
-
const
|
|
|
|
| 50 |
|
| 51 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
useEffect(() => {
|
| 54 |
if (recognizedText) {
|
|
@@ -69,7 +81,7 @@ const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, se
|
|
| 69 |
};
|
| 70 |
|
| 71 |
processRecording();
|
| 72 |
-
}, [recordedBlob, startListening]);
|
| 73 |
|
| 74 |
const vad = useMicVAD({
|
| 75 |
modelURL: "/_next/static/chunks/silero_vad.onnx",
|
|
@@ -77,10 +89,7 @@ const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, se
|
|
| 77 |
startOnLoad: false,
|
| 78 |
onSpeechEnd: async () => {
|
| 79 |
if (recording) {
|
| 80 |
-
await stopRecording(); // Stop the recording
|
| 81 |
-
|
| 82 |
-
console.log('input', input);
|
| 83 |
-
|
| 84 |
setRecording(!recording); // Update the recording state
|
| 85 |
}
|
| 86 |
},
|
|
@@ -94,17 +103,17 @@ const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, se
|
|
| 94 |
mediaRecorderRef.current.stop(); // set state to inactive
|
| 95 |
setDuration(0);
|
| 96 |
setRecording(false);
|
| 97 |
-
vad.toggle();
|
| 98 |
}
|
| 99 |
};
|
| 100 |
|
| 101 |
const startRecording = async () => {
|
| 102 |
// Reset recording (if any)
|
| 103 |
setRecordedBlob(null);
|
| 104 |
-
vad.toggle();
|
| 105 |
|
| 106 |
let startTime = Date.now();
|
| 107 |
|
|
|
|
|
|
|
| 108 |
try {
|
| 109 |
if (!streamRef.current) {
|
| 110 |
streamRef.current = await navigator.mediaDevices.getUserMedia({
|
|
@@ -175,6 +184,21 @@ const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, se
|
|
| 175 |
|
| 176 |
return (
|
| 177 |
<div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
<form onSubmit={handleSubmit} className={styles.form}>
|
| 179 |
<input
|
| 180 |
type="text"
|
|
@@ -190,7 +214,7 @@ const VoiceInputForm: React.FC<VoiceInputFormProps> = ({ handleSubmit, input, se
|
|
| 190 |
onClick={handleToggleRecording}
|
| 191 |
>
|
| 192 |
{recording ? <StopIcon /> : <MicIcon />}
|
| 193 |
-
</button>
|
| 194 |
</div>
|
| 195 |
);
|
| 196 |
};
|
|
|
|
| 1 |
+
import React, { useState, useEffect, useRef, useCallback } from 'react';
|
| 2 |
import styles from './page.module.css';
|
|
|
|
| 3 |
import { useMicVAD } from "@ricky0123/vad-react";
|
| 4 |
import * as ort from "onnxruntime-web";
|
| 5 |
import MicIcon from '@mui/icons-material/Mic';
|
| 6 |
import StopIcon from '@mui/icons-material/Stop';
|
| 7 |
import { webmFixDuration } from './BlobFix';
|
| 8 |
+
import Progress from './progress';
|
| 9 |
+
import { useTranscriber } from "./hooks/useTranscriber";
|
| 10 |
|
| 11 |
ort.env.wasm.wasmPaths = "/_next/static/chunks/";
|
| 12 |
|
|
|
|
| 47 |
const streamRef = useRef<MediaStream | null>(null);
|
| 48 |
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
| 49 |
const chunksRef = useRef<Blob[]>([]);
|
| 50 |
+
const [recognizedText, setRecognizedText] = useState('');
|
| 51 |
+
const transcriber = useTranscriber();
|
| 52 |
|
| 53 |
+
const startListening = useCallback((audioData: any) => {
|
| 54 |
+
if (!transcriber.isBusy && !transcriber.isModelLoading) {
|
| 55 |
+
transcriber.start(audioData);
|
| 56 |
+
}
|
| 57 |
+
}, [transcriber]);
|
| 58 |
+
|
| 59 |
+
useEffect(() => {
|
| 60 |
+
if (transcriber.output) {
|
| 61 |
+
setRecognizedText(transcriber.output.text);
|
| 62 |
+
}
|
| 63 |
+
}, [transcriber.output, transcriber.isBusy]);
|
| 64 |
|
| 65 |
useEffect(() => {
|
| 66 |
if (recognizedText) {
|
|
|
|
| 81 |
};
|
| 82 |
|
| 83 |
processRecording();
|
| 84 |
+
}, [recording, recordedBlob, startListening]);
|
| 85 |
|
| 86 |
const vad = useMicVAD({
|
| 87 |
modelURL: "/_next/static/chunks/silero_vad.onnx",
|
|
|
|
| 89 |
startOnLoad: false,
|
| 90 |
onSpeechEnd: async () => {
|
| 91 |
if (recording) {
|
| 92 |
+
await stopRecording(); // Stop the recording
|
|
|
|
|
|
|
|
|
|
| 93 |
setRecording(!recording); // Update the recording state
|
| 94 |
}
|
| 95 |
},
|
|
|
|
| 103 |
mediaRecorderRef.current.stop(); // set state to inactive
|
| 104 |
setDuration(0);
|
| 105 |
setRecording(false);
|
|
|
|
| 106 |
}
|
| 107 |
};
|
| 108 |
|
| 109 |
const startRecording = async () => {
|
| 110 |
// Reset recording (if any)
|
| 111 |
setRecordedBlob(null);
|
|
|
|
| 112 |
|
| 113 |
let startTime = Date.now();
|
| 114 |
|
| 115 |
+
vad.start();
|
| 116 |
+
|
| 117 |
try {
|
| 118 |
if (!streamRef.current) {
|
| 119 |
streamRef.current = await navigator.mediaDevices.getUserMedia({
|
|
|
|
| 184 |
|
| 185 |
return (
|
| 186 |
<div>
|
| 187 |
+
{transcriber.progressItems.length > 0 && (
|
| 188 |
+
<div>
|
| 189 |
+
<label>
|
| 190 |
+
Loading model files... (only run once)
|
| 191 |
+
</label>
|
| 192 |
+
{transcriber.progressItems.map((data) => (
|
| 193 |
+
<div key={data.file}>
|
| 194 |
+
<Progress
|
| 195 |
+
text={data.file}
|
| 196 |
+
percentage={data.progress}
|
| 197 |
+
/>
|
| 198 |
+
</div>
|
| 199 |
+
))}
|
| 200 |
+
</div>
|
| 201 |
+
)}
|
| 202 |
<form onSubmit={handleSubmit} className={styles.form}>
|
| 203 |
<input
|
| 204 |
type="text"
|
|
|
|
| 214 |
onClick={handleToggleRecording}
|
| 215 |
>
|
| 216 |
{recording ? <StopIcon /> : <MicIcon />}
|
| 217 |
+
</button>
|
| 218 |
</div>
|
| 219 |
);
|
| 220 |
};
|
app/progress.tsx
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
export default function Progress({
|
| 2 |
+
text,
|
| 3 |
+
percentage,
|
| 4 |
+
}: {
|
| 5 |
+
text: string;
|
| 6 |
+
percentage: number;
|
| 7 |
+
}) {
|
| 8 |
+
percentage = percentage ?? 0;
|
| 9 |
+
return (
|
| 10 |
+
<div className='mt-0.5 w-full relative text-sm text-white background-bg-cyan-400 bg-gray-200 border-1 border-gray-400 rounded-lg text-left overflow-hidden'>
|
| 11 |
+
<div
|
| 12 |
+
className='top-0 h-full bg-blue-500 whitespace-nowrap px-2'
|
| 13 |
+
style={{ width: `${percentage}%` }}
|
| 14 |
+
>
|
| 15 |
+
{text} ({`${percentage.toFixed(2)}%`})
|
| 16 |
+
</div>
|
| 17 |
+
</div>
|
| 18 |
+
);
|
| 19 |
+
}
|