qfuxa committed on
Commit
69bd3cb
·
1 Parent(s): b564d0f

src/web to web

Browse files
Files changed (1) hide show
  1. web/live_transcription.html +425 -0
web/live_transcription.html ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <title>Audio Transcription</title>
8
+ <style>
9
+ body {
10
+ font-family: 'Inter', sans-serif;
11
+ margin: 20px;
12
+ text-align: center;
13
+ }
14
+
15
+ #recordButton {
16
+ width: 80px;
17
+ height: 80px;
18
+ border: none;
19
+ border-radius: 50%;
20
+ background-color: white;
21
+ cursor: pointer;
22
+ transition: background-color 0.3s ease, transform 0.2s ease;
23
+ border: 1px solid rgb(252, 230, 229);
24
+ border-bottom: 5px solid rgb(252, 230, 229);
25
+ display: flex;
26
+ align-items: center;
27
+ justify-content: center;
28
+ }
29
+
30
+ #recordButton.recording {
31
+ background-color: rgba(255, 130, 123, 0.032);
32
+ border: 1px solid rgb(240, 198, 195);
33
+ border-bottom: 5px solid rgb(240, 198, 195);
34
+ color: white;
35
+ }
36
+
37
+ #recordButton:active {
38
+ transform: scale(0.95);
39
+ }
40
+
41
+ .shape-container {
42
+ width: 40px;
43
+ height: 40px;
44
+ display: flex;
45
+ align-items: center;
46
+ justify-content: center;
47
+ }
48
+
49
+ .shape {
50
+ width: 40px;
51
+ height: 40px;
52
+ background-color: rgb(209, 61, 53);
53
+ border-radius: 50%;
54
+ transition: border-radius 0.3s ease, background-color 0.3s ease;
55
+ }
56
+
57
+ #recordButton.recording .shape {
58
+ border-radius: 10px;
59
+ width: 30px;
60
+ height: 30px;
61
+
62
+ }
63
+
64
+ #status {
65
+ margin-top: 20px;
66
+ font-size: 16px;
67
+ color: #333;
68
+ }
69
+
70
+ .settings-container {
71
+ display: flex;
72
+ justify-content: center;
73
+ align-items: center;
74
+ gap: 15px;
75
+ margin-top: 20px;
76
+ }
77
+
78
+ .settings {
79
+ display: flex;
80
+ flex-direction: column;
81
+ align-items: flex-start;
82
+ gap: 5px;
83
+ }
84
+
85
+ #chunkSelector,
86
+ #websocketInput {
87
+ font-size: 16px;
88
+ padding: 5px;
89
+ border-radius: 5px;
90
+ border: 1px solid #ddd;
91
+ background-color: #ffffff;
92
+ max-height: 30px;
93
+ }
94
+
95
+ #websocketInput {
96
+ width: 200px;
97
+ }
98
+
99
+ #chunkSelector:focus,
100
+ #websocketInput:focus {
101
+ outline: none;
102
+ border-color: #007bff;
103
+ }
104
+
105
+ label {
106
+ font-size: 14px;
107
+ }
108
+
109
+ /* Speaker-labeled transcript area */
110
+ #linesTranscript {
111
+ margin: 20px auto;
112
+ max-width: 600px;
113
+ text-align: left;
114
+ font-size: 16px;
115
+ }
116
+
117
+ #linesTranscript p {
118
+ margin: 0px 0;
119
+ }
120
+
121
+ #linesTranscript strong {
122
+ color: #333;
123
+ }
124
+
125
+ #speaker {
126
+ background-color: rgb(252, 230, 229);
127
+ border-radius: 8px 8px 8px 0px;
128
+ padding: 2px 10px;
129
+ font-size: 14px;
130
+ margin-bottom: 0px;
131
+ }
132
+ .label_diarization {
133
+ background-color: #ffffff66;
134
+ border-radius: 8px 8px 8px 8px;
135
+ padding: 2px 10px;
136
+ margin-left: 10px;
137
+ font-size: 14px;
138
+ margin-bottom: 0px;
139
+ border-bottom: 3px solid rgb(155, 84, 84);
140
+ color: rgb(155, 84, 84)
141
+ }
142
+
143
+ .label_transcription {
144
+ background-color: #ffffff66;
145
+ border-radius: 8px 8px 8px 8px;
146
+ padding: 2px 10px;
147
+ margin-left: 10px;
148
+ font-size: 14px;
149
+ margin-bottom: 0px;
150
+ border-bottom: 3px solid #8825255c;
151
+ color: #8825255c
152
+ }
153
+
154
+ #timeInfo {
155
+ color: #666;
156
+ margin-left: 10px;
157
+ }
158
+
159
+ .textcontent {
160
+ font-size: 16px;
161
+ /* margin-left: 10px; */
162
+ padding-left: 10px;
163
+ border-left: 3px solid rgb(252, 230, 229);
164
+ margin-bottom: 10px;
165
+ margin-top: 1px;
166
+ padding-top: 5px;
167
+ border-radius: 0px 0px 0px 10px;
168
+ }
169
+
170
+ .buffer_diarization {
171
+ color: rgb(155, 84, 84);
172
+ margin-left: 4px;
173
+ }
174
+
175
+ .buffer_transcription {
176
+ color: #8825255c;
177
+ margin-left: 4px;
178
+ }
179
+
180
+
181
+ .spinner {
182
+ display: inline-block;
183
+ width: 8px;
184
+ height: 8px;
185
+ border: 2px solid #8825255c;
186
+ border-top: 2px solid #882525e5;
187
+ border-radius: 50%;
188
+ animation: spin 0.6s linear infinite;
189
+ vertical-align: middle;
190
+ margin-bottom: 2px;
191
+ margin-right: 5px;
192
+ }
193
+
194
+ @keyframes spin {
195
+ to {
196
+ transform: rotate(360deg);
197
+ }
198
+ }
199
+
200
+ .silence {
201
+ color: #666;
202
+ background-color: #f3f3f3;
203
+ font-size: 13px;
204
+ border-radius: 30px;
205
+ padding: 2px 10px;
206
+ }
207
+
208
+ .loading {
209
+ color: #666;
210
+ background-color: #ff4d4d0f;
211
+ border-radius: 8px 8px 8px 0px;
212
+ padding: 2px 10px;
213
+ font-size: 14px;
214
+ margin-bottom: 0px;
215
+
216
+ }
217
+ </style>
218
+ </head>
219
+
220
+ <body>
221
+
222
+ <div class="settings-container">
223
+ <button id="recordButton">
224
+ <div class="shape-container">
225
+ <div class="shape"></div>
226
+ </div>
227
+ </button>
228
+ <div class="settings">
229
+ <div>
230
+ <label for="chunkSelector">Chunk size (ms):</label>
231
+ <select id="chunkSelector">
232
+ <option value="500">500 ms</option>
233
+ <option value="1000" selected>1000 ms</option>
234
+ <option value="2000">2000 ms</option>
235
+ <option value="3000">3000 ms</option>
236
+ <option value="4000">4000 ms</option>
237
+ <option value="5000">5000 ms</option>
238
+ </select>
239
+ </div>
240
+ <div>
241
+ <label for="websocketInput">WebSocket URL:</label>
242
+ <input id="websocketInput" type="text" value="ws://localhost:8000/asr" />
243
+ </div>
244
+ </div>
245
+ </div>
246
+
247
+ <p id="status"></p>
248
+
249
+ <!-- Speaker-labeled transcript -->
250
+ <div id="linesTranscript"></div>
251
+
252
+ <script>
253
+ let isRecording = false;
254
+ let websocket = null;
255
+ let recorder = null;
256
+ let chunkDuration = 1000;
257
+ let websocketUrl = "ws://localhost:8000/asr";
258
+ let userClosing = false;
259
+
260
+ const statusText = document.getElementById("status");
261
+ const recordButton = document.getElementById("recordButton");
262
+ const chunkSelector = document.getElementById("chunkSelector");
263
+ const websocketInput = document.getElementById("websocketInput");
264
+ const linesTranscriptDiv = document.getElementById("linesTranscript");
265
+
266
+ chunkSelector.addEventListener("change", () => {
267
+ chunkDuration = parseInt(chunkSelector.value);
268
+ });
269
+
270
+ websocketInput.addEventListener("change", () => {
271
+ const urlValue = websocketInput.value.trim();
272
+ if (!urlValue.startsWith("ws://") && !urlValue.startsWith("wss://")) {
273
+ statusText.textContent = "Invalid WebSocket URL (must start with ws:// or wss://)";
274
+ return;
275
+ }
276
+ websocketUrl = urlValue;
277
+ statusText.textContent = "WebSocket URL updated. Ready to connect.";
278
+ });
279
+
280
+ function setupWebSocket() {
281
+ return new Promise((resolve, reject) => {
282
+ try {
283
+ websocket = new WebSocket(websocketUrl);
284
+ } catch (error) {
285
+ statusText.textContent = "Invalid WebSocket URL. Please check and try again.";
286
+ reject(error);
287
+ return;
288
+ }
289
+
290
+ websocket.onopen = () => {
291
+ statusText.textContent = "Connected to server.";
292
+ resolve();
293
+ };
294
+
295
+ websocket.onclose = () => {
296
+ if (userClosing) {
297
+ statusText.textContent = "WebSocket closed by user.";
298
+ } else {
299
+ statusText.textContent =
300
+ "Disconnected from the WebSocket server. (Check logs if model is loading.)";
301
+ }
302
+ userClosing = false;
303
+ };
304
+
305
+ websocket.onerror = () => {
306
+ statusText.textContent = "Error connecting to WebSocket.";
307
+ reject(new Error("Error connecting to WebSocket"));
308
+ };
309
+
310
+ // Handle messages from server
311
+ websocket.onmessage = (event) => {
312
+ const data = JSON.parse(event.data);
313
+
314
+ const {
315
+ lines = [],
316
+ buffer_transcription = "",
317
+ buffer_diarization = "",
318
+ remaining_time_transcription = 0,
319
+ remaining_time_diarization = 0
320
+ } = data;
321
+
322
+ renderLinesWithBuffer(
323
+ lines,
324
+ buffer_diarization,
325
+ buffer_transcription,
326
+ remaining_time_diarization,
327
+ remaining_time_transcription
328
+ );
329
+ };
330
+ });
331
+ }
332
+
333
+ function renderLinesWithBuffer(lines, buffer_diarization, buffer_transcription, remaining_time_diarization, remaining_time_transcription) {
334
+ const linesHtml = lines.map((item, idx) => {
335
+ let timeInfo = "";
336
+ if (item.beg !== undefined && item.end !== undefined) {
337
+ timeInfo = ` ${item.beg} - ${item.end}`;
338
+ }
339
+
340
+ let speakerLabel = "";
341
+ if (item.speaker === -2) {
342
+ speakerLabel = `<span class="silence">Silence<span id='timeInfo'>${timeInfo}</span></span>`;
343
+ } else if (item.speaker == 0) {
344
+ speakerLabel = `<span class='loading'><span class="spinner"></span><span id='timeInfo'>${remaining_time_diarization} second(s) of audio are undergoing diarization</span></span>`;
345
+ } else if (item.speaker == -1) {
346
+ speakerLabel = `<span id="speaker"><span id='timeInfo'>${timeInfo}</span></span>`;
347
+ } else if (item.speaker !== -1) {
348
+ speakerLabel = `<span id="speaker">Speaker ${item.speaker}<span id='timeInfo'>${timeInfo}</span></span>`;
349
+ }
350
+
351
+ let textContent = item.text;
352
+ if (idx === lines.length - 1 && buffer_diarization) {
353
+ speakerLabel += `<span class="label_diarization"><span class="spinner"></span>Diarization lag<span id='timeInfo'>${remaining_time_diarization}s</span></span>`
354
+ textContent += `<span class="buffer_diarization">${buffer_diarization}</span>`;
355
+ }
356
+ if (idx === lines.length - 1 && buffer_transcription) {
357
+ speakerLabel += `<span class="label_transcription"><span class="spinner"></span>Transcription lag <span id='timeInfo'>${remaining_time_transcription}s</span></span>`
358
+ textContent += `<span class="buffer_transcription">${buffer_transcription}</span>`;
359
+ }
360
+
361
+ return textContent
362
+ ? `<p>${speakerLabel}<br/><div class='textcontent'>${textContent}</div></p>`
363
+ : `<p>${speakerLabel}<br/></p>`;
364
+ }).join("");
365
+
366
+ linesTranscriptDiv.innerHTML = linesHtml;
367
+ }
368
+
369
+ async function startRecording() {
370
+ try {
371
+ const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
372
+ recorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
373
+ recorder.ondataavailable = (e) => {
374
+ if (websocket && websocket.readyState === WebSocket.OPEN) {
375
+ websocket.send(e.data);
376
+ }
377
+ };
378
+ recorder.start(chunkDuration);
379
+ isRecording = true;
380
+ updateUI();
381
+ } catch (err) {
382
+ statusText.textContent = "Error accessing microphone. Please allow microphone access.";
383
+ }
384
+ }
385
+
386
+ function stopRecording() {
387
+ userClosing = true;
388
+ if (recorder) {
389
+ recorder.stop();
390
+ recorder = null;
391
+ }
392
+ isRecording = false;
393
+
394
+ if (websocket) {
395
+ websocket.close();
396
+ websocket = null;
397
+ }
398
+
399
+ updateUI();
400
+ }
401
+
402
+ async function toggleRecording() {
403
+ if (!isRecording) {
404
+ linesTranscriptDiv.innerHTML = "";
405
+ try {
406
+ await setupWebSocket();
407
+ await startRecording();
408
+ } catch (err) {
409
+ statusText.textContent = "Could not connect to WebSocket or access mic. Aborted.";
410
+ }
411
+ } else {
412
+ stopRecording();
413
+ }
414
+ }
415
+
416
+ function updateUI() {
417
+ recordButton.classList.toggle("recording", isRecording);
418
+ statusText.textContent = isRecording ? "Recording..." : "Click to start transcription";
419
+ }
420
+
421
+ recordButton.addEventListener("click", toggleRecording);
422
+ </script>
423
+ </body>
424
+
425
+ </html>