first speaker is "0", no longer None
src/diarization/diarization_online.py
CHANGED
@@ -57,9 +57,10 @@ def init_diart(SAMPLE_RATE):
        l_speakers = []
        annotation, audio = result
        for speaker in annotation._labels:
-           …
+           segments_beg = annotation._labels[speaker].segments_boundaries_[0]
+           segments_end = annotation._labels[speaker].segments_boundaries_[-1]
            asyncio.create_task(
-               l_speakers_queue.put({"speaker": speaker, "…
+               l_speakers_queue.put({"speaker": speaker, "beg": segments_beg, "end": segments_end})
            )

    l_speakers_queue = asyncio.Queue()
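The hook above runs inside diart's streaming callback, so it must not block: for each speaker label it schedules a fire-and-forget `queue.put(...)` with `asyncio.create_task` instead of awaiting it. A minimal, self-contained sketch of that producer pattern (toy speaker labels and timings, no diart involved):

import asyncio

async def main():
    queue = asyncio.Queue()

    def on_result(labels):
        # Mimics the hook: schedule a non-blocking put for each speaker segment.
        for speaker, (beg, end) in labels.items():
            asyncio.create_task(queue.put({"speaker": speaker, "beg": beg, "end": end}))

    # Stand-in for a diart annotation: per-speaker first/last segment boundaries.
    on_result({"speaker0": (0.0, 2.4), "speaker1": (2.4, 5.1)})
    await asyncio.sleep(0)  # give the scheduled tasks one loop iteration to run

    while not queue.empty():
        print(await queue.get())

asyncio.run(main())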
@@ -74,13 +75,36 @@ def init_diart(SAMPLE_RATE):
class DiartDiarization():
    def __init__(self, SAMPLE_RATE):
        self.inference, self.l_speakers_queue, self.ws_source = init_diart(SAMPLE_RATE)
+       self.segment_speakers = []

-   async def …
+   async def diarize(self, pcm_array):
        self.ws_source.push_audio(pcm_array)
-       …
+       self.segment_speakers = []
        while not self.l_speakers_queue.empty():
-           …
-       return speakers
+           self.segment_speakers.append(await self.l_speakers_queue.get())

    def close(self):
        self.ws_source.close()
+
+
+   def assign_speakers_to_chunks(self, chunks):
+       """
+       Go through each chunk and see which speaker(s) overlap
+       that chunk's time range in the Diart annotation.
+       Then store the speaker label(s) (or choose the most overlapping).
+       This modifies `chunks` in-place or returns a new list with assigned speakers.
+       """
+       if not self.segment_speakers:
+           return chunks
+
+       for segment in self.segment_speakers:
+           seg_beg = segment["beg"]
+           seg_end = segment["end"]
+           speaker = segment["speaker"]
+           for ch in chunks:
+               if seg_end <= ch["beg"] or seg_beg >= ch["end"]:
+                   continue
+               # We have overlap. Let's just pick the speaker (could be more precise in a more complex implementation)
+               ch["speaker"] = speaker
+
+       return chunks
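assign_speakers_to_chunks relabels any chunk whose [beg, end] range intersects a diarization segment; a segment and a chunk are disjoint exactly when one ends before the other begins, which is what the two comparisons test. A small standalone sketch of the same logic on made-up values:

# Standalone sketch of the overlap-based assignment above (toy data, no diart needed).
segment_speakers = [
    {"speaker": "speaker0", "beg": 0.0, "end": 2.5},
    {"speaker": "speaker1", "beg": 2.5, "end": 5.0},
]
chunks = [
    {"beg": 0.2, "end": 1.1, "text": "hello ", "speaker": "0"},
    {"beg": 2.6, "end": 4.0, "text": "bonjour ", "speaker": "0"},
]

for segment in segment_speakers:
    for ch in chunks:
        # Skip chunks that end before the segment starts or start after it ends.
        if segment["end"] <= ch["beg"] or segment["beg"] >= ch["end"]:
            continue
        ch["speaker"] = segment["speaker"]  # last overlapping segment wins, as in the diff

print(chunks)
# [{'beg': 0.2, 'end': 1.1, 'text': 'hello ', 'speaker': 'speaker0'},
#  {'beg': 2.6, 'end': 4.0, 'text': 'bonjour ', 'speaker': 'speaker1'}]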
src/web/live_transcription.html
CHANGED
@@ -7,8 +7,8 @@
    <style>
      body {
        font-family: 'Inter', sans-serif;
-       text-align: center;
        margin: 20px;
+       text-align: center;
      }
      #recordButton {
        width: 80px;
@@ -28,18 +28,10 @@
      #recordButton:active {
        transform: scale(0.95);
      }
-     #…
+     #status {
        margin-top: 20px;
-       font-size: …
-       …
-     }
-     .transcription {
-       display: inline;
-       color: black;
-     }
-     .buffer {
-       display: inline;
-       color: rgb(197, 197, 197);
+       font-size: 16px;
+       color: #333;
      }
      .settings-container {
        display: flex;
@@ -73,9 +65,29 @@
      label {
        font-size: 14px;
      }
+     /* Speaker-labeled transcript area */
+     #linesTranscript {
+       margin: 20px auto;
+       max-width: 600px;
+       text-align: left;
+       font-size: 16px;
+     }
+     #linesTranscript p {
+       margin: 5px 0;
+     }
+     #linesTranscript strong {
+       color: #333;
+     }
+     /* Grey buffer styling */
+     .buffer {
+       color: rgb(180, 180, 180);
+       font-style: italic;
+       margin-left: 4px;
+     }
    </style>
  </head>
  <body>
+
    <div class="settings-container">
      <button id="recordButton">🎙️</button>
      <div class="settings">
@@ -96,9 +108,11 @@
        </div>
      </div>
    </div>
+
    <p id="status"></p>

-   …
+   <!-- Speaker-labeled transcript -->
+   <div id="linesTranscript"></div>

    <script>
      let isRecording = false;
@@ -106,89 +120,97 @@
      let recorder = null;
      let chunkDuration = 1000;
      let websocketUrl = "ws://localhost:8000/asr";
-
-     // Tracks whether the user voluntarily closed the WebSocket
      let userClosing = false;

      const statusText = document.getElementById("status");
      const recordButton = document.getElementById("recordButton");
      const chunkSelector = document.getElementById("chunkSelector");
      const websocketInput = document.getElementById("websocketInput");
-     const …
+     const linesTranscriptDiv = document.getElementById("linesTranscript");

-     let fullTranscription = ""; // Store confirmed transcription
-
-     // Update chunk duration based on the selector
      chunkSelector.addEventListener("change", () => {
        chunkDuration = parseInt(chunkSelector.value);
      });

-     // Update WebSocket URL dynamically, with some basic checks
      websocketInput.addEventListener("change", () => {
        const urlValue = websocketInput.value.trim();
-
-       // Quick check to see if it starts with ws:// or wss://
        if (!urlValue.startsWith("ws://") && !urlValue.startsWith("wss://")) {
-         statusText.textContent =
-           "Invalid WebSocket URL. It should start with ws:// or wss://";
+         statusText.textContent = "Invalid WebSocket URL (must start with ws:// or wss://)";
          return;
        }
        websocketUrl = urlValue;
        statusText.textContent = "WebSocket URL updated. Ready to connect.";
      });

-     /**
-      * Opens webSocket connection.
-      * returns a Promise that resolves when the connection is open.
-      * rejects if there was an error.
-      */
      function setupWebSocket() {
        return new Promise((resolve, reject) => {
          try {
            websocket = new WebSocket(websocketUrl);
          } catch (error) {
-           statusText.textContent =
-             "Invalid WebSocket URL. Please check the URL and try again.";
+           statusText.textContent = "Invalid WebSocket URL. Please check and try again.";
            reject(error);
            return;
          }

          websocket.onopen = () => {
-           statusText.textContent = "Connected to server";
+           statusText.textContent = "Connected to server.";
            resolve();
          };

-         websocket.onclose = (…
-           // If we manually closed it, we say so
+         websocket.onclose = () => {
            if (userClosing) {
              statusText.textContent = "WebSocket closed by user.";
            } else {
-             statusText.textContent = …
+             statusText.textContent =
+               "Disconnected from the WebSocket server. (Check logs if model is loading.)";
            }
            userClosing = false;
          };

          websocket.onerror = () => {
-           statusText.textContent = "Error connecting to WebSocket";
+           statusText.textContent = "Error connecting to WebSocket.";
            reject(new Error("Error connecting to WebSocket"));
          };

+         // Handle messages from server
          websocket.onmessage = (event) => {
            const data = JSON.parse(event.data);
-           …
+           /*
+             The server might send:
+             {
+               "lines": [
+                 {"speaker": 0, "text": "Hello."},
+                 {"speaker": 1, "text": "Bonjour."},
+                 ...
+               ],
+               "buffer": "..."
+             }
+           */
+           const { lines = [], buffer = "" } = data;
+           renderLinesWithBuffer(lines, buffer);
          };
        });
      }

+     function renderLinesWithBuffer(lines, buffer) {
+       // Clears if no lines
+       if (!Array.isArray(lines) || lines.length === 0) {
+         linesTranscriptDiv.innerHTML = "";
+         return;
+       }
+       // Build the HTML
+       // The buffer is appended to the last line if it's non-empty
+       const linesHtml = lines.map((item, idx) => {
+         let textContent = item.text;
+         if (idx === lines.length - 1 && buffer) {
+           textContent += `<span class="buffer">${buffer}</span>`;
+         }
+         return `<p><strong>Speaker ${item.speaker}:</strong> ${textContent}</p>`;
+       }).join("");
+
+       linesTranscriptDiv.innerHTML = linesHtml;
+     }
+
      async function startRecording() {
        try {
          const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
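The onmessage comment above documents the payload contract between the server and the page. As a quick sanity check outside the browser, the same shape can be rendered as plain text in Python, mirroring renderLinesWithBuffer (one line per speaker, buffer appended to the last line); the sample message below is made up:

import json

# Example message in the shape documented in the onmessage handler above.
message = json.loads(
    '{"lines": [{"speaker": 0, "text": "Hello."}, {"speaker": 1, "text": "Bonjour."}], '
    '"buffer": " comment allez-vous"}'
)

lines = message.get("lines", [])
buffer = message.get("buffer", "")

for idx, item in enumerate(lines):
    text = item["text"]
    if idx == len(lines) - 1 and buffer:
        text += buffer  # the JS version wraps this part in <span class="buffer"> instead
    print(f'Speaker {item["speaker"]}: {text}')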
@@ -202,22 +224,18 @@
        isRecording = true;
        updateUI();
      } catch (err) {
-       statusText.textContent =
-         "Error accessing microphone. Please allow microphone access.";
+       statusText.textContent = "Error accessing microphone. Please allow microphone access.";
      }
    }

    function stopRecording() {
      userClosing = true;
-
-     // Stop the recorder if it exists
      if (recorder) {
        recorder.stop();
        recorder = null;
      }
      isRecording = false;

-     // Close the websocket if it exists
      if (websocket) {
        websocket.close();
        websocket = null;
@@ -228,15 +246,12 @@

    async function toggleRecording() {
      if (!isRecording) {
-
-       transcriptionsDiv.innerHTML = "";
-
+       linesTranscriptDiv.innerHTML = "";
        try {
          await setupWebSocket();
          await startRecording();
        } catch (err) {
-         statusText.textContent =
-           "Could not connect to WebSocket or access mic. Recording aborted.";
+         statusText.textContent = "Could not connect to WebSocket or access mic. Aborted.";
        }
      } else {
        stopRecording();
@@ -245,9 +260,7 @@

    function updateUI() {
      recordButton.classList.toggle("recording", isRecording);
-     statusText.textContent = isRecording
-       ? "Recording..."
-       : "Click to start transcription";
+     statusText.textContent = isRecording ? "Recording..." : "Click to start transcription";
    }

    recordButton.addEventListener("click", toggleRecording);
whisper_fastapi_online_server.py
CHANGED
@@ -90,6 +90,7 @@ async def start_ffmpeg_decoder():
    return process


+
@app.websocket("/asr")
async def websocket_endpoint(websocket: WebSocket):
    await websocket.accept()
@@ -110,6 +111,9 @@ async def websocket_endpoint(websocket: WebSocket):
    loop = asyncio.get_event_loop()
    full_transcription = ""
    beg = time()
+
+   chunk_history = []  # Will store dicts: {beg, end, text, speaker}
+
    while True:
        try:
            elapsed_time = int(time() - beg)
@@ -137,8 +141,17 @@
                )
                pcm_buffer = bytearray()
                online.insert_audio_chunk(pcm_array)
-               …
-               …
+               beg_trans, end_trans, trans = online.process_iter()
+
+               if trans:
+                   chunk_history.append({
+                       "beg": beg_trans,
+                       "end": end_trans,
+                       "text": trans,
+                       "speaker": "0"
+                   })
+
+               full_transcription += trans
                if args.vac:
                    buffer = online.online.to_flush(
                        online.online.transcript_buffer.buffer
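Each committed result from `online.process_iter()` is now recorded in `chunk_history` with its time span, and the speaker field defaults to the string "0" rather than None, which is what the commit title refers to; diarization can overwrite it later. A tiny sketch of one entry being built (the tuple values are made-up stand-ins for a real `process_iter()` result):

chunk_history = []

# Stand-in for: beg_trans, end_trans, trans = online.process_iter()
beg_trans, end_trans, trans = 0.0, 1.8, "Hello everyone."

if trans:
    chunk_history.append({
        "beg": beg_trans,
        "end": end_trans,
        "text": trans,
        "speaker": "0",  # default label; assign_speakers_to_chunks may relabel it
    })

print(chunk_history)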
@@ -151,11 +164,30 @@
                        buffer in full_transcription
                    ):  # With VAC, the buffer is not updated until the next chunk is processed
                        buffer = ""
-                   …
+
+                   lines = [
+                       {
+                           "speaker": "0",
+                           "text": "",
+                       }
+                   ]
+
                    if args.diarization:
-                       …
-                       …
-                       …
+                       await diarization.diarize(pcm_array)
+                       diarization.assign_speakers_to_chunks(chunk_history)
+
+                   for ch in chunk_history:
+                       if args.diarization and ch["speaker"] and ch["speaker"][-1] != lines[-1]["speaker"]:
+                           lines.append(
+                               {
+                                   "speaker": ch["speaker"][-1],
+                                   "text": ch['text'],
+                               }
+                           )
+                       else:
+                           lines[-1]["text"] += ch['text']
+
+                   response = {"lines": lines, "buffer": buffer}
                    await websocket.send_json(response)

                except Exception as e:
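The loop above collapses consecutive chunks from the same speaker into one display line; `ch["speaker"][-1]` takes the last character of the diarization label (e.g. "speaker1" becomes "1") so it matches the "0"/"1" strings sent to the client. A runnable sketch of that grouping on toy data:

# Toy chunk history after diarization has assigned labels (values are made up).
chunk_history = [
    {"beg": 0.0, "end": 1.2, "text": "Hello. ", "speaker": "speaker0"},
    {"beg": 1.2, "end": 2.0, "text": "How are you? ", "speaker": "speaker0"},
    {"beg": 2.0, "end": 3.5, "text": "Bonjour. ", "speaker": "speaker1"},
]
diarization_enabled = True  # stand-in for args.diarization

lines = [{"speaker": "0", "text": ""}]

for ch in chunk_history:
    # Start a new line whenever the (single-character) speaker id changes.
    if diarization_enabled and ch["speaker"] and ch["speaker"][-1] != lines[-1]["speaker"]:
        lines.append({"speaker": ch["speaker"][-1], "text": ch["text"]})
    else:
        lines[-1]["text"] += ch["text"]

print(lines)
# [{'speaker': '0', 'text': 'Hello. How are you? '}, {'speaker': '1', 'text': 'Bonjour. '}]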