Spaces:
Sleeping
Sleeping
Commit
·
4641c1c
1
Parent(s):
9eec0a3
Backend connection
Browse files- README.md +64 -0
- inference.py +61 -11
- ui.py +307 -39
README.md
CHANGED
|
@@ -9,3 +9,67 @@ license: mit
|
|
| 9 |
---
|
| 10 |
|
| 11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
---
|
| 10 |
|
| 11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 12 |
+
|
| 13 |
+
# Real-Time Speaker Diarization
|
| 14 |
+
|
| 15 |
+
This project implements real-time speaker diarization using WebRTC, FastAPI, and Gradio. It automatically transcribes speech and identifies different speakers in real-time.
|
| 16 |
+
|
| 17 |
+
## Architecture
|
| 18 |
+
|
| 19 |
+
The system is split into two components:
|
| 20 |
+
|
| 21 |
+
1. **Model Server (Hugging Face Space)**: Runs the speech recognition and speaker diarization models
|
| 22 |
+
2. **Signaling Server (Render)**: Handles WebRTC signaling for direct audio streaming from browser
|
| 23 |
+
|
| 24 |
+
## Deployment Instructions
|
| 25 |
+
|
| 26 |
+
### Deploy Model Server on Hugging Face Space
|
| 27 |
+
|
| 28 |
+
1. Create a new Space on Hugging Face (Docker SDK)
|
| 29 |
+
2. Upload all files from the `Speaker-Diarization` directory
|
| 30 |
+
3. In Space settings:
|
| 31 |
+
- Set Hardware to CPU (or GPU if available)
|
| 32 |
+
- Set the Space visibility to Public
|
| 33 |
+
- Environment: Make sure Docker SDK is selected
|
| 34 |
+
|
| 35 |
+
### Deploy Signaling Server on Render
|
| 36 |
+
|
| 37 |
+
1. Create a new Render Web Service
|
| 38 |
+
2. Connect to your GitHub repo containing the `render-signal` directory
|
| 39 |
+
3. Configure Render service:
|
| 40 |
+
- Set Build Command: `cd render-signal && pip install -r requirements.txt`
|
| 41 |
+
- Set Start Command: `cd render-signal && python backend.py`
|
| 42 |
+
- Select Environment: Python 3
|
| 43 |
+
- Set Environment Variables:
|
| 44 |
+
- `HF_SPACE_URL`: Set to your Hugging Face Space URL (e.g., `your-username-speaker-diarization.hf.space`)
|
| 45 |
+
|
| 46 |
+
### Update Configuration
|
| 47 |
+
|
| 48 |
+
After both services are deployed:
|
| 49 |
+
|
| 50 |
+
1. Update `ui.py` on your Hugging Face Space:
|
| 51 |
+
- Change `RENDER_SIGNALING_URL` to your Render app URL (`wss://your-app.onrender.com/stream`)
|
| 52 |
+
- Make sure `HF_SPACE_URL` matches your actual Hugging Face Space URL
|
| 53 |
+
|
| 54 |
+
2. Update `backend.py` on your Render service:
|
| 55 |
+
- Set `API_WS` to your Hugging Face Space WebSocket URL (`wss://your-username-speaker-diarization.hf.space/ws_inference`)
|
| 56 |
+
|
| 57 |
+
## Usage
|
| 58 |
+
|
| 59 |
+
1. Open your Hugging Face Space URL in a web browser
|
| 60 |
+
2. Click "Start Listening" to begin
|
| 61 |
+
3. Speak into your microphone
|
| 62 |
+
4. The system will transcribe your speech and identify different speakers in real-time
|
| 63 |
+
|
| 64 |
+
## Technology Stack
|
| 65 |
+
|
| 66 |
+
- **Frontend**: Gradio UI with WebRTC for audio streaming
|
| 67 |
+
- **Signaling**: FastRTC on Render for WebRTC signaling
|
| 68 |
+
- **Backend**: FastAPI + WebSockets
|
| 69 |
+
- **Models**:
|
| 70 |
+
- SpeechBrain ECAPA-TDNN for speaker embeddings
|
| 71 |
+
- Automatic Speech Recognition for transcription
|
| 72 |
+
|
| 73 |
+
## License
|
| 74 |
+
|
| 75 |
+
MIT
|
inference.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
| 1 |
-
from fastapi import FastAPI, WebSocket
|
| 2 |
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
from shared import RealtimeSpeakerDiarization
|
|
|
|
| 4 |
import uvicorn
|
| 5 |
import logging
|
|
|
|
| 6 |
|
| 7 |
# Set up logging
|
| 8 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -21,33 +23,81 @@ app.add_middleware(
|
|
| 21 |
)
|
| 22 |
|
| 23 |
# Initialize the diarization system
|
|
|
|
| 24 |
diart = RealtimeSpeakerDiarization()
|
| 25 |
success = diart.initialize_models()
|
| 26 |
logger.info(f"Models initialized: {success}")
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
@app.get("/health")
|
| 30 |
async def health_check():
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
@app.websocket("/ws_inference")
|
| 34 |
async def ws_inference(ws: WebSocket):
|
| 35 |
"""WebSocket endpoint for real-time audio processing"""
|
| 36 |
await ws.accept()
|
| 37 |
-
|
|
|
|
| 38 |
|
| 39 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
async for chunk in ws.iter_bytes():
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
except Exception as e:
|
| 48 |
logger.error(f"WebSocket error: {e}")
|
| 49 |
finally:
|
| 50 |
-
|
|
|
|
| 51 |
|
| 52 |
@app.get("/conversation")
|
| 53 |
async def get_conversation():
|
|
|
|
| 1 |
+
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
| 2 |
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
from shared import RealtimeSpeakerDiarization
|
| 4 |
+
import numpy as np
|
| 5 |
import uvicorn
|
| 6 |
import logging
|
| 7 |
+
import asyncio
|
| 8 |
|
| 9 |
# Set up logging
|
| 10 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 23 |
)
|
| 24 |
|
| 25 |
# Initialize the diarization system
|
| 26 |
+
logger.info("Initializing diarization system...")
|
| 27 |
diart = RealtimeSpeakerDiarization()
|
| 28 |
success = diart.initialize_models()
|
| 29 |
logger.info(f"Models initialized: {success}")
|
| 30 |
+
if success:
|
| 31 |
+
diart.start_recording()
|
| 32 |
+
|
| 33 |
+
# Track active WebSocket connections
|
| 34 |
+
active_connections = set()
|
| 35 |
+
|
| 36 |
+
# Background broadcaster for conversation updates
async def send_conversation_updates():
    """Push the formatted conversation to every tracked WebSocket, forever.

    Each cycle reads ``diart.get_formatted_conversation()`` once and sends
    that text to every socket in ``active_connections``. A socket whose send
    raises is logged and dropped from the set. The loop then sleeps 0.5 s
    before the next cycle, whether or not anything was sent.
    """
    while True:
        if not active_connections:
            # Nobody is listening; just wait for the next cycle.
            await asyncio.sleep(0.5)
            continue

        try:
            snapshot_html = diart.get_formatted_conversation()

            # Iterate over a copy: the set is mutated below and by the
            # WebSocket endpoint while this coroutine is suspended in send.
            for client in active_connections.copy():
                try:
                    await client.send_text(snapshot_html)
                except Exception as send_error:
                    logger.error(f"Error sending to WebSocket: {send_error}")
                    active_connections.discard(client)
        except Exception as update_error:
            logger.error(f"Error in conversation update: {update_error}")

        await asyncio.sleep(0.5)  # 500ms update interval
|
| 57 |
+
|
| 58 |
+
@app.on_event("startup")
async def startup_event():
    """Start the background conversation broadcaster when the app starts.

    The task object is stored on ``app.state`` because the event loop keeps
    only a weak reference to tasks; a task nobody else references can be
    garbage-collected before it finishes.
    """
    app.state.conversation_updates_task = asyncio.create_task(
        send_conversation_updates()
    )
|
| 62 |
|
| 63 |
@app.get("/health")
async def health_check():
    """Return a small JSON health report.

    The payload holds a fixed ``"healthy"`` status, the diarizer's
    ``is_running`` attribute, and the number of tracked WebSocket clients.
    """
    report = {"status": "healthy"}
    report["system_running"] = diart.is_running
    report["active_connections"] = len(active_connections)
    return report
|
| 71 |
|
| 72 |
@app.websocket("/ws_inference")
async def ws_inference(ws: WebSocket):
    """Accept a WebSocket, send the current conversation, then consume audio.

    The socket is added to ``active_connections`` for as long as it is open.
    Every non-empty binary frame is passed to ``diart.process_audio_chunk``.
    A failure on one frame is logged and the stream continues. The socket is
    always removed from ``active_connections`` on exit.
    """
    await ws.accept()
    active_connections.add(ws)
    logger.info(f"WebSocket connection established. Total connections: {len(active_connections)}")

    try:
        # Give the new client the current state right away.
        initial_html = diart.get_formatted_conversation()
        await ws.send_text(initial_html)

        async for audio_bytes in ws.iter_bytes():
            if not audio_bytes:
                # Empty frame: nothing to process.
                continue
            try:
                # Updates the diarizer's internal conversation state.
                diart.process_audio_chunk(audio_bytes)
            except Exception as chunk_error:
                logger.error(f"Error processing audio chunk: {chunk_error}")
    except WebSocketDisconnect:
        logger.info("WebSocket disconnected")
    except Exception as socket_error:
        logger.error(f"WebSocket error: {socket_error}")
    finally:
        active_connections.discard(ws)
        logger.info(f"WebSocket connection closed. Remaining connections: {len(active_connections)}")
|
| 101 |
|
| 102 |
@app.get("/conversation")
|
| 103 |
async def get_conversation():
|
ui.py
CHANGED
|
@@ -2,58 +2,251 @@ import gradio as gr
|
|
| 2 |
from fastapi import FastAPI
|
| 3 |
from shared import DEFAULT_CHANGE_THRESHOLD, DEFAULT_MAX_SPEAKERS, ABSOLUTE_MAX_SPEAKERS
|
| 4 |
|
| 5 |
-
#
|
| 6 |
-
|
|
|
|
|
|
|
| 7 |
|
| 8 |
def build_ui():
|
| 9 |
"""Build Gradio UI for speaker diarization"""
|
| 10 |
with gr.Blocks(title="Real-time Speaker Diarization", theme=gr.themes.Soft()) as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
gr.Markdown("# π€ Live Speaker Diarization")
|
| 12 |
gr.Markdown("Real-time speech recognition with automatic speaker identification")
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
with gr.Row():
|
| 15 |
with gr.Column(scale=2):
|
| 16 |
-
# Conversation display with embedded JavaScript
|
| 17 |
-
|
| 18 |
"""
|
| 19 |
-
<div class='output' style='padding:20px; background:#
|
| 20 |
-
|
|
|
|
| 21 |
</div>
|
|
|
|
| 22 |
<script>
|
| 23 |
-
|
| 24 |
-
let
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
};
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
| 39 |
};
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
}
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
}
|
| 53 |
|
|
|
|
| 54 |
document.addEventListener('DOMContentLoaded', () => {
|
| 55 |
-
|
| 56 |
-
document.querySelector('button[aria-label="Stop"]').onclick = stopStream;
|
| 57 |
});
|
| 58 |
</script>
|
| 59 |
""",
|
|
@@ -67,11 +260,14 @@ def build_ui():
|
|
| 67 |
clear_btn = gr.Button("ποΈ Clear", variant="secondary", size="lg")
|
| 68 |
|
| 69 |
# Status display
|
| 70 |
-
status_output = gr.
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
| 75 |
)
|
| 76 |
|
| 77 |
with gr.Column(scale=1):
|
|
@@ -84,7 +280,7 @@ def build_ui():
|
|
| 84 |
step=0.05,
|
| 85 |
value=DEFAULT_CHANGE_THRESHOLD,
|
| 86 |
label="Speaker Change Sensitivity",
|
| 87 |
-
info="Lower = more sensitive"
|
| 88 |
)
|
| 89 |
|
| 90 |
max_speakers_slider = gr.Slider(
|
|
@@ -101,16 +297,88 @@ def build_ui():
|
|
| 101 |
gr.Markdown("""
|
| 102 |
## π Instructions
|
| 103 |
1. **Start Listening** - allows browser to access microphone
|
| 104 |
-
2. **Speak** - system will
|
| 105 |
3. **Stop** when finished
|
|
|
|
| 106 |
|
| 107 |
## π¨ Speaker Colors
|
| 108 |
- π΄ Speaker 1 (Red)
|
| 109 |
- π’ Speaker 2 (Teal)
|
| 110 |
- π΅ Speaker 3 (Blue)
|
| 111 |
- π‘ Speaker 4 (Green)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
""")
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
return demo
|
| 115 |
|
| 116 |
# Create Gradio interface
|
|
|
|
| 2 |
from fastapi import FastAPI
|
| 3 |
from shared import DEFAULT_CHANGE_THRESHOLD, DEFAULT_MAX_SPEAKERS, ABSOLUTE_MAX_SPEAKERS
|
| 4 |
|
| 5 |
+
# Connection configuration (separate signaling server from model server)
|
| 6 |
+
# These will be replaced at deployment time with the correct URLs
|
| 7 |
+
RENDER_SIGNALING_URL = "wss://your-render-app.onrender.com/stream"
|
| 8 |
+
HF_SPACE_URL = "https://androidguy-speaker-diarization.hf.space"
|
| 9 |
|
| 10 |
def build_ui():
|
| 11 |
"""Build Gradio UI for speaker diarization"""
|
| 12 |
with gr.Blocks(title="Real-time Speaker Diarization", theme=gr.themes.Soft()) as demo:
|
| 13 |
+
# Add configuration variables to page using custom component
|
| 14 |
+
gr.HTML(
|
| 15 |
+
f"""
|
| 16 |
+
<!-- Configuration parameters -->
|
| 17 |
+
<script>
|
| 18 |
+
window.RENDER_SIGNALING_URL = "{RENDER_SIGNALING_URL}";
|
| 19 |
+
window.HF_SPACE_URL = "{HF_SPACE_URL}";
|
| 20 |
+
</script>
|
| 21 |
+
"""
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
# Header and description
|
| 25 |
gr.Markdown("# π€ Live Speaker Diarization")
|
| 26 |
gr.Markdown("Real-time speech recognition with automatic speaker identification")
|
| 27 |
|
| 28 |
+
# Status indicator
|
| 29 |
+
connection_status = gr.HTML(
|
| 30 |
+
"""<div class="status-indicator">
|
| 31 |
+
<span id="status-text" style="color:#888;">Waiting to connect...</span>
|
| 32 |
+
<span id="status-icon" style="width:10px; height:10px; display:inline-block;
|
| 33 |
+
background-color:#888; border-radius:50%; margin-left:5px;"></span>
|
| 34 |
+
</div>"""
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
with gr.Row():
|
| 38 |
with gr.Column(scale=2):
|
| 39 |
+
# Conversation display with embedded JavaScript for WebRTC and audio handling
|
| 40 |
+
conversation_display = gr.HTML(
|
| 41 |
"""
|
| 42 |
+
<div class='output' id="conversation" style='padding:20px; background:#111; border-radius:10px;
|
| 43 |
+
min-height:400px; font-family:Arial; font-size:16px; line-height:1.5; overflow-y:auto;'>
|
| 44 |
+
<i>Click 'Start Listening' to begin...</i>
|
| 45 |
</div>
|
| 46 |
+
|
| 47 |
<script>
|
| 48 |
+
// Global variables
|
| 49 |
+
let rtcConnection;
|
| 50 |
+
let mediaStream;
|
| 51 |
+
let wsConnection;
|
| 52 |
+
let statusUpdateInterval;
|
| 53 |
+
|
| 54 |
+
// Check connection to HF space
|
| 55 |
+
/**
 * Probe the model server's /health endpoint.
 * Resolves to the response's `ok` flag, or to false when the request
 * itself fails (network error, CORS rejection, ...).
 */
async function checkHfConnection() {
    const healthUrl = `${window.HF_SPACE_URL}/health`;
    return fetch(healthUrl).then(
        (reply) => reply.ok,
        () => false
    );
}
|
| 63 |
+
|
| 64 |
+
// Start the connection and audio streaming
|
| 65 |
+
/**
 * Start a listening session: microphone capture, WebRTC setup, the
 * conversation WebSocket, and the periodic connection check.
 *
 * A call made while a session is already active is ignored, so a second
 * click cannot stack another microphone stream, socket and interval on
 * top of the first. If any step fails, whatever was opened is released
 * through stopStreaming() before the error is shown.
 */
async function startStreaming() {
    if (mediaStream) {
        // Already capturing; stopStreaming() clears mediaStream.
        return;
    }

    try {
        updateStatus('connecting');

        // Request microphone access
        mediaStream = await navigator.mediaDevices.getUserMedia({audio: {
            echoCancellation: true,
            noiseSuppression: true,
            autoGainControl: true
        }});

        // Set up WebRTC connection to Render signaling server
        await setupWebRTC();

        // Also connect WebSocket directly to HF Space for conversation updates
        setupWebSocket();

        // Re-check connection health every 5 seconds
        statusUpdateInterval = setInterval(updateConnectionInfo, 5000);

        updateStatus('connected');

        document.getElementById("conversation").innerHTML = "<i>Connected! Start speaking...</i>";
    } catch (err) {
        console.error('Error starting stream:', err);
        // Release the microphone and any half-open connections first;
        // stopStreaming() sets the status to 'disconnected', so the error
        // status is applied afterwards.
        stopStreaming();
        updateStatus('error', err.message);
    }
}
|
| 95 |
|
| 96 |
+
// Set up WebRTC connection to Render signaling server
|
| 97 |
+
/**
 * Create the RTCPeerConnection, attach the microphone tracks and exchange
 * SDP with the signaling server.
 *
 * Reads the global mediaStream and replaces the global rtcConnection.
 * Throws if the signaling request fails or returns no SDP answer; the
 * caller (startStreaming) reports the error.
 */
async function setupWebRTC() {
    if (rtcConnection) {
        rtcConnection.close();
    }

    const peer = new RTCPeerConnection();
    rtcConnection = peer;

    // Add audio track(s) to the connection
    mediaStream.getAudioTracks().forEach(track => {
        peer.addTrack(track, mediaStream);
    });

    // Data channel kept from the original implementation; creating it
    // adds a data section to the offer.
    peer.createDataChannel('audio');

    // fetch() only speaks http(s): a ws:// or wss:// URL is rejected before
    // any request is sent, so map the scheme to its HTTP counterpart.
    // NOTE(review): assumes the signaling server accepts HTTP POST on this
    // path - confirm against backend.py.
    const signalingUrl = window.RENDER_SIGNALING_URL.replace(/^ws/, 'http');

    // Register the handler BEFORE setLocalDescription(): ICE gathering
    // starts there, and candidates emitted while we wait for the answer
    // would otherwise be lost.
    peer.onicecandidate = event => {
        if (!event.candidate) {
            return;
        }
        fetch(signalingUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ candidate: event.candidate })
        }).catch(err => console.error('Error sending ICE candidate:', err));
    };

    // Create and set local description
    const offer = await peer.createOffer();
    await peer.setLocalDescription(offer);

    // Send the offer and wait for the answer
    const response = await fetch(signalingUrl, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ sdp: peer.localDescription })
    });
    if (!response.ok) {
        throw new Error(`Signaling server responded with HTTP ${response.status}`);
    }

    const data = await response.json();
    if (!data || !data.sdp) {
        throw new Error('Signaling server returned no SDP answer');
    }
    await peer.setRemoteDescription(new RTCSessionDescription(data.sdp));
}
|
| 139 |
+
|
| 140 |
+
// Set up WebSocket connection to HF Space for conversation updates
|
| 141 |
+
/**
 * Open the WebSocket to the model server that delivers conversation HTML.
 *
 * Stores the socket in the global wsConnection. When the socket closes it
 * reconnects after 3 seconds, but only while this socket is still the
 * current one. stopStreaming() clears wsConnection, so an intentional stop
 * is not undone by an automatic reconnect.
 */
function setupWebSocket() {
    // Anchor the replacement so only the URL scheme is rewritten
    // (http -> ws, https -> wss).
    const wsUrl = `${window.HF_SPACE_URL.replace(/^http/, 'ws')}/ws_inference`;
    const socket = new WebSocket(wsUrl);
    wsConnection = socket;

    socket.onopen = () => {
        console.log('WebSocket connection established');
    };

    socket.onmessage = (event) => {
        const container = document.getElementById("conversation");
        if (!container) {
            return;
        }
        // NOTE(review): the server payload is inserted as HTML. That is only
        // safe if the server escapes transcribed text before formatting it.
        container.innerHTML = event.data;
        // Auto-scroll to bottom
        container.scrollTop = container.scrollHeight;
    };

    socket.onerror = (error) => {
        console.error('WebSocket error:', error);
        updateStatus('warning', 'WebSocket error');
    };

    socket.onclose = () => {
        console.log('WebSocket connection closed');
        if (wsConnection !== socket) {
            // Closed on purpose, or already replaced by a newer socket.
            return;
        }
        // Unexpected drop: retry after a delay, unless the session was
        // stopped (or replaced) while we were waiting.
        setTimeout(() => {
            if (wsConnection === socket) {
                setupWebSocket();
            }
        }, 3000);
    };
}
|
| 167 |
+
|
| 168 |
+
// Update connection info in the UI
|
| 169 |
+
/**
 * Refresh the status indicator from the current connection health.
 * Shows a warning when the /health probe fails, 'connected' when the peer
 * connection reports 'connected' in either state field, and a warning
 * otherwise. Any error is logged and swallowed.
 */
async function updateConnectionInfo() {
    try {
        const spaceReachable = await checkHfConnection();
        if (!spaceReachable) {
            updateStatus('warning', 'HF Space connection issue');
            return;
        }

        const peerStates = [
            rtcConnection?.connectionState,
            rtcConnection?.iceConnectionState
        ];
        if (peerStates.includes('connected')) {
            updateStatus('connected');
            return;
        }

        updateStatus('warning', 'Connection unstable');
    } catch (err) {
        console.error('Error updating connection info:', err);
    }
}
|
| 184 |
+
|
| 185 |
+
// Update status indicator
|
| 186 |
+
/**
 * Set the text and dot colour of the status indicator.
 *
 * @param {string} status  One of 'connected', 'connecting', 'disconnected',
 *                         'error', 'warning'; anything else shows 'Unknown'.
 * @param {string} message Detail appended for 'error' and 'warning' only.
 */
function updateStatus(status, message = '') {
    const statusText = document.getElementById('status-text');
    const statusIcon = document.getElementById('status-icon');

    // Per-status label, dot colour, and whether the message is appended.
    const presets = new Map([
        ['connected',    { label: 'Connected',     color: '#4CAF50', withMessage: false }],
        ['connecting',   { label: 'Connecting...', color: '#FFC107', withMessage: false }],
        ['disconnected', { label: 'Disconnected',  color: '#9E9E9E', withMessage: false }],
        ['error',        { label: 'Error: ',       color: '#F44336', withMessage: true }],
        ['warning',      { label: 'Warning: ',     color: '#FF9800', withMessage: true }]
    ]);
    const fallback = { label: 'Unknown', color: '#9E9E9E', withMessage: false };

    const preset = presets.get(status) ?? fallback;
    statusText.textContent = preset.withMessage ? preset.label + message : preset.label;
    statusIcon.style.backgroundColor = preset.color;
}
|
| 216 |
|
| 217 |
+
// Stop streaming and clean up
|
| 218 |
+
/**
 * End the listening session and release everything it opened: the peer
 * connection, the WebSocket, the microphone tracks and the status polling
 * interval. Each resource is skipped if it was never created. Finishes by
 * setting the indicator to 'disconnected'.
 */
function stopStreaming() {
    // Peer connection
    if (rtcConnection) {
        rtcConnection.close();
        rtcConnection = null;
    }

    // Conversation socket
    if (wsConnection) {
        wsConnection.close();
        wsConnection = null;
    }

    // Microphone: stopping every track releases the capture device
    if (mediaStream) {
        for (const track of mediaStream.getTracks()) {
            track.stop();
        }
        mediaStream = null;
    }

    // Periodic connection check
    if (statusUpdateInterval) {
        clearInterval(statusUpdateInterval);
        statusUpdateInterval = null;
    }

    updateStatus('disconnected');
}
|
| 246 |
|
| 247 |
+
// Set up event listeners when the DOM is loaded
|
| 248 |
document.addEventListener('DOMContentLoaded', () => {
|
| 249 |
+
updateStatus('disconnected');
|
|
|
|
| 250 |
});
|
| 251 |
</script>
|
| 252 |
""",
|
|
|
|
| 260 |
clear_btn = gr.Button("ποΈ Clear", variant="secondary", size="lg")
|
| 261 |
|
| 262 |
# Status display
|
| 263 |
+
status_output = gr.Markdown(
|
| 264 |
+
"""
|
| 265 |
+
## System Status
|
| 266 |
+
Waiting to connect...
|
| 267 |
+
|
| 268 |
+
*Click Start Listening to begin*
|
| 269 |
+
""",
|
| 270 |
+
label="Status Information"
|
| 271 |
)
|
| 272 |
|
| 273 |
with gr.Column(scale=1):
|
|
|
|
| 280 |
step=0.05,
|
| 281 |
value=DEFAULT_CHANGE_THRESHOLD,
|
| 282 |
label="Speaker Change Sensitivity",
|
| 283 |
+
info="Lower = more sensitive (more speaker changes)"
|
| 284 |
)
|
| 285 |
|
| 286 |
max_speakers_slider = gr.Slider(
|
|
|
|
| 297 |
gr.Markdown("""
|
| 298 |
## π Instructions
|
| 299 |
1. **Start Listening** - allows browser to access microphone
|
| 300 |
+
2. **Speak** - system will transcribe and identify speakers
|
| 301 |
3. **Stop** when finished
|
| 302 |
+
4. **Clear** to reset conversation
|
| 303 |
|
| 304 |
## π¨ Speaker Colors
|
| 305 |
- π΄ Speaker 1 (Red)
|
| 306 |
- π’ Speaker 2 (Teal)
|
| 307 |
- π΅ Speaker 3 (Blue)
|
| 308 |
- π‘ Speaker 4 (Green)
|
| 309 |
+
- β Speaker 5 (Yellow)
|
| 310 |
+
- π£ Speaker 6 (Plum)
|
| 311 |
+
- π€ Speaker 7 (Mint)
|
| 312 |
+
- π Speaker 8 (Gold)
|
| 313 |
""")
|
| 314 |
|
| 315 |
+
# JavaScript to connect buttons to the script functions
|
| 316 |
+
gr.HTML("""
|
| 317 |
+
<script>
|
| 318 |
+
// Wait for Gradio to fully load
|
| 319 |
+
document.addEventListener('DOMContentLoaded', () => {
|
| 320 |
+
// Wait a bit for Gradio buttons to be created
|
| 321 |
+
setTimeout(() => {
|
| 322 |
+
// Get the buttons
|
| 323 |
+
const startBtn = document.querySelector('button[aria-label="Start Listening"]');
|
| 324 |
+
const stopBtn = document.querySelector('button[aria-label="Stop"]');
|
| 325 |
+
const clearBtn = document.querySelector('button[aria-label="Clear"]');
|
| 326 |
+
|
| 327 |
+
if (startBtn) startBtn.onclick = () => startStreaming();
|
| 328 |
+
if (stopBtn) stopBtn.onclick = () => stopStreaming();
|
| 329 |
+
if (clearBtn) clearBtn.onclick = () => {
|
| 330 |
+
// Make API call to clear conversation
|
| 331 |
+
fetch(`${window.HF_SPACE_URL}/clear`, {
|
| 332 |
+
method: 'POST'
|
| 333 |
+
}).then(resp => resp.json())
|
| 334 |
+
.then(data => {
|
| 335 |
+
document.getElementById("conversation").innerHTML =
|
| 336 |
+
"<i>Conversation cleared. Start speaking again...</i>";
|
| 337 |
+
});
|
| 338 |
+
}
|
| 339 |
+
|
| 340 |
+
// Set up settings update
|
| 341 |
+
const updateBtn = document.querySelector('button[aria-label="Update Settings"]');
|
| 342 |
+
if (updateBtn) updateBtn.onclick = () => {
|
| 343 |
+
const threshold = document.querySelector('input[aria-label="Speaker Change Sensitivity"]').value;
|
| 344 |
+
const maxSpeakers = document.querySelector('input[aria-label="Maximum Speakers"]').value;
|
| 345 |
+
|
| 346 |
+
fetch(`${window.HF_SPACE_URL}/settings?threshold=${threshold}&max_speakers=${maxSpeakers}`, {
|
| 347 |
+
method: 'POST'
|
| 348 |
+
}).then(resp => resp.json())
|
| 349 |
+
.then(data => {
|
| 350 |
+
const statusOutput = document.querySelector('.prose');
|
| 351 |
+
if (statusOutput) {
|
| 352 |
+
statusOutput.innerHTML = `
|
| 353 |
+
<h2>System Status</h2>
|
| 354 |
+
<p>Settings updated:</p>
|
| 355 |
+
<ul>
|
| 356 |
+
<li>Threshold: ${threshold}</li>
|
| 357 |
+
<li>Max Speakers: ${maxSpeakers}</li>
|
| 358 |
+
</ul>
|
| 359 |
+
`;
|
| 360 |
+
}
|
| 361 |
+
});
|
| 362 |
+
}
|
| 363 |
+
}, 1000);
|
| 364 |
+
});
|
| 365 |
+
</script>
|
| 366 |
+
""")
|
| 367 |
+
|
| 368 |
+
# Set up periodic status updates
|
| 369 |
+
def get_status():
    """Fetch the status text shown in the status panel.

    Sends a GET to ``{HF_SPACE_URL}/status`` and returns the ``status`` field
    of the JSON reply, or a fallback string when the field is missing. A
    non-200 reply gives ``"Error getting status"``. Any exception, including
    a timeout, gives a ``"Connection error: ..."`` string, so the periodic
    callback never raises into the UI.
    """
    import requests

    try:
        # Bound the request so a hung server cannot block the callback
        # past the next tick.
        resp = requests.get(f"{HF_SPACE_URL}/status", timeout=3)
        if resp.status_code == 200:
            return resp.json().get('status', 'No status information')
        return "Error getting status"
    except Exception as e:
        return f"Connection error: {str(e)}"


# gr.Timer takes the tick period (seconds) as its value; the callback and
# its outputs are attached through the .tick() event listener.
status_timer = gr.Timer(5)
status_timer.tick(get_status, outputs=status_output)
|
| 381 |
+
|
| 382 |
return demo
|
| 383 |
|
| 384 |
# Create Gradio interface
|