Spaces:
Sleeping
Sleeping
Commit
·
f541218
1
Parent(s):
1310d41
Check point 4
Browse files
app.py
CHANGED
|
@@ -419,7 +419,7 @@ class RealtimeSpeakerDiarization:
|
|
| 419 |
# Setup recorder configuration
|
| 420 |
recorder_config = {
|
| 421 |
'spinner': False,
|
| 422 |
-
'use_microphone': False, #
|
| 423 |
'model': FINAL_TRANSCRIPTION_MODEL,
|
| 424 |
'language': TRANSCRIPTION_LANGUAGE,
|
| 425 |
'silero_sensitivity': SILERO_SENSITIVITY,
|
|
@@ -558,6 +558,12 @@ class RealtimeSpeakerDiarization:
|
|
| 558 |
embedding = self.audio_processor.extract_embedding_from_buffer()
|
| 559 |
if embedding is not None:
|
| 560 |
self.speaker_detector.add_embedding(embedding)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 561 |
|
| 562 |
except Exception as e:
|
| 563 |
logger.error(f"Error processing audio chunk: {e}")
|
|
@@ -630,7 +636,18 @@ class DiarizationHandler(AsyncStreamHandler):
|
|
| 630 |
|
| 631 |
# Global instances
|
| 632 |
diarization_system = RealtimeSpeakerDiarization()
|
| 633 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 634 |
|
| 635 |
def initialize_system():
|
| 636 |
"""Initialize the diarization system"""
|
|
@@ -639,6 +656,8 @@ def initialize_system():
|
|
| 639 |
success = diarization_system.initialize_models()
|
| 640 |
if success:
|
| 641 |
audio_handler = DiarizationHandler(diarization_system)
|
|
|
|
|
|
|
| 642 |
return "✅ System initialized successfully!"
|
| 643 |
else:
|
| 644 |
return "❌ Failed to initialize system. Check logs for details."
|
|
@@ -646,6 +665,13 @@ def initialize_system():
|
|
| 646 |
logger.error(f"Initialization error: {e}")
|
| 647 |
return f"❌ Initialization error: {str(e)}"
|
| 648 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 649 |
def start_recording():
|
| 650 |
"""Start recording and transcription"""
|
| 651 |
try:
|
|
@@ -831,9 +857,6 @@ def create_interface():
|
|
| 831 |
return interface
|
| 832 |
|
| 833 |
|
| 834 |
-
# FastAPI setup for FastRTC integration
|
| 835 |
-
app = FastAPI()
|
| 836 |
-
|
| 837 |
@app.get("/")
|
| 838 |
async def root():
|
| 839 |
return {"message": "Real-time Speaker Diarization API"}
|
|
@@ -875,12 +898,6 @@ async def api_update_settings(threshold: float, max_speakers: int):
|
|
| 875 |
result = update_settings(threshold, max_speakers)
|
| 876 |
return {"result": result}
|
| 877 |
|
| 878 |
-
# FastRTC Stream setup
|
| 879 |
-
if audio_handler:
|
| 880 |
-
stream = Stream(handler=audio_handler)
|
| 881 |
-
app.include_router(stream.router, prefix="/stream")
|
| 882 |
-
|
| 883 |
-
|
| 884 |
# Main execution
|
| 885 |
if __name__ == "__main__":
|
| 886 |
import argparse
|
|
|
|
| 419 |
# Setup recorder configuration
|
| 420 |
recorder_config = {
|
| 421 |
'spinner': False,
|
| 422 |
+
'use_microphone': False, # Explicitly set to False - we'll feed audio via FastRTC
|
| 423 |
'model': FINAL_TRANSCRIPTION_MODEL,
|
| 424 |
'language': TRANSCRIPTION_LANGUAGE,
|
| 425 |
'silero_sensitivity': SILERO_SENSITIVITY,
|
|
|
|
| 558 |
embedding = self.audio_processor.extract_embedding_from_buffer()
|
| 559 |
if embedding is not None:
|
| 560 |
self.speaker_detector.add_embedding(embedding)
|
| 561 |
+
|
| 562 |
+
# Feed audio to the RealtimeSTT recorder
|
| 563 |
+
if self.recorder:
|
| 564 |
+
# Convert float32 audio to int16 bytes format for RealtimeSTT
|
| 565 |
+
audio_bytes = (audio_data * 32768.0).astype(np.int16).tobytes()
|
| 566 |
+
self.recorder.feed_audio(audio_bytes)
|
| 567 |
|
| 568 |
except Exception as e:
|
| 569 |
logger.error(f"Error processing audio chunk: {e}")
|
|
|
|
| 636 |
|
| 637 |
# Global instances
|
| 638 |
diarization_system = RealtimeSpeakerDiarization()
|
| 639 |
+
|
| 640 |
+
# FastAPI setup for FastRTC integration
|
| 641 |
+
app = FastAPI()
|
| 642 |
+
|
| 643 |
+
# Create a placeholder handler so the stream and its router can be registered at import time; initialize_system() swaps in a fresh handler once the models load
|
| 644 |
+
audio_handler = DiarizationHandler(diarization_system)
|
| 645 |
+
|
| 646 |
+
# Create FastRTC stream
|
| 647 |
+
stream = Stream(handler=audio_handler)
|
| 648 |
+
|
| 649 |
+
# Include FastRTC router in FastAPI app
|
| 650 |
+
app.include_router(stream.router, prefix="/stream")
|
| 651 |
|
| 652 |
def initialize_system():
|
| 653 |
"""Initialize the diarization system"""
|
|
|
|
| 656 |
success = diarization_system.initialize_models()
|
| 657 |
if success:
|
| 658 |
audio_handler = DiarizationHandler(diarization_system)
|
| 659 |
+
# Update the stream's handler
|
| 660 |
+
stream.handler = audio_handler
|
| 661 |
return "✅ System initialized successfully!"
|
| 662 |
else:
|
| 663 |
return "❌ Failed to initialize system. Check logs for details."
|
|
|
|
| 665 |
logger.error(f"Initialization error: {e}")
|
| 666 |
return f"❌ Initialization error: {str(e)}"
|
| 667 |
|
| 668 |
+
# Add startup event to initialize the system
|
| 669 |
+
@app.on_event("startup")
|
| 670 |
+
async def startup_event():
|
| 671 |
+
logger.info("Initializing diarization system on startup...")
|
| 672 |
+
result = initialize_system()
|
| 673 |
+
logger.info(f"Initialization result: {result}")
|
| 674 |
+
|
| 675 |
def start_recording():
|
| 676 |
"""Start recording and transcription"""
|
| 677 |
try:
|
|
|
|
| 857 |
return interface
|
| 858 |
|
| 859 |
|
|
|
|
|
|
|
|
|
|
| 860 |
@app.get("/")
|
| 861 |
async def root():
|
| 862 |
return {"message": "Real-time Speaker Diarization API"}
|
|
|
|
| 898 |
result = update_settings(threshold, max_speakers)
|
| 899 |
return {"result": result}
|
| 900 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 901 |
# Main execution
|
| 902 |
if __name__ == "__main__":
|
| 903 |
import argparse
|