Abraham E. Tavarez committed
Commit bebc6f9 · 1 Parent(s): bd55878

verify voice offloaded to Modal

Files changed (2):
  1. app.py +22 -14
  2. modal_app/modal_app.py +79 -0
app.py CHANGED
@@ -1,11 +1,12 @@
 import gradio as gr
-from detector.face import verify_faces, analyze_face
 from detector.voice import verify_voices
 from detector.video import verify_faces_in_video
 from reports.pdf_report import generate_pdf_report
 from utils.youtube_utils import download_youtube_video
 import modal
 verify_faces_remote = modal.Function.lookup("deepface-agent", "verify_faces_remote")
+verify_voices_remote = modal.Function.lookup("deepface-agent", "verify_voices_remote")
+


 # Holds latest results
@@ -13,7 +14,6 @@ last_face_result = None
 last_voice_result = None
 last_video_results = None

-# @app.local_entrypoint()
 def compare_faces(img1_path: str, img2_path: str) -> str:
     """Use this tool to compare to faces for a match
     Args:
@@ -51,20 +51,28 @@ def compare_voices(audio1: str, audio2: str) -> str:
         audio2: The path to the second audio file
     """
     global last_voice_result
-    result = verify_voices(audio1, audio2)
-    result_text = ""
+
+    try:
+        with open(audio1, "rb") as a1, open(audio2, "rb") as a2:
+            audio1_bytes = a1.read()
+            audio2_bytes = a2.read()
+
+        result = verify_voices_remote.remote(audio1_bytes, audio2_bytes)
+        result_text = ""

-    if "error" in result:
-        return f"❌ Error: {result['error']}"
+        if "error" in result:
+            return f"❌ Error: {result['error']}"

-    if result["match"]:
-        result_text = f"✅ Same speaker detected. Similarity: {result['similarity']} (Threshold: {result['threshold']})"
-        last_voice_result = result_text
-        return result_text
-    else:
-        result_text = f"❌ Different speakers. Similarity: {result['similarity']} (Threshold: {result['threshold']})"
-        last_voice_result = result_text
-        return result_text
+        if result["match"]:
+            result_text = f"✅ Same speaker detected. Similarity: {result['similarity']} (Threshold: {result['threshold']})"
+            last_voice_result = result_text
+            return result_text
+        else:
+            result_text = f"❌ Different speakers. Similarity: {result['similarity']} (Threshold: {result['threshold']})"
+            last_voice_result = result_text
+            return result_text
+    except Exception as e:
+        return f"❌ Error reading audio files: {str(e)}"


 def scan_video(video_file: str, ref_img: str, youtube_url="") -> str:
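
For reference, the client side of this change is Modal's by-name function lookup followed by a .remote() call: the Gradio app never imports speechbrain locally, it only ships raw bytes to the deployed function and gets a plain dict back. Below is a minimal, self-contained sketch of that call path; it assumes the deepface-agent app from this commit is already deployed, and the sample .wav paths are placeholders.

# Sketch: invoking the deployed voice-verification function from any Python client.
# Assumes `modal deploy modal_app/modal_app.py` has been run; file paths are placeholders.
import modal

verify_voices_remote = modal.Function.lookup("deepface-agent", "verify_voices_remote")

with open("sample_a.wav", "rb") as a1, open("sample_b.wav", "rb") as a2:
    result = verify_voices_remote.remote(a1.read(), a2.read())  # bytes in, dict out

if "error" in result:
    print(f"Remote error: {result['error']}")
else:
    print(f"Match: {result['match']} (similarity {result['similarity']:.3f})")
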
modal_app/modal_app.py ADDED
@@ -0,0 +1,79 @@
+import modal
+
+app = modal.App("deepface-agent")
+
+# Container Image
+image = (
+    modal.Image.debian_slim()
+    .apt_install("libgl1", "libglib2.0-0")
+    .pip_install(
+        "deepface",
+        "opencv-python",
+        "numpy",
+        "Pillow",
+        "tensorflow==2.19.0",
+        "tf-keras>=2.19.0",
+        "librosa",
+        "scipy",
+        "speechbrain",
+        "torchaudio",
+    )
+)
+
+# ✅ This block runs *inside* the Modal container only
+# To avoid repeatedly loading the model on each function call
+with image.imports():
+    from speechbrain.pretrained import SpeakerRecognition
+
+    # Model for voice recognition
+    verification = SpeakerRecognition.from_hparams(
+        source="speechbrain/spkrec-ecapa-voxceleb",
+        savedir="pretrained_models/spkrec-ecapa-voxceleb",
+    )
+
+
+@app.function(image=image, gpu="any")
+def verify_faces_remote(img1_bytes, img2_bytes):
+    """
+    Accepts image bytes and compares them for a match.
+    """
+    from deepface import DeepFace
+    from PIL import Image
+    from io import BytesIO
+    import numpy as np
+
+    img1 = np.array(Image.open(BytesIO(img1_bytes)))
+    img2 = np.array(Image.open(BytesIO(img2_bytes)))
+
+    result = DeepFace.verify(img1, img2)
+    return result
+
+
+@app.function(image=image, gpu="any")
+def verify_voices_remote(audio1_bytes, audio2_bytes):
+    """
+    Accepts audio bytes and compares them for a match.
+    """
+
+    import tempfile
+    import pathlib
+
+    with (
+        tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f1,
+        tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f2,
+    ):
+        f1.write(audio1_bytes)
+        f2.write(audio2_bytes)
+        audio1_path = f1.name
+        audio2_path = f2.name
+
+    try:
+        score, prediction = verification.verify_files(audio1_path, audio2_path)
+
+        return {"match": prediction, "similarity": float(score), "threshold": 0.75}
+    except Exception as e:
+        return {"error": str(e)}
+
+    finally:
+        pathlib.Path(audio1_path).unlink()
+        pathlib.Path(audio2_path).unlink()
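
The Gradio app can only resolve these functions once the Modal app has been deployed under the name deepface-agent. Below is a minimal sketch of deploying and smoke-testing the new voice function; the smoke_test entrypoint name and the sample paths are hypothetical additions for illustration, not part of this commit.

# Deploy (publishes verify_faces_remote / verify_voices_remote for modal.Function.lookup):
#   modal deploy modal_app/modal_app.py
#
# Hypothetical smoke test that could be appended to modal_app/modal_app.py and run with:
#   modal run modal_app/modal_app.py::smoke_test
@app.local_entrypoint()
def smoke_test(audio1: str = "samples/a.wav", audio2: str = "samples/b.wav"):
    # Reads local files, ships the bytes to the GPU container, prints the result dict.
    with open(audio1, "rb") as f1, open(audio2, "rb") as f2:
        print(verify_voices_remote.remote(f1.read(), f2.read()))
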