File size: 4,335 Bytes
bebc6f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a448f8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import modal

# Modal application hosting the face/voice verification functions below.
app = modal.App("deepface-agent")

# Container Image
# libgl1 / libglib2.0-0 are native runtime deps of opencv-python on debian-slim.
image = (
    modal.Image.debian_slim()
    .apt_install("libgl1", "libglib2.0-0")
    .pip_install(
        "deepface",
        "opencv-python",
        "numpy",
        "Pillow",
        # DeepFace runs on TensorFlow; tf-keras supplies the legacy Keras 2 API
        # that DeepFace expects alongside TF 2.19.
        "tensorflow==2.19.0",
        "tf-keras>=2.19.0",
        # Audio stack for speaker verification (SpeechBrain + torchaudio backend).
        "librosa",
        "scipy",
        "speechbrain",
        "torchaudio",
    )
)

# ✅ This block runs *inside* the Modal container only.
# To avoid repeatedly loading the model on each function call, the speaker
# recognition model is loaded once when the container starts.
with image.imports():
    # NOTE(review): `speechbrain.pretrained` is deprecated in newer SpeechBrain
    # releases in favor of `speechbrain.inference` — confirm the installed
    # version still exposes this import path.
    from speechbrain.pretrained import SpeakerRecognition

    # Pretrained ECAPA-TDNN speaker-verification model (VoxCeleb);
    # weights are cached under ./pretrained_models inside the container.
    verification = SpeakerRecognition.from_hparams(
        source="speechbrain/spkrec-ecapa-voxceleb",
        savedir="pretrained_models/spkrec-ecapa-voxceleb",
    )


@app.function(image=image, gpu="any")
def verify_faces_remote(img1_bytes, img2_bytes):
    """Compare two face images (raw bytes) and report whether they match.

    Returns the DeepFace verification result dict (verified flag, distance,
    threshold, model details).
    """
    from io import BytesIO

    import numpy as np
    from deepface import DeepFace
    from PIL import Image

    def _decode(raw):
        # Turn raw image bytes into an array form DeepFace accepts.
        return np.array(Image.open(BytesIO(raw)))

    return DeepFace.verify(_decode(img1_bytes), _decode(img2_bytes))


@app.function(image=image, gpu="any")
def verify_voices_remote(audio1_bytes, audio2_bytes):
    """
    Accepts audio bytes for two speakers and compares them for a match.

    Returns:
        dict with:
          - "match": bool — True if the model judges both clips the same speaker
          - "similarity": float — model similarity score
          - "threshold": float — decision threshold reported to the caller
        or {"error": str} if verification fails.
    """
    import pathlib
    import tempfile

    # Persist both clips to disk: SpeechBrain's verify_files takes file paths,
    # not in-memory buffers. delete=False so the files survive the `with`
    # (they are closed/flushed on exit); we remove them ourselves below.
    with (
        tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f1,
        tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f2,
    ):
        f1.write(audio1_bytes)
        f2.write(audio2_bytes)
        audio1_path = f1.name
        audio2_path = f2.name

    try:
        # `verification` is loaded once per container in the image.imports()
        # block at module level.
        score, prediction = verification.verify_files(audio1_path, audio2_path)

        # score/prediction come back as torch tensors; cast to plain Python
        # types so the result is JSON-serializable across the Modal boundary
        # (the old code returned the raw prediction tensor as "match").
        return {
            "match": bool(prediction),
            "similarity": float(score),
            "threshold": 0.75,
        }
    except Exception as e:
        # Best-effort API: report the failure instead of crashing the call.
        return {"error": str(e)}

    finally:
        # missing_ok=True: cleanup must never raise and mask the real result.
        pathlib.Path(audio1_path).unlink(missing_ok=True)
        pathlib.Path(audio2_path).unlink(missing_ok=True)


@app.function(image=image, gpu="any", timeout=600)
def verify_faces_in_video_remote(video_bytes: bytes, ref_img_bytes: bytes, interval: int = 30):
    """
    Sample frames from a video every `interval` frames and verify each sampled
    frame against a reference face image.

    Args:
        video_bytes: raw video file contents (decoded by OpenCV).
        ref_img_bytes: raw reference image contents (JPEG/PNG bytes).
        interval: verify every Nth frame (default 30 ≈ once/sec at 30 fps).

    Returns:
        A list of per-frame dicts — {"frame", "distance", "verified"} on
        success, {"frame", "error"} on a per-frame failure — or a
        single-element [{"error": str}] list on a fatal failure.
    """
    import contextlib
    import os
    import tempfile

    import cv2
    from deepface import DeepFace

    results = []

    # Materialize inputs to disk: OpenCV and DeepFace consume file paths.
    with (
        tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as video_temp,
        tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as ref_img_temp,
    ):
        video_temp.write(video_bytes)
        ref_img_temp.write(ref_img_bytes)
        video_path = video_temp.name
        ref_img_path = ref_img_temp.name

    try:
        # A private directory avoids filename collisions between concurrent
        # calls (previously frame_N.jpg went into the shared tempdir) and is
        # removed wholesale — including all frames — when the block exits.
        with tempfile.TemporaryDirectory() as frame_dir:
            # --- FRAME EXTRACTION ---
            frame_paths = []
            cap = cv2.VideoCapture(video_path)
            try:
                frame_count = 0
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    if frame_count % interval == 0:
                        frame_path = os.path.join(frame_dir, f"frame_{frame_count}.jpg")
                        if cv2.imwrite(frame_path, frame):
                            frame_paths.append((frame_count, frame_path))
                    frame_count += 1
            finally:
                # Release the capture even if extraction raises.
                cap.release()

            # --- FACE VERIFICATION ---
            for frame_id, frame_path in frame_paths:
                try:
                    result = DeepFace.verify(
                        img1_path=ref_img_path,
                        img2_path=frame_path,
                        # Frames without a detectable face shouldn't raise;
                        # report the distance anyway.
                        enforce_detection=False,
                    )
                    results.append({
                        "frame": frame_id,
                        "distance": round(result["distance"], 4),
                        "verified": result["verified"],
                    })
                except Exception as e:
                    results.append({
                        "frame": frame_id,
                        "error": str(e),
                    })

        return results

    except Exception as e:
        return [{"error": str(e)}]

    finally:
        # Tolerate already-removed files; cleanup must never raise.
        with contextlib.suppress(OSError):
            os.remove(video_path)
        with contextlib.suppress(OSError):
            os.remove(ref_img_path)