File size: 4,335 Bytes
bebc6f9 a448f8b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import modal

# Modal application that hosts remote face- and voice-verification functions.
app = modal.App("deepface-agent")

# Container Image
# Debian slim base with the system libraries OpenCV needs (libgl1,
# libglib2.0-0) plus the Python packages for face verification
# (deepface + TensorFlow backend) and voice verification
# (speechbrain + torchaudio/librosa).
image = (
    modal.Image.debian_slim()
    .apt_install("libgl1", "libglib2.0-0")
    .pip_install(
        "deepface",
        "opencv-python",
        "numpy",
        "Pillow",
        # tf-keras is pinned alongside tensorflow so deepface's Keras
        # imports resolve against a compatible version.
        "tensorflow==2.19.0",
        "tf-keras>=2.19.0",
        "librosa",
        "scipy",
        "speechbrain",
        "torchaudio",
    )
)
# ✅ This block runs *inside* the Modal container only.
# To avoid repeatedly loading the model on each function call, the speaker
# model is loaded once when the container image is imported and then shared
# by every function invocation in that container.
with image.imports():
    from speechbrain.pretrained import SpeakerRecognition

    # Model for voice recognition: ECAPA speaker-embedding model from the
    # speechbrain/spkrec-ecapa-voxceleb checkpoint, cached under savedir.
    # NOTE(review): `speechbrain.pretrained` is deprecated in newer
    # SpeechBrain releases in favor of `speechbrain.inference` — confirm
    # the installed version still provides this import path.
    verification = SpeakerRecognition.from_hparams(
        source="speechbrain/spkrec-ecapa-voxceleb",
        savedir="pretrained_models/spkrec-ecapa-voxceleb",
    )
@app.function(image=image, gpu="any")
def verify_faces_remote(img1_bytes, img2_bytes):
    """
    Compare two images, given as raw encoded bytes, for a face match.

    Returns the DeepFace.verify result dict (includes "verified",
    "distance", etc.).
    """
    from io import BytesIO

    import numpy as np
    from deepface import DeepFace
    from PIL import Image

    def _to_array(raw_bytes):
        # Decode encoded image bytes into a numpy array DeepFace accepts.
        return np.array(Image.open(BytesIO(raw_bytes)))

    return DeepFace.verify(_to_array(img1_bytes), _to_array(img2_bytes))
@app.function(image=image, gpu="any")
def verify_voices_remote(audio1_bytes, audio2_bytes):
    """
    Compare two voice recordings, given as raw WAV bytes, for a speaker match.

    Returns a dict on success:
        match      -- bool, whether the model judges the speakers identical
        similarity -- float similarity score from the ECAPA model
        threshold  -- 0.75, informational decision threshold
    On failure returns {"error": <message>} instead of raising.
    """
    import pathlib
    import tempfile

    # SpeechBrain's verify_files wants file paths, so spill the bytes to
    # temp files; delete=False because the files must outlive this `with`
    # so the model can re-open them below.
    with (
        tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f1,
        tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f2,
    ):
        f1.write(audio1_bytes)
        f2.write(audio2_bytes)
        audio1_path = f1.name
        audio2_path = f2.name
    try:
        score, prediction = verification.verify_files(audio1_path, audio2_path)
        # Cast the model's tensor outputs to plain Python types so the
        # result serializes cleanly across the Modal boundary.
        return {
            "match": bool(prediction),
            "similarity": float(score),
            "threshold": 0.75,
        }
    except Exception as e:
        # Best-effort API: report the failure rather than crash the call.
        return {"error": str(e)}
    finally:
        # missing_ok=True so cleanup never raises from `finally` (which
        # would mask the real return value or exception).
        pathlib.Path(audio1_path).unlink(missing_ok=True)
        pathlib.Path(audio2_path).unlink(missing_ok=True)
@app.function(image=image, gpu="any", timeout=600)
def verify_faces_in_video_remote(video_bytes: bytes, ref_img_bytes: bytes, interval: int = 30):
    """
    Sample frames from a video and verify each against a reference face image.

    Parameters:
        video_bytes   -- raw video file contents (any container OpenCV reads)
        ref_img_bytes -- raw encoded reference image (JPEG/PNG/...)
        interval      -- check every `interval`-th frame; values < 1 are
                         treated as 1 (guards the modulo below)

    Returns a list of per-frame dicts: {"frame", "distance", "verified"} on
    success, or {"frame", "error"} when a single frame fails. A top-level
    failure returns [{"error": <message>}].
    """
    import os
    import tempfile

    import cv2
    from deepface import DeepFace

    # interval <= 0 would raise ZeroDivisionError in `frame_count % interval`.
    interval = max(1, interval)

    results = []
    frame_paths = []

    # Both OpenCV and DeepFace want file paths, so spill the inputs to disk;
    # delete=False because the files must outlive this `with`.
    with (
        tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as video_temp,
        tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as ref_img_temp,
    ):
        video_temp.write(video_bytes)
        ref_img_temp.write(ref_img_bytes)
        video_path = video_temp.name
        ref_img_path = ref_img_temp.name

    try:
        # --- FRAME EXTRACTION ---
        cap = cv2.VideoCapture(video_path)
        frame_count = 0
        temp_dir = tempfile.gettempdir()
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_count % interval == 0:
                    frame_path = os.path.join(temp_dir, f"frame_{frame_count}.jpg")
                    # imwrite can fail (unsupported codec, disk full);
                    # only keep frames that were actually written.
                    if cv2.imwrite(frame_path, frame):
                        frame_paths.append((frame_count, frame_path))
                frame_count += 1
        finally:
            # Release the capture even if frame reading raises.
            cap.release()

        # --- FACE VERIFICATION ---
        for frame_id, frame_path in frame_paths:
            try:
                result = DeepFace.verify(
                    img1_path=ref_img_path,
                    img2_path=frame_path,
                    # Keep going on frames where no face is detected
                    # instead of aborting the whole scan.
                    enforce_detection=False,
                )
                results.append({
                    "frame": frame_id,
                    "distance": round(result["distance"], 4),
                    "verified": result["verified"],
                })
            except Exception as e:
                # Record the per-frame failure and continue with the rest.
                results.append({
                    "frame": frame_id,
                    "error": str(e),
                })
        return results
    except Exception as e:
        return [{"error": str(e)}]
    finally:
        # Best-effort cleanup: a failed remove must never mask the real
        # return value/exception or skip the remaining deletions.
        for _, frame_path in frame_paths:
            try:
                os.remove(frame_path)
            except OSError:
                pass
        for path in (video_path, ref_img_path):
            try:
                os.remove(path)
            except OSError:
                pass
|