cc1234 committed
Commit 244b0b6 · 1 Parent(s): 0dc8e91
.deepface/weights/arcface_weights.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6336979c0c602cae08d1122a66f4dfb862d059bbcd8ef80306aef2b2249b0c93
+ size 137026640
.deepface/weights/facenet512_weights.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3f76b5117a9ca574d536af8199e6720089eb4ad3dc7e93534496d88265de864f
+ size 94955648
.deepface/weights/yolov8n-face.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d545bf1add5aa736a4febac4f4f9245a6d596cd0fe70d5d57989fe0cb9e626ca
+ size 6389512
.gitattributes CHANGED
@@ -33,3 +33,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.db filter=lfs diff=lfs merge=lfs -text
+ face.json filter=lfs diff=lfs merge=lfs -text
+ .deepface/weights/yolov8n-face.pt filter=lfs diff=lfs merge=lfs -text
+ .deepface/weights/face_recognition_sface_2021dec.onnx filter=lfs diff=lfs merge=lfs -text
+ .deepface/weights/res10_300x300_ssd_iter_140000.caffemodel filter=lfs diff=lfs merge=lfs -text
+ .deepface/weights/centerface.onnx filter=lfs diff=lfs merge=lfs -text
+ .deepface/weights/deploy.prototxt filter=lfs diff=lfs merge=lfs -text
+ .deepface/weights/facenet512_weights.h5 filter=lfs diff=lfs merge=lfs -text
+ .deepface/weights/retinaface.h5 filter=lfs diff=lfs merge=lfs -text
+ .deepface/weights/face_detection_yunet_2023mar.onnx filter=lfs diff=lfs merge=lfs -text
+ .deepface/weights/arcface_weights.h5 filter=lfs diff=lfs merge=lfs -text
+ face_arc.voy filter=lfs diff=lfs merge=lfs -text
+ face_facenet.voy filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
+ venv
+ flagged
+ temp.jpg
+ __pycache__
README.md CHANGED
@@ -1,7 +1,7 @@
  ---
  title: Stashface
- emoji: 💻
- colorFrom: yellow
+ emoji: 👀
+ colorFrom: indigo
  colorTo: purple
  sdk: gradio
  sdk_version: 5.25.2
app.py ADDED
@@ -0,0 +1,25 @@
+ import os
+
+ # Point DeepFace at the repo-local .deepface directory and force CPU inference
+ os.environ["DEEPFACE_HOME"] = "."
+ os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+
+ from models.data_manager import DataManager
+ from web.interface import WebInterface
+
+ def main():
+     """Main entry point for the application"""
+     # Initialize data manager
+     data_manager = DataManager(
+         faces_path="data/faces.json",
+         performers_zip="data/persons.zip",
+         facenet_index_path="data/face_facenet.voy",
+         arc_index_path="data/face_arc.voy"
+     )
+
+     # Initialize and launch web interface
+     web_interface = WebInterface(data_manager, default_threshold=0.5)
+     web_interface.launch(server_name="0.0.0.0", server_port=7860, share=False)
+
+ if __name__ == "__main__":
+     main()
data/face_arc.voy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1913fae047f492d1b1da7100cb7835d12de5fd527b2f47a1d77996641abec8aa
+ size 56782035
data/face_facenet.voy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d722e23bfeb24a661ae1372b7648b61378c2c11870a46e57bdf06f8fe7fee969
+ size 56781931
data/faces.json ADDED
The diff for this file is too large to render. See raw diff
 
data/persons.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c9eb10173cddf6a4bd0339218eb2cd9b3621e29d7d111dbcb41b8062ee43e8ed
+ size 5031776
models/__init__.py ADDED
@@ -0,0 +1 @@
+ # models package
models/data_manager.py ADDED
@@ -0,0 +1,105 @@
+ import os
+ import json
+ import pyzipper
+ from typing import Dict, Any, Optional
+ from voyager import Index, Space, StorageDataType
+
+ class DataManager:
+     def __init__(self, faces_path: str = "data/faces.json",
+                  performers_zip: str = "data/persons.zip",
+                  facenet_index_path: str = "data/face_facenet.voy",
+                  arc_index_path: str = "data/face_arc.voy"):
+         """
+         Initialize the data manager.
+
+         Parameters:
+             faces_path: Path to the faces.json file
+             performers_zip: Path to the performers zip file
+             facenet_index_path: Path to the Facenet index file
+             arc_index_path: Path to the ArcFace index file
+         """
+         self.faces_path = faces_path
+         self.performers_zip = performers_zip
+         self.facenet_index_path = facenet_index_path
+         self.arc_index_path = arc_index_path
+
+         # Initialize indices (512-d cosine space, compact 8-bit E4M3 storage)
+         self.index_arc = Index(Space.Cosine, num_dimensions=512, storage_data_type=StorageDataType.E4M3)
+         self.index_facenet = Index(Space.Cosine, num_dimensions=512, storage_data_type=StorageDataType.E4M3)
+
+         # Load data
+         self.faces = {}
+         self.performer_db = {}
+         self.load_data()
+
+     def load_data(self):
+         """Load all data from files"""
+         self._load_faces()
+         self._load_performer_db()
+         self._load_indices()
+
+     def _load_faces(self):
+         """Load faces from JSON file"""
+         try:
+             with open(self.faces_path, 'r') as f:
+                 self.faces = json.load(f)
+         except Exception as e:
+             print(f"Error loading faces: {e}")
+             self.faces = {}
+
+     def _load_performer_db(self):
+         """Load performer database from encrypted zip file"""
+         try:
+             with pyzipper.AESZipFile(self.performers_zip) as zf:
+                 password = os.getenv("VISAGE_KEY", "").encode('ascii')
+                 zf.setpassword(password)
+                 self.performer_db = json.loads(zf.read('performers.json'))
+         except Exception as e:
+             print(f"Error loading performer database: {e}")
+             self.performer_db = {}
+
+     def _load_indices(self):
+         """Load face recognition indices"""
+         try:
+             with open(self.arc_index_path, 'rb') as f:
+                 self.index_arc = self.index_arc.load(f)
+
+             with open(self.facenet_index_path, 'rb') as f:
+                 self.index_facenet = self.index_facenet.load(f)
+         except Exception as e:
+             print(f"Error loading indices: {e}")
+
+     def get_performer_info(self, stash_id: str, confidence: float) -> Optional[Dict[str, Any]]:
+         """
+         Get performer information from the database.
+
+         Parameters:
+             stash_id: Stash ID of the performer
+             confidence: Confidence score (0-1)
+
+         Returns:
+             Dictionary with performer information, or None if not found
+         """
+         performer = self.performer_db.get(stash_id)
+         if not performer:
+             return None
+
+         confidence_int = int(confidence * 100)
+         return {
+             'id': stash_id,
+             'name': performer['name'],
+             'confidence': confidence_int,
+             'image': performer['image'],
+             'country': performer['country'],
+             'hits': 1,
+             'distance': confidence_int,
+             'performer_url': f"https://stashdb.org/performers/{stash_id}"
+         }
+
+     def query_facenet_index(self, embedding, limit):
+         """Query the Facenet index with an embedding"""
+         return self.index_facenet.query(embedding, limit)
+
+     def query_arc_index(self, embedding, limit):
+         """Query the ArcFace index with an embedding"""
+         return self.index_arc.query(embedding, limit)
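
Editor's note: the .voy files loaded above are Voyager nearest-neighbour indices. A minimal sketch of how a compatible index could be built and queried, using the same parameters DataManager uses (the vectors here are random, purely for illustration; the real indices ship with the repo):

    import numpy as np
    from voyager import Index, Space, StorageDataType

    # 512-d cosine space with 8-bit E4M3 storage, as in DataManager.__init__
    index = Index(Space.Cosine, num_dimensions=512, storage_data_type=StorageDataType.E4M3)

    # add_item returns the integer id assigned to each embedding
    ids = [index.add_item(np.random.rand(512).astype(np.float32)) for _ in range(10)]

    # query returns (neighbor_ids, distances), the pair the ensemble code consumes
    neighbor_ids, distances = index.query(np.random.rand(512).astype(np.float32), k=3)
    print(neighbor_ids, distances)

    index.save("face_demo.voy")  # hypothetical output path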
models/face_recognition.py ADDED
@@ -0,0 +1,102 @@
+ import os
+ import numpy as np
+ from typing import Dict, List, Tuple
+
+ from deepface import DeepFace
+
+ class EnsembleFaceRecognition:
+     def __init__(self, model_weights: Dict[str, float] = None):
+         """
+         Initialize ensemble face recognition system.
+
+         Parameters:
+             model_weights: Dictionary mapping model names to their weights.
+                 If None, all models are weighted equally.
+         """
+         self.model_weights = model_weights or {}
+         self.boost_factor = 1.8
+
+     def normalize_distances(self, distances: np.ndarray) -> np.ndarray:
+         """Normalize distances to the [0, 1] range within each model's predictions"""
+         min_dist = np.min(distances)
+         max_dist = np.max(distances)
+         if max_dist == min_dist:
+             return np.zeros_like(distances)
+         return (distances - min_dist) / (max_dist - min_dist)
+
+     def compute_model_confidence(self,
+                                  distances: np.ndarray,
+                                  temperature: float = 0.1) -> np.ndarray:
+         """Convert distances to confidence scores for a single model (softmax over negated, normalized distances)"""
+         normalized_distances = self.normalize_distances(distances)
+         exp_distances = np.exp(-normalized_distances / temperature)
+         return exp_distances / np.sum(exp_distances)
+
+     def get_face_embeddings(self, image: np.ndarray) -> Dict[str, np.ndarray]:
+         """Get face embeddings from each model"""
+         return {
+             'facenet': DeepFace.represent(img_path=image, detector_backend='skip', model_name='Facenet512', normalization='Facenet2018', align=True)[0]['embedding'],
+             'arc': DeepFace.represent(img_path=image, detector_backend='skip', model_name='ArcFace', align=True)[0]['embedding'],
+         }
+
+     def ensemble_prediction(self,
+                             model_predictions: Dict[str, Tuple[List[str], List[float]]],
+                             temperature: float = 0.1,
+                             min_agreement: float = 0.5) -> List[Tuple[str, float]]:
+         """
+         Combine predictions from multiple models.
+
+         Parameters:
+             model_predictions: Dictionary mapping model names to their (names, distances) predictions
+             temperature: Temperature parameter for softmax scaling
+             min_agreement: Minimum agreement threshold between models
+
+         Returns:
+             final_predictions: List of (name, confidence) tuples
+         """
+         # Initialize vote counting
+         vote_dict = {}
+         confidence_dict = {}
+
+         # Process each model's predictions
+         for model_name, (names, distances) in model_predictions.items():
+             # Get model weight (default to 1.0 if not specified)
+             model_weight = self.model_weights.get(model_name, 1.0)
+
+             # Compute confidence scores for this model
+             confidences = self.compute_model_confidence(np.array(distances), temperature)
+
+             # Add weighted votes for the top prediction
+             top_name = names[0]
+             top_confidence = confidences[0]
+
+             vote_dict[top_name] = vote_dict.get(top_name, 0) + model_weight
+             confidence_dict[top_name] = confidence_dict.get(top_name, [])
+             confidence_dict[top_name].append(top_confidence)
+
+         # Normalize votes
+         total_weight = sum(self.model_weights.values()) if self.model_weights else len(model_predictions)
+
+         # Compute final results with minimum agreement check
+         final_results = []
+         for name, votes in vote_dict.items():
+             normalized_votes = votes / total_weight
+             # Only include results that meet the minimum agreement threshold
+             if normalized_votes >= min_agreement:
+                 avg_confidence = np.mean(confidence_dict[name])
+                 final_score = normalized_votes * avg_confidence * self.boost_factor
+                 final_score = min(final_score, 1.0)  # Cap at 1.0
+                 final_results.append((name, final_score))
+
+         # Sort by final score, best first
+         final_results.sort(key=lambda x: x[1], reverse=True)
+         return final_results
+
+ def extract_faces(image):
+     """Extract faces from an image using the YOLOv8 detector backend"""
+     return DeepFace.extract_faces(image, detector_backend="yolov8")
+
+ def extract_faces_mediapipe(image, enforce_detection=False, align=False):
+     """Extract faces from an image using the MediaPipe backend"""
+     return DeepFace.extract_faces(image, detector_backend="mediapipe",
+                                   enforce_detection=enforce_detection,
+                                   align=align)
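
Editor's note: the voting logic above can be sanity-checked without any models loaded, since ensemble_prediction only consumes (names, distances) pairs. A toy run (names and distances are made up):

    from models.face_recognition import EnsembleFaceRecognition

    ensemble = EnsembleFaceRecognition({"facenet": 1.0, "arc": 1.0})

    # Hypothetical nearest-neighbour output from each index
    model_predictions = {
        "facenet": (["alice", "bob", "carol"], [0.20, 0.45, 0.60]),
        "arc": (["alice", "carol", "bob"], [0.25, 0.50, 0.55]),
    }

    # Both models' top vote is "alice", so she clears min_agreement=0.5 and
    # comes back with a boosted confidence capped at 1.0
    print(ensemble.ensemble_prediction(model_predictions))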
models/image_processor.py ADDED
@@ -0,0 +1,140 @@
+ import io
+ import base64
+ import numpy as np
+ from uuid import uuid4
+ from PIL import Image as PILImage
+ from typing import List, Dict, Any, Tuple
+
+ from models.face_recognition import EnsembleFaceRecognition, extract_faces, extract_faces_mediapipe
+ from models.data_manager import DataManager
+ from utils.vtt_parser import parse_vtt_offsets
+
+ def get_face_predictions(face, ensemble, data_manager, results):
+     """
+     Get predictions for a single face.
+
+     Parameters:
+         face: Face image array
+         ensemble: EnsembleFaceRecognition instance
+         data_manager: DataManager instance
+         results: Number of results to return
+
+     Returns:
+         List of (name, confidence) tuples
+     """
+     # Get embeddings for the original and horizontally flipped images
+     embeddings_orig = ensemble.get_face_embeddings(face)
+     embeddings_flip = ensemble.get_face_embeddings(np.fliplr(face))
+
+     # Average the embeddings (simple test-time augmentation)
+     facenet = np.mean([embeddings_orig['facenet'], embeddings_flip['facenet']], axis=0)
+     arc = np.mean([embeddings_orig['arc'], embeddings_flip['arc']], axis=0)
+
+     # Get predictions from both models
+     model_predictions = {
+         'facenet': data_manager.query_facenet_index(facenet, max(results, 50)),
+         'arc': data_manager.query_arc_index(arc, max(results, 50)),
+     }
+
+     return ensemble.ensemble_prediction(model_predictions)
+
+ def image_search_performer(image, data_manager, threshold=0.5, results=3):
+     """
+     Search for a performer in an image.
+
+     Parameters:
+         image: PIL Image object
+         data_manager: DataManager instance
+         threshold: Confidence threshold
+         results: Number of results to return
+
+     Returns:
+         List of performer information dictionaries
+     """
+     image_array = np.array(image)
+     ensemble = EnsembleFaceRecognition({"facenet": 1.0, "arc": 1.0})
+
+     try:
+         faces = extract_faces(image_array)
+     except ValueError:
+         raise ValueError("No faces found")
+
+     predictions = get_face_predictions(faces[0]['face'], ensemble, data_manager, results)
+     response = []
+     for name, confidence in predictions:
+         performer_info = data_manager.get_performer_info(data_manager.faces[name], confidence)
+         if performer_info:
+             response.append(performer_info)
+     return response
+
+ def image_search_performers(image, data_manager, threshold=0.5, results=3):
+     """
+     Search for multiple performers in an image.
+
+     Parameters:
+         image: PIL Image object
+         data_manager: DataManager instance
+         threshold: Confidence threshold
+         results: Number of results to return
+
+     Returns:
+         List of dictionaries with face image and performer information
+     """
+     image_array = np.array(image)
+     ensemble = EnsembleFaceRecognition({"facenet": 1.0, "arc": 1.0})
+
+     try:
+         faces = extract_faces(image_array)
+     except ValueError:
+         raise ValueError("No faces found")
+
+     response = []
+     for face in faces:
+         predictions = get_face_predictions(face['face'], ensemble, data_manager, results)
+
+         # Crop the face and encode it as a base64 JPEG
+         area = face['facial_area']
+         cimage = image.crop((area['x'], area['y'], area['x'] + area['w'], area['y'] + area['h']))
+         buf = io.BytesIO()
+         cimage.save(buf, format='JPEG')
+         im_b64 = base64.b64encode(buf.getvalue()).decode('ascii')
+
+         # Get performer information
+         performers = []
+         for name, confidence in predictions:
+             performer_info = data_manager.get_performer_info(data_manager.faces[name], confidence)
+             if performer_info:
+                 performers.append(performer_info)
+
+         response.append({
+             'image': im_b64,
+             'confidence': face['confidence'],
+             'performers': performers
+         })
+     return response
+
+ def find_faces_in_sprite(image, vtt_data):
+     """
+     Find faces in a sprite image using VTT data.
+
+     Parameters:
+         image: Sprite image as a numpy array
+         vtt_data: Base64 encoded VTT data
+
+     Returns:
+         List of dictionaries with face information
+     """
+     vtt = base64.b64decode(vtt_data.replace("data:text/vtt;base64,", ""))
+     sprite = PILImage.fromarray(image)
+
+     results = []
+     # parse_vtt_offsets yields xywh fragments, so (right, bottom) here are
+     # really the crop's width and height, not absolute coordinates
+     for i, (left, top, right, bottom, time_seconds) in enumerate(parse_vtt_offsets(vtt)):
+         cut_frame = sprite.crop((left, top, left + right, top + bottom))
+         faces = extract_faces_mediapipe(np.asarray(cut_frame), enforce_detection=False, align=False)
+         faces = [face for face in faces if face['confidence'] > 0.6]
+         if faces:
+             size = faces[0]['facial_area']['w'] * faces[0]['facial_area']['h']
+             data = {'id': str(uuid4()), 'offset': (left, top, right, bottom), 'frame': i, 'time': time_seconds, 'size': size}
+             results.append(data)
+
+     return results
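
Editor's note: the 'image' field returned by image_search_performers is a base64-encoded JPEG. A quick sketch of turning it back into a PIL image on the client side (assumes a response list as returned above):

    import io
    import base64
    from PIL import Image

    # response = image_search_performers(img, data_manager)  # as above
    face_jpeg = base64.b64decode(response[0]['image'])
    Image.open(io.BytesIO(face_jpeg)).save("face_0.jpg")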
requirements.txt ADDED
@@ -0,0 +1,127 @@
+ absl-py==2.2.2
+ aiofiles==24.1.0
+ annotated-types==0.7.0
+ anyio==4.9.0
+ astunparse==1.6.3
+ beautifulsoup4==4.13.4
+ blinker==1.9.0
+ certifi==2025.1.31
+ charset-normalizer==3.4.1
+ click==8.1.8
+ contourpy==1.3.2
+ cycler==0.12.1
+ deepface @ git+https://github.com/serengil/deepface.git@cc484b54be5188eb47faf132995af16a871d70b9
+ fastapi==0.115.12
+ ffmpy==0.5.0
+ filelock==3.18.0
+ fire==0.7.0
+ flask==3.1.0
+ flask-cors==5.0.1
+ flatbuffers==25.2.10
+ fonttools==4.57.0
+ fsspec==2025.3.2
+ gast==0.6.0
+ gdown==5.2.0
+ google-pasta==0.2.0
+ gradio==5.25.2
+ gradio-client==1.8.0
+ groovy==0.1.2
+ grpcio==1.71.0
+ gunicorn==23.0.0
+ h11==0.14.0
+ h5py==3.13.0
+ httpcore==1.0.8
+ httpx==0.28.1
+ huggingface-hub==0.30.2
+ idna==3.10
+ itsdangerous==2.2.0
+ jinja2==3.1.6
+ joblib==1.4.2
+ keras==3.9.2
+ kiwisolver==1.4.8
+ libclang==18.1.1
+ lz4==4.4.4
+ markdown==3.8
+ markdown-it-py==3.0.0
+ markupsafe==3.0.2
+ matplotlib==3.10.1
+ mdurl==0.1.2
+ ml-dtypes==0.5.1
+ mpmath==1.3.0
+ mtcnn==1.0.0
+ namex==0.0.8
+ networkx==3.4.2
+ numpy==2.1.3
+ nvidia-cublas-cu12==12.4.5.8
+ nvidia-cuda-cupti-cu12==12.4.127
+ nvidia-cuda-nvrtc-cu12==12.4.127
+ nvidia-cuda-runtime-cu12==12.4.127
+ nvidia-cudnn-cu12==9.1.0.70
+ nvidia-cufft-cu12==11.2.1.3
+ nvidia-curand-cu12==10.3.5.147
+ nvidia-cusolver-cu12==11.6.1.9
+ nvidia-cusparse-cu12==12.3.1.170
+ nvidia-cusparselt-cu12==0.6.2
+ nvidia-nccl-cu12==2.21.5
+ nvidia-nvjitlink-cu12==12.4.127
+ nvidia-nvtx-cu12==12.4.127
+ opencv-python==4.11.0.86
+ opt-einsum==3.4.0
+ optree==0.15.0
+ orjson==3.10.16
+ packaging==25.0
+ pandas==2.2.3
+ pillow==11.2.1
+ protobuf==5.29.4
+ psutil==7.0.0
+ py-cpuinfo==9.0.0
+ pycryptodomex==3.22.0
+ pydantic==2.11.3
+ pydantic-core==2.33.1
+ pydub==0.25.1
+ pygments==2.19.1
+ pyparsing==3.2.3
+ pysocks==1.7.1
+ python-dateutil==2.9.0.post0
+ python-multipart==0.0.20
+ pytz==2025.2
+ pyyaml==6.0.2
+ pyzipper==0.3.6
+ requests==2.32.3
+ retina-face==0.0.17
+ rich==14.0.0
+ ruff==0.11.6
+ safehttpx==0.1.6
+ scipy==1.15.2
+ seaborn==0.13.2
+ semantic-version==2.10.0
+ setuptools==78.1.0
+ shellingham==1.5.4
+ six==1.17.0
+ sniffio==1.3.1
+ soupsieve==2.6
+ starlette==0.46.2
+ sympy==1.13.1
+ tensorboard==2.19.0
+ tensorboard-data-server==0.7.2
+ tensorflow==2.19.0
+ termcolor==3.0.1
+ tf-keras==2.19.0
+ tomlkit==0.13.2
+ torch==2.6.0
+ torchvision==0.21.0
+ tqdm==4.67.1
+ triton==3.2.0
+ typer==0.15.2
+ typing-extensions==4.13.2
+ typing-inspection==0.4.0
+ tzdata==2025.2
+ ultralytics==8.3.69
+ ultralytics-thop==2.0.14
+ urllib3==2.4.0
+ uvicorn==0.34.2
+ voyager==2.1.0
+ websockets==15.0.1
+ werkzeug==3.1.3
+ wheel==0.45.1
+ wrapt==1.17.2
tests/__init__.py ADDED
@@ -0,0 +1,3 @@
+ """
+ Test package initialization
+ """
tests/test_vtt_parser.py ADDED
@@ -0,0 +1,85 @@
+ import pytest
+ from utils.vtt_parser import parse_vtt_offsets
+
+ def test_parse_simple_vtt():
+     """Test parsing a simple VTT file with one timestamp and coordinates"""
+     vtt_content = """WEBVTT
+
+ 00:00:05.000 --> 00:00:10.000
+ xywh=100,200,300,400
+ """
+     result = list(parse_vtt_offsets(vtt_content.encode('utf-8')))
+     assert len(result) == 1
+     left, top, right, bottom, time = result[0]
+     assert left == 100
+     assert top == 200
+     assert right == 300
+     assert bottom == 400
+     assert time == 5.0
+
+ def test_parse_multiple_entries():
+     """Test parsing multiple timestamps and coordinates"""
+     vtt_content = """WEBVTT
+
+ 00:00:05.000 --> 00:00:10.000
+ xywh=100,200,300,400
+
+ 00:01:30.500 --> 00:01:35.000
+ xywh=150,250,350,450
+ """
+     result = list(parse_vtt_offsets(vtt_content.encode('utf-8')))
+     assert len(result) == 2
+
+     # First entry
+     left, top, right, bottom, time = result[0]
+     assert (left, top, right, bottom) == (100, 200, 300, 400)
+     assert time == 5.0
+
+     # Second entry
+     left, top, right, bottom, time = result[1]
+     assert (left, top, right, bottom) == (150, 250, 350, 450)
+     assert time == 90.5  # 1 minute 30.5 seconds
+
+ def test_parse_empty_vtt():
+     """Test parsing an empty VTT file"""
+     vtt_content = "WEBVTT\n"
+     result = list(parse_vtt_offsets(vtt_content.encode('utf-8')))
+     assert len(result) == 0
+
+ def test_parse_invalid_format():
+     """Test that parsing VTT with an invalid format yields no results"""
+     vtt_content = """WEBVTT
+
+ 00:00:05.000 --> 00:00:10.000
+ invalid_line
+ """
+     result = list(parse_vtt_offsets(vtt_content.encode('utf-8')))
+     assert len(result) == 0
+
+ def test_parse_hour_timestamp():
+     """Test parsing a timestamp with hours"""
+     vtt_content = """WEBVTT
+
+ 01:30:05.000 --> 01:30:10.000
+ xywh=100,200,300,400
+ """
+     result = list(parse_vtt_offsets(vtt_content.encode('utf-8')))
+     assert len(result) == 1
+     left, top, right, bottom, time = result[0]
+     assert time == 5405.0  # 1 hour + 30 minutes + 5 seconds
+
+ def test_parse_missing_coordinates():
+     """Test that entries without coordinates are skipped"""
+     vtt_content = """WEBVTT
+
+ 00:00:05.000 --> 00:00:10.000
+ Some text content
+
+ 00:00:10.000 --> 00:00:15.000
+ xywh=100,200,300,400
+ """
+     result = list(parse_vtt_offsets(vtt_content.encode('utf-8')))
+     assert len(result) == 1
+     left, top, right, bottom, time = result[0]
+     assert time == 10.0
+     assert (left, top, right, bottom) == (100, 200, 300, 400)
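
Editor's note: since tests/ is a package and the parser imports from utils.vtt_parser, running the suite from the repository root should need no extra path setup:

    pytest tests/ -v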
utils/__init__.py ADDED
@@ -0,0 +1 @@
+ # utils package
utils/vtt_parser.py ADDED
@@ -0,0 +1,44 @@
+ from typing import List, Tuple, Generator
+
+ def parse_vtt_offsets(vtt_content: bytes) -> Generator[Tuple[int, int, int, int, float], None, None]:
+     """
+     Parse VTT file content and extract sprite offsets and timestamps.
+
+     Note: the xywh fragment encodes x, y, width, height, so the third and
+     fourth values yielded are the crop's width and height.
+
+     Parameters:
+         vtt_content: Raw VTT file content as bytes
+
+     Returns:
+         Generator yielding tuples of (left, top, right, bottom, time_seconds)
+     """
+     time_seconds = 0
+     left = top = right = bottom = None
+
+     for line in vtt_content.decode("utf-8").split("\n"):
+         line = line.strip()
+
+         if "-->" in line:
+             # Grab the start time, e.g. "00:00:00.000 --> 00:00:41.000"
+             start = line.split("-->")[0].strip().split(":")
+             # Convert HH:MM:SS.mmm to seconds
+             time_seconds = (
+                 int(start[0]) * 3600
+                 + int(start[1]) * 60
+                 + float(start[2])
+             )
+             left = top = right = bottom = None
+         elif "xywh=" in line:
+             left, top, right, bottom = line.split("xywh=")[-1].split(",")
+             left, top, right, bottom = (
+                 int(left),
+                 int(top),
+                 int(right),
+                 int(bottom),
+             )
+         else:
+             continue
+
+         # "if not left" would also skip a legitimate zero x-offset,
+         # so compare against None explicitly
+         if left is None:
+             continue
+
+         yield left, top, right, bottom, time_seconds
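
Editor's note: a quick interactive check of the parser (the VTT snippet is illustrative, and also exercises the zero-offset case the None comparison above protects):

    sample = b"""WEBVTT

    00:00:00.000 --> 00:00:05.000
    sprite.jpg#xywh=0,0,160,90
    """

    for left, top, width, height, t in parse_vtt_offsets(sample):
        print(t, (left, top, width, height))
    # prints: 0.0 (0, 0, 160, 90)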
web/__init__.py ADDED
@@ -0,0 +1 @@
+ # web package
web/interface.py ADDED
@@ -0,0 +1,174 @@
+ import gradio as gr
+ from typing import Dict, Any
+
+ from models.data_manager import DataManager
+ from models.image_processor import (
+     image_search_performer,
+     image_search_performers,
+     find_faces_in_sprite
+ )
+
+ class WebInterface:
+     def __init__(self, data_manager: DataManager, default_threshold: float = 0.5):
+         """
+         Initialize the web interface.
+
+         Parameters:
+             data_manager: DataManager instance
+             default_threshold: Default confidence threshold
+         """
+         self.data_manager = data_manager
+         self.default_threshold = default_threshold
+
+     def image_search(self, img, threshold, results):
+         """Wrapper for the single-face image search function"""
+         return image_search_performer(img, self.data_manager, threshold, results)
+
+     def multiple_image_search(self, img, threshold, results):
+         """Wrapper for the multiple-face image search function"""
+         return image_search_performers(img, self.data_manager, threshold, results)
+
+     def vector_search(self, vector_json, threshold, results):
+         """Wrapper for the vector search function (deprecated)"""
+         return {'status': 'not implemented'}
+
+     def _create_image_search_interface(self):
+         """Create the single face search interface"""
+         with gr.Blocks() as interface:
+             gr.Markdown("# Who is in the photo?")
+             gr.Markdown("Upload an image of a person and we'll tell you who it is.")
+
+             with gr.Row():
+                 with gr.Column():
+                     img_input = gr.Image()
+                     threshold = gr.Slider(
+                         label="threshold",
+                         minimum=0.0,
+                         maximum=1.0,
+                         value=self.default_threshold
+                     )
+                     results_count = gr.Slider(
+                         label="results",
+                         minimum=0,
+                         maximum=50,
+                         value=3,
+                         step=1
+                     )
+                     search_btn = gr.Button("Search")
+
+                 with gr.Column():
+                     output = gr.JSON(label="Results")
+
+             search_btn.click(
+                 fn=self.image_search,
+                 inputs=[img_input, threshold, results_count],
+                 outputs=output
+             )
+
+         return interface
+
+     def _create_multiple_image_search_interface(self):
+         """Create the multiple face search interface"""
+         with gr.Blocks() as interface:
+             gr.Markdown("# Who is in the photo?")
+             gr.Markdown("Upload an image of one or more people and we'll tell you who they are.")
+
+             with gr.Row():
+                 with gr.Column():
+                     img_input = gr.Image(type="pil")
+                     threshold = gr.Slider(
+                         label="threshold",
+                         minimum=0.0,
+                         maximum=1.0,
+                         value=self.default_threshold
+                     )
+                     results_count = gr.Slider(
+                         label="results",
+                         minimum=0,
+                         maximum=50,
+                         value=3,
+                         step=1
+                     )
+                     search_btn = gr.Button("Search")
+
+                 with gr.Column():
+                     output = gr.JSON(label="Results")
+
+             search_btn.click(
+                 fn=self.multiple_image_search,
+                 inputs=[img_input, threshold, results_count],
+                 outputs=output
+             )
+
+         return interface
+
+     def _create_vector_search_interface(self):
+         """Create the vector search interface (deprecated)"""
+         with gr.Blocks() as interface:
+             gr.Markdown("# Vector Search (deprecated)")
+
+             with gr.Row():
+                 with gr.Column():
+                     vector_input = gr.Textbox()
+                     threshold = gr.Slider(
+                         label="threshold",
+                         minimum=0.0,
+                         maximum=1.0,
+                         value=self.default_threshold
+                     )
+                     results_count = gr.Slider(
+                         label="results",
+                         minimum=0,
+                         maximum=50,
+                         value=3,
+                         step=1
+                     )
+                     search_btn = gr.Button("Search")
+
+                 with gr.Column():
+                     output = gr.JSON(label="Results")
+
+             search_btn.click(
+                 fn=self.vector_search,
+                 inputs=[vector_input, threshold, results_count],
+                 outputs=output
+             )
+
+         return interface
+
+     def _create_faces_in_sprite_interface(self):
+         """Create the faces in sprite interface"""
+         with gr.Blocks() as interface:
+             gr.Markdown("# Find Faces in Sprite")
+
+             with gr.Row():
+                 with gr.Column():
+                     img_input = gr.Image()
+                     vtt_input = gr.Textbox(label="VTT file")
+                     search_btn = gr.Button("Process")
+
+                 with gr.Column():
+                     output = gr.JSON(label="Results")
+
+             search_btn.click(
+                 fn=find_faces_in_sprite,
+                 inputs=[img_input, vtt_input],
+                 outputs=output
+             )
+
+         return interface
+
+     def launch(self, server_name="0.0.0.0", server_port=7860, share=True):
+         """Launch the web interface"""
+         with gr.Blocks() as demo:
+             with gr.Tabs() as tabs:
+                 with gr.TabItem("Single Face Search"):
+                     self._create_image_search_interface()
+                 with gr.TabItem("Multiple Face Search"):
+                     self._create_multiple_image_search_interface()
+                 with gr.TabItem("Vector Search"):
+                     self._create_vector_search_interface()
+                 with gr.TabItem("Faces in Sprite"):
+                     self._create_faces_in_sprite_interface()
+
+         # Pass the server settings through instead of silently ignoring them
+         demo.queue().launch(server_name=server_name, server_port=server_port, share=share, ssr_mode=False)
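
Editor's note: once the app is running, the search endpoints can be exercised programmatically with gradio_client (pinned in requirements.txt). A minimal sketch; the api_name is an assumption derived from the handler's function name, so check client.view_api() for the actual endpoint names:

    from gradio_client import Client, handle_file

    client = Client("http://localhost:7860")
    result = client.predict(
        handle_file("photo.jpg"),   # image
        0.5,                        # threshold
        3,                          # results
        api_name="/image_search",   # assumed; verify with client.view_api()
    )
    print(result)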