- .gitignore +4 -0
- README.md +14 -3
- actors_matching/__init__.py +0 -0
- actors_matching/api.py +38 -0
- app.py +58 -0
- images/example_hannibal_barca.jpg +0 -0
- images/example_joan_of_arc.jpg +0 -0
- images/example_marie_curie.jpg +0 -0
- images/example_scipio_africanus.jpg +0 -0
- models/actors_annoy_index.ann +0 -0
- models/actors_annoy_metadata.json +1 -0
- models/actors_mapping.json +0 -0
- requirements.txt +3 -1
.gitignore
CHANGED
@@ -1,9 +1,13 @@
+# IDE
+.vscode
+
 # data files from imdb
 data/title.*.tsv*
 data/name.*.tsv*
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
+*/__pycache__/
 *.py[cod]
 *$py.class
 
README.md
CHANGED
@@ -19,7 +19,18 @@ Note that due to API limits, I only took images from 1,000 actors.
 
 The application is built with Gradio and deployed on HuggingFace Space. In the background, it uses:
 
-1. The [`face_recognition` library](https://github.com/ageitgey/face_recognition) to compute an embedding of
-2. Spotify's `annoy` library to efficiently search the closest actors based on the
-3. Show you
+1. The [`face_recognition` library](https://github.com/ageitgey/face_recognition) to extract the location of faces in the image and compute an embedding of these faces
+2. Spotify's `annoy` library to efficiently search for the closest actors based on the face embedding and a small database of actors' face embeddings
+3. Show you the best matches!
+
+This is meant to be a fun and tiny application. There are known issues and biases.
+
+## Known biases and limitations
+
+There are a few issues with the dataset and models used:
+
+- The dataset is limited to a couple thousand actors and actresses and is therefore not representative of the richness of professionals out there.
+- The subset of actors and actresses was selected with an aggregated metric computed over all movies and shows in which the person is listed as an actor/actress: the sum of IMDb votes for each movie/show, weighted by its average IMDb score. This is only a rough indicator of popularity, but it provided a quick way to build a dataset of actors that people may know.
+- Given the above, the sampling carries biases intrinsic to (a) the IMDb database and its user base, which skew towards Western/American movies, and (b) the movie industry itself, with its dominance of white male actors.
+- The pictures of actors and actresses were collected through a simple Bing search and not manually verified, so there are several mistakes. For example, Graham Greene has a mix of pictures of Graham Greene, the Canadian actor, and Graham Greene, the writer. You may get surprising results from time to time! Let me know if you find mistakes.
 
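The commit ships a prebuilt index (models/actors_annoy_index.ann, 128-dimensional face encodings with the "angular" metric) but not the script that generated it. Below is a minimal sketch of how such an index and its mapping file could be built; the `actor_images` list, its contents, and the number of trees are assumptions for illustration, not part of this commit.

```python
import json
import annoy
import face_recognition

EMBEDDING_DIMENSION = 128  # dlib/face_recognition encodings are 128-dimensional
METRIC = "angular"         # matches models/actors_annoy_metadata.json

# Hypothetical input: (actor metadata, image path) pairs gathered during preprocessing
actor_images = [
    ({"name": "Some Actor", "url": "https://example.com/photo.jpg", "nconst": "nm0000001"},
     "data/images/some_actor.jpg"),
]

index = annoy.AnnoyIndex(f=EMBEDDING_DIMENSION, metric=METRIC)
mapping = {}
item_id = 0

for actor, image_path in actor_images:
    image = face_recognition.load_image_file(image_path)
    encodings = face_recognition.face_encodings(image)
    if not encodings:
        continue  # skip pictures where no face is detected
    index.add_item(item_id, encodings[0])
    mapping[item_id] = actor
    item_id += 1

index.build(10)  # number of trees is an assumption
index.save("models/actors_annoy_index.ann")

# json.dump turns the integer keys into strings; load_annoy_index converts them back
with open("models/actors_mapping.json", "w") as f:
    json.dump(mapping, f)
with open("models/actors_annoy_metadata.json", "w") as f:
    json.dump({"metric": METRIC}, f)
```

Saving the metric alongside the index matters because an `AnnoyIndex` must be constructed with the same dimension and metric that were used to build the file it loads.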
actors_matching/__init__.py
ADDED
File without changes
actors_matching/api.py
ADDED
@@ -0,0 +1,38 @@
+import face_recognition
+import json
+import annoy
+from typing import Tuple
+
+EMBEDDING_DIMENSION = 128
+ANNOY_INDEX_FILE = "models/actors_annoy_index.ann"
+ANNOY_METADATA_FILE = "models/actors_annoy_metadata.json"
+ANNOY_MAPPING_FILE = "models/actors_mapping.json"
+
+def load_annoy_index(
+    index_file=ANNOY_INDEX_FILE,
+    metadata_file=ANNOY_METADATA_FILE,
+    mapping_file=ANNOY_MAPPING_FILE,
+) -> Tuple[annoy.AnnoyIndex, dict]:
+    """Load the annoy index and the associated actor mapping file."""
+    with open(metadata_file) as f:
+        annoy_index_metadata = json.load(f)
+
+    annoy_index = annoy.AnnoyIndex(f=EMBEDDING_DIMENSION, **annoy_index_metadata)
+    annoy_index.load(index_file)
+
+    with open(mapping_file) as f:
+        mapping = json.load(f)
+        mapping = {int(k): v for k, v in mapping.items()}  # JSON keys are strings; convert back to item ids
+    return annoy_index, mapping
+
+def analyze_image(image, annoy_index, n_matches: int = 1, num_jitters: int = 1, model: str = "large"):
+    """Extract face locations and embeddings, and return the top n_matches matches per face."""
+    face_locations = face_recognition.face_locations(image)
+    embeddings = face_recognition.face_encodings(image, num_jitters=num_jitters, model=model, known_face_locations=face_locations)
+    matches = []
+    distances = []
+    for emb in embeddings:
+        m, d = annoy_index.get_nns_by_vector(emb, n_matches, include_distances=True)
+        matches.append(m)
+        distances.append(d)
+    return [dict(embeddings=e, matches=m, distances=d, face_locations=f) for e, m, d, f in zip(embeddings, matches, distances, face_locations)]
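The module above can also be exercised on its own, before it is wired into the Gradio interface in app.py below. A quick sketch, where "my_photo.jpg" is a placeholder path:

```python
import face_recognition
from actors_matching.api import analyze_image, load_annoy_index

annoy_index, actors_mapping = load_annoy_index()

# "my_photo.jpg" is a placeholder; any local portrait works
image = face_recognition.load_image_file("my_photo.jpg")

# One result dict per detected face, each with matches, distances, and locations
for face in analyze_image(image, annoy_index=annoy_index, n_matches=3):
    for match, distance in zip(face["matches"], face["distances"]):
        actor = actors_mapping[match]
        print(actor["name"], f"(angular distance: {distance:.3f})")
```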
app.py
ADDED
@@ -0,0 +1,58 @@
+import gradio as gr
+import numpy as np
+from actors_matching.api import analyze_image, load_annoy_index
+
+annoy_index, actors_mapping = load_annoy_index()
+
+def get_image_html(actor: dict):
+    url = actor["url"]
+    name = actor["name"]
+    imdb_url = f"https://www.imdb.com/name/{actor['nconst']}/"
+    return f'''
+    <div style="position: relative; text-align: center; color: white;">
+        <img src="{url}" alt="{name} matches the input image" style="height: 500px">
+        <div style="padding: 0.2em; position: absolute; bottom: 16px; left: 16px; background-color: #aacccccc; font-size: 2em;">
+            <p>{name}</p>
+            <p style="font-size:0.5em"><a href="{imdb_url}" target="_blank">Click to see on IMDb</a></p>
+        </div>
+    </div>
+    '''
+
+def get_best_matches(image, n_matches: int):
+    return analyze_image(image, annoy_index=annoy_index, n_matches=n_matches)
+
+def find_matching_actors(input_img, title, n_matches: int = 10):
+    best_matches_list = get_best_matches(input_img, n_matches=n_matches)
+    best_matches = best_matches_list[0]  # TODO: allow looping through characters
+
+    # Show how the initial image was parsed (ie: which person is displayed)
+
+    # Build the HTML cards to display the results
+    output_htmls = []
+    for match in best_matches["matches"]:
+        actor = actors_mapping[match]
+        output_htmls.append(get_image_html(actor))
+
+    return output_htmls
+
+iface = gr.Interface(
+    find_matching_actors,
+    title="Which actor or actress looks like you?",
+    description="""Who is the best person to play a movie about you? Upload a picture and find out!
+    Or maybe you'd like to know who would best interpret your favorite historical character?
+    Give it a shot or try one of the sample images below.""",
+    inputs=[
+        gr.inputs.Image(shape=(256, 256), label="Your image"),
+        gr.inputs.Textbox(label="Who's that?", placeholder="Optional, you can leave this blank"),
+        # gr.inputs.Slider(minimum=1, maximum=10, step=1, default=5, label="Number of matches"),
+    ],
+    outputs=gr.outputs.Carousel(gr.outputs.HTML(), label="Matching actors & actresses"),
+    examples=[
+        ["images/example_marie_curie.jpg", "Marie Curie"],
+        ["images/example_hannibal_barca.jpg", "Hannibal (the one with the elephants...)"],
+        ["images/example_scipio_africanus.jpg", "Scipio Africanus"],
+        ["images/example_joan_of_arc.jpg", "Jeanne d'Arc"]
+    ]
+)
+
+iface.launch()
images/example_hannibal_barca.jpg
ADDED
images/example_joan_of_arc.jpg
ADDED
images/example_marie_curie.jpg
ADDED
images/example_scipio_africanus.jpg
ADDED
models/actors_annoy_index.ann
ADDED
Binary file (1.52 MB)
models/actors_annoy_metadata.json
ADDED
@@ -0,0 +1 @@
+{"metric": "angular"}
models/actors_mapping.json
ADDED
The diff for this file is too large to render.
requirements.txt
CHANGED
@@ -1,7 +1,9 @@
-#
+# App
 cmake # required for dlib (used by face_recognition)
 face_recognition
 annoy
+matplotlib
+gradio
 
 # Preprocessing
 microsoft-bing-imagesearch