Spaces:
Runtime error
Runtime error
geonmo.gu
committed on
Commit
·
fba8607
1
Parent(s):
6019f50
initial commit
Browse files- README.md +6 -4
- app.py +277 -0
- k21-1.jpg +0 -0
- prompts/categories_places365.txt +365 -0
- prompts/extract_text_features.py +154 -0
- prompts/openimage-classnames.csv +0 -0
- prompts/place365-classnames.txt +365 -0
- prompts/tencent-ml-classnames.txt +0 -0
- prompts/tencent-ml-images.txt +0 -0
- requirements.txt +8 -0
README.md
CHANGED
|
@@ -1,12 +1,14 @@
|
|
| 1 |
---
|
| 2 |
-
title: Socratic Models Image Captioning
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 3.1.1
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Socratic Models Image Captioning
|
| 3 |
+
emoji: π
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 3.1.1
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
models:
|
| 11 |
+
- bigscience/bloom
|
| 12 |
---
|
| 13 |
|
| 14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import torch
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import time
|
| 5 |
+
import clip
|
| 6 |
+
#from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
| 7 |
+
#from flores200_codes import flores_codes
|
| 8 |
+
import requests
|
| 9 |
+
import csv
|
| 10 |
+
import json
|
| 11 |
+
import wget
|
| 12 |
+
|
| 13 |
+
# Precomputed CLIP ViT-L/14 zero-shot classifier weights, keyed by the local
# file name they are stored under inside ./prompts.
url_dict = {
    'clip_ViTL14_openimage_classifier_weights.pt': 'https://raw.githubusercontent.com/geonm/socratic-models-demo/master/prompts/clip_ViTL14_openimage_classifier_weights.pt',
    'clip_ViTL14_place365_classifier_weights.pt': 'https://raw.githubusercontent.com/geonm/socratic-models-demo/master/prompts/clip_ViTL14_place365_classifier_weights.pt',
    'clip_ViTL14_tencentml_classifier_weights.pt': 'https://raw.githubusercontent.com/geonm/socratic-models-demo/master/prompts/clip_ViTL14_tencentml_classifier_weights.pt',
}

os.makedirs('./prompts', exist_ok=True)
for filename, url in url_dict.items():
    # Only fetch weights that are not on disk yet, so a restart does not
    # download (and store) the same files again.
    if not os.path.exists(os.path.join('./prompts', filename)):
        wget.download(url, out='./prompts')

# Expose no CUDA devices to this process (the demo text below says it runs on CPU).
os.environ['CUDA_VISIBLE_DEVICES'] = ''

API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
# Fails fast with KeyError when the HF_TOKEN environment variable is not set.
HF_TOKEN = os.environ["HF_TOKEN"]
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
|
| 26 |
+
|
| 27 |
+
def load_openimage_classnames(csv_path):
    """Read class names from a CSV file.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        dict mapping each zero-based row index to the last column of that row.
    """
    # The context manager closes the file; the original left the handle open.
    # newline='' lets the csv module do its own line-ending handling.
    with open(csv_path, newline='') as csv_file:
        return {idx: row[-1] for idx, row in enumerate(csv.reader(csv_file))}
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def load_tencentml_classnames(txt_path):
    """Read one class name per line from a text file.

    Args:
        txt_path: Path of the text file to read.

    Returns:
        dict mapping each zero-based line index to that line with
        surrounding whitespace stripped.
    """
    # The context manager closes the file; the original left the handle open.
    with open(txt_path) as txt_file:
        return {idx: line.strip() for idx, line in enumerate(txt_file)}
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def build_simple_classifier(clip_model, text_list, template, device):
    """Encode templated prompts with CLIP to form a zero-shot classifier.

    Args:
        clip_model: Model object whose ``encode_text`` method is called.
        text_list: Labels to classify between.
        template: Callable turning one label into the prompt text to encode.
        device: Device the tokenized prompts are moved to.

    Returns:
        Tuple ``(text_features, classnames)``: the L2-normalised text
        features and a dict mapping each label's index to the label.
    """
    with torch.no_grad():
        prompts = list(map(template, text_list))
        tokens = clip.tokenize(prompts).to(device)
        weights = clip_model.encode_text(tokens)
        # Normalise each row to unit length, in place.
        weights /= weights.norm(dim=-1, keepdim=True)

    classnames = dict(enumerate(text_list))
    return weights, classnames
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def load_models():
    """Load CLIP and all zero-shot classifiers used by the demo.

    Returns:
        dict holding the CLIP model and preprocess callable, the classifier
        weights and class-name mapping for each label set (openimage,
        tencentml, place365, imgtype, ppl, ifppl), and the device string.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    print('\tLoading CLIP ViT-L/14')
    clip_model, clip_preprocess = clip.load("ViT-L/14", device=device)

    def load_weights(path):
        # Precomputed weights are converted to a float32 (FloatTensor) tensor.
        return torch.load(path, map_location=device).type(torch.FloatTensor)

    print('\tLoading precomputed zeroshot classifier')
    openimage_weights = load_weights('./prompts/clip_ViTL14_openimage_classifier_weights.pt')
    openimage_names = load_openimage_classnames('./prompts/openimage-classnames.csv')
    tencentml_weights = load_weights('./prompts/clip_ViTL14_tencentml_classifier_weights.pt')
    tencentml_names = load_tencentml_classnames('./prompts/tencent-ml-classnames.txt')
    place365_weights = load_weights('./prompts/clip_ViTL14_place365_classifier_weights.pt')
    # The place365 names file is read with the same line-per-class loader.
    place365_names = load_tencentml_classnames('./prompts/place365-classnames.txt')

    print('\tBuilding simple zeroshot classifier')
    imgtype_weights, imgtype_names = build_simple_classifier(
        clip_model,
        ['photo', 'cartoon', 'sketch', 'painting'],
        lambda c: f'This is a {c}.',
        device,
    )
    ppl_weights, ppl_names = build_simple_classifier(
        clip_model,
        ['no people', 'people'],
        lambda c: f'There are {c} in this photo.',
        device,
    )
    ifppl_weights, ifppl_names = build_simple_classifier(
        clip_model,
        ['is one person', 'are two people', 'are three people', 'are several people', 'are many people'],
        lambda c: f'There {c} in this photo.',
        device,
    )

    return {
        'clip_model': clip_model,
        'clip_preprocess': clip_preprocess,
        'openimage_classifier_weights': openimage_weights,
        'openimage_classnames': openimage_names,
        'tencentml_classifier_weights': tencentml_weights,
        'tencentml_classnames': tencentml_names,
        'place365_classifier_weights': place365_weights,
        'place365_classnames': place365_names,
        'imgtype_classifier_weights': imgtype_weights,
        'imgtype_classnames': imgtype_names,
        'ppl_classifier_weights': ppl_weights,
        'ppl_classnames': ppl_names,
        'ifppl_classifier_weights': ifppl_weights,
        'ifppl_classnames': ifppl_names,
        'device': device,
    }
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def drop_gpu(tensor):
    """Return *tensor* as a NumPy array in host memory.

    Args:
        tensor: A torch tensor on any device.

    Returns:
        The tensor's data as a ``numpy.ndarray``.
    """
    # .cpu() hands back the same tensor when it is already on the CPU, so no
    # branch on CUDA availability is needed and any source device is handled.
    return tensor.cpu().numpy()
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def zeroshot_classifier(image):
    """Embed *image* with CLIP and rank every label set against it.

    Reads the models, classifier weights and class names from the
    module-level ``model_dict``.

    Args:
        image: Input passed to ``model_dict['clip_preprocess']``.

    Returns:
        A 13-tuple: the normalised image features, followed by a
        ``(scores, classes)`` pair for openimage, tencentml, place365,
        imgtype, ppl and ifppl, in that order. The first three keep the top
        10 labels; the last three rank every label in their set.
    """
    def rank(features, weights_key, names_key, k=None):
        # Softmax over the 100x-scaled similarities, then keep the k best
        # labels (all of them when k is None).
        probs = (100.0 * features @ model_dict[weights_key].T).softmax(dim=-1)
        names = model_dict[names_key]
        top = probs[0].topk(len(names) if k is None else k)
        scores, indices = [drop_gpu(part) for part in top]
        return scores, [names[idx] for idx in indices]

    image_input = model_dict['clip_preprocess'](image).unsqueeze(0).to(model_dict['device'])
    # NOTE(review): the source's indentation was lost; everything up to the
    # return is assumed to run under no_grad — confirm against the repo.
    with torch.no_grad():
        image_features = model_dict['clip_model'].encode_image(image_input)
        image_features /= image_features.norm(dim=-1, keepdim=True)

        openimage_scores, openimage_classes = rank(
            image_features, 'openimage_classifier_weights', 'openimage_classnames', 10)
        tencentml_scores, tencentml_classes = rank(
            image_features, 'tencentml_classifier_weights', 'tencentml_classnames', 10)
        place365_scores, place365_classes = rank(
            image_features, 'place365_classifier_weights', 'place365_classnames', 10)
        imgtype_scores, imgtype_classes = rank(
            image_features, 'imgtype_classifier_weights', 'imgtype_classnames')
        ppl_scores, ppl_classes = rank(
            image_features, 'ppl_classifier_weights', 'ppl_classnames')
        ifppl_scores, ifppl_classes = rank(
            image_features, 'ifppl_classifier_weights', 'ifppl_classnames')

    return (
        image_features,
        openimage_scores, openimage_classes,
        tencentml_scores, tencentml_classes,
        place365_scores, place365_classes,
        imgtype_scores, imgtype_classes,
        ppl_scores, ppl_classes,
        ifppl_scores, ifppl_classes,
    )
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def generate_prompt(openimage_classes, tencentml_classes, place365_classes, imgtype_classes, ppl_classes, ifppl_classes):
    """Compose the captioning prompt from the zero-shot classification results.

    Args:
        openimage_classes: Ranked object labels; only the first two are used.
        tencentml_classes: Ranked object labels; all of them are used.
        place365_classes: Ranked place labels; the first three are used.
        imgtype_classes: Ranked image-type labels; the first is used.
        ppl_classes: Ranked 'people' / 'no people' labels; the first is used.
        ifppl_classes: Ranked people-count phrases; the first is used when
            the top ``ppl_classes`` entry is 'people'.

    Returns:
        The prompt string, ending with the line that asks for a caption.
    """
    img_type = imgtype_classes[0]

    if ppl_classes[0] == 'people':
        # The count phrases already carry their verb ('is one person', ...).
        ppl_result = ifppl_classes[0]
    else:
        ppl_result = f'are {ppl_classes[0]}'

    places = place365_classes

    # All tencentml labels first, then the two best openimage labels.
    object_list = ', '.join(
        [f'{cls}' for cls in tencentml_classes] + [f'{cls}' for cls in openimage_classes[:2]]
    )

    prompt_caption = f'''I am an intelligent image captioning bot.
This image is a {img_type}. There {ppl_result}.
I think this photo was taken at a {places[0]}, {places[1]}, or {places[2]}.
I think there might be a {object_list} in this {img_type}.
A creative short caption I can generate to describe this image is:'''

    # Unused alternative prompt (keyword listing), kept for reference:
    #   Let's list keywords that include the following description.
    #   This image is a {img_type}. There {ppl_result}.
    #   I think this photo was taken at a {places[0]}, {places[1]}, or {places[2]}.
    #   I think there might be a {object_list} in this {img_type}.
    #   Relevant keywords which we can list and are separated with comma are:

    return prompt_caption
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def generate_captions(prompt, num_captions=3, *, max_length=16, seed=42, sample=False, timeout=120):
    """Request caption continuations for *prompt* from the BLOOM inference API.

    Args:
        prompt: Text sent as the model input; it is removed from the returned text.
        num_captions: Number of requests to send; one caption is kept per request.
        max_length: Value sent as ``max_new_tokens``.
        seed: Value sent as ``seed``.
        sample: When true, request sampling with ``top_p=0.7``; otherwise
            send ``do_sample=False`` (the original hard-coded behaviour).
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        list of generated strings, one per request.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If a request exceeds *timeout*.
    """
    auth_headers = {"Authorization": f"Bearer {HF_TOKEN}"}

    parameters = {
        "max_new_tokens": max_length,
        "do_sample": bool(sample),
        "seed": seed,
        "early_stopping": False,
        "length_penalty": 0.0,
        "eos_token_id": None,
    }
    if sample:
        parameters["top_p"] = 0.7

    payload = {"inputs": prompt, "parameters": parameters, "options": {"use_cache": False}}

    bloom_results = []
    for _ in range(num_captions):
        # A timeout keeps a stalled API call from hanging the demo forever.
        response = requests.post(API_URL, headers=auth_headers, json=payload, timeout=timeout)
        # Surface HTTP errors here instead of failing later while indexing
        # an error payload.
        response.raise_for_status()
        output = response.json()
        bloom_results.append(output[0]['generated_text'].replace(prompt, ''))
    return bloom_results
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def sorting_texts(image_features, captions):
    """Rank *captions* by CLIP similarity to *image_features*.

    Reads the CLIP model and device from the module-level ``model_dict``.

    Args:
        image_features: Image features to compare against; row 0 is used.
        captions: List of caption strings.

    Returns:
        Tuple ``(scores, sorted_captions)``: softmax scores in descending
        order and the captions reordered to match.
    """
    # NOTE(review): the source's indentation was lost; the similarity step is
    # assumed to run under no_grad as well — confirm against the repo.
    with torch.no_grad():
        tokens = clip.tokenize(captions).to(model_dict['device'])
        caption_features = model_dict['clip_model'].encode_text(tokens)
        caption_features /= caption_features.norm(dim=-1, keepdim=True)

        probs = (100.0 * image_features @ caption_features.T).softmax(dim=-1)
        ranked = probs[0].topk(len(captions))
        scores, order = [drop_gpu(part) for part in ranked]

    return scores, [captions[idx] for idx in order]
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def postprocess_results(scores, classes):
    """Pair rounded scores with their labels for JSON output.

    Args:
        scores: Iterable of values convertible with ``float()``.
        classes: Iterable of labels, aligned with *scores*.

    Returns:
        list of ``{'score': ..., 'output': ...}`` dicts, with each score
        rounded to four decimal places. Pairing stops at the shorter input.
    """
    # Round-trip through '%.4f' so each score is a plain Python float.
    rounded = [float('%.4f' % float(val)) for val in scores]
    return [{'score': score, 'output': cls} for score, cls in zip(rounded, classes)]
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def image_captioning(image):
    """Run the full pipeline: classify, build the prompt, caption, rank.

    Args:
        image: Input image, forwarded to ``zeroshot_classifier``.

    Returns:
        dict with three keys: 'inference_time' (seconds spent in the CLIP
        step and in prompt building plus the BLOOM request),
        'generated_captions' (captions with their scores) and 'reasoning'
        (the scored labels of every classifier).
    """
    start_time = time.time()
    (
        image_features,
        openimage_scores, openimage_classes,
        tencentml_scores, tencentml_classes,
        place365_scores, place365_classes,
        imgtype_scores, imgtype_classes,
        ppl_scores, ppl_classes,
        ifppl_scores, ifppl_classes,
    ) = zeroshot_classifier(image)
    end_zeroshot = time.time()

    prompt_caption = generate_prompt(
        openimage_classes,
        tencentml_classes,
        place365_classes,
        imgtype_classes,
        ppl_classes,
        ifppl_classes,
    )
    generated_captions = generate_captions(prompt_caption, num_captions=1)
    end_bloom = time.time()

    caption_scores, sorted_captions = sorting_texts(image_features, generated_captions)

    timings = {
        'CLIP inference': end_zeroshot - start_time,
        'BLOOM request': end_bloom - end_zeroshot,
    }
    captions = postprocess_results(caption_scores, sorted_captions)
    reasoning = {
        'openimage_results': postprocess_results(openimage_scores, openimage_classes),
        'tencentml_results': postprocess_results(tencentml_scores, tencentml_classes),
        'place365_results': postprocess_results(place365_scores, place365_classes),
        'imgtype_results': postprocess_results(imgtype_scores, imgtype_classes),
        'ppl_results': postprocess_results(ppl_scores, ppl_classes),
        'ifppl_results': postprocess_results(ifppl_scores, ifppl_classes),
    }

    return {
        'inference_time': timings,
        'generated_captions': captions,
        'reasoning': reasoning,
    }
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
if __name__ == '__main__':
    print('\tinit models')

    # `global` has no effect at module scope; model_dict becomes a
    # module-level name that the functions above read.
    global model_dict
    model_dict = load_models()

    # Gradio demo: one PIL image in, one JSON document out.
    inputs = [gr.inputs.Image(type="pil", label="Image")]
    outputs = gr.outputs.JSON()

    title = "Socratic models for image captioning with BLOOM"
    demo_status = "Demo is running on CPU"
    description = f"Details: https://github.com/geonm/socratic-models-demo. {demo_status}"
    article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2204.00598'>Socratic Models: Composing Zero-Shot Multimodal Reasoning with Language</a></p>"
    examples = ['k21-1.jpg']

    demo = gr.Interface(
        image_captioning,
        inputs,
        outputs,
        title=title,
        description=description,
        article=article,
        examples=examples,
        #examples_per_page=50,
    )
    demo.launch()
|
k21-1.jpg
ADDED
|
prompts/categories_places365.txt
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/a/airfield 0
|
| 2 |
+
/a/airplane_cabin 1
|
| 3 |
+
/a/airport_terminal 2
|
| 4 |
+
/a/alcove 3
|
| 5 |
+
/a/alley 4
|
| 6 |
+
/a/amphitheater 5
|
| 7 |
+
/a/amusement_arcade 6
|
| 8 |
+
/a/amusement_park 7
|
| 9 |
+
/a/apartment_building/outdoor 8
|
| 10 |
+
/a/aquarium 9
|
| 11 |
+
/a/aqueduct 10
|
| 12 |
+
/a/arcade 11
|
| 13 |
+
/a/arch 12
|
| 14 |
+
/a/archaelogical_excavation 13
|
| 15 |
+
/a/archive 14
|
| 16 |
+
/a/arena/hockey 15
|
| 17 |
+
/a/arena/performance 16
|
| 18 |
+
/a/arena/rodeo 17
|
| 19 |
+
/a/army_base 18
|
| 20 |
+
/a/art_gallery 19
|
| 21 |
+
/a/art_school 20
|
| 22 |
+
/a/art_studio 21
|
| 23 |
+
/a/artists_loft 22
|
| 24 |
+
/a/assembly_line 23
|
| 25 |
+
/a/athletic_field/outdoor 24
|
| 26 |
+
/a/atrium/public 25
|
| 27 |
+
/a/attic 26
|
| 28 |
+
/a/auditorium 27
|
| 29 |
+
/a/auto_factory 28
|
| 30 |
+
/a/auto_showroom 29
|
| 31 |
+
/b/badlands 30
|
| 32 |
+
/b/bakery/shop 31
|
| 33 |
+
/b/balcony/exterior 32
|
| 34 |
+
/b/balcony/interior 33
|
| 35 |
+
/b/ball_pit 34
|
| 36 |
+
/b/ballroom 35
|
| 37 |
+
/b/bamboo_forest 36
|
| 38 |
+
/b/bank_vault 37
|
| 39 |
+
/b/banquet_hall 38
|
| 40 |
+
/b/bar 39
|
| 41 |
+
/b/barn 40
|
| 42 |
+
/b/barndoor 41
|
| 43 |
+
/b/baseball_field 42
|
| 44 |
+
/b/basement 43
|
| 45 |
+
/b/basketball_court/indoor 44
|
| 46 |
+
/b/bathroom 45
|
| 47 |
+
/b/bazaar/indoor 46
|
| 48 |
+
/b/bazaar/outdoor 47
|
| 49 |
+
/b/beach 48
|
| 50 |
+
/b/beach_house 49
|
| 51 |
+
/b/beauty_salon 50
|
| 52 |
+
/b/bedchamber 51
|
| 53 |
+
/b/bedroom 52
|
| 54 |
+
/b/beer_garden 53
|
| 55 |
+
/b/beer_hall 54
|
| 56 |
+
/b/berth 55
|
| 57 |
+
/b/biology_laboratory 56
|
| 58 |
+
/b/boardwalk 57
|
| 59 |
+
/b/boat_deck 58
|
| 60 |
+
/b/boathouse 59
|
| 61 |
+
/b/bookstore 60
|
| 62 |
+
/b/booth/indoor 61
|
| 63 |
+
/b/botanical_garden 62
|
| 64 |
+
/b/bow_window/indoor 63
|
| 65 |
+
/b/bowling_alley 64
|
| 66 |
+
/b/boxing_ring 65
|
| 67 |
+
/b/bridge 66
|
| 68 |
+
/b/building_facade 67
|
| 69 |
+
/b/bullring 68
|
| 70 |
+
/b/burial_chamber 69
|
| 71 |
+
/b/bus_interior 70
|
| 72 |
+
/b/bus_station/indoor 71
|
| 73 |
+
/b/butchers_shop 72
|
| 74 |
+
/b/butte 73
|
| 75 |
+
/c/cabin/outdoor 74
|
| 76 |
+
/c/cafeteria 75
|
| 77 |
+
/c/campsite 76
|
| 78 |
+
/c/campus 77
|
| 79 |
+
/c/canal/natural 78
|
| 80 |
+
/c/canal/urban 79
|
| 81 |
+
/c/candy_store 80
|
| 82 |
+
/c/canyon 81
|
| 83 |
+
/c/car_interior 82
|
| 84 |
+
/c/carrousel 83
|
| 85 |
+
/c/castle 84
|
| 86 |
+
/c/catacomb 85
|
| 87 |
+
/c/cemetery 86
|
| 88 |
+
/c/chalet 87
|
| 89 |
+
/c/chemistry_lab 88
|
| 90 |
+
/c/childs_room 89
|
| 91 |
+
/c/church/indoor 90
|
| 92 |
+
/c/church/outdoor 91
|
| 93 |
+
/c/classroom 92
|
| 94 |
+
/c/clean_room 93
|
| 95 |
+
/c/cliff 94
|
| 96 |
+
/c/closet 95
|
| 97 |
+
/c/clothing_store 96
|
| 98 |
+
/c/coast 97
|
| 99 |
+
/c/cockpit 98
|
| 100 |
+
/c/coffee_shop 99
|
| 101 |
+
/c/computer_room 100
|
| 102 |
+
/c/conference_center 101
|
| 103 |
+
/c/conference_room 102
|
| 104 |
+
/c/construction_site 103
|
| 105 |
+
/c/corn_field 104
|
| 106 |
+
/c/corral 105
|
| 107 |
+
/c/corridor 106
|
| 108 |
+
/c/cottage 107
|
| 109 |
+
/c/courthouse 108
|
| 110 |
+
/c/courtyard 109
|
| 111 |
+
/c/creek 110
|
| 112 |
+
/c/crevasse 111
|
| 113 |
+
/c/crosswalk 112
|
| 114 |
+
/d/dam 113
|
| 115 |
+
/d/delicatessen 114
|
| 116 |
+
/d/department_store 115
|
| 117 |
+
/d/desert/sand 116
|
| 118 |
+
/d/desert/vegetation 117
|
| 119 |
+
/d/desert_road 118
|
| 120 |
+
/d/diner/outdoor 119
|
| 121 |
+
/d/dining_hall 120
|
| 122 |
+
/d/dining_room 121
|
| 123 |
+
/d/discotheque 122
|
| 124 |
+
/d/doorway/outdoor 123
|
| 125 |
+
/d/dorm_room 124
|
| 126 |
+
/d/downtown 125
|
| 127 |
+
/d/dressing_room 126
|
| 128 |
+
/d/driveway 127
|
| 129 |
+
/d/drugstore 128
|
| 130 |
+
/e/elevator/door 129
|
| 131 |
+
/e/elevator_lobby 130
|
| 132 |
+
/e/elevator_shaft 131
|
| 133 |
+
/e/embassy 132
|
| 134 |
+
/e/engine_room 133
|
| 135 |
+
/e/entrance_hall 134
|
| 136 |
+
/e/escalator/indoor 135
|
| 137 |
+
/e/excavation 136
|
| 138 |
+
/f/fabric_store 137
|
| 139 |
+
/f/farm 138
|
| 140 |
+
/f/fastfood_restaurant 139
|
| 141 |
+
/f/field/cultivated 140
|
| 142 |
+
/f/field/wild 141
|
| 143 |
+
/f/field_road 142
|
| 144 |
+
/f/fire_escape 143
|
| 145 |
+
/f/fire_station 144
|
| 146 |
+
/f/fishpond 145
|
| 147 |
+
/f/flea_market/indoor 146
|
| 148 |
+
/f/florist_shop/indoor 147
|
| 149 |
+
/f/food_court 148
|
| 150 |
+
/f/football_field 149
|
| 151 |
+
/f/forest/broadleaf 150
|
| 152 |
+
/f/forest_path 151
|
| 153 |
+
/f/forest_road 152
|
| 154 |
+
/f/formal_garden 153
|
| 155 |
+
/f/fountain 154
|
| 156 |
+
/g/galley 155
|
| 157 |
+
/g/garage/indoor 156
|
| 158 |
+
/g/garage/outdoor 157
|
| 159 |
+
/g/gas_station 158
|
| 160 |
+
/g/gazebo/exterior 159
|
| 161 |
+
/g/general_store/indoor 160
|
| 162 |
+
/g/general_store/outdoor 161
|
| 163 |
+
/g/gift_shop 162
|
| 164 |
+
/g/glacier 163
|
| 165 |
+
/g/golf_course 164
|
| 166 |
+
/g/greenhouse/indoor 165
|
| 167 |
+
/g/greenhouse/outdoor 166
|
| 168 |
+
/g/grotto 167
|
| 169 |
+
/g/gymnasium/indoor 168
|
| 170 |
+
/h/hangar/indoor 169
|
| 171 |
+
/h/hangar/outdoor 170
|
| 172 |
+
/h/harbor 171
|
| 173 |
+
/h/hardware_store 172
|
| 174 |
+
/h/hayfield 173
|
| 175 |
+
/h/heliport 174
|
| 176 |
+
/h/highway 175
|
| 177 |
+
/h/home_office 176
|
| 178 |
+
/h/home_theater 177
|
| 179 |
+
/h/hospital 178
|
| 180 |
+
/h/hospital_room 179
|
| 181 |
+
/h/hot_spring 180
|
| 182 |
+
/h/hotel/outdoor 181
|
| 183 |
+
/h/hotel_room 182
|
| 184 |
+
/h/house 183
|
| 185 |
+
/h/hunting_lodge/outdoor 184
|
| 186 |
+
/i/ice_cream_parlor 185
|
| 187 |
+
/i/ice_floe 186
|
| 188 |
+
/i/ice_shelf 187
|
| 189 |
+
/i/ice_skating_rink/indoor 188
|
| 190 |
+
/i/ice_skating_rink/outdoor 189
|
| 191 |
+
/i/iceberg 190
|
| 192 |
+
/i/igloo 191
|
| 193 |
+
/i/industrial_area 192
|
| 194 |
+
/i/inn/outdoor 193
|
| 195 |
+
/i/islet 194
|
| 196 |
+
/j/jacuzzi/indoor 195
|
| 197 |
+
/j/jail_cell 196
|
| 198 |
+
/j/japanese_garden 197
|
| 199 |
+
/j/jewelry_shop 198
|
| 200 |
+
/j/junkyard 199
|
| 201 |
+
/k/kasbah 200
|
| 202 |
+
/k/kennel/outdoor 201
|
| 203 |
+
/k/kindergarden_classroom 202
|
| 204 |
+
/k/kitchen 203
|
| 205 |
+
/l/lagoon 204
|
| 206 |
+
/l/lake/natural 205
|
| 207 |
+
/l/landfill 206
|
| 208 |
+
/l/landing_deck 207
|
| 209 |
+
/l/laundromat 208
|
| 210 |
+
/l/lawn 209
|
| 211 |
+
/l/lecture_room 210
|
| 212 |
+
/l/legislative_chamber 211
|
| 213 |
+
/l/library/indoor 212
|
| 214 |
+
/l/library/outdoor 213
|
| 215 |
+
/l/lighthouse 214
|
| 216 |
+
/l/living_room 215
|
| 217 |
+
/l/loading_dock 216
|
| 218 |
+
/l/lobby 217
|
| 219 |
+
/l/lock_chamber 218
|
| 220 |
+
/l/locker_room 219
|
| 221 |
+
/m/mansion 220
|
| 222 |
+
/m/manufactured_home 221
|
| 223 |
+
/m/market/indoor 222
|
| 224 |
+
/m/market/outdoor 223
|
| 225 |
+
/m/marsh 224
|
| 226 |
+
/m/martial_arts_gym 225
|
| 227 |
+
/m/mausoleum 226
|
| 228 |
+
/m/medina 227
|
| 229 |
+
/m/mezzanine 228
|
| 230 |
+
/m/moat/water 229
|
| 231 |
+
/m/mosque/outdoor 230
|
| 232 |
+
/m/motel 231
|
| 233 |
+
/m/mountain 232
|
| 234 |
+
/m/mountain_path 233
|
| 235 |
+
/m/mountain_snowy 234
|
| 236 |
+
/m/movie_theater/indoor 235
|
| 237 |
+
/m/museum/indoor 236
|
| 238 |
+
/m/museum/outdoor 237
|
| 239 |
+
/m/music_studio 238
|
| 240 |
+
/n/natural_history_museum 239
|
| 241 |
+
/n/nursery 240
|
| 242 |
+
/n/nursing_home 241
|
| 243 |
+
/o/oast_house 242
|
| 244 |
+
/o/ocean 243
|
| 245 |
+
/o/office 244
|
| 246 |
+
/o/office_building 245
|
| 247 |
+
/o/office_cubicles 246
|
| 248 |
+
/o/oilrig 247
|
| 249 |
+
/o/operating_room 248
|
| 250 |
+
/o/orchard 249
|
| 251 |
+
/o/orchestra_pit 250
|
| 252 |
+
/p/pagoda 251
|
| 253 |
+
/p/palace 252
|
| 254 |
+
/p/pantry 253
|
| 255 |
+
/p/park 254
|
| 256 |
+
/p/parking_garage/indoor 255
|
| 257 |
+
/p/parking_garage/outdoor 256
|
| 258 |
+
/p/parking_lot 257
|
| 259 |
+
/p/pasture 258
|
| 260 |
+
/p/patio 259
|
| 261 |
+
/p/pavilion 260
|
| 262 |
+
/p/pet_shop 261
|
| 263 |
+
/p/pharmacy 262
|
| 264 |
+
/p/phone_booth 263
|
| 265 |
+
/p/physics_laboratory 264
|
| 266 |
+
/p/picnic_area 265
|
| 267 |
+
/p/pier 266
|
| 268 |
+
/p/pizzeria 267
|
| 269 |
+
/p/playground 268
|
| 270 |
+
/p/playroom 269
|
| 271 |
+
/p/plaza 270
|
| 272 |
+
/p/pond 271
|
| 273 |
+
/p/porch 272
|
| 274 |
+
/p/promenade 273
|
| 275 |
+
/p/pub/indoor 274
|
| 276 |
+
/r/racecourse 275
|
| 277 |
+
/r/raceway 276
|
| 278 |
+
/r/raft 277
|
| 279 |
+
/r/railroad_track 278
|
| 280 |
+
/r/rainforest 279
|
| 281 |
+
/r/reception 280
|
| 282 |
+
/r/recreation_room 281
|
| 283 |
+
/r/repair_shop 282
|
| 284 |
+
/r/residential_neighborhood 283
|
| 285 |
+
/r/restaurant 284
|
| 286 |
+
/r/restaurant_kitchen 285
|
| 287 |
+
/r/restaurant_patio 286
|
| 288 |
+
/r/rice_paddy 287
|
| 289 |
+
/r/river 288
|
| 290 |
+
/r/rock_arch 289
|
| 291 |
+
/r/roof_garden 290
|
| 292 |
+
/r/rope_bridge 291
|
| 293 |
+
/r/ruin 292
|
| 294 |
+
/r/runway 293
|
| 295 |
+
/s/sandbox 294
|
| 296 |
+
/s/sauna 295
|
| 297 |
+
/s/schoolhouse 296
|
| 298 |
+
/s/science_museum 297
|
| 299 |
+
/s/server_room 298
|
| 300 |
+
/s/shed 299
|
| 301 |
+
/s/shoe_shop 300
|
| 302 |
+
/s/shopfront 301
|
| 303 |
+
/s/shopping_mall/indoor 302
|
| 304 |
+
/s/shower 303
|
| 305 |
+
/s/ski_resort 304
|
| 306 |
+
/s/ski_slope 305
|
| 307 |
+
/s/sky 306
|
| 308 |
+
/s/skyscraper 307
|
| 309 |
+
/s/slum 308
|
| 310 |
+
/s/snowfield 309
|
| 311 |
+
/s/soccer_field 310
|
| 312 |
+
/s/stable 311
|
| 313 |
+
/s/stadium/baseball 312
|
| 314 |
+
/s/stadium/football 313
|
| 315 |
+
/s/stadium/soccer 314
|
| 316 |
+
/s/stage/indoor 315
|
| 317 |
+
/s/stage/outdoor 316
|
| 318 |
+
/s/staircase 317
|
| 319 |
+
/s/storage_room 318
|
| 320 |
+
/s/street 319
|
| 321 |
+
/s/subway_station/platform 320
|
| 322 |
+
/s/supermarket 321
|
| 323 |
+
/s/sushi_bar 322
|
| 324 |
+
/s/swamp 323
|
| 325 |
+
/s/swimming_hole 324
|
| 326 |
+
/s/swimming_pool/indoor 325
|
| 327 |
+
/s/swimming_pool/outdoor 326
|
| 328 |
+
/s/synagogue/outdoor 327
|
| 329 |
+
/t/television_room 328
|
| 330 |
+
/t/television_studio 329
|
| 331 |
+
/t/temple/asia 330
|
| 332 |
+
/t/throne_room 331
|
| 333 |
+
/t/ticket_booth 332
|
| 334 |
+
/t/topiary_garden 333
|
| 335 |
+
/t/tower 334
|
| 336 |
+
/t/toyshop 335
|
| 337 |
+
/t/train_interior 336
|
| 338 |
+
/t/train_station/platform 337
|
| 339 |
+
/t/tree_farm 338
|
| 340 |
+
/t/tree_house 339
|
| 341 |
+
/t/trench 340
|
| 342 |
+
/t/tundra 341
|
| 343 |
+
/u/underwater/ocean_deep 342
|
| 344 |
+
/u/utility_room 343
|
| 345 |
+
/v/valley 344
|
| 346 |
+
/v/vegetable_garden 345
|
| 347 |
+
/v/veterinarians_office 346
|
| 348 |
+
/v/viaduct 347
|
| 349 |
+
/v/village 348
|
| 350 |
+
/v/vineyard 349
|
| 351 |
+
/v/volcano 350
|
| 352 |
+
/v/volleyball_court/outdoor 351
|
| 353 |
+
/w/waiting_room 352
|
| 354 |
+
/w/water_park 353
|
| 355 |
+
/w/water_tower 354
|
| 356 |
+
/w/waterfall 355
|
| 357 |
+
/w/watering_hole 356
|
| 358 |
+
/w/wave 357
|
| 359 |
+
/w/wet_bar 358
|
| 360 |
+
/w/wheat_field 359
|
| 361 |
+
/w/wind_farm 360
|
| 362 |
+
/w/windmill 361
|
| 363 |
+
/y/yard 362
|
| 364 |
+
/y/youth_hostel 363
|
| 365 |
+
/z/zen_garden 364
|
prompts/extract_text_features.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import numpy as np
|
| 3 |
+
import torch
|
| 4 |
+
import clip
|
| 5 |
+
import csv
|
| 6 |
+
import tqdm
|
| 7 |
+
from profanity_filter import ProfanityFilter
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# Prompt templates: each entry is a callable that takes a class name `c` and
# returns one caption-like sentence mentioning it. Every class name is run
# through all of these templates and the resulting sentences are encoded as a
# prompt ensemble.
templates = [
    # Bind the pattern as a default argument so each lambda keeps its own
    # sentence instead of sharing the comprehension's last value.
    (lambda c, _pattern=pattern: _pattern.format(c=c))
    for pattern in (
        'a bad photo of a {c}.',
        'a photo of many {c}.',
        'a sculpture of a {c}.',
        'a photo of the hard to see {c}.',
        'a low resolution photo of the {c}.',
        'a rendering of a {c}.',
        'graffiti of a {c}.',
        'a bad photo of the {c}.',
        'a cropped photo of the {c}.',
        'a tattoo of a {c}.',
        'the embroidered {c}.',
        'a photo of a hard to see {c}.',
        'a bright photo of a {c}.',
        'a photo of a clean {c}.',
        'a photo of a dirty {c}.',
        'a dark photo of the {c}.',
        'a drawing of a {c}.',
        'a photo of my {c}.',
        'the plastic {c}.',
        'a photo of the cool {c}.',
        'a close-up photo of a {c}.',
        'a black and white photo of the {c}.',
        'a painting of the {c}.',
        'a painting of a {c}.',
        'a pixelated photo of the {c}.',
        'a sculpture of the {c}.',
        'a bright photo of the {c}.',
        'a cropped photo of a {c}.',
        'a plastic {c}.',
        'a photo of the dirty {c}.',
        'a jpeg corrupted photo of a {c}.',
        'a blurry photo of the {c}.',
        'a photo of the {c}.',
        'a good photo of the {c}.',
        'a rendering of the {c}.',
        'a {c} in a video game.',
        'a photo of one {c}.',
        'a doodle of a {c}.',
        'a close-up photo of the {c}.',
        'a photo of a {c}.',
        'the origami {c}.',
        'the {c} in a video game.',
        'a sketch of a {c}.',
        'a doodle of the {c}.',
        'a origami {c}.',
        'a low resolution photo of a {c}.',
        'the toy {c}.',
        'a rendition of the {c}.',
        'a photo of the clean {c}.',
        'a photo of a large {c}.',
        'a rendition of a {c}.',
        'a photo of a nice {c}.',
        'a photo of a weird {c}.',
        'a blurry photo of a {c}.',
        'a cartoon {c}.',
        'art of a {c}.',
        'a sketch of the {c}.',
        'a embroidered {c}.',
        'a pixelated photo of a {c}.',
        'itap of the {c}.',
        'a jpeg corrupted photo of the {c}.',
        'a good photo of a {c}.',
        'a plushie {c}.',
        'a photo of the nice {c}.',
        'a photo of the small {c}.',
        'a photo of the weird {c}.',
        'the cartoon {c}.',
        'art of the {c}.',
        'a drawing of the {c}.',
        'a photo of the large {c}.',
        'a black and white photo of a {c}.',
        'the plushie {c}.',
        'a dark photo of a {c}.',
        'itap of a {c}.',
        'graffiti of the {c}.',
        'a toy {c}.',
        'itap of my {c}.',
        'a photo of a cool {c}.',
        'a photo of a small {c}.',
        'a tattoo of the {c}.',
    )
]
|
| 92 |
+
|
| 93 |
+
# Expose only CUDA device index 0 to this process, then run on the GPU when
# torch reports one and fall back to the CPU otherwise.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Load the ViT-L/14 CLIP checkpoint onto the chosen device; the second value
# returned by clip.load is kept as `clip_preprocess`.
clip_model, clip_preprocess = clip.load("ViT-L/14", device=device)
|
| 96 |
+
|
| 97 |
+
'''
|
| 98 |
+
csv_data = open('openimage-classnames.csv')
|
| 99 |
+
csv_reader = csv.reader(csv_data)
|
| 100 |
+
class_names = []
|
| 101 |
+
for row in csv_reader:
|
| 102 |
+
class_names.append(row[-1])
|
| 103 |
+
'''
|
| 104 |
+
'''
|
| 105 |
+
txt_data = open('tencent-ml-images.txt')
|
| 106 |
+
pf = ProfanityFilter()
|
| 107 |
+
lines = txt_data.readlines()
|
| 108 |
+
class_names = []
|
| 109 |
+
for line in lines[4:]:
|
| 110 |
+
class_name_precook = line.strip().split('\t')[-1]
|
| 111 |
+
safe_list = ''
|
| 112 |
+
for class_name in class_name_precook.split(', '):
|
| 113 |
+
if pf.is_clean(class_name):
|
| 114 |
+
safe_list += '%s, ' % class_name
|
| 115 |
+
safe_list = safe_list[:-2]
|
| 116 |
+
if len(safe_list) > 0:
|
| 117 |
+
class_names.append(safe_list)
|
| 118 |
+
f_w = open('tencent-ml-classnames.txt', 'w')
|
| 119 |
+
for cln in class_names:
|
| 120 |
+
f_w.write('%s\n' % cln)
|
| 121 |
+
f_w.close()
|
| 122 |
+
'''
|
| 123 |
+
# Build human-readable class names from the Places365 category list.
# Rows of categories_places365.txt look like "/t/train_station/platform 337";
# only the first column (the category path) is used here.
place_categories = np.loadtxt('categories_places365.txt', dtype=str)
place_texts = []
for place in place_categories[:, 0]:
    # '/t/train_station/platform' -> ['train_station', 'platform']: drop the
    # empty leading piece and the single-letter bucket.
    place = place.split('/')[2:]
    if len(place) > 1:
        # Put the qualifier in front of the place: 'platform train_station'.
        place = place[1] + ' ' + place[0]
    else:
        place = place[0]
    place = place.replace('_', ' ')
    place_texts.append(place)
class_names = place_texts
# Write one class name per line. The context manager closes the file even if
# a write fails, which the previous manual open()/close() pair did not.
with open('place365-classnames.txt', 'w') as f_w:
    f_w.writelines('%s\n' % cln for cln in class_names)
print(class_names)
|
| 139 |
+
|
| 140 |
+
# Build one text-embedding vector per class name by ensembling the CLIP text
# features of every prompt template, then save the stacked result.
class_weights = []
with torch.no_grad():
    progress = tqdm.tqdm(class_names, desc='encoding text')
    for classname in progress:
        prompts = [make_prompt(classname) for make_prompt in templates]
        tokens = clip.tokenize(prompts).to(device)
        embeddings = clip_model.encode_text(tokens)
        # Normalise each prompt embedding to unit length, average over the
        # templates, then re-normalise the averaged vector.
        embeddings /= embeddings.norm(dim=-1, keepdim=True)
        class_embedding = embeddings.mean(dim=0)
        class_embedding /= class_embedding.norm()
        class_weights.append(class_embedding)

# Stack the per-class vectors into a single tensor (one row per class name).
class_weights = torch.stack(class_weights)
print(class_weights.shape)
# Output name used when the OpenImages class list is selected instead:
#torch.save(class_weights, 'clip_ViTL14_openimage_classifier_weights.pt')
torch.save(class_weights, 'clip_ViTL14_place365_classifier_weights.pt')
|
prompts/openimage-classnames.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
prompts/place365-classnames.txt
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
airfield
|
| 2 |
+
airplane cabin
|
| 3 |
+
airport terminal
|
| 4 |
+
alcove
|
| 5 |
+
alley
|
| 6 |
+
amphitheater
|
| 7 |
+
amusement arcade
|
| 8 |
+
amusement park
|
| 9 |
+
outdoor apartment building
|
| 10 |
+
aquarium
|
| 11 |
+
aqueduct
|
| 12 |
+
arcade
|
| 13 |
+
arch
|
| 14 |
+
archaelogical excavation
|
| 15 |
+
archive
|
| 16 |
+
hockey arena
|
| 17 |
+
performance arena
|
| 18 |
+
rodeo arena
|
| 19 |
+
army base
|
| 20 |
+
art gallery
|
| 21 |
+
art school
|
| 22 |
+
art studio
|
| 23 |
+
artists loft
|
| 24 |
+
assembly line
|
| 25 |
+
outdoor athletic field
|
| 26 |
+
public atrium
|
| 27 |
+
attic
|
| 28 |
+
auditorium
|
| 29 |
+
auto factory
|
| 30 |
+
auto showroom
|
| 31 |
+
badlands
|
| 32 |
+
shop bakery
|
| 33 |
+
exterior balcony
|
| 34 |
+
interior balcony
|
| 35 |
+
ball pit
|
| 36 |
+
ballroom
|
| 37 |
+
bamboo forest
|
| 38 |
+
bank vault
|
| 39 |
+
banquet hall
|
| 40 |
+
bar
|
| 41 |
+
barn
|
| 42 |
+
barndoor
|
| 43 |
+
baseball field
|
| 44 |
+
basement
|
| 45 |
+
indoor basketball court
|
| 46 |
+
bathroom
|
| 47 |
+
indoor bazaar
|
| 48 |
+
outdoor bazaar
|
| 49 |
+
beach
|
| 50 |
+
beach house
|
| 51 |
+
beauty salon
|
| 52 |
+
bedchamber
|
| 53 |
+
bedroom
|
| 54 |
+
beer garden
|
| 55 |
+
beer hall
|
| 56 |
+
berth
|
| 57 |
+
biology laboratory
|
| 58 |
+
boardwalk
|
| 59 |
+
boat deck
|
| 60 |
+
boathouse
|
| 61 |
+
bookstore
|
| 62 |
+
indoor booth
|
| 63 |
+
botanical garden
|
| 64 |
+
indoor bow window
|
| 65 |
+
bowling alley
|
| 66 |
+
boxing ring
|
| 67 |
+
bridge
|
| 68 |
+
building facade
|
| 69 |
+
bullring
|
| 70 |
+
burial chamber
|
| 71 |
+
bus interior
|
| 72 |
+
indoor bus station
|
| 73 |
+
butchers shop
|
| 74 |
+
butte
|
| 75 |
+
outdoor cabin
|
| 76 |
+
cafeteria
|
| 77 |
+
campsite
|
| 78 |
+
campus
|
| 79 |
+
natural canal
|
| 80 |
+
urban canal
|
| 81 |
+
candy store
|
| 82 |
+
canyon
|
| 83 |
+
car interior
|
| 84 |
+
carrousel
|
| 85 |
+
castle
|
| 86 |
+
catacomb
|
| 87 |
+
cemetery
|
| 88 |
+
chalet
|
| 89 |
+
chemistry lab
|
| 90 |
+
childs room
|
| 91 |
+
indoor church
|
| 92 |
+
outdoor church
|
| 93 |
+
classroom
|
| 94 |
+
clean room
|
| 95 |
+
cliff
|
| 96 |
+
closet
|
| 97 |
+
clothing store
|
| 98 |
+
coast
|
| 99 |
+
cockpit
|
| 100 |
+
coffee shop
|
| 101 |
+
computer room
|
| 102 |
+
conference center
|
| 103 |
+
conference room
|
| 104 |
+
construction site
|
| 105 |
+
corn field
|
| 106 |
+
corral
|
| 107 |
+
corridor
|
| 108 |
+
cottage
|
| 109 |
+
courthouse
|
| 110 |
+
courtyard
|
| 111 |
+
creek
|
| 112 |
+
crevasse
|
| 113 |
+
crosswalk
|
| 114 |
+
dam
|
| 115 |
+
delicatessen
|
| 116 |
+
department store
|
| 117 |
+
sand desert
|
| 118 |
+
vegetation desert
|
| 119 |
+
desert road
|
| 120 |
+
outdoor diner
|
| 121 |
+
dining hall
|
| 122 |
+
dining room
|
| 123 |
+
discotheque
|
| 124 |
+
outdoor doorway
|
| 125 |
+
dorm room
|
| 126 |
+
downtown
|
| 127 |
+
dressing room
|
| 128 |
+
driveway
|
| 129 |
+
drugstore
|
| 130 |
+
door elevator
|
| 131 |
+
elevator lobby
|
| 132 |
+
elevator shaft
|
| 133 |
+
embassy
|
| 134 |
+
engine room
|
| 135 |
+
entrance hall
|
| 136 |
+
indoor escalator
|
| 137 |
+
excavation
|
| 138 |
+
fabric store
|
| 139 |
+
farm
|
| 140 |
+
fastfood restaurant
|
| 141 |
+
cultivated field
|
| 142 |
+
wild field
|
| 143 |
+
field road
|
| 144 |
+
fire escape
|
| 145 |
+
fire station
|
| 146 |
+
fishpond
|
| 147 |
+
indoor flea market
|
| 148 |
+
indoor florist shop
|
| 149 |
+
food court
|
| 150 |
+
football field
|
| 151 |
+
broadleaf forest
|
| 152 |
+
forest path
|
| 153 |
+
forest road
|
| 154 |
+
formal garden
|
| 155 |
+
fountain
|
| 156 |
+
galley
|
| 157 |
+
indoor garage
|
| 158 |
+
outdoor garage
|
| 159 |
+
gas station
|
| 160 |
+
exterior gazebo
|
| 161 |
+
indoor general store
|
| 162 |
+
outdoor general store
|
| 163 |
+
gift shop
|
| 164 |
+
glacier
|
| 165 |
+
golf course
|
| 166 |
+
indoor greenhouse
|
| 167 |
+
outdoor greenhouse
|
| 168 |
+
grotto
|
| 169 |
+
indoor gymnasium
|
| 170 |
+
indoor hangar
|
| 171 |
+
outdoor hangar
|
| 172 |
+
harbor
|
| 173 |
+
hardware store
|
| 174 |
+
hayfield
|
| 175 |
+
heliport
|
| 176 |
+
highway
|
| 177 |
+
home office
|
| 178 |
+
home theater
|
| 179 |
+
hospital
|
| 180 |
+
hospital room
|
| 181 |
+
hot spring
|
| 182 |
+
outdoor hotel
|
| 183 |
+
hotel room
|
| 184 |
+
house
|
| 185 |
+
outdoor hunting lodge
|
| 186 |
+
ice cream parlor
|
| 187 |
+
ice floe
|
| 188 |
+
ice shelf
|
| 189 |
+
indoor ice skating rink
|
| 190 |
+
outdoor ice skating rink
|
| 191 |
+
iceberg
|
| 192 |
+
igloo
|
| 193 |
+
industrial area
|
| 194 |
+
outdoor inn
|
| 195 |
+
islet
|
| 196 |
+
indoor jacuzzi
|
| 197 |
+
jail cell
|
| 198 |
+
japanese garden
|
| 199 |
+
jewelry shop
|
| 200 |
+
junkyard
|
| 201 |
+
kasbah
|
| 202 |
+
outdoor kennel
|
| 203 |
+
kindergarden classroom
|
| 204 |
+
kitchen
|
| 205 |
+
lagoon
|
| 206 |
+
natural lake
|
| 207 |
+
landfill
|
| 208 |
+
landing deck
|
| 209 |
+
laundromat
|
| 210 |
+
lawn
|
| 211 |
+
lecture room
|
| 212 |
+
legislative chamber
|
| 213 |
+
indoor library
|
| 214 |
+
outdoor library
|
| 215 |
+
lighthouse
|
| 216 |
+
living room
|
| 217 |
+
loading dock
|
| 218 |
+
lobby
|
| 219 |
+
lock chamber
|
| 220 |
+
locker room
|
| 221 |
+
mansion
|
| 222 |
+
manufactured home
|
| 223 |
+
indoor market
|
| 224 |
+
outdoor market
|
| 225 |
+
marsh
|
| 226 |
+
martial arts gym
|
| 227 |
+
mausoleum
|
| 228 |
+
medina
|
| 229 |
+
mezzanine
|
| 230 |
+
water moat
|
| 231 |
+
outdoor mosque
|
| 232 |
+
motel
|
| 233 |
+
mountain
|
| 234 |
+
mountain path
|
| 235 |
+
mountain snowy
|
| 236 |
+
indoor movie theater
|
| 237 |
+
indoor museum
|
| 238 |
+
outdoor museum
|
| 239 |
+
music studio
|
| 240 |
+
natural history museum
|
| 241 |
+
nursery
|
| 242 |
+
nursing home
|
| 243 |
+
oast house
|
| 244 |
+
ocean
|
| 245 |
+
office
|
| 246 |
+
office building
|
| 247 |
+
office cubicles
|
| 248 |
+
oilrig
|
| 249 |
+
operating room
|
| 250 |
+
orchard
|
| 251 |
+
orchestra pit
|
| 252 |
+
pagoda
|
| 253 |
+
palace
|
| 254 |
+
pantry
|
| 255 |
+
park
|
| 256 |
+
indoor parking garage
|
| 257 |
+
outdoor parking garage
|
| 258 |
+
parking lot
|
| 259 |
+
pasture
|
| 260 |
+
patio
|
| 261 |
+
pavilion
|
| 262 |
+
pet shop
|
| 263 |
+
pharmacy
|
| 264 |
+
phone booth
|
| 265 |
+
physics laboratory
|
| 266 |
+
picnic area
|
| 267 |
+
pier
|
| 268 |
+
pizzeria
|
| 269 |
+
playground
|
| 270 |
+
playroom
|
| 271 |
+
plaza
|
| 272 |
+
pond
|
| 273 |
+
porch
|
| 274 |
+
promenade
|
| 275 |
+
indoor pub
|
| 276 |
+
racecourse
|
| 277 |
+
raceway
|
| 278 |
+
raft
|
| 279 |
+
railroad track
|
| 280 |
+
rainforest
|
| 281 |
+
reception
|
| 282 |
+
recreation room
|
| 283 |
+
repair shop
|
| 284 |
+
residential neighborhood
|
| 285 |
+
restaurant
|
| 286 |
+
restaurant kitchen
|
| 287 |
+
restaurant patio
|
| 288 |
+
rice paddy
|
| 289 |
+
river
|
| 290 |
+
rock arch
|
| 291 |
+
roof garden
|
| 292 |
+
rope bridge
|
| 293 |
+
ruin
|
| 294 |
+
runway
|
| 295 |
+
sandbox
|
| 296 |
+
sauna
|
| 297 |
+
schoolhouse
|
| 298 |
+
science museum
|
| 299 |
+
server room
|
| 300 |
+
shed
|
| 301 |
+
shoe shop
|
| 302 |
+
shopfront
|
| 303 |
+
indoor shopping mall
|
| 304 |
+
shower
|
| 305 |
+
ski resort
|
| 306 |
+
ski slope
|
| 307 |
+
sky
|
| 308 |
+
skyscraper
|
| 309 |
+
slum
|
| 310 |
+
snowfield
|
| 311 |
+
soccer field
|
| 312 |
+
stable
|
| 313 |
+
baseball stadium
|
| 314 |
+
football stadium
|
| 315 |
+
soccer stadium
|
| 316 |
+
indoor stage
|
| 317 |
+
outdoor stage
|
| 318 |
+
staircase
|
| 319 |
+
storage room
|
| 320 |
+
street
|
| 321 |
+
platform subway station
|
| 322 |
+
supermarket
|
| 323 |
+
sushi bar
|
| 324 |
+
swamp
|
| 325 |
+
swimming hole
|
| 326 |
+
indoor swimming pool
|
| 327 |
+
outdoor swimming pool
|
| 328 |
+
outdoor synagogue
|
| 329 |
+
television room
|
| 330 |
+
television studio
|
| 331 |
+
asia temple
|
| 332 |
+
throne room
|
| 333 |
+
ticket booth
|
| 334 |
+
topiary garden
|
| 335 |
+
tower
|
| 336 |
+
toyshop
|
| 337 |
+
train interior
|
| 338 |
+
platform train station
|
| 339 |
+
tree farm
|
| 340 |
+
tree house
|
| 341 |
+
trench
|
| 342 |
+
tundra
|
| 343 |
+
ocean deep underwater
|
| 344 |
+
utility room
|
| 345 |
+
valley
|
| 346 |
+
vegetable garden
|
| 347 |
+
veterinarians office
|
| 348 |
+
viaduct
|
| 349 |
+
village
|
| 350 |
+
vineyard
|
| 351 |
+
volcano
|
| 352 |
+
outdoor volleyball court
|
| 353 |
+
waiting room
|
| 354 |
+
water park
|
| 355 |
+
water tower
|
| 356 |
+
waterfall
|
| 357 |
+
watering hole
|
| 358 |
+
wave
|
| 359 |
+
wet bar
|
| 360 |
+
wheat field
|
| 361 |
+
wind farm
|
| 362 |
+
windmill
|
| 363 |
+
yard
|
| 364 |
+
youth hostel
|
| 365 |
+
zen garden
|
prompts/tencent-ml-classnames.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
prompts/tencent-ml-images.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
git+https://github.com/huggingface/transformers
|
| 2 |
+
ftfy
|
| 3 |
+
regex
|
| 4 |
+
tqdm
|
| 5 |
+
git+https://github.com/openai/CLIP.git
|
| 6 |
+
gradio
|
| 7 |
+
torch
|
| 8 |
+
wget
|