Commit ad1fd8e · Parent: ca37dd4

VQGAN attempt

Files changed:
- app.py +34 -17
- flavors.jpg +0 -0
- requirements.txt +2 -1
app.py CHANGED
@@ -32,7 +32,7 @@ import subprocess
 import imageio
 from PIL import ImageFile, Image
 import time
-
+import base64
 
 import hashlib
 from PIL.PngImagePlugin import PngImageFile, PngInfo
@@ -41,6 +41,7 @@ import urllib.request
 from random import randint
 from pathvalidate import sanitize_filename
 from huggingface_hub import hf_hub_download
+import shortuuid
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 print("Using device:", device)
@@ -75,7 +76,7 @@ perceptor = (
     .requires_grad_(False)
     .to(device)
 )
-def run_all(user_input,num_steps, template, width,height):
+def run_all(user_input, num_steps, flavor, markdown, template, width,height):
     import random
     #if uploaded_file is not None:
     #uploaded_folder = f"{DefaultPaths.root_path}/uploaded"
@@ -89,8 +90,7 @@ def run_all(user_input,num_steps, template, width,height):
     #pass
     #else:
     image_path = None
-
-
+    url = shortuuid.uuid()
    args2 = argparse.Namespace(
        prompt=user_input,
        seed=int(random.randint(0, 2147483647)),
@@ -103,7 +103,7 @@ def run_all(user_input,num_steps, template, width,height):
        template=template,
        vqgan_model='ImageNet 16384',
        seed_image=image_path,
-        image_file="
+        image_file=f"{url}.png",
        #frame_dir=intermediary_folder,
    )
    if args2.seed is not None:
@@ -1299,6 +1299,7 @@ def run_all(user_input,num_steps, template, width,height):
         z_orig = z.tensor.clone()
         z.requires_grad_(True)
         # opt = optim.AdamW(z.parameters(), lr=args.mse_step_size, weight_decay=0.00000000)
+        print("Step size inside:", args.step_size)
         if self.normal_flip_optim == True:
             if randint(1, 2) == 1:
                 opt = torch.optim.AdamW(
@@ -1430,8 +1431,7 @@ def run_all(user_input,num_steps, template, width,height):
 
             sys.stdout.write("Iteration {}".format(i) + "\n")
             sys.stdout.flush()
-
-            if i % args2.update == 0:
+            if i % (args2.iterations-2) == 0:
                 self.checkin(i, lossAll, x)
 
             loss = sum(lossAll)
@@ -1493,6 +1493,8 @@ def run_all(user_input,num_steps, template, width,height):
     def run(self, x):
         j = 0
         try:
+            print("Step size: ", args.step_size)
+            print("Step MSE size: ", args.mse_step_size)
             before_start_time = time.perf_counter()
             total_steps = int(args.max_iterations + args.mse_end) - 1
             for _ in range(total_steps):
@@ -1516,9 +1518,9 @@ def run_all(user_input,num_steps, template, width,height):
             import shutil
             import os
 
-            image_data = Image.open(args2.image_file)
-
-            return(image_data)
+            #image_data = Image.open(args2.image_file)
+            #os.remove(args2.image_file)
+            #return(image_data)
 
         except KeyboardInterrupt:
             pass
@@ -2289,14 +2291,16 @@ def run_all(user_input,num_steps, template, width,height):
        is_gumbel=is_gumbel,
        gen_seed=gen_seed,
    )
-
    mh = ModelHost(args)
    x = 0
 
    #for x in range(batch_size):
    mh.setup_model(x)
-
-
+    mh.run(x)
+    image_data = Image.open(args2.image_file)
+    os.remove(args2.image_file)
+    return(image_data)
+    #return(last_iter)
    #x = x + 1
 
    if zoom:
@@ -2322,18 +2326,31 @@ def run_all(user_input,num_steps, template, width,height):
 
 ##################### START GRADIO HERE ############################
 image = gr.outputs.Image(type="pil", label="Your result")
+def cvt_2_base64(file_name):
+    with open(file_name , "rb") as image_file :
+        data = base64.b64encode(image_file.read())
+        return data.decode('utf-8')
+base64image = "data:image/jpg;base64,"+cvt_2_base64('flavors.jpg')
+markdown = gr.Markdown("<img src='"+base64image+"' />")
+def test(raw_input):
+    pass
+setattr(markdown, "requires_permissions", False)
+setattr(markdown, "label", "Flavors")
+setattr(markdown, "preprocess", test)
 iface = gr.Interface(
     fn=run_all,
     inputs=[
         gr.inputs.Textbox(label="Prompt - try adding increments to your prompt such as 'oil on canvas', 'a painting', 'a book cover'",default="chalk pastel drawing of a dog wearing a funny hat"),
-        gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=
-        gr.inputs.Dropdown(label="
+        gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=50,maximum=250,minimum=1,step=1),
+        gr.inputs.Dropdown(label="Flavor",choices=["ginger", "cumin", "holywater", "zynth", "wyvern", "aaron", "moth", "juu", "custom"]),
+        markdown,
+        gr.inputs.Dropdown(label="Style",choices=["Default","Balanced","Detailed","Consistent Creativity","Realistic","Smooth","Subtle MSE","Hyper Fast Results"],default="Hyper Fast Results"),
         gr.inputs.Radio(label="Width", choices=[32,64,128,256,512],default=256),
         gr.inputs.Radio(label="Height", choices=[32,64,128,256,512],default=256),
     ],
     outputs=image,
-    title="Generate images from text with VQGAN+CLIP",
+    title="Generate images from text with VQGAN+CLIP (Hypertron v2)",
     #description="<div>By typing a prompt and pressing submit you can generate images based on this prompt. <a href='https://github.com/CompVis/latent-diffusion' target='_blank'>Latent Diffusion</a> is a text-to-image model created by <a href='https://github.com/CompVis' target='_blank'>CompVis</a>, trained on the <a href='https://laion.ai/laion-400-open-dataset/'>LAION-400M dataset.</a><br>This UI to the model was assembled by <a style='color: rgb(245, 158, 11);font-weight:bold' href='https://twitter.com/multimodalart' target='_blank'>@multimodalart</a></div>",
     #article="<h4 style='font-size: 110%;margin-top:.5em'>Biases acknowledgment</h4><div>Despite how impressive being able to turn text into image is, beware to the fact that this model may output content that reinforces or exarcbates societal biases. According to the <a href='https://arxiv.org/abs/2112.10752' target='_blank'>Latent Diffusion paper</a>:<i> \"Deep learning modules tend to reproduce or exacerbate biases that are already present in the data\"</i>. The model was trained on an unfiltered version the LAION-400M dataset, which scrapped non-curated image-text-pairs from the internet (the exception being the the removal of illegal content) and is meant to be used for research purposes, such as this one. <a href='https://laion.ai/laion-400-open-dataset/' target='_blank'>You can read more on LAION's website</a></div><h4 style='font-size: 110%;margin-top:1em'>Who owns the images produced by this demo?</h4><div>Definetly not me! Probably you do. I say probably because the Copyright discussion about AI generated art is ongoing. So <a href='https://www.theverge.com/2022/2/21/22944335/us-copyright-office-reject-ai-generated-art-recent-entrance-to-paradise' target='_blank'>it may be the case that everything produced here falls automatically into the public domain</a>. But in any case it is either yours or is in the public domain.</div>"
 )
-iface.launch(
+iface.launch()
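One detail worth flagging in the new mh.run(x) return path above: PIL's Image.open is lazy, so calling os.remove immediately after opening can fail on platforms that lock open files, or drop the file before its pixels are actually read. A minimal sketch of a safer load-then-delete pattern, assuming the same save-to-disk flow (the helper name is illustrative, not part of this commit):

import os
from PIL import Image

def load_then_delete(path):
    # Image.open is lazy: load() forces the pixel data into memory
    # and releases the underlying file handle, so the file can be
    # removed safely before the image object is returned.
    img = Image.open(path)
    img.load()
    os.remove(path)
    return img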
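The Gradio block also embeds the newly added flavors.jpg into a gr.Markdown component via a base64 data URI, then monkey-patches the component with setattr (requires_permissions, preprocess) so it can sit in the inputs list as a display-only element; the matching markdown parameter of run_all is never used. A standalone sketch of the data-URI half (function name is illustrative):

import base64

def to_data_uri(path, mime="image/jpeg"):
    # Encode the file bytes as base64 and wrap them in a data URI,
    # so an <img> tag can render the image without a separate request.
    with open(path, "rb") as f:
        return f"data:{mime};base64," + base64.b64encode(f.read()).decode("utf-8")

# e.g.: gr.Markdown(f"<img src='{to_data_uri('flavors.jpg')}' />")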
flavors.jpg ADDED
requirements.txt CHANGED
@@ -25,4 +25,5 @@ pathvalidate
 stegano
 imgtag
 timm
-python-xmp-toolkit
+python-xmp-toolkit
+shortuuid
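requirements.txt gains shortuuid, which app.py now uses to give every request its own output filename so concurrent runs cannot overwrite each other's PNG. A minimal sketch of that pattern (variable name is illustrative):

import shortuuid

# shortuuid.uuid() returns a short, URL- and filename-safe
# unique ID (22 base57 characters by default).
image_file = f"{shortuuid.uuid()}.png"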