Spaces:
Running
on
Zero
Running
on
Zero
Update pops.py
Browse files
pops.py
CHANGED
|
@@ -15,15 +15,15 @@ prior_instruct_repo: str = 'models/instruct/learned_prior.pth'
|
|
| 15 |
prior_scene_repo: str = 'models/scene/learned_prior.pth'
|
| 16 |
prior_repo = "pOpsPaper/operators"
|
| 17 |
|
| 18 |
-
gpu = torch.device('cuda')
|
| 19 |
-
cpu = torch.device('cpu')
|
| 20 |
|
| 21 |
class PopsPipelines:
|
| 22 |
def __init__(self):
|
| 23 |
weight_dtype = torch.float16
|
| 24 |
self.weight_dtype = weight_dtype
|
| 25 |
-
device = 'cuda'
|
| 26 |
-
self.device = device
|
| 27 |
self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(kandinsky_prior_repo,
|
| 28 |
subfolder='image_encoder',
|
| 29 |
torch_dtype=weight_dtype).eval()
|
|
@@ -84,6 +84,7 @@ class PopsPipelines:
|
|
| 84 |
return image
|
| 85 |
|
| 86 |
def process_text(self, text):
|
|
|
|
| 87 |
text_inputs = self.tokenizer(
|
| 88 |
text,
|
| 89 |
padding="max_length",
|
|
@@ -96,12 +97,14 @@ class PopsPipelines:
|
|
| 96 |
text_encoder_output = self.text_encoder(text_inputs.input_ids.to(self.device))
|
| 97 |
text_encoder_hidden_states = text_encoder_output.last_hidden_state
|
| 98 |
text_encoder_concat = text_encoder_hidden_states[:, :mask.sum().item()]
|
|
|
|
| 99 |
return text_encoder_concat
|
| 100 |
|
| 101 |
def run_binary(self, input_a, input_b, prior_type):
|
| 102 |
# Move pipeline to GPU
|
| 103 |
pipeline = self.priors_dict[prior_type]['pipeline']
|
| 104 |
pipeline.to('cuda')
|
|
|
|
| 105 |
input_image_embeds, input_hidden_state = pops_utils.preprocess(input_a, input_b,
|
| 106 |
self.image_encoder,
|
| 107 |
pipeline.prior.clip_mean.detach(),
|
|
@@ -131,14 +134,17 @@ class PopsPipelines:
|
|
| 131 |
|
| 132 |
# Move pipeline to CPU
|
| 133 |
pipeline.to('cpu')
|
|
|
|
| 134 |
return img_emb
|
| 135 |
|
| 136 |
def run_instruct(self, input_a, text):
|
|
|
|
| 137 |
text_encodings = self.process_text(text)
|
| 138 |
|
| 139 |
# Move pipeline to GPU
|
| 140 |
instruct_pipeline = self.priors_dict['instruct']['pipeline']
|
| 141 |
instruct_pipeline.to('cuda')
|
|
|
|
| 142 |
input_image_embeds, input_hidden_state = pops_utils.preprocess(input_a, None,
|
| 143 |
self.image_encoder,
|
| 144 |
instruct_pipeline.prior.clip_mean.detach(), instruct_pipeline.prior.clip_std.detach(),
|
|
@@ -155,13 +161,15 @@ class PopsPipelines:
|
|
| 155 |
|
| 156 |
# Move pipeline to CPU
|
| 157 |
instruct_pipeline.to('cpu')
|
|
|
|
| 158 |
return img_emb
|
| 159 |
|
| 160 |
def render(self, img_emb):
|
|
|
|
| 161 |
images = self.decoder(image_embeds=img_emb.image_embeds, negative_image_embeds=img_emb.negative_image_embeds,
|
| 162 |
num_inference_steps=50, height=512,
|
| 163 |
width=512, guidance_scale=4).images
|
| 164 |
-
|
| 165 |
return images[0]
|
| 166 |
|
| 167 |
def run_instruct_texture(self, image_object_path, text_instruct, image_texture_path):
|
|
|
|
| 15 |
prior_scene_repo: str = 'models/scene/learned_prior.pth'
|
| 16 |
prior_repo = "pOpsPaper/operators"
|
| 17 |
|
| 18 |
+
# gpu = torch.device('cuda')
|
| 19 |
+
# cpu = torch.device('cpu')
|
| 20 |
|
| 21 |
class PopsPipelines:
|
| 22 |
def __init__(self):
|
| 23 |
weight_dtype = torch.float16
|
| 24 |
self.weight_dtype = weight_dtype
|
| 25 |
+
device = 'cpu' #torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 26 |
+
self.device = 'cuda' #device
|
| 27 |
self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(kandinsky_prior_repo,
|
| 28 |
subfolder='image_encoder',
|
| 29 |
torch_dtype=weight_dtype).eval()
|
|
|
|
| 84 |
return image
|
| 85 |
|
| 86 |
def process_text(self, text):
|
| 87 |
+
self.text_encoder.to('cuda')
|
| 88 |
text_inputs = self.tokenizer(
|
| 89 |
text,
|
| 90 |
padding="max_length",
|
|
|
|
| 97 |
text_encoder_output = self.text_encoder(text_inputs.input_ids.to(self.device))
|
| 98 |
text_encoder_hidden_states = text_encoder_output.last_hidden_state
|
| 99 |
text_encoder_concat = text_encoder_hidden_states[:, :mask.sum().item()]
|
| 100 |
+
self.text_encoder.to('cpu')
|
| 101 |
return text_encoder_concat
|
| 102 |
|
| 103 |
def run_binary(self, input_a, input_b, prior_type):
|
| 104 |
# Move pipeline to GPU
|
| 105 |
pipeline = self.priors_dict[prior_type]['pipeline']
|
| 106 |
pipeline.to('cuda')
|
| 107 |
+
self.image_encoder.to('cuda')
|
| 108 |
input_image_embeds, input_hidden_state = pops_utils.preprocess(input_a, input_b,
|
| 109 |
self.image_encoder,
|
| 110 |
pipeline.prior.clip_mean.detach(),
|
|
|
|
| 134 |
|
| 135 |
# Move pipeline to CPU
|
| 136 |
pipeline.to('cpu')
|
| 137 |
+
self.image_encoder.to('cpu')
|
| 138 |
return img_emb
|
| 139 |
|
| 140 |
def run_instruct(self, input_a, text):
|
| 141 |
+
|
| 142 |
text_encodings = self.process_text(text)
|
| 143 |
|
| 144 |
# Move pipeline to GPU
|
| 145 |
instruct_pipeline = self.priors_dict['instruct']['pipeline']
|
| 146 |
instruct_pipeline.to('cuda')
|
| 147 |
+
self.image_encoder.to('cuda')
|
| 148 |
input_image_embeds, input_hidden_state = pops_utils.preprocess(input_a, None,
|
| 149 |
self.image_encoder,
|
| 150 |
instruct_pipeline.prior.clip_mean.detach(), instruct_pipeline.prior.clip_std.detach(),
|
|
|
|
| 161 |
|
| 162 |
# Move pipeline to CPU
|
| 163 |
instruct_pipeline.to('cpu')
|
| 164 |
+
self.image_encoder.to('cpu')
|
| 165 |
return img_emb
|
| 166 |
|
| 167 |
def render(self, img_emb):
|
| 168 |
+
self.decoder.to('cuda')
|
| 169 |
images = self.decoder(image_embeds=img_emb.image_embeds, negative_image_embeds=img_emb.negative_image_embeds,
|
| 170 |
num_inference_steps=50, height=512,
|
| 171 |
width=512, guidance_scale=4).images
|
| 172 |
+
self.decoder.to('cpu')
|
| 173 |
return images[0]
|
| 174 |
|
| 175 |
def run_instruct_texture(self, image_object_path, text_instruct, image_texture_path):
|