Spaces:
Running
on
Zero
Running
on
Zero
Update pops.py
Browse files
pops.py
CHANGED
|
@@ -15,15 +15,15 @@ prior_instruct_repo: str = 'models/instruct/learned_prior.pth'
|
|
| 15 |
prior_scene_repo: str = 'models/scene/learned_prior.pth'
|
| 16 |
prior_repo = "pOpsPaper/operators"
|
| 17 |
|
| 18 |
-
gpu = torch.device('cuda')
|
| 19 |
-
cpu = torch.device('cpu')
|
| 20 |
|
| 21 |
class PopsPipelines:
|
| 22 |
def __init__(self):
|
| 23 |
weight_dtype = torch.float16
|
| 24 |
self.weight_dtype = weight_dtype
|
| 25 |
-
device = 'cuda'
|
| 26 |
-
self.device = device
|
| 27 |
self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(kandinsky_prior_repo,
|
| 28 |
subfolder='image_encoder',
|
| 29 |
torch_dtype=weight_dtype).eval()
|
|
@@ -84,6 +84,7 @@ class PopsPipelines:
|
|
| 84 |
return image
|
| 85 |
|
| 86 |
def process_text(self, text):
|
|
|
|
| 87 |
text_inputs = self.tokenizer(
|
| 88 |
text,
|
| 89 |
padding="max_length",
|
|
@@ -96,12 +97,14 @@ class PopsPipelines:
|
|
| 96 |
text_encoder_output = self.text_encoder(text_inputs.input_ids.to(self.device))
|
| 97 |
text_encoder_hidden_states = text_encoder_output.last_hidden_state
|
| 98 |
text_encoder_concat = text_encoder_hidden_states[:, :mask.sum().item()]
|
|
|
|
| 99 |
return text_encoder_concat
|
| 100 |
|
| 101 |
def run_binary(self, input_a, input_b, prior_type):
|
| 102 |
# Move pipeline to GPU
|
| 103 |
pipeline = self.priors_dict[prior_type]['pipeline']
|
| 104 |
pipeline.to('cuda')
|
|
|
|
| 105 |
input_image_embeds, input_hidden_state = pops_utils.preprocess(input_a, input_b,
|
| 106 |
self.image_encoder,
|
| 107 |
pipeline.prior.clip_mean.detach(),
|
|
@@ -131,14 +134,17 @@ class PopsPipelines:
|
|
| 131 |
|
| 132 |
# Move pipeline to CPU
|
| 133 |
pipeline.to('cpu')
|
|
|
|
| 134 |
return img_emb
|
| 135 |
|
| 136 |
def run_instruct(self, input_a, text):
|
|
|
|
| 137 |
text_encodings = self.process_text(text)
|
| 138 |
|
| 139 |
# Move pipeline to GPU
|
| 140 |
instruct_pipeline = self.priors_dict['instruct']['pipeline']
|
| 141 |
instruct_pipeline.to('cuda')
|
|
|
|
| 142 |
input_image_embeds, input_hidden_state = pops_utils.preprocess(input_a, None,
|
| 143 |
self.image_encoder,
|
| 144 |
instruct_pipeline.prior.clip_mean.detach(), instruct_pipeline.prior.clip_std.detach(),
|
|
@@ -155,13 +161,15 @@ class PopsPipelines:
|
|
| 155 |
|
| 156 |
# Move pipeline to CPU
|
| 157 |
instruct_pipeline.to('cpu')
|
|
|
|
| 158 |
return img_emb
|
| 159 |
|
| 160 |
def render(self, img_emb):
|
|
|
|
| 161 |
images = self.decoder(image_embeds=img_emb.image_embeds, negative_image_embeds=img_emb.negative_image_embeds,
|
| 162 |
num_inference_steps=50, height=512,
|
| 163 |
width=512, guidance_scale=4).images
|
| 164 |
-
|
| 165 |
return images[0]
|
| 166 |
|
| 167 |
def run_instruct_texture(self, image_object_path, text_instruct, image_texture_path):
|
|
|
|
| 15 |
prior_scene_repo: str = 'models/scene/learned_prior.pth'
|
| 16 |
prior_repo = "pOpsPaper/operators"
|
| 17 |
|
| 18 |
+
# gpu = torch.device('cuda')
|
| 19 |
+
# cpu = torch.device('cpu')
|
| 20 |
|
| 21 |
class PopsPipelines:
|
| 22 |
def __init__(self):
|
| 23 |
weight_dtype = torch.float16
|
| 24 |
self.weight_dtype = weight_dtype
|
| 25 |
+
device = 'cpu' #torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 26 |
+
self.device = 'cuda' #device
|
| 27 |
self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(kandinsky_prior_repo,
|
| 28 |
subfolder='image_encoder',
|
| 29 |
torch_dtype=weight_dtype).eval()
|
|
|
|
| 84 |
return image
|
| 85 |
|
| 86 |
def process_text(self, text):
|
| 87 |
+
self.text_encoder.to('cuda')
|
| 88 |
text_inputs = self.tokenizer(
|
| 89 |
text,
|
| 90 |
padding="max_length",
|
|
|
|
| 97 |
text_encoder_output = self.text_encoder(text_inputs.input_ids.to(self.device))
|
| 98 |
text_encoder_hidden_states = text_encoder_output.last_hidden_state
|
| 99 |
text_encoder_concat = text_encoder_hidden_states[:, :mask.sum().item()]
|
| 100 |
+
self.text_encoder.to('cpu')
|
| 101 |
return text_encoder_concat
|
| 102 |
|
| 103 |
def run_binary(self, input_a, input_b, prior_type):
|
| 104 |
# Move pipeline to GPU
|
| 105 |
pipeline = self.priors_dict[prior_type]['pipeline']
|
| 106 |
pipeline.to('cuda')
|
| 107 |
+
self.image_encoder.to('cuda')
|
| 108 |
input_image_embeds, input_hidden_state = pops_utils.preprocess(input_a, input_b,
|
| 109 |
self.image_encoder,
|
| 110 |
pipeline.prior.clip_mean.detach(),
|
|
|
|
| 134 |
|
| 135 |
# Move pipeline to CPU
|
| 136 |
pipeline.to('cpu')
|
| 137 |
+
self.image_encoder.to('cpu')
|
| 138 |
return img_emb
|
| 139 |
|
| 140 |
def run_instruct(self, input_a, text):
|
| 141 |
+
|
| 142 |
text_encodings = self.process_text(text)
|
| 143 |
|
| 144 |
# Move pipeline to GPU
|
| 145 |
instruct_pipeline = self.priors_dict['instruct']['pipeline']
|
| 146 |
instruct_pipeline.to('cuda')
|
| 147 |
+
self.image_encoder.to('cuda')
|
| 148 |
input_image_embeds, input_hidden_state = pops_utils.preprocess(input_a, None,
|
| 149 |
self.image_encoder,
|
| 150 |
instruct_pipeline.prior.clip_mean.detach(), instruct_pipeline.prior.clip_std.detach(),
|
|
|
|
| 161 |
|
| 162 |
# Move pipeline to CPU
|
| 163 |
instruct_pipeline.to('cpu')
|
| 164 |
+
self.image_encoder.to('cpu')
|
| 165 |
return img_emb
|
| 166 |
|
| 167 |
def render(self, img_emb):
|
| 168 |
+
self.decoder.to('cuda')
|
| 169 |
images = self.decoder(image_embeds=img_emb.image_embeds, negative_image_embeds=img_emb.negative_image_embeds,
|
| 170 |
num_inference_steps=50, height=512,
|
| 171 |
width=512, guidance_scale=4).images
|
| 172 |
+
self.decoder.to('cpu')
|
| 173 |
return images[0]
|
| 174 |
|
| 175 |
def run_instruct_texture(self, image_object_path, text_instruct, image_texture_path):
|