RealVis_v5.0_BF16_G

Running on Zero

App Files Files Community

ford442 committed on Jan 11

Commit

c3b6a89

verified ·

1 Parent(s): 54370eb

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -15

app.py CHANGED Viewed

@@ -87,10 +87,10 @@ os.putenv("HF_HUB_ENABLE_HF_TRANSFER","1")
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 def load_and_prepare_model():
-    vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False).to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
     #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1,use_karras_sigmas=True)
-    sched = DPMSolverSDEScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler')
     #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear")
     pipe = StableDiffusionXLPipeline.from_pretrained(
         'ford442/RealVisXL_V5.0_BF16',
@@ -99,29 +99,25 @@ def load_and_prepare_model():
        # low_cpu_mem_usage = False,
         add_watermarker=False,
     )
-    pipe.vae = vaeXL #.to(torch.bfloat16)
     pipe.scheduler = sched
     #pipe.vae.do_resize=False
     #pipe.vae.vae_scale_factor=8
     #pipe.to(device=device, dtype=torch.bfloat16)
     pipe.to(device)
-    #Make sure the unet is contiguous
-    pipe.unet = pipe.unet.to(memory_format=torch.contiguous_format)
     pipe.to(torch.bfloat16)
     pipe.vae.set_default_attn_processor()
     print(f'init noise scale: {pipe.scheduler.init_noise_sigma}')
     pipe.watermark=None
-    pipe.safety_checker=None
     return pipe
-# Preload and compile both models
-pipe = load_and_prepare_model()
           # for compile
 hidet.option.parallel_build(True)
 torch._dynamo.config.suppress_errors = True
 torch._dynamo.disallow_in_graph(diffusers.models.attention.BasicTransformerBlock)
 # more search
@@ -134,9 +130,9 @@ hidet.torch.dynamo_config.use_fp16(True)
 hidet.torch.dynamo_config.use_fp16_reduction(True)
 # use tensorcore
 hidet.torch.dynamo_config.use_tensor_core()
-pipe.unet = torch.compile(pipe.unet, backend="hidet")
 MAX_SEED = np.iinfo(np.int64).max

 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 def load_and_prepare_model():
+    vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
     #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1,use_karras_sigmas=True)
+    #sched = DPMSolverSDEScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler')
+    sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1) #,use_karras_sigmas=True)
     #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear")
     pipe = StableDiffusionXLPipeline.from_pretrained(
         'ford442/RealVisXL_V5.0_BF16',
        # low_cpu_mem_usage = False,
         add_watermarker=False,
     )
+    pipe.vae = vaeXL.to(torch.bfloat16)
     pipe.scheduler = sched
     #pipe.vae.do_resize=False
     #pipe.vae.vae_scale_factor=8
     #pipe.to(device=device, dtype=torch.bfloat16)
     pipe.to(device)
     pipe.to(torch.bfloat16)
     pipe.vae.set_default_attn_processor()
     print(f'init noise scale: {pipe.scheduler.init_noise_sigma}')
     pipe.watermark=None
+    pipe.safety_checker=None
+    pipe.unet = pipe.unet.to(memory_format=torch.contiguous_format)
+    pipe.unet = torch.compile(pipe.unet, backend="hidet")
     return pipe
           # for compile
 hidet.option.parallel_build(True)
+hidet.option.parallel_tune(-1,16.0)
 torch._dynamo.config.suppress_errors = True
 torch._dynamo.disallow_in_graph(diffusers.models.attention.BasicTransformerBlock)
 # more search
 hidet.torch.dynamo_config.use_fp16_reduction(True)
 # use tensorcore
 hidet.torch.dynamo_config.use_tensor_core()
+# Preload and compile both models
+pipe = load_and_prepare_model()
 MAX_SEED = np.iinfo(np.int64).max