Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -87,10 +87,10 @@ os.putenv("HF_HUB_ENABLE_HF_TRANSFER","1")
|
|
87 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
88 |
|
89 |
def load_and_prepare_model():
|
90 |
-
vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False)
|
91 |
#sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1,use_karras_sigmas=True)
|
92 |
-
sched = DPMSolverSDEScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler')
|
93 |
-
|
94 |
#sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear")
|
95 |
pipe = StableDiffusionXLPipeline.from_pretrained(
|
96 |
'ford442/RealVisXL_V5.0_BF16',
|
@@ -99,29 +99,25 @@ def load_and_prepare_model():
|
|
99 |
# low_cpu_mem_usage = False,
|
100 |
add_watermarker=False,
|
101 |
)
|
102 |
-
pipe.vae = vaeXL
|
103 |
pipe.scheduler = sched
|
104 |
#pipe.vae.do_resize=False
|
105 |
#pipe.vae.vae_scale_factor=8
|
106 |
#pipe.to(device=device, dtype=torch.bfloat16)
|
107 |
pipe.to(device)
|
108 |
-
|
109 |
-
#Make sure the unet is contiguous
|
110 |
-
pipe.unet = pipe.unet.to(memory_format=torch.contiguous_format)
|
111 |
-
|
112 |
pipe.to(torch.bfloat16)
|
113 |
pipe.vae.set_default_attn_processor()
|
114 |
print(f'init noise scale: {pipe.scheduler.init_noise_sigma}')
|
115 |
pipe.watermark=None
|
116 |
-
pipe.safety_checker=None
|
|
|
|
|
|
|
117 |
return pipe
|
118 |
|
119 |
-
# Preload and compile both models
|
120 |
-
pipe = load_and_prepare_model()
|
121 |
-
|
122 |
-
|
123 |
# for compile
|
124 |
hidet.option.parallel_build(True)
|
|
|
125 |
torch._dynamo.config.suppress_errors = True
|
126 |
torch._dynamo.disallow_in_graph(diffusers.models.attention.BasicTransformerBlock)
|
127 |
# more search
|
@@ -134,9 +130,9 @@ hidet.torch.dynamo_config.use_fp16(True)
|
|
134 |
hidet.torch.dynamo_config.use_fp16_reduction(True)
|
135 |
# use tensorcore
|
136 |
hidet.torch.dynamo_config.use_tensor_core()
|
137 |
-
|
138 |
-
|
139 |
|
|
|
140 |
|
141 |
MAX_SEED = np.iinfo(np.int64).max
|
142 |
|
|
|
87 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
88 |
|
89 |
def load_and_prepare_model():
|
90 |
+
vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
|
91 |
#sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1,use_karras_sigmas=True)
|
92 |
+
#sched = DPMSolverSDEScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler')
|
93 |
+
sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1) #,use_karras_sigmas=True)
|
94 |
#sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear")
|
95 |
pipe = StableDiffusionXLPipeline.from_pretrained(
|
96 |
'ford442/RealVisXL_V5.0_BF16',
|
|
|
99 |
# low_cpu_mem_usage = False,
|
100 |
add_watermarker=False,
|
101 |
)
|
102 |
+
pipe.vae = vaeXL.to(torch.bfloat16)
|
103 |
pipe.scheduler = sched
|
104 |
#pipe.vae.do_resize=False
|
105 |
#pipe.vae.vae_scale_factor=8
|
106 |
#pipe.to(device=device, dtype=torch.bfloat16)
|
107 |
pipe.to(device)
|
|
|
|
|
|
|
|
|
108 |
pipe.to(torch.bfloat16)
|
109 |
pipe.vae.set_default_attn_processor()
|
110 |
print(f'init noise scale: {pipe.scheduler.init_noise_sigma}')
|
111 |
pipe.watermark=None
|
112 |
+
pipe.safety_checker=None
|
113 |
+
pipe.unet = pipe.unet.to(memory_format=torch.contiguous_format)
|
114 |
+
pipe.unet = torch.compile(pipe.unet, backend="hidet")
|
115 |
+
|
116 |
return pipe
|
117 |
|
|
|
|
|
|
|
|
|
118 |
# for compile
|
119 |
hidet.option.parallel_build(True)
|
120 |
+
hidet.option.parallel_tune(-1,16.0)
|
121 |
torch._dynamo.config.suppress_errors = True
|
122 |
torch._dynamo.disallow_in_graph(diffusers.models.attention.BasicTransformerBlock)
|
123 |
# more search
|
|
|
130 |
hidet.torch.dynamo_config.use_fp16_reduction(True)
|
131 |
# use tensorcore
|
132 |
hidet.torch.dynamo_config.use_tensor_core()
|
133 |
+
# Preload and compile both models
|
|
|
134 |
|
135 |
+
pipe = load_and_prepare_model()
|
136 |
|
137 |
MAX_SEED = np.iinfo(np.int64).max
|
138 |
|