Spaces: Running on Zero
Begin actual demo
app.py CHANGED
@@ -15,75 +15,80 @@ import gradio as gr
 import spaces
 import torch
 import torch._inductor
 from torch._inductor.package import package_aoti
 from torch.export.pt2_archive._package import AOTICompiledModel
 from torch.export.pt2_archive._package_weights import Weights
-from torchvision.models import ResNet18_Weights, resnet18


-...
-model.to('cuda')

-package_path = os.path.join(os.getcwd(), 'resnet18.pt2')
-inductor_configs = {'max_autotune': True}
-example_inputs = (torch.randn(2, 3, 224, 224, device='cuda'),)

 @spaces.GPU
-def ...
     files = [file for file in artifacts if isinstance(file, str)]
     package_aoti(package_path, files)
     weights, = (artifact for artifact in artifacts if isinstance(artifact, Weights))
     weights_: dict[str, torch.Tensor] = {}
     for name in weights:
         tensor, _properties = weights.get_weight(name)
         tensor_ = torch.empty_like(tensor, device='cpu').pin_memory()
         weights_[name] = tensor_.copy_(tensor).detach().share_memory_()
     return weights_

-...
 weights = {name: tensor.to('cuda') for name, tensor in weights.items()}

-...

-compiled_model: AOTICompiledModel | None = None

 @spaces.GPU
-def run_model():
-    ...
-    # This is even better (I think it was the idea I had originally)
-    # An even higher-level interface would be:
-    # pipeline.transformer = ZeroGPUCompile(pipeline.transformer, kwargs=example_kwargs)
-    # And the compilation with @spaces.GPU, the packaging, the separated weights, etc.
-    # All of that would be handled automatically
-    # But we should still keep several levels of abstraction, I think
-    # And maybe start with the low-level one (or even no helper at all, fully manual, but for now I get a driver context runtime error)
-    # I should still be able to find an ideal level of abstraction
-    global compiled_model
-    if compiled_model is None:
-        compiled_model = torch._inductor.aoti_load_package(package_path)
-        compiled_model.load_constants(weights, check_full_update=True, user_managed=True)
-        with torch.inference_mode():
-            compiled_model(example_inputs)
-    with torch.inference_mode():
-        return str(compiled_model(example_inputs))
-
-
-gr.Interface(run_model, [], 'text').launch(show_error=True)
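The removed version above was a ResNet-18 proof of concept. Its inline comments propose a higher-level interface, pipeline.transformer = ZeroGPUCompile(pipeline.transformer, kwargs=example_kwargs), that would hide the @spaces.GPU compilation, the packaging, and the separated weights. Below is a minimal sketch of what such a wrapper could look like. ZeroGPUCompile is hypothetical (it is not an existing spaces API), and using torch._inductor.aoti_compile_and_package as the one-shot compile step is an assumption about the design:

# Hypothetical sketch: ZeroGPUCompile is not a real spaces API; the names here are assumed.
import spaces
import torch
import torch._inductor


class ZeroGPUCompile:
    def __init__(self, module: torch.nn.Module, kwargs: dict):
        # Export and AOT-compile once inside a GPU-attached worker,
        # keeping only the on-disk .pt2 package path in the main process.
        self.package_path = spaces.GPU(self._compile)(module, kwargs)
        self._compiled = None

    @staticmethod
    def _compile(module, kwargs):
        exported = torch.export.export(module, args=(), kwargs=kwargs)
        return torch._inductor.aoti_compile_and_package(exported)

    def __call__(self, *args, **kwargs):
        # Load lazily in whichever process ends up running inference.
        if self._compiled is None:
            self._compiled = torch._inductor.aoti_load_package(self.package_path)
        return self._compiled(*args, **kwargs)

The new side of the diff replaces the ResNet-18 demo with a FLUX.1-schnell text-to-image pipeline: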
 import spaces
 import torch
 import torch._inductor
+from diffusers import FluxPipeline
 from torch._inductor.package import package_aoti
 from torch.export.pt2_archive._package import AOTICompiledModel
 from torch.export.pt2_archive._package_weights import Weights


+pipeline = FluxPipeline.from_pretrained('black-forest-labs/FLUX.1-schnell', torch_dtype=torch.bfloat16).to('cuda')
+package_path = 'pipeline.pt2'


 @spaces.GPU
+def compile_transformer():
+
+    def _example_tensor(*shape):
+        return torch.randn(*shape, device='cuda', dtype=torch.bfloat16)
+
+    is_timestep_distilled = not pipeline.transformer.config.guidance_embeds
+    seq_length = 256 if is_timestep_distilled else 512
+
+    transformer_kwargs = {
+        'hidden_states': _example_tensor(1, 4096, 64),
+        'timestep': torch.tensor([1.], device='cuda', dtype=torch.bfloat16),
+        'guidance': None if is_timestep_distilled else torch.tensor([1.], device='cuda', dtype=torch.bfloat16),
+        'pooled_projections': _example_tensor(1, 768),
+        'encoder_hidden_states': _example_tensor(1, seq_length, 4096),
+        'txt_ids': _example_tensor(seq_length, 3),
+        'img_ids': _example_tensor(4096, 3),
+        'joint_attention_kwargs': {},
+        'return_dict': False,
+    }
+
+    inductor_configs = {
+        'conv_1x1_as_mm': True,
+        'epilogue_fusion': False,
+        'coordinate_descent_tuning': True,
+        'coordinate_descent_check_all_directions': True,
+        'max_autotune': True,
+        'triton.cudagraphs': True,
+    }
+
+    exported = torch.export.export(pipeline.transformer, args=(), kwargs=transformer_kwargs)
+
+    artifacts = torch._inductor.aot_compile(exported.module(), *exported.example_inputs, options=inductor_configs | {
+        'aot_inductor.package_constants_in_so': False,
+        'aot_inductor.package_constants_on_disk': True,
+        'aot_inductor.package': True,
+    })
+
     files = [file for file in artifacts if isinstance(file, str)]
     package_aoti(package_path, files)
+
     weights, = (artifact for artifact in artifacts if isinstance(artifact, Weights))
     weights_: dict[str, torch.Tensor] = {}
+
     for name in weights:
         tensor, _properties = weights.get_weight(name)
         tensor_ = torch.empty_like(tensor, device='cpu').pin_memory()
         weights_[name] = tensor_.copy_(tensor).detach().share_memory_()
+
     return weights_

+
+weights = compile_transformer()
 weights = {name: tensor.to('cuda') for name, tensor in weights.items()}

+pipeline.transformer = None


 @spaces.GPU
+def generate_image(prompt: str):
+    compiled_transformer: AOTICompiledModel = torch._inductor.aoti_load_package(package_path)
+    compiled_transformer.load_constants(weights, check_full_update=True, user_managed=True)
+    pipeline.transformer = compiled_transformer
+    return pipeline(prompt, num_inference_steps=4).images[0]
+
+
+gr.Interface(generate_image, 'text', 'image').launch(show_error=True)
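A note on why compile_transformer() returns pinned, shared-memory CPU copies rather than the CUDA tensors that compilation produces: under ZeroGPU, the body of a @spaces.GPU function runs in a separate GPU-attached process, so CUDA tensors created there do not survive the call. CPU tensors whose storage lives in shared memory can be handed back to the main process without a serializing copy, and pinned (page-locked) buffers make the eventual .to('cuda') upload fast. Here is the staging step in isolation; the helper name is ours, while the tensor operations mirror the loop in compile_transformer():

import torch

def stage_for_main_process(cuda_tensors: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
    # Sketch of the weight-staging idiom used above (hypothetical helper name).
    staged: dict[str, torch.Tensor] = {}
    for name, tensor in cuda_tensors.items():
        # Page-locked host buffer: speeds up this device-to-host copy
        # as well as any later host-to-device upload.
        buffer = torch.empty_like(tensor, device='cpu').pin_memory()
        # share_memory_() moves the storage into shared memory so another
        # process can map the same tensor instead of receiving a copy.
        staged[name] = buffer.copy_(tensor).detach().share_memory_()
    return staged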
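For reference, the same package-and-reload round trip on a toy module, using only the calls that appear in the diff. These are private torch._inductor hooks, so treat this as a sketch against a recent PyTorch build; the toy model, shapes, and file name are ours, and it needs a live CUDA context:

import torch
import torch._inductor
from torch._inductor.package import package_aoti
from torch.export.pt2_archive._package_weights import Weights

model = torch.nn.Linear(8, 8).cuda()
example_args = (torch.randn(2, 8, device='cuda'),)

# Compile with the weights kept out of the shared library and emitted
# as a separate Weights artifact on disk instead.
exported = torch.export.export(model, args=example_args)
artifacts = torch._inductor.aot_compile(exported.module(), *exported.example_inputs, options={
    'aot_inductor.package_constants_in_so': False,
    'aot_inductor.package_constants_on_disk': True,
    'aot_inductor.package': True,
})

# Package the generated files; pick up the weights separately.
package_aoti('linear.pt2', [f for f in artifacts if isinstance(f, str)])
weights, = (a for a in artifacts if isinstance(a, Weights))

# Reload the package and re-attach the weights, as generate_image() does.
compiled = torch._inductor.aoti_load_package('linear.pt2')
compiled.load_constants(
    {name: weights.get_weight(name)[0] for name in weights},
    check_full_update=True,
    user_managed=True,
)
print(compiled(*example_args).shape)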