ragesh-stable-diffusion-3.5-large

Runtime error

App Files Files Community

RageshAntony committed on Dec 29, 2024

Commit

a8c6b1a

verified ·

1 Parent(s): 02919e4

added deep cleanup

Browse files

Files changed (1) hide show

app.py +133 -28

app.py CHANGED Viewed

@@ -8,6 +8,13 @@ from diffusers import (
     LuminaText2ImgPipeline
 )
 import spaces
 # Constants
 MAX_SEED = np.iinfo(np.int32).max
@@ -47,18 +54,85 @@ MODEL_CONFIGS = {
     }
 }
-# Initialize model pipelines
 pipes = {}
 def load_pipeline(model_name):
     config = MODEL_CONFIGS[model_name]
     pipe = config["pipeline_class"].from_pretrained(
         config["repo_id"],
         torch_dtype=TORCH_DTYPE
     )
     pipe = pipe.to(DEVICE)
     if hasattr(pipe, 'enable_model_cpu_offload'):
         pipe.enable_model_cpu_offload()
     return pipe
 @spaces.GPU(duration=180)
@@ -74,33 +148,48 @@ def generate_image(
     num_inference_steps=40,
     progress=gr.Progress(track_tqdm=True)
 ):
-    progress(0, desc=f"Loading {model_name} model...")
-    # Load model if not already loaded
-    if model_name not in pipes:
-        pipes[model_name] = load_pipeline(model_name)
-    pipe = pipes[model_name]
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator(DEVICE).manual_seed(seed)
-    progress(0.3, desc=f"Generating image with {model_name}...")
-    image = pipe(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps,
-        width=width,
-        height=height,
-        generator=generator,
-    ).images[0]
-    progress(1.0, desc=f"Generation complete with {model_name}")
-    return image, seed
 # Gradio Interface
 css = """
@@ -173,6 +262,9 @@ with gr.Blocks(css=css) as demo:
                     value=40,
                 )
         # Create tabs for each model
         with gr.Tabs() as tabs:
             results = {}
@@ -188,6 +280,14 @@ with gr.Blocks(css=css) as demo:
         ]
         gr.Examples(examples=examples, inputs=[prompt])
     # Handle generation for each model
     def generate_all(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress()):
         outputs = []
@@ -199,9 +299,14 @@ with gr.Blocks(css=css) as demo:
                     num_inference_steps, progress
                 )
                 outputs.extend([image, used_seed])
             except Exception as e:
                 outputs.extend([None, None])
                 print(f"Error generating with {model_name}: {str(e)}")
         return outputs
     # Set up the generation trigger

     LuminaText2ImgPipeline
 )
 import spaces
+import gc
+import os
+import psutil
+import threading
+from pathlib import Path
+import shutil
+import time
 # Constants
 MAX_SEED = np.iinfo(np.int32).max
     }
 }
+# Registry of loaded model pipelines, keyed by model name
 pipes = {}
+# One lock per configured model; acquired around generation for that model
+model_locks = {name: threading.Lock() for name in MODEL_CONFIGS}
+def get_process_memory():
+    """Return the resident set size (RSS) of the current process in GB.
+
+    "GB" here means bytes divided by 1024 three times (i.e. GiB).
+    """
+    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
+    return rss_bytes / 1024 / 1024 / 1024
+def clear_torch_cache():
+    """Release PyTorch's cached CUDA memory; do nothing when CUDA is absent."""
+    if not torch.cuda.is_available():
+        return
+    # Empty the caching allocator first, then collect memory tied to
+    # CUDA IPC handles.
+    torch.cuda.empty_cache()
+    torch.cuda.ipc_collect()
+def remove_cache_dir(model_name):
+    """Remove the model's downloaded files from the local Hugging Face cache.
+
+    The hub cache stores each repository under
+    ``<hub cache>/models--<org>--<name>``, where the hub cache is
+    ``$HF_HUB_CACHE``, else ``$HF_HOME/hub``, else
+    ``~/.cache/huggingface/hub``. The previously hard-coded
+    ``~/.cache/huggingface/diffusers/<org>--<name>`` path is still cleaned
+    for backward compatibility.
+
+    Note: once the directory is removed, the next load of this model has
+    to download its weights again.
+
+    Args:
+        model_name: Key into ``MODEL_CONFIGS``; its ``repo_id`` selects the
+            cache folder to delete.
+    """
+    repo_folder = MODEL_CONFIGS[model_name]['repo_id'].replace('/', '--')
+    xdg_cache = Path(os.environ.get('XDG_CACHE_HOME') or Path.home() / '.cache')
+    hf_home = Path(os.environ.get('HF_HOME') or xdg_cache / 'huggingface').expanduser()
+    hub_cache = Path(os.environ.get('HF_HUB_CACHE') or hf_home / 'hub').expanduser()
+    candidates = (
+        hub_cache / f'models--{repo_folder}',
+        # Legacy location targeted by the earlier implementation.
+        Path.home() / '.cache' / 'huggingface' / 'diffusers' / repo_folder,
+    )
+    for cache_dir in candidates:
+        if cache_dir.exists():
+            # Best effort: a partially locked cache must not abort cleanup.
+            shutil.rmtree(cache_dir, ignore_errors=True)
+def deep_cleanup(model_name, pipe):
+    """Tear down a pipeline and release the resources it holds.
+
+    Cleanup is best effort: any exception is logged and swallowed, and a
+    final garbage-collection / CUDA cache pass always runs.
+
+    Args:
+        model_name: Key of the pipeline in ``pipes`` and ``MODEL_CONFIGS``.
+        pipe: The pipeline object to dismantle.
+    """
+    # Unregister first. If a later step raises, the registry must not keep
+    # a half-dismantled pipeline that the next request would try to reuse.
+    pipes.pop(model_name, None)
+    try:
+        # 1. Move model to CPU first (helps prevent CUDA memory fragmentation)
+        if hasattr(pipe, 'to'):
+            pipe.to('cpu')
+        # 2. Delete all model components explicitly
+        for attr_name in list(vars(pipe)):
+            if hasattr(pipe, attr_name):
+                delattr(pipe, attr_name)
+        # 3. Clear CUDA cache
+        clear_torch_cache()
+        # 4. Run garbage collection multiple times
+        for _ in range(3):
+            gc.collect()
+        # 5. Remove cached files
+        remove_cache_dir(model_name)
+        # 6. Additional CUDA cleanup if available
+        if torch.cuda.is_available():
+            torch.cuda.synchronize()
+        # 7. Wait a small amount of time to ensure cleanup
+        time.sleep(1)
+    except Exception as e:
+        # Deliberately broad: cleanup failures must not mask the result
+        # (or the original error) of the generation that triggered them.
+        print(f"Error during cleanup of {model_name}: {e}")
+    finally:
+        # Final garbage collection
+        gc.collect()
+        clear_torch_cache()
 def load_pipeline(model_name):
+    """Load the pipeline for ``model_name`` and log the process-memory change.
+
+    Args:
+        model_name: Key into ``MODEL_CONFIGS``; the entry supplies the
+            ``pipeline_class`` and ``repo_id`` used for loading.
+
+    Returns:
+        The pipeline created by ``from_pretrained``, moved to ``DEVICE`` and
+        with model CPU offload enabled when the pipeline exposes it.
+    """
+    initial_memory = get_process_memory()
     config = MODEL_CONFIGS[model_name]
     pipe = config["pipeline_class"].from_pretrained(
         config["repo_id"],
         torch_dtype=TORCH_DTYPE
     )
     pipe = pipe.to(DEVICE)
     # NOTE(review): the pipeline is moved to DEVICE and then offload is enabled;
     # presumably only one of the two is needed - confirm against diffusers docs.
     if hasattr(pipe, 'enable_model_cpu_offload'):
         pipe.enable_model_cpu_offload()
+    final_memory = get_process_memory()
+    # Difference in process RSS (GB) across the load; printed for diagnostics only.
+    print(f"Memory used by {model_name}: {final_memory - initial_memory:.2f} GB")
     return pipe
 @spaces.GPU(duration=180)
     num_inference_steps=40,
     progress=gr.Progress(track_tqdm=True)
 ):
+    with model_locks[model_name]:
+        try:
+            progress(0, desc=f"Loading {model_name} model...")
+            # Load model if not already loaded
+            if model_name not in pipes:
+                pipes[model_name] = load_pipeline(model_name)
+            pipe = pipes[model_name]
+            if randomize_seed:
+                seed = random.randint(0, MAX_SEED)
+            generator = torch.Generator(DEVICE).manual_seed(seed)
+            progress(0.3, desc=f"Generating image with {model_name}...")
+            # Generate image
+            image = pipe(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                guidance_scale=guidance_scale,
+                num_inference_steps=num_inference_steps,
+                width=width,
+                height=height,
+                generator=generator,
+            ).images[0]
+            progress(0.9, desc=f"Cleaning up {model_name} resources...")
+            # Cleanup after generation
+            deep_cleanup(model_name, pipe)
+            progress(1.0, desc=f"Generation complete with {model_name}")
+            return image, seed
+        except Exception as e:
+            print(f"Error with {model_name}: {str(e)}")
+            # Ensure cleanup happens even if generation fails
+            if model_name in pipes:
+                deep_cleanup(model_name, pipes[model_name])
+            raise e
 # Gradio Interface
 css = """
                     value=40,
                 )
+        # Memory usage indicator
+        memory_indicator = gr.Markdown("Current memory usage: 0 GB")
         # Create tabs for each model
         with gr.Tabs() as tabs:
             results = {}
         ]
         gr.Examples(examples=examples, inputs=[prompt])
+    def update_memory_usage():
+        """Build the memory-usage text: process RAM, plus CUDA memory if available."""
+        ram_gb = get_process_memory()
+        if not torch.cuda.is_available():
+            return f"Current memory usage: System RAM: {ram_gb:.2f} GB"
+        # Bytes currently allocated by PyTorch on the GPU, converted to GB.
+        gpu_gb = torch.cuda.memory_allocated() / 1024 / 1024 / 1024
+        return f"Current memory usage: System RAM: {ram_gb:.2f} GB, CUDA: {gpu_gb:.2f} GB"
     # Handle generation for each model
     def generate_all(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress()):
         outputs = []
                     num_inference_steps, progress
                 )
                 outputs.extend([image, used_seed])
+                # Update memory usage after each model
+                memory_indicator.update(update_memory_usage())
             except Exception as e:
                 outputs.extend([None, None])
                 print(f"Error generating with {model_name}: {str(e)}")
         return outputs
     # Set up the generation trigger