Update media.py
media.py CHANGED
@@ -1,111 +1,159 @@
 # --- LIBRARIES ---
 import torch
 import gradio as gr
 import random
 import time
-from diffusers import AutoPipelineForText2Image, TextToVideoSDPipeline
 import gc
 import os
 import imageio

-# ---
-
-if torch.cuda.is_available():
-    device = "cuda"
-    torch_dtype = torch.float16  # Use float16 for GPU
-    print("✅ GPU detected. Using CUDA.")
-else:
-    device = "cpu"
-    torch_dtype = torch.float32  # Use float32 for CPU
-    print("⚠️ No GPU detected. Using CPU. Performance will be slower.")


-
-
-
-HF_TOKEN = os.environ.get('HF_TOKEN')
-if HF_TOKEN:
         login(token=HF_TOKEN)
         print("✅ Hugging Face Authentication successful.")
-
-    print("
-
-    print("

 # --- CONFIGURATION & STATE ---
 available_models = {
     "Fast Image (SDXL Turbo)": "stabilityai/sdxl-turbo",
     "Quality Image (SDXL)": "stabilityai/stable-diffusion-xl-base-1.0",
     "Video (Damo-Vilab)": "damo-vilab/text-to-video-ms-1.7b"
 }
 model_state = { "current_pipe": None, "loaded_model_name": None }

-
-
-
     if model_state.get("loaded_model_name") != model_key:
-
-        yield {status_textbox: f"Unloading previous model..."}
         if model_state.get("current_pipe"):
-            del model_state["current_pipe"]
-            gc.collect()
-            if device == "cuda":
-                torch.cuda.empty_cache()
-
         model_id = available_models[model_key]
-
-        yield {status_textbox: f"Loading {model_id}... This can take a minute."}
-
-        # Adapt model loading based on hardware
-        if "Image" in model_key:
-            pipe = AutoPipelineForText2Image.from_pretrained(model_id, torch_dtype=torch_dtype, variant="fp16" if device == "cuda" else "fp32")
-        elif "Video" in model_key:
             pipe = TextToVideoSDPipeline.from_pretrained(model_id, torch_dtype=torch_dtype)
-
-
         pipe.to(device)

-
-
-
-
         model_state["current_pipe"] = pipe
         model_state["loaded_model_name"] = model_key
-        print(f"✅ Model loaded

     pipe = model_state["current_pipe"]
     generator = torch.Generator(device).manual_seed(seed)
-
-
-    if "
-
-
-        num_steps, guidance_scale = 1, 0.0
-    else:
-        num_steps, guidance_scale = int(steps), float(cfg_scale)
-
-    image = pipe(
-        prompt=prompt, negative_prompt=negative_prompt, num_inference_steps=num_steps,
-        guidance_scale=guidance_scale, width=int(width), height=int(height), generator=generator
-    ).images[0]
-    print("✅ Image generation complete.")
-    yield {output_image: image, output_video: None, status_textbox: f"Seed used: {seed}"}
-
-    elif "Video" in model_key:
-        print("Generating video...")
         video_frames = pipe(prompt=prompt, num_inference_steps=int(steps), height=320, width=576, num_frames=int(num_frames), generator=generator).frames

-
-
-
-
-

-# --- GRADIO
 with gr.Blocks(theme='gradio/soft') as demo:
     gr.Markdown("# The Generative Media Suite")
-
-    gr.Markdown("Create fast images, high-quality images, or short videos. Created by cheeseman182.")
     seed_state = gr.State(-1)
     with gr.Row():
         with gr.Column(scale=2):
@@ -125,6 +173,7 @@ with gr.Blocks(theme='gradio/soft') as demo:
             output_image = gr.Image(label="Image Result", interactive=False, height="60vh", visible=True)
             output_video = gr.Video(label="Video Result", interactive=False, height="60vh", visible=False)
             status_textbox = gr.Textbox(label="Status", interactive=False)
     def update_ui_on_model_change(model_key):
         is_video = "Video" in model_key
         is_turbo = "Turbo" in model_key
@@ -138,13 +187,14 @@ with gr.Blocks(theme='gradio/soft') as demo:
             output_video: gr.update(visible=is_video)
         }
     model_selector.change(update_ui_on_model_change, model_selector, [steps_slider, cfg_slider, width_slider, height_slider, num_frames_slider, output_image, output_video])
     click_event = generate_button.click(
         fn=lambda s: (s if s != -1 else random.randint(0, 2**32 - 1)),
         inputs=seed_input,
         outputs=seed_state,
         queue=False
     ).then(
-        fn=
         inputs=[model_selector, prompt_input, negative_prompt_input, steps_slider, cfg_slider, width_slider, height_slider, seed_state, num_frames_slider],
         outputs=[output_image, output_video, status_textbox]
     )
+# --- START OF FILE media.py (FINAL WITH LIVE PROGRESS) ---
+
 # --- LIBRARIES ---
 import torch
 import gradio as gr
 import random
 import time
+from diffusers import AutoPipelineForText2Image, TextToVideoSDPipeline, EulerAncestralDiscreteScheduler
 import gc
 import os
 import imageio
+import numpy as np
+import threading
+from queue import Queue, Empty as QueueEmpty
+from PIL import Image
+
+# --- DEVICE SETUP (device and torch_dtype are used by the model loaders below) ---
+if torch.cuda.is_available():
+    device = "cuda"
+    torch_dtype = torch.float16  # Use float16 for GPU
+    print("✅ GPU detected. Using CUDA.")
+else:
+    device = "cpu"
+    torch_dtype = torch.float32  # Use float32 for CPU
+    print("⚠️ No GPU detected. Using CPU. Performance will be slower.")

+# --- SECURE AUTHENTICATION FOR HUGGING FACE SPACES ---
+import os
+from huggingface_hub import login

+# This code will attempt to read the HF_TOKEN from the Space's secrets.
+# On your local machine, this will do nothing unless you set it up, which isn't necessary.
+# On the Hugging Face server, it will find the secret you just saved.
+HF_TOKEN = os.environ.get('HF_TOKEN')

+if HF_TOKEN:
+    print("✅ Found HF_TOKEN secret. Logging in...")
+    try:
         login(token=HF_TOKEN)
         print("✅ Hugging Face Authentication successful.")
+    except Exception as e:
+        print(f"❌ Hugging Face login failed: {e}")
+else:
+    print("⚠️ No HF_TOKEN secret found. Gated models may not be available on the deployed app.")

 # --- CONFIGURATION & STATE ---
 available_models = {
     "Fast Image (SDXL Turbo)": "stabilityai/sdxl-turbo",
     "Quality Image (SDXL)": "stabilityai/stable-diffusion-xl-base-1.0",
+    "Photorealism (Juggernaut)": "RunDiffusion/Juggernaut-XL-v9",
     "Video (Damo-Vilab)": "damo-vilab/text-to-video-ms-1.7b"
 }
 model_state = { "current_pipe": None, "loaded_model_name": None }
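+# model_state caches the loaded pipeline between clicks, so a reload only happens when the selected model changes.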

+# --- THE FINAL GENERATION FUNCTION WITH LIVE PROGRESS ---
+def generate_media_live_progress(model_key, prompt, negative_prompt, steps, cfg_scale, width, height, seed, num_frames):
+    # --- Model Loading (Unchanged) ---
     if model_state.get("loaded_model_name") != model_key:
+        yield {output_image: None, output_video: None, status_textbox: f"Loading {model_key}..."}
         if model_state.get("current_pipe"):
+            del model_state["current_pipe"]; gc.collect(); torch.cuda.empty_cache()
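+            # Dropping the old pipeline and emptying the CUDA cache frees VRAM before the next model is loaded.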
         model_id = available_models[model_key]
+        if "Video" in model_key:
             pipe = TextToVideoSDPipeline.from_pretrained(model_id, torch_dtype=torch_dtype)
+        else:
+            pipe = AutoPipelineForText2Image.from_pretrained(model_id, torch_dtype=torch_dtype, variant="fp16")
+
+        pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
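+        # from_config swaps in the Euler Ancestral sampler while keeping the pipeline's existing scheduler settings.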
         pipe.to(device)

+        if device == "cuda":
+            if "Video" not in model_key: pipe.enable_model_cpu_offload()
+            pipe.enable_vae_slicing()
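+            # CPU offload and VAE slicing trade a little speed for a much lower peak VRAM footprint.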
         model_state["current_pipe"] = pipe
         model_state["loaded_model_name"] = model_key
+        print(f"✅ Model loaded on {device.upper()}.")

     pipe = model_state["current_pipe"]
     generator = torch.Generator(device).manual_seed(seed)
+
+    # --- Generation Logic ---
+    if "Video" in model_key:
+        # For video, we'll keep the simple status updates for now
+        yield {output_image: None, output_video: None, status_textbox: "Generating video..."}
         video_frames = pipe(prompt=prompt, num_inference_steps=int(steps), height=320, width=576, num_frames=int(num_frames), generator=generator).frames
+        video_frames_5d = np.array(video_frames)
+        video_frames_4d = np.squeeze(video_frames_5d)
+        video_uint8 = (video_frames_4d * 255).astype(np.uint8)
+        list_of_frames = [frame for frame in video_uint8]
+        video_path = f"video_{seed}.mp4"
+        imageio.mimsave(video_path, list_of_frames, fps=12)
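+        # The block above assumes float frames in [0, 1]: squeeze the extra batch dimension, scale to uint8, then write an MP4 at 12 fps.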
+        yield {output_image: None, output_video: video_path, status_textbox: f"Video saved! Seed: {seed}"}
+
+    else:  # Image Generation with Live Progress
+        progress_queue = Queue()
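+        # The pipeline call blocks, so it runs in a worker thread and reports progress through this queue while the main thread yields updates to Gradio.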

+        def run_pipe():
+            # This function runs in a separate thread
+            start_time = time.time()
+
+            def progress_callback(pipe, step, timestep, callback_kwargs):
+                # This is called by the pipeline at each step
+                elapsed_time = time.time() - start_time
+                # Avoid division by zero on the first step
+                if elapsed_time > 0:
+                    its_per_sec = (step + 1) / elapsed_time
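+                    # This is an average rate since the start, so early readings (which include warm-up) understate the real speed.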
+                    progress_queue.put((step + 1, its_per_sec))
+                return callback_kwargs
+
+            try:
+                # The final image is still generated using the pipeline's high-quality VAE
+                final_image = pipe(
+                    prompt=prompt, negative_prompt=negative_prompt, num_inference_steps=int(steps),
+                    guidance_scale=float(cfg_scale), width=int(width), height=int(height),
+                    generator=generator,
+                    callback_on_step_end=progress_callback
+                ).images[0]
+                progress_queue.put(final_image)  # Put the final result on the queue
+            except Exception as e:
+                print(f"An error occurred in the generation thread: {e}")
+                progress_queue.put(None)  # Signal an error
+
+        # Start the generation in the background
+        thread = threading.Thread(target=run_pipe)
+        thread.start()
+
+        # In the main thread, listen for updates from the queue and yield to Gradio
+        total_steps = int(steps)
+        yield {status_textbox: "Generating..."}  # Initial status
+
+        while True:
+            try:
+                update = progress_queue.get(timeout=1.0)  # Wait for an update
+
+                if isinstance(update, Image.Image):  # It's the final image
+                    yield {output_image: update, status_textbox: f"Generation complete! Seed: {seed}"}
+                    break
+                elif isinstance(update, tuple):  # It's a progress update (step, speed)
+                    current_step, its_per_sec = update
+                    progress_percent = (current_step / total_steps) * 100
+                    steps_remaining = total_steps - current_step
+                    eta_seconds = steps_remaining / its_per_sec if its_per_sec > 0 else 0
+                    eta_minutes, eta_seconds_rem = divmod(int(eta_seconds), 60)
+
+                    status_text = (
+                        f"Generating... {progress_percent:.0f}% ({current_step}/{total_steps}) | "
+                        f"{its_per_sec:.2f}it/s | "
+                        f"ETA: {eta_minutes:02d}:{eta_seconds_rem:02d}"
+                    )
+                    yield {status_textbox: status_text}
+                elif update is None:  # An error occurred
+                    yield {status_textbox: "Error during generation. Check console."}
+                    break
+            except QueueEmpty:
+                if not thread.is_alive():
+                    print("⚠️ Generation thread finished unexpectedly.")
+                    yield {status_textbox: "Generation failed. Check console for details."}
+                    break
+
+        thread.join()
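+        # join() ensures the worker thread has fully exited before the generator returns.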

+# --- GRADIO UI ---
 with gr.Blocks(theme='gradio/soft') as demo:
+    # (UI layout is the same, just point to the new function)
     gr.Markdown("# The Generative Media Suite")
+    gr.Markdown("Create fast images, high-quality images, or short videos. Created by cheeseman182. (note: the speed on the status bar is wrong)")
     seed_state = gr.State(-1)
     with gr.Row():
         with gr.Column(scale=2):
@@ -125,6 +173,7 @@ with gr.Blocks(theme='gradio/soft') as demo:
             output_image = gr.Image(label="Image Result", interactive=False, height="60vh", visible=True)
             output_video = gr.Video(label="Video Result", interactive=False, height="60vh", visible=False)
             status_textbox = gr.Textbox(label="Status", interactive=False)
+
     def update_ui_on_model_change(model_key):
         is_video = "Video" in model_key
         is_turbo = "Turbo" in model_key
@@ -138,13 +187,14 @@ with gr.Blocks(theme='gradio/soft') as demo:
             output_video: gr.update(visible=is_video)
         }
     model_selector.change(update_ui_on_model_change, model_selector, [steps_slider, cfg_slider, width_slider, height_slider, num_frames_slider, output_image, output_video])
+
     click_event = generate_button.click(
         fn=lambda s: (s if s != -1 else random.randint(0, 2**32 - 1)),
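+        # Step one of the chain resolves the seed: -1 means 'pick a random seed', so the actual value can be reported in the status box.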
         inputs=seed_input,
         outputs=seed_state,
         queue=False
     ).then(
+        fn=generate_media_live_progress,  # Use the new function with progress
         inputs=[model_selector, prompt_input, negative_prompt_input, steps_slider, cfg_slider, width_slider, height_slider, seed_state, num_frames_slider],
         outputs=[output_image, output_video, status_textbox]
     )