jadechoghari committed on
Commit 7e40a31 · verified · 1 Parent(s): 2879448

Update app.py

Files changed (1)
  1. app.py +121 -22
app.py CHANGED
@@ -1,55 +1,153 @@
 from typing import Tuple, Union
 import gradio as gr
-import os
-from PIL import Image
 import spaces
 
-CACHE_DIR = "gradio_cached_examples"
 
 
 def load_cached_example_outputs(example_index: int) -> Tuple[str, str]:
     cached_dir = os.path.join(CACHE_DIR, str(example_index))  # Use the example index to find the directory
     cached_image_path = os.path.join(cached_dir, "processed_image.png")
     cached_audio_path = os.path.join(cached_dir, "audio.wav")
 
     if os.path.exists(cached_image_path) and os.path.exists(cached_audio_path):
         return cached_image_path, cached_audio_path
     else:
         raise FileNotFoundError(f"Cached outputs not found for example {example_index}")
 
-description_text = """# SEE-2-SOUND 🔊 Demo
 
 Official demo for *SEE-2-SOUND 🔊: Zero-Shot Spatial Environment-to-Spatial Sound*.
 """
 
 css = """
-h1 { text-align: center; }
 """
 
-@spaces.GPU
 with gr.Blocks(css=css) as demo:
     gr.Markdown(description_text)
 
     with gr.Row():
         with gr.Column():
-            image = gr.Image(label="Select an image", sources=["upload", "webcam"], type="filepath")
 
             with gr.Accordion("Advanced Settings", open=False):
-                steps = gr.Slider(label="Diffusion Steps", minimum=1, maximum=1000, step=1, value=500)
-                prompt = gr.Text(label="Prompt", max_lines=1, placeholder="Enter your prompt")
-                num_audios = gr.Slider(label="Number of Audios", minimum=1, maximum=10, step=1, value=3)
 
             submit_button = gr.Button("Submit")
 
         with gr.Column():
             processed_image = gr.Image(label="Processed Image")
-            generated_audio = gr.Audio(label="Generated Audio", show_download_button=True)
-
-
-    def on_example_click(*args, **kwargs):
-        return load_cached_example_outputs(1)  # Always load example 1 for now
-
-
 
     gr.Examples(
         examples=[["examples/1.png", 3, "A scenic mountain view", 500]],  # Example input
@@ -59,12 +157,13 @@ with gr.Blocks(css=css) as demo:
         fn=on_example_click  # Load the cached output when the example is clicked
     )
 
-
-    submit_button.click(
-        fn=on_example_click,
         inputs=[image, num_audios, prompt, steps],
-        outputs=[processed_image, generated_audio]
     )
 
 if __name__ == "__main__":
-    demo.launch()
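
One fix worth calling out: in the old revision, `@spaces.GPU` sat directly above the `with gr.Blocks(...)` statement, which Python rejects (a decorator must be followed by a `def` or `class`). The new revision applies the decorator to a dedicated inference handler instead. A minimal sketch of that pattern (hypothetical handler body, not the Space's actual code):

    import spaces
    import torch

    @spaces.GPU(duration=280)  # request a ZeroGPU slot for this call; duration taken from the new revision
    @torch.no_grad()           # inference only, no gradients needed
    def process_image(image, num_audios, prompt, steps):
        ...  # run the model here, as in the full listing below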
 
 
rishitdagli/see-2-sound · app.py · Update app.py (852e4aa, verified, about 14 hours ago) · 3.41 kB
 from typing import Tuple, Union
+
 import gradio as gr
+import numpy as np
+import see2sound
 import spaces
+import torch
+import yaml
+from huggingface_hub import snapshot_download
+
+model_id = "rishitdagli/see-2-sound"
+base_path = snapshot_download(repo_id=model_id)
 
+with open("config.yaml", "r") as file:
+    data = yaml.safe_load(file)
+data_str = yaml.dump(data)
+updated_data_str = data_str.replace("checkpoints", base_path)
+updated_data = yaml.safe_load(updated_data_str)
+with open("config.yaml", "w") as file:
+    yaml.safe_dump(updated_data, file)
 
+model = see2sound.See2Sound(config_path="config.yaml")
+model.setup()
 
+# for local cache
 def load_cached_example_outputs(example_index: int) -> Tuple[str, str]:
     cached_dir = os.path.join(CACHE_DIR, str(example_index))  # Use the example index to find the directory
     cached_image_path = os.path.join(cached_dir, "processed_image.png")
     cached_audio_path = os.path.join(cached_dir, "audio.wav")
 
+    # Ensure cached files exist
     if os.path.exists(cached_image_path) and os.path.exists(cached_audio_path):
         return cached_image_path, cached_audio_path
     else:
         raise FileNotFoundError(f"Cached outputs not found for example {example_index}")
 
+# Function to handle the example click; it now accepts arbitrary arguments
+def on_example_click(*args, **kwargs):
+    return load_cached_example_outputs(1)  # Always load example 1 for now
+
+
+@spaces.GPU(duration=280)
+@torch.no_grad()
+def process_image(
+    image: str, num_audios: int, prompt: Union[str, None], steps: Union[int, None]
+) -> Tuple[str, str]:
+    model.run(
+        path=image,
+        output_path="audio.wav",
+        num_audios=num_audios,
+        prompt=prompt,
+        steps=steps,
+    )
+    return image, "audio.wav"
 
+
+description_text = """# SEE-2-SOUND 🔊 Demo
 Official demo for *SEE-2-SOUND 🔊: Zero-Shot Spatial Environment-to-Spatial Sound*.
+Please refer to our [paper](https://arxiv.org/abs/2406.06612), [project page](https://see2sound.github.io/), or [github](https://github.com/see2sound/see2sound) for more details.
+> Note: You should make sure that your hardware supports spatial audio.
+This demo allows you to generate spatial audio given an image. Upload an image (with an optional text prompt in the advanced settings) to generate spatial audio to accompany the image.
 """
 
 css = """
+h1 {
+    text-align: center;
+}
 """
 
 with gr.Blocks(css=css) as demo:
     gr.Markdown(description_text)
 
     with gr.Row():
         with gr.Column():
+            image = gr.Image(
+                label="Select an image", sources=["upload", "webcam"], type="filepath"
+            )
 
             with gr.Accordion("Advanced Settings", open=False):
+                steps = gr.Slider(
+                    label="Diffusion Steps", minimum=1, maximum=1000, step=1, value=500
+                )
+                prompt = gr.Text(
+                    label="Prompt",
+                    show_label=True,
+                    max_lines=1,
+                    placeholder="Enter your prompt",
+                    container=True,
+                )
+                num_audios = gr.Slider(
+                    label="Number of Audios", minimum=1, maximum=10, step=1, value=3
+                )
 
             submit_button = gr.Button("Submit")
 
         with gr.Column():
             processed_image = gr.Image(label="Processed Image")
+            generated_audio = gr.Audio(
+                label="Generated Audio",
+                show_download_button=True,
+                show_share_button=True,
+                waveform_options=gr.WaveformOptions(
+                    waveform_color="#01C6FF",
+                    waveform_progress_color="#0066B4",
+                    show_controls=True,
+                ),
+            )
 
     gr.Examples(
         examples=[["examples/1.png", 3, "A scenic mountain view", 500]],  # Example input
@@ -59,12 +157,13 @@ with gr.Blocks(css=css) as demo:
         fn=on_example_click  # Load the cached output when the example is clicked
     )
 
+    gr.on(
+        triggers=[submit_button.click],
+        fn=process_image,
         inputs=[image, num_audios, prompt, steps],
+        outputs=[processed_image, generated_audio],
     )
 
 if __name__ == "__main__":
+    demo.launch()
+
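
`load_cached_example_outputs` is kept unchanged, but the diff shows `import os` and the `CACHE_DIR` constant being removed without a visible replacement, even though the function still uses both. Unless they are defined elsewhere in the new file, they would need to stay in scope for the cached examples to load. A minimal sketch, reusing the values from the previous revision:

    import os

    # Gradio's example cache: one subfolder per example index, as read by load_cached_example_outputs
    CACHE_DIR = "gradio_cached_examples"

    # e.g. gradio_cached_examples/1/processed_image.png and gradio_cached_examples/1/audio.wav
    image_path, audio_path = load_cached_example_outputs(1)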