chenxie95 committed on
Commit
265d119
·
verified ·
1 Parent(s): 00aa78e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -10,15 +10,15 @@ from model import DCCRN # requires model.py and utils/ dependencies
10
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
11
  SR = int(os.getenv("SAMPLE_RATE", "16000"))
12
 
13
- # Model repo & filename (override via Space Variables)
14
- REPO_ID = os.getenv("MODEL_REPO_ID", "chenxie95/DCCRN") # <- change default if needed
15
- FILENAME = os.getenv("MODEL_FILENAME", "epoch=44-step=113895.ckpt")
16
  TOKEN = os.getenv("HF_TOKEN") # only required if the model repo is private
17
 
18
  # ===== Download & load weights =====
19
  ckpt_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, token=TOKEN)
20
 
21
- net = DCCRN() # instantiate with your training-time args if they differ
22
  ckpt = torch.load(ckpt_path, map_location=DEVICE)
23
  state = ckpt.get("state_dict", ckpt)
24
  state = {k.replace("model.", "").replace("module.", ""): v for k, v in state.items()}
@@ -31,11 +31,14 @@ def enhance(audio_path: str):
31
  x = torch.from_numpy(wav).float().to(DEVICE)
32
  if x.ndim == 1:
33
  x = x.unsqueeze(0) # [1, T]
 
34
  with torch.no_grad():
 
35
  try:
36
- y = net(x.unsqueeze(1)) # try [B,1,T]
37
  except Exception:
38
- y = net(x) # fallback [B,T]
 
39
  y = y.squeeze().detach().cpu().numpy()
40
  return (SR, y)
41
 
@@ -44,28 +47,26 @@ with gr.Blocks() as demo:
44
  gr.Markdown(
45
  """
46
  # 🎧 DCCRN Speech Enhancement (Demo)
 
47
 
48
- **How to use:** drag & drop a noisy audio clip (or upload / record) → click **Enhance** → listen & download the result.
49
  **Sample audio:** click a sample below to auto-fill the input, then click **Enhance**.
50
  """
51
  )
52
 
53
  with gr.Row():
54
  inp = gr.Audio(
55
- sources=["upload", "microphone"], # drag & drop supported
56
  type="filepath",
57
- label="Input: noisy speech",
58
- placeholder="Drag & drop or click to upload / record",
59
- show_label=True,
60
  )
61
  out = gr.Audio(
62
  label="Output: enhanced speech (downloadable)",
63
- show_download_button=True,
64
  )
65
 
66
  enhance_btn = gr.Button("Enhance")
67
 
68
- # On-page sample clips (ensure these files exist under examples/)
69
  gr.Examples(
70
  examples=[
71
  ["examples/noisy_1.wav"],
@@ -80,6 +81,6 @@ with gr.Blocks() as demo:
80
  # Gradio ≥4.44: set concurrency on the event listener
81
  enhance_btn.click(enhance, inputs=inp, outputs=out, concurrency_limit=1)
82
 
83
- # Keep a small queue to avoid OOM
84
  demo.queue(max_size=16)
85
  demo.launch()
 
10
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
11
  SR = int(os.getenv("SAMPLE_RATE", "16000"))
12
 
13
+ # Read model repo and filename from environment variables
14
+ REPO_ID = os.getenv("MODEL_REPO_ID", "Ada312/DCCRN") # change default if needed
15
+ FILENAME = os.getenv("MODEL_FILENAME", "dccrn.ckpt")
16
  TOKEN = os.getenv("HF_TOKEN") # only required if the model repo is private
17
 
18
  # ===== Download & load weights =====
19
  ckpt_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, token=TOKEN)
20
 
21
+ net = DCCRN() # if you trained with custom args, instantiate with the same args here
22
  ckpt = torch.load(ckpt_path, map_location=DEVICE)
23
  state = ckpt.get("state_dict", ckpt)
24
  state = {k.replace("model.", "").replace("module.", ""): v for k, v in state.items()}
 
31
  x = torch.from_numpy(wav).float().to(DEVICE)
32
  if x.ndim == 1:
33
  x = x.unsqueeze(0) # [1, T]
34
+
35
  with torch.no_grad():
36
+ # Many DCCRNs expect [B,1,T]; try that first, fallback to [B,T]
37
  try:
38
+ y = net(x.unsqueeze(1)) # [1, 1, T]
39
  except Exception:
40
+ y = net(x) # [1, T]
41
+
42
  y = y.squeeze().detach().cpu().numpy()
43
  return (SR, y)
44
 
 
47
  gr.Markdown(
48
  """
49
  # 🎧 DCCRN Speech Enhancement (Demo)
50
+ **How to use:** drag & drop a noisy audio clip (or upload / record) → click **Enhance** → listen & download the result.
51
 
 
52
  **Sample audio:** click a sample below to auto-fill the input, then click **Enhance**.
53
  """
54
  )
55
 
56
  with gr.Row():
57
  inp = gr.Audio(
58
+ sources=["upload", "microphone"], # drag & drop supported by default
59
  type="filepath",
60
+ label="Input: noisy speech (drag & drop or upload / record)"
 
 
61
  )
62
  out = gr.Audio(
63
  label="Output: enhanced speech (downloadable)",
64
+ show_download_button=True
65
  )
66
 
67
  enhance_btn = gr.Button("Enhance")
68
 
69
+ # On-page sample clips (make sure these files exist in the repo)
70
  gr.Examples(
71
  examples=[
72
  ["examples/noisy_1.wav"],
 
81
  # Gradio ≥4.44: set concurrency on the event listener
82
  enhance_btn.click(enhance, inputs=inp, outputs=out, concurrency_limit=1)
83
 
84
+ # Queue: keep a small queue to avoid OOM
85
  demo.queue(max_size=16)
86
  demo.launch()