anton-l HF staff committed on
Commit
d342025
·
1 Parent(s): 82e87c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -1,13 +1,20 @@
1
  import os
2
  import gradio as gr
3
  import torch
4
- import librosa
5
  import torchaudio
6
  from torchaudio.sox_effects import apply_effects_tensor
 
7
  from transformers import AutoFeatureExtractor, AutoModelForAudioXVector
8
 
9
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
 
 
 
 
 
 
 
11
  STYLE = """
12
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" integrity="sha256-YvdLHPgkqJ8DVUxjjnGVlMMJtNimJ6dYkowFFvp4kKs=" crossorigin="anonymous">
13
  """
@@ -57,9 +64,9 @@ def similarity_fn(path1, path2):
57
  if not (path1 and path2):
58
  return '<b style="color:red">ERROR: Please record audio for *both* speakers!</b>'
59
 
60
- wav1, sr1 = librosa.load(path1, mono=True)
61
  wav1, _ = apply_effects_tensor(torch.tensor(wav1).unsqueeze(0), sr1, EFFECTS)
62
- wav2, sr2 = librosa.load(path2, mono=True)
63
  wav2, _ = apply_effects_tensor(torch.tensor(wav2).unsqueeze(0), sr2, EFFECTS)
64
  print(wav1.shape, wav2.shape)
65
 
 
1
  import os
2
  import gradio as gr
3
  import torch
4
+ import pydub
5
  import torchaudio
6
  from torchaudio.sox_effects import apply_effects_tensor
7
+ import numpy as np
8
  from transformers import AutoFeatureExtractor, AutoModelForAudioXVector
9
 
10
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
 
12
+ def load_audio(file_name):
13
+ audio = pydub.AudioSegment.from_file(file_name)
14
+ arr = np.array(audio.get_array_of_samples(), dtype=np.float32)
15
+ arr = arr / (1 << (8 * audio.sample_width - 1))
16
+ return arr, audio.frame_rate
17
+
18
  STYLE = """
19
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" integrity="sha256-YvdLHPgkqJ8DVUxjjnGVlMMJtNimJ6dYkowFFvp4kKs=" crossorigin="anonymous">
20
  """
 
64
  if not (path1 and path2):
65
  return '<b style="color:red">ERROR: Please record audio for *both* speakers!</b>'
66
 
67
+ wav1, sr1 = load_audio(path1)
68
  wav1, _ = apply_effects_tensor(torch.tensor(wav1).unsqueeze(0), sr1, EFFECTS)
69
+ wav2, sr2 = load_audio(path2)
70
  wav2, _ = apply_effects_tensor(torch.tensor(wav2).unsqueeze(0), sr2, EFFECTS)
71
  print(wav1.shape, wav2.shape)
72