Staticaliza committed (verified)
Commit 234b163 · Parent: c251257

Update app.py

Files changed (1): app.py (+23 -1)
app.py CHANGED
@@ -34,11 +34,21 @@ def generate(input, language, speaker_audio, emotion_happy, emotion_sad, emotion
 
     speaker_embedding = None
     if speaker_audio is not None:
+        print(1)
+        print(speaker_audio)
         wav, sr = torchaudio.load(speaker_audio)
+        print(2)
+        print(wav)
+        print(sr)
         speaker_embedding = (model.make_speaker_embedding(wav, sr).to(device, dtype=torch.bfloat16))
+        print(3)
+        print(speaker_embedding)
 
     emotion_tensor = torch.tensor([emotion_happy, emotion_sad, emotion_disgust, emotion_fear, emotion_surprise, emotion_anger, emotion_other, emotion_neutral], device=device, dtype=torch.bfloat16)
     vq_tensor = torch.tensor([clarity] * 8, device=device, dtype=torch.bfloat16).unsqueeze(0)
+    print(4)
+    print(emotion_tensor)
+    print(vq_tensor)
 
     cond_dict = make_cond_dict(
         text=input,
@@ -52,8 +62,12 @@ def generate(input, language, speaker_audio, emotion_happy, emotion_sad, emotion
         dnsmos_ovrl=float(dnsmos_ovrl),
         device=device,
     )
+    print(5)
+    print(cond_dict)
 
     conditioning = model.prepare_conditioning(cond_dict)
+    print(6)
+    print(conditioning)
 
     codes = model.generate(
         prefix_conditioning=conditioning,
@@ -62,12 +76,20 @@ def generate(input, language, speaker_audio, emotion_happy, emotion_sad, emotion
         batch_size=1,
         sampling_params=dict(min_p=float(min_p)),
     )
+    print(7)
+    print(codes)
 
     wav_out = model.autoencoder.decode(codes).cpu().detach()
     sr_out = model.autoencoder.sampling_rate
+    print(8)
+    print(wav_out)
+    print(sr_out)
 
     if wav_out.dim() == 2 and wav_out.size(0) > 1: wav_out = wav_out[0:1, :]
-
+
+    print(9)
+    print((sr_out, wav_out.squeeze().numpy()))
+
     return (sr_out, wav_out.squeeze().numpy())
 
 # Initialize
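
For context, the calls being instrumented here follow the Zonos text-to-speech flow: embed an optional reference speaker, build a conditioning dict, prepare the conditioning, autoregressively generate codes, then decode them with the autoencoder. Below is a minimal standalone sketch of that flow, assuming the zonos package and the Zyphra/Zonos-v0.1-transformer checkpoint; the import paths, model id, and the language/speaker/emotion/vqscore_8 keyword names follow the Zonos README and are assumptions, not taken from this commit.

# Minimal sketch of the generation flow traced by the numbered prints above.
# Assumes the zonos package (github.com/Zyphra/Zonos); the model id, import
# paths, and keyword names follow its README and are not part of this commit.
import torch
import torchaudio
from zonos.model import Zonos
from zonos.conditioning import make_cond_dict

device = "cuda" if torch.cuda.is_available() else "cpu"
model = Zonos.from_pretrained("Zyphra/Zonos-v0.1-transformer", device=device)

# Optional voice cloning: embed a reference clip, as in the speaker_audio branch.
wav, sr = torchaudio.load("reference.wav")  # hypothetical reference clip
speaker_embedding = model.make_speaker_embedding(wav, sr).to(device, dtype=torch.bfloat16)

# Eight emotion weights and eight VQ-score values, mirroring the tensors in the diff.
emotion_tensor = torch.tensor([1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2], device=device, dtype=torch.bfloat16)
vq_tensor = torch.tensor([0.78] * 8, device=device, dtype=torch.bfloat16).unsqueeze(0)

cond_dict = make_cond_dict(
    text="Hello from Zonos.",
    language="en-us",
    speaker=speaker_embedding,
    emotion=emotion_tensor,
    vqscore_8=vq_tensor,
    dnsmos_ovrl=4.0,
    device=device,
)
conditioning = model.prepare_conditioning(cond_dict)

# Autoregressive code generation, then autoencoder decode back to a waveform.
codes = model.generate(
    prefix_conditioning=conditioning,
    batch_size=1,
    sampling_params=dict(min_p=0.1),
)
wavs = model.autoencoder.decode(codes).cpu()
torchaudio.save("sample.wav", wavs[0], model.autoencoder.sampling_rate)

The numbered prints added by the commit mark each stage of this pipeline, so the Space's container logs show exactly where a bad input or a dtype mismatch first surfaces.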