khof312 committed on
Commit
e42ffa2
1 Parent(s): 3a35ac0

Troubleshoot file writing problem.

Browse files

Run Coqui synthesis in memory so that no temporary WAV file is ever written. Switch eSpeak NG synthesis to the espeak-ng command line, invoked through the subprocess package.

Files changed (1) hide show
  1. src/synthesize.py +12 -13
src/synthesize.py CHANGED
@@ -8,6 +8,7 @@ import subprocess
8
  from scipy.io import wavfile
9
  from transformers import pipeline
10
  import os
 
11
 
12
  def synth_mms(text:str, model:str):
13
  '''
@@ -42,7 +43,9 @@ def synth_coqui(text:str, model:str):
42
  text: Text to synthesize
43
  model: Model code
44
  Returns:
45
- Streaming Wav and sampling rate.
 
 
46
  '''
47
  if model is not None:
48
  # Get device
@@ -50,14 +53,11 @@ def synth_coqui(text:str, model:str):
50
 
51
  # Init TTS
52
  tts = TTS(model, progress_bar=False).to(device)
53
-
54
- tts.tts_to_file(text=text, file_path="test.wav", is_multi_speaker=False)
55
-
56
- sampling_rate, wav = wavfile.read('test.wav')
57
- os.remove("test.wav")
58
 
59
- #wav = tts.tts(text=text)
60
- return wav, sampling_rate
61
  else:
62
  return None
63
 
@@ -74,12 +74,11 @@ def synth_espeakng(text:str, model:str):
74
  '''
75
  if model is not None:
76
 
77
- #subprocess.run(['espeak-ng', f'-v{model}', "-w test.wav", text]) #.returncode
78
- esng = espeakng.Speaker()
79
- esng.voice = model
80
- esng.say(text, export_path="test.wav")
81
 
82
- print(os.listdir())
83
  sampling_rate, wav = wavfile.read('test.wav')
84
  os.remove("test.wav")
85
 
 
8
  from scipy.io import wavfile
9
  from transformers import pipeline
10
  import os
11
+ import numpy as np
12
 
13
  def synth_mms(text:str, model:str):
14
  '''
 
43
  text: Text to synthesize
44
  model: Model code
45
  Returns:
46
+ Streaming Wav and sampling rate.
47
+
48
+ IMPORTANT: The current implementation assumes a 22050 Hz sampling rate; verify this when adding a new model.
49
  '''
50
  if model is not None:
51
  # Get device
 
53
 
54
  # Init TTS
55
  tts = TTS(model, progress_bar=False).to(device)
56
+
57
+ # Infer
58
+ wav = tts.tts(text=text) # is_multi_speaker=False
 
 
59
 
60
+ return np.array(wav), 22050
 
61
  else:
62
  return None
63
 
 
74
  '''
75
  if model is not None:
76
 
77
+ subprocess.run(['espeak-ng', f'-v{model}', "-w test.wav", text])
78
+ #esng = espeakng.Speaker()
79
+ #esng.voice = model
80
+ #esng.say(text, export_path="test.wav")
81
 
 
82
  sampling_rate, wav = wavfile.read('test.wav')
83
  os.remove("test.wav")
84