Spaces:
Running
on
A10G
Running
on
A10G
Fix OOM
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import pathlib
|
|
5 |
import time
|
6 |
import tempfile
|
7 |
import platform
|
|
|
8 |
if platform.system().lower() == 'windows':
|
9 |
temp = pathlib.PosixPath
|
10 |
pathlib.PosixPath = pathlib.WindowsPath
|
@@ -113,6 +114,9 @@ def transcribe_one(model, audio_path):
|
|
113 |
text_pr = result.text
|
114 |
if text_pr.strip(" ")[-1] not in "?!.,。,?!。、":
|
115 |
text_pr += "."
|
|
|
|
|
|
|
116 |
return lang, text_pr
|
117 |
|
118 |
def make_npz_prompt(name, uploaded_audio, recorded_audio, transcript_content):
|
@@ -154,6 +158,10 @@ def make_npz_prompt(name, uploaded_audio, recorded_audio, transcript_content):
|
|
154 |
# save as npz file
|
155 |
np.savez(os.path.join(tempfile.gettempdir(), f"{name}.npz"),
|
156 |
audio_tokens=audio_tokens, text_tokens=text_tokens, lang_code=lang2code[lang_pr])
|
|
|
|
|
|
|
|
|
157 |
return message, os.path.join(tempfile.gettempdir(), f"{name}.npz")
|
158 |
|
159 |
|
@@ -176,6 +184,8 @@ def make_prompt(name, wav, sr, save=True):
|
|
176 |
if not save:
|
177 |
os.remove(f"./prompts/{name}.wav")
|
178 |
os.remove(f"./prompts/{name}.txt")
|
|
|
|
|
179 |
|
180 |
return text, lang
|
181 |
|
@@ -250,6 +260,8 @@ def infer_from_audio(text, language, accent, audio_prompt, record_audio_prompt,
|
|
250 |
)
|
251 |
|
252 |
message = f"text prompt: {text_pr}\nsythesized text: {text}"
|
|
|
|
|
253 |
return message, (24000, samples[0][0].cpu().numpy())
|
254 |
|
255 |
@torch.no_grad()
|
@@ -306,6 +318,9 @@ def infer_from_prompt(text, language, accent, preset_prompt, prompt_file):
|
|
306 |
)
|
307 |
|
308 |
message = f"sythesized text: {text}"
|
|
|
|
|
|
|
309 |
return message, (24000, samples[0][0].cpu().numpy())
|
310 |
|
311 |
|
@@ -439,6 +454,7 @@ def infer_long_text(text, preset_prompt, prompt=None, language='auto', accent='n
|
|
439 |
[(complete_tokens, None)]
|
440 |
)
|
441 |
message = f"Cut into {len(sentences)} sentences"
|
|
|
442 |
return message, (24000, samples[0][0].cpu().numpy())
|
443 |
else:
|
444 |
raise ValueError(f"No such mode {mode}")
|
|
|
5 |
import time
|
6 |
import tempfile
|
7 |
import platform
|
8 |
+
import gc
|
9 |
if platform.system().lower() == 'windows':
|
10 |
temp = pathlib.PosixPath
|
11 |
pathlib.PosixPath = pathlib.WindowsPath
|
|
|
114 |
text_pr = result.text
|
115 |
if text_pr.strip(" ")[-1] not in "?!.,。,?!。、":
|
116 |
text_pr += "."
|
117 |
+
|
118 |
+
# drop references to intermediates no longer needed (the returned lang and text_pr are kept)
|
119 |
+
del audio, mel, probs, result
|
120 |
return lang, text_pr
|
121 |
|
122 |
def make_npz_prompt(name, uploaded_audio, recorded_audio, transcript_content):
|
|
|
158 |
# save as npz file
|
159 |
np.savez(os.path.join(tempfile.gettempdir(), f"{name}.npz"),
|
160 |
audio_tokens=audio_tokens, text_tokens=text_tokens, lang_code=lang2code[lang_pr])
|
161 |
+
|
162 |
+
# drop references to the prompt tensors and inputs now that the npz file has been written
|
163 |
+
del audio_tokens, text_tokens, phonemes, lang_pr, text_pr, wav_pr, sr, uploaded_audio, recorded_audio
|
164 |
+
|
165 |
return message, os.path.join(tempfile.gettempdir(), f"{name}.npz")
|
166 |
|
167 |
|
|
|
184 |
if not save:
|
185 |
os.remove(f"./prompts/{name}.wav")
|
186 |
os.remove(f"./prompts/{name}.txt")
|
187 |
+
# drop references to locals no longer needed (the returned text and lang are kept)
|
188 |
+
del lang_token, wav, sr
|
189 |
|
190 |
return text, lang
|
191 |
|
|
|
260 |
)
|
261 |
|
262 |
message = f"text prompt: {text_pr}\nsythesized text: {text}"
|
263 |
+
# drop references to prompt/intermediate objects; samples and message are kept for the return value
|
264 |
+
del audio_prompts, text_tokens, text_prompts, phone_tokens, encoded_frames, wav_pr, sr, audio_prompt, record_audio_prompt, transcript_content
|
265 |
return message, (24000, samples[0][0].cpu().numpy())
|
266 |
|
267 |
@torch.no_grad()
|
|
|
318 |
)
|
319 |
|
320 |
message = f"sythesized text: {text}"
|
321 |
+
|
322 |
+
# drop references to prompt/intermediate objects; samples and message are kept for the return value
|
323 |
+
del audio_prompts, text_tokens, text_prompts, phone_tokens, encoded_frames, prompt_file, preset_prompt
|
324 |
return message, (24000, samples[0][0].cpu().numpy())
|
325 |
|
326 |
|
|
|
454 |
[(complete_tokens, None)]
|
455 |
)
|
456 |
message = f"Cut into {len(sentences)} sentences"
|
457 |
+
|
458 |
return message, (24000, samples[0][0].cpu().numpy())
|
459 |
else:
|
460 |
raise ValueError(f"No such mode {mode}")
|