Update KOKORO/utils.py
Browse files
- KOKORO/utils.py +9 -6
KOKORO/utils.py
CHANGED
|
@@ -226,7 +226,7 @@ def parse_speechtypes_text(gen_text):
|
|
| 226 |
|
| 227 |
return segments
|
| 228 |
|
| 229 |
-
def podcast(MODEL, device, gen_text, speed=1.0, trim=0, pad_between_segments=0, remove_silence=True, minimum_silence=50):
|
| 230 |
segments = parse_speechtypes_text(gen_text)
|
| 231 |
speed = clamp_speed(speed)
|
| 232 |
trim = clamp_trim(trim)
|
|
@@ -276,16 +276,20 @@ def podcast(MODEL, device, gen_text, speed=1.0, trim=0, pad_between_segments=0,
|
|
| 276 |
return output_file
|
| 277 |
|
| 278 |
def tts(MODEL,device,text, voice_name, speed=1.0, trim=0.5, pad_between_segments=0.5, output_file="",remove_silence=True,minimum_silence=50):
|
| 279 |
-
|
| 280 |
-
segments = large_text(text, voice_name)
|
| 281 |
voice_pack_path = f"./KOKORO/voices/{voice_name}.pt"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
VOICEPACK = torch.load(voice_pack_path, weights_only=True).to(device)
|
| 283 |
speed = clamp_speed(speed)
|
| 284 |
trim = clamp_trim(trim)
|
| 285 |
silence_duration = clamp_trim(pad_between_segments)
|
| 286 |
output_file=get_random_file_name(output_file)
|
| 287 |
if debug:
|
| 288 |
-
print(f'Loaded voice: {
|
| 289 |
print(f"Speed: {speed}")
|
| 290 |
print(f"Trim: {trim}")
|
| 291 |
print(f"Silence duration: {silence_duration}")
|
|
@@ -305,7 +309,7 @@ def tts(MODEL,device,text, voice_name, speed=1.0, trim=0.5, pad_between_segments
|
|
| 305 |
text = i[1]
|
| 306 |
if debug:
|
| 307 |
print(i)
|
| 308 |
-
audio, out_ps = generate(MODEL, text, VOICEPACK, lang=
|
| 309 |
audio = trim_if_needed(audio, trim)
|
| 310 |
|
| 311 |
# Scale audio from float32 to int16
|
|
@@ -339,4 +343,3 @@ def tts_file_name(text):
|
|
| 339 |
# Construct the file name
|
| 340 |
file_name = f"{temp_folder}/{truncated_text}_{random_string}.wav"
|
| 341 |
return file_name
|
| 342 |
-
|
|
|
|
| 226 |
|
| 227 |
return segments
|
| 228 |
|
| 229 |
+
def podcast(MODEL, device, gen_text, speed=1.0, trim=0.5, pad_between_segments=0, remove_silence=True, minimum_silence=50):
|
| 230 |
segments = parse_speechtypes_text(gen_text)
|
| 231 |
speed = clamp_speed(speed)
|
| 232 |
trim = clamp_trim(trim)
|
|
|
|
| 276 |
return output_file
|
| 277 |
|
| 278 |
def tts(MODEL,device,text, voice_name, speed=1.0, trim=0.5, pad_between_segments=0.5, output_file="",remove_silence=True,minimum_silence=50):
|
| 279 |
+
language = voice_name[0]
|
|
|
|
| 280 |
voice_pack_path = f"./KOKORO/voices/{voice_name}.pt"
|
| 281 |
+
if voice_name.endswith(".pt"):
|
| 282 |
+
language="a"
|
| 283 |
+
voice_pack_path=voice_name
|
| 284 |
+
text=clean_text(text)
|
| 285 |
+
segments = large_text(text, language)
|
| 286 |
VOICEPACK = torch.load(voice_pack_path, weights_only=True).to(device)
|
| 287 |
speed = clamp_speed(speed)
|
| 288 |
trim = clamp_trim(trim)
|
| 289 |
silence_duration = clamp_trim(pad_between_segments)
|
| 290 |
output_file=get_random_file_name(output_file)
|
| 291 |
if debug:
|
| 292 |
+
print(f'Loaded voice: {voice_pack_path}')
|
| 293 |
print(f"Speed: {speed}")
|
| 294 |
print(f"Trim: {trim}")
|
| 295 |
print(f"Silence duration: {silence_duration}")
|
|
|
|
| 309 |
text = i[1]
|
| 310 |
if debug:
|
| 311 |
print(i)
|
| 312 |
+
audio, out_ps = generate(MODEL, text, VOICEPACK, lang=language, speed=speed)
|
| 313 |
audio = trim_if_needed(audio, trim)
|
| 314 |
|
| 315 |
# Scale audio from float32 to int16
|
|
|
|
| 343 |
# Construct the file name
|
| 344 |
file_name = f"{temp_folder}/{truncated_text}_{random_string}.wav"
|
| 345 |
return file_name
|
|
|