Update app.py

app.py CHANGED
@@ -23,16 +23,6 @@ from mutagen.mp3 import MP3
 from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
 from TTS.api import TTS
 import pickle
-import subprocess
-
-# Install OpenVoice from GitHub during startup
-try:
-    import openvoice
-except ImportError:
-    print("Installing OpenVoice from GitHub...")
-    subprocess.run(["pip", "install", "git+https://github.com/myshell-ai/OpenVoice.git"])
-from openvoice.se_extractor import get_se
-from openvoice.api import ToneColorConverter
 
 # Suppress warnings
 warnings.filterwarnings("ignore")
@@ -292,23 +282,6 @@ def generate_tts(text):
     tts.tts_to_file(text=text, file_path=out_path)
     return out_path
 
-# === Save/Load Project File (.aiproj) ===
-def save_project(audio_path, preset_name, effects):
-    project_data = {
-        "audio": AudioSegment.from_file(audio_path).raw_data,
-        "preset": preset_name,
-        "effects": effects
-    }
-    out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
-    with open(out_path, "wb") as f:
-        pickle.dump(project_data, f)
-    return out_path
-
-def load_project(project_file):
-    with open(project_file.name, "rb") as f:
-        data = pickle.load(f)
-    return data["preset"], data["effects"]
-
 # === Trim Silence Automatically (VAD) ===
 def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
     audio = AudioSegment.from_file(audio_file)
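This hunk only removes save_project and load_project from their old position; the next hunk re-adds them unchanged after mix_tracks. For reference, a minimal round trip of the .aiproj format they define, with the two function bodies copied from the diff. The input file, preset name, and effect list are placeholder values, the pydub import mirrors what app.py appears to use for AudioSegment, and the Upload class is a hypothetical stand-in for the gr.File value (which exposes the chosen path as .name):

import os
import pickle
import tempfile

from pydub import AudioSegment  # assumption: app.py's AudioSegment comes from pydub

def save_project(audio_path, preset_name, effects):
    # Pickle the raw audio bytes together with the preset name and effect list.
    project_data = {
        "audio": AudioSegment.from_file(audio_path).raw_data,
        "preset": preset_name,
        "effects": effects
    }
    out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
    with open(out_path, "wb") as f:
        pickle.dump(project_data, f)
    return out_path

def load_project(project_file):
    # Only the preset and effects are read back; they feed the two "Loaded ..." components.
    with open(project_file.name, "rb") as f:
        data = pickle.load(f)
    return data["preset"], data["effects"]

class Upload:
    """Hypothetical stand-in for a Gradio file upload, which exposes the path as .name."""
    def __init__(self, name):
        self.name = name

proj = save_project("input.wav", "Podcast", ["Noise Reduction", "Normalize"])  # placeholder inputs
print(load_project(Upload(proj)))  # -> ('Podcast', ['Noise Reduction', 'Normalize'])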
@@ -336,27 +309,244 @@ def mix_tracks(track1, track2, volume_offset=0):
     mixed.export(out_path, format="wav")
     return out_path
 
-# ===
[the remaining removed lines of this hunk, apparently the previous clone_voice implementation, are not legible in this capture]
+# === Save/Load Project File (.aiproj) ===
+def save_project(audio_path, preset_name, effects):
+    project_data = {
+        "audio": AudioSegment.from_file(audio_path).raw_data,
+        "preset": preset_name,
+        "effects": effects
+    }
+    out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
+    with open(out_path, "wb") as f:
+        pickle.dump(project_data, f)
+    return out_path
+
+def load_project(project_file):
+    with open(project_file.name, "rb") as f:
+        data = pickle.load(f)
+    return data["preset"], data["effects"]
+
+# === Auto-Save / Resume Sessions ===
+def save_or_resume_session(audio, preset, effects, action="save"):
+    if action == "save":
+        return {"audio": audio, "preset": preset, "effects": effects}, None, None, None
+    elif action == "load" and isinstance(audio, dict):
+        return (
+            None,
+            audio.get("audio"),
+            audio.get("preset"),
+            audio.get("effects")
+        )
+    return None, None, None, None
+
+# === Voice Cloning – Fallback Version for Hugging Face ===
 def clone_voice(source_audio, target_audio, text):
+    print("⚠️ Voice cloning not available in browser version – use local install for full support")
+    return generate_tts(text)
+
+# === UI Setup ===
+effect_options = [
+    "Noise Reduction",
+    "Compress Dynamic Range",
+    "Add Reverb",
+    "Pitch Shift",
+    "Echo",
+    "Stereo Widening",
+    "Bass Boost",
+    "Treble Boost",
+    "Normalize"
+]
+
+with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
+    gr.Markdown("## 🎧 Ultimate AI Audio Studio\nUpload, edit, export – powered by AI!")
+
+    # --- Single File Studio ---
+    with gr.Tab("🎵 Single File Studio"):
+        gr.Interface(
+            fn=process_audio,
+            inputs=[
+                gr.Audio(label="Upload Audio", type="filepath"),
+                gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
+                gr.Checkbox(label="Isolate Vocals After Effects"),
+                gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
+                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
+            ],
+            outputs=[
+                gr.Audio(label="Processed Audio", type="filepath"),
+                gr.Image(label="Waveform Preview"),
+                gr.Textbox(label="Session Log (JSON)", lines=5),
+                gr.Textbox(label="Detected Genre", lines=1),
+                gr.Textbox(label="Status", value="✅ Ready", lines=1)
+            ],
+            title="Edit One File at a Time",
+            description="Apply effects, preview waveform, and get full session log.",
+            flagging_mode="never",
+            submit_btn="Process Audio",
+            clear_btn=None
+        )
+
+    # --- Batch Processing ---
+    with gr.Tab("Batch Processing"):
+        gr.Interface(
+            fn=batch_process_audio,
+            inputs=[
+                gr.File(label="Upload Multiple Files", file_count="multiple"),
+                gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
+                gr.Checkbox(label="Isolate Vocals After Effects"),
+                gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
+                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
+            ],
+            outputs=[
+                gr.File(label="Download ZIP of All Processed Files"),
+                gr.Textbox(label="Status", value="✅ Ready", lines=1)
+            ],
+            title="Batch Audio Processor",
+            description="Upload multiple files, apply effects in bulk, and download all results in a single ZIP.",
+            flagging_mode="never",
+            submit_btn="Process All Files",
+            clear_btn=None
+        )
+
+    # --- Remix Mode ---
+    with gr.Tab("Remix Mode"):
+        gr.Interface(
+            fn=stem_split,
+            inputs=gr.Audio(label="Upload Music Track", type="filepath"),
+            outputs=[
+                gr.File(label="Vocals"),
+                gr.File(label="Drums"),
+                gr.File(label="Bass"),
+                gr.File(label="Other")
+            ],
+            title="Split Into Drums, Bass, Vocals, and More",
+            description="Use AI to separate musical elements like vocals, drums, and bass.",
+            flagging_mode="never",
+            clear_btn=None
+        )
+
+    # --- Transcribe & Edit ---
+    with gr.Tab("Transcribe & Edit"):
+        gr.Interface(
+            fn=transcribe_audio,
+            inputs=gr.Audio(label="Upload Audio", type="filepath"),
+            outputs=gr.Textbox(label="Transcribed Text", lines=10),
+            title="Transcribe Spoken Content",
+            description="Convert voice to text and edit it before exporting again."
+        )
+
+    # --- TTS Voice Generator ---
+    with gr.Tab("💬 TTS Voice Generator"):
+        gr.Interface(
+            fn=generate_tts,
+            inputs=gr.Textbox(label="Enter Text", lines=5),
+            outputs=gr.Audio(label="Generated Speech", type="filepath"),
+            title="Text-to-Speech Generator",
+            description="Type anything and turn it into natural-sounding speech."
+        )
+
+    # --- VAD – Detect & Remove Silence ===
+    with gr.Tab("✂️ Trim Silence Automatically"):
+        gr.Interface(
+            fn=detect_silence,
+            inputs=[
+                gr.File(label="Upload Track"),
+                gr.Slider(minimum=-100, maximum=-10, value=-50, label="Silence Threshold (dB)"),
+                gr.Number(label="Min Silence Length (ms)", value=1000)
+            ],
+            outputs=gr.File(label="Trimmed Output"),
+            title="Auto-Detect & Remove Silence",
+            description="Detect and trim silence at start/end or between words"
+        )
+
+    # --- Load/Save Project File (.aiproj) ===
+    with gr.Tab("Save/Load Project"):
+        gr.Interface(
+            fn=save_project,
+            inputs=[
+                gr.File(label="Original Audio"),
+                gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
+                gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
+            ],
+            outputs=gr.File(label="Project File (.aiproj)"),
+            title="Save Everything Together",
+            description="Save your session, effects, and settings in one file to reuse later."
+        )
+
+        gr.Interface(
+            fn=load_project,
+            inputs=gr.File(label="Upload .aiproj File"),
+            outputs=[
+                gr.Dropdown(choices=preset_names, label="Loaded Preset"),
+                gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
+            ],
+            title="Resume Last Project",
+            description="Load your saved session"
+        )
+
+    # --- Auto-Save / Resume Sessions ===
+    session_state = gr.State()
+
+    with gr.Tab("🧾 Auto-Save & Resume"):
+        gr.Markdown("Save your current state and resume editing later.")
+
+        action_radio = gr.Radio(["save", "load"], label="Action", value="save")
+        audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")
+        preset_dropdown = gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0] if preset_names else None)
+        effect_checkbox = gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
+        save_btn = gr.Button("Save or Load Session")
+
+        loaded_audio = gr.Audio(label="Loaded Audio", type="filepath")
+        loaded_preset = gr.Dropdown(choices=preset_names, label="Loaded Preset")
+        loaded_effects = gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
+
+        save_btn.click(
+            fn=save_or_resume_session,
+            inputs=[audio_input, preset_dropdown, effect_checkbox, action_radio],
+            outputs=[session_state, loaded_audio, loaded_preset, loaded_effects]
+        )
+
+    # --- Mix Two Tracks ===
+    with gr.Tab("Mix Two Tracks"):
+        gr.Interface(
+            fn=mix_tracks,
+            inputs=[
+                gr.File(label="Main Track"),
+                gr.File(label="Background Track"),
+                gr.Slider(minimum=-10, maximum=10, value=0, label="Volume Offset (dB)")
+            ],
+            outputs=gr.File(label="Mixed Output"),
+            title="Overlay Two Tracks",
+            description="Mix, blend, or subtract two audio files."
+        )
+
+    # === Voice Style Transfer (Dummy) ===
+    def apply_style_transfer(audio_path, mood="Happy"):
+        return audio_path
+
+    with gr.Tab("Voice Style Transfer"):
+        gr.Interface(
+            fn=apply_style_transfer,
+            inputs=[
+                gr.Audio(label="Upload Voice Clip", type="filepath"),
+                gr.Radio(["Happy", "Sad", "Angry", "Calm"], label="Choose Tone")
+            ],
+            outputs=gr.Audio(label="Stylized Output", type="filepath"),
+            title="Change Emotional Tone of Voice",
+            description="Shift the emotional style of any voice clip."
+        )
+
+    # --- Voice Cloning (Fallback) ===
+    with gr.Tab("Voice Cloning (Demo)"):
+        gr.Interface(
+            fn=clone_voice,
+            inputs=[
+                gr.File(label="Source Voice Clip"),
+                gr.File(label="Target Voice Clip"),
+                gr.Textbox(label="Text to Clone", lines=5)
+            ],
+            outputs=gr.Audio(label="Cloned Output", type="filepath"),
+            title="Replace One Voice With Another (Demo)",
+            description="Clone voice from source to target speaker using AI"
+        )
+
+demo.launch()
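The Auto-Save & Resume tab routes everything through save_or_resume_session and a gr.State holder. Below is a minimal sketch of that round trip outside the UI, with the function body copied from the diff; the file name, preset, and effect list are placeholder values:

def save_or_resume_session(audio, preset, effects, action="save"):
    # "save": pack the current selections into one dict (held in gr.State);
    # "load": unpack a previously saved dict back into the three UI fields.
    if action == "save":
        return {"audio": audio, "preset": preset, "effects": effects}, None, None, None
    elif action == "load" and isinstance(audio, dict):
        return (
            None,
            audio.get("audio"),
            audio.get("preset"),
            audio.get("effects")
        )
    return None, None, None, None

# Saving: the first return value is the session dict; the other three outputs stay empty.
state, _, _, _ = save_or_resume_session("take1.wav", "Podcast", ["Normalize"], action="save")

# Loading: the saved dict goes back in through the first argument, and the last three
# return values repopulate the loaded audio, preset, and effects components.
_, audio, preset, effects = save_or_resume_session(state, None, None, action="load")
print(audio, preset, effects)  # -> take1.wav Podcast ['Normalize']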