Spaces:
Running
Running
""" | |
app.py – Gradio portal for COMP5300 voice‑cloning study (Hugging Face Spaces)
-------------------------------------------------------------------------
• Consistent sentence list (prompts.txt). One prompt shown at a time.
• Volunteer enters Speaker‑ID, records, clicks **Submit & Next**.
• WAV saved locally in /persistent/raw/<speaker>/ (the storage root is /data instead of /persistent when running on Hugging Face Spaces).
• Metadata appended to /persistent/meta.csv (/data/meta.csv on Spaces) → speaker_id,prompt_idx,prompt_text,path
• Tracks completed prompts and total recording duration in /persistent/progress.json (/data/progress.json on Spaces).
• Resumes from the next incomplete prompt for a given Speaker-ID.
Tested on **Gradio** and **Python** in Hugging Face Spaces – May 2025.
Install deps:
    pip install gradio soundfile numpy
Run locally (for testing):
    python app.py --prompts prompts.txt
""" | |
from __future__ import annotations | |
import argparse | |
import csv | |
import datetime as dt | |
import io | |
from pathlib import Path | |
from typing import List, Tuple, Union | |
import os | |
import json | |
import gradio as gr | |
import numpy as np | |
import soundfile as sf | |
# Value shapes accepted from the Gradio Audio component: a
# (sample_rate, samples) tuple, a file-path string, or a dict payload.
AudioLike = Union[Tuple[int, np.ndarray], str, dict]

# Default root for everything the app persists. The __main__ block at the
# bottom of this file re-points these four names to /data when it detects
# that it is running in a Hugging Face Space.
LOCAL_ROOT = Path("/persistent")
# JSON file holding per-speaker progress.
PROGRESS_FILE = LOCAL_ROOT.joinpath("progress.json")
# CSV log that receives one row per saved recording.
META_FILE = LOCAL_ROOT.joinpath("meta.csv")
# Directory under which each speaker gets a sub-directory of WAV files.
RAW_AUDIO_DIR = LOCAL_ROOT.joinpath("raw")
# ----------------------------------------------------------------------------- | |
# Helpers | |
# ----------------------------------------------------------------------------- | |
def load_prompts(path: Path) -> List[str]:
    """Return the stripped, non-blank lines of the prompts file.

    When the ``HOME`` environment variable equals ``/home/user`` (the
    heuristic this app uses to detect a Hugging Face Space) the ``path``
    argument is ignored and ``./prompts.txt`` is read instead.

    Raises:
        FileNotFoundError: if the selected prompts file does not exist.
    """
    on_space = os.environ.get("HOME") == "/home/user"
    source = Path("./prompts.txt") if on_space else path

    # Guard clause: fail early with a message naming the path that was tried.
    if not source.exists():
        raise FileNotFoundError(f"Prompts file not found at: {source}")

    stripped = (line.strip() for line in source.read_text(encoding="utf8").splitlines())
    return [line for line in stripped if line]
def audio_to_wav_bytes(audio: AudioLike) -> bytes:
    """Turn the value returned by the Gradio Audio component into WAV bytes.

    Accepted shapes, tried in this order:
      * a 2-tuple ``(sample_rate, samples)`` - encoded as WAV in memory;
      * a dict with a truthy ``"data"`` entry holding such a pair - encoded
        the same way;
      * a dict with a truthy ``"path"`` entry - that file's bytes are returned;
      * a string naming an existing file - that file's bytes are returned.

    Raises:
        ValueError: if ``audio`` matches none of the shapes above.
    """

    def encode(sample_rate, samples) -> bytes:
        # Encode into an in-memory buffer so no temporary file is needed.
        buffer = io.BytesIO()
        sf.write(buffer, samples, sample_rate, format="WAV")
        return buffer.getvalue()

    if isinstance(audio, tuple) and len(audio) == 2:
        sample_rate, samples = audio  # type: ignore
        return encode(sample_rate, samples)

    if isinstance(audio, dict):
        payload = audio.get("data")
        if payload:
            sample_rate, samples = payload  # type: ignore
            return encode(sample_rate, samples)
        file_path = audio.get("path")
        if file_path:
            return Path(file_path).read_bytes()  # type: ignore
    elif isinstance(audio, str) and Path(audio).exists():
        return Path(audio).read_bytes()

    # Nothing matched (including a dict with neither usable key).
    raise ValueError("Unrecognized audio format from Gradio component")
def load_progress(progress_file: Path) -> dict:
    """Load the per-speaker progress mapping from ``progress_file``.

    Args:
        progress_file: Location of the JSON progress file.

    Returns:
        The decoded JSON object. An empty dict is returned when the file is
        missing, cannot be decoded, or holds valid JSON whose top level is
        not an object (a list, ``null``, a number, ...), so callers can
        always treat the result as a mapping keyed by speaker ID.
    """
    try:
        # Explicit encoding so reads do not depend on the platform locale.
        with progress_file.open("r", encoding="utf8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # No progress recorded yet.
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError):
        print("Error decoding progress.json. Starting with an empty progress.")
        return {}

    if not isinstance(data, dict):
        # Well-formed JSON of the wrong shape would otherwise crash callers
        # that index the result by speaker ID.
        print("progress.json does not contain a JSON object. Starting with an empty progress.")
        return {}
    return data
def save_progress(progress_file: Path, speaker_id: str, prompt_idx: int, audio_duration: float) -> None:
    """Record that ``speaker_id`` completed ``prompt_idx`` and persist it.

    The progress file maps each speaker ID to
    ``{"completed_prompts": [...], "total_duration_seconds": float}``.
    A prompt index that is not yet listed is appended and its duration is
    added to the running total; submitting an already-completed prompt
    again leaves both values unchanged. The list of completed prompts is
    stored sorted and without duplicates.

    Args:
        progress_file: Location of the JSON progress file.
        speaker_id: Key under which this speaker's progress is stored.
        prompt_idx: Index of the prompt that was just recorded.
        audio_duration: Length of the new recording in seconds.
    """
    progress = load_progress(progress_file)
    entry = progress.setdefault(
        speaker_id,
        {"completed_prompts": [], "total_duration_seconds": 0.0},
    )
    if prompt_idx not in entry["completed_prompts"]:
        entry["completed_prompts"].append(prompt_idx)
        entry["total_duration_seconds"] += audio_duration
    entry["completed_prompts"] = sorted(set(entry["completed_prompts"]))

    # Write to a sibling temp file and swap it in, so an interrupted write
    # can never leave a truncated progress.json behind (a truncated file
    # would be read back as "no progress" for every speaker).
    progress_file.parent.mkdir(parents=True, exist_ok=True)
    tmp_file = progress_file.with_name(progress_file.name + ".tmp")
    with tmp_file.open("w", encoding="utf8") as f:
        json.dump(progress, f, indent=2)
    os.replace(tmp_file, progress_file)
def save_local(data: bytes, path: Path):
    """Write ``data`` to ``path``, creating missing parent directories first."""
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with path.open("wb") as out:
        out.write(data)
# ----------------------------------------------------------------------------- | |
# Callback | |
# ----------------------------------------------------------------------------- | |
def record_and_save(speaker_id: str,
                    prompt_idx: int,
                    audio: AudioLike,
                    prompts: list[str]):
    """Save one recording and advance to the next unrecorded prompt.

    On success the WAV is written under ``RAW_AUDIO_DIR/<speaker_id>/``, a
    row is appended to ``META_FILE``, and the speaker's entry in
    ``PROGRESS_FILE`` is updated.

    Args:
        speaker_id: Volunteer identifier typed into the UI. It is used as a
            directory name, a file-name prefix and the progress key.
        prompt_idx: Index into ``prompts`` of the sentence that was read.
        audio: Value produced by the Gradio Audio component, or ``None``
            when nothing was recorded.
        prompts: Full list of prompt sentences.

    Returns:
        A 5-tuple ``(status, prompt_text, prompt_idx, progress_text,
        duration_text)``. When the input is rejected the current prompt
        and index are returned unchanged and the last two fields are empty
        strings.
    """

    def rejected(message: str):
        # NOTE(review): the value of gr.Warning(...) is returned as the
        # status output, exactly as before; confirm this renders as intended
        # with the installed Gradio version.
        return gr.Warning(message), prompts[prompt_idx], prompt_idx, "", ""

    if not speaker_id.strip():
        return rejected("Please enter Speaker‑ID first.")
    # The ID becomes a path component below, so refuse anything that could
    # address a location outside RAW_AUDIO_DIR.
    if speaker_id.strip() in {".", ".."} or any(ch in speaker_id for ch in ("/", "\\", "\0")):
        return rejected("Speaker‑ID must not contain path separators.")
    if audio is None:
        return rejected("Please record before submitting.")
    try:
        wav_bytes = audio_to_wav_bytes(audio)
    except Exception as e:
        return rejected(f"Audio processing error: {e}")

    # The timestamp keeps repeated takes of the same prompt from overwriting
    # each other.
    timestamp = dt.datetime.now().strftime("%Y%m%d-%H%M%S")
    fname = f"{speaker_id}_{prompt_idx:03d}_{timestamp}.wav"
    local_audio_path = RAW_AUDIO_DIR / speaker_id / fname
    path_str = str(local_audio_path)
    save_local(wav_bytes, local_audio_path)

    META_FILE.parent.mkdir(parents=True, exist_ok=True)
    with META_FILE.open("a", newline="", encoding="utf8") as f:
        csv.writer(f).writerow([speaker_id, prompt_idx, prompts[prompt_idx], path_str])

    # Best effort: an unreadable file still counts as completed, with zero
    # duration.
    try:
        audio_duration = sf.info(local_audio_path).duration
    except Exception as e:
        print(f"Error getting audio info: {e}")
        audio_duration = 0.0

    save_progress(PROGRESS_FILE, speaker_id, prompt_idx, audio_duration)

    speaker_progress = load_progress(PROGRESS_FILE).get(speaker_id, {})
    completed_list = speaker_progress.get("completed_prompts", [])
    completed_prompts = set(completed_list)
    completed_count = len(completed_list)
    total_duration = speaker_progress.get("total_duration_seconds", 0.0)

    # First prompt not yet recorded; wrap around to 0 once all are done.
    next_prompt_idx = next(
        (i for i in range(len(prompts)) if i not in completed_prompts),
        0,
    )
    return (
        f"✅ Saved to {path_str}",
        prompts[next_prompt_idx],
        next_prompt_idx,
        f"Completed: {completed_count}/{len(prompts)}",
        f"Total Duration: {total_duration:.2f} seconds",
    )
def update_prompt_on_speaker_change(speaker_id: str, prompts: list[str]) -> Tuple[str, int]:
    """Pick the prompt to show after the Speaker-ID field changes.

    A blank ID yields the first prompt. Otherwise the speaker's completed
    prompts are read from ``PROGRESS_FILE`` and the first index not among
    them is chosen, falling back to index 0 when every prompt is done.

    Returns:
        ``(prompt_text, prompt_index)`` for the chosen prompt.
    """
    if not speaker_id.strip():
        return prompts[0], 0

    saved = load_progress(PROGRESS_FILE)
    done = set(saved.get(speaker_id, {}).get("completed_prompts", []))
    upcoming = next((idx for idx, _ in enumerate(prompts) if idx not in done), 0)
    return prompts[upcoming], upcoming
# ----------------------------------------------------------------------------- | |
# UI builder | |
# ----------------------------------------------------------------------------- | |
def build_ui(prompts: list[str]):
    """Assemble the Gradio Blocks app for the recording portal.

    The layout, top to bottom: two instruction panels, the Speaker-ID field,
    the sentence to read, progress and duration read-outs, the microphone
    recorder, a status line and the submit button. Changing the Speaker-ID
    triggers ``update_prompt_on_speaker_change``; clicking the button
    triggers ``record_and_save``.

    Args:
        prompts: Sentences to present; handed to both callbacks via
            ``gr.State``.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    instructions_md = (
        "## Speaking Phase\n"
        "### Record sentences for the voice‑cloning study\n"
        "1. Find a quiet space.\n"
        "2. Click the microphone, read the sentence (mistakes are alright as long as you are speaking naturally, click stop.\n"
        "3. Hit **Submit & Next**. Repeat until done."
    )
    notice_md = (
        "**Note:** This is a research study. Your recordings will be used to train a voice model.\n"
        "Please enter your `Speaker-ID` before recording. Use PV username (e.g. Jane Doe = `jdoe`)."
    )

    with gr.Blocks(title="COMP5300 Voice‑Recording Portal") as demo:
        # Components are created in display order.
        gr.Markdown(instructions_md)
        gr.Markdown(notice_md)
        speaker_field = gr.Text(label="Speaker‑ID")
        sentence_box = gr.Textbox(label="Sentence to read")
        current_idx = gr.State(0)
        progress_view = gr.Markdown(label="Progress")
        duration_view = gr.Markdown(label="Total Duration")
        recorder = gr.Audio(sources=["microphone"], format="wav", label="🎙️ Record here")
        status_line = gr.Markdown()
        submit_btn = gr.Button("Submit & Next ➡️")

        # Entering or editing the ID jumps to that speaker's next prompt.
        speaker_field.change(
            fn=update_prompt_on_speaker_change,
            inputs=[speaker_field, gr.State(prompts)],
            outputs=[sentence_box, current_idx],
        )
        # Submitting stores the take and refreshes every read-out.
        submit_btn.click(
            record_and_save,
            inputs=[speaker_field, current_idx, recorder, gr.State(prompts)],
            outputs=[status_line, sentence_box, current_idx, progress_view, duration_view],
        )
    return demo
# ----------------------------------------------------------------------------- | |
# main | |
# ----------------------------------------------------------------------------- | |
def main():
    """Command-line entry point: parse ``--prompts``, build the UI, launch it."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--prompts",
        type=Path,
        required=True,
        help="Text file with one sentence per line",
    )
    cli_args = parser.parse_args()
    sentences = load_prompts(cli_args.prompts)
    build_ui(sentences).launch()
if __name__ == "__main__":
    if os.environ.get("HOME") != "/home/user":
        # Running locally, use command-line arguments
        main()
    else:
        # Running in Hugging Face Space, use /data for persistent storage
        LOCAL_ROOT = Path("/data")
        PROGRESS_FILE = LOCAL_ROOT / "progress.json"
        META_FILE = LOCAL_ROOT / "meta.csv"
        RAW_AUDIO_DIR = LOCAL_ROOT / "raw"
        # Ensure the storage directories exist before the first submission.
        for storage_dir in (RAW_AUDIO_DIR, PROGRESS_FILE.parent, META_FILE.parent):
            storage_dir.mkdir(parents=True, exist_ok=True)
        prompts = load_prompts(Path("./prompts.txt"))
        ui = build_ui(prompts)
        ui.launch()