IndexTTS

Runtime error

App Files Files Community

svjack committed on 7 days ago

Commit

6327a30

verified ·

1 Parent(s): 00d0b1b

Update webui.py

Browse files

Files changed (1) hide show

webui.py +105 -0

webui.py CHANGED Viewed

@@ -1,3 +1,108 @@
 import spaces
 import os
 import shutil

+'''
+pip install datasets soundfile huggingface_hub librosa
+from datasets import load_dataset
+import soundfile as sf
+import os
+from collections import defaultdict
+import io
+def download_voices_with_dynamic_counting(output_folder='genshin_voices_sample_5', max_files_per_speaker=5):
+    """动态统计并下载所有speaker的音频和转录文件（兼容bytes/path格式）"""
+    # 加载数据集（流式模式）
+    dataset = load_dataset('simon3000/genshin-voice', split='train', streaming=True)
+    # 过滤条件：中文、有转录、类型为对话
+    filtered_data = dataset.filter(
+        lambda x: (
+            x['language'] == 'Chinese' and
+            x['transcription'] != '' and
+            x['type'] == 'Dialog'
+        )
+    )
+    # 动态统计speaker计数和文件下载
+    speaker_counts = defaultdict(int)
+    speaker_file_indices = defaultdict(int)
+    os.makedirs(output_folder, exist_ok=True)
+    for voice in filtered_data:
+        speaker = voice['speaker']
+        # 如果该speaker已下载足够文件，跳过
+        if speaker_counts[speaker] >= max_files_per_speaker:
+            continue
+        # 更新speaker计数
+        speaker_counts[speaker] += 1
+        file_num = str(speaker_file_indices[speaker] + 1).zfill(5)  # 从00001开始
+        # 创建speaker子文件夹
+        speaker_folder = os.path.join(output_folder, speaker)
+        os.makedirs(speaker_folder, exist_ok=True)
+        # 构建文件路径
+        audio_path = os.path.join(speaker_folder, f'{speaker}_{file_num}.wav')
+        transcription_path = os.path.join(speaker_folder, f'{speaker}_{file_num}.txt')
+        # 处理音频数据（兼容bytes或path格式）
+        audio_data = voice['audio']
+        try:
+            if 'bytes' in audio_data and audio_data['bytes'] is not None:
+                # 从bytes直接读取音频
+                with io.BytesIO(audio_data['bytes']) as audio_bytes:
+                    data, samplerate = sf.read(audio_bytes)
+                    sf.write(audio_path, data, samplerate)
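+                # NOTE: the 'path' fallback below is disabled. It is kept as '#' comments
+                # because a nested triple-quote here would end the enclosing string early.
+                # elif 'path' in audio_data and os.path.exists(audio_data['path']):
+                #     # if a path is provided and the file exists, re-save it directly
+                #     data, samplerate = sf.read(audio_data['path'])
+                #     sf.write(audio_path, data, samplerate)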
+            else:
+                print(f"警告: {speaker}的音频数据格式不支持，跳过")
+                speaker_counts[speaker] -= 1  # 回滚计数
+                continue
+        except Exception as e:
+            print(f"处理{speaker}的音频时出错: {str(e)}")
+            speaker_counts[speaker] -= 1
+            continue
+        # 保存转录文件
+        with open(transcription_path, 'w', encoding='utf-8') as f:
+            f.write(voice['transcription'])
+        speaker_file_indices[speaker] += 1
+        print(
+            f"[下载进度] {speaker}_{file_num} | "
+            f"进度: {speaker_counts[speaker]}/{max_files_per_speaker}"
+        )
+    # 打印最终统计
+    print("\n=== 下载结果 ===")
+    for speaker, count in speaker_counts.items():
+        print(f"{speaker}: {count}个文件")
+if __name__ == '__main__':
+    download_voices_with_dynamic_counting()
+from gradio_client import Client, handle_file
+client = Client("http://localhost:7860")
+result = client.predict(
+		prompt=handle_file('genshin_voices_sample_5/Ahangar/Ahangar_00001.wav'),
+		text="偷窃者没有好下场",
+		api_name="/gen_single"
+)
+print(result)
+from shutil import copy2
+copy2(result["value"], result["value"].split("/")[-1])
+'''
 import spaces
 import os
 import shutil