svjack committed on
Commit
6327a30
·
verified ·
1 Parent(s): 00d0b1b

Update webui.py

Browse files
Files changed (1) hide show
  1. webui.py +105 -0
webui.py CHANGED
@@ -1,3 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import spaces
2
  import os
3
  import shutil
 
1
+ '''
2
+ pip install datasets soundfile huggingface_hub librosa
3
+
4
+
5
+ from datasets import load_dataset
6
+ import soundfile as sf
7
+ import os
8
+ from collections import defaultdict
9
+ import io
10
+
11
def _safe_path_component(name):
    """Return *name* as text that is safe to use as one path component.

    Path separators are replaced so a speaker name cannot point outside the
    output folder; missing, empty or dot-only names fall back to "unknown".
    """
    text = "" if name is None else str(name).strip()
    for separator in ("/", "\\"):
        text = text.replace(separator, "_")
    if text in ("", ".", ".."):
        return "unknown"
    return text


def _save_audio_bytes(audio_data, audio_path):
    """Decode the in-memory audio payload and write it to *audio_path*.

    Returns True when a file was written and False when *audio_data* carries
    no usable ``bytes`` payload. Decoding/encoding errors raised by soundfile
    propagate to the caller.
    """
    if 'bytes' not in audio_data or audio_data['bytes'] is None:
        # NOTE: the path-based fallback (re-reading audio_data['path']) is
        # deliberately not handled; only in-memory bytes are supported.
        return False
    with io.BytesIO(audio_data['bytes']) as audio_buffer:
        data, samplerate = sf.read(audio_buffer)
    sf.write(audio_path, data, samplerate)
    return True


def download_voices_with_dynamic_counting(output_folder='genshin_voices_sample_5', max_files_per_speaker=5):
    """Save up to ``max_files_per_speaker`` audio/transcription pairs per speaker.

    Streams the ``train`` split of ``simon3000/genshin-voice``, keeps only
    entries whose language is ``Chinese``, whose type is ``Dialog`` and whose
    transcription is non-empty, and writes each kept entry to
    ``<output_folder>/<speaker>/<speaker>_<NNNNN>.wav`` plus a matching
    ``.txt`` file holding the transcription. Numbering starts at 00001 and
    only advances when both files were written.

    Args:
        output_folder: Directory that receives one sub-folder per speaker.
        max_files_per_speaker: Maximum number of pairs saved for a speaker.

    Returns:
        A dict mapping each speaker name seen to the number of pairs saved.
    """
    # Load the dataset in streaming mode.
    dataset = load_dataset('simon3000/genshin-voice', split='train', streaming=True)

    # Keep Chinese dialog lines that have a transcription. Truthiness (rather
    # than != '') also rejects a missing transcription, which could not be
    # written to the .txt file anyway.
    filtered_data = dataset.filter(
        lambda x: (
            x['language'] == 'Chinese'
            and bool(x['transcription'])
            and x['type'] == 'Dialog'
        )
    )

    # One counter per speaker, incremented only after a successful save, so
    # no rollback is needed on the failure paths.
    saved_counts = defaultdict(int)

    os.makedirs(output_folder, exist_ok=True)

    for voice in filtered_data:
        speaker = _safe_path_component(voice['speaker'])

        # Skip speakers that already reached their quota.
        if saved_counts[speaker] >= max_files_per_speaker:
            continue

        file_num = str(saved_counts[speaker] + 1).zfill(5)  # starts at 00001

        # Create the per-speaker sub-folder.
        speaker_folder = os.path.join(output_folder, speaker)
        os.makedirs(speaker_folder, exist_ok=True)

        # Build the output paths.
        audio_path = os.path.join(speaker_folder, f'{speaker}_{file_num}.wav')
        transcription_path = os.path.join(speaker_folder, f'{speaker}_{file_num}.txt')

        # Best-effort per item: a single undecodable clip must not abort the
        # whole download, so any failure is reported and the entry skipped.
        try:
            written = _save_audio_bytes(voice['audio'], audio_path)
        except Exception as e:
            print(f"处理{speaker}的音频时出错: {str(e)}")
            continue
        if not written:
            print(f"警告: {speaker}的音频数据格式不支持,跳过")
            continue

        # Save the transcription next to the audio file.
        with open(transcription_path, 'w', encoding='utf-8') as f:
            f.write(voice['transcription'])

        saved_counts[speaker] += 1
        print(
            f"[下载进度] {speaker}_{file_num} | "
            f"进度: {saved_counts[speaker]}/{max_files_per_speaker}"
        )

    # Print the final per-speaker summary.
    print("\n=== 下载结果 ===")
    for speaker, count in saved_counts.items():
        print(f"{speaker}: {count}个文件")

    return dict(saved_counts)
87
+
88
if __name__ == "__main__":
    # Script entry point: run the downloader with its default arguments.
    download_voices_with_dynamic_counting()
90
+
91
+
92
# Example client: send one reference clip and a target sentence to the
# locally running web UI, then copy the generated file into the current
# working directory.
from pathlib import Path
from shutil import copy2

from gradio_client import Client, handle_file

client = Client("http://localhost:7860")
result = client.predict(
    prompt=handle_file('genshin_voices_sample_5/Ahangar/Ahangar_00001.wav'),
    text="偷窃者没有好下场",
    api_name="/gen_single"
)
print(result)

# Path(...).name extracts the file name with the platform's own separator
# rules, unlike splitting on "/" which breaks on backslash paths.
generated_path = result["value"]
copy2(generated_path, Path(generated_path).name)
103
+
104
+ '''
105
+
106
  import spaces
107
  import os
108
  import shutil