mrfakename committed
Commit ca99332 · verified · 1 Parent(s): 27c474b

Sync from GitHub repo


This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space through that repo.

src/f5_tts/infer/speech_edit.py CHANGED
@@ -9,6 +9,7 @@ import torch.nn.functional as F
  import torchaudio
  from hydra.utils import get_class
  from omegaconf import OmegaConf
+ from cached_path import cached_path

  from f5_tts.infer.utils_infer import load_checkpoint, load_vocoder, save_spectrogram
  from f5_tts.model import CFM
@@ -55,7 +56,8 @@ win_length = model_cfg.model.mel_spec.win_length
  n_fft = model_cfg.model.mel_spec.n_fft


- ckpt_path = str(files("f5_tts").joinpath("../../")) + f"ckpts/{exp_name}/model_{ckpt_step}.safetensors"
+ # ckpt_path = str(files("f5_tts").joinpath("../../")) + f"/ckpts/{exp_name}/model_{ckpt_step}.safetensors"
+ ckpt_path = str(cached_path(f"hf://SWivid/F5-TTS/{exp_name}/model_{ckpt_step}.safetensors"))
  output_dir = "tests"

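The edit above stops assuming a checkpoint under a local ckpts/ directory and instead resolves the model from the Hugging Face Hub via cached_path, which downloads the file on first use and returns a cached local path on later runs. A minimal standalone sketch of the same pattern follows; the exp_name and ckpt_step values are illustrative placeholders, not taken from this commit:

from cached_path import cached_path

# Illustrative values; speech_edit.py fills these in from its own config and arguments.
exp_name = "F5TTS_Base"
ckpt_step = 1200000

# cached_path resolves the hf:// URI to a file in its local cache,
# downloading it from the SWivid/F5-TTS Hub repo only if it is not already present.
ckpt_path = str(cached_path(f"hf://SWivid/F5-TTS/{exp_name}/model_{ckpt_step}.safetensors"))
print(ckpt_path)

Keeping the old files("f5_tts")-based line as a comment preserves the option of pointing back at a locally trained checkpoint.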
src/f5_tts/train/datasets/prepare_emilia_v2.py ADDED
@@ -0,0 +1,94 @@
+ # put in src/f5_tts/train/datasets/prepare_emilia_v2.py
+ # prepares Emilia dataset with the new format w/ Emilia-YODAS
+
+ import os
+ import json
+ from concurrent.futures import ProcessPoolExecutor
+ from pathlib import Path
+ from tqdm import tqdm
+ from datasets.arrow_writer import ArrowWriter
+ from importlib.resources import files
+
+ from f5_tts.model.utils import (
+     repetition_found,
+ )
+
+ # Define filters for exclusion
+ out_en = set()
+ en_filters = ["ا", "い", "て"]
+
+
+ def process_audio_directory(audio_dir):
+     sub_result, durations, vocab_set = [], [], set()
+     bad_case_en = 0
+
+     for file in audio_dir.iterdir():
+         if file.suffix == ".json":
+             with open(file, "r") as f:
+                 obj = json.load(f)
+                 text = obj["text"]
+                 if any(f in text for f in en_filters) or repetition_found(text, length=4):
+                     bad_case_en += 1
+                     continue
+
+                 duration = obj["duration"]
+                 audio_file = file.with_suffix(".mp3")
+                 if audio_file.exists():
+                     sub_result.append({"audio_path": str(audio_file), "text": text, "duration": duration})
+                     durations.append(duration)
+                     vocab_set.update(list(text))
+
+     return sub_result, durations, vocab_set, bad_case_en
+
+
+ def main():
+     assert tokenizer in ["pinyin", "char"]
+     result, duration_list, text_vocab_set = [], [], set()
+     total_bad_case_en = 0
+
+     executor = ProcessPoolExecutor(max_workers=max_workers)
+     futures = []
+     dataset_path = Path(dataset_dir)
+     for sub_dir in dataset_path.iterdir():
+         if sub_dir.is_dir():
+             futures.append(executor.submit(process_audio_directory, sub_dir))
+
+     for future in tqdm(futures, total=len(futures)):
+         sub_result, durations, vocab_set, bad_case_en = future.result()
+         result.extend(sub_result)
+         duration_list.extend(durations)
+         text_vocab_set.update(vocab_set)
+         total_bad_case_en += bad_case_en
+
+     executor.shutdown()
+
+     if not os.path.exists(f"{save_dir}"):
+         os.makedirs(f"{save_dir}")
+
+     with ArrowWriter(path=f"{save_dir}/raw.arrow") as writer:
+         for line in tqdm(result, desc="Writing to raw.arrow ..."):
+             writer.write(line)
+
+     with open(f"{save_dir}/duration.json", "w", encoding="utf-8") as f:
+         json.dump({"duration": duration_list}, f, ensure_ascii=False)
+
+     with open(f"{save_dir}/vocab.txt", "w") as f:
+         for vocab in sorted(text_vocab_set):
+             f.write(vocab + "\n")
+
+     print(f"For {dataset_name}, sample count: {len(result)}")
+     print(f"For {dataset_name}, vocab size is: {len(text_vocab_set)}")
+     print(f"For {dataset_name}, total {sum(duration_list) / 3600:.2f} hours")
+     print(f"Bad en transcription case: {total_bad_case_en}\n")
+
+
+ if __name__ == "__main__":
+     max_workers = 32
+     tokenizer = "char"
+     dataset_dir = "/home/ubuntu/emilia-dataset/Emilia-YODAS/EN"
+     dataset_name = f"Emilia_EN_{tokenizer}"
+     # save_dir = os.path.expanduser(f"~/F5-TTS/data/{dataset_name}")
+     save_dir = str(files("f5_tts").joinpath("../../")) + f"/data/{dataset_name}"
+
+     print(f"Prepare for {dataset_name}, will save to {save_dir}\n")
+     main()
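The new script walks each sub-directory of the Emilia-YODAS EN dump in parallel, drops transcripts that contain the non-English filter characters or trip repetition_found, and writes three artifacts per dataset: raw.arrow (one record per clip with audio_path, text, and duration), duration.json (the duration list), and vocab.txt (the sorted character vocabulary). A rough sketch of reading those artifacts back, assuming the save_dir layout produced above; how F5-TTS's own training code consumes them may differ in detail:

import json

from datasets import Dataset

save_dir = "data/Emilia_EN_char"  # illustrative path; use the save_dir printed by the script

# raw.arrow rows carry the fields written by ArrowWriter above.
ds = Dataset.from_file(f"{save_dir}/raw.arrow")
print(ds[0]["audio_path"], ds[0]["duration"])

# duration.json keeps the duration list separate so batching code can read it without opening the Arrow table.
with open(f"{save_dir}/duration.json", "r", encoding="utf-8") as f:
    durations = json.load(f)["duration"]

# vocab.txt has one character per line, sorted.
with open(f"{save_dir}/vocab.txt", "r", encoding="utf-8") as f:
    vocab = [line.rstrip("\n") for line in f]

print(f"{len(ds)} samples, {sum(durations) / 3600:.2f} hours, vocab size {len(vocab)}")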