JotunnBurton committed on
Commit
b910c48
·
verified ·
1 Parent(s): 2359fbe

Upload utils.py

Browse files
Files changed (1) hide show
  1. utils.py +461 -0
utils.py ADDED
@@ -0,0 +1,461 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import argparse
4
+ import logging
5
+ import json
6
+ import shutil
7
+ import subprocess
8
+ import numpy as np
9
+ from huggingface_hub import hf_hub_download
10
+ from scipy.io.wavfile import read
11
+ import torch
12
+ import re
13
+
14
+ MATPLOTLIB_FLAG = False
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def download_emo_models(mirror, repo_id, model_name):
    """Download the emotion model weights from the selected mirror.

    Args:
        mirror: Name of the download source. ``"openi"`` (matched
            case-insensitively, the same way ``download_checkpoint`` does)
            selects the OpenI mirror; any other value uses the Hugging Face
            Hub.
        repo_id: Repository identifier. The OpenI branch only uses the last
            ``/``-separated component of it as the model name.
        model_name: Local directory that receives ``pytorch_model.bin`` when
            downloading from the Hugging Face Hub. The OpenI branch does not
            use it and always writes to ``./emotional``.
    """
    # Non-string mirrors (e.g. None) simply fall through to the Hub branch,
    # exactly as the former plain equality test did.
    use_openi = isinstance(mirror, str) and mirror.lower() == "openi"
    if use_openi:
        # Imported lazily so the package is only needed for this mirror.
        import openi

        openi.model.download_model(
            "Stardust_minus/Bert-VITS2",
            repo_id.split("/")[-1],
            "./emotional",
        )
        return

    hf_hub_download(
        repo_id,
        "pytorch_model.bin",
        local_dir=model_name,
        local_dir_use_symlinks=False,
    )
35
+
36
+
37
def download_checkpoint(
    dir_path, repo_config, token=None, regex="G_*.pth", mirror="openi"
):
    """Fetch pretrained checkpoints into ``dir_path`` unless some already exist.

    Args:
        dir_path: Directory that should contain the checkpoints.
        repo_config: Mapping with a ``"repo_id"`` entry and, for the OpenI
            mirror, a ``"model_image"`` entry.
        token: Optional token forwarded to ``openi.login``.
        regex: Glob pattern; if any file in ``dir_path`` matches it, nothing
            is downloaded.
        mirror: ``"openi"`` (case-insensitive) or anything else for the
            Hugging Face Hub.
    """
    repo_id = repo_config["repo_id"]

    # Guard: an existing checkpoint means there is nothing to do.
    if glob.glob(os.path.join(dir_path, regex)):
        print("Use existed model, skip downloading.")
        return

    if mirror.lower() != "openi":
        for ckpt_name in ["DUR_0.pth", "D_0.pth", "G_0.pth"]:
            hf_hub_download(
                repo_id, ckpt_name, local_dir=dir_path, local_dir_use_symlinks=False
            )
        return

    import openi

    login_kwargs = {}
    if token:
        login_kwargs["token"] = token
    openi.login(**login_kwargs)

    model_image = repo_config["model_image"]
    openi.model.download_model(repo_id, model_image, dir_path)

    # The download lands in a sub-directory named after the model image;
    # lift the .pth files up into dir_path and drop the now-unneeded folder.
    image_dir = os.path.join(dir_path, model_image)
    for ckpt_file in glob.glob(os.path.join(image_dir, "*.pth")):
        shutil.move(ckpt_file, dir_path)
    shutil.rmtree(image_dir)
63
+
64
+
65
def load_checkpoint(checkpoint_path, model, optimizer=None, skip_optimizer=False):
    """Restore model (and optionally optimizer) state from a checkpoint file.

    The checkpoint is expected to be a dict with the keys ``"model"``,
    ``"iteration"``, ``"learning_rate"`` and ``"optimizer"`` (the layout
    written by ``save_checkpoint``).

    Args:
        checkpoint_path: Path of the checkpoint file; must exist.
        model: Module to load weights into. If it has a ``module`` attribute,
            that inner module is the one that gets loaded.
        optimizer: Optional optimizer whose state is restored when the
            checkpoint carries one and ``skip_optimizer`` is false.
        skip_optimizer: When true, the optimizer is left untouched.

    Returns:
        Tuple ``(model, optimizer, learning_rate, iteration)``.
    """
    assert os.path.isfile(checkpoint_path)
    # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
    checkpoint_dict = torch.load(checkpoint_path, map_location="cpu")
    iteration = checkpoint_dict["iteration"]
    learning_rate = checkpoint_dict["learning_rate"]

    # Only restore optimizer state when there is an optimizer to restore into.
    # The previous fallback branch dereferenced ``optimizer`` while it was
    # None, so a call with the default arguments always raised AttributeError.
    if optimizer is not None and not skip_optimizer:
        saved_optimizer = checkpoint_dict["optimizer"]
        if saved_optimizer is not None:
            optimizer.load_state_dict(saved_optimizer)

    saved_state_dict = checkpoint_dict["model"]
    target = model.module if hasattr(model, "module") else model
    current_state = target.state_dict()

    new_state_dict = {}
    for k, v in current_state.items():
        saved = saved_state_dict[k] if k in saved_state_dict else None
        if saved is not None and getattr(saved, "shape", None) == v.shape:
            new_state_dict[k] = saved
            continue

        # Missing or incompatible entry: keep the model's own tensor, except
        # for the projection added in newer versions, which is zeroed.
        if "ja_bert_proj" in k:
            v = torch.zeros_like(v)
            logger.warning(
                f"Seems you are using the old version of the model, the {k} is automatically set to zero for backward compatibility"
            )
        elif saved is None:
            logger.error(f"{k} is not in the checkpoint")
        else:
            logger.error(
                f"{k} has shape {tuple(getattr(saved, 'shape', ()))} in the checkpoint "
                f"but {tuple(v.shape)} in the model; keeping the model's value"
            )
        new_state_dict[k] = v

    target.load_state_dict(new_state_dict, strict=False)

    logger.info(
        "Loaded checkpoint '{}' (iteration {})".format(checkpoint_path, iteration)
    )

    return model, optimizer, learning_rate, iteration
121
+
122
+
123
def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path):
    """Write model and optimizer state to ``checkpoint_path`` via ``torch.save``.

    The saved object is a dict with the keys ``"model"``, ``"iteration"``,
    ``"optimizer"`` and ``"learning_rate"``. If ``model`` has a ``module``
    attribute, the state dict of that inner module is stored.
    """
    logger.info(
        "Saving model and optimizer state at iteration {} to {}".format(
            iteration, checkpoint_path
        )
    )
    unwrapped = model.module if hasattr(model, "module") else model
    payload = {
        "model": unwrapped.state_dict(),
        "iteration": iteration,
        "optimizer": optimizer.state_dict(),
        "learning_rate": learning_rate,
    }
    torch.save(payload, checkpoint_path)
142
+
143
+
144
def summarize(
    writer,
    global_step,
    scalars=None,
    histograms=None,
    images=None,
    audios=None,
    audio_sampling_rate=22050,
):
    """Forward a batch of summaries to ``writer`` for one step.

    Args:
        writer: Object exposing ``add_scalar``, ``add_histogram``,
            ``add_image`` and ``add_audio``.
        global_step: Step value passed along with every entry.
        scalars: Optional mapping of tag -> value for ``add_scalar``.
        histograms: Optional mapping of tag -> value for ``add_histogram``.
        images: Optional mapping of tag -> image for ``add_image``; images
            are logged with ``dataformats="HWC"``.
        audios: Optional mapping of tag -> audio for ``add_audio``.
        audio_sampling_rate: Sampling rate passed to ``add_audio``.

    ``None`` (the default) and an empty mapping both mean "nothing to log";
    the defaults are ``None`` rather than shared mutable dict literals.
    """
    for tag, value in (scalars or {}).items():
        writer.add_scalar(tag, value, global_step)
    for tag, value in (histograms or {}).items():
        writer.add_histogram(tag, value, global_step)
    for tag, value in (images or {}).items():
        writer.add_image(tag, value, global_step, dataformats="HWC")
    for tag, value in (audios or {}).items():
        writer.add_audio(tag, value, global_step, audio_sampling_rate)
161
+
162
+
163
def latest_checkpoint_path(dir_path, regex="G_*.pth"):
    """Return the path in ``dir_path`` matching ``regex`` with the largest number.

    Candidates are ordered by the integer formed from *all* digit characters
    of the full path, and the last one is returned. Raises ``IndexError`` if
    nothing matches and ``ValueError`` if a candidate path has no digits.
    """

    def _digits_as_int(path):
        return int("".join(ch for ch in path if ch.isdigit()))

    candidates = sorted(
        glob.glob(os.path.join(dir_path, regex)), key=_digits_as_int
    )
    return candidates[-1]
168
+
169
+
170
def _ensure_agg_backend():
    """Switch Matplotlib to the ``Agg`` backend the first time it is needed."""
    global MATPLOTLIB_FLAG
    if MATPLOTLIB_FLAG:
        return
    import matplotlib

    matplotlib.use("Agg")
    MATPLOTLIB_FLAG = True
    # Keep Matplotlib's own logger from flooding the output.
    logging.getLogger("matplotlib").setLevel(logging.WARNING)


def _figure_to_rgb_array(fig):
    """Draw ``fig`` and return its pixels as a uint8 array of shape (H, W, 3)."""
    import numpy as np

    canvas = fig.canvas
    canvas.draw()
    if hasattr(canvas, "tostring_rgb"):
        # np.frombuffer replaces np.fromstring's deprecated binary mode.
        pixels = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8)
        pixels = pixels.reshape(canvas.get_width_height()[::-1] + (3,))
    else:
        # Canvases without tostring_rgb: take RGB from the RGBA buffer.
        pixels = np.asarray(canvas.buffer_rgba())[..., :3]
    # Copy so the caller gets a writable array that owns its data.
    return pixels.copy()


def plot_spectrogram_to_numpy(spectrogram):
    """Render ``spectrogram`` as an image and return it as an RGB array.

    The figure is 10x2 inches with a colour bar, x axis labelled "Frames"
    and y axis labelled "Channels".
    """
    _ensure_agg_backend()
    import matplotlib.pylab as plt

    fig, ax = plt.subplots(figsize=(10, 2))
    im = ax.imshow(spectrogram, aspect="auto", origin="lower", interpolation="none")
    plt.colorbar(im, ax=ax)
    plt.xlabel("Frames")
    plt.ylabel("Channels")
    plt.tight_layout()

    data = _figure_to_rgb_array(fig)
    # Close this specific figure rather than whichever one is "current".
    plt.close(fig)
    return data


def plot_alignment_to_numpy(alignment, info=None):
    """Render the transposed ``alignment`` as an image and return an RGB array.

    Args:
        alignment: Array-like with a ``transpose()`` method.
        info: Optional text appended (after a blank line) to the x-axis label.
    """
    _ensure_agg_backend()
    import matplotlib.pylab as plt

    fig, ax = plt.subplots(figsize=(6, 4))
    im = ax.imshow(
        alignment.transpose(), aspect="auto", origin="lower", interpolation="none"
    )
    fig.colorbar(im, ax=ax)
    xlabel = "Decoder timestep"
    if info is not None:
        xlabel += "\n\n" + info
    plt.xlabel(xlabel)
    plt.ylabel("Encoder timestep")
    plt.tight_layout()

    data = _figure_to_rgb_array(fig)
    plt.close(fig)
    return data
225
+
226
+
227
def load_wav_to_torch(full_path):
    """Read a WAV file and return ``(samples, sampling_rate)``.

    The samples are converted to float32 and wrapped in a
    ``torch.FloatTensor``; no rescaling is applied.
    """
    sampling_rate, samples = read(full_path)
    as_float32 = samples.astype(np.float32)
    waveform = torch.FloatTensor(as_float32)
    return waveform, sampling_rate
230
+
231
+
232
def load_filepaths_and_text(filename, split="|"):
    """Read a UTF-8 text file and split every stripped line on ``split``.

    Returns a list with one list of fields per line of the file.
    """
    rows = []
    with open(filename, encoding="utf-8") as handle:
        for raw_line in handle:
            fields = raw_line.strip().split(split)
            rows.append(fields)
    return rows
236
+
237
+
238
def get_hparams(init=True):
    """Parse the command line and build the run's ``HParams``.

    Command-line options:
        ``-c/--config``: JSON configuration file (default
            ``./configs/base.json``).
        ``-m/--model``: model name (required); the model directory is
            ``./logs/<model>`` and is created if missing.

    Args:
        init: When true, the file given by ``--config`` is copied to
            ``<model_dir>/config.json`` and used. When false, the previously
            saved ``<model_dir>/config.json`` is read instead.

    Returns:
        ``HParams`` built from the JSON config, with ``model_dir`` set.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        type=str,
        default="./configs/base.json",
        help="JSON file for configuration",
    )
    parser.add_argument("-m", "--model", type=str, required=True, help="Model name")

    args = parser.parse_args()
    model_dir = os.path.join("./logs", args.model)
    os.makedirs(model_dir, exist_ok=True)

    config_path = args.config
    config_save_path = os.path.join(model_dir, "config.json")
    if init:
        with open(config_path, "r", encoding="utf-8") as f:
            data = f.read()
        # Keep a copy of the config next to the checkpoints.
        with open(config_save_path, "w", encoding="utf-8") as f:
            f.write(data)
    else:
        # Was ``vencoding=``, which made every init=False call raise TypeError.
        with open(config_save_path, "r", encoding="utf-8") as f:
            data = f.read()
    config = json.loads(data)

    hparams = HParams(**config)
    hparams.model_dir = model_dir
    return hparams
269
+
270
+
271
def clean_checkpoints(path_to_models="logs/44k/", n_ckpts_to_keep=2, sort_by_time=True):
    """Free up space by deleting old saved checkpoints.

    Only files named ``G_<step>.pth``, ``D_<step>.pth``, ``WD_<step>.pth`` or
    ``DUR_<step>.pth`` are considered, and each family is pruned on its own.
    Step-0 files (``*_0.pth``) are never deleted.

    Arguments:
    path_to_models -- Path to the model directory
    n_ckpts_to_keep -- Number of newest ckpts to keep per family, excluding
                       the ``*_0.pth`` files; values <= 0 keep none
    sort_by_time -- True -> delete oldest first by modification time
                    False -> delete lowest step number first
    """
    import re

    # Anchored so that unrelated files (or DUR_* when looking for D_*) can
    # never be picked up by a bare prefix match.
    ckpt_pattern = re.compile(r"^(G|D|WD|DUR)_(\d+)\.pth$")
    keep = max(n_ckpts_to_keep, 0)

    families = {"G": [], "D": [], "WD": [], "DUR": []}
    for name in os.listdir(path_to_models):
        match = ckpt_pattern.match(name)
        if match is None or name.endswith("_0.pth"):
            continue
        full_path = os.path.join(path_to_models, name)
        if not os.path.isfile(full_path):
            continue
        order = os.path.getmtime(full_path) if sort_by_time else int(match.group(2))
        families[match.group(1)].append((order, full_path))

    for entries in families.values():
        entries.sort(key=lambda item: item[0])
        # Explicit length arithmetic: a plain [:-keep] slice selects nothing
        # when keep == 0 instead of everything.
        stale = entries[: max(len(entries) - keep, 0)]
        for _, ckpt_path in stale:
            os.remove(ckpt_path)
            logger.info(f".. Free up space by deleting ckpt {ckpt_path}")
318
+
319
+
320
def get_hparams_from_dir(model_dir):
    """Build ``HParams`` from ``<model_dir>/config.json``.

    The returned object additionally carries ``model_dir``.
    """
    with open(os.path.join(model_dir, "config.json"), "r", encoding="utf-8") as fp:
        raw_json = fp.read()

    hparams = HParams(**json.loads(raw_json))
    hparams.model_dir = model_dir
    return hparams
329
+
330
+
331
def get_hparams_from_file(config_path):
    """Build ``HParams`` from the UTF-8 JSON file at ``config_path``."""
    with open(config_path, "r", encoding="utf-8") as fp:
        raw_json = fp.read()

    return HParams(**json.loads(raw_json))
339
+
340
+
341
def check_git_hash(model_dir):
    """Compare the source tree's git HEAD with the hash stored in ``model_dir``.

    If ``<model_dir>/githash`` exists and differs from the current HEAD, a
    warning is logged. If it does not exist, it is created with the current
    hash. Nothing happens (beyond a warning) when the directory containing
    this file has no ``.git`` entry or when git cannot report a hash.
    """
    source_dir = os.path.dirname(os.path.realpath(__file__))
    if not os.path.exists(os.path.join(source_dir, ".git")):
        logger.warning(
            "{} is not a git repository, therefore hash value comparison will be ignored.".format(
                source_dir
            )
        )
        return

    # Ask git about the directory that was just checked for .git, not the
    # process's working directory.
    try:
        result = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=source_dir,
            capture_output=True,
            text=True,
        )
    except OSError as err:
        logger.warning("Could not run git to read the current hash: {}".format(err))
        return
    if result.returncode != 0:
        # Do not persist git's error text as if it were a hash.
        logger.warning(
            "git rev-parse HEAD failed, hash value comparison will be ignored: {}".format(
                result.stderr.strip()
            )
        )
        return
    cur_hash = result.stdout.strip()

    path = os.path.join(model_dir, "githash")
    if os.path.exists(path):
        with open(path) as f:
            saved_hash = f.read().strip()
        if saved_hash != cur_hash:
            logger.warning(
                "git hash values are different. {}(saved) != {}(current)".format(
                    saved_hash[:8], cur_hash[:8]
                )
            )
    else:
        with open(path, "w") as f:
            f.write(cur_hash)
364
+
365
+
366
def get_logger(model_dir, filename="train.log"):
    """Return a DEBUG-level logger that writes to ``<model_dir>/<filename>``.

    The logger is named after the last path component of ``model_dir`` and is
    also bound to the module-level ``logger``. ``model_dir`` is created if
    needed. Calling this again for the same log file reuses the existing
    file handler instead of attaching another one, so messages are not
    written several times.
    """
    global logger
    logger = logging.getLogger(os.path.basename(model_dir))
    logger.setLevel(logging.DEBUG)

    os.makedirs(model_dir, exist_ok=True)
    # FileHandler stores the absolute path in baseFilename; compare like for like.
    log_path = os.path.abspath(os.path.join(model_dir, filename))
    already_attached = any(
        isinstance(existing, logging.FileHandler)
        and existing.baseFilename == log_path
        for existing in logger.handlers
    )
    if not already_attached:
        formatter = logging.Formatter(
            "%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s"
        )
        h = logging.FileHandler(log_path)
        h.setLevel(logging.DEBUG)
        h.setFormatter(formatter)
        logger.addHandler(h)
    return logger
379
+
380
+
381
class HParams:
    """Attribute-style container for (possibly nested) hyperparameters.

    Every keyword argument becomes an attribute; dict values (including dict
    subclasses) are converted recursively into ``HParams``. The object also
    supports the mapping-like operations ``keys``/``items``/``values``,
    ``len``, ``in`` and ``[]`` access, which is enough for ``**hparams``
    unpacking.
    """

    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            if isinstance(v, dict):
                v = HParams(**v)
            self[k] = v

    def keys(self):
        return self.__dict__.keys()

    def items(self):
        return self.__dict__.items()

    def values(self):
        return self.__dict__.values()

    def __len__(self):
        return len(self.__dict__)

    def __getitem__(self, key):
        return getattr(self, key)

    def __setitem__(self, key, value):
        return setattr(self, key, value)

    def __contains__(self, key):
        return key in self.__dict__

    def __repr__(self):
        return repr(self.__dict__)
411
+
412
+
413
def load_model(model_path, config_path):
    """Build a ``SynthesizerTrn`` on CPU in eval mode and load its weights.

    Hyperparameters come from ``config_path``; weights come from
    ``model_path`` via ``load_checkpoint`` with the optimizer skipped.

    NOTE(review): ``SynthesizerTrn`` is not among this module's visible
    imports -- confirm it is in scope before relying on this function.
    """
    hps = get_hparams_from_file(config_path)

    # 108 is hard-coded where the original comment mentions len(symbols).
    n_symbols = 108
    spec_channels = hps.data.filter_length // 2 + 1
    segment_frames = hps.train.segment_size // hps.data.hop_length

    net = SynthesizerTrn(
        n_symbols,
        spec_channels,
        segment_frames,
        n_speakers=hps.data.n_speakers,
        **hps.model,
    )
    net = net.to("cpu")
    net.eval()
    load_checkpoint(model_path, net, None, skip_optimizer=True)
    return net
426
+
427
+
428
def mix_model(
    network1, network2, output_path, voice_ratio=(0.5, 0.5), tone_ratio=(0.5, 0.5)
):
    """Blend the weights of two networks and save the result as a checkpoint.

    For every parameter present in both networks the result is
    ``w1 * ratio[0] + w2 * ratio[1]``, where ``ratio`` is ``tone_ratio`` for
    keys containing ``"enc_p"`` and ``voice_ratio`` otherwise. Keys found in
    only one network are carried over unchanged.

    Args:
        network1: First network, or a wrapper exposing it as ``.module``.
        network2: Second network, or a wrapper exposing it as ``.module``.
        output_path: Destination file for ``torch.save``.
        voice_ratio: Pair of weights for non-``enc_p`` parameters.
        tone_ratio: Pair of weights for ``enc_p`` parameters.

    The saved dict has ``"iteration"`` 0, ``"optimizer"`` None and
    ``"learning_rate"`` 0 alongside the mixed ``"model"`` state dict.
    """

    def _state_dict_of(network):
        # Decide per network: only one of the two may be wrapped.
        inner = network.module if hasattr(network, "module") else network
        return inner.state_dict()

    state_dict1 = _state_dict_of(network1)
    state_dict2 = _state_dict_of(network2)

    for k in list(state_dict1):
        if k not in state_dict2:
            continue
        ratio = tone_ratio if "enc_p" in k else voice_ratio
        # The arithmetic already produces fresh tensors; no clone is needed.
        state_dict1[k] = state_dict1[k] * ratio[0] + state_dict2[k] * ratio[1]

    for k, v in state_dict2.items():
        if k not in state_dict1:
            state_dict1[k] = v.clone()

    torch.save(
        {"model": state_dict1, "iteration": 0, "optimizer": None, "learning_rate": 0},
        output_path,
    )
457
+
458
+
459
def get_steps(model_path):
    """Return the last run of digits in ``model_path`` as a string, or None."""
    digit_runs = re.findall(r"\d+", model_path)
    if not digit_runs:
        return None
    return digit_runs[-1]