jarredou committed on
Commit
8fac3b9
·
verified ·
1 Parent(s): b0b446d

Upload 2 files

Browse files
config_htdemucs_similarity(1).yaml ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 132300 # samplerate * segment
3
+ min_mean_abs: 0.001
4
+ hop_length: 1024
5
+
6
+ training:
7
+ batch_size: 5
8
+ gradient_accumulation_steps: 3
9
+ grad_clip: 0
10
+ segment: 3
11
+ shift: 1
12
+ samplerate: 44100
13
+ channels: 2
14
+ normalize: true
15
+ instruments: ['similarity', 'difference']
16
+ target_instrument: null
17
+ num_epochs: 1000
18
+ num_steps: 1000
19
+ optimizer: prodigy
20
+ lr: 1.0
21
+ patience: 80
22
+ reduce_factor: 0.95
23
+ q: 0.95
24
+ coarse_loss_clip: true
25
+ ema_momentum: 0.999
26
+ other_fix: true # needed when checking on the multisong dataset whether "other" is actually instrumental
27
+ use_amp: false # enable or disable mixed precision (float16) - usually this should be true
28
+
29
+ augmentations:
30
+ enable: false # enable or disable all augmentations (to quickly disable them if needed)
31
+ loudness: true # randomly change the loudness of each stem within the range (loudness_min; loudness_max)
32
+ loudness_min: 0.5
33
+ loudness_max: 1.5
34
+ mixup: true # mix several stems of same type with some probability (only works for dataset types: 1, 2, 3)
35
+ mixup_probs: [0.2, 0.02]
36
+ mixup_loudness_min: 0.5
37
+ mixup_loudness_max: 1.5
38
+
39
+ inference:
40
+ num_overlap: 4
41
+ batch_size: 18
42
+
43
+ loss_multistft:
44
+ fft_sizes:
45
+ - 2048
46
+ - 4096
47
+ hop_sizes:
48
+ - 1024
49
+ - 2048
50
+ win_lengths:
51
+ - 2048
52
+ - 4096
53
+ window: "hann_window"
54
+ scale: "mel"
55
+ n_bins: 128
56
+ sample_rate: 44100
57
+ perceptual_weighting: true
58
+ w_sc: 1.0
59
+ w_log_mag: 1.0
60
+ w_lin_mag: 0.0
61
+ w_phs: 0.0
62
+ mag_distance: "L1"
63
+
64
+ model: htdemucs
65
+
66
+ htdemucs: # see demucs/htdemucs.py for a detailed description
67
+ # Channels
68
+ channels: 48
69
+ channels_time:
70
+ growth: 2
71
+ # STFT
72
+ num_subbands: 1
73
+ nfft: 4096
74
+ wiener_iters: 0
75
+ end_iters: 0
76
+ wiener_residual: false
77
+ cac: true
78
+ # Main structure
79
+ depth: 4
80
+ rewrite: true
81
+ # Frequency Branch
82
+ multi_freqs: []
83
+ multi_freqs_depth: 3
84
+ freq_emb: 0.2
85
+ emb_scale: 10
86
+ emb_smooth: true
87
+ # Convolutions
88
+ kernel_size: 8
89
+ stride: 4
90
+ time_stride: 2
91
+ context: 1
92
+ context_enc: 0
93
+ # normalization
94
+ norm_starts: 4
95
+ norm_groups: 4
96
+ # DConv residual branch
97
+ dconv_mode: 3
98
+ dconv_depth: 2
99
+ dconv_comp: 8
100
+ dconv_init: 1e-3
101
+ # Before the Transformer
102
+ bottom_channels: 512
103
+ # CrossTransformer
104
+ # ------ Common to all
105
+ # Regular parameters
106
+ t_layers: 5
107
+ t_hidden_scale: 4.0
108
+ t_heads: 8
109
+ t_dropout: 0.0
110
+ t_layer_scale: True
111
+ t_gelu: True
112
+ # ------------- Positional Embedding
113
+ t_emb: sin
114
+ t_max_positions: 10000 # for the scaled embedding
115
+ t_max_period: 10000.0
116
+ t_weight_pos_embed: 1.0
117
+ t_cape_mean_normalize: True
118
+ t_cape_augment: True
119
+ t_cape_glob_loc_scale: [5000.0, 1.0, 1.4]
120
+ t_sin_random_shift: 0
121
+ # ------------- norm before a transformer encoder
122
+ t_norm_in: True
123
+ t_norm_in_group: False
124
+ # ------------- norm inside the encoder
125
+ t_group_norm: False
126
+ t_norm_first: True
127
+ t_norm_out: True
128
+ # ------------- optim
129
+ t_weight_decay: 0.0
130
+ t_lr:
131
+ # ------------- sparsity
132
+ t_sparse_self_attn: False
133
+ t_sparse_cross_attn: False
134
+ t_mask_type: diag
135
+ t_mask_random_seed: 42
136
+ t_sparse_attn_window: 400
137
+ t_global_window: 100
138
+ t_sparsity: 0.95
139
+ t_auto_sparsity: False
140
+ # Cross Encoder First (False)
141
+ t_cross_first: False
142
+ # Weight init
143
+ rescale: 0.1
144
+
model_htdemucs_ep_21_sdr_13.6970(1).ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9970dca36a15c0d0cf3338d24592aa35469697dc3be4b07e9056f5d54b82185
3
+ size 168122809