File size: 693 Bytes
65e5d19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Training run configuration. Outputs are placed under the exp_dir set below.
# The meaning of each key is defined by the consuming training script, which
# is not part of this file; notes marked "presumably" or "NOTE(review)" are
# assumptions to confirm against that script.

# Reproducibility
seed: 999
cudnn_deterministic: false

# Datasets: both lists reference the ".tiny" split files.
train_data_jsons:
  - data/train/1splits/data.0.tiny.json
valid_data_jsons:
  - data/val/1splits/data.0.tiny.json

# Batching and data loading
batch_scale: 2000
max_length: 1500
min_length: 1
n_worker: 4

# Runtime / debugging
# Presumably -1 means "not set / disabled" for both keys -- confirm.
local_rank: -1
minibatch_debug: -1

# Optimization
n_epoch: 1
grad_accum: 64
global_learning_rate: 2.0e-06
local_learning_rate: 4.0e-06
grad_clip: 2.0
# NOTE(review): warmup_steps (10000) is much larger than total_steps (55).
# If both feed the same LR schedule, the warmup would never complete within
# this run -- confirm this is intentional for the tiny-split setup.
warmup_steps: 10000
total_steps: 55

# Model dimensions
# Presumably these mirror the architecture of the checkpoint referenced by
# merged_model_path -- confirm before changing any of them.
dim: 4096
text_card: 1
existing_text_padding_id: 3
n_q: 16
dep_q: 8
card: 2048
num_heads: 32
num_layers: 32
hidden_scale: 4.5
causal: true
context: 3000

# Output, logging and checkpoints
exp_dir: exp_data/Moshi/v3_full_emo_v0
print_freq: 5
# NOTE(review): save_interval (10000) also exceeds total_steps (55) -- if it
# is counted in steps, no periodic checkpoint would be written; confirm.
save_interval: 10000
resume: null
merged_model_path: exp_data/Moshi/v3_full_emo_v0/model_merged.safetensors