File size: 693 Bytes
65e5d19 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
---
# Training-run configuration.
# The meaning of each key is defined by the code that loads this file (not
# shown here); the stanzas below are grouped by key name only.

# Seeding / determinism flags
seed: 999
cudnn_deterministic: false

# Train / validation JSON file lists (one entry per file)
train_data_jsons:
  - data/train/1splits/data.0.tiny.json
valid_data_jsons:
  - data/val/1splits/data.0.tiny.json

# Batching, length limits and runtime settings
batch_scale: 2000
max_length: 1500
min_length: 1
n_worker: 4
local_rank: -1
minibatch_debug: -1

# Epochs, gradient handling and learning rates
n_epoch: 1
grad_accum: 64
# Keep the decimal point and the signed exponent on these two values: YAML 1.1
# loaders such as PyYAML only resolve that spelling as a float.
global_learning_rate: 2.0e-06
local_learning_rate: 4.0e-06
grad_clip: 2.0
# NOTE(review): warmup_steps (10000) is much larger than total_steps (55).
# Presumably deliberate for this tiny-split run; confirm against the scheduler.
warmup_steps: 10000
total_steps: 55

# Model hyper-parameters
dim: 4096
text_card: 1
existing_text_padding_id: 3
n_q: 16
dep_q: 8
card: 2048
num_heads: 32
num_layers: 32
hidden_scale: 4.5
causal: true
context: 3000

# Experiment directory, logging and checkpoint settings
exp_dir: exp_data/Moshi/v3_full_emo_v0
print_freq: 5
# NOTE(review): save_interval (10000) also exceeds total_steps (55); verify
# that the consumer still writes a checkpoint for a run this short.
save_interval: 10000
resume: null
# Path sits inside exp_dir above; keep the two in sync when renaming the run.
merged_model_path: exp_data/Moshi/v3_full_emo_v0/model_merged.safetensors
|