File size: 2,312 Bytes
f973cec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# Run metadata section. The `_wandb` key and the `cli_version` field suggest
# this file was written by the Weights & Biases client - TODO confirm.
_wandb:
    value:
        # Version-like scalars are quoted so they stay strings in any loader.
        cli_version: "0.18.1"
        m: []
        python_version: "3.11.10"
        # Mapping from numeric-string ids to integer lists or scalars. The
        # meaning of each id is not defined in this file; presumably these are
        # telemetry field numbers - verify against the writer's schema.
        t:
            # "1" and "2" hold the same nine integers.
            "1": [1, 11, 41, 49, 50, 51, 55, 71, 100]
            "2": [1, 11, 41, 49, 50, 51, 55, 71, 100]
            "3": [2, 15, 16, 23, 55, 61]
            # Repeats python_version.
            "4": "3.11.10"
            # "5" and "12" both repeat cli_version.
            "5": "0.18.1"
            # NOTE(review): another version string; which package it belongs
            # to is not stated here.
            "6": "4.44.2"
            "8": [5]
            "12": "0.18.1"
            "13": linux-x86_64
# Checkpoint settings. `every_steps` is presumably the number of training
# steps between saved checkpoints - TODO confirm against the trainer.
checkpoint:
    value:
        every_steps: 2500
# Data pipeline settings.
# NOTE(review): the three length fields are mutually consistent with T5-style
# span corruption: round(1137 * 0.15) = 171 masked tokens grouped into
# 171 / 3 = 57 spans gives 1137 - 171 + 57 + 1 = 1024 input tokens and
# 171 + 57 + 1 = 229 target tokens. Presumably before_mask_input_length and
# target_length are derived from the other three values - confirm.
data:
    value:
        before_mask_input_length: 1137
        input_length: 1024
        mean_noise_span_length: 3
        mlm_probability: 0.15
        # Presumably the data-loader worker count - TODO confirm.
        num_workers: 16
        target_length: 229
# Device setting recorded for the run.
device:
    value: gpu
# Evaluation settings.
eval:
    value:
        corrected_steps: 500
        # NOTE(review): 1000000000 is far larger than optim.total_steps (20000).
        # If this is an interval in training steps, periodic evaluation would
        # never trigger during the run - confirm that this is intended.
        every_steps: 1000000000
        steps: 500
# Boolean flag, false for this run. Presumably selects an evaluation-only
# mode when true - TODO confirm.
eval_only:
    value: false
# Logging settings, including a nested block of W&B options.
logging:
    value:
        every_steps: 25
        grad_l2: true
        use_wandb: true
        wandb_config:
            entity: pszemraj
            mode: online
            project: nanoT5
            tags:
                - 24x24
                # Quoted so the tag stays the string "1024" rather than
                # being loaded as an integer.
                - "1024"
        weights_l2: true
# Run mode selector. NOTE(review): "pt" presumably means pre-training -
# confirm against the consumer of this key.
mode:
    value: pt
# Model settings.
model:
    value:
        # Explicit empty string (not null).
        checkpoint_path: ""
        compile: true
        klass: hf_t5
        # NOTE(review): the name ends in "512ctx" while data.input_length is
        # 1024 and one logging tag is "1024"; presumably this run trains at a
        # longer context than the named model - confirm.
        name: pszemraj/tFINE-850m-24x24-512ctx
        # Presumably values applied on top of the named model's own config -
        # TODO confirm.
        overwrite:
            dropout_rate: 0
        random_init: false
# Recorded count of 853929472; presumably the model's total number of
# parameters (about 854 million, in line with "850m" in model.name) - confirm.
n_all_param:
    value: 853929472
# Optimizer and learning-rate schedule settings for the run.
optim:
    value:
        base_lr: 0.01
        # 128 / grad_acc (8) = 16; presumably the per-pass micro-batch size -
        # TODO confirm how the trainer combines these two values.
        batch_size: 128
        # NOTE(review): -1 looks like a "no epoch limit" sentinel - confirm.
        epochs: -1
        # Written with an explicit ".0": YAML 1.1 loaders such as PyYAML only
        # resolve exponent floats whose mantissa contains a dot, so a bare
        # `2e-05` is loaded as the string "2e-05" instead of a number.
        final_cosine: 2.0e-05
        grad_acc: 8
        grad_clip: 1
        lr_scheduler: cosine
        name: adamwscale
        total_steps: 20000
        # 5000 of the 20000 total_steps (25%).
        warmup_steps: 5000
        weight_decay: 0
# Numeric precision setting recorded for the run.
precision:
    value: bf16
# Boolean flag, false for this run. Presumably selects a prediction-only
# mode when true - TODO confirm.
predict_only:
    value: false
# Seed value recorded for the run; presumably used to seed random number
# generators - TODO confirm.
seed:
    value: 34534
# NOTE: the plain scalar `none` is loaded as the string "none", not as YAML
# null (null is spelled `null` or `~`). Presumably a placeholder for a run
# that was not launched through SLURM - confirm.
slurm_id:
    value: none
# Tokenizer identifier. The "owner/name" form looks like a hub repository
# id - TODO confirm how the consumer resolves it.
tokenizer:
    value:
        name: BEE-spoke-data/slimpajama_tok-48128-BPE-forT5
# Absolute output directory for the run. The last two path segments look like
# a date (2024-09-26) and a time (05-19-51); presumably the launch timestamp -
# TODO confirm.
working_dir:
    value: /workspace/nanoT5/outputs/2024-09-26/05-19-51