pietrolesci committed on
Commit
313f892
·
verified ·
1 Parent(s): 62f940f

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Experiment Configuration
2
+ ```yaml
3
+ callbacks:
4
+ grad_accum:
5
+ _target_: src.callbacks.gradient_accumulation.GradientAccumulationScheduler
6
+ scheduling:
7
+ 0: 2
8
+ grad_norm:
9
+ _target_: src.callbacks.grad_norm.GradNorm
10
+ check_clipping: false
11
+ group_separator: /
12
+ histogram_freq: null
13
+ log_weight_distribution: false
14
+ norm_type: 2
15
+ only_total: true
16
+ lr_monitor:
17
+ _target_: src.callbacks.lr_monitor.SimpleLearningRateMonitor
18
+ model_checkpoint:
19
+ _target_: src.callbacks.model_checkpoint.ModelCheckpoint
20
+ dirpath: .checkpoints
21
+ enable_version_counter: false
22
+ every_n_train_steps: 2000
23
+ filename: '{step}'
24
+ save_initial_checkpoint: true
25
+ save_last: link
26
+ save_top_k: -1
27
+ verbose: true
28
+ speed_monitor:
29
+ _target_: src.callbacks.speed_monitor.SpeedMonitor
30
+ data:
31
+ batch_size: 16
32
+ drop_last: false
33
+ eval_batch_size: 128
34
+ multiprocessing_context: null
35
+ num_workers: 12
36
+ persistent_workers: false
37
+ pin_memory: true
38
+ prefetch_factor: 2
39
+ shuffle: true
40
+ dataset: minipile
41
+ loggers:
42
+ tensorboard:
43
+ _target_: src.loggers.TensorBoardLogger
44
+ name: ''
45
+ save_dir: ./
46
+ version: null
47
+ model: smol_llama-370M-tied
48
+ optim:
49
+ lr: 0.0006
50
+ num_warmup_steps: 2000
51
+ optim_kwargs:
52
+ betas:
53
+ - 0.9
54
+ - 0.95
55
+ eps: 1.0e-08
56
+ fused: true
57
+ optim_name: adamw
58
+ scheduler_kwargs:
59
+ min_lr_ratio: 0.01
60
+ num_decay_steps: 2000
61
+ num_stable_steps: 46000
62
+ scheduler_name: warmup_stable_decay
63
+ weight_decay: 0.1
64
+ out_parent_folder: model_train
65
+ pwd: .
66
+ resume_from_checkpoint: .checkpoints/last.ckpt
67
+ run_folder: .
68
+ save_initial_checkpoint: true
69
+ seed: 42
70
+ tok_name: bpe32000minipile
71
+ tok_path: ./outputs/tokenizers/bpe32000minipile
72
+ torch_compile: true
73
+ train_data_path: ./data/minipile/bpe32000minipile/train
74
+ trainer:
75
+ accelerator: gpu
76
+ deterministic: false
77
+ devices: 4
78
+ enable_progress_bar: true
79
+ fast_dev_run: false
80
+ gradient_clip_algorithm: norm
81
+ gradient_clip_val: 1.0
82
+ limit_val_batches: 500
83
+ log_every_n_steps: 1
84
+ max_steps: 50000
85
+ precision: bf16-true
86
+ val_check_interval: 2000
87
+ val_data_path: ./data/minipile/bpe32000minipile/validation
88
+ ```
hparams.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ loggers:
2
+ tensorboard:
3
+ _target_: src.loggers.TensorBoardLogger
4
+ save_dir: ./
5
+ name: ''
6
+ version: null
7
+ callbacks:
8
+ lr_monitor:
9
+ _target_: src.callbacks.lr_monitor.SimpleLearningRateMonitor
10
+ grad_norm:
11
+ _target_: src.callbacks.grad_norm.GradNorm
12
+ norm_type: 2
13
+ group_separator: /
14
+ histogram_freq: null
15
+ check_clipping: false
16
+ log_weight_distribution: false
17
+ only_total: true
18
+ speed_monitor:
19
+ _target_: src.callbacks.speed_monitor.SpeedMonitor
20
+ grad_accum:
21
+ _target_: src.callbacks.gradient_accumulation.GradientAccumulationScheduler
22
+ scheduling:
23
+ 0: 2
24
+ model_checkpoint:
25
+ _target_: src.callbacks.model_checkpoint.ModelCheckpoint
26
+ dirpath: .checkpoints
27
+ filename: '{step}'
28
+ enable_version_counter: false
29
+ every_n_train_steps: 2000
30
+ save_top_k: -1
31
+ save_last: link
32
+ verbose: true
33
+ save_initial_checkpoint: true
34
+ tok_path: /home/pl487/rds/hpc-work/rdd/outputs/tokenizers/bpe32000minipile
35
+ run_folder: .
36
+ out_parent_folder: model_train
37
+ tok_name: bpe32000minipile
38
+ dataset: minipile
39
+ pwd: /home/pl487/rds/hpc-work/rdd
40
+ train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
41
+ val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation
42
+ model: smol_llama-370M-tied
43
+ resume_from_checkpoint: .checkpoints/last.ckpt
44
+ save_initial_checkpoint: true
45
+ seed: 42
46
+ torch_compile: true
47
+ data:
48
+ batch_size: 16
49
+ eval_batch_size: 128
50
+ shuffle: true
51
+ drop_last: false
52
+ num_workers: 12
53
+ pin_memory: true
54
+ persistent_workers: false
55
+ prefetch_factor: 2
56
+ multiprocessing_context: null
57
+ optim:
58
+ optim_name: adamw
59
+ lr: 0.0006
60
+ weight_decay: 0.1
61
+ optim_kwargs:
62
+ fused: true
63
+ eps: 1.0e-08
64
+ betas:
65
+ - 0.9
66
+ - 0.95
67
+ scheduler_name: warmup_stable_decay
68
+ num_warmup_steps: 2000
69
+ scheduler_kwargs:
70
+ num_stable_steps: 46000
71
+ num_decay_steps: 2000
72
+ min_lr_ratio: 0.01
73
+ trainer:
74
+ accelerator: gpu
75
+ devices: 4
76
+ precision: bf16-true
77
+ deterministic: false
78
+ log_every_n_steps: 1
79
+ enable_progress_bar: true
80
+ fast_dev_run: false
81
+ gradient_clip_val: 1.0
82
+ gradient_clip_algorithm: norm
83
+ val_check_interval: 2000
84
+ max_steps: 50000
85
+ limit_val_batches: 500
tb_logs.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90d6e44b3588e2535b0dd5bf75ddc43357ba073794194f0ff36941f39b508799
3
+ size 410705
version_0/events.out.tfevents.1740076201.gpu-q-64.450716.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b39d3646e730a9824daed4ab4b6216a3994e6ea194acfdf469bd01f36ce0ce2a
3
+ size 6989694
version_0/hparams.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataloader_config: !!python/object:src.datamodule.DataloaderConfig
2
+ batch_size: 16
3
+ drop_last: false
4
+ eval_batch_size: 128
5
+ multiprocessing_context: null
6
+ num_workers: 12
7
+ persistent_workers: false
8
+ pin_memory: true
9
+ prefetch_factor: 2
10
+ shuffle: true
11
+ eod_token_id: 0
12
+ max_position_embeddings: 2048
13
+ optim_config: !!python/object:src.module.OptimCofig
14
+ keller_kwargs: {}
15
+ lr: 0.0006
16
+ num_warmup_steps: 2000
17
+ optim_kwargs:
18
+ betas:
19
+ - 0.9
20
+ - 0.95
21
+ eps: 1.0e-08
22
+ fused: true
23
+ optim_name: adamw
24
+ scheduler_kwargs:
25
+ min_lr_ratio: 0.01
26
+ num_decay_steps: 2000
27
+ num_stable_steps: 46000
28
+ scheduler_name: warmup_stable_decay
29
+ weight_decay: 0.1
30
+ train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
31
+ val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation
version_1/events.out.tfevents.1740342798.gpu-q-40.305079.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b8e68593b07d73d30da7af7aabc8d9a22e7a0cc177626b57619b9374d91ea73
3
+ size 7063299
version_1/hparams.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataloader_config: !!python/object:src.datamodule.DataloaderConfig
2
+ batch_size: 16
3
+ drop_last: false
4
+ eval_batch_size: 128
5
+ multiprocessing_context: null
6
+ num_workers: 12
7
+ persistent_workers: false
8
+ pin_memory: true
9
+ prefetch_factor: 2
10
+ shuffle: true
11
+ eod_token_id: 0
12
+ max_position_embeddings: 2048
13
+ optim_config: !!python/object:src.module.OptimCofig
14
+ keller_kwargs: {}
15
+ lr: 0.0006
16
+ num_warmup_steps: 2000
17
+ optim_kwargs:
18
+ betas:
19
+ - 0.9
20
+ - 0.95
21
+ eps: 1.0e-08
22
+ fused: true
23
+ optim_name: adamw
24
+ scheduler_kwargs:
25
+ min_lr_ratio: 0.01
26
+ num_decay_steps: 2000
27
+ num_stable_steps: 46000
28
+ scheduler_name: warmup_stable_decay
29
+ weight_decay: 0.1
30
+ train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
31
+ val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation
version_2/events.out.tfevents.1740418997.gpu-q-26.301883.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d01a4b6c9ba46639cca7c60733a2e279acd5fed5724370410a604fc7df09df74
3
+ size 6896780
version_2/hparams.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataloader_config: !!python/object:src.datamodule.DataloaderConfig
2
+ batch_size: 16
3
+ drop_last: false
4
+ eval_batch_size: 128
5
+ multiprocessing_context: null
6
+ num_workers: 12
7
+ persistent_workers: false
8
+ pin_memory: true
9
+ prefetch_factor: 2
10
+ shuffle: true
11
+ eod_token_id: 0
12
+ max_position_embeddings: 2048
13
+ optim_config: !!python/object:src.module.OptimCofig
14
+ keller_kwargs: {}
15
+ lr: 0.0006
16
+ num_warmup_steps: 2000
17
+ optim_kwargs:
18
+ betas:
19
+ - 0.9
20
+ - 0.95
21
+ eps: 1.0e-08
22
+ fused: true
23
+ optim_name: adamw
24
+ scheduler_kwargs:
25
+ min_lr_ratio: 0.01
26
+ num_decay_steps: 2000
27
+ num_stable_steps: 46000
28
+ scheduler_name: warmup_stable_decay
29
+ weight_decay: 0.1
30
+ train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
31
+ val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation
version_3/events.out.tfevents.1740465311.gpu-q-14.26324.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:858cf32191e45d0cf0f26b6f13c070302b2684b7b7bbc481578160a0db2acd2e
3
+ size 6961792
version_3/hparams.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataloader_config: !!python/object:src.datamodule.DataloaderConfig
2
+ batch_size: 16
3
+ drop_last: false
4
+ eval_batch_size: 128
5
+ multiprocessing_context: null
6
+ num_workers: 12
7
+ persistent_workers: false
8
+ pin_memory: true
9
+ prefetch_factor: 2
10
+ shuffle: true
11
+ eod_token_id: 0
12
+ max_position_embeddings: 2048
13
+ optim_config: !!python/object:src.module.OptimCofig
14
+ keller_kwargs: {}
15
+ lr: 0.0006
16
+ num_warmup_steps: 2000
17
+ optim_kwargs:
18
+ betas:
19
+ - 0.9
20
+ - 0.95
21
+ eps: 1.0e-08
22
+ fused: true
23
+ optim_name: adamw
24
+ scheduler_kwargs:
25
+ min_lr_ratio: 0.01
26
+ num_decay_steps: 2000
27
+ num_stable_steps: 46000
28
+ scheduler_name: warmup_stable_decay
29
+ weight_decay: 0.1
30
+ train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
31
+ val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation
version_4/events.out.tfevents.1740520324.gpu-q-75.43935.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34b01ed42f078ce361fcfd32424a1d3a9962d7302b2b2417a9b2e8140c51f8f7
3
+ size 5975203
version_4/hparams.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataloader_config: !!python/object:src.datamodule.DataloaderConfig
2
+ batch_size: 16
3
+ drop_last: false
4
+ eval_batch_size: 128
5
+ multiprocessing_context: null
6
+ num_workers: 12
7
+ persistent_workers: false
8
+ pin_memory: true
9
+ prefetch_factor: 2
10
+ shuffle: true
11
+ eod_token_id: 0
12
+ max_position_embeddings: 2048
13
+ optim_config: !!python/object:src.module.OptimCofig
14
+ keller_kwargs: {}
15
+ lr: 0.0006
16
+ num_warmup_steps: 2000
17
+ optim_kwargs:
18
+ betas:
19
+ - 0.9
20
+ - 0.95
21
+ eps: 1.0e-08
22
+ fused: true
23
+ optim_name: adamw
24
+ scheduler_kwargs:
25
+ min_lr_ratio: 0.01
26
+ num_decay_steps: 2000
27
+ num_stable_steps: 46000
28
+ scheduler_name: warmup_stable_decay
29
+ weight_decay: 0.1
30
+ train_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/train
31
+ val_data_path: /home/pl487/rds/hpc-work/rdd/data/minipile/bpe32000minipile/validation