Upload folder using huggingface_hub
Browse files- train4/rwkv-0.pth +3 -0
- train4/rwkv-1.pth +3 -0
- train4/rwkv-10.pth +3 -0
- train4/rwkv-11.pth +3 -0
- train4/rwkv-12.pth +3 -0
- train4/rwkv-13.pth +3 -0
- train4/rwkv-14.pth +3 -0
- train4/rwkv-15.pth +3 -0
- train4/rwkv-16.pth +3 -0
- train4/rwkv-2.pth +3 -0
- train4/rwkv-3.pth +3 -0
- train4/rwkv-4.pth +3 -0
- train4/rwkv-5.pth +3 -0
- train4/rwkv-6.pth +3 -0
- train4/rwkv-7.pth +3 -0
- train4/rwkv-8.pth +3 -0
- train4/rwkv-9.pth +3 -0
- train4/train_log.txt +29 -0
train4/rwkv-0.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ab952c6770d996564498f8fda41b767367d182f29b276e5ddbd3cc8f604b546
|
3 |
+
size 204292621
|
train4/rwkv-1.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32c9f22d0c449eb80c8659dbd9baed26cabfffcac5465546c20ede30834456cd
|
3 |
+
size 204292621
|
train4/rwkv-10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b2d0e4cd575fcb34bb44aa688708a0b1a009f8c88ffb1ae2a151c93f705ffbc
|
3 |
+
size 204292692
|
train4/rwkv-11.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6a5bb7317ddce3aab9981704a0d39eaa86767c247651669a095e5fca37372d6
|
3 |
+
size 204292692
|
train4/rwkv-12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4abfec8507c64ecbf00fe3316dfdc8e75f12a23cf5d4b60962be6f7489b4d1a6
|
3 |
+
size 204292692
|
train4/rwkv-13.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8e409587a8ca372cd21ef9626aabe4b6da5dc754a41aa7721235873f697f9d6
|
3 |
+
size 204292692
|
train4/rwkv-14.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e616ee9d2557c94e8dd824ad878df8a7938b5e385cdf1196c76b69af235fb7c3
|
3 |
+
size 204292692
|
train4/rwkv-15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8f36da9bbf6d2088863f99d0862c2d6d3b4f13554c05d275888d9b40d90e966
|
3 |
+
size 204292692
|
train4/rwkv-16.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41dbd1cdb8b12cdcfb9dfde5b01bf46c5eb458c4b2eaf749a0c2819a97c3fc21
|
3 |
+
size 204292692
|
train4/rwkv-2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f594020ae0ea7634a4f6a23f3a59762891c4793aaf7f4814e0694a27f8f7c386
|
3 |
+
size 204292621
|
train4/rwkv-3.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f50cde821fae0a01ae471a0be8b18bf10d1aa3eb888b627a10889ff74d4ffd5
|
3 |
+
size 204292621
|
train4/rwkv-4.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f97a880c0b1f42ce7e812a12cb44aba3c1b4de53c7f8c4ca2349ff4b44f645f2
|
3 |
+
size 204292621
|
train4/rwkv-5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c96272982cfa0f0218ed58aec35bd6895c1b77c445138fbefc2dacf2d9068e0
|
3 |
+
size 204292621
|
train4/rwkv-6.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:234b43aba921bc06796e5f2818af0d990e45d9501505f984c85331d6d8050b36
|
3 |
+
size 204292621
|
train4/rwkv-7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6d99abc3925beff55a2676653918c5f159f71681b2e5dbf8100c7059747b99b
|
3 |
+
size 204292621
|
train4/rwkv-8.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d15174fe80b350249c847b086aff1a268db5dfa8699ca2ef629b119193c9f77
|
3 |
+
size 204292621
|
train4/rwkv-9.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a62f03df3a8faae097146195433c4f80ebea99f4898746451e13210bf41356f3
|
3 |
+
size 204292621
|
train4/train_log.txt
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
NEW RUN 2023-12-10-16-53-12
|
2 |
+
{'load_model': '/content/RWKV-LM-LoRA/RWKV-v4neo/RWKV-4-7B-world-one-novel-tuned-65k-LoRA-rwkv-12.pth', 'wandb': '', 'proj_dir': '/content/RWKV-LM-LoRA//lora/', 'random_seed': -1, 'data_file': '/content/RWKV-LM-LoRA/RWKV-v4neo/data_text_document', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 2000, 'epoch_count': 200, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 16384, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 5e-05, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.999, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'lora': True, 'lora_load': '', 'lora_r': 128, 'lora_alpha': 128.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ln,time', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 4, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_2', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-12-10-16-53-12', 'betas': (0.9, 0.999), 'real_bsz': 2, 'run_name': '65536 ctx1024 L32 D4096'}
|
3 |
+
{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 4, 'train_micro_batch_size_per_gpu': 2, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
|
4 |
+
NEW RUN 2023-12-10-16-53-12
|
5 |
+
{'load_model': '/content/RWKV-LM-LoRA/RWKV-v4neo/RWKV-4-7B-world-one-novel-tuned-65k-LoRA-rwkv-12.pth', 'wandb': '', 'proj_dir': '/content/RWKV-LM-LoRA//lora/', 'random_seed': -1, 'data_file': '/content/RWKV-LM-LoRA/RWKV-v4neo/data_text_document', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 2000, 'epoch_count': 200, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 16384, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 5e-05, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.999, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'lora': True, 'lora_load': '', 'lora_r': 128, 'lora_alpha': 128.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ln,time', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 4, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_2', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-12-10-16-53-12', 'betas': (0.9, 0.999), 'real_bsz': 2, 'run_name': '65536 ctx1024 L32 D4096'}
|
6 |
+
{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 4, 'train_micro_batch_size_per_gpu': 2, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
|
7 |
+
NEW RUN 2023-12-10-16-53-12
|
8 |
+
{'load_model': '/content/RWKV-LM-LoRA/RWKV-v4neo/RWKV-4-7B-world-one-novel-tuned-65k-LoRA-rwkv-12.pth', 'wandb': '', 'proj_dir': '/content/RWKV-LM-LoRA//lora/', 'random_seed': -1, 'data_file': '/content/RWKV-LM-LoRA/RWKV-v4neo/data_text_document', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 2000, 'epoch_count': 200, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 16384, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 5e-05, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.999, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'lora': True, 'lora_load': '', 'lora_r': 128, 'lora_alpha': 128.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ln,time', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 4, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_2', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-12-10-16-53-12', 'betas': (0.9, 0.999), 'real_bsz': 2, 'run_name': '65536 ctx1024 L32 D4096'}
|
9 |
+
{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 4, 'train_micro_batch_size_per_gpu': 2, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
|
10 |
+
NEW RUN 2023-12-10-16-53-12
|
11 |
+
{'load_model': '/content/RWKV-LM-LoRA/RWKV-v4neo/RWKV-4-7B-world-one-novel-tuned-65k-LoRA-rwkv-12.pth', 'wandb': '', 'proj_dir': '/content/RWKV-LM-LoRA//lora/', 'random_seed': -1, 'data_file': '/content/RWKV-LM-LoRA/RWKV-v4neo/data_text_document', 'data_type': 'binidx', 'vocab_size': 65536, 'ctx_len': 1024, 'epoch_steps': 2000, 'epoch_count': 200, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 2, 'n_layer': 32, 'n_embd': 4096, 'dim_att': 4096, 'dim_ffn': 16384, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 5e-05, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.999, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'lora': True, 'lora_load': '', 'lora_r': 128, 'lora_alpha': 128.0, 'lora_dropout': 0.01, 'lora_parts': 'att,ln,time', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': 4, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed_stage_2', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-12-10-16-53-12', 'betas': (0.9, 0.999), 'real_bsz': 2, 'run_name': '65536 ctx1024 L32 D4096'}
|
12 |
+
{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 4, 'train_micro_batch_size_per_gpu': 2, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}}
|
13 |
+
0 2.196787 8.9961 0.00004990 2023-12-10 17:09:27.011339 0
|
14 |
+
1 2.177770 8.8266 0.00004980 2023-12-10 17:23:17.475495 1
|
15 |
+
2 2.181141 8.8564 0.00004970 2023-12-10 17:37:08.332684 2
|
16 |
+
3 2.166461 8.7273 0.00004960 2023-12-10 17:50:58.941680 3
|
17 |
+
4 2.170504 8.7627 0.00004950 2023-12-10 18:04:49.591724 4
|
18 |
+
5 2.164547 8.7107 0.00004940 2023-12-10 18:18:40.549475 5
|
19 |
+
6 2.162645 8.6941 0.00004930 2023-12-10 18:32:31.672345 6
|
20 |
+
7 2.163926 8.7052 0.00004920 2023-12-10 18:46:22.697187 7
|
21 |
+
8 2.161668 8.6856 0.00004910 2023-12-10 19:00:13.362314 8
|
22 |
+
9 2.160648 8.6768 0.00004900 2023-12-10 19:14:04.227370 9
|
23 |
+
10 2.158289 8.6563 0.00004891 2023-12-10 19:27:55.412297 10
|
24 |
+
11 2.159008 8.6625 0.00004881 2023-12-10 19:41:46.268648 11
|
25 |
+
12 2.157488 8.6494 0.00004871 2023-12-10 19:55:37.045079 12
|
26 |
+
13 2.154871 8.6268 0.00004861 2023-12-10 20:09:28.171833 13
|
27 |
+
14 2.149664 8.5820 0.00004851 2023-12-10 20:23:18.914219 14
|
28 |
+
15 2.142840 8.5236 0.00004842 2023-12-10 20:37:09.509170 15
|
29 |
+
16 2.150937 8.5929 0.00004832 2023-12-10 20:51:00.255558 16
|