diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..54e45ac78bf516cfedd1b4743b235d8df1add43b --- /dev/null +++ b/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "/mnt/amlfs-01/home/seonghyeony/checkpoints/groot_s_idm_so100", + "action_dim": 32, + "action_head_cfg": { + "_convert_": "object", + "_target_": "gr00t.model.action_head.flow_matching_action_head_idm.FlowMatchingActionHeadIDM", + "config": { + "_recursive_": false, + "_target_": "gr00t.model.action_head.flow_matching_action_head_idm.FlowMatchingActionHeadIDMConfig", + "action_dim": 32, + "action_horizon": 16, + "add_pos_embed": true, + "add_seperator_token": true, + "add_view_embed": true, + "backbone_features_projector_cfg": null, + "diffusion_model_cfg": { + "_target_": "gr00t.model.action_head.cross_attention_dit.DiT", + "attention_head_dim": 64, + "dropout": 0.2, + "final_dropout": true, + "interleave_self_attention": true, + "norm_type": "ada_norm", + "num_attention_heads": 16, + "num_layers": 8, + "output_dim": 1024, + "positional_embeddings": null + }, + "hidden_size": 1024, + "max_action_dim": 32, + "max_num_views": 3, + "max_state_dim": 44, + "mm_projector_cfg": { + "_convert_": "object", + "_target_": "gr00t.model.action_head.multimodal_projector.MultimodalProjector", + "config": { + "_target_": "gr00t.model.action_head.multimodal_projector.MultimodalProjectorConfig", + "hidden_size": 1024, + "mm_hidden_size": 1024, + "mm_projector_type": "mlp_doubledownsample" + } + }, + "mm_vision_select_layer": -2, + "model_dtype": "float32", + "noise_beta_alpha": 1.5, + "noise_beta_beta": 1.0, + "noise_s": 0.999, + "num_inference_timesteps": 16, + "num_timestep_buckets": 1000, + "siglip_hidden_size": 1024, + "siglip_model_cfg": { + "_convert_": "object", + "_target_": "gr00t.model.action_head.siglip.SiglipModel.from_pretrained", + "pretrained_model_name_or_path": "google/siglip2-large-patch16-256" + }, + "tune_vision_tower": true, + "vl_self_attention_cfg": { + "_target_": "gr00t.model.action_head.cross_attention_dit.SelfAttentionTransformer", + "attention_head_dim": 64, + "dropout": 0.2, + "final_dropout": true, + "num_attention_heads": 16, + "num_layers": 4, + "positional_embeddings": null + } + } + }, + "action_horizon": 16, + "architectures": [ + "DualBrain" + ], + "backbone_cfg": { + "_target_": "gr00t.model.backbone.IdentityBackbone" + }, + "hidden_size": 0, + "model_dtype": "float32", + "model_type": "dual_brain", + "resume_path": "/mnt/amlfs-01/home/seonghyeony/checkpoints/groot_s_idm_so100", + "torch_dtype": "bfloat16", + "transformers_version": "4.45.2" +} diff --git a/experiment_cfg/conf.yaml b/experiment_cfg/conf.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a8811c761f5ccf06b9c254f4bed889f073677052 --- /dev/null +++ b/experiment_cfg/conf.yaml @@ -0,0 +1,11099 @@ +model: + _target_: gr00t.model.idm.IDM + _convert_: object + config: + _target_: gr00t.model.idm.IDMConfig + _recursive_: false + model_dtype: float32 + hidden_size: 0 + action_horizon: 16 + action_dim: 32 + backbone_cfg: + _target_: gr00t.model.backbone.IdentityBackbone + action_head_cfg: + _target_: gr00t.model.action_head.flow_matching_action_head_idm.FlowMatchingActionHeadIDM + _convert_: object + config: + _target_: gr00t.model.action_head.flow_matching_action_head_idm.FlowMatchingActionHeadIDMConfig + _recursive_: false + add_seperator_token: true + add_pos_embed: true + model_dtype: float32 + mm_vision_select_layer: -2 + max_state_dim: 44 + max_action_dim: 32 + hidden_size: 1024 + tune_vision_tower: true + add_view_embed: true + max_num_views: 3 + siglip_model_cfg: + _target_: gr00t.model.action_head.siglip.SiglipModel.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + siglip_hidden_size: 1024 + vl_self_attention_cfg: + _target_: gr00t.model.action_head.cross_attention_dit.SelfAttentionTransformer + positional_embeddings: null + num_layers: 4 + num_attention_heads: 16 + attention_head_dim: 64 + dropout: 0.2 + final_dropout: true + diffusion_model_cfg: + _target_: gr00t.model.action_head.cross_attention_dit.DiT + positional_embeddings: null + num_layers: 8 + num_attention_heads: 16 + attention_head_dim: 64 + norm_type: ada_norm + dropout: 0.2 + final_dropout: true + output_dim: 1024 + interleave_self_attention: true + mm_projector_cfg: + _target_: gr00t.model.action_head.multimodal_projector.MultimodalProjector + _convert_: object + config: + _target_: gr00t.model.action_head.multimodal_projector.MultimodalProjectorConfig + hidden_size: 1024 + mm_hidden_size: 1024 + mm_projector_type: mlp_doubledownsample + action_dim: 32 + action_horizon: 16 + num_inference_timesteps: 16 + noise_beta_alpha: 1.5 + noise_beta_beta: 1.0 + noise_s: 0.999 + num_timestep_buckets: 1000 + backbone_features_projector_cfg: null +train_dataset: + _target_: gr00t.data.dataset.lerobot_sharded.ShardedLeRobotMixtureDataset.from_mixture_spec + _convert_: object + mixture_spec: + - dataset_path: + - /mnt/amlfs-01/home/seonghyeony/data/0418/so100.strawberry_grape_10 + - /mnt/amlfs-01/home/seonghyeony/data/0418/so100.tictac-bot_13 + dataset_weight: 1.0 + dataset_class: gr00t.data.dataset.lerobot_sharded.ShardedLeRobotSingleDataset + all_modality_configs: + robocasa_gr1_arms_only_fourier_hands: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + robocasa_gr1_arms_waist_fourier_hands: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + robocasa_gr1_fixed_lower_body_fourier_hands: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.agentview_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - state.neck + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - action.neck + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + robocasa_bimanual_panda_parallel_gripper: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_gripper_qpos + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_gripper_qpos + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_gripper_close + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_gripper_close + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + robocasa_bimanual_panda_inspire_hand: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_hand + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_hand + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_hand + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_hand + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + robocasa_panda_omron: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.end_effector_position_relative + - state.end_effector_rotation_relative + - state.gripper_qpos + - state.base_position + - state.base_rotation + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.end_effector_position + - action.end_effector_rotation + - action.gripper_close + - action.base_motion + - action.control_mode + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + gr1_unified: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_bg_crop_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.coarse_action + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_droid: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.eef_position + - state.eef_rotation + - state.gripper_position + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.eef_position_delta + - action.eef_rotation_delta + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_fractal: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image_pad_res256_freq03 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.eef_position + - state.eef_rotation + - state.gripper_closedness_commanded + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.world_vector + - action.rotation_delta + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.natural_language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_language_table: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.rgb_pad_res256_freq10 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.effector_translation + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.action + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_bridge: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image_0 + - video.image_1 + - video.image_2 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.eef_position + - state.eef_rotation + - state.gripper_closed + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.eef_position + - action.eef_rotation + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_mutex: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image + - video.wrist_image + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.joint_angles + - state.gripper_closed + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.eef_position + - action.eef_rotation + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_plex: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image + - video.wrist_image + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.state + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.eef_position + - action.eef_rotation + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_roboset: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image_left + - video.image_right + - video.image_wrist + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.joint_position + - state.gripper_closed + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.joint_position + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + hot3d_hands_only: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_wrist_position + - state.left_wrist_rotation + - state.left_joint_rotation + - state.right_wrist_position + - state.right_wrist_rotation + - state.right_joint_rotation + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_wrist_position + - action.left_wrist_rotation + - action.left_joint_rotation + - action.right_wrist_position + - action.right_wrist_rotation + - action.right_joint_rotation + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + agibot: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.top_head + - video.hand_left + - video.hand_right + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm_joint_position + - state.right_arm_joint_position + - state.left_effector_position + - state.right_effector_position + - state.head_position + - state.waist_position + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm_joint_position + - action.right_arm_joint_position + - action.left_effector_position + - action.right_effector_position + - action.head_position + - action.waist_position + - action.robot_velocity + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.agibot.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + lapa: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + dream: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_bg_crop_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.coarse_action + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + gr1_unified_segmentation: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_bg_crop_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.segmentation_target + - action.segmentation_target_mask + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.coarse_action + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + so100: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.webcam + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.main_shoulder_pan + - state.main_shoulder_lift + - state.main_elbow_flex + - state.main_wrist_flex + - state.main_wrist_roll + - state.main_gripper + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.main_shoulder_pan + - action.main_shoulder_lift + - action.main_elbow_flex + - action.main_wrist_flex + - action.main_wrist_roll + - action.main_gripper + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + all_transforms: + robocasa_gr1_arms_only_fourier_hands: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + normalization_modes: + state.left_arm: min_max + state.right_arm: min_max + state.left_hand: min_max + state.right_hand: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + normalization_modes: + action.right_arm: min_max + action.left_arm: min_max + action.right_hand: min_max + action.left_hand: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + robocasa_gr1_arms_waist_fourier_hands: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + normalization_modes: + state.left_arm: min_max + state.right_arm: min_max + state.left_hand: min_max + state.right_hand: min_max + state.waist: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + normalization_modes: + action.right_arm: min_max + action.left_arm: min_max + action.right_hand: min_max + action.left_hand: min_max + action.waist: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + robocasa_gr1_fixed_lower_body_fourier_hands: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.agentview_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.agentview_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.agentview_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - state.neck + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - state.neck + normalization_modes: + state.left_arm: min_max + state.right_arm: min_max + state.left_hand: min_max + state.right_hand: min_max + state.waist: min_max + state.neck: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - action.neck + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - action.neck + normalization_modes: + action.right_arm: min_max + action.left_arm: min_max + action.right_hand: min_max + action.left_hand: min_max + action.waist: min_max + action.neck: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.agentview_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - state.neck + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - action.neck + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + robocasa_bimanual_panda_parallel_gripper: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_gripper_qpos + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_gripper_qpos + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_gripper_qpos + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_gripper_qpos + normalization_modes: + state.right_arm_eef_pos: min_max + state.right_gripper_qpos: min_max + state.left_arm_eef_pos: min_max + state.left_gripper_qpos: min_max + target_rotations: + state.right_arm_eef_quat: rotation_6d + state.left_arm_eef_quat: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_gripper_close + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_gripper_close + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_gripper_close + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_gripper_close + normalization_modes: + action.right_gripper_close: binary + action.left_gripper_close: binary + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + state_concat_order: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_gripper_qpos + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_gripper_qpos + action_concat_order: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_gripper_close + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_gripper_close + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + robocasa_bimanual_panda_inspire_hand: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_hand + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_hand + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_hand + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_hand + normalization_modes: + state.right_arm_eef_pos: min_max + state.right_hand: min_max + state.left_arm_eef_pos: min_max + state.left_hand: min_max + target_rotations: + state.right_arm_eef_quat: rotation_6d + state.left_arm_eef_quat: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_hand + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_hand + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_hand + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_hand + normalization_modes: + action.right_hand: min_max + action.left_hand: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + state_concat_order: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_hand + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_hand + action_concat_order: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_hand + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_hand + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + robocasa_panda_omron: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.end_effector_position_relative + - state.end_effector_rotation_relative + - state.gripper_qpos + - state.base_position + - state.base_rotation + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.end_effector_position_relative + - state.end_effector_rotation_relative + - state.gripper_qpos + - state.base_position + - state.base_rotation + normalization_modes: + state.end_effector_position_relative: min_max + state.end_effector_rotation_relative: min_max + state.gripper_qpos: min_max + state.base_position: min_max + state.base_rotation: min_max + target_rotations: + state.end_effector_rotation_relative: rotation_6d + state.base_rotation: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.end_effector_position + - action.end_effector_rotation + - action.gripper_close + - action.base_motion + - action.control_mode + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.end_effector_position + - action.end_effector_rotation + - action.gripper_close + - action.base_motion + - action.control_mode + normalization_modes: + action.end_effector_position: min_max + action.end_effector_rotation: min_max + action.gripper_close: binary + action.base_motion: min_max + action.control_mode: binary + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + state_concat_order: + - state.end_effector_position_relative + - state.end_effector_rotation_relative + - state.gripper_qpos + - state.base_position + - state.base_rotation + action_concat_order: + - action.end_effector_position + - action.end_effector_rotation + - action.gripper_close + - action.base_motion + - action.control_mode + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + gr1_unified: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionSinCosTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + normalization_modes: + action.left_arm: min_max + action.right_arm: min_max + action.left_hand: min_max + action.right_hand: min_max + action.waist: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_bg_crop_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_droid: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_position + normalization_modes: + state.eef_position: min_max + state.gripper_position: min_max + target_rotations: + state.eef_rotation: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.eef_position_delta + - action.eef_rotation_delta + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.eef_position_delta + - action.eef_rotation_delta + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.eef_rotation_delta: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + state_concat_order: + - state.eef_position + - state.eef_rotation + - state.gripper_position + action_concat_order: + - action.eef_position_delta + - action.eef_rotation_delta + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_fractal: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image_pad_res256_freq03 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image_pad_res256_freq03 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image_pad_res256_freq03 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image_pad_res256_freq03 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image_pad_res256_freq03 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_closedness_commanded + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_closedness_commanded + normalization_modes: + state.eef_position: min_max + state.gripper_closedness_commanded: min_max + target_rotations: + state.eef_rotation: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.world_vector + - action.rotation_delta + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.world_vector + - action.rotation_delta + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.rotation_delta: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image_pad_res256_freq03 + state_concat_order: + - state.eef_position + - state.eef_rotation + - state.gripper_closedness_commanded + action_concat_order: + - action.world_vector + - action.rotation_delta + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_language_table: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.rgb_pad_res256_freq10 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.rgb_pad_res256_freq10 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.rgb_pad_res256_freq10 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.rgb_pad_res256_freq10 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.rgb_pad_res256_freq10 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.effector_translation + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.effector_translation + normalization_modes: + state.effector_translation: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.action + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.action + normalization_modes: + action.action: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.rgb_pad_res256_freq10 + state_concat_order: + - state.effector_translation + action_concat_order: + - action.action + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_bridge: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_closed + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_closed + normalization_modes: + state.eef_position: min_max + state.gripper_closed: min_max + target_rotations: + state.eef_rotation: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.eef_rotation: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image_0 + - video.image_1 + - video.image_2 + state_concat_order: + - state.eef_position + - state.eef_rotation + - state.gripper_closed + action_concat_order: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + hot3d_hands_only: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_wrist_position + - state.left_wrist_rotation + - state.left_joint_rotation + - state.right_wrist_position + - state.right_wrist_rotation + - state.right_joint_rotation + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_wrist_position + - state.left_wrist_rotation + - state.left_joint_rotation + - state.right_wrist_position + - state.right_wrist_rotation + - state.right_joint_rotation + normalization_modes: + state.left_wrist_position: min_max + state.right_wrist_position: min_max + target_rotations: + state.left_wrist_rotation: quaternion + state.right_wrist_rotation: quaternion + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_wrist_position + - action.left_wrist_rotation + - action.left_joint_rotation + - action.right_wrist_position + - action.right_wrist_rotation + - action.right_joint_rotation + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_wrist_position + - action.left_wrist_rotation + - action.left_joint_rotation + - action.right_wrist_position + - action.right_wrist_rotation + - action.right_joint_rotation + normalization_modes: + action.left_wrist_position: min_max + action.right_wrist_position: min_max + target_rotations: + action.left_wrist_rotation: quaternion + action.right_wrist_rotation: quaternion + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view + state_concat_order: + - state.left_wrist_position + - state.left_wrist_rotation + - state.left_joint_rotation + - state.right_wrist_position + - state.right_wrist_rotation + - state.right_joint_rotation + action_concat_order: + - action.left_wrist_position + - action.left_wrist_rotation + - action.left_joint_rotation + - action.right_wrist_position + - action.right_wrist_rotation + - action.right_joint_rotation + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + agibot: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm_joint_position + - state.right_arm_joint_position + - state.left_effector_position + - state.right_effector_position + - state.head_position + - state.waist_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_arm_joint_position + - state.right_arm_joint_position + - state.left_effector_position + - state.right_effector_position + - state.head_position + - state.waist_position + normalization_modes: + state.left_arm_joint_position: min_max + state.right_arm_joint_position: min_max + state.left_effector_position: min_max + state.right_effector_position: min_max + state.head_position: min_max + state.waist_position: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm_joint_position + - action.right_arm_joint_position + - action.left_effector_position + - action.right_effector_position + - action.head_position + - action.waist_position + - action.robot_velocity + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm_joint_position + - action.right_arm_joint_position + - action.left_effector_position + - action.right_effector_position + - action.head_position + - action.waist_position + - action.robot_velocity + normalization_modes: + action.left_arm_joint_position: min_max + action.right_arm_joint_position: min_max + action.left_effector_position: min_max + action.right_effector_position: min_max + action.head_position: min_max + action.waist_position: min_max + action.robot_velocity: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.top_head + - video.hand_left + - video.hand_right + state_concat_order: + - state.left_arm_joint_position + - state.right_arm_joint_position + - state.left_effector_position + - state.right_effector_position + - state.head_position + - state.waist_position + action_concat_order: + - action.left_arm_joint_position + - action.right_arm_joint_position + - action.left_effector_position + - action.right_effector_position + - action.head_position + - action.waist_position + - action.robot_velocity + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_mutex: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image + - video.wrist_image + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image + - video.wrist_image + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image + - video.wrist_image + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image + - video.wrist_image + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image + - video.wrist_image + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.joint_angles + - state.gripper_closed + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.joint_angles + - state.gripper_closed + normalization_modes: + state.joint_angles: min_max + state.gripper_closed: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.eef_rotation: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image + - video.wrist_image + state_concat_order: + - state.joint_angles + - state.gripper_closed + action_concat_order: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_plex: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image + - video.wrist_image + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image + - video.wrist_image + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image + - video.wrist_image + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image + - video.wrist_image + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image + - video.wrist_image + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.state + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.state + normalization_modes: + state.state: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.eef_rotation: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image + - video.wrist_image + state_concat_order: + - state.state + action_concat_order: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_roboset: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.joint_position + - state.gripper_closed + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.joint_position + - state.gripper_closed + normalization_modes: + state.joint_position: min_max + state.gripper_closed: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.joint_position + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.joint_position + - action.gripper_position + normalization_modes: + action.joint_position: min_max + action.gripper_position: binary + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image_left + - video.image_right + - video.image_wrist + state_concat_order: + - state.joint_position + - state.gripper_closed + action_concat_order: + - action.joint_position + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + lapa: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + dream: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_bg_crop_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + gr1_unified_segmentation: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionSinCosTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.segmentation_target + - action.segmentation_target_mask + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_bg_crop_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action_concat_order: + - action.segmentation_target + - action.segmentation_target_mask + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + so100: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.webcam + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.webcam + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.webcam + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.webcam + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.webcam + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.main_shoulder_pan + - state.main_shoulder_lift + - state.main_elbow_flex + - state.main_wrist_flex + - state.main_wrist_roll + - state.main_gripper + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.main_shoulder_pan + - state.main_shoulder_lift + - state.main_elbow_flex + - state.main_wrist_flex + - state.main_wrist_roll + - state.main_gripper + normalization_modes: + state.main_shoulder_pan: min_max + state.main_shoulder_lift: min_max + state.main_elbow_flex: min_max + state.main_wrist_flex: min_max + state.main_wrist_roll: min_max + state.main_gripper: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.main_shoulder_pan + - action.main_shoulder_lift + - action.main_elbow_flex + - action.main_wrist_flex + - action.main_wrist_roll + - action.main_gripper + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.main_shoulder_pan + - action.main_shoulder_lift + - action.main_elbow_flex + - action.main_wrist_flex + - action.main_wrist_roll + - action.main_gripper + normalization_modes: + action.main_shoulder_pan: min_max + action.main_shoulder_lift: min_max + action.main_elbow_flex: min_max + action.main_wrist_flex: min_max + action.main_wrist_roll: min_max + action.main_gripper: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.webcam + state_concat_order: + - state.main_shoulder_pan + - state.main_shoulder_lift + - state.main_elbow_flex + - state.main_wrist_flex + - state.main_wrist_roll + - state.main_gripper + action_concat_order: + - action.main_shoulder_pan + - action.main_shoulder_lift + - action.main_elbow_flex + - action.main_wrist_flex + - action.main_wrist_roll + - action.main_gripper + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + metadata_versions: + robocasa_gr1_arms_only_fourier_hands: '0217' + robocasa_gr1_fixed_lower_body_fourier_hands: '0217' + robocasa_bimanual_panda_parallel_gripper: '0217' + robocasa_bimanual_panda_inspire_hand: '0217' + robocasa_panda_omron: '0217' + gr1_unified: '0304' + oxe_droid: '0221' + oxe_fractal: '0221' + oxe_language_table: '0221' + oxe_bridge: '0221' + robocasa_gr1_arms_waist_fourier_hands: '0225' + hot3d_hands_only: '0220' + agibot: '0306' + oxe_mutex: '0303' + oxe_plex: '0303' + oxe_roboset: '0303' + lapa: '0305' + dream: '0308' + gr1_unified_segmentation: '0309' + so100: '0427' + dataset_kwargs: + video_backend: decord + use_global_metadata: true + mixture_kwargs: + training: true + balance_dataset_weights: false + seed: 42 +trainer: + _target_: gr00t.experiment.dual_brain.experiment.DualBrainTrainer + _partial_: true + _recursive_: false + callbacks: null + model: ??? + train_dataset: ??? + compute_dtype: ??? + benchmark_time: false + enable_profiling: false + profiling_steps: 5 +wandb_project: dream_idm +output_dir: /mnt/amlfs-01/home/seonghyeony/checkpoints/gr00t_s_idm_so100 +load_from_yaml: null +gear_credentials: /mnt/amlfs-01/home/seonghyeony/.gear/data_credentials +upload_checkpoints: false +upload_every: 10000 +upload_last_n_checkpoints: 5 +remove_unused_columns: false +bf16: true +tf32: true +global_batch_size: 256 +raise_error_if_global_batch_size_not_set: false +per_device_train_batch_size: 8 +per_device_eval_batch_size: 64 +gradient_accumulation_steps: 1 +dataloader_num_workers: 6 +dataloader_pin_memory: false +dataloader_persistent_workers: true +optim: adamw_torch +learning_rate: 0.0001 +adam_beta1: 0.95 +adam_beta2: 0.999 +adam_epsilon: 1.0e-08 +weight_decay: 1.0e-05 +lr_scheduler_type: cosine +warmup_ratio: 0.05 +logging_steps: 10.0 +num_train_epochs: 1000 +max_steps: 30000 +save_strategy: steps +save_steps: 1000 +eval_strategy: 'no' +save_total_limit: 20 +report_to: wandb +seed: 42 +do_eval: false +gradient_checkpointing: false +ddp_find_unused_parameters: false +ddp_bucket_cap_mb: 100 +ray_num_workers: 32 +eval_bf16: true +torch_compile_mode: null +pretrained_model_path: null +only_tune_projectors: false +training_args: + _target_: transformers.TrainingArguments + output_dir: /mnt/amlfs-01/home/seonghyeony/checkpoints/gr00t_s_idm_so100 + run_name: gr00t_s_idm_so100 + remove_unused_columns: false + deepspeed: gr00t/gr00t/experiment/dual_brain/configs/deepspeed/zero2.json + gradient_checkpointing: false + bf16: true + tf32: true + per_device_train_batch_size: 8 + per_device_eval_batch_size: 64 + gradient_accumulation_steps: 1 + dataloader_num_workers: 6 + dataloader_pin_memory: false + dataloader_persistent_workers: true + optim: adamw_torch + adam_beta1: 0.95 + adam_beta2: 0.999 + adam_epsilon: 1.0e-08 + learning_rate: 0.0001 + weight_decay: 1.0e-05 + warmup_ratio: 0.05 + lr_scheduler_type: cosine + logging_steps: 10.0 + num_train_epochs: 1000 + max_steps: 30000 + save_strategy: steps + save_steps: 1000 + save_total_limit: 20 + report_to: wandb + seed: 42 + do_eval: false + ddp_find_unused_parameters: false + ddp_bucket_cap_mb: 100 + torch_compile_mode: null +add_seperator_token: true +add_pos_embed: true +hidden_size: 1024 +attn_dropout: 0.2 +siglip_hidden_size: 1024 +siglip_version: google/siglip2-large-patch16-256 +action_head_cfg: + _target_: gr00t.model.action_head.flow_matching_action_head_idm.FlowMatchingActionHeadIDM + _convert_: object + config: + _target_: gr00t.model.action_head.flow_matching_action_head_idm.FlowMatchingActionHeadIDMConfig + _recursive_: false + add_seperator_token: true + add_pos_embed: true + model_dtype: float32 + mm_vision_select_layer: -2 + max_state_dim: 44 + max_action_dim: 32 + hidden_size: 1024 + tune_vision_tower: true + add_view_embed: true + max_num_views: 3 + siglip_model_cfg: + _target_: gr00t.model.action_head.siglip.SiglipModel.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + siglip_hidden_size: 1024 + vl_self_attention_cfg: + _target_: gr00t.model.action_head.cross_attention_dit.SelfAttentionTransformer + positional_embeddings: null + num_layers: 4 + num_attention_heads: 16 + attention_head_dim: 64 + dropout: 0.2 + final_dropout: true + diffusion_model_cfg: + _target_: gr00t.model.action_head.cross_attention_dit.DiT + positional_embeddings: null + num_layers: 8 + num_attention_heads: 16 + attention_head_dim: 64 + norm_type: ada_norm + dropout: 0.2 + final_dropout: true + output_dim: 1024 + interleave_self_attention: true + mm_projector_cfg: + _target_: gr00t.model.action_head.multimodal_projector.MultimodalProjector + _convert_: object + config: + _target_: gr00t.model.action_head.multimodal_projector.MultimodalProjectorConfig + hidden_size: 1024 + mm_hidden_size: 1024 + mm_projector_type: mlp_doubledownsample + action_dim: 32 + action_horizon: 16 + num_inference_timesteps: 16 + noise_beta_alpha: 1.5 + noise_beta_beta: 1.0 + noise_s: 0.999 + num_timestep_buckets: 1000 + backbone_features_projector_cfg: null +backbone_hidden_size: 0 +backbone_cfg: + _target_: gr00t.model.backbone.IdentityBackbone +embodiment_tag_to_projector_index: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +num_visual_tokens_per_frame: 16 +max_action_dim: 32 +language_dropout_prob: 0.0 +model_image_resolution: 224 +max_sequence_length: 112 +model_specific_transform: + _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +data_collator: + _target_: gr00t.model.transform.xemb_siglip_idm_nostate_lang.DefaultDataCollator +use_global_metadata: true +action_horizon: 16 +state_horizon: 1 +image_resolution: 224 +totensor_cfg: + _target_: gr00t.data.transform.VideoToTensor + apply_to: ??? +crop_cfg: + _target_: gr00t.data.transform.VideoCrop + apply_to: ??? + scale: 0.95 + mode: random +resize_cfg: + _target_: gr00t.data.transform.VideoResize + apply_to: ??? + height: 224 + width: 224 + interpolation: linear +color_jitter_cfg: + _target_: gr00t.data.transform.VideoColorJitter + apply_to: ??? + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 +random_grayscale_cfg: + _target_: gr00t.data.transform.VideoRandomGrayscale + apply_to: ??? + p: 0.1 +random_posterize_cfg: + _target_: gr00t.data.transform.VideoRandomPosterize + apply_to: ??? + bits: 4 + p: 0.1 +to_numpy_cfg: + _target_: gr00t.data.transform.VideoToNumpy + apply_to: ??? +modality_config_robocasa_gr1_arms_only_fourier_hands: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_robocasa_gr1_arms_only_fourier_hands: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + normalization_modes: + state.left_arm: min_max + state.right_arm: min_max + state.left_hand: min_max + state.right_hand: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + normalization_modes: + action.right_arm: min_max + action.left_arm: min_max + action.right_hand: min_max + action.left_hand: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_robocasa_gr1_arms_waist_fourier_hands: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_robocasa_gr1_arms_waist_fourier_hands: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + normalization_modes: + state.left_arm: min_max + state.right_arm: min_max + state.left_hand: min_max + state.right_hand: min_max + state.waist: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + normalization_modes: + action.right_arm: min_max + action.left_arm: min_max + action.right_hand: min_max + action.left_hand: min_max + action.waist: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_robocasa_panda_omron: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.end_effector_position_relative + - state.end_effector_rotation_relative + - state.gripper_qpos + - state.base_position + - state.base_rotation + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.end_effector_position + - action.end_effector_rotation + - action.gripper_close + - action.base_motion + - action.control_mode + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_robocasa_panda_omron: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.end_effector_position_relative + - state.end_effector_rotation_relative + - state.gripper_qpos + - state.base_position + - state.base_rotation + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.end_effector_position_relative + - state.end_effector_rotation_relative + - state.gripper_qpos + - state.base_position + - state.base_rotation + normalization_modes: + state.end_effector_position_relative: min_max + state.end_effector_rotation_relative: min_max + state.gripper_qpos: min_max + state.base_position: min_max + state.base_rotation: min_max + target_rotations: + state.end_effector_rotation_relative: rotation_6d + state.base_rotation: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.end_effector_position + - action.end_effector_rotation + - action.gripper_close + - action.base_motion + - action.control_mode + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.end_effector_position + - action.end_effector_rotation + - action.gripper_close + - action.base_motion + - action.control_mode + normalization_modes: + action.end_effector_position: min_max + action.end_effector_rotation: min_max + action.gripper_close: binary + action.base_motion: min_max + action.control_mode: binary + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + state_concat_order: + - state.end_effector_position_relative + - state.end_effector_rotation_relative + - state.gripper_qpos + - state.base_position + - state.base_rotation + action_concat_order: + - action.end_effector_position + - action.end_effector_rotation + - action.gripper_close + - action.base_motion + - action.control_mode + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_robocasa_gr1_fixed_lower_body_fourier_hands: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.agentview_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - state.neck + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - action.neck + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_robocasa_gr1_fixed_lower_body_fourier_hands: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.agentview_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.agentview_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.agentview_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - state.neck + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - state.neck + normalization_modes: + state.left_arm: min_max + state.right_arm: min_max + state.left_hand: min_max + state.right_hand: min_max + state.waist: min_max + state.neck: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - action.neck + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - action.neck + normalization_modes: + action.right_arm: min_max + action.left_arm: min_max + action.right_hand: min_max + action.left_hand: min_max + action.waist: min_max + action.neck: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.agentview_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - state.neck + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - action.neck + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_robocasa_bimanual_panda_parallel_gripper: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_gripper_qpos + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_gripper_qpos + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_gripper_close + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_gripper_close + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_robocasa_bimanual_panda_parallel_gripper: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_gripper_qpos + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_gripper_qpos + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_gripper_qpos + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_gripper_qpos + normalization_modes: + state.right_arm_eef_pos: min_max + state.right_gripper_qpos: min_max + state.left_arm_eef_pos: min_max + state.left_gripper_qpos: min_max + target_rotations: + state.right_arm_eef_quat: rotation_6d + state.left_arm_eef_quat: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_gripper_close + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_gripper_close + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_gripper_close + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_gripper_close + normalization_modes: + action.right_gripper_close: binary + action.left_gripper_close: binary + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + state_concat_order: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_gripper_qpos + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_gripper_qpos + action_concat_order: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_gripper_close + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_gripper_close + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_robocasa_bimanual_panda_inspire_hand: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_hand + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_hand + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_hand + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_hand + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_robocasa_bimanual_panda_inspire_hand: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_hand + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_hand + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_hand + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_hand + normalization_modes: + state.right_arm_eef_pos: min_max + state.right_hand: min_max + state.left_arm_eef_pos: min_max + state.left_hand: min_max + target_rotations: + state.right_arm_eef_quat: rotation_6d + state.left_arm_eef_quat: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_hand + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_hand + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_hand + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_hand + normalization_modes: + action.right_hand: min_max + action.left_hand: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + state_concat_order: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_hand + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_hand + action_concat_order: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_hand + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_hand + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_gr1_unified_segmentation: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_bg_crop_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.segmentation_target + - action.segmentation_target_mask + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.coarse_action + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_gr1_unified_segmentation: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionSinCosTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.segmentation_target + - action.segmentation_target_mask + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_bg_crop_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action_concat_order: + - action.segmentation_target + - action.segmentation_target_mask + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_gr1_unified: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_bg_crop_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.coarse_action + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_gr1_unified: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionSinCosTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + normalization_modes: + action.left_arm: min_max + action.right_arm: min_max + action.left_hand: min_max + action.right_hand: min_max + action.waist: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_bg_crop_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_so100: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.webcam + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.main_shoulder_pan + - state.main_shoulder_lift + - state.main_elbow_flex + - state.main_wrist_flex + - state.main_wrist_roll + - state.main_gripper + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.main_shoulder_pan + - action.main_shoulder_lift + - action.main_elbow_flex + - action.main_wrist_flex + - action.main_wrist_roll + - action.main_gripper + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_so100: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.webcam + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.webcam + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.webcam + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.webcam + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.webcam + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.main_shoulder_pan + - state.main_shoulder_lift + - state.main_elbow_flex + - state.main_wrist_flex + - state.main_wrist_roll + - state.main_gripper + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.main_shoulder_pan + - state.main_shoulder_lift + - state.main_elbow_flex + - state.main_wrist_flex + - state.main_wrist_roll + - state.main_gripper + normalization_modes: + state.main_shoulder_pan: min_max + state.main_shoulder_lift: min_max + state.main_elbow_flex: min_max + state.main_wrist_flex: min_max + state.main_wrist_roll: min_max + state.main_gripper: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.main_shoulder_pan + - action.main_shoulder_lift + - action.main_elbow_flex + - action.main_wrist_flex + - action.main_wrist_roll + - action.main_gripper + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.main_shoulder_pan + - action.main_shoulder_lift + - action.main_elbow_flex + - action.main_wrist_flex + - action.main_wrist_roll + - action.main_gripper + normalization_modes: + action.main_shoulder_pan: min_max + action.main_shoulder_lift: min_max + action.main_elbow_flex: min_max + action.main_wrist_flex: min_max + action.main_wrist_roll: min_max + action.main_gripper: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.webcam + state_concat_order: + - state.main_shoulder_pan + - state.main_shoulder_lift + - state.main_elbow_flex + - state.main_wrist_flex + - state.main_wrist_roll + - state.main_gripper + action_concat_order: + - action.main_shoulder_pan + - action.main_shoulder_lift + - action.main_elbow_flex + - action.main_wrist_flex + - action.main_wrist_roll + - action.main_gripper + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_oxe_droid: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.eef_position + - state.eef_rotation + - state.gripper_position + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.eef_position_delta + - action.eef_rotation_delta + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_oxe_droid: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_position + normalization_modes: + state.eef_position: min_max + state.gripper_position: min_max + target_rotations: + state.eef_rotation: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.eef_position_delta + - action.eef_rotation_delta + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.eef_position_delta + - action.eef_rotation_delta + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.eef_rotation_delta: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + state_concat_order: + - state.eef_position + - state.eef_rotation + - state.gripper_position + action_concat_order: + - action.eef_position_delta + - action.eef_rotation_delta + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_oxe_fractal: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image_pad_res256_freq03 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.eef_position + - state.eef_rotation + - state.gripper_closedness_commanded + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.world_vector + - action.rotation_delta + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.natural_language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_oxe_fractal: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image_pad_res256_freq03 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image_pad_res256_freq03 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image_pad_res256_freq03 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image_pad_res256_freq03 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image_pad_res256_freq03 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_closedness_commanded + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_closedness_commanded + normalization_modes: + state.eef_position: min_max + state.gripper_closedness_commanded: min_max + target_rotations: + state.eef_rotation: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.world_vector + - action.rotation_delta + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.world_vector + - action.rotation_delta + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.rotation_delta: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image_pad_res256_freq03 + state_concat_order: + - state.eef_position + - state.eef_rotation + - state.gripper_closedness_commanded + action_concat_order: + - action.world_vector + - action.rotation_delta + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_oxe_language_table: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.rgb_pad_res256_freq10 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.effector_translation + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.action + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_oxe_language_table: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.rgb_pad_res256_freq10 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.rgb_pad_res256_freq10 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.rgb_pad_res256_freq10 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.rgb_pad_res256_freq10 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.rgb_pad_res256_freq10 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.effector_translation + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.effector_translation + normalization_modes: + state.effector_translation: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.action + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.action + normalization_modes: + action.action: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.rgb_pad_res256_freq10 + state_concat_order: + - state.effector_translation + action_concat_order: + - action.action + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_oxe_bridge: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image_0 + - video.image_1 + - video.image_2 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.eef_position + - state.eef_rotation + - state.gripper_closed + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.eef_position + - action.eef_rotation + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_oxe_bridge: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_closed + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_closed + normalization_modes: + state.eef_position: min_max + state.gripper_closed: min_max + target_rotations: + state.eef_rotation: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.eef_rotation: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image_0 + - video.image_1 + - video.image_2 + state_concat_order: + - state.eef_position + - state.eef_rotation + - state.gripper_closed + action_concat_order: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_hot3d_hands_only: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_wrist_position + - state.left_wrist_rotation + - state.left_joint_rotation + - state.right_wrist_position + - state.right_wrist_rotation + - state.right_joint_rotation + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_wrist_position + - action.left_wrist_rotation + - action.left_joint_rotation + - action.right_wrist_position + - action.right_wrist_rotation + - action.right_joint_rotation + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_hot3d_hands_only: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_wrist_position + - state.left_wrist_rotation + - state.left_joint_rotation + - state.right_wrist_position + - state.right_wrist_rotation + - state.right_joint_rotation + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_wrist_position + - state.left_wrist_rotation + - state.left_joint_rotation + - state.right_wrist_position + - state.right_wrist_rotation + - state.right_joint_rotation + normalization_modes: + state.left_wrist_position: min_max + state.right_wrist_position: min_max + target_rotations: + state.left_wrist_rotation: quaternion + state.right_wrist_rotation: quaternion + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_wrist_position + - action.left_wrist_rotation + - action.left_joint_rotation + - action.right_wrist_position + - action.right_wrist_rotation + - action.right_joint_rotation + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_wrist_position + - action.left_wrist_rotation + - action.left_joint_rotation + - action.right_wrist_position + - action.right_wrist_rotation + - action.right_joint_rotation + normalization_modes: + action.left_wrist_position: min_max + action.right_wrist_position: min_max + target_rotations: + action.left_wrist_rotation: quaternion + action.right_wrist_rotation: quaternion + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view + state_concat_order: + - state.left_wrist_position + - state.left_wrist_rotation + - state.left_joint_rotation + - state.right_wrist_position + - state.right_wrist_rotation + - state.right_joint_rotation + action_concat_order: + - action.left_wrist_position + - action.left_wrist_rotation + - action.left_joint_rotation + - action.right_wrist_position + - action.right_wrist_rotation + - action.right_joint_rotation + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_agibot: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.top_head + - video.hand_left + - video.hand_right + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm_joint_position + - state.right_arm_joint_position + - state.left_effector_position + - state.right_effector_position + - state.head_position + - state.waist_position + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm_joint_position + - action.right_arm_joint_position + - action.left_effector_position + - action.right_effector_position + - action.head_position + - action.waist_position + - action.robot_velocity + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.agibot.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_agibot: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm_joint_position + - state.right_arm_joint_position + - state.left_effector_position + - state.right_effector_position + - state.head_position + - state.waist_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_arm_joint_position + - state.right_arm_joint_position + - state.left_effector_position + - state.right_effector_position + - state.head_position + - state.waist_position + normalization_modes: + state.left_arm_joint_position: min_max + state.right_arm_joint_position: min_max + state.left_effector_position: min_max + state.right_effector_position: min_max + state.head_position: min_max + state.waist_position: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm_joint_position + - action.right_arm_joint_position + - action.left_effector_position + - action.right_effector_position + - action.head_position + - action.waist_position + - action.robot_velocity + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm_joint_position + - action.right_arm_joint_position + - action.left_effector_position + - action.right_effector_position + - action.head_position + - action.waist_position + - action.robot_velocity + normalization_modes: + action.left_arm_joint_position: min_max + action.right_arm_joint_position: min_max + action.left_effector_position: min_max + action.right_effector_position: min_max + action.head_position: min_max + action.waist_position: min_max + action.robot_velocity: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.top_head + - video.hand_left + - video.hand_right + state_concat_order: + - state.left_arm_joint_position + - state.right_arm_joint_position + - state.left_effector_position + - state.right_effector_position + - state.head_position + - state.waist_position + action_concat_order: + - action.left_arm_joint_position + - action.right_arm_joint_position + - action.left_effector_position + - action.right_effector_position + - action.head_position + - action.waist_position + - action.robot_velocity + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_oxe_mutex: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image + - video.wrist_image + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.joint_angles + - state.gripper_closed + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.eef_position + - action.eef_rotation + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_oxe_mutex: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image + - video.wrist_image + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image + - video.wrist_image + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image + - video.wrist_image + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image + - video.wrist_image + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image + - video.wrist_image + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.joint_angles + - state.gripper_closed + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.joint_angles + - state.gripper_closed + normalization_modes: + state.joint_angles: min_max + state.gripper_closed: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.eef_rotation: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image + - video.wrist_image + state_concat_order: + - state.joint_angles + - state.gripper_closed + action_concat_order: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_oxe_plex: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image + - video.wrist_image + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.state + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.eef_position + - action.eef_rotation + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_oxe_plex: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image + - video.wrist_image + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image + - video.wrist_image + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image + - video.wrist_image + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image + - video.wrist_image + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image + - video.wrist_image + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.state + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.state + normalization_modes: + state.state: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.eef_rotation: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image + - video.wrist_image + state_concat_order: + - state.state + action_concat_order: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_oxe_roboset: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image_left + - video.image_right + - video.image_wrist + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.joint_position + - state.gripper_closed + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.joint_position + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_oxe_roboset: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.joint_position + - state.gripper_closed + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.joint_position + - state.gripper_closed + normalization_modes: + state.joint_position: min_max + state.gripper_closed: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.joint_position + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.joint_position + - action.gripper_position + normalization_modes: + action.joint_position: min_max + action.gripper_position: binary + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image_left + - video.image_right + - video.image_wrist + state_concat_order: + - state.joint_position + - state.gripper_closed + action_concat_order: + - action.joint_position + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_lapa: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_lapa: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_config_dream: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_bg_crop_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.coarse_action + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transform_dream: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_bg_crop_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +modality_configs: + robocasa_gr1_arms_only_fourier_hands: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + robocasa_gr1_arms_waist_fourier_hands: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + robocasa_gr1_fixed_lower_body_fourier_hands: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.agentview_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - state.neck + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - action.neck + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + robocasa_bimanual_panda_parallel_gripper: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_gripper_qpos + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_gripper_qpos + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_gripper_close + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_gripper_close + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + robocasa_bimanual_panda_inspire_hand: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_hand + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_hand + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_hand + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_hand + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + robocasa_panda_omron: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.end_effector_position_relative + - state.end_effector_rotation_relative + - state.gripper_qpos + - state.base_position + - state.base_rotation + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.end_effector_position + - action.end_effector_rotation + - action.gripper_close + - action.base_motion + - action.control_mode + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + gr1_unified: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_bg_crop_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.coarse_action + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_droid: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.eef_position + - state.eef_rotation + - state.gripper_position + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.eef_position_delta + - action.eef_rotation_delta + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_fractal: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image_pad_res256_freq03 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.eef_position + - state.eef_rotation + - state.gripper_closedness_commanded + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.world_vector + - action.rotation_delta + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.natural_language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_language_table: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.rgb_pad_res256_freq10 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.effector_translation + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.action + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_bridge: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image_0 + - video.image_1 + - video.image_2 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.eef_position + - state.eef_rotation + - state.gripper_closed + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.eef_position + - action.eef_rotation + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_mutex: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image + - video.wrist_image + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.joint_angles + - state.gripper_closed + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.eef_position + - action.eef_rotation + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_plex: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image + - video.wrist_image + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.state + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.eef_position + - action.eef_rotation + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + oxe_roboset: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.image_left + - video.image_right + - video.image_wrist + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.joint_position + - state.gripper_closed + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.joint_position + - action.gripper_position + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.language.language_instruction + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + hot3d_hands_only: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_wrist_position + - state.left_wrist_rotation + - state.left_joint_rotation + - state.right_wrist_position + - state.right_wrist_rotation + - state.right_joint_rotation + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_wrist_position + - action.left_wrist_rotation + - action.left_joint_rotation + - action.right_wrist_position + - action.right_wrist_rotation + - action.right_joint_rotation + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + agibot: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.top_head + - video.hand_left + - video.hand_right + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm_joint_position + - state.right_arm_joint_position + - state.left_effector_position + - state.right_effector_position + - state.head_position + - state.waist_position + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm_joint_position + - action.right_arm_joint_position + - action.left_effector_position + - action.right_effector_position + - action.head_position + - action.waist_position + - action.robot_velocity + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.agibot.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + lapa: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.action.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + dream: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_bg_crop_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.coarse_action + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + gr1_unified_segmentation: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.ego_view_bg_crop_pad_res256_freq20 + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.segmentation_target + - action.segmentation_target_mask + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.coarse_action + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions + so100: + video: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 16 + modality_keys: + - video.webcam + state: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - state.main_shoulder_pan + - state.main_shoulder_lift + - state.main_elbow_flex + - state.main_wrist_flex + - state.main_wrist_roll + - state.main_gripper + action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + modality_keys: + - action.main_shoulder_pan + - action.main_shoulder_lift + - action.main_elbow_flex + - action.main_wrist_flex + - action.main_wrist_roll + - action.main_gripper + language: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - annotation.human.task_description + lapa_action: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - lapa_action + dream_actions: + _target_: gr00t.data.dataset.ModalityConfig + delta_indices: + - 0 + modality_keys: + - dream_actions +transforms: + robocasa_gr1_arms_only_fourier_hands: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + normalization_modes: + state.left_arm: min_max + state.right_arm: min_max + state.left_hand: min_max + state.right_hand: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + normalization_modes: + action.right_arm: min_max + action.left_arm: min_max + action.right_hand: min_max + action.left_hand: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + robocasa_gr1_arms_waist_fourier_hands: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + normalization_modes: + state.left_arm: min_max + state.right_arm: min_max + state.left_hand: min_max + state.right_hand: min_max + state.waist: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + normalization_modes: + action.right_arm: min_max + action.left_arm: min_max + action.right_hand: min_max + action.left_hand: min_max + action.waist: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + robocasa_gr1_fixed_lower_body_fourier_hands: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.agentview_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.agentview_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.agentview_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - state.neck + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - state.neck + normalization_modes: + state.left_arm: min_max + state.right_arm: min_max + state.left_hand: min_max + state.right_hand: min_max + state.waist: min_max + state.neck: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - action.neck + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - action.neck + normalization_modes: + action.right_arm: min_max + action.left_arm: min_max + action.right_hand: min_max + action.left_hand: min_max + action.waist: min_max + action.neck: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.agentview_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - state.neck + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - action.neck + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + robocasa_bimanual_panda_parallel_gripper: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_gripper_qpos + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_gripper_qpos + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_gripper_qpos + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_gripper_qpos + normalization_modes: + state.right_arm_eef_pos: min_max + state.right_gripper_qpos: min_max + state.left_arm_eef_pos: min_max + state.left_gripper_qpos: min_max + target_rotations: + state.right_arm_eef_quat: rotation_6d + state.left_arm_eef_quat: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_gripper_close + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_gripper_close + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_gripper_close + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_gripper_close + normalization_modes: + action.right_gripper_close: binary + action.left_gripper_close: binary + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + state_concat_order: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_gripper_qpos + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_gripper_qpos + action_concat_order: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_gripper_close + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_gripper_close + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + robocasa_bimanual_panda_inspire_hand: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_hand + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_hand + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_hand + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_hand + normalization_modes: + state.right_arm_eef_pos: min_max + state.right_hand: min_max + state.left_arm_eef_pos: min_max + state.left_hand: min_max + target_rotations: + state.right_arm_eef_quat: rotation_6d + state.left_arm_eef_quat: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_hand + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_hand + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_hand + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_hand + normalization_modes: + action.right_hand: min_max + action.left_hand: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.robot0_eye_in_hand_pad_res256_freq20 + - video.robot1_eye_in_hand_pad_res256_freq20 + - video.agentview_pad_res256_freq20 + state_concat_order: + - state.right_arm_eef_pos + - state.right_arm_eef_quat + - state.right_hand + - state.left_arm_eef_pos + - state.left_arm_eef_quat + - state.left_hand + action_concat_order: + - action.right_arm_eef_pos + - action.right_arm_eef_rot + - action.right_hand + - action.left_arm_eef_pos + - action.left_arm_eef_rot + - action.left_hand + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + robocasa_panda_omron: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.end_effector_position_relative + - state.end_effector_rotation_relative + - state.gripper_qpos + - state.base_position + - state.base_rotation + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.end_effector_position_relative + - state.end_effector_rotation_relative + - state.gripper_qpos + - state.base_position + - state.base_rotation + normalization_modes: + state.end_effector_position_relative: min_max + state.end_effector_rotation_relative: min_max + state.gripper_qpos: min_max + state.base_position: min_max + state.base_rotation: min_max + target_rotations: + state.end_effector_rotation_relative: rotation_6d + state.base_rotation: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.end_effector_position + - action.end_effector_rotation + - action.gripper_close + - action.base_motion + - action.control_mode + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.end_effector_position + - action.end_effector_rotation + - action.gripper_close + - action.base_motion + - action.control_mode + normalization_modes: + action.end_effector_position: min_max + action.end_effector_rotation: min_max + action.gripper_close: binary + action.base_motion: min_max + action.control_mode: binary + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.res256_image_side_0 + - video.res256_image_side_1 + - video.res256_image_wrist_0 + state_concat_order: + - state.end_effector_position_relative + - state.end_effector_rotation_relative + - state.gripper_qpos + - state.base_position + - state.base_rotation + action_concat_order: + - action.end_effector_position + - action.end_effector_rotation + - action.gripper_close + - action.base_motion + - action.control_mode + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + gr1_unified: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionSinCosTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + normalization_modes: + action.left_arm: min_max + action.right_arm: min_max + action.left_hand: min_max + action.right_hand: min_max + action.waist: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_bg_crop_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_droid: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_position + normalization_modes: + state.eef_position: min_max + state.gripper_position: min_max + target_rotations: + state.eef_rotation: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.eef_position_delta + - action.eef_rotation_delta + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.eef_position_delta + - action.eef_rotation_delta + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.eef_rotation_delta: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.exterior_image_1_left_pad_res256_freq15 + - video.exterior_image_2_left_pad_res256_freq15 + - video.wrist_image_left_pad_res256_freq15 + state_concat_order: + - state.eef_position + - state.eef_rotation + - state.gripper_position + action_concat_order: + - action.eef_position_delta + - action.eef_rotation_delta + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_fractal: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image_pad_res256_freq03 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image_pad_res256_freq03 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image_pad_res256_freq03 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image_pad_res256_freq03 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image_pad_res256_freq03 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_closedness_commanded + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_closedness_commanded + normalization_modes: + state.eef_position: min_max + state.gripper_closedness_commanded: min_max + target_rotations: + state.eef_rotation: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.world_vector + - action.rotation_delta + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.world_vector + - action.rotation_delta + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.rotation_delta: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image_pad_res256_freq03 + state_concat_order: + - state.eef_position + - state.eef_rotation + - state.gripper_closedness_commanded + action_concat_order: + - action.world_vector + - action.rotation_delta + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_language_table: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.rgb_pad_res256_freq10 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.rgb_pad_res256_freq10 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.rgb_pad_res256_freq10 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.rgb_pad_res256_freq10 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.rgb_pad_res256_freq10 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.effector_translation + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.effector_translation + normalization_modes: + state.effector_translation: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.action + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.action + normalization_modes: + action.action: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.rgb_pad_res256_freq10 + state_concat_order: + - state.effector_translation + action_concat_order: + - action.action + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_bridge: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image_0 + - video.image_1 + - video.image_2 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_closed + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.eef_position + - state.eef_rotation + - state.gripper_closed + normalization_modes: + state.eef_position: min_max + state.gripper_closed: min_max + target_rotations: + state.eef_rotation: rotation_6d + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.eef_rotation: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image_0 + - video.image_1 + - video.image_2 + state_concat_order: + - state.eef_position + - state.eef_rotation + - state.gripper_closed + action_concat_order: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + hot3d_hands_only: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_wrist_position + - state.left_wrist_rotation + - state.left_joint_rotation + - state.right_wrist_position + - state.right_wrist_rotation + - state.right_joint_rotation + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_wrist_position + - state.left_wrist_rotation + - state.left_joint_rotation + - state.right_wrist_position + - state.right_wrist_rotation + - state.right_joint_rotation + normalization_modes: + state.left_wrist_position: min_max + state.right_wrist_position: min_max + target_rotations: + state.left_wrist_rotation: quaternion + state.right_wrist_rotation: quaternion + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_wrist_position + - action.left_wrist_rotation + - action.left_joint_rotation + - action.right_wrist_position + - action.right_wrist_rotation + - action.right_joint_rotation + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_wrist_position + - action.left_wrist_rotation + - action.left_joint_rotation + - action.right_wrist_position + - action.right_wrist_rotation + - action.right_joint_rotation + normalization_modes: + action.left_wrist_position: min_max + action.right_wrist_position: min_max + target_rotations: + action.left_wrist_rotation: quaternion + action.right_wrist_rotation: quaternion + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view + state_concat_order: + - state.left_wrist_position + - state.left_wrist_rotation + - state.left_joint_rotation + - state.right_wrist_position + - state.right_wrist_rotation + - state.right_joint_rotation + action_concat_order: + - action.left_wrist_position + - action.left_wrist_rotation + - action.left_joint_rotation + - action.right_wrist_position + - action.right_wrist_rotation + - action.right_joint_rotation + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + agibot: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.top_head + - video.hand_left + - video.hand_right + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm_joint_position + - state.right_arm_joint_position + - state.left_effector_position + - state.right_effector_position + - state.head_position + - state.waist_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.left_arm_joint_position + - state.right_arm_joint_position + - state.left_effector_position + - state.right_effector_position + - state.head_position + - state.waist_position + normalization_modes: + state.left_arm_joint_position: min_max + state.right_arm_joint_position: min_max + state.left_effector_position: min_max + state.right_effector_position: min_max + state.head_position: min_max + state.waist_position: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm_joint_position + - action.right_arm_joint_position + - action.left_effector_position + - action.right_effector_position + - action.head_position + - action.waist_position + - action.robot_velocity + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.left_arm_joint_position + - action.right_arm_joint_position + - action.left_effector_position + - action.right_effector_position + - action.head_position + - action.waist_position + - action.robot_velocity + normalization_modes: + action.left_arm_joint_position: min_max + action.right_arm_joint_position: min_max + action.left_effector_position: min_max + action.right_effector_position: min_max + action.head_position: min_max + action.waist_position: min_max + action.robot_velocity: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.top_head + - video.hand_left + - video.hand_right + state_concat_order: + - state.left_arm_joint_position + - state.right_arm_joint_position + - state.left_effector_position + - state.right_effector_position + - state.head_position + - state.waist_position + action_concat_order: + - action.left_arm_joint_position + - action.right_arm_joint_position + - action.left_effector_position + - action.right_effector_position + - action.head_position + - action.waist_position + - action.robot_velocity + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_mutex: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image + - video.wrist_image + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image + - video.wrist_image + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image + - video.wrist_image + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image + - video.wrist_image + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image + - video.wrist_image + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.joint_angles + - state.gripper_closed + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.joint_angles + - state.gripper_closed + normalization_modes: + state.joint_angles: min_max + state.gripper_closed: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.eef_rotation: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image + - video.wrist_image + state_concat_order: + - state.joint_angles + - state.gripper_closed + action_concat_order: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_plex: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image + - video.wrist_image + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image + - video.wrist_image + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image + - video.wrist_image + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image + - video.wrist_image + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image + - video.wrist_image + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.state + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.state + normalization_modes: + state.state: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.eef_position + - action.eef_rotation + - action.gripper_position + normalization_modes: + action.gripper_position: binary + target_rotations: + action.eef_rotation: axis_angle + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image + - video.wrist_image + state_concat_order: + - state.state + action_concat_order: + - action.eef_position + - action.eef_rotation + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + oxe_roboset: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.image_left + - video.image_right + - video.image_wrist + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.joint_position + - state.gripper_closed + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.joint_position + - state.gripper_closed + normalization_modes: + state.joint_position: min_max + state.gripper_closed: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.joint_position + - action.gripper_position + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.joint_position + - action.gripper_position + normalization_modes: + action.joint_position: min_max + action.gripper_position: binary + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.image_left + - video.image_right + - video.image_wrist + state_concat_order: + - state.joint_position + - state.gripper_closed + action_concat_order: + - action.joint_position + - action.gripper_position + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + lapa: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + dream: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_bg_crop_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action_concat_order: + - action.left_arm + - action.right_arm + - action.left_hand + - action.right_hand + - action.waist + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + gr1_unified_segmentation: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.ego_view_bg_crop_pad_res256_freq20 + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionSinCosTransform + apply_to: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.segmentation_target + - action.segmentation_target_mask + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.ego_view_bg_crop_pad_res256_freq20 + state_concat_order: + - state.left_arm + - state.right_arm + - state.left_hand + - state.right_hand + - state.waist + action_concat_order: + - action.segmentation_target + - action.segmentation_target_mask + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 + so100: + _target_: gr00t.data.transform.ComposedModalityTransform + transforms: + - _target_: gr00t.data.transform.VideoToTensor + apply_to: + - video.webcam + - _target_: gr00t.data.transform.VideoCrop + apply_to: + - video.webcam + scale: 0.95 + mode: random + - _target_: gr00t.data.transform.VideoResize + apply_to: + - video.webcam + height: 224 + width: 224 + interpolation: linear + - _target_: gr00t.data.transform.VideoColorJitter + apply_to: + - video.webcam + brightness: 0.3 + contrast: 0.4 + saturation: 0.5 + hue: 0.08 + - _target_: gr00t.data.transform.VideoToNumpy + apply_to: + - video.webcam + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - state.main_shoulder_pan + - state.main_shoulder_lift + - state.main_elbow_flex + - state.main_wrist_flex + - state.main_wrist_roll + - state.main_gripper + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - state.main_shoulder_pan + - state.main_shoulder_lift + - state.main_elbow_flex + - state.main_wrist_flex + - state.main_wrist_roll + - state.main_gripper + normalization_modes: + state.main_shoulder_pan: min_max + state.main_shoulder_lift: min_max + state.main_elbow_flex: min_max + state.main_wrist_flex: min_max + state.main_wrist_roll: min_max + state.main_gripper: min_max + - _target_: gr00t.data.transform.StateActionToTensor + apply_to: + - action.main_shoulder_pan + - action.main_shoulder_lift + - action.main_elbow_flex + - action.main_wrist_flex + - action.main_wrist_roll + - action.main_gripper + - _target_: gr00t.data.transform.StateActionTransform + apply_to: + - action.main_shoulder_pan + - action.main_shoulder_lift + - action.main_elbow_flex + - action.main_wrist_flex + - action.main_wrist_roll + - action.main_gripper + normalization_modes: + action.main_shoulder_pan: min_max + action.main_shoulder_lift: min_max + action.main_elbow_flex: min_max + action.main_wrist_flex: min_max + action.main_wrist_roll: min_max + action.main_gripper: min_max + - _target_: gr00t.data.transform.ConcatTransform + video_concat_order: + - video.webcam + state_concat_order: + - state.main_shoulder_pan + - state.main_shoulder_lift + - state.main_elbow_flex + - state.main_wrist_flex + - state.main_wrist_roll + - state.main_gripper + action_concat_order: + - action.main_shoulder_pan + - action.main_shoulder_lift + - action.main_elbow_flex + - action.main_wrist_flex + - action.main_wrist_roll + - action.main_gripper + - _target_: gr00t.model.transforms_idm.GR00TIDMTransform + default_instruction: Perform the default behavior. + num_visual_tokens_per_frame: 16 + max_num_images_per_sequence: 6 + max_action_dim: 32 + max_sequence_length: 112 + action_horizon: 16 + siglip_processor: + _target_: gr00t.model.action_head.siglip.SiglipProcessor.from_pretrained + _convert_: object + pretrained_model_name_or_path: google/siglip2-large-patch16-256 + embodiment_tag_mapping: + real_gr1_arms_only: 0 + real_gr1_arms_only_annotated: 1 + real_gr1_arms_waist: 2 + real_gr1_arms_waist_annotated: 3 + dexmg_gr1_arms_only_inspire: 4 + dexmg_gr1_arms_only_fourier: 5 + dexmg_gr1_arms_waist_fourier: 6 + robocasa_single_arm: 7 + onex_eve_gripper: 8 + robocasa_gr1_arms_only_inspire_hands: 9 + robocasa_gr1_arms_only_fourier_hands: 10 + robocasa_gr1_fixed_lower_body_inspire_hands: 11 + robocasa_gr1_fixed_lower_body_fourier_hands: 12 + robocasa_panda_omron: 13 + robocasa_bimanual_panda_parallel_gripper: 15 + robocasa_bimanual_panda_inspire_hand: 16 + oxe_droid: 17 + oxe_fractal: 18 + oxe_language_table: 19 + oxe_bridge: 20 + real_panda_single_arm: 21 + unknown: 22 + hot3d_hands_only: 23 + gr1_unified: 24 + robocasa_gr1_arms_waist_fourier_hands: 25 + lapa: 27 + oxe_mutex: 28 + oxe_roboset: 29 + oxe_plex: 30 + dream: 31 + gr1_unified_segmentation: 14 + so100: 26 +metadata_versions: + robocasa_gr1_arms_only_fourier_hands: '0217' + robocasa_gr1_fixed_lower_body_fourier_hands: '0217' + robocasa_bimanual_panda_parallel_gripper: '0217' + robocasa_bimanual_panda_inspire_hand: '0217' + robocasa_panda_omron: '0217' + gr1_unified: '0304' + oxe_droid: '0221' + oxe_fractal: '0221' + oxe_language_table: '0221' + oxe_bridge: '0221' + robocasa_gr1_arms_waist_fourier_hands: '0225' + hot3d_hands_only: '0220' + agibot: '0306' + oxe_mutex: '0303' + oxe_plex: '0303' + oxe_roboset: '0303' + lapa: '0305' + dream: '0308' + gr1_unified_segmentation: '0309' + so100: '0427' +max_state_dim: 44 +data_root1: /mnt/amlfs-01/home/seonghyeony/data/0418 +mixture_dataset_cls: gr00t.data.dataset.lerobot_sharded.ShardedLeRobotMixtureDataset.from_mixture_spec +single_dataset_cls: gr00t.data.dataset.lerobot_sharded.ShardedLeRobotSingleDataset +gr00t_commit_hash: 028a33876f359d1aea814e5eb4d4198a9bb9c60c +total_training_steps: 655360000000 diff --git a/experiment_cfg/metadata.json b/experiment_cfg/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..982062ce3e10bece32bd307d9a204058a347949f --- /dev/null +++ b/experiment_cfg/metadata.json @@ -0,0 +1,380 @@ +{ + "so100": { + "dataset_name": "so100:FullSet, so100:FullSet", + "dataset_statistics": { + "state": { + "main_shoulder_pan": { + "max": [ + 206.19140625 + ], + "min": [ + 38.056640625 + ], + "mean": [ + 113.23931884765625 + ], + "std": [ + 36.5380744934082 + ], + "q01": [ + 59.0625 + ], + "q99": [ + 194.4140625 + ] + }, + "main_shoulder_lift": { + "max": [ + 192.48046875 + ], + "min": [ + 35.947265625 + ], + "mean": [ + 114.70135498046875 + ], + "std": [ + 34.050819396972656 + ], + "q01": [ + 50.09765625 + ], + "q99": [ + 188.525390625 + ] + }, + "main_elbow_flex": { + "max": [ + 173.583984375 + ], + "min": [ + 22.412109375 + ], + "mean": [ + 109.06269073486328 + ], + "std": [ + 24.762542724609375 + ], + "q01": [ + 43.857421875 + ], + "q99": [ + 164.794921875 + ] + }, + "main_wrist_flex": { + "max": [ + 112.67578125 + ], + "min": [ + -5.185546875 + ], + "mean": [ + 73.29328155517578 + ], + "std": [ + 22.162004470825195 + ], + "q01": [ + 22.412109375 + ], + "q99": [ + 111.005859375 + ] + }, + "main_wrist_roll": { + "max": [ + 61.435546875 + ], + "min": [ + -165.5859375 + ], + "mean": [ + -42.29624557495117 + ], + "std": [ + 37.70439529418945 + ], + "q01": [ + -134.384765625 + ], + "q99": [ + 11.337890625 + ] + }, + "main_gripper": { + "max": [ + 57.46003723144531 + ], + "min": [ + -3.5523979663848877 + ], + "mean": [ + 16.143075942993164 + ], + "std": [ + 13.105600357055664 + ], + "q01": [ + -1.509769082069397 + ], + "q99": [ + 47.51332092285156 + ] + } + }, + "action": { + "main_shoulder_pan": { + "max": [ + 207.24609375 + ], + "min": [ + 38.84765625 + ], + "mean": [ + 113.70831298828125 + ], + "std": [ + 36.58461380004883 + ], + "q01": [ + 59.677734375 + ], + "q99": [ + 195.029296875 + ] + }, + "main_shoulder_lift": { + "max": [ + 194.58984375 + ], + "min": [ + 37.705078125 + ], + "mean": [ + 115.85425567626953 + ], + "std": [ + 33.65233612060547 + ], + "q01": [ + 51.6796875 + ], + "q99": [ + 189.31640625 + ] + }, + "main_elbow_flex": { + "max": [ + 174.990234375 + ], + "min": [ + 20.91796875 + ], + "mean": [ + 108.63793182373047 + ], + "std": [ + 25.02613067626953 + ], + "q01": [ + 42.451171875 + ], + "q99": [ + 164.794921875 + ] + }, + "main_wrist_flex": { + "max": [ + 115.927734375 + ], + "min": [ + -7.20703125 + ], + "mean": [ + 73.76486206054688 + ], + "std": [ + 22.343610763549805 + ], + "q01": [ + 22.236328125 + ], + "q99": [ + 111.97265625 + ] + }, + "main_wrist_roll": { + "max": [ + 62.75390625 + ], + "min": [ + -166.9921875 + ], + "mean": [ + -42.69032287597656 + ], + "std": [ + 37.90693283081055 + ], + "q01": [ + -134.912109375 + ], + "q99": [ + 11.689453125 + ] + }, + "main_gripper": { + "max": [ + 58.67697525024414 + ], + "min": [ + -7.30240535736084 + ], + "mean": [ + 16.021728515625 + ], + "std": [ + 14.06087589263916 + ], + "q01": [ + -3.6941580772399902 + ], + "q99": [ + 48.539520263671875 + ] + } + }, + "total_trajectory_length": 171312, + "num_trajectories": 180 + }, + "modalities": { + "video": { + "webcam": { + "resolution": [ + 256, + 256 + ], + "channels": 3, + "fps": 30.0 + } + }, + "state": { + "main_shoulder_pan": { + "absolute": true, + "rotation_type": null, + "shape": [ + 1 + ], + "continuous": true + }, + "main_shoulder_lift": { + "absolute": true, + "rotation_type": null, + "shape": [ + 1 + ], + "continuous": true + }, + "main_elbow_flex": { + "absolute": true, + "rotation_type": null, + "shape": [ + 1 + ], + "continuous": true + }, + "main_wrist_flex": { + "absolute": true, + "rotation_type": null, + "shape": [ + 1 + ], + "continuous": true + }, + "main_wrist_roll": { + "absolute": true, + "rotation_type": null, + "shape": [ + 1 + ], + "continuous": true + }, + "main_gripper": { + "absolute": true, + "rotation_type": null, + "shape": [ + 1 + ], + "continuous": true + } + }, + "action": { + "main_shoulder_pan": { + "absolute": false, + "rotation_type": null, + "shape": [ + 1 + ], + "continuous": true + }, + "main_shoulder_lift": { + "absolute": false, + "rotation_type": null, + "shape": [ + 1 + ], + "continuous": true + }, + "main_elbow_flex": { + "absolute": false, + "rotation_type": null, + "shape": [ + 1 + ], + "continuous": true + }, + "main_wrist_flex": { + "absolute": false, + "rotation_type": null, + "shape": [ + 1 + ], + "continuous": true + }, + "main_wrist_roll": { + "absolute": false, + "rotation_type": null, + "shape": [ + 1 + ], + "continuous": true + }, + "main_gripper": { + "absolute": false, + "rotation_type": null, + "shape": [ + 1 + ], + "continuous": true + } + }, + "annotation": { + "human": [ + "task_description" + ] + } + }, + "embodiment": { + "robot_name": "SO100", + "robot_type": "SO100", + "record_frequency": 20.0, + "body_controller_frequency": null, + "hand_controller_frequency": null, + "embodiment_tag": "so100" + }, + "processing": null, + "version": null + } +} \ No newline at end of file diff --git a/global_step30000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd330fc2163517c594e8b12af579a95c5283907c --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9181e10ad00d8276163d7c8a51486643de946b1b71a73eafc5b9850145c65791 +size 229581296 diff --git a/global_step30000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9bdd668173e558efce061d2b3a7a3ef558a4b28 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74333d8cbe53050c5422c60fc6070abc7c2930065c1a6535c097eee1d6aa4ee0 +size 229580988 diff --git a/global_step30000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a80f50df15683d3fcd6aaa305dfe24416e4b3b0 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd359371e60c15fde3a7a14bcb39f1d0b2f4b64e445082dee148281a23317e75 +size 229581116 diff --git a/global_step30000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ddecf12ad8021895ec9c00052bc5e36a5ce578d2 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f426e75219ce7457c090f91e7770250f92edefbfaefacf01de3aa75111ecda50 +size 229580988 diff --git a/global_step30000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..95437fbac4fd5d0845731a6951bc5c1b61092931 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:566c52f64c3568484c05f878fcd6b68bd6861809199192d22a130db05b92219a +size 229580988 diff --git a/global_step30000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2575380ee35eb4eb6508af84e42b8c9c61aa6f59 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a8e4e43deb536a55ab186741344d13a2c5c00e60dcbe80cc2ab66cc7cdf15b8 +size 229580988 diff --git a/global_step30000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..044d20c6657cfaf7fcc54215b972c2b83e3b2b08 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e09c7d63f33ae9707d95e4f2a28076d6c73b7e01dab4b946ff17e0b41e90f4c +size 229580924 diff --git a/global_step30000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d553be0d956534f19ec2d9090e9bd455180ca7b --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac18718e34e1fdab190b71ecb008e01bda99466ccddc7c9c0344d76ac87285e +size 229580796 diff --git a/global_step30000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..715eaf6cfa3f1db414299222032b4f7f3a8195e1 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90838224ef6745bbdfa2fd400830ef02c3e5b605ef12fcf92b12dbaff0409db2 +size 229581116 diff --git a/global_step30000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4bbad42252830f1ef89c96bcffe0b3ecbb9081c0 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deaa2bb081e4ec908e4079be56fe6516f9589206e95a24f913a1091832f1fc68 +size 229580732 diff --git a/global_step30000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3ce7ed64858388aad42be367f418df53e8539f9 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71051466b8c5be0af477ded13c194058eda339123d5527c98e41c299b012781d +size 229580796 diff --git a/global_step30000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bd107e4c5b9c5008bff3efe71a73d990db75dac --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34d6fd0ef9047c90ac8aacf2a2071cdee14ed0dc7a65d7cc2f82d71b40e93c3b +size 229580848 diff --git a/global_step30000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8210b83a766b3a95f6e1759cd32cb96d811d6fcd --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cb9b9a3e7375b0653eba214154536897c43d10a1a438a28596034cc009fc689 +size 229580860 diff --git a/global_step30000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..69c7753da5759cc9a1c7ad2fc493cabbb52f4880 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7773e50a1363635fc9198dd687013c501e1b1bdc64bb4c912770225bd694b018 +size 229580540 diff --git a/global_step30000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..daa387bc23a785d7b697edb63f8657eab5222f7b --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ee575e1738dff20f87da5bb4ff434cc17e677da032a6a1a4f60077b2da6c2a +size 229580540 diff --git a/global_step30000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..27db9c2d5f5a78f5794e10a5f47ce82ae01a1653 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4208f416aa8a128c3ea06568d55c10fcc1b2cce81db301b7cebd49f5693890af +size 229580604 diff --git a/global_step30000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f05eb76afcdc021d50b8050ac426071a7bb8edc1 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37bff13bbd6d18dffb06bd8368c1466686a9c4d1c5b07629f5ac04a8e1437bb9 +size 229580028 diff --git a/global_step30000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..54ee3e326048e6e5b038f59443a0816f8ec36c05 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae6c71b8b260c84124ad46e5e4ab5e2fa19f25e21cbfaa0c319bd9a68c6ec3e3 +size 229579516 diff --git a/global_step30000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf76329f7d2ebbcc2ba61ff9c9df6bbb982949e4 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f8c7e82cff4a9648b31bf64c0f0cedb3f6ab4bfe05c0b993c0bc9d2f513888 +size 229579580 diff --git a/global_step30000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a213c81e9f135d7110c485c1430599df6b1404e --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c4cac52bce6b95d857c36b5d1972e250ad92f739110cd480f4918a23933b580 +size 229580092 diff --git a/global_step30000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2df83fd73eb9edee10581815d253989575b6fdf1 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47187fc51a55012745952071e33c6a48d8faa7ae6cb8c0d257aab9b99ce04169 +size 229580028 diff --git a/global_step30000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..24fe86144adfc419d9f7f73dbb460d15173c5810 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9641af7888d7d0ab9cb05e31bac1c6ab661cc3593d060926d5d09f1ae4680ba +size 229580092 diff --git a/global_step30000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fef1abe489a6b071cd11df8a37c28993c30a8c3 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7e26ac0e19e0306f3484a91a4c68a35c535da82c04a4532b9e3d1e75c07092e +size 229581168 diff --git a/global_step30000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..288066267434abed84e97f868ff842696c7c3ea0 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29859949d81aee755cbaeaac26b69a9793a8076592a7fb7fa7ec00c06560665c +size 229579580 diff --git a/global_step30000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5430fbb5176f1cf2a2f42ac00767d41baf7a684c --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e226e49f9dcf3a705c60ce1fab366249be5f0a4a73995994321087eb599500e +size 229579836 diff --git a/global_step30000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e66d4c25a56d1acbbdb7deb4198ec0dded10169 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5888ac9c37b9393286576b65ac444b4bdb4f3d010b6814b78d2a8ea3b1820e69 +size 229580848 diff --git a/global_step30000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..101fe5106935fe3b1e48418ebbaea359b5b94bda --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3daa6dbf7c78fa6e2ec727234bc0e757008cd7f9ea6df76132badda1cae7d5b2 +size 229581168 diff --git a/global_step30000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bac74e3a6559bffda0aeb2a92c9460c4d0305b21 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96a7833991b36b09573305bab0286f77ccfa67c17012ef71ae76fe2371550e56 +size 229580720 diff --git a/global_step30000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..764f471f4249609a88519b652047f63e6226f10c --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97a2242e8fbb5cc0e0276df68b8222c806ce634640234d2147f476d670c0f60a +size 229581104 diff --git a/global_step30000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5011514dce7b3aab86f2f9d495a8d831229729a3 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096ffa693e550d8c9455a956fd2c8f7a352fe4548180c4dc5e2ef91d6bdd3a0d +size 229580848 diff --git a/global_step30000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d54a9f3b224412b9027bef6b581edd630fafc36d --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8628445b4a756444f36002958eec3cc5ae9ca115a839957990015ca10a1fc221 +size 229581104 diff --git a/global_step30000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/global_step30000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ee20e1ced8fdd5bc5ecf9f661fcdc69af32ef76 --- /dev/null +++ b/global_step30000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5d79a8ff349fbfeafebd9d3ef8ec3d95c409b5a88ca87f1f42d2de53b548c50 +size 229581104 diff --git a/global_step30000/mp_rank_00_model_states.pt b/global_step30000/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..23bf62a24c0401f0039c0d300e7e7de4b1be1f68 --- /dev/null +++ b/global_step30000/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:103fee7f19a0f062f8dbbe44bdb89fc246e8da0a2df2fa83980bb6a805c54ce8 +size 1325387042 diff --git a/latest b/latest new file mode 100644 index 0000000000000000000000000000000000000000..54b8e15159f25e49e4fc5dc30ed5ee02c6e47403 --- /dev/null +++ b/latest @@ -0,0 +1 @@ +global_step30000 \ No newline at end of file diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89aab658cef1bdb7444e36018c99ee98cac66caf --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6463c645b31948e35674ff9f21988d44cdfa101c2db2127333fdd4ef0f86ac02 +size 1274863076 diff --git a/rng_state_0.pth b/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e92e22627d2b573c82bc103da4f12855f0987b7 --- /dev/null +++ b/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b28a72a65afefc2f2890e8522c9d3f32ffd886bc4c9ed5cc1d638fd494cfca1 +size 14256 diff --git a/rng_state_1.pth b/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..361fee3aaa1d3c12920c26cc8994bbc37eabd1e7 --- /dev/null +++ b/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0212a636fb4674c4997436e4cfd3e1c47c36fdbfd83d64e408cda2d39fafaed1 +size 14256 diff --git a/rng_state_10.pth b/rng_state_10.pth new file mode 100644 index 0000000000000000000000000000000000000000..42befe2d76468b90b999b6e3b239d6b7210d53ed --- /dev/null +++ b/rng_state_10.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98927eb7e95395014a34610680e3fccd68a4bc49d8726efa96dde589ce4428fa +size 14262 diff --git a/rng_state_11.pth b/rng_state_11.pth new file mode 100644 index 0000000000000000000000000000000000000000..3dae2b0805d24b5a17ced2547b1d4378d4a51221 --- /dev/null +++ b/rng_state_11.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f43c649d7128e343831a2e7d878cd77c4830ce2d76a87fdfa9dc234634b4461 +size 14262 diff --git a/rng_state_12.pth b/rng_state_12.pth new file mode 100644 index 0000000000000000000000000000000000000000..2b96952c7f0bb04f9d38f449e0c4db0b6eec2bc8 --- /dev/null +++ b/rng_state_12.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee0bb6a466e036ba8e9618ce8a6b31d7336438f510174dc24f219d7c2297fd14 +size 14262 diff --git a/rng_state_13.pth b/rng_state_13.pth new file mode 100644 index 0000000000000000000000000000000000000000..357591cec5e9097820b75806bc94e412e09e02db --- /dev/null +++ b/rng_state_13.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77c8aef82dcd1eec84ceaa101997d4a74e2bc481dacb3ff3864f9df5cef1a457 +size 14262 diff --git a/rng_state_14.pth b/rng_state_14.pth new file mode 100644 index 0000000000000000000000000000000000000000..853f6d9f5290626c236781808716cf5bdfdfc481 --- /dev/null +++ b/rng_state_14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9df84f40a4bc10da09171110ce8b6b2e986b506cc4482d4f5443c01b42d6047 +size 14262 diff --git a/rng_state_15.pth b/rng_state_15.pth new file mode 100644 index 0000000000000000000000000000000000000000..5a8db3a30a8d4f14128cb41d22b9d430b4bf09d2 --- /dev/null +++ b/rng_state_15.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a671ea24c19f21f68b90683baecab90674c594e04d70b170a8a02eede4922152 +size 14262 diff --git a/rng_state_16.pth b/rng_state_16.pth new file mode 100644 index 0000000000000000000000000000000000000000..edb8b9649a630f527c94950c32aea83844239fcf --- /dev/null +++ b/rng_state_16.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf956ef96f04f3055dac1278b10d30752de09a648be7ca5096d444e56944b07 +size 14262 diff --git a/rng_state_17.pth b/rng_state_17.pth new file mode 100644 index 0000000000000000000000000000000000000000..945f3d40b2617d3cf6be332dd30f3bce32785cff --- /dev/null +++ b/rng_state_17.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20803d617c3f7a95c6b6a68cdeb637fe666559212e8ff96884d8202541844de3 +size 14262 diff --git a/rng_state_18.pth b/rng_state_18.pth new file mode 100644 index 0000000000000000000000000000000000000000..61c71b3d3d90623847dffde6a7dc67812509d745 --- /dev/null +++ b/rng_state_18.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b0b2570b68811c9b6b4a69de0085c1f2089ce8cc19f8418fb93a8ceb04f1831 +size 14262 diff --git a/rng_state_19.pth b/rng_state_19.pth new file mode 100644 index 0000000000000000000000000000000000000000..92e5f8960b53f9d52343ef07e5dfed48defa1bbd --- /dev/null +++ b/rng_state_19.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c10313639e63cb2a5cc024b08d743776dd3135a453dd701570a52a1f86c5f9e +size 14262 diff --git a/rng_state_2.pth b/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..e27677cc3c76b2481ea7722102e141accb4f3e5f --- /dev/null +++ b/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:184682e344a044cf23a64c1e8fbedc4d64f07112ea46a42f126e84020143aee3 +size 14256 diff --git a/rng_state_20.pth b/rng_state_20.pth new file mode 100644 index 0000000000000000000000000000000000000000..726d739be81a925bfac1f7fd9c9bd99d5135ca1a --- /dev/null +++ b/rng_state_20.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5e356d498e66c3a75a69dec8585521cad3ebf8a9e1e2bbc529f2536ccdc8578 +size 14262 diff --git a/rng_state_21.pth b/rng_state_21.pth new file mode 100644 index 0000000000000000000000000000000000000000..769b879facac3ccf9b47e28a3e537b87cb991fad --- /dev/null +++ b/rng_state_21.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c4f3bd3a45afa91b57d8b38e761fc2af4781f1cfbb84dd58070f195878d7f2a +size 14262 diff --git a/rng_state_22.pth b/rng_state_22.pth new file mode 100644 index 0000000000000000000000000000000000000000..8f9de2ce52408f4c554d5e908fe6d76a27034d34 --- /dev/null +++ b/rng_state_22.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09d951a9fcb94d84e514d250962e48b1aed9ec4d252a30ca51fcd80c7ad45f4b +size 14262 diff --git a/rng_state_23.pth b/rng_state_23.pth new file mode 100644 index 0000000000000000000000000000000000000000..894981fdd1321d219d6b5616d1827acbe77be208 --- /dev/null +++ b/rng_state_23.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9039d36e1060654c493a5bd62d1950d1fbd340c05287397111ca8af8384fd0fb +size 14262 diff --git a/rng_state_24.pth b/rng_state_24.pth new file mode 100644 index 0000000000000000000000000000000000000000..38d84e4632212dae813e1b33cb44642d4ad259dd --- /dev/null +++ b/rng_state_24.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e472a73fc69244b1710e6beb4f794838c0aa95a36951b6a70799d7f6054d18a2 +size 14262 diff --git a/rng_state_25.pth b/rng_state_25.pth new file mode 100644 index 0000000000000000000000000000000000000000..163f65ce67b3fe95ac413af1cfb20508436e16ec --- /dev/null +++ b/rng_state_25.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b89b6f2c03c1c41cbb2cfe3e7fced7977032ba31ea5e2bd8064d33a7c1a2863 +size 14262 diff --git a/rng_state_26.pth b/rng_state_26.pth new file mode 100644 index 0000000000000000000000000000000000000000..8c8ec737387c73b5f2d424338c7e2e684c7e66ab --- /dev/null +++ b/rng_state_26.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d544211dc265edfb0eda3989a76a24c96d55ebf31cc1fc4dab60b7dd324f797 +size 14262 diff --git a/rng_state_27.pth b/rng_state_27.pth new file mode 100644 index 0000000000000000000000000000000000000000..62df567408aaddc6b87855437d72c00a0cc198df --- /dev/null +++ b/rng_state_27.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96fd30cd056f6db2ea3fed1dc23960cdb5618f4a2fc171b0ab9c5505aa667fdd +size 14262 diff --git a/rng_state_28.pth b/rng_state_28.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd63f84b47c8a4df0efc8caced1be1ba4ebbce77 --- /dev/null +++ b/rng_state_28.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0440a01bc36df4d2ab4182398719bde6d3cf058d68a5f7db8500a86741d1511c +size 14262 diff --git a/rng_state_29.pth b/rng_state_29.pth new file mode 100644 index 0000000000000000000000000000000000000000..61c2ee173ecd2fc1438868078298273b13be3d5d --- /dev/null +++ b/rng_state_29.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b0af7a0ae1d0666e33011c82330ca6bba7d2e2d148d4e2fc64eaa0c49eb5517 +size 14262 diff --git a/rng_state_3.pth b/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..3e760de6554bc5c639c041056f52dc713866a9f3 --- /dev/null +++ b/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9886973b410ff3f231a02a5921fea2496ac2f7cc534dffd3b637ce0ad5f3c167 +size 14256 diff --git a/rng_state_30.pth b/rng_state_30.pth new file mode 100644 index 0000000000000000000000000000000000000000..cf6667f7c14dd3d4281581aadf85d5d0295f1250 --- /dev/null +++ b/rng_state_30.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:179a2c74790b7bb6f9995c7f37a3967b0897eea13776b2362416f662803afe3b +size 14262 diff --git a/rng_state_31.pth b/rng_state_31.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ee4162411d0e8a0c1f8197a3465b8dd1042ef7f --- /dev/null +++ b/rng_state_31.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a397854a9d643d4a9f1440a0679e361b4725ceb778d2ece5379acc148d53ba2 +size 14262 diff --git a/rng_state_4.pth b/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..79a799c51ee18663c85051c90b6e6aca59a1c97f --- /dev/null +++ b/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0643be67d2d0e161d8b1bc193275466bc4b957fc47c6c3c2d6eaf2b7f6e9ef7a +size 14256 diff --git a/rng_state_5.pth b/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..364032a411899b2e0d23c909ba521738a574ae97 --- /dev/null +++ b/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63ea40acf6f2644b35f964707c4e5093e12476e5b77cc6f9e3a2c397922be9bc +size 14256 diff --git a/rng_state_6.pth b/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..232c3d6afaf5da1f56cc6086e81d5367b10fe153 --- /dev/null +++ b/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5b7f74686e4825295dd9c71c52340b105ff0e01edd816f688251661aad24573 +size 14256 diff --git a/rng_state_7.pth b/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd3d2bfd25a73ff017ba88296622eef912396d71 --- /dev/null +++ b/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:554fdf323fe7310f6241c3ca2f1b8d78239ee97dce5fbb392ca34e4794ee4bb3 +size 14256 diff --git a/rng_state_8.pth b/rng_state_8.pth new file mode 100644 index 0000000000000000000000000000000000000000..73de87620a995beb975fffc2d8640f88ccac0230 --- /dev/null +++ b/rng_state_8.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4561829b49aea1680041a388df19623137324290e632672bd43f9b7aa44626d1 +size 14256 diff --git a/rng_state_9.pth b/rng_state_9.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ab8d5102e116db2be6249da463d4804f3a86d6d --- /dev/null +++ b/rng_state_9.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a29e987832867a985ed99d0e2502c418794db8f4d5f3830bc1e3cb939639fe64 +size 14256 diff --git a/scheduler.pt b/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f29cb8b576c4bbf1197dac9a1a68fdfed241bf6 --- /dev/null +++ b/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe8c620e9a7808e10f2bc64fa81fe5b1efb6dcc2acb3596ed32f58119a5368c1 +size 1064 diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2903ab5a981ff963def5cfe016107bfde5dd0a03 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,321033 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.57763671875e-05, + "eval_steps": 500, + "global_step": 30000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0, + "model_forward_time": 1.0319106578826904, + "step": 0 + }, + { + "epoch": 0, + "step": 0, + "training_step_time": 1.5901906490325928 + }, + { + "epoch": 1.52587890625e-09, + "model_forward_time": 0.028571128845214844, + "step": 1 + }, + { + "epoch": 1.52587890625e-09, + "step": 1, + "training_step_time": 0.22123479843139648 + }, + { + "epoch": 3.0517578125e-09, + "model_forward_time": 0.02512216567993164, + "step": 2 + }, + { + "epoch": 3.0517578125e-09, + "step": 2, + "training_step_time": 0.11980462074279785 + }, + { + "epoch": 4.57763671875e-09, + "model_forward_time": 0.02453923225402832, + "step": 3 + }, + { + "epoch": 4.57763671875e-09, + "step": 3, + "training_step_time": 0.12976622581481934 + }, + { + "epoch": 6.103515625e-09, + "model_forward_time": 0.02606821060180664, + "step": 4 + }, + { + "epoch": 6.103515625e-09, + "step": 4, + "training_step_time": 0.10907411575317383 + }, + { + "epoch": 7.62939453125e-09, + "model_forward_time": 0.026157140731811523, + "step": 5 + }, + { + "epoch": 7.62939453125e-09, + "step": 5, + "training_step_time": 0.11301732063293457 + }, + { + "epoch": 9.1552734375e-09, + "model_forward_time": 0.02605152130126953, + "step": 6 + }, + { + "epoch": 9.1552734375e-09, + "step": 6, + "training_step_time": 0.12163853645324707 + }, + { + "epoch": 1.068115234375e-08, + "model_forward_time": 0.025647640228271484, + "step": 7 + }, + { + "epoch": 1.068115234375e-08, + "step": 7, + "training_step_time": 0.11937880516052246 + }, + { + "epoch": 1.220703125e-08, + "model_forward_time": 0.02511310577392578, + "step": 8 + }, + { + "epoch": 1.220703125e-08, + "step": 8, + "training_step_time": 0.11348962783813477 + }, + { + "epoch": 1.373291015625e-08, + "model_forward_time": 0.025819778442382812, + "step": 9 + }, + { + "epoch": 1.373291015625e-08, + "step": 9, + "training_step_time": 0.11962628364562988 + }, + { + "epoch": 1.52587890625e-08, + "grad_norm": 6.067507266998291, + "learning_rate": 6.666666666666667e-07, + "loss": 1.2252, + "step": 10 + }, + { + "epoch": 1.52587890625e-08, + "model_forward_time": 0.028791189193725586, + "step": 10 + }, + { + "epoch": 1.52587890625e-08, + "step": 10, + "training_step_time": 0.12380552291870117 + }, + { + "epoch": 1.678466796875e-08, + "model_forward_time": 0.025547266006469727, + "step": 11 + }, + { + "epoch": 1.678466796875e-08, + "step": 11, + "training_step_time": 0.11302661895751953 + }, + { + "epoch": 1.8310546875e-08, + "model_forward_time": 0.026027679443359375, + "step": 12 + }, + { + "epoch": 1.8310546875e-08, + "step": 12, + "training_step_time": 0.11699748039245605 + }, + { + "epoch": 1.983642578125e-08, + "model_forward_time": 0.025533676147460938, + "step": 13 + }, + { + "epoch": 1.983642578125e-08, + "step": 13, + "training_step_time": 0.10940051078796387 + }, + { + "epoch": 2.13623046875e-08, + "model_forward_time": 0.025338411331176758, + "step": 14 + }, + { + "epoch": 2.13623046875e-08, + "step": 14, + "training_step_time": 0.16063308715820312 + }, + { + "epoch": 2.288818359375e-08, + "model_forward_time": 0.024246692657470703, + "step": 15 + }, + { + "epoch": 2.288818359375e-08, + "step": 15, + "training_step_time": 0.14876723289489746 + }, + { + "epoch": 2.44140625e-08, + "model_forward_time": 0.024797916412353516, + "step": 16 + }, + { + "epoch": 2.44140625e-08, + "step": 16, + "training_step_time": 0.16344356536865234 + }, + { + "epoch": 2.593994140625e-08, + "model_forward_time": 0.024475812911987305, + "step": 17 + }, + { + "epoch": 2.593994140625e-08, + "step": 17, + "training_step_time": 0.17724943161010742 + }, + { + "epoch": 2.74658203125e-08, + "model_forward_time": 0.02528214454650879, + "step": 18 + }, + { + "epoch": 2.74658203125e-08, + "step": 18, + "training_step_time": 0.15558290481567383 + }, + { + "epoch": 2.899169921875e-08, + "model_forward_time": 0.02420949935913086, + "step": 19 + }, + { + "epoch": 2.899169921875e-08, + "step": 19, + "training_step_time": 0.15831494331359863 + }, + { + "epoch": 3.0517578125e-08, + "grad_norm": 4.578792572021484, + "learning_rate": 1.3333333333333334e-06, + "loss": 1.2077, + "step": 20 + }, + { + "epoch": 3.0517578125e-08, + "model_forward_time": 0.024424314498901367, + "step": 20 + }, + { + "epoch": 3.0517578125e-08, + "step": 20, + "training_step_time": 0.10786700248718262 + }, + { + "epoch": 3.204345703125e-08, + "model_forward_time": 0.02824997901916504, + "step": 21 + }, + { + "epoch": 3.204345703125e-08, + "step": 21, + "training_step_time": 0.1069478988647461 + }, + { + "epoch": 3.35693359375e-08, + "model_forward_time": 0.02910923957824707, + "step": 22 + }, + { + "epoch": 3.35693359375e-08, + "step": 22, + "training_step_time": 0.11163830757141113 + }, + { + "epoch": 3.509521484375e-08, + "model_forward_time": 0.029635906219482422, + "step": 23 + }, + { + "epoch": 3.509521484375e-08, + "step": 23, + "training_step_time": 0.11366629600524902 + }, + { + "epoch": 3.662109375e-08, + "model_forward_time": 0.02586197853088379, + "step": 24 + }, + { + "epoch": 3.662109375e-08, + "step": 24, + "training_step_time": 0.10776495933532715 + }, + { + "epoch": 3.814697265625e-08, + "model_forward_time": 0.02928924560546875, + "step": 25 + }, + { + "epoch": 3.814697265625e-08, + "step": 25, + "training_step_time": 0.11644697189331055 + }, + { + "epoch": 3.96728515625e-08, + "model_forward_time": 0.02590775489807129, + "step": 26 + }, + { + "epoch": 3.96728515625e-08, + "step": 26, + "training_step_time": 0.11479544639587402 + }, + { + "epoch": 4.119873046875e-08, + "model_forward_time": 0.026750564575195312, + "step": 27 + }, + { + "epoch": 4.119873046875e-08, + "step": 27, + "training_step_time": 0.151716947555542 + }, + { + "epoch": 4.2724609375e-08, + "model_forward_time": 0.025519847869873047, + "step": 28 + }, + { + "epoch": 4.2724609375e-08, + "step": 28, + "training_step_time": 0.11794662475585938 + }, + { + "epoch": 4.425048828125e-08, + "model_forward_time": 0.02509760856628418, + "step": 29 + }, + { + "epoch": 4.425048828125e-08, + "step": 29, + "training_step_time": 0.10537981986999512 + }, + { + "epoch": 4.57763671875e-08, + "grad_norm": 3.404512882232666, + "learning_rate": 2.0000000000000003e-06, + "loss": 1.1347, + "step": 30 + }, + { + "epoch": 4.57763671875e-08, + "model_forward_time": 0.026080846786499023, + "step": 30 + }, + { + "epoch": 4.57763671875e-08, + "step": 30, + "training_step_time": 0.10661101341247559 + }, + { + "epoch": 4.730224609375e-08, + "model_forward_time": 0.029267549514770508, + "step": 31 + }, + { + "epoch": 4.730224609375e-08, + "step": 31, + "training_step_time": 0.11377143859863281 + }, + { + "epoch": 4.8828125e-08, + "model_forward_time": 0.02588963508605957, + "step": 32 + }, + { + "epoch": 4.8828125e-08, + "step": 32, + "training_step_time": 0.16434931755065918 + }, + { + "epoch": 5.035400390625e-08, + "model_forward_time": 0.025550127029418945, + "step": 33 + }, + { + "epoch": 5.035400390625e-08, + "step": 33, + "training_step_time": 0.13167810440063477 + }, + { + "epoch": 5.18798828125e-08, + "model_forward_time": 0.02539205551147461, + "step": 34 + }, + { + "epoch": 5.18798828125e-08, + "step": 34, + "training_step_time": 0.11022114753723145 + }, + { + "epoch": 5.340576171875e-08, + "model_forward_time": 0.026363134384155273, + "step": 35 + }, + { + "epoch": 5.340576171875e-08, + "step": 35, + "training_step_time": 0.11951398849487305 + }, + { + "epoch": 5.4931640625e-08, + "model_forward_time": 0.02743220329284668, + "step": 36 + }, + { + "epoch": 5.4931640625e-08, + "step": 36, + "training_step_time": 0.1782093048095703 + }, + { + "epoch": 5.645751953125e-08, + "model_forward_time": 0.025634288787841797, + "step": 37 + }, + { + "epoch": 5.645751953125e-08, + "step": 37, + "training_step_time": 0.1223607063293457 + }, + { + "epoch": 5.79833984375e-08, + "model_forward_time": 0.027596235275268555, + "step": 38 + }, + { + "epoch": 5.79833984375e-08, + "step": 38, + "training_step_time": 0.10950684547424316 + }, + { + "epoch": 5.950927734375e-08, + "model_forward_time": 0.02748847007751465, + "step": 39 + }, + { + "epoch": 5.950927734375e-08, + "step": 39, + "training_step_time": 0.1121678352355957 + }, + { + "epoch": 6.103515625e-08, + "grad_norm": 3.3540265560150146, + "learning_rate": 2.666666666666667e-06, + "loss": 1.1275, + "step": 40 + }, + { + "epoch": 6.103515625e-08, + "model_forward_time": 0.02660655975341797, + "step": 40 + }, + { + "epoch": 6.103515625e-08, + "step": 40, + "training_step_time": 0.1085958480834961 + }, + { + "epoch": 6.256103515625e-08, + "model_forward_time": 0.026361942291259766, + "step": 41 + }, + { + "epoch": 6.256103515625e-08, + "step": 41, + "training_step_time": 0.10995984077453613 + }, + { + "epoch": 6.40869140625e-08, + "model_forward_time": 0.025968313217163086, + "step": 42 + }, + { + "epoch": 6.40869140625e-08, + "step": 42, + "training_step_time": 0.11075258255004883 + }, + { + "epoch": 6.561279296875e-08, + "model_forward_time": 0.025856733322143555, + "step": 43 + }, + { + "epoch": 6.561279296875e-08, + "step": 43, + "training_step_time": 0.10966372489929199 + }, + { + "epoch": 6.7138671875e-08, + "model_forward_time": 0.02678656578063965, + "step": 44 + }, + { + "epoch": 6.7138671875e-08, + "step": 44, + "training_step_time": 0.10839462280273438 + }, + { + "epoch": 6.866455078125e-08, + "model_forward_time": 0.026245832443237305, + "step": 45 + }, + { + "epoch": 6.866455078125e-08, + "step": 45, + "training_step_time": 0.10392260551452637 + }, + { + "epoch": 7.01904296875e-08, + "model_forward_time": 0.025958776473999023, + "step": 46 + }, + { + "epoch": 7.01904296875e-08, + "step": 46, + "training_step_time": 0.10898923873901367 + }, + { + "epoch": 7.171630859375e-08, + "model_forward_time": 0.02625560760498047, + "step": 47 + }, + { + "epoch": 7.171630859375e-08, + "step": 47, + "training_step_time": 0.10556769371032715 + }, + { + "epoch": 7.32421875e-08, + "model_forward_time": 0.026059389114379883, + "step": 48 + }, + { + "epoch": 7.32421875e-08, + "step": 48, + "training_step_time": 0.10772347450256348 + }, + { + "epoch": 7.476806640625e-08, + "model_forward_time": 0.026355743408203125, + "step": 49 + }, + { + "epoch": 7.476806640625e-08, + "step": 49, + "training_step_time": 0.10431146621704102 + }, + { + "epoch": 7.62939453125e-08, + "grad_norm": 5.17725133895874, + "learning_rate": 3.3333333333333333e-06, + "loss": 1.1185, + "step": 50 + }, + { + "epoch": 7.62939453125e-08, + "model_forward_time": 0.0267946720123291, + "step": 50 + }, + { + "epoch": 7.62939453125e-08, + "step": 50, + "training_step_time": 0.10547614097595215 + }, + { + "epoch": 7.781982421875e-08, + "model_forward_time": 0.026325225830078125, + "step": 51 + }, + { + "epoch": 7.781982421875e-08, + "step": 51, + "training_step_time": 0.10610151290893555 + }, + { + "epoch": 7.9345703125e-08, + "model_forward_time": 0.02591419219970703, + "step": 52 + }, + { + "epoch": 7.9345703125e-08, + "step": 52, + "training_step_time": 0.10524606704711914 + }, + { + "epoch": 8.087158203125e-08, + "model_forward_time": 0.025983810424804688, + "step": 53 + }, + { + "epoch": 8.087158203125e-08, + "step": 53, + "training_step_time": 0.10518741607666016 + }, + { + "epoch": 8.23974609375e-08, + "model_forward_time": 0.026117563247680664, + "step": 54 + }, + { + "epoch": 8.23974609375e-08, + "step": 54, + "training_step_time": 0.10506796836853027 + }, + { + "epoch": 8.392333984375e-08, + "model_forward_time": 0.027858734130859375, + "step": 55 + }, + { + "epoch": 8.392333984375e-08, + "step": 55, + "training_step_time": 0.1100766658782959 + }, + { + "epoch": 8.544921875e-08, + "model_forward_time": 0.02553868293762207, + "step": 56 + }, + { + "epoch": 8.544921875e-08, + "step": 56, + "training_step_time": 0.10886931419372559 + }, + { + "epoch": 8.697509765625e-08, + "model_forward_time": 0.025895118713378906, + "step": 57 + }, + { + "epoch": 8.697509765625e-08, + "step": 57, + "training_step_time": 0.1085052490234375 + }, + { + "epoch": 8.85009765625e-08, + "model_forward_time": 0.026247262954711914, + "step": 58 + }, + { + "epoch": 8.85009765625e-08, + "step": 58, + "training_step_time": 0.11170578002929688 + }, + { + "epoch": 9.002685546875e-08, + "model_forward_time": 0.02557516098022461, + "step": 59 + }, + { + "epoch": 9.002685546875e-08, + "step": 59, + "training_step_time": 0.10515999794006348 + }, + { + "epoch": 9.1552734375e-08, + "grad_norm": 2.941340923309326, + "learning_rate": 4.000000000000001e-06, + "loss": 1.1342, + "step": 60 + }, + { + "epoch": 9.1552734375e-08, + "model_forward_time": 0.027450084686279297, + "step": 60 + }, + { + "epoch": 9.1552734375e-08, + "step": 60, + "training_step_time": 0.11174392700195312 + }, + { + "epoch": 9.307861328125e-08, + "model_forward_time": 0.027616024017333984, + "step": 61 + }, + { + "epoch": 9.307861328125e-08, + "step": 61, + "training_step_time": 0.1144108772277832 + }, + { + "epoch": 9.46044921875e-08, + "model_forward_time": 0.026006698608398438, + "step": 62 + }, + { + "epoch": 9.46044921875e-08, + "step": 62, + "training_step_time": 0.1659221649169922 + }, + { + "epoch": 9.613037109375e-08, + "model_forward_time": 0.025553226470947266, + "step": 63 + }, + { + "epoch": 9.613037109375e-08, + "step": 63, + "training_step_time": 0.18537402153015137 + }, + { + "epoch": 9.765625e-08, + "model_forward_time": 0.025078296661376953, + "step": 64 + }, + { + "epoch": 9.765625e-08, + "step": 64, + "training_step_time": 0.17681241035461426 + }, + { + "epoch": 9.918212890625e-08, + "model_forward_time": 0.02532052993774414, + "step": 65 + }, + { + "epoch": 9.918212890625e-08, + "step": 65, + "training_step_time": 0.16931819915771484 + }, + { + "epoch": 1.007080078125e-07, + "model_forward_time": 0.02660965919494629, + "step": 66 + }, + { + "epoch": 1.007080078125e-07, + "step": 66, + "training_step_time": 0.12088370323181152 + }, + { + "epoch": 1.0223388671875e-07, + "model_forward_time": 0.02500319480895996, + "step": 67 + }, + { + "epoch": 1.0223388671875e-07, + "step": 67, + "training_step_time": 0.1154329776763916 + }, + { + "epoch": 1.03759765625e-07, + "model_forward_time": 0.025840282440185547, + "step": 68 + }, + { + "epoch": 1.03759765625e-07, + "step": 68, + "training_step_time": 0.10432648658752441 + }, + { + "epoch": 1.0528564453125e-07, + "model_forward_time": 0.02615952491760254, + "step": 69 + }, + { + "epoch": 1.0528564453125e-07, + "step": 69, + "training_step_time": 0.1107783317565918 + }, + { + "epoch": 1.068115234375e-07, + "grad_norm": 2.9935081005096436, + "learning_rate": 4.666666666666667e-06, + "loss": 1.1149, + "step": 70 + }, + { + "epoch": 1.068115234375e-07, + "model_forward_time": 0.026018619537353516, + "step": 70 + }, + { + "epoch": 1.068115234375e-07, + "step": 70, + "training_step_time": 0.10485291481018066 + }, + { + "epoch": 1.0833740234375e-07, + "model_forward_time": 0.026154279708862305, + "step": 71 + }, + { + "epoch": 1.0833740234375e-07, + "step": 71, + "training_step_time": 0.11275053024291992 + }, + { + "epoch": 1.0986328125e-07, + "model_forward_time": 0.025632143020629883, + "step": 72 + }, + { + "epoch": 1.0986328125e-07, + "step": 72, + "training_step_time": 0.1080777645111084 + }, + { + "epoch": 1.1138916015625e-07, + "model_forward_time": 0.02599644660949707, + "step": 73 + }, + { + "epoch": 1.1138916015625e-07, + "step": 73, + "training_step_time": 0.10730195045471191 + }, + { + "epoch": 1.129150390625e-07, + "model_forward_time": 0.025754928588867188, + "step": 74 + }, + { + "epoch": 1.129150390625e-07, + "step": 74, + "training_step_time": 0.12090826034545898 + }, + { + "epoch": 1.1444091796875e-07, + "model_forward_time": 0.02681899070739746, + "step": 75 + }, + { + "epoch": 1.1444091796875e-07, + "step": 75, + "training_step_time": 0.1279304027557373 + }, + { + "epoch": 1.15966796875e-07, + "model_forward_time": 0.026015043258666992, + "step": 76 + }, + { + "epoch": 1.15966796875e-07, + "step": 76, + "training_step_time": 0.1549975872039795 + }, + { + "epoch": 1.1749267578125e-07, + "model_forward_time": 0.02524256706237793, + "step": 77 + }, + { + "epoch": 1.1749267578125e-07, + "step": 77, + "training_step_time": 0.16358280181884766 + }, + { + "epoch": 1.190185546875e-07, + "model_forward_time": 0.025020122528076172, + "step": 78 + }, + { + "epoch": 1.190185546875e-07, + "step": 78, + "training_step_time": 0.16798901557922363 + }, + { + "epoch": 1.2054443359375e-07, + "model_forward_time": 0.02513861656188965, + "step": 79 + }, + { + "epoch": 1.2054443359375e-07, + "step": 79, + "training_step_time": 0.1597733497619629 + }, + { + "epoch": 1.220703125e-07, + "grad_norm": 2.160930871963501, + "learning_rate": 5.333333333333334e-06, + "loss": 1.0985, + "step": 80 + }, + { + "epoch": 1.220703125e-07, + "model_forward_time": 0.0246734619140625, + "step": 80 + }, + { + "epoch": 1.220703125e-07, + "step": 80, + "training_step_time": 0.11692357063293457 + }, + { + "epoch": 1.2359619140625e-07, + "model_forward_time": 0.02527141571044922, + "step": 81 + }, + { + "epoch": 1.2359619140625e-07, + "step": 81, + "training_step_time": 0.10662055015563965 + }, + { + "epoch": 1.251220703125e-07, + "model_forward_time": 0.028119802474975586, + "step": 82 + }, + { + "epoch": 1.251220703125e-07, + "step": 82, + "training_step_time": 0.11094474792480469 + }, + { + "epoch": 1.2664794921875e-07, + "model_forward_time": 0.02660393714904785, + "step": 83 + }, + { + "epoch": 1.2664794921875e-07, + "step": 83, + "training_step_time": 0.2058873176574707 + }, + { + "epoch": 1.28173828125e-07, + "model_forward_time": 0.02728438377380371, + "step": 84 + }, + { + "epoch": 1.28173828125e-07, + "step": 84, + "training_step_time": 0.11332988739013672 + }, + { + "epoch": 1.2969970703125e-07, + "model_forward_time": 0.025354385375976562, + "step": 85 + }, + { + "epoch": 1.2969970703125e-07, + "step": 85, + "training_step_time": 0.1064918041229248 + }, + { + "epoch": 1.312255859375e-07, + "model_forward_time": 0.02603292465209961, + "step": 86 + }, + { + "epoch": 1.312255859375e-07, + "step": 86, + "training_step_time": 0.10985732078552246 + }, + { + "epoch": 1.3275146484375e-07, + "model_forward_time": 0.02564835548400879, + "step": 87 + }, + { + "epoch": 1.3275146484375e-07, + "step": 87, + "training_step_time": 0.10858607292175293 + }, + { + "epoch": 1.3427734375e-07, + "model_forward_time": 0.025949716567993164, + "step": 88 + }, + { + "epoch": 1.3427734375e-07, + "step": 88, + "training_step_time": 0.1072390079498291 + }, + { + "epoch": 1.3580322265625e-07, + "model_forward_time": 0.029630661010742188, + "step": 89 + }, + { + "epoch": 1.3580322265625e-07, + "step": 89, + "training_step_time": 0.11060118675231934 + }, + { + "epoch": 1.373291015625e-07, + "grad_norm": 2.017200231552124, + "learning_rate": 6e-06, + "loss": 1.1021, + "step": 90 + }, + { + "epoch": 1.373291015625e-07, + "model_forward_time": 0.025426387786865234, + "step": 90 + }, + { + "epoch": 1.373291015625e-07, + "step": 90, + "training_step_time": 0.10535740852355957 + }, + { + "epoch": 1.3885498046875e-07, + "model_forward_time": 0.025750398635864258, + "step": 91 + }, + { + "epoch": 1.3885498046875e-07, + "step": 91, + "training_step_time": 0.10763072967529297 + }, + { + "epoch": 1.40380859375e-07, + "model_forward_time": 0.026279449462890625, + "step": 92 + }, + { + "epoch": 1.40380859375e-07, + "step": 92, + "training_step_time": 0.10863423347473145 + }, + { + "epoch": 1.4190673828125e-07, + "model_forward_time": 0.02568960189819336, + "step": 93 + }, + { + "epoch": 1.4190673828125e-07, + "step": 93, + "training_step_time": 0.1126718521118164 + }, + { + "epoch": 1.434326171875e-07, + "model_forward_time": 0.025844812393188477, + "step": 94 + }, + { + "epoch": 1.434326171875e-07, + "step": 94, + "training_step_time": 0.10429811477661133 + }, + { + "epoch": 1.4495849609375e-07, + "model_forward_time": 0.025232315063476562, + "step": 95 + }, + { + "epoch": 1.4495849609375e-07, + "step": 95, + "training_step_time": 0.10566186904907227 + }, + { + "epoch": 1.46484375e-07, + "model_forward_time": 0.025871753692626953, + "step": 96 + }, + { + "epoch": 1.46484375e-07, + "step": 96, + "training_step_time": 0.10646677017211914 + }, + { + "epoch": 1.4801025390625e-07, + "model_forward_time": 0.025615692138671875, + "step": 97 + }, + { + "epoch": 1.4801025390625e-07, + "step": 97, + "training_step_time": 0.1059727668762207 + }, + { + "epoch": 1.495361328125e-07, + "model_forward_time": 0.025202035903930664, + "step": 98 + }, + { + "epoch": 1.495361328125e-07, + "step": 98, + "training_step_time": 0.10299944877624512 + }, + { + "epoch": 1.5106201171875e-07, + "model_forward_time": 0.025269746780395508, + "step": 99 + }, + { + "epoch": 1.5106201171875e-07, + "step": 99, + "training_step_time": 0.10377955436706543 + }, + { + "epoch": 1.52587890625e-07, + "grad_norm": 2.2146012783050537, + "learning_rate": 6.666666666666667e-06, + "loss": 1.1055, + "step": 100 + }, + { + "epoch": 1.52587890625e-07, + "model_forward_time": 0.025841474533081055, + "step": 100 + }, + { + "epoch": 1.52587890625e-07, + "step": 100, + "training_step_time": 0.10974383354187012 + }, + { + "epoch": 1.5411376953125e-07, + "model_forward_time": 0.025582075119018555, + "step": 101 + }, + { + "epoch": 1.5411376953125e-07, + "step": 101, + "training_step_time": 0.10512566566467285 + }, + { + "epoch": 1.556396484375e-07, + "model_forward_time": 0.02594161033630371, + "step": 102 + }, + { + "epoch": 1.556396484375e-07, + "step": 102, + "training_step_time": 0.1070563793182373 + }, + { + "epoch": 1.5716552734375e-07, + "model_forward_time": 0.025438785552978516, + "step": 103 + }, + { + "epoch": 1.5716552734375e-07, + "step": 103, + "training_step_time": 0.10385584831237793 + }, + { + "epoch": 1.5869140625e-07, + "model_forward_time": 0.025485754013061523, + "step": 104 + }, + { + "epoch": 1.5869140625e-07, + "step": 104, + "training_step_time": 0.10833215713500977 + }, + { + "epoch": 1.6021728515625e-07, + "model_forward_time": 0.025707244873046875, + "step": 105 + }, + { + "epoch": 1.6021728515625e-07, + "step": 105, + "training_step_time": 0.1103818416595459 + }, + { + "epoch": 1.617431640625e-07, + "model_forward_time": 0.025554418563842773, + "step": 106 + }, + { + "epoch": 1.617431640625e-07, + "step": 106, + "training_step_time": 0.1063225269317627 + }, + { + "epoch": 1.6326904296875e-07, + "model_forward_time": 0.02517247200012207, + "step": 107 + }, + { + "epoch": 1.6326904296875e-07, + "step": 107, + "training_step_time": 0.11746406555175781 + }, + { + "epoch": 1.64794921875e-07, + "model_forward_time": 0.02513718605041504, + "step": 108 + }, + { + "epoch": 1.64794921875e-07, + "step": 108, + "training_step_time": 0.10857868194580078 + }, + { + "epoch": 1.6632080078125e-07, + "model_forward_time": 0.025552749633789062, + "step": 109 + }, + { + "epoch": 1.6632080078125e-07, + "step": 109, + "training_step_time": 0.19937992095947266 + }, + { + "epoch": 1.678466796875e-07, + "grad_norm": 3.705718755722046, + "learning_rate": 7.333333333333334e-06, + "loss": 1.0923, + "step": 110 + }, + { + "epoch": 1.678466796875e-07, + "model_forward_time": 0.024410247802734375, + "step": 110 + }, + { + "epoch": 1.678466796875e-07, + "step": 110, + "training_step_time": 0.20553088188171387 + }, + { + "epoch": 1.6937255859375e-07, + "model_forward_time": 0.025051116943359375, + "step": 111 + }, + { + "epoch": 1.6937255859375e-07, + "step": 111, + "training_step_time": 0.11757636070251465 + }, + { + "epoch": 1.708984375e-07, + "model_forward_time": 0.024251699447631836, + "step": 112 + }, + { + "epoch": 1.708984375e-07, + "step": 112, + "training_step_time": 0.1045370101928711 + }, + { + "epoch": 1.7242431640625e-07, + "model_forward_time": 0.02516627311706543, + "step": 113 + }, + { + "epoch": 1.7242431640625e-07, + "step": 113, + "training_step_time": 0.21181869506835938 + }, + { + "epoch": 1.739501953125e-07, + "model_forward_time": 0.024795055389404297, + "step": 114 + }, + { + "epoch": 1.739501953125e-07, + "step": 114, + "training_step_time": 0.1017763614654541 + }, + { + "epoch": 1.7547607421875e-07, + "model_forward_time": 0.024666547775268555, + "step": 115 + }, + { + "epoch": 1.7547607421875e-07, + "step": 115, + "training_step_time": 0.10803914070129395 + }, + { + "epoch": 1.77001953125e-07, + "model_forward_time": 0.024348974227905273, + "step": 116 + }, + { + "epoch": 1.77001953125e-07, + "step": 116, + "training_step_time": 0.10791945457458496 + }, + { + "epoch": 1.7852783203125e-07, + "model_forward_time": 0.025171995162963867, + "step": 117 + }, + { + "epoch": 1.7852783203125e-07, + "step": 117, + "training_step_time": 0.10659551620483398 + }, + { + "epoch": 1.800537109375e-07, + "model_forward_time": 0.025080204010009766, + "step": 118 + }, + { + "epoch": 1.800537109375e-07, + "step": 118, + "training_step_time": 0.10491943359375 + }, + { + "epoch": 1.8157958984375e-07, + "model_forward_time": 0.025238513946533203, + "step": 119 + }, + { + "epoch": 1.8157958984375e-07, + "step": 119, + "training_step_time": 0.175095796585083 + }, + { + "epoch": 1.8310546875e-07, + "grad_norm": 2.5202622413635254, + "learning_rate": 8.000000000000001e-06, + "loss": 1.0777, + "step": 120 + }, + { + "epoch": 1.8310546875e-07, + "model_forward_time": 0.025106430053710938, + "step": 120 + }, + { + "epoch": 1.8310546875e-07, + "step": 120, + "training_step_time": 0.11800432205200195 + }, + { + "epoch": 1.8463134765625e-07, + "model_forward_time": 0.024784088134765625, + "step": 121 + }, + { + "epoch": 1.8463134765625e-07, + "step": 121, + "training_step_time": 0.1031346321105957 + }, + { + "epoch": 1.861572265625e-07, + "model_forward_time": 0.026371479034423828, + "step": 122 + }, + { + "epoch": 1.861572265625e-07, + "step": 122, + "training_step_time": 0.10760188102722168 + }, + { + "epoch": 1.8768310546875e-07, + "model_forward_time": 0.025882720947265625, + "step": 123 + }, + { + "epoch": 1.8768310546875e-07, + "step": 123, + "training_step_time": 0.10884928703308105 + }, + { + "epoch": 1.89208984375e-07, + "model_forward_time": 0.024898767471313477, + "step": 124 + }, + { + "epoch": 1.89208984375e-07, + "step": 124, + "training_step_time": 0.1766490936279297 + }, + { + "epoch": 1.9073486328125e-07, + "model_forward_time": 0.024541854858398438, + "step": 125 + }, + { + "epoch": 1.9073486328125e-07, + "step": 125, + "training_step_time": 0.10937190055847168 + }, + { + "epoch": 1.922607421875e-07, + "model_forward_time": 0.024924755096435547, + "step": 126 + }, + { + "epoch": 1.922607421875e-07, + "step": 126, + "training_step_time": 0.10846853256225586 + }, + { + "epoch": 1.9378662109375e-07, + "model_forward_time": 0.02446126937866211, + "step": 127 + }, + { + "epoch": 1.9378662109375e-07, + "step": 127, + "training_step_time": 0.20496296882629395 + }, + { + "epoch": 1.953125e-07, + "model_forward_time": 0.023757457733154297, + "step": 128 + }, + { + "epoch": 1.953125e-07, + "step": 128, + "training_step_time": 0.10406255722045898 + }, + { + "epoch": 1.9683837890625e-07, + "model_forward_time": 0.024302959442138672, + "step": 129 + }, + { + "epoch": 1.9683837890625e-07, + "step": 129, + "training_step_time": 0.11093688011169434 + }, + { + "epoch": 1.983642578125e-07, + "grad_norm": 3.2971274852752686, + "learning_rate": 8.666666666666668e-06, + "loss": 1.087, + "step": 130 + }, + { + "epoch": 1.983642578125e-07, + "model_forward_time": 0.025298118591308594, + "step": 130 + }, + { + "epoch": 1.983642578125e-07, + "step": 130, + "training_step_time": 0.1963181495666504 + }, + { + "epoch": 1.9989013671875e-07, + "model_forward_time": 0.024568796157836914, + "step": 131 + }, + { + "epoch": 1.9989013671875e-07, + "step": 131, + "training_step_time": 0.10664820671081543 + }, + { + "epoch": 2.01416015625e-07, + "model_forward_time": 0.024847030639648438, + "step": 132 + }, + { + "epoch": 2.01416015625e-07, + "step": 132, + "training_step_time": 0.10486078262329102 + }, + { + "epoch": 2.0294189453125e-07, + "model_forward_time": 0.025294065475463867, + "step": 133 + }, + { + "epoch": 2.0294189453125e-07, + "step": 133, + "training_step_time": 0.10913729667663574 + }, + { + "epoch": 2.044677734375e-07, + "model_forward_time": 0.02522587776184082, + "step": 134 + }, + { + "epoch": 2.044677734375e-07, + "step": 134, + "training_step_time": 0.11135077476501465 + }, + { + "epoch": 2.0599365234375e-07, + "model_forward_time": 0.025368452072143555, + "step": 135 + }, + { + "epoch": 2.0599365234375e-07, + "step": 135, + "training_step_time": 0.10720133781433105 + }, + { + "epoch": 2.0751953125e-07, + "model_forward_time": 0.025255441665649414, + "step": 136 + }, + { + "epoch": 2.0751953125e-07, + "step": 136, + "training_step_time": 0.10765504837036133 + }, + { + "epoch": 2.0904541015625e-07, + "model_forward_time": 0.025819778442382812, + "step": 137 + }, + { + "epoch": 2.0904541015625e-07, + "step": 137, + "training_step_time": 0.10836267471313477 + }, + { + "epoch": 2.105712890625e-07, + "model_forward_time": 0.025656461715698242, + "step": 138 + }, + { + "epoch": 2.105712890625e-07, + "step": 138, + "training_step_time": 0.10537075996398926 + }, + { + "epoch": 2.1209716796875e-07, + "model_forward_time": 0.024921894073486328, + "step": 139 + }, + { + "epoch": 2.1209716796875e-07, + "step": 139, + "training_step_time": 0.10823225975036621 + }, + { + "epoch": 2.13623046875e-07, + "grad_norm": 2.060201406478882, + "learning_rate": 9.333333333333334e-06, + "loss": 1.1007, + "step": 140 + }, + { + "epoch": 2.13623046875e-07, + "model_forward_time": 0.025678396224975586, + "step": 140 + }, + { + "epoch": 2.13623046875e-07, + "step": 140, + "training_step_time": 0.10832762718200684 + }, + { + "epoch": 2.1514892578125e-07, + "model_forward_time": 0.02512669563293457, + "step": 141 + }, + { + "epoch": 2.1514892578125e-07, + "step": 141, + "training_step_time": 0.11315178871154785 + }, + { + "epoch": 2.166748046875e-07, + "model_forward_time": 0.02522563934326172, + "step": 142 + }, + { + "epoch": 2.166748046875e-07, + "step": 142, + "training_step_time": 0.1073002815246582 + }, + { + "epoch": 2.1820068359375e-07, + "model_forward_time": 0.025441646575927734, + "step": 143 + }, + { + "epoch": 2.1820068359375e-07, + "step": 143, + "training_step_time": 0.10685944557189941 + }, + { + "epoch": 2.197265625e-07, + "model_forward_time": 0.02531743049621582, + "step": 144 + }, + { + "epoch": 2.197265625e-07, + "step": 144, + "training_step_time": 0.10764336585998535 + }, + { + "epoch": 2.2125244140625e-07, + "model_forward_time": 0.025152206420898438, + "step": 145 + }, + { + "epoch": 2.2125244140625e-07, + "step": 145, + "training_step_time": 0.11270904541015625 + }, + { + "epoch": 2.227783203125e-07, + "model_forward_time": 0.027230024337768555, + "step": 146 + }, + { + "epoch": 2.227783203125e-07, + "step": 146, + "training_step_time": 0.11264300346374512 + }, + { + "epoch": 2.2430419921875e-07, + "model_forward_time": 0.025463104248046875, + "step": 147 + }, + { + "epoch": 2.2430419921875e-07, + "step": 147, + "training_step_time": 0.11224055290222168 + }, + { + "epoch": 2.25830078125e-07, + "model_forward_time": 0.025217294692993164, + "step": 148 + }, + { + "epoch": 2.25830078125e-07, + "step": 148, + "training_step_time": 0.11015510559082031 + }, + { + "epoch": 2.2735595703125e-07, + "model_forward_time": 0.026349782943725586, + "step": 149 + }, + { + "epoch": 2.2735595703125e-07, + "step": 149, + "training_step_time": 0.10867691040039062 + }, + { + "epoch": 2.288818359375e-07, + "grad_norm": 1.9003688097000122, + "learning_rate": 1e-05, + "loss": 1.0665, + "step": 150 + }, + { + "epoch": 2.288818359375e-07, + "model_forward_time": 0.02533888816833496, + "step": 150 + }, + { + "epoch": 2.288818359375e-07, + "step": 150, + "training_step_time": 0.10929298400878906 + }, + { + "epoch": 2.3040771484375e-07, + "model_forward_time": 0.025371074676513672, + "step": 151 + }, + { + "epoch": 2.3040771484375e-07, + "step": 151, + "training_step_time": 0.10710024833679199 + }, + { + "epoch": 2.3193359375e-07, + "model_forward_time": 0.024374961853027344, + "step": 152 + }, + { + "epoch": 2.3193359375e-07, + "step": 152, + "training_step_time": 0.18261027336120605 + }, + { + "epoch": 2.3345947265625e-07, + "model_forward_time": 0.024361848831176758, + "step": 153 + }, + { + "epoch": 2.3345947265625e-07, + "step": 153, + "training_step_time": 0.10788965225219727 + }, + { + "epoch": 2.349853515625e-07, + "model_forward_time": 0.024384498596191406, + "step": 154 + }, + { + "epoch": 2.349853515625e-07, + "step": 154, + "training_step_time": 0.20012164115905762 + }, + { + "epoch": 2.3651123046875e-07, + "model_forward_time": 0.024213552474975586, + "step": 155 + }, + { + "epoch": 2.3651123046875e-07, + "step": 155, + "training_step_time": 0.1328420639038086 + }, + { + "epoch": 2.38037109375e-07, + "model_forward_time": 0.02397465705871582, + "step": 156 + }, + { + "epoch": 2.38037109375e-07, + "step": 156, + "training_step_time": 0.14797019958496094 + }, + { + "epoch": 2.3956298828125e-07, + "model_forward_time": 0.024573564529418945, + "step": 157 + }, + { + "epoch": 2.3956298828125e-07, + "step": 157, + "training_step_time": 0.17415404319763184 + }, + { + "epoch": 2.410888671875e-07, + "model_forward_time": 0.024271011352539062, + "step": 158 + }, + { + "epoch": 2.410888671875e-07, + "step": 158, + "training_step_time": 0.15529704093933105 + }, + { + "epoch": 2.4261474609375e-07, + "model_forward_time": 0.024670839309692383, + "step": 159 + }, + { + "epoch": 2.4261474609375e-07, + "step": 159, + "training_step_time": 0.10644721984863281 + }, + { + "epoch": 2.44140625e-07, + "grad_norm": 2.8054840564727783, + "learning_rate": 1.0666666666666667e-05, + "loss": 1.1201, + "step": 160 + }, + { + "epoch": 2.44140625e-07, + "model_forward_time": 0.02489161491394043, + "step": 160 + }, + { + "epoch": 2.44140625e-07, + "step": 160, + "training_step_time": 0.1070854663848877 + }, + { + "epoch": 2.4566650390625e-07, + "model_forward_time": 0.025034427642822266, + "step": 161 + }, + { + "epoch": 2.4566650390625e-07, + "step": 161, + "training_step_time": 0.10844230651855469 + }, + { + "epoch": 2.471923828125e-07, + "model_forward_time": 0.02515697479248047, + "step": 162 + }, + { + "epoch": 2.471923828125e-07, + "step": 162, + "training_step_time": 0.10387110710144043 + }, + { + "epoch": 2.4871826171875e-07, + "model_forward_time": 0.02525186538696289, + "step": 163 + }, + { + "epoch": 2.4871826171875e-07, + "step": 163, + "training_step_time": 0.1042320728302002 + }, + { + "epoch": 2.50244140625e-07, + "model_forward_time": 0.025216341018676758, + "step": 164 + }, + { + "epoch": 2.50244140625e-07, + "step": 164, + "training_step_time": 0.10904598236083984 + }, + { + "epoch": 2.5177001953125e-07, + "model_forward_time": 0.025246381759643555, + "step": 165 + }, + { + "epoch": 2.5177001953125e-07, + "step": 165, + "training_step_time": 0.20741724967956543 + }, + { + "epoch": 2.532958984375e-07, + "model_forward_time": 0.024412155151367188, + "step": 166 + }, + { + "epoch": 2.532958984375e-07, + "step": 166, + "training_step_time": 0.10574102401733398 + }, + { + "epoch": 2.5482177734375e-07, + "model_forward_time": 0.024666786193847656, + "step": 167 + }, + { + "epoch": 2.5482177734375e-07, + "step": 167, + "training_step_time": 0.10556483268737793 + }, + { + "epoch": 2.5634765625e-07, + "model_forward_time": 0.025375843048095703, + "step": 168 + }, + { + "epoch": 2.5634765625e-07, + "step": 168, + "training_step_time": 0.11011004447937012 + }, + { + "epoch": 2.5787353515625e-07, + "model_forward_time": 0.025117874145507812, + "step": 169 + }, + { + "epoch": 2.5787353515625e-07, + "step": 169, + "training_step_time": 0.1689453125 + }, + { + "epoch": 2.593994140625e-07, + "grad_norm": 1.4153610467910767, + "learning_rate": 1.1333333333333334e-05, + "loss": 1.0695, + "step": 170 + }, + { + "epoch": 2.593994140625e-07, + "model_forward_time": 0.024693965911865234, + "step": 170 + }, + { + "epoch": 2.593994140625e-07, + "step": 170, + "training_step_time": 0.10523772239685059 + }, + { + "epoch": 2.6092529296875e-07, + "model_forward_time": 0.025215864181518555, + "step": 171 + }, + { + "epoch": 2.6092529296875e-07, + "step": 171, + "training_step_time": 0.10704302787780762 + }, + { + "epoch": 2.62451171875e-07, + "model_forward_time": 0.025417804718017578, + "step": 172 + }, + { + "epoch": 2.62451171875e-07, + "step": 172, + "training_step_time": 0.20757031440734863 + }, + { + "epoch": 2.6397705078125e-07, + "model_forward_time": 0.024976253509521484, + "step": 173 + }, + { + "epoch": 2.6397705078125e-07, + "step": 173, + "training_step_time": 0.10101461410522461 + }, + { + "epoch": 2.655029296875e-07, + "model_forward_time": 0.02486872673034668, + "step": 174 + }, + { + "epoch": 2.655029296875e-07, + "step": 174, + "training_step_time": 0.1085202693939209 + }, + { + "epoch": 2.6702880859375e-07, + "model_forward_time": 0.025509119033813477, + "step": 175 + }, + { + "epoch": 2.6702880859375e-07, + "step": 175, + "training_step_time": 0.20956778526306152 + }, + { + "epoch": 2.685546875e-07, + "model_forward_time": 0.024161815643310547, + "step": 176 + }, + { + "epoch": 2.685546875e-07, + "step": 176, + "training_step_time": 0.10477256774902344 + }, + { + "epoch": 2.7008056640625e-07, + "model_forward_time": 0.024560928344726562, + "step": 177 + }, + { + "epoch": 2.7008056640625e-07, + "step": 177, + "training_step_time": 0.10026764869689941 + }, + { + "epoch": 2.716064453125e-07, + "model_forward_time": 0.025931358337402344, + "step": 178 + }, + { + "epoch": 2.716064453125e-07, + "step": 178, + "training_step_time": 0.10803103446960449 + }, + { + "epoch": 2.7313232421875e-07, + "model_forward_time": 0.025135040283203125, + "step": 179 + }, + { + "epoch": 2.7313232421875e-07, + "step": 179, + "training_step_time": 0.10832476615905762 + }, + { + "epoch": 2.74658203125e-07, + "grad_norm": 1.289467215538025, + "learning_rate": 1.2e-05, + "loss": 1.059, + "step": 180 + }, + { + "epoch": 2.74658203125e-07, + "model_forward_time": 0.02535223960876465, + "step": 180 + }, + { + "epoch": 2.74658203125e-07, + "step": 180, + "training_step_time": 0.10510659217834473 + }, + { + "epoch": 2.7618408203125e-07, + "model_forward_time": 0.024930715560913086, + "step": 181 + }, + { + "epoch": 2.7618408203125e-07, + "step": 181, + "training_step_time": 0.10405206680297852 + }, + { + "epoch": 2.777099609375e-07, + "model_forward_time": 0.027380943298339844, + "step": 182 + }, + { + "epoch": 2.777099609375e-07, + "step": 182, + "training_step_time": 0.10753679275512695 + }, + { + "epoch": 2.7923583984375e-07, + "model_forward_time": 0.025535106658935547, + "step": 183 + }, + { + "epoch": 2.7923583984375e-07, + "step": 183, + "training_step_time": 0.10632085800170898 + }, + { + "epoch": 2.8076171875e-07, + "model_forward_time": 0.02551746368408203, + "step": 184 + }, + { + "epoch": 2.8076171875e-07, + "step": 184, + "training_step_time": 0.10328292846679688 + }, + { + "epoch": 2.8228759765625e-07, + "model_forward_time": 0.025577068328857422, + "step": 185 + }, + { + "epoch": 2.8228759765625e-07, + "step": 185, + "training_step_time": 0.10737752914428711 + }, + { + "epoch": 2.838134765625e-07, + "model_forward_time": 0.025345325469970703, + "step": 186 + }, + { + "epoch": 2.838134765625e-07, + "step": 186, + "training_step_time": 0.10521984100341797 + }, + { + "epoch": 2.8533935546875e-07, + "model_forward_time": 0.02513742446899414, + "step": 187 + }, + { + "epoch": 2.8533935546875e-07, + "step": 187, + "training_step_time": 0.10754847526550293 + }, + { + "epoch": 2.86865234375e-07, + "model_forward_time": 0.025455236434936523, + "step": 188 + }, + { + "epoch": 2.86865234375e-07, + "step": 188, + "training_step_time": 0.10693955421447754 + }, + { + "epoch": 2.8839111328125e-07, + "model_forward_time": 0.025736331939697266, + "step": 189 + }, + { + "epoch": 2.8839111328125e-07, + "step": 189, + "training_step_time": 0.10713648796081543 + }, + { + "epoch": 2.899169921875e-07, + "grad_norm": 1.8123581409454346, + "learning_rate": 1.2666666666666668e-05, + "loss": 1.042, + "step": 190 + }, + { + "epoch": 2.899169921875e-07, + "model_forward_time": 0.025921106338500977, + "step": 190 + }, + { + "epoch": 2.899169921875e-07, + "step": 190, + "training_step_time": 0.1091923713684082 + }, + { + "epoch": 2.9144287109375e-07, + "model_forward_time": 0.025386333465576172, + "step": 191 + }, + { + "epoch": 2.9144287109375e-07, + "step": 191, + "training_step_time": 0.10807228088378906 + }, + { + "epoch": 2.9296875e-07, + "model_forward_time": 0.026799678802490234, + "step": 192 + }, + { + "epoch": 2.9296875e-07, + "step": 192, + "training_step_time": 0.10712909698486328 + }, + { + "epoch": 2.9449462890625e-07, + "model_forward_time": 0.025065183639526367, + "step": 193 + }, + { + "epoch": 2.9449462890625e-07, + "step": 193, + "training_step_time": 0.10766220092773438 + }, + { + "epoch": 2.960205078125e-07, + "model_forward_time": 0.02542567253112793, + "step": 194 + }, + { + "epoch": 2.960205078125e-07, + "step": 194, + "training_step_time": 0.10897135734558105 + }, + { + "epoch": 2.9754638671875e-07, + "model_forward_time": 0.02498006820678711, + "step": 195 + }, + { + "epoch": 2.9754638671875e-07, + "step": 195, + "training_step_time": 0.10578083992004395 + }, + { + "epoch": 2.99072265625e-07, + "model_forward_time": 0.025472640991210938, + "step": 196 + }, + { + "epoch": 2.99072265625e-07, + "step": 196, + "training_step_time": 0.10597109794616699 + }, + { + "epoch": 3.0059814453125e-07, + "model_forward_time": 0.025658845901489258, + "step": 197 + }, + { + "epoch": 3.0059814453125e-07, + "step": 197, + "training_step_time": 0.10854721069335938 + }, + { + "epoch": 3.021240234375e-07, + "model_forward_time": 0.02532052993774414, + "step": 198 + }, + { + "epoch": 3.021240234375e-07, + "step": 198, + "training_step_time": 0.11224126815795898 + }, + { + "epoch": 3.0364990234375e-07, + "model_forward_time": 0.025629758834838867, + "step": 199 + }, + { + "epoch": 3.0364990234375e-07, + "step": 199, + "training_step_time": 0.10959386825561523 + }, + { + "epoch": 3.0517578125e-07, + "grad_norm": 2.2768733501434326, + "learning_rate": 1.3333333333333333e-05, + "loss": 1.0852, + "step": 200 + }, + { + "epoch": 3.0517578125e-07, + "model_forward_time": 0.026217937469482422, + "step": 200 + }, + { + "epoch": 3.0517578125e-07, + "step": 200, + "training_step_time": 0.20072722434997559 + }, + { + "epoch": 3.0670166015625e-07, + "model_forward_time": 0.024330854415893555, + "step": 201 + }, + { + "epoch": 3.0670166015625e-07, + "step": 201, + "training_step_time": 0.1308908462524414 + }, + { + "epoch": 3.082275390625e-07, + "model_forward_time": 0.025192737579345703, + "step": 202 + }, + { + "epoch": 3.082275390625e-07, + "step": 202, + "training_step_time": 0.14418363571166992 + }, + { + "epoch": 3.0975341796875e-07, + "model_forward_time": 0.02505040168762207, + "step": 203 + }, + { + "epoch": 3.0975341796875e-07, + "step": 203, + "training_step_time": 0.1721487045288086 + }, + { + "epoch": 3.11279296875e-07, + "model_forward_time": 0.024411439895629883, + "step": 204 + }, + { + "epoch": 3.11279296875e-07, + "step": 204, + "training_step_time": 0.11894965171813965 + }, + { + "epoch": 3.1280517578125e-07, + "model_forward_time": 0.0242464542388916, + "step": 205 + }, + { + "epoch": 3.1280517578125e-07, + "step": 205, + "training_step_time": 0.11566329002380371 + }, + { + "epoch": 3.143310546875e-07, + "model_forward_time": 0.02573990821838379, + "step": 206 + }, + { + "epoch": 3.143310546875e-07, + "step": 206, + "training_step_time": 0.10853457450866699 + }, + { + "epoch": 3.1585693359375e-07, + "model_forward_time": 0.02544426918029785, + "step": 207 + }, + { + "epoch": 3.1585693359375e-07, + "step": 207, + "training_step_time": 0.11241364479064941 + }, + { + "epoch": 3.173828125e-07, + "model_forward_time": 0.025177001953125, + "step": 208 + }, + { + "epoch": 3.173828125e-07, + "step": 208, + "training_step_time": 0.10628056526184082 + }, + { + "epoch": 3.1890869140625e-07, + "model_forward_time": 0.025434017181396484, + "step": 209 + }, + { + "epoch": 3.1890869140625e-07, + "step": 209, + "training_step_time": 0.1055746078491211 + }, + { + "epoch": 3.204345703125e-07, + "grad_norm": 2.139814853668213, + "learning_rate": 1.4000000000000001e-05, + "loss": 1.0817, + "step": 210 + }, + { + "epoch": 3.204345703125e-07, + "model_forward_time": 0.025125503540039062, + "step": 210 + }, + { + "epoch": 3.204345703125e-07, + "step": 210, + "training_step_time": 0.10978817939758301 + }, + { + "epoch": 3.2196044921875e-07, + "model_forward_time": 0.025634765625, + "step": 211 + }, + { + "epoch": 3.2196044921875e-07, + "step": 211, + "training_step_time": 0.1635749340057373 + }, + { + "epoch": 3.23486328125e-07, + "model_forward_time": 0.024929046630859375, + "step": 212 + }, + { + "epoch": 3.23486328125e-07, + "step": 212, + "training_step_time": 0.14980244636535645 + }, + { + "epoch": 3.2501220703125e-07, + "model_forward_time": 0.025199174880981445, + "step": 213 + }, + { + "epoch": 3.2501220703125e-07, + "step": 213, + "training_step_time": 0.1106722354888916 + }, + { + "epoch": 3.265380859375e-07, + "model_forward_time": 0.0253293514251709, + "step": 214 + }, + { + "epoch": 3.265380859375e-07, + "step": 214, + "training_step_time": 0.10408616065979004 + }, + { + "epoch": 3.2806396484375e-07, + "model_forward_time": 0.02529764175415039, + "step": 215 + }, + { + "epoch": 3.2806396484375e-07, + "step": 215, + "training_step_time": 0.1448988914489746 + }, + { + "epoch": 3.2958984375e-07, + "model_forward_time": 0.02536630630493164, + "step": 216 + }, + { + "epoch": 3.2958984375e-07, + "step": 216, + "training_step_time": 0.14667034149169922 + }, + { + "epoch": 3.3111572265625e-07, + "model_forward_time": 0.0243074893951416, + "step": 217 + }, + { + "epoch": 3.3111572265625e-07, + "step": 217, + "training_step_time": 0.10565853118896484 + }, + { + "epoch": 3.326416015625e-07, + "model_forward_time": 0.02464008331298828, + "step": 218 + }, + { + "epoch": 3.326416015625e-07, + "step": 218, + "training_step_time": 0.15975522994995117 + }, + { + "epoch": 3.3416748046875e-07, + "model_forward_time": 0.024682998657226562, + "step": 219 + }, + { + "epoch": 3.3416748046875e-07, + "step": 219, + "training_step_time": 0.15303301811218262 + }, + { + "epoch": 3.35693359375e-07, + "grad_norm": 1.3726691007614136, + "learning_rate": 1.4666666666666668e-05, + "loss": 1.0744, + "step": 220 + }, + { + "epoch": 3.35693359375e-07, + "model_forward_time": 0.025442838668823242, + "step": 220 + }, + { + "epoch": 3.35693359375e-07, + "step": 220, + "training_step_time": 0.1061403751373291 + }, + { + "epoch": 3.3721923828125e-07, + "model_forward_time": 0.024499893188476562, + "step": 221 + }, + { + "epoch": 3.3721923828125e-07, + "step": 221, + "training_step_time": 0.20609641075134277 + }, + { + "epoch": 3.387451171875e-07, + "model_forward_time": 0.024770736694335938, + "step": 222 + }, + { + "epoch": 3.387451171875e-07, + "step": 222, + "training_step_time": 0.10871124267578125 + }, + { + "epoch": 3.4027099609375e-07, + "model_forward_time": 0.024731159210205078, + "step": 223 + }, + { + "epoch": 3.4027099609375e-07, + "step": 223, + "training_step_time": 0.1062014102935791 + }, + { + "epoch": 3.41796875e-07, + "model_forward_time": 0.025137901306152344, + "step": 224 + }, + { + "epoch": 3.41796875e-07, + "step": 224, + "training_step_time": 0.10837364196777344 + }, + { + "epoch": 3.4332275390625e-07, + "model_forward_time": 0.025831937789916992, + "step": 225 + }, + { + "epoch": 3.4332275390625e-07, + "step": 225, + "training_step_time": 0.10949540138244629 + }, + { + "epoch": 3.448486328125e-07, + "model_forward_time": 0.025206327438354492, + "step": 226 + }, + { + "epoch": 3.448486328125e-07, + "step": 226, + "training_step_time": 0.11171412467956543 + }, + { + "epoch": 3.4637451171875e-07, + "model_forward_time": 0.02624988555908203, + "step": 227 + }, + { + "epoch": 3.4637451171875e-07, + "step": 227, + "training_step_time": 0.11604642868041992 + }, + { + "epoch": 3.47900390625e-07, + "model_forward_time": 0.026447534561157227, + "step": 228 + }, + { + "epoch": 3.47900390625e-07, + "step": 228, + "training_step_time": 0.11378717422485352 + }, + { + "epoch": 3.4942626953125e-07, + "model_forward_time": 0.026441335678100586, + "step": 229 + }, + { + "epoch": 3.4942626953125e-07, + "step": 229, + "training_step_time": 0.11003470420837402 + }, + { + "epoch": 3.509521484375e-07, + "grad_norm": 3.7555150985717773, + "learning_rate": 1.5333333333333334e-05, + "loss": 1.0905, + "step": 230 + }, + { + "epoch": 3.509521484375e-07, + "model_forward_time": 0.02584552764892578, + "step": 230 + }, + { + "epoch": 3.509521484375e-07, + "step": 230, + "training_step_time": 0.10566830635070801 + }, + { + "epoch": 3.5247802734375e-07, + "model_forward_time": 0.025495290756225586, + "step": 231 + }, + { + "epoch": 3.5247802734375e-07, + "step": 231, + "training_step_time": 0.11129093170166016 + }, + { + "epoch": 3.5400390625e-07, + "model_forward_time": 0.02561187744140625, + "step": 232 + }, + { + "epoch": 3.5400390625e-07, + "step": 232, + "training_step_time": 0.10994148254394531 + }, + { + "epoch": 3.5552978515625e-07, + "model_forward_time": 0.02617359161376953, + "step": 233 + }, + { + "epoch": 3.5552978515625e-07, + "step": 233, + "training_step_time": 0.10953950881958008 + }, + { + "epoch": 3.570556640625e-07, + "model_forward_time": 0.025429964065551758, + "step": 234 + }, + { + "epoch": 3.570556640625e-07, + "step": 234, + "training_step_time": 0.10425972938537598 + }, + { + "epoch": 3.5858154296875e-07, + "model_forward_time": 0.025571823120117188, + "step": 235 + }, + { + "epoch": 3.5858154296875e-07, + "step": 235, + "training_step_time": 0.10515356063842773 + }, + { + "epoch": 3.60107421875e-07, + "model_forward_time": 0.02615976333618164, + "step": 236 + }, + { + "epoch": 3.60107421875e-07, + "step": 236, + "training_step_time": 0.10771536827087402 + }, + { + "epoch": 3.6163330078125e-07, + "model_forward_time": 0.026056766510009766, + "step": 237 + }, + { + "epoch": 3.6163330078125e-07, + "step": 237, + "training_step_time": 0.10839581489562988 + }, + { + "epoch": 3.631591796875e-07, + "model_forward_time": 0.025864839553833008, + "step": 238 + }, + { + "epoch": 3.631591796875e-07, + "step": 238, + "training_step_time": 0.1052699089050293 + }, + { + "epoch": 3.6468505859375e-07, + "model_forward_time": 0.02572035789489746, + "step": 239 + }, + { + "epoch": 3.6468505859375e-07, + "step": 239, + "training_step_time": 0.10449099540710449 + }, + { + "epoch": 3.662109375e-07, + "grad_norm": 1.8919063806533813, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.0872, + "step": 240 + }, + { + "epoch": 3.662109375e-07, + "model_forward_time": 0.02666187286376953, + "step": 240 + }, + { + "epoch": 3.662109375e-07, + "step": 240, + "training_step_time": 0.10679841041564941 + }, + { + "epoch": 3.6773681640625e-07, + "model_forward_time": 0.026041507720947266, + "step": 241 + }, + { + "epoch": 3.6773681640625e-07, + "step": 241, + "training_step_time": 0.10630631446838379 + }, + { + "epoch": 3.692626953125e-07, + "model_forward_time": 0.025934457778930664, + "step": 242 + }, + { + "epoch": 3.692626953125e-07, + "step": 242, + "training_step_time": 0.1068878173828125 + }, + { + "epoch": 3.7078857421875e-07, + "model_forward_time": 0.026000499725341797, + "step": 243 + }, + { + "epoch": 3.7078857421875e-07, + "step": 243, + "training_step_time": 0.1785283088684082 + }, + { + "epoch": 3.72314453125e-07, + "model_forward_time": 0.025198698043823242, + "step": 244 + }, + { + "epoch": 3.72314453125e-07, + "step": 244, + "training_step_time": 0.1068873405456543 + }, + { + "epoch": 3.7384033203125e-07, + "model_forward_time": 0.025313377380371094, + "step": 245 + }, + { + "epoch": 3.7384033203125e-07, + "step": 245, + "training_step_time": 0.21465086936950684 + }, + { + "epoch": 3.753662109375e-07, + "model_forward_time": 0.025408267974853516, + "step": 246 + }, + { + "epoch": 3.753662109375e-07, + "step": 246, + "training_step_time": 0.17536520957946777 + }, + { + "epoch": 3.7689208984375e-07, + "model_forward_time": 0.027303457260131836, + "step": 247 + }, + { + "epoch": 3.7689208984375e-07, + "step": 247, + "training_step_time": 0.14746475219726562 + }, + { + "epoch": 3.7841796875e-07, + "model_forward_time": 0.026276111602783203, + "step": 248 + }, + { + "epoch": 3.7841796875e-07, + "step": 248, + "training_step_time": 0.12691950798034668 + }, + { + "epoch": 3.7994384765625e-07, + "model_forward_time": 0.024385452270507812, + "step": 249 + }, + { + "epoch": 3.7994384765625e-07, + "step": 249, + "training_step_time": 0.11448836326599121 + }, + { + "epoch": 3.814697265625e-07, + "grad_norm": 3.5808424949645996, + "learning_rate": 1.6666666666666667e-05, + "loss": 1.0672, + "step": 250 + }, + { + "epoch": 3.814697265625e-07, + "model_forward_time": 0.025554418563842773, + "step": 250 + }, + { + "epoch": 3.814697265625e-07, + "step": 250, + "training_step_time": 0.11302661895751953 + }, + { + "epoch": 3.8299560546875e-07, + "model_forward_time": 0.025761842727661133, + "step": 251 + }, + { + "epoch": 3.8299560546875e-07, + "step": 251, + "training_step_time": 0.11169314384460449 + }, + { + "epoch": 3.84521484375e-07, + "model_forward_time": 0.02521657943725586, + "step": 252 + }, + { + "epoch": 3.84521484375e-07, + "step": 252, + "training_step_time": 0.1096949577331543 + }, + { + "epoch": 3.8604736328125e-07, + "model_forward_time": 0.025208473205566406, + "step": 253 + }, + { + "epoch": 3.8604736328125e-07, + "step": 253, + "training_step_time": 0.10798931121826172 + }, + { + "epoch": 3.875732421875e-07, + "model_forward_time": 0.025070667266845703, + "step": 254 + }, + { + "epoch": 3.875732421875e-07, + "step": 254, + "training_step_time": 0.10565304756164551 + }, + { + "epoch": 3.8909912109375e-07, + "model_forward_time": 0.025752544403076172, + "step": 255 + }, + { + "epoch": 3.8909912109375e-07, + "step": 255, + "training_step_time": 0.1708359718322754 + }, + { + "epoch": 3.90625e-07, + "model_forward_time": 0.02519512176513672, + "step": 256 + }, + { + "epoch": 3.90625e-07, + "step": 256, + "training_step_time": 0.12517976760864258 + }, + { + "epoch": 3.9215087890625e-07, + "model_forward_time": 0.024870872497558594, + "step": 257 + }, + { + "epoch": 3.9215087890625e-07, + "step": 257, + "training_step_time": 0.1068265438079834 + }, + { + "epoch": 3.936767578125e-07, + "model_forward_time": 0.029593944549560547, + "step": 258 + }, + { + "epoch": 3.936767578125e-07, + "step": 258, + "training_step_time": 0.10765361785888672 + }, + { + "epoch": 3.9520263671875e-07, + "model_forward_time": 0.02561044692993164, + "step": 259 + }, + { + "epoch": 3.9520263671875e-07, + "step": 259, + "training_step_time": 0.10733556747436523 + }, + { + "epoch": 3.96728515625e-07, + "grad_norm": 12.873846054077148, + "learning_rate": 1.7333333333333336e-05, + "loss": 1.1058, + "step": 260 + }, + { + "epoch": 3.96728515625e-07, + "model_forward_time": 0.02564263343811035, + "step": 260 + }, + { + "epoch": 3.96728515625e-07, + "step": 260, + "training_step_time": 0.1722714900970459 + }, + { + "epoch": 3.9825439453125e-07, + "model_forward_time": 0.024820327758789062, + "step": 261 + }, + { + "epoch": 3.9825439453125e-07, + "step": 261, + "training_step_time": 0.10611677169799805 + }, + { + "epoch": 3.997802734375e-07, + "model_forward_time": 0.025095462799072266, + "step": 262 + }, + { + "epoch": 3.997802734375e-07, + "step": 262, + "training_step_time": 0.10972094535827637 + }, + { + "epoch": 4.0130615234375e-07, + "model_forward_time": 0.026111125946044922, + "step": 263 + }, + { + "epoch": 4.0130615234375e-07, + "step": 263, + "training_step_time": 0.1816272735595703 + }, + { + "epoch": 4.0283203125e-07, + "model_forward_time": 0.0251309871673584, + "step": 264 + }, + { + "epoch": 4.0283203125e-07, + "step": 264, + "training_step_time": 0.12833762168884277 + }, + { + "epoch": 4.0435791015625e-07, + "model_forward_time": 0.026359081268310547, + "step": 265 + }, + { + "epoch": 4.0435791015625e-07, + "step": 265, + "training_step_time": 0.11410975456237793 + }, + { + "epoch": 4.058837890625e-07, + "model_forward_time": 0.026052474975585938, + "step": 266 + }, + { + "epoch": 4.058837890625e-07, + "step": 266, + "training_step_time": 0.10692667961120605 + }, + { + "epoch": 4.0740966796875e-07, + "model_forward_time": 0.02548813819885254, + "step": 267 + }, + { + "epoch": 4.0740966796875e-07, + "step": 267, + "training_step_time": 0.19517922401428223 + }, + { + "epoch": 4.08935546875e-07, + "model_forward_time": 0.02492523193359375, + "step": 268 + }, + { + "epoch": 4.08935546875e-07, + "step": 268, + "training_step_time": 0.10407400131225586 + }, + { + "epoch": 4.1046142578125e-07, + "model_forward_time": 0.02457261085510254, + "step": 269 + }, + { + "epoch": 4.1046142578125e-07, + "step": 269, + "training_step_time": 0.10704469680786133 + }, + { + "epoch": 4.119873046875e-07, + "grad_norm": 2.260190010070801, + "learning_rate": 1.8e-05, + "loss": 1.1271, + "step": 270 + }, + { + "epoch": 4.119873046875e-07, + "model_forward_time": 0.025186777114868164, + "step": 270 + }, + { + "epoch": 4.119873046875e-07, + "step": 270, + "training_step_time": 0.10310244560241699 + }, + { + "epoch": 4.1351318359375e-07, + "model_forward_time": 0.025582551956176758, + "step": 271 + }, + { + "epoch": 4.1351318359375e-07, + "step": 271, + "training_step_time": 0.10373687744140625 + }, + { + "epoch": 4.150390625e-07, + "model_forward_time": 0.025163650512695312, + "step": 272 + }, + { + "epoch": 4.150390625e-07, + "step": 272, + "training_step_time": 0.10535073280334473 + }, + { + "epoch": 4.1656494140625e-07, + "model_forward_time": 0.02567458152770996, + "step": 273 + }, + { + "epoch": 4.1656494140625e-07, + "step": 273, + "training_step_time": 0.10604214668273926 + }, + { + "epoch": 4.180908203125e-07, + "model_forward_time": 0.02625131607055664, + "step": 274 + }, + { + "epoch": 4.180908203125e-07, + "step": 274, + "training_step_time": 0.10631108283996582 + }, + { + "epoch": 4.1961669921875e-07, + "model_forward_time": 0.025867223739624023, + "step": 275 + }, + { + "epoch": 4.1961669921875e-07, + "step": 275, + "training_step_time": 0.10478878021240234 + }, + { + "epoch": 4.21142578125e-07, + "model_forward_time": 0.0256192684173584, + "step": 276 + }, + { + "epoch": 4.21142578125e-07, + "step": 276, + "training_step_time": 0.10482239723205566 + }, + { + "epoch": 4.2266845703125e-07, + "model_forward_time": 0.025732755661010742, + "step": 277 + }, + { + "epoch": 4.2266845703125e-07, + "step": 277, + "training_step_time": 0.10580730438232422 + }, + { + "epoch": 4.241943359375e-07, + "model_forward_time": 0.02573704719543457, + "step": 278 + }, + { + "epoch": 4.241943359375e-07, + "step": 278, + "training_step_time": 0.10732054710388184 + }, + { + "epoch": 4.2572021484375e-07, + "model_forward_time": 0.025740385055541992, + "step": 279 + }, + { + "epoch": 4.2572021484375e-07, + "step": 279, + "training_step_time": 0.10444331169128418 + }, + { + "epoch": 4.2724609375e-07, + "grad_norm": 2.123913288116455, + "learning_rate": 1.866666666666667e-05, + "loss": 1.0774, + "step": 280 + }, + { + "epoch": 4.2724609375e-07, + "model_forward_time": 0.025602102279663086, + "step": 280 + }, + { + "epoch": 4.2724609375e-07, + "step": 280, + "training_step_time": 0.10497379302978516 + }, + { + "epoch": 4.2877197265625e-07, + "model_forward_time": 0.025228023529052734, + "step": 281 + }, + { + "epoch": 4.2877197265625e-07, + "step": 281, + "training_step_time": 0.10668349266052246 + }, + { + "epoch": 4.302978515625e-07, + "model_forward_time": 0.02588510513305664, + "step": 282 + }, + { + "epoch": 4.302978515625e-07, + "step": 282, + "training_step_time": 0.11024165153503418 + }, + { + "epoch": 4.3182373046875e-07, + "model_forward_time": 0.02536606788635254, + "step": 283 + }, + { + "epoch": 4.3182373046875e-07, + "step": 283, + "training_step_time": 0.10803508758544922 + }, + { + "epoch": 4.33349609375e-07, + "model_forward_time": 0.025258541107177734, + "step": 284 + }, + { + "epoch": 4.33349609375e-07, + "step": 284, + "training_step_time": 0.10373044013977051 + }, + { + "epoch": 4.3487548828125e-07, + "model_forward_time": 0.025148630142211914, + "step": 285 + }, + { + "epoch": 4.3487548828125e-07, + "step": 285, + "training_step_time": 0.1060020923614502 + }, + { + "epoch": 4.364013671875e-07, + "model_forward_time": 0.025150537490844727, + "step": 286 + }, + { + "epoch": 4.364013671875e-07, + "step": 286, + "training_step_time": 0.10913610458374023 + }, + { + "epoch": 4.3792724609375e-07, + "model_forward_time": 0.025230884552001953, + "step": 287 + }, + { + "epoch": 4.3792724609375e-07, + "step": 287, + "training_step_time": 0.10769248008728027 + }, + { + "epoch": 4.39453125e-07, + "model_forward_time": 0.02524399757385254, + "step": 288 + }, + { + "epoch": 4.39453125e-07, + "step": 288, + "training_step_time": 0.11211633682250977 + }, + { + "epoch": 4.4097900390625e-07, + "model_forward_time": 0.025160551071166992, + "step": 289 + }, + { + "epoch": 4.4097900390625e-07, + "step": 289, + "training_step_time": 0.10717606544494629 + }, + { + "epoch": 4.425048828125e-07, + "grad_norm": 1.9012972116470337, + "learning_rate": 1.9333333333333333e-05, + "loss": 1.0857, + "step": 290 + }, + { + "epoch": 4.425048828125e-07, + "model_forward_time": 0.025286436080932617, + "step": 290 + }, + { + "epoch": 4.425048828125e-07, + "step": 290, + "training_step_time": 0.1056680679321289 + }, + { + "epoch": 4.4403076171875e-07, + "model_forward_time": 0.025116682052612305, + "step": 291 + }, + { + "epoch": 4.4403076171875e-07, + "step": 291, + "training_step_time": 0.1090090274810791 + }, + { + "epoch": 4.45556640625e-07, + "model_forward_time": 0.025147199630737305, + "step": 292 + }, + { + "epoch": 4.45556640625e-07, + "step": 292, + "training_step_time": 0.20943307876586914 + }, + { + "epoch": 4.4708251953125e-07, + "model_forward_time": 0.025385379791259766, + "step": 293 + }, + { + "epoch": 4.4708251953125e-07, + "step": 293, + "training_step_time": 0.15311980247497559 + }, + { + "epoch": 4.486083984375e-07, + "model_forward_time": 0.025290489196777344, + "step": 294 + }, + { + "epoch": 4.486083984375e-07, + "step": 294, + "training_step_time": 0.17037463188171387 + }, + { + "epoch": 4.5013427734375e-07, + "model_forward_time": 0.025030851364135742, + "step": 295 + }, + { + "epoch": 4.5013427734375e-07, + "step": 295, + "training_step_time": 0.16298389434814453 + }, + { + "epoch": 4.5166015625e-07, + "model_forward_time": 0.02533555030822754, + "step": 296 + }, + { + "epoch": 4.5166015625e-07, + "step": 296, + "training_step_time": 0.11112737655639648 + }, + { + "epoch": 4.5318603515625e-07, + "model_forward_time": 0.024981260299682617, + "step": 297 + }, + { + "epoch": 4.5318603515625e-07, + "step": 297, + "training_step_time": 0.10686969757080078 + }, + { + "epoch": 4.547119140625e-07, + "model_forward_time": 0.02520132064819336, + "step": 298 + }, + { + "epoch": 4.547119140625e-07, + "step": 298, + "training_step_time": 0.10563445091247559 + }, + { + "epoch": 4.5623779296875e-07, + "model_forward_time": 0.025437355041503906, + "step": 299 + }, + { + "epoch": 4.5623779296875e-07, + "step": 299, + "training_step_time": 0.1054999828338623 + }, + { + "epoch": 4.57763671875e-07, + "grad_norm": 1.348624587059021, + "learning_rate": 2e-05, + "loss": 1.0482, + "step": 300 + }, + { + "epoch": 4.57763671875e-07, + "model_forward_time": 0.025567054748535156, + "step": 300 + }, + { + "epoch": 4.57763671875e-07, + "step": 300, + "training_step_time": 0.10553264617919922 + }, + { + "epoch": 4.5928955078125e-07, + "model_forward_time": 0.025410175323486328, + "step": 301 + }, + { + "epoch": 4.5928955078125e-07, + "step": 301, + "training_step_time": 0.11336088180541992 + }, + { + "epoch": 4.608154296875e-07, + "model_forward_time": 0.025510311126708984, + "step": 302 + }, + { + "epoch": 4.608154296875e-07, + "step": 302, + "training_step_time": 0.10550570487976074 + }, + { + "epoch": 4.6234130859375e-07, + "model_forward_time": 0.0255124568939209, + "step": 303 + }, + { + "epoch": 4.6234130859375e-07, + "step": 303, + "training_step_time": 0.12787961959838867 + }, + { + "epoch": 4.638671875e-07, + "model_forward_time": 0.0253751277923584, + "step": 304 + }, + { + "epoch": 4.638671875e-07, + "step": 304, + "training_step_time": 0.10251665115356445 + }, + { + "epoch": 4.6539306640625e-07, + "model_forward_time": 0.025760173797607422, + "step": 305 + }, + { + "epoch": 4.6539306640625e-07, + "step": 305, + "training_step_time": 0.10862398147583008 + }, + { + "epoch": 4.669189453125e-07, + "model_forward_time": 0.02561497688293457, + "step": 306 + }, + { + "epoch": 4.669189453125e-07, + "step": 306, + "training_step_time": 0.10998392105102539 + }, + { + "epoch": 4.6844482421875e-07, + "model_forward_time": 0.025400400161743164, + "step": 307 + }, + { + "epoch": 4.6844482421875e-07, + "step": 307, + "training_step_time": 0.16315722465515137 + }, + { + "epoch": 4.69970703125e-07, + "model_forward_time": 0.024495601654052734, + "step": 308 + }, + { + "epoch": 4.69970703125e-07, + "step": 308, + "training_step_time": 0.10726475715637207 + }, + { + "epoch": 4.7149658203125e-07, + "model_forward_time": 0.024821758270263672, + "step": 309 + }, + { + "epoch": 4.7149658203125e-07, + "step": 309, + "training_step_time": 0.10771346092224121 + }, + { + "epoch": 4.730224609375e-07, + "grad_norm": 1.6407850980758667, + "learning_rate": 2.0666666666666666e-05, + "loss": 1.0763, + "step": 310 + }, + { + "epoch": 4.730224609375e-07, + "model_forward_time": 0.024608850479125977, + "step": 310 + }, + { + "epoch": 4.730224609375e-07, + "step": 310, + "training_step_time": 0.10258984565734863 + }, + { + "epoch": 4.7454833984375e-07, + "model_forward_time": 0.0257415771484375, + "step": 311 + }, + { + "epoch": 4.7454833984375e-07, + "step": 311, + "training_step_time": 0.10520243644714355 + }, + { + "epoch": 4.7607421875e-07, + "model_forward_time": 0.025485754013061523, + "step": 312 + }, + { + "epoch": 4.7607421875e-07, + "step": 312, + "training_step_time": 0.21076297760009766 + }, + { + "epoch": 4.7760009765625e-07, + "model_forward_time": 0.02455306053161621, + "step": 313 + }, + { + "epoch": 4.7760009765625e-07, + "step": 313, + "training_step_time": 0.10784077644348145 + }, + { + "epoch": 4.791259765625e-07, + "model_forward_time": 0.02463364601135254, + "step": 314 + }, + { + "epoch": 4.791259765625e-07, + "step": 314, + "training_step_time": 0.10654139518737793 + }, + { + "epoch": 4.8065185546875e-07, + "model_forward_time": 0.025342464447021484, + "step": 315 + }, + { + "epoch": 4.8065185546875e-07, + "step": 315, + "training_step_time": 0.20109272003173828 + }, + { + "epoch": 4.82177734375e-07, + "model_forward_time": 0.024034500122070312, + "step": 316 + }, + { + "epoch": 4.82177734375e-07, + "step": 316, + "training_step_time": 0.10872626304626465 + }, + { + "epoch": 4.8370361328125e-07, + "model_forward_time": 0.025923967361450195, + "step": 317 + }, + { + "epoch": 4.8370361328125e-07, + "step": 317, + "training_step_time": 0.10268092155456543 + }, + { + "epoch": 4.852294921875e-07, + "model_forward_time": 0.0257265567779541, + "step": 318 + }, + { + "epoch": 4.852294921875e-07, + "step": 318, + "training_step_time": 0.10426831245422363 + }, + { + "epoch": 4.8675537109375e-07, + "model_forward_time": 0.025304079055786133, + "step": 319 + }, + { + "epoch": 4.8675537109375e-07, + "step": 319, + "training_step_time": 0.10774612426757812 + }, + { + "epoch": 4.8828125e-07, + "grad_norm": 2.499673843383789, + "learning_rate": 2.1333333333333335e-05, + "loss": 1.072, + "step": 320 + }, + { + "epoch": 4.8828125e-07, + "model_forward_time": 0.02578425407409668, + "step": 320 + }, + { + "epoch": 4.8828125e-07, + "step": 320, + "training_step_time": 0.10923504829406738 + }, + { + "epoch": 4.8980712890625e-07, + "model_forward_time": 0.02539992332458496, + "step": 321 + }, + { + "epoch": 4.8980712890625e-07, + "step": 321, + "training_step_time": 0.11181998252868652 + }, + { + "epoch": 4.913330078125e-07, + "model_forward_time": 0.025756359100341797, + "step": 322 + }, + { + "epoch": 4.913330078125e-07, + "step": 322, + "training_step_time": 0.1070413589477539 + }, + { + "epoch": 4.9285888671875e-07, + "model_forward_time": 0.027256488800048828, + "step": 323 + }, + { + "epoch": 4.9285888671875e-07, + "step": 323, + "training_step_time": 0.11181211471557617 + }, + { + "epoch": 4.94384765625e-07, + "model_forward_time": 0.025214672088623047, + "step": 324 + }, + { + "epoch": 4.94384765625e-07, + "step": 324, + "training_step_time": 0.10771632194519043 + }, + { + "epoch": 4.9591064453125e-07, + "model_forward_time": 0.025455474853515625, + "step": 325 + }, + { + "epoch": 4.9591064453125e-07, + "step": 325, + "training_step_time": 0.10553407669067383 + }, + { + "epoch": 4.974365234375e-07, + "model_forward_time": 0.026754140853881836, + "step": 326 + }, + { + "epoch": 4.974365234375e-07, + "step": 326, + "training_step_time": 0.10756516456604004 + }, + { + "epoch": 4.9896240234375e-07, + "model_forward_time": 0.025522232055664062, + "step": 327 + }, + { + "epoch": 4.9896240234375e-07, + "step": 327, + "training_step_time": 0.10539746284484863 + }, + { + "epoch": 5.0048828125e-07, + "model_forward_time": 0.02558135986328125, + "step": 328 + }, + { + "epoch": 5.0048828125e-07, + "step": 328, + "training_step_time": 0.10802626609802246 + }, + { + "epoch": 5.0201416015625e-07, + "model_forward_time": 0.024978160858154297, + "step": 329 + }, + { + "epoch": 5.0201416015625e-07, + "step": 329, + "training_step_time": 0.10753297805786133 + }, + { + "epoch": 5.035400390625e-07, + "grad_norm": 1.2639737129211426, + "learning_rate": 2.2000000000000003e-05, + "loss": 1.05, + "step": 330 + }, + { + "epoch": 5.035400390625e-07, + "model_forward_time": 0.02574777603149414, + "step": 330 + }, + { + "epoch": 5.035400390625e-07, + "step": 330, + "training_step_time": 0.10523700714111328 + }, + { + "epoch": 5.0506591796875e-07, + "model_forward_time": 0.025259733200073242, + "step": 331 + }, + { + "epoch": 5.0506591796875e-07, + "step": 331, + "training_step_time": 0.10706496238708496 + }, + { + "epoch": 5.06591796875e-07, + "model_forward_time": 0.025692224502563477, + "step": 332 + }, + { + "epoch": 5.06591796875e-07, + "step": 332, + "training_step_time": 0.1057133674621582 + }, + { + "epoch": 5.0811767578125e-07, + "model_forward_time": 0.025745868682861328, + "step": 333 + }, + { + "epoch": 5.0811767578125e-07, + "step": 333, + "training_step_time": 0.10755729675292969 + }, + { + "epoch": 5.096435546875e-07, + "model_forward_time": 0.024929046630859375, + "step": 334 + }, + { + "epoch": 5.096435546875e-07, + "step": 334, + "training_step_time": 0.10422444343566895 + }, + { + "epoch": 5.1116943359375e-07, + "model_forward_time": 0.025197267532348633, + "step": 335 + }, + { + "epoch": 5.1116943359375e-07, + "step": 335, + "training_step_time": 0.10595965385437012 + }, + { + "epoch": 5.126953125e-07, + "model_forward_time": 0.02550029754638672, + "step": 336 + }, + { + "epoch": 5.126953125e-07, + "step": 336, + "training_step_time": 0.10481834411621094 + }, + { + "epoch": 5.1422119140625e-07, + "model_forward_time": 0.02560877799987793, + "step": 337 + }, + { + "epoch": 5.1422119140625e-07, + "step": 337, + "training_step_time": 0.11575055122375488 + }, + { + "epoch": 5.157470703125e-07, + "model_forward_time": 0.02524399757385254, + "step": 338 + }, + { + "epoch": 5.157470703125e-07, + "step": 338, + "training_step_time": 0.1834111213684082 + }, + { + "epoch": 5.1727294921875e-07, + "model_forward_time": 0.02512645721435547, + "step": 339 + }, + { + "epoch": 5.1727294921875e-07, + "step": 339, + "training_step_time": 0.12683677673339844 + }, + { + "epoch": 5.18798828125e-07, + "grad_norm": 1.6825470924377441, + "learning_rate": 2.2666666666666668e-05, + "loss": 1.05, + "step": 340 + }, + { + "epoch": 5.18798828125e-07, + "model_forward_time": 0.025676727294921875, + "step": 340 + }, + { + "epoch": 5.18798828125e-07, + "step": 340, + "training_step_time": 0.14236116409301758 + }, + { + "epoch": 5.2032470703125e-07, + "model_forward_time": 0.02499103546142578, + "step": 341 + }, + { + "epoch": 5.2032470703125e-07, + "step": 341, + "training_step_time": 0.18015098571777344 + }, + { + "epoch": 5.218505859375e-07, + "model_forward_time": 0.027096033096313477, + "step": 342 + }, + { + "epoch": 5.218505859375e-07, + "step": 342, + "training_step_time": 0.18549227714538574 + }, + { + "epoch": 5.2337646484375e-07, + "model_forward_time": 0.024728059768676758, + "step": 343 + }, + { + "epoch": 5.2337646484375e-07, + "step": 343, + "training_step_time": 0.18679380416870117 + }, + { + "epoch": 5.2490234375e-07, + "model_forward_time": 0.0243222713470459, + "step": 344 + }, + { + "epoch": 5.2490234375e-07, + "step": 344, + "training_step_time": 0.1028437614440918 + }, + { + "epoch": 5.2642822265625e-07, + "model_forward_time": 0.02447366714477539, + "step": 345 + }, + { + "epoch": 5.2642822265625e-07, + "step": 345, + "training_step_time": 0.10272026062011719 + }, + { + "epoch": 5.279541015625e-07, + "model_forward_time": 0.025460243225097656, + "step": 346 + }, + { + "epoch": 5.279541015625e-07, + "step": 346, + "training_step_time": 0.10613131523132324 + }, + { + "epoch": 5.2947998046875e-07, + "model_forward_time": 0.02540898323059082, + "step": 347 + }, + { + "epoch": 5.2947998046875e-07, + "step": 347, + "training_step_time": 0.10527825355529785 + }, + { + "epoch": 5.31005859375e-07, + "model_forward_time": 0.02550983428955078, + "step": 348 + }, + { + "epoch": 5.31005859375e-07, + "step": 348, + "training_step_time": 0.11838769912719727 + }, + { + "epoch": 5.3253173828125e-07, + "model_forward_time": 0.025302886962890625, + "step": 349 + }, + { + "epoch": 5.3253173828125e-07, + "step": 349, + "training_step_time": 0.12597060203552246 + }, + { + "epoch": 5.340576171875e-07, + "grad_norm": 2.2448384761810303, + "learning_rate": 2.3333333333333336e-05, + "loss": 1.0383, + "step": 350 + }, + { + "epoch": 5.340576171875e-07, + "model_forward_time": 0.025449514389038086, + "step": 350 + }, + { + "epoch": 5.340576171875e-07, + "step": 350, + "training_step_time": 0.10931801795959473 + }, + { + "epoch": 5.3558349609375e-07, + "model_forward_time": 0.025323152542114258, + "step": 351 + }, + { + "epoch": 5.3558349609375e-07, + "step": 351, + "training_step_time": 0.20163488388061523 + }, + { + "epoch": 5.37109375e-07, + "model_forward_time": 0.025542020797729492, + "step": 352 + }, + { + "epoch": 5.37109375e-07, + "step": 352, + "training_step_time": 0.16962862014770508 + }, + { + "epoch": 5.3863525390625e-07, + "model_forward_time": 0.025031566619873047, + "step": 353 + }, + { + "epoch": 5.3863525390625e-07, + "step": 353, + "training_step_time": 0.11136651039123535 + }, + { + "epoch": 5.401611328125e-07, + "model_forward_time": 0.024865388870239258, + "step": 354 + }, + { + "epoch": 5.401611328125e-07, + "step": 354, + "training_step_time": 0.10259413719177246 + }, + { + "epoch": 5.4168701171875e-07, + "model_forward_time": 0.025754451751708984, + "step": 355 + }, + { + "epoch": 5.4168701171875e-07, + "step": 355, + "training_step_time": 0.10812711715698242 + }, + { + "epoch": 5.43212890625e-07, + "model_forward_time": 0.025992870330810547, + "step": 356 + }, + { + "epoch": 5.43212890625e-07, + "step": 356, + "training_step_time": 0.10624837875366211 + }, + { + "epoch": 5.4473876953125e-07, + "model_forward_time": 0.029392719268798828, + "step": 357 + }, + { + "epoch": 5.4473876953125e-07, + "step": 357, + "training_step_time": 0.16276955604553223 + }, + { + "epoch": 5.462646484375e-07, + "model_forward_time": 0.02474355697631836, + "step": 358 + }, + { + "epoch": 5.462646484375e-07, + "step": 358, + "training_step_time": 0.16207194328308105 + }, + { + "epoch": 5.4779052734375e-07, + "model_forward_time": 0.02451801300048828, + "step": 359 + }, + { + "epoch": 5.4779052734375e-07, + "step": 359, + "training_step_time": 0.10290765762329102 + }, + { + "epoch": 5.4931640625e-07, + "grad_norm": 3.320141315460205, + "learning_rate": 2.4e-05, + "loss": 0.971, + "step": 360 + }, + { + "epoch": 5.4931640625e-07, + "model_forward_time": 0.025188207626342773, + "step": 360 + }, + { + "epoch": 5.4931640625e-07, + "step": 360, + "training_step_time": 0.20246171951293945 + }, + { + "epoch": 5.5084228515625e-07, + "model_forward_time": 0.025125980377197266, + "step": 361 + }, + { + "epoch": 5.5084228515625e-07, + "step": 361, + "training_step_time": 0.11040949821472168 + }, + { + "epoch": 5.523681640625e-07, + "model_forward_time": 0.024952173233032227, + "step": 362 + }, + { + "epoch": 5.523681640625e-07, + "step": 362, + "training_step_time": 0.10251593589782715 + }, + { + "epoch": 5.5389404296875e-07, + "model_forward_time": 0.025482654571533203, + "step": 363 + }, + { + "epoch": 5.5389404296875e-07, + "step": 363, + "training_step_time": 0.10642600059509277 + }, + { + "epoch": 5.55419921875e-07, + "model_forward_time": 0.026999473571777344, + "step": 364 + }, + { + "epoch": 5.55419921875e-07, + "step": 364, + "training_step_time": 0.10745072364807129 + }, + { + "epoch": 5.5694580078125e-07, + "model_forward_time": 0.02547168731689453, + "step": 365 + }, + { + "epoch": 5.5694580078125e-07, + "step": 365, + "training_step_time": 0.10812687873840332 + }, + { + "epoch": 5.584716796875e-07, + "model_forward_time": 0.025760173797607422, + "step": 366 + }, + { + "epoch": 5.584716796875e-07, + "step": 366, + "training_step_time": 0.1073451042175293 + }, + { + "epoch": 5.5999755859375e-07, + "model_forward_time": 0.025269508361816406, + "step": 367 + }, + { + "epoch": 5.5999755859375e-07, + "step": 367, + "training_step_time": 0.1066281795501709 + }, + { + "epoch": 5.615234375e-07, + "model_forward_time": 0.025445938110351562, + "step": 368 + }, + { + "epoch": 5.615234375e-07, + "step": 368, + "training_step_time": 0.10691142082214355 + }, + { + "epoch": 5.6304931640625e-07, + "model_forward_time": 0.025239229202270508, + "step": 369 + }, + { + "epoch": 5.6304931640625e-07, + "step": 369, + "training_step_time": 0.11104178428649902 + }, + { + "epoch": 5.645751953125e-07, + "grad_norm": 2.2641561031341553, + "learning_rate": 2.466666666666667e-05, + "loss": 0.9609, + "step": 370 + }, + { + "epoch": 5.645751953125e-07, + "model_forward_time": 0.025769710540771484, + "step": 370 + }, + { + "epoch": 5.645751953125e-07, + "step": 370, + "training_step_time": 0.11025595664978027 + }, + { + "epoch": 5.6610107421875e-07, + "model_forward_time": 0.025458097457885742, + "step": 371 + }, + { + "epoch": 5.6610107421875e-07, + "step": 371, + "training_step_time": 0.10838937759399414 + }, + { + "epoch": 5.67626953125e-07, + "model_forward_time": 0.025336265563964844, + "step": 372 + }, + { + "epoch": 5.67626953125e-07, + "step": 372, + "training_step_time": 0.10942554473876953 + }, + { + "epoch": 5.6915283203125e-07, + "model_forward_time": 0.026434898376464844, + "step": 373 + }, + { + "epoch": 5.6915283203125e-07, + "step": 373, + "training_step_time": 0.10899901390075684 + }, + { + "epoch": 5.706787109375e-07, + "model_forward_time": 0.02594923973083496, + "step": 374 + }, + { + "epoch": 5.706787109375e-07, + "step": 374, + "training_step_time": 0.1111440658569336 + }, + { + "epoch": 5.7220458984375e-07, + "model_forward_time": 0.025658845901489258, + "step": 375 + }, + { + "epoch": 5.7220458984375e-07, + "step": 375, + "training_step_time": 0.1086115837097168 + }, + { + "epoch": 5.7373046875e-07, + "model_forward_time": 0.025714874267578125, + "step": 376 + }, + { + "epoch": 5.7373046875e-07, + "step": 376, + "training_step_time": 0.11087250709533691 + }, + { + "epoch": 5.7525634765625e-07, + "model_forward_time": 0.02591109275817871, + "step": 377 + }, + { + "epoch": 5.7525634765625e-07, + "step": 377, + "training_step_time": 0.11223721504211426 + }, + { + "epoch": 5.767822265625e-07, + "model_forward_time": 0.026454687118530273, + "step": 378 + }, + { + "epoch": 5.767822265625e-07, + "step": 378, + "training_step_time": 0.10787200927734375 + }, + { + "epoch": 5.7830810546875e-07, + "model_forward_time": 0.025312423706054688, + "step": 379 + }, + { + "epoch": 5.7830810546875e-07, + "step": 379, + "training_step_time": 0.11315321922302246 + }, + { + "epoch": 5.79833984375e-07, + "grad_norm": 3.570995330810547, + "learning_rate": 2.5333333333333337e-05, + "loss": 0.8392, + "step": 380 + }, + { + "epoch": 5.79833984375e-07, + "model_forward_time": 0.02517867088317871, + "step": 380 + }, + { + "epoch": 5.79833984375e-07, + "step": 380, + "training_step_time": 0.10422563552856445 + }, + { + "epoch": 5.8135986328125e-07, + "model_forward_time": 0.0268707275390625, + "step": 381 + }, + { + "epoch": 5.8135986328125e-07, + "step": 381, + "training_step_time": 0.14196085929870605 + }, + { + "epoch": 5.828857421875e-07, + "model_forward_time": 0.025556564331054688, + "step": 382 + }, + { + "epoch": 5.828857421875e-07, + "step": 382, + "training_step_time": 0.10823917388916016 + }, + { + "epoch": 5.8441162109375e-07, + "model_forward_time": 0.025725364685058594, + "step": 383 + }, + { + "epoch": 5.8441162109375e-07, + "step": 383, + "training_step_time": 0.19945335388183594 + }, + { + "epoch": 5.859375e-07, + "model_forward_time": 0.024982213973999023, + "step": 384 + }, + { + "epoch": 5.859375e-07, + "step": 384, + "training_step_time": 0.14513134956359863 + }, + { + "epoch": 5.8746337890625e-07, + "model_forward_time": 0.025201797485351562, + "step": 385 + }, + { + "epoch": 5.8746337890625e-07, + "step": 385, + "training_step_time": 0.1823711395263672 + }, + { + "epoch": 5.889892578125e-07, + "model_forward_time": 0.0253298282623291, + "step": 386 + }, + { + "epoch": 5.889892578125e-07, + "step": 386, + "training_step_time": 0.10529184341430664 + }, + { + "epoch": 5.9051513671875e-07, + "model_forward_time": 0.024760007858276367, + "step": 387 + }, + { + "epoch": 5.9051513671875e-07, + "step": 387, + "training_step_time": 0.10816597938537598 + }, + { + "epoch": 5.92041015625e-07, + "model_forward_time": 0.025652170181274414, + "step": 388 + }, + { + "epoch": 5.92041015625e-07, + "step": 388, + "training_step_time": 0.10868072509765625 + }, + { + "epoch": 5.9356689453125e-07, + "model_forward_time": 0.02540111541748047, + "step": 389 + }, + { + "epoch": 5.9356689453125e-07, + "step": 389, + "training_step_time": 0.11518192291259766 + }, + { + "epoch": 5.950927734375e-07, + "grad_norm": 3.672109365463257, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.7457, + "step": 390 + }, + { + "epoch": 5.950927734375e-07, + "model_forward_time": 0.02523350715637207, + "step": 390 + }, + { + "epoch": 5.950927734375e-07, + "step": 390, + "training_step_time": 0.10444092750549316 + }, + { + "epoch": 5.9661865234375e-07, + "model_forward_time": 0.02541184425354004, + "step": 391 + }, + { + "epoch": 5.9661865234375e-07, + "step": 391, + "training_step_time": 0.1040034294128418 + }, + { + "epoch": 5.9814453125e-07, + "model_forward_time": 0.025331974029541016, + "step": 392 + }, + { + "epoch": 5.9814453125e-07, + "step": 392, + "training_step_time": 0.1037437915802002 + }, + { + "epoch": 5.9967041015625e-07, + "model_forward_time": 0.02550196647644043, + "step": 393 + }, + { + "epoch": 5.9967041015625e-07, + "step": 393, + "training_step_time": 0.15365886688232422 + }, + { + "epoch": 6.011962890625e-07, + "model_forward_time": 0.02547001838684082, + "step": 394 + }, + { + "epoch": 6.011962890625e-07, + "step": 394, + "training_step_time": 0.11852598190307617 + }, + { + "epoch": 6.0272216796875e-07, + "model_forward_time": 0.027571678161621094, + "step": 395 + }, + { + "epoch": 6.0272216796875e-07, + "step": 395, + "training_step_time": 0.11009788513183594 + }, + { + "epoch": 6.04248046875e-07, + "model_forward_time": 0.025635480880737305, + "step": 396 + }, + { + "epoch": 6.04248046875e-07, + "step": 396, + "training_step_time": 0.10553550720214844 + }, + { + "epoch": 6.0577392578125e-07, + "model_forward_time": 0.025054931640625, + "step": 397 + }, + { + "epoch": 6.0577392578125e-07, + "step": 397, + "training_step_time": 0.10789656639099121 + }, + { + "epoch": 6.072998046875e-07, + "model_forward_time": 0.02522754669189453, + "step": 398 + }, + { + "epoch": 6.072998046875e-07, + "step": 398, + "training_step_time": 0.16695785522460938 + }, + { + "epoch": 6.0882568359375e-07, + "model_forward_time": 0.02484440803527832, + "step": 399 + }, + { + "epoch": 6.0882568359375e-07, + "step": 399, + "training_step_time": 0.10576224327087402 + }, + { + "epoch": 6.103515625e-07, + "grad_norm": 4.7100019454956055, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.6335, + "step": 400 + }, + { + "epoch": 6.103515625e-07, + "model_forward_time": 0.025056123733520508, + "step": 400 + }, + { + "epoch": 6.103515625e-07, + "step": 400, + "training_step_time": 0.1040499210357666 + }, + { + "epoch": 6.1187744140625e-07, + "model_forward_time": 0.0249481201171875, + "step": 401 + }, + { + "epoch": 6.1187744140625e-07, + "step": 401, + "training_step_time": 0.10461187362670898 + }, + { + "epoch": 6.134033203125e-07, + "model_forward_time": 0.025114059448242188, + "step": 402 + }, + { + "epoch": 6.134033203125e-07, + "step": 402, + "training_step_time": 0.10811042785644531 + }, + { + "epoch": 6.1492919921875e-07, + "model_forward_time": 0.025277137756347656, + "step": 403 + }, + { + "epoch": 6.1492919921875e-07, + "step": 403, + "training_step_time": 0.19456076622009277 + }, + { + "epoch": 6.16455078125e-07, + "model_forward_time": 0.025078535079956055, + "step": 404 + }, + { + "epoch": 6.16455078125e-07, + "step": 404, + "training_step_time": 0.1071159839630127 + }, + { + "epoch": 6.1798095703125e-07, + "model_forward_time": 0.024881839752197266, + "step": 405 + }, + { + "epoch": 6.1798095703125e-07, + "step": 405, + "training_step_time": 0.10774731636047363 + }, + { + "epoch": 6.195068359375e-07, + "model_forward_time": 0.025405406951904297, + "step": 406 + }, + { + "epoch": 6.195068359375e-07, + "step": 406, + "training_step_time": 0.10582637786865234 + }, + { + "epoch": 6.2103271484375e-07, + "model_forward_time": 0.02497267723083496, + "step": 407 + }, + { + "epoch": 6.2103271484375e-07, + "step": 407, + "training_step_time": 0.20045137405395508 + }, + { + "epoch": 6.2255859375e-07, + "model_forward_time": 0.024927377700805664, + "step": 408 + }, + { + "epoch": 6.2255859375e-07, + "step": 408, + "training_step_time": 0.10573315620422363 + }, + { + "epoch": 6.2408447265625e-07, + "model_forward_time": 0.02510809898376465, + "step": 409 + }, + { + "epoch": 6.2408447265625e-07, + "step": 409, + "training_step_time": 0.10335302352905273 + }, + { + "epoch": 6.256103515625e-07, + "grad_norm": 3.612847089767456, + "learning_rate": 2.733333333333333e-05, + "loss": 0.524, + "step": 410 + }, + { + "epoch": 6.256103515625e-07, + "model_forward_time": 0.025872468948364258, + "step": 410 + }, + { + "epoch": 6.256103515625e-07, + "step": 410, + "training_step_time": 0.10776209831237793 + }, + { + "epoch": 6.2713623046875e-07, + "model_forward_time": 0.025758743286132812, + "step": 411 + }, + { + "epoch": 6.2713623046875e-07, + "step": 411, + "training_step_time": 0.10558581352233887 + }, + { + "epoch": 6.28662109375e-07, + "model_forward_time": 0.02567291259765625, + "step": 412 + }, + { + "epoch": 6.28662109375e-07, + "step": 412, + "training_step_time": 0.1086130142211914 + }, + { + "epoch": 6.3018798828125e-07, + "model_forward_time": 0.025747060775756836, + "step": 413 + }, + { + "epoch": 6.3018798828125e-07, + "step": 413, + "training_step_time": 0.11069107055664062 + }, + { + "epoch": 6.317138671875e-07, + "model_forward_time": 0.02549123764038086, + "step": 414 + }, + { + "epoch": 6.317138671875e-07, + "step": 414, + "training_step_time": 0.10621237754821777 + }, + { + "epoch": 6.3323974609375e-07, + "model_forward_time": 0.02505326271057129, + "step": 415 + }, + { + "epoch": 6.3323974609375e-07, + "step": 415, + "training_step_time": 0.10746026039123535 + }, + { + "epoch": 6.34765625e-07, + "model_forward_time": 0.025353193283081055, + "step": 416 + }, + { + "epoch": 6.34765625e-07, + "step": 416, + "training_step_time": 0.10498046875 + }, + { + "epoch": 6.3629150390625e-07, + "model_forward_time": 0.026362180709838867, + "step": 417 + }, + { + "epoch": 6.3629150390625e-07, + "step": 417, + "training_step_time": 0.10766363143920898 + }, + { + "epoch": 6.378173828125e-07, + "model_forward_time": 0.025040388107299805, + "step": 418 + }, + { + "epoch": 6.378173828125e-07, + "step": 418, + "training_step_time": 0.10718846321105957 + }, + { + "epoch": 6.3934326171875e-07, + "model_forward_time": 0.025144577026367188, + "step": 419 + }, + { + "epoch": 6.3934326171875e-07, + "step": 419, + "training_step_time": 0.10942196846008301 + }, + { + "epoch": 6.40869140625e-07, + "grad_norm": 4.447332859039307, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.4736, + "step": 420 + }, + { + "epoch": 6.40869140625e-07, + "model_forward_time": 0.025609254837036133, + "step": 420 + }, + { + "epoch": 6.40869140625e-07, + "step": 420, + "training_step_time": 0.11010408401489258 + }, + { + "epoch": 6.4239501953125e-07, + "model_forward_time": 0.02518439292907715, + "step": 421 + }, + { + "epoch": 6.4239501953125e-07, + "step": 421, + "training_step_time": 0.10423970222473145 + }, + { + "epoch": 6.439208984375e-07, + "model_forward_time": 0.0253753662109375, + "step": 422 + }, + { + "epoch": 6.439208984375e-07, + "step": 422, + "training_step_time": 0.10927891731262207 + }, + { + "epoch": 6.4544677734375e-07, + "model_forward_time": 0.025458335876464844, + "step": 423 + }, + { + "epoch": 6.4544677734375e-07, + "step": 423, + "training_step_time": 0.10473871231079102 + }, + { + "epoch": 6.4697265625e-07, + "model_forward_time": 0.0249636173248291, + "step": 424 + }, + { + "epoch": 6.4697265625e-07, + "step": 424, + "training_step_time": 0.10613298416137695 + }, + { + "epoch": 6.4849853515625e-07, + "model_forward_time": 0.025556564331054688, + "step": 425 + }, + { + "epoch": 6.4849853515625e-07, + "step": 425, + "training_step_time": 0.10716867446899414 + }, + { + "epoch": 6.500244140625e-07, + "model_forward_time": 0.0252993106842041, + "step": 426 + }, + { + "epoch": 6.500244140625e-07, + "step": 426, + "training_step_time": 0.10672450065612793 + }, + { + "epoch": 6.5155029296875e-07, + "model_forward_time": 0.025406837463378906, + "step": 427 + }, + { + "epoch": 6.5155029296875e-07, + "step": 427, + "training_step_time": 0.10779905319213867 + }, + { + "epoch": 6.53076171875e-07, + "model_forward_time": 0.025732994079589844, + "step": 428 + }, + { + "epoch": 6.53076171875e-07, + "step": 428, + "training_step_time": 0.11041879653930664 + }, + { + "epoch": 6.5460205078125e-07, + "model_forward_time": 0.025339603424072266, + "step": 429 + }, + { + "epoch": 6.5460205078125e-07, + "step": 429, + "training_step_time": 0.10991930961608887 + }, + { + "epoch": 6.561279296875e-07, + "grad_norm": 5.554104328155518, + "learning_rate": 2.8666666666666668e-05, + "loss": 0.4451, + "step": 430 + }, + { + "epoch": 6.561279296875e-07, + "model_forward_time": 0.02515697479248047, + "step": 430 + }, + { + "epoch": 6.561279296875e-07, + "step": 430, + "training_step_time": 0.17908143997192383 + }, + { + "epoch": 6.5765380859375e-07, + "model_forward_time": 0.02454686164855957, + "step": 431 + }, + { + "epoch": 6.5765380859375e-07, + "step": 431, + "training_step_time": 0.1193380355834961 + }, + { + "epoch": 6.591796875e-07, + "model_forward_time": 0.024424076080322266, + "step": 432 + }, + { + "epoch": 6.591796875e-07, + "step": 432, + "training_step_time": 0.11492109298706055 + }, + { + "epoch": 6.6070556640625e-07, + "model_forward_time": 0.025267362594604492, + "step": 433 + }, + { + "epoch": 6.6070556640625e-07, + "step": 433, + "training_step_time": 0.21300530433654785 + }, + { + "epoch": 6.622314453125e-07, + "model_forward_time": 0.024451732635498047, + "step": 434 + }, + { + "epoch": 6.622314453125e-07, + "step": 434, + "training_step_time": 0.10646843910217285 + }, + { + "epoch": 6.6375732421875e-07, + "model_forward_time": 0.024306058883666992, + "step": 435 + }, + { + "epoch": 6.6375732421875e-07, + "step": 435, + "training_step_time": 0.10809659957885742 + }, + { + "epoch": 6.65283203125e-07, + "model_forward_time": 0.025240421295166016, + "step": 436 + }, + { + "epoch": 6.65283203125e-07, + "step": 436, + "training_step_time": 0.10815548896789551 + }, + { + "epoch": 6.6680908203125e-07, + "model_forward_time": 0.025450706481933594, + "step": 437 + }, + { + "epoch": 6.6680908203125e-07, + "step": 437, + "training_step_time": 0.10820126533508301 + }, + { + "epoch": 6.683349609375e-07, + "model_forward_time": 0.026177406311035156, + "step": 438 + }, + { + "epoch": 6.683349609375e-07, + "step": 438, + "training_step_time": 0.10497260093688965 + }, + { + "epoch": 6.6986083984375e-07, + "model_forward_time": 0.025262832641601562, + "step": 439 + }, + { + "epoch": 6.6986083984375e-07, + "step": 439, + "training_step_time": 0.10571670532226562 + }, + { + "epoch": 6.7138671875e-07, + "grad_norm": 4.159182548522949, + "learning_rate": 2.9333333333333336e-05, + "loss": 0.3914, + "step": 440 + }, + { + "epoch": 6.7138671875e-07, + "model_forward_time": 0.024928808212280273, + "step": 440 + }, + { + "epoch": 6.7138671875e-07, + "step": 440, + "training_step_time": 0.10728621482849121 + }, + { + "epoch": 6.7291259765625e-07, + "model_forward_time": 0.025130271911621094, + "step": 441 + }, + { + "epoch": 6.7291259765625e-07, + "step": 441, + "training_step_time": 0.11249804496765137 + }, + { + "epoch": 6.744384765625e-07, + "model_forward_time": 0.025495052337646484, + "step": 442 + }, + { + "epoch": 6.744384765625e-07, + "step": 442, + "training_step_time": 0.12575984001159668 + }, + { + "epoch": 6.7596435546875e-07, + "model_forward_time": 0.025611400604248047, + "step": 443 + }, + { + "epoch": 6.7596435546875e-07, + "step": 443, + "training_step_time": 0.1971604824066162 + }, + { + "epoch": 6.77490234375e-07, + "model_forward_time": 0.02467823028564453, + "step": 444 + }, + { + "epoch": 6.77490234375e-07, + "step": 444, + "training_step_time": 0.10952949523925781 + }, + { + "epoch": 6.7901611328125e-07, + "model_forward_time": 0.024302959442138672, + "step": 445 + }, + { + "epoch": 6.7901611328125e-07, + "step": 445, + "training_step_time": 0.1618640422821045 + }, + { + "epoch": 6.805419921875e-07, + "model_forward_time": 0.024413585662841797, + "step": 446 + }, + { + "epoch": 6.805419921875e-07, + "step": 446, + "training_step_time": 0.10695552825927734 + }, + { + "epoch": 6.8206787109375e-07, + "model_forward_time": 0.024196863174438477, + "step": 447 + }, + { + "epoch": 6.8206787109375e-07, + "step": 447, + "training_step_time": 0.10132122039794922 + }, + { + "epoch": 6.8359375e-07, + "model_forward_time": 0.02486586570739746, + "step": 448 + }, + { + "epoch": 6.8359375e-07, + "step": 448, + "training_step_time": 0.10365796089172363 + }, + { + "epoch": 6.8511962890625e-07, + "model_forward_time": 0.025591373443603516, + "step": 449 + }, + { + "epoch": 6.8511962890625e-07, + "step": 449, + "training_step_time": 0.1045384407043457 + }, + { + "epoch": 6.866455078125e-07, + "grad_norm": 3.1207408905029297, + "learning_rate": 3e-05, + "loss": 0.3128, + "step": 450 + }, + { + "epoch": 6.866455078125e-07, + "model_forward_time": 0.024986743927001953, + "step": 450 + }, + { + "epoch": 6.866455078125e-07, + "step": 450, + "training_step_time": 0.1294558048248291 + }, + { + "epoch": 6.8817138671875e-07, + "model_forward_time": 0.024494409561157227, + "step": 451 + }, + { + "epoch": 6.8817138671875e-07, + "step": 451, + "training_step_time": 0.12233781814575195 + }, + { + "epoch": 6.89697265625e-07, + "model_forward_time": 0.025536537170410156, + "step": 452 + }, + { + "epoch": 6.89697265625e-07, + "step": 452, + "training_step_time": 0.1077566146850586 + }, + { + "epoch": 6.9122314453125e-07, + "model_forward_time": 0.025454044342041016, + "step": 453 + }, + { + "epoch": 6.9122314453125e-07, + "step": 453, + "training_step_time": 0.10560798645019531 + }, + { + "epoch": 6.927490234375e-07, + "model_forward_time": 0.02541351318359375, + "step": 454 + }, + { + "epoch": 6.927490234375e-07, + "step": 454, + "training_step_time": 0.19307446479797363 + }, + { + "epoch": 6.9427490234375e-07, + "model_forward_time": 0.024988174438476562, + "step": 455 + }, + { + "epoch": 6.9427490234375e-07, + "step": 455, + "training_step_time": 0.1047065258026123 + }, + { + "epoch": 6.9580078125e-07, + "model_forward_time": 0.024570703506469727, + "step": 456 + }, + { + "epoch": 6.9580078125e-07, + "step": 456, + "training_step_time": 0.10119986534118652 + }, + { + "epoch": 6.9732666015625e-07, + "model_forward_time": 0.024893760681152344, + "step": 457 + }, + { + "epoch": 6.9732666015625e-07, + "step": 457, + "training_step_time": 0.1074984073638916 + }, + { + "epoch": 6.988525390625e-07, + "model_forward_time": 0.02536916732788086, + "step": 458 + }, + { + "epoch": 6.988525390625e-07, + "step": 458, + "training_step_time": 0.11094975471496582 + }, + { + "epoch": 7.0037841796875e-07, + "model_forward_time": 0.024385929107666016, + "step": 459 + }, + { + "epoch": 7.0037841796875e-07, + "step": 459, + "training_step_time": 0.10678339004516602 + }, + { + "epoch": 7.01904296875e-07, + "grad_norm": 1.8441038131713867, + "learning_rate": 3.066666666666667e-05, + "loss": 0.2592, + "step": 460 + }, + { + "epoch": 7.01904296875e-07, + "model_forward_time": 0.024442672729492188, + "step": 460 + }, + { + "epoch": 7.01904296875e-07, + "step": 460, + "training_step_time": 0.11397433280944824 + }, + { + "epoch": 7.0343017578125e-07, + "model_forward_time": 0.02572154998779297, + "step": 461 + }, + { + "epoch": 7.0343017578125e-07, + "step": 461, + "training_step_time": 0.10576844215393066 + }, + { + "epoch": 7.049560546875e-07, + "model_forward_time": 0.025360107421875, + "step": 462 + }, + { + "epoch": 7.049560546875e-07, + "step": 462, + "training_step_time": 0.10760259628295898 + }, + { + "epoch": 7.0648193359375e-07, + "model_forward_time": 0.025515317916870117, + "step": 463 + }, + { + "epoch": 7.0648193359375e-07, + "step": 463, + "training_step_time": 0.10414266586303711 + }, + { + "epoch": 7.080078125e-07, + "model_forward_time": 0.02516627311706543, + "step": 464 + }, + { + "epoch": 7.080078125e-07, + "step": 464, + "training_step_time": 0.1067345142364502 + }, + { + "epoch": 7.0953369140625e-07, + "model_forward_time": 0.025510787963867188, + "step": 465 + }, + { + "epoch": 7.0953369140625e-07, + "step": 465, + "training_step_time": 0.10615801811218262 + }, + { + "epoch": 7.110595703125e-07, + "model_forward_time": 0.02575373649597168, + "step": 466 + }, + { + "epoch": 7.110595703125e-07, + "step": 466, + "training_step_time": 0.11486959457397461 + }, + { + "epoch": 7.1258544921875e-07, + "model_forward_time": 0.02523636817932129, + "step": 467 + }, + { + "epoch": 7.1258544921875e-07, + "step": 467, + "training_step_time": 0.10658001899719238 + }, + { + "epoch": 7.14111328125e-07, + "model_forward_time": 0.025739192962646484, + "step": 468 + }, + { + "epoch": 7.14111328125e-07, + "step": 468, + "training_step_time": 0.10756826400756836 + }, + { + "epoch": 7.1563720703125e-07, + "model_forward_time": 0.025443553924560547, + "step": 469 + }, + { + "epoch": 7.1563720703125e-07, + "step": 469, + "training_step_time": 0.1066594123840332 + }, + { + "epoch": 7.171630859375e-07, + "grad_norm": 2.409151792526245, + "learning_rate": 3.1333333333333334e-05, + "loss": 0.235, + "step": 470 + }, + { + "epoch": 7.171630859375e-07, + "model_forward_time": 0.025129079818725586, + "step": 470 + }, + { + "epoch": 7.171630859375e-07, + "step": 470, + "training_step_time": 0.1112515926361084 + }, + { + "epoch": 7.1868896484375e-07, + "model_forward_time": 0.025244951248168945, + "step": 471 + }, + { + "epoch": 7.1868896484375e-07, + "step": 471, + "training_step_time": 0.10385394096374512 + }, + { + "epoch": 7.2021484375e-07, + "model_forward_time": 0.02536773681640625, + "step": 472 + }, + { + "epoch": 7.2021484375e-07, + "step": 472, + "training_step_time": 0.10448050498962402 + }, + { + "epoch": 7.2174072265625e-07, + "model_forward_time": 0.025127649307250977, + "step": 473 + }, + { + "epoch": 7.2174072265625e-07, + "step": 473, + "training_step_time": 0.10458064079284668 + }, + { + "epoch": 7.232666015625e-07, + "model_forward_time": 0.0251767635345459, + "step": 474 + }, + { + "epoch": 7.232666015625e-07, + "step": 474, + "training_step_time": 0.10782837867736816 + }, + { + "epoch": 7.2479248046875e-07, + "model_forward_time": 0.0252840518951416, + "step": 475 + }, + { + "epoch": 7.2479248046875e-07, + "step": 475, + "training_step_time": 0.10557746887207031 + }, + { + "epoch": 7.26318359375e-07, + "model_forward_time": 0.025723934173583984, + "step": 476 + }, + { + "epoch": 7.26318359375e-07, + "step": 476, + "training_step_time": 0.13434576988220215 + }, + { + "epoch": 7.2784423828125e-07, + "model_forward_time": 0.026075124740600586, + "step": 477 + }, + { + "epoch": 7.2784423828125e-07, + "step": 477, + "training_step_time": 0.10866594314575195 + }, + { + "epoch": 7.293701171875e-07, + "model_forward_time": 0.025175094604492188, + "step": 478 + }, + { + "epoch": 7.293701171875e-07, + "step": 478, + "training_step_time": 0.19896578788757324 + }, + { + "epoch": 7.3089599609375e-07, + "model_forward_time": 0.0244600772857666, + "step": 479 + }, + { + "epoch": 7.3089599609375e-07, + "step": 479, + "training_step_time": 0.1251230239868164 + }, + { + "epoch": 7.32421875e-07, + "grad_norm": 2.1561572551727295, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.2503, + "step": 480 + }, + { + "epoch": 7.32421875e-07, + "model_forward_time": 0.02434682846069336, + "step": 480 + }, + { + "epoch": 7.32421875e-07, + "step": 480, + "training_step_time": 0.19761943817138672 + }, + { + "epoch": 7.3394775390625e-07, + "model_forward_time": 0.027183055877685547, + "step": 481 + }, + { + "epoch": 7.3394775390625e-07, + "step": 481, + "training_step_time": 0.1759166717529297 + }, + { + "epoch": 7.354736328125e-07, + "model_forward_time": 0.024228334426879883, + "step": 482 + }, + { + "epoch": 7.354736328125e-07, + "step": 482, + "training_step_time": 0.12036585807800293 + }, + { + "epoch": 7.3699951171875e-07, + "model_forward_time": 0.024077892303466797, + "step": 483 + }, + { + "epoch": 7.3699951171875e-07, + "step": 483, + "training_step_time": 0.11739826202392578 + }, + { + "epoch": 7.38525390625e-07, + "model_forward_time": 0.025698184967041016, + "step": 484 + }, + { + "epoch": 7.38525390625e-07, + "step": 484, + "training_step_time": 0.10518527030944824 + }, + { + "epoch": 7.4005126953125e-07, + "model_forward_time": 0.025905132293701172, + "step": 485 + }, + { + "epoch": 7.4005126953125e-07, + "step": 485, + "training_step_time": 0.1078939437866211 + }, + { + "epoch": 7.415771484375e-07, + "model_forward_time": 0.02554917335510254, + "step": 486 + }, + { + "epoch": 7.415771484375e-07, + "step": 486, + "training_step_time": 0.11050152778625488 + }, + { + "epoch": 7.4310302734375e-07, + "model_forward_time": 0.025230884552001953, + "step": 487 + }, + { + "epoch": 7.4310302734375e-07, + "step": 487, + "training_step_time": 0.11203527450561523 + }, + { + "epoch": 7.4462890625e-07, + "model_forward_time": 0.02565479278564453, + "step": 488 + }, + { + "epoch": 7.4462890625e-07, + "step": 488, + "training_step_time": 0.10901045799255371 + }, + { + "epoch": 7.4615478515625e-07, + "model_forward_time": 0.025597333908081055, + "step": 489 + }, + { + "epoch": 7.4615478515625e-07, + "step": 489, + "training_step_time": 0.10728049278259277 + }, + { + "epoch": 7.476806640625e-07, + "grad_norm": 2.016129493713379, + "learning_rate": 3.266666666666667e-05, + "loss": 0.263, + "step": 490 + }, + { + "epoch": 7.476806640625e-07, + "model_forward_time": 0.025243043899536133, + "step": 490 + }, + { + "epoch": 7.476806640625e-07, + "step": 490, + "training_step_time": 0.19534730911254883 + }, + { + "epoch": 7.4920654296875e-07, + "model_forward_time": 0.02427816390991211, + "step": 491 + }, + { + "epoch": 7.4920654296875e-07, + "step": 491, + "training_step_time": 0.10170364379882812 + }, + { + "epoch": 7.50732421875e-07, + "model_forward_time": 0.024798870086669922, + "step": 492 + }, + { + "epoch": 7.50732421875e-07, + "step": 492, + "training_step_time": 0.18979692459106445 + }, + { + "epoch": 7.5225830078125e-07, + "model_forward_time": 0.024420499801635742, + "step": 493 + }, + { + "epoch": 7.5225830078125e-07, + "step": 493, + "training_step_time": 0.10238790512084961 + }, + { + "epoch": 7.537841796875e-07, + "model_forward_time": 0.024649620056152344, + "step": 494 + }, + { + "epoch": 7.537841796875e-07, + "step": 494, + "training_step_time": 0.11126470565795898 + }, + { + "epoch": 7.5531005859375e-07, + "model_forward_time": 0.025481700897216797, + "step": 495 + }, + { + "epoch": 7.5531005859375e-07, + "step": 495, + "training_step_time": 0.10887026786804199 + }, + { + "epoch": 7.568359375e-07, + "model_forward_time": 0.026036739349365234, + "step": 496 + }, + { + "epoch": 7.568359375e-07, + "step": 496, + "training_step_time": 0.10697698593139648 + }, + { + "epoch": 7.5836181640625e-07, + "model_forward_time": 0.02568531036376953, + "step": 497 + }, + { + "epoch": 7.5836181640625e-07, + "step": 497, + "training_step_time": 0.20721054077148438 + }, + { + "epoch": 7.598876953125e-07, + "model_forward_time": 0.02572941780090332, + "step": 498 + }, + { + "epoch": 7.598876953125e-07, + "step": 498, + "training_step_time": 0.1076967716217041 + }, + { + "epoch": 7.6141357421875e-07, + "model_forward_time": 0.024526357650756836, + "step": 499 + }, + { + "epoch": 7.6141357421875e-07, + "step": 499, + "training_step_time": 0.10853838920593262 + }, + { + "epoch": 7.62939453125e-07, + "grad_norm": 3.2406880855560303, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.2272, + "step": 500 + }, + { + "epoch": 7.62939453125e-07, + "model_forward_time": 0.025294780731201172, + "step": 500 + }, + { + "epoch": 7.62939453125e-07, + "step": 500, + "training_step_time": 0.19763755798339844 + }, + { + "epoch": 7.6446533203125e-07, + "model_forward_time": 0.02438831329345703, + "step": 501 + }, + { + "epoch": 7.6446533203125e-07, + "step": 501, + "training_step_time": 0.1108393669128418 + }, + { + "epoch": 7.659912109375e-07, + "model_forward_time": 0.024283170700073242, + "step": 502 + }, + { + "epoch": 7.659912109375e-07, + "step": 502, + "training_step_time": 0.10459184646606445 + }, + { + "epoch": 7.6751708984375e-07, + "model_forward_time": 0.025375843048095703, + "step": 503 + }, + { + "epoch": 7.6751708984375e-07, + "step": 503, + "training_step_time": 0.10614657402038574 + }, + { + "epoch": 7.6904296875e-07, + "model_forward_time": 0.025356292724609375, + "step": 504 + }, + { + "epoch": 7.6904296875e-07, + "step": 504, + "training_step_time": 0.10770487785339355 + }, + { + "epoch": 7.7056884765625e-07, + "model_forward_time": 0.025294780731201172, + "step": 505 + }, + { + "epoch": 7.7056884765625e-07, + "step": 505, + "training_step_time": 0.10694241523742676 + }, + { + "epoch": 7.720947265625e-07, + "model_forward_time": 0.02536463737487793, + "step": 506 + }, + { + "epoch": 7.720947265625e-07, + "step": 506, + "training_step_time": 0.10753917694091797 + }, + { + "epoch": 7.7362060546875e-07, + "model_forward_time": 0.025621414184570312, + "step": 507 + }, + { + "epoch": 7.7362060546875e-07, + "step": 507, + "training_step_time": 0.11487579345703125 + }, + { + "epoch": 7.75146484375e-07, + "model_forward_time": 0.025267839431762695, + "step": 508 + }, + { + "epoch": 7.75146484375e-07, + "step": 508, + "training_step_time": 0.10576748847961426 + }, + { + "epoch": 7.7667236328125e-07, + "model_forward_time": 0.025400638580322266, + "step": 509 + }, + { + "epoch": 7.7667236328125e-07, + "step": 509, + "training_step_time": 0.10762190818786621 + }, + { + "epoch": 7.781982421875e-07, + "grad_norm": 2.047429084777832, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.284, + "step": 510 + }, + { + "epoch": 7.781982421875e-07, + "model_forward_time": 0.025446176528930664, + "step": 510 + }, + { + "epoch": 7.781982421875e-07, + "step": 510, + "training_step_time": 0.11067008972167969 + }, + { + "epoch": 7.7972412109375e-07, + "model_forward_time": 0.030070781707763672, + "step": 511 + }, + { + "epoch": 7.7972412109375e-07, + "step": 511, + "training_step_time": 0.12609171867370605 + }, + { + "epoch": 7.8125e-07, + "model_forward_time": 0.025276660919189453, + "step": 512 + }, + { + "epoch": 7.8125e-07, + "step": 512, + "training_step_time": 0.10894942283630371 + }, + { + "epoch": 7.8277587890625e-07, + "model_forward_time": 0.02492499351501465, + "step": 513 + }, + { + "epoch": 7.8277587890625e-07, + "step": 513, + "training_step_time": 0.10774707794189453 + }, + { + "epoch": 7.843017578125e-07, + "model_forward_time": 0.025327444076538086, + "step": 514 + }, + { + "epoch": 7.843017578125e-07, + "step": 514, + "training_step_time": 0.10639381408691406 + }, + { + "epoch": 7.8582763671875e-07, + "model_forward_time": 0.025114059448242188, + "step": 515 + }, + { + "epoch": 7.8582763671875e-07, + "step": 515, + "training_step_time": 0.11034345626831055 + }, + { + "epoch": 7.87353515625e-07, + "model_forward_time": 0.025187969207763672, + "step": 516 + }, + { + "epoch": 7.87353515625e-07, + "step": 516, + "training_step_time": 0.10793042182922363 + }, + { + "epoch": 7.8887939453125e-07, + "model_forward_time": 0.025238752365112305, + "step": 517 + }, + { + "epoch": 7.8887939453125e-07, + "step": 517, + "training_step_time": 0.10804891586303711 + }, + { + "epoch": 7.904052734375e-07, + "model_forward_time": 0.025426626205444336, + "step": 518 + }, + { + "epoch": 7.904052734375e-07, + "step": 518, + "training_step_time": 0.10785818099975586 + }, + { + "epoch": 7.9193115234375e-07, + "model_forward_time": 0.025222063064575195, + "step": 519 + }, + { + "epoch": 7.9193115234375e-07, + "step": 519, + "training_step_time": 0.11051344871520996 + }, + { + "epoch": 7.9345703125e-07, + "grad_norm": 3.525778293609619, + "learning_rate": 3.466666666666667e-05, + "loss": 0.2519, + "step": 520 + }, + { + "epoch": 7.9345703125e-07, + "model_forward_time": 0.02473926544189453, + "step": 520 + }, + { + "epoch": 7.9345703125e-07, + "step": 520, + "training_step_time": 0.10759711265563965 + }, + { + "epoch": 7.9498291015625e-07, + "model_forward_time": 0.02552938461303711, + "step": 521 + }, + { + "epoch": 7.9498291015625e-07, + "step": 521, + "training_step_time": 0.13205361366271973 + }, + { + "epoch": 7.965087890625e-07, + "model_forward_time": 0.025357484817504883, + "step": 522 + }, + { + "epoch": 7.965087890625e-07, + "step": 522, + "training_step_time": 0.10655045509338379 + }, + { + "epoch": 7.9803466796875e-07, + "model_forward_time": 0.025282859802246094, + "step": 523 + }, + { + "epoch": 7.9803466796875e-07, + "step": 523, + "training_step_time": 0.20685696601867676 + }, + { + "epoch": 7.99560546875e-07, + "model_forward_time": 0.02485370635986328, + "step": 524 + }, + { + "epoch": 7.99560546875e-07, + "step": 524, + "training_step_time": 0.12297701835632324 + }, + { + "epoch": 8.0108642578125e-07, + "model_forward_time": 0.024420738220214844, + "step": 525 + }, + { + "epoch": 8.0108642578125e-07, + "step": 525, + "training_step_time": 0.1862623691558838 + }, + { + "epoch": 8.026123046875e-07, + "model_forward_time": 0.028551578521728516, + "step": 526 + }, + { + "epoch": 8.026123046875e-07, + "step": 526, + "training_step_time": 0.11381125450134277 + }, + { + "epoch": 8.0413818359375e-07, + "model_forward_time": 0.025136470794677734, + "step": 527 + }, + { + "epoch": 8.0413818359375e-07, + "step": 527, + "training_step_time": 0.11017727851867676 + }, + { + "epoch": 8.056640625e-07, + "model_forward_time": 0.025244474411010742, + "step": 528 + }, + { + "epoch": 8.056640625e-07, + "step": 528, + "training_step_time": 0.19591426849365234 + }, + { + "epoch": 8.0718994140625e-07, + "model_forward_time": 0.02447223663330078, + "step": 529 + }, + { + "epoch": 8.0718994140625e-07, + "step": 529, + "training_step_time": 0.10188841819763184 + }, + { + "epoch": 8.087158203125e-07, + "grad_norm": 3.6519904136657715, + "learning_rate": 3.5333333333333336e-05, + "loss": 0.2759, + "step": 530 + }, + { + "epoch": 8.087158203125e-07, + "model_forward_time": 0.02434825897216797, + "step": 530 + }, + { + "epoch": 8.087158203125e-07, + "step": 530, + "training_step_time": 0.10427713394165039 + }, + { + "epoch": 8.1024169921875e-07, + "model_forward_time": 0.02560281753540039, + "step": 531 + }, + { + "epoch": 8.1024169921875e-07, + "step": 531, + "training_step_time": 0.1071784496307373 + }, + { + "epoch": 8.11767578125e-07, + "model_forward_time": 0.024807453155517578, + "step": 532 + }, + { + "epoch": 8.11767578125e-07, + "step": 532, + "training_step_time": 0.11983394622802734 + }, + { + "epoch": 8.1329345703125e-07, + "model_forward_time": 0.025418519973754883, + "step": 533 + }, + { + "epoch": 8.1329345703125e-07, + "step": 533, + "training_step_time": 0.12928223609924316 + }, + { + "epoch": 8.148193359375e-07, + "model_forward_time": 0.02558135986328125, + "step": 534 + }, + { + "epoch": 8.148193359375e-07, + "step": 534, + "training_step_time": 0.10668778419494629 + }, + { + "epoch": 8.1634521484375e-07, + "model_forward_time": 0.025340795516967773, + "step": 535 + }, + { + "epoch": 8.1634521484375e-07, + "step": 535, + "training_step_time": 0.10706448554992676 + }, + { + "epoch": 8.1787109375e-07, + "model_forward_time": 0.025357484817504883, + "step": 536 + }, + { + "epoch": 8.1787109375e-07, + "step": 536, + "training_step_time": 0.10590600967407227 + }, + { + "epoch": 8.1939697265625e-07, + "model_forward_time": 0.02528095245361328, + "step": 537 + }, + { + "epoch": 8.1939697265625e-07, + "step": 537, + "training_step_time": 0.17280125617980957 + }, + { + "epoch": 8.209228515625e-07, + "model_forward_time": 0.024541139602661133, + "step": 538 + }, + { + "epoch": 8.209228515625e-07, + "step": 538, + "training_step_time": 0.10708093643188477 + }, + { + "epoch": 8.2244873046875e-07, + "model_forward_time": 0.025463104248046875, + "step": 539 + }, + { + "epoch": 8.2244873046875e-07, + "step": 539, + "training_step_time": 0.11112284660339355 + }, + { + "epoch": 8.23974609375e-07, + "grad_norm": 2.5824191570281982, + "learning_rate": 3.6e-05, + "loss": 0.2215, + "step": 540 + }, + { + "epoch": 8.23974609375e-07, + "model_forward_time": 0.025034427642822266, + "step": 540 + }, + { + "epoch": 8.23974609375e-07, + "step": 540, + "training_step_time": 0.11156415939331055 + }, + { + "epoch": 8.2550048828125e-07, + "model_forward_time": 0.02607583999633789, + "step": 541 + }, + { + "epoch": 8.2550048828125e-07, + "step": 541, + "training_step_time": 0.11567115783691406 + }, + { + "epoch": 8.270263671875e-07, + "model_forward_time": 0.025719881057739258, + "step": 542 + }, + { + "epoch": 8.270263671875e-07, + "step": 542, + "training_step_time": 0.10983848571777344 + }, + { + "epoch": 8.2855224609375e-07, + "model_forward_time": 0.02523946762084961, + "step": 543 + }, + { + "epoch": 8.2855224609375e-07, + "step": 543, + "training_step_time": 0.21196937561035156 + }, + { + "epoch": 8.30078125e-07, + "model_forward_time": 0.024362802505493164, + "step": 544 + }, + { + "epoch": 8.30078125e-07, + "step": 544, + "training_step_time": 0.11173820495605469 + }, + { + "epoch": 8.3160400390625e-07, + "model_forward_time": 0.0249788761138916, + "step": 545 + }, + { + "epoch": 8.3160400390625e-07, + "step": 545, + "training_step_time": 0.1525402069091797 + }, + { + "epoch": 8.331298828125e-07, + "model_forward_time": 0.025024890899658203, + "step": 546 + }, + { + "epoch": 8.331298828125e-07, + "step": 546, + "training_step_time": 0.15052437782287598 + }, + { + "epoch": 8.3465576171875e-07, + "model_forward_time": 0.024741411209106445, + "step": 547 + }, + { + "epoch": 8.3465576171875e-07, + "step": 547, + "training_step_time": 0.10706686973571777 + }, + { + "epoch": 8.36181640625e-07, + "model_forward_time": 0.028111696243286133, + "step": 548 + }, + { + "epoch": 8.36181640625e-07, + "step": 548, + "training_step_time": 0.11016035079956055 + }, + { + "epoch": 8.3770751953125e-07, + "model_forward_time": 0.02521371841430664, + "step": 549 + }, + { + "epoch": 8.3770751953125e-07, + "step": 549, + "training_step_time": 0.10727596282958984 + }, + { + "epoch": 8.392333984375e-07, + "grad_norm": 2.894325017929077, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.2651, + "step": 550 + }, + { + "epoch": 8.392333984375e-07, + "model_forward_time": 0.024968624114990234, + "step": 550 + }, + { + "epoch": 8.392333984375e-07, + "step": 550, + "training_step_time": 0.10645294189453125 + }, + { + "epoch": 8.4075927734375e-07, + "model_forward_time": 0.024770259857177734, + "step": 551 + }, + { + "epoch": 8.4075927734375e-07, + "step": 551, + "training_step_time": 0.10753011703491211 + }, + { + "epoch": 8.4228515625e-07, + "model_forward_time": 0.025012731552124023, + "step": 552 + }, + { + "epoch": 8.4228515625e-07, + "step": 552, + "training_step_time": 0.10584521293640137 + }, + { + "epoch": 8.4381103515625e-07, + "model_forward_time": 0.025426387786865234, + "step": 553 + }, + { + "epoch": 8.4381103515625e-07, + "step": 553, + "training_step_time": 0.11516141891479492 + }, + { + "epoch": 8.453369140625e-07, + "model_forward_time": 0.025162458419799805, + "step": 554 + }, + { + "epoch": 8.453369140625e-07, + "step": 554, + "training_step_time": 0.11070060729980469 + }, + { + "epoch": 8.4686279296875e-07, + "model_forward_time": 0.025748014450073242, + "step": 555 + }, + { + "epoch": 8.4686279296875e-07, + "step": 555, + "training_step_time": 0.10787487030029297 + }, + { + "epoch": 8.48388671875e-07, + "model_forward_time": 0.025131940841674805, + "step": 556 + }, + { + "epoch": 8.48388671875e-07, + "step": 556, + "training_step_time": 0.1057441234588623 + }, + { + "epoch": 8.4991455078125e-07, + "model_forward_time": 0.025133848190307617, + "step": 557 + }, + { + "epoch": 8.4991455078125e-07, + "step": 557, + "training_step_time": 0.10863590240478516 + }, + { + "epoch": 8.514404296875e-07, + "model_forward_time": 0.025732755661010742, + "step": 558 + }, + { + "epoch": 8.514404296875e-07, + "step": 558, + "training_step_time": 0.11274504661560059 + }, + { + "epoch": 8.5296630859375e-07, + "model_forward_time": 0.02541661262512207, + "step": 559 + }, + { + "epoch": 8.5296630859375e-07, + "step": 559, + "training_step_time": 0.10598015785217285 + }, + { + "epoch": 8.544921875e-07, + "grad_norm": 1.9275555610656738, + "learning_rate": 3.733333333333334e-05, + "loss": 0.2481, + "step": 560 + }, + { + "epoch": 8.544921875e-07, + "model_forward_time": 0.02519822120666504, + "step": 560 + }, + { + "epoch": 8.544921875e-07, + "step": 560, + "training_step_time": 0.10515832901000977 + }, + { + "epoch": 8.5601806640625e-07, + "model_forward_time": 0.02512836456298828, + "step": 561 + }, + { + "epoch": 8.5601806640625e-07, + "step": 561, + "training_step_time": 0.1047215461730957 + }, + { + "epoch": 8.575439453125e-07, + "model_forward_time": 0.025547504425048828, + "step": 562 + }, + { + "epoch": 8.575439453125e-07, + "step": 562, + "training_step_time": 0.10657382011413574 + }, + { + "epoch": 8.5906982421875e-07, + "model_forward_time": 0.026045799255371094, + "step": 563 + }, + { + "epoch": 8.5906982421875e-07, + "step": 563, + "training_step_time": 0.10592174530029297 + }, + { + "epoch": 8.60595703125e-07, + "model_forward_time": 0.025443077087402344, + "step": 564 + }, + { + "epoch": 8.60595703125e-07, + "step": 564, + "training_step_time": 0.1064920425415039 + }, + { + "epoch": 8.6212158203125e-07, + "model_forward_time": 0.02847743034362793, + "step": 565 + }, + { + "epoch": 8.6212158203125e-07, + "step": 565, + "training_step_time": 0.11181163787841797 + }, + { + "epoch": 8.636474609375e-07, + "model_forward_time": 0.025569677352905273, + "step": 566 + }, + { + "epoch": 8.636474609375e-07, + "step": 566, + "training_step_time": 0.10891580581665039 + }, + { + "epoch": 8.6517333984375e-07, + "model_forward_time": 0.028377056121826172, + "step": 567 + }, + { + "epoch": 8.6517333984375e-07, + "step": 567, + "training_step_time": 0.1757512092590332 + }, + { + "epoch": 8.6669921875e-07, + "model_forward_time": 0.02479839324951172, + "step": 568 + }, + { + "epoch": 8.6669921875e-07, + "step": 568, + "training_step_time": 0.10520267486572266 + }, + { + "epoch": 8.6822509765625e-07, + "model_forward_time": 0.02490973472595215, + "step": 569 + }, + { + "epoch": 8.6822509765625e-07, + "step": 569, + "training_step_time": 0.20324015617370605 + }, + { + "epoch": 8.697509765625e-07, + "grad_norm": 2.22090220451355, + "learning_rate": 3.8e-05, + "loss": 0.2211, + "step": 570 + }, + { + "epoch": 8.697509765625e-07, + "model_forward_time": 0.024982452392578125, + "step": 570 + }, + { + "epoch": 8.697509765625e-07, + "step": 570, + "training_step_time": 0.1636977195739746 + }, + { + "epoch": 8.7127685546875e-07, + "model_forward_time": 0.024549245834350586, + "step": 571 + }, + { + "epoch": 8.7127685546875e-07, + "step": 571, + "training_step_time": 0.16640734672546387 + }, + { + "epoch": 8.72802734375e-07, + "model_forward_time": 0.024205923080444336, + "step": 572 + }, + { + "epoch": 8.72802734375e-07, + "step": 572, + "training_step_time": 0.17044806480407715 + }, + { + "epoch": 8.7432861328125e-07, + "model_forward_time": 0.02430582046508789, + "step": 573 + }, + { + "epoch": 8.7432861328125e-07, + "step": 573, + "training_step_time": 0.18944811820983887 + }, + { + "epoch": 8.758544921875e-07, + "model_forward_time": 0.025097370147705078, + "step": 574 + }, + { + "epoch": 8.758544921875e-07, + "step": 574, + "training_step_time": 0.10761809349060059 + }, + { + "epoch": 8.7738037109375e-07, + "model_forward_time": 0.02455282211303711, + "step": 575 + }, + { + "epoch": 8.7738037109375e-07, + "step": 575, + "training_step_time": 0.10445213317871094 + }, + { + "epoch": 8.7890625e-07, + "model_forward_time": 0.02567315101623535, + "step": 576 + }, + { + "epoch": 8.7890625e-07, + "step": 576, + "training_step_time": 0.10565567016601562 + }, + { + "epoch": 8.8043212890625e-07, + "model_forward_time": 0.025488615036010742, + "step": 577 + }, + { + "epoch": 8.8043212890625e-07, + "step": 577, + "training_step_time": 0.15418624877929688 + }, + { + "epoch": 8.819580078125e-07, + "model_forward_time": 0.02508997917175293, + "step": 578 + }, + { + "epoch": 8.819580078125e-07, + "step": 578, + "training_step_time": 0.12519001960754395 + }, + { + "epoch": 8.8348388671875e-07, + "model_forward_time": 0.024870872497558594, + "step": 579 + }, + { + "epoch": 8.8348388671875e-07, + "step": 579, + "training_step_time": 0.1559433937072754 + }, + { + "epoch": 8.85009765625e-07, + "grad_norm": 1.7533249855041504, + "learning_rate": 3.866666666666667e-05, + "loss": 0.2078, + "step": 580 + }, + { + "epoch": 8.85009765625e-07, + "model_forward_time": 0.0251157283782959, + "step": 580 + }, + { + "epoch": 8.85009765625e-07, + "step": 580, + "training_step_time": 0.15197062492370605 + }, + { + "epoch": 8.8653564453125e-07, + "model_forward_time": 0.025473833084106445, + "step": 581 + }, + { + "epoch": 8.8653564453125e-07, + "step": 581, + "training_step_time": 0.14590120315551758 + }, + { + "epoch": 8.880615234375e-07, + "model_forward_time": 0.025167226791381836, + "step": 582 + }, + { + "epoch": 8.880615234375e-07, + "step": 582, + "training_step_time": 0.13872075080871582 + }, + { + "epoch": 8.8958740234375e-07, + "model_forward_time": 0.02444767951965332, + "step": 583 + }, + { + "epoch": 8.8958740234375e-07, + "step": 583, + "training_step_time": 0.10766482353210449 + }, + { + "epoch": 8.9111328125e-07, + "model_forward_time": 0.025127649307250977, + "step": 584 + }, + { + "epoch": 8.9111328125e-07, + "step": 584, + "training_step_time": 0.10739350318908691 + }, + { + "epoch": 8.9263916015625e-07, + "model_forward_time": 0.02514481544494629, + "step": 585 + }, + { + "epoch": 8.9263916015625e-07, + "step": 585, + "training_step_time": 0.10929989814758301 + }, + { + "epoch": 8.941650390625e-07, + "model_forward_time": 0.025516748428344727, + "step": 586 + }, + { + "epoch": 8.941650390625e-07, + "step": 586, + "training_step_time": 0.11003756523132324 + }, + { + "epoch": 8.9569091796875e-07, + "model_forward_time": 0.02533721923828125, + "step": 587 + }, + { + "epoch": 8.9569091796875e-07, + "step": 587, + "training_step_time": 0.20212364196777344 + }, + { + "epoch": 8.97216796875e-07, + "model_forward_time": 0.02463555335998535, + "step": 588 + }, + { + "epoch": 8.97216796875e-07, + "step": 588, + "training_step_time": 0.11271810531616211 + }, + { + "epoch": 8.9874267578125e-07, + "model_forward_time": 0.024624109268188477, + "step": 589 + }, + { + "epoch": 8.9874267578125e-07, + "step": 589, + "training_step_time": 0.10683798789978027 + }, + { + "epoch": 9.002685546875e-07, + "grad_norm": 1.4527404308319092, + "learning_rate": 3.933333333333333e-05, + "loss": 0.1965, + "step": 590 + }, + { + "epoch": 9.002685546875e-07, + "model_forward_time": 0.025640249252319336, + "step": 590 + }, + { + "epoch": 9.002685546875e-07, + "step": 590, + "training_step_time": 0.19601798057556152 + }, + { + "epoch": 9.0179443359375e-07, + "model_forward_time": 0.024501800537109375, + "step": 591 + }, + { + "epoch": 9.0179443359375e-07, + "step": 591, + "training_step_time": 0.10685420036315918 + }, + { + "epoch": 9.033203125e-07, + "model_forward_time": 0.024839401245117188, + "step": 592 + }, + { + "epoch": 9.033203125e-07, + "step": 592, + "training_step_time": 0.10669231414794922 + }, + { + "epoch": 9.0484619140625e-07, + "model_forward_time": 0.02524399757385254, + "step": 593 + }, + { + "epoch": 9.0484619140625e-07, + "step": 593, + "training_step_time": 0.1076211929321289 + }, + { + "epoch": 9.063720703125e-07, + "model_forward_time": 0.025626420974731445, + "step": 594 + }, + { + "epoch": 9.063720703125e-07, + "step": 594, + "training_step_time": 0.11132454872131348 + }, + { + "epoch": 9.0789794921875e-07, + "model_forward_time": 0.02546215057373047, + "step": 595 + }, + { + "epoch": 9.0789794921875e-07, + "step": 595, + "training_step_time": 0.11248183250427246 + }, + { + "epoch": 9.09423828125e-07, + "model_forward_time": 0.025142192840576172, + "step": 596 + }, + { + "epoch": 9.09423828125e-07, + "step": 596, + "training_step_time": 0.11154317855834961 + }, + { + "epoch": 9.1094970703125e-07, + "model_forward_time": 0.025377273559570312, + "step": 597 + }, + { + "epoch": 9.1094970703125e-07, + "step": 597, + "training_step_time": 0.10977506637573242 + }, + { + "epoch": 9.124755859375e-07, + "model_forward_time": 0.02558445930480957, + "step": 598 + }, + { + "epoch": 9.124755859375e-07, + "step": 598, + "training_step_time": 0.1071016788482666 + }, + { + "epoch": 9.1400146484375e-07, + "model_forward_time": 0.025009632110595703, + "step": 599 + }, + { + "epoch": 9.1400146484375e-07, + "step": 599, + "training_step_time": 0.10792064666748047 + }, + { + "epoch": 9.1552734375e-07, + "grad_norm": 1.198233723640442, + "learning_rate": 4e-05, + "loss": 0.2007, + "step": 600 + }, + { + "epoch": 9.1552734375e-07, + "model_forward_time": 0.026806116104125977, + "step": 600 + }, + { + "epoch": 9.1552734375e-07, + "step": 600, + "training_step_time": 0.10895800590515137 + }, + { + "epoch": 9.1705322265625e-07, + "model_forward_time": 0.02562117576599121, + "step": 601 + }, + { + "epoch": 9.1705322265625e-07, + "step": 601, + "training_step_time": 0.11057209968566895 + }, + { + "epoch": 9.185791015625e-07, + "model_forward_time": 0.025543689727783203, + "step": 602 + }, + { + "epoch": 9.185791015625e-07, + "step": 602, + "training_step_time": 0.10936522483825684 + }, + { + "epoch": 9.2010498046875e-07, + "model_forward_time": 0.025423049926757812, + "step": 603 + }, + { + "epoch": 9.2010498046875e-07, + "step": 603, + "training_step_time": 0.11207270622253418 + }, + { + "epoch": 9.21630859375e-07, + "model_forward_time": 0.025262117385864258, + "step": 604 + }, + { + "epoch": 9.21630859375e-07, + "step": 604, + "training_step_time": 0.10583043098449707 + }, + { + "epoch": 9.2315673828125e-07, + "model_forward_time": 0.02632451057434082, + "step": 605 + }, + { + "epoch": 9.2315673828125e-07, + "step": 605, + "training_step_time": 0.10900664329528809 + }, + { + "epoch": 9.246826171875e-07, + "model_forward_time": 0.025408506393432617, + "step": 606 + }, + { + "epoch": 9.246826171875e-07, + "step": 606, + "training_step_time": 0.10456633567810059 + }, + { + "epoch": 9.2620849609375e-07, + "model_forward_time": 0.025197505950927734, + "step": 607 + }, + { + "epoch": 9.2620849609375e-07, + "step": 607, + "training_step_time": 0.10456705093383789 + }, + { + "epoch": 9.27734375e-07, + "model_forward_time": 0.02599024772644043, + "step": 608 + }, + { + "epoch": 9.27734375e-07, + "step": 608, + "training_step_time": 0.10758280754089355 + }, + { + "epoch": 9.2926025390625e-07, + "model_forward_time": 0.026047706604003906, + "step": 609 + }, + { + "epoch": 9.2926025390625e-07, + "step": 609, + "training_step_time": 0.10677289962768555 + }, + { + "epoch": 9.307861328125e-07, + "grad_norm": 1.9473224878311157, + "learning_rate": 4.066666666666667e-05, + "loss": 0.2071, + "step": 610 + }, + { + "epoch": 9.307861328125e-07, + "model_forward_time": 0.02504706382751465, + "step": 610 + }, + { + "epoch": 9.307861328125e-07, + "step": 610, + "training_step_time": 0.10535693168640137 + }, + { + "epoch": 9.3231201171875e-07, + "model_forward_time": 0.025238752365112305, + "step": 611 + }, + { + "epoch": 9.3231201171875e-07, + "step": 611, + "training_step_time": 0.16564345359802246 + }, + { + "epoch": 9.33837890625e-07, + "model_forward_time": 0.02473926544189453, + "step": 612 + }, + { + "epoch": 9.33837890625e-07, + "step": 612, + "training_step_time": 0.11058211326599121 + }, + { + "epoch": 9.3536376953125e-07, + "model_forward_time": 0.024482250213623047, + "step": 613 + }, + { + "epoch": 9.3536376953125e-07, + "step": 613, + "training_step_time": 0.20367789268493652 + }, + { + "epoch": 9.368896484375e-07, + "model_forward_time": 0.02383112907409668, + "step": 614 + }, + { + "epoch": 9.368896484375e-07, + "step": 614, + "training_step_time": 0.18216753005981445 + }, + { + "epoch": 9.3841552734375e-07, + "model_forward_time": 0.023954391479492188, + "step": 615 + }, + { + "epoch": 9.3841552734375e-07, + "step": 615, + "training_step_time": 0.19987225532531738 + }, + { + "epoch": 9.3994140625e-07, + "model_forward_time": 0.024993896484375, + "step": 616 + }, + { + "epoch": 9.3994140625e-07, + "step": 616, + "training_step_time": 0.10952091217041016 + }, + { + "epoch": 9.4146728515625e-07, + "model_forward_time": 0.02446436882019043, + "step": 617 + }, + { + "epoch": 9.4146728515625e-07, + "step": 617, + "training_step_time": 0.10840129852294922 + }, + { + "epoch": 9.429931640625e-07, + "model_forward_time": 0.02523493766784668, + "step": 618 + }, + { + "epoch": 9.429931640625e-07, + "step": 618, + "training_step_time": 0.11473250389099121 + }, + { + "epoch": 9.4451904296875e-07, + "model_forward_time": 0.02508068084716797, + "step": 619 + }, + { + "epoch": 9.4451904296875e-07, + "step": 619, + "training_step_time": 0.12007665634155273 + }, + { + "epoch": 9.46044921875e-07, + "grad_norm": 2.3724663257598877, + "learning_rate": 4.133333333333333e-05, + "loss": 0.2242, + "step": 620 + }, + { + "epoch": 9.46044921875e-07, + "model_forward_time": 0.025259733200073242, + "step": 620 + }, + { + "epoch": 9.46044921875e-07, + "step": 620, + "training_step_time": 0.11275982856750488 + }, + { + "epoch": 9.4757080078125e-07, + "model_forward_time": 0.026327848434448242, + "step": 621 + }, + { + "epoch": 9.4757080078125e-07, + "step": 621, + "training_step_time": 0.1233530044555664 + }, + { + "epoch": 9.490966796875e-07, + "model_forward_time": 0.025542020797729492, + "step": 622 + }, + { + "epoch": 9.490966796875e-07, + "step": 622, + "training_step_time": 0.2064199447631836 + }, + { + "epoch": 9.5062255859375e-07, + "model_forward_time": 0.024581193923950195, + "step": 623 + }, + { + "epoch": 9.5062255859375e-07, + "step": 623, + "training_step_time": 0.10877299308776855 + }, + { + "epoch": 9.521484375e-07, + "model_forward_time": 0.0247189998626709, + "step": 624 + }, + { + "epoch": 9.521484375e-07, + "step": 624, + "training_step_time": 0.10849666595458984 + }, + { + "epoch": 9.5367431640625e-07, + "model_forward_time": 0.024769067764282227, + "step": 625 + }, + { + "epoch": 9.5367431640625e-07, + "step": 625, + "training_step_time": 0.15665078163146973 + }, + { + "epoch": 9.552001953125e-07, + "model_forward_time": 0.024300813674926758, + "step": 626 + }, + { + "epoch": 9.552001953125e-07, + "step": 626, + "training_step_time": 0.1331627368927002 + }, + { + "epoch": 9.5672607421875e-07, + "model_forward_time": 0.024785280227661133, + "step": 627 + }, + { + "epoch": 9.5672607421875e-07, + "step": 627, + "training_step_time": 0.10618805885314941 + }, + { + "epoch": 9.58251953125e-07, + "model_forward_time": 0.025282859802246094, + "step": 628 + }, + { + "epoch": 9.58251953125e-07, + "step": 628, + "training_step_time": 0.11105489730834961 + }, + { + "epoch": 9.5977783203125e-07, + "model_forward_time": 0.0250856876373291, + "step": 629 + }, + { + "epoch": 9.5977783203125e-07, + "step": 629, + "training_step_time": 0.1077427864074707 + }, + { + "epoch": 9.613037109375e-07, + "grad_norm": 2.2837870121002197, + "learning_rate": 4.2e-05, + "loss": 0.2133, + "step": 630 + }, + { + "epoch": 9.613037109375e-07, + "model_forward_time": 0.025562047958374023, + "step": 630 + }, + { + "epoch": 9.613037109375e-07, + "step": 630, + "training_step_time": 0.11121892929077148 + }, + { + "epoch": 9.6282958984375e-07, + "model_forward_time": 0.0256350040435791, + "step": 631 + }, + { + "epoch": 9.6282958984375e-07, + "step": 631, + "training_step_time": 0.10900688171386719 + }, + { + "epoch": 9.6435546875e-07, + "model_forward_time": 0.02538442611694336, + "step": 632 + }, + { + "epoch": 9.6435546875e-07, + "step": 632, + "training_step_time": 0.20986294746398926 + }, + { + "epoch": 9.6588134765625e-07, + "model_forward_time": 0.025243759155273438, + "step": 633 + }, + { + "epoch": 9.6588134765625e-07, + "step": 633, + "training_step_time": 0.10717272758483887 + }, + { + "epoch": 9.674072265625e-07, + "model_forward_time": 0.024759769439697266, + "step": 634 + }, + { + "epoch": 9.674072265625e-07, + "step": 634, + "training_step_time": 0.11149835586547852 + }, + { + "epoch": 9.6893310546875e-07, + "model_forward_time": 0.025780677795410156, + "step": 635 + }, + { + "epoch": 9.6893310546875e-07, + "step": 635, + "training_step_time": 0.20018362998962402 + }, + { + "epoch": 9.70458984375e-07, + "model_forward_time": 0.024682998657226562, + "step": 636 + }, + { + "epoch": 9.70458984375e-07, + "step": 636, + "training_step_time": 0.1079702377319336 + }, + { + "epoch": 9.7198486328125e-07, + "model_forward_time": 0.024491548538208008, + "step": 637 + }, + { + "epoch": 9.7198486328125e-07, + "step": 637, + "training_step_time": 0.1093893051147461 + }, + { + "epoch": 9.735107421875e-07, + "model_forward_time": 0.025505781173706055, + "step": 638 + }, + { + "epoch": 9.735107421875e-07, + "step": 638, + "training_step_time": 0.11040329933166504 + }, + { + "epoch": 9.7503662109375e-07, + "model_forward_time": 0.025437593460083008, + "step": 639 + }, + { + "epoch": 9.7503662109375e-07, + "step": 639, + "training_step_time": 0.10527706146240234 + }, + { + "epoch": 9.765625e-07, + "grad_norm": 1.5601938962936401, + "learning_rate": 4.266666666666667e-05, + "loss": 0.22, + "step": 640 + }, + { + "epoch": 9.765625e-07, + "model_forward_time": 0.025427818298339844, + "step": 640 + }, + { + "epoch": 9.765625e-07, + "step": 640, + "training_step_time": 0.10495877265930176 + }, + { + "epoch": 9.7808837890625e-07, + "model_forward_time": 0.025714874267578125, + "step": 641 + }, + { + "epoch": 9.7808837890625e-07, + "step": 641, + "training_step_time": 0.10501790046691895 + }, + { + "epoch": 9.796142578125e-07, + "model_forward_time": 0.02526378631591797, + "step": 642 + }, + { + "epoch": 9.796142578125e-07, + "step": 642, + "training_step_time": 0.10541796684265137 + }, + { + "epoch": 9.8114013671875e-07, + "model_forward_time": 0.02615833282470703, + "step": 643 + }, + { + "epoch": 9.8114013671875e-07, + "step": 643, + "training_step_time": 0.10561585426330566 + }, + { + "epoch": 9.82666015625e-07, + "model_forward_time": 0.02657938003540039, + "step": 644 + }, + { + "epoch": 9.82666015625e-07, + "step": 644, + "training_step_time": 0.10756278038024902 + }, + { + "epoch": 9.8419189453125e-07, + "model_forward_time": 0.025615692138671875, + "step": 645 + }, + { + "epoch": 9.8419189453125e-07, + "step": 645, + "training_step_time": 0.11654090881347656 + }, + { + "epoch": 9.857177734375e-07, + "model_forward_time": 0.025333642959594727, + "step": 646 + }, + { + "epoch": 9.857177734375e-07, + "step": 646, + "training_step_time": 0.10930633544921875 + }, + { + "epoch": 9.8724365234375e-07, + "model_forward_time": 0.02539658546447754, + "step": 647 + }, + { + "epoch": 9.8724365234375e-07, + "step": 647, + "training_step_time": 0.10354161262512207 + }, + { + "epoch": 9.8876953125e-07, + "model_forward_time": 0.027733325958251953, + "step": 648 + }, + { + "epoch": 9.8876953125e-07, + "step": 648, + "training_step_time": 0.11020636558532715 + }, + { + "epoch": 9.9029541015625e-07, + "model_forward_time": 0.026613473892211914, + "step": 649 + }, + { + "epoch": 9.9029541015625e-07, + "step": 649, + "training_step_time": 0.10996341705322266 + }, + { + "epoch": 9.918212890625e-07, + "grad_norm": 2.4662177562713623, + "learning_rate": 4.3333333333333334e-05, + "loss": 0.2189, + "step": 650 + }, + { + "epoch": 9.918212890625e-07, + "model_forward_time": 0.025188922882080078, + "step": 650 + }, + { + "epoch": 9.918212890625e-07, + "step": 650, + "training_step_time": 0.10556530952453613 + }, + { + "epoch": 9.9334716796875e-07, + "model_forward_time": 0.025713205337524414, + "step": 651 + }, + { + "epoch": 9.9334716796875e-07, + "step": 651, + "training_step_time": 0.10782814025878906 + }, + { + "epoch": 9.94873046875e-07, + "model_forward_time": 0.025780916213989258, + "step": 652 + }, + { + "epoch": 9.94873046875e-07, + "step": 652, + "training_step_time": 0.10580801963806152 + }, + { + "epoch": 9.9639892578125e-07, + "model_forward_time": 0.025099754333496094, + "step": 653 + }, + { + "epoch": 9.9639892578125e-07, + "step": 653, + "training_step_time": 0.10844945907592773 + }, + { + "epoch": 9.979248046875e-07, + "model_forward_time": 0.02531743049621582, + "step": 654 + }, + { + "epoch": 9.979248046875e-07, + "step": 654, + "training_step_time": 0.10998678207397461 + }, + { + "epoch": 9.9945068359375e-07, + "model_forward_time": 0.02523660659790039, + "step": 655 + }, + { + "epoch": 9.9945068359375e-07, + "step": 655, + "training_step_time": 0.1447737216949463 + }, + { + "epoch": 1.0009765625e-06, + "model_forward_time": 0.024217844009399414, + "step": 656 + }, + { + "epoch": 1.0009765625e-06, + "step": 656, + "training_step_time": 0.1732311248779297 + }, + { + "epoch": 1.00250244140625e-06, + "model_forward_time": 0.02477884292602539, + "step": 657 + }, + { + "epoch": 1.00250244140625e-06, + "step": 657, + "training_step_time": 0.15137910842895508 + }, + { + "epoch": 1.0040283203125e-06, + "model_forward_time": 0.025076627731323242, + "step": 658 + }, + { + "epoch": 1.0040283203125e-06, + "step": 658, + "training_step_time": 0.1648862361907959 + }, + { + "epoch": 1.00555419921875e-06, + "model_forward_time": 0.024783849716186523, + "step": 659 + }, + { + "epoch": 1.00555419921875e-06, + "step": 659, + "training_step_time": 0.12984800338745117 + }, + { + "epoch": 1.007080078125e-06, + "grad_norm": 1.6586493253707886, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.2396, + "step": 660 + }, + { + "epoch": 1.007080078125e-06, + "model_forward_time": 0.02385234832763672, + "step": 660 + }, + { + "epoch": 1.007080078125e-06, + "step": 660, + "training_step_time": 0.19134187698364258 + }, + { + "epoch": 1.00860595703125e-06, + "model_forward_time": 0.024228334426879883, + "step": 661 + }, + { + "epoch": 1.00860595703125e-06, + "step": 661, + "training_step_time": 0.1558971405029297 + }, + { + "epoch": 1.0101318359375e-06, + "model_forward_time": 0.024208545684814453, + "step": 662 + }, + { + "epoch": 1.0101318359375e-06, + "step": 662, + "training_step_time": 0.10957694053649902 + }, + { + "epoch": 1.01165771484375e-06, + "model_forward_time": 0.024410247802734375, + "step": 663 + }, + { + "epoch": 1.01165771484375e-06, + "step": 663, + "training_step_time": 0.11977529525756836 + }, + { + "epoch": 1.01318359375e-06, + "model_forward_time": 0.024786949157714844, + "step": 664 + }, + { + "epoch": 1.01318359375e-06, + "step": 664, + "training_step_time": 0.10622620582580566 + }, + { + "epoch": 1.01470947265625e-06, + "model_forward_time": 0.025542497634887695, + "step": 665 + }, + { + "epoch": 1.01470947265625e-06, + "step": 665, + "training_step_time": 0.10407471656799316 + }, + { + "epoch": 1.0162353515625e-06, + "model_forward_time": 0.02534937858581543, + "step": 666 + }, + { + "epoch": 1.0162353515625e-06, + "step": 666, + "training_step_time": 0.16604065895080566 + }, + { + "epoch": 1.01776123046875e-06, + "model_forward_time": 0.024855375289916992, + "step": 667 + }, + { + "epoch": 1.01776123046875e-06, + "step": 667, + "training_step_time": 0.12375545501708984 + }, + { + "epoch": 1.019287109375e-06, + "model_forward_time": 0.024580955505371094, + "step": 668 + }, + { + "epoch": 1.019287109375e-06, + "step": 668, + "training_step_time": 0.10639286041259766 + }, + { + "epoch": 1.02081298828125e-06, + "model_forward_time": 0.02517104148864746, + "step": 669 + }, + { + "epoch": 1.02081298828125e-06, + "step": 669, + "training_step_time": 0.11187219619750977 + }, + { + "epoch": 1.0223388671875e-06, + "grad_norm": 0.8760672807693481, + "learning_rate": 4.466666666666667e-05, + "loss": 0.1729, + "step": 670 + }, + { + "epoch": 1.0223388671875e-06, + "model_forward_time": 0.025316715240478516, + "step": 670 + }, + { + "epoch": 1.0223388671875e-06, + "step": 670, + "training_step_time": 0.23067688941955566 + }, + { + "epoch": 1.02386474609375e-06, + "model_forward_time": 0.025102615356445312, + "step": 671 + }, + { + "epoch": 1.02386474609375e-06, + "step": 671, + "training_step_time": 0.21572446823120117 + }, + { + "epoch": 1.025390625e-06, + "model_forward_time": 0.024440526962280273, + "step": 672 + }, + { + "epoch": 1.025390625e-06, + "step": 672, + "training_step_time": 0.19784760475158691 + }, + { + "epoch": 1.02691650390625e-06, + "model_forward_time": 0.024303197860717773, + "step": 673 + }, + { + "epoch": 1.02691650390625e-06, + "step": 673, + "training_step_time": 0.18396782875061035 + }, + { + "epoch": 1.0284423828125e-06, + "model_forward_time": 0.024264097213745117, + "step": 674 + }, + { + "epoch": 1.0284423828125e-06, + "step": 674, + "training_step_time": 0.16685247421264648 + }, + { + "epoch": 1.02996826171875e-06, + "model_forward_time": 0.024719953536987305, + "step": 675 + }, + { + "epoch": 1.02996826171875e-06, + "step": 675, + "training_step_time": 0.11927175521850586 + }, + { + "epoch": 1.031494140625e-06, + "model_forward_time": 0.02491021156311035, + "step": 676 + }, + { + "epoch": 1.031494140625e-06, + "step": 676, + "training_step_time": 0.10755157470703125 + }, + { + "epoch": 1.03302001953125e-06, + "model_forward_time": 0.02597975730895996, + "step": 677 + }, + { + "epoch": 1.03302001953125e-06, + "step": 677, + "training_step_time": 0.10407495498657227 + }, + { + "epoch": 1.0345458984375e-06, + "model_forward_time": 0.025519132614135742, + "step": 678 + }, + { + "epoch": 1.0345458984375e-06, + "step": 678, + "training_step_time": 0.20398902893066406 + }, + { + "epoch": 1.03607177734375e-06, + "model_forward_time": 0.02556300163269043, + "step": 679 + }, + { + "epoch": 1.03607177734375e-06, + "step": 679, + "training_step_time": 0.10717654228210449 + }, + { + "epoch": 1.03759765625e-06, + "grad_norm": 1.6215095520019531, + "learning_rate": 4.5333333333333335e-05, + "loss": 0.1979, + "step": 680 + }, + { + "epoch": 1.03759765625e-06, + "model_forward_time": 0.0251615047454834, + "step": 680 + }, + { + "epoch": 1.03759765625e-06, + "step": 680, + "training_step_time": 0.1102452278137207 + }, + { + "epoch": 1.03912353515625e-06, + "model_forward_time": 0.025139570236206055, + "step": 681 + }, + { + "epoch": 1.03912353515625e-06, + "step": 681, + "training_step_time": 0.10706734657287598 + }, + { + "epoch": 1.0406494140625e-06, + "model_forward_time": 0.025552988052368164, + "step": 682 + }, + { + "epoch": 1.0406494140625e-06, + "step": 682, + "training_step_time": 0.10704517364501953 + }, + { + "epoch": 1.04217529296875e-06, + "model_forward_time": 0.025260448455810547, + "step": 683 + }, + { + "epoch": 1.04217529296875e-06, + "step": 683, + "training_step_time": 0.10667943954467773 + }, + { + "epoch": 1.043701171875e-06, + "model_forward_time": 0.026265621185302734, + "step": 684 + }, + { + "epoch": 1.043701171875e-06, + "step": 684, + "training_step_time": 0.10892581939697266 + }, + { + "epoch": 1.04522705078125e-06, + "model_forward_time": 0.025774478912353516, + "step": 685 + }, + { + "epoch": 1.04522705078125e-06, + "step": 685, + "training_step_time": 0.10874509811401367 + }, + { + "epoch": 1.0467529296875e-06, + "model_forward_time": 0.025040626525878906, + "step": 686 + }, + { + "epoch": 1.0467529296875e-06, + "step": 686, + "training_step_time": 0.10829544067382812 + }, + { + "epoch": 1.04827880859375e-06, + "model_forward_time": 0.025452852249145508, + "step": 687 + }, + { + "epoch": 1.04827880859375e-06, + "step": 687, + "training_step_time": 0.11186695098876953 + }, + { + "epoch": 1.0498046875e-06, + "model_forward_time": 0.025548219680786133, + "step": 688 + }, + { + "epoch": 1.0498046875e-06, + "step": 688, + "training_step_time": 0.14309239387512207 + }, + { + "epoch": 1.05133056640625e-06, + "model_forward_time": 0.026767253875732422, + "step": 689 + }, + { + "epoch": 1.05133056640625e-06, + "step": 689, + "training_step_time": 0.13017725944519043 + }, + { + "epoch": 1.0528564453125e-06, + "grad_norm": 1.5063233375549316, + "learning_rate": 4.600000000000001e-05, + "loss": 0.1865, + "step": 690 + }, + { + "epoch": 1.0528564453125e-06, + "model_forward_time": 0.025287389755249023, + "step": 690 + }, + { + "epoch": 1.0528564453125e-06, + "step": 690, + "training_step_time": 0.1168820858001709 + }, + { + "epoch": 1.05438232421875e-06, + "model_forward_time": 0.025073766708374023, + "step": 691 + }, + { + "epoch": 1.05438232421875e-06, + "step": 691, + "training_step_time": 0.11841344833374023 + }, + { + "epoch": 1.055908203125e-06, + "model_forward_time": 0.025400400161743164, + "step": 692 + }, + { + "epoch": 1.055908203125e-06, + "step": 692, + "training_step_time": 0.1200706958770752 + }, + { + "epoch": 1.05743408203125e-06, + "model_forward_time": 0.024979829788208008, + "step": 693 + }, + { + "epoch": 1.05743408203125e-06, + "step": 693, + "training_step_time": 0.1238718032836914 + }, + { + "epoch": 1.0589599609375e-06, + "model_forward_time": 0.025542736053466797, + "step": 694 + }, + { + "epoch": 1.0589599609375e-06, + "step": 694, + "training_step_time": 0.11020517349243164 + }, + { + "epoch": 1.06048583984375e-06, + "model_forward_time": 0.02611565589904785, + "step": 695 + }, + { + "epoch": 1.06048583984375e-06, + "step": 695, + "training_step_time": 0.11434555053710938 + }, + { + "epoch": 1.06201171875e-06, + "model_forward_time": 0.025331497192382812, + "step": 696 + }, + { + "epoch": 1.06201171875e-06, + "step": 696, + "training_step_time": 0.11041998863220215 + }, + { + "epoch": 1.06353759765625e-06, + "model_forward_time": 0.025292158126831055, + "step": 697 + }, + { + "epoch": 1.06353759765625e-06, + "step": 697, + "training_step_time": 0.10665297508239746 + }, + { + "epoch": 1.0650634765625e-06, + "model_forward_time": 0.02544569969177246, + "step": 698 + }, + { + "epoch": 1.0650634765625e-06, + "step": 698, + "training_step_time": 0.10870766639709473 + }, + { + "epoch": 1.06658935546875e-06, + "model_forward_time": 0.025304079055786133, + "step": 699 + }, + { + "epoch": 1.06658935546875e-06, + "step": 699, + "training_step_time": 0.11166167259216309 + }, + { + "epoch": 1.068115234375e-06, + "grad_norm": 1.679792046546936, + "learning_rate": 4.666666666666667e-05, + "loss": 0.1993, + "step": 700 + }, + { + "epoch": 1.068115234375e-06, + "model_forward_time": 0.024828195571899414, + "step": 700 + }, + { + "epoch": 1.068115234375e-06, + "step": 700, + "training_step_time": 0.15088129043579102 + }, + { + "epoch": 1.06964111328125e-06, + "model_forward_time": 0.025300025939941406, + "step": 701 + }, + { + "epoch": 1.06964111328125e-06, + "step": 701, + "training_step_time": 0.15264678001403809 + }, + { + "epoch": 1.0711669921875e-06, + "model_forward_time": 0.025058984756469727, + "step": 702 + }, + { + "epoch": 1.0711669921875e-06, + "step": 702, + "training_step_time": 0.1292116641998291 + }, + { + "epoch": 1.07269287109375e-06, + "model_forward_time": 0.024895429611206055, + "step": 703 + }, + { + "epoch": 1.07269287109375e-06, + "step": 703, + "training_step_time": 0.1650996208190918 + }, + { + "epoch": 1.07421875e-06, + "model_forward_time": 0.0276033878326416, + "step": 704 + }, + { + "epoch": 1.07421875e-06, + "step": 704, + "training_step_time": 0.12588858604431152 + }, + { + "epoch": 1.07574462890625e-06, + "model_forward_time": 0.02512216567993164, + "step": 705 + }, + { + "epoch": 1.07574462890625e-06, + "step": 705, + "training_step_time": 0.18714165687561035 + }, + { + "epoch": 1.0772705078125e-06, + "model_forward_time": 0.024817466735839844, + "step": 706 + }, + { + "epoch": 1.0772705078125e-06, + "step": 706, + "training_step_time": 0.12075018882751465 + }, + { + "epoch": 1.07879638671875e-06, + "model_forward_time": 0.024956703186035156, + "step": 707 + }, + { + "epoch": 1.07879638671875e-06, + "step": 707, + "training_step_time": 0.11095571517944336 + }, + { + "epoch": 1.080322265625e-06, + "model_forward_time": 0.025403499603271484, + "step": 708 + }, + { + "epoch": 1.080322265625e-06, + "step": 708, + "training_step_time": 0.10695981979370117 + }, + { + "epoch": 1.08184814453125e-06, + "model_forward_time": 0.02510380744934082, + "step": 709 + }, + { + "epoch": 1.08184814453125e-06, + "step": 709, + "training_step_time": 0.10754847526550293 + }, + { + "epoch": 1.0833740234375e-06, + "grad_norm": 1.7046579122543335, + "learning_rate": 4.7333333333333336e-05, + "loss": 0.1636, + "step": 710 + }, + { + "epoch": 1.0833740234375e-06, + "model_forward_time": 0.025192737579345703, + "step": 710 + }, + { + "epoch": 1.0833740234375e-06, + "step": 710, + "training_step_time": 0.1233370304107666 + }, + { + "epoch": 1.08489990234375e-06, + "model_forward_time": 0.024378299713134766, + "step": 711 + }, + { + "epoch": 1.08489990234375e-06, + "step": 711, + "training_step_time": 0.10846662521362305 + }, + { + "epoch": 1.08642578125e-06, + "model_forward_time": 0.025356054306030273, + "step": 712 + }, + { + "epoch": 1.08642578125e-06, + "step": 712, + "training_step_time": 0.10688662528991699 + }, + { + "epoch": 1.08795166015625e-06, + "model_forward_time": 0.025412559509277344, + "step": 713 + }, + { + "epoch": 1.08795166015625e-06, + "step": 713, + "training_step_time": 0.10759639739990234 + }, + { + "epoch": 1.0894775390625e-06, + "model_forward_time": 0.02541494369506836, + "step": 714 + }, + { + "epoch": 1.0894775390625e-06, + "step": 714, + "training_step_time": 0.17981958389282227 + }, + { + "epoch": 1.09100341796875e-06, + "model_forward_time": 0.02480030059814453, + "step": 715 + }, + { + "epoch": 1.09100341796875e-06, + "step": 715, + "training_step_time": 0.10857892036437988 + }, + { + "epoch": 1.092529296875e-06, + "model_forward_time": 0.024381637573242188, + "step": 716 + }, + { + "epoch": 1.092529296875e-06, + "step": 716, + "training_step_time": 0.10985398292541504 + }, + { + "epoch": 1.09405517578125e-06, + "model_forward_time": 0.02529120445251465, + "step": 717 + }, + { + "epoch": 1.09405517578125e-06, + "step": 717, + "training_step_time": 0.10835027694702148 + }, + { + "epoch": 1.0955810546875e-06, + "model_forward_time": 0.025506973266601562, + "step": 718 + }, + { + "epoch": 1.0955810546875e-06, + "step": 718, + "training_step_time": 0.1086118221282959 + }, + { + "epoch": 1.09710693359375e-06, + "model_forward_time": 0.02535557746887207, + "step": 719 + }, + { + "epoch": 1.09710693359375e-06, + "step": 719, + "training_step_time": 0.1255626678466797 + }, + { + "epoch": 1.0986328125e-06, + "grad_norm": 1.6197246313095093, + "learning_rate": 4.8e-05, + "loss": 0.172, + "step": 720 + }, + { + "epoch": 1.0986328125e-06, + "model_forward_time": 0.024336814880371094, + "step": 720 + }, + { + "epoch": 1.0986328125e-06, + "step": 720, + "training_step_time": 0.1492152214050293 + }, + { + "epoch": 1.10015869140625e-06, + "model_forward_time": 0.02407240867614746, + "step": 721 + }, + { + "epoch": 1.10015869140625e-06, + "step": 721, + "training_step_time": 0.18077611923217773 + }, + { + "epoch": 1.1016845703125e-06, + "model_forward_time": 0.024986982345581055, + "step": 722 + }, + { + "epoch": 1.1016845703125e-06, + "step": 722, + "training_step_time": 0.12772655487060547 + }, + { + "epoch": 1.10321044921875e-06, + "model_forward_time": 0.025682449340820312, + "step": 723 + }, + { + "epoch": 1.10321044921875e-06, + "step": 723, + "training_step_time": 0.19490957260131836 + }, + { + "epoch": 1.104736328125e-06, + "model_forward_time": 0.024980783462524414, + "step": 724 + }, + { + "epoch": 1.104736328125e-06, + "step": 724, + "training_step_time": 0.21149587631225586 + }, + { + "epoch": 1.10626220703125e-06, + "model_forward_time": 0.02501392364501953, + "step": 725 + }, + { + "epoch": 1.10626220703125e-06, + "step": 725, + "training_step_time": 0.11554503440856934 + }, + { + "epoch": 1.1077880859375e-06, + "model_forward_time": 0.025061368942260742, + "step": 726 + }, + { + "epoch": 1.1077880859375e-06, + "step": 726, + "training_step_time": 0.10441207885742188 + }, + { + "epoch": 1.10931396484375e-06, + "model_forward_time": 0.025601625442504883, + "step": 727 + }, + { + "epoch": 1.10931396484375e-06, + "step": 727, + "training_step_time": 0.10971212387084961 + }, + { + "epoch": 1.11083984375e-06, + "model_forward_time": 0.02520751953125, + "step": 728 + }, + { + "epoch": 1.11083984375e-06, + "step": 728, + "training_step_time": 0.10643768310546875 + }, + { + "epoch": 1.11236572265625e-06, + "model_forward_time": 0.02508068084716797, + "step": 729 + }, + { + "epoch": 1.11236572265625e-06, + "step": 729, + "training_step_time": 0.11068010330200195 + }, + { + "epoch": 1.1138916015625e-06, + "grad_norm": 1.5712406635284424, + "learning_rate": 4.866666666666667e-05, + "loss": 0.1658, + "step": 730 + }, + { + "epoch": 1.1138916015625e-06, + "model_forward_time": 0.025307893753051758, + "step": 730 + }, + { + "epoch": 1.1138916015625e-06, + "step": 730, + "training_step_time": 0.11095976829528809 + }, + { + "epoch": 1.11541748046875e-06, + "model_forward_time": 0.02530050277709961, + "step": 731 + }, + { + "epoch": 1.11541748046875e-06, + "step": 731, + "training_step_time": 0.11368393898010254 + }, + { + "epoch": 1.116943359375e-06, + "model_forward_time": 0.025435447692871094, + "step": 732 + }, + { + "epoch": 1.116943359375e-06, + "step": 732, + "training_step_time": 0.11254763603210449 + }, + { + "epoch": 1.11846923828125e-06, + "model_forward_time": 0.025848865509033203, + "step": 733 + }, + { + "epoch": 1.11846923828125e-06, + "step": 733, + "training_step_time": 0.11185479164123535 + }, + { + "epoch": 1.1199951171875e-06, + "model_forward_time": 0.025411128997802734, + "step": 734 + }, + { + "epoch": 1.1199951171875e-06, + "step": 734, + "training_step_time": 0.10872364044189453 + }, + { + "epoch": 1.12152099609375e-06, + "model_forward_time": 0.025285005569458008, + "step": 735 + }, + { + "epoch": 1.12152099609375e-06, + "step": 735, + "training_step_time": 0.10886693000793457 + }, + { + "epoch": 1.123046875e-06, + "model_forward_time": 0.025758028030395508, + "step": 736 + }, + { + "epoch": 1.123046875e-06, + "step": 736, + "training_step_time": 0.11107420921325684 + }, + { + "epoch": 1.12457275390625e-06, + "model_forward_time": 0.02525019645690918, + "step": 737 + }, + { + "epoch": 1.12457275390625e-06, + "step": 737, + "training_step_time": 0.10840344429016113 + }, + { + "epoch": 1.1260986328125e-06, + "model_forward_time": 0.0252840518951416, + "step": 738 + }, + { + "epoch": 1.1260986328125e-06, + "step": 738, + "training_step_time": 0.11023712158203125 + }, + { + "epoch": 1.12762451171875e-06, + "model_forward_time": 0.025641918182373047, + "step": 739 + }, + { + "epoch": 1.12762451171875e-06, + "step": 739, + "training_step_time": 0.11357975006103516 + }, + { + "epoch": 1.129150390625e-06, + "grad_norm": 1.2396936416625977, + "learning_rate": 4.933333333333334e-05, + "loss": 0.1443, + "step": 740 + }, + { + "epoch": 1.129150390625e-06, + "model_forward_time": 0.02529597282409668, + "step": 740 + }, + { + "epoch": 1.129150390625e-06, + "step": 740, + "training_step_time": 0.11376833915710449 + }, + { + "epoch": 1.13067626953125e-06, + "model_forward_time": 0.025099754333496094, + "step": 741 + }, + { + "epoch": 1.13067626953125e-06, + "step": 741, + "training_step_time": 0.1101534366607666 + }, + { + "epoch": 1.1322021484375e-06, + "model_forward_time": 0.025180578231811523, + "step": 742 + }, + { + "epoch": 1.1322021484375e-06, + "step": 742, + "training_step_time": 0.11317563056945801 + }, + { + "epoch": 1.13372802734375e-06, + "model_forward_time": 0.02505660057067871, + "step": 743 + }, + { + "epoch": 1.13372802734375e-06, + "step": 743, + "training_step_time": 0.10860848426818848 + }, + { + "epoch": 1.13525390625e-06, + "model_forward_time": 0.02516913414001465, + "step": 744 + }, + { + "epoch": 1.13525390625e-06, + "step": 744, + "training_step_time": 0.12061500549316406 + }, + { + "epoch": 1.13677978515625e-06, + "model_forward_time": 0.025065183639526367, + "step": 745 + }, + { + "epoch": 1.13677978515625e-06, + "step": 745, + "training_step_time": 0.10983467102050781 + }, + { + "epoch": 1.1383056640625e-06, + "model_forward_time": 0.025249004364013672, + "step": 746 + }, + { + "epoch": 1.1383056640625e-06, + "step": 746, + "training_step_time": 0.20649242401123047 + }, + { + "epoch": 1.13983154296875e-06, + "model_forward_time": 0.025673627853393555, + "step": 747 + }, + { + "epoch": 1.13983154296875e-06, + "step": 747, + "training_step_time": 0.18613719940185547 + }, + { + "epoch": 1.141357421875e-06, + "model_forward_time": 0.02463388442993164, + "step": 748 + }, + { + "epoch": 1.141357421875e-06, + "step": 748, + "training_step_time": 0.18962574005126953 + }, + { + "epoch": 1.14288330078125e-06, + "model_forward_time": 0.024750471115112305, + "step": 749 + }, + { + "epoch": 1.14288330078125e-06, + "step": 749, + "training_step_time": 0.1727430820465088 + }, + { + "epoch": 1.1444091796875e-06, + "grad_norm": 1.2854273319244385, + "learning_rate": 5e-05, + "loss": 0.1799, + "step": 750 + }, + { + "epoch": 1.1444091796875e-06, + "model_forward_time": 0.02433037757873535, + "step": 750 + }, + { + "epoch": 1.1444091796875e-06, + "step": 750, + "training_step_time": 0.11752486228942871 + }, + { + "epoch": 1.14593505859375e-06, + "model_forward_time": 0.024219036102294922, + "step": 751 + }, + { + "epoch": 1.14593505859375e-06, + "step": 751, + "training_step_time": 0.11133933067321777 + }, + { + "epoch": 1.1474609375e-06, + "model_forward_time": 0.025148391723632812, + "step": 752 + }, + { + "epoch": 1.1474609375e-06, + "step": 752, + "training_step_time": 0.10635495185852051 + }, + { + "epoch": 1.14898681640625e-06, + "model_forward_time": 0.02523064613342285, + "step": 753 + }, + { + "epoch": 1.14898681640625e-06, + "step": 753, + "training_step_time": 0.14212560653686523 + }, + { + "epoch": 1.1505126953125e-06, + "model_forward_time": 0.02510857582092285, + "step": 754 + }, + { + "epoch": 1.1505126953125e-06, + "step": 754, + "training_step_time": 0.12310910224914551 + }, + { + "epoch": 1.15203857421875e-06, + "model_forward_time": 0.026286840438842773, + "step": 755 + }, + { + "epoch": 1.15203857421875e-06, + "step": 755, + "training_step_time": 0.15250372886657715 + }, + { + "epoch": 1.153564453125e-06, + "model_forward_time": 0.02438831329345703, + "step": 756 + }, + { + "epoch": 1.153564453125e-06, + "step": 756, + "training_step_time": 0.16698646545410156 + }, + { + "epoch": 1.15509033203125e-06, + "model_forward_time": 0.024314165115356445, + "step": 757 + }, + { + "epoch": 1.15509033203125e-06, + "step": 757, + "training_step_time": 0.17577767372131348 + }, + { + "epoch": 1.1566162109375e-06, + "model_forward_time": 0.024173974990844727, + "step": 758 + }, + { + "epoch": 1.1566162109375e-06, + "step": 758, + "training_step_time": 0.1104731559753418 + }, + { + "epoch": 1.15814208984375e-06, + "model_forward_time": 0.025225162506103516, + "step": 759 + }, + { + "epoch": 1.15814208984375e-06, + "step": 759, + "training_step_time": 0.10592007637023926 + }, + { + "epoch": 1.15966796875e-06, + "grad_norm": 1.571570634841919, + "learning_rate": 5.0666666666666674e-05, + "loss": 0.1561, + "step": 760 + }, + { + "epoch": 1.15966796875e-06, + "model_forward_time": 0.02786540985107422, + "step": 760 + }, + { + "epoch": 1.15966796875e-06, + "step": 760, + "training_step_time": 0.11235785484313965 + }, + { + "epoch": 1.16119384765625e-06, + "model_forward_time": 0.02533888816833496, + "step": 761 + }, + { + "epoch": 1.16119384765625e-06, + "step": 761, + "training_step_time": 0.10991120338439941 + }, + { + "epoch": 1.1627197265625e-06, + "model_forward_time": 0.025574922561645508, + "step": 762 + }, + { + "epoch": 1.1627197265625e-06, + "step": 762, + "training_step_time": 0.10943102836608887 + }, + { + "epoch": 1.16424560546875e-06, + "model_forward_time": 0.025574207305908203, + "step": 763 + }, + { + "epoch": 1.16424560546875e-06, + "step": 763, + "training_step_time": 0.10944700241088867 + }, + { + "epoch": 1.165771484375e-06, + "model_forward_time": 0.02519369125366211, + "step": 764 + }, + { + "epoch": 1.165771484375e-06, + "step": 764, + "training_step_time": 0.10709524154663086 + }, + { + "epoch": 1.16729736328125e-06, + "model_forward_time": 0.025970935821533203, + "step": 765 + }, + { + "epoch": 1.16729736328125e-06, + "step": 765, + "training_step_time": 0.18401122093200684 + }, + { + "epoch": 1.1688232421875e-06, + "model_forward_time": 0.02460455894470215, + "step": 766 + }, + { + "epoch": 1.1688232421875e-06, + "step": 766, + "training_step_time": 0.11684060096740723 + }, + { + "epoch": 1.17034912109375e-06, + "model_forward_time": 0.025210857391357422, + "step": 767 + }, + { + "epoch": 1.17034912109375e-06, + "step": 767, + "training_step_time": 0.11130547523498535 + }, + { + "epoch": 1.171875e-06, + "model_forward_time": 0.02577352523803711, + "step": 768 + }, + { + "epoch": 1.171875e-06, + "step": 768, + "training_step_time": 0.10989856719970703 + }, + { + "epoch": 1.17340087890625e-06, + "model_forward_time": 0.02577948570251465, + "step": 769 + }, + { + "epoch": 1.17340087890625e-06, + "step": 769, + "training_step_time": 0.19843149185180664 + }, + { + "epoch": 1.1749267578125e-06, + "grad_norm": 2.9330291748046875, + "learning_rate": 5.133333333333333e-05, + "loss": 0.2266, + "step": 770 + }, + { + "epoch": 1.1749267578125e-06, + "model_forward_time": 0.024728775024414062, + "step": 770 + }, + { + "epoch": 1.1749267578125e-06, + "step": 770, + "training_step_time": 0.11417913436889648 + }, + { + "epoch": 1.17645263671875e-06, + "model_forward_time": 0.02676701545715332, + "step": 771 + }, + { + "epoch": 1.17645263671875e-06, + "step": 771, + "training_step_time": 0.10785508155822754 + }, + { + "epoch": 1.177978515625e-06, + "model_forward_time": 0.02538323402404785, + "step": 772 + }, + { + "epoch": 1.177978515625e-06, + "step": 772, + "training_step_time": 0.10771942138671875 + }, + { + "epoch": 1.17950439453125e-06, + "model_forward_time": 0.025602340698242188, + "step": 773 + }, + { + "epoch": 1.17950439453125e-06, + "step": 773, + "training_step_time": 0.10840415954589844 + }, + { + "epoch": 1.1810302734375e-06, + "model_forward_time": 0.025500774383544922, + "step": 774 + }, + { + "epoch": 1.1810302734375e-06, + "step": 774, + "training_step_time": 0.10975027084350586 + }, + { + "epoch": 1.18255615234375e-06, + "model_forward_time": 0.025704622268676758, + "step": 775 + }, + { + "epoch": 1.18255615234375e-06, + "step": 775, + "training_step_time": 0.10821175575256348 + }, + { + "epoch": 1.18408203125e-06, + "model_forward_time": 0.025078296661376953, + "step": 776 + }, + { + "epoch": 1.18408203125e-06, + "step": 776, + "training_step_time": 0.10561895370483398 + }, + { + "epoch": 1.18560791015625e-06, + "model_forward_time": 0.0257108211517334, + "step": 777 + }, + { + "epoch": 1.18560791015625e-06, + "step": 777, + "training_step_time": 0.11000680923461914 + }, + { + "epoch": 1.1871337890625e-06, + "model_forward_time": 0.025235891342163086, + "step": 778 + }, + { + "epoch": 1.1871337890625e-06, + "step": 778, + "training_step_time": 0.10790348052978516 + }, + { + "epoch": 1.18865966796875e-06, + "model_forward_time": 0.02523207664489746, + "step": 779 + }, + { + "epoch": 1.18865966796875e-06, + "step": 779, + "training_step_time": 0.10610651969909668 + }, + { + "epoch": 1.190185546875e-06, + "grad_norm": 1.3401633501052856, + "learning_rate": 5.2000000000000004e-05, + "loss": 0.2208, + "step": 780 + }, + { + "epoch": 1.190185546875e-06, + "model_forward_time": 0.0252072811126709, + "step": 780 + }, + { + "epoch": 1.190185546875e-06, + "step": 780, + "training_step_time": 0.11088132858276367 + }, + { + "epoch": 1.19171142578125e-06, + "model_forward_time": 0.025643587112426758, + "step": 781 + }, + { + "epoch": 1.19171142578125e-06, + "step": 781, + "training_step_time": 0.10860586166381836 + }, + { + "epoch": 1.1932373046875e-06, + "model_forward_time": 0.024716615676879883, + "step": 782 + }, + { + "epoch": 1.1932373046875e-06, + "step": 782, + "training_step_time": 0.10994243621826172 + }, + { + "epoch": 1.19476318359375e-06, + "model_forward_time": 0.02512526512145996, + "step": 783 + }, + { + "epoch": 1.19476318359375e-06, + "step": 783, + "training_step_time": 0.10535097122192383 + }, + { + "epoch": 1.1962890625e-06, + "model_forward_time": 0.02538323402404785, + "step": 784 + }, + { + "epoch": 1.1962890625e-06, + "step": 784, + "training_step_time": 0.10880637168884277 + }, + { + "epoch": 1.19781494140625e-06, + "model_forward_time": 0.025083541870117188, + "step": 785 + }, + { + "epoch": 1.19781494140625e-06, + "step": 785, + "training_step_time": 0.10922622680664062 + }, + { + "epoch": 1.1993408203125e-06, + "model_forward_time": 0.024970531463623047, + "step": 786 + }, + { + "epoch": 1.1993408203125e-06, + "step": 786, + "training_step_time": 0.10834074020385742 + }, + { + "epoch": 1.20086669921875e-06, + "model_forward_time": 0.02490854263305664, + "step": 787 + }, + { + "epoch": 1.20086669921875e-06, + "step": 787, + "training_step_time": 0.10458636283874512 + }, + { + "epoch": 1.202392578125e-06, + "model_forward_time": 0.02508544921875, + "step": 788 + }, + { + "epoch": 1.202392578125e-06, + "step": 788, + "training_step_time": 0.10519790649414062 + }, + { + "epoch": 1.20391845703125e-06, + "model_forward_time": 0.024126768112182617, + "step": 789 + }, + { + "epoch": 1.20391845703125e-06, + "step": 789, + "training_step_time": 0.13147473335266113 + }, + { + "epoch": 1.2054443359375e-06, + "grad_norm": 0.9743676781654358, + "learning_rate": 5.266666666666666e-05, + "loss": 0.178, + "step": 790 + }, + { + "epoch": 1.2054443359375e-06, + "model_forward_time": 0.025664091110229492, + "step": 790 + }, + { + "epoch": 1.2054443359375e-06, + "step": 790, + "training_step_time": 0.16959929466247559 + }, + { + "epoch": 1.20697021484375e-06, + "model_forward_time": 0.024892807006835938, + "step": 791 + }, + { + "epoch": 1.20697021484375e-06, + "step": 791, + "training_step_time": 0.17504000663757324 + }, + { + "epoch": 1.20849609375e-06, + "model_forward_time": 0.02520012855529785, + "step": 792 + }, + { + "epoch": 1.20849609375e-06, + "step": 792, + "training_step_time": 0.16748404502868652 + }, + { + "epoch": 1.21002197265625e-06, + "model_forward_time": 0.02446293830871582, + "step": 793 + }, + { + "epoch": 1.21002197265625e-06, + "step": 793, + "training_step_time": 0.14552044868469238 + }, + { + "epoch": 1.2115478515625e-06, + "model_forward_time": 0.02431631088256836, + "step": 794 + }, + { + "epoch": 1.2115478515625e-06, + "step": 794, + "training_step_time": 0.15413522720336914 + }, + { + "epoch": 1.21307373046875e-06, + "model_forward_time": 0.02455592155456543, + "step": 795 + }, + { + "epoch": 1.21307373046875e-06, + "step": 795, + "training_step_time": 0.11932706832885742 + }, + { + "epoch": 1.214599609375e-06, + "model_forward_time": 0.02487492561340332, + "step": 796 + }, + { + "epoch": 1.214599609375e-06, + "step": 796, + "training_step_time": 0.10904192924499512 + }, + { + "epoch": 1.21612548828125e-06, + "model_forward_time": 0.025246620178222656, + "step": 797 + }, + { + "epoch": 1.21612548828125e-06, + "step": 797, + "training_step_time": 0.10794281959533691 + }, + { + "epoch": 1.2176513671875e-06, + "model_forward_time": 0.025601863861083984, + "step": 798 + }, + { + "epoch": 1.2176513671875e-06, + "step": 798, + "training_step_time": 0.1671278476715088 + }, + { + "epoch": 1.21917724609375e-06, + "model_forward_time": 0.02472829818725586, + "step": 799 + }, + { + "epoch": 1.21917724609375e-06, + "step": 799, + "training_step_time": 0.11611461639404297 + }, + { + "epoch": 1.220703125e-06, + "grad_norm": 1.5637664794921875, + "learning_rate": 5.333333333333333e-05, + "loss": 0.1663, + "step": 800 + }, + { + "epoch": 1.220703125e-06, + "model_forward_time": 0.023726463317871094, + "step": 800 + }, + { + "epoch": 1.220703125e-06, + "step": 800, + "training_step_time": 0.2040729522705078 + }, + { + "epoch": 1.22222900390625e-06, + "model_forward_time": 0.0251157283782959, + "step": 801 + }, + { + "epoch": 1.22222900390625e-06, + "step": 801, + "training_step_time": 0.10689282417297363 + }, + { + "epoch": 1.2237548828125e-06, + "model_forward_time": 0.024749040603637695, + "step": 802 + }, + { + "epoch": 1.2237548828125e-06, + "step": 802, + "training_step_time": 0.17762041091918945 + }, + { + "epoch": 1.22528076171875e-06, + "model_forward_time": 0.025236129760742188, + "step": 803 + }, + { + "epoch": 1.22528076171875e-06, + "step": 803, + "training_step_time": 0.11282753944396973 + }, + { + "epoch": 1.226806640625e-06, + "model_forward_time": 0.024543046951293945, + "step": 804 + }, + { + "epoch": 1.226806640625e-06, + "step": 804, + "training_step_time": 0.10494351387023926 + }, + { + "epoch": 1.22833251953125e-06, + "model_forward_time": 0.025599956512451172, + "step": 805 + }, + { + "epoch": 1.22833251953125e-06, + "step": 805, + "training_step_time": 0.1090855598449707 + }, + { + "epoch": 1.2298583984375e-06, + "model_forward_time": 0.025291919708251953, + "step": 806 + }, + { + "epoch": 1.2298583984375e-06, + "step": 806, + "training_step_time": 0.11017894744873047 + }, + { + "epoch": 1.23138427734375e-06, + "model_forward_time": 0.02561354637145996, + "step": 807 + }, + { + "epoch": 1.23138427734375e-06, + "step": 807, + "training_step_time": 0.10650086402893066 + }, + { + "epoch": 1.23291015625e-06, + "model_forward_time": 0.025461196899414062, + "step": 808 + }, + { + "epoch": 1.23291015625e-06, + "step": 808, + "training_step_time": 0.10768532752990723 + }, + { + "epoch": 1.23443603515625e-06, + "model_forward_time": 0.025017499923706055, + "step": 809 + }, + { + "epoch": 1.23443603515625e-06, + "step": 809, + "training_step_time": 0.11121582984924316 + }, + { + "epoch": 1.2359619140625e-06, + "grad_norm": 1.7824689149856567, + "learning_rate": 5.4000000000000005e-05, + "loss": 0.1815, + "step": 810 + }, + { + "epoch": 1.2359619140625e-06, + "model_forward_time": 0.026363849639892578, + "step": 810 + }, + { + "epoch": 1.2359619140625e-06, + "step": 810, + "training_step_time": 0.10753965377807617 + }, + { + "epoch": 1.23748779296875e-06, + "model_forward_time": 0.02601027488708496, + "step": 811 + }, + { + "epoch": 1.23748779296875e-06, + "step": 811, + "training_step_time": 0.20797109603881836 + }, + { + "epoch": 1.239013671875e-06, + "model_forward_time": 0.02433919906616211, + "step": 812 + }, + { + "epoch": 1.239013671875e-06, + "step": 812, + "training_step_time": 0.10780191421508789 + }, + { + "epoch": 1.24053955078125e-06, + "model_forward_time": 0.02543783187866211, + "step": 813 + }, + { + "epoch": 1.24053955078125e-06, + "step": 813, + "training_step_time": 0.1097879409790039 + }, + { + "epoch": 1.2420654296875e-06, + "model_forward_time": 0.025086402893066406, + "step": 814 + }, + { + "epoch": 1.2420654296875e-06, + "step": 814, + "training_step_time": 0.1974022388458252 + }, + { + "epoch": 1.24359130859375e-06, + "model_forward_time": 0.024779796600341797, + "step": 815 + }, + { + "epoch": 1.24359130859375e-06, + "step": 815, + "training_step_time": 0.10794305801391602 + }, + { + "epoch": 1.2451171875e-06, + "model_forward_time": 0.02465224266052246, + "step": 816 + }, + { + "epoch": 1.2451171875e-06, + "step": 816, + "training_step_time": 0.1103818416595459 + }, + { + "epoch": 1.24664306640625e-06, + "model_forward_time": 0.02538776397705078, + "step": 817 + }, + { + "epoch": 1.24664306640625e-06, + "step": 817, + "training_step_time": 0.10701918601989746 + }, + { + "epoch": 1.2481689453125e-06, + "model_forward_time": 0.02614760398864746, + "step": 818 + }, + { + "epoch": 1.2481689453125e-06, + "step": 818, + "training_step_time": 0.10727715492248535 + }, + { + "epoch": 1.24969482421875e-06, + "model_forward_time": 0.02506542205810547, + "step": 819 + }, + { + "epoch": 1.24969482421875e-06, + "step": 819, + "training_step_time": 0.11271452903747559 + }, + { + "epoch": 1.251220703125e-06, + "grad_norm": 0.9247801899909973, + "learning_rate": 5.466666666666666e-05, + "loss": 0.1907, + "step": 820 + }, + { + "epoch": 1.251220703125e-06, + "model_forward_time": 0.026698589324951172, + "step": 820 + }, + { + "epoch": 1.251220703125e-06, + "step": 820, + "training_step_time": 0.11146426200866699 + }, + { + "epoch": 1.25274658203125e-06, + "model_forward_time": 0.02523946762084961, + "step": 821 + }, + { + "epoch": 1.25274658203125e-06, + "step": 821, + "training_step_time": 0.11313247680664062 + }, + { + "epoch": 1.2542724609375e-06, + "model_forward_time": 0.02542877197265625, + "step": 822 + }, + { + "epoch": 1.2542724609375e-06, + "step": 822, + "training_step_time": 0.10615134239196777 + }, + { + "epoch": 1.25579833984375e-06, + "model_forward_time": 0.02531576156616211, + "step": 823 + }, + { + "epoch": 1.25579833984375e-06, + "step": 823, + "training_step_time": 0.10747337341308594 + }, + { + "epoch": 1.25732421875e-06, + "model_forward_time": 0.025635242462158203, + "step": 824 + }, + { + "epoch": 1.25732421875e-06, + "step": 824, + "training_step_time": 0.10699796676635742 + }, + { + "epoch": 1.25885009765625e-06, + "model_forward_time": 0.02538895606994629, + "step": 825 + }, + { + "epoch": 1.25885009765625e-06, + "step": 825, + "training_step_time": 0.1090703010559082 + }, + { + "epoch": 1.2603759765625e-06, + "model_forward_time": 0.02551889419555664, + "step": 826 + }, + { + "epoch": 1.2603759765625e-06, + "step": 826, + "training_step_time": 0.10820293426513672 + }, + { + "epoch": 1.26190185546875e-06, + "model_forward_time": 0.025539875030517578, + "step": 827 + }, + { + "epoch": 1.26190185546875e-06, + "step": 827, + "training_step_time": 0.10791134834289551 + }, + { + "epoch": 1.263427734375e-06, + "model_forward_time": 0.025384902954101562, + "step": 828 + }, + { + "epoch": 1.263427734375e-06, + "step": 828, + "training_step_time": 0.10955405235290527 + }, + { + "epoch": 1.26495361328125e-06, + "model_forward_time": 0.025703907012939453, + "step": 829 + }, + { + "epoch": 1.26495361328125e-06, + "step": 829, + "training_step_time": 0.10976171493530273 + }, + { + "epoch": 1.2664794921875e-06, + "grad_norm": 1.4237875938415527, + "learning_rate": 5.5333333333333334e-05, + "loss": 0.1956, + "step": 830 + }, + { + "epoch": 1.2664794921875e-06, + "model_forward_time": 0.025047779083251953, + "step": 830 + }, + { + "epoch": 1.2664794921875e-06, + "step": 830, + "training_step_time": 0.1073763370513916 + }, + { + "epoch": 1.26800537109375e-06, + "model_forward_time": 0.0254056453704834, + "step": 831 + }, + { + "epoch": 1.26800537109375e-06, + "step": 831, + "training_step_time": 0.11088395118713379 + }, + { + "epoch": 1.26953125e-06, + "model_forward_time": 0.025291919708251953, + "step": 832 + }, + { + "epoch": 1.26953125e-06, + "step": 832, + "training_step_time": 0.10759282112121582 + }, + { + "epoch": 1.27105712890625e-06, + "model_forward_time": 0.02551865577697754, + "step": 833 + }, + { + "epoch": 1.27105712890625e-06, + "step": 833, + "training_step_time": 0.19334745407104492 + }, + { + "epoch": 1.2725830078125e-06, + "model_forward_time": 0.02462935447692871, + "step": 834 + }, + { + "epoch": 1.2725830078125e-06, + "step": 834, + "training_step_time": 0.10671043395996094 + }, + { + "epoch": 1.27410888671875e-06, + "model_forward_time": 0.02471470832824707, + "step": 835 + }, + { + "epoch": 1.27410888671875e-06, + "step": 835, + "training_step_time": 0.21355009078979492 + }, + { + "epoch": 1.275634765625e-06, + "model_forward_time": 0.02554607391357422, + "step": 836 + }, + { + "epoch": 1.275634765625e-06, + "step": 836, + "training_step_time": 0.161391019821167 + }, + { + "epoch": 1.27716064453125e-06, + "model_forward_time": 0.024538040161132812, + "step": 837 + }, + { + "epoch": 1.27716064453125e-06, + "step": 837, + "training_step_time": 0.1728515625 + }, + { + "epoch": 1.2786865234375e-06, + "model_forward_time": 0.024572134017944336, + "step": 838 + }, + { + "epoch": 1.2786865234375e-06, + "step": 838, + "training_step_time": 0.14132213592529297 + }, + { + "epoch": 1.28021240234375e-06, + "model_forward_time": 0.025064468383789062, + "step": 839 + }, + { + "epoch": 1.28021240234375e-06, + "step": 839, + "training_step_time": 0.20406007766723633 + }, + { + "epoch": 1.28173828125e-06, + "grad_norm": 1.0427626371383667, + "learning_rate": 5.6000000000000006e-05, + "loss": 0.1682, + "step": 840 + }, + { + "epoch": 1.28173828125e-06, + "model_forward_time": 0.024442672729492188, + "step": 840 + }, + { + "epoch": 1.28173828125e-06, + "step": 840, + "training_step_time": 0.10689544677734375 + }, + { + "epoch": 1.28326416015625e-06, + "model_forward_time": 0.02499222755432129, + "step": 841 + }, + { + "epoch": 1.28326416015625e-06, + "step": 841, + "training_step_time": 0.1052548885345459 + }, + { + "epoch": 1.2847900390625e-06, + "model_forward_time": 0.025845766067504883, + "step": 842 + }, + { + "epoch": 1.2847900390625e-06, + "step": 842, + "training_step_time": 0.15422654151916504 + }, + { + "epoch": 1.28631591796875e-06, + "model_forward_time": 0.025130748748779297, + "step": 843 + }, + { + "epoch": 1.28631591796875e-06, + "step": 843, + "training_step_time": 0.11932563781738281 + }, + { + "epoch": 1.287841796875e-06, + "model_forward_time": 0.02488994598388672, + "step": 844 + }, + { + "epoch": 1.287841796875e-06, + "step": 844, + "training_step_time": 0.10901236534118652 + }, + { + "epoch": 1.28936767578125e-06, + "model_forward_time": 0.02527761459350586, + "step": 845 + }, + { + "epoch": 1.28936767578125e-06, + "step": 845, + "training_step_time": 0.1108694076538086 + }, + { + "epoch": 1.2908935546875e-06, + "model_forward_time": 0.025673389434814453, + "step": 846 + }, + { + "epoch": 1.2908935546875e-06, + "step": 846, + "training_step_time": 0.1560213565826416 + }, + { + "epoch": 1.29241943359375e-06, + "model_forward_time": 0.025255441665649414, + "step": 847 + }, + { + "epoch": 1.29241943359375e-06, + "step": 847, + "training_step_time": 0.12120938301086426 + }, + { + "epoch": 1.2939453125e-06, + "model_forward_time": 0.025014162063598633, + "step": 848 + }, + { + "epoch": 1.2939453125e-06, + "step": 848, + "training_step_time": 0.10799694061279297 + }, + { + "epoch": 1.29547119140625e-06, + "model_forward_time": 0.025567293167114258, + "step": 849 + }, + { + "epoch": 1.29547119140625e-06, + "step": 849, + "training_step_time": 0.10947394371032715 + }, + { + "epoch": 1.2969970703125e-06, + "grad_norm": 1.135888934135437, + "learning_rate": 5.666666666666667e-05, + "loss": 0.1923, + "step": 850 + }, + { + "epoch": 1.2969970703125e-06, + "model_forward_time": 0.025428056716918945, + "step": 850 + }, + { + "epoch": 1.2969970703125e-06, + "step": 850, + "training_step_time": 0.1135861873626709 + }, + { + "epoch": 1.29852294921875e-06, + "model_forward_time": 0.02511882781982422, + "step": 851 + }, + { + "epoch": 1.29852294921875e-06, + "step": 851, + "training_step_time": 0.10964608192443848 + }, + { + "epoch": 1.300048828125e-06, + "model_forward_time": 0.025823116302490234, + "step": 852 + }, + { + "epoch": 1.300048828125e-06, + "step": 852, + "training_step_time": 0.10938477516174316 + }, + { + "epoch": 1.30157470703125e-06, + "model_forward_time": 0.025175809860229492, + "step": 853 + }, + { + "epoch": 1.30157470703125e-06, + "step": 853, + "training_step_time": 0.10630249977111816 + }, + { + "epoch": 1.3031005859375e-06, + "model_forward_time": 0.025522708892822266, + "step": 854 + }, + { + "epoch": 1.3031005859375e-06, + "step": 854, + "training_step_time": 0.1079401969909668 + }, + { + "epoch": 1.30462646484375e-06, + "model_forward_time": 0.025684595108032227, + "step": 855 + }, + { + "epoch": 1.30462646484375e-06, + "step": 855, + "training_step_time": 0.10730624198913574 + }, + { + "epoch": 1.30615234375e-06, + "model_forward_time": 0.0256350040435791, + "step": 856 + }, + { + "epoch": 1.30615234375e-06, + "step": 856, + "training_step_time": 0.20493817329406738 + }, + { + "epoch": 1.30767822265625e-06, + "model_forward_time": 0.025014638900756836, + "step": 857 + }, + { + "epoch": 1.30767822265625e-06, + "step": 857, + "training_step_time": 0.11029052734375 + }, + { + "epoch": 1.3092041015625e-06, + "model_forward_time": 0.025072813034057617, + "step": 858 + }, + { + "epoch": 1.3092041015625e-06, + "step": 858, + "training_step_time": 0.10683917999267578 + }, + { + "epoch": 1.31072998046875e-06, + "model_forward_time": 0.025517940521240234, + "step": 859 + }, + { + "epoch": 1.31072998046875e-06, + "step": 859, + "training_step_time": 0.20279932022094727 + }, + { + "epoch": 1.312255859375e-06, + "grad_norm": 1.374436616897583, + "learning_rate": 5.7333333333333336e-05, + "loss": 0.1921, + "step": 860 + }, + { + "epoch": 1.312255859375e-06, + "model_forward_time": 0.026016712188720703, + "step": 860 + }, + { + "epoch": 1.312255859375e-06, + "step": 860, + "training_step_time": 0.10799336433410645 + }, + { + "epoch": 1.31378173828125e-06, + "model_forward_time": 0.024817466735839844, + "step": 861 + }, + { + "epoch": 1.31378173828125e-06, + "step": 861, + "training_step_time": 0.10469913482666016 + }, + { + "epoch": 1.3153076171875e-06, + "model_forward_time": 0.02522444725036621, + "step": 862 + }, + { + "epoch": 1.3153076171875e-06, + "step": 862, + "training_step_time": 0.10624504089355469 + }, + { + "epoch": 1.31683349609375e-06, + "model_forward_time": 0.0252835750579834, + "step": 863 + }, + { + "epoch": 1.31683349609375e-06, + "step": 863, + "training_step_time": 0.10539531707763672 + }, + { + "epoch": 1.318359375e-06, + "model_forward_time": 0.025434494018554688, + "step": 864 + }, + { + "epoch": 1.318359375e-06, + "step": 864, + "training_step_time": 0.10926222801208496 + }, + { + "epoch": 1.31988525390625e-06, + "model_forward_time": 0.025406837463378906, + "step": 865 + }, + { + "epoch": 1.31988525390625e-06, + "step": 865, + "training_step_time": 0.10640454292297363 + }, + { + "epoch": 1.3214111328125e-06, + "model_forward_time": 0.025209903717041016, + "step": 866 + }, + { + "epoch": 1.3214111328125e-06, + "step": 866, + "training_step_time": 0.10465574264526367 + }, + { + "epoch": 1.32293701171875e-06, + "model_forward_time": 0.025572776794433594, + "step": 867 + }, + { + "epoch": 1.32293701171875e-06, + "step": 867, + "training_step_time": 0.10882973670959473 + }, + { + "epoch": 1.324462890625e-06, + "model_forward_time": 0.025415420532226562, + "step": 868 + }, + { + "epoch": 1.324462890625e-06, + "step": 868, + "training_step_time": 0.10596466064453125 + }, + { + "epoch": 1.32598876953125e-06, + "model_forward_time": 0.02488541603088379, + "step": 869 + }, + { + "epoch": 1.32598876953125e-06, + "step": 869, + "training_step_time": 0.10890769958496094 + }, + { + "epoch": 1.3275146484375e-06, + "grad_norm": 2.062347650527954, + "learning_rate": 5.8e-05, + "loss": 0.2042, + "step": 870 + }, + { + "epoch": 1.3275146484375e-06, + "model_forward_time": 0.02486252784729004, + "step": 870 + }, + { + "epoch": 1.3275146484375e-06, + "step": 870, + "training_step_time": 0.1050269603729248 + }, + { + "epoch": 1.32904052734375e-06, + "model_forward_time": 0.02487969398498535, + "step": 871 + }, + { + "epoch": 1.32904052734375e-06, + "step": 871, + "training_step_time": 0.10508346557617188 + }, + { + "epoch": 1.33056640625e-06, + "model_forward_time": 0.025488615036010742, + "step": 872 + }, + { + "epoch": 1.33056640625e-06, + "step": 872, + "training_step_time": 0.1053617000579834 + }, + { + "epoch": 1.33209228515625e-06, + "model_forward_time": 0.028691768646240234, + "step": 873 + }, + { + "epoch": 1.33209228515625e-06, + "step": 873, + "training_step_time": 0.10998988151550293 + }, + { + "epoch": 1.3336181640625e-06, + "model_forward_time": 0.026524066925048828, + "step": 874 + }, + { + "epoch": 1.3336181640625e-06, + "step": 874, + "training_step_time": 0.1125645637512207 + }, + { + "epoch": 1.33514404296875e-06, + "model_forward_time": 0.025743961334228516, + "step": 875 + }, + { + "epoch": 1.33514404296875e-06, + "step": 875, + "training_step_time": 0.10617494583129883 + }, + { + "epoch": 1.336669921875e-06, + "model_forward_time": 0.025171279907226562, + "step": 876 + }, + { + "epoch": 1.336669921875e-06, + "step": 876, + "training_step_time": 0.11585307121276855 + }, + { + "epoch": 1.33819580078125e-06, + "model_forward_time": 0.02558159828186035, + "step": 877 + }, + { + "epoch": 1.33819580078125e-06, + "step": 877, + "training_step_time": 0.10683536529541016 + }, + { + "epoch": 1.3397216796875e-06, + "model_forward_time": 0.02523636817932129, + "step": 878 + }, + { + "epoch": 1.3397216796875e-06, + "step": 878, + "training_step_time": 0.19900131225585938 + }, + { + "epoch": 1.34124755859375e-06, + "model_forward_time": 0.02442336082458496, + "step": 879 + }, + { + "epoch": 1.34124755859375e-06, + "step": 879, + "training_step_time": 0.10666322708129883 + }, + { + "epoch": 1.3427734375e-06, + "grad_norm": 1.5996571779251099, + "learning_rate": 5.866666666666667e-05, + "loss": 0.1628, + "step": 880 + }, + { + "epoch": 1.3427734375e-06, + "model_forward_time": 0.024828195571899414, + "step": 880 + }, + { + "epoch": 1.3427734375e-06, + "step": 880, + "training_step_time": 0.19969630241394043 + }, + { + "epoch": 1.34429931640625e-06, + "model_forward_time": 0.02455878257751465, + "step": 881 + }, + { + "epoch": 1.34429931640625e-06, + "step": 881, + "training_step_time": 0.11535906791687012 + }, + { + "epoch": 1.3458251953125e-06, + "model_forward_time": 0.025022268295288086, + "step": 882 + }, + { + "epoch": 1.3458251953125e-06, + "step": 882, + "training_step_time": 0.21144580841064453 + }, + { + "epoch": 1.34735107421875e-06, + "model_forward_time": 0.02456045150756836, + "step": 883 + }, + { + "epoch": 1.34735107421875e-06, + "step": 883, + "training_step_time": 0.15330243110656738 + }, + { + "epoch": 1.348876953125e-06, + "model_forward_time": 0.025060415267944336, + "step": 884 + }, + { + "epoch": 1.348876953125e-06, + "step": 884, + "training_step_time": 0.20932865142822266 + }, + { + "epoch": 1.35040283203125e-06, + "model_forward_time": 0.024442195892333984, + "step": 885 + }, + { + "epoch": 1.35040283203125e-06, + "step": 885, + "training_step_time": 0.10668277740478516 + }, + { + "epoch": 1.3519287109375e-06, + "model_forward_time": 0.0246737003326416, + "step": 886 + }, + { + "epoch": 1.3519287109375e-06, + "step": 886, + "training_step_time": 0.10990262031555176 + }, + { + "epoch": 1.35345458984375e-06, + "model_forward_time": 0.025329113006591797, + "step": 887 + }, + { + "epoch": 1.35345458984375e-06, + "step": 887, + "training_step_time": 0.12615203857421875 + }, + { + "epoch": 1.35498046875e-06, + "model_forward_time": 0.028086423873901367, + "step": 888 + }, + { + "epoch": 1.35498046875e-06, + "step": 888, + "training_step_time": 0.11855888366699219 + }, + { + "epoch": 1.35650634765625e-06, + "model_forward_time": 0.025999069213867188, + "step": 889 + }, + { + "epoch": 1.35650634765625e-06, + "step": 889, + "training_step_time": 0.11162257194519043 + }, + { + "epoch": 1.3580322265625e-06, + "grad_norm": 1.142017126083374, + "learning_rate": 5.9333333333333343e-05, + "loss": 0.1634, + "step": 890 + }, + { + "epoch": 1.3580322265625e-06, + "model_forward_time": 0.02574610710144043, + "step": 890 + }, + { + "epoch": 1.3580322265625e-06, + "step": 890, + "training_step_time": 0.19820380210876465 + }, + { + "epoch": 1.35955810546875e-06, + "model_forward_time": 0.0249178409576416, + "step": 891 + }, + { + "epoch": 1.35955810546875e-06, + "step": 891, + "training_step_time": 0.17315220832824707 + }, + { + "epoch": 1.361083984375e-06, + "model_forward_time": 0.024877071380615234, + "step": 892 + }, + { + "epoch": 1.361083984375e-06, + "step": 892, + "training_step_time": 0.1156160831451416 + }, + { + "epoch": 1.36260986328125e-06, + "model_forward_time": 0.025287866592407227, + "step": 893 + }, + { + "epoch": 1.36260986328125e-06, + "step": 893, + "training_step_time": 0.1058206558227539 + }, + { + "epoch": 1.3641357421875e-06, + "model_forward_time": 0.025472164154052734, + "step": 894 + }, + { + "epoch": 1.3641357421875e-06, + "step": 894, + "training_step_time": 0.10944414138793945 + }, + { + "epoch": 1.36566162109375e-06, + "model_forward_time": 0.025201797485351562, + "step": 895 + }, + { + "epoch": 1.36566162109375e-06, + "step": 895, + "training_step_time": 0.10701942443847656 + }, + { + "epoch": 1.3671875e-06, + "model_forward_time": 0.025209665298461914, + "step": 896 + }, + { + "epoch": 1.3671875e-06, + "step": 896, + "training_step_time": 0.10886192321777344 + }, + { + "epoch": 1.36871337890625e-06, + "model_forward_time": 0.025374650955200195, + "step": 897 + }, + { + "epoch": 1.36871337890625e-06, + "step": 897, + "training_step_time": 0.10753178596496582 + }, + { + "epoch": 1.3702392578125e-06, + "model_forward_time": 0.025010347366333008, + "step": 898 + }, + { + "epoch": 1.3702392578125e-06, + "step": 898, + "training_step_time": 0.10671615600585938 + }, + { + "epoch": 1.37176513671875e-06, + "model_forward_time": 0.025665283203125, + "step": 899 + }, + { + "epoch": 1.37176513671875e-06, + "step": 899, + "training_step_time": 0.10579776763916016 + }, + { + "epoch": 1.373291015625e-06, + "grad_norm": 1.4356186389923096, + "learning_rate": 6e-05, + "loss": 0.1725, + "step": 900 + }, + { + "epoch": 1.373291015625e-06, + "model_forward_time": 0.025333642959594727, + "step": 900 + }, + { + "epoch": 1.373291015625e-06, + "step": 900, + "training_step_time": 0.20978879928588867 + }, + { + "epoch": 1.37481689453125e-06, + "model_forward_time": 0.025175809860229492, + "step": 901 + }, + { + "epoch": 1.37481689453125e-06, + "step": 901, + "training_step_time": 0.1122903823852539 + }, + { + "epoch": 1.3763427734375e-06, + "model_forward_time": 0.02466297149658203, + "step": 902 + }, + { + "epoch": 1.3763427734375e-06, + "step": 902, + "training_step_time": 0.1049649715423584 + }, + { + "epoch": 1.37786865234375e-06, + "model_forward_time": 0.025486230850219727, + "step": 903 + }, + { + "epoch": 1.37786865234375e-06, + "step": 903, + "training_step_time": 0.20108819007873535 + }, + { + "epoch": 1.37939453125e-06, + "model_forward_time": 0.024318456649780273, + "step": 904 + }, + { + "epoch": 1.37939453125e-06, + "step": 904, + "training_step_time": 0.10948443412780762 + }, + { + "epoch": 1.38092041015625e-06, + "model_forward_time": 0.024786710739135742, + "step": 905 + }, + { + "epoch": 1.38092041015625e-06, + "step": 905, + "training_step_time": 0.10420083999633789 + }, + { + "epoch": 1.3824462890625e-06, + "model_forward_time": 0.025238752365112305, + "step": 906 + }, + { + "epoch": 1.3824462890625e-06, + "step": 906, + "training_step_time": 0.10785579681396484 + }, + { + "epoch": 1.38397216796875e-06, + "model_forward_time": 0.0261232852935791, + "step": 907 + }, + { + "epoch": 1.38397216796875e-06, + "step": 907, + "training_step_time": 0.10701894760131836 + }, + { + "epoch": 1.385498046875e-06, + "model_forward_time": 0.025119304656982422, + "step": 908 + }, + { + "epoch": 1.385498046875e-06, + "step": 908, + "training_step_time": 0.10891938209533691 + }, + { + "epoch": 1.38702392578125e-06, + "model_forward_time": 0.02426433563232422, + "step": 909 + }, + { + "epoch": 1.38702392578125e-06, + "step": 909, + "training_step_time": 0.11050534248352051 + }, + { + "epoch": 1.3885498046875e-06, + "grad_norm": 1.1422182321548462, + "learning_rate": 6.066666666666667e-05, + "loss": 0.1749, + "step": 910 + }, + { + "epoch": 1.3885498046875e-06, + "model_forward_time": 0.02512979507446289, + "step": 910 + }, + { + "epoch": 1.3885498046875e-06, + "step": 910, + "training_step_time": 0.10527372360229492 + }, + { + "epoch": 1.39007568359375e-06, + "model_forward_time": 0.0250089168548584, + "step": 911 + }, + { + "epoch": 1.39007568359375e-06, + "step": 911, + "training_step_time": 0.11338305473327637 + }, + { + "epoch": 1.3916015625e-06, + "model_forward_time": 0.024873971939086914, + "step": 912 + }, + { + "epoch": 1.3916015625e-06, + "step": 912, + "training_step_time": 0.11277055740356445 + }, + { + "epoch": 1.39312744140625e-06, + "model_forward_time": 0.025377511978149414, + "step": 913 + }, + { + "epoch": 1.39312744140625e-06, + "step": 913, + "training_step_time": 0.10681915283203125 + }, + { + "epoch": 1.3946533203125e-06, + "model_forward_time": 0.025179147720336914, + "step": 914 + }, + { + "epoch": 1.3946533203125e-06, + "step": 914, + "training_step_time": 0.1075286865234375 + }, + { + "epoch": 1.39617919921875e-06, + "model_forward_time": 0.025724172592163086, + "step": 915 + }, + { + "epoch": 1.39617919921875e-06, + "step": 915, + "training_step_time": 0.10797691345214844 + }, + { + "epoch": 1.397705078125e-06, + "model_forward_time": 0.02528071403503418, + "step": 916 + }, + { + "epoch": 1.397705078125e-06, + "step": 916, + "training_step_time": 0.10988545417785645 + }, + { + "epoch": 1.39923095703125e-06, + "model_forward_time": 0.025521039962768555, + "step": 917 + }, + { + "epoch": 1.39923095703125e-06, + "step": 917, + "training_step_time": 0.1135554313659668 + }, + { + "epoch": 1.4007568359375e-06, + "model_forward_time": 0.025025367736816406, + "step": 918 + }, + { + "epoch": 1.4007568359375e-06, + "step": 918, + "training_step_time": 0.11817789077758789 + }, + { + "epoch": 1.40228271484375e-06, + "model_forward_time": 0.02422356605529785, + "step": 919 + }, + { + "epoch": 1.40228271484375e-06, + "step": 919, + "training_step_time": 0.12440180778503418 + }, + { + "epoch": 1.40380859375e-06, + "grad_norm": 1.2643885612487793, + "learning_rate": 6.133333333333334e-05, + "loss": 0.154, + "step": 920 + }, + { + "epoch": 1.40380859375e-06, + "model_forward_time": 0.02406764030456543, + "step": 920 + }, + { + "epoch": 1.40380859375e-06, + "step": 920, + "training_step_time": 0.12764978408813477 + }, + { + "epoch": 1.40533447265625e-06, + "model_forward_time": 0.02401137351989746, + "step": 921 + }, + { + "epoch": 1.40533447265625e-06, + "step": 921, + "training_step_time": 0.11670327186584473 + }, + { + "epoch": 1.4068603515625e-06, + "model_forward_time": 0.02414560317993164, + "step": 922 + }, + { + "epoch": 1.4068603515625e-06, + "step": 922, + "training_step_time": 0.1206510066986084 + }, + { + "epoch": 1.40838623046875e-06, + "model_forward_time": 0.02466607093811035, + "step": 923 + }, + { + "epoch": 1.40838623046875e-06, + "step": 923, + "training_step_time": 0.11596298217773438 + }, + { + "epoch": 1.409912109375e-06, + "model_forward_time": 0.025465965270996094, + "step": 924 + }, + { + "epoch": 1.409912109375e-06, + "step": 924, + "training_step_time": 0.19401812553405762 + }, + { + "epoch": 1.41143798828125e-06, + "model_forward_time": 0.02424764633178711, + "step": 925 + }, + { + "epoch": 1.41143798828125e-06, + "step": 925, + "training_step_time": 0.18452143669128418 + }, + { + "epoch": 1.4129638671875e-06, + "model_forward_time": 0.024307727813720703, + "step": 926 + }, + { + "epoch": 1.4129638671875e-06, + "step": 926, + "training_step_time": 0.1534273624420166 + }, + { + "epoch": 1.41448974609375e-06, + "model_forward_time": 0.024196386337280273, + "step": 927 + }, + { + "epoch": 1.41448974609375e-06, + "step": 927, + "training_step_time": 0.12951970100402832 + }, + { + "epoch": 1.416015625e-06, + "model_forward_time": 0.02430891990661621, + "step": 928 + }, + { + "epoch": 1.416015625e-06, + "step": 928, + "training_step_time": 0.2087705135345459 + }, + { + "epoch": 1.41754150390625e-06, + "model_forward_time": 0.024546146392822266, + "step": 929 + }, + { + "epoch": 1.41754150390625e-06, + "step": 929, + "training_step_time": 0.12098336219787598 + }, + { + "epoch": 1.4190673828125e-06, + "grad_norm": 0.938713788986206, + "learning_rate": 6.2e-05, + "loss": 0.1516, + "step": 930 + }, + { + "epoch": 1.4190673828125e-06, + "model_forward_time": 0.024710416793823242, + "step": 930 + }, + { + "epoch": 1.4190673828125e-06, + "step": 930, + "training_step_time": 0.10630536079406738 + }, + { + "epoch": 1.42059326171875e-06, + "model_forward_time": 0.025505542755126953, + "step": 931 + }, + { + "epoch": 1.42059326171875e-06, + "step": 931, + "training_step_time": 0.10724806785583496 + }, + { + "epoch": 1.422119140625e-06, + "model_forward_time": 0.025040864944458008, + "step": 932 + }, + { + "epoch": 1.422119140625e-06, + "step": 932, + "training_step_time": 0.10668301582336426 + }, + { + "epoch": 1.42364501953125e-06, + "model_forward_time": 0.025459766387939453, + "step": 933 + }, + { + "epoch": 1.42364501953125e-06, + "step": 933, + "training_step_time": 0.16424107551574707 + }, + { + "epoch": 1.4251708984375e-06, + "model_forward_time": 0.024644851684570312, + "step": 934 + }, + { + "epoch": 1.4251708984375e-06, + "step": 934, + "training_step_time": 0.15659189224243164 + }, + { + "epoch": 1.42669677734375e-06, + "model_forward_time": 0.024046659469604492, + "step": 935 + }, + { + "epoch": 1.42669677734375e-06, + "step": 935, + "training_step_time": 0.10995745658874512 + }, + { + "epoch": 1.42822265625e-06, + "model_forward_time": 0.024933576583862305, + "step": 936 + }, + { + "epoch": 1.42822265625e-06, + "step": 936, + "training_step_time": 0.10501241683959961 + }, + { + "epoch": 1.42974853515625e-06, + "model_forward_time": 0.024575233459472656, + "step": 937 + }, + { + "epoch": 1.42974853515625e-06, + "step": 937, + "training_step_time": 0.17602777481079102 + }, + { + "epoch": 1.4312744140625e-06, + "model_forward_time": 0.02487921714782715, + "step": 938 + }, + { + "epoch": 1.4312744140625e-06, + "step": 938, + "training_step_time": 0.10854315757751465 + }, + { + "epoch": 1.43280029296875e-06, + "model_forward_time": 0.02436995506286621, + "step": 939 + }, + { + "epoch": 1.43280029296875e-06, + "step": 939, + "training_step_time": 0.10376501083374023 + }, + { + "epoch": 1.434326171875e-06, + "grad_norm": 1.2495813369750977, + "learning_rate": 6.266666666666667e-05, + "loss": 0.1659, + "step": 940 + }, + { + "epoch": 1.434326171875e-06, + "model_forward_time": 0.025046825408935547, + "step": 940 + }, + { + "epoch": 1.434326171875e-06, + "step": 940, + "training_step_time": 0.1043097972869873 + }, + { + "epoch": 1.43585205078125e-06, + "model_forward_time": 0.02518749237060547, + "step": 941 + }, + { + "epoch": 1.43585205078125e-06, + "step": 941, + "training_step_time": 0.10824966430664062 + }, + { + "epoch": 1.4373779296875e-06, + "model_forward_time": 0.025079727172851562, + "step": 942 + }, + { + "epoch": 1.4373779296875e-06, + "step": 942, + "training_step_time": 0.1077127456665039 + }, + { + "epoch": 1.43890380859375e-06, + "model_forward_time": 0.025205373764038086, + "step": 943 + }, + { + "epoch": 1.43890380859375e-06, + "step": 943, + "training_step_time": 0.11058306694030762 + }, + { + "epoch": 1.4404296875e-06, + "model_forward_time": 0.026085615158081055, + "step": 944 + }, + { + "epoch": 1.4404296875e-06, + "step": 944, + "training_step_time": 0.11014008522033691 + }, + { + "epoch": 1.44195556640625e-06, + "model_forward_time": 0.025360584259033203, + "step": 945 + }, + { + "epoch": 1.44195556640625e-06, + "step": 945, + "training_step_time": 0.19955921173095703 + }, + { + "epoch": 1.4434814453125e-06, + "model_forward_time": 0.024649620056152344, + "step": 946 + }, + { + "epoch": 1.4434814453125e-06, + "step": 946, + "training_step_time": 0.10422062873840332 + }, + { + "epoch": 1.44500732421875e-06, + "model_forward_time": 0.024888038635253906, + "step": 947 + }, + { + "epoch": 1.44500732421875e-06, + "step": 947, + "training_step_time": 0.10775303840637207 + }, + { + "epoch": 1.446533203125e-06, + "model_forward_time": 0.025457382202148438, + "step": 948 + }, + { + "epoch": 1.446533203125e-06, + "step": 948, + "training_step_time": 0.20613670349121094 + }, + { + "epoch": 1.44805908203125e-06, + "model_forward_time": 0.024790525436401367, + "step": 949 + }, + { + "epoch": 1.44805908203125e-06, + "step": 949, + "training_step_time": 0.10842728614807129 + }, + { + "epoch": 1.4495849609375e-06, + "grad_norm": 1.6541813611984253, + "learning_rate": 6.333333333333333e-05, + "loss": 0.1667, + "step": 950 + }, + { + "epoch": 1.4495849609375e-06, + "model_forward_time": 0.02460169792175293, + "step": 950 + }, + { + "epoch": 1.4495849609375e-06, + "step": 950, + "training_step_time": 0.10522675514221191 + }, + { + "epoch": 1.45111083984375e-06, + "model_forward_time": 0.02578425407409668, + "step": 951 + }, + { + "epoch": 1.45111083984375e-06, + "step": 951, + "training_step_time": 0.10966944694519043 + }, + { + "epoch": 1.45263671875e-06, + "model_forward_time": 0.02548074722290039, + "step": 952 + }, + { + "epoch": 1.45263671875e-06, + "step": 952, + "training_step_time": 0.11618733406066895 + }, + { + "epoch": 1.45416259765625e-06, + "model_forward_time": 0.024392127990722656, + "step": 953 + }, + { + "epoch": 1.45416259765625e-06, + "step": 953, + "training_step_time": 0.11213970184326172 + }, + { + "epoch": 1.4556884765625e-06, + "model_forward_time": 0.0252225399017334, + "step": 954 + }, + { + "epoch": 1.4556884765625e-06, + "step": 954, + "training_step_time": 0.1082155704498291 + }, + { + "epoch": 1.45721435546875e-06, + "model_forward_time": 0.025261402130126953, + "step": 955 + }, + { + "epoch": 1.45721435546875e-06, + "step": 955, + "training_step_time": 0.10611438751220703 + }, + { + "epoch": 1.458740234375e-06, + "model_forward_time": 0.025281429290771484, + "step": 956 + }, + { + "epoch": 1.458740234375e-06, + "step": 956, + "training_step_time": 0.1083526611328125 + }, + { + "epoch": 1.46026611328125e-06, + "model_forward_time": 0.025420188903808594, + "step": 957 + }, + { + "epoch": 1.46026611328125e-06, + "step": 957, + "training_step_time": 0.10825753211975098 + }, + { + "epoch": 1.4617919921875e-06, + "model_forward_time": 0.025020599365234375, + "step": 958 + }, + { + "epoch": 1.4617919921875e-06, + "step": 958, + "training_step_time": 0.10666775703430176 + }, + { + "epoch": 1.46331787109375e-06, + "model_forward_time": 0.025125980377197266, + "step": 959 + }, + { + "epoch": 1.46331787109375e-06, + "step": 959, + "training_step_time": 0.18561768531799316 + }, + { + "epoch": 1.46484375e-06, + "grad_norm": 1.3631620407104492, + "learning_rate": 6.400000000000001e-05, + "loss": 0.1641, + "step": 960 + }, + { + "epoch": 1.46484375e-06, + "model_forward_time": 0.027225971221923828, + "step": 960 + }, + { + "epoch": 1.46484375e-06, + "step": 960, + "training_step_time": 0.20401239395141602 + }, + { + "epoch": 1.46636962890625e-06, + "model_forward_time": 0.02450084686279297, + "step": 961 + }, + { + "epoch": 1.46636962890625e-06, + "step": 961, + "training_step_time": 0.19464325904846191 + }, + { + "epoch": 1.4678955078125e-06, + "model_forward_time": 0.02423882484436035, + "step": 962 + }, + { + "epoch": 1.4678955078125e-06, + "step": 962, + "training_step_time": 0.18244552612304688 + }, + { + "epoch": 1.46942138671875e-06, + "model_forward_time": 0.024138927459716797, + "step": 963 + }, + { + "epoch": 1.46942138671875e-06, + "step": 963, + "training_step_time": 0.17023897171020508 + }, + { + "epoch": 1.470947265625e-06, + "model_forward_time": 0.024628400802612305, + "step": 964 + }, + { + "epoch": 1.470947265625e-06, + "step": 964, + "training_step_time": 0.11276507377624512 + }, + { + "epoch": 1.47247314453125e-06, + "model_forward_time": 0.024641036987304688, + "step": 965 + }, + { + "epoch": 1.47247314453125e-06, + "step": 965, + "training_step_time": 0.10498809814453125 + }, + { + "epoch": 1.4739990234375e-06, + "model_forward_time": 0.025558948516845703, + "step": 966 + }, + { + "epoch": 1.4739990234375e-06, + "step": 966, + "training_step_time": 0.20514297485351562 + }, + { + "epoch": 1.47552490234375e-06, + "model_forward_time": 0.024341106414794922, + "step": 967 + }, + { + "epoch": 1.47552490234375e-06, + "step": 967, + "training_step_time": 0.12818026542663574 + }, + { + "epoch": 1.47705078125e-06, + "model_forward_time": 0.02411651611328125, + "step": 968 + }, + { + "epoch": 1.47705078125e-06, + "step": 968, + "training_step_time": 0.10740494728088379 + }, + { + "epoch": 1.47857666015625e-06, + "model_forward_time": 0.025133371353149414, + "step": 969 + }, + { + "epoch": 1.47857666015625e-06, + "step": 969, + "training_step_time": 0.11165308952331543 + }, + { + "epoch": 1.4801025390625e-06, + "grad_norm": 1.1530307531356812, + "learning_rate": 6.466666666666666e-05, + "loss": 0.1916, + "step": 970 + }, + { + "epoch": 1.4801025390625e-06, + "model_forward_time": 0.02537703514099121, + "step": 970 + }, + { + "epoch": 1.4801025390625e-06, + "step": 970, + "training_step_time": 0.16168856620788574 + }, + { + "epoch": 1.48162841796875e-06, + "model_forward_time": 0.024684667587280273, + "step": 971 + }, + { + "epoch": 1.48162841796875e-06, + "step": 971, + "training_step_time": 0.210524320602417 + }, + { + "epoch": 1.483154296875e-06, + "model_forward_time": 0.02805352210998535, + "step": 972 + }, + { + "epoch": 1.483154296875e-06, + "step": 972, + "training_step_time": 0.12474703788757324 + }, + { + "epoch": 1.48468017578125e-06, + "model_forward_time": 0.024551868438720703, + "step": 973 + }, + { + "epoch": 1.48468017578125e-06, + "step": 973, + "training_step_time": 0.11638402938842773 + }, + { + "epoch": 1.4862060546875e-06, + "model_forward_time": 0.024555206298828125, + "step": 974 + }, + { + "epoch": 1.4862060546875e-06, + "step": 974, + "training_step_time": 0.11339402198791504 + }, + { + "epoch": 1.48773193359375e-06, + "model_forward_time": 0.024006366729736328, + "step": 975 + }, + { + "epoch": 1.48773193359375e-06, + "step": 975, + "training_step_time": 0.11186575889587402 + }, + { + "epoch": 1.4892578125e-06, + "model_forward_time": 0.025236129760742188, + "step": 976 + }, + { + "epoch": 1.4892578125e-06, + "step": 976, + "training_step_time": 0.12916326522827148 + }, + { + "epoch": 1.49078369140625e-06, + "model_forward_time": 0.024791717529296875, + "step": 977 + }, + { + "epoch": 1.49078369140625e-06, + "step": 977, + "training_step_time": 0.15468549728393555 + }, + { + "epoch": 1.4923095703125e-06, + "model_forward_time": 0.024564743041992188, + "step": 978 + }, + { + "epoch": 1.4923095703125e-06, + "step": 978, + "training_step_time": 0.1587238311767578 + }, + { + "epoch": 1.49383544921875e-06, + "model_forward_time": 0.024112462997436523, + "step": 979 + }, + { + "epoch": 1.49383544921875e-06, + "step": 979, + "training_step_time": 0.1819000244140625 + }, + { + "epoch": 1.495361328125e-06, + "grad_norm": 0.9992648363113403, + "learning_rate": 6.533333333333334e-05, + "loss": 0.1338, + "step": 980 + }, + { + "epoch": 1.495361328125e-06, + "model_forward_time": 0.024796724319458008, + "step": 980 + }, + { + "epoch": 1.495361328125e-06, + "step": 980, + "training_step_time": 0.10483264923095703 + }, + { + "epoch": 1.49688720703125e-06, + "model_forward_time": 0.024901866912841797, + "step": 981 + }, + { + "epoch": 1.49688720703125e-06, + "step": 981, + "training_step_time": 0.1039280891418457 + }, + { + "epoch": 1.4984130859375e-06, + "model_forward_time": 0.025199413299560547, + "step": 982 + }, + { + "epoch": 1.4984130859375e-06, + "step": 982, + "training_step_time": 0.10642552375793457 + }, + { + "epoch": 1.49993896484375e-06, + "model_forward_time": 0.025878429412841797, + "step": 983 + }, + { + "epoch": 1.49993896484375e-06, + "step": 983, + "training_step_time": 0.10968661308288574 + }, + { + "epoch": 1.50146484375e-06, + "model_forward_time": 0.025981426239013672, + "step": 984 + }, + { + "epoch": 1.50146484375e-06, + "step": 984, + "training_step_time": 0.10543394088745117 + }, + { + "epoch": 1.50299072265625e-06, + "model_forward_time": 0.025553226470947266, + "step": 985 + }, + { + "epoch": 1.50299072265625e-06, + "step": 985, + "training_step_time": 0.10822367668151855 + }, + { + "epoch": 1.5045166015625e-06, + "model_forward_time": 0.025400638580322266, + "step": 986 + }, + { + "epoch": 1.5045166015625e-06, + "step": 986, + "training_step_time": 0.1064004898071289 + }, + { + "epoch": 1.50604248046875e-06, + "model_forward_time": 0.024796485900878906, + "step": 987 + }, + { + "epoch": 1.50604248046875e-06, + "step": 987, + "training_step_time": 0.16751623153686523 + }, + { + "epoch": 1.507568359375e-06, + "model_forward_time": 0.024163246154785156, + "step": 988 + }, + { + "epoch": 1.507568359375e-06, + "step": 988, + "training_step_time": 0.15427350997924805 + }, + { + "epoch": 1.50909423828125e-06, + "model_forward_time": 0.02545905113220215, + "step": 989 + }, + { + "epoch": 1.50909423828125e-06, + "step": 989, + "training_step_time": 0.11192631721496582 + }, + { + "epoch": 1.5106201171875e-06, + "grad_norm": 1.400564193725586, + "learning_rate": 6.6e-05, + "loss": 0.1444, + "step": 990 + }, + { + "epoch": 1.5106201171875e-06, + "model_forward_time": 0.02580571174621582, + "step": 990 + }, + { + "epoch": 1.5106201171875e-06, + "step": 990, + "training_step_time": 0.20200586318969727 + }, + { + "epoch": 1.51214599609375e-06, + "model_forward_time": 0.024875402450561523, + "step": 991 + }, + { + "epoch": 1.51214599609375e-06, + "step": 991, + "training_step_time": 0.10776424407958984 + }, + { + "epoch": 1.513671875e-06, + "model_forward_time": 0.02468729019165039, + "step": 992 + }, + { + "epoch": 1.513671875e-06, + "step": 992, + "training_step_time": 0.10306096076965332 + }, + { + "epoch": 1.51519775390625e-06, + "model_forward_time": 0.025013446807861328, + "step": 993 + }, + { + "epoch": 1.51519775390625e-06, + "step": 993, + "training_step_time": 0.10837674140930176 + }, + { + "epoch": 1.5167236328125e-06, + "model_forward_time": 0.02664804458618164, + "step": 994 + }, + { + "epoch": 1.5167236328125e-06, + "step": 994, + "training_step_time": 0.10871458053588867 + }, + { + "epoch": 1.51824951171875e-06, + "model_forward_time": 0.025187015533447266, + "step": 995 + }, + { + "epoch": 1.51824951171875e-06, + "step": 995, + "training_step_time": 0.10617589950561523 + }, + { + "epoch": 1.519775390625e-06, + "model_forward_time": 0.02538156509399414, + "step": 996 + }, + { + "epoch": 1.519775390625e-06, + "step": 996, + "training_step_time": 0.10701131820678711 + }, + { + "epoch": 1.52130126953125e-06, + "model_forward_time": 0.025536775588989258, + "step": 997 + }, + { + "epoch": 1.52130126953125e-06, + "step": 997, + "training_step_time": 0.1121518611907959 + }, + { + "epoch": 1.5228271484375e-06, + "model_forward_time": 0.02524590492248535, + "step": 998 + }, + { + "epoch": 1.5228271484375e-06, + "step": 998, + "training_step_time": 0.1093595027923584 + }, + { + "epoch": 1.52435302734375e-06, + "model_forward_time": 0.02507781982421875, + "step": 999 + }, + { + "epoch": 1.52435302734375e-06, + "step": 999, + "training_step_time": 0.15689635276794434 + }, + { + "epoch": 1.52587890625e-06, + "grad_norm": 1.3624902963638306, + "learning_rate": 6.666666666666667e-05, + "loss": 0.146, + "step": 1000 + }, + { + "epoch": 1.52587890625e-06, + "model_forward_time": 0.025603532791137695, + "step": 1000 + }, + { + "epoch": 1.52587890625e-06, + "step": 1000, + "training_step_time": 0.1047816276550293 + }, + { + "epoch": 1.52740478515625e-06, + "model_forward_time": 0.025385141372680664, + "step": 1001 + }, + { + "epoch": 1.52740478515625e-06, + "step": 1001, + "training_step_time": 0.1654202938079834 + }, + { + "epoch": 1.5289306640625e-06, + "model_forward_time": 0.024989843368530273, + "step": 1002 + }, + { + "epoch": 1.5289306640625e-06, + "step": 1002, + "training_step_time": 0.12518787384033203 + }, + { + "epoch": 1.53045654296875e-06, + "model_forward_time": 0.024707317352294922, + "step": 1003 + }, + { + "epoch": 1.53045654296875e-06, + "step": 1003, + "training_step_time": 0.10969424247741699 + }, + { + "epoch": 1.531982421875e-06, + "model_forward_time": 0.025409460067749023, + "step": 1004 + }, + { + "epoch": 1.531982421875e-06, + "step": 1004, + "training_step_time": 0.11253118515014648 + }, + { + "epoch": 1.53350830078125e-06, + "model_forward_time": 0.025266647338867188, + "step": 1005 + }, + { + "epoch": 1.53350830078125e-06, + "step": 1005, + "training_step_time": 0.2053070068359375 + }, + { + "epoch": 1.5350341796875e-06, + "model_forward_time": 0.024524450302124023, + "step": 1006 + }, + { + "epoch": 1.5350341796875e-06, + "step": 1006, + "training_step_time": 0.12968015670776367 + }, + { + "epoch": 1.53656005859375e-06, + "model_forward_time": 0.024885177612304688, + "step": 1007 + }, + { + "epoch": 1.53656005859375e-06, + "step": 1007, + "training_step_time": 0.11052060127258301 + }, + { + "epoch": 1.5380859375e-06, + "model_forward_time": 0.025480270385742188, + "step": 1008 + }, + { + "epoch": 1.5380859375e-06, + "step": 1008, + "training_step_time": 0.11458420753479004 + }, + { + "epoch": 1.53961181640625e-06, + "model_forward_time": 0.02569413185119629, + "step": 1009 + }, + { + "epoch": 1.53961181640625e-06, + "step": 1009, + "training_step_time": 0.11475086212158203 + }, + { + "epoch": 1.5411376953125e-06, + "grad_norm": 1.154253602027893, + "learning_rate": 6.733333333333333e-05, + "loss": 0.1322, + "step": 1010 + }, + { + "epoch": 1.5411376953125e-06, + "model_forward_time": 0.02691030502319336, + "step": 1010 + }, + { + "epoch": 1.5411376953125e-06, + "step": 1010, + "training_step_time": 0.18335223197937012 + }, + { + "epoch": 1.54266357421875e-06, + "model_forward_time": 0.02480316162109375, + "step": 1011 + }, + { + "epoch": 1.54266357421875e-06, + "step": 1011, + "training_step_time": 0.12211203575134277 + }, + { + "epoch": 1.544189453125e-06, + "model_forward_time": 0.024060964584350586, + "step": 1012 + }, + { + "epoch": 1.544189453125e-06, + "step": 1012, + "training_step_time": 0.1065986156463623 + }, + { + "epoch": 1.54571533203125e-06, + "model_forward_time": 0.0254666805267334, + "step": 1013 + }, + { + "epoch": 1.54571533203125e-06, + "step": 1013, + "training_step_time": 0.11172652244567871 + }, + { + "epoch": 1.5472412109375e-06, + "model_forward_time": 0.02579474449157715, + "step": 1014 + }, + { + "epoch": 1.5472412109375e-06, + "step": 1014, + "training_step_time": 0.11539030075073242 + }, + { + "epoch": 1.54876708984375e-06, + "model_forward_time": 0.025887250900268555, + "step": 1015 + }, + { + "epoch": 1.54876708984375e-06, + "step": 1015, + "training_step_time": 0.10739731788635254 + }, + { + "epoch": 1.55029296875e-06, + "model_forward_time": 0.025766849517822266, + "step": 1016 + }, + { + "epoch": 1.55029296875e-06, + "step": 1016, + "training_step_time": 0.11510419845581055 + }, + { + "epoch": 1.55181884765625e-06, + "model_forward_time": 0.025266408920288086, + "step": 1017 + }, + { + "epoch": 1.55181884765625e-06, + "step": 1017, + "training_step_time": 0.10869026184082031 + }, + { + "epoch": 1.5533447265625e-06, + "model_forward_time": 0.02482008934020996, + "step": 1018 + }, + { + "epoch": 1.5533447265625e-06, + "step": 1018, + "training_step_time": 0.11139249801635742 + }, + { + "epoch": 1.55487060546875e-06, + "model_forward_time": 0.027956724166870117, + "step": 1019 + }, + { + "epoch": 1.55487060546875e-06, + "step": 1019, + "training_step_time": 0.11034464836120605 + }, + { + "epoch": 1.556396484375e-06, + "grad_norm": 1.202590823173523, + "learning_rate": 6.800000000000001e-05, + "loss": 0.1665, + "step": 1020 + }, + { + "epoch": 1.556396484375e-06, + "model_forward_time": 0.025843381881713867, + "step": 1020 + }, + { + "epoch": 1.556396484375e-06, + "step": 1020, + "training_step_time": 0.11314225196838379 + }, + { + "epoch": 1.55792236328125e-06, + "model_forward_time": 0.02661275863647461, + "step": 1021 + }, + { + "epoch": 1.55792236328125e-06, + "step": 1021, + "training_step_time": 0.11158967018127441 + }, + { + "epoch": 1.5594482421875e-06, + "model_forward_time": 0.02548956871032715, + "step": 1022 + }, + { + "epoch": 1.5594482421875e-06, + "step": 1022, + "training_step_time": 0.21105599403381348 + }, + { + "epoch": 1.56097412109375e-06, + "model_forward_time": 0.02461719512939453, + "step": 1023 + }, + { + "epoch": 1.56097412109375e-06, + "step": 1023, + "training_step_time": 0.11298942565917969 + }, + { + "epoch": 1.5625e-06, + "model_forward_time": 0.024887800216674805, + "step": 1024 + }, + { + "epoch": 1.5625e-06, + "step": 1024, + "training_step_time": 0.10465312004089355 + }, + { + "epoch": 1.56402587890625e-06, + "model_forward_time": 0.02590036392211914, + "step": 1025 + }, + { + "epoch": 1.56402587890625e-06, + "step": 1025, + "training_step_time": 0.10803675651550293 + }, + { + "epoch": 1.5655517578125e-06, + "model_forward_time": 0.02511882781982422, + "step": 1026 + }, + { + "epoch": 1.5655517578125e-06, + "step": 1026, + "training_step_time": 0.10473370552062988 + }, + { + "epoch": 1.56707763671875e-06, + "model_forward_time": 0.025392532348632812, + "step": 1027 + }, + { + "epoch": 1.56707763671875e-06, + "step": 1027, + "training_step_time": 0.10641360282897949 + }, + { + "epoch": 1.568603515625e-06, + "model_forward_time": 0.025468111038208008, + "step": 1028 + }, + { + "epoch": 1.568603515625e-06, + "step": 1028, + "training_step_time": 0.1081857681274414 + }, + { + "epoch": 1.57012939453125e-06, + "model_forward_time": 0.025626659393310547, + "step": 1029 + }, + { + "epoch": 1.57012939453125e-06, + "step": 1029, + "training_step_time": 0.1059579849243164 + }, + { + "epoch": 1.5716552734375e-06, + "grad_norm": 1.2344614267349243, + "learning_rate": 6.866666666666666e-05, + "loss": 0.1456, + "step": 1030 + }, + { + "epoch": 1.5716552734375e-06, + "model_forward_time": 0.025172710418701172, + "step": 1030 + }, + { + "epoch": 1.5716552734375e-06, + "step": 1030, + "training_step_time": 0.10354948043823242 + }, + { + "epoch": 1.57318115234375e-06, + "model_forward_time": 0.025776386260986328, + "step": 1031 + }, + { + "epoch": 1.57318115234375e-06, + "step": 1031, + "training_step_time": 0.11005020141601562 + }, + { + "epoch": 1.57470703125e-06, + "model_forward_time": 0.025769472122192383, + "step": 1032 + }, + { + "epoch": 1.57470703125e-06, + "step": 1032, + "training_step_time": 0.10905647277832031 + }, + { + "epoch": 1.57623291015625e-06, + "model_forward_time": 0.025789737701416016, + "step": 1033 + }, + { + "epoch": 1.57623291015625e-06, + "step": 1033, + "training_step_time": 0.11543011665344238 + }, + { + "epoch": 1.5777587890625e-06, + "model_forward_time": 0.024952173233032227, + "step": 1034 + }, + { + "epoch": 1.5777587890625e-06, + "step": 1034, + "training_step_time": 0.12747621536254883 + }, + { + "epoch": 1.57928466796875e-06, + "model_forward_time": 0.0252377986907959, + "step": 1035 + }, + { + "epoch": 1.57928466796875e-06, + "step": 1035, + "training_step_time": 0.11939525604248047 + }, + { + "epoch": 1.580810546875e-06, + "model_forward_time": 0.02542281150817871, + "step": 1036 + }, + { + "epoch": 1.580810546875e-06, + "step": 1036, + "training_step_time": 0.12109208106994629 + }, + { + "epoch": 1.58233642578125e-06, + "model_forward_time": 0.025468111038208008, + "step": 1037 + }, + { + "epoch": 1.58233642578125e-06, + "step": 1037, + "training_step_time": 0.11703062057495117 + }, + { + "epoch": 1.5838623046875e-06, + "model_forward_time": 0.02591395378112793, + "step": 1038 + }, + { + "epoch": 1.5838623046875e-06, + "step": 1038, + "training_step_time": 0.11315035820007324 + }, + { + "epoch": 1.58538818359375e-06, + "model_forward_time": 0.024561643600463867, + "step": 1039 + }, + { + "epoch": 1.58538818359375e-06, + "step": 1039, + "training_step_time": 0.10950875282287598 + }, + { + "epoch": 1.5869140625e-06, + "grad_norm": 1.288399577140808, + "learning_rate": 6.933333333333334e-05, + "loss": 0.1432, + "step": 1040 + }, + { + "epoch": 1.5869140625e-06, + "model_forward_time": 0.02623152732849121, + "step": 1040 + }, + { + "epoch": 1.5869140625e-06, + "step": 1040, + "training_step_time": 0.1795346736907959 + }, + { + "epoch": 1.58843994140625e-06, + "model_forward_time": 0.024980783462524414, + "step": 1041 + }, + { + "epoch": 1.58843994140625e-06, + "step": 1041, + "training_step_time": 0.10783100128173828 + }, + { + "epoch": 1.5899658203125e-06, + "model_forward_time": 0.024764537811279297, + "step": 1042 + }, + { + "epoch": 1.5899658203125e-06, + "step": 1042, + "training_step_time": 0.1115577220916748 + }, + { + "epoch": 1.59149169921875e-06, + "model_forward_time": 0.026003599166870117, + "step": 1043 + }, + { + "epoch": 1.59149169921875e-06, + "step": 1043, + "training_step_time": 0.12750792503356934 + }, + { + "epoch": 1.593017578125e-06, + "model_forward_time": 0.025630712509155273, + "step": 1044 + }, + { + "epoch": 1.593017578125e-06, + "step": 1044, + "training_step_time": 0.1301584243774414 + }, + { + "epoch": 1.59454345703125e-06, + "model_forward_time": 0.025237321853637695, + "step": 1045 + }, + { + "epoch": 1.59454345703125e-06, + "step": 1045, + "training_step_time": 0.2173008918762207 + }, + { + "epoch": 1.5960693359375e-06, + "model_forward_time": 0.0244443416595459, + "step": 1046 + }, + { + "epoch": 1.5960693359375e-06, + "step": 1046, + "training_step_time": 0.10609722137451172 + }, + { + "epoch": 1.59759521484375e-06, + "model_forward_time": 0.024541854858398438, + "step": 1047 + }, + { + "epoch": 1.59759521484375e-06, + "step": 1047, + "training_step_time": 0.18325495719909668 + }, + { + "epoch": 1.59912109375e-06, + "model_forward_time": 0.02502131462097168, + "step": 1048 + }, + { + "epoch": 1.59912109375e-06, + "step": 1048, + "training_step_time": 0.12955331802368164 + }, + { + "epoch": 1.60064697265625e-06, + "model_forward_time": 0.024990558624267578, + "step": 1049 + }, + { + "epoch": 1.60064697265625e-06, + "step": 1049, + "training_step_time": 0.12085366249084473 + }, + { + "epoch": 1.6021728515625e-06, + "grad_norm": 1.2722276449203491, + "learning_rate": 7e-05, + "loss": 0.14, + "step": 1050 + }, + { + "epoch": 1.6021728515625e-06, + "model_forward_time": 0.02550959587097168, + "step": 1050 + }, + { + "epoch": 1.6021728515625e-06, + "step": 1050, + "training_step_time": 0.10591650009155273 + }, + { + "epoch": 1.60369873046875e-06, + "model_forward_time": 0.025501251220703125, + "step": 1051 + }, + { + "epoch": 1.60369873046875e-06, + "step": 1051, + "training_step_time": 0.1157996654510498 + }, + { + "epoch": 1.605224609375e-06, + "model_forward_time": 0.025095701217651367, + "step": 1052 + }, + { + "epoch": 1.605224609375e-06, + "step": 1052, + "training_step_time": 0.11974263191223145 + }, + { + "epoch": 1.60675048828125e-06, + "model_forward_time": 0.0254056453704834, + "step": 1053 + }, + { + "epoch": 1.60675048828125e-06, + "step": 1053, + "training_step_time": 0.11528444290161133 + }, + { + "epoch": 1.6082763671875e-06, + "model_forward_time": 0.025166034698486328, + "step": 1054 + }, + { + "epoch": 1.6082763671875e-06, + "step": 1054, + "training_step_time": 0.1144866943359375 + }, + { + "epoch": 1.60980224609375e-06, + "model_forward_time": 0.026715517044067383, + "step": 1055 + }, + { + "epoch": 1.60980224609375e-06, + "step": 1055, + "training_step_time": 0.11703324317932129 + }, + { + "epoch": 1.611328125e-06, + "model_forward_time": 0.025484561920166016, + "step": 1056 + }, + { + "epoch": 1.611328125e-06, + "step": 1056, + "training_step_time": 0.1573629379272461 + }, + { + "epoch": 1.61285400390625e-06, + "model_forward_time": 0.02785181999206543, + "step": 1057 + }, + { + "epoch": 1.61285400390625e-06, + "step": 1057, + "training_step_time": 0.15215301513671875 + }, + { + "epoch": 1.6143798828125e-06, + "model_forward_time": 0.02455615997314453, + "step": 1058 + }, + { + "epoch": 1.6143798828125e-06, + "step": 1058, + "training_step_time": 0.10976409912109375 + }, + { + "epoch": 1.61590576171875e-06, + "model_forward_time": 0.024673938751220703, + "step": 1059 + }, + { + "epoch": 1.61590576171875e-06, + "step": 1059, + "training_step_time": 0.10842204093933105 + }, + { + "epoch": 1.617431640625e-06, + "grad_norm": 0.8888778686523438, + "learning_rate": 7.066666666666667e-05, + "loss": 0.1744, + "step": 1060 + }, + { + "epoch": 1.617431640625e-06, + "model_forward_time": 0.025425195693969727, + "step": 1060 + }, + { + "epoch": 1.617431640625e-06, + "step": 1060, + "training_step_time": 0.11020565032958984 + }, + { + "epoch": 1.61895751953125e-06, + "model_forward_time": 0.02549910545349121, + "step": 1061 + }, + { + "epoch": 1.61895751953125e-06, + "step": 1061, + "training_step_time": 0.10889887809753418 + }, + { + "epoch": 1.6204833984375e-06, + "model_forward_time": 0.02554917335510254, + "step": 1062 + }, + { + "epoch": 1.6204833984375e-06, + "step": 1062, + "training_step_time": 0.10643172264099121 + }, + { + "epoch": 1.62200927734375e-06, + "model_forward_time": 0.027194738388061523, + "step": 1063 + }, + { + "epoch": 1.62200927734375e-06, + "step": 1063, + "training_step_time": 0.10718798637390137 + }, + { + "epoch": 1.62353515625e-06, + "model_forward_time": 0.024444580078125, + "step": 1064 + }, + { + "epoch": 1.62353515625e-06, + "step": 1064, + "training_step_time": 0.10753321647644043 + }, + { + "epoch": 1.62506103515625e-06, + "model_forward_time": 0.02469921112060547, + "step": 1065 + }, + { + "epoch": 1.62506103515625e-06, + "step": 1065, + "training_step_time": 0.11422371864318848 + }, + { + "epoch": 1.6265869140625e-06, + "model_forward_time": 0.02574777603149414, + "step": 1066 + }, + { + "epoch": 1.6265869140625e-06, + "step": 1066, + "training_step_time": 0.11976003646850586 + }, + { + "epoch": 1.62811279296875e-06, + "model_forward_time": 0.025679349899291992, + "step": 1067 + }, + { + "epoch": 1.62811279296875e-06, + "step": 1067, + "training_step_time": 0.10972070693969727 + }, + { + "epoch": 1.629638671875e-06, + "model_forward_time": 0.02556157112121582, + "step": 1068 + }, + { + "epoch": 1.629638671875e-06, + "step": 1068, + "training_step_time": 0.210374116897583 + }, + { + "epoch": 1.63116455078125e-06, + "model_forward_time": 0.024676799774169922, + "step": 1069 + }, + { + "epoch": 1.63116455078125e-06, + "step": 1069, + "training_step_time": 0.1168832778930664 + }, + { + "epoch": 1.6326904296875e-06, + "grad_norm": 1.361880898475647, + "learning_rate": 7.133333333333334e-05, + "loss": 0.1801, + "step": 1070 + }, + { + "epoch": 1.6326904296875e-06, + "model_forward_time": 0.02513289451599121, + "step": 1070 + }, + { + "epoch": 1.6326904296875e-06, + "step": 1070, + "training_step_time": 0.10729861259460449 + }, + { + "epoch": 1.63421630859375e-06, + "model_forward_time": 0.02518439292907715, + "step": 1071 + }, + { + "epoch": 1.63421630859375e-06, + "step": 1071, + "training_step_time": 0.10757255554199219 + }, + { + "epoch": 1.6357421875e-06, + "model_forward_time": 0.025432109832763672, + "step": 1072 + }, + { + "epoch": 1.6357421875e-06, + "step": 1072, + "training_step_time": 0.10678672790527344 + }, + { + "epoch": 1.63726806640625e-06, + "model_forward_time": 0.02492070198059082, + "step": 1073 + }, + { + "epoch": 1.63726806640625e-06, + "step": 1073, + "training_step_time": 0.10640501976013184 + }, + { + "epoch": 1.6387939453125e-06, + "model_forward_time": 0.02562880516052246, + "step": 1074 + }, + { + "epoch": 1.6387939453125e-06, + "step": 1074, + "training_step_time": 0.1062004566192627 + }, + { + "epoch": 1.64031982421875e-06, + "model_forward_time": 0.025224685668945312, + "step": 1075 + }, + { + "epoch": 1.64031982421875e-06, + "step": 1075, + "training_step_time": 0.10606169700622559 + }, + { + "epoch": 1.641845703125e-06, + "model_forward_time": 0.026064157485961914, + "step": 1076 + }, + { + "epoch": 1.641845703125e-06, + "step": 1076, + "training_step_time": 0.10569143295288086 + }, + { + "epoch": 1.64337158203125e-06, + "model_forward_time": 0.025818347930908203, + "step": 1077 + }, + { + "epoch": 1.64337158203125e-06, + "step": 1077, + "training_step_time": 0.10859990119934082 + }, + { + "epoch": 1.6448974609375e-06, + "model_forward_time": 0.02905130386352539, + "step": 1078 + }, + { + "epoch": 1.6448974609375e-06, + "step": 1078, + "training_step_time": 0.11166000366210938 + }, + { + "epoch": 1.64642333984375e-06, + "model_forward_time": 0.025181293487548828, + "step": 1079 + }, + { + "epoch": 1.64642333984375e-06, + "step": 1079, + "training_step_time": 0.1067955493927002 + }, + { + "epoch": 1.64794921875e-06, + "grad_norm": 1.405672311782837, + "learning_rate": 7.2e-05, + "loss": 0.209, + "step": 1080 + }, + { + "epoch": 1.64794921875e-06, + "model_forward_time": 0.025452852249145508, + "step": 1080 + }, + { + "epoch": 1.64794921875e-06, + "step": 1080, + "training_step_time": 0.10531067848205566 + }, + { + "epoch": 1.64947509765625e-06, + "model_forward_time": 0.025574922561645508, + "step": 1081 + }, + { + "epoch": 1.64947509765625e-06, + "step": 1081, + "training_step_time": 0.12761259078979492 + }, + { + "epoch": 1.6510009765625e-06, + "model_forward_time": 0.02525782585144043, + "step": 1082 + }, + { + "epoch": 1.6510009765625e-06, + "step": 1082, + "training_step_time": 0.16738271713256836 + }, + { + "epoch": 1.65252685546875e-06, + "model_forward_time": 0.026235580444335938, + "step": 1083 + }, + { + "epoch": 1.65252685546875e-06, + "step": 1083, + "training_step_time": 0.15385866165161133 + }, + { + "epoch": 1.654052734375e-06, + "model_forward_time": 0.023982524871826172, + "step": 1084 + }, + { + "epoch": 1.654052734375e-06, + "step": 1084, + "training_step_time": 0.14533233642578125 + }, + { + "epoch": 1.65557861328125e-06, + "model_forward_time": 0.026013612747192383, + "step": 1085 + }, + { + "epoch": 1.65557861328125e-06, + "step": 1085, + "training_step_time": 0.12836217880249023 + }, + { + "epoch": 1.6571044921875e-06, + "model_forward_time": 0.024506807327270508, + "step": 1086 + }, + { + "epoch": 1.6571044921875e-06, + "step": 1086, + "training_step_time": 0.13121747970581055 + }, + { + "epoch": 1.65863037109375e-06, + "model_forward_time": 0.02429056167602539, + "step": 1087 + }, + { + "epoch": 1.65863037109375e-06, + "step": 1087, + "training_step_time": 0.1856839656829834 + }, + { + "epoch": 1.66015625e-06, + "model_forward_time": 0.024260520935058594, + "step": 1088 + }, + { + "epoch": 1.66015625e-06, + "step": 1088, + "training_step_time": 0.20662331581115723 + }, + { + "epoch": 1.66168212890625e-06, + "model_forward_time": 0.024363279342651367, + "step": 1089 + }, + { + "epoch": 1.66168212890625e-06, + "step": 1089, + "training_step_time": 0.16556668281555176 + }, + { + "epoch": 1.6632080078125e-06, + "grad_norm": 0.9802555441856384, + "learning_rate": 7.266666666666667e-05, + "loss": 0.192, + "step": 1090 + }, + { + "epoch": 1.6632080078125e-06, + "model_forward_time": 0.024334430694580078, + "step": 1090 + }, + { + "epoch": 1.6632080078125e-06, + "step": 1090, + "training_step_time": 0.17417645454406738 + }, + { + "epoch": 1.66473388671875e-06, + "model_forward_time": 0.024724483489990234, + "step": 1091 + }, + { + "epoch": 1.66473388671875e-06, + "step": 1091, + "training_step_time": 0.16185903549194336 + }, + { + "epoch": 1.666259765625e-06, + "model_forward_time": 0.02400684356689453, + "step": 1092 + }, + { + "epoch": 1.666259765625e-06, + "step": 1092, + "training_step_time": 0.12656760215759277 + }, + { + "epoch": 1.66778564453125e-06, + "model_forward_time": 0.024321317672729492, + "step": 1093 + }, + { + "epoch": 1.66778564453125e-06, + "step": 1093, + "training_step_time": 0.11409902572631836 + }, + { + "epoch": 1.6693115234375e-06, + "model_forward_time": 0.024447917938232422, + "step": 1094 + }, + { + "epoch": 1.6693115234375e-06, + "step": 1094, + "training_step_time": 0.10630130767822266 + }, + { + "epoch": 1.67083740234375e-06, + "model_forward_time": 0.025160789489746094, + "step": 1095 + }, + { + "epoch": 1.67083740234375e-06, + "step": 1095, + "training_step_time": 0.16701292991638184 + }, + { + "epoch": 1.67236328125e-06, + "model_forward_time": 0.025110483169555664, + "step": 1096 + }, + { + "epoch": 1.67236328125e-06, + "step": 1096, + "training_step_time": 0.1374666690826416 + }, + { + "epoch": 1.67388916015625e-06, + "model_forward_time": 0.024764299392700195, + "step": 1097 + }, + { + "epoch": 1.67388916015625e-06, + "step": 1097, + "training_step_time": 0.11067771911621094 + }, + { + "epoch": 1.6754150390625e-06, + "model_forward_time": 0.025705575942993164, + "step": 1098 + }, + { + "epoch": 1.6754150390625e-06, + "step": 1098, + "training_step_time": 0.1098320484161377 + }, + { + "epoch": 1.67694091796875e-06, + "model_forward_time": 0.025183439254760742, + "step": 1099 + }, + { + "epoch": 1.67694091796875e-06, + "step": 1099, + "training_step_time": 0.11371445655822754 + }, + { + "epoch": 1.678466796875e-06, + "grad_norm": 1.2202184200286865, + "learning_rate": 7.333333333333333e-05, + "loss": 0.1265, + "step": 1100 + }, + { + "epoch": 1.678466796875e-06, + "model_forward_time": 0.025087356567382812, + "step": 1100 + }, + { + "epoch": 1.678466796875e-06, + "step": 1100, + "training_step_time": 0.1554563045501709 + }, + { + "epoch": 1.67999267578125e-06, + "model_forward_time": 0.02444601058959961, + "step": 1101 + }, + { + "epoch": 1.67999267578125e-06, + "step": 1101, + "training_step_time": 0.14522957801818848 + }, + { + "epoch": 1.6815185546875e-06, + "model_forward_time": 0.02455759048461914, + "step": 1102 + }, + { + "epoch": 1.6815185546875e-06, + "step": 1102, + "training_step_time": 0.10157060623168945 + }, + { + "epoch": 1.68304443359375e-06, + "model_forward_time": 0.025468826293945312, + "step": 1103 + }, + { + "epoch": 1.68304443359375e-06, + "step": 1103, + "training_step_time": 0.11110997200012207 + }, + { + "epoch": 1.6845703125e-06, + "model_forward_time": 0.02529621124267578, + "step": 1104 + }, + { + "epoch": 1.6845703125e-06, + "step": 1104, + "training_step_time": 0.10544061660766602 + }, + { + "epoch": 1.68609619140625e-06, + "model_forward_time": 0.02568531036376953, + "step": 1105 + }, + { + "epoch": 1.68609619140625e-06, + "step": 1105, + "training_step_time": 0.1126554012298584 + }, + { + "epoch": 1.6876220703125e-06, + "model_forward_time": 0.02559518814086914, + "step": 1106 + }, + { + "epoch": 1.6876220703125e-06, + "step": 1106, + "training_step_time": 0.11029887199401855 + }, + { + "epoch": 1.68914794921875e-06, + "model_forward_time": 0.0261080265045166, + "step": 1107 + }, + { + "epoch": 1.68914794921875e-06, + "step": 1107, + "training_step_time": 0.10719037055969238 + }, + { + "epoch": 1.690673828125e-06, + "model_forward_time": 0.025641202926635742, + "step": 1108 + }, + { + "epoch": 1.690673828125e-06, + "step": 1108, + "training_step_time": 0.16606760025024414 + }, + { + "epoch": 1.69219970703125e-06, + "model_forward_time": 0.024934053421020508, + "step": 1109 + }, + { + "epoch": 1.69219970703125e-06, + "step": 1109, + "training_step_time": 0.1658623218536377 + }, + { + "epoch": 1.6937255859375e-06, + "grad_norm": 1.5171630382537842, + "learning_rate": 7.4e-05, + "loss": 0.2076, + "step": 1110 + }, + { + "epoch": 1.6937255859375e-06, + "model_forward_time": 0.025086641311645508, + "step": 1110 + }, + { + "epoch": 1.6937255859375e-06, + "step": 1110, + "training_step_time": 0.10617804527282715 + }, + { + "epoch": 1.69525146484375e-06, + "model_forward_time": 0.025186777114868164, + "step": 1111 + }, + { + "epoch": 1.69525146484375e-06, + "step": 1111, + "training_step_time": 0.1072835922241211 + }, + { + "epoch": 1.69677734375e-06, + "model_forward_time": 0.02573680877685547, + "step": 1112 + }, + { + "epoch": 1.69677734375e-06, + "step": 1112, + "training_step_time": 0.11734127998352051 + }, + { + "epoch": 1.69830322265625e-06, + "model_forward_time": 0.025831222534179688, + "step": 1113 + }, + { + "epoch": 1.69830322265625e-06, + "step": 1113, + "training_step_time": 0.11211228370666504 + }, + { + "epoch": 1.6998291015625e-06, + "model_forward_time": 0.026093721389770508, + "step": 1114 + }, + { + "epoch": 1.6998291015625e-06, + "step": 1114, + "training_step_time": 0.1062004566192627 + }, + { + "epoch": 1.70135498046875e-06, + "model_forward_time": 0.025349140167236328, + "step": 1115 + }, + { + "epoch": 1.70135498046875e-06, + "step": 1115, + "training_step_time": 0.1083824634552002 + }, + { + "epoch": 1.702880859375e-06, + "model_forward_time": 0.025402307510375977, + "step": 1116 + }, + { + "epoch": 1.702880859375e-06, + "step": 1116, + "training_step_time": 0.10837841033935547 + }, + { + "epoch": 1.70440673828125e-06, + "model_forward_time": 0.025722980499267578, + "step": 1117 + }, + { + "epoch": 1.70440673828125e-06, + "step": 1117, + "training_step_time": 0.10738134384155273 + }, + { + "epoch": 1.7059326171875e-06, + "model_forward_time": 0.025147676467895508, + "step": 1118 + }, + { + "epoch": 1.7059326171875e-06, + "step": 1118, + "training_step_time": 0.10828781127929688 + }, + { + "epoch": 1.70745849609375e-06, + "model_forward_time": 0.025195837020874023, + "step": 1119 + }, + { + "epoch": 1.70745849609375e-06, + "step": 1119, + "training_step_time": 0.10542535781860352 + }, + { + "epoch": 1.708984375e-06, + "grad_norm": 1.1591230630874634, + "learning_rate": 7.466666666666667e-05, + "loss": 0.1457, + "step": 1120 + }, + { + "epoch": 1.708984375e-06, + "model_forward_time": 0.025481700897216797, + "step": 1120 + }, + { + "epoch": 1.708984375e-06, + "step": 1120, + "training_step_time": 0.10607600212097168 + }, + { + "epoch": 1.71051025390625e-06, + "model_forward_time": 0.026244640350341797, + "step": 1121 + }, + { + "epoch": 1.71051025390625e-06, + "step": 1121, + "training_step_time": 0.10785222053527832 + }, + { + "epoch": 1.7120361328125e-06, + "model_forward_time": 0.02579784393310547, + "step": 1122 + }, + { + "epoch": 1.7120361328125e-06, + "step": 1122, + "training_step_time": 0.10676932334899902 + }, + { + "epoch": 1.71356201171875e-06, + "model_forward_time": 0.02545166015625, + "step": 1123 + }, + { + "epoch": 1.71356201171875e-06, + "step": 1123, + "training_step_time": 0.10704469680786133 + }, + { + "epoch": 1.715087890625e-06, + "model_forward_time": 0.0267026424407959, + "step": 1124 + }, + { + "epoch": 1.715087890625e-06, + "step": 1124, + "training_step_time": 0.10695505142211914 + }, + { + "epoch": 1.71661376953125e-06, + "model_forward_time": 0.025321483612060547, + "step": 1125 + }, + { + "epoch": 1.71661376953125e-06, + "step": 1125, + "training_step_time": 0.10326433181762695 + }, + { + "epoch": 1.7181396484375e-06, + "model_forward_time": 0.02545642852783203, + "step": 1126 + }, + { + "epoch": 1.7181396484375e-06, + "step": 1126, + "training_step_time": 0.10436439514160156 + }, + { + "epoch": 1.71966552734375e-06, + "model_forward_time": 0.027254104614257812, + "step": 1127 + }, + { + "epoch": 1.71966552734375e-06, + "step": 1127, + "training_step_time": 0.11226606369018555 + }, + { + "epoch": 1.72119140625e-06, + "model_forward_time": 0.02574944496154785, + "step": 1128 + }, + { + "epoch": 1.72119140625e-06, + "step": 1128, + "training_step_time": 0.1058351993560791 + }, + { + "epoch": 1.72271728515625e-06, + "model_forward_time": 0.025252103805541992, + "step": 1129 + }, + { + "epoch": 1.72271728515625e-06, + "step": 1129, + "training_step_time": 0.10646653175354004 + }, + { + "epoch": 1.7242431640625e-06, + "grad_norm": 1.320570707321167, + "learning_rate": 7.533333333333334e-05, + "loss": 0.1719, + "step": 1130 + }, + { + "epoch": 1.7242431640625e-06, + "model_forward_time": 0.025857210159301758, + "step": 1130 + }, + { + "epoch": 1.7242431640625e-06, + "step": 1130, + "training_step_time": 0.10575461387634277 + }, + { + "epoch": 1.72576904296875e-06, + "model_forward_time": 0.025391578674316406, + "step": 1131 + }, + { + "epoch": 1.72576904296875e-06, + "step": 1131, + "training_step_time": 0.19349002838134766 + }, + { + "epoch": 1.727294921875e-06, + "model_forward_time": 0.02381610870361328, + "step": 1132 + }, + { + "epoch": 1.727294921875e-06, + "step": 1132, + "training_step_time": 0.10652017593383789 + }, + { + "epoch": 1.72882080078125e-06, + "model_forward_time": 0.024476289749145508, + "step": 1133 + }, + { + "epoch": 1.72882080078125e-06, + "step": 1133, + "training_step_time": 0.1157069206237793 + }, + { + "epoch": 1.7303466796875e-06, + "model_forward_time": 0.025134563446044922, + "step": 1134 + }, + { + "epoch": 1.7303466796875e-06, + "step": 1134, + "training_step_time": 0.17812681198120117 + }, + { + "epoch": 1.73187255859375e-06, + "model_forward_time": 0.02457880973815918, + "step": 1135 + }, + { + "epoch": 1.73187255859375e-06, + "step": 1135, + "training_step_time": 0.19104671478271484 + }, + { + "epoch": 1.7333984375e-06, + "model_forward_time": 0.024499893188476562, + "step": 1136 + }, + { + "epoch": 1.7333984375e-06, + "step": 1136, + "training_step_time": 0.15276217460632324 + }, + { + "epoch": 1.73492431640625e-06, + "model_forward_time": 0.024434566497802734, + "step": 1137 + }, + { + "epoch": 1.73492431640625e-06, + "step": 1137, + "training_step_time": 0.16435503959655762 + }, + { + "epoch": 1.7364501953125e-06, + "model_forward_time": 0.02401137351989746, + "step": 1138 + }, + { + "epoch": 1.7364501953125e-06, + "step": 1138, + "training_step_time": 0.13677048683166504 + }, + { + "epoch": 1.73797607421875e-06, + "model_forward_time": 0.024253368377685547, + "step": 1139 + }, + { + "epoch": 1.73797607421875e-06, + "step": 1139, + "training_step_time": 0.11883234977722168 + }, + { + "epoch": 1.739501953125e-06, + "grad_norm": 1.3508999347686768, + "learning_rate": 7.6e-05, + "loss": 0.199, + "step": 1140 + }, + { + "epoch": 1.739501953125e-06, + "model_forward_time": 0.025207996368408203, + "step": 1140 + }, + { + "epoch": 1.739501953125e-06, + "step": 1140, + "training_step_time": 0.10568594932556152 + }, + { + "epoch": 1.74102783203125e-06, + "model_forward_time": 0.025411605834960938, + "step": 1141 + }, + { + "epoch": 1.74102783203125e-06, + "step": 1141, + "training_step_time": 0.16100406646728516 + }, + { + "epoch": 1.7425537109375e-06, + "model_forward_time": 0.02485799789428711, + "step": 1142 + }, + { + "epoch": 1.7425537109375e-06, + "step": 1142, + "training_step_time": 0.1254122257232666 + }, + { + "epoch": 1.74407958984375e-06, + "model_forward_time": 0.024767398834228516, + "step": 1143 + }, + { + "epoch": 1.74407958984375e-06, + "step": 1143, + "training_step_time": 0.1106865406036377 + }, + { + "epoch": 1.74560546875e-06, + "model_forward_time": 0.025363683700561523, + "step": 1144 + }, + { + "epoch": 1.74560546875e-06, + "step": 1144, + "training_step_time": 0.11659932136535645 + }, + { + "epoch": 1.74713134765625e-06, + "model_forward_time": 0.025749921798706055, + "step": 1145 + }, + { + "epoch": 1.74713134765625e-06, + "step": 1145, + "training_step_time": 0.11061739921569824 + }, + { + "epoch": 1.7486572265625e-06, + "model_forward_time": 0.02597355842590332, + "step": 1146 + }, + { + "epoch": 1.7486572265625e-06, + "step": 1146, + "training_step_time": 0.11140155792236328 + }, + { + "epoch": 1.75018310546875e-06, + "model_forward_time": 0.02596569061279297, + "step": 1147 + }, + { + "epoch": 1.75018310546875e-06, + "step": 1147, + "training_step_time": 0.20169734954833984 + }, + { + "epoch": 1.751708984375e-06, + "model_forward_time": 0.024764060974121094, + "step": 1148 + }, + { + "epoch": 1.751708984375e-06, + "step": 1148, + "training_step_time": 0.10485363006591797 + }, + { + "epoch": 1.75323486328125e-06, + "model_forward_time": 0.024885892868041992, + "step": 1149 + }, + { + "epoch": 1.75323486328125e-06, + "step": 1149, + "training_step_time": 0.10564446449279785 + }, + { + "epoch": 1.7547607421875e-06, + "grad_norm": 1.5766886472702026, + "learning_rate": 7.666666666666667e-05, + "loss": 0.1969, + "step": 1150 + }, + { + "epoch": 1.7547607421875e-06, + "model_forward_time": 0.025374650955200195, + "step": 1150 + }, + { + "epoch": 1.7547607421875e-06, + "step": 1150, + "training_step_time": 0.10841631889343262 + }, + { + "epoch": 1.75628662109375e-06, + "model_forward_time": 0.02546095848083496, + "step": 1151 + }, + { + "epoch": 1.75628662109375e-06, + "step": 1151, + "training_step_time": 0.10930299758911133 + }, + { + "epoch": 1.7578125e-06, + "model_forward_time": 0.025404930114746094, + "step": 1152 + }, + { + "epoch": 1.7578125e-06, + "step": 1152, + "training_step_time": 0.1210775375366211 + }, + { + "epoch": 1.75933837890625e-06, + "model_forward_time": 0.025130271911621094, + "step": 1153 + }, + { + "epoch": 1.75933837890625e-06, + "step": 1153, + "training_step_time": 0.1463611125946045 + }, + { + "epoch": 1.7608642578125e-06, + "model_forward_time": 0.025393962860107422, + "step": 1154 + }, + { + "epoch": 1.7608642578125e-06, + "step": 1154, + "training_step_time": 0.18546247482299805 + }, + { + "epoch": 1.76239013671875e-06, + "model_forward_time": 0.025324106216430664, + "step": 1155 + }, + { + "epoch": 1.76239013671875e-06, + "step": 1155, + "training_step_time": 0.12699103355407715 + }, + { + "epoch": 1.763916015625e-06, + "model_forward_time": 0.024593114852905273, + "step": 1156 + }, + { + "epoch": 1.763916015625e-06, + "step": 1156, + "training_step_time": 0.12203669548034668 + }, + { + "epoch": 1.76544189453125e-06, + "model_forward_time": 0.025598526000976562, + "step": 1157 + }, + { + "epoch": 1.76544189453125e-06, + "step": 1157, + "training_step_time": 0.19469904899597168 + }, + { + "epoch": 1.7669677734375e-06, + "model_forward_time": 0.02519679069519043, + "step": 1158 + }, + { + "epoch": 1.7669677734375e-06, + "step": 1158, + "training_step_time": 0.11834096908569336 + }, + { + "epoch": 1.76849365234375e-06, + "model_forward_time": 0.023514270782470703, + "step": 1159 + }, + { + "epoch": 1.76849365234375e-06, + "step": 1159, + "training_step_time": 0.10878109931945801 + }, + { + "epoch": 1.77001953125e-06, + "grad_norm": 0.8673734664916992, + "learning_rate": 7.733333333333333e-05, + "loss": 0.1694, + "step": 1160 + }, + { + "epoch": 1.77001953125e-06, + "model_forward_time": 0.025103330612182617, + "step": 1160 + }, + { + "epoch": 1.77001953125e-06, + "step": 1160, + "training_step_time": 0.1074070930480957 + }, + { + "epoch": 1.77154541015625e-06, + "model_forward_time": 0.02619624137878418, + "step": 1161 + }, + { + "epoch": 1.77154541015625e-06, + "step": 1161, + "training_step_time": 0.11041069030761719 + }, + { + "epoch": 1.7730712890625e-06, + "model_forward_time": 0.025389432907104492, + "step": 1162 + }, + { + "epoch": 1.7730712890625e-06, + "step": 1162, + "training_step_time": 0.10895061492919922 + }, + { + "epoch": 1.77459716796875e-06, + "model_forward_time": 0.02507305145263672, + "step": 1163 + }, + { + "epoch": 1.77459716796875e-06, + "step": 1163, + "training_step_time": 0.10828685760498047 + }, + { + "epoch": 1.776123046875e-06, + "model_forward_time": 0.025153160095214844, + "step": 1164 + }, + { + "epoch": 1.776123046875e-06, + "step": 1164, + "training_step_time": 0.10617661476135254 + }, + { + "epoch": 1.77764892578125e-06, + "model_forward_time": 0.024858713150024414, + "step": 1165 + }, + { + "epoch": 1.77764892578125e-06, + "step": 1165, + "training_step_time": 0.10968232154846191 + }, + { + "epoch": 1.7791748046875e-06, + "model_forward_time": 0.02523970603942871, + "step": 1166 + }, + { + "epoch": 1.7791748046875e-06, + "step": 1166, + "training_step_time": 0.1078341007232666 + }, + { + "epoch": 1.78070068359375e-06, + "model_forward_time": 0.025442838668823242, + "step": 1167 + }, + { + "epoch": 1.78070068359375e-06, + "step": 1167, + "training_step_time": 0.10771560668945312 + }, + { + "epoch": 1.7822265625e-06, + "model_forward_time": 0.025145292282104492, + "step": 1168 + }, + { + "epoch": 1.7822265625e-06, + "step": 1168, + "training_step_time": 0.10511112213134766 + }, + { + "epoch": 1.78375244140625e-06, + "model_forward_time": 0.025131702423095703, + "step": 1169 + }, + { + "epoch": 1.78375244140625e-06, + "step": 1169, + "training_step_time": 0.10630130767822266 + }, + { + "epoch": 1.7852783203125e-06, + "grad_norm": 1.313318133354187, + "learning_rate": 7.800000000000001e-05, + "loss": 0.1877, + "step": 1170 + }, + { + "epoch": 1.7852783203125e-06, + "model_forward_time": 0.025074243545532227, + "step": 1170 + }, + { + "epoch": 1.7852783203125e-06, + "step": 1170, + "training_step_time": 0.10598230361938477 + }, + { + "epoch": 1.78680419921875e-06, + "model_forward_time": 0.02524423599243164, + "step": 1171 + }, + { + "epoch": 1.78680419921875e-06, + "step": 1171, + "training_step_time": 0.10527491569519043 + }, + { + "epoch": 1.788330078125e-06, + "model_forward_time": 0.025203466415405273, + "step": 1172 + }, + { + "epoch": 1.788330078125e-06, + "step": 1172, + "training_step_time": 0.10583162307739258 + }, + { + "epoch": 1.78985595703125e-06, + "model_forward_time": 0.024821758270263672, + "step": 1173 + }, + { + "epoch": 1.78985595703125e-06, + "step": 1173, + "training_step_time": 0.10391569137573242 + }, + { + "epoch": 1.7913818359375e-06, + "model_forward_time": 0.02537059783935547, + "step": 1174 + }, + { + "epoch": 1.7913818359375e-06, + "step": 1174, + "training_step_time": 0.10833501815795898 + }, + { + "epoch": 1.79290771484375e-06, + "model_forward_time": 0.025058746337890625, + "step": 1175 + }, + { + "epoch": 1.79290771484375e-06, + "step": 1175, + "training_step_time": 0.10681462287902832 + }, + { + "epoch": 1.79443359375e-06, + "model_forward_time": 0.025298118591308594, + "step": 1176 + }, + { + "epoch": 1.79443359375e-06, + "step": 1176, + "training_step_time": 0.17254924774169922 + }, + { + "epoch": 1.79595947265625e-06, + "model_forward_time": 0.024231672286987305, + "step": 1177 + }, + { + "epoch": 1.79595947265625e-06, + "step": 1177, + "training_step_time": 0.14313292503356934 + }, + { + "epoch": 1.7974853515625e-06, + "model_forward_time": 0.02443671226501465, + "step": 1178 + }, + { + "epoch": 1.7974853515625e-06, + "step": 1178, + "training_step_time": 0.11342692375183105 + }, + { + "epoch": 1.79901123046875e-06, + "model_forward_time": 0.0251615047454834, + "step": 1179 + }, + { + "epoch": 1.79901123046875e-06, + "step": 1179, + "training_step_time": 0.1950817108154297 + }, + { + "epoch": 1.800537109375e-06, + "grad_norm": 0.5958669781684875, + "learning_rate": 7.866666666666666e-05, + "loss": 0.1539, + "step": 1180 + }, + { + "epoch": 1.800537109375e-06, + "model_forward_time": 0.02401566505432129, + "step": 1180 + }, + { + "epoch": 1.800537109375e-06, + "step": 1180, + "training_step_time": 0.16553854942321777 + }, + { + "epoch": 1.80206298828125e-06, + "model_forward_time": 0.024207592010498047, + "step": 1181 + }, + { + "epoch": 1.80206298828125e-06, + "step": 1181, + "training_step_time": 0.16862750053405762 + }, + { + "epoch": 1.8035888671875e-06, + "model_forward_time": 0.02448248863220215, + "step": 1182 + }, + { + "epoch": 1.8035888671875e-06, + "step": 1182, + "training_step_time": 0.16234207153320312 + }, + { + "epoch": 1.80511474609375e-06, + "model_forward_time": 0.023937225341796875, + "step": 1183 + }, + { + "epoch": 1.80511474609375e-06, + "step": 1183, + "training_step_time": 0.1292862892150879 + }, + { + "epoch": 1.806640625e-06, + "model_forward_time": 0.024244070053100586, + "step": 1184 + }, + { + "epoch": 1.806640625e-06, + "step": 1184, + "training_step_time": 0.11646246910095215 + }, + { + "epoch": 1.80816650390625e-06, + "model_forward_time": 0.025247812271118164, + "step": 1185 + }, + { + "epoch": 1.80816650390625e-06, + "step": 1185, + "training_step_time": 0.10264897346496582 + }, + { + "epoch": 1.8096923828125e-06, + "model_forward_time": 0.024966955184936523, + "step": 1186 + }, + { + "epoch": 1.8096923828125e-06, + "step": 1186, + "training_step_time": 0.10508108139038086 + }, + { + "epoch": 1.81121826171875e-06, + "model_forward_time": 0.025043964385986328, + "step": 1187 + }, + { + "epoch": 1.81121826171875e-06, + "step": 1187, + "training_step_time": 0.1116480827331543 + }, + { + "epoch": 1.812744140625e-06, + "model_forward_time": 0.02576756477355957, + "step": 1188 + }, + { + "epoch": 1.812744140625e-06, + "step": 1188, + "training_step_time": 0.12711548805236816 + }, + { + "epoch": 1.81427001953125e-06, + "model_forward_time": 0.025079011917114258, + "step": 1189 + }, + { + "epoch": 1.81427001953125e-06, + "step": 1189, + "training_step_time": 0.129302978515625 + }, + { + "epoch": 1.8157958984375e-06, + "grad_norm": 1.3377705812454224, + "learning_rate": 7.933333333333334e-05, + "loss": 0.1747, + "step": 1190 + }, + { + "epoch": 1.8157958984375e-06, + "model_forward_time": 0.02483820915222168, + "step": 1190 + }, + { + "epoch": 1.8157958984375e-06, + "step": 1190, + "training_step_time": 0.15600013732910156 + }, + { + "epoch": 1.81732177734375e-06, + "model_forward_time": 0.024726152420043945, + "step": 1191 + }, + { + "epoch": 1.81732177734375e-06, + "step": 1191, + "training_step_time": 0.15784597396850586 + }, + { + "epoch": 1.81884765625e-06, + "model_forward_time": 0.024142742156982422, + "step": 1192 + }, + { + "epoch": 1.81884765625e-06, + "step": 1192, + "training_step_time": 0.19258451461791992 + }, + { + "epoch": 1.82037353515625e-06, + "model_forward_time": 0.024500131607055664, + "step": 1193 + }, + { + "epoch": 1.82037353515625e-06, + "step": 1193, + "training_step_time": 0.10904979705810547 + }, + { + "epoch": 1.8218994140625e-06, + "model_forward_time": 0.02503180503845215, + "step": 1194 + }, + { + "epoch": 1.8218994140625e-06, + "step": 1194, + "training_step_time": 0.11116695404052734 + }, + { + "epoch": 1.82342529296875e-06, + "model_forward_time": 0.02527928352355957, + "step": 1195 + }, + { + "epoch": 1.82342529296875e-06, + "step": 1195, + "training_step_time": 0.11079216003417969 + }, + { + "epoch": 1.824951171875e-06, + "model_forward_time": 0.025384187698364258, + "step": 1196 + }, + { + "epoch": 1.824951171875e-06, + "step": 1196, + "training_step_time": 0.10915899276733398 + }, + { + "epoch": 1.82647705078125e-06, + "model_forward_time": 0.025018692016601562, + "step": 1197 + }, + { + "epoch": 1.82647705078125e-06, + "step": 1197, + "training_step_time": 0.10354018211364746 + }, + { + "epoch": 1.8280029296875e-06, + "model_forward_time": 0.024402141571044922, + "step": 1198 + }, + { + "epoch": 1.8280029296875e-06, + "step": 1198, + "training_step_time": 0.10785222053527832 + }, + { + "epoch": 1.82952880859375e-06, + "model_forward_time": 0.025052547454833984, + "step": 1199 + }, + { + "epoch": 1.82952880859375e-06, + "step": 1199, + "training_step_time": 0.11100172996520996 + }, + { + "epoch": 1.8310546875e-06, + "grad_norm": 1.7151702642440796, + "learning_rate": 8e-05, + "loss": 0.1892, + "step": 1200 + }, + { + "epoch": 1.8310546875e-06, + "model_forward_time": 0.025517702102661133, + "step": 1200 + }, + { + "epoch": 1.8310546875e-06, + "step": 1200, + "training_step_time": 0.11256003379821777 + }, + { + "epoch": 1.83258056640625e-06, + "model_forward_time": 0.02570486068725586, + "step": 1201 + }, + { + "epoch": 1.83258056640625e-06, + "step": 1201, + "training_step_time": 0.10862898826599121 + }, + { + "epoch": 1.8341064453125e-06, + "model_forward_time": 0.02572011947631836, + "step": 1202 + }, + { + "epoch": 1.8341064453125e-06, + "step": 1202, + "training_step_time": 0.10820460319519043 + }, + { + "epoch": 1.83563232421875e-06, + "model_forward_time": 0.025420427322387695, + "step": 1203 + }, + { + "epoch": 1.83563232421875e-06, + "step": 1203, + "training_step_time": 0.11374711990356445 + }, + { + "epoch": 1.837158203125e-06, + "model_forward_time": 0.02543163299560547, + "step": 1204 + }, + { + "epoch": 1.837158203125e-06, + "step": 1204, + "training_step_time": 0.1141047477722168 + }, + { + "epoch": 1.83868408203125e-06, + "model_forward_time": 0.025302648544311523, + "step": 1205 + }, + { + "epoch": 1.83868408203125e-06, + "step": 1205, + "training_step_time": 0.10389876365661621 + }, + { + "epoch": 1.8402099609375e-06, + "model_forward_time": 0.025333881378173828, + "step": 1206 + }, + { + "epoch": 1.8402099609375e-06, + "step": 1206, + "training_step_time": 0.10593366622924805 + }, + { + "epoch": 1.84173583984375e-06, + "model_forward_time": 0.02511429786682129, + "step": 1207 + }, + { + "epoch": 1.84173583984375e-06, + "step": 1207, + "training_step_time": 0.10630536079406738 + }, + { + "epoch": 1.84326171875e-06, + "model_forward_time": 0.024990558624267578, + "step": 1208 + }, + { + "epoch": 1.84326171875e-06, + "step": 1208, + "training_step_time": 0.10520148277282715 + }, + { + "epoch": 1.84478759765625e-06, + "model_forward_time": 0.025490522384643555, + "step": 1209 + }, + { + "epoch": 1.84478759765625e-06, + "step": 1209, + "training_step_time": 0.10633516311645508 + }, + { + "epoch": 1.8463134765625e-06, + "grad_norm": 1.1224215030670166, + "learning_rate": 8.066666666666667e-05, + "loss": 0.2051, + "step": 1210 + }, + { + "epoch": 1.8463134765625e-06, + "model_forward_time": 0.025072097778320312, + "step": 1210 + }, + { + "epoch": 1.8463134765625e-06, + "step": 1210, + "training_step_time": 0.1083838939666748 + }, + { + "epoch": 1.84783935546875e-06, + "model_forward_time": 0.02538776397705078, + "step": 1211 + }, + { + "epoch": 1.84783935546875e-06, + "step": 1211, + "training_step_time": 0.10822677612304688 + }, + { + "epoch": 1.849365234375e-06, + "model_forward_time": 0.025068998336791992, + "step": 1212 + }, + { + "epoch": 1.849365234375e-06, + "step": 1212, + "training_step_time": 0.10438132286071777 + }, + { + "epoch": 1.85089111328125e-06, + "model_forward_time": 0.02514934539794922, + "step": 1213 + }, + { + "epoch": 1.85089111328125e-06, + "step": 1213, + "training_step_time": 0.10590362548828125 + }, + { + "epoch": 1.8524169921875e-06, + "model_forward_time": 0.025738954544067383, + "step": 1214 + }, + { + "epoch": 1.8524169921875e-06, + "step": 1214, + "training_step_time": 0.10514092445373535 + }, + { + "epoch": 1.85394287109375e-06, + "model_forward_time": 0.02535247802734375, + "step": 1215 + }, + { + "epoch": 1.85394287109375e-06, + "step": 1215, + "training_step_time": 0.10616397857666016 + }, + { + "epoch": 1.85546875e-06, + "model_forward_time": 0.024689197540283203, + "step": 1216 + }, + { + "epoch": 1.85546875e-06, + "step": 1216, + "training_step_time": 0.1058807373046875 + }, + { + "epoch": 1.85699462890625e-06, + "model_forward_time": 0.026338815689086914, + "step": 1217 + }, + { + "epoch": 1.85699462890625e-06, + "step": 1217, + "training_step_time": 0.10562491416931152 + }, + { + "epoch": 1.8585205078125e-06, + "model_forward_time": 0.025639057159423828, + "step": 1218 + }, + { + "epoch": 1.8585205078125e-06, + "step": 1218, + "training_step_time": 0.1046438217163086 + }, + { + "epoch": 1.86004638671875e-06, + "model_forward_time": 0.02837967872619629, + "step": 1219 + }, + { + "epoch": 1.86004638671875e-06, + "step": 1219, + "training_step_time": 0.10834789276123047 + }, + { + "epoch": 1.861572265625e-06, + "grad_norm": 1.3597825765609741, + "learning_rate": 8.133333333333334e-05, + "loss": 0.178, + "step": 1220 + }, + { + "epoch": 1.861572265625e-06, + "model_forward_time": 0.02520275115966797, + "step": 1220 + }, + { + "epoch": 1.861572265625e-06, + "step": 1220, + "training_step_time": 0.10679030418395996 + }, + { + "epoch": 1.86309814453125e-06, + "model_forward_time": 0.02521991729736328, + "step": 1221 + }, + { + "epoch": 1.86309814453125e-06, + "step": 1221, + "training_step_time": 0.1117544174194336 + }, + { + "epoch": 1.8646240234375e-06, + "model_forward_time": 0.025551557540893555, + "step": 1222 + }, + { + "epoch": 1.8646240234375e-06, + "step": 1222, + "training_step_time": 0.11016273498535156 + }, + { + "epoch": 1.86614990234375e-06, + "model_forward_time": 0.025426149368286133, + "step": 1223 + }, + { + "epoch": 1.86614990234375e-06, + "step": 1223, + "training_step_time": 0.15128302574157715 + }, + { + "epoch": 1.86767578125e-06, + "model_forward_time": 0.02533125877380371, + "step": 1224 + }, + { + "epoch": 1.86767578125e-06, + "step": 1224, + "training_step_time": 0.11080312728881836 + }, + { + "epoch": 1.86920166015625e-06, + "model_forward_time": 0.024776697158813477, + "step": 1225 + }, + { + "epoch": 1.86920166015625e-06, + "step": 1225, + "training_step_time": 0.10645294189453125 + }, + { + "epoch": 1.8707275390625e-06, + "model_forward_time": 0.025487661361694336, + "step": 1226 + }, + { + "epoch": 1.8707275390625e-06, + "step": 1226, + "training_step_time": 0.17316389083862305 + }, + { + "epoch": 1.87225341796875e-06, + "model_forward_time": 0.025194406509399414, + "step": 1227 + }, + { + "epoch": 1.87225341796875e-06, + "step": 1227, + "training_step_time": 0.19153761863708496 + }, + { + "epoch": 1.873779296875e-06, + "model_forward_time": 0.02446889877319336, + "step": 1228 + }, + { + "epoch": 1.873779296875e-06, + "step": 1228, + "training_step_time": 0.15164971351623535 + }, + { + "epoch": 1.87530517578125e-06, + "model_forward_time": 0.02520012855529785, + "step": 1229 + }, + { + "epoch": 1.87530517578125e-06, + "step": 1229, + "training_step_time": 0.15472984313964844 + }, + { + "epoch": 1.8768310546875e-06, + "grad_norm": 1.1429742574691772, + "learning_rate": 8.2e-05, + "loss": 0.2217, + "step": 1230 + }, + { + "epoch": 1.8768310546875e-06, + "model_forward_time": 0.024310588836669922, + "step": 1230 + }, + { + "epoch": 1.8768310546875e-06, + "step": 1230, + "training_step_time": 0.12451791763305664 + }, + { + "epoch": 1.87835693359375e-06, + "model_forward_time": 0.024514198303222656, + "step": 1231 + }, + { + "epoch": 1.87835693359375e-06, + "step": 1231, + "training_step_time": 0.11500954627990723 + }, + { + "epoch": 1.8798828125e-06, + "model_forward_time": 0.025276899337768555, + "step": 1232 + }, + { + "epoch": 1.8798828125e-06, + "step": 1232, + "training_step_time": 0.10410523414611816 + }, + { + "epoch": 1.88140869140625e-06, + "model_forward_time": 0.025158166885375977, + "step": 1233 + }, + { + "epoch": 1.88140869140625e-06, + "step": 1233, + "training_step_time": 0.1769578456878662 + }, + { + "epoch": 1.8829345703125e-06, + "model_forward_time": 0.024852752685546875, + "step": 1234 + }, + { + "epoch": 1.8829345703125e-06, + "step": 1234, + "training_step_time": 0.13149356842041016 + }, + { + "epoch": 1.88446044921875e-06, + "model_forward_time": 0.025168657302856445, + "step": 1235 + }, + { + "epoch": 1.88446044921875e-06, + "step": 1235, + "training_step_time": 0.1051630973815918 + }, + { + "epoch": 1.885986328125e-06, + "model_forward_time": 0.02644062042236328, + "step": 1236 + }, + { + "epoch": 1.885986328125e-06, + "step": 1236, + "training_step_time": 0.1124112606048584 + }, + { + "epoch": 1.88751220703125e-06, + "model_forward_time": 0.025076866149902344, + "step": 1237 + }, + { + "epoch": 1.88751220703125e-06, + "step": 1237, + "training_step_time": 0.11532330513000488 + }, + { + "epoch": 1.8890380859375e-06, + "model_forward_time": 0.025855302810668945, + "step": 1238 + }, + { + "epoch": 1.8890380859375e-06, + "step": 1238, + "training_step_time": 0.10875129699707031 + }, + { + "epoch": 1.89056396484375e-06, + "model_forward_time": 0.025469303131103516, + "step": 1239 + }, + { + "epoch": 1.89056396484375e-06, + "step": 1239, + "training_step_time": 0.16292619705200195 + }, + { + "epoch": 1.89208984375e-06, + "grad_norm": 1.0527982711791992, + "learning_rate": 8.266666666666667e-05, + "loss": 0.1652, + "step": 1240 + }, + { + "epoch": 1.89208984375e-06, + "model_forward_time": 0.025038480758666992, + "step": 1240 + }, + { + "epoch": 1.89208984375e-06, + "step": 1240, + "training_step_time": 0.10667014122009277 + }, + { + "epoch": 1.89361572265625e-06, + "model_forward_time": 0.025140762329101562, + "step": 1241 + }, + { + "epoch": 1.89361572265625e-06, + "step": 1241, + "training_step_time": 0.1059577465057373 + }, + { + "epoch": 1.8951416015625e-06, + "model_forward_time": 0.025479555130004883, + "step": 1242 + }, + { + "epoch": 1.8951416015625e-06, + "step": 1242, + "training_step_time": 0.10683703422546387 + }, + { + "epoch": 1.89666748046875e-06, + "model_forward_time": 0.02544546127319336, + "step": 1243 + }, + { + "epoch": 1.89666748046875e-06, + "step": 1243, + "training_step_time": 0.1068580150604248 + }, + { + "epoch": 1.898193359375e-06, + "model_forward_time": 0.02506089210510254, + "step": 1244 + }, + { + "epoch": 1.898193359375e-06, + "step": 1244, + "training_step_time": 0.10724449157714844 + }, + { + "epoch": 1.89971923828125e-06, + "model_forward_time": 0.025569677352905273, + "step": 1245 + }, + { + "epoch": 1.89971923828125e-06, + "step": 1245, + "training_step_time": 0.10748434066772461 + }, + { + "epoch": 1.9012451171875e-06, + "model_forward_time": 0.02441263198852539, + "step": 1246 + }, + { + "epoch": 1.9012451171875e-06, + "step": 1246, + "training_step_time": 0.10440969467163086 + }, + { + "epoch": 1.90277099609375e-06, + "model_forward_time": 0.02512192726135254, + "step": 1247 + }, + { + "epoch": 1.90277099609375e-06, + "step": 1247, + "training_step_time": 0.11462926864624023 + }, + { + "epoch": 1.904296875e-06, + "model_forward_time": 0.025743961334228516, + "step": 1248 + }, + { + "epoch": 1.904296875e-06, + "step": 1248, + "training_step_time": 0.11517047882080078 + }, + { + "epoch": 1.90582275390625e-06, + "model_forward_time": 0.025432348251342773, + "step": 1249 + }, + { + "epoch": 1.90582275390625e-06, + "step": 1249, + "training_step_time": 0.11037802696228027 + }, + { + "epoch": 1.9073486328125e-06, + "grad_norm": 1.1967273950576782, + "learning_rate": 8.333333333333334e-05, + "loss": 0.1523, + "step": 1250 + }, + { + "epoch": 1.9073486328125e-06, + "model_forward_time": 0.025904178619384766, + "step": 1250 + }, + { + "epoch": 1.9073486328125e-06, + "step": 1250, + "training_step_time": 0.21413612365722656 + }, + { + "epoch": 1.90887451171875e-06, + "model_forward_time": 0.024668216705322266, + "step": 1251 + }, + { + "epoch": 1.90887451171875e-06, + "step": 1251, + "training_step_time": 0.12158465385437012 + }, + { + "epoch": 1.910400390625e-06, + "model_forward_time": 0.024970531463623047, + "step": 1252 + }, + { + "epoch": 1.910400390625e-06, + "step": 1252, + "training_step_time": 0.11132287979125977 + }, + { + "epoch": 1.91192626953125e-06, + "model_forward_time": 0.02595043182373047, + "step": 1253 + }, + { + "epoch": 1.91192626953125e-06, + "step": 1253, + "training_step_time": 0.11145377159118652 + }, + { + "epoch": 1.9134521484375e-06, + "model_forward_time": 0.025386810302734375, + "step": 1254 + }, + { + "epoch": 1.9134521484375e-06, + "step": 1254, + "training_step_time": 0.10927700996398926 + }, + { + "epoch": 1.91497802734375e-06, + "model_forward_time": 0.025403499603271484, + "step": 1255 + }, + { + "epoch": 1.91497802734375e-06, + "step": 1255, + "training_step_time": 0.10837388038635254 + }, + { + "epoch": 1.91650390625e-06, + "model_forward_time": 0.026601552963256836, + "step": 1256 + }, + { + "epoch": 1.91650390625e-06, + "step": 1256, + "training_step_time": 0.11478757858276367 + }, + { + "epoch": 1.91802978515625e-06, + "model_forward_time": 0.025215625762939453, + "step": 1257 + }, + { + "epoch": 1.91802978515625e-06, + "step": 1257, + "training_step_time": 0.11050009727478027 + }, + { + "epoch": 1.9195556640625e-06, + "model_forward_time": 0.025013208389282227, + "step": 1258 + }, + { + "epoch": 1.9195556640625e-06, + "step": 1258, + "training_step_time": 0.11108922958374023 + }, + { + "epoch": 1.92108154296875e-06, + "model_forward_time": 0.02761077880859375, + "step": 1259 + }, + { + "epoch": 1.92108154296875e-06, + "step": 1259, + "training_step_time": 0.11068534851074219 + }, + { + "epoch": 1.922607421875e-06, + "grad_norm": 0.8787569999694824, + "learning_rate": 8.4e-05, + "loss": 0.1607, + "step": 1260 + }, + { + "epoch": 1.922607421875e-06, + "model_forward_time": 0.026442289352416992, + "step": 1260 + }, + { + "epoch": 1.922607421875e-06, + "step": 1260, + "training_step_time": 0.11026668548583984 + }, + { + "epoch": 1.92413330078125e-06, + "model_forward_time": 0.02527475357055664, + "step": 1261 + }, + { + "epoch": 1.92413330078125e-06, + "step": 1261, + "training_step_time": 0.1059408187866211 + }, + { + "epoch": 1.9256591796875e-06, + "model_forward_time": 0.025359630584716797, + "step": 1262 + }, + { + "epoch": 1.9256591796875e-06, + "step": 1262, + "training_step_time": 0.11127209663391113 + }, + { + "epoch": 1.92718505859375e-06, + "model_forward_time": 0.025199174880981445, + "step": 1263 + }, + { + "epoch": 1.92718505859375e-06, + "step": 1263, + "training_step_time": 0.1074681282043457 + }, + { + "epoch": 1.9287109375e-06, + "model_forward_time": 0.025114059448242188, + "step": 1264 + }, + { + "epoch": 1.9287109375e-06, + "step": 1264, + "training_step_time": 0.11225652694702148 + }, + { + "epoch": 1.93023681640625e-06, + "model_forward_time": 0.025652408599853516, + "step": 1265 + }, + { + "epoch": 1.93023681640625e-06, + "step": 1265, + "training_step_time": 0.10862898826599121 + }, + { + "epoch": 1.9317626953125e-06, + "model_forward_time": 0.02547907829284668, + "step": 1266 + }, + { + "epoch": 1.9317626953125e-06, + "step": 1266, + "training_step_time": 0.11748576164245605 + }, + { + "epoch": 1.93328857421875e-06, + "model_forward_time": 0.025145769119262695, + "step": 1267 + }, + { + "epoch": 1.93328857421875e-06, + "step": 1267, + "training_step_time": 0.10688948631286621 + }, + { + "epoch": 1.934814453125e-06, + "model_forward_time": 0.025434494018554688, + "step": 1268 + }, + { + "epoch": 1.934814453125e-06, + "step": 1268, + "training_step_time": 0.11642241477966309 + }, + { + "epoch": 1.93634033203125e-06, + "model_forward_time": 0.02573680877685547, + "step": 1269 + }, + { + "epoch": 1.93634033203125e-06, + "step": 1269, + "training_step_time": 0.19725751876831055 + }, + { + "epoch": 1.9378662109375e-06, + "grad_norm": 0.8215675354003906, + "learning_rate": 8.466666666666667e-05, + "loss": 0.1354, + "step": 1270 + }, + { + "epoch": 1.9378662109375e-06, + "model_forward_time": 0.024491548538208008, + "step": 1270 + }, + { + "epoch": 1.9378662109375e-06, + "step": 1270, + "training_step_time": 0.10608100891113281 + }, + { + "epoch": 1.93939208984375e-06, + "model_forward_time": 0.024990320205688477, + "step": 1271 + }, + { + "epoch": 1.93939208984375e-06, + "step": 1271, + "training_step_time": 0.11183047294616699 + }, + { + "epoch": 1.94091796875e-06, + "model_forward_time": 0.025394916534423828, + "step": 1272 + }, + { + "epoch": 1.94091796875e-06, + "step": 1272, + "training_step_time": 0.12755179405212402 + }, + { + "epoch": 1.94244384765625e-06, + "model_forward_time": 0.02653336524963379, + "step": 1273 + }, + { + "epoch": 1.94244384765625e-06, + "step": 1273, + "training_step_time": 0.11612796783447266 + }, + { + "epoch": 1.9439697265625e-06, + "model_forward_time": 0.025595426559448242, + "step": 1274 + }, + { + "epoch": 1.9439697265625e-06, + "step": 1274, + "training_step_time": 0.13203763961791992 + }, + { + "epoch": 1.94549560546875e-06, + "model_forward_time": 0.02534627914428711, + "step": 1275 + }, + { + "epoch": 1.94549560546875e-06, + "step": 1275, + "training_step_time": 0.15285325050354004 + }, + { + "epoch": 1.947021484375e-06, + "model_forward_time": 0.02494192123413086, + "step": 1276 + }, + { + "epoch": 1.947021484375e-06, + "step": 1276, + "training_step_time": 0.17490196228027344 + }, + { + "epoch": 1.94854736328125e-06, + "model_forward_time": 0.025095224380493164, + "step": 1277 + }, + { + "epoch": 1.94854736328125e-06, + "step": 1277, + "training_step_time": 0.16460013389587402 + }, + { + "epoch": 1.9500732421875e-06, + "model_forward_time": 0.02494978904724121, + "step": 1278 + }, + { + "epoch": 1.9500732421875e-06, + "step": 1278, + "training_step_time": 0.10694408416748047 + }, + { + "epoch": 1.95159912109375e-06, + "model_forward_time": 0.024760723114013672, + "step": 1279 + }, + { + "epoch": 1.95159912109375e-06, + "step": 1279, + "training_step_time": 0.17813968658447266 + }, + { + "epoch": 1.953125e-06, + "grad_norm": 1.017802357673645, + "learning_rate": 8.533333333333334e-05, + "loss": 0.1764, + "step": 1280 + }, + { + "epoch": 1.953125e-06, + "model_forward_time": 0.02478933334350586, + "step": 1280 + }, + { + "epoch": 1.953125e-06, + "step": 1280, + "training_step_time": 0.12899208068847656 + }, + { + "epoch": 1.95465087890625e-06, + "model_forward_time": 0.024315595626831055, + "step": 1281 + }, + { + "epoch": 1.95465087890625e-06, + "step": 1281, + "training_step_time": 0.11281275749206543 + }, + { + "epoch": 1.9561767578125e-06, + "model_forward_time": 0.025557994842529297, + "step": 1282 + }, + { + "epoch": 1.9561767578125e-06, + "step": 1282, + "training_step_time": 0.11891531944274902 + }, + { + "epoch": 1.95770263671875e-06, + "model_forward_time": 0.025480985641479492, + "step": 1283 + }, + { + "epoch": 1.95770263671875e-06, + "step": 1283, + "training_step_time": 0.11455106735229492 + }, + { + "epoch": 1.959228515625e-06, + "model_forward_time": 0.02533268928527832, + "step": 1284 + }, + { + "epoch": 1.959228515625e-06, + "step": 1284, + "training_step_time": 0.1552126407623291 + }, + { + "epoch": 1.96075439453125e-06, + "model_forward_time": 0.024867534637451172, + "step": 1285 + }, + { + "epoch": 1.96075439453125e-06, + "step": 1285, + "training_step_time": 0.1475541591644287 + }, + { + "epoch": 1.9622802734375e-06, + "model_forward_time": 0.024742603302001953, + "step": 1286 + }, + { + "epoch": 1.9622802734375e-06, + "step": 1286, + "training_step_time": 0.10786318778991699 + }, + { + "epoch": 1.96380615234375e-06, + "model_forward_time": 0.025065183639526367, + "step": 1287 + }, + { + "epoch": 1.96380615234375e-06, + "step": 1287, + "training_step_time": 0.10768795013427734 + }, + { + "epoch": 1.96533203125e-06, + "model_forward_time": 0.025559663772583008, + "step": 1288 + }, + { + "epoch": 1.96533203125e-06, + "step": 1288, + "training_step_time": 0.1079401969909668 + }, + { + "epoch": 1.96685791015625e-06, + "model_forward_time": 0.025629520416259766, + "step": 1289 + }, + { + "epoch": 1.96685791015625e-06, + "step": 1289, + "training_step_time": 0.10952162742614746 + }, + { + "epoch": 1.9683837890625e-06, + "grad_norm": 0.8259382843971252, + "learning_rate": 8.6e-05, + "loss": 0.1595, + "step": 1290 + }, + { + "epoch": 1.9683837890625e-06, + "model_forward_time": 0.02487325668334961, + "step": 1290 + }, + { + "epoch": 1.9683837890625e-06, + "step": 1290, + "training_step_time": 0.11021041870117188 + }, + { + "epoch": 1.96990966796875e-06, + "model_forward_time": 0.025946378707885742, + "step": 1291 + }, + { + "epoch": 1.96990966796875e-06, + "step": 1291, + "training_step_time": 0.10572671890258789 + }, + { + "epoch": 1.971435546875e-06, + "model_forward_time": 0.0256500244140625, + "step": 1292 + }, + { + "epoch": 1.971435546875e-06, + "step": 1292, + "training_step_time": 0.18448877334594727 + }, + { + "epoch": 1.97296142578125e-06, + "model_forward_time": 0.025085926055908203, + "step": 1293 + }, + { + "epoch": 1.97296142578125e-06, + "step": 1293, + "training_step_time": 0.15895318984985352 + }, + { + "epoch": 1.9744873046875e-06, + "model_forward_time": 0.024848461151123047, + "step": 1294 + }, + { + "epoch": 1.9744873046875e-06, + "step": 1294, + "training_step_time": 0.11060714721679688 + }, + { + "epoch": 1.97601318359375e-06, + "model_forward_time": 0.025487661361694336, + "step": 1295 + }, + { + "epoch": 1.97601318359375e-06, + "step": 1295, + "training_step_time": 0.10629844665527344 + }, + { + "epoch": 1.9775390625e-06, + "model_forward_time": 0.025498628616333008, + "step": 1296 + }, + { + "epoch": 1.9775390625e-06, + "step": 1296, + "training_step_time": 0.1059727668762207 + }, + { + "epoch": 1.97906494140625e-06, + "model_forward_time": 0.0265653133392334, + "step": 1297 + }, + { + "epoch": 1.97906494140625e-06, + "step": 1297, + "training_step_time": 0.11496949195861816 + }, + { + "epoch": 1.9805908203125e-06, + "model_forward_time": 0.02538919448852539, + "step": 1298 + }, + { + "epoch": 1.9805908203125e-06, + "step": 1298, + "training_step_time": 0.10827040672302246 + }, + { + "epoch": 1.98211669921875e-06, + "model_forward_time": 0.02447652816772461, + "step": 1299 + }, + { + "epoch": 1.98211669921875e-06, + "step": 1299, + "training_step_time": 0.10711097717285156 + }, + { + "epoch": 1.983642578125e-06, + "grad_norm": 1.377303123474121, + "learning_rate": 8.666666666666667e-05, + "loss": 0.1958, + "step": 1300 + }, + { + "epoch": 1.983642578125e-06, + "model_forward_time": 0.025193214416503906, + "step": 1300 + }, + { + "epoch": 1.983642578125e-06, + "step": 1300, + "training_step_time": 0.10614585876464844 + }, + { + "epoch": 1.98516845703125e-06, + "model_forward_time": 0.028905391693115234, + "step": 1301 + }, + { + "epoch": 1.98516845703125e-06, + "step": 1301, + "training_step_time": 0.11077356338500977 + }, + { + "epoch": 1.9866943359375e-06, + "model_forward_time": 0.025732040405273438, + "step": 1302 + }, + { + "epoch": 1.9866943359375e-06, + "step": 1302, + "training_step_time": 0.10677981376647949 + }, + { + "epoch": 1.98822021484375e-06, + "model_forward_time": 0.02523493766784668, + "step": 1303 + }, + { + "epoch": 1.98822021484375e-06, + "step": 1303, + "training_step_time": 0.11117339134216309 + }, + { + "epoch": 1.98974609375e-06, + "model_forward_time": 0.025220870971679688, + "step": 1304 + }, + { + "epoch": 1.98974609375e-06, + "step": 1304, + "training_step_time": 0.11197733879089355 + }, + { + "epoch": 1.99127197265625e-06, + "model_forward_time": 0.024124622344970703, + "step": 1305 + }, + { + "epoch": 1.99127197265625e-06, + "step": 1305, + "training_step_time": 0.10925102233886719 + }, + { + "epoch": 1.9927978515625e-06, + "model_forward_time": 0.025442123413085938, + "step": 1306 + }, + { + "epoch": 1.9927978515625e-06, + "step": 1306, + "training_step_time": 0.10828447341918945 + }, + { + "epoch": 1.99432373046875e-06, + "model_forward_time": 0.02536916732788086, + "step": 1307 + }, + { + "epoch": 1.99432373046875e-06, + "step": 1307, + "training_step_time": 0.10908031463623047 + }, + { + "epoch": 1.995849609375e-06, + "model_forward_time": 0.02540278434753418, + "step": 1308 + }, + { + "epoch": 1.995849609375e-06, + "step": 1308, + "training_step_time": 0.10738182067871094 + }, + { + "epoch": 1.99737548828125e-06, + "model_forward_time": 0.025277376174926758, + "step": 1309 + }, + { + "epoch": 1.99737548828125e-06, + "step": 1309, + "training_step_time": 0.11009049415588379 + }, + { + "epoch": 1.9989013671875e-06, + "grad_norm": 2.1278889179229736, + "learning_rate": 8.733333333333333e-05, + "loss": 0.1806, + "step": 1310 + }, + { + "epoch": 1.9989013671875e-06, + "model_forward_time": 0.02548074722290039, + "step": 1310 + }, + { + "epoch": 1.9989013671875e-06, + "step": 1310, + "training_step_time": 0.10800623893737793 + }, + { + "epoch": 2.00042724609375e-06, + "model_forward_time": 0.02530670166015625, + "step": 1311 + }, + { + "epoch": 2.00042724609375e-06, + "step": 1311, + "training_step_time": 0.10456681251525879 + }, + { + "epoch": 2.001953125e-06, + "model_forward_time": 0.025532960891723633, + "step": 1312 + }, + { + "epoch": 2.001953125e-06, + "step": 1312, + "training_step_time": 0.10766410827636719 + }, + { + "epoch": 2.00347900390625e-06, + "model_forward_time": 0.0254819393157959, + "step": 1313 + }, + { + "epoch": 2.00347900390625e-06, + "step": 1313, + "training_step_time": 0.1063995361328125 + }, + { + "epoch": 2.0050048828125e-06, + "model_forward_time": 0.025454282760620117, + "step": 1314 + }, + { + "epoch": 2.0050048828125e-06, + "step": 1314, + "training_step_time": 0.10877442359924316 + }, + { + "epoch": 2.00653076171875e-06, + "model_forward_time": 0.02510380744934082, + "step": 1315 + }, + { + "epoch": 2.00653076171875e-06, + "step": 1315, + "training_step_time": 0.10476922988891602 + }, + { + "epoch": 2.008056640625e-06, + "model_forward_time": 0.026585817337036133, + "step": 1316 + }, + { + "epoch": 2.008056640625e-06, + "step": 1316, + "training_step_time": 0.15471768379211426 + }, + { + "epoch": 2.00958251953125e-06, + "model_forward_time": 0.025205135345458984, + "step": 1317 + }, + { + "epoch": 2.00958251953125e-06, + "step": 1317, + "training_step_time": 0.11152386665344238 + }, + { + "epoch": 2.0111083984375e-06, + "model_forward_time": 0.02523517608642578, + "step": 1318 + }, + { + "epoch": 2.0111083984375e-06, + "step": 1318, + "training_step_time": 0.21431779861450195 + }, + { + "epoch": 2.01263427734375e-06, + "model_forward_time": 0.024465084075927734, + "step": 1319 + }, + { + "epoch": 2.01263427734375e-06, + "step": 1319, + "training_step_time": 0.14450311660766602 + }, + { + "epoch": 2.01416015625e-06, + "grad_norm": 0.6938613653182983, + "learning_rate": 8.800000000000001e-05, + "loss": 0.1772, + "step": 1320 + }, + { + "epoch": 2.01416015625e-06, + "model_forward_time": 0.024492979049682617, + "step": 1320 + }, + { + "epoch": 2.01416015625e-06, + "step": 1320, + "training_step_time": 0.10764026641845703 + }, + { + "epoch": 2.01568603515625e-06, + "model_forward_time": 0.024759292602539062, + "step": 1321 + }, + { + "epoch": 2.01568603515625e-06, + "step": 1321, + "training_step_time": 0.15001654624938965 + }, + { + "epoch": 2.0172119140625e-06, + "model_forward_time": 0.024561166763305664, + "step": 1322 + }, + { + "epoch": 2.0172119140625e-06, + "step": 1322, + "training_step_time": 0.15971136093139648 + }, + { + "epoch": 2.01873779296875e-06, + "model_forward_time": 0.024271488189697266, + "step": 1323 + }, + { + "epoch": 2.01873779296875e-06, + "step": 1323, + "training_step_time": 0.1512136459350586 + }, + { + "epoch": 2.020263671875e-06, + "model_forward_time": 0.026529550552368164, + "step": 1324 + }, + { + "epoch": 2.020263671875e-06, + "step": 1324, + "training_step_time": 0.11220192909240723 + }, + { + "epoch": 2.02178955078125e-06, + "model_forward_time": 0.024919986724853516, + "step": 1325 + }, + { + "epoch": 2.02178955078125e-06, + "step": 1325, + "training_step_time": 0.10515546798706055 + }, + { + "epoch": 2.0233154296875e-06, + "model_forward_time": 0.025232791900634766, + "step": 1326 + }, + { + "epoch": 2.0233154296875e-06, + "step": 1326, + "training_step_time": 0.12074518203735352 + }, + { + "epoch": 2.02484130859375e-06, + "model_forward_time": 0.025687456130981445, + "step": 1327 + }, + { + "epoch": 2.02484130859375e-06, + "step": 1327, + "training_step_time": 0.13652658462524414 + }, + { + "epoch": 2.0263671875e-06, + "model_forward_time": 0.0252377986907959, + "step": 1328 + }, + { + "epoch": 2.0263671875e-06, + "step": 1328, + "training_step_time": 0.11081981658935547 + }, + { + "epoch": 2.02789306640625e-06, + "model_forward_time": 0.025160789489746094, + "step": 1329 + }, + { + "epoch": 2.02789306640625e-06, + "step": 1329, + "training_step_time": 0.11311936378479004 + }, + { + "epoch": 2.0294189453125e-06, + "grad_norm": 0.8620762825012207, + "learning_rate": 8.866666666666668e-05, + "loss": 0.1365, + "step": 1330 + }, + { + "epoch": 2.0294189453125e-06, + "model_forward_time": 0.025264263153076172, + "step": 1330 + }, + { + "epoch": 2.0294189453125e-06, + "step": 1330, + "training_step_time": 0.11312174797058105 + }, + { + "epoch": 2.03094482421875e-06, + "model_forward_time": 0.024471282958984375, + "step": 1331 + }, + { + "epoch": 2.03094482421875e-06, + "step": 1331, + "training_step_time": 0.1130518913269043 + }, + { + "epoch": 2.032470703125e-06, + "model_forward_time": 0.025809049606323242, + "step": 1332 + }, + { + "epoch": 2.032470703125e-06, + "step": 1332, + "training_step_time": 0.20093965530395508 + }, + { + "epoch": 2.03399658203125e-06, + "model_forward_time": 0.024343252182006836, + "step": 1333 + }, + { + "epoch": 2.03399658203125e-06, + "step": 1333, + "training_step_time": 0.10858726501464844 + }, + { + "epoch": 2.0355224609375e-06, + "model_forward_time": 0.023871660232543945, + "step": 1334 + }, + { + "epoch": 2.0355224609375e-06, + "step": 1334, + "training_step_time": 0.10741734504699707 + }, + { + "epoch": 2.03704833984375e-06, + "model_forward_time": 0.02486586570739746, + "step": 1335 + }, + { + "epoch": 2.03704833984375e-06, + "step": 1335, + "training_step_time": 0.1115102767944336 + }, + { + "epoch": 2.03857421875e-06, + "model_forward_time": 0.0257568359375, + "step": 1336 + }, + { + "epoch": 2.03857421875e-06, + "step": 1336, + "training_step_time": 0.10562515258789062 + }, + { + "epoch": 2.04010009765625e-06, + "model_forward_time": 0.025190114974975586, + "step": 1337 + }, + { + "epoch": 2.04010009765625e-06, + "step": 1337, + "training_step_time": 0.188643217086792 + }, + { + "epoch": 2.0416259765625e-06, + "model_forward_time": 0.026105165481567383, + "step": 1338 + }, + { + "epoch": 2.0416259765625e-06, + "step": 1338, + "training_step_time": 0.1392374038696289 + }, + { + "epoch": 2.04315185546875e-06, + "model_forward_time": 0.024634361267089844, + "step": 1339 + }, + { + "epoch": 2.04315185546875e-06, + "step": 1339, + "training_step_time": 0.12996745109558105 + }, + { + "epoch": 2.044677734375e-06, + "grad_norm": 1.3869773149490356, + "learning_rate": 8.933333333333334e-05, + "loss": 0.147, + "step": 1340 + }, + { + "epoch": 2.044677734375e-06, + "model_forward_time": 0.024804115295410156, + "step": 1340 + }, + { + "epoch": 2.044677734375e-06, + "step": 1340, + "training_step_time": 0.10552811622619629 + }, + { + "epoch": 2.04620361328125e-06, + "model_forward_time": 0.02529740333557129, + "step": 1341 + }, + { + "epoch": 2.04620361328125e-06, + "step": 1341, + "training_step_time": 0.10736823081970215 + }, + { + "epoch": 2.0477294921875e-06, + "model_forward_time": 0.025519609451293945, + "step": 1342 + }, + { + "epoch": 2.0477294921875e-06, + "step": 1342, + "training_step_time": 0.1083841323852539 + }, + { + "epoch": 2.04925537109375e-06, + "model_forward_time": 0.025340795516967773, + "step": 1343 + }, + { + "epoch": 2.04925537109375e-06, + "step": 1343, + "training_step_time": 0.10663962364196777 + }, + { + "epoch": 2.05078125e-06, + "model_forward_time": 0.025208473205566406, + "step": 1344 + }, + { + "epoch": 2.05078125e-06, + "step": 1344, + "training_step_time": 0.1079409122467041 + }, + { + "epoch": 2.05230712890625e-06, + "model_forward_time": 0.02545452117919922, + "step": 1345 + }, + { + "epoch": 2.05230712890625e-06, + "step": 1345, + "training_step_time": 0.1059122085571289 + }, + { + "epoch": 2.0538330078125e-06, + "model_forward_time": 0.024996280670166016, + "step": 1346 + }, + { + "epoch": 2.0538330078125e-06, + "step": 1346, + "training_step_time": 0.11000514030456543 + }, + { + "epoch": 2.05535888671875e-06, + "model_forward_time": 0.025363683700561523, + "step": 1347 + }, + { + "epoch": 2.05535888671875e-06, + "step": 1347, + "training_step_time": 0.10774588584899902 + }, + { + "epoch": 2.056884765625e-06, + "model_forward_time": 0.025640487670898438, + "step": 1348 + }, + { + "epoch": 2.056884765625e-06, + "step": 1348, + "training_step_time": 0.10753393173217773 + }, + { + "epoch": 2.05841064453125e-06, + "model_forward_time": 0.025701522827148438, + "step": 1349 + }, + { + "epoch": 2.05841064453125e-06, + "step": 1349, + "training_step_time": 0.1148684024810791 + }, + { + "epoch": 2.0599365234375e-06, + "grad_norm": 0.7594265341758728, + "learning_rate": 9e-05, + "loss": 0.1954, + "step": 1350 + }, + { + "epoch": 2.0599365234375e-06, + "model_forward_time": 0.025305747985839844, + "step": 1350 + }, + { + "epoch": 2.0599365234375e-06, + "step": 1350, + "training_step_time": 0.10439133644104004 + }, + { + "epoch": 2.06146240234375e-06, + "model_forward_time": 0.025230884552001953, + "step": 1351 + }, + { + "epoch": 2.06146240234375e-06, + "step": 1351, + "training_step_time": 0.11135077476501465 + }, + { + "epoch": 2.06298828125e-06, + "model_forward_time": 0.025582075119018555, + "step": 1352 + }, + { + "epoch": 2.06298828125e-06, + "step": 1352, + "training_step_time": 0.10800790786743164 + }, + { + "epoch": 2.06451416015625e-06, + "model_forward_time": 0.025220632553100586, + "step": 1353 + }, + { + "epoch": 2.06451416015625e-06, + "step": 1353, + "training_step_time": 0.1056063175201416 + }, + { + "epoch": 2.0660400390625e-06, + "model_forward_time": 0.026401042938232422, + "step": 1354 + }, + { + "epoch": 2.0660400390625e-06, + "step": 1354, + "training_step_time": 0.10689640045166016 + }, + { + "epoch": 2.06756591796875e-06, + "model_forward_time": 0.02664041519165039, + "step": 1355 + }, + { + "epoch": 2.06756591796875e-06, + "step": 1355, + "training_step_time": 0.10986995697021484 + }, + { + "epoch": 2.069091796875e-06, + "model_forward_time": 0.025368452072143555, + "step": 1356 + }, + { + "epoch": 2.069091796875e-06, + "step": 1356, + "training_step_time": 0.10695433616638184 + }, + { + "epoch": 2.07061767578125e-06, + "model_forward_time": 0.025503158569335938, + "step": 1357 + }, + { + "epoch": 2.07061767578125e-06, + "step": 1357, + "training_step_time": 0.10642409324645996 + }, + { + "epoch": 2.0721435546875e-06, + "model_forward_time": 0.025232791900634766, + "step": 1358 + }, + { + "epoch": 2.0721435546875e-06, + "step": 1358, + "training_step_time": 0.10681033134460449 + }, + { + "epoch": 2.07366943359375e-06, + "model_forward_time": 0.02518606185913086, + "step": 1359 + }, + { + "epoch": 2.07366943359375e-06, + "step": 1359, + "training_step_time": 0.1127328872680664 + }, + { + "epoch": 2.0751953125e-06, + "grad_norm": 0.98515784740448, + "learning_rate": 9.066666666666667e-05, + "loss": 0.1698, + "step": 1360 + }, + { + "epoch": 2.0751953125e-06, + "model_forward_time": 0.025780677795410156, + "step": 1360 + }, + { + "epoch": 2.0751953125e-06, + "step": 1360, + "training_step_time": 0.10627460479736328 + }, + { + "epoch": 2.07672119140625e-06, + "model_forward_time": 0.02504897117614746, + "step": 1361 + }, + { + "epoch": 2.07672119140625e-06, + "step": 1361, + "training_step_time": 0.10714435577392578 + }, + { + "epoch": 2.0782470703125e-06, + "model_forward_time": 0.024992942810058594, + "step": 1362 + }, + { + "epoch": 2.0782470703125e-06, + "step": 1362, + "training_step_time": 0.20529413223266602 + }, + { + "epoch": 2.07977294921875e-06, + "model_forward_time": 0.02433156967163086, + "step": 1363 + }, + { + "epoch": 2.07977294921875e-06, + "step": 1363, + "training_step_time": 0.10849475860595703 + }, + { + "epoch": 2.081298828125e-06, + "model_forward_time": 0.025172948837280273, + "step": 1364 + }, + { + "epoch": 2.081298828125e-06, + "step": 1364, + "training_step_time": 0.18041586875915527 + }, + { + "epoch": 2.08282470703125e-06, + "model_forward_time": 0.024529695510864258, + "step": 1365 + }, + { + "epoch": 2.08282470703125e-06, + "step": 1365, + "training_step_time": 0.18352413177490234 + }, + { + "epoch": 2.0843505859375e-06, + "model_forward_time": 0.02430438995361328, + "step": 1366 + }, + { + "epoch": 2.0843505859375e-06, + "step": 1366, + "training_step_time": 0.1677396297454834 + }, + { + "epoch": 2.08587646484375e-06, + "model_forward_time": 0.025100231170654297, + "step": 1367 + }, + { + "epoch": 2.08587646484375e-06, + "step": 1367, + "training_step_time": 0.15979933738708496 + }, + { + "epoch": 2.08740234375e-06, + "model_forward_time": 0.024810791015625, + "step": 1368 + }, + { + "epoch": 2.08740234375e-06, + "step": 1368, + "training_step_time": 0.12097287178039551 + }, + { + "epoch": 2.08892822265625e-06, + "model_forward_time": 0.02483081817626953, + "step": 1369 + }, + { + "epoch": 2.08892822265625e-06, + "step": 1369, + "training_step_time": 0.12729477882385254 + }, + { + "epoch": 2.0904541015625e-06, + "grad_norm": 1.365875482559204, + "learning_rate": 9.133333333333334e-05, + "loss": 0.1436, + "step": 1370 + }, + { + "epoch": 2.0904541015625e-06, + "model_forward_time": 0.024931669235229492, + "step": 1370 + }, + { + "epoch": 2.0904541015625e-06, + "step": 1370, + "training_step_time": 0.10365152359008789 + }, + { + "epoch": 2.09197998046875e-06, + "model_forward_time": 0.025618314743041992, + "step": 1371 + }, + { + "epoch": 2.09197998046875e-06, + "step": 1371, + "training_step_time": 0.10644173622131348 + }, + { + "epoch": 2.093505859375e-06, + "model_forward_time": 0.025599956512451172, + "step": 1372 + }, + { + "epoch": 2.093505859375e-06, + "step": 1372, + "training_step_time": 0.166151762008667 + }, + { + "epoch": 2.09503173828125e-06, + "model_forward_time": 0.025098085403442383, + "step": 1373 + }, + { + "epoch": 2.09503173828125e-06, + "step": 1373, + "training_step_time": 0.1393415927886963 + }, + { + "epoch": 2.0965576171875e-06, + "model_forward_time": 0.024698734283447266, + "step": 1374 + }, + { + "epoch": 2.0965576171875e-06, + "step": 1374, + "training_step_time": 0.10863733291625977 + }, + { + "epoch": 2.09808349609375e-06, + "model_forward_time": 0.024762868881225586, + "step": 1375 + }, + { + "epoch": 2.09808349609375e-06, + "step": 1375, + "training_step_time": 0.11086225509643555 + }, + { + "epoch": 2.099609375e-06, + "model_forward_time": 0.025424480438232422, + "step": 1376 + }, + { + "epoch": 2.099609375e-06, + "step": 1376, + "training_step_time": 0.11547994613647461 + }, + { + "epoch": 2.10113525390625e-06, + "model_forward_time": 0.02519989013671875, + "step": 1377 + }, + { + "epoch": 2.10113525390625e-06, + "step": 1377, + "training_step_time": 0.11206674575805664 + }, + { + "epoch": 2.1026611328125e-06, + "model_forward_time": 0.025083303451538086, + "step": 1378 + }, + { + "epoch": 2.1026611328125e-06, + "step": 1378, + "training_step_time": 0.11881184577941895 + }, + { + "epoch": 2.10418701171875e-06, + "model_forward_time": 0.025065898895263672, + "step": 1379 + }, + { + "epoch": 2.10418701171875e-06, + "step": 1379, + "training_step_time": 0.11109232902526855 + }, + { + "epoch": 2.105712890625e-06, + "grad_norm": 1.4226160049438477, + "learning_rate": 9.200000000000001e-05, + "loss": 0.1428, + "step": 1380 + }, + { + "epoch": 2.105712890625e-06, + "model_forward_time": 0.025165319442749023, + "step": 1380 + }, + { + "epoch": 2.105712890625e-06, + "step": 1380, + "training_step_time": 0.1115870475769043 + }, + { + "epoch": 2.10723876953125e-06, + "model_forward_time": 0.025751113891601562, + "step": 1381 + }, + { + "epoch": 2.10723876953125e-06, + "step": 1381, + "training_step_time": 0.10475730895996094 + }, + { + "epoch": 2.1087646484375e-06, + "model_forward_time": 0.02521824836730957, + "step": 1382 + }, + { + "epoch": 2.1087646484375e-06, + "step": 1382, + "training_step_time": 0.11392545700073242 + }, + { + "epoch": 2.11029052734375e-06, + "model_forward_time": 0.024941682815551758, + "step": 1383 + }, + { + "epoch": 2.11029052734375e-06, + "step": 1383, + "training_step_time": 0.11006975173950195 + }, + { + "epoch": 2.11181640625e-06, + "model_forward_time": 0.025444746017456055, + "step": 1384 + }, + { + "epoch": 2.11181640625e-06, + "step": 1384, + "training_step_time": 0.10741901397705078 + }, + { + "epoch": 2.11334228515625e-06, + "model_forward_time": 0.025771141052246094, + "step": 1385 + }, + { + "epoch": 2.11334228515625e-06, + "step": 1385, + "training_step_time": 0.10753822326660156 + }, + { + "epoch": 2.1148681640625e-06, + "model_forward_time": 0.025191545486450195, + "step": 1386 + }, + { + "epoch": 2.1148681640625e-06, + "step": 1386, + "training_step_time": 0.10698175430297852 + }, + { + "epoch": 2.11639404296875e-06, + "model_forward_time": 0.025516510009765625, + "step": 1387 + }, + { + "epoch": 2.11639404296875e-06, + "step": 1387, + "training_step_time": 0.12093162536621094 + }, + { + "epoch": 2.117919921875e-06, + "model_forward_time": 0.025435686111450195, + "step": 1388 + }, + { + "epoch": 2.117919921875e-06, + "step": 1388, + "training_step_time": 0.11258983612060547 + }, + { + "epoch": 2.11944580078125e-06, + "model_forward_time": 0.025190114974975586, + "step": 1389 + }, + { + "epoch": 2.11944580078125e-06, + "step": 1389, + "training_step_time": 0.10873651504516602 + }, + { + "epoch": 2.1209716796875e-06, + "grad_norm": 1.1633135080337524, + "learning_rate": 9.266666666666666e-05, + "loss": 0.1679, + "step": 1390 + }, + { + "epoch": 2.1209716796875e-06, + "model_forward_time": 0.025387048721313477, + "step": 1390 + }, + { + "epoch": 2.1209716796875e-06, + "step": 1390, + "training_step_time": 0.10318899154663086 + }, + { + "epoch": 2.12249755859375e-06, + "model_forward_time": 0.025238513946533203, + "step": 1391 + }, + { + "epoch": 2.12249755859375e-06, + "step": 1391, + "training_step_time": 0.10442447662353516 + }, + { + "epoch": 2.1240234375e-06, + "model_forward_time": 0.025487422943115234, + "step": 1392 + }, + { + "epoch": 2.1240234375e-06, + "step": 1392, + "training_step_time": 0.10919785499572754 + }, + { + "epoch": 2.12554931640625e-06, + "model_forward_time": 0.025844335556030273, + "step": 1393 + }, + { + "epoch": 2.12554931640625e-06, + "step": 1393, + "training_step_time": 0.10811805725097656 + }, + { + "epoch": 2.1270751953125e-06, + "model_forward_time": 0.025135040283203125, + "step": 1394 + }, + { + "epoch": 2.1270751953125e-06, + "step": 1394, + "training_step_time": 0.1067664623260498 + }, + { + "epoch": 2.12860107421875e-06, + "model_forward_time": 0.024470806121826172, + "step": 1395 + }, + { + "epoch": 2.12860107421875e-06, + "step": 1395, + "training_step_time": 0.1043238639831543 + }, + { + "epoch": 2.130126953125e-06, + "model_forward_time": 0.025542259216308594, + "step": 1396 + }, + { + "epoch": 2.130126953125e-06, + "step": 1396, + "training_step_time": 0.10828661918640137 + }, + { + "epoch": 2.13165283203125e-06, + "model_forward_time": 0.0255587100982666, + "step": 1397 + }, + { + "epoch": 2.13165283203125e-06, + "step": 1397, + "training_step_time": 0.11431670188903809 + }, + { + "epoch": 2.1331787109375e-06, + "model_forward_time": 0.02551746368408203, + "step": 1398 + }, + { + "epoch": 2.1331787109375e-06, + "step": 1398, + "training_step_time": 0.13465547561645508 + }, + { + "epoch": 2.13470458984375e-06, + "model_forward_time": 0.0254824161529541, + "step": 1399 + }, + { + "epoch": 2.13470458984375e-06, + "step": 1399, + "training_step_time": 0.1233057975769043 + }, + { + "epoch": 2.13623046875e-06, + "grad_norm": 0.8213808536529541, + "learning_rate": 9.333333333333334e-05, + "loss": 0.1646, + "step": 1400 + }, + { + "epoch": 2.13623046875e-06, + "model_forward_time": 0.02496790885925293, + "step": 1400 + }, + { + "epoch": 2.13623046875e-06, + "step": 1400, + "training_step_time": 0.12115097045898438 + }, + { + "epoch": 2.13775634765625e-06, + "model_forward_time": 0.025421142578125, + "step": 1401 + }, + { + "epoch": 2.13775634765625e-06, + "step": 1401, + "training_step_time": 0.11547350883483887 + }, + { + "epoch": 2.1392822265625e-06, + "model_forward_time": 0.025584697723388672, + "step": 1402 + }, + { + "epoch": 2.1392822265625e-06, + "step": 1402, + "training_step_time": 0.11190557479858398 + }, + { + "epoch": 2.14080810546875e-06, + "model_forward_time": 0.02526092529296875, + "step": 1403 + }, + { + "epoch": 2.14080810546875e-06, + "step": 1403, + "training_step_time": 0.11491799354553223 + }, + { + "epoch": 2.142333984375e-06, + "model_forward_time": 0.025345325469970703, + "step": 1404 + }, + { + "epoch": 2.142333984375e-06, + "step": 1404, + "training_step_time": 0.1106102466583252 + }, + { + "epoch": 2.14385986328125e-06, + "model_forward_time": 0.025770187377929688, + "step": 1405 + }, + { + "epoch": 2.14385986328125e-06, + "step": 1405, + "training_step_time": 0.1104581356048584 + }, + { + "epoch": 2.1453857421875e-06, + "model_forward_time": 0.025198936462402344, + "step": 1406 + }, + { + "epoch": 2.1453857421875e-06, + "step": 1406, + "training_step_time": 0.10603857040405273 + }, + { + "epoch": 2.14691162109375e-06, + "model_forward_time": 0.02532482147216797, + "step": 1407 + }, + { + "epoch": 2.14691162109375e-06, + "step": 1407, + "training_step_time": 0.11012148857116699 + }, + { + "epoch": 2.1484375e-06, + "model_forward_time": 0.025563716888427734, + "step": 1408 + }, + { + "epoch": 2.1484375e-06, + "step": 1408, + "training_step_time": 0.10848116874694824 + }, + { + "epoch": 2.14996337890625e-06, + "model_forward_time": 0.025565147399902344, + "step": 1409 + }, + { + "epoch": 2.14996337890625e-06, + "step": 1409, + "training_step_time": 0.18502593040466309 + }, + { + "epoch": 2.1514892578125e-06, + "grad_norm": 0.6870678067207336, + "learning_rate": 9.4e-05, + "loss": 0.1654, + "step": 1410 + }, + { + "epoch": 2.1514892578125e-06, + "model_forward_time": 0.024663448333740234, + "step": 1410 + }, + { + "epoch": 2.1514892578125e-06, + "step": 1410, + "training_step_time": 0.10868406295776367 + }, + { + "epoch": 2.15301513671875e-06, + "model_forward_time": 0.02468585968017578, + "step": 1411 + }, + { + "epoch": 2.15301513671875e-06, + "step": 1411, + "training_step_time": 0.1329805850982666 + }, + { + "epoch": 2.154541015625e-06, + "model_forward_time": 0.025498151779174805, + "step": 1412 + }, + { + "epoch": 2.154541015625e-06, + "step": 1412, + "training_step_time": 0.11503171920776367 + }, + { + "epoch": 2.15606689453125e-06, + "model_forward_time": 0.02512049674987793, + "step": 1413 + }, + { + "epoch": 2.15606689453125e-06, + "step": 1413, + "training_step_time": 0.12928462028503418 + }, + { + "epoch": 2.1575927734375e-06, + "model_forward_time": 0.025092363357543945, + "step": 1414 + }, + { + "epoch": 2.1575927734375e-06, + "step": 1414, + "training_step_time": 0.15896105766296387 + }, + { + "epoch": 2.15911865234375e-06, + "model_forward_time": 0.02454686164855957, + "step": 1415 + }, + { + "epoch": 2.15911865234375e-06, + "step": 1415, + "training_step_time": 0.18423247337341309 + }, + { + "epoch": 2.16064453125e-06, + "model_forward_time": 0.02487802505493164, + "step": 1416 + }, + { + "epoch": 2.16064453125e-06, + "step": 1416, + "training_step_time": 0.15381836891174316 + }, + { + "epoch": 2.16217041015625e-06, + "model_forward_time": 0.02419567108154297, + "step": 1417 + }, + { + "epoch": 2.16217041015625e-06, + "step": 1417, + "training_step_time": 0.1028435230255127 + }, + { + "epoch": 2.1636962890625e-06, + "model_forward_time": 0.024589061737060547, + "step": 1418 + }, + { + "epoch": 2.1636962890625e-06, + "step": 1418, + "training_step_time": 0.10290408134460449 + }, + { + "epoch": 2.16522216796875e-06, + "model_forward_time": 0.025154590606689453, + "step": 1419 + }, + { + "epoch": 2.16522216796875e-06, + "step": 1419, + "training_step_time": 0.10628390312194824 + }, + { + "epoch": 2.166748046875e-06, + "grad_norm": 1.0266139507293701, + "learning_rate": 9.466666666666667e-05, + "loss": 0.1573, + "step": 1420 + }, + { + "epoch": 2.166748046875e-06, + "model_forward_time": 0.02559185028076172, + "step": 1420 + }, + { + "epoch": 2.166748046875e-06, + "step": 1420, + "training_step_time": 0.11061215400695801 + }, + { + "epoch": 2.16827392578125e-06, + "model_forward_time": 0.02534008026123047, + "step": 1421 + }, + { + "epoch": 2.16827392578125e-06, + "step": 1421, + "training_step_time": 0.13797211647033691 + }, + { + "epoch": 2.1697998046875e-06, + "model_forward_time": 0.025868892669677734, + "step": 1422 + }, + { + "epoch": 2.1697998046875e-06, + "step": 1422, + "training_step_time": 0.11163997650146484 + }, + { + "epoch": 2.17132568359375e-06, + "model_forward_time": 0.028405189514160156, + "step": 1423 + }, + { + "epoch": 2.17132568359375e-06, + "step": 1423, + "training_step_time": 0.11517572402954102 + }, + { + "epoch": 2.1728515625e-06, + "model_forward_time": 0.025037050247192383, + "step": 1424 + }, + { + "epoch": 2.1728515625e-06, + "step": 1424, + "training_step_time": 0.11580061912536621 + }, + { + "epoch": 2.17437744140625e-06, + "model_forward_time": 0.025238513946533203, + "step": 1425 + }, + { + "epoch": 2.17437744140625e-06, + "step": 1425, + "training_step_time": 0.19036149978637695 + }, + { + "epoch": 2.1759033203125e-06, + "model_forward_time": 0.02507495880126953, + "step": 1426 + }, + { + "epoch": 2.1759033203125e-06, + "step": 1426, + "training_step_time": 0.11758232116699219 + }, + { + "epoch": 2.17742919921875e-06, + "model_forward_time": 0.0245513916015625, + "step": 1427 + }, + { + "epoch": 2.17742919921875e-06, + "step": 1427, + "training_step_time": 0.11407113075256348 + }, + { + "epoch": 2.178955078125e-06, + "model_forward_time": 0.025414228439331055, + "step": 1428 + }, + { + "epoch": 2.178955078125e-06, + "step": 1428, + "training_step_time": 0.10669732093811035 + }, + { + "epoch": 2.18048095703125e-06, + "model_forward_time": 0.02521038055419922, + "step": 1429 + }, + { + "epoch": 2.18048095703125e-06, + "step": 1429, + "training_step_time": 0.10910439491271973 + }, + { + "epoch": 2.1820068359375e-06, + "grad_norm": 0.6546294689178467, + "learning_rate": 9.533333333333334e-05, + "loss": 0.1192, + "step": 1430 + }, + { + "epoch": 2.1820068359375e-06, + "model_forward_time": 0.028010845184326172, + "step": 1430 + }, + { + "epoch": 2.1820068359375e-06, + "step": 1430, + "training_step_time": 0.110015869140625 + }, + { + "epoch": 2.18353271484375e-06, + "model_forward_time": 0.026095867156982422, + "step": 1431 + }, + { + "epoch": 2.18353271484375e-06, + "step": 1431, + "training_step_time": 0.11234498023986816 + }, + { + "epoch": 2.18505859375e-06, + "model_forward_time": 0.025310754776000977, + "step": 1432 + }, + { + "epoch": 2.18505859375e-06, + "step": 1432, + "training_step_time": 0.10950970649719238 + }, + { + "epoch": 2.18658447265625e-06, + "model_forward_time": 0.025457143783569336, + "step": 1433 + }, + { + "epoch": 2.18658447265625e-06, + "step": 1433, + "training_step_time": 0.1683039665222168 + }, + { + "epoch": 2.1881103515625e-06, + "model_forward_time": 0.025000572204589844, + "step": 1434 + }, + { + "epoch": 2.1881103515625e-06, + "step": 1434, + "training_step_time": 0.16678833961486816 + }, + { + "epoch": 2.18963623046875e-06, + "model_forward_time": 0.0244295597076416, + "step": 1435 + }, + { + "epoch": 2.18963623046875e-06, + "step": 1435, + "training_step_time": 0.1041109561920166 + }, + { + "epoch": 2.191162109375e-06, + "model_forward_time": 0.02525925636291504, + "step": 1436 + }, + { + "epoch": 2.191162109375e-06, + "step": 1436, + "training_step_time": 0.10423755645751953 + }, + { + "epoch": 2.19268798828125e-06, + "model_forward_time": 0.025871753692626953, + "step": 1437 + }, + { + "epoch": 2.19268798828125e-06, + "step": 1437, + "training_step_time": 0.1102755069732666 + }, + { + "epoch": 2.1942138671875e-06, + "model_forward_time": 0.025707006454467773, + "step": 1438 + }, + { + "epoch": 2.1942138671875e-06, + "step": 1438, + "training_step_time": 0.10861945152282715 + }, + { + "epoch": 2.19573974609375e-06, + "model_forward_time": 0.025672197341918945, + "step": 1439 + }, + { + "epoch": 2.19573974609375e-06, + "step": 1439, + "training_step_time": 0.10539579391479492 + }, + { + "epoch": 2.197265625e-06, + "grad_norm": 0.6670544743537903, + "learning_rate": 9.6e-05, + "loss": 0.16, + "step": 1440 + }, + { + "epoch": 2.197265625e-06, + "model_forward_time": 0.02554178237915039, + "step": 1440 + }, + { + "epoch": 2.197265625e-06, + "step": 1440, + "training_step_time": 0.10809659957885742 + }, + { + "epoch": 2.19879150390625e-06, + "model_forward_time": 0.02510976791381836, + "step": 1441 + }, + { + "epoch": 2.19879150390625e-06, + "step": 1441, + "training_step_time": 0.1068410873413086 + }, + { + "epoch": 2.2003173828125e-06, + "model_forward_time": 0.02552652359008789, + "step": 1442 + }, + { + "epoch": 2.2003173828125e-06, + "step": 1442, + "training_step_time": 0.11005473136901855 + }, + { + "epoch": 2.20184326171875e-06, + "model_forward_time": 0.025165557861328125, + "step": 1443 + }, + { + "epoch": 2.20184326171875e-06, + "step": 1443, + "training_step_time": 0.10723471641540527 + }, + { + "epoch": 2.203369140625e-06, + "model_forward_time": 0.025207042694091797, + "step": 1444 + }, + { + "epoch": 2.203369140625e-06, + "step": 1444, + "training_step_time": 0.10516786575317383 + }, + { + "epoch": 2.20489501953125e-06, + "model_forward_time": 0.025761127471923828, + "step": 1445 + }, + { + "epoch": 2.20489501953125e-06, + "step": 1445, + "training_step_time": 0.10593652725219727 + }, + { + "epoch": 2.2064208984375e-06, + "model_forward_time": 0.025539398193359375, + "step": 1446 + }, + { + "epoch": 2.2064208984375e-06, + "step": 1446, + "training_step_time": 0.10727167129516602 + }, + { + "epoch": 2.20794677734375e-06, + "model_forward_time": 0.02572011947631836, + "step": 1447 + }, + { + "epoch": 2.20794677734375e-06, + "step": 1447, + "training_step_time": 0.10550975799560547 + }, + { + "epoch": 2.20947265625e-06, + "model_forward_time": 0.025275707244873047, + "step": 1448 + }, + { + "epoch": 2.20947265625e-06, + "step": 1448, + "training_step_time": 0.1038062572479248 + }, + { + "epoch": 2.21099853515625e-06, + "model_forward_time": 0.025598526000976562, + "step": 1449 + }, + { + "epoch": 2.21099853515625e-06, + "step": 1449, + "training_step_time": 0.10940861701965332 + }, + { + "epoch": 2.2125244140625e-06, + "grad_norm": 1.329904317855835, + "learning_rate": 9.666666666666667e-05, + "loss": 0.1751, + "step": 1450 + }, + { + "epoch": 2.2125244140625e-06, + "model_forward_time": 0.02562093734741211, + "step": 1450 + }, + { + "epoch": 2.2125244140625e-06, + "step": 1450, + "training_step_time": 0.10740423202514648 + }, + { + "epoch": 2.21405029296875e-06, + "model_forward_time": 0.025353670120239258, + "step": 1451 + }, + { + "epoch": 2.21405029296875e-06, + "step": 1451, + "training_step_time": 0.10501313209533691 + }, + { + "epoch": 2.215576171875e-06, + "model_forward_time": 0.025502681732177734, + "step": 1452 + }, + { + "epoch": 2.215576171875e-06, + "step": 1452, + "training_step_time": 0.1130983829498291 + }, + { + "epoch": 2.21710205078125e-06, + "model_forward_time": 0.025914907455444336, + "step": 1453 + }, + { + "epoch": 2.21710205078125e-06, + "step": 1453, + "training_step_time": 0.10426592826843262 + }, + { + "epoch": 2.2186279296875e-06, + "model_forward_time": 0.02516913414001465, + "step": 1454 + }, + { + "epoch": 2.2186279296875e-06, + "step": 1454, + "training_step_time": 0.10678982734680176 + }, + { + "epoch": 2.22015380859375e-06, + "model_forward_time": 0.025825023651123047, + "step": 1455 + }, + { + "epoch": 2.22015380859375e-06, + "step": 1455, + "training_step_time": 0.11348795890808105 + }, + { + "epoch": 2.2216796875e-06, + "model_forward_time": 0.02558612823486328, + "step": 1456 + }, + { + "epoch": 2.2216796875e-06, + "step": 1456, + "training_step_time": 0.20345592498779297 + }, + { + "epoch": 2.22320556640625e-06, + "model_forward_time": 0.02460479736328125, + "step": 1457 + }, + { + "epoch": 2.22320556640625e-06, + "step": 1457, + "training_step_time": 0.1753673553466797 + }, + { + "epoch": 2.2247314453125e-06, + "model_forward_time": 0.024578332901000977, + "step": 1458 + }, + { + "epoch": 2.2247314453125e-06, + "step": 1458, + "training_step_time": 0.1734919548034668 + }, + { + "epoch": 2.22625732421875e-06, + "model_forward_time": 0.024336814880371094, + "step": 1459 + }, + { + "epoch": 2.22625732421875e-06, + "step": 1459, + "training_step_time": 0.16818761825561523 + }, + { + "epoch": 2.227783203125e-06, + "grad_norm": 1.1440796852111816, + "learning_rate": 9.733333333333335e-05, + "loss": 0.1812, + "step": 1460 + }, + { + "epoch": 2.227783203125e-06, + "model_forward_time": 0.02463054656982422, + "step": 1460 + }, + { + "epoch": 2.227783203125e-06, + "step": 1460, + "training_step_time": 0.18352651596069336 + }, + { + "epoch": 2.22930908203125e-06, + "model_forward_time": 0.026389598846435547, + "step": 1461 + }, + { + "epoch": 2.22930908203125e-06, + "step": 1461, + "training_step_time": 0.1149282455444336 + }, + { + "epoch": 2.2308349609375e-06, + "model_forward_time": 0.024538278579711914, + "step": 1462 + }, + { + "epoch": 2.2308349609375e-06, + "step": 1462, + "training_step_time": 0.105133056640625 + }, + { + "epoch": 2.23236083984375e-06, + "model_forward_time": 0.02522754669189453, + "step": 1463 + }, + { + "epoch": 2.23236083984375e-06, + "step": 1463, + "training_step_time": 0.10737967491149902 + }, + { + "epoch": 2.23388671875e-06, + "model_forward_time": 0.025313377380371094, + "step": 1464 + }, + { + "epoch": 2.23388671875e-06, + "step": 1464, + "training_step_time": 0.10556316375732422 + }, + { + "epoch": 2.23541259765625e-06, + "model_forward_time": 0.025303363800048828, + "step": 1465 + }, + { + "epoch": 2.23541259765625e-06, + "step": 1465, + "training_step_time": 0.10871553421020508 + }, + { + "epoch": 2.2369384765625e-06, + "model_forward_time": 0.02571272850036621, + "step": 1466 + }, + { + "epoch": 2.2369384765625e-06, + "step": 1466, + "training_step_time": 0.13390660285949707 + }, + { + "epoch": 2.23846435546875e-06, + "model_forward_time": 0.025513410568237305, + "step": 1467 + }, + { + "epoch": 2.23846435546875e-06, + "step": 1467, + "training_step_time": 0.1381216049194336 + }, + { + "epoch": 2.239990234375e-06, + "model_forward_time": 0.026959657669067383, + "step": 1468 + }, + { + "epoch": 2.239990234375e-06, + "step": 1468, + "training_step_time": 0.10939860343933105 + }, + { + "epoch": 2.24151611328125e-06, + "model_forward_time": 0.025315523147583008, + "step": 1469 + }, + { + "epoch": 2.24151611328125e-06, + "step": 1469, + "training_step_time": 0.11547088623046875 + }, + { + "epoch": 2.2430419921875e-06, + "grad_norm": 0.6092724800109863, + "learning_rate": 9.8e-05, + "loss": 0.166, + "step": 1470 + }, + { + "epoch": 2.2430419921875e-06, + "model_forward_time": 0.025532245635986328, + "step": 1470 + }, + { + "epoch": 2.2430419921875e-06, + "step": 1470, + "training_step_time": 0.10926604270935059 + }, + { + "epoch": 2.24456787109375e-06, + "model_forward_time": 0.025789976119995117, + "step": 1471 + }, + { + "epoch": 2.24456787109375e-06, + "step": 1471, + "training_step_time": 0.1045832633972168 + }, + { + "epoch": 2.24609375e-06, + "model_forward_time": 0.024941682815551758, + "step": 1472 + }, + { + "epoch": 2.24609375e-06, + "step": 1472, + "training_step_time": 0.1971895694732666 + }, + { + "epoch": 2.24761962890625e-06, + "model_forward_time": 0.024367094039916992, + "step": 1473 + }, + { + "epoch": 2.24761962890625e-06, + "step": 1473, + "training_step_time": 0.10470128059387207 + }, + { + "epoch": 2.2491455078125e-06, + "model_forward_time": 0.02476334571838379, + "step": 1474 + }, + { + "epoch": 2.2491455078125e-06, + "step": 1474, + "training_step_time": 0.10367774963378906 + }, + { + "epoch": 2.25067138671875e-06, + "model_forward_time": 0.025284290313720703, + "step": 1475 + }, + { + "epoch": 2.25067138671875e-06, + "step": 1475, + "training_step_time": 0.14227652549743652 + }, + { + "epoch": 2.252197265625e-06, + "model_forward_time": 0.025363922119140625, + "step": 1476 + }, + { + "epoch": 2.252197265625e-06, + "step": 1476, + "training_step_time": 0.12005734443664551 + }, + { + "epoch": 2.25372314453125e-06, + "model_forward_time": 0.025188207626342773, + "step": 1477 + }, + { + "epoch": 2.25372314453125e-06, + "step": 1477, + "training_step_time": 0.10769152641296387 + }, + { + "epoch": 2.2552490234375e-06, + "model_forward_time": 0.02599024772644043, + "step": 1478 + }, + { + "epoch": 2.2552490234375e-06, + "step": 1478, + "training_step_time": 0.11042118072509766 + }, + { + "epoch": 2.25677490234375e-06, + "model_forward_time": 0.026059389114379883, + "step": 1479 + }, + { + "epoch": 2.25677490234375e-06, + "step": 1479, + "training_step_time": 0.10733914375305176 + }, + { + "epoch": 2.25830078125e-06, + "grad_norm": 0.7142199873924255, + "learning_rate": 9.866666666666668e-05, + "loss": 0.1575, + "step": 1480 + }, + { + "epoch": 2.25830078125e-06, + "model_forward_time": 0.025785446166992188, + "step": 1480 + }, + { + "epoch": 2.25830078125e-06, + "step": 1480, + "training_step_time": 0.1099233627319336 + }, + { + "epoch": 2.25982666015625e-06, + "model_forward_time": 0.0255887508392334, + "step": 1481 + }, + { + "epoch": 2.25982666015625e-06, + "step": 1481, + "training_step_time": 0.11271071434020996 + }, + { + "epoch": 2.2613525390625e-06, + "model_forward_time": 0.026090621948242188, + "step": 1482 + }, + { + "epoch": 2.2613525390625e-06, + "step": 1482, + "training_step_time": 0.10665321350097656 + }, + { + "epoch": 2.26287841796875e-06, + "model_forward_time": 0.02603745460510254, + "step": 1483 + }, + { + "epoch": 2.26287841796875e-06, + "step": 1483, + "training_step_time": 0.10934114456176758 + }, + { + "epoch": 2.264404296875e-06, + "model_forward_time": 0.025475025177001953, + "step": 1484 + }, + { + "epoch": 2.264404296875e-06, + "step": 1484, + "training_step_time": 0.1168220043182373 + }, + { + "epoch": 2.26593017578125e-06, + "model_forward_time": 0.02499532699584961, + "step": 1485 + }, + { + "epoch": 2.26593017578125e-06, + "step": 1485, + "training_step_time": 0.10541892051696777 + }, + { + "epoch": 2.2674560546875e-06, + "model_forward_time": 0.025171995162963867, + "step": 1486 + }, + { + "epoch": 2.2674560546875e-06, + "step": 1486, + "training_step_time": 0.10609817504882812 + }, + { + "epoch": 2.26898193359375e-06, + "model_forward_time": 0.025940895080566406, + "step": 1487 + }, + { + "epoch": 2.26898193359375e-06, + "step": 1487, + "training_step_time": 0.10712933540344238 + }, + { + "epoch": 2.2705078125e-06, + "model_forward_time": 0.025603771209716797, + "step": 1488 + }, + { + "epoch": 2.2705078125e-06, + "step": 1488, + "training_step_time": 0.10471200942993164 + }, + { + "epoch": 2.27203369140625e-06, + "model_forward_time": 0.02543330192565918, + "step": 1489 + }, + { + "epoch": 2.27203369140625e-06, + "step": 1489, + "training_step_time": 0.10407114028930664 + }, + { + "epoch": 2.2735595703125e-06, + "grad_norm": 0.9511436820030212, + "learning_rate": 9.933333333333334e-05, + "loss": 0.1787, + "step": 1490 + }, + { + "epoch": 2.2735595703125e-06, + "model_forward_time": 0.02528858184814453, + "step": 1490 + }, + { + "epoch": 2.2735595703125e-06, + "step": 1490, + "training_step_time": 0.11156201362609863 + }, + { + "epoch": 2.27508544921875e-06, + "model_forward_time": 0.025616884231567383, + "step": 1491 + }, + { + "epoch": 2.27508544921875e-06, + "step": 1491, + "training_step_time": 0.12158775329589844 + }, + { + "epoch": 2.276611328125e-06, + "model_forward_time": 0.024897098541259766, + "step": 1492 + }, + { + "epoch": 2.276611328125e-06, + "step": 1492, + "training_step_time": 0.1252896785736084 + }, + { + "epoch": 2.27813720703125e-06, + "model_forward_time": 0.024675607681274414, + "step": 1493 + }, + { + "epoch": 2.27813720703125e-06, + "step": 1493, + "training_step_time": 0.12450051307678223 + }, + { + "epoch": 2.2796630859375e-06, + "model_forward_time": 0.024403095245361328, + "step": 1494 + }, + { + "epoch": 2.2796630859375e-06, + "step": 1494, + "training_step_time": 0.11598849296569824 + }, + { + "epoch": 2.28118896484375e-06, + "model_forward_time": 0.0244290828704834, + "step": 1495 + }, + { + "epoch": 2.28118896484375e-06, + "step": 1495, + "training_step_time": 0.12032890319824219 + }, + { + "epoch": 2.28271484375e-06, + "model_forward_time": 0.025087356567382812, + "step": 1496 + }, + { + "epoch": 2.28271484375e-06, + "step": 1496, + "training_step_time": 0.11457419395446777 + }, + { + "epoch": 2.28424072265625e-06, + "model_forward_time": 0.02712726593017578, + "step": 1497 + }, + { + "epoch": 2.28424072265625e-06, + "step": 1497, + "training_step_time": 0.11216497421264648 + }, + { + "epoch": 2.2857666015625e-06, + "model_forward_time": 0.02434396743774414, + "step": 1498 + }, + { + "epoch": 2.2857666015625e-06, + "step": 1498, + "training_step_time": 0.11299896240234375 + }, + { + "epoch": 2.28729248046875e-06, + "model_forward_time": 0.025021076202392578, + "step": 1499 + }, + { + "epoch": 2.28729248046875e-06, + "step": 1499, + "training_step_time": 0.11070585250854492 + }, + { + "epoch": 2.288818359375e-06, + "grad_norm": 0.6693169474601746, + "learning_rate": 0.0001, + "loss": 0.1696, + "step": 1500 + }, + { + "epoch": 2.288818359375e-06, + "model_forward_time": 0.02524566650390625, + "step": 1500 + }, + { + "epoch": 2.288818359375e-06, + "step": 1500, + "training_step_time": 0.11181497573852539 + }, + { + "epoch": 2.29034423828125e-06, + "model_forward_time": 0.025269031524658203, + "step": 1501 + }, + { + "epoch": 2.29034423828125e-06, + "step": 1501, + "training_step_time": 0.10571575164794922 + }, + { + "epoch": 2.2918701171875e-06, + "model_forward_time": 0.02554488182067871, + "step": 1502 + }, + { + "epoch": 2.2918701171875e-06, + "step": 1502, + "training_step_time": 0.1989579200744629 + }, + { + "epoch": 2.29339599609375e-06, + "model_forward_time": 0.024329423904418945, + "step": 1503 + }, + { + "epoch": 2.29339599609375e-06, + "step": 1503, + "training_step_time": 0.17641091346740723 + }, + { + "epoch": 2.294921875e-06, + "model_forward_time": 0.024661779403686523, + "step": 1504 + }, + { + "epoch": 2.294921875e-06, + "step": 1504, + "training_step_time": 0.17810988426208496 + }, + { + "epoch": 2.29644775390625e-06, + "model_forward_time": 0.024297475814819336, + "step": 1505 + }, + { + "epoch": 2.29644775390625e-06, + "step": 1505, + "training_step_time": 0.15257525444030762 + }, + { + "epoch": 2.2979736328125e-06, + "model_forward_time": 0.025177001953125, + "step": 1506 + }, + { + "epoch": 2.2979736328125e-06, + "step": 1506, + "training_step_time": 0.10593819618225098 + }, + { + "epoch": 2.29949951171875e-06, + "model_forward_time": 0.024662494659423828, + "step": 1507 + }, + { + "epoch": 2.29949951171875e-06, + "step": 1507, + "training_step_time": 0.1587827205657959 + }, + { + "epoch": 2.301025390625e-06, + "model_forward_time": 0.02459263801574707, + "step": 1508 + }, + { + "epoch": 2.301025390625e-06, + "step": 1508, + "training_step_time": 0.12143325805664062 + }, + { + "epoch": 2.30255126953125e-06, + "model_forward_time": 0.024259090423583984, + "step": 1509 + }, + { + "epoch": 2.30255126953125e-06, + "step": 1509, + "training_step_time": 0.11380934715270996 + }, + { + "epoch": 2.3040771484375e-06, + "grad_norm": 0.8862974643707275, + "learning_rate": 9.999996962264266e-05, + "loss": 0.1563, + "step": 1510 + }, + { + "epoch": 2.3040771484375e-06, + "model_forward_time": 0.025377988815307617, + "step": 1510 + }, + { + "epoch": 2.3040771484375e-06, + "step": 1510, + "training_step_time": 0.10877585411071777 + }, + { + "epoch": 2.30560302734375e-06, + "model_forward_time": 0.025152206420898438, + "step": 1511 + }, + { + "epoch": 2.30560302734375e-06, + "step": 1511, + "training_step_time": 0.10483407974243164 + }, + { + "epoch": 2.30712890625e-06, + "model_forward_time": 0.025164365768432617, + "step": 1512 + }, + { + "epoch": 2.30712890625e-06, + "step": 1512, + "training_step_time": 0.1571347713470459 + }, + { + "epoch": 2.30865478515625e-06, + "model_forward_time": 0.024854660034179688, + "step": 1513 + }, + { + "epoch": 2.30865478515625e-06, + "step": 1513, + "training_step_time": 0.14067697525024414 + }, + { + "epoch": 2.3101806640625e-06, + "model_forward_time": 0.02483510971069336, + "step": 1514 + }, + { + "epoch": 2.3101806640625e-06, + "step": 1514, + "training_step_time": 0.10798215866088867 + }, + { + "epoch": 2.31170654296875e-06, + "model_forward_time": 0.025162696838378906, + "step": 1515 + }, + { + "epoch": 2.31170654296875e-06, + "step": 1515, + "training_step_time": 0.11213088035583496 + }, + { + "epoch": 2.313232421875e-06, + "model_forward_time": 0.025634050369262695, + "step": 1516 + }, + { + "epoch": 2.313232421875e-06, + "step": 1516, + "training_step_time": 0.11103343963623047 + }, + { + "epoch": 2.31475830078125e-06, + "model_forward_time": 0.025055646896362305, + "step": 1517 + }, + { + "epoch": 2.31475830078125e-06, + "step": 1517, + "training_step_time": 0.11058259010314941 + }, + { + "epoch": 2.3162841796875e-06, + "model_forward_time": 0.02537822723388672, + "step": 1518 + }, + { + "epoch": 2.3162841796875e-06, + "step": 1518, + "training_step_time": 0.1532423496246338 + }, + { + "epoch": 2.31781005859375e-06, + "model_forward_time": 0.024830102920532227, + "step": 1519 + }, + { + "epoch": 2.31781005859375e-06, + "step": 1519, + "training_step_time": 0.10341310501098633 + }, + { + "epoch": 2.3193359375e-06, + "grad_norm": 1.0126616954803467, + "learning_rate": 9.999987849060753e-05, + "loss": 0.1561, + "step": 1520 + }, + { + "epoch": 2.3193359375e-06, + "model_forward_time": 0.025298595428466797, + "step": 1520 + }, + { + "epoch": 2.3193359375e-06, + "step": 1520, + "training_step_time": 0.10521435737609863 + }, + { + "epoch": 2.32086181640625e-06, + "model_forward_time": 0.029204368591308594, + "step": 1521 + }, + { + "epoch": 2.32086181640625e-06, + "step": 1521, + "training_step_time": 0.11081242561340332 + }, + { + "epoch": 2.3223876953125e-06, + "model_forward_time": 0.02555561065673828, + "step": 1522 + }, + { + "epoch": 2.3223876953125e-06, + "step": 1522, + "training_step_time": 0.10884857177734375 + }, + { + "epoch": 2.32391357421875e-06, + "model_forward_time": 0.025867700576782227, + "step": 1523 + }, + { + "epoch": 2.32391357421875e-06, + "step": 1523, + "training_step_time": 0.129561185836792 + }, + { + "epoch": 2.325439453125e-06, + "model_forward_time": 0.02549004554748535, + "step": 1524 + }, + { + "epoch": 2.325439453125e-06, + "step": 1524, + "training_step_time": 0.1743464469909668 + }, + { + "epoch": 2.32696533203125e-06, + "model_forward_time": 0.024520158767700195, + "step": 1525 + }, + { + "epoch": 2.32696533203125e-06, + "step": 1525, + "training_step_time": 0.13298964500427246 + }, + { + "epoch": 2.3284912109375e-06, + "model_forward_time": 0.02425527572631836, + "step": 1526 + }, + { + "epoch": 2.3284912109375e-06, + "step": 1526, + "training_step_time": 0.12737154960632324 + }, + { + "epoch": 2.33001708984375e-06, + "model_forward_time": 0.025834321975708008, + "step": 1527 + }, + { + "epoch": 2.33001708984375e-06, + "step": 1527, + "training_step_time": 0.11346840858459473 + }, + { + "epoch": 2.33154296875e-06, + "model_forward_time": 0.025515317916870117, + "step": 1528 + }, + { + "epoch": 2.33154296875e-06, + "step": 1528, + "training_step_time": 0.11336469650268555 + }, + { + "epoch": 2.33306884765625e-06, + "model_forward_time": 0.025148868560791016, + "step": 1529 + }, + { + "epoch": 2.33306884765625e-06, + "step": 1529, + "training_step_time": 0.11333703994750977 + }, + { + "epoch": 2.3345947265625e-06, + "grad_norm": 0.6787976622581482, + "learning_rate": 9.999972660400536e-05, + "loss": 0.1554, + "step": 1530 + }, + { + "epoch": 2.3345947265625e-06, + "model_forward_time": 0.025073528289794922, + "step": 1530 + }, + { + "epoch": 2.3345947265625e-06, + "step": 1530, + "training_step_time": 0.1073770523071289 + }, + { + "epoch": 2.33612060546875e-06, + "model_forward_time": 0.02554774284362793, + "step": 1531 + }, + { + "epoch": 2.33612060546875e-06, + "step": 1531, + "training_step_time": 0.10764288902282715 + }, + { + "epoch": 2.337646484375e-06, + "model_forward_time": 0.025145530700683594, + "step": 1532 + }, + { + "epoch": 2.337646484375e-06, + "step": 1532, + "training_step_time": 0.10599684715270996 + }, + { + "epoch": 2.33917236328125e-06, + "model_forward_time": 0.02475285530090332, + "step": 1533 + }, + { + "epoch": 2.33917236328125e-06, + "step": 1533, + "training_step_time": 0.108551025390625 + }, + { + "epoch": 2.3406982421875e-06, + "model_forward_time": 0.024851322174072266, + "step": 1534 + }, + { + "epoch": 2.3406982421875e-06, + "step": 1534, + "training_step_time": 0.10662627220153809 + }, + { + "epoch": 2.34222412109375e-06, + "model_forward_time": 0.025217294692993164, + "step": 1535 + }, + { + "epoch": 2.34222412109375e-06, + "step": 1535, + "training_step_time": 0.10664987564086914 + }, + { + "epoch": 2.34375e-06, + "model_forward_time": 0.025756359100341797, + "step": 1536 + }, + { + "epoch": 2.34375e-06, + "step": 1536, + "training_step_time": 0.10522127151489258 + }, + { + "epoch": 2.34527587890625e-06, + "model_forward_time": 0.029313087463378906, + "step": 1537 + }, + { + "epoch": 2.34527587890625e-06, + "step": 1537, + "training_step_time": 0.11299586296081543 + }, + { + "epoch": 2.3468017578125e-06, + "model_forward_time": 0.02495861053466797, + "step": 1538 + }, + { + "epoch": 2.3468017578125e-06, + "step": 1538, + "training_step_time": 0.10603451728820801 + }, + { + "epoch": 2.34832763671875e-06, + "model_forward_time": 0.02507781982421875, + "step": 1539 + }, + { + "epoch": 2.34832763671875e-06, + "step": 1539, + "training_step_time": 0.10508990287780762 + }, + { + "epoch": 2.349853515625e-06, + "grad_norm": 1.0427889823913574, + "learning_rate": 9.999951396302069e-05, + "loss": 0.1667, + "step": 1540 + }, + { + "epoch": 2.349853515625e-06, + "model_forward_time": 0.024873971939086914, + "step": 1540 + }, + { + "epoch": 2.349853515625e-06, + "step": 1540, + "training_step_time": 0.10645270347595215 + }, + { + "epoch": 2.35137939453125e-06, + "model_forward_time": 0.025048255920410156, + "step": 1541 + }, + { + "epoch": 2.35137939453125e-06, + "step": 1541, + "training_step_time": 0.10559558868408203 + }, + { + "epoch": 2.3529052734375e-06, + "model_forward_time": 0.02483963966369629, + "step": 1542 + }, + { + "epoch": 2.3529052734375e-06, + "step": 1542, + "training_step_time": 0.10787081718444824 + }, + { + "epoch": 2.35443115234375e-06, + "model_forward_time": 0.025114774703979492, + "step": 1543 + }, + { + "epoch": 2.35443115234375e-06, + "step": 1543, + "training_step_time": 0.10422015190124512 + }, + { + "epoch": 2.35595703125e-06, + "model_forward_time": 0.02503800392150879, + "step": 1544 + }, + { + "epoch": 2.35595703125e-06, + "step": 1544, + "training_step_time": 0.10575270652770996 + }, + { + "epoch": 2.35748291015625e-06, + "model_forward_time": 0.024692058563232422, + "step": 1545 + }, + { + "epoch": 2.35748291015625e-06, + "step": 1545, + "training_step_time": 0.11096620559692383 + }, + { + "epoch": 2.3590087890625e-06, + "model_forward_time": 0.02535390853881836, + "step": 1546 + }, + { + "epoch": 2.3590087890625e-06, + "step": 1546, + "training_step_time": 0.11087369918823242 + }, + { + "epoch": 2.36053466796875e-06, + "model_forward_time": 0.02507495880126953, + "step": 1547 + }, + { + "epoch": 2.36053466796875e-06, + "step": 1547, + "training_step_time": 0.1046602725982666 + }, + { + "epoch": 2.362060546875e-06, + "model_forward_time": 0.0252227783203125, + "step": 1548 + }, + { + "epoch": 2.362060546875e-06, + "step": 1548, + "training_step_time": 0.10464715957641602 + }, + { + "epoch": 2.36358642578125e-06, + "model_forward_time": 0.02490830421447754, + "step": 1549 + }, + { + "epoch": 2.36358642578125e-06, + "step": 1549, + "training_step_time": 0.2046661376953125 + }, + { + "epoch": 2.3651123046875e-06, + "grad_norm": 0.6926366686820984, + "learning_rate": 9.999924056791192e-05, + "loss": 0.187, + "step": 1550 + }, + { + "epoch": 2.3651123046875e-06, + "model_forward_time": 0.02468729019165039, + "step": 1550 + }, + { + "epoch": 2.3651123046875e-06, + "step": 1550, + "training_step_time": 0.21181702613830566 + }, + { + "epoch": 2.36663818359375e-06, + "model_forward_time": 0.02469921112060547, + "step": 1551 + }, + { + "epoch": 2.36663818359375e-06, + "step": 1551, + "training_step_time": 0.12556934356689453 + }, + { + "epoch": 2.3681640625e-06, + "model_forward_time": 0.024120330810546875, + "step": 1552 + }, + { + "epoch": 2.3681640625e-06, + "step": 1552, + "training_step_time": 0.1341536045074463 + }, + { + "epoch": 2.36968994140625e-06, + "model_forward_time": 0.025313615798950195, + "step": 1553 + }, + { + "epoch": 2.36968994140625e-06, + "step": 1553, + "training_step_time": 0.14391827583312988 + }, + { + "epoch": 2.3712158203125e-06, + "model_forward_time": 0.025265216827392578, + "step": 1554 + }, + { + "epoch": 2.3712158203125e-06, + "step": 1554, + "training_step_time": 0.17690753936767578 + }, + { + "epoch": 2.37274169921875e-06, + "model_forward_time": 0.02501368522644043, + "step": 1555 + }, + { + "epoch": 2.37274169921875e-06, + "step": 1555, + "training_step_time": 0.1683824062347412 + }, + { + "epoch": 2.374267578125e-06, + "model_forward_time": 0.024725914001464844, + "step": 1556 + }, + { + "epoch": 2.374267578125e-06, + "step": 1556, + "training_step_time": 0.10369706153869629 + }, + { + "epoch": 2.37579345703125e-06, + "model_forward_time": 0.024688005447387695, + "step": 1557 + }, + { + "epoch": 2.37579345703125e-06, + "step": 1557, + "training_step_time": 0.10619783401489258 + }, + { + "epoch": 2.3773193359375e-06, + "model_forward_time": 0.02528667449951172, + "step": 1558 + }, + { + "epoch": 2.3773193359375e-06, + "step": 1558, + "training_step_time": 0.16920804977416992 + }, + { + "epoch": 2.37884521484375e-06, + "model_forward_time": 0.02508234977722168, + "step": 1559 + }, + { + "epoch": 2.37884521484375e-06, + "step": 1559, + "training_step_time": 0.17266845703125 + }, + { + "epoch": 2.38037109375e-06, + "grad_norm": 0.8135605454444885, + "learning_rate": 9.999890641901125e-05, + "loss": 0.1778, + "step": 1560 + }, + { + "epoch": 2.38037109375e-06, + "model_forward_time": 0.024245738983154297, + "step": 1560 + }, + { + "epoch": 2.38037109375e-06, + "step": 1560, + "training_step_time": 0.10927152633666992 + }, + { + "epoch": 2.38189697265625e-06, + "model_forward_time": 0.02481222152709961, + "step": 1561 + }, + { + "epoch": 2.38189697265625e-06, + "step": 1561, + "training_step_time": 0.10656404495239258 + }, + { + "epoch": 2.3834228515625e-06, + "model_forward_time": 0.025086641311645508, + "step": 1562 + }, + { + "epoch": 2.3834228515625e-06, + "step": 1562, + "training_step_time": 0.11495614051818848 + }, + { + "epoch": 2.38494873046875e-06, + "model_forward_time": 0.025652647018432617, + "step": 1563 + }, + { + "epoch": 2.38494873046875e-06, + "step": 1563, + "training_step_time": 0.10726046562194824 + }, + { + "epoch": 2.386474609375e-06, + "model_forward_time": 0.025349140167236328, + "step": 1564 + }, + { + "epoch": 2.386474609375e-06, + "step": 1564, + "training_step_time": 0.1928558349609375 + }, + { + "epoch": 2.38800048828125e-06, + "model_forward_time": 0.024384498596191406, + "step": 1565 + }, + { + "epoch": 2.38800048828125e-06, + "step": 1565, + "training_step_time": 0.10361766815185547 + }, + { + "epoch": 2.3895263671875e-06, + "model_forward_time": 0.02527022361755371, + "step": 1566 + }, + { + "epoch": 2.3895263671875e-06, + "step": 1566, + "training_step_time": 0.10757112503051758 + }, + { + "epoch": 2.39105224609375e-06, + "model_forward_time": 0.02475118637084961, + "step": 1567 + }, + { + "epoch": 2.39105224609375e-06, + "step": 1567, + "training_step_time": 0.10572028160095215 + }, + { + "epoch": 2.392578125e-06, + "model_forward_time": 0.024987220764160156, + "step": 1568 + }, + { + "epoch": 2.392578125e-06, + "step": 1568, + "training_step_time": 0.11092543601989746 + }, + { + "epoch": 2.39410400390625e-06, + "model_forward_time": 0.025340557098388672, + "step": 1569 + }, + { + "epoch": 2.39410400390625e-06, + "step": 1569, + "training_step_time": 0.11502385139465332 + }, + { + "epoch": 2.3956298828125e-06, + "grad_norm": 0.7793666124343872, + "learning_rate": 9.999851151672466e-05, + "loss": 0.1475, + "step": 1570 + }, + { + "epoch": 2.3956298828125e-06, + "model_forward_time": 0.025092363357543945, + "step": 1570 + }, + { + "epoch": 2.3956298828125e-06, + "step": 1570, + "training_step_time": 0.11787033081054688 + }, + { + "epoch": 2.39715576171875e-06, + "model_forward_time": 0.025498151779174805, + "step": 1571 + }, + { + "epoch": 2.39715576171875e-06, + "step": 1571, + "training_step_time": 0.21085405349731445 + }, + { + "epoch": 2.398681640625e-06, + "model_forward_time": 0.024493932723999023, + "step": 1572 + }, + { + "epoch": 2.398681640625e-06, + "step": 1572, + "training_step_time": 0.1144406795501709 + }, + { + "epoch": 2.40020751953125e-06, + "model_forward_time": 0.0244443416595459, + "step": 1573 + }, + { + "epoch": 2.40020751953125e-06, + "step": 1573, + "training_step_time": 0.11126399040222168 + }, + { + "epoch": 2.4017333984375e-06, + "model_forward_time": 0.025219440460205078, + "step": 1574 + }, + { + "epoch": 2.4017333984375e-06, + "step": 1574, + "training_step_time": 0.11485075950622559 + }, + { + "epoch": 2.40325927734375e-06, + "model_forward_time": 0.025014638900756836, + "step": 1575 + }, + { + "epoch": 2.40325927734375e-06, + "step": 1575, + "training_step_time": 0.11290979385375977 + }, + { + "epoch": 2.40478515625e-06, + "model_forward_time": 0.025056123733520508, + "step": 1576 + }, + { + "epoch": 2.40478515625e-06, + "step": 1576, + "training_step_time": 0.11079597473144531 + }, + { + "epoch": 2.40631103515625e-06, + "model_forward_time": 0.0247952938079834, + "step": 1577 + }, + { + "epoch": 2.40631103515625e-06, + "step": 1577, + "training_step_time": 0.11685681343078613 + }, + { + "epoch": 2.4078369140625e-06, + "model_forward_time": 0.02493453025817871, + "step": 1578 + }, + { + "epoch": 2.4078369140625e-06, + "step": 1578, + "training_step_time": 0.11461615562438965 + }, + { + "epoch": 2.40936279296875e-06, + "model_forward_time": 0.02508831024169922, + "step": 1579 + }, + { + "epoch": 2.40936279296875e-06, + "step": 1579, + "training_step_time": 0.11315345764160156 + }, + { + "epoch": 2.410888671875e-06, + "grad_norm": 1.0081157684326172, + "learning_rate": 9.999805586153205e-05, + "loss": 0.1533, + "step": 1580 + }, + { + "epoch": 2.410888671875e-06, + "model_forward_time": 0.024403810501098633, + "step": 1580 + }, + { + "epoch": 2.410888671875e-06, + "step": 1580, + "training_step_time": 0.10820603370666504 + }, + { + "epoch": 2.41241455078125e-06, + "model_forward_time": 0.028634071350097656, + "step": 1581 + }, + { + "epoch": 2.41241455078125e-06, + "step": 1581, + "training_step_time": 0.11458706855773926 + }, + { + "epoch": 2.4139404296875e-06, + "model_forward_time": 0.024892330169677734, + "step": 1582 + }, + { + "epoch": 2.4139404296875e-06, + "step": 1582, + "training_step_time": 0.10680365562438965 + }, + { + "epoch": 2.41546630859375e-06, + "model_forward_time": 0.025922536849975586, + "step": 1583 + }, + { + "epoch": 2.41546630859375e-06, + "step": 1583, + "training_step_time": 0.10855865478515625 + }, + { + "epoch": 2.4169921875e-06, + "model_forward_time": 0.025766849517822266, + "step": 1584 + }, + { + "epoch": 2.4169921875e-06, + "step": 1584, + "training_step_time": 0.10608816146850586 + }, + { + "epoch": 2.41851806640625e-06, + "model_forward_time": 0.025815248489379883, + "step": 1585 + }, + { + "epoch": 2.41851806640625e-06, + "step": 1585, + "training_step_time": 0.11077761650085449 + }, + { + "epoch": 2.4200439453125e-06, + "model_forward_time": 0.025668859481811523, + "step": 1586 + }, + { + "epoch": 2.4200439453125e-06, + "step": 1586, + "training_step_time": 0.10840249061584473 + }, + { + "epoch": 2.42156982421875e-06, + "model_forward_time": 0.025699853897094727, + "step": 1587 + }, + { + "epoch": 2.42156982421875e-06, + "step": 1587, + "training_step_time": 0.10577964782714844 + }, + { + "epoch": 2.423095703125e-06, + "model_forward_time": 0.025195598602294922, + "step": 1588 + }, + { + "epoch": 2.423095703125e-06, + "step": 1588, + "training_step_time": 0.10590195655822754 + }, + { + "epoch": 2.42462158203125e-06, + "model_forward_time": 0.026769161224365234, + "step": 1589 + }, + { + "epoch": 2.42462158203125e-06, + "step": 1589, + "training_step_time": 0.11101388931274414 + }, + { + "epoch": 2.4261474609375e-06, + "grad_norm": 0.8160147070884705, + "learning_rate": 9.999753945398704e-05, + "loss": 0.1827, + "step": 1590 + }, + { + "epoch": 2.4261474609375e-06, + "model_forward_time": 0.024907827377319336, + "step": 1590 + }, + { + "epoch": 2.4261474609375e-06, + "step": 1590, + "training_step_time": 0.10895228385925293 + }, + { + "epoch": 2.42767333984375e-06, + "model_forward_time": 0.025563955307006836, + "step": 1591 + }, + { + "epoch": 2.42767333984375e-06, + "step": 1591, + "training_step_time": 0.10527420043945312 + }, + { + "epoch": 2.42919921875e-06, + "model_forward_time": 0.029664039611816406, + "step": 1592 + }, + { + "epoch": 2.42919921875e-06, + "step": 1592, + "training_step_time": 0.1105034351348877 + }, + { + "epoch": 2.43072509765625e-06, + "model_forward_time": 0.02521681785583496, + "step": 1593 + }, + { + "epoch": 2.43072509765625e-06, + "step": 1593, + "training_step_time": 0.1050713062286377 + }, + { + "epoch": 2.4322509765625e-06, + "model_forward_time": 0.02588486671447754, + "step": 1594 + }, + { + "epoch": 2.4322509765625e-06, + "step": 1594, + "training_step_time": 0.1811234951019287 + }, + { + "epoch": 2.43377685546875e-06, + "model_forward_time": 0.024675607681274414, + "step": 1595 + }, + { + "epoch": 2.43377685546875e-06, + "step": 1595, + "training_step_time": 0.18916702270507812 + }, + { + "epoch": 2.435302734375e-06, + "model_forward_time": 0.024202585220336914, + "step": 1596 + }, + { + "epoch": 2.435302734375e-06, + "step": 1596, + "training_step_time": 0.16530394554138184 + }, + { + "epoch": 2.43682861328125e-06, + "model_forward_time": 0.024588823318481445, + "step": 1597 + }, + { + "epoch": 2.43682861328125e-06, + "step": 1597, + "training_step_time": 0.1716609001159668 + }, + { + "epoch": 2.4383544921875e-06, + "model_forward_time": 0.02463054656982422, + "step": 1598 + }, + { + "epoch": 2.4383544921875e-06, + "step": 1598, + "training_step_time": 0.18728399276733398 + }, + { + "epoch": 2.43988037109375e-06, + "model_forward_time": 0.025175809860229492, + "step": 1599 + }, + { + "epoch": 2.43988037109375e-06, + "step": 1599, + "training_step_time": 0.10735058784484863 + }, + { + "epoch": 2.44140625e-06, + "grad_norm": 1.0092593431472778, + "learning_rate": 9.999696229471716e-05, + "loss": 0.1709, + "step": 1600 + }, + { + "epoch": 2.44140625e-06, + "model_forward_time": 0.024734020233154297, + "step": 1600 + }, + { + "epoch": 2.44140625e-06, + "step": 1600, + "training_step_time": 0.1192929744720459 + }, + { + "epoch": 2.44293212890625e-06, + "model_forward_time": 0.02531719207763672, + "step": 1601 + }, + { + "epoch": 2.44293212890625e-06, + "step": 1601, + "training_step_time": 0.10948729515075684 + }, + { + "epoch": 2.4444580078125e-06, + "model_forward_time": 0.025701284408569336, + "step": 1602 + }, + { + "epoch": 2.4444580078125e-06, + "step": 1602, + "training_step_time": 0.11162304878234863 + }, + { + "epoch": 2.44598388671875e-06, + "model_forward_time": 0.026262283325195312, + "step": 1603 + }, + { + "epoch": 2.44598388671875e-06, + "step": 1603, + "training_step_time": 0.10985779762268066 + }, + { + "epoch": 2.447509765625e-06, + "model_forward_time": 0.025617361068725586, + "step": 1604 + }, + { + "epoch": 2.447509765625e-06, + "step": 1604, + "training_step_time": 0.11783075332641602 + }, + { + "epoch": 2.44903564453125e-06, + "model_forward_time": 0.025620698928833008, + "step": 1605 + }, + { + "epoch": 2.44903564453125e-06, + "step": 1605, + "training_step_time": 0.14901280403137207 + }, + { + "epoch": 2.4505615234375e-06, + "model_forward_time": 0.02506113052368164, + "step": 1606 + }, + { + "epoch": 2.4505615234375e-06, + "step": 1606, + "training_step_time": 0.11181807518005371 + }, + { + "epoch": 2.45208740234375e-06, + "model_forward_time": 0.025460481643676758, + "step": 1607 + }, + { + "epoch": 2.45208740234375e-06, + "step": 1607, + "training_step_time": 0.11501336097717285 + }, + { + "epoch": 2.45361328125e-06, + "model_forward_time": 0.025843143463134766, + "step": 1608 + }, + { + "epoch": 2.45361328125e-06, + "step": 1608, + "training_step_time": 0.11146688461303711 + }, + { + "epoch": 2.45513916015625e-06, + "model_forward_time": 0.024762868881225586, + "step": 1609 + }, + { + "epoch": 2.45513916015625e-06, + "step": 1609, + "training_step_time": 0.18772244453430176 + }, + { + "epoch": 2.4566650390625e-06, + "grad_norm": 0.6679033637046814, + "learning_rate": 9.999632438442367e-05, + "loss": 0.1554, + "step": 1610 + }, + { + "epoch": 2.4566650390625e-06, + "model_forward_time": 0.024842500686645508, + "step": 1610 + }, + { + "epoch": 2.4566650390625e-06, + "step": 1610, + "training_step_time": 0.11209440231323242 + }, + { + "epoch": 2.45819091796875e-06, + "model_forward_time": 0.024820327758789062, + "step": 1611 + }, + { + "epoch": 2.45819091796875e-06, + "step": 1611, + "training_step_time": 0.11181807518005371 + }, + { + "epoch": 2.459716796875e-06, + "model_forward_time": 0.0253753662109375, + "step": 1612 + }, + { + "epoch": 2.459716796875e-06, + "step": 1612, + "training_step_time": 0.10635232925415039 + }, + { + "epoch": 2.46124267578125e-06, + "model_forward_time": 0.024628162384033203, + "step": 1613 + }, + { + "epoch": 2.46124267578125e-06, + "step": 1613, + "training_step_time": 0.10764741897583008 + }, + { + "epoch": 2.4627685546875e-06, + "model_forward_time": 0.025855302810668945, + "step": 1614 + }, + { + "epoch": 2.4627685546875e-06, + "step": 1614, + "training_step_time": 0.11245179176330566 + }, + { + "epoch": 2.46429443359375e-06, + "model_forward_time": 0.025545835494995117, + "step": 1615 + }, + { + "epoch": 2.46429443359375e-06, + "step": 1615, + "training_step_time": 0.10765624046325684 + }, + { + "epoch": 2.4658203125e-06, + "model_forward_time": 0.025587797164916992, + "step": 1616 + }, + { + "epoch": 2.4658203125e-06, + "step": 1616, + "training_step_time": 0.10793781280517578 + }, + { + "epoch": 2.46734619140625e-06, + "model_forward_time": 0.025378942489624023, + "step": 1617 + }, + { + "epoch": 2.46734619140625e-06, + "step": 1617, + "training_step_time": 0.10886073112487793 + }, + { + "epoch": 2.4688720703125e-06, + "model_forward_time": 0.025366783142089844, + "step": 1618 + }, + { + "epoch": 2.4688720703125e-06, + "step": 1618, + "training_step_time": 0.11404919624328613 + }, + { + "epoch": 2.47039794921875e-06, + "model_forward_time": 0.025451183319091797, + "step": 1619 + }, + { + "epoch": 2.47039794921875e-06, + "step": 1619, + "training_step_time": 0.11955642700195312 + }, + { + "epoch": 2.471923828125e-06, + "grad_norm": 0.9833411574363708, + "learning_rate": 9.99956257238817e-05, + "loss": 0.1512, + "step": 1620 + }, + { + "epoch": 2.471923828125e-06, + "model_forward_time": 0.025299549102783203, + "step": 1620 + }, + { + "epoch": 2.471923828125e-06, + "step": 1620, + "training_step_time": 0.10600805282592773 + }, + { + "epoch": 2.47344970703125e-06, + "model_forward_time": 0.02526545524597168, + "step": 1621 + }, + { + "epoch": 2.47344970703125e-06, + "step": 1621, + "training_step_time": 0.10697603225708008 + }, + { + "epoch": 2.4749755859375e-06, + "model_forward_time": 0.025009870529174805, + "step": 1622 + }, + { + "epoch": 2.4749755859375e-06, + "step": 1622, + "training_step_time": 0.10750985145568848 + }, + { + "epoch": 2.47650146484375e-06, + "model_forward_time": 0.02521038055419922, + "step": 1623 + }, + { + "epoch": 2.47650146484375e-06, + "step": 1623, + "training_step_time": 0.11015105247497559 + }, + { + "epoch": 2.47802734375e-06, + "model_forward_time": 0.02494192123413086, + "step": 1624 + }, + { + "epoch": 2.47802734375e-06, + "step": 1624, + "training_step_time": 0.10486960411071777 + }, + { + "epoch": 2.47955322265625e-06, + "model_forward_time": 0.02512359619140625, + "step": 1625 + }, + { + "epoch": 2.47955322265625e-06, + "step": 1625, + "training_step_time": 0.10471177101135254 + }, + { + "epoch": 2.4810791015625e-06, + "model_forward_time": 0.025434017181396484, + "step": 1626 + }, + { + "epoch": 2.4810791015625e-06, + "step": 1626, + "training_step_time": 0.10604977607727051 + }, + { + "epoch": 2.48260498046875e-06, + "model_forward_time": 0.0268862247467041, + "step": 1627 + }, + { + "epoch": 2.48260498046875e-06, + "step": 1627, + "training_step_time": 0.11164212226867676 + }, + { + "epoch": 2.484130859375e-06, + "model_forward_time": 0.024907827377319336, + "step": 1628 + }, + { + "epoch": 2.484130859375e-06, + "step": 1628, + "training_step_time": 0.10750126838684082 + }, + { + "epoch": 2.48565673828125e-06, + "model_forward_time": 0.025626659393310547, + "step": 1629 + }, + { + "epoch": 2.48565673828125e-06, + "step": 1629, + "training_step_time": 0.10658764839172363 + }, + { + "epoch": 2.4871826171875e-06, + "grad_norm": 0.9036684632301331, + "learning_rate": 9.999486631394021e-05, + "loss": 0.1753, + "step": 1630 + }, + { + "epoch": 2.4871826171875e-06, + "model_forward_time": 0.02560281753540039, + "step": 1630 + }, + { + "epoch": 2.4871826171875e-06, + "step": 1630, + "training_step_time": 0.10422348976135254 + }, + { + "epoch": 2.48870849609375e-06, + "model_forward_time": 0.025482177734375, + "step": 1631 + }, + { + "epoch": 2.48870849609375e-06, + "step": 1631, + "training_step_time": 0.1080784797668457 + }, + { + "epoch": 2.490234375e-06, + "model_forward_time": 0.025289535522460938, + "step": 1632 + }, + { + "epoch": 2.490234375e-06, + "step": 1632, + "training_step_time": 0.10364174842834473 + }, + { + "epoch": 2.49176025390625e-06, + "model_forward_time": 0.025511980056762695, + "step": 1633 + }, + { + "epoch": 2.49176025390625e-06, + "step": 1633, + "training_step_time": 0.10306596755981445 + }, + { + "epoch": 2.4932861328125e-06, + "model_forward_time": 0.025533199310302734, + "step": 1634 + }, + { + "epoch": 2.4932861328125e-06, + "step": 1634, + "training_step_time": 0.10653114318847656 + }, + { + "epoch": 2.49481201171875e-06, + "model_forward_time": 0.02448439598083496, + "step": 1635 + }, + { + "epoch": 2.49481201171875e-06, + "step": 1635, + "training_step_time": 0.10642290115356445 + }, + { + "epoch": 2.496337890625e-06, + "model_forward_time": 0.025453567504882812, + "step": 1636 + }, + { + "epoch": 2.496337890625e-06, + "step": 1636, + "training_step_time": 0.10813236236572266 + }, + { + "epoch": 2.49786376953125e-06, + "model_forward_time": 0.02580857276916504, + "step": 1637 + }, + { + "epoch": 2.49786376953125e-06, + "step": 1637, + "training_step_time": 0.1059873104095459 + }, + { + "epoch": 2.4993896484375e-06, + "model_forward_time": 0.02550220489501953, + "step": 1638 + }, + { + "epoch": 2.4993896484375e-06, + "step": 1638, + "training_step_time": 0.10642313957214355 + }, + { + "epoch": 2.50091552734375e-06, + "model_forward_time": 0.025683164596557617, + "step": 1639 + }, + { + "epoch": 2.50091552734375e-06, + "step": 1639, + "training_step_time": 0.10556793212890625 + }, + { + "epoch": 2.50244140625e-06, + "grad_norm": 1.1588786840438843, + "learning_rate": 9.999404615552194e-05, + "loss": 0.1688, + "step": 1640 + }, + { + "epoch": 2.50244140625e-06, + "model_forward_time": 0.025720834732055664, + "step": 1640 + }, + { + "epoch": 2.50244140625e-06, + "step": 1640, + "training_step_time": 0.10625624656677246 + }, + { + "epoch": 2.50396728515625e-06, + "model_forward_time": 0.02566981315612793, + "step": 1641 + }, + { + "epoch": 2.50396728515625e-06, + "step": 1641, + "training_step_time": 0.10621213912963867 + }, + { + "epoch": 2.5054931640625e-06, + "model_forward_time": 0.025335311889648438, + "step": 1642 + }, + { + "epoch": 2.5054931640625e-06, + "step": 1642, + "training_step_time": 0.14310026168823242 + }, + { + "epoch": 2.50701904296875e-06, + "model_forward_time": 0.025811195373535156, + "step": 1643 + }, + { + "epoch": 2.50701904296875e-06, + "step": 1643, + "training_step_time": 0.14880943298339844 + }, + { + "epoch": 2.508544921875e-06, + "model_forward_time": 0.025105714797973633, + "step": 1644 + }, + { + "epoch": 2.508544921875e-06, + "step": 1644, + "training_step_time": 0.19982671737670898 + }, + { + "epoch": 2.51007080078125e-06, + "model_forward_time": 0.024760723114013672, + "step": 1645 + }, + { + "epoch": 2.51007080078125e-06, + "step": 1645, + "training_step_time": 0.14131951332092285 + }, + { + "epoch": 2.5115966796875e-06, + "model_forward_time": 0.024747848510742188, + "step": 1646 + }, + { + "epoch": 2.5115966796875e-06, + "step": 1646, + "training_step_time": 0.15313315391540527 + }, + { + "epoch": 2.51312255859375e-06, + "model_forward_time": 0.02459120750427246, + "step": 1647 + }, + { + "epoch": 2.51312255859375e-06, + "step": 1647, + "training_step_time": 0.1614854335784912 + }, + { + "epoch": 2.5146484375e-06, + "model_forward_time": 0.025159597396850586, + "step": 1648 + }, + { + "epoch": 2.5146484375e-06, + "step": 1648, + "training_step_time": 0.11337447166442871 + }, + { + "epoch": 2.51617431640625e-06, + "model_forward_time": 0.024147748947143555, + "step": 1649 + }, + { + "epoch": 2.51617431640625e-06, + "step": 1649, + "training_step_time": 0.11206936836242676 + }, + { + "epoch": 2.5177001953125e-06, + "grad_norm": 0.9198539853096008, + "learning_rate": 9.999316524962345e-05, + "loss": 0.1555, + "step": 1650 + }, + { + "epoch": 2.5177001953125e-06, + "model_forward_time": 0.029583215713500977, + "step": 1650 + }, + { + "epoch": 2.5177001953125e-06, + "step": 1650, + "training_step_time": 0.11736631393432617 + }, + { + "epoch": 2.51922607421875e-06, + "model_forward_time": 0.025638341903686523, + "step": 1651 + }, + { + "epoch": 2.51922607421875e-06, + "step": 1651, + "training_step_time": 0.17767643928527832 + }, + { + "epoch": 2.520751953125e-06, + "model_forward_time": 0.024290084838867188, + "step": 1652 + }, + { + "epoch": 2.520751953125e-06, + "step": 1652, + "training_step_time": 0.13448023796081543 + }, + { + "epoch": 2.52227783203125e-06, + "model_forward_time": 0.024469614028930664, + "step": 1653 + }, + { + "epoch": 2.52227783203125e-06, + "step": 1653, + "training_step_time": 0.10694003105163574 + }, + { + "epoch": 2.5238037109375e-06, + "model_forward_time": 0.025369882583618164, + "step": 1654 + }, + { + "epoch": 2.5238037109375e-06, + "step": 1654, + "training_step_time": 0.11945462226867676 + }, + { + "epoch": 2.52532958984375e-06, + "model_forward_time": 0.025226593017578125, + "step": 1655 + }, + { + "epoch": 2.52532958984375e-06, + "step": 1655, + "training_step_time": 0.11599254608154297 + }, + { + "epoch": 2.52685546875e-06, + "model_forward_time": 0.02557682991027832, + "step": 1656 + }, + { + "epoch": 2.52685546875e-06, + "step": 1656, + "training_step_time": 0.11301517486572266 + }, + { + "epoch": 2.52838134765625e-06, + "model_forward_time": 0.025043010711669922, + "step": 1657 + }, + { + "epoch": 2.52838134765625e-06, + "step": 1657, + "training_step_time": 0.1908574104309082 + }, + { + "epoch": 2.5299072265625e-06, + "model_forward_time": 0.026215553283691406, + "step": 1658 + }, + { + "epoch": 2.5299072265625e-06, + "step": 1658, + "training_step_time": 0.10634136199951172 + }, + { + "epoch": 2.53143310546875e-06, + "model_forward_time": 0.02524113655090332, + "step": 1659 + }, + { + "epoch": 2.53143310546875e-06, + "step": 1659, + "training_step_time": 0.10634851455688477 + }, + { + "epoch": 2.532958984375e-06, + "grad_norm": 1.0522770881652832, + "learning_rate": 9.999222359731514e-05, + "loss": 0.1707, + "step": 1660 + }, + { + "epoch": 2.532958984375e-06, + "model_forward_time": 0.024770021438598633, + "step": 1660 + }, + { + "epoch": 2.532958984375e-06, + "step": 1660, + "training_step_time": 0.10791730880737305 + }, + { + "epoch": 2.53448486328125e-06, + "model_forward_time": 0.024999618530273438, + "step": 1661 + }, + { + "epoch": 2.53448486328125e-06, + "step": 1661, + "training_step_time": 0.11050176620483398 + }, + { + "epoch": 2.5360107421875e-06, + "model_forward_time": 0.025586605072021484, + "step": 1662 + }, + { + "epoch": 2.5360107421875e-06, + "step": 1662, + "training_step_time": 0.11785173416137695 + }, + { + "epoch": 2.53753662109375e-06, + "model_forward_time": 0.0258634090423584, + "step": 1663 + }, + { + "epoch": 2.53753662109375e-06, + "step": 1663, + "training_step_time": 0.11034440994262695 + }, + { + "epoch": 2.5390625e-06, + "model_forward_time": 0.025838851928710938, + "step": 1664 + }, + { + "epoch": 2.5390625e-06, + "step": 1664, + "training_step_time": 0.21900415420532227 + }, + { + "epoch": 2.54058837890625e-06, + "model_forward_time": 0.024809837341308594, + "step": 1665 + }, + { + "epoch": 2.54058837890625e-06, + "step": 1665, + "training_step_time": 0.11528658866882324 + }, + { + "epoch": 2.5421142578125e-06, + "model_forward_time": 0.024857282638549805, + "step": 1666 + }, + { + "epoch": 2.5421142578125e-06, + "step": 1666, + "training_step_time": 0.10450148582458496 + }, + { + "epoch": 2.54364013671875e-06, + "model_forward_time": 0.025818347930908203, + "step": 1667 + }, + { + "epoch": 2.54364013671875e-06, + "step": 1667, + "training_step_time": 0.10629725456237793 + }, + { + "epoch": 2.545166015625e-06, + "model_forward_time": 0.025442838668823242, + "step": 1668 + }, + { + "epoch": 2.545166015625e-06, + "step": 1668, + "training_step_time": 0.11000633239746094 + }, + { + "epoch": 2.54669189453125e-06, + "model_forward_time": 0.025569677352905273, + "step": 1669 + }, + { + "epoch": 2.54669189453125e-06, + "step": 1669, + "training_step_time": 0.1071164608001709 + }, + { + "epoch": 2.5482177734375e-06, + "grad_norm": 0.6459003686904907, + "learning_rate": 9.999122119974121e-05, + "loss": 0.1302, + "step": 1670 + }, + { + "epoch": 2.5482177734375e-06, + "model_forward_time": 0.025876283645629883, + "step": 1670 + }, + { + "epoch": 2.5482177734375e-06, + "step": 1670, + "training_step_time": 0.10610389709472656 + }, + { + "epoch": 2.54974365234375e-06, + "model_forward_time": 0.025436878204345703, + "step": 1671 + }, + { + "epoch": 2.54974365234375e-06, + "step": 1671, + "training_step_time": 0.10762882232666016 + }, + { + "epoch": 2.55126953125e-06, + "model_forward_time": 0.024594545364379883, + "step": 1672 + }, + { + "epoch": 2.55126953125e-06, + "step": 1672, + "training_step_time": 0.11058759689331055 + }, + { + "epoch": 2.55279541015625e-06, + "model_forward_time": 0.02453923225402832, + "step": 1673 + }, + { + "epoch": 2.55279541015625e-06, + "step": 1673, + "training_step_time": 0.10865974426269531 + }, + { + "epoch": 2.5543212890625e-06, + "model_forward_time": 0.024367332458496094, + "step": 1674 + }, + { + "epoch": 2.5543212890625e-06, + "step": 1674, + "training_step_time": 0.10678243637084961 + }, + { + "epoch": 2.55584716796875e-06, + "model_forward_time": 0.024334192276000977, + "step": 1675 + }, + { + "epoch": 2.55584716796875e-06, + "step": 1675, + "training_step_time": 0.10779595375061035 + }, + { + "epoch": 2.557373046875e-06, + "model_forward_time": 0.0243072509765625, + "step": 1676 + }, + { + "epoch": 2.557373046875e-06, + "step": 1676, + "training_step_time": 0.1083981990814209 + }, + { + "epoch": 2.55889892578125e-06, + "model_forward_time": 0.024738788604736328, + "step": 1677 + }, + { + "epoch": 2.55889892578125e-06, + "step": 1677, + "training_step_time": 0.10526275634765625 + }, + { + "epoch": 2.5604248046875e-06, + "model_forward_time": 0.025640010833740234, + "step": 1678 + }, + { + "epoch": 2.5604248046875e-06, + "step": 1678, + "training_step_time": 0.10669565200805664 + }, + { + "epoch": 2.56195068359375e-06, + "model_forward_time": 0.02536463737487793, + "step": 1679 + }, + { + "epoch": 2.56195068359375e-06, + "step": 1679, + "training_step_time": 0.10544252395629883 + }, + { + "epoch": 2.5634765625e-06, + "grad_norm": 0.7667890787124634, + "learning_rate": 9.999015805811965e-05, + "loss": 0.1444, + "step": 1680 + }, + { + "epoch": 2.5634765625e-06, + "model_forward_time": 0.025038719177246094, + "step": 1680 + }, + { + "epoch": 2.5634765625e-06, + "step": 1680, + "training_step_time": 0.10881948471069336 + }, + { + "epoch": 2.56500244140625e-06, + "model_forward_time": 0.02547621726989746, + "step": 1681 + }, + { + "epoch": 2.56500244140625e-06, + "step": 1681, + "training_step_time": 0.10595035552978516 + }, + { + "epoch": 2.5665283203125e-06, + "model_forward_time": 0.02556467056274414, + "step": 1682 + }, + { + "epoch": 2.5665283203125e-06, + "step": 1682, + "training_step_time": 0.10466957092285156 + }, + { + "epoch": 2.56805419921875e-06, + "model_forward_time": 0.025458335876464844, + "step": 1683 + }, + { + "epoch": 2.56805419921875e-06, + "step": 1683, + "training_step_time": 0.10703253746032715 + }, + { + "epoch": 2.569580078125e-06, + "model_forward_time": 0.025152206420898438, + "step": 1684 + }, + { + "epoch": 2.569580078125e-06, + "step": 1684, + "training_step_time": 0.10369753837585449 + }, + { + "epoch": 2.57110595703125e-06, + "model_forward_time": 0.025420665740966797, + "step": 1685 + }, + { + "epoch": 2.57110595703125e-06, + "step": 1685, + "training_step_time": 0.1065969467163086 + }, + { + "epoch": 2.5726318359375e-06, + "model_forward_time": 0.025539875030517578, + "step": 1686 + }, + { + "epoch": 2.5726318359375e-06, + "step": 1686, + "training_step_time": 0.10527348518371582 + }, + { + "epoch": 2.57415771484375e-06, + "model_forward_time": 0.025393962860107422, + "step": 1687 + }, + { + "epoch": 2.57415771484375e-06, + "step": 1687, + "training_step_time": 0.10501480102539062 + }, + { + "epoch": 2.57568359375e-06, + "model_forward_time": 0.025498628616333008, + "step": 1688 + }, + { + "epoch": 2.57568359375e-06, + "step": 1688, + "training_step_time": 0.1864030361175537 + }, + { + "epoch": 2.57720947265625e-06, + "model_forward_time": 0.024951934814453125, + "step": 1689 + }, + { + "epoch": 2.57720947265625e-06, + "step": 1689, + "training_step_time": 0.17250490188598633 + }, + { + "epoch": 2.5787353515625e-06, + "grad_norm": 1.1121721267700195, + "learning_rate": 9.998903417374228e-05, + "loss": 0.1606, + "step": 1690 + }, + { + "epoch": 2.5787353515625e-06, + "model_forward_time": 0.024769067764282227, + "step": 1690 + }, + { + "epoch": 2.5787353515625e-06, + "step": 1690, + "training_step_time": 0.17179083824157715 + }, + { + "epoch": 2.58026123046875e-06, + "model_forward_time": 0.024854660034179688, + "step": 1691 + }, + { + "epoch": 2.58026123046875e-06, + "step": 1691, + "training_step_time": 0.14887189865112305 + }, + { + "epoch": 2.581787109375e-06, + "model_forward_time": 0.024777889251708984, + "step": 1692 + }, + { + "epoch": 2.581787109375e-06, + "step": 1692, + "training_step_time": 0.2230379581451416 + }, + { + "epoch": 2.58331298828125e-06, + "model_forward_time": 0.025450468063354492, + "step": 1693 + }, + { + "epoch": 2.58331298828125e-06, + "step": 1693, + "training_step_time": 0.11564517021179199 + }, + { + "epoch": 2.5848388671875e-06, + "model_forward_time": 0.02463817596435547, + "step": 1694 + }, + { + "epoch": 2.5848388671875e-06, + "step": 1694, + "training_step_time": 0.1060943603515625 + }, + { + "epoch": 2.58636474609375e-06, + "model_forward_time": 0.025239944458007812, + "step": 1695 + }, + { + "epoch": 2.58636474609375e-06, + "step": 1695, + "training_step_time": 0.1125338077545166 + }, + { + "epoch": 2.587890625e-06, + "model_forward_time": 0.025891780853271484, + "step": 1696 + }, + { + "epoch": 2.587890625e-06, + "step": 1696, + "training_step_time": 0.10701942443847656 + }, + { + "epoch": 2.58941650390625e-06, + "model_forward_time": 0.025365114212036133, + "step": 1697 + }, + { + "epoch": 2.58941650390625e-06, + "step": 1697, + "training_step_time": 0.1244502067565918 + }, + { + "epoch": 2.5909423828125e-06, + "model_forward_time": 0.025916337966918945, + "step": 1698 + }, + { + "epoch": 2.5909423828125e-06, + "step": 1698, + "training_step_time": 0.1482686996459961 + }, + { + "epoch": 2.59246826171875e-06, + "model_forward_time": 0.02519822120666504, + "step": 1699 + }, + { + "epoch": 2.59246826171875e-06, + "step": 1699, + "training_step_time": 0.10748696327209473 + }, + { + "epoch": 2.593994140625e-06, + "grad_norm": 0.8197618126869202, + "learning_rate": 9.998784954797474e-05, + "loss": 0.1401, + "step": 1700 + }, + { + "epoch": 2.593994140625e-06, + "model_forward_time": 0.02612471580505371, + "step": 1700 + }, + { + "epoch": 2.593994140625e-06, + "step": 1700, + "training_step_time": 0.10956525802612305 + }, + { + "epoch": 2.59552001953125e-06, + "model_forward_time": 0.025414705276489258, + "step": 1701 + }, + { + "epoch": 2.59552001953125e-06, + "step": 1701, + "training_step_time": 0.10976600646972656 + }, + { + "epoch": 2.5970458984375e-06, + "model_forward_time": 0.025226354598999023, + "step": 1702 + }, + { + "epoch": 2.5970458984375e-06, + "step": 1702, + "training_step_time": 0.16132068634033203 + }, + { + "epoch": 2.59857177734375e-06, + "model_forward_time": 0.024600505828857422, + "step": 1703 + }, + { + "epoch": 2.59857177734375e-06, + "step": 1703, + "training_step_time": 0.1523456573486328 + }, + { + "epoch": 2.60009765625e-06, + "model_forward_time": 0.024468660354614258, + "step": 1704 + }, + { + "epoch": 2.60009765625e-06, + "step": 1704, + "training_step_time": 0.1074533462524414 + }, + { + "epoch": 2.60162353515625e-06, + "model_forward_time": 0.024718284606933594, + "step": 1705 + }, + { + "epoch": 2.60162353515625e-06, + "step": 1705, + "training_step_time": 0.1078338623046875 + }, + { + "epoch": 2.6031494140625e-06, + "model_forward_time": 0.02620387077331543, + "step": 1706 + }, + { + "epoch": 2.6031494140625e-06, + "step": 1706, + "training_step_time": 0.11070632934570312 + }, + { + "epoch": 2.60467529296875e-06, + "model_forward_time": 0.02525043487548828, + "step": 1707 + }, + { + "epoch": 2.60467529296875e-06, + "step": 1707, + "training_step_time": 0.21219873428344727 + }, + { + "epoch": 2.606201171875e-06, + "model_forward_time": 0.025011301040649414, + "step": 1708 + }, + { + "epoch": 2.606201171875e-06, + "step": 1708, + "training_step_time": 0.12181925773620605 + }, + { + "epoch": 2.60772705078125e-06, + "model_forward_time": 0.02830028533935547, + "step": 1709 + }, + { + "epoch": 2.60772705078125e-06, + "step": 1709, + "training_step_time": 0.1104726791381836 + }, + { + "epoch": 2.6092529296875e-06, + "grad_norm": 1.292397379875183, + "learning_rate": 9.998660418225645e-05, + "loss": 0.1681, + "step": 1710 + }, + { + "epoch": 2.6092529296875e-06, + "model_forward_time": 0.02522897720336914, + "step": 1710 + }, + { + "epoch": 2.6092529296875e-06, + "step": 1710, + "training_step_time": 0.22046256065368652 + }, + { + "epoch": 2.61077880859375e-06, + "model_forward_time": 0.0238802433013916, + "step": 1711 + }, + { + "epoch": 2.61077880859375e-06, + "step": 1711, + "training_step_time": 0.11639142036437988 + }, + { + "epoch": 2.6123046875e-06, + "model_forward_time": 0.023783206939697266, + "step": 1712 + }, + { + "epoch": 2.6123046875e-06, + "step": 1712, + "training_step_time": 0.10813117027282715 + }, + { + "epoch": 2.61383056640625e-06, + "model_forward_time": 0.0244901180267334, + "step": 1713 + }, + { + "epoch": 2.61383056640625e-06, + "step": 1713, + "training_step_time": 0.10531377792358398 + }, + { + "epoch": 2.6153564453125e-06, + "model_forward_time": 0.0253448486328125, + "step": 1714 + }, + { + "epoch": 2.6153564453125e-06, + "step": 1714, + "training_step_time": 0.11465668678283691 + }, + { + "epoch": 2.61688232421875e-06, + "model_forward_time": 0.025406360626220703, + "step": 1715 + }, + { + "epoch": 2.61688232421875e-06, + "step": 1715, + "training_step_time": 0.12316679954528809 + }, + { + "epoch": 2.618408203125e-06, + "model_forward_time": 0.02521204948425293, + "step": 1716 + }, + { + "epoch": 2.618408203125e-06, + "step": 1716, + "training_step_time": 0.10997319221496582 + }, + { + "epoch": 2.61993408203125e-06, + "model_forward_time": 0.025532245635986328, + "step": 1717 + }, + { + "epoch": 2.61993408203125e-06, + "step": 1717, + "training_step_time": 0.14649748802185059 + }, + { + "epoch": 2.6214599609375e-06, + "model_forward_time": 0.02529001235961914, + "step": 1718 + }, + { + "epoch": 2.6214599609375e-06, + "step": 1718, + "training_step_time": 0.20202302932739258 + }, + { + "epoch": 2.62298583984375e-06, + "model_forward_time": 0.02382373809814453, + "step": 1719 + }, + { + "epoch": 2.62298583984375e-06, + "step": 1719, + "training_step_time": 0.2074873447418213 + }, + { + "epoch": 2.62451171875e-06, + "grad_norm": 0.7815642356872559, + "learning_rate": 9.998529807810064e-05, + "loss": 0.1668, + "step": 1720 + }, + { + "epoch": 2.62451171875e-06, + "model_forward_time": 0.02386641502380371, + "step": 1720 + }, + { + "epoch": 2.62451171875e-06, + "step": 1720, + "training_step_time": 0.19618439674377441 + }, + { + "epoch": 2.62603759765625e-06, + "model_forward_time": 0.02385091781616211, + "step": 1721 + }, + { + "epoch": 2.62603759765625e-06, + "step": 1721, + "training_step_time": 0.1876358985900879 + }, + { + "epoch": 2.6275634765625e-06, + "model_forward_time": 0.023627519607543945, + "step": 1722 + }, + { + "epoch": 2.6275634765625e-06, + "step": 1722, + "training_step_time": 0.17208218574523926 + }, + { + "epoch": 2.62908935546875e-06, + "model_forward_time": 0.024857282638549805, + "step": 1723 + }, + { + "epoch": 2.62908935546875e-06, + "step": 1723, + "training_step_time": 0.15814995765686035 + }, + { + "epoch": 2.630615234375e-06, + "model_forward_time": 0.024690866470336914, + "step": 1724 + }, + { + "epoch": 2.630615234375e-06, + "step": 1724, + "training_step_time": 0.1476595401763916 + }, + { + "epoch": 2.63214111328125e-06, + "model_forward_time": 0.024652481079101562, + "step": 1725 + }, + { + "epoch": 2.63214111328125e-06, + "step": 1725, + "training_step_time": 0.10276484489440918 + }, + { + "epoch": 2.6336669921875e-06, + "model_forward_time": 0.025558948516845703, + "step": 1726 + }, + { + "epoch": 2.6336669921875e-06, + "step": 1726, + "training_step_time": 0.10359597206115723 + }, + { + "epoch": 2.63519287109375e-06, + "model_forward_time": 0.02512812614440918, + "step": 1727 + }, + { + "epoch": 2.63519287109375e-06, + "step": 1727, + "training_step_time": 0.10404753684997559 + }, + { + "epoch": 2.63671875e-06, + "model_forward_time": 0.025196075439453125, + "step": 1728 + }, + { + "epoch": 2.63671875e-06, + "step": 1728, + "training_step_time": 0.10520076751708984 + }, + { + "epoch": 2.63824462890625e-06, + "model_forward_time": 0.02586674690246582, + "step": 1729 + }, + { + "epoch": 2.63824462890625e-06, + "step": 1729, + "training_step_time": 0.20693659782409668 + }, + { + "epoch": 2.6397705078125e-06, + "grad_norm": 0.9279288649559021, + "learning_rate": 9.998393123709438e-05, + "loss": 0.1554, + "step": 1730 + }, + { + "epoch": 2.6397705078125e-06, + "model_forward_time": 0.024558067321777344, + "step": 1730 + }, + { + "epoch": 2.6397705078125e-06, + "step": 1730, + "training_step_time": 0.11133003234863281 + }, + { + "epoch": 2.64129638671875e-06, + "model_forward_time": 0.024407386779785156, + "step": 1731 + }, + { + "epoch": 2.64129638671875e-06, + "step": 1731, + "training_step_time": 0.22078156471252441 + }, + { + "epoch": 2.642822265625e-06, + "model_forward_time": 0.02470850944519043, + "step": 1732 + }, + { + "epoch": 2.642822265625e-06, + "step": 1732, + "training_step_time": 0.17116665840148926 + }, + { + "epoch": 2.64434814453125e-06, + "model_forward_time": 0.024406909942626953, + "step": 1733 + }, + { + "epoch": 2.64434814453125e-06, + "step": 1733, + "training_step_time": 0.18570280075073242 + }, + { + "epoch": 2.6458740234375e-06, + "model_forward_time": 0.02422165870666504, + "step": 1734 + }, + { + "epoch": 2.6458740234375e-06, + "step": 1734, + "training_step_time": 0.12007713317871094 + }, + { + "epoch": 2.64739990234375e-06, + "model_forward_time": 0.023708343505859375, + "step": 1735 + }, + { + "epoch": 2.64739990234375e-06, + "step": 1735, + "training_step_time": 0.1344454288482666 + }, + { + "epoch": 2.64892578125e-06, + "model_forward_time": 0.024355173110961914, + "step": 1736 + }, + { + "epoch": 2.64892578125e-06, + "step": 1736, + "training_step_time": 0.1295299530029297 + }, + { + "epoch": 2.65045166015625e-06, + "model_forward_time": 0.023968219757080078, + "step": 1737 + }, + { + "epoch": 2.65045166015625e-06, + "step": 1737, + "training_step_time": 0.18445229530334473 + }, + { + "epoch": 2.6519775390625e-06, + "model_forward_time": 0.024669408798217773, + "step": 1738 + }, + { + "epoch": 2.6519775390625e-06, + "step": 1738, + "training_step_time": 0.13410305976867676 + }, + { + "epoch": 2.65350341796875e-06, + "model_forward_time": 0.024066448211669922, + "step": 1739 + }, + { + "epoch": 2.65350341796875e-06, + "step": 1739, + "training_step_time": 0.11945533752441406 + }, + { + "epoch": 2.655029296875e-06, + "grad_norm": 1.3572587966918945, + "learning_rate": 9.998250366089848e-05, + "loss": 0.1891, + "step": 1740 + }, + { + "epoch": 2.655029296875e-06, + "model_forward_time": 0.02459716796875, + "step": 1740 + }, + { + "epoch": 2.655029296875e-06, + "step": 1740, + "training_step_time": 0.20980310440063477 + }, + { + "epoch": 2.65655517578125e-06, + "model_forward_time": 0.024874448776245117, + "step": 1741 + }, + { + "epoch": 2.65655517578125e-06, + "step": 1741, + "training_step_time": 0.11301040649414062 + }, + { + "epoch": 2.6580810546875e-06, + "model_forward_time": 0.024437665939331055, + "step": 1742 + }, + { + "epoch": 2.6580810546875e-06, + "step": 1742, + "training_step_time": 0.19518685340881348 + }, + { + "epoch": 2.65960693359375e-06, + "model_forward_time": 0.02416062355041504, + "step": 1743 + }, + { + "epoch": 2.65960693359375e-06, + "step": 1743, + "training_step_time": 0.1085824966430664 + }, + { + "epoch": 2.6611328125e-06, + "model_forward_time": 0.024532794952392578, + "step": 1744 + }, + { + "epoch": 2.6611328125e-06, + "step": 1744, + "training_step_time": 0.10963058471679688 + }, + { + "epoch": 2.66265869140625e-06, + "model_forward_time": 0.025799036026000977, + "step": 1745 + }, + { + "epoch": 2.66265869140625e-06, + "step": 1745, + "training_step_time": 0.10655498504638672 + }, + { + "epoch": 2.6641845703125e-06, + "model_forward_time": 0.025012731552124023, + "step": 1746 + }, + { + "epoch": 2.6641845703125e-06, + "step": 1746, + "training_step_time": 0.10536885261535645 + }, + { + "epoch": 2.66571044921875e-06, + "model_forward_time": 0.02554631233215332, + "step": 1747 + }, + { + "epoch": 2.66571044921875e-06, + "step": 1747, + "training_step_time": 0.11204719543457031 + }, + { + "epoch": 2.667236328125e-06, + "model_forward_time": 0.027681350708007812, + "step": 1748 + }, + { + "epoch": 2.667236328125e-06, + "step": 1748, + "training_step_time": 0.11464190483093262 + }, + { + "epoch": 2.66876220703125e-06, + "model_forward_time": 0.025501012802124023, + "step": 1749 + }, + { + "epoch": 2.66876220703125e-06, + "step": 1749, + "training_step_time": 0.10691475868225098 + }, + { + "epoch": 2.6702880859375e-06, + "grad_norm": 1.452040195465088, + "learning_rate": 9.998101535124758e-05, + "loss": 0.1468, + "step": 1750 + }, + { + "epoch": 2.6702880859375e-06, + "model_forward_time": 0.025337696075439453, + "step": 1750 + }, + { + "epoch": 2.6702880859375e-06, + "step": 1750, + "training_step_time": 0.16977453231811523 + }, + { + "epoch": 2.67181396484375e-06, + "model_forward_time": 0.02416253089904785, + "step": 1751 + }, + { + "epoch": 2.67181396484375e-06, + "step": 1751, + "training_step_time": 0.16734576225280762 + }, + { + "epoch": 2.67333984375e-06, + "model_forward_time": 0.0244753360748291, + "step": 1752 + }, + { + "epoch": 2.67333984375e-06, + "step": 1752, + "training_step_time": 0.10255169868469238 + }, + { + "epoch": 2.67486572265625e-06, + "model_forward_time": 0.02461838722229004, + "step": 1753 + }, + { + "epoch": 2.67486572265625e-06, + "step": 1753, + "training_step_time": 0.10480976104736328 + }, + { + "epoch": 2.6763916015625e-06, + "model_forward_time": 0.025255680084228516, + "step": 1754 + }, + { + "epoch": 2.6763916015625e-06, + "step": 1754, + "training_step_time": 0.10711407661437988 + }, + { + "epoch": 2.67791748046875e-06, + "model_forward_time": 0.024811983108520508, + "step": 1755 + }, + { + "epoch": 2.67791748046875e-06, + "step": 1755, + "training_step_time": 0.10537958145141602 + }, + { + "epoch": 2.679443359375e-06, + "model_forward_time": 0.025687456130981445, + "step": 1756 + }, + { + "epoch": 2.679443359375e-06, + "step": 1756, + "training_step_time": 0.11127805709838867 + }, + { + "epoch": 2.68096923828125e-06, + "model_forward_time": 0.025592803955078125, + "step": 1757 + }, + { + "epoch": 2.68096923828125e-06, + "step": 1757, + "training_step_time": 0.10577225685119629 + }, + { + "epoch": 2.6824951171875e-06, + "model_forward_time": 0.02527141571044922, + "step": 1758 + }, + { + "epoch": 2.6824951171875e-06, + "step": 1758, + "training_step_time": 0.10488677024841309 + }, + { + "epoch": 2.68402099609375e-06, + "model_forward_time": 0.026350021362304688, + "step": 1759 + }, + { + "epoch": 2.68402099609375e-06, + "step": 1759, + "training_step_time": 0.10598134994506836 + }, + { + "epoch": 2.685546875e-06, + "grad_norm": 0.9804360270500183, + "learning_rate": 9.997946630995013e-05, + "loss": 0.1408, + "step": 1760 + }, + { + "epoch": 2.685546875e-06, + "model_forward_time": 0.02541637420654297, + "step": 1760 + }, + { + "epoch": 2.685546875e-06, + "step": 1760, + "training_step_time": 0.10829663276672363 + }, + { + "epoch": 2.68707275390625e-06, + "model_forward_time": 0.0254213809967041, + "step": 1761 + }, + { + "epoch": 2.68707275390625e-06, + "step": 1761, + "training_step_time": 0.10474324226379395 + }, + { + "epoch": 2.6885986328125e-06, + "model_forward_time": 0.0254061222076416, + "step": 1762 + }, + { + "epoch": 2.6885986328125e-06, + "step": 1762, + "training_step_time": 0.10452938079833984 + }, + { + "epoch": 2.69012451171875e-06, + "model_forward_time": 0.026445388793945312, + "step": 1763 + }, + { + "epoch": 2.69012451171875e-06, + "step": 1763, + "training_step_time": 0.1069033145904541 + }, + { + "epoch": 2.691650390625e-06, + "model_forward_time": 0.02569437026977539, + "step": 1764 + }, + { + "epoch": 2.691650390625e-06, + "step": 1764, + "training_step_time": 0.11055326461791992 + }, + { + "epoch": 2.69317626953125e-06, + "model_forward_time": 0.025249481201171875, + "step": 1765 + }, + { + "epoch": 2.69317626953125e-06, + "step": 1765, + "training_step_time": 0.1125478744506836 + }, + { + "epoch": 2.6947021484375e-06, + "model_forward_time": 0.024700164794921875, + "step": 1766 + }, + { + "epoch": 2.6947021484375e-06, + "step": 1766, + "training_step_time": 0.10641694068908691 + }, + { + "epoch": 2.69622802734375e-06, + "model_forward_time": 0.025064945220947266, + "step": 1767 + }, + { + "epoch": 2.69622802734375e-06, + "step": 1767, + "training_step_time": 0.10965824127197266 + }, + { + "epoch": 2.69775390625e-06, + "model_forward_time": 0.026335477828979492, + "step": 1768 + }, + { + "epoch": 2.69775390625e-06, + "step": 1768, + "training_step_time": 0.1074528694152832 + }, + { + "epoch": 2.69927978515625e-06, + "model_forward_time": 0.025423765182495117, + "step": 1769 + }, + { + "epoch": 2.69927978515625e-06, + "step": 1769, + "training_step_time": 0.10599374771118164 + }, + { + "epoch": 2.7008056640625e-06, + "grad_norm": 1.212485909461975, + "learning_rate": 9.997785653888835e-05, + "loss": 0.1296, + "step": 1770 + }, + { + "epoch": 2.7008056640625e-06, + "model_forward_time": 0.024813175201416016, + "step": 1770 + }, + { + "epoch": 2.7008056640625e-06, + "step": 1770, + "training_step_time": 0.10941004753112793 + }, + { + "epoch": 2.70233154296875e-06, + "model_forward_time": 0.02537989616394043, + "step": 1771 + }, + { + "epoch": 2.70233154296875e-06, + "step": 1771, + "training_step_time": 0.10610604286193848 + }, + { + "epoch": 2.703857421875e-06, + "model_forward_time": 0.025090694427490234, + "step": 1772 + }, + { + "epoch": 2.703857421875e-06, + "step": 1772, + "training_step_time": 0.11089229583740234 + }, + { + "epoch": 2.70538330078125e-06, + "model_forward_time": 0.025800704956054688, + "step": 1773 + }, + { + "epoch": 2.70538330078125e-06, + "step": 1773, + "training_step_time": 0.11060070991516113 + }, + { + "epoch": 2.7069091796875e-06, + "model_forward_time": 0.025324344635009766, + "step": 1774 + }, + { + "epoch": 2.7069091796875e-06, + "step": 1774, + "training_step_time": 0.20423603057861328 + }, + { + "epoch": 2.70843505859375e-06, + "model_forward_time": 0.024809837341308594, + "step": 1775 + }, + { + "epoch": 2.70843505859375e-06, + "step": 1775, + "training_step_time": 0.20985841751098633 + }, + { + "epoch": 2.7099609375e-06, + "model_forward_time": 0.02463531494140625, + "step": 1776 + }, + { + "epoch": 2.7099609375e-06, + "step": 1776, + "training_step_time": 0.1501476764678955 + }, + { + "epoch": 2.71148681640625e-06, + "model_forward_time": 0.024366378784179688, + "step": 1777 + }, + { + "epoch": 2.71148681640625e-06, + "step": 1777, + "training_step_time": 0.21163129806518555 + }, + { + "epoch": 2.7130126953125e-06, + "model_forward_time": 0.024646997451782227, + "step": 1778 + }, + { + "epoch": 2.7130126953125e-06, + "step": 1778, + "training_step_time": 0.14583420753479004 + }, + { + "epoch": 2.71453857421875e-06, + "model_forward_time": 0.024041175842285156, + "step": 1779 + }, + { + "epoch": 2.71453857421875e-06, + "step": 1779, + "training_step_time": 0.10576987266540527 + }, + { + "epoch": 2.716064453125e-06, + "grad_norm": 0.6639909148216248, + "learning_rate": 9.997618604001829e-05, + "loss": 0.1776, + "step": 1780 + }, + { + "epoch": 2.716064453125e-06, + "model_forward_time": 0.025102853775024414, + "step": 1780 + }, + { + "epoch": 2.716064453125e-06, + "step": 1780, + "training_step_time": 0.11065435409545898 + }, + { + "epoch": 2.71759033203125e-06, + "model_forward_time": 0.025342226028442383, + "step": 1781 + }, + { + "epoch": 2.71759033203125e-06, + "step": 1781, + "training_step_time": 0.10585570335388184 + }, + { + "epoch": 2.7191162109375e-06, + "model_forward_time": 0.025157928466796875, + "step": 1782 + }, + { + "epoch": 2.7191162109375e-06, + "step": 1782, + "training_step_time": 0.2035233974456787 + }, + { + "epoch": 2.72064208984375e-06, + "model_forward_time": 0.02389669418334961, + "step": 1783 + }, + { + "epoch": 2.72064208984375e-06, + "step": 1783, + "training_step_time": 0.13607263565063477 + }, + { + "epoch": 2.72216796875e-06, + "model_forward_time": 0.02414226531982422, + "step": 1784 + }, + { + "epoch": 2.72216796875e-06, + "step": 1784, + "training_step_time": 0.11014604568481445 + }, + { + "epoch": 2.72369384765625e-06, + "model_forward_time": 0.025170564651489258, + "step": 1785 + }, + { + "epoch": 2.72369384765625e-06, + "step": 1785, + "training_step_time": 0.11640071868896484 + }, + { + "epoch": 2.7252197265625e-06, + "model_forward_time": 0.02529454231262207, + "step": 1786 + }, + { + "epoch": 2.7252197265625e-06, + "step": 1786, + "training_step_time": 0.1109473705291748 + }, + { + "epoch": 2.72674560546875e-06, + "model_forward_time": 0.02518916130065918, + "step": 1787 + }, + { + "epoch": 2.72674560546875e-06, + "step": 1787, + "training_step_time": 0.11364459991455078 + }, + { + "epoch": 2.728271484375e-06, + "model_forward_time": 0.026539087295532227, + "step": 1788 + }, + { + "epoch": 2.728271484375e-06, + "step": 1788, + "training_step_time": 0.2055072784423828 + }, + { + "epoch": 2.72979736328125e-06, + "model_forward_time": 0.024203062057495117, + "step": 1789 + }, + { + "epoch": 2.72979736328125e-06, + "step": 1789, + "training_step_time": 0.1112067699432373 + }, + { + "epoch": 2.7313232421875e-06, + "grad_norm": 1.1101709604263306, + "learning_rate": 9.997445481536973e-05, + "loss": 0.1352, + "step": 1790 + }, + { + "epoch": 2.7313232421875e-06, + "model_forward_time": 0.024616479873657227, + "step": 1790 + }, + { + "epoch": 2.7313232421875e-06, + "step": 1790, + "training_step_time": 0.10577082633972168 + }, + { + "epoch": 2.73284912109375e-06, + "model_forward_time": 0.025639057159423828, + "step": 1791 + }, + { + "epoch": 2.73284912109375e-06, + "step": 1791, + "training_step_time": 0.10722017288208008 + }, + { + "epoch": 2.734375e-06, + "model_forward_time": 0.025412321090698242, + "step": 1792 + }, + { + "epoch": 2.734375e-06, + "step": 1792, + "training_step_time": 0.106903076171875 + }, + { + "epoch": 2.73590087890625e-06, + "model_forward_time": 0.029206514358520508, + "step": 1793 + }, + { + "epoch": 2.73590087890625e-06, + "step": 1793, + "training_step_time": 0.12263107299804688 + }, + { + "epoch": 2.7374267578125e-06, + "model_forward_time": 0.025545597076416016, + "step": 1794 + }, + { + "epoch": 2.7374267578125e-06, + "step": 1794, + "training_step_time": 0.1107485294342041 + }, + { + "epoch": 2.73895263671875e-06, + "model_forward_time": 0.025102615356445312, + "step": 1795 + }, + { + "epoch": 2.73895263671875e-06, + "step": 1795, + "training_step_time": 0.1124734878540039 + }, + { + "epoch": 2.740478515625e-06, + "model_forward_time": 0.025334835052490234, + "step": 1796 + }, + { + "epoch": 2.740478515625e-06, + "step": 1796, + "training_step_time": 0.21331572532653809 + }, + { + "epoch": 2.74200439453125e-06, + "model_forward_time": 0.024575233459472656, + "step": 1797 + }, + { + "epoch": 2.74200439453125e-06, + "step": 1797, + "training_step_time": 0.11715364456176758 + }, + { + "epoch": 2.7435302734375e-06, + "model_forward_time": 0.02477884292602539, + "step": 1798 + }, + { + "epoch": 2.7435302734375e-06, + "step": 1798, + "training_step_time": 0.1045994758605957 + }, + { + "epoch": 2.74505615234375e-06, + "model_forward_time": 0.025434017181396484, + "step": 1799 + }, + { + "epoch": 2.74505615234375e-06, + "step": 1799, + "training_step_time": 0.10998272895812988 + }, + { + "epoch": 2.74658203125e-06, + "grad_norm": 0.6286399364471436, + "learning_rate": 9.997266286704631e-05, + "loss": 0.1496, + "step": 1800 + }, + { + "epoch": 2.74658203125e-06, + "model_forward_time": 0.02579498291015625, + "step": 1800 + }, + { + "epoch": 2.74658203125e-06, + "step": 1800, + "training_step_time": 0.12121963500976562 + }, + { + "epoch": 2.74810791015625e-06, + "model_forward_time": 0.02513718605041504, + "step": 1801 + }, + { + "epoch": 2.74810791015625e-06, + "step": 1801, + "training_step_time": 0.12234854698181152 + }, + { + "epoch": 2.7496337890625e-06, + "model_forward_time": 0.025505781173706055, + "step": 1802 + }, + { + "epoch": 2.7496337890625e-06, + "step": 1802, + "training_step_time": 0.12576866149902344 + }, + { + "epoch": 2.75115966796875e-06, + "model_forward_time": 0.025467872619628906, + "step": 1803 + }, + { + "epoch": 2.75115966796875e-06, + "step": 1803, + "training_step_time": 0.11745119094848633 + }, + { + "epoch": 2.752685546875e-06, + "model_forward_time": 0.024660110473632812, + "step": 1804 + }, + { + "epoch": 2.752685546875e-06, + "step": 1804, + "training_step_time": 0.11979126930236816 + }, + { + "epoch": 2.75421142578125e-06, + "model_forward_time": 0.02440190315246582, + "step": 1805 + }, + { + "epoch": 2.75421142578125e-06, + "step": 1805, + "training_step_time": 0.11834263801574707 + }, + { + "epoch": 2.7557373046875e-06, + "model_forward_time": 0.02427959442138672, + "step": 1806 + }, + { + "epoch": 2.7557373046875e-06, + "step": 1806, + "training_step_time": 0.11388421058654785 + }, + { + "epoch": 2.75726318359375e-06, + "model_forward_time": 0.02536487579345703, + "step": 1807 + }, + { + "epoch": 2.75726318359375e-06, + "step": 1807, + "training_step_time": 0.11600446701049805 + }, + { + "epoch": 2.7587890625e-06, + "model_forward_time": 0.025497913360595703, + "step": 1808 + }, + { + "epoch": 2.7587890625e-06, + "step": 1808, + "training_step_time": 0.11174941062927246 + }, + { + "epoch": 2.76031494140625e-06, + "model_forward_time": 0.024935007095336914, + "step": 1809 + }, + { + "epoch": 2.76031494140625e-06, + "step": 1809, + "training_step_time": 0.11052107810974121 + }, + { + "epoch": 2.7618408203125e-06, + "grad_norm": 1.036386489868164, + "learning_rate": 9.997081019722537e-05, + "loss": 0.1256, + "step": 1810 + }, + { + "epoch": 2.7618408203125e-06, + "model_forward_time": 0.02544116973876953, + "step": 1810 + }, + { + "epoch": 2.7618408203125e-06, + "step": 1810, + "training_step_time": 0.1088249683380127 + }, + { + "epoch": 2.76336669921875e-06, + "model_forward_time": 0.025402545928955078, + "step": 1811 + }, + { + "epoch": 2.76336669921875e-06, + "step": 1811, + "training_step_time": 0.10657811164855957 + }, + { + "epoch": 2.764892578125e-06, + "model_forward_time": 0.0251772403717041, + "step": 1812 + }, + { + "epoch": 2.764892578125e-06, + "step": 1812, + "training_step_time": 0.10655951499938965 + }, + { + "epoch": 2.76641845703125e-06, + "model_forward_time": 0.02485942840576172, + "step": 1813 + }, + { + "epoch": 2.76641845703125e-06, + "step": 1813, + "training_step_time": 0.10702252388000488 + }, + { + "epoch": 2.7679443359375e-06, + "model_forward_time": 0.02496790885925293, + "step": 1814 + }, + { + "epoch": 2.7679443359375e-06, + "step": 1814, + "training_step_time": 0.10612940788269043 + }, + { + "epoch": 2.76947021484375e-06, + "model_forward_time": 0.025246620178222656, + "step": 1815 + }, + { + "epoch": 2.76947021484375e-06, + "step": 1815, + "training_step_time": 0.10505461692810059 + }, + { + "epoch": 2.77099609375e-06, + "model_forward_time": 0.027647733688354492, + "step": 1816 + }, + { + "epoch": 2.77099609375e-06, + "step": 1816, + "training_step_time": 0.1110830307006836 + }, + { + "epoch": 2.77252197265625e-06, + "model_forward_time": 0.02538442611694336, + "step": 1817 + }, + { + "epoch": 2.77252197265625e-06, + "step": 1817, + "training_step_time": 0.11000871658325195 + }, + { + "epoch": 2.7740478515625e-06, + "model_forward_time": 0.025326013565063477, + "step": 1818 + }, + { + "epoch": 2.7740478515625e-06, + "step": 1818, + "training_step_time": 0.22469353675842285 + }, + { + "epoch": 2.77557373046875e-06, + "model_forward_time": 0.024481534957885742, + "step": 1819 + }, + { + "epoch": 2.77557373046875e-06, + "step": 1819, + "training_step_time": 0.11764669418334961 + }, + { + "epoch": 2.777099609375e-06, + "grad_norm": 0.8333061933517456, + "learning_rate": 9.99688968081581e-05, + "loss": 0.1471, + "step": 1820 + }, + { + "epoch": 2.777099609375e-06, + "model_forward_time": 0.024678945541381836, + "step": 1820 + }, + { + "epoch": 2.777099609375e-06, + "step": 1820, + "training_step_time": 0.12758731842041016 + }, + { + "epoch": 2.77862548828125e-06, + "model_forward_time": 0.025342464447021484, + "step": 1821 + }, + { + "epoch": 2.77862548828125e-06, + "step": 1821, + "training_step_time": 0.21818852424621582 + }, + { + "epoch": 2.7801513671875e-06, + "model_forward_time": 0.024321794509887695, + "step": 1822 + }, + { + "epoch": 2.7801513671875e-06, + "step": 1822, + "training_step_time": 0.16818833351135254 + }, + { + "epoch": 2.78167724609375e-06, + "model_forward_time": 0.024546384811401367, + "step": 1823 + }, + { + "epoch": 2.78167724609375e-06, + "step": 1823, + "training_step_time": 0.18592286109924316 + }, + { + "epoch": 2.783203125e-06, + "model_forward_time": 0.02414107322692871, + "step": 1824 + }, + { + "epoch": 2.783203125e-06, + "step": 1824, + "training_step_time": 0.10913395881652832 + }, + { + "epoch": 2.78472900390625e-06, + "model_forward_time": 0.024141788482666016, + "step": 1825 + }, + { + "epoch": 2.78472900390625e-06, + "step": 1825, + "training_step_time": 0.1048421859741211 + }, + { + "epoch": 2.7862548828125e-06, + "model_forward_time": 0.0249021053314209, + "step": 1826 + }, + { + "epoch": 2.7862548828125e-06, + "step": 1826, + "training_step_time": 0.11409425735473633 + }, + { + "epoch": 2.78778076171875e-06, + "model_forward_time": 0.025697708129882812, + "step": 1827 + }, + { + "epoch": 2.78778076171875e-06, + "step": 1827, + "training_step_time": 0.15686392784118652 + }, + { + "epoch": 2.789306640625e-06, + "model_forward_time": 0.02518486976623535, + "step": 1828 + }, + { + "epoch": 2.789306640625e-06, + "step": 1828, + "training_step_time": 0.13190722465515137 + }, + { + "epoch": 2.79083251953125e-06, + "model_forward_time": 0.024442434310913086, + "step": 1829 + }, + { + "epoch": 2.79083251953125e-06, + "step": 1829, + "training_step_time": 0.11804938316345215 + }, + { + "epoch": 2.7923583984375e-06, + "grad_norm": 1.2781480550765991, + "learning_rate": 9.996692270216947e-05, + "loss": 0.1507, + "step": 1830 + }, + { + "epoch": 2.7923583984375e-06, + "model_forward_time": 0.02542591094970703, + "step": 1830 + }, + { + "epoch": 2.7923583984375e-06, + "step": 1830, + "training_step_time": 0.11500358581542969 + }, + { + "epoch": 2.79388427734375e-06, + "model_forward_time": 0.025402307510375977, + "step": 1831 + }, + { + "epoch": 2.79388427734375e-06, + "step": 1831, + "training_step_time": 0.10614705085754395 + }, + { + "epoch": 2.79541015625e-06, + "model_forward_time": 0.025272130966186523, + "step": 1832 + }, + { + "epoch": 2.79541015625e-06, + "step": 1832, + "training_step_time": 0.10848045349121094 + }, + { + "epoch": 2.79693603515625e-06, + "model_forward_time": 0.02506709098815918, + "step": 1833 + }, + { + "epoch": 2.79693603515625e-06, + "step": 1833, + "training_step_time": 0.19932842254638672 + }, + { + "epoch": 2.7984619140625e-06, + "model_forward_time": 0.024361610412597656, + "step": 1834 + }, + { + "epoch": 2.7984619140625e-06, + "step": 1834, + "training_step_time": 0.10292339324951172 + }, + { + "epoch": 2.79998779296875e-06, + "model_forward_time": 0.024723529815673828, + "step": 1835 + }, + { + "epoch": 2.79998779296875e-06, + "step": 1835, + "training_step_time": 0.10399508476257324 + }, + { + "epoch": 2.801513671875e-06, + "model_forward_time": 0.026547908782958984, + "step": 1836 + }, + { + "epoch": 2.801513671875e-06, + "step": 1836, + "training_step_time": 0.11370658874511719 + }, + { + "epoch": 2.80303955078125e-06, + "model_forward_time": 0.02829742431640625, + "step": 1837 + }, + { + "epoch": 2.80303955078125e-06, + "step": 1837, + "training_step_time": 0.21724414825439453 + }, + { + "epoch": 2.8045654296875e-06, + "model_forward_time": 0.024499177932739258, + "step": 1838 + }, + { + "epoch": 2.8045654296875e-06, + "step": 1838, + "training_step_time": 0.11639022827148438 + }, + { + "epoch": 2.80609130859375e-06, + "model_forward_time": 0.025053739547729492, + "step": 1839 + }, + { + "epoch": 2.80609130859375e-06, + "step": 1839, + "training_step_time": 0.1108248233795166 + }, + { + "epoch": 2.8076171875e-06, + "grad_norm": 1.1419516801834106, + "learning_rate": 9.996488788165816e-05, + "loss": 0.1685, + "step": 1840 + }, + { + "epoch": 2.8076171875e-06, + "model_forward_time": 0.025682449340820312, + "step": 1840 + }, + { + "epoch": 2.8076171875e-06, + "step": 1840, + "training_step_time": 0.21270537376403809 + }, + { + "epoch": 2.80914306640625e-06, + "model_forward_time": 0.024951696395874023, + "step": 1841 + }, + { + "epoch": 2.80914306640625e-06, + "step": 1841, + "training_step_time": 0.1273365020751953 + }, + { + "epoch": 2.8106689453125e-06, + "model_forward_time": 0.024540424346923828, + "step": 1842 + }, + { + "epoch": 2.8106689453125e-06, + "step": 1842, + "training_step_time": 0.10443735122680664 + }, + { + "epoch": 2.81219482421875e-06, + "model_forward_time": 0.025720596313476562, + "step": 1843 + }, + { + "epoch": 2.81219482421875e-06, + "step": 1843, + "training_step_time": 0.11735987663269043 + }, + { + "epoch": 2.813720703125e-06, + "model_forward_time": 0.02510237693786621, + "step": 1844 + }, + { + "epoch": 2.813720703125e-06, + "step": 1844, + "training_step_time": 0.10677528381347656 + }, + { + "epoch": 2.81524658203125e-06, + "model_forward_time": 0.025218725204467773, + "step": 1845 + }, + { + "epoch": 2.81524658203125e-06, + "step": 1845, + "training_step_time": 0.11403369903564453 + }, + { + "epoch": 2.8167724609375e-06, + "model_forward_time": 0.024869918823242188, + "step": 1846 + }, + { + "epoch": 2.8167724609375e-06, + "step": 1846, + "training_step_time": 0.10435366630554199 + }, + { + "epoch": 2.81829833984375e-06, + "model_forward_time": 0.025243043899536133, + "step": 1847 + }, + { + "epoch": 2.81829833984375e-06, + "step": 1847, + "training_step_time": 0.10698270797729492 + }, + { + "epoch": 2.81982421875e-06, + "model_forward_time": 0.025437116622924805, + "step": 1848 + }, + { + "epoch": 2.81982421875e-06, + "step": 1848, + "training_step_time": 0.10965323448181152 + }, + { + "epoch": 2.82135009765625e-06, + "model_forward_time": 0.02542877197265625, + "step": 1849 + }, + { + "epoch": 2.82135009765625e-06, + "step": 1849, + "training_step_time": 0.10606908798217773 + }, + { + "epoch": 2.8228759765625e-06, + "grad_norm": 1.2174485921859741, + "learning_rate": 9.996279234909671e-05, + "loss": 0.1748, + "step": 1850 + }, + { + "epoch": 2.8228759765625e-06, + "model_forward_time": 0.025156021118164062, + "step": 1850 + }, + { + "epoch": 2.8228759765625e-06, + "step": 1850, + "training_step_time": 0.11067533493041992 + }, + { + "epoch": 2.82440185546875e-06, + "model_forward_time": 0.02543497085571289, + "step": 1851 + }, + { + "epoch": 2.82440185546875e-06, + "step": 1851, + "training_step_time": 0.10841655731201172 + }, + { + "epoch": 2.825927734375e-06, + "model_forward_time": 0.02602386474609375, + "step": 1852 + }, + { + "epoch": 2.825927734375e-06, + "step": 1852, + "training_step_time": 0.14484000205993652 + }, + { + "epoch": 2.82745361328125e-06, + "model_forward_time": 0.02520275115966797, + "step": 1853 + }, + { + "epoch": 2.82745361328125e-06, + "step": 1853, + "training_step_time": 0.1279306411743164 + }, + { + "epoch": 2.8289794921875e-06, + "model_forward_time": 0.02490520477294922, + "step": 1854 + }, + { + "epoch": 2.8289794921875e-06, + "step": 1854, + "training_step_time": 0.1275637149810791 + }, + { + "epoch": 2.83050537109375e-06, + "model_forward_time": 0.02480936050415039, + "step": 1855 + }, + { + "epoch": 2.83050537109375e-06, + "step": 1855, + "training_step_time": 0.12048530578613281 + }, + { + "epoch": 2.83203125e-06, + "model_forward_time": 0.024746179580688477, + "step": 1856 + }, + { + "epoch": 2.83203125e-06, + "step": 1856, + "training_step_time": 0.11751151084899902 + }, + { + "epoch": 2.83355712890625e-06, + "model_forward_time": 0.024985074996948242, + "step": 1857 + }, + { + "epoch": 2.83355712890625e-06, + "step": 1857, + "training_step_time": 0.11470794677734375 + }, + { + "epoch": 2.8350830078125e-06, + "model_forward_time": 0.025122404098510742, + "step": 1858 + }, + { + "epoch": 2.8350830078125e-06, + "step": 1858, + "training_step_time": 0.11133146286010742 + }, + { + "epoch": 2.83660888671875e-06, + "model_forward_time": 0.025282859802246094, + "step": 1859 + }, + { + "epoch": 2.83660888671875e-06, + "step": 1859, + "training_step_time": 0.11348962783813477 + }, + { + "epoch": 2.838134765625e-06, + "grad_norm": 0.8413699269294739, + "learning_rate": 9.996063610703137e-05, + "loss": 0.166, + "step": 1860 + }, + { + "epoch": 2.838134765625e-06, + "model_forward_time": 0.02525639533996582, + "step": 1860 + }, + { + "epoch": 2.838134765625e-06, + "step": 1860, + "training_step_time": 0.11232233047485352 + }, + { + "epoch": 2.83966064453125e-06, + "model_forward_time": 0.025519609451293945, + "step": 1861 + }, + { + "epoch": 2.83966064453125e-06, + "step": 1861, + "training_step_time": 0.10790729522705078 + }, + { + "epoch": 2.8411865234375e-06, + "model_forward_time": 0.025049209594726562, + "step": 1862 + }, + { + "epoch": 2.8411865234375e-06, + "step": 1862, + "training_step_time": 0.15474772453308105 + }, + { + "epoch": 2.84271240234375e-06, + "model_forward_time": 0.025496244430541992, + "step": 1863 + }, + { + "epoch": 2.84271240234375e-06, + "step": 1863, + "training_step_time": 0.1703324317932129 + }, + { + "epoch": 2.84423828125e-06, + "model_forward_time": 0.023806095123291016, + "step": 1864 + }, + { + "epoch": 2.84423828125e-06, + "step": 1864, + "training_step_time": 0.17456531524658203 + }, + { + "epoch": 2.84576416015625e-06, + "model_forward_time": 0.024547576904296875, + "step": 1865 + }, + { + "epoch": 2.84576416015625e-06, + "step": 1865, + "training_step_time": 0.16968345642089844 + }, + { + "epoch": 2.8472900390625e-06, + "model_forward_time": 0.025000810623168945, + "step": 1866 + }, + { + "epoch": 2.8472900390625e-06, + "step": 1866, + "training_step_time": 0.15189409255981445 + }, + { + "epoch": 2.84881591796875e-06, + "model_forward_time": 0.024269580841064453, + "step": 1867 + }, + { + "epoch": 2.84881591796875e-06, + "step": 1867, + "training_step_time": 0.11424612998962402 + }, + { + "epoch": 2.850341796875e-06, + "model_forward_time": 0.024725675582885742, + "step": 1868 + }, + { + "epoch": 2.850341796875e-06, + "step": 1868, + "training_step_time": 0.1562044620513916 + }, + { + "epoch": 2.85186767578125e-06, + "model_forward_time": 0.024988412857055664, + "step": 1869 + }, + { + "epoch": 2.85186767578125e-06, + "step": 1869, + "training_step_time": 0.10584068298339844 + }, + { + "epoch": 2.8533935546875e-06, + "grad_norm": 1.2796015739440918, + "learning_rate": 9.995841915808218e-05, + "loss": 0.1571, + "step": 1870 + }, + { + "epoch": 2.8533935546875e-06, + "model_forward_time": 0.024498939514160156, + "step": 1870 + }, + { + "epoch": 2.8533935546875e-06, + "step": 1870, + "training_step_time": 0.10613131523132324 + }, + { + "epoch": 2.85491943359375e-06, + "model_forward_time": 0.025120258331298828, + "step": 1871 + }, + { + "epoch": 2.85491943359375e-06, + "step": 1871, + "training_step_time": 0.1054072380065918 + }, + { + "epoch": 2.8564453125e-06, + "model_forward_time": 0.025531530380249023, + "step": 1872 + }, + { + "epoch": 2.8564453125e-06, + "step": 1872, + "training_step_time": 0.10857462882995605 + }, + { + "epoch": 2.85797119140625e-06, + "model_forward_time": 0.02545785903930664, + "step": 1873 + }, + { + "epoch": 2.85797119140625e-06, + "step": 1873, + "training_step_time": 0.13704848289489746 + }, + { + "epoch": 2.8594970703125e-06, + "model_forward_time": 0.025249481201171875, + "step": 1874 + }, + { + "epoch": 2.8594970703125e-06, + "step": 1874, + "training_step_time": 0.11295819282531738 + }, + { + "epoch": 2.86102294921875e-06, + "model_forward_time": 0.02603602409362793, + "step": 1875 + }, + { + "epoch": 2.86102294921875e-06, + "step": 1875, + "training_step_time": 0.1112053394317627 + }, + { + "epoch": 2.862548828125e-06, + "model_forward_time": 0.025376319885253906, + "step": 1876 + }, + { + "epoch": 2.862548828125e-06, + "step": 1876, + "training_step_time": 0.11547994613647461 + }, + { + "epoch": 2.86407470703125e-06, + "model_forward_time": 0.02539801597595215, + "step": 1877 + }, + { + "epoch": 2.86407470703125e-06, + "step": 1877, + "training_step_time": 0.1099696159362793 + }, + { + "epoch": 2.8656005859375e-06, + "model_forward_time": 0.025517702102661133, + "step": 1878 + }, + { + "epoch": 2.8656005859375e-06, + "step": 1878, + "training_step_time": 0.1985776424407959 + }, + { + "epoch": 2.86712646484375e-06, + "model_forward_time": 0.024854421615600586, + "step": 1879 + }, + { + "epoch": 2.86712646484375e-06, + "step": 1879, + "training_step_time": 0.10831761360168457 + }, + { + "epoch": 2.86865234375e-06, + "grad_norm": 1.016643762588501, + "learning_rate": 9.995614150494293e-05, + "loss": 0.1497, + "step": 1880 + }, + { + "epoch": 2.86865234375e-06, + "model_forward_time": 0.02571725845336914, + "step": 1880 + }, + { + "epoch": 2.86865234375e-06, + "step": 1880, + "training_step_time": 0.10736393928527832 + }, + { + "epoch": 2.87017822265625e-06, + "model_forward_time": 0.02530360221862793, + "step": 1881 + }, + { + "epoch": 2.87017822265625e-06, + "step": 1881, + "training_step_time": 0.11052322387695312 + }, + { + "epoch": 2.8717041015625e-06, + "model_forward_time": 0.02534008026123047, + "step": 1882 + }, + { + "epoch": 2.8717041015625e-06, + "step": 1882, + "training_step_time": 0.12479472160339355 + }, + { + "epoch": 2.87322998046875e-06, + "model_forward_time": 0.025632143020629883, + "step": 1883 + }, + { + "epoch": 2.87322998046875e-06, + "step": 1883, + "training_step_time": 0.1278977394104004 + }, + { + "epoch": 2.874755859375e-06, + "model_forward_time": 0.02551126480102539, + "step": 1884 + }, + { + "epoch": 2.874755859375e-06, + "step": 1884, + "training_step_time": 0.18056058883666992 + }, + { + "epoch": 2.87628173828125e-06, + "model_forward_time": 0.025063514709472656, + "step": 1885 + }, + { + "epoch": 2.87628173828125e-06, + "step": 1885, + "training_step_time": 0.13353562355041504 + }, + { + "epoch": 2.8778076171875e-06, + "model_forward_time": 0.025174856185913086, + "step": 1886 + }, + { + "epoch": 2.8778076171875e-06, + "step": 1886, + "training_step_time": 0.11635804176330566 + }, + { + "epoch": 2.87933349609375e-06, + "model_forward_time": 0.025496959686279297, + "step": 1887 + }, + { + "epoch": 2.87933349609375e-06, + "step": 1887, + "training_step_time": 0.11378908157348633 + }, + { + "epoch": 2.880859375e-06, + "model_forward_time": 0.025817394256591797, + "step": 1888 + }, + { + "epoch": 2.880859375e-06, + "step": 1888, + "training_step_time": 0.11269330978393555 + }, + { + "epoch": 2.88238525390625e-06, + "model_forward_time": 0.025176525115966797, + "step": 1889 + }, + { + "epoch": 2.88238525390625e-06, + "step": 1889, + "training_step_time": 0.10948562622070312 + }, + { + "epoch": 2.8839111328125e-06, + "grad_norm": 0.7047964930534363, + "learning_rate": 9.995380315038119e-05, + "loss": 0.1435, + "step": 1890 + }, + { + "epoch": 2.8839111328125e-06, + "model_forward_time": 0.024848222732543945, + "step": 1890 + }, + { + "epoch": 2.8839111328125e-06, + "step": 1890, + "training_step_time": 0.10723495483398438 + }, + { + "epoch": 2.88543701171875e-06, + "model_forward_time": 0.02545952796936035, + "step": 1891 + }, + { + "epoch": 2.88543701171875e-06, + "step": 1891, + "training_step_time": 0.10792994499206543 + }, + { + "epoch": 2.886962890625e-06, + "model_forward_time": 0.025435686111450195, + "step": 1892 + }, + { + "epoch": 2.886962890625e-06, + "step": 1892, + "training_step_time": 0.10947227478027344 + }, + { + "epoch": 2.88848876953125e-06, + "model_forward_time": 0.025304079055786133, + "step": 1893 + }, + { + "epoch": 2.88848876953125e-06, + "step": 1893, + "training_step_time": 0.10916423797607422 + }, + { + "epoch": 2.8900146484375e-06, + "model_forward_time": 0.02515244483947754, + "step": 1894 + }, + { + "epoch": 2.8900146484375e-06, + "step": 1894, + "training_step_time": 0.1080162525177002 + }, + { + "epoch": 2.89154052734375e-06, + "model_forward_time": 0.02533698081970215, + "step": 1895 + }, + { + "epoch": 2.89154052734375e-06, + "step": 1895, + "training_step_time": 0.10825538635253906 + }, + { + "epoch": 2.89306640625e-06, + "model_forward_time": 0.025634050369262695, + "step": 1896 + }, + { + "epoch": 2.89306640625e-06, + "step": 1896, + "training_step_time": 0.10918927192687988 + }, + { + "epoch": 2.89459228515625e-06, + "model_forward_time": 0.02533268928527832, + "step": 1897 + }, + { + "epoch": 2.89459228515625e-06, + "step": 1897, + "training_step_time": 0.10771608352661133 + }, + { + "epoch": 2.8961181640625e-06, + "model_forward_time": 0.02513289451599121, + "step": 1898 + }, + { + "epoch": 2.8961181640625e-06, + "step": 1898, + "training_step_time": 0.1048574447631836 + }, + { + "epoch": 2.89764404296875e-06, + "model_forward_time": 0.025398731231689453, + "step": 1899 + }, + { + "epoch": 2.89764404296875e-06, + "step": 1899, + "training_step_time": 0.10567426681518555 + }, + { + "epoch": 2.899169921875e-06, + "grad_norm": 0.7637397646903992, + "learning_rate": 9.99514040972383e-05, + "loss": 0.1593, + "step": 1900 + }, + { + "epoch": 2.899169921875e-06, + "model_forward_time": 0.025198698043823242, + "step": 1900 + }, + { + "epoch": 2.899169921875e-06, + "step": 1900, + "training_step_time": 0.10682225227355957 + }, + { + "epoch": 2.90069580078125e-06, + "model_forward_time": 0.02555227279663086, + "step": 1901 + }, + { + "epoch": 2.90069580078125e-06, + "step": 1901, + "training_step_time": 0.10581541061401367 + }, + { + "epoch": 2.9022216796875e-06, + "model_forward_time": 0.02541065216064453, + "step": 1902 + }, + { + "epoch": 2.9022216796875e-06, + "step": 1902, + "training_step_time": 0.10606122016906738 + }, + { + "epoch": 2.90374755859375e-06, + "model_forward_time": 0.025368928909301758, + "step": 1903 + }, + { + "epoch": 2.90374755859375e-06, + "step": 1903, + "training_step_time": 0.1044607162475586 + }, + { + "epoch": 2.9052734375e-06, + "model_forward_time": 0.026404142379760742, + "step": 1904 + }, + { + "epoch": 2.9052734375e-06, + "step": 1904, + "training_step_time": 0.10947704315185547 + }, + { + "epoch": 2.90679931640625e-06, + "model_forward_time": 0.0252230167388916, + "step": 1905 + }, + { + "epoch": 2.90679931640625e-06, + "step": 1905, + "training_step_time": 0.11258840560913086 + }, + { + "epoch": 2.9083251953125e-06, + "model_forward_time": 0.0250396728515625, + "step": 1906 + }, + { + "epoch": 2.9083251953125e-06, + "step": 1906, + "training_step_time": 0.10487484931945801 + }, + { + "epoch": 2.90985107421875e-06, + "model_forward_time": 0.025368690490722656, + "step": 1907 + }, + { + "epoch": 2.90985107421875e-06, + "step": 1907, + "training_step_time": 0.10548019409179688 + }, + { + "epoch": 2.911376953125e-06, + "model_forward_time": 0.02552175521850586, + "step": 1908 + }, + { + "epoch": 2.911376953125e-06, + "step": 1908, + "training_step_time": 0.18558001518249512 + }, + { + "epoch": 2.91290283203125e-06, + "model_forward_time": 0.024863243103027344, + "step": 1909 + }, + { + "epoch": 2.91290283203125e-06, + "step": 1909, + "training_step_time": 0.16714763641357422 + }, + { + "epoch": 2.9144287109375e-06, + "grad_norm": 1.1091893911361694, + "learning_rate": 9.994894434842932e-05, + "loss": 0.1622, + "step": 1910 + }, + { + "epoch": 2.9144287109375e-06, + "model_forward_time": 0.024555683135986328, + "step": 1910 + }, + { + "epoch": 2.9144287109375e-06, + "step": 1910, + "training_step_time": 0.16541171073913574 + }, + { + "epoch": 2.91595458984375e-06, + "model_forward_time": 0.024396896362304688, + "step": 1911 + }, + { + "epoch": 2.91595458984375e-06, + "step": 1911, + "training_step_time": 0.1871027946472168 + }, + { + "epoch": 2.91748046875e-06, + "model_forward_time": 0.024492979049682617, + "step": 1912 + }, + { + "epoch": 2.91748046875e-06, + "step": 1912, + "training_step_time": 0.12774276733398438 + }, + { + "epoch": 2.91900634765625e-06, + "model_forward_time": 0.024938344955444336, + "step": 1913 + }, + { + "epoch": 2.91900634765625e-06, + "step": 1913, + "training_step_time": 0.10907268524169922 + }, + { + "epoch": 2.9205322265625e-06, + "model_forward_time": 0.025458097457885742, + "step": 1914 + }, + { + "epoch": 2.9205322265625e-06, + "step": 1914, + "training_step_time": 0.12136673927307129 + }, + { + "epoch": 2.92205810546875e-06, + "model_forward_time": 0.026766538619995117, + "step": 1915 + }, + { + "epoch": 2.92205810546875e-06, + "step": 1915, + "training_step_time": 0.10941743850708008 + }, + { + "epoch": 2.923583984375e-06, + "model_forward_time": 0.02539205551147461, + "step": 1916 + }, + { + "epoch": 2.923583984375e-06, + "step": 1916, + "training_step_time": 0.10824155807495117 + }, + { + "epoch": 2.92510986328125e-06, + "model_forward_time": 0.025044918060302734, + "step": 1917 + }, + { + "epoch": 2.92510986328125e-06, + "step": 1917, + "training_step_time": 0.10641813278198242 + }, + { + "epoch": 2.9266357421875e-06, + "model_forward_time": 0.026980161666870117, + "step": 1918 + }, + { + "epoch": 2.9266357421875e-06, + "step": 1918, + "training_step_time": 0.1630873680114746 + }, + { + "epoch": 2.92816162109375e-06, + "model_forward_time": 0.02554178237915039, + "step": 1919 + }, + { + "epoch": 2.92816162109375e-06, + "step": 1919, + "training_step_time": 0.13785362243652344 + }, + { + "epoch": 2.9296875e-06, + "grad_norm": 1.265679955482483, + "learning_rate": 9.994642390694308e-05, + "loss": 0.1643, + "step": 1920 + }, + { + "epoch": 2.9296875e-06, + "model_forward_time": 0.02440476417541504, + "step": 1920 + }, + { + "epoch": 2.9296875e-06, + "step": 1920, + "training_step_time": 0.11036562919616699 + }, + { + "epoch": 2.93121337890625e-06, + "model_forward_time": 0.025008440017700195, + "step": 1921 + }, + { + "epoch": 2.93121337890625e-06, + "step": 1921, + "training_step_time": 0.11149191856384277 + }, + { + "epoch": 2.9327392578125e-06, + "model_forward_time": 0.026048898696899414, + "step": 1922 + }, + { + "epoch": 2.9327392578125e-06, + "step": 1922, + "training_step_time": 0.11435818672180176 + }, + { + "epoch": 2.93426513671875e-06, + "model_forward_time": 0.025460243225097656, + "step": 1923 + }, + { + "epoch": 2.93426513671875e-06, + "step": 1923, + "training_step_time": 0.11545991897583008 + }, + { + "epoch": 2.935791015625e-06, + "model_forward_time": 0.02558279037475586, + "step": 1924 + }, + { + "epoch": 2.935791015625e-06, + "step": 1924, + "training_step_time": 0.19411754608154297 + }, + { + "epoch": 2.93731689453125e-06, + "model_forward_time": 0.02492499351501465, + "step": 1925 + }, + { + "epoch": 2.93731689453125e-06, + "step": 1925, + "training_step_time": 0.11108016967773438 + }, + { + "epoch": 2.9388427734375e-06, + "model_forward_time": 0.025048255920410156, + "step": 1926 + }, + { + "epoch": 2.9388427734375e-06, + "step": 1926, + "training_step_time": 0.10763287544250488 + }, + { + "epoch": 2.94036865234375e-06, + "model_forward_time": 0.02613210678100586, + "step": 1927 + }, + { + "epoch": 2.94036865234375e-06, + "step": 1927, + "training_step_time": 0.11142778396606445 + }, + { + "epoch": 2.94189453125e-06, + "model_forward_time": 0.025040626525878906, + "step": 1928 + }, + { + "epoch": 2.94189453125e-06, + "step": 1928, + "training_step_time": 0.10770320892333984 + }, + { + "epoch": 2.94342041015625e-06, + "model_forward_time": 0.025653839111328125, + "step": 1929 + }, + { + "epoch": 2.94342041015625e-06, + "step": 1929, + "training_step_time": 0.10874056816101074 + }, + { + "epoch": 2.9449462890625e-06, + "grad_norm": 0.7586075663566589, + "learning_rate": 9.994384277584214e-05, + "loss": 0.1409, + "step": 1930 + }, + { + "epoch": 2.9449462890625e-06, + "model_forward_time": 0.02581930160522461, + "step": 1930 + }, + { + "epoch": 2.9449462890625e-06, + "step": 1930, + "training_step_time": 0.11140680313110352 + }, + { + "epoch": 2.94647216796875e-06, + "model_forward_time": 0.02597332000732422, + "step": 1931 + }, + { + "epoch": 2.94647216796875e-06, + "step": 1931, + "training_step_time": 0.1695399284362793 + }, + { + "epoch": 2.947998046875e-06, + "model_forward_time": 0.025102853775024414, + "step": 1932 + }, + { + "epoch": 2.947998046875e-06, + "step": 1932, + "training_step_time": 0.17125248908996582 + }, + { + "epoch": 2.94952392578125e-06, + "model_forward_time": 0.024445056915283203, + "step": 1933 + }, + { + "epoch": 2.94952392578125e-06, + "step": 1933, + "training_step_time": 0.10594320297241211 + }, + { + "epoch": 2.9510498046875e-06, + "model_forward_time": 0.024946928024291992, + "step": 1934 + }, + { + "epoch": 2.9510498046875e-06, + "step": 1934, + "training_step_time": 0.10984253883361816 + }, + { + "epoch": 2.95257568359375e-06, + "model_forward_time": 0.02544093132019043, + "step": 1935 + }, + { + "epoch": 2.95257568359375e-06, + "step": 1935, + "training_step_time": 0.10640692710876465 + }, + { + "epoch": 2.9541015625e-06, + "model_forward_time": 0.025753021240234375, + "step": 1936 + }, + { + "epoch": 2.9541015625e-06, + "step": 1936, + "training_step_time": 0.10672688484191895 + }, + { + "epoch": 2.95562744140625e-06, + "model_forward_time": 0.025441884994506836, + "step": 1937 + }, + { + "epoch": 2.95562744140625e-06, + "step": 1937, + "training_step_time": 0.10877275466918945 + }, + { + "epoch": 2.9571533203125e-06, + "model_forward_time": 0.025537729263305664, + "step": 1938 + }, + { + "epoch": 2.9571533203125e-06, + "step": 1938, + "training_step_time": 0.11223125457763672 + }, + { + "epoch": 2.95867919921875e-06, + "model_forward_time": 0.025737762451171875, + "step": 1939 + }, + { + "epoch": 2.95867919921875e-06, + "step": 1939, + "training_step_time": 0.10566854476928711 + }, + { + "epoch": 2.960205078125e-06, + "grad_norm": 1.0724173784255981, + "learning_rate": 9.994120095826285e-05, + "loss": 0.1453, + "step": 1940 + }, + { + "epoch": 2.960205078125e-06, + "model_forward_time": 0.02599310874938965, + "step": 1940 + }, + { + "epoch": 2.960205078125e-06, + "step": 1940, + "training_step_time": 0.10955691337585449 + }, + { + "epoch": 2.96173095703125e-06, + "model_forward_time": 0.025438308715820312, + "step": 1941 + }, + { + "epoch": 2.96173095703125e-06, + "step": 1941, + "training_step_time": 0.11724257469177246 + }, + { + "epoch": 2.9632568359375e-06, + "model_forward_time": 0.02529740333557129, + "step": 1942 + }, + { + "epoch": 2.9632568359375e-06, + "step": 1942, + "training_step_time": 0.1090691089630127 + }, + { + "epoch": 2.96478271484375e-06, + "model_forward_time": 0.025452136993408203, + "step": 1943 + }, + { + "epoch": 2.96478271484375e-06, + "step": 1943, + "training_step_time": 0.11310744285583496 + }, + { + "epoch": 2.96630859375e-06, + "model_forward_time": 0.02564859390258789, + "step": 1944 + }, + { + "epoch": 2.96630859375e-06, + "step": 1944, + "training_step_time": 0.11342811584472656 + }, + { + "epoch": 2.96783447265625e-06, + "model_forward_time": 0.025187015533447266, + "step": 1945 + }, + { + "epoch": 2.96783447265625e-06, + "step": 1945, + "training_step_time": 0.11098694801330566 + }, + { + "epoch": 2.9693603515625e-06, + "model_forward_time": 0.02458786964416504, + "step": 1946 + }, + { + "epoch": 2.9693603515625e-06, + "step": 1946, + "training_step_time": 0.11322331428527832 + }, + { + "epoch": 2.97088623046875e-06, + "model_forward_time": 0.025740861892700195, + "step": 1947 + }, + { + "epoch": 2.97088623046875e-06, + "step": 1947, + "training_step_time": 0.11025476455688477 + }, + { + "epoch": 2.972412109375e-06, + "model_forward_time": 0.026183128356933594, + "step": 1948 + }, + { + "epoch": 2.972412109375e-06, + "step": 1948, + "training_step_time": 0.10956573486328125 + }, + { + "epoch": 2.97393798828125e-06, + "model_forward_time": 0.029927492141723633, + "step": 1949 + }, + { + "epoch": 2.97393798828125e-06, + "step": 1949, + "training_step_time": 0.11331057548522949 + }, + { + "epoch": 2.9754638671875e-06, + "grad_norm": 0.4724397659301758, + "learning_rate": 9.993849845741524e-05, + "loss": 0.1095, + "step": 1950 + }, + { + "epoch": 2.9754638671875e-06, + "model_forward_time": 0.02519536018371582, + "step": 1950 + }, + { + "epoch": 2.9754638671875e-06, + "step": 1950, + "training_step_time": 0.1093137264251709 + }, + { + "epoch": 2.97698974609375e-06, + "model_forward_time": 0.025475025177001953, + "step": 1951 + }, + { + "epoch": 2.97698974609375e-06, + "step": 1951, + "training_step_time": 0.10733890533447266 + }, + { + "epoch": 2.978515625e-06, + "model_forward_time": 0.025781869888305664, + "step": 1952 + }, + { + "epoch": 2.978515625e-06, + "step": 1952, + "training_step_time": 0.11156868934631348 + }, + { + "epoch": 2.98004150390625e-06, + "model_forward_time": 0.025430679321289062, + "step": 1953 + }, + { + "epoch": 2.98004150390625e-06, + "step": 1953, + "training_step_time": 0.10851716995239258 + }, + { + "epoch": 2.9815673828125e-06, + "model_forward_time": 0.025382518768310547, + "step": 1954 + }, + { + "epoch": 2.9815673828125e-06, + "step": 1954, + "training_step_time": 0.11156392097473145 + }, + { + "epoch": 2.98309326171875e-06, + "model_forward_time": 0.025736570358276367, + "step": 1955 + }, + { + "epoch": 2.98309326171875e-06, + "step": 1955, + "training_step_time": 0.11125874519348145 + }, + { + "epoch": 2.984619140625e-06, + "model_forward_time": 0.028539419174194336, + "step": 1956 + }, + { + "epoch": 2.984619140625e-06, + "step": 1956, + "training_step_time": 0.12648630142211914 + }, + { + "epoch": 2.98614501953125e-06, + "model_forward_time": 0.02822256088256836, + "step": 1957 + }, + { + "epoch": 2.98614501953125e-06, + "step": 1957, + "training_step_time": 0.14502239227294922 + }, + { + "epoch": 2.9876708984375e-06, + "model_forward_time": 0.025331735610961914, + "step": 1958 + }, + { + "epoch": 2.9876708984375e-06, + "step": 1958, + "training_step_time": 0.18198585510253906 + }, + { + "epoch": 2.98919677734375e-06, + "model_forward_time": 0.024846315383911133, + "step": 1959 + }, + { + "epoch": 2.98919677734375e-06, + "step": 1959, + "training_step_time": 0.1720259189605713 + }, + { + "epoch": 2.99072265625e-06, + "grad_norm": 0.6855494379997253, + "learning_rate": 9.99357352765831e-05, + "loss": 0.1564, + "step": 1960 + }, + { + "epoch": 2.99072265625e-06, + "model_forward_time": 0.02470874786376953, + "step": 1960 + }, + { + "epoch": 2.99072265625e-06, + "step": 1960, + "training_step_time": 0.18732357025146484 + }, + { + "epoch": 2.99224853515625e-06, + "model_forward_time": 0.024956226348876953, + "step": 1961 + }, + { + "epoch": 2.99224853515625e-06, + "step": 1961, + "training_step_time": 0.1044917106628418 + }, + { + "epoch": 2.9937744140625e-06, + "model_forward_time": 0.025068283081054688, + "step": 1962 + }, + { + "epoch": 2.9937744140625e-06, + "step": 1962, + "training_step_time": 0.10672950744628906 + }, + { + "epoch": 2.99530029296875e-06, + "model_forward_time": 0.026046276092529297, + "step": 1963 + }, + { + "epoch": 2.99530029296875e-06, + "step": 1963, + "training_step_time": 0.10952329635620117 + }, + { + "epoch": 2.996826171875e-06, + "model_forward_time": 0.025975704193115234, + "step": 1964 + }, + { + "epoch": 2.996826171875e-06, + "step": 1964, + "training_step_time": 0.16299128532409668 + }, + { + "epoch": 2.99835205078125e-06, + "model_forward_time": 0.025108814239501953, + "step": 1965 + }, + { + "epoch": 2.99835205078125e-06, + "step": 1965, + "training_step_time": 0.13865208625793457 + }, + { + "epoch": 2.9998779296875e-06, + "model_forward_time": 0.02524089813232422, + "step": 1966 + }, + { + "epoch": 2.9998779296875e-06, + "step": 1966, + "training_step_time": 0.10971307754516602 + }, + { + "epoch": 3.00140380859375e-06, + "model_forward_time": 0.02578449249267578, + "step": 1967 + }, + { + "epoch": 3.00140380859375e-06, + "step": 1967, + "training_step_time": 0.11868000030517578 + }, + { + "epoch": 3.0029296875e-06, + "model_forward_time": 0.02603626251220703, + "step": 1968 + }, + { + "epoch": 3.0029296875e-06, + "step": 1968, + "training_step_time": 0.11072802543640137 + }, + { + "epoch": 3.00445556640625e-06, + "model_forward_time": 0.02520751953125, + "step": 1969 + }, + { + "epoch": 3.00445556640625e-06, + "step": 1969, + "training_step_time": 0.10694336891174316 + }, + { + "epoch": 3.0059814453125e-06, + "grad_norm": 0.6952374577522278, + "learning_rate": 9.9932911419124e-05, + "loss": 0.1304, + "step": 1970 + }, + { + "epoch": 3.0059814453125e-06, + "model_forward_time": 0.02581501007080078, + "step": 1970 + }, + { + "epoch": 3.0059814453125e-06, + "step": 1970, + "training_step_time": 0.20280909538269043 + }, + { + "epoch": 3.00750732421875e-06, + "model_forward_time": 0.024640560150146484, + "step": 1971 + }, + { + "epoch": 3.00750732421875e-06, + "step": 1971, + "training_step_time": 0.10941433906555176 + }, + { + "epoch": 3.009033203125e-06, + "model_forward_time": 0.02518606185913086, + "step": 1972 + }, + { + "epoch": 3.009033203125e-06, + "step": 1972, + "training_step_time": 0.10547113418579102 + }, + { + "epoch": 3.01055908203125e-06, + "model_forward_time": 0.02469325065612793, + "step": 1973 + }, + { + "epoch": 3.01055908203125e-06, + "step": 1973, + "training_step_time": 0.10996365547180176 + }, + { + "epoch": 3.0120849609375e-06, + "model_forward_time": 0.025483369827270508, + "step": 1974 + }, + { + "epoch": 3.0120849609375e-06, + "step": 1974, + "training_step_time": 0.11454057693481445 + }, + { + "epoch": 3.01361083984375e-06, + "model_forward_time": 0.025638580322265625, + "step": 1975 + }, + { + "epoch": 3.01361083984375e-06, + "step": 1975, + "training_step_time": 0.11404204368591309 + }, + { + "epoch": 3.01513671875e-06, + "model_forward_time": 0.025010108947753906, + "step": 1976 + }, + { + "epoch": 3.01513671875e-06, + "step": 1976, + "training_step_time": 0.17455482482910156 + }, + { + "epoch": 3.01666259765625e-06, + "model_forward_time": 0.024666547775268555, + "step": 1977 + }, + { + "epoch": 3.01666259765625e-06, + "step": 1977, + "training_step_time": 0.20657992362976074 + }, + { + "epoch": 3.0181884765625e-06, + "model_forward_time": 0.024562597274780273, + "step": 1978 + }, + { + "epoch": 3.0181884765625e-06, + "step": 1978, + "training_step_time": 0.19892644882202148 + }, + { + "epoch": 3.01971435546875e-06, + "model_forward_time": 0.02467060089111328, + "step": 1979 + }, + { + "epoch": 3.01971435546875e-06, + "step": 1979, + "training_step_time": 0.19018864631652832 + }, + { + "epoch": 3.021240234375e-06, + "grad_norm": 0.709253191947937, + "learning_rate": 9.993002688846913e-05, + "loss": 0.1683, + "step": 1980 + }, + { + "epoch": 3.021240234375e-06, + "model_forward_time": 0.024123430252075195, + "step": 1980 + }, + { + "epoch": 3.021240234375e-06, + "step": 1980, + "training_step_time": 0.17840576171875 + }, + { + "epoch": 3.02276611328125e-06, + "model_forward_time": 0.024484872817993164, + "step": 1981 + }, + { + "epoch": 3.02276611328125e-06, + "step": 1981, + "training_step_time": 0.16625404357910156 + }, + { + "epoch": 3.0242919921875e-06, + "model_forward_time": 0.02425074577331543, + "step": 1982 + }, + { + "epoch": 3.0242919921875e-06, + "step": 1982, + "training_step_time": 0.11740279197692871 + }, + { + "epoch": 3.02581787109375e-06, + "model_forward_time": 0.02485346794128418, + "step": 1983 + }, + { + "epoch": 3.02581787109375e-06, + "step": 1983, + "training_step_time": 0.10535669326782227 + }, + { + "epoch": 3.02734375e-06, + "model_forward_time": 0.02521491050720215, + "step": 1984 + }, + { + "epoch": 3.02734375e-06, + "step": 1984, + "training_step_time": 0.10390210151672363 + }, + { + "epoch": 3.02886962890625e-06, + "model_forward_time": 0.025643348693847656, + "step": 1985 + }, + { + "epoch": 3.02886962890625e-06, + "step": 1985, + "training_step_time": 0.10459208488464355 + }, + { + "epoch": 3.0303955078125e-06, + "model_forward_time": 0.025364398956298828, + "step": 1986 + }, + { + "epoch": 3.0303955078125e-06, + "step": 1986, + "training_step_time": 0.10532951354980469 + }, + { + "epoch": 3.03192138671875e-06, + "model_forward_time": 0.025266170501708984, + "step": 1987 + }, + { + "epoch": 3.03192138671875e-06, + "step": 1987, + "training_step_time": 0.10615348815917969 + }, + { + "epoch": 3.033447265625e-06, + "model_forward_time": 0.02510976791381836, + "step": 1988 + }, + { + "epoch": 3.033447265625e-06, + "step": 1988, + "training_step_time": 0.11060190200805664 + }, + { + "epoch": 3.03497314453125e-06, + "model_forward_time": 0.024952411651611328, + "step": 1989 + }, + { + "epoch": 3.03497314453125e-06, + "step": 1989, + "training_step_time": 0.10471582412719727 + }, + { + "epoch": 3.0364990234375e-06, + "grad_norm": 0.5270886421203613, + "learning_rate": 9.99270816881235e-05, + "loss": 0.1446, + "step": 1990 + }, + { + "epoch": 3.0364990234375e-06, + "model_forward_time": 0.02549910545349121, + "step": 1990 + }, + { + "epoch": 3.0364990234375e-06, + "step": 1990, + "training_step_time": 0.10418057441711426 + }, + { + "epoch": 3.03802490234375e-06, + "model_forward_time": 0.025516748428344727, + "step": 1991 + }, + { + "epoch": 3.03802490234375e-06, + "step": 1991, + "training_step_time": 0.10926461219787598 + }, + { + "epoch": 3.03955078125e-06, + "model_forward_time": 0.025645732879638672, + "step": 1992 + }, + { + "epoch": 3.03955078125e-06, + "step": 1992, + "training_step_time": 0.11027145385742188 + }, + { + "epoch": 3.04107666015625e-06, + "model_forward_time": 0.025409936904907227, + "step": 1993 + }, + { + "epoch": 3.04107666015625e-06, + "step": 1993, + "training_step_time": 0.10542654991149902 + }, + { + "epoch": 3.0426025390625e-06, + "model_forward_time": 0.025529146194458008, + "step": 1994 + }, + { + "epoch": 3.0426025390625e-06, + "step": 1994, + "training_step_time": 0.11611413955688477 + }, + { + "epoch": 3.04412841796875e-06, + "model_forward_time": 0.025817394256591797, + "step": 1995 + }, + { + "epoch": 3.04412841796875e-06, + "step": 1995, + "training_step_time": 0.1058652400970459 + }, + { + "epoch": 3.045654296875e-06, + "model_forward_time": 0.02559947967529297, + "step": 1996 + }, + { + "epoch": 3.045654296875e-06, + "step": 1996, + "training_step_time": 0.10748600959777832 + }, + { + "epoch": 3.04718017578125e-06, + "model_forward_time": 0.025455951690673828, + "step": 1997 + }, + { + "epoch": 3.04718017578125e-06, + "step": 1997, + "training_step_time": 0.12153196334838867 + }, + { + "epoch": 3.0487060546875e-06, + "model_forward_time": 0.02539348602294922, + "step": 1998 + }, + { + "epoch": 3.0487060546875e-06, + "step": 1998, + "training_step_time": 0.10736274719238281 + }, + { + "epoch": 3.05023193359375e-06, + "model_forward_time": 0.025676488876342773, + "step": 1999 + }, + { + "epoch": 3.05023193359375e-06, + "step": 1999, + "training_step_time": 0.1779797077178955 + }, + { + "epoch": 3.0517578125e-06, + "grad_norm": 1.4205913543701172, + "learning_rate": 9.992407582166581e-05, + "loss": 0.1625, + "step": 2000 + }, + { + "epoch": 3.0517578125e-06, + "model_forward_time": 0.025179386138916016, + "step": 2000 + }, + { + "epoch": 3.0517578125e-06, + "step": 2000, + "training_step_time": 0.09915947914123535 + }, + { + "epoch": 3.05328369140625e-06, + "model_forward_time": 0.02448248863220215, + "step": 2001 + }, + { + "epoch": 3.05328369140625e-06, + "step": 2001, + "training_step_time": 0.09954714775085449 + }, + { + "epoch": 3.0548095703125e-06, + "model_forward_time": 0.025098800659179688, + "step": 2002 + }, + { + "epoch": 3.0548095703125e-06, + "step": 2002, + "training_step_time": 0.10394859313964844 + }, + { + "epoch": 3.05633544921875e-06, + "model_forward_time": 0.02563309669494629, + "step": 2003 + }, + { + "epoch": 3.05633544921875e-06, + "step": 2003, + "training_step_time": 0.10478830337524414 + }, + { + "epoch": 3.057861328125e-06, + "model_forward_time": 0.02525186538696289, + "step": 2004 + }, + { + "epoch": 3.057861328125e-06, + "step": 2004, + "training_step_time": 0.1038506031036377 + }, + { + "epoch": 3.05938720703125e-06, + "model_forward_time": 0.02580428123474121, + "step": 2005 + }, + { + "epoch": 3.05938720703125e-06, + "step": 2005, + "training_step_time": 0.10744881629943848 + }, + { + "epoch": 3.0609130859375e-06, + "model_forward_time": 0.025240659713745117, + "step": 2006 + }, + { + "epoch": 3.0609130859375e-06, + "step": 2006, + "training_step_time": 0.1900327205657959 + }, + { + "epoch": 3.06243896484375e-06, + "model_forward_time": 0.024365901947021484, + "step": 2007 + }, + { + "epoch": 3.06243896484375e-06, + "step": 2007, + "training_step_time": 0.12936973571777344 + }, + { + "epoch": 3.06396484375e-06, + "model_forward_time": 0.024620532989501953, + "step": 2008 + }, + { + "epoch": 3.06396484375e-06, + "step": 2008, + "training_step_time": 0.2175137996673584 + }, + { + "epoch": 3.06549072265625e-06, + "model_forward_time": 0.023994922637939453, + "step": 2009 + }, + { + "epoch": 3.06549072265625e-06, + "step": 2009, + "training_step_time": 0.14101886749267578 + }, + { + "epoch": 3.0670166015625e-06, + "grad_norm": 0.6931583881378174, + "learning_rate": 9.992100929274846e-05, + "loss": 0.1831, + "step": 2010 + }, + { + "epoch": 3.0670166015625e-06, + "model_forward_time": 0.02390742301940918, + "step": 2010 + }, + { + "epoch": 3.0670166015625e-06, + "step": 2010, + "training_step_time": 0.1059420108795166 + }, + { + "epoch": 3.06854248046875e-06, + "model_forward_time": 0.025151968002319336, + "step": 2011 + }, + { + "epoch": 3.06854248046875e-06, + "step": 2011, + "training_step_time": 0.11233735084533691 + }, + { + "epoch": 3.070068359375e-06, + "model_forward_time": 0.025288105010986328, + "step": 2012 + }, + { + "epoch": 3.070068359375e-06, + "step": 2012, + "training_step_time": 0.10818290710449219 + }, + { + "epoch": 3.07159423828125e-06, + "model_forward_time": 0.02550530433654785, + "step": 2013 + }, + { + "epoch": 3.07159423828125e-06, + "step": 2013, + "training_step_time": 0.10864925384521484 + }, + { + "epoch": 3.0731201171875e-06, + "model_forward_time": 0.025258541107177734, + "step": 2014 + }, + { + "epoch": 3.0731201171875e-06, + "step": 2014, + "training_step_time": 0.10622262954711914 + }, + { + "epoch": 3.07464599609375e-06, + "model_forward_time": 0.025121450424194336, + "step": 2015 + }, + { + "epoch": 3.07464599609375e-06, + "step": 2015, + "training_step_time": 0.1245272159576416 + }, + { + "epoch": 3.076171875e-06, + "model_forward_time": 0.02507925033569336, + "step": 2016 + }, + { + "epoch": 3.076171875e-06, + "step": 2016, + "training_step_time": 0.14467501640319824 + }, + { + "epoch": 3.07769775390625e-06, + "model_forward_time": 0.02483224868774414, + "step": 2017 + }, + { + "epoch": 3.07769775390625e-06, + "step": 2017, + "training_step_time": 0.11435580253601074 + }, + { + "epoch": 3.0792236328125e-06, + "model_forward_time": 0.024974822998046875, + "step": 2018 + }, + { + "epoch": 3.0792236328125e-06, + "step": 2018, + "training_step_time": 0.11614990234375 + }, + { + "epoch": 3.08074951171875e-06, + "model_forward_time": 0.025118589401245117, + "step": 2019 + }, + { + "epoch": 3.08074951171875e-06, + "step": 2019, + "training_step_time": 0.11707258224487305 + }, + { + "epoch": 3.082275390625e-06, + "grad_norm": 0.949308454990387, + "learning_rate": 9.991788210509758e-05, + "loss": 0.1566, + "step": 2020 + }, + { + "epoch": 3.082275390625e-06, + "model_forward_time": 0.025453567504882812, + "step": 2020 + }, + { + "epoch": 3.082275390625e-06, + "step": 2020, + "training_step_time": 0.18665218353271484 + }, + { + "epoch": 3.08380126953125e-06, + "model_forward_time": 0.0244290828704834, + "step": 2021 + }, + { + "epoch": 3.08380126953125e-06, + "step": 2021, + "training_step_time": 0.11759328842163086 + }, + { + "epoch": 3.0853271484375e-06, + "model_forward_time": 0.024521589279174805, + "step": 2022 + }, + { + "epoch": 3.0853271484375e-06, + "step": 2022, + "training_step_time": 0.10910296440124512 + }, + { + "epoch": 3.08685302734375e-06, + "model_forward_time": 0.02557659149169922, + "step": 2023 + }, + { + "epoch": 3.08685302734375e-06, + "step": 2023, + "training_step_time": 0.10752582550048828 + }, + { + "epoch": 3.08837890625e-06, + "model_forward_time": 0.026337146759033203, + "step": 2024 + }, + { + "epoch": 3.08837890625e-06, + "step": 2024, + "training_step_time": 0.14465785026550293 + }, + { + "epoch": 3.08990478515625e-06, + "model_forward_time": 0.02496790885925293, + "step": 2025 + }, + { + "epoch": 3.08990478515625e-06, + "step": 2025, + "training_step_time": 0.10874366760253906 + }, + { + "epoch": 3.0914306640625e-06, + "model_forward_time": 0.025317668914794922, + "step": 2026 + }, + { + "epoch": 3.0914306640625e-06, + "step": 2026, + "training_step_time": 0.21014761924743652 + }, + { + "epoch": 3.09295654296875e-06, + "model_forward_time": 0.024985790252685547, + "step": 2027 + }, + { + "epoch": 3.09295654296875e-06, + "step": 2027, + "training_step_time": 0.10713934898376465 + }, + { + "epoch": 3.094482421875e-06, + "model_forward_time": 0.025104522705078125, + "step": 2028 + }, + { + "epoch": 3.094482421875e-06, + "step": 2028, + "training_step_time": 0.12088894844055176 + }, + { + "epoch": 3.09600830078125e-06, + "model_forward_time": 0.02487349510192871, + "step": 2029 + }, + { + "epoch": 3.09600830078125e-06, + "step": 2029, + "training_step_time": 0.11110949516296387 + }, + { + "epoch": 3.0975341796875e-06, + "grad_norm": 1.022132396697998, + "learning_rate": 9.9914694262513e-05, + "loss": 0.121, + "step": 2030 + }, + { + "epoch": 3.0975341796875e-06, + "model_forward_time": 0.024765491485595703, + "step": 2030 + }, + { + "epoch": 3.0975341796875e-06, + "step": 2030, + "training_step_time": 0.11166810989379883 + }, + { + "epoch": 3.09906005859375e-06, + "model_forward_time": 0.02648782730102539, + "step": 2031 + }, + { + "epoch": 3.09906005859375e-06, + "step": 2031, + "training_step_time": 0.11631202697753906 + }, + { + "epoch": 3.1005859375e-06, + "model_forward_time": 0.025310754776000977, + "step": 2032 + }, + { + "epoch": 3.1005859375e-06, + "step": 2032, + "training_step_time": 0.11087274551391602 + }, + { + "epoch": 3.10211181640625e-06, + "model_forward_time": 0.02505946159362793, + "step": 2033 + }, + { + "epoch": 3.10211181640625e-06, + "step": 2033, + "training_step_time": 0.10766887664794922 + }, + { + "epoch": 3.1036376953125e-06, + "model_forward_time": 0.02511882781982422, + "step": 2034 + }, + { + "epoch": 3.1036376953125e-06, + "step": 2034, + "training_step_time": 0.10961437225341797 + }, + { + "epoch": 3.10516357421875e-06, + "model_forward_time": 0.025962114334106445, + "step": 2035 + }, + { + "epoch": 3.10516357421875e-06, + "step": 2035, + "training_step_time": 0.1130836009979248 + }, + { + "epoch": 3.106689453125e-06, + "model_forward_time": 0.02511763572692871, + "step": 2036 + }, + { + "epoch": 3.106689453125e-06, + "step": 2036, + "training_step_time": 0.11104178428649902 + }, + { + "epoch": 3.10821533203125e-06, + "model_forward_time": 0.02573680877685547, + "step": 2037 + }, + { + "epoch": 3.10821533203125e-06, + "step": 2037, + "training_step_time": 0.11015129089355469 + }, + { + "epoch": 3.1097412109375e-06, + "model_forward_time": 0.025629520416259766, + "step": 2038 + }, + { + "epoch": 3.1097412109375e-06, + "step": 2038, + "training_step_time": 0.10697746276855469 + }, + { + "epoch": 3.11126708984375e-06, + "model_forward_time": 0.025533199310302734, + "step": 2039 + }, + { + "epoch": 3.11126708984375e-06, + "step": 2039, + "training_step_time": 0.11368727684020996 + }, + { + "epoch": 3.11279296875e-06, + "grad_norm": 0.8981501460075378, + "learning_rate": 9.991144576886823e-05, + "loss": 0.15, + "step": 2040 + }, + { + "epoch": 3.11279296875e-06, + "model_forward_time": 0.024079561233520508, + "step": 2040 + }, + { + "epoch": 3.11279296875e-06, + "step": 2040, + "training_step_time": 0.10770583152770996 + }, + { + "epoch": 3.11431884765625e-06, + "model_forward_time": 0.02429342269897461, + "step": 2041 + }, + { + "epoch": 3.11431884765625e-06, + "step": 2041, + "training_step_time": 0.11056280136108398 + }, + { + "epoch": 3.1158447265625e-06, + "model_forward_time": 0.025574445724487305, + "step": 2042 + }, + { + "epoch": 3.1158447265625e-06, + "step": 2042, + "training_step_time": 0.10752487182617188 + }, + { + "epoch": 3.11737060546875e-06, + "model_forward_time": 0.025130033493041992, + "step": 2043 + }, + { + "epoch": 3.11737060546875e-06, + "step": 2043, + "training_step_time": 0.11660265922546387 + }, + { + "epoch": 3.118896484375e-06, + "model_forward_time": 0.025330781936645508, + "step": 2044 + }, + { + "epoch": 3.118896484375e-06, + "step": 2044, + "training_step_time": 0.11204743385314941 + }, + { + "epoch": 3.12042236328125e-06, + "model_forward_time": 0.025309324264526367, + "step": 2045 + }, + { + "epoch": 3.12042236328125e-06, + "step": 2045, + "training_step_time": 0.10686254501342773 + }, + { + "epoch": 3.1219482421875e-06, + "model_forward_time": 0.025354862213134766, + "step": 2046 + }, + { + "epoch": 3.1219482421875e-06, + "step": 2046, + "training_step_time": 0.11196517944335938 + }, + { + "epoch": 3.12347412109375e-06, + "model_forward_time": 0.025183916091918945, + "step": 2047 + }, + { + "epoch": 3.12347412109375e-06, + "step": 2047, + "training_step_time": 0.1076822280883789 + }, + { + "epoch": 3.125e-06, + "model_forward_time": 0.024935245513916016, + "step": 2048 + }, + { + "epoch": 3.125e-06, + "step": 2048, + "training_step_time": 0.10955548286437988 + }, + { + "epoch": 3.12652587890625e-06, + "model_forward_time": 0.025295019149780273, + "step": 2049 + }, + { + "epoch": 3.12652587890625e-06, + "step": 2049, + "training_step_time": 0.11477971076965332 + }, + { + "epoch": 3.1280517578125e-06, + "grad_norm": 0.6280609965324402, + "learning_rate": 9.990813662811051e-05, + "loss": 0.1311, + "step": 2050 + }, + { + "epoch": 3.1280517578125e-06, + "model_forward_time": 0.025088787078857422, + "step": 2050 + }, + { + "epoch": 3.1280517578125e-06, + "step": 2050, + "training_step_time": 0.18810796737670898 + }, + { + "epoch": 3.12957763671875e-06, + "model_forward_time": 0.025364160537719727, + "step": 2051 + }, + { + "epoch": 3.12957763671875e-06, + "step": 2051, + "training_step_time": 0.1219489574432373 + }, + { + "epoch": 3.131103515625e-06, + "model_forward_time": 0.02609705924987793, + "step": 2052 + }, + { + "epoch": 3.131103515625e-06, + "step": 2052, + "training_step_time": 0.17927050590515137 + }, + { + "epoch": 3.13262939453125e-06, + "model_forward_time": 0.024739742279052734, + "step": 2053 + }, + { + "epoch": 3.13262939453125e-06, + "step": 2053, + "training_step_time": 0.11631250381469727 + }, + { + "epoch": 3.1341552734375e-06, + "model_forward_time": 0.025247812271118164, + "step": 2054 + }, + { + "epoch": 3.1341552734375e-06, + "step": 2054, + "training_step_time": 0.21597838401794434 + }, + { + "epoch": 3.13568115234375e-06, + "model_forward_time": 0.0245513916015625, + "step": 2055 + }, + { + "epoch": 3.13568115234375e-06, + "step": 2055, + "training_step_time": 0.1456131935119629 + }, + { + "epoch": 3.13720703125e-06, + "model_forward_time": 0.025038719177246094, + "step": 2056 + }, + { + "epoch": 3.13720703125e-06, + "step": 2056, + "training_step_time": 0.10952234268188477 + }, + { + "epoch": 3.13873291015625e-06, + "model_forward_time": 0.025162458419799805, + "step": 2057 + }, + { + "epoch": 3.13873291015625e-06, + "step": 2057, + "training_step_time": 0.11988949775695801 + }, + { + "epoch": 3.1402587890625e-06, + "model_forward_time": 0.02513861656188965, + "step": 2058 + }, + { + "epoch": 3.1402587890625e-06, + "step": 2058, + "training_step_time": 0.10820245742797852 + }, + { + "epoch": 3.14178466796875e-06, + "model_forward_time": 0.025768280029296875, + "step": 2059 + }, + { + "epoch": 3.14178466796875e-06, + "step": 2059, + "training_step_time": 0.10930800437927246 + }, + { + "epoch": 3.143310546875e-06, + "grad_norm": 0.7524927258491516, + "learning_rate": 9.990476684426075e-05, + "loss": 0.1324, + "step": 2060 + }, + { + "epoch": 3.143310546875e-06, + "model_forward_time": 0.0254976749420166, + "step": 2060 + }, + { + "epoch": 3.143310546875e-06, + "step": 2060, + "training_step_time": 0.1744976043701172 + }, + { + "epoch": 3.14483642578125e-06, + "model_forward_time": 0.02460646629333496, + "step": 2061 + }, + { + "epoch": 3.14483642578125e-06, + "step": 2061, + "training_step_time": 0.14745163917541504 + }, + { + "epoch": 3.1463623046875e-06, + "model_forward_time": 0.024044275283813477, + "step": 2062 + }, + { + "epoch": 3.1463623046875e-06, + "step": 2062, + "training_step_time": 0.10992836952209473 + }, + { + "epoch": 3.14788818359375e-06, + "model_forward_time": 0.025064945220947266, + "step": 2063 + }, + { + "epoch": 3.14788818359375e-06, + "step": 2063, + "training_step_time": 0.10988306999206543 + }, + { + "epoch": 3.1494140625e-06, + "model_forward_time": 0.025089502334594727, + "step": 2064 + }, + { + "epoch": 3.1494140625e-06, + "step": 2064, + "training_step_time": 0.11190271377563477 + }, + { + "epoch": 3.15093994140625e-06, + "model_forward_time": 0.0263669490814209, + "step": 2065 + }, + { + "epoch": 3.15093994140625e-06, + "step": 2065, + "training_step_time": 0.160369873046875 + }, + { + "epoch": 3.1524658203125e-06, + "model_forward_time": 0.024605512619018555, + "step": 2066 + }, + { + "epoch": 3.1524658203125e-06, + "step": 2066, + "training_step_time": 0.15439772605895996 + }, + { + "epoch": 3.15399169921875e-06, + "model_forward_time": 0.02458500862121582, + "step": 2067 + }, + { + "epoch": 3.15399169921875e-06, + "step": 2067, + "training_step_time": 0.10489153861999512 + }, + { + "epoch": 3.155517578125e-06, + "model_forward_time": 0.02501201629638672, + "step": 2068 + }, + { + "epoch": 3.155517578125e-06, + "step": 2068, + "training_step_time": 0.10581231117248535 + }, + { + "epoch": 3.15704345703125e-06, + "model_forward_time": 0.02628636360168457, + "step": 2069 + }, + { + "epoch": 3.15704345703125e-06, + "step": 2069, + "training_step_time": 0.10709309577941895 + }, + { + "epoch": 3.1585693359375e-06, + "grad_norm": 0.6197975277900696, + "learning_rate": 9.990133642141359e-05, + "loss": 0.1219, + "step": 2070 + }, + { + "epoch": 3.1585693359375e-06, + "model_forward_time": 0.025173664093017578, + "step": 2070 + }, + { + "epoch": 3.1585693359375e-06, + "step": 2070, + "training_step_time": 0.11158990859985352 + }, + { + "epoch": 3.16009521484375e-06, + "model_forward_time": 0.02557826042175293, + "step": 2071 + }, + { + "epoch": 3.16009521484375e-06, + "step": 2071, + "training_step_time": 0.12154388427734375 + }, + { + "epoch": 3.16162109375e-06, + "model_forward_time": 0.028257131576538086, + "step": 2072 + }, + { + "epoch": 3.16162109375e-06, + "step": 2072, + "training_step_time": 0.11698007583618164 + }, + { + "epoch": 3.16314697265625e-06, + "model_forward_time": 0.02584528923034668, + "step": 2073 + }, + { + "epoch": 3.16314697265625e-06, + "step": 2073, + "training_step_time": 0.11852598190307617 + }, + { + "epoch": 3.1646728515625e-06, + "model_forward_time": 0.025632619857788086, + "step": 2074 + }, + { + "epoch": 3.1646728515625e-06, + "step": 2074, + "training_step_time": 0.10630321502685547 + }, + { + "epoch": 3.16619873046875e-06, + "model_forward_time": 0.02563166618347168, + "step": 2075 + }, + { + "epoch": 3.16619873046875e-06, + "step": 2075, + "training_step_time": 0.11893653869628906 + }, + { + "epoch": 3.167724609375e-06, + "model_forward_time": 0.02534317970275879, + "step": 2076 + }, + { + "epoch": 3.167724609375e-06, + "step": 2076, + "training_step_time": 0.14342761039733887 + }, + { + "epoch": 3.16925048828125e-06, + "model_forward_time": 0.024918079376220703, + "step": 2077 + }, + { + "epoch": 3.16925048828125e-06, + "step": 2077, + "training_step_time": 0.1947317123413086 + }, + { + "epoch": 3.1707763671875e-06, + "model_forward_time": 0.02443838119506836, + "step": 2078 + }, + { + "epoch": 3.1707763671875e-06, + "step": 2078, + "training_step_time": 0.1858081817626953 + }, + { + "epoch": 3.17230224609375e-06, + "model_forward_time": 0.02475762367248535, + "step": 2079 + }, + { + "epoch": 3.17230224609375e-06, + "step": 2079, + "training_step_time": 0.1803886890411377 + }, + { + "epoch": 3.173828125e-06, + "grad_norm": 0.713837742805481, + "learning_rate": 9.989784536373726e-05, + "loss": 0.1199, + "step": 2080 + }, + { + "epoch": 3.173828125e-06, + "model_forward_time": 0.02427363395690918, + "step": 2080 + }, + { + "epoch": 3.173828125e-06, + "step": 2080, + "training_step_time": 0.16886472702026367 + }, + { + "epoch": 3.17535400390625e-06, + "model_forward_time": 0.02570343017578125, + "step": 2081 + }, + { + "epoch": 3.17535400390625e-06, + "step": 2081, + "training_step_time": 0.14660906791687012 + }, + { + "epoch": 3.1768798828125e-06, + "model_forward_time": 0.024727344512939453, + "step": 2082 + }, + { + "epoch": 3.1768798828125e-06, + "step": 2082, + "training_step_time": 0.13730072975158691 + }, + { + "epoch": 3.17840576171875e-06, + "model_forward_time": 0.024831056594848633, + "step": 2083 + }, + { + "epoch": 3.17840576171875e-06, + "step": 2083, + "training_step_time": 0.13072848320007324 + }, + { + "epoch": 3.179931640625e-06, + "model_forward_time": 0.024785757064819336, + "step": 2084 + }, + { + "epoch": 3.179931640625e-06, + "step": 2084, + "training_step_time": 0.12457966804504395 + }, + { + "epoch": 3.18145751953125e-06, + "model_forward_time": 0.030757427215576172, + "step": 2085 + }, + { + "epoch": 3.18145751953125e-06, + "step": 2085, + "training_step_time": 0.12244343757629395 + }, + { + "epoch": 3.1829833984375e-06, + "model_forward_time": 0.025048494338989258, + "step": 2086 + }, + { + "epoch": 3.1829833984375e-06, + "step": 2086, + "training_step_time": 0.11066579818725586 + }, + { + "epoch": 3.18450927734375e-06, + "model_forward_time": 0.025244951248168945, + "step": 2087 + }, + { + "epoch": 3.18450927734375e-06, + "step": 2087, + "training_step_time": 0.10466861724853516 + }, + { + "epoch": 3.18603515625e-06, + "model_forward_time": 0.025649547576904297, + "step": 2088 + }, + { + "epoch": 3.18603515625e-06, + "step": 2088, + "training_step_time": 0.10782051086425781 + }, + { + "epoch": 3.18756103515625e-06, + "model_forward_time": 0.025396108627319336, + "step": 2089 + }, + { + "epoch": 3.18756103515625e-06, + "step": 2089, + "training_step_time": 0.10519099235534668 + }, + { + "epoch": 3.1890869140625e-06, + "grad_norm": 0.7144132256507874, + "learning_rate": 9.989429367547377e-05, + "loss": 0.1204, + "step": 2090 + }, + { + "epoch": 3.1890869140625e-06, + "model_forward_time": 0.025180578231811523, + "step": 2090 + }, + { + "epoch": 3.1890869140625e-06, + "step": 2090, + "training_step_time": 0.10965895652770996 + }, + { + "epoch": 3.19061279296875e-06, + "model_forward_time": 0.025431394577026367, + "step": 2091 + }, + { + "epoch": 3.19061279296875e-06, + "step": 2091, + "training_step_time": 0.10565376281738281 + }, + { + "epoch": 3.192138671875e-06, + "model_forward_time": 0.025173664093017578, + "step": 2092 + }, + { + "epoch": 3.192138671875e-06, + "step": 2092, + "training_step_time": 0.10548543930053711 + }, + { + "epoch": 3.19366455078125e-06, + "model_forward_time": 0.025699138641357422, + "step": 2093 + }, + { + "epoch": 3.19366455078125e-06, + "step": 2093, + "training_step_time": 0.17055177688598633 + }, + { + "epoch": 3.1951904296875e-06, + "model_forward_time": 0.025228261947631836, + "step": 2094 + }, + { + "epoch": 3.1951904296875e-06, + "step": 2094, + "training_step_time": 0.12229347229003906 + }, + { + "epoch": 3.19671630859375e-06, + "model_forward_time": 0.0248258113861084, + "step": 2095 + }, + { + "epoch": 3.19671630859375e-06, + "step": 2095, + "training_step_time": 0.12253737449645996 + }, + { + "epoch": 3.1982421875e-06, + "model_forward_time": 0.0254518985748291, + "step": 2096 + }, + { + "epoch": 3.1982421875e-06, + "step": 2096, + "training_step_time": 0.11786389350891113 + }, + { + "epoch": 3.19976806640625e-06, + "model_forward_time": 0.025361061096191406, + "step": 2097 + }, + { + "epoch": 3.19976806640625e-06, + "step": 2097, + "training_step_time": 0.11223268508911133 + }, + { + "epoch": 3.2012939453125e-06, + "model_forward_time": 0.025323152542114258, + "step": 2098 + }, + { + "epoch": 3.2012939453125e-06, + "step": 2098, + "training_step_time": 0.10807538032531738 + }, + { + "epoch": 3.20281982421875e-06, + "model_forward_time": 0.02576756477355957, + "step": 2099 + }, + { + "epoch": 3.20281982421875e-06, + "step": 2099, + "training_step_time": 0.12779521942138672 + }, + { + "epoch": 3.204345703125e-06, + "grad_norm": 0.6506755352020264, + "learning_rate": 9.989068136093873e-05, + "loss": 0.1189, + "step": 2100 + }, + { + "epoch": 3.204345703125e-06, + "model_forward_time": 0.02525496482849121, + "step": 2100 + }, + { + "epoch": 3.204345703125e-06, + "step": 2100, + "training_step_time": 0.12012076377868652 + }, + { + "epoch": 3.20587158203125e-06, + "model_forward_time": 0.026320219039916992, + "step": 2101 + }, + { + "epoch": 3.20587158203125e-06, + "step": 2101, + "training_step_time": 0.11673259735107422 + }, + { + "epoch": 3.2073974609375e-06, + "model_forward_time": 0.02611851692199707, + "step": 2102 + }, + { + "epoch": 3.2073974609375e-06, + "step": 2102, + "training_step_time": 0.10762786865234375 + }, + { + "epoch": 3.20892333984375e-06, + "model_forward_time": 0.02554607391357422, + "step": 2103 + }, + { + "epoch": 3.20892333984375e-06, + "step": 2103, + "training_step_time": 0.10594296455383301 + }, + { + "epoch": 3.21044921875e-06, + "model_forward_time": 0.02533411979675293, + "step": 2104 + }, + { + "epoch": 3.21044921875e-06, + "step": 2104, + "training_step_time": 0.10741949081420898 + }, + { + "epoch": 3.21197509765625e-06, + "model_forward_time": 0.02526402473449707, + "step": 2105 + }, + { + "epoch": 3.21197509765625e-06, + "step": 2105, + "training_step_time": 0.14597535133361816 + }, + { + "epoch": 3.2135009765625e-06, + "model_forward_time": 0.026140213012695312, + "step": 2106 + }, + { + "epoch": 3.2135009765625e-06, + "step": 2106, + "training_step_time": 0.14003515243530273 + }, + { + "epoch": 3.21502685546875e-06, + "model_forward_time": 0.02495431900024414, + "step": 2107 + }, + { + "epoch": 3.21502685546875e-06, + "step": 2107, + "training_step_time": 0.10941839218139648 + }, + { + "epoch": 3.216552734375e-06, + "model_forward_time": 0.02492833137512207, + "step": 2108 + }, + { + "epoch": 3.216552734375e-06, + "step": 2108, + "training_step_time": 0.10828232765197754 + }, + { + "epoch": 3.21807861328125e-06, + "model_forward_time": 0.02807450294494629, + "step": 2109 + }, + { + "epoch": 3.21807861328125e-06, + "step": 2109, + "training_step_time": 0.11664819717407227 + }, + { + "epoch": 3.2196044921875e-06, + "grad_norm": 0.6079724431037903, + "learning_rate": 9.988700842452146e-05, + "loss": 0.1273, + "step": 2110 + }, + { + "epoch": 3.2196044921875e-06, + "model_forward_time": 0.025449514389038086, + "step": 2110 + }, + { + "epoch": 3.2196044921875e-06, + "step": 2110, + "training_step_time": 0.10712432861328125 + }, + { + "epoch": 3.22113037109375e-06, + "model_forward_time": 0.025594711303710938, + "step": 2111 + }, + { + "epoch": 3.22113037109375e-06, + "step": 2111, + "training_step_time": 0.20910263061523438 + }, + { + "epoch": 3.22265625e-06, + "model_forward_time": 0.028296947479248047, + "step": 2112 + }, + { + "epoch": 3.22265625e-06, + "step": 2112, + "training_step_time": 0.10878992080688477 + }, + { + "epoch": 3.22418212890625e-06, + "model_forward_time": 0.024677515029907227, + "step": 2113 + }, + { + "epoch": 3.22418212890625e-06, + "step": 2113, + "training_step_time": 0.10692906379699707 + }, + { + "epoch": 3.2257080078125e-06, + "model_forward_time": 0.025416851043701172, + "step": 2114 + }, + { + "epoch": 3.2257080078125e-06, + "step": 2114, + "training_step_time": 0.10677242279052734 + }, + { + "epoch": 3.22723388671875e-06, + "model_forward_time": 0.025374174118041992, + "step": 2115 + }, + { + "epoch": 3.22723388671875e-06, + "step": 2115, + "training_step_time": 0.10704207420349121 + }, + { + "epoch": 3.228759765625e-06, + "model_forward_time": 0.02539515495300293, + "step": 2116 + }, + { + "epoch": 3.228759765625e-06, + "step": 2116, + "training_step_time": 0.11344528198242188 + }, + { + "epoch": 3.23028564453125e-06, + "model_forward_time": 0.026009559631347656, + "step": 2117 + }, + { + "epoch": 3.23028564453125e-06, + "step": 2117, + "training_step_time": 0.11017346382141113 + }, + { + "epoch": 3.2318115234375e-06, + "model_forward_time": 0.02544403076171875, + "step": 2118 + }, + { + "epoch": 3.2318115234375e-06, + "step": 2118, + "training_step_time": 0.1189274787902832 + }, + { + "epoch": 3.23333740234375e-06, + "model_forward_time": 0.025483369827270508, + "step": 2119 + }, + { + "epoch": 3.23333740234375e-06, + "step": 2119, + "training_step_time": 0.21869158744812012 + }, + { + "epoch": 3.23486328125e-06, + "grad_norm": 0.6594318151473999, + "learning_rate": 9.988327487068492e-05, + "loss": 0.1457, + "step": 2120 + }, + { + "epoch": 3.23486328125e-06, + "model_forward_time": 0.024870634078979492, + "step": 2120 + }, + { + "epoch": 3.23486328125e-06, + "step": 2120, + "training_step_time": 0.11153578758239746 + }, + { + "epoch": 3.23638916015625e-06, + "model_forward_time": 0.02471184730529785, + "step": 2121 + }, + { + "epoch": 3.23638916015625e-06, + "step": 2121, + "training_step_time": 0.11233401298522949 + }, + { + "epoch": 3.2379150390625e-06, + "model_forward_time": 0.025632143020629883, + "step": 2122 + }, + { + "epoch": 3.2379150390625e-06, + "step": 2122, + "training_step_time": 0.1072697639465332 + }, + { + "epoch": 3.23944091796875e-06, + "model_forward_time": 0.025048255920410156, + "step": 2123 + }, + { + "epoch": 3.23944091796875e-06, + "step": 2123, + "training_step_time": 0.10773730278015137 + }, + { + "epoch": 3.240966796875e-06, + "model_forward_time": 0.025753021240234375, + "step": 2124 + }, + { + "epoch": 3.240966796875e-06, + "step": 2124, + "training_step_time": 0.10922646522521973 + }, + { + "epoch": 3.24249267578125e-06, + "model_forward_time": 0.025677919387817383, + "step": 2125 + }, + { + "epoch": 3.24249267578125e-06, + "step": 2125, + "training_step_time": 0.10572957992553711 + }, + { + "epoch": 3.2440185546875e-06, + "model_forward_time": 0.025423526763916016, + "step": 2126 + }, + { + "epoch": 3.2440185546875e-06, + "step": 2126, + "training_step_time": 0.10510468482971191 + }, + { + "epoch": 3.24554443359375e-06, + "model_forward_time": 0.026118040084838867, + "step": 2127 + }, + { + "epoch": 3.24554443359375e-06, + "step": 2127, + "training_step_time": 0.11398768424987793 + }, + { + "epoch": 3.2470703125e-06, + "model_forward_time": 0.02547907829284668, + "step": 2128 + }, + { + "epoch": 3.2470703125e-06, + "step": 2128, + "training_step_time": 0.18561720848083496 + }, + { + "epoch": 3.24859619140625e-06, + "model_forward_time": 0.0248110294342041, + "step": 2129 + }, + { + "epoch": 3.24859619140625e-06, + "step": 2129, + "training_step_time": 0.20724749565124512 + }, + { + "epoch": 3.2501220703125e-06, + "grad_norm": 0.624142050743103, + "learning_rate": 9.987948070396571e-05, + "loss": 0.1262, + "step": 2130 + }, + { + "epoch": 3.2501220703125e-06, + "model_forward_time": 0.024610519409179688, + "step": 2130 + }, + { + "epoch": 3.2501220703125e-06, + "step": 2130, + "training_step_time": 0.2136697769165039 + }, + { + "epoch": 3.25164794921875e-06, + "model_forward_time": 0.024919986724853516, + "step": 2131 + }, + { + "epoch": 3.25164794921875e-06, + "step": 2131, + "training_step_time": 0.21112704277038574 + }, + { + "epoch": 3.253173828125e-06, + "model_forward_time": 0.024692058563232422, + "step": 2132 + }, + { + "epoch": 3.253173828125e-06, + "step": 2132, + "training_step_time": 0.2121884822845459 + }, + { + "epoch": 3.25469970703125e-06, + "model_forward_time": 0.02683401107788086, + "step": 2133 + }, + { + "epoch": 3.25469970703125e-06, + "step": 2133, + "training_step_time": 0.21092891693115234 + }, + { + "epoch": 3.2562255859375e-06, + "model_forward_time": 0.024557113647460938, + "step": 2134 + }, + { + "epoch": 3.2562255859375e-06, + "step": 2134, + "training_step_time": 0.20020246505737305 + }, + { + "epoch": 3.25775146484375e-06, + "model_forward_time": 0.024295568466186523, + "step": 2135 + }, + { + "epoch": 3.25775146484375e-06, + "step": 2135, + "training_step_time": 0.16309142112731934 + }, + { + "epoch": 3.25927734375e-06, + "model_forward_time": 0.024413585662841797, + "step": 2136 + }, + { + "epoch": 3.25927734375e-06, + "step": 2136, + "training_step_time": 0.19826674461364746 + }, + { + "epoch": 3.26080322265625e-06, + "model_forward_time": 0.024466514587402344, + "step": 2137 + }, + { + "epoch": 3.26080322265625e-06, + "step": 2137, + "training_step_time": 0.13530588150024414 + }, + { + "epoch": 3.2623291015625e-06, + "model_forward_time": 0.024679183959960938, + "step": 2138 + }, + { + "epoch": 3.2623291015625e-06, + "step": 2138, + "training_step_time": 0.18079113960266113 + }, + { + "epoch": 3.26385498046875e-06, + "model_forward_time": 0.024440288543701172, + "step": 2139 + }, + { + "epoch": 3.26385498046875e-06, + "step": 2139, + "training_step_time": 0.11630678176879883 + }, + { + "epoch": 3.265380859375e-06, + "grad_norm": 0.6571296453475952, + "learning_rate": 9.987562592897413e-05, + "loss": 0.1203, + "step": 2140 + }, + { + "epoch": 3.265380859375e-06, + "model_forward_time": 0.024141550064086914, + "step": 2140 + }, + { + "epoch": 3.265380859375e-06, + "step": 2140, + "training_step_time": 0.11722898483276367 + }, + { + "epoch": 3.26690673828125e-06, + "model_forward_time": 0.025452136993408203, + "step": 2141 + }, + { + "epoch": 3.26690673828125e-06, + "step": 2141, + "training_step_time": 0.12760114669799805 + }, + { + "epoch": 3.2684326171875e-06, + "model_forward_time": 0.025402069091796875, + "step": 2142 + }, + { + "epoch": 3.2684326171875e-06, + "step": 2142, + "training_step_time": 0.11624526977539062 + }, + { + "epoch": 3.26995849609375e-06, + "model_forward_time": 0.025295257568359375, + "step": 2143 + }, + { + "epoch": 3.26995849609375e-06, + "step": 2143, + "training_step_time": 0.12904644012451172 + }, + { + "epoch": 3.271484375e-06, + "model_forward_time": 0.024993181228637695, + "step": 2144 + }, + { + "epoch": 3.271484375e-06, + "step": 2144, + "training_step_time": 0.12623310089111328 + }, + { + "epoch": 3.27301025390625e-06, + "model_forward_time": 0.024932146072387695, + "step": 2145 + }, + { + "epoch": 3.27301025390625e-06, + "step": 2145, + "training_step_time": 0.20032072067260742 + }, + { + "epoch": 3.2745361328125e-06, + "model_forward_time": 0.024401426315307617, + "step": 2146 + }, + { + "epoch": 3.2745361328125e-06, + "step": 2146, + "training_step_time": 0.13790631294250488 + }, + { + "epoch": 3.27606201171875e-06, + "model_forward_time": 0.024456501007080078, + "step": 2147 + }, + { + "epoch": 3.27606201171875e-06, + "step": 2147, + "training_step_time": 0.12216615676879883 + }, + { + "epoch": 3.277587890625e-06, + "model_forward_time": 0.02521038055419922, + "step": 2148 + }, + { + "epoch": 3.277587890625e-06, + "step": 2148, + "training_step_time": 0.1178901195526123 + }, + { + "epoch": 3.27911376953125e-06, + "model_forward_time": 0.02555704116821289, + "step": 2149 + }, + { + "epoch": 3.27911376953125e-06, + "step": 2149, + "training_step_time": 0.11796283721923828 + }, + { + "epoch": 3.2806396484375e-06, + "grad_norm": 0.9658941626548767, + "learning_rate": 9.987171055039408e-05, + "loss": 0.175, + "step": 2150 + }, + { + "epoch": 3.2806396484375e-06, + "model_forward_time": 0.025292634963989258, + "step": 2150 + }, + { + "epoch": 3.2806396484375e-06, + "step": 2150, + "training_step_time": 0.17586350440979004 + }, + { + "epoch": 3.28216552734375e-06, + "model_forward_time": 0.02443861961364746, + "step": 2151 + }, + { + "epoch": 3.28216552734375e-06, + "step": 2151, + "training_step_time": 0.11321043968200684 + }, + { + "epoch": 3.28369140625e-06, + "model_forward_time": 0.025038719177246094, + "step": 2152 + }, + { + "epoch": 3.28369140625e-06, + "step": 2152, + "training_step_time": 0.10466170310974121 + }, + { + "epoch": 3.28521728515625e-06, + "model_forward_time": 0.025259971618652344, + "step": 2153 + }, + { + "epoch": 3.28521728515625e-06, + "step": 2153, + "training_step_time": 0.10571503639221191 + }, + { + "epoch": 3.2867431640625e-06, + "model_forward_time": 0.025088071823120117, + "step": 2154 + }, + { + "epoch": 3.2867431640625e-06, + "step": 2154, + "training_step_time": 0.10432124137878418 + }, + { + "epoch": 3.28826904296875e-06, + "model_forward_time": 0.02495741844177246, + "step": 2155 + }, + { + "epoch": 3.28826904296875e-06, + "step": 2155, + "training_step_time": 0.11027741432189941 + }, + { + "epoch": 3.289794921875e-06, + "model_forward_time": 0.02582573890686035, + "step": 2156 + }, + { + "epoch": 3.289794921875e-06, + "step": 2156, + "training_step_time": 0.11111211776733398 + }, + { + "epoch": 3.29132080078125e-06, + "model_forward_time": 0.025462627410888672, + "step": 2157 + }, + { + "epoch": 3.29132080078125e-06, + "step": 2157, + "training_step_time": 0.11565208435058594 + }, + { + "epoch": 3.2928466796875e-06, + "model_forward_time": 0.025603294372558594, + "step": 2158 + }, + { + "epoch": 3.2928466796875e-06, + "step": 2158, + "training_step_time": 0.11037921905517578 + }, + { + "epoch": 3.29437255859375e-06, + "model_forward_time": 0.025257587432861328, + "step": 2159 + }, + { + "epoch": 3.29437255859375e-06, + "step": 2159, + "training_step_time": 0.17060470581054688 + }, + { + "epoch": 3.2958984375e-06, + "grad_norm": 0.8946394324302673, + "learning_rate": 9.986773457298311e-05, + "loss": 0.1522, + "step": 2160 + }, + { + "epoch": 3.2958984375e-06, + "model_forward_time": 0.024634361267089844, + "step": 2160 + }, + { + "epoch": 3.2958984375e-06, + "step": 2160, + "training_step_time": 0.16210556030273438 + }, + { + "epoch": 3.29742431640625e-06, + "model_forward_time": 0.025043725967407227, + "step": 2161 + }, + { + "epoch": 3.29742431640625e-06, + "step": 2161, + "training_step_time": 0.10389494895935059 + }, + { + "epoch": 3.2989501953125e-06, + "model_forward_time": 0.025413990020751953, + "step": 2162 + }, + { + "epoch": 3.2989501953125e-06, + "step": 2162, + "training_step_time": 0.10532212257385254 + }, + { + "epoch": 3.30047607421875e-06, + "model_forward_time": 0.025616168975830078, + "step": 2163 + }, + { + "epoch": 3.30047607421875e-06, + "step": 2163, + "training_step_time": 0.10789036750793457 + }, + { + "epoch": 3.302001953125e-06, + "model_forward_time": 0.02506113052368164, + "step": 2164 + }, + { + "epoch": 3.302001953125e-06, + "step": 2164, + "training_step_time": 0.10484933853149414 + }, + { + "epoch": 3.30352783203125e-06, + "model_forward_time": 0.025386333465576172, + "step": 2165 + }, + { + "epoch": 3.30352783203125e-06, + "step": 2165, + "training_step_time": 0.10552430152893066 + }, + { + "epoch": 3.3050537109375e-06, + "model_forward_time": 0.025143146514892578, + "step": 2166 + }, + { + "epoch": 3.3050537109375e-06, + "step": 2166, + "training_step_time": 0.10491585731506348 + }, + { + "epoch": 3.30657958984375e-06, + "model_forward_time": 0.024997711181640625, + "step": 2167 + }, + { + "epoch": 3.30657958984375e-06, + "step": 2167, + "training_step_time": 0.10502815246582031 + }, + { + "epoch": 3.30810546875e-06, + "model_forward_time": 0.025150299072265625, + "step": 2168 + }, + { + "epoch": 3.30810546875e-06, + "step": 2168, + "training_step_time": 0.1053466796875 + }, + { + "epoch": 3.30963134765625e-06, + "model_forward_time": 0.027341842651367188, + "step": 2169 + }, + { + "epoch": 3.30963134765625e-06, + "step": 2169, + "training_step_time": 0.10728335380554199 + }, + { + "epoch": 3.3111572265625e-06, + "grad_norm": 0.7859796285629272, + "learning_rate": 9.986369800157242e-05, + "loss": 0.1524, + "step": 2170 + }, + { + "epoch": 3.3111572265625e-06, + "model_forward_time": 0.025215625762939453, + "step": 2170 + }, + { + "epoch": 3.3111572265625e-06, + "step": 2170, + "training_step_time": 0.10550832748413086 + }, + { + "epoch": 3.31268310546875e-06, + "model_forward_time": 0.025255680084228516, + "step": 2171 + }, + { + "epoch": 3.31268310546875e-06, + "step": 2171, + "training_step_time": 0.10847997665405273 + }, + { + "epoch": 3.314208984375e-06, + "model_forward_time": 0.025992393493652344, + "step": 2172 + }, + { + "epoch": 3.314208984375e-06, + "step": 2172, + "training_step_time": 0.10549521446228027 + }, + { + "epoch": 3.31573486328125e-06, + "model_forward_time": 0.025362014770507812, + "step": 2173 + }, + { + "epoch": 3.31573486328125e-06, + "step": 2173, + "training_step_time": 0.10432863235473633 + }, + { + "epoch": 3.3172607421875e-06, + "model_forward_time": 0.025313854217529297, + "step": 2174 + }, + { + "epoch": 3.3172607421875e-06, + "step": 2174, + "training_step_time": 0.10533928871154785 + }, + { + "epoch": 3.31878662109375e-06, + "model_forward_time": 0.025109052658081055, + "step": 2175 + }, + { + "epoch": 3.31878662109375e-06, + "step": 2175, + "training_step_time": 0.10478687286376953 + }, + { + "epoch": 3.3203125e-06, + "model_forward_time": 0.025460243225097656, + "step": 2176 + }, + { + "epoch": 3.3203125e-06, + "step": 2176, + "training_step_time": 0.10328149795532227 + }, + { + "epoch": 3.32183837890625e-06, + "model_forward_time": 0.025415420532226562, + "step": 2177 + }, + { + "epoch": 3.32183837890625e-06, + "step": 2177, + "training_step_time": 0.10247683525085449 + }, + { + "epoch": 3.3233642578125e-06, + "model_forward_time": 0.02520132064819336, + "step": 2178 + }, + { + "epoch": 3.3233642578125e-06, + "step": 2178, + "training_step_time": 0.10479617118835449 + }, + { + "epoch": 3.32489013671875e-06, + "model_forward_time": 0.025570154190063477, + "step": 2179 + }, + { + "epoch": 3.32489013671875e-06, + "step": 2179, + "training_step_time": 0.10556745529174805 + }, + { + "epoch": 3.326416015625e-06, + "grad_norm": 0.827404797077179, + "learning_rate": 9.985960084106682e-05, + "loss": 0.1413, + "step": 2180 + }, + { + "epoch": 3.326416015625e-06, + "model_forward_time": 0.027471542358398438, + "step": 2180 + }, + { + "epoch": 3.326416015625e-06, + "step": 2180, + "training_step_time": 0.10951590538024902 + }, + { + "epoch": 3.32794189453125e-06, + "model_forward_time": 0.026354551315307617, + "step": 2181 + }, + { + "epoch": 3.32794189453125e-06, + "step": 2181, + "training_step_time": 0.10940313339233398 + }, + { + "epoch": 3.3294677734375e-06, + "model_forward_time": 0.025063514709472656, + "step": 2182 + }, + { + "epoch": 3.3294677734375e-06, + "step": 2182, + "training_step_time": 0.13629531860351562 + }, + { + "epoch": 3.33099365234375e-06, + "model_forward_time": 0.025477170944213867, + "step": 2183 + }, + { + "epoch": 3.33099365234375e-06, + "step": 2183, + "training_step_time": 0.1177668571472168 + }, + { + "epoch": 3.33251953125e-06, + "model_forward_time": 0.025032997131347656, + "step": 2184 + }, + { + "epoch": 3.33251953125e-06, + "step": 2184, + "training_step_time": 0.20606446266174316 + }, + { + "epoch": 3.33404541015625e-06, + "model_forward_time": 0.02445077896118164, + "step": 2185 + }, + { + "epoch": 3.33404541015625e-06, + "step": 2185, + "training_step_time": 0.13168716430664062 + }, + { + "epoch": 3.3355712890625e-06, + "model_forward_time": 0.02429485321044922, + "step": 2186 + }, + { + "epoch": 3.3355712890625e-06, + "step": 2186, + "training_step_time": 0.22959256172180176 + }, + { + "epoch": 3.33709716796875e-06, + "model_forward_time": 0.024776935577392578, + "step": 2187 + }, + { + "epoch": 3.33709716796875e-06, + "step": 2187, + "training_step_time": 0.12503457069396973 + }, + { + "epoch": 3.338623046875e-06, + "model_forward_time": 0.02451467514038086, + "step": 2188 + }, + { + "epoch": 3.338623046875e-06, + "step": 2188, + "training_step_time": 0.12317419052124023 + }, + { + "epoch": 3.34014892578125e-06, + "model_forward_time": 0.02436065673828125, + "step": 2189 + }, + { + "epoch": 3.34014892578125e-06, + "step": 2189, + "training_step_time": 0.1925981044769287 + }, + { + "epoch": 3.3416748046875e-06, + "grad_norm": 0.7417447566986084, + "learning_rate": 9.985544309644475e-05, + "loss": 0.1272, + "step": 2190 + }, + { + "epoch": 3.3416748046875e-06, + "model_forward_time": 0.024759769439697266, + "step": 2190 + }, + { + "epoch": 3.3416748046875e-06, + "step": 2190, + "training_step_time": 0.10996413230895996 + }, + { + "epoch": 3.34320068359375e-06, + "model_forward_time": 0.02462005615234375, + "step": 2191 + }, + { + "epoch": 3.34320068359375e-06, + "step": 2191, + "training_step_time": 0.1260089874267578 + }, + { + "epoch": 3.3447265625e-06, + "model_forward_time": 0.025873899459838867, + "step": 2192 + }, + { + "epoch": 3.3447265625e-06, + "step": 2192, + "training_step_time": 0.1339128017425537 + }, + { + "epoch": 3.34625244140625e-06, + "model_forward_time": 0.025289535522460938, + "step": 2193 + }, + { + "epoch": 3.34625244140625e-06, + "step": 2193, + "training_step_time": 0.11606144905090332 + }, + { + "epoch": 3.3477783203125e-06, + "model_forward_time": 0.02546095848083496, + "step": 2194 + }, + { + "epoch": 3.3477783203125e-06, + "step": 2194, + "training_step_time": 0.11260271072387695 + }, + { + "epoch": 3.34930419921875e-06, + "model_forward_time": 0.025516748428344727, + "step": 2195 + }, + { + "epoch": 3.34930419921875e-06, + "step": 2195, + "training_step_time": 0.11277151107788086 + }, + { + "epoch": 3.350830078125e-06, + "model_forward_time": 0.025420188903808594, + "step": 2196 + }, + { + "epoch": 3.350830078125e-06, + "step": 2196, + "training_step_time": 0.10816264152526855 + }, + { + "epoch": 3.35235595703125e-06, + "model_forward_time": 0.025388717651367188, + "step": 2197 + }, + { + "epoch": 3.35235595703125e-06, + "step": 2197, + "training_step_time": 0.19941139221191406 + }, + { + "epoch": 3.3538818359375e-06, + "model_forward_time": 0.024378538131713867, + "step": 2198 + }, + { + "epoch": 3.3538818359375e-06, + "step": 2198, + "training_step_time": 0.10484576225280762 + }, + { + "epoch": 3.35540771484375e-06, + "model_forward_time": 0.02456188201904297, + "step": 2199 + }, + { + "epoch": 3.35540771484375e-06, + "step": 2199, + "training_step_time": 0.10401391983032227 + }, + { + "epoch": 3.35693359375e-06, + "grad_norm": 0.6290052533149719, + "learning_rate": 9.985122477275824e-05, + "loss": 0.1247, + "step": 2200 + }, + { + "epoch": 3.35693359375e-06, + "model_forward_time": 0.025468826293945312, + "step": 2200 + }, + { + "epoch": 3.35693359375e-06, + "step": 2200, + "training_step_time": 0.10952520370483398 + }, + { + "epoch": 3.35845947265625e-06, + "model_forward_time": 0.027116775512695312, + "step": 2201 + }, + { + "epoch": 3.35845947265625e-06, + "step": 2201, + "training_step_time": 0.11272716522216797 + }, + { + "epoch": 3.3599853515625e-06, + "model_forward_time": 0.025820016860961914, + "step": 2202 + }, + { + "epoch": 3.3599853515625e-06, + "step": 2202, + "training_step_time": 0.21257948875427246 + }, + { + "epoch": 3.36151123046875e-06, + "model_forward_time": 0.02475452423095703, + "step": 2203 + }, + { + "epoch": 3.36151123046875e-06, + "step": 2203, + "training_step_time": 0.1158289909362793 + }, + { + "epoch": 3.363037109375e-06, + "model_forward_time": 0.02440643310546875, + "step": 2204 + }, + { + "epoch": 3.363037109375e-06, + "step": 2204, + "training_step_time": 0.10750460624694824 + }, + { + "epoch": 3.36456298828125e-06, + "model_forward_time": 0.02592754364013672, + "step": 2205 + }, + { + "epoch": 3.36456298828125e-06, + "step": 2205, + "training_step_time": 0.10721445083618164 + }, + { + "epoch": 3.3660888671875e-06, + "model_forward_time": 0.02557206153869629, + "step": 2206 + }, + { + "epoch": 3.3660888671875e-06, + "step": 2206, + "training_step_time": 0.11526727676391602 + }, + { + "epoch": 3.36761474609375e-06, + "model_forward_time": 0.02530694007873535, + "step": 2207 + }, + { + "epoch": 3.36761474609375e-06, + "step": 2207, + "training_step_time": 0.11032295227050781 + }, + { + "epoch": 3.369140625e-06, + "model_forward_time": 0.025232791900634766, + "step": 2208 + }, + { + "epoch": 3.369140625e-06, + "step": 2208, + "training_step_time": 0.11005616188049316 + }, + { + "epoch": 3.37066650390625e-06, + "model_forward_time": 0.02534008026123047, + "step": 2209 + }, + { + "epoch": 3.37066650390625e-06, + "step": 2209, + "training_step_time": 0.10846734046936035 + }, + { + "epoch": 3.3721923828125e-06, + "grad_norm": 1.0931735038757324, + "learning_rate": 9.984694587513298e-05, + "loss": 0.1413, + "step": 2210 + }, + { + "epoch": 3.3721923828125e-06, + "model_forward_time": 0.025806665420532227, + "step": 2210 + }, + { + "epoch": 3.3721923828125e-06, + "step": 2210, + "training_step_time": 0.1068124771118164 + }, + { + "epoch": 3.37371826171875e-06, + "model_forward_time": 0.02540755271911621, + "step": 2211 + }, + { + "epoch": 3.37371826171875e-06, + "step": 2211, + "training_step_time": 0.10565328598022461 + }, + { + "epoch": 3.375244140625e-06, + "model_forward_time": 0.025311708450317383, + "step": 2212 + }, + { + "epoch": 3.375244140625e-06, + "step": 2212, + "training_step_time": 0.10906720161437988 + }, + { + "epoch": 3.37677001953125e-06, + "model_forward_time": 0.025380373001098633, + "step": 2213 + }, + { + "epoch": 3.37677001953125e-06, + "step": 2213, + "training_step_time": 0.1071782112121582 + }, + { + "epoch": 3.3782958984375e-06, + "model_forward_time": 0.025197982788085938, + "step": 2214 + }, + { + "epoch": 3.3782958984375e-06, + "step": 2214, + "training_step_time": 0.10754251480102539 + }, + { + "epoch": 3.37982177734375e-06, + "model_forward_time": 0.025327205657958984, + "step": 2215 + }, + { + "epoch": 3.37982177734375e-06, + "step": 2215, + "training_step_time": 0.10689949989318848 + }, + { + "epoch": 3.38134765625e-06, + "model_forward_time": 0.02474236488342285, + "step": 2216 + }, + { + "epoch": 3.38134765625e-06, + "step": 2216, + "training_step_time": 0.10626339912414551 + }, + { + "epoch": 3.38287353515625e-06, + "model_forward_time": 0.025862693786621094, + "step": 2217 + }, + { + "epoch": 3.38287353515625e-06, + "step": 2217, + "training_step_time": 0.11328125 + }, + { + "epoch": 3.3843994140625e-06, + "model_forward_time": 0.02625441551208496, + "step": 2218 + }, + { + "epoch": 3.3843994140625e-06, + "step": 2218, + "training_step_time": 0.1129457950592041 + }, + { + "epoch": 3.38592529296875e-06, + "model_forward_time": 0.02501845359802246, + "step": 2219 + }, + { + "epoch": 3.38592529296875e-06, + "step": 2219, + "training_step_time": 0.10761117935180664 + }, + { + "epoch": 3.387451171875e-06, + "grad_norm": 0.6803141236305237, + "learning_rate": 9.984260640876821e-05, + "loss": 0.1228, + "step": 2220 + }, + { + "epoch": 3.387451171875e-06, + "model_forward_time": 0.024636030197143555, + "step": 2220 + }, + { + "epoch": 3.387451171875e-06, + "step": 2220, + "training_step_time": 0.10845708847045898 + }, + { + "epoch": 3.38897705078125e-06, + "model_forward_time": 0.024316072463989258, + "step": 2221 + }, + { + "epoch": 3.38897705078125e-06, + "step": 2221, + "training_step_time": 0.11272144317626953 + }, + { + "epoch": 3.3905029296875e-06, + "model_forward_time": 0.02524542808532715, + "step": 2222 + }, + { + "epoch": 3.3905029296875e-06, + "step": 2222, + "training_step_time": 0.11137151718139648 + }, + { + "epoch": 3.39202880859375e-06, + "model_forward_time": 0.024283170700073242, + "step": 2223 + }, + { + "epoch": 3.39202880859375e-06, + "step": 2223, + "training_step_time": 0.10985541343688965 + }, + { + "epoch": 3.3935546875e-06, + "model_forward_time": 0.025202035903930664, + "step": 2224 + }, + { + "epoch": 3.3935546875e-06, + "step": 2224, + "training_step_time": 0.10822153091430664 + }, + { + "epoch": 3.39508056640625e-06, + "model_forward_time": 0.024516820907592773, + "step": 2225 + }, + { + "epoch": 3.39508056640625e-06, + "step": 2225, + "training_step_time": 0.1114501953125 + }, + { + "epoch": 3.3966064453125e-06, + "model_forward_time": 0.025405168533325195, + "step": 2226 + }, + { + "epoch": 3.3966064453125e-06, + "step": 2226, + "training_step_time": 0.10735940933227539 + }, + { + "epoch": 3.39813232421875e-06, + "model_forward_time": 0.026009321212768555, + "step": 2227 + }, + { + "epoch": 3.39813232421875e-06, + "step": 2227, + "training_step_time": 0.10767865180969238 + }, + { + "epoch": 3.399658203125e-06, + "model_forward_time": 0.025368690490722656, + "step": 2228 + }, + { + "epoch": 3.399658203125e-06, + "step": 2228, + "training_step_time": 0.18982911109924316 + }, + { + "epoch": 3.40118408203125e-06, + "model_forward_time": 0.024901151657104492, + "step": 2229 + }, + { + "epoch": 3.40118408203125e-06, + "step": 2229, + "training_step_time": 0.10494637489318848 + }, + { + "epoch": 3.4027099609375e-06, + "grad_norm": 0.6757562160491943, + "learning_rate": 9.983820637893681e-05, + "loss": 0.144, + "step": 2230 + }, + { + "epoch": 3.4027099609375e-06, + "model_forward_time": 0.024393558502197266, + "step": 2230 + }, + { + "epoch": 3.4027099609375e-06, + "step": 2230, + "training_step_time": 0.16824698448181152 + }, + { + "epoch": 3.40423583984375e-06, + "model_forward_time": 0.024935245513916016, + "step": 2231 + }, + { + "epoch": 3.40423583984375e-06, + "step": 2231, + "training_step_time": 0.1375739574432373 + }, + { + "epoch": 3.40576171875e-06, + "model_forward_time": 0.0245206356048584, + "step": 2232 + }, + { + "epoch": 3.40576171875e-06, + "step": 2232, + "training_step_time": 0.2156391143798828 + }, + { + "epoch": 3.40728759765625e-06, + "model_forward_time": 0.024553537368774414, + "step": 2233 + }, + { + "epoch": 3.40728759765625e-06, + "step": 2233, + "training_step_time": 0.1231074333190918 + }, + { + "epoch": 3.4088134765625e-06, + "model_forward_time": 0.024496793746948242, + "step": 2234 + }, + { + "epoch": 3.4088134765625e-06, + "step": 2234, + "training_step_time": 0.11021804809570312 + }, + { + "epoch": 3.41033935546875e-06, + "model_forward_time": 0.025335073471069336, + "step": 2235 + }, + { + "epoch": 3.41033935546875e-06, + "step": 2235, + "training_step_time": 0.12230038642883301 + }, + { + "epoch": 3.411865234375e-06, + "model_forward_time": 0.025592565536499023, + "step": 2236 + }, + { + "epoch": 3.411865234375e-06, + "step": 2236, + "training_step_time": 0.10744237899780273 + }, + { + "epoch": 3.41339111328125e-06, + "model_forward_time": 0.025238752365112305, + "step": 2237 + }, + { + "epoch": 3.41339111328125e-06, + "step": 2237, + "training_step_time": 0.12774324417114258 + }, + { + "epoch": 3.4149169921875e-06, + "model_forward_time": 0.02513885498046875, + "step": 2238 + }, + { + "epoch": 3.4149169921875e-06, + "step": 2238, + "training_step_time": 0.15024566650390625 + }, + { + "epoch": 3.41644287109375e-06, + "model_forward_time": 0.02674412727355957, + "step": 2239 + }, + { + "epoch": 3.41644287109375e-06, + "step": 2239, + "training_step_time": 0.13843321800231934 + }, + { + "epoch": 3.41796875e-06, + "grad_norm": 0.9230808615684509, + "learning_rate": 9.983374579098523e-05, + "loss": 0.1256, + "step": 2240 + }, + { + "epoch": 3.41796875e-06, + "model_forward_time": 0.024523496627807617, + "step": 2240 + }, + { + "epoch": 3.41796875e-06, + "step": 2240, + "training_step_time": 0.20284485816955566 + }, + { + "epoch": 3.41949462890625e-06, + "model_forward_time": 0.024039268493652344, + "step": 2241 + }, + { + "epoch": 3.41949462890625e-06, + "step": 2241, + "training_step_time": 0.15082240104675293 + }, + { + "epoch": 3.4210205078125e-06, + "model_forward_time": 0.024088621139526367, + "step": 2242 + }, + { + "epoch": 3.4210205078125e-06, + "step": 2242, + "training_step_time": 0.17657041549682617 + }, + { + "epoch": 3.42254638671875e-06, + "model_forward_time": 0.02451014518737793, + "step": 2243 + }, + { + "epoch": 3.42254638671875e-06, + "step": 2243, + "training_step_time": 0.1265413761138916 + }, + { + "epoch": 3.424072265625e-06, + "model_forward_time": 0.02435922622680664, + "step": 2244 + }, + { + "epoch": 3.424072265625e-06, + "step": 2244, + "training_step_time": 0.11759614944458008 + }, + { + "epoch": 3.42559814453125e-06, + "model_forward_time": 0.024877309799194336, + "step": 2245 + }, + { + "epoch": 3.42559814453125e-06, + "step": 2245, + "training_step_time": 0.12009263038635254 + }, + { + "epoch": 3.4271240234375e-06, + "model_forward_time": 0.025249719619750977, + "step": 2246 + }, + { + "epoch": 3.4271240234375e-06, + "step": 2246, + "training_step_time": 0.10747218132019043 + }, + { + "epoch": 3.42864990234375e-06, + "model_forward_time": 0.025400638580322266, + "step": 2247 + }, + { + "epoch": 3.42864990234375e-06, + "step": 2247, + "training_step_time": 0.10975098609924316 + }, + { + "epoch": 3.43017578125e-06, + "model_forward_time": 0.025346994400024414, + "step": 2248 + }, + { + "epoch": 3.43017578125e-06, + "step": 2248, + "training_step_time": 0.12213873863220215 + }, + { + "epoch": 3.43170166015625e-06, + "model_forward_time": 0.025269746780395508, + "step": 2249 + }, + { + "epoch": 3.43170166015625e-06, + "step": 2249, + "training_step_time": 0.11492919921875 + }, + { + "epoch": 3.4332275390625e-06, + "grad_norm": 1.0198084115982056, + "learning_rate": 9.98292246503335e-05, + "loss": 0.1454, + "step": 2250 + }, + { + "epoch": 3.4332275390625e-06, + "model_forward_time": 0.02577352523803711, + "step": 2250 + }, + { + "epoch": 3.4332275390625e-06, + "step": 2250, + "training_step_time": 0.15647268295288086 + }, + { + "epoch": 3.43475341796875e-06, + "model_forward_time": 0.025000810623168945, + "step": 2251 + }, + { + "epoch": 3.43475341796875e-06, + "step": 2251, + "training_step_time": 0.1584458351135254 + }, + { + "epoch": 3.436279296875e-06, + "model_forward_time": 0.024669170379638672, + "step": 2252 + }, + { + "epoch": 3.436279296875e-06, + "step": 2252, + "training_step_time": 0.12378954887390137 + }, + { + "epoch": 3.43780517578125e-06, + "model_forward_time": 0.02539229393005371, + "step": 2253 + }, + { + "epoch": 3.43780517578125e-06, + "step": 2253, + "training_step_time": 0.10535311698913574 + }, + { + "epoch": 3.4393310546875e-06, + "model_forward_time": 0.025667428970336914, + "step": 2254 + }, + { + "epoch": 3.4393310546875e-06, + "step": 2254, + "training_step_time": 0.1070563793182373 + }, + { + "epoch": 3.44085693359375e-06, + "model_forward_time": 0.024979114532470703, + "step": 2255 + }, + { + "epoch": 3.44085693359375e-06, + "step": 2255, + "training_step_time": 0.10921239852905273 + }, + { + "epoch": 3.4423828125e-06, + "model_forward_time": 0.025542259216308594, + "step": 2256 + }, + { + "epoch": 3.4423828125e-06, + "step": 2256, + "training_step_time": 0.10881543159484863 + }, + { + "epoch": 3.44390869140625e-06, + "model_forward_time": 0.02544093132019043, + "step": 2257 + }, + { + "epoch": 3.44390869140625e-06, + "step": 2257, + "training_step_time": 0.10526108741760254 + }, + { + "epoch": 3.4454345703125e-06, + "model_forward_time": 0.02520608901977539, + "step": 2258 + }, + { + "epoch": 3.4454345703125e-06, + "step": 2258, + "training_step_time": 0.10660457611083984 + }, + { + "epoch": 3.44696044921875e-06, + "model_forward_time": 0.025272607803344727, + "step": 2259 + }, + { + "epoch": 3.44696044921875e-06, + "step": 2259, + "training_step_time": 0.10548901557922363 + }, + { + "epoch": 3.448486328125e-06, + "grad_norm": 0.7208828330039978, + "learning_rate": 9.982464296247522e-05, + "loss": 0.1272, + "step": 2260 + }, + { + "epoch": 3.448486328125e-06, + "model_forward_time": 0.025158405303955078, + "step": 2260 + }, + { + "epoch": 3.448486328125e-06, + "step": 2260, + "training_step_time": 0.10584449768066406 + }, + { + "epoch": 3.45001220703125e-06, + "model_forward_time": 0.028775453567504883, + "step": 2261 + }, + { + "epoch": 3.45001220703125e-06, + "step": 2261, + "training_step_time": 0.11305022239685059 + }, + { + "epoch": 3.4515380859375e-06, + "model_forward_time": 0.026183366775512695, + "step": 2262 + }, + { + "epoch": 3.4515380859375e-06, + "step": 2262, + "training_step_time": 0.1086130142211914 + }, + { + "epoch": 3.45306396484375e-06, + "model_forward_time": 0.0251772403717041, + "step": 2263 + }, + { + "epoch": 3.45306396484375e-06, + "step": 2263, + "training_step_time": 0.10745835304260254 + }, + { + "epoch": 3.45458984375e-06, + "model_forward_time": 0.02534031867980957, + "step": 2264 + }, + { + "epoch": 3.45458984375e-06, + "step": 2264, + "training_step_time": 0.10597467422485352 + }, + { + "epoch": 3.45611572265625e-06, + "model_forward_time": 0.024297714233398438, + "step": 2265 + }, + { + "epoch": 3.45611572265625e-06, + "step": 2265, + "training_step_time": 0.1075892448425293 + }, + { + "epoch": 3.4576416015625e-06, + "model_forward_time": 0.02575373649597168, + "step": 2266 + }, + { + "epoch": 3.4576416015625e-06, + "step": 2266, + "training_step_time": 0.1070547103881836 + }, + { + "epoch": 3.45916748046875e-06, + "model_forward_time": 0.025467634201049805, + "step": 2267 + }, + { + "epoch": 3.45916748046875e-06, + "step": 2267, + "training_step_time": 0.10497355461120605 + }, + { + "epoch": 3.460693359375e-06, + "model_forward_time": 0.025322675704956055, + "step": 2268 + }, + { + "epoch": 3.460693359375e-06, + "step": 2268, + "training_step_time": 0.10626578330993652 + }, + { + "epoch": 3.46221923828125e-06, + "model_forward_time": 0.025413036346435547, + "step": 2269 + }, + { + "epoch": 3.46221923828125e-06, + "step": 2269, + "training_step_time": 0.11072230339050293 + }, + { + "epoch": 3.4637451171875e-06, + "grad_norm": 0.763227105140686, + "learning_rate": 9.982000073297759e-05, + "loss": 0.1287, + "step": 2270 + }, + { + "epoch": 3.4637451171875e-06, + "model_forward_time": 0.025361299514770508, + "step": 2270 + }, + { + "epoch": 3.4637451171875e-06, + "step": 2270, + "training_step_time": 0.11115837097167969 + }, + { + "epoch": 3.46527099609375e-06, + "model_forward_time": 0.02540302276611328, + "step": 2271 + }, + { + "epoch": 3.46527099609375e-06, + "step": 2271, + "training_step_time": 0.10826802253723145 + }, + { + "epoch": 3.466796875e-06, + "model_forward_time": 0.025376319885253906, + "step": 2272 + }, + { + "epoch": 3.466796875e-06, + "step": 2272, + "training_step_time": 0.10554218292236328 + }, + { + "epoch": 3.46832275390625e-06, + "model_forward_time": 0.02539539337158203, + "step": 2273 + }, + { + "epoch": 3.46832275390625e-06, + "step": 2273, + "training_step_time": 0.19907760620117188 + }, + { + "epoch": 3.4698486328125e-06, + "model_forward_time": 0.024342060089111328, + "step": 2274 + }, + { + "epoch": 3.4698486328125e-06, + "step": 2274, + "training_step_time": 0.11154818534851074 + }, + { + "epoch": 3.47137451171875e-06, + "model_forward_time": 0.024457931518554688, + "step": 2275 + }, + { + "epoch": 3.47137451171875e-06, + "step": 2275, + "training_step_time": 0.10834741592407227 + }, + { + "epoch": 3.472900390625e-06, + "model_forward_time": 0.025557756423950195, + "step": 2276 + }, + { + "epoch": 3.472900390625e-06, + "step": 2276, + "training_step_time": 0.12626862525939941 + }, + { + "epoch": 3.47442626953125e-06, + "model_forward_time": 0.0280759334564209, + "step": 2277 + }, + { + "epoch": 3.47442626953125e-06, + "step": 2277, + "training_step_time": 0.1174774169921875 + }, + { + "epoch": 3.4759521484375e-06, + "model_forward_time": 0.0254974365234375, + "step": 2278 + }, + { + "epoch": 3.4759521484375e-06, + "step": 2278, + "training_step_time": 0.216264009475708 + }, + { + "epoch": 3.47747802734375e-06, + "model_forward_time": 0.02453756332397461, + "step": 2279 + }, + { + "epoch": 3.47747802734375e-06, + "step": 2279, + "training_step_time": 0.15039634704589844 + }, + { + "epoch": 3.47900390625e-06, + "grad_norm": 0.7737330794334412, + "learning_rate": 9.981529796748134e-05, + "loss": 0.1356, + "step": 2280 + }, + { + "epoch": 3.47900390625e-06, + "model_forward_time": 0.02416515350341797, + "step": 2280 + }, + { + "epoch": 3.47900390625e-06, + "step": 2280, + "training_step_time": 0.10844302177429199 + }, + { + "epoch": 3.48052978515625e-06, + "model_forward_time": 0.024841785430908203, + "step": 2281 + }, + { + "epoch": 3.48052978515625e-06, + "step": 2281, + "training_step_time": 0.12086176872253418 + }, + { + "epoch": 3.4820556640625e-06, + "model_forward_time": 0.025487184524536133, + "step": 2282 + }, + { + "epoch": 3.4820556640625e-06, + "step": 2282, + "training_step_time": 0.10951948165893555 + }, + { + "epoch": 3.48358154296875e-06, + "model_forward_time": 0.025606870651245117, + "step": 2283 + }, + { + "epoch": 3.48358154296875e-06, + "step": 2283, + "training_step_time": 0.17801833152770996 + }, + { + "epoch": 3.485107421875e-06, + "model_forward_time": 0.02459120750427246, + "step": 2284 + }, + { + "epoch": 3.485107421875e-06, + "step": 2284, + "training_step_time": 0.1344912052154541 + }, + { + "epoch": 3.48663330078125e-06, + "model_forward_time": 0.02458024024963379, + "step": 2285 + }, + { + "epoch": 3.48663330078125e-06, + "step": 2285, + "training_step_time": 0.1105201244354248 + }, + { + "epoch": 3.4881591796875e-06, + "model_forward_time": 0.025744915008544922, + "step": 2286 + }, + { + "epoch": 3.4881591796875e-06, + "step": 2286, + "training_step_time": 0.11853241920471191 + }, + { + "epoch": 3.48968505859375e-06, + "model_forward_time": 0.02599811553955078, + "step": 2287 + }, + { + "epoch": 3.48968505859375e-06, + "step": 2287, + "training_step_time": 0.11443853378295898 + }, + { + "epoch": 3.4912109375e-06, + "model_forward_time": 0.02582263946533203, + "step": 2288 + }, + { + "epoch": 3.4912109375e-06, + "step": 2288, + "training_step_time": 0.15849065780639648 + }, + { + "epoch": 3.49273681640625e-06, + "model_forward_time": 0.02512645721435547, + "step": 2289 + }, + { + "epoch": 3.49273681640625e-06, + "step": 2289, + "training_step_time": 0.1483144760131836 + }, + { + "epoch": 3.4942626953125e-06, + "grad_norm": 0.7489446401596069, + "learning_rate": 9.98105346717008e-05, + "loss": 0.1545, + "step": 2290 + }, + { + "epoch": 3.4942626953125e-06, + "model_forward_time": 0.02517223358154297, + "step": 2290 + }, + { + "epoch": 3.4942626953125e-06, + "step": 2290, + "training_step_time": 0.10528326034545898 + }, + { + "epoch": 3.49578857421875e-06, + "model_forward_time": 0.025907278060913086, + "step": 2291 + }, + { + "epoch": 3.49578857421875e-06, + "step": 2291, + "training_step_time": 0.11009526252746582 + }, + { + "epoch": 3.497314453125e-06, + "model_forward_time": 0.02588486671447754, + "step": 2292 + }, + { + "epoch": 3.497314453125e-06, + "step": 2292, + "training_step_time": 0.10511541366577148 + }, + { + "epoch": 3.49884033203125e-06, + "model_forward_time": 0.02510356903076172, + "step": 2293 + }, + { + "epoch": 3.49884033203125e-06, + "step": 2293, + "training_step_time": 0.10793161392211914 + }, + { + "epoch": 3.5003662109375e-06, + "model_forward_time": 0.025310277938842773, + "step": 2294 + }, + { + "epoch": 3.5003662109375e-06, + "step": 2294, + "training_step_time": 0.1234273910522461 + }, + { + "epoch": 3.50189208984375e-06, + "model_forward_time": 0.025749921798706055, + "step": 2295 + }, + { + "epoch": 3.50189208984375e-06, + "step": 2295, + "training_step_time": 0.11373019218444824 + }, + { + "epoch": 3.50341796875e-06, + "model_forward_time": 0.025767803192138672, + "step": 2296 + }, + { + "epoch": 3.50341796875e-06, + "step": 2296, + "training_step_time": 0.10607266426086426 + }, + { + "epoch": 3.50494384765625e-06, + "model_forward_time": 0.025495529174804688, + "step": 2297 + }, + { + "epoch": 3.50494384765625e-06, + "step": 2297, + "training_step_time": 0.2454214096069336 + }, + { + "epoch": 3.5064697265625e-06, + "model_forward_time": 0.025356531143188477, + "step": 2298 + }, + { + "epoch": 3.5064697265625e-06, + "step": 2298, + "training_step_time": 0.2084031105041504 + }, + { + "epoch": 3.50799560546875e-06, + "model_forward_time": 0.025348424911499023, + "step": 2299 + }, + { + "epoch": 3.50799560546875e-06, + "step": 2299, + "training_step_time": 0.2015683650970459 + }, + { + "epoch": 3.509521484375e-06, + "grad_norm": 0.6070877313613892, + "learning_rate": 9.980571085142381e-05, + "loss": 0.1133, + "step": 2300 + }, + { + "epoch": 3.509521484375e-06, + "model_forward_time": 0.024828433990478516, + "step": 2300 + }, + { + "epoch": 3.509521484375e-06, + "step": 2300, + "training_step_time": 0.19903993606567383 + }, + { + "epoch": 3.51104736328125e-06, + "model_forward_time": 0.024506092071533203, + "step": 2301 + }, + { + "epoch": 3.51104736328125e-06, + "step": 2301, + "training_step_time": 0.1875004768371582 + }, + { + "epoch": 3.5125732421875e-06, + "model_forward_time": 0.024552583694458008, + "step": 2302 + }, + { + "epoch": 3.5125732421875e-06, + "step": 2302, + "training_step_time": 0.17716622352600098 + }, + { + "epoch": 3.51409912109375e-06, + "model_forward_time": 0.024951457977294922, + "step": 2303 + }, + { + "epoch": 3.51409912109375e-06, + "step": 2303, + "training_step_time": 0.16551423072814941 + }, + { + "epoch": 3.515625e-06, + "model_forward_time": 0.024672985076904297, + "step": 2304 + }, + { + "epoch": 3.515625e-06, + "step": 2304, + "training_step_time": 0.10959076881408691 + }, + { + "epoch": 3.51715087890625e-06, + "model_forward_time": 0.02474236488342285, + "step": 2305 + }, + { + "epoch": 3.51715087890625e-06, + "step": 2305, + "training_step_time": 0.10127520561218262 + }, + { + "epoch": 3.5186767578125e-06, + "model_forward_time": 0.025143146514892578, + "step": 2306 + }, + { + "epoch": 3.5186767578125e-06, + "step": 2306, + "training_step_time": 0.10676217079162598 + }, + { + "epoch": 3.52020263671875e-06, + "model_forward_time": 0.025380611419677734, + "step": 2307 + }, + { + "epoch": 3.52020263671875e-06, + "step": 2307, + "training_step_time": 0.10568928718566895 + }, + { + "epoch": 3.521728515625e-06, + "model_forward_time": 0.02559947967529297, + "step": 2308 + }, + { + "epoch": 3.521728515625e-06, + "step": 2308, + "training_step_time": 0.10421633720397949 + }, + { + "epoch": 3.52325439453125e-06, + "model_forward_time": 0.02534627914428711, + "step": 2309 + }, + { + "epoch": 3.52325439453125e-06, + "step": 2309, + "training_step_time": 0.10481667518615723 + }, + { + "epoch": 3.5247802734375e-06, + "grad_norm": 0.7714501023292542, + "learning_rate": 9.980082651251175e-05, + "loss": 0.1339, + "step": 2310 + }, + { + "epoch": 3.5247802734375e-06, + "model_forward_time": 0.02578425407409668, + "step": 2310 + }, + { + "epoch": 3.5247802734375e-06, + "step": 2310, + "training_step_time": 0.10922074317932129 + }, + { + "epoch": 3.52630615234375e-06, + "model_forward_time": 0.02527308464050293, + "step": 2311 + }, + { + "epoch": 3.52630615234375e-06, + "step": 2311, + "training_step_time": 0.10564994812011719 + }, + { + "epoch": 3.52783203125e-06, + "model_forward_time": 0.025030851364135742, + "step": 2312 + }, + { + "epoch": 3.52783203125e-06, + "step": 2312, + "training_step_time": 0.10576820373535156 + }, + { + "epoch": 3.52935791015625e-06, + "model_forward_time": 0.025517702102661133, + "step": 2313 + }, + { + "epoch": 3.52935791015625e-06, + "step": 2313, + "training_step_time": 0.10463762283325195 + }, + { + "epoch": 3.5308837890625e-06, + "model_forward_time": 0.025667905807495117, + "step": 2314 + }, + { + "epoch": 3.5308837890625e-06, + "step": 2314, + "training_step_time": 0.10563826560974121 + }, + { + "epoch": 3.53240966796875e-06, + "model_forward_time": 0.026885509490966797, + "step": 2315 + }, + { + "epoch": 3.53240966796875e-06, + "step": 2315, + "training_step_time": 0.11027240753173828 + }, + { + "epoch": 3.533935546875e-06, + "model_forward_time": 0.025712251663208008, + "step": 2316 + }, + { + "epoch": 3.533935546875e-06, + "step": 2316, + "training_step_time": 0.11579322814941406 + }, + { + "epoch": 3.53546142578125e-06, + "model_forward_time": 0.0249025821685791, + "step": 2317 + }, + { + "epoch": 3.53546142578125e-06, + "step": 2317, + "training_step_time": 0.11815404891967773 + }, + { + "epoch": 3.5369873046875e-06, + "model_forward_time": 0.025420665740966797, + "step": 2318 + }, + { + "epoch": 3.5369873046875e-06, + "step": 2318, + "training_step_time": 0.1580061912536621 + }, + { + "epoch": 3.53851318359375e-06, + "model_forward_time": 0.02613091468811035, + "step": 2319 + }, + { + "epoch": 3.53851318359375e-06, + "step": 2319, + "training_step_time": 0.15944457054138184 + }, + { + "epoch": 3.5400390625e-06, + "grad_norm": 1.6715880632400513, + "learning_rate": 9.979588166089958e-05, + "loss": 0.1376, + "step": 2320 + }, + { + "epoch": 3.5400390625e-06, + "model_forward_time": 0.024211645126342773, + "step": 2320 + }, + { + "epoch": 3.5400390625e-06, + "step": 2320, + "training_step_time": 0.19746780395507812 + }, + { + "epoch": 3.54156494140625e-06, + "model_forward_time": 0.02441716194152832, + "step": 2321 + }, + { + "epoch": 3.54156494140625e-06, + "step": 2321, + "training_step_time": 0.11595749855041504 + }, + { + "epoch": 3.5430908203125e-06, + "model_forward_time": 0.02441883087158203, + "step": 2322 + }, + { + "epoch": 3.5430908203125e-06, + "step": 2322, + "training_step_time": 0.19123101234436035 + }, + { + "epoch": 3.54461669921875e-06, + "model_forward_time": 0.02567887306213379, + "step": 2323 + }, + { + "epoch": 3.54461669921875e-06, + "step": 2323, + "training_step_time": 0.11079692840576172 + }, + { + "epoch": 3.546142578125e-06, + "model_forward_time": 0.024657249450683594, + "step": 2324 + }, + { + "epoch": 3.546142578125e-06, + "step": 2324, + "training_step_time": 0.10932445526123047 + }, + { + "epoch": 3.54766845703125e-06, + "model_forward_time": 0.025821924209594727, + "step": 2325 + }, + { + "epoch": 3.54766845703125e-06, + "step": 2325, + "training_step_time": 0.15426087379455566 + }, + { + "epoch": 3.5491943359375e-06, + "model_forward_time": 0.025013208389282227, + "step": 2326 + }, + { + "epoch": 3.5491943359375e-06, + "step": 2326, + "training_step_time": 0.13788342475891113 + }, + { + "epoch": 3.55072021484375e-06, + "model_forward_time": 0.024633169174194336, + "step": 2327 + }, + { + "epoch": 3.55072021484375e-06, + "step": 2327, + "training_step_time": 0.11156797409057617 + }, + { + "epoch": 3.55224609375e-06, + "model_forward_time": 0.025289297103881836, + "step": 2328 + }, + { + "epoch": 3.55224609375e-06, + "step": 2328, + "training_step_time": 0.12189078330993652 + }, + { + "epoch": 3.55377197265625e-06, + "model_forward_time": 0.02517104148864746, + "step": 2329 + }, + { + "epoch": 3.55377197265625e-06, + "step": 2329, + "training_step_time": 0.11261820793151855 + }, + { + "epoch": 3.5552978515625e-06, + "grad_norm": 0.6216801404953003, + "learning_rate": 9.979087630259572e-05, + "loss": 0.1565, + "step": 2330 + }, + { + "epoch": 3.5552978515625e-06, + "model_forward_time": 0.02546381950378418, + "step": 2330 + }, + { + "epoch": 3.5552978515625e-06, + "step": 2330, + "training_step_time": 0.18820929527282715 + }, + { + "epoch": 3.55682373046875e-06, + "model_forward_time": 0.025282621383666992, + "step": 2331 + }, + { + "epoch": 3.55682373046875e-06, + "step": 2331, + "training_step_time": 0.11551642417907715 + }, + { + "epoch": 3.558349609375e-06, + "model_forward_time": 0.025131940841674805, + "step": 2332 + }, + { + "epoch": 3.558349609375e-06, + "step": 2332, + "training_step_time": 0.10753035545349121 + }, + { + "epoch": 3.55987548828125e-06, + "model_forward_time": 0.02474665641784668, + "step": 2333 + }, + { + "epoch": 3.55987548828125e-06, + "step": 2333, + "training_step_time": 0.10717606544494629 + }, + { + "epoch": 3.5614013671875e-06, + "model_forward_time": 0.025811433792114258, + "step": 2334 + }, + { + "epoch": 3.5614013671875e-06, + "step": 2334, + "training_step_time": 0.10893607139587402 + }, + { + "epoch": 3.56292724609375e-06, + "model_forward_time": 0.02475714683532715, + "step": 2335 + }, + { + "epoch": 3.56292724609375e-06, + "step": 2335, + "training_step_time": 0.10801553726196289 + }, + { + "epoch": 3.564453125e-06, + "model_forward_time": 0.02489161491394043, + "step": 2336 + }, + { + "epoch": 3.564453125e-06, + "step": 2336, + "training_step_time": 0.11002802848815918 + }, + { + "epoch": 3.56597900390625e-06, + "model_forward_time": 0.025223970413208008, + "step": 2337 + }, + { + "epoch": 3.56597900390625e-06, + "step": 2337, + "training_step_time": 0.11621451377868652 + }, + { + "epoch": 3.5675048828125e-06, + "model_forward_time": 0.025515079498291016, + "step": 2338 + }, + { + "epoch": 3.5675048828125e-06, + "step": 2338, + "training_step_time": 0.1081857681274414 + }, + { + "epoch": 3.56903076171875e-06, + "model_forward_time": 0.025481700897216797, + "step": 2339 + }, + { + "epoch": 3.56903076171875e-06, + "step": 2339, + "training_step_time": 0.17309308052062988 + }, + { + "epoch": 3.570556640625e-06, + "grad_norm": 0.6606565117835999, + "learning_rate": 9.97858104436822e-05, + "loss": 0.131, + "step": 2340 + }, + { + "epoch": 3.570556640625e-06, + "model_forward_time": 0.025295257568359375, + "step": 2340 + }, + { + "epoch": 3.570556640625e-06, + "step": 2340, + "training_step_time": 0.15503239631652832 + }, + { + "epoch": 3.57208251953125e-06, + "model_forward_time": 0.024593353271484375, + "step": 2341 + }, + { + "epoch": 3.57208251953125e-06, + "step": 2341, + "training_step_time": 0.10154461860656738 + }, + { + "epoch": 3.5736083984375e-06, + "model_forward_time": 0.025056123733520508, + "step": 2342 + }, + { + "epoch": 3.5736083984375e-06, + "step": 2342, + "training_step_time": 0.10542845726013184 + }, + { + "epoch": 3.57513427734375e-06, + "model_forward_time": 0.02513575553894043, + "step": 2343 + }, + { + "epoch": 3.57513427734375e-06, + "step": 2343, + "training_step_time": 0.10832452774047852 + }, + { + "epoch": 3.57666015625e-06, + "model_forward_time": 0.025075435638427734, + "step": 2344 + }, + { + "epoch": 3.57666015625e-06, + "step": 2344, + "training_step_time": 0.10765814781188965 + }, + { + "epoch": 3.57818603515625e-06, + "model_forward_time": 0.025156259536743164, + "step": 2345 + }, + { + "epoch": 3.57818603515625e-06, + "step": 2345, + "training_step_time": 0.11246943473815918 + }, + { + "epoch": 3.5797119140625e-06, + "model_forward_time": 0.02586674690246582, + "step": 2346 + }, + { + "epoch": 3.5797119140625e-06, + "step": 2346, + "training_step_time": 0.10997605323791504 + }, + { + "epoch": 3.58123779296875e-06, + "model_forward_time": 0.025565385818481445, + "step": 2347 + }, + { + "epoch": 3.58123779296875e-06, + "step": 2347, + "training_step_time": 0.10850954055786133 + }, + { + "epoch": 3.582763671875e-06, + "model_forward_time": 0.02528238296508789, + "step": 2348 + }, + { + "epoch": 3.582763671875e-06, + "step": 2348, + "training_step_time": 0.10843420028686523 + }, + { + "epoch": 3.58428955078125e-06, + "model_forward_time": 0.02548694610595703, + "step": 2349 + }, + { + "epoch": 3.58428955078125e-06, + "step": 2349, + "training_step_time": 0.10651826858520508 + }, + { + "epoch": 3.5858154296875e-06, + "grad_norm": 0.6426587700843811, + "learning_rate": 9.978068409031449e-05, + "loss": 0.1354, + "step": 2350 + }, + { + "epoch": 3.5858154296875e-06, + "model_forward_time": 0.02540755271911621, + "step": 2350 + }, + { + "epoch": 3.5858154296875e-06, + "step": 2350, + "training_step_time": 0.10778188705444336 + }, + { + "epoch": 3.58734130859375e-06, + "model_forward_time": 0.025505542755126953, + "step": 2351 + }, + { + "epoch": 3.58734130859375e-06, + "step": 2351, + "training_step_time": 0.10970163345336914 + }, + { + "epoch": 3.5888671875e-06, + "model_forward_time": 0.025675296783447266, + "step": 2352 + }, + { + "epoch": 3.5888671875e-06, + "step": 2352, + "training_step_time": 0.11286282539367676 + }, + { + "epoch": 3.59039306640625e-06, + "model_forward_time": 0.027707815170288086, + "step": 2353 + }, + { + "epoch": 3.59039306640625e-06, + "step": 2353, + "training_step_time": 0.10751152038574219 + }, + { + "epoch": 3.5919189453125e-06, + "model_forward_time": 0.025429725646972656, + "step": 2354 + }, + { + "epoch": 3.5919189453125e-06, + "step": 2354, + "training_step_time": 0.10754513740539551 + }, + { + "epoch": 3.59344482421875e-06, + "model_forward_time": 0.025174379348754883, + "step": 2355 + }, + { + "epoch": 3.59344482421875e-06, + "step": 2355, + "training_step_time": 0.11274528503417969 + }, + { + "epoch": 3.594970703125e-06, + "model_forward_time": 0.025615215301513672, + "step": 2356 + }, + { + "epoch": 3.594970703125e-06, + "step": 2356, + "training_step_time": 0.11062955856323242 + }, + { + "epoch": 3.59649658203125e-06, + "model_forward_time": 0.025330781936645508, + "step": 2357 + }, + { + "epoch": 3.59649658203125e-06, + "step": 2357, + "training_step_time": 0.10631728172302246 + }, + { + "epoch": 3.5980224609375e-06, + "model_forward_time": 0.02548384666442871, + "step": 2358 + }, + { + "epoch": 3.5980224609375e-06, + "step": 2358, + "training_step_time": 0.10638141632080078 + }, + { + "epoch": 3.59954833984375e-06, + "model_forward_time": 0.0252530574798584, + "step": 2359 + }, + { + "epoch": 3.59954833984375e-06, + "step": 2359, + "training_step_time": 0.10704827308654785 + }, + { + "epoch": 3.60107421875e-06, + "grad_norm": 0.6929382681846619, + "learning_rate": 9.97754972487216e-05, + "loss": 0.1318, + "step": 2360 + }, + { + "epoch": 3.60107421875e-06, + "model_forward_time": 0.025241613388061523, + "step": 2360 + }, + { + "epoch": 3.60107421875e-06, + "step": 2360, + "training_step_time": 0.10992836952209473 + }, + { + "epoch": 3.60260009765625e-06, + "model_forward_time": 0.025150299072265625, + "step": 2361 + }, + { + "epoch": 3.60260009765625e-06, + "step": 2361, + "training_step_time": 0.21767568588256836 + }, + { + "epoch": 3.6041259765625e-06, + "model_forward_time": 0.024817228317260742, + "step": 2362 + }, + { + "epoch": 3.6041259765625e-06, + "step": 2362, + "training_step_time": 0.12064838409423828 + }, + { + "epoch": 3.60565185546875e-06, + "model_forward_time": 0.025087356567382812, + "step": 2363 + }, + { + "epoch": 3.60565185546875e-06, + "step": 2363, + "training_step_time": 0.1828005313873291 + }, + { + "epoch": 3.607177734375e-06, + "model_forward_time": 0.024841785430908203, + "step": 2364 + }, + { + "epoch": 3.607177734375e-06, + "step": 2364, + "training_step_time": 0.11818504333496094 + }, + { + "epoch": 3.60870361328125e-06, + "model_forward_time": 0.024193286895751953, + "step": 2365 + }, + { + "epoch": 3.60870361328125e-06, + "step": 2365, + "training_step_time": 0.22084712982177734 + }, + { + "epoch": 3.6102294921875e-06, + "model_forward_time": 0.02446603775024414, + "step": 2366 + }, + { + "epoch": 3.6102294921875e-06, + "step": 2366, + "training_step_time": 0.1521778106689453 + }, + { + "epoch": 3.61175537109375e-06, + "model_forward_time": 0.02464771270751953, + "step": 2367 + }, + { + "epoch": 3.61175537109375e-06, + "step": 2367, + "training_step_time": 0.10435271263122559 + }, + { + "epoch": 3.61328125e-06, + "model_forward_time": 0.025382518768310547, + "step": 2368 + }, + { + "epoch": 3.61328125e-06, + "step": 2368, + "training_step_time": 0.10529160499572754 + }, + { + "epoch": 3.61480712890625e-06, + "model_forward_time": 0.025074005126953125, + "step": 2369 + }, + { + "epoch": 3.61480712890625e-06, + "step": 2369, + "training_step_time": 0.11052441596984863 + }, + { + "epoch": 3.6163330078125e-06, + "grad_norm": 0.7236571311950684, + "learning_rate": 9.977024992520602e-05, + "loss": 0.1374, + "step": 2370 + }, + { + "epoch": 3.6163330078125e-06, + "model_forward_time": 0.026082515716552734, + "step": 2370 + }, + { + "epoch": 3.6163330078125e-06, + "step": 2370, + "training_step_time": 0.17008280754089355 + }, + { + "epoch": 3.61785888671875e-06, + "model_forward_time": 0.02504706382751465, + "step": 2371 + }, + { + "epoch": 3.61785888671875e-06, + "step": 2371, + "training_step_time": 0.17674708366394043 + }, + { + "epoch": 3.619384765625e-06, + "model_forward_time": 0.024468660354614258, + "step": 2372 + }, + { + "epoch": 3.619384765625e-06, + "step": 2372, + "training_step_time": 0.11211133003234863 + }, + { + "epoch": 3.62091064453125e-06, + "model_forward_time": 0.024601221084594727, + "step": 2373 + }, + { + "epoch": 3.62091064453125e-06, + "step": 2373, + "training_step_time": 0.11912894248962402 + }, + { + "epoch": 3.6224365234375e-06, + "model_forward_time": 0.025422096252441406, + "step": 2374 + }, + { + "epoch": 3.6224365234375e-06, + "step": 2374, + "training_step_time": 0.10901451110839844 + }, + { + "epoch": 3.62396240234375e-06, + "model_forward_time": 0.02536463737487793, + "step": 2375 + }, + { + "epoch": 3.62396240234375e-06, + "step": 2375, + "training_step_time": 0.11543488502502441 + }, + { + "epoch": 3.62548828125e-06, + "model_forward_time": 0.025476694107055664, + "step": 2376 + }, + { + "epoch": 3.62548828125e-06, + "step": 2376, + "training_step_time": 0.2022261619567871 + }, + { + "epoch": 3.62701416015625e-06, + "model_forward_time": 0.024479150772094727, + "step": 2377 + }, + { + "epoch": 3.62701416015625e-06, + "step": 2377, + "training_step_time": 0.10659217834472656 + }, + { + "epoch": 3.6285400390625e-06, + "model_forward_time": 0.02436518669128418, + "step": 2378 + }, + { + "epoch": 3.6285400390625e-06, + "step": 2378, + "training_step_time": 0.10633468627929688 + }, + { + "epoch": 3.63006591796875e-06, + "model_forward_time": 0.025379419326782227, + "step": 2379 + }, + { + "epoch": 3.63006591796875e-06, + "step": 2379, + "training_step_time": 0.10657954216003418 + }, + { + "epoch": 3.631591796875e-06, + "grad_norm": 0.518153190612793, + "learning_rate": 9.976494212614377e-05, + "loss": 0.131, + "step": 2380 + }, + { + "epoch": 3.631591796875e-06, + "model_forward_time": 0.024471759796142578, + "step": 2380 + }, + { + "epoch": 3.631591796875e-06, + "step": 2380, + "training_step_time": 0.10532784461975098 + }, + { + "epoch": 3.63311767578125e-06, + "model_forward_time": 0.024922847747802734, + "step": 2381 + }, + { + "epoch": 3.63311767578125e-06, + "step": 2381, + "training_step_time": 0.1154634952545166 + }, + { + "epoch": 3.6346435546875e-06, + "model_forward_time": 0.025488615036010742, + "step": 2382 + }, + { + "epoch": 3.6346435546875e-06, + "step": 2382, + "training_step_time": 0.10823822021484375 + }, + { + "epoch": 3.63616943359375e-06, + "model_forward_time": 0.025673627853393555, + "step": 2383 + }, + { + "epoch": 3.63616943359375e-06, + "step": 2383, + "training_step_time": 0.10618185997009277 + }, + { + "epoch": 3.6376953125e-06, + "model_forward_time": 0.026149511337280273, + "step": 2384 + }, + { + "epoch": 3.6376953125e-06, + "step": 2384, + "training_step_time": 0.11056303977966309 + }, + { + "epoch": 3.63922119140625e-06, + "model_forward_time": 0.02579784393310547, + "step": 2385 + }, + { + "epoch": 3.63922119140625e-06, + "step": 2385, + "training_step_time": 0.11134576797485352 + }, + { + "epoch": 3.6407470703125e-06, + "model_forward_time": 0.02525782585144043, + "step": 2386 + }, + { + "epoch": 3.6407470703125e-06, + "step": 2386, + "training_step_time": 0.11006999015808105 + }, + { + "epoch": 3.64227294921875e-06, + "model_forward_time": 0.02589726448059082, + "step": 2387 + }, + { + "epoch": 3.64227294921875e-06, + "step": 2387, + "training_step_time": 0.11006426811218262 + }, + { + "epoch": 3.643798828125e-06, + "model_forward_time": 0.025664329528808594, + "step": 2388 + }, + { + "epoch": 3.643798828125e-06, + "step": 2388, + "training_step_time": 0.10926985740661621 + }, + { + "epoch": 3.64532470703125e-06, + "model_forward_time": 0.02555561065673828, + "step": 2389 + }, + { + "epoch": 3.64532470703125e-06, + "step": 2389, + "training_step_time": 0.10498476028442383 + }, + { + "epoch": 3.6468505859375e-06, + "grad_norm": 0.5260281562805176, + "learning_rate": 9.97595738579843e-05, + "loss": 0.1324, + "step": 2390 + }, + { + "epoch": 3.6468505859375e-06, + "model_forward_time": 0.025089263916015625, + "step": 2390 + }, + { + "epoch": 3.6468505859375e-06, + "step": 2390, + "training_step_time": 0.10754656791687012 + }, + { + "epoch": 3.64837646484375e-06, + "model_forward_time": 0.02536487579345703, + "step": 2391 + }, + { + "epoch": 3.64837646484375e-06, + "step": 2391, + "training_step_time": 0.10452055931091309 + }, + { + "epoch": 3.64990234375e-06, + "model_forward_time": 0.02567291259765625, + "step": 2392 + }, + { + "epoch": 3.64990234375e-06, + "step": 2392, + "training_step_time": 0.10955476760864258 + }, + { + "epoch": 3.65142822265625e-06, + "model_forward_time": 0.025548934936523438, + "step": 2393 + }, + { + "epoch": 3.65142822265625e-06, + "step": 2393, + "training_step_time": 0.10619258880615234 + }, + { + "epoch": 3.6529541015625e-06, + "model_forward_time": 0.02559065818786621, + "step": 2394 + }, + { + "epoch": 3.6529541015625e-06, + "step": 2394, + "training_step_time": 0.10727596282958984 + }, + { + "epoch": 3.65447998046875e-06, + "model_forward_time": 0.02559638023376465, + "step": 2395 + }, + { + "epoch": 3.65447998046875e-06, + "step": 2395, + "training_step_time": 0.1066293716430664 + }, + { + "epoch": 3.656005859375e-06, + "model_forward_time": 0.0254971981048584, + "step": 2396 + }, + { + "epoch": 3.656005859375e-06, + "step": 2396, + "training_step_time": 0.1046757698059082 + }, + { + "epoch": 3.65753173828125e-06, + "model_forward_time": 0.025500059127807617, + "step": 2397 + }, + { + "epoch": 3.65753173828125e-06, + "step": 2397, + "training_step_time": 0.11049604415893555 + }, + { + "epoch": 3.6590576171875e-06, + "model_forward_time": 0.02557063102722168, + "step": 2398 + }, + { + "epoch": 3.6590576171875e-06, + "step": 2398, + "training_step_time": 0.10599732398986816 + }, + { + "epoch": 3.66058349609375e-06, + "model_forward_time": 0.025759220123291016, + "step": 2399 + }, + { + "epoch": 3.66058349609375e-06, + "step": 2399, + "training_step_time": 0.1056973934173584 + }, + { + "epoch": 3.662109375e-06, + "grad_norm": 0.8776196241378784, + "learning_rate": 9.975414512725057e-05, + "loss": 0.1139, + "step": 2400 + }, + { + "epoch": 3.662109375e-06, + "model_forward_time": 0.025216102600097656, + "step": 2400 + }, + { + "epoch": 3.662109375e-06, + "step": 2400, + "training_step_time": 0.10498809814453125 + }, + { + "epoch": 3.66363525390625e-06, + "model_forward_time": 0.025449752807617188, + "step": 2401 + }, + { + "epoch": 3.66363525390625e-06, + "step": 2401, + "training_step_time": 0.10901117324829102 + }, + { + "epoch": 3.6651611328125e-06, + "model_forward_time": 0.025267362594604492, + "step": 2402 + }, + { + "epoch": 3.6651611328125e-06, + "step": 2402, + "training_step_time": 0.10559630393981934 + }, + { + "epoch": 3.66668701171875e-06, + "model_forward_time": 0.025326967239379883, + "step": 2403 + }, + { + "epoch": 3.66668701171875e-06, + "step": 2403, + "training_step_time": 0.11076951026916504 + }, + { + "epoch": 3.668212890625e-06, + "model_forward_time": 0.025516033172607422, + "step": 2404 + }, + { + "epoch": 3.668212890625e-06, + "step": 2404, + "training_step_time": 0.1061394214630127 + }, + { + "epoch": 3.66973876953125e-06, + "model_forward_time": 0.02540421485900879, + "step": 2405 + }, + { + "epoch": 3.66973876953125e-06, + "step": 2405, + "training_step_time": 0.11357808113098145 + }, + { + "epoch": 3.6712646484375e-06, + "model_forward_time": 0.025434017181396484, + "step": 2406 + }, + { + "epoch": 3.6712646484375e-06, + "step": 2406, + "training_step_time": 0.11350870132446289 + }, + { + "epoch": 3.67279052734375e-06, + "model_forward_time": 0.02530956268310547, + "step": 2407 + }, + { + "epoch": 3.67279052734375e-06, + "step": 2407, + "training_step_time": 0.10465884208679199 + }, + { + "epoch": 3.67431640625e-06, + "model_forward_time": 0.02541971206665039, + "step": 2408 + }, + { + "epoch": 3.67431640625e-06, + "step": 2408, + "training_step_time": 0.13942766189575195 + }, + { + "epoch": 3.67584228515625e-06, + "model_forward_time": 0.02474522590637207, + "step": 2409 + }, + { + "epoch": 3.67584228515625e-06, + "step": 2409, + "training_step_time": 0.11500954627990723 + }, + { + "epoch": 3.6773681640625e-06, + "grad_norm": 0.62945955991745, + "learning_rate": 9.974865594053902e-05, + "loss": 0.1394, + "step": 2410 + }, + { + "epoch": 3.6773681640625e-06, + "model_forward_time": 0.02553868293762207, + "step": 2410 + }, + { + "epoch": 3.6773681640625e-06, + "step": 2410, + "training_step_time": 0.11709332466125488 + }, + { + "epoch": 3.67889404296875e-06, + "model_forward_time": 0.02585434913635254, + "step": 2411 + }, + { + "epoch": 3.67889404296875e-06, + "step": 2411, + "training_step_time": 0.12130308151245117 + }, + { + "epoch": 3.680419921875e-06, + "model_forward_time": 0.025525331497192383, + "step": 2412 + }, + { + "epoch": 3.680419921875e-06, + "step": 2412, + "training_step_time": 0.22292232513427734 + }, + { + "epoch": 3.68194580078125e-06, + "model_forward_time": 0.02533411979675293, + "step": 2413 + }, + { + "epoch": 3.68194580078125e-06, + "step": 2413, + "training_step_time": 0.12282395362854004 + }, + { + "epoch": 3.6834716796875e-06, + "model_forward_time": 0.02399277687072754, + "step": 2414 + }, + { + "epoch": 3.6834716796875e-06, + "step": 2414, + "training_step_time": 0.1124117374420166 + }, + { + "epoch": 3.68499755859375e-06, + "model_forward_time": 0.025194883346557617, + "step": 2415 + }, + { + "epoch": 3.68499755859375e-06, + "step": 2415, + "training_step_time": 0.11420488357543945 + }, + { + "epoch": 3.6865234375e-06, + "model_forward_time": 0.02571582794189453, + "step": 2416 + }, + { + "epoch": 3.6865234375e-06, + "step": 2416, + "training_step_time": 0.11023402214050293 + }, + { + "epoch": 3.68804931640625e-06, + "model_forward_time": 0.025423765182495117, + "step": 2417 + }, + { + "epoch": 3.68804931640625e-06, + "step": 2417, + "training_step_time": 0.10760879516601562 + }, + { + "epoch": 3.6895751953125e-06, + "model_forward_time": 0.025397300720214844, + "step": 2418 + }, + { + "epoch": 3.6895751953125e-06, + "step": 2418, + "training_step_time": 0.2127223014831543 + }, + { + "epoch": 3.69110107421875e-06, + "model_forward_time": 0.025742053985595703, + "step": 2419 + }, + { + "epoch": 3.69110107421875e-06, + "step": 2419, + "training_step_time": 0.17074179649353027 + }, + { + "epoch": 3.692626953125e-06, + "grad_norm": 0.4951641857624054, + "learning_rate": 9.974310630451948e-05, + "loss": 0.1687, + "step": 2420 + }, + { + "epoch": 3.692626953125e-06, + "model_forward_time": 0.0235445499420166, + "step": 2420 + }, + { + "epoch": 3.692626953125e-06, + "step": 2420, + "training_step_time": 0.19449067115783691 + }, + { + "epoch": 3.69415283203125e-06, + "model_forward_time": 0.024948835372924805, + "step": 2421 + }, + { + "epoch": 3.69415283203125e-06, + "step": 2421, + "training_step_time": 0.1596693992614746 + }, + { + "epoch": 3.6956787109375e-06, + "model_forward_time": 0.024823665618896484, + "step": 2422 + }, + { + "epoch": 3.6956787109375e-06, + "step": 2422, + "training_step_time": 0.14661335945129395 + }, + { + "epoch": 3.69720458984375e-06, + "model_forward_time": 0.02461695671081543, + "step": 2423 + }, + { + "epoch": 3.69720458984375e-06, + "step": 2423, + "training_step_time": 0.1404561996459961 + }, + { + "epoch": 3.69873046875e-06, + "model_forward_time": 0.023802757263183594, + "step": 2424 + }, + { + "epoch": 3.69873046875e-06, + "step": 2424, + "training_step_time": 0.12650322914123535 + }, + { + "epoch": 3.70025634765625e-06, + "model_forward_time": 0.023796558380126953, + "step": 2425 + }, + { + "epoch": 3.70025634765625e-06, + "step": 2425, + "training_step_time": 0.11910057067871094 + }, + { + "epoch": 3.7017822265625e-06, + "model_forward_time": 0.025799989700317383, + "step": 2426 + }, + { + "epoch": 3.7017822265625e-06, + "step": 2426, + "training_step_time": 0.12034058570861816 + }, + { + "epoch": 3.70330810546875e-06, + "model_forward_time": 0.025651931762695312, + "step": 2427 + }, + { + "epoch": 3.70330810546875e-06, + "step": 2427, + "training_step_time": 0.20116853713989258 + }, + { + "epoch": 3.704833984375e-06, + "model_forward_time": 0.02464008331298828, + "step": 2428 + }, + { + "epoch": 3.704833984375e-06, + "step": 2428, + "training_step_time": 0.10827040672302246 + }, + { + "epoch": 3.70635986328125e-06, + "model_forward_time": 0.02773451805114746, + "step": 2429 + }, + { + "epoch": 3.70635986328125e-06, + "step": 2429, + "training_step_time": 0.11464834213256836 + }, + { + "epoch": 3.7078857421875e-06, + "grad_norm": 0.7130682468414307, + "learning_rate": 9.973749622593534e-05, + "loss": 0.1277, + "step": 2430 + }, + { + "epoch": 3.7078857421875e-06, + "model_forward_time": 0.02567148208618164, + "step": 2430 + }, + { + "epoch": 3.7078857421875e-06, + "step": 2430, + "training_step_time": 0.20616555213928223 + }, + { + "epoch": 3.70941162109375e-06, + "model_forward_time": 0.027072429656982422, + "step": 2431 + }, + { + "epoch": 3.70941162109375e-06, + "step": 2431, + "training_step_time": 0.1265108585357666 + }, + { + "epoch": 3.7109375e-06, + "model_forward_time": 0.024636507034301758, + "step": 2432 + }, + { + "epoch": 3.7109375e-06, + "step": 2432, + "training_step_time": 0.10872626304626465 + }, + { + "epoch": 3.71246337890625e-06, + "model_forward_time": 0.0260317325592041, + "step": 2433 + }, + { + "epoch": 3.71246337890625e-06, + "step": 2433, + "training_step_time": 0.10725021362304688 + }, + { + "epoch": 3.7139892578125e-06, + "model_forward_time": 0.024962425231933594, + "step": 2434 + }, + { + "epoch": 3.7139892578125e-06, + "step": 2434, + "training_step_time": 0.1084890365600586 + }, + { + "epoch": 3.71551513671875e-06, + "model_forward_time": 0.025940895080566406, + "step": 2435 + }, + { + "epoch": 3.71551513671875e-06, + "step": 2435, + "training_step_time": 0.10590553283691406 + }, + { + "epoch": 3.717041015625e-06, + "model_forward_time": 0.025467395782470703, + "step": 2436 + }, + { + "epoch": 3.717041015625e-06, + "step": 2436, + "training_step_time": 0.11286187171936035 + }, + { + "epoch": 3.71856689453125e-06, + "model_forward_time": 0.025593996047973633, + "step": 2437 + }, + { + "epoch": 3.71856689453125e-06, + "step": 2437, + "training_step_time": 0.10830378532409668 + }, + { + "epoch": 3.7200927734375e-06, + "model_forward_time": 0.025409221649169922, + "step": 2438 + }, + { + "epoch": 3.7200927734375e-06, + "step": 2438, + "training_step_time": 0.10957717895507812 + }, + { + "epoch": 3.72161865234375e-06, + "model_forward_time": 0.025145292282104492, + "step": 2439 + }, + { + "epoch": 3.72161865234375e-06, + "step": 2439, + "training_step_time": 0.11058807373046875 + }, + { + "epoch": 3.72314453125e-06, + "grad_norm": 0.6387666463851929, + "learning_rate": 9.973182571160332e-05, + "loss": 0.143, + "step": 2440 + }, + { + "epoch": 3.72314453125e-06, + "model_forward_time": 0.025300264358520508, + "step": 2440 + }, + { + "epoch": 3.72314453125e-06, + "step": 2440, + "training_step_time": 0.10539102554321289 + }, + { + "epoch": 3.72467041015625e-06, + "model_forward_time": 0.025233983993530273, + "step": 2441 + }, + { + "epoch": 3.72467041015625e-06, + "step": 2441, + "training_step_time": 0.10819363594055176 + }, + { + "epoch": 3.7261962890625e-06, + "model_forward_time": 0.02541375160217285, + "step": 2442 + }, + { + "epoch": 3.7261962890625e-06, + "step": 2442, + "training_step_time": 0.11135601997375488 + }, + { + "epoch": 3.72772216796875e-06, + "model_forward_time": 0.025618553161621094, + "step": 2443 + }, + { + "epoch": 3.72772216796875e-06, + "step": 2443, + "training_step_time": 0.10932230949401855 + }, + { + "epoch": 3.729248046875e-06, + "model_forward_time": 0.025752544403076172, + "step": 2444 + }, + { + "epoch": 3.729248046875e-06, + "step": 2444, + "training_step_time": 0.10847806930541992 + }, + { + "epoch": 3.73077392578125e-06, + "model_forward_time": 0.02591991424560547, + "step": 2445 + }, + { + "epoch": 3.73077392578125e-06, + "step": 2445, + "training_step_time": 0.10854244232177734 + }, + { + "epoch": 3.7322998046875e-06, + "model_forward_time": 0.025471210479736328, + "step": 2446 + }, + { + "epoch": 3.7322998046875e-06, + "step": 2446, + "training_step_time": 0.10749006271362305 + }, + { + "epoch": 3.73382568359375e-06, + "model_forward_time": 0.0258636474609375, + "step": 2447 + }, + { + "epoch": 3.73382568359375e-06, + "step": 2447, + "training_step_time": 0.11118054389953613 + }, + { + "epoch": 3.7353515625e-06, + "model_forward_time": 0.025213003158569336, + "step": 2448 + }, + { + "epoch": 3.7353515625e-06, + "step": 2448, + "training_step_time": 0.10736441612243652 + }, + { + "epoch": 3.73687744140625e-06, + "model_forward_time": 0.025422334671020508, + "step": 2449 + }, + { + "epoch": 3.73687744140625e-06, + "step": 2449, + "training_step_time": 0.10825872421264648 + }, + { + "epoch": 3.7384033203125e-06, + "grad_norm": 0.7156681418418884, + "learning_rate": 9.972609476841367e-05, + "loss": 0.1222, + "step": 2450 + }, + { + "epoch": 3.7384033203125e-06, + "model_forward_time": 0.025318145751953125, + "step": 2450 + }, + { + "epoch": 3.7384033203125e-06, + "step": 2450, + "training_step_time": 0.11148333549499512 + }, + { + "epoch": 3.73992919921875e-06, + "model_forward_time": 0.025162458419799805, + "step": 2451 + }, + { + "epoch": 3.73992919921875e-06, + "step": 2451, + "training_step_time": 0.1061704158782959 + }, + { + "epoch": 3.741455078125e-06, + "model_forward_time": 0.0251162052154541, + "step": 2452 + }, + { + "epoch": 3.741455078125e-06, + "step": 2452, + "training_step_time": 0.14069080352783203 + }, + { + "epoch": 3.74298095703125e-06, + "model_forward_time": 0.027409791946411133, + "step": 2453 + }, + { + "epoch": 3.74298095703125e-06, + "step": 2453, + "training_step_time": 0.17675089836120605 + }, + { + "epoch": 3.7445068359375e-06, + "model_forward_time": 0.024850130081176758, + "step": 2454 + }, + { + "epoch": 3.7445068359375e-06, + "step": 2454, + "training_step_time": 0.192976713180542 + }, + { + "epoch": 3.74603271484375e-06, + "model_forward_time": 0.02440166473388672, + "step": 2455 + }, + { + "epoch": 3.74603271484375e-06, + "step": 2455, + "training_step_time": 0.1324610710144043 + }, + { + "epoch": 3.74755859375e-06, + "model_forward_time": 0.024494409561157227, + "step": 2456 + }, + { + "epoch": 3.74755859375e-06, + "step": 2456, + "training_step_time": 0.16155195236206055 + }, + { + "epoch": 3.74908447265625e-06, + "model_forward_time": 0.025012493133544922, + "step": 2457 + }, + { + "epoch": 3.74908447265625e-06, + "step": 2457, + "training_step_time": 0.18750762939453125 + }, + { + "epoch": 3.7506103515625e-06, + "model_forward_time": 0.02459573745727539, + "step": 2458 + }, + { + "epoch": 3.7506103515625e-06, + "step": 2458, + "training_step_time": 0.15998196601867676 + }, + { + "epoch": 3.75213623046875e-06, + "model_forward_time": 0.025149106979370117, + "step": 2459 + }, + { + "epoch": 3.75213623046875e-06, + "step": 2459, + "training_step_time": 0.10747933387756348 + }, + { + "epoch": 3.753662109375e-06, + "grad_norm": 0.5127847194671631, + "learning_rate": 9.972030340333001e-05, + "loss": 0.1259, + "step": 2460 + }, + { + "epoch": 3.753662109375e-06, + "model_forward_time": 0.025229454040527344, + "step": 2460 + }, + { + "epoch": 3.753662109375e-06, + "step": 2460, + "training_step_time": 0.1051795482635498 + }, + { + "epoch": 3.75518798828125e-06, + "model_forward_time": 0.025468826293945312, + "step": 2461 + }, + { + "epoch": 3.75518798828125e-06, + "step": 2461, + "training_step_time": 0.19698452949523926 + }, + { + "epoch": 3.7567138671875e-06, + "model_forward_time": 0.024358034133911133, + "step": 2462 + }, + { + "epoch": 3.7567138671875e-06, + "step": 2462, + "training_step_time": 0.15067720413208008 + }, + { + "epoch": 3.75823974609375e-06, + "model_forward_time": 0.024712085723876953, + "step": 2463 + }, + { + "epoch": 3.75823974609375e-06, + "step": 2463, + "training_step_time": 0.11064767837524414 + }, + { + "epoch": 3.759765625e-06, + "model_forward_time": 0.02512955665588379, + "step": 2464 + }, + { + "epoch": 3.759765625e-06, + "step": 2464, + "training_step_time": 0.10876941680908203 + }, + { + "epoch": 3.76129150390625e-06, + "model_forward_time": 0.02569580078125, + "step": 2465 + }, + { + "epoch": 3.76129150390625e-06, + "step": 2465, + "training_step_time": 0.11101913452148438 + }, + { + "epoch": 3.7628173828125e-06, + "model_forward_time": 0.026356220245361328, + "step": 2466 + }, + { + "epoch": 3.7628173828125e-06, + "step": 2466, + "training_step_time": 0.11411738395690918 + }, + { + "epoch": 3.76434326171875e-06, + "model_forward_time": 0.02544569969177246, + "step": 2467 + }, + { + "epoch": 3.76434326171875e-06, + "step": 2467, + "training_step_time": 0.2089993953704834 + }, + { + "epoch": 3.765869140625e-06, + "model_forward_time": 0.024832487106323242, + "step": 2468 + }, + { + "epoch": 3.765869140625e-06, + "step": 2468, + "training_step_time": 0.13469791412353516 + }, + { + "epoch": 3.76739501953125e-06, + "model_forward_time": 0.02465224266052246, + "step": 2469 + }, + { + "epoch": 3.76739501953125e-06, + "step": 2469, + "training_step_time": 0.1372072696685791 + }, + { + "epoch": 3.7689208984375e-06, + "grad_norm": 0.7402927279472351, + "learning_rate": 9.971445162338939e-05, + "loss": 0.131, + "step": 2470 + }, + { + "epoch": 3.7689208984375e-06, + "model_forward_time": 0.02436685562133789, + "step": 2470 + }, + { + "epoch": 3.7689208984375e-06, + "step": 2470, + "training_step_time": 0.18536162376403809 + }, + { + "epoch": 3.77044677734375e-06, + "model_forward_time": 0.024528026580810547, + "step": 2471 + }, + { + "epoch": 3.77044677734375e-06, + "step": 2471, + "training_step_time": 0.12128996849060059 + }, + { + "epoch": 3.77197265625e-06, + "model_forward_time": 0.024566650390625, + "step": 2472 + }, + { + "epoch": 3.77197265625e-06, + "step": 2472, + "training_step_time": 0.11435961723327637 + }, + { + "epoch": 3.77349853515625e-06, + "model_forward_time": 0.025643348693847656, + "step": 2473 + }, + { + "epoch": 3.77349853515625e-06, + "step": 2473, + "training_step_time": 0.10745501518249512 + }, + { + "epoch": 3.7750244140625e-06, + "model_forward_time": 0.02560710906982422, + "step": 2474 + }, + { + "epoch": 3.7750244140625e-06, + "step": 2474, + "training_step_time": 0.10751605033874512 + }, + { + "epoch": 3.77655029296875e-06, + "model_forward_time": 0.026154756546020508, + "step": 2475 + }, + { + "epoch": 3.77655029296875e-06, + "step": 2475, + "training_step_time": 0.11568593978881836 + }, + { + "epoch": 3.778076171875e-06, + "model_forward_time": 0.02592778205871582, + "step": 2476 + }, + { + "epoch": 3.778076171875e-06, + "step": 2476, + "training_step_time": 0.11283731460571289 + }, + { + "epoch": 3.77960205078125e-06, + "model_forward_time": 0.0255126953125, + "step": 2477 + }, + { + "epoch": 3.77960205078125e-06, + "step": 2477, + "training_step_time": 0.1073300838470459 + }, + { + "epoch": 3.7811279296875e-06, + "model_forward_time": 0.02589106559753418, + "step": 2478 + }, + { + "epoch": 3.7811279296875e-06, + "step": 2478, + "training_step_time": 0.11598682403564453 + }, + { + "epoch": 3.78265380859375e-06, + "model_forward_time": 0.025252580642700195, + "step": 2479 + }, + { + "epoch": 3.78265380859375e-06, + "step": 2479, + "training_step_time": 0.12063789367675781 + }, + { + "epoch": 3.7841796875e-06, + "grad_norm": 0.9336197972297668, + "learning_rate": 9.97085394357023e-05, + "loss": 0.1645, + "step": 2480 + }, + { + "epoch": 3.7841796875e-06, + "model_forward_time": 0.02584981918334961, + "step": 2480 + }, + { + "epoch": 3.7841796875e-06, + "step": 2480, + "training_step_time": 0.11069917678833008 + }, + { + "epoch": 3.78570556640625e-06, + "model_forward_time": 0.025162458419799805, + "step": 2481 + }, + { + "epoch": 3.78570556640625e-06, + "step": 2481, + "training_step_time": 0.10551095008850098 + }, + { + "epoch": 3.7872314453125e-06, + "model_forward_time": 0.02553534507751465, + "step": 2482 + }, + { + "epoch": 3.7872314453125e-06, + "step": 2482, + "training_step_time": 0.10664749145507812 + }, + { + "epoch": 3.78875732421875e-06, + "model_forward_time": 0.02544379234313965, + "step": 2483 + }, + { + "epoch": 3.78875732421875e-06, + "step": 2483, + "training_step_time": 0.10379862785339355 + }, + { + "epoch": 3.790283203125e-06, + "model_forward_time": 0.0253446102142334, + "step": 2484 + }, + { + "epoch": 3.790283203125e-06, + "step": 2484, + "training_step_time": 0.10477423667907715 + }, + { + "epoch": 3.79180908203125e-06, + "model_forward_time": 0.025556564331054688, + "step": 2485 + }, + { + "epoch": 3.79180908203125e-06, + "step": 2485, + "training_step_time": 0.1066734790802002 + }, + { + "epoch": 3.7933349609375e-06, + "model_forward_time": 0.025150299072265625, + "step": 2486 + }, + { + "epoch": 3.7933349609375e-06, + "step": 2486, + "training_step_time": 0.10605001449584961 + }, + { + "epoch": 3.79486083984375e-06, + "model_forward_time": 0.025298118591308594, + "step": 2487 + }, + { + "epoch": 3.79486083984375e-06, + "step": 2487, + "training_step_time": 0.10730218887329102 + }, + { + "epoch": 3.79638671875e-06, + "model_forward_time": 0.025428295135498047, + "step": 2488 + }, + { + "epoch": 3.79638671875e-06, + "step": 2488, + "training_step_time": 0.10447001457214355 + }, + { + "epoch": 3.79791259765625e-06, + "model_forward_time": 0.027969837188720703, + "step": 2489 + }, + { + "epoch": 3.79791259765625e-06, + "step": 2489, + "training_step_time": 0.10995912551879883 + }, + { + "epoch": 3.7994384765625e-06, + "grad_norm": 1.0131940841674805, + "learning_rate": 9.970256684745258e-05, + "loss": 0.1391, + "step": 2490 + }, + { + "epoch": 3.7994384765625e-06, + "model_forward_time": 0.0251619815826416, + "step": 2490 + }, + { + "epoch": 3.7994384765625e-06, + "step": 2490, + "training_step_time": 0.10502076148986816 + }, + { + "epoch": 3.80096435546875e-06, + "model_forward_time": 0.025311946868896484, + "step": 2491 + }, + { + "epoch": 3.80096435546875e-06, + "step": 2491, + "training_step_time": 0.10914158821105957 + }, + { + "epoch": 3.802490234375e-06, + "model_forward_time": 0.025356769561767578, + "step": 2492 + }, + { + "epoch": 3.802490234375e-06, + "step": 2492, + "training_step_time": 0.10823988914489746 + }, + { + "epoch": 3.80401611328125e-06, + "model_forward_time": 0.025299549102783203, + "step": 2493 + }, + { + "epoch": 3.80401611328125e-06, + "step": 2493, + "training_step_time": 0.11547207832336426 + }, + { + "epoch": 3.8055419921875e-06, + "model_forward_time": 0.02527904510498047, + "step": 2494 + }, + { + "epoch": 3.8055419921875e-06, + "step": 2494, + "training_step_time": 0.12424206733703613 + }, + { + "epoch": 3.80706787109375e-06, + "model_forward_time": 0.025422334671020508, + "step": 2495 + }, + { + "epoch": 3.80706787109375e-06, + "step": 2495, + "training_step_time": 0.1422436237335205 + }, + { + "epoch": 3.80859375e-06, + "model_forward_time": 0.02512383460998535, + "step": 2496 + }, + { + "epoch": 3.80859375e-06, + "step": 2496, + "training_step_time": 0.16315555572509766 + }, + { + "epoch": 3.81011962890625e-06, + "model_forward_time": 0.024690628051757812, + "step": 2497 + }, + { + "epoch": 3.81011962890625e-06, + "step": 2497, + "training_step_time": 0.12959909439086914 + }, + { + "epoch": 3.8116455078125e-06, + "model_forward_time": 0.024353504180908203, + "step": 2498 + }, + { + "epoch": 3.8116455078125e-06, + "step": 2498, + "training_step_time": 0.1750173568725586 + }, + { + "epoch": 3.81317138671875e-06, + "model_forward_time": 0.024386882781982422, + "step": 2499 + }, + { + "epoch": 3.81317138671875e-06, + "step": 2499, + "training_step_time": 0.1945657730102539 + }, + { + "epoch": 3.814697265625e-06, + "grad_norm": 0.8203685283660889, + "learning_rate": 9.969653386589748e-05, + "loss": 0.1197, + "step": 2500 + }, + { + "epoch": 3.814697265625e-06, + "model_forward_time": 0.02459096908569336, + "step": 2500 + }, + { + "epoch": 3.814697265625e-06, + "step": 2500, + "training_step_time": 0.1203460693359375 + }, + { + "epoch": 3.81622314453125e-06, + "model_forward_time": 0.024329423904418945, + "step": 2501 + }, + { + "epoch": 3.81622314453125e-06, + "step": 2501, + "training_step_time": 0.17841076850891113 + }, + { + "epoch": 3.8177490234375e-06, + "model_forward_time": 0.02482128143310547, + "step": 2502 + }, + { + "epoch": 3.8177490234375e-06, + "step": 2502, + "training_step_time": 0.12412452697753906 + }, + { + "epoch": 3.81927490234375e-06, + "model_forward_time": 0.024544715881347656, + "step": 2503 + }, + { + "epoch": 3.81927490234375e-06, + "step": 2503, + "training_step_time": 0.10745644569396973 + }, + { + "epoch": 3.82080078125e-06, + "model_forward_time": 0.027962923049926758, + "step": 2504 + }, + { + "epoch": 3.82080078125e-06, + "step": 2504, + "training_step_time": 0.10976195335388184 + }, + { + "epoch": 3.82232666015625e-06, + "model_forward_time": 0.025274038314819336, + "step": 2505 + }, + { + "epoch": 3.82232666015625e-06, + "step": 2505, + "training_step_time": 0.1073157787322998 + }, + { + "epoch": 3.8238525390625e-06, + "model_forward_time": 0.0251309871673584, + "step": 2506 + }, + { + "epoch": 3.8238525390625e-06, + "step": 2506, + "training_step_time": 0.11293482780456543 + }, + { + "epoch": 3.82537841796875e-06, + "model_forward_time": 0.02541828155517578, + "step": 2507 + }, + { + "epoch": 3.82537841796875e-06, + "step": 2507, + "training_step_time": 0.15117120742797852 + }, + { + "epoch": 3.826904296875e-06, + "model_forward_time": 0.024676799774169922, + "step": 2508 + }, + { + "epoch": 3.826904296875e-06, + "step": 2508, + "training_step_time": 0.13915443420410156 + }, + { + "epoch": 3.82843017578125e-06, + "model_forward_time": 0.0246429443359375, + "step": 2509 + }, + { + "epoch": 3.82843017578125e-06, + "step": 2509, + "training_step_time": 0.11389350891113281 + }, + { + "epoch": 3.8299560546875e-06, + "grad_norm": 0.6547967195510864, + "learning_rate": 9.969044049836767e-05, + "loss": 0.1276, + "step": 2510 + }, + { + "epoch": 3.8299560546875e-06, + "model_forward_time": 0.02484130859375, + "step": 2510 + }, + { + "epoch": 3.8299560546875e-06, + "step": 2510, + "training_step_time": 0.11131405830383301 + }, + { + "epoch": 3.83148193359375e-06, + "model_forward_time": 0.025444507598876953, + "step": 2511 + }, + { + "epoch": 3.83148193359375e-06, + "step": 2511, + "training_step_time": 0.11545777320861816 + }, + { + "epoch": 3.8330078125e-06, + "model_forward_time": 0.024796724319458008, + "step": 2512 + }, + { + "epoch": 3.8330078125e-06, + "step": 2512, + "training_step_time": 0.10818123817443848 + }, + { + "epoch": 3.83453369140625e-06, + "model_forward_time": 0.02537226676940918, + "step": 2513 + }, + { + "epoch": 3.83453369140625e-06, + "step": 2513, + "training_step_time": 0.19669389724731445 + }, + { + "epoch": 3.8360595703125e-06, + "model_forward_time": 0.024755239486694336, + "step": 2514 + }, + { + "epoch": 3.8360595703125e-06, + "step": 2514, + "training_step_time": 0.10411620140075684 + }, + { + "epoch": 3.83758544921875e-06, + "model_forward_time": 0.024598360061645508, + "step": 2515 + }, + { + "epoch": 3.83758544921875e-06, + "step": 2515, + "training_step_time": 0.10577011108398438 + }, + { + "epoch": 3.839111328125e-06, + "model_forward_time": 0.025448083877563477, + "step": 2516 + }, + { + "epoch": 3.839111328125e-06, + "step": 2516, + "training_step_time": 0.10699868202209473 + }, + { + "epoch": 3.84063720703125e-06, + "model_forward_time": 0.0253756046295166, + "step": 2517 + }, + { + "epoch": 3.84063720703125e-06, + "step": 2517, + "training_step_time": 0.2233750820159912 + }, + { + "epoch": 3.8421630859375e-06, + "model_forward_time": 0.024968624114990234, + "step": 2518 + }, + { + "epoch": 3.8421630859375e-06, + "step": 2518, + "training_step_time": 0.11464333534240723 + }, + { + "epoch": 3.84368896484375e-06, + "model_forward_time": 0.024672985076904297, + "step": 2519 + }, + { + "epoch": 3.84368896484375e-06, + "step": 2519, + "training_step_time": 0.11810493469238281 + }, + { + "epoch": 3.84521484375e-06, + "grad_norm": 0.4905283451080322, + "learning_rate": 9.968428675226714e-05, + "loss": 0.15, + "step": 2520 + }, + { + "epoch": 3.84521484375e-06, + "model_forward_time": 0.02505636215209961, + "step": 2520 + }, + { + "epoch": 3.84521484375e-06, + "step": 2520, + "training_step_time": 0.19582796096801758 + }, + { + "epoch": 3.84674072265625e-06, + "model_forward_time": 0.02480006217956543, + "step": 2521 + }, + { + "epoch": 3.84674072265625e-06, + "step": 2521, + "training_step_time": 0.11697196960449219 + }, + { + "epoch": 3.8482666015625e-06, + "model_forward_time": 0.024858951568603516, + "step": 2522 + }, + { + "epoch": 3.8482666015625e-06, + "step": 2522, + "training_step_time": 0.10675406455993652 + }, + { + "epoch": 3.84979248046875e-06, + "model_forward_time": 0.02541184425354004, + "step": 2523 + }, + { + "epoch": 3.84979248046875e-06, + "step": 2523, + "training_step_time": 0.10760903358459473 + }, + { + "epoch": 3.851318359375e-06, + "model_forward_time": 0.02526378631591797, + "step": 2524 + }, + { + "epoch": 3.851318359375e-06, + "step": 2524, + "training_step_time": 0.10769486427307129 + }, + { + "epoch": 3.85284423828125e-06, + "model_forward_time": 0.02568650245666504, + "step": 2525 + }, + { + "epoch": 3.85284423828125e-06, + "step": 2525, + "training_step_time": 0.1098628044128418 + }, + { + "epoch": 3.8543701171875e-06, + "model_forward_time": 0.025000810623168945, + "step": 2526 + }, + { + "epoch": 3.8543701171875e-06, + "step": 2526, + "training_step_time": 0.10809779167175293 + }, + { + "epoch": 3.85589599609375e-06, + "model_forward_time": 0.02568507194519043, + "step": 2527 + }, + { + "epoch": 3.85589599609375e-06, + "step": 2527, + "training_step_time": 0.10846757888793945 + }, + { + "epoch": 3.857421875e-06, + "model_forward_time": 0.02523660659790039, + "step": 2528 + }, + { + "epoch": 3.857421875e-06, + "step": 2528, + "training_step_time": 0.10638165473937988 + }, + { + "epoch": 3.85894775390625e-06, + "model_forward_time": 0.025026559829711914, + "step": 2529 + }, + { + "epoch": 3.85894775390625e-06, + "step": 2529, + "training_step_time": 0.10637211799621582 + }, + { + "epoch": 3.8604736328125e-06, + "grad_norm": 0.45186877250671387, + "learning_rate": 9.967807263507329e-05, + "loss": 0.1006, + "step": 2530 + }, + { + "epoch": 3.8604736328125e-06, + "model_forward_time": 0.025406599044799805, + "step": 2530 + }, + { + "epoch": 3.8604736328125e-06, + "step": 2530, + "training_step_time": 0.10567331314086914 + }, + { + "epoch": 3.86199951171875e-06, + "model_forward_time": 0.02520895004272461, + "step": 2531 + }, + { + "epoch": 3.86199951171875e-06, + "step": 2531, + "training_step_time": 0.10552239418029785 + }, + { + "epoch": 3.863525390625e-06, + "model_forward_time": 0.025433778762817383, + "step": 2532 + }, + { + "epoch": 3.863525390625e-06, + "step": 2532, + "training_step_time": 0.10562419891357422 + }, + { + "epoch": 3.86505126953125e-06, + "model_forward_time": 0.02487802505493164, + "step": 2533 + }, + { + "epoch": 3.86505126953125e-06, + "step": 2533, + "training_step_time": 0.10971593856811523 + }, + { + "epoch": 3.8665771484375e-06, + "model_forward_time": 0.025235652923583984, + "step": 2534 + }, + { + "epoch": 3.8665771484375e-06, + "step": 2534, + "training_step_time": 0.10633349418640137 + }, + { + "epoch": 3.86810302734375e-06, + "model_forward_time": 0.025305986404418945, + "step": 2535 + }, + { + "epoch": 3.86810302734375e-06, + "step": 2535, + "training_step_time": 0.10921716690063477 + }, + { + "epoch": 3.86962890625e-06, + "model_forward_time": 0.025175094604492188, + "step": 2536 + }, + { + "epoch": 3.86962890625e-06, + "step": 2536, + "training_step_time": 0.10807156562805176 + }, + { + "epoch": 3.87115478515625e-06, + "model_forward_time": 0.02562403678894043, + "step": 2537 + }, + { + "epoch": 3.87115478515625e-06, + "step": 2537, + "training_step_time": 0.10750675201416016 + }, + { + "epoch": 3.8726806640625e-06, + "model_forward_time": 0.025577545166015625, + "step": 2538 + }, + { + "epoch": 3.8726806640625e-06, + "step": 2538, + "training_step_time": 0.10752511024475098 + }, + { + "epoch": 3.87420654296875e-06, + "model_forward_time": 0.026001930236816406, + "step": 2539 + }, + { + "epoch": 3.87420654296875e-06, + "step": 2539, + "training_step_time": 0.11614036560058594 + }, + { + "epoch": 3.875732421875e-06, + "grad_norm": 0.6325926184654236, + "learning_rate": 9.967179815433685e-05, + "loss": 0.1126, + "step": 2540 + }, + { + "epoch": 3.875732421875e-06, + "model_forward_time": 0.02568960189819336, + "step": 2540 + }, + { + "epoch": 3.875732421875e-06, + "step": 2540, + "training_step_time": 0.11594867706298828 + }, + { + "epoch": 3.87725830078125e-06, + "model_forward_time": 0.025379419326782227, + "step": 2541 + }, + { + "epoch": 3.87725830078125e-06, + "step": 2541, + "training_step_time": 0.11919307708740234 + }, + { + "epoch": 3.8787841796875e-06, + "model_forward_time": 0.025401830673217773, + "step": 2542 + }, + { + "epoch": 3.8787841796875e-06, + "step": 2542, + "training_step_time": 0.10793447494506836 + }, + { + "epoch": 3.88031005859375e-06, + "model_forward_time": 0.02532052993774414, + "step": 2543 + }, + { + "epoch": 3.88031005859375e-06, + "step": 2543, + "training_step_time": 0.19345736503601074 + }, + { + "epoch": 3.8818359375e-06, + "model_forward_time": 0.024195432662963867, + "step": 2544 + }, + { + "epoch": 3.8818359375e-06, + "step": 2544, + "training_step_time": 0.17615365982055664 + }, + { + "epoch": 3.88336181640625e-06, + "model_forward_time": 0.023886680603027344, + "step": 2545 + }, + { + "epoch": 3.88336181640625e-06, + "step": 2545, + "training_step_time": 0.18914461135864258 + }, + { + "epoch": 3.8848876953125e-06, + "model_forward_time": 0.024648189544677734, + "step": 2546 + }, + { + "epoch": 3.8848876953125e-06, + "step": 2546, + "training_step_time": 0.15303659439086914 + }, + { + "epoch": 3.88641357421875e-06, + "model_forward_time": 0.024959564208984375, + "step": 2547 + }, + { + "epoch": 3.88641357421875e-06, + "step": 2547, + "training_step_time": 0.17954802513122559 + }, + { + "epoch": 3.887939453125e-06, + "model_forward_time": 0.024660587310791016, + "step": 2548 + }, + { + "epoch": 3.887939453125e-06, + "step": 2548, + "training_step_time": 0.16007018089294434 + }, + { + "epoch": 3.88946533203125e-06, + "model_forward_time": 0.02429652214050293, + "step": 2549 + }, + { + "epoch": 3.88946533203125e-06, + "step": 2549, + "training_step_time": 0.10378861427307129 + }, + { + "epoch": 3.8909912109375e-06, + "grad_norm": 0.6692981719970703, + "learning_rate": 9.966546331768191e-05, + "loss": 0.1202, + "step": 2550 + }, + { + "epoch": 3.8909912109375e-06, + "model_forward_time": 0.025659561157226562, + "step": 2550 + }, + { + "epoch": 3.8909912109375e-06, + "step": 2550, + "training_step_time": 0.10602498054504395 + }, + { + "epoch": 3.89251708984375e-06, + "model_forward_time": 0.02485799789428711, + "step": 2551 + }, + { + "epoch": 3.89251708984375e-06, + "step": 2551, + "training_step_time": 0.10846996307373047 + }, + { + "epoch": 3.89404296875e-06, + "model_forward_time": 0.025962352752685547, + "step": 2552 + }, + { + "epoch": 3.89404296875e-06, + "step": 2552, + "training_step_time": 0.15173935890197754 + }, + { + "epoch": 3.89556884765625e-06, + "model_forward_time": 0.025590896606445312, + "step": 2553 + }, + { + "epoch": 3.89556884765625e-06, + "step": 2553, + "training_step_time": 0.15000033378601074 + }, + { + "epoch": 3.8970947265625e-06, + "model_forward_time": 0.024731874465942383, + "step": 2554 + }, + { + "epoch": 3.8970947265625e-06, + "step": 2554, + "training_step_time": 0.1374979019165039 + }, + { + "epoch": 3.89862060546875e-06, + "model_forward_time": 0.02346944808959961, + "step": 2555 + }, + { + "epoch": 3.89862060546875e-06, + "step": 2555, + "training_step_time": 0.20352649688720703 + }, + { + "epoch": 3.900146484375e-06, + "model_forward_time": 0.024442434310913086, + "step": 2556 + }, + { + "epoch": 3.900146484375e-06, + "step": 2556, + "training_step_time": 0.1940760612487793 + }, + { + "epoch": 3.90167236328125e-06, + "model_forward_time": 0.024971485137939453, + "step": 2557 + }, + { + "epoch": 3.90167236328125e-06, + "step": 2557, + "training_step_time": 0.16737747192382812 + }, + { + "epoch": 3.9031982421875e-06, + "model_forward_time": 0.023710966110229492, + "step": 2558 + }, + { + "epoch": 3.9031982421875e-06, + "step": 2558, + "training_step_time": 0.1463909149169922 + }, + { + "epoch": 3.90472412109375e-06, + "model_forward_time": 0.023838043212890625, + "step": 2559 + }, + { + "epoch": 3.90472412109375e-06, + "step": 2559, + "training_step_time": 0.12836956977844238 + }, + { + "epoch": 3.90625e-06, + "grad_norm": 1.0405771732330322, + "learning_rate": 9.96590681328059e-05, + "loss": 0.1484, + "step": 2560 + }, + { + "epoch": 3.90625e-06, + "model_forward_time": 0.023972034454345703, + "step": 2560 + }, + { + "epoch": 3.90625e-06, + "step": 2560, + "training_step_time": 0.1922767162322998 + }, + { + "epoch": 3.90777587890625e-06, + "model_forward_time": 0.025022506713867188, + "step": 2561 + }, + { + "epoch": 3.90777587890625e-06, + "step": 2561, + "training_step_time": 0.12780380249023438 + }, + { + "epoch": 3.9093017578125e-06, + "model_forward_time": 0.024634599685668945, + "step": 2562 + }, + { + "epoch": 3.9093017578125e-06, + "step": 2562, + "training_step_time": 0.15634989738464355 + }, + { + "epoch": 3.91082763671875e-06, + "model_forward_time": 0.024483680725097656, + "step": 2563 + }, + { + "epoch": 3.91082763671875e-06, + "step": 2563, + "training_step_time": 0.15680599212646484 + }, + { + "epoch": 3.912353515625e-06, + "model_forward_time": 0.02456831932067871, + "step": 2564 + }, + { + "epoch": 3.912353515625e-06, + "step": 2564, + "training_step_time": 0.11584043502807617 + }, + { + "epoch": 3.91387939453125e-06, + "model_forward_time": 0.025426149368286133, + "step": 2565 + }, + { + "epoch": 3.91387939453125e-06, + "step": 2565, + "training_step_time": 0.11266922950744629 + }, + { + "epoch": 3.9154052734375e-06, + "model_forward_time": 0.024250030517578125, + "step": 2566 + }, + { + "epoch": 3.9154052734375e-06, + "step": 2566, + "training_step_time": 0.1071479320526123 + }, + { + "epoch": 3.91693115234375e-06, + "model_forward_time": 0.02664327621459961, + "step": 2567 + }, + { + "epoch": 3.91693115234375e-06, + "step": 2567, + "training_step_time": 0.11099553108215332 + }, + { + "epoch": 3.91845703125e-06, + "model_forward_time": 0.02579951286315918, + "step": 2568 + }, + { + "epoch": 3.91845703125e-06, + "step": 2568, + "training_step_time": 0.10607290267944336 + }, + { + "epoch": 3.91998291015625e-06, + "model_forward_time": 0.0254361629486084, + "step": 2569 + }, + { + "epoch": 3.91998291015625e-06, + "step": 2569, + "training_step_time": 0.10824728012084961 + }, + { + "epoch": 3.9215087890625e-06, + "grad_norm": 0.6695353984832764, + "learning_rate": 9.965261260747956e-05, + "loss": 0.1261, + "step": 2570 + }, + { + "epoch": 3.9215087890625e-06, + "model_forward_time": 0.025298118591308594, + "step": 2570 + }, + { + "epoch": 3.9215087890625e-06, + "step": 2570, + "training_step_time": 0.10599803924560547 + }, + { + "epoch": 3.92303466796875e-06, + "model_forward_time": 0.02756810188293457, + "step": 2571 + }, + { + "epoch": 3.92303466796875e-06, + "step": 2571, + "training_step_time": 0.11224031448364258 + }, + { + "epoch": 3.924560546875e-06, + "model_forward_time": 0.025689363479614258, + "step": 2572 + }, + { + "epoch": 3.924560546875e-06, + "step": 2572, + "training_step_time": 0.10567450523376465 + }, + { + "epoch": 3.92608642578125e-06, + "model_forward_time": 0.025651216506958008, + "step": 2573 + }, + { + "epoch": 3.92608642578125e-06, + "step": 2573, + "training_step_time": 0.10559201240539551 + }, + { + "epoch": 3.9276123046875e-06, + "model_forward_time": 0.025165557861328125, + "step": 2574 + }, + { + "epoch": 3.9276123046875e-06, + "step": 2574, + "training_step_time": 0.11419391632080078 + }, + { + "epoch": 3.92913818359375e-06, + "model_forward_time": 0.02482295036315918, + "step": 2575 + }, + { + "epoch": 3.92913818359375e-06, + "step": 2575, + "training_step_time": 0.11034870147705078 + }, + { + "epoch": 3.9306640625e-06, + "model_forward_time": 0.025615692138671875, + "step": 2576 + }, + { + "epoch": 3.9306640625e-06, + "step": 2576, + "training_step_time": 0.10513186454772949 + }, + { + "epoch": 3.93218994140625e-06, + "model_forward_time": 0.025079965591430664, + "step": 2577 + }, + { + "epoch": 3.93218994140625e-06, + "step": 2577, + "training_step_time": 0.1083517074584961 + }, + { + "epoch": 3.9337158203125e-06, + "model_forward_time": 0.02583932876586914, + "step": 2578 + }, + { + "epoch": 3.9337158203125e-06, + "step": 2578, + "training_step_time": 0.10722923278808594 + }, + { + "epoch": 3.93524169921875e-06, + "model_forward_time": 0.025200843811035156, + "step": 2579 + }, + { + "epoch": 3.93524169921875e-06, + "step": 2579, + "training_step_time": 0.10635232925415039 + }, + { + "epoch": 3.936767578125e-06, + "grad_norm": 0.5957489013671875, + "learning_rate": 9.964609674954696e-05, + "loss": 0.134, + "step": 2580 + }, + { + "epoch": 3.936767578125e-06, + "model_forward_time": 0.02541804313659668, + "step": 2580 + }, + { + "epoch": 3.936767578125e-06, + "step": 2580, + "training_step_time": 0.11294436454772949 + }, + { + "epoch": 3.93829345703125e-06, + "model_forward_time": 0.025622844696044922, + "step": 2581 + }, + { + "epoch": 3.93829345703125e-06, + "step": 2581, + "training_step_time": 0.10666084289550781 + }, + { + "epoch": 3.9398193359375e-06, + "model_forward_time": 0.02549147605895996, + "step": 2582 + }, + { + "epoch": 3.9398193359375e-06, + "step": 2582, + "training_step_time": 0.11539244651794434 + }, + { + "epoch": 3.94134521484375e-06, + "model_forward_time": 0.025074243545532227, + "step": 2583 + }, + { + "epoch": 3.94134521484375e-06, + "step": 2583, + "training_step_time": 0.11464595794677734 + }, + { + "epoch": 3.94287109375e-06, + "model_forward_time": 0.025991439819335938, + "step": 2584 + }, + { + "epoch": 3.94287109375e-06, + "step": 2584, + "training_step_time": 0.11116671562194824 + }, + { + "epoch": 3.94439697265625e-06, + "model_forward_time": 0.025312185287475586, + "step": 2585 + }, + { + "epoch": 3.94439697265625e-06, + "step": 2585, + "training_step_time": 0.1107170581817627 + }, + { + "epoch": 3.9459228515625e-06, + "model_forward_time": 0.025470733642578125, + "step": 2586 + }, + { + "epoch": 3.9459228515625e-06, + "step": 2586, + "training_step_time": 0.12117505073547363 + }, + { + "epoch": 3.94744873046875e-06, + "model_forward_time": 0.02547478675842285, + "step": 2587 + }, + { + "epoch": 3.94744873046875e-06, + "step": 2587, + "training_step_time": 0.16582131385803223 + }, + { + "epoch": 3.948974609375e-06, + "model_forward_time": 0.024440288543701172, + "step": 2588 + }, + { + "epoch": 3.948974609375e-06, + "step": 2588, + "training_step_time": 0.17252588272094727 + }, + { + "epoch": 3.95050048828125e-06, + "model_forward_time": 0.02466607093811035, + "step": 2589 + }, + { + "epoch": 3.95050048828125e-06, + "step": 2589, + "training_step_time": 0.178086519241333 + }, + { + "epoch": 3.9520263671875e-06, + "grad_norm": 0.7916200757026672, + "learning_rate": 9.963952056692549e-05, + "loss": 0.1296, + "step": 2590 + }, + { + "epoch": 3.9520263671875e-06, + "model_forward_time": 0.023917198181152344, + "step": 2590 + }, + { + "epoch": 3.9520263671875e-06, + "step": 2590, + "training_step_time": 0.1822068691253662 + }, + { + "epoch": 3.95355224609375e-06, + "model_forward_time": 0.02631402015686035, + "step": 2591 + }, + { + "epoch": 3.95355224609375e-06, + "step": 2591, + "training_step_time": 0.1557767391204834 + }, + { + "epoch": 3.955078125e-06, + "model_forward_time": 0.025574445724487305, + "step": 2592 + }, + { + "epoch": 3.955078125e-06, + "step": 2592, + "training_step_time": 0.11522722244262695 + }, + { + "epoch": 3.95660400390625e-06, + "model_forward_time": 0.02468705177307129, + "step": 2593 + }, + { + "epoch": 3.95660400390625e-06, + "step": 2593, + "training_step_time": 0.10857152938842773 + }, + { + "epoch": 3.9581298828125e-06, + "model_forward_time": 0.025453805923461914, + "step": 2594 + }, + { + "epoch": 3.9581298828125e-06, + "step": 2594, + "training_step_time": 0.10865116119384766 + }, + { + "epoch": 3.95965576171875e-06, + "model_forward_time": 0.025238990783691406, + "step": 2595 + }, + { + "epoch": 3.95965576171875e-06, + "step": 2595, + "training_step_time": 0.10948657989501953 + }, + { + "epoch": 3.961181640625e-06, + "model_forward_time": 0.025798320770263672, + "step": 2596 + }, + { + "epoch": 3.961181640625e-06, + "step": 2596, + "training_step_time": 0.11607003211975098 + }, + { + "epoch": 3.96270751953125e-06, + "model_forward_time": 0.025298595428466797, + "step": 2597 + }, + { + "epoch": 3.96270751953125e-06, + "step": 2597, + "training_step_time": 0.13132214546203613 + }, + { + "epoch": 3.9642333984375e-06, + "model_forward_time": 0.02596879005432129, + "step": 2598 + }, + { + "epoch": 3.9642333984375e-06, + "step": 2598, + "training_step_time": 0.11165380477905273 + }, + { + "epoch": 3.96575927734375e-06, + "model_forward_time": 0.02558159828186035, + "step": 2599 + }, + { + "epoch": 3.96575927734375e-06, + "step": 2599, + "training_step_time": 0.12325191497802734 + }, + { + "epoch": 3.96728515625e-06, + "grad_norm": 0.8327940106391907, + "learning_rate": 9.963288406760582e-05, + "loss": 0.1503, + "step": 2600 + }, + { + "epoch": 3.96728515625e-06, + "model_forward_time": 0.026118993759155273, + "step": 2600 + }, + { + "epoch": 3.96728515625e-06, + "step": 2600, + "training_step_time": 0.1679854393005371 + }, + { + "epoch": 3.96881103515625e-06, + "model_forward_time": 0.024846315383911133, + "step": 2601 + }, + { + "epoch": 3.96881103515625e-06, + "step": 2601, + "training_step_time": 0.13950634002685547 + }, + { + "epoch": 3.9703369140625e-06, + "model_forward_time": 0.024503469467163086, + "step": 2602 + }, + { + "epoch": 3.9703369140625e-06, + "step": 2602, + "training_step_time": 0.10708093643188477 + }, + { + "epoch": 3.97186279296875e-06, + "model_forward_time": 0.025322914123535156, + "step": 2603 + }, + { + "epoch": 3.97186279296875e-06, + "step": 2603, + "training_step_time": 0.10945701599121094 + }, + { + "epoch": 3.973388671875e-06, + "model_forward_time": 0.02540421485900879, + "step": 2604 + }, + { + "epoch": 3.973388671875e-06, + "step": 2604, + "training_step_time": 0.11253929138183594 + }, + { + "epoch": 3.97491455078125e-06, + "model_forward_time": 0.025065898895263672, + "step": 2605 + }, + { + "epoch": 3.97491455078125e-06, + "step": 2605, + "training_step_time": 0.1102752685546875 + }, + { + "epoch": 3.9764404296875e-06, + "model_forward_time": 0.02481865882873535, + "step": 2606 + }, + { + "epoch": 3.9764404296875e-06, + "step": 2606, + "training_step_time": 0.11115741729736328 + }, + { + "epoch": 3.97796630859375e-06, + "model_forward_time": 0.026096343994140625, + "step": 2607 + }, + { + "epoch": 3.97796630859375e-06, + "step": 2607, + "training_step_time": 0.11132001876831055 + }, + { + "epoch": 3.9794921875e-06, + "model_forward_time": 0.02564716339111328, + "step": 2608 + }, + { + "epoch": 3.9794921875e-06, + "step": 2608, + "training_step_time": 0.10923266410827637 + }, + { + "epoch": 3.98101806640625e-06, + "model_forward_time": 0.025237560272216797, + "step": 2609 + }, + { + "epoch": 3.98101806640625e-06, + "step": 2609, + "training_step_time": 0.2297360897064209 + }, + { + "epoch": 3.9825439453125e-06, + "grad_norm": 0.7479241490364075, + "learning_rate": 9.962618725965196e-05, + "loss": 0.1371, + "step": 2610 + }, + { + "epoch": 3.9825439453125e-06, + "model_forward_time": 0.024466276168823242, + "step": 2610 + }, + { + "epoch": 3.9825439453125e-06, + "step": 2610, + "training_step_time": 0.10362887382507324 + }, + { + "epoch": 3.98406982421875e-06, + "model_forward_time": 0.02477288246154785, + "step": 2611 + }, + { + "epoch": 3.98406982421875e-06, + "step": 2611, + "training_step_time": 0.10330843925476074 + }, + { + "epoch": 3.985595703125e-06, + "model_forward_time": 0.02607131004333496, + "step": 2612 + }, + { + "epoch": 3.985595703125e-06, + "step": 2612, + "training_step_time": 0.11860013008117676 + }, + { + "epoch": 3.98712158203125e-06, + "model_forward_time": 0.025256872177124023, + "step": 2613 + }, + { + "epoch": 3.98712158203125e-06, + "step": 2613, + "training_step_time": 0.10472679138183594 + }, + { + "epoch": 3.9886474609375e-06, + "model_forward_time": 0.025536060333251953, + "step": 2614 + }, + { + "epoch": 3.9886474609375e-06, + "step": 2614, + "training_step_time": 0.10591983795166016 + }, + { + "epoch": 3.99017333984375e-06, + "model_forward_time": 0.025212764739990234, + "step": 2615 + }, + { + "epoch": 3.99017333984375e-06, + "step": 2615, + "training_step_time": 0.10774540901184082 + }, + { + "epoch": 3.99169921875e-06, + "model_forward_time": 0.025318145751953125, + "step": 2616 + }, + { + "epoch": 3.99169921875e-06, + "step": 2616, + "training_step_time": 0.11058163642883301 + }, + { + "epoch": 3.99322509765625e-06, + "model_forward_time": 0.02550029754638672, + "step": 2617 + }, + { + "epoch": 3.99322509765625e-06, + "step": 2617, + "training_step_time": 0.10853433609008789 + }, + { + "epoch": 3.9947509765625e-06, + "model_forward_time": 0.0256955623626709, + "step": 2618 + }, + { + "epoch": 3.9947509765625e-06, + "step": 2618, + "training_step_time": 0.10755658149719238 + }, + { + "epoch": 3.99627685546875e-06, + "model_forward_time": 0.024566173553466797, + "step": 2619 + }, + { + "epoch": 3.99627685546875e-06, + "step": 2619, + "training_step_time": 0.11729955673217773 + }, + { + "epoch": 3.997802734375e-06, + "grad_norm": 0.7612093687057495, + "learning_rate": 9.961943015120113e-05, + "loss": 0.1182, + "step": 2620 + }, + { + "epoch": 3.997802734375e-06, + "model_forward_time": 0.024451255798339844, + "step": 2620 + }, + { + "epoch": 3.997802734375e-06, + "step": 2620, + "training_step_time": 0.14741015434265137 + }, + { + "epoch": 3.99932861328125e-06, + "model_forward_time": 0.024184465408325195, + "step": 2621 + }, + { + "epoch": 3.99932861328125e-06, + "step": 2621, + "training_step_time": 0.13308405876159668 + }, + { + "epoch": 4.0008544921875e-06, + "model_forward_time": 0.023622989654541016, + "step": 2622 + }, + { + "epoch": 4.0008544921875e-06, + "step": 2622, + "training_step_time": 0.12961959838867188 + }, + { + "epoch": 4.00238037109375e-06, + "model_forward_time": 0.024151086807250977, + "step": 2623 + }, + { + "epoch": 4.00238037109375e-06, + "step": 2623, + "training_step_time": 0.12308955192565918 + }, + { + "epoch": 4.00390625e-06, + "model_forward_time": 0.024152517318725586, + "step": 2624 + }, + { + "epoch": 4.00390625e-06, + "step": 2624, + "training_step_time": 0.12040305137634277 + }, + { + "epoch": 4.00543212890625e-06, + "model_forward_time": 0.024043798446655273, + "step": 2625 + }, + { + "epoch": 4.00543212890625e-06, + "step": 2625, + "training_step_time": 0.11487674713134766 + }, + { + "epoch": 4.0069580078125e-06, + "model_forward_time": 0.024619579315185547, + "step": 2626 + }, + { + "epoch": 4.0069580078125e-06, + "step": 2626, + "training_step_time": 0.1153862476348877 + }, + { + "epoch": 4.00848388671875e-06, + "model_forward_time": 0.02565312385559082, + "step": 2627 + }, + { + "epoch": 4.00848388671875e-06, + "step": 2627, + "training_step_time": 0.1151423454284668 + }, + { + "epoch": 4.010009765625e-06, + "model_forward_time": 0.025574922561645508, + "step": 2628 + }, + { + "epoch": 4.010009765625e-06, + "step": 2628, + "training_step_time": 0.11503458023071289 + }, + { + "epoch": 4.01153564453125e-06, + "model_forward_time": 0.025277376174926758, + "step": 2629 + }, + { + "epoch": 4.01153564453125e-06, + "step": 2629, + "training_step_time": 0.11299943923950195 + }, + { + "epoch": 4.0130615234375e-06, + "grad_norm": 0.9169083833694458, + "learning_rate": 9.961261275046383e-05, + "loss": 0.1264, + "step": 2630 + }, + { + "epoch": 4.0130615234375e-06, + "model_forward_time": 0.025089025497436523, + "step": 2630 + }, + { + "epoch": 4.0130615234375e-06, + "step": 2630, + "training_step_time": 0.11535882949829102 + }, + { + "epoch": 4.01458740234375e-06, + "model_forward_time": 0.025220394134521484, + "step": 2631 + }, + { + "epoch": 4.01458740234375e-06, + "step": 2631, + "training_step_time": 0.1254265308380127 + }, + { + "epoch": 4.01611328125e-06, + "model_forward_time": 0.024977922439575195, + "step": 2632 + }, + { + "epoch": 4.01611328125e-06, + "step": 2632, + "training_step_time": 0.17168593406677246 + }, + { + "epoch": 4.01763916015625e-06, + "model_forward_time": 0.02472996711730957, + "step": 2633 + }, + { + "epoch": 4.01763916015625e-06, + "step": 2633, + "training_step_time": 0.16511201858520508 + }, + { + "epoch": 4.0191650390625e-06, + "model_forward_time": 0.024491071701049805, + "step": 2634 + }, + { + "epoch": 4.0191650390625e-06, + "step": 2634, + "training_step_time": 0.16451549530029297 + }, + { + "epoch": 4.02069091796875e-06, + "model_forward_time": 0.024625062942504883, + "step": 2635 + }, + { + "epoch": 4.02069091796875e-06, + "step": 2635, + "training_step_time": 0.2046198844909668 + }, + { + "epoch": 4.022216796875e-06, + "model_forward_time": 0.02654242515563965, + "step": 2636 + }, + { + "epoch": 4.022216796875e-06, + "step": 2636, + "training_step_time": 0.16288542747497559 + }, + { + "epoch": 4.02374267578125e-06, + "model_forward_time": 0.024131298065185547, + "step": 2637 + }, + { + "epoch": 4.02374267578125e-06, + "step": 2637, + "training_step_time": 0.10601449012756348 + }, + { + "epoch": 4.0252685546875e-06, + "model_forward_time": 0.024834156036376953, + "step": 2638 + }, + { + "epoch": 4.0252685546875e-06, + "step": 2638, + "training_step_time": 0.10745692253112793 + }, + { + "epoch": 4.02679443359375e-06, + "model_forward_time": 0.025490760803222656, + "step": 2639 + }, + { + "epoch": 4.02679443359375e-06, + "step": 2639, + "training_step_time": 0.11195969581604004 + }, + { + "epoch": 4.0283203125e-06, + "grad_norm": 0.8895446062088013, + "learning_rate": 9.96057350657239e-05, + "loss": 0.154, + "step": 2640 + }, + { + "epoch": 4.0283203125e-06, + "model_forward_time": 0.02508997917175293, + "step": 2640 + }, + { + "epoch": 4.0283203125e-06, + "step": 2640, + "training_step_time": 0.10900235176086426 + }, + { + "epoch": 4.02984619140625e-06, + "model_forward_time": 0.025409698486328125, + "step": 2641 + }, + { + "epoch": 4.02984619140625e-06, + "step": 2641, + "training_step_time": 0.11361122131347656 + }, + { + "epoch": 4.0313720703125e-06, + "model_forward_time": 0.025673866271972656, + "step": 2642 + }, + { + "epoch": 4.0313720703125e-06, + "step": 2642, + "training_step_time": 0.11690664291381836 + }, + { + "epoch": 4.03289794921875e-06, + "model_forward_time": 0.024580955505371094, + "step": 2643 + }, + { + "epoch": 4.03289794921875e-06, + "step": 2643, + "training_step_time": 0.11499595642089844 + }, + { + "epoch": 4.034423828125e-06, + "model_forward_time": 0.02550673484802246, + "step": 2644 + }, + { + "epoch": 4.034423828125e-06, + "step": 2644, + "training_step_time": 0.1154639720916748 + }, + { + "epoch": 4.03594970703125e-06, + "model_forward_time": 0.025206565856933594, + "step": 2645 + }, + { + "epoch": 4.03594970703125e-06, + "step": 2645, + "training_step_time": 0.11664366722106934 + }, + { + "epoch": 4.0374755859375e-06, + "model_forward_time": 0.02520298957824707, + "step": 2646 + }, + { + "epoch": 4.0374755859375e-06, + "step": 2646, + "training_step_time": 0.16458725929260254 + }, + { + "epoch": 4.03900146484375e-06, + "model_forward_time": 0.02398538589477539, + "step": 2647 + }, + { + "epoch": 4.03900146484375e-06, + "step": 2647, + "training_step_time": 0.15099358558654785 + }, + { + "epoch": 4.04052734375e-06, + "model_forward_time": 0.025594234466552734, + "step": 2648 + }, + { + "epoch": 4.04052734375e-06, + "step": 2648, + "training_step_time": 0.11366987228393555 + }, + { + "epoch": 4.04205322265625e-06, + "model_forward_time": 0.025868654251098633, + "step": 2649 + }, + { + "epoch": 4.04205322265625e-06, + "step": 2649, + "training_step_time": 0.10973405838012695 + }, + { + "epoch": 4.0435791015625e-06, + "grad_norm": 0.6514967679977417, + "learning_rate": 9.959879710533835e-05, + "loss": 0.1366, + "step": 2650 + }, + { + "epoch": 4.0435791015625e-06, + "model_forward_time": 0.02537822723388672, + "step": 2650 + }, + { + "epoch": 4.0435791015625e-06, + "step": 2650, + "training_step_time": 0.17003965377807617 + }, + { + "epoch": 4.04510498046875e-06, + "model_forward_time": 0.0243227481842041, + "step": 2651 + }, + { + "epoch": 4.04510498046875e-06, + "step": 2651, + "training_step_time": 0.16069912910461426 + }, + { + "epoch": 4.046630859375e-06, + "model_forward_time": 0.024210214614868164, + "step": 2652 + }, + { + "epoch": 4.046630859375e-06, + "step": 2652, + "training_step_time": 0.11087918281555176 + }, + { + "epoch": 4.04815673828125e-06, + "model_forward_time": 0.024750947952270508, + "step": 2653 + }, + { + "epoch": 4.04815673828125e-06, + "step": 2653, + "training_step_time": 0.1651015281677246 + }, + { + "epoch": 4.0496826171875e-06, + "model_forward_time": 0.024959802627563477, + "step": 2654 + }, + { + "epoch": 4.0496826171875e-06, + "step": 2654, + "training_step_time": 0.1662149429321289 + }, + { + "epoch": 4.05120849609375e-06, + "model_forward_time": 0.024851560592651367, + "step": 2655 + }, + { + "epoch": 4.05120849609375e-06, + "step": 2655, + "training_step_time": 0.10683155059814453 + }, + { + "epoch": 4.052734375e-06, + "model_forward_time": 0.025258779525756836, + "step": 2656 + }, + { + "epoch": 4.052734375e-06, + "step": 2656, + "training_step_time": 0.10517644882202148 + }, + { + "epoch": 4.05426025390625e-06, + "model_forward_time": 0.025101661682128906, + "step": 2657 + }, + { + "epoch": 4.05426025390625e-06, + "step": 2657, + "training_step_time": 0.10609674453735352 + }, + { + "epoch": 4.0557861328125e-06, + "model_forward_time": 0.025737762451171875, + "step": 2658 + }, + { + "epoch": 4.0557861328125e-06, + "step": 2658, + "training_step_time": 0.1074228286743164 + }, + { + "epoch": 4.05731201171875e-06, + "model_forward_time": 0.025517940521240234, + "step": 2659 + }, + { + "epoch": 4.05731201171875e-06, + "step": 2659, + "training_step_time": 0.10764479637145996 + }, + { + "epoch": 4.058837890625e-06, + "grad_norm": 0.6101964116096497, + "learning_rate": 9.959179887773744e-05, + "loss": 0.1499, + "step": 2660 + }, + { + "epoch": 4.058837890625e-06, + "model_forward_time": 0.025423288345336914, + "step": 2660 + }, + { + "epoch": 4.058837890625e-06, + "step": 2660, + "training_step_time": 0.11061787605285645 + }, + { + "epoch": 4.06036376953125e-06, + "model_forward_time": 0.025127887725830078, + "step": 2661 + }, + { + "epoch": 4.06036376953125e-06, + "step": 2661, + "training_step_time": 0.14537715911865234 + }, + { + "epoch": 4.0618896484375e-06, + "model_forward_time": 0.024112701416015625, + "step": 2662 + }, + { + "epoch": 4.0618896484375e-06, + "step": 2662, + "training_step_time": 0.1497800350189209 + }, + { + "epoch": 4.06341552734375e-06, + "model_forward_time": 0.024029016494750977, + "step": 2663 + }, + { + "epoch": 4.06341552734375e-06, + "step": 2663, + "training_step_time": 0.14650487899780273 + }, + { + "epoch": 4.06494140625e-06, + "model_forward_time": 0.02388453483581543, + "step": 2664 + }, + { + "epoch": 4.06494140625e-06, + "step": 2664, + "training_step_time": 0.14997458457946777 + }, + { + "epoch": 4.06646728515625e-06, + "model_forward_time": 0.02441549301147461, + "step": 2665 + }, + { + "epoch": 4.06646728515625e-06, + "step": 2665, + "training_step_time": 0.12883663177490234 + }, + { + "epoch": 4.0679931640625e-06, + "model_forward_time": 0.024616479873657227, + "step": 2666 + }, + { + "epoch": 4.0679931640625e-06, + "step": 2666, + "training_step_time": 0.13300681114196777 + }, + { + "epoch": 4.06951904296875e-06, + "model_forward_time": 0.02476954460144043, + "step": 2667 + }, + { + "epoch": 4.06951904296875e-06, + "step": 2667, + "training_step_time": 0.12348794937133789 + }, + { + "epoch": 4.071044921875e-06, + "model_forward_time": 0.024622201919555664, + "step": 2668 + }, + { + "epoch": 4.071044921875e-06, + "step": 2668, + "training_step_time": 0.11321663856506348 + }, + { + "epoch": 4.07257080078125e-06, + "model_forward_time": 0.025504589080810547, + "step": 2669 + }, + { + "epoch": 4.07257080078125e-06, + "step": 2669, + "training_step_time": 0.1139822006225586 + }, + { + "epoch": 4.0740966796875e-06, + "grad_norm": 0.6662260890007019, + "learning_rate": 9.95847403914247e-05, + "loss": 0.1603, + "step": 2670 + }, + { + "epoch": 4.0740966796875e-06, + "model_forward_time": 0.02512979507446289, + "step": 2670 + }, + { + "epoch": 4.0740966796875e-06, + "step": 2670, + "training_step_time": 0.11485671997070312 + }, + { + "epoch": 4.07562255859375e-06, + "model_forward_time": 0.025142908096313477, + "step": 2671 + }, + { + "epoch": 4.07562255859375e-06, + "step": 2671, + "training_step_time": 0.112152099609375 + }, + { + "epoch": 4.0771484375e-06, + "model_forward_time": 0.02572941780090332, + "step": 2672 + }, + { + "epoch": 4.0771484375e-06, + "step": 2672, + "training_step_time": 0.10960817337036133 + }, + { + "epoch": 4.07867431640625e-06, + "model_forward_time": 0.025151491165161133, + "step": 2673 + }, + { + "epoch": 4.07867431640625e-06, + "step": 2673, + "training_step_time": 0.10865187644958496 + }, + { + "epoch": 4.0802001953125e-06, + "model_forward_time": 0.0252840518951416, + "step": 2674 + }, + { + "epoch": 4.0802001953125e-06, + "step": 2674, + "training_step_time": 0.10846114158630371 + }, + { + "epoch": 4.08172607421875e-06, + "model_forward_time": 0.025331497192382812, + "step": 2675 + }, + { + "epoch": 4.08172607421875e-06, + "step": 2675, + "training_step_time": 0.17831993103027344 + }, + { + "epoch": 4.083251953125e-06, + "model_forward_time": 0.02456355094909668, + "step": 2676 + }, + { + "epoch": 4.083251953125e-06, + "step": 2676, + "training_step_time": 0.18026018142700195 + }, + { + "epoch": 4.08477783203125e-06, + "model_forward_time": 0.0242917537689209, + "step": 2677 + }, + { + "epoch": 4.08477783203125e-06, + "step": 2677, + "training_step_time": 0.18127012252807617 + }, + { + "epoch": 4.0863037109375e-06, + "model_forward_time": 0.02460026741027832, + "step": 2678 + }, + { + "epoch": 4.0863037109375e-06, + "step": 2678, + "training_step_time": 0.12339186668395996 + }, + { + "epoch": 4.08782958984375e-06, + "model_forward_time": 0.024390697479248047, + "step": 2679 + }, + { + "epoch": 4.08782958984375e-06, + "step": 2679, + "training_step_time": 0.11007428169250488 + }, + { + "epoch": 4.08935546875e-06, + "grad_norm": 0.8351659178733826, + "learning_rate": 9.957762165497686e-05, + "loss": 0.1788, + "step": 2680 + }, + { + "epoch": 4.08935546875e-06, + "model_forward_time": 0.025530099868774414, + "step": 2680 + }, + { + "epoch": 4.08935546875e-06, + "step": 2680, + "training_step_time": 0.11474108695983887 + }, + { + "epoch": 4.09088134765625e-06, + "model_forward_time": 0.0251772403717041, + "step": 2681 + }, + { + "epoch": 4.09088134765625e-06, + "step": 2681, + "training_step_time": 0.10891962051391602 + }, + { + "epoch": 4.0924072265625e-06, + "model_forward_time": 0.024941444396972656, + "step": 2682 + }, + { + "epoch": 4.0924072265625e-06, + "step": 2682, + "training_step_time": 0.11425948143005371 + }, + { + "epoch": 4.09393310546875e-06, + "model_forward_time": 0.025307178497314453, + "step": 2683 + }, + { + "epoch": 4.09393310546875e-06, + "step": 2683, + "training_step_time": 0.11291170120239258 + }, + { + "epoch": 4.095458984375e-06, + "model_forward_time": 0.0250089168548584, + "step": 2684 + }, + { + "epoch": 4.095458984375e-06, + "step": 2684, + "training_step_time": 0.10802578926086426 + }, + { + "epoch": 4.09698486328125e-06, + "model_forward_time": 0.025241374969482422, + "step": 2685 + }, + { + "epoch": 4.09698486328125e-06, + "step": 2685, + "training_step_time": 0.13444757461547852 + }, + { + "epoch": 4.0985107421875e-06, + "model_forward_time": 0.025255680084228516, + "step": 2686 + }, + { + "epoch": 4.0985107421875e-06, + "step": 2686, + "training_step_time": 0.13781356811523438 + }, + { + "epoch": 4.10003662109375e-06, + "model_forward_time": 0.025057315826416016, + "step": 2687 + }, + { + "epoch": 4.10003662109375e-06, + "step": 2687, + "training_step_time": 0.11336636543273926 + }, + { + "epoch": 4.1015625e-06, + "model_forward_time": 0.02519512176513672, + "step": 2688 + }, + { + "epoch": 4.1015625e-06, + "step": 2688, + "training_step_time": 0.1180574893951416 + }, + { + "epoch": 4.10308837890625e-06, + "model_forward_time": 0.025197267532348633, + "step": 2689 + }, + { + "epoch": 4.10308837890625e-06, + "step": 2689, + "training_step_time": 0.11135029792785645 + }, + { + "epoch": 4.1046142578125e-06, + "grad_norm": 0.6094166040420532, + "learning_rate": 9.957044267704384e-05, + "loss": 0.1523, + "step": 2690 + }, + { + "epoch": 4.1046142578125e-06, + "model_forward_time": 0.0252230167388916, + "step": 2690 + }, + { + "epoch": 4.1046142578125e-06, + "step": 2690, + "training_step_time": 0.1869983673095703 + }, + { + "epoch": 4.10614013671875e-06, + "model_forward_time": 0.02469015121459961, + "step": 2691 + }, + { + "epoch": 4.10614013671875e-06, + "step": 2691, + "training_step_time": 0.11540579795837402 + }, + { + "epoch": 4.107666015625e-06, + "model_forward_time": 0.025210142135620117, + "step": 2692 + }, + { + "epoch": 4.107666015625e-06, + "step": 2692, + "training_step_time": 0.11225557327270508 + }, + { + "epoch": 4.10919189453125e-06, + "model_forward_time": 0.02594780921936035, + "step": 2693 + }, + { + "epoch": 4.10919189453125e-06, + "step": 2693, + "training_step_time": 0.10793089866638184 + }, + { + "epoch": 4.1107177734375e-06, + "model_forward_time": 0.025758743286132812, + "step": 2694 + }, + { + "epoch": 4.1107177734375e-06, + "step": 2694, + "training_step_time": 0.10962867736816406 + }, + { + "epoch": 4.11224365234375e-06, + "model_forward_time": 0.025530338287353516, + "step": 2695 + }, + { + "epoch": 4.11224365234375e-06, + "step": 2695, + "training_step_time": 0.10907864570617676 + }, + { + "epoch": 4.11376953125e-06, + "model_forward_time": 0.02521347999572754, + "step": 2696 + }, + { + "epoch": 4.11376953125e-06, + "step": 2696, + "training_step_time": 0.11299681663513184 + }, + { + "epoch": 4.11529541015625e-06, + "model_forward_time": 0.02534770965576172, + "step": 2697 + }, + { + "epoch": 4.11529541015625e-06, + "step": 2697, + "training_step_time": 0.11208987236022949 + }, + { + "epoch": 4.1168212890625e-06, + "model_forward_time": 0.02551746368408203, + "step": 2698 + }, + { + "epoch": 4.1168212890625e-06, + "step": 2698, + "training_step_time": 0.22218728065490723 + }, + { + "epoch": 4.11834716796875e-06, + "model_forward_time": 0.024779796600341797, + "step": 2699 + }, + { + "epoch": 4.11834716796875e-06, + "step": 2699, + "training_step_time": 0.10980939865112305 + }, + { + "epoch": 4.119873046875e-06, + "grad_norm": 0.7234001755714417, + "learning_rate": 9.956320346634876e-05, + "loss": 0.106, + "step": 2700 + }, + { + "epoch": 4.119873046875e-06, + "model_forward_time": 0.02491617202758789, + "step": 2700 + }, + { + "epoch": 4.119873046875e-06, + "step": 2700, + "training_step_time": 0.10665249824523926 + }, + { + "epoch": 4.12139892578125e-06, + "model_forward_time": 0.02508401870727539, + "step": 2701 + }, + { + "epoch": 4.12139892578125e-06, + "step": 2701, + "training_step_time": 0.10627007484436035 + }, + { + "epoch": 4.1229248046875e-06, + "model_forward_time": 0.025051116943359375, + "step": 2702 + }, + { + "epoch": 4.1229248046875e-06, + "step": 2702, + "training_step_time": 0.10767531394958496 + }, + { + "epoch": 4.12445068359375e-06, + "model_forward_time": 0.025228500366210938, + "step": 2703 + }, + { + "epoch": 4.12445068359375e-06, + "step": 2703, + "training_step_time": 0.1067044734954834 + }, + { + "epoch": 4.1259765625e-06, + "model_forward_time": 0.025156021118164062, + "step": 2704 + }, + { + "epoch": 4.1259765625e-06, + "step": 2704, + "training_step_time": 0.10915851593017578 + }, + { + "epoch": 4.12750244140625e-06, + "model_forward_time": 0.02491927146911621, + "step": 2705 + }, + { + "epoch": 4.12750244140625e-06, + "step": 2705, + "training_step_time": 0.11313343048095703 + }, + { + "epoch": 4.1290283203125e-06, + "model_forward_time": 0.025448083877563477, + "step": 2706 + }, + { + "epoch": 4.1290283203125e-06, + "step": 2706, + "training_step_time": 0.11539244651794434 + }, + { + "epoch": 4.13055419921875e-06, + "model_forward_time": 0.024895668029785156, + "step": 2707 + }, + { + "epoch": 4.13055419921875e-06, + "step": 2707, + "training_step_time": 0.11974263191223145 + }, + { + "epoch": 4.132080078125e-06, + "model_forward_time": 0.025686264038085938, + "step": 2708 + }, + { + "epoch": 4.132080078125e-06, + "step": 2708, + "training_step_time": 0.11359095573425293 + }, + { + "epoch": 4.13360595703125e-06, + "model_forward_time": 0.024784564971923828, + "step": 2709 + }, + { + "epoch": 4.13360595703125e-06, + "step": 2709, + "training_step_time": 0.11349678039550781 + }, + { + "epoch": 4.1351318359375e-06, + "grad_norm": 0.5547717809677124, + "learning_rate": 9.955590403168799e-05, + "loss": 0.1129, + "step": 2710 + }, + { + "epoch": 4.1351318359375e-06, + "model_forward_time": 0.02514934539794922, + "step": 2710 + }, + { + "epoch": 4.1351318359375e-06, + "step": 2710, + "training_step_time": 0.10987210273742676 + }, + { + "epoch": 4.13665771484375e-06, + "model_forward_time": 0.02508544921875, + "step": 2711 + }, + { + "epoch": 4.13665771484375e-06, + "step": 2711, + "training_step_time": 0.10998225212097168 + }, + { + "epoch": 4.13818359375e-06, + "model_forward_time": 0.02506542205810547, + "step": 2712 + }, + { + "epoch": 4.13818359375e-06, + "step": 2712, + "training_step_time": 0.11174988746643066 + }, + { + "epoch": 4.13970947265625e-06, + "model_forward_time": 0.024965763092041016, + "step": 2713 + }, + { + "epoch": 4.13970947265625e-06, + "step": 2713, + "training_step_time": 0.10977840423583984 + }, + { + "epoch": 4.1412353515625e-06, + "model_forward_time": 0.025157451629638672, + "step": 2714 + }, + { + "epoch": 4.1412353515625e-06, + "step": 2714, + "training_step_time": 0.1067650318145752 + }, + { + "epoch": 4.14276123046875e-06, + "model_forward_time": 0.024903535842895508, + "step": 2715 + }, + { + "epoch": 4.14276123046875e-06, + "step": 2715, + "training_step_time": 0.10834574699401855 + }, + { + "epoch": 4.144287109375e-06, + "model_forward_time": 0.028226613998413086, + "step": 2716 + }, + { + "epoch": 4.144287109375e-06, + "step": 2716, + "training_step_time": 0.11452031135559082 + }, + { + "epoch": 4.14581298828125e-06, + "model_forward_time": 0.025138139724731445, + "step": 2717 + }, + { + "epoch": 4.14581298828125e-06, + "step": 2717, + "training_step_time": 0.10767292976379395 + }, + { + "epoch": 4.1473388671875e-06, + "model_forward_time": 0.024976491928100586, + "step": 2718 + }, + { + "epoch": 4.1473388671875e-06, + "step": 2718, + "training_step_time": 0.10459065437316895 + }, + { + "epoch": 4.14886474609375e-06, + "model_forward_time": 0.02512335777282715, + "step": 2719 + }, + { + "epoch": 4.14886474609375e-06, + "step": 2719, + "training_step_time": 0.10622358322143555 + }, + { + "epoch": 4.150390625e-06, + "grad_norm": 0.9897136688232422, + "learning_rate": 9.9548544381931e-05, + "loss": 0.1498, + "step": 2720 + }, + { + "epoch": 4.150390625e-06, + "model_forward_time": 0.025136947631835938, + "step": 2720 + }, + { + "epoch": 4.150390625e-06, + "step": 2720, + "training_step_time": 0.10980892181396484 + }, + { + "epoch": 4.15191650390625e-06, + "model_forward_time": 0.02495288848876953, + "step": 2721 + }, + { + "epoch": 4.15191650390625e-06, + "step": 2721, + "training_step_time": 0.10661029815673828 + }, + { + "epoch": 4.1534423828125e-06, + "model_forward_time": 0.02490067481994629, + "step": 2722 + }, + { + "epoch": 4.1534423828125e-06, + "step": 2722, + "training_step_time": 0.19107842445373535 + }, + { + "epoch": 4.15496826171875e-06, + "model_forward_time": 0.02626347541809082, + "step": 2723 + }, + { + "epoch": 4.15496826171875e-06, + "step": 2723, + "training_step_time": 0.10937261581420898 + }, + { + "epoch": 4.156494140625e-06, + "model_forward_time": 0.025488615036010742, + "step": 2724 + }, + { + "epoch": 4.156494140625e-06, + "step": 2724, + "training_step_time": 0.11562538146972656 + }, + { + "epoch": 4.15802001953125e-06, + "model_forward_time": 0.02881026268005371, + "step": 2725 + }, + { + "epoch": 4.15802001953125e-06, + "step": 2725, + "training_step_time": 0.12107086181640625 + }, + { + "epoch": 4.1595458984375e-06, + "model_forward_time": 0.025046586990356445, + "step": 2726 + }, + { + "epoch": 4.1595458984375e-06, + "step": 2726, + "training_step_time": 0.21125197410583496 + }, + { + "epoch": 4.16107177734375e-06, + "model_forward_time": 0.02410292625427246, + "step": 2727 + }, + { + "epoch": 4.16107177734375e-06, + "step": 2727, + "training_step_time": 0.14776849746704102 + }, + { + "epoch": 4.16259765625e-06, + "model_forward_time": 0.02440953254699707, + "step": 2728 + }, + { + "epoch": 4.16259765625e-06, + "step": 2728, + "training_step_time": 0.10667824745178223 + }, + { + "epoch": 4.16412353515625e-06, + "model_forward_time": 0.025004148483276367, + "step": 2729 + }, + { + "epoch": 4.16412353515625e-06, + "step": 2729, + "training_step_time": 0.10719132423400879 + }, + { + "epoch": 4.1656494140625e-06, + "grad_norm": 0.8511749505996704, + "learning_rate": 9.954112452602045e-05, + "loss": 0.1545, + "step": 2730 + }, + { + "epoch": 4.1656494140625e-06, + "model_forward_time": 0.02574324607849121, + "step": 2730 + }, + { + "epoch": 4.1656494140625e-06, + "step": 2730, + "training_step_time": 0.10624361038208008 + }, + { + "epoch": 4.16717529296875e-06, + "model_forward_time": 0.0293428897857666, + "step": 2731 + }, + { + "epoch": 4.16717529296875e-06, + "step": 2731, + "training_step_time": 0.11302828788757324 + }, + { + "epoch": 4.168701171875e-06, + "model_forward_time": 0.024964332580566406, + "step": 2732 + }, + { + "epoch": 4.168701171875e-06, + "step": 2732, + "training_step_time": 0.12818241119384766 + }, + { + "epoch": 4.17022705078125e-06, + "model_forward_time": 0.025168180465698242, + "step": 2733 + }, + { + "epoch": 4.17022705078125e-06, + "step": 2733, + "training_step_time": 0.14683961868286133 + }, + { + "epoch": 4.1717529296875e-06, + "model_forward_time": 0.02550816535949707, + "step": 2734 + }, + { + "epoch": 4.1717529296875e-06, + "step": 2734, + "training_step_time": 0.10952877998352051 + }, + { + "epoch": 4.17327880859375e-06, + "model_forward_time": 0.0255277156829834, + "step": 2735 + }, + { + "epoch": 4.17327880859375e-06, + "step": 2735, + "training_step_time": 0.10734128952026367 + }, + { + "epoch": 4.1748046875e-06, + "model_forward_time": 0.025738239288330078, + "step": 2736 + }, + { + "epoch": 4.1748046875e-06, + "step": 2736, + "training_step_time": 0.10856509208679199 + }, + { + "epoch": 4.17633056640625e-06, + "model_forward_time": 0.02543020248413086, + "step": 2737 + }, + { + "epoch": 4.17633056640625e-06, + "step": 2737, + "training_step_time": 0.11371254920959473 + }, + { + "epoch": 4.1778564453125e-06, + "model_forward_time": 0.02515888214111328, + "step": 2738 + }, + { + "epoch": 4.1778564453125e-06, + "step": 2738, + "training_step_time": 0.2068016529083252 + }, + { + "epoch": 4.17938232421875e-06, + "model_forward_time": 0.0245516300201416, + "step": 2739 + }, + { + "epoch": 4.17938232421875e-06, + "step": 2739, + "training_step_time": 0.1056830883026123 + }, + { + "epoch": 4.180908203125e-06, + "grad_norm": 0.5626347064971924, + "learning_rate": 9.953364447297219e-05, + "loss": 0.1306, + "step": 2740 + }, + { + "epoch": 4.180908203125e-06, + "model_forward_time": 0.024382829666137695, + "step": 2740 + }, + { + "epoch": 4.180908203125e-06, + "step": 2740, + "training_step_time": 0.10436034202575684 + }, + { + "epoch": 4.18243408203125e-06, + "model_forward_time": 0.02515578269958496, + "step": 2741 + }, + { + "epoch": 4.18243408203125e-06, + "step": 2741, + "training_step_time": 0.21937108039855957 + }, + { + "epoch": 4.1839599609375e-06, + "model_forward_time": 0.024631023406982422, + "step": 2742 + }, + { + "epoch": 4.1839599609375e-06, + "step": 2742, + "training_step_time": 0.10819697380065918 + }, + { + "epoch": 4.18548583984375e-06, + "model_forward_time": 0.02543187141418457, + "step": 2743 + }, + { + "epoch": 4.18548583984375e-06, + "step": 2743, + "training_step_time": 0.10727047920227051 + }, + { + "epoch": 4.18701171875e-06, + "model_forward_time": 0.025311708450317383, + "step": 2744 + }, + { + "epoch": 4.18701171875e-06, + "step": 2744, + "training_step_time": 0.10822248458862305 + }, + { + "epoch": 4.18853759765625e-06, + "model_forward_time": 0.025208234786987305, + "step": 2745 + }, + { + "epoch": 4.18853759765625e-06, + "step": 2745, + "training_step_time": 0.1193997859954834 + }, + { + "epoch": 4.1900634765625e-06, + "model_forward_time": 0.02602386474609375, + "step": 2746 + }, + { + "epoch": 4.1900634765625e-06, + "step": 2746, + "training_step_time": 0.11086630821228027 + }, + { + "epoch": 4.19158935546875e-06, + "model_forward_time": 0.025446176528930664, + "step": 2747 + }, + { + "epoch": 4.19158935546875e-06, + "step": 2747, + "training_step_time": 0.10746574401855469 + }, + { + "epoch": 4.193115234375e-06, + "model_forward_time": 0.025178909301757812, + "step": 2748 + }, + { + "epoch": 4.193115234375e-06, + "step": 2748, + "training_step_time": 0.10785436630249023 + }, + { + "epoch": 4.19464111328125e-06, + "model_forward_time": 0.024927139282226562, + "step": 2749 + }, + { + "epoch": 4.19464111328125e-06, + "step": 2749, + "training_step_time": 0.10636711120605469 + }, + { + "epoch": 4.1961669921875e-06, + "grad_norm": 0.7132095694541931, + "learning_rate": 9.952610423187516e-05, + "loss": 0.1481, + "step": 2750 + }, + { + "epoch": 4.1961669921875e-06, + "model_forward_time": 0.025022506713867188, + "step": 2750 + }, + { + "epoch": 4.1961669921875e-06, + "step": 2750, + "training_step_time": 0.10988736152648926 + }, + { + "epoch": 4.19769287109375e-06, + "model_forward_time": 0.02560710906982422, + "step": 2751 + }, + { + "epoch": 4.19769287109375e-06, + "step": 2751, + "training_step_time": 0.1097877025604248 + }, + { + "epoch": 4.19921875e-06, + "model_forward_time": 0.025916576385498047, + "step": 2752 + }, + { + "epoch": 4.19921875e-06, + "step": 2752, + "training_step_time": 0.10747647285461426 + }, + { + "epoch": 4.20074462890625e-06, + "model_forward_time": 0.02539539337158203, + "step": 2753 + }, + { + "epoch": 4.20074462890625e-06, + "step": 2753, + "training_step_time": 0.10840535163879395 + }, + { + "epoch": 4.2022705078125e-06, + "model_forward_time": 0.029796600341796875, + "step": 2754 + }, + { + "epoch": 4.2022705078125e-06, + "step": 2754, + "training_step_time": 0.11266589164733887 + }, + { + "epoch": 4.20379638671875e-06, + "model_forward_time": 0.025039196014404297, + "step": 2755 + }, + { + "epoch": 4.20379638671875e-06, + "step": 2755, + "training_step_time": 0.10694360733032227 + }, + { + "epoch": 4.205322265625e-06, + "model_forward_time": 0.024927139282226562, + "step": 2756 + }, + { + "epoch": 4.205322265625e-06, + "step": 2756, + "training_step_time": 0.11135268211364746 + }, + { + "epoch": 4.20684814453125e-06, + "model_forward_time": 0.025053977966308594, + "step": 2757 + }, + { + "epoch": 4.20684814453125e-06, + "step": 2757, + "training_step_time": 0.1098787784576416 + }, + { + "epoch": 4.2083740234375e-06, + "model_forward_time": 0.0254976749420166, + "step": 2758 + }, + { + "epoch": 4.2083740234375e-06, + "step": 2758, + "training_step_time": 0.10670614242553711 + }, + { + "epoch": 4.20989990234375e-06, + "model_forward_time": 0.02415633201599121, + "step": 2759 + }, + { + "epoch": 4.20989990234375e-06, + "step": 2759, + "training_step_time": 0.10811328887939453 + }, + { + "epoch": 4.21142578125e-06, + "grad_norm": 0.9546775817871094, + "learning_rate": 9.95185038118915e-05, + "loss": 0.1372, + "step": 2760 + }, + { + "epoch": 4.21142578125e-06, + "model_forward_time": 0.025157451629638672, + "step": 2760 + }, + { + "epoch": 4.21142578125e-06, + "step": 2760, + "training_step_time": 0.10591316223144531 + }, + { + "epoch": 4.21295166015625e-06, + "model_forward_time": 0.025135040283203125, + "step": 2761 + }, + { + "epoch": 4.21295166015625e-06, + "step": 2761, + "training_step_time": 0.1121976375579834 + }, + { + "epoch": 4.2144775390625e-06, + "model_forward_time": 0.02525043487548828, + "step": 2762 + }, + { + "epoch": 4.2144775390625e-06, + "step": 2762, + "training_step_time": 0.11163759231567383 + }, + { + "epoch": 4.21600341796875e-06, + "model_forward_time": 0.025014877319335938, + "step": 2763 + }, + { + "epoch": 4.21600341796875e-06, + "step": 2763, + "training_step_time": 0.10879802703857422 + }, + { + "epoch": 4.217529296875e-06, + "model_forward_time": 0.024827241897583008, + "step": 2764 + }, + { + "epoch": 4.217529296875e-06, + "step": 2764, + "training_step_time": 0.10605001449584961 + }, + { + "epoch": 4.21905517578125e-06, + "model_forward_time": 0.025132417678833008, + "step": 2765 + }, + { + "epoch": 4.21905517578125e-06, + "step": 2765, + "training_step_time": 0.10844850540161133 + }, + { + "epoch": 4.2205810546875e-06, + "model_forward_time": 0.025289297103881836, + "step": 2766 + }, + { + "epoch": 4.2205810546875e-06, + "step": 2766, + "training_step_time": 0.11169075965881348 + }, + { + "epoch": 4.22210693359375e-06, + "model_forward_time": 0.02502131462097168, + "step": 2767 + }, + { + "epoch": 4.22210693359375e-06, + "step": 2767, + "training_step_time": 0.10914254188537598 + }, + { + "epoch": 4.2236328125e-06, + "model_forward_time": 0.025182008743286133, + "step": 2768 + }, + { + "epoch": 4.2236328125e-06, + "step": 2768, + "training_step_time": 0.13020896911621094 + }, + { + "epoch": 4.22515869140625e-06, + "model_forward_time": 0.024641752243041992, + "step": 2769 + }, + { + "epoch": 4.22515869140625e-06, + "step": 2769, + "training_step_time": 0.14920425415039062 + }, + { + "epoch": 4.2266845703125e-06, + "grad_norm": 1.108259677886963, + "learning_rate": 9.951084322225641e-05, + "loss": 0.1249, + "step": 2770 + }, + { + "epoch": 4.2266845703125e-06, + "model_forward_time": 0.02445220947265625, + "step": 2770 + }, + { + "epoch": 4.2266845703125e-06, + "step": 2770, + "training_step_time": 0.22476983070373535 + }, + { + "epoch": 4.22821044921875e-06, + "model_forward_time": 0.02431488037109375, + "step": 2771 + }, + { + "epoch": 4.22821044921875e-06, + "step": 2771, + "training_step_time": 0.2097334861755371 + }, + { + "epoch": 4.229736328125e-06, + "model_forward_time": 0.024579763412475586, + "step": 2772 + }, + { + "epoch": 4.229736328125e-06, + "step": 2772, + "training_step_time": 0.13304352760314941 + }, + { + "epoch": 4.23126220703125e-06, + "model_forward_time": 0.024151325225830078, + "step": 2773 + }, + { + "epoch": 4.23126220703125e-06, + "step": 2773, + "training_step_time": 0.2030806541442871 + }, + { + "epoch": 4.2327880859375e-06, + "model_forward_time": 0.02460503578186035, + "step": 2774 + }, + { + "epoch": 4.2327880859375e-06, + "step": 2774, + "training_step_time": 0.11671948432922363 + }, + { + "epoch": 4.23431396484375e-06, + "model_forward_time": 0.024417638778686523, + "step": 2775 + }, + { + "epoch": 4.23431396484375e-06, + "step": 2775, + "training_step_time": 0.1087496280670166 + }, + { + "epoch": 4.23583984375e-06, + "model_forward_time": 0.028589248657226562, + "step": 2776 + }, + { + "epoch": 4.23583984375e-06, + "step": 2776, + "training_step_time": 0.11244964599609375 + }, + { + "epoch": 4.23736572265625e-06, + "model_forward_time": 0.025116443634033203, + "step": 2777 + }, + { + "epoch": 4.23736572265625e-06, + "step": 2777, + "training_step_time": 0.10978078842163086 + }, + { + "epoch": 4.2388916015625e-06, + "model_forward_time": 0.024893999099731445, + "step": 2778 + }, + { + "epoch": 4.2388916015625e-06, + "step": 2778, + "training_step_time": 0.12011241912841797 + }, + { + "epoch": 4.24041748046875e-06, + "model_forward_time": 0.025335073471069336, + "step": 2779 + }, + { + "epoch": 4.24041748046875e-06, + "step": 2779, + "training_step_time": 0.13481616973876953 + }, + { + "epoch": 4.241943359375e-06, + "grad_norm": 0.9175239205360413, + "learning_rate": 9.950312247227825e-05, + "loss": 0.1373, + "step": 2780 + }, + { + "epoch": 4.241943359375e-06, + "model_forward_time": 0.024530649185180664, + "step": 2780 + }, + { + "epoch": 4.241943359375e-06, + "step": 2780, + "training_step_time": 0.10839080810546875 + }, + { + "epoch": 4.24346923828125e-06, + "model_forward_time": 0.025356292724609375, + "step": 2781 + }, + { + "epoch": 4.24346923828125e-06, + "step": 2781, + "training_step_time": 0.11330747604370117 + }, + { + "epoch": 4.2449951171875e-06, + "model_forward_time": 0.025420427322387695, + "step": 2782 + }, + { + "epoch": 4.2449951171875e-06, + "step": 2782, + "training_step_time": 0.10999894142150879 + }, + { + "epoch": 4.24652099609375e-06, + "model_forward_time": 0.025222301483154297, + "step": 2783 + }, + { + "epoch": 4.24652099609375e-06, + "step": 2783, + "training_step_time": 0.11020755767822266 + }, + { + "epoch": 4.248046875e-06, + "model_forward_time": 0.02972698211669922, + "step": 2784 + }, + { + "epoch": 4.248046875e-06, + "step": 2784, + "training_step_time": 0.21287822723388672 + }, + { + "epoch": 4.24957275390625e-06, + "model_forward_time": 0.0247189998626709, + "step": 2785 + }, + { + "epoch": 4.24957275390625e-06, + "step": 2785, + "training_step_time": 0.11601758003234863 + }, + { + "epoch": 4.2510986328125e-06, + "model_forward_time": 0.024424314498901367, + "step": 2786 + }, + { + "epoch": 4.2510986328125e-06, + "step": 2786, + "training_step_time": 0.18888211250305176 + }, + { + "epoch": 4.25262451171875e-06, + "model_forward_time": 0.02725386619567871, + "step": 2787 + }, + { + "epoch": 4.25262451171875e-06, + "step": 2787, + "training_step_time": 0.12241959571838379 + }, + { + "epoch": 4.254150390625e-06, + "model_forward_time": 0.02453923225402832, + "step": 2788 + }, + { + "epoch": 4.254150390625e-06, + "step": 2788, + "training_step_time": 0.11156773567199707 + }, + { + "epoch": 4.25567626953125e-06, + "model_forward_time": 0.024919509887695312, + "step": 2789 + }, + { + "epoch": 4.25567626953125e-06, + "step": 2789, + "training_step_time": 0.10801911354064941 + }, + { + "epoch": 4.2572021484375e-06, + "grad_norm": 0.8682767748832703, + "learning_rate": 9.949534157133844e-05, + "loss": 0.1544, + "step": 2790 + }, + { + "epoch": 4.2572021484375e-06, + "model_forward_time": 0.02493762969970703, + "step": 2790 + }, + { + "epoch": 4.2572021484375e-06, + "step": 2790, + "training_step_time": 0.17709136009216309 + }, + { + "epoch": 4.25872802734375e-06, + "model_forward_time": 0.024722814559936523, + "step": 2791 + }, + { + "epoch": 4.25872802734375e-06, + "step": 2791, + "training_step_time": 0.16068506240844727 + }, + { + "epoch": 4.26025390625e-06, + "model_forward_time": 0.02444624900817871, + "step": 2792 + }, + { + "epoch": 4.26025390625e-06, + "step": 2792, + "training_step_time": 0.10413527488708496 + }, + { + "epoch": 4.26177978515625e-06, + "model_forward_time": 0.02468729019165039, + "step": 2793 + }, + { + "epoch": 4.26177978515625e-06, + "step": 2793, + "training_step_time": 0.10332489013671875 + }, + { + "epoch": 4.2633056640625e-06, + "model_forward_time": 0.02556324005126953, + "step": 2794 + }, + { + "epoch": 4.2633056640625e-06, + "step": 2794, + "training_step_time": 0.10662341117858887 + }, + { + "epoch": 4.26483154296875e-06, + "model_forward_time": 0.02501988410949707, + "step": 2795 + }, + { + "epoch": 4.26483154296875e-06, + "step": 2795, + "training_step_time": 0.10595536231994629 + }, + { + "epoch": 4.266357421875e-06, + "model_forward_time": 0.02497553825378418, + "step": 2796 + }, + { + "epoch": 4.266357421875e-06, + "step": 2796, + "training_step_time": 0.10684728622436523 + }, + { + "epoch": 4.26788330078125e-06, + "model_forward_time": 0.025267601013183594, + "step": 2797 + }, + { + "epoch": 4.26788330078125e-06, + "step": 2797, + "training_step_time": 0.1061089038848877 + }, + { + "epoch": 4.2694091796875e-06, + "model_forward_time": 0.025238990783691406, + "step": 2798 + }, + { + "epoch": 4.2694091796875e-06, + "step": 2798, + "training_step_time": 0.11097550392150879 + }, + { + "epoch": 4.27093505859375e-06, + "model_forward_time": 0.025223731994628906, + "step": 2799 + }, + { + "epoch": 4.27093505859375e-06, + "step": 2799, + "training_step_time": 0.11216378211975098 + }, + { + "epoch": 4.2724609375e-06, + "grad_norm": 0.5365232825279236, + "learning_rate": 9.94875005288915e-05, + "loss": 0.132, + "step": 2800 + }, + { + "epoch": 4.2724609375e-06, + "model_forward_time": 0.025026798248291016, + "step": 2800 + }, + { + "epoch": 4.2724609375e-06, + "step": 2800, + "training_step_time": 0.11704492568969727 + }, + { + "epoch": 4.27398681640625e-06, + "model_forward_time": 0.025136470794677734, + "step": 2801 + }, + { + "epoch": 4.27398681640625e-06, + "step": 2801, + "training_step_time": 0.13306212425231934 + }, + { + "epoch": 4.2755126953125e-06, + "model_forward_time": 0.02527904510498047, + "step": 2802 + }, + { + "epoch": 4.2755126953125e-06, + "step": 2802, + "training_step_time": 0.12569212913513184 + }, + { + "epoch": 4.27703857421875e-06, + "model_forward_time": 0.024560213088989258, + "step": 2803 + }, + { + "epoch": 4.27703857421875e-06, + "step": 2803, + "training_step_time": 0.1293184757232666 + }, + { + "epoch": 4.278564453125e-06, + "model_forward_time": 0.024906396865844727, + "step": 2804 + }, + { + "epoch": 4.278564453125e-06, + "step": 2804, + "training_step_time": 0.12253785133361816 + }, + { + "epoch": 4.28009033203125e-06, + "model_forward_time": 0.025130033493041992, + "step": 2805 + }, + { + "epoch": 4.28009033203125e-06, + "step": 2805, + "training_step_time": 0.12067270278930664 + }, + { + "epoch": 4.2816162109375e-06, + "model_forward_time": 0.02831268310546875, + "step": 2806 + }, + { + "epoch": 4.2816162109375e-06, + "step": 2806, + "training_step_time": 0.11629176139831543 + }, + { + "epoch": 4.28314208984375e-06, + "model_forward_time": 0.025098085403442383, + "step": 2807 + }, + { + "epoch": 4.28314208984375e-06, + "step": 2807, + "training_step_time": 0.11623263359069824 + }, + { + "epoch": 4.28466796875e-06, + "model_forward_time": 0.025158166885375977, + "step": 2808 + }, + { + "epoch": 4.28466796875e-06, + "step": 2808, + "training_step_time": 0.11282563209533691 + }, + { + "epoch": 4.28619384765625e-06, + "model_forward_time": 0.025584936141967773, + "step": 2809 + }, + { + "epoch": 4.28619384765625e-06, + "step": 2809, + "training_step_time": 0.11139798164367676 + }, + { + "epoch": 4.2877197265625e-06, + "grad_norm": 0.7726246118545532, + "learning_rate": 9.947959935446507e-05, + "loss": 0.1589, + "step": 2810 + }, + { + "epoch": 4.2877197265625e-06, + "model_forward_time": 0.025605201721191406, + "step": 2810 + }, + { + "epoch": 4.2877197265625e-06, + "step": 2810, + "training_step_time": 0.10854744911193848 + }, + { + "epoch": 4.28924560546875e-06, + "model_forward_time": 0.025130033493041992, + "step": 2811 + }, + { + "epoch": 4.28924560546875e-06, + "step": 2811, + "training_step_time": 0.10748934745788574 + }, + { + "epoch": 4.290771484375e-06, + "model_forward_time": 0.0254209041595459, + "step": 2812 + }, + { + "epoch": 4.290771484375e-06, + "step": 2812, + "training_step_time": 0.10888242721557617 + }, + { + "epoch": 4.29229736328125e-06, + "model_forward_time": 0.02546858787536621, + "step": 2813 + }, + { + "epoch": 4.29229736328125e-06, + "step": 2813, + "training_step_time": 0.10860180854797363 + }, + { + "epoch": 4.2938232421875e-06, + "model_forward_time": 0.025110721588134766, + "step": 2814 + }, + { + "epoch": 4.2938232421875e-06, + "step": 2814, + "training_step_time": 0.21330666542053223 + }, + { + "epoch": 4.29534912109375e-06, + "model_forward_time": 0.02472996711730957, + "step": 2815 + }, + { + "epoch": 4.29534912109375e-06, + "step": 2815, + "training_step_time": 0.13057613372802734 + }, + { + "epoch": 4.296875e-06, + "model_forward_time": 0.024506092071533203, + "step": 2816 + }, + { + "epoch": 4.296875e-06, + "step": 2816, + "training_step_time": 0.20092058181762695 + }, + { + "epoch": 4.29840087890625e-06, + "model_forward_time": 0.02419424057006836, + "step": 2817 + }, + { + "epoch": 4.29840087890625e-06, + "step": 2817, + "training_step_time": 0.13336777687072754 + }, + { + "epoch": 4.2999267578125e-06, + "model_forward_time": 0.024494409561157227, + "step": 2818 + }, + { + "epoch": 4.2999267578125e-06, + "step": 2818, + "training_step_time": 0.12605071067810059 + }, + { + "epoch": 4.30145263671875e-06, + "model_forward_time": 0.024712800979614258, + "step": 2819 + }, + { + "epoch": 4.30145263671875e-06, + "step": 2819, + "training_step_time": 0.16102075576782227 + }, + { + "epoch": 4.302978515625e-06, + "grad_norm": 0.7662800550460815, + "learning_rate": 9.94716380576598e-05, + "loss": 0.1394, + "step": 2820 + }, + { + "epoch": 4.302978515625e-06, + "model_forward_time": 0.027503252029418945, + "step": 2820 + }, + { + "epoch": 4.302978515625e-06, + "step": 2820, + "training_step_time": 0.1103670597076416 + }, + { + "epoch": 4.30450439453125e-06, + "model_forward_time": 0.024597644805908203, + "step": 2821 + }, + { + "epoch": 4.30450439453125e-06, + "step": 2821, + "training_step_time": 0.10418820381164551 + }, + { + "epoch": 4.3060302734375e-06, + "model_forward_time": 0.024363994598388672, + "step": 2822 + }, + { + "epoch": 4.3060302734375e-06, + "step": 2822, + "training_step_time": 0.1081233024597168 + }, + { + "epoch": 4.30755615234375e-06, + "model_forward_time": 0.025439739227294922, + "step": 2823 + }, + { + "epoch": 4.30755615234375e-06, + "step": 2823, + "training_step_time": 0.1555490493774414 + }, + { + "epoch": 4.30908203125e-06, + "model_forward_time": 0.024992704391479492, + "step": 2824 + }, + { + "epoch": 4.30908203125e-06, + "step": 2824, + "training_step_time": 0.14147615432739258 + }, + { + "epoch": 4.31060791015625e-06, + "model_forward_time": 0.025159597396850586, + "step": 2825 + }, + { + "epoch": 4.31060791015625e-06, + "step": 2825, + "training_step_time": 0.11299872398376465 + }, + { + "epoch": 4.3121337890625e-06, + "model_forward_time": 0.025101661682128906, + "step": 2826 + }, + { + "epoch": 4.3121337890625e-06, + "step": 2826, + "training_step_time": 0.11157035827636719 + }, + { + "epoch": 4.31365966796875e-06, + "model_forward_time": 0.025141000747680664, + "step": 2827 + }, + { + "epoch": 4.31365966796875e-06, + "step": 2827, + "training_step_time": 0.10704636573791504 + }, + { + "epoch": 4.315185546875e-06, + "model_forward_time": 0.0255887508392334, + "step": 2828 + }, + { + "epoch": 4.315185546875e-06, + "step": 2828, + "training_step_time": 0.11251211166381836 + }, + { + "epoch": 4.31671142578125e-06, + "model_forward_time": 0.026050090789794922, + "step": 2829 + }, + { + "epoch": 4.31671142578125e-06, + "step": 2829, + "training_step_time": 0.19586658477783203 + }, + { + "epoch": 4.3182373046875e-06, + "grad_norm": 0.7930927276611328, + "learning_rate": 9.946361664814943e-05, + "loss": 0.1426, + "step": 2830 + }, + { + "epoch": 4.3182373046875e-06, + "model_forward_time": 0.02529621124267578, + "step": 2830 + }, + { + "epoch": 4.3182373046875e-06, + "step": 2830, + "training_step_time": 0.10746026039123535 + }, + { + "epoch": 4.31976318359375e-06, + "model_forward_time": 0.024320602416992188, + "step": 2831 + }, + { + "epoch": 4.31976318359375e-06, + "step": 2831, + "training_step_time": 0.13182425498962402 + }, + { + "epoch": 4.3212890625e-06, + "model_forward_time": 0.02493882179260254, + "step": 2832 + }, + { + "epoch": 4.3212890625e-06, + "step": 2832, + "training_step_time": 0.11899733543395996 + }, + { + "epoch": 4.32281494140625e-06, + "model_forward_time": 0.025506973266601562, + "step": 2833 + }, + { + "epoch": 4.32281494140625e-06, + "step": 2833, + "training_step_time": 0.1136622428894043 + }, + { + "epoch": 4.3243408203125e-06, + "model_forward_time": 0.02584385871887207, + "step": 2834 + }, + { + "epoch": 4.3243408203125e-06, + "step": 2834, + "training_step_time": 0.11119794845581055 + }, + { + "epoch": 4.32586669921875e-06, + "model_forward_time": 0.0259859561920166, + "step": 2835 + }, + { + "epoch": 4.32586669921875e-06, + "step": 2835, + "training_step_time": 0.21367788314819336 + }, + { + "epoch": 4.327392578125e-06, + "model_forward_time": 0.02478790283203125, + "step": 2836 + }, + { + "epoch": 4.327392578125e-06, + "step": 2836, + "training_step_time": 0.12007260322570801 + }, + { + "epoch": 4.32891845703125e-06, + "model_forward_time": 0.02495718002319336, + "step": 2837 + }, + { + "epoch": 4.32891845703125e-06, + "step": 2837, + "training_step_time": 0.1038215160369873 + }, + { + "epoch": 4.3304443359375e-06, + "model_forward_time": 0.02550482749938965, + "step": 2838 + }, + { + "epoch": 4.3304443359375e-06, + "step": 2838, + "training_step_time": 0.10763120651245117 + }, + { + "epoch": 4.33197021484375e-06, + "model_forward_time": 0.025640487670898438, + "step": 2839 + }, + { + "epoch": 4.33197021484375e-06, + "step": 2839, + "training_step_time": 0.10847043991088867 + }, + { + "epoch": 4.33349609375e-06, + "grad_norm": 1.069185733795166, + "learning_rate": 9.945553513568068e-05, + "loss": 0.1457, + "step": 2840 + }, + { + "epoch": 4.33349609375e-06, + "model_forward_time": 0.0257875919342041, + "step": 2840 + }, + { + "epoch": 4.33349609375e-06, + "step": 2840, + "training_step_time": 0.10794520378112793 + }, + { + "epoch": 4.33502197265625e-06, + "model_forward_time": 0.025675058364868164, + "step": 2841 + }, + { + "epoch": 4.33502197265625e-06, + "step": 2841, + "training_step_time": 0.10611391067504883 + }, + { + "epoch": 4.3365478515625e-06, + "model_forward_time": 0.02539229393005371, + "step": 2842 + }, + { + "epoch": 4.3365478515625e-06, + "step": 2842, + "training_step_time": 0.11593151092529297 + }, + { + "epoch": 4.33807373046875e-06, + "model_forward_time": 0.025228023529052734, + "step": 2843 + }, + { + "epoch": 4.33807373046875e-06, + "step": 2843, + "training_step_time": 0.10912203788757324 + }, + { + "epoch": 4.339599609375e-06, + "model_forward_time": 0.02556133270263672, + "step": 2844 + }, + { + "epoch": 4.339599609375e-06, + "step": 2844, + "training_step_time": 0.10664772987365723 + }, + { + "epoch": 4.34112548828125e-06, + "model_forward_time": 0.025111675262451172, + "step": 2845 + }, + { + "epoch": 4.34112548828125e-06, + "step": 2845, + "training_step_time": 0.10616612434387207 + }, + { + "epoch": 4.3426513671875e-06, + "model_forward_time": 0.02564859390258789, + "step": 2846 + }, + { + "epoch": 4.3426513671875e-06, + "step": 2846, + "training_step_time": 0.1074683666229248 + }, + { + "epoch": 4.34417724609375e-06, + "model_forward_time": 0.025493621826171875, + "step": 2847 + }, + { + "epoch": 4.34417724609375e-06, + "step": 2847, + "training_step_time": 0.10701465606689453 + }, + { + "epoch": 4.345703125e-06, + "model_forward_time": 0.02537083625793457, + "step": 2848 + }, + { + "epoch": 4.345703125e-06, + "step": 2848, + "training_step_time": 0.10764622688293457 + }, + { + "epoch": 4.34722900390625e-06, + "model_forward_time": 0.025753498077392578, + "step": 2849 + }, + { + "epoch": 4.34722900390625e-06, + "step": 2849, + "training_step_time": 0.10744237899780273 + }, + { + "epoch": 4.3487548828125e-06, + "grad_norm": 0.5193539261817932, + "learning_rate": 9.944739353007344e-05, + "loss": 0.1199, + "step": 2850 + }, + { + "epoch": 4.3487548828125e-06, + "model_forward_time": 0.025250911712646484, + "step": 2850 + }, + { + "epoch": 4.3487548828125e-06, + "step": 2850, + "training_step_time": 0.1063528060913086 + }, + { + "epoch": 4.35028076171875e-06, + "model_forward_time": 0.025938749313354492, + "step": 2851 + }, + { + "epoch": 4.35028076171875e-06, + "step": 2851, + "training_step_time": 0.11112833023071289 + }, + { + "epoch": 4.351806640625e-06, + "model_forward_time": 0.0253140926361084, + "step": 2852 + }, + { + "epoch": 4.351806640625e-06, + "step": 2852, + "training_step_time": 0.10675048828125 + }, + { + "epoch": 4.35333251953125e-06, + "model_forward_time": 0.026303529739379883, + "step": 2853 + }, + { + "epoch": 4.35333251953125e-06, + "step": 2853, + "training_step_time": 0.11050891876220703 + }, + { + "epoch": 4.3548583984375e-06, + "model_forward_time": 0.025612592697143555, + "step": 2854 + }, + { + "epoch": 4.3548583984375e-06, + "step": 2854, + "training_step_time": 0.1068418025970459 + }, + { + "epoch": 4.35638427734375e-06, + "model_forward_time": 0.025264739990234375, + "step": 2855 + }, + { + "epoch": 4.35638427734375e-06, + "step": 2855, + "training_step_time": 0.10683393478393555 + }, + { + "epoch": 4.35791015625e-06, + "model_forward_time": 0.02545928955078125, + "step": 2856 + }, + { + "epoch": 4.35791015625e-06, + "step": 2856, + "training_step_time": 0.10934567451477051 + }, + { + "epoch": 4.35943603515625e-06, + "model_forward_time": 0.02525949478149414, + "step": 2857 + }, + { + "epoch": 4.35943603515625e-06, + "step": 2857, + "training_step_time": 0.10873246192932129 + }, + { + "epoch": 4.3609619140625e-06, + "model_forward_time": 0.025900602340698242, + "step": 2858 + }, + { + "epoch": 4.3609619140625e-06, + "step": 2858, + "training_step_time": 0.10770463943481445 + }, + { + "epoch": 4.36248779296875e-06, + "model_forward_time": 0.026159048080444336, + "step": 2859 + }, + { + "epoch": 4.36248779296875e-06, + "step": 2859, + "training_step_time": 0.11481833457946777 + }, + { + "epoch": 4.364013671875e-06, + "grad_norm": 0.9115896224975586, + "learning_rate": 9.943919184122043e-05, + "loss": 0.1402, + "step": 2860 + }, + { + "epoch": 4.364013671875e-06, + "model_forward_time": 0.0255584716796875, + "step": 2860 + }, + { + "epoch": 4.364013671875e-06, + "step": 2860, + "training_step_time": 0.1951124668121338 + }, + { + "epoch": 4.36553955078125e-06, + "model_forward_time": 0.02464127540588379, + "step": 2861 + }, + { + "epoch": 4.36553955078125e-06, + "step": 2861, + "training_step_time": 0.20286321640014648 + }, + { + "epoch": 4.3670654296875e-06, + "model_forward_time": 0.02478170394897461, + "step": 2862 + }, + { + "epoch": 4.3670654296875e-06, + "step": 2862, + "training_step_time": 0.13198113441467285 + }, + { + "epoch": 4.36859130859375e-06, + "model_forward_time": 0.024460554122924805, + "step": 2863 + }, + { + "epoch": 4.36859130859375e-06, + "step": 2863, + "training_step_time": 0.1323685646057129 + }, + { + "epoch": 4.3701171875e-06, + "model_forward_time": 0.02495861053466797, + "step": 2864 + }, + { + "epoch": 4.3701171875e-06, + "step": 2864, + "training_step_time": 0.16466927528381348 + }, + { + "epoch": 4.37164306640625e-06, + "model_forward_time": 0.02469015121459961, + "step": 2865 + }, + { + "epoch": 4.37164306640625e-06, + "step": 2865, + "training_step_time": 0.15828251838684082 + }, + { + "epoch": 4.3731689453125e-06, + "model_forward_time": 0.024965763092041016, + "step": 2866 + }, + { + "epoch": 4.3731689453125e-06, + "step": 2866, + "training_step_time": 0.10855245590209961 + }, + { + "epoch": 4.37469482421875e-06, + "model_forward_time": 0.02485179901123047, + "step": 2867 + }, + { + "epoch": 4.37469482421875e-06, + "step": 2867, + "training_step_time": 0.13995671272277832 + }, + { + "epoch": 4.376220703125e-06, + "model_forward_time": 0.02526378631591797, + "step": 2868 + }, + { + "epoch": 4.376220703125e-06, + "step": 2868, + "training_step_time": 0.19884872436523438 + }, + { + "epoch": 4.37774658203125e-06, + "model_forward_time": 0.02438521385192871, + "step": 2869 + }, + { + "epoch": 4.37774658203125e-06, + "step": 2869, + "training_step_time": 0.14481425285339355 + }, + { + "epoch": 4.3792724609375e-06, + "grad_norm": 1.1567416191101074, + "learning_rate": 9.943093007908755e-05, + "loss": 0.1096, + "step": 2870 + }, + { + "epoch": 4.3792724609375e-06, + "model_forward_time": 0.02489614486694336, + "step": 2870 + }, + { + "epoch": 4.3792724609375e-06, + "step": 2870, + "training_step_time": 0.2074892520904541 + }, + { + "epoch": 4.38079833984375e-06, + "model_forward_time": 0.025901317596435547, + "step": 2871 + }, + { + "epoch": 4.38079833984375e-06, + "step": 2871, + "training_step_time": 0.1325676441192627 + }, + { + "epoch": 4.38232421875e-06, + "model_forward_time": 0.02446269989013672, + "step": 2872 + }, + { + "epoch": 4.38232421875e-06, + "step": 2872, + "training_step_time": 0.1791667938232422 + }, + { + "epoch": 4.38385009765625e-06, + "model_forward_time": 0.025340557098388672, + "step": 2873 + }, + { + "epoch": 4.38385009765625e-06, + "step": 2873, + "training_step_time": 0.1185905933380127 + }, + { + "epoch": 4.3853759765625e-06, + "model_forward_time": 0.026240825653076172, + "step": 2874 + }, + { + "epoch": 4.3853759765625e-06, + "step": 2874, + "training_step_time": 0.11226582527160645 + }, + { + "epoch": 4.38690185546875e-06, + "model_forward_time": 0.026343345642089844, + "step": 2875 + }, + { + "epoch": 4.38690185546875e-06, + "step": 2875, + "training_step_time": 0.11324834823608398 + }, + { + "epoch": 4.388427734375e-06, + "model_forward_time": 0.025667905807495117, + "step": 2876 + }, + { + "epoch": 4.388427734375e-06, + "step": 2876, + "training_step_time": 0.2104356288909912 + }, + { + "epoch": 4.38995361328125e-06, + "model_forward_time": 0.026690244674682617, + "step": 2877 + }, + { + "epoch": 4.38995361328125e-06, + "step": 2877, + "training_step_time": 0.11708545684814453 + }, + { + "epoch": 4.3914794921875e-06, + "model_forward_time": 0.02524876594543457, + "step": 2878 + }, + { + "epoch": 4.3914794921875e-06, + "step": 2878, + "training_step_time": 0.1107950210571289 + }, + { + "epoch": 4.39300537109375e-06, + "model_forward_time": 0.026842117309570312, + "step": 2879 + }, + { + "epoch": 4.39300537109375e-06, + "step": 2879, + "training_step_time": 0.10951638221740723 + }, + { + "epoch": 4.39453125e-06, + "grad_norm": 0.8022437691688538, + "learning_rate": 9.942260825371358e-05, + "loss": 0.0972, + "step": 2880 + }, + { + "epoch": 4.39453125e-06, + "model_forward_time": 0.025896072387695312, + "step": 2880 + }, + { + "epoch": 4.39453125e-06, + "step": 2880, + "training_step_time": 0.19140625 + }, + { + "epoch": 4.39605712890625e-06, + "model_forward_time": 0.025415658950805664, + "step": 2881 + }, + { + "epoch": 4.39605712890625e-06, + "step": 2881, + "training_step_time": 0.11669254302978516 + }, + { + "epoch": 4.3975830078125e-06, + "model_forward_time": 0.02515387535095215, + "step": 2882 + }, + { + "epoch": 4.3975830078125e-06, + "step": 2882, + "training_step_time": 0.10691618919372559 + }, + { + "epoch": 4.39910888671875e-06, + "model_forward_time": 0.024991512298583984, + "step": 2883 + }, + { + "epoch": 4.39910888671875e-06, + "step": 2883, + "training_step_time": 0.10912060737609863 + }, + { + "epoch": 4.400634765625e-06, + "model_forward_time": 0.02570366859436035, + "step": 2884 + }, + { + "epoch": 4.400634765625e-06, + "step": 2884, + "training_step_time": 0.10786867141723633 + }, + { + "epoch": 4.40216064453125e-06, + "model_forward_time": 0.025783061981201172, + "step": 2885 + }, + { + "epoch": 4.40216064453125e-06, + "step": 2885, + "training_step_time": 0.11433076858520508 + }, + { + "epoch": 4.4036865234375e-06, + "model_forward_time": 0.02613353729248047, + "step": 2886 + }, + { + "epoch": 4.4036865234375e-06, + "step": 2886, + "training_step_time": 0.10788655281066895 + }, + { + "epoch": 4.40521240234375e-06, + "model_forward_time": 0.026128530502319336, + "step": 2887 + }, + { + "epoch": 4.40521240234375e-06, + "step": 2887, + "training_step_time": 0.10891532897949219 + }, + { + "epoch": 4.40673828125e-06, + "model_forward_time": 0.025747060775756836, + "step": 2888 + }, + { + "epoch": 4.40673828125e-06, + "step": 2888, + "training_step_time": 0.10700201988220215 + }, + { + "epoch": 4.40826416015625e-06, + "model_forward_time": 0.025652647018432617, + "step": 2889 + }, + { + "epoch": 4.40826416015625e-06, + "step": 2889, + "training_step_time": 0.10730957984924316 + }, + { + "epoch": 4.4097900390625e-06, + "grad_norm": 0.5341487526893616, + "learning_rate": 9.941422637521035e-05, + "loss": 0.1253, + "step": 2890 + }, + { + "epoch": 4.4097900390625e-06, + "model_forward_time": 0.02560901641845703, + "step": 2890 + }, + { + "epoch": 4.4097900390625e-06, + "step": 2890, + "training_step_time": 0.1115262508392334 + }, + { + "epoch": 4.41131591796875e-06, + "model_forward_time": 0.0257565975189209, + "step": 2891 + }, + { + "epoch": 4.41131591796875e-06, + "step": 2891, + "training_step_time": 0.10729789733886719 + }, + { + "epoch": 4.412841796875e-06, + "model_forward_time": 0.02559185028076172, + "step": 2892 + }, + { + "epoch": 4.412841796875e-06, + "step": 2892, + "training_step_time": 0.10714602470397949 + }, + { + "epoch": 4.41436767578125e-06, + "model_forward_time": 0.025635957717895508, + "step": 2893 + }, + { + "epoch": 4.41436767578125e-06, + "step": 2893, + "training_step_time": 0.11196184158325195 + }, + { + "epoch": 4.4158935546875e-06, + "model_forward_time": 0.026094913482666016, + "step": 2894 + }, + { + "epoch": 4.4158935546875e-06, + "step": 2894, + "training_step_time": 0.10988926887512207 + }, + { + "epoch": 4.41741943359375e-06, + "model_forward_time": 0.02536177635192871, + "step": 2895 + }, + { + "epoch": 4.41741943359375e-06, + "step": 2895, + "training_step_time": 0.10808682441711426 + }, + { + "epoch": 4.4189453125e-06, + "model_forward_time": 0.025553464889526367, + "step": 2896 + }, + { + "epoch": 4.4189453125e-06, + "step": 2896, + "training_step_time": 0.10755491256713867 + }, + { + "epoch": 4.42047119140625e-06, + "model_forward_time": 0.0259091854095459, + "step": 2897 + }, + { + "epoch": 4.42047119140625e-06, + "step": 2897, + "training_step_time": 0.10822916030883789 + }, + { + "epoch": 4.4219970703125e-06, + "model_forward_time": 0.025369644165039062, + "step": 2898 + }, + { + "epoch": 4.4219970703125e-06, + "step": 2898, + "training_step_time": 0.10725831985473633 + }, + { + "epoch": 4.42352294921875e-06, + "model_forward_time": 0.026180505752563477, + "step": 2899 + }, + { + "epoch": 4.42352294921875e-06, + "step": 2899, + "training_step_time": 0.10856175422668457 + }, + { + "epoch": 4.425048828125e-06, + "grad_norm": 0.4163476526737213, + "learning_rate": 9.940578445376258e-05, + "loss": 0.1239, + "step": 2900 + }, + { + "epoch": 4.425048828125e-06, + "model_forward_time": 0.02578449249267578, + "step": 2900 + }, + { + "epoch": 4.425048828125e-06, + "step": 2900, + "training_step_time": 0.11043620109558105 + }, + { + "epoch": 4.42657470703125e-06, + "model_forward_time": 0.02543330192565918, + "step": 2901 + }, + { + "epoch": 4.42657470703125e-06, + "step": 2901, + "training_step_time": 0.10791707038879395 + }, + { + "epoch": 4.4281005859375e-06, + "model_forward_time": 0.02577495574951172, + "step": 2902 + }, + { + "epoch": 4.4281005859375e-06, + "step": 2902, + "training_step_time": 0.11560869216918945 + }, + { + "epoch": 4.42962646484375e-06, + "model_forward_time": 0.025716066360473633, + "step": 2903 + }, + { + "epoch": 4.42962646484375e-06, + "step": 2903, + "training_step_time": 0.11022305488586426 + }, + { + "epoch": 4.43115234375e-06, + "model_forward_time": 0.025704622268676758, + "step": 2904 + }, + { + "epoch": 4.43115234375e-06, + "step": 2904, + "training_step_time": 0.2121117115020752 + }, + { + "epoch": 4.43267822265625e-06, + "model_forward_time": 0.02570343017578125, + "step": 2905 + }, + { + "epoch": 4.43267822265625e-06, + "step": 2905, + "training_step_time": 0.13804292678833008 + }, + { + "epoch": 4.4342041015625e-06, + "model_forward_time": 0.02502274513244629, + "step": 2906 + }, + { + "epoch": 4.4342041015625e-06, + "step": 2906, + "training_step_time": 0.20011472702026367 + }, + { + "epoch": 4.43572998046875e-06, + "model_forward_time": 0.024092912673950195, + "step": 2907 + }, + { + "epoch": 4.43572998046875e-06, + "step": 2907, + "training_step_time": 0.16405582427978516 + }, + { + "epoch": 4.437255859375e-06, + "model_forward_time": 0.025096416473388672, + "step": 2908 + }, + { + "epoch": 4.437255859375e-06, + "step": 2908, + "training_step_time": 0.1948237419128418 + }, + { + "epoch": 4.43878173828125e-06, + "model_forward_time": 0.024829864501953125, + "step": 2909 + }, + { + "epoch": 4.43878173828125e-06, + "step": 2909, + "training_step_time": 0.1401219367980957 + }, + { + "epoch": 4.4403076171875e-06, + "grad_norm": 0.5024942755699158, + "learning_rate": 9.939728249962807e-05, + "loss": 0.1176, + "step": 2910 + }, + { + "epoch": 4.4403076171875e-06, + "model_forward_time": 0.023928403854370117, + "step": 2910 + }, + { + "epoch": 4.4403076171875e-06, + "step": 2910, + "training_step_time": 0.10935497283935547 + }, + { + "epoch": 4.44183349609375e-06, + "model_forward_time": 0.0254514217376709, + "step": 2911 + }, + { + "epoch": 4.44183349609375e-06, + "step": 2911, + "training_step_time": 0.10876345634460449 + }, + { + "epoch": 4.443359375e-06, + "model_forward_time": 0.025729894638061523, + "step": 2912 + }, + { + "epoch": 4.443359375e-06, + "step": 2912, + "training_step_time": 0.16447782516479492 + }, + { + "epoch": 4.44488525390625e-06, + "model_forward_time": 0.02524852752685547, + "step": 2913 + }, + { + "epoch": 4.44488525390625e-06, + "step": 2913, + "training_step_time": 0.1319105625152588 + }, + { + "epoch": 4.4464111328125e-06, + "model_forward_time": 0.02468132972717285, + "step": 2914 + }, + { + "epoch": 4.4464111328125e-06, + "step": 2914, + "training_step_time": 0.11304616928100586 + }, + { + "epoch": 4.44793701171875e-06, + "model_forward_time": 0.02822089195251465, + "step": 2915 + }, + { + "epoch": 4.44793701171875e-06, + "step": 2915, + "training_step_time": 0.11149215698242188 + }, + { + "epoch": 4.449462890625e-06, + "model_forward_time": 0.025212764739990234, + "step": 2916 + }, + { + "epoch": 4.449462890625e-06, + "step": 2916, + "training_step_time": 0.10906720161437988 + }, + { + "epoch": 4.45098876953125e-06, + "model_forward_time": 0.02449345588684082, + "step": 2917 + }, + { + "epoch": 4.45098876953125e-06, + "step": 2917, + "training_step_time": 0.11286067962646484 + }, + { + "epoch": 4.4525146484375e-06, + "model_forward_time": 0.02461528778076172, + "step": 2918 + }, + { + "epoch": 4.4525146484375e-06, + "step": 2918, + "training_step_time": 0.19971013069152832 + }, + { + "epoch": 4.45404052734375e-06, + "model_forward_time": 0.024705886840820312, + "step": 2919 + }, + { + "epoch": 4.45404052734375e-06, + "step": 2919, + "training_step_time": 0.11220097541809082 + }, + { + "epoch": 4.45556640625e-06, + "grad_norm": 0.9816491603851318, + "learning_rate": 9.938872052313746e-05, + "loss": 0.1499, + "step": 2920 + }, + { + "epoch": 4.45556640625e-06, + "model_forward_time": 0.02576470375061035, + "step": 2920 + }, + { + "epoch": 4.45556640625e-06, + "step": 2920, + "training_step_time": 0.10891389846801758 + }, + { + "epoch": 4.45709228515625e-06, + "model_forward_time": 0.024667739868164062, + "step": 2921 + }, + { + "epoch": 4.45709228515625e-06, + "step": 2921, + "training_step_time": 0.10807919502258301 + }, + { + "epoch": 4.4586181640625e-06, + "model_forward_time": 0.026676416397094727, + "step": 2922 + }, + { + "epoch": 4.4586181640625e-06, + "step": 2922, + "training_step_time": 0.11333608627319336 + }, + { + "epoch": 4.46014404296875e-06, + "model_forward_time": 0.02564835548400879, + "step": 2923 + }, + { + "epoch": 4.46014404296875e-06, + "step": 2923, + "training_step_time": 0.11304354667663574 + }, + { + "epoch": 4.461669921875e-06, + "model_forward_time": 0.029223918914794922, + "step": 2924 + }, + { + "epoch": 4.461669921875e-06, + "step": 2924, + "training_step_time": 0.11306500434875488 + }, + { + "epoch": 4.46319580078125e-06, + "model_forward_time": 0.02574753761291504, + "step": 2925 + }, + { + "epoch": 4.46319580078125e-06, + "step": 2925, + "training_step_time": 0.21307802200317383 + }, + { + "epoch": 4.4647216796875e-06, + "model_forward_time": 0.024732112884521484, + "step": 2926 + }, + { + "epoch": 4.4647216796875e-06, + "step": 2926, + "training_step_time": 0.11550402641296387 + }, + { + "epoch": 4.46624755859375e-06, + "model_forward_time": 0.024950742721557617, + "step": 2927 + }, + { + "epoch": 4.46624755859375e-06, + "step": 2927, + "training_step_time": 0.10428953170776367 + }, + { + "epoch": 4.4677734375e-06, + "model_forward_time": 0.02560877799987793, + "step": 2928 + }, + { + "epoch": 4.4677734375e-06, + "step": 2928, + "training_step_time": 0.10859870910644531 + }, + { + "epoch": 4.46929931640625e-06, + "model_forward_time": 0.025862932205200195, + "step": 2929 + }, + { + "epoch": 4.46929931640625e-06, + "step": 2929, + "training_step_time": 0.11178445816040039 + }, + { + "epoch": 4.4708251953125e-06, + "grad_norm": 1.0555541515350342, + "learning_rate": 9.938009853469436e-05, + "loss": 0.1142, + "step": 2930 + }, + { + "epoch": 4.4708251953125e-06, + "model_forward_time": 0.02568840980529785, + "step": 2930 + }, + { + "epoch": 4.4708251953125e-06, + "step": 2930, + "training_step_time": 0.11029767990112305 + }, + { + "epoch": 4.47235107421875e-06, + "model_forward_time": 0.02713155746459961, + "step": 2931 + }, + { + "epoch": 4.47235107421875e-06, + "step": 2931, + "training_step_time": 0.10959458351135254 + }, + { + "epoch": 4.473876953125e-06, + "model_forward_time": 0.024959564208984375, + "step": 2932 + }, + { + "epoch": 4.473876953125e-06, + "step": 2932, + "training_step_time": 0.10962820053100586 + }, + { + "epoch": 4.47540283203125e-06, + "model_forward_time": 0.026279211044311523, + "step": 2933 + }, + { + "epoch": 4.47540283203125e-06, + "step": 2933, + "training_step_time": 0.10775542259216309 + }, + { + "epoch": 4.4769287109375e-06, + "model_forward_time": 0.0254974365234375, + "step": 2934 + }, + { + "epoch": 4.4769287109375e-06, + "step": 2934, + "training_step_time": 0.10799670219421387 + }, + { + "epoch": 4.47845458984375e-06, + "model_forward_time": 0.025084733963012695, + "step": 2935 + }, + { + "epoch": 4.47845458984375e-06, + "step": 2935, + "training_step_time": 0.132537841796875 + }, + { + "epoch": 4.47998046875e-06, + "model_forward_time": 0.025426149368286133, + "step": 2936 + }, + { + "epoch": 4.47998046875e-06, + "step": 2936, + "training_step_time": 0.1486375331878662 + }, + { + "epoch": 4.48150634765625e-06, + "model_forward_time": 0.024872303009033203, + "step": 2937 + }, + { + "epoch": 4.48150634765625e-06, + "step": 2937, + "training_step_time": 0.1477510929107666 + }, + { + "epoch": 4.4830322265625e-06, + "model_forward_time": 0.02434229850769043, + "step": 2938 + }, + { + "epoch": 4.4830322265625e-06, + "step": 2938, + "training_step_time": 0.12763762474060059 + }, + { + "epoch": 4.48455810546875e-06, + "model_forward_time": 0.024348974227905273, + "step": 2939 + }, + { + "epoch": 4.48455810546875e-06, + "step": 2939, + "training_step_time": 0.12826800346374512 + }, + { + "epoch": 4.486083984375e-06, + "grad_norm": 0.7148603200912476, + "learning_rate": 9.937141654477528e-05, + "loss": 0.1181, + "step": 2940 + }, + { + "epoch": 4.486083984375e-06, + "model_forward_time": 0.024813413619995117, + "step": 2940 + }, + { + "epoch": 4.486083984375e-06, + "step": 2940, + "training_step_time": 0.12384796142578125 + }, + { + "epoch": 4.48760986328125e-06, + "model_forward_time": 0.024008989334106445, + "step": 2941 + }, + { + "epoch": 4.48760986328125e-06, + "step": 2941, + "training_step_time": 0.11441802978515625 + }, + { + "epoch": 4.4891357421875e-06, + "model_forward_time": 0.025722026824951172, + "step": 2942 + }, + { + "epoch": 4.4891357421875e-06, + "step": 2942, + "training_step_time": 0.11750102043151855 + }, + { + "epoch": 4.49066162109375e-06, + "model_forward_time": 0.025391817092895508, + "step": 2943 + }, + { + "epoch": 4.49066162109375e-06, + "step": 2943, + "training_step_time": 0.11415600776672363 + }, + { + "epoch": 4.4921875e-06, + "model_forward_time": 0.025437593460083008, + "step": 2944 + }, + { + "epoch": 4.4921875e-06, + "step": 2944, + "training_step_time": 0.11010575294494629 + }, + { + "epoch": 4.49371337890625e-06, + "model_forward_time": 0.026737689971923828, + "step": 2945 + }, + { + "epoch": 4.49371337890625e-06, + "step": 2945, + "training_step_time": 0.1131284236907959 + }, + { + "epoch": 4.4952392578125e-06, + "model_forward_time": 0.02538585662841797, + "step": 2946 + }, + { + "epoch": 4.4952392578125e-06, + "step": 2946, + "training_step_time": 0.1160120964050293 + }, + { + "epoch": 4.49676513671875e-06, + "model_forward_time": 0.025252342224121094, + "step": 2947 + }, + { + "epoch": 4.49676513671875e-06, + "step": 2947, + "training_step_time": 0.11012840270996094 + }, + { + "epoch": 4.498291015625e-06, + "model_forward_time": 0.025865554809570312, + "step": 2948 + }, + { + "epoch": 4.498291015625e-06, + "step": 2948, + "training_step_time": 0.18613433837890625 + }, + { + "epoch": 4.49981689453125e-06, + "model_forward_time": 0.024675607681274414, + "step": 2949 + }, + { + "epoch": 4.49981689453125e-06, + "step": 2949, + "training_step_time": 0.19361066818237305 + }, + { + "epoch": 4.5013427734375e-06, + "grad_norm": 0.6078255772590637, + "learning_rate": 9.936267456392971e-05, + "loss": 0.1419, + "step": 2950 + }, + { + "epoch": 4.5013427734375e-06, + "model_forward_time": 0.024212121963500977, + "step": 2950 + }, + { + "epoch": 4.5013427734375e-06, + "step": 2950, + "training_step_time": 0.1473231315612793 + }, + { + "epoch": 4.50286865234375e-06, + "model_forward_time": 0.02373480796813965, + "step": 2951 + }, + { + "epoch": 4.50286865234375e-06, + "step": 2951, + "training_step_time": 0.1781630516052246 + }, + { + "epoch": 4.50439453125e-06, + "model_forward_time": 0.02470850944519043, + "step": 2952 + }, + { + "epoch": 4.50439453125e-06, + "step": 2952, + "training_step_time": 0.1799602508544922 + }, + { + "epoch": 4.50592041015625e-06, + "model_forward_time": 0.024767160415649414, + "step": 2953 + }, + { + "epoch": 4.50592041015625e-06, + "step": 2953, + "training_step_time": 0.14460277557373047 + }, + { + "epoch": 4.5074462890625e-06, + "model_forward_time": 0.024739503860473633, + "step": 2954 + }, + { + "epoch": 4.5074462890625e-06, + "step": 2954, + "training_step_time": 0.10888934135437012 + }, + { + "epoch": 4.50897216796875e-06, + "model_forward_time": 0.02471923828125, + "step": 2955 + }, + { + "epoch": 4.50897216796875e-06, + "step": 2955, + "training_step_time": 0.10982418060302734 + }, + { + "epoch": 4.510498046875e-06, + "model_forward_time": 0.025331974029541016, + "step": 2956 + }, + { + "epoch": 4.510498046875e-06, + "step": 2956, + "training_step_time": 0.19500398635864258 + }, + { + "epoch": 4.51202392578125e-06, + "model_forward_time": 0.025336742401123047, + "step": 2957 + }, + { + "epoch": 4.51202392578125e-06, + "step": 2957, + "training_step_time": 0.13620853424072266 + }, + { + "epoch": 4.5135498046875e-06, + "model_forward_time": 0.025560855865478516, + "step": 2958 + }, + { + "epoch": 4.5135498046875e-06, + "step": 2958, + "training_step_time": 0.11236977577209473 + }, + { + "epoch": 4.51507568359375e-06, + "model_forward_time": 0.025601863861083984, + "step": 2959 + }, + { + "epoch": 4.51507568359375e-06, + "step": 2959, + "training_step_time": 0.11033987998962402 + }, + { + "epoch": 4.5166015625e-06, + "grad_norm": 0.6266261339187622, + "learning_rate": 9.935387260277993e-05, + "loss": 0.1329, + "step": 2960 + }, + { + "epoch": 4.5166015625e-06, + "model_forward_time": 0.025922060012817383, + "step": 2960 + }, + { + "epoch": 4.5166015625e-06, + "step": 2960, + "training_step_time": 0.11202692985534668 + }, + { + "epoch": 4.51812744140625e-06, + "model_forward_time": 0.025138139724731445, + "step": 2961 + }, + { + "epoch": 4.51812744140625e-06, + "step": 2961, + "training_step_time": 0.11005687713623047 + }, + { + "epoch": 4.5196533203125e-06, + "model_forward_time": 0.02571868896484375, + "step": 2962 + }, + { + "epoch": 4.5196533203125e-06, + "step": 2962, + "training_step_time": 0.19635462760925293 + }, + { + "epoch": 4.52117919921875e-06, + "model_forward_time": 0.025515079498291016, + "step": 2963 + }, + { + "epoch": 4.52117919921875e-06, + "step": 2963, + "training_step_time": 0.11399555206298828 + }, + { + "epoch": 4.522705078125e-06, + "model_forward_time": 0.025533676147460938, + "step": 2964 + }, + { + "epoch": 4.522705078125e-06, + "step": 2964, + "training_step_time": 0.11181783676147461 + }, + { + "epoch": 4.52423095703125e-06, + "model_forward_time": 0.028892040252685547, + "step": 2965 + }, + { + "epoch": 4.52423095703125e-06, + "step": 2965, + "training_step_time": 0.21477127075195312 + }, + { + "epoch": 4.5257568359375e-06, + "model_forward_time": 0.025362253189086914, + "step": 2966 + }, + { + "epoch": 4.5257568359375e-06, + "step": 2966, + "training_step_time": 0.11672544479370117 + }, + { + "epoch": 4.52728271484375e-06, + "model_forward_time": 0.024881601333618164, + "step": 2967 + }, + { + "epoch": 4.52728271484375e-06, + "step": 2967, + "training_step_time": 0.10939812660217285 + }, + { + "epoch": 4.52880859375e-06, + "model_forward_time": 0.02612447738647461, + "step": 2968 + }, + { + "epoch": 4.52880859375e-06, + "step": 2968, + "training_step_time": 0.21409916877746582 + }, + { + "epoch": 4.53033447265625e-06, + "model_forward_time": 0.024281024932861328, + "step": 2969 + }, + { + "epoch": 4.53033447265625e-06, + "step": 2969, + "training_step_time": 0.11605477333068848 + }, + { + "epoch": 4.5318603515625e-06, + "grad_norm": 0.7585597634315491, + "learning_rate": 9.934501067202117e-05, + "loss": 0.1163, + "step": 2970 + }, + { + "epoch": 4.5318603515625e-06, + "model_forward_time": 0.02476954460144043, + "step": 2970 + }, + { + "epoch": 4.5318603515625e-06, + "step": 2970, + "training_step_time": 0.10861420631408691 + }, + { + "epoch": 4.53338623046875e-06, + "model_forward_time": 0.025798320770263672, + "step": 2971 + }, + { + "epoch": 4.53338623046875e-06, + "step": 2971, + "training_step_time": 0.11424756050109863 + }, + { + "epoch": 4.534912109375e-06, + "model_forward_time": 0.025229215621948242, + "step": 2972 + }, + { + "epoch": 4.534912109375e-06, + "step": 2972, + "training_step_time": 0.11175155639648438 + }, + { + "epoch": 4.53643798828125e-06, + "model_forward_time": 0.02484607696533203, + "step": 2973 + }, + { + "epoch": 4.53643798828125e-06, + "step": 2973, + "training_step_time": 0.10991120338439941 + }, + { + "epoch": 4.5379638671875e-06, + "model_forward_time": 0.025568723678588867, + "step": 2974 + }, + { + "epoch": 4.5379638671875e-06, + "step": 2974, + "training_step_time": 0.11665129661560059 + }, + { + "epoch": 4.53948974609375e-06, + "model_forward_time": 0.025730371475219727, + "step": 2975 + }, + { + "epoch": 4.53948974609375e-06, + "step": 2975, + "training_step_time": 0.11010360717773438 + }, + { + "epoch": 4.541015625e-06, + "model_forward_time": 0.025798320770263672, + "step": 2976 + }, + { + "epoch": 4.541015625e-06, + "step": 2976, + "training_step_time": 0.10947537422180176 + }, + { + "epoch": 4.54254150390625e-06, + "model_forward_time": 0.025766372680664062, + "step": 2977 + }, + { + "epoch": 4.54254150390625e-06, + "step": 2977, + "training_step_time": 0.10937786102294922 + }, + { + "epoch": 4.5440673828125e-06, + "model_forward_time": 0.024997711181640625, + "step": 2978 + }, + { + "epoch": 4.5440673828125e-06, + "step": 2978, + "training_step_time": 0.10871410369873047 + }, + { + "epoch": 4.54559326171875e-06, + "model_forward_time": 0.02529430389404297, + "step": 2979 + }, + { + "epoch": 4.54559326171875e-06, + "step": 2979, + "training_step_time": 0.11275863647460938 + }, + { + "epoch": 4.547119140625e-06, + "grad_norm": 0.7055568099021912, + "learning_rate": 9.933608878242153e-05, + "loss": 0.1257, + "step": 2980 + }, + { + "epoch": 4.547119140625e-06, + "model_forward_time": 0.029764652252197266, + "step": 2980 + }, + { + "epoch": 4.547119140625e-06, + "step": 2980, + "training_step_time": 0.11362099647521973 + }, + { + "epoch": 4.54864501953125e-06, + "model_forward_time": 0.02495884895324707, + "step": 2981 + }, + { + "epoch": 4.54864501953125e-06, + "step": 2981, + "training_step_time": 0.10736536979675293 + }, + { + "epoch": 4.5501708984375e-06, + "model_forward_time": 0.024997711181640625, + "step": 2982 + }, + { + "epoch": 4.5501708984375e-06, + "step": 2982, + "training_step_time": 0.10762286186218262 + }, + { + "epoch": 4.55169677734375e-06, + "model_forward_time": 0.02576589584350586, + "step": 2983 + }, + { + "epoch": 4.55169677734375e-06, + "step": 2983, + "training_step_time": 0.10982728004455566 + }, + { + "epoch": 4.55322265625e-06, + "model_forward_time": 0.02516031265258789, + "step": 2984 + }, + { + "epoch": 4.55322265625e-06, + "step": 2984, + "training_step_time": 0.10881328582763672 + }, + { + "epoch": 4.55474853515625e-06, + "model_forward_time": 0.02513861656188965, + "step": 2985 + }, + { + "epoch": 4.55474853515625e-06, + "step": 2985, + "training_step_time": 0.10706138610839844 + }, + { + "epoch": 4.5562744140625e-06, + "model_forward_time": 0.025171995162963867, + "step": 2986 + }, + { + "epoch": 4.5562744140625e-06, + "step": 2986, + "training_step_time": 0.10719585418701172 + }, + { + "epoch": 4.55780029296875e-06, + "model_forward_time": 0.025115489959716797, + "step": 2987 + }, + { + "epoch": 4.55780029296875e-06, + "step": 2987, + "training_step_time": 0.1064448356628418 + }, + { + "epoch": 4.559326171875e-06, + "model_forward_time": 0.02533411979675293, + "step": 2988 + }, + { + "epoch": 4.559326171875e-06, + "step": 2988, + "training_step_time": 0.10821652412414551 + }, + { + "epoch": 4.56085205078125e-06, + "model_forward_time": 0.025208473205566406, + "step": 2989 + }, + { + "epoch": 4.56085205078125e-06, + "step": 2989, + "training_step_time": 0.10895276069641113 + }, + { + "epoch": 4.5623779296875e-06, + "grad_norm": 0.9388747215270996, + "learning_rate": 9.932710694482191e-05, + "loss": 0.1511, + "step": 2990 + }, + { + "epoch": 4.5623779296875e-06, + "model_forward_time": 0.025083303451538086, + "step": 2990 + }, + { + "epoch": 4.5623779296875e-06, + "step": 2990, + "training_step_time": 0.1072242259979248 + }, + { + "epoch": 4.56390380859375e-06, + "model_forward_time": 0.02484583854675293, + "step": 2991 + }, + { + "epoch": 4.56390380859375e-06, + "step": 2991, + "training_step_time": 0.10586428642272949 + }, + { + "epoch": 4.5654296875e-06, + "model_forward_time": 0.025149822235107422, + "step": 2992 + }, + { + "epoch": 4.5654296875e-06, + "step": 2992, + "training_step_time": 0.113616943359375 + }, + { + "epoch": 4.56695556640625e-06, + "model_forward_time": 0.02527928352355957, + "step": 2993 + }, + { + "epoch": 4.56695556640625e-06, + "step": 2993, + "training_step_time": 0.16721057891845703 + }, + { + "epoch": 4.5684814453125e-06, + "model_forward_time": 0.02418208122253418, + "step": 2994 + }, + { + "epoch": 4.5684814453125e-06, + "step": 2994, + "training_step_time": 0.141829252243042 + }, + { + "epoch": 4.57000732421875e-06, + "model_forward_time": 0.024295330047607422, + "step": 2995 + }, + { + "epoch": 4.57000732421875e-06, + "step": 2995, + "training_step_time": 0.21079754829406738 + }, + { + "epoch": 4.571533203125e-06, + "model_forward_time": 0.02372145652770996, + "step": 2996 + }, + { + "epoch": 4.571533203125e-06, + "step": 2996, + "training_step_time": 0.15844202041625977 + }, + { + "epoch": 4.57305908203125e-06, + "model_forward_time": 0.02421116828918457, + "step": 2997 + }, + { + "epoch": 4.57305908203125e-06, + "step": 2997, + "training_step_time": 0.18604803085327148 + }, + { + "epoch": 4.5745849609375e-06, + "model_forward_time": 0.02385234832763672, + "step": 2998 + }, + { + "epoch": 4.5745849609375e-06, + "step": 2998, + "training_step_time": 0.12929105758666992 + }, + { + "epoch": 4.57611083984375e-06, + "model_forward_time": 0.02402353286743164, + "step": 2999 + }, + { + "epoch": 4.57611083984375e-06, + "step": 2999, + "training_step_time": 0.11617779731750488 + }, + { + "epoch": 4.57763671875e-06, + "grad_norm": 0.7032703757286072, + "learning_rate": 9.931806517013612e-05, + "loss": 0.1262, + "step": 3000 + }, + { + "epoch": 4.57763671875e-06, + "model_forward_time": 0.025274276733398438, + "step": 3000 + }, + { + "epoch": 4.57763671875e-06, + "step": 3000, + "training_step_time": 0.10644841194152832 + }, + { + "epoch": 4.57916259765625e-06, + "model_forward_time": 0.023895740509033203, + "step": 3001 + }, + { + "epoch": 4.57916259765625e-06, + "step": 3001, + "training_step_time": 0.10050320625305176 + }, + { + "epoch": 4.5806884765625e-06, + "model_forward_time": 0.024775981903076172, + "step": 3002 + }, + { + "epoch": 4.5806884765625e-06, + "step": 3002, + "training_step_time": 0.16507720947265625 + }, + { + "epoch": 4.58221435546875e-06, + "model_forward_time": 0.02513432502746582, + "step": 3003 + }, + { + "epoch": 4.58221435546875e-06, + "step": 3003, + "training_step_time": 0.13882660865783691 + }, + { + "epoch": 4.583740234375e-06, + "model_forward_time": 0.027904987335205078, + "step": 3004 + }, + { + "epoch": 4.583740234375e-06, + "step": 3004, + "training_step_time": 0.21424102783203125 + }, + { + "epoch": 4.58526611328125e-06, + "model_forward_time": 0.024539470672607422, + "step": 3005 + }, + { + "epoch": 4.58526611328125e-06, + "step": 3005, + "training_step_time": 0.13618683815002441 + }, + { + "epoch": 4.5867919921875e-06, + "model_forward_time": 0.02490067481994629, + "step": 3006 + }, + { + "epoch": 4.5867919921875e-06, + "step": 3006, + "training_step_time": 0.15112853050231934 + }, + { + "epoch": 4.58831787109375e-06, + "model_forward_time": 0.02464580535888672, + "step": 3007 + }, + { + "epoch": 4.58831787109375e-06, + "step": 3007, + "training_step_time": 0.1699233055114746 + }, + { + "epoch": 4.58984375e-06, + "model_forward_time": 0.025163888931274414, + "step": 3008 + }, + { + "epoch": 4.58984375e-06, + "step": 3008, + "training_step_time": 0.11030864715576172 + }, + { + "epoch": 4.59136962890625e-06, + "model_forward_time": 0.02524423599243164, + "step": 3009 + }, + { + "epoch": 4.59136962890625e-06, + "step": 3009, + "training_step_time": 0.16350126266479492 + }, + { + "epoch": 4.5928955078125e-06, + "grad_norm": 0.7527583837509155, + "learning_rate": 9.930896346935077e-05, + "loss": 0.1128, + "step": 3010 + }, + { + "epoch": 4.5928955078125e-06, + "model_forward_time": 0.027840137481689453, + "step": 3010 + }, + { + "epoch": 4.5928955078125e-06, + "step": 3010, + "training_step_time": 0.1271955966949463 + }, + { + "epoch": 4.59442138671875e-06, + "model_forward_time": 0.024216175079345703, + "step": 3011 + }, + { + "epoch": 4.59442138671875e-06, + "step": 3011, + "training_step_time": 0.11459136009216309 + }, + { + "epoch": 4.595947265625e-06, + "model_forward_time": 0.025548458099365234, + "step": 3012 + }, + { + "epoch": 4.595947265625e-06, + "step": 3012, + "training_step_time": 0.11770272254943848 + }, + { + "epoch": 4.59747314453125e-06, + "model_forward_time": 0.025242090225219727, + "step": 3013 + }, + { + "epoch": 4.59747314453125e-06, + "step": 3013, + "training_step_time": 0.11095738410949707 + }, + { + "epoch": 4.5989990234375e-06, + "model_forward_time": 0.02518630027770996, + "step": 3014 + }, + { + "epoch": 4.5989990234375e-06, + "step": 3014, + "training_step_time": 0.10807490348815918 + }, + { + "epoch": 4.60052490234375e-06, + "model_forward_time": 0.025982379913330078, + "step": 3015 + }, + { + "epoch": 4.60052490234375e-06, + "step": 3015, + "training_step_time": 0.20018553733825684 + }, + { + "epoch": 4.60205078125e-06, + "model_forward_time": 0.024311065673828125, + "step": 3016 + }, + { + "epoch": 4.60205078125e-06, + "step": 3016, + "training_step_time": 0.10424637794494629 + }, + { + "epoch": 4.60357666015625e-06, + "model_forward_time": 0.024016618728637695, + "step": 3017 + }, + { + "epoch": 4.60357666015625e-06, + "step": 3017, + "training_step_time": 0.10770297050476074 + }, + { + "epoch": 4.6051025390625e-06, + "model_forward_time": 0.025191783905029297, + "step": 3018 + }, + { + "epoch": 4.6051025390625e-06, + "step": 3018, + "training_step_time": 0.11938261985778809 + }, + { + "epoch": 4.60662841796875e-06, + "model_forward_time": 0.0260465145111084, + "step": 3019 + }, + { + "epoch": 4.60662841796875e-06, + "step": 3019, + "training_step_time": 0.11019563674926758 + }, + { + "epoch": 4.608154296875e-06, + "grad_norm": 0.4577745497226715, + "learning_rate": 9.929980185352526e-05, + "loss": 0.1118, + "step": 3020 + }, + { + "epoch": 4.608154296875e-06, + "model_forward_time": 0.02611684799194336, + "step": 3020 + }, + { + "epoch": 4.608154296875e-06, + "step": 3020, + "training_step_time": 0.16607999801635742 + }, + { + "epoch": 4.60968017578125e-06, + "model_forward_time": 0.024385929107666016, + "step": 3021 + }, + { + "epoch": 4.60968017578125e-06, + "step": 3021, + "training_step_time": 0.10687613487243652 + }, + { + "epoch": 4.6112060546875e-06, + "model_forward_time": 0.024969816207885742, + "step": 3022 + }, + { + "epoch": 4.6112060546875e-06, + "step": 3022, + "training_step_time": 0.10885810852050781 + }, + { + "epoch": 4.61273193359375e-06, + "model_forward_time": 0.0255582332611084, + "step": 3023 + }, + { + "epoch": 4.61273193359375e-06, + "step": 3023, + "training_step_time": 0.1081390380859375 + }, + { + "epoch": 4.6142578125e-06, + "model_forward_time": 0.02564835548400879, + "step": 3024 + }, + { + "epoch": 4.6142578125e-06, + "step": 3024, + "training_step_time": 0.11097168922424316 + }, + { + "epoch": 4.61578369140625e-06, + "model_forward_time": 0.025356769561767578, + "step": 3025 + }, + { + "epoch": 4.61578369140625e-06, + "step": 3025, + "training_step_time": 0.10971260070800781 + }, + { + "epoch": 4.6173095703125e-06, + "model_forward_time": 0.02590775489807129, + "step": 3026 + }, + { + "epoch": 4.6173095703125e-06, + "step": 3026, + "training_step_time": 0.10683202743530273 + }, + { + "epoch": 4.61883544921875e-06, + "model_forward_time": 0.025111913681030273, + "step": 3027 + }, + { + "epoch": 4.61883544921875e-06, + "step": 3027, + "training_step_time": 0.11165094375610352 + }, + { + "epoch": 4.620361328125e-06, + "model_forward_time": 0.0275270938873291, + "step": 3028 + }, + { + "epoch": 4.620361328125e-06, + "step": 3028, + "training_step_time": 0.11361312866210938 + }, + { + "epoch": 4.62188720703125e-06, + "model_forward_time": 0.02532029151916504, + "step": 3029 + }, + { + "epoch": 4.62188720703125e-06, + "step": 3029, + "training_step_time": 0.10674166679382324 + }, + { + "epoch": 4.6234130859375e-06, + "grad_norm": 0.5803720951080322, + "learning_rate": 9.929058033379181e-05, + "loss": 0.1292, + "step": 3030 + }, + { + "epoch": 4.6234130859375e-06, + "model_forward_time": 0.02605438232421875, + "step": 3030 + }, + { + "epoch": 4.6234130859375e-06, + "step": 3030, + "training_step_time": 0.10945558547973633 + }, + { + "epoch": 4.62493896484375e-06, + "model_forward_time": 0.025441408157348633, + "step": 3031 + }, + { + "epoch": 4.62493896484375e-06, + "step": 3031, + "training_step_time": 0.11014413833618164 + }, + { + "epoch": 4.62646484375e-06, + "model_forward_time": 0.02509140968322754, + "step": 3032 + }, + { + "epoch": 4.62646484375e-06, + "step": 3032, + "training_step_time": 0.11038565635681152 + }, + { + "epoch": 4.62799072265625e-06, + "model_forward_time": 0.02730250358581543, + "step": 3033 + }, + { + "epoch": 4.62799072265625e-06, + "step": 3033, + "training_step_time": 0.11754989624023438 + }, + { + "epoch": 4.6295166015625e-06, + "model_forward_time": 0.025072574615478516, + "step": 3034 + }, + { + "epoch": 4.6295166015625e-06, + "step": 3034, + "training_step_time": 0.10888481140136719 + }, + { + "epoch": 4.63104248046875e-06, + "model_forward_time": 0.025486230850219727, + "step": 3035 + }, + { + "epoch": 4.63104248046875e-06, + "step": 3035, + "training_step_time": 0.10706686973571777 + }, + { + "epoch": 4.632568359375e-06, + "model_forward_time": 0.026131391525268555, + "step": 3036 + }, + { + "epoch": 4.632568359375e-06, + "step": 3036, + "training_step_time": 0.1116795539855957 + }, + { + "epoch": 4.63409423828125e-06, + "model_forward_time": 0.025681734085083008, + "step": 3037 + }, + { + "epoch": 4.63409423828125e-06, + "step": 3037, + "training_step_time": 0.11033320426940918 + }, + { + "epoch": 4.6356201171875e-06, + "model_forward_time": 0.025547504425048828, + "step": 3038 + }, + { + "epoch": 4.6356201171875e-06, + "step": 3038, + "training_step_time": 0.1141977310180664 + }, + { + "epoch": 4.63714599609375e-06, + "model_forward_time": 0.025180578231811523, + "step": 3039 + }, + { + "epoch": 4.63714599609375e-06, + "step": 3039, + "training_step_time": 0.10655546188354492 + }, + { + "epoch": 4.638671875e-06, + "grad_norm": 0.4894583225250244, + "learning_rate": 9.92812989213555e-05, + "loss": 0.1352, + "step": 3040 + }, + { + "epoch": 4.638671875e-06, + "model_forward_time": 0.02577948570251465, + "step": 3040 + }, + { + "epoch": 4.638671875e-06, + "step": 3040, + "training_step_time": 0.10836982727050781 + }, + { + "epoch": 4.64019775390625e-06, + "model_forward_time": 0.02597522735595703, + "step": 3041 + }, + { + "epoch": 4.64019775390625e-06, + "step": 3041, + "training_step_time": 0.11708593368530273 + }, + { + "epoch": 4.6417236328125e-06, + "model_forward_time": 0.025516986846923828, + "step": 3042 + }, + { + "epoch": 4.6417236328125e-06, + "step": 3042, + "training_step_time": 0.10789799690246582 + }, + { + "epoch": 4.64324951171875e-06, + "model_forward_time": 0.025321006774902344, + "step": 3043 + }, + { + "epoch": 4.64324951171875e-06, + "step": 3043, + "training_step_time": 0.10865330696105957 + }, + { + "epoch": 4.644775390625e-06, + "model_forward_time": 0.02486586570739746, + "step": 3044 + }, + { + "epoch": 4.644775390625e-06, + "step": 3044, + "training_step_time": 0.10664510726928711 + }, + { + "epoch": 4.64630126953125e-06, + "model_forward_time": 0.02589869499206543, + "step": 3045 + }, + { + "epoch": 4.64630126953125e-06, + "step": 3045, + "training_step_time": 0.10899591445922852 + }, + { + "epoch": 4.6478271484375e-06, + "model_forward_time": 0.029685020446777344, + "step": 3046 + }, + { + "epoch": 4.6478271484375e-06, + "step": 3046, + "training_step_time": 0.1338672637939453 + }, + { + "epoch": 4.64935302734375e-06, + "model_forward_time": 0.025487422943115234, + "step": 3047 + }, + { + "epoch": 4.64935302734375e-06, + "step": 3047, + "training_step_time": 0.18281102180480957 + }, + { + "epoch": 4.65087890625e-06, + "model_forward_time": 0.024692773818969727, + "step": 3048 + }, + { + "epoch": 4.65087890625e-06, + "step": 3048, + "training_step_time": 0.21215152740478516 + }, + { + "epoch": 4.65240478515625e-06, + "model_forward_time": 0.025002002716064453, + "step": 3049 + }, + { + "epoch": 4.65240478515625e-06, + "step": 3049, + "training_step_time": 0.16957807540893555 + }, + { + "epoch": 4.6539306640625e-06, + "grad_norm": 0.9082778096199036, + "learning_rate": 9.927195762749405e-05, + "loss": 0.1249, + "step": 3050 + }, + { + "epoch": 4.6539306640625e-06, + "model_forward_time": 0.02416205406188965, + "step": 3050 + }, + { + "epoch": 4.6539306640625e-06, + "step": 3050, + "training_step_time": 0.17082524299621582 + }, + { + "epoch": 4.65545654296875e-06, + "model_forward_time": 0.024770259857177734, + "step": 3051 + }, + { + "epoch": 4.65545654296875e-06, + "step": 3051, + "training_step_time": 0.18565702438354492 + }, + { + "epoch": 4.656982421875e-06, + "model_forward_time": 0.02504587173461914, + "step": 3052 + }, + { + "epoch": 4.656982421875e-06, + "step": 3052, + "training_step_time": 0.12810707092285156 + }, + { + "epoch": 4.65850830078125e-06, + "model_forward_time": 0.025627851486206055, + "step": 3053 + }, + { + "epoch": 4.65850830078125e-06, + "step": 3053, + "training_step_time": 0.1111907958984375 + }, + { + "epoch": 4.6600341796875e-06, + "model_forward_time": 0.02577805519104004, + "step": 3054 + }, + { + "epoch": 4.6600341796875e-06, + "step": 3054, + "training_step_time": 0.17374491691589355 + }, + { + "epoch": 4.66156005859375e-06, + "model_forward_time": 0.025226831436157227, + "step": 3055 + }, + { + "epoch": 4.66156005859375e-06, + "step": 3055, + "training_step_time": 0.1413719654083252 + }, + { + "epoch": 4.6630859375e-06, + "model_forward_time": 0.024529457092285156, + "step": 3056 + }, + { + "epoch": 4.6630859375e-06, + "step": 3056, + "training_step_time": 0.10995221138000488 + }, + { + "epoch": 4.66461181640625e-06, + "model_forward_time": 0.025147438049316406, + "step": 3057 + }, + { + "epoch": 4.66461181640625e-06, + "step": 3057, + "training_step_time": 0.11493563652038574 + }, + { + "epoch": 4.6661376953125e-06, + "model_forward_time": 0.025557994842529297, + "step": 3058 + }, + { + "epoch": 4.6661376953125e-06, + "step": 3058, + "training_step_time": 0.11695313453674316 + }, + { + "epoch": 4.66766357421875e-06, + "model_forward_time": 0.02530503273010254, + "step": 3059 + }, + { + "epoch": 4.66766357421875e-06, + "step": 3059, + "training_step_time": 0.1722719669342041 + }, + { + "epoch": 4.669189453125e-06, + "grad_norm": 0.796428382396698, + "learning_rate": 9.926255646355804e-05, + "loss": 0.1306, + "step": 3060 + }, + { + "epoch": 4.669189453125e-06, + "model_forward_time": 0.024994611740112305, + "step": 3060 + }, + { + "epoch": 4.669189453125e-06, + "step": 3060, + "training_step_time": 0.19031000137329102 + }, + { + "epoch": 4.67071533203125e-06, + "model_forward_time": 0.026267290115356445, + "step": 3061 + }, + { + "epoch": 4.67071533203125e-06, + "step": 3061, + "training_step_time": 0.18209218978881836 + }, + { + "epoch": 4.6722412109375e-06, + "model_forward_time": 0.02486419677734375, + "step": 3062 + }, + { + "epoch": 4.6722412109375e-06, + "step": 3062, + "training_step_time": 0.17567920684814453 + }, + { + "epoch": 4.67376708984375e-06, + "model_forward_time": 0.025582313537597656, + "step": 3063 + }, + { + "epoch": 4.67376708984375e-06, + "step": 3063, + "training_step_time": 0.13541650772094727 + }, + { + "epoch": 4.67529296875e-06, + "model_forward_time": 0.025574684143066406, + "step": 3064 + }, + { + "epoch": 4.67529296875e-06, + "step": 3064, + "training_step_time": 0.10504913330078125 + }, + { + "epoch": 4.67681884765625e-06, + "model_forward_time": 0.025575637817382812, + "step": 3065 + }, + { + "epoch": 4.67681884765625e-06, + "step": 3065, + "training_step_time": 0.11639595031738281 + }, + { + "epoch": 4.6783447265625e-06, + "model_forward_time": 0.02613687515258789, + "step": 3066 + }, + { + "epoch": 4.6783447265625e-06, + "step": 3066, + "training_step_time": 0.11397767066955566 + }, + { + "epoch": 4.67987060546875e-06, + "model_forward_time": 0.025957584381103516, + "step": 3067 + }, + { + "epoch": 4.67987060546875e-06, + "step": 3067, + "training_step_time": 0.1090846061706543 + }, + { + "epoch": 4.681396484375e-06, + "model_forward_time": 0.0264284610748291, + "step": 3068 + }, + { + "epoch": 4.681396484375e-06, + "step": 3068, + "training_step_time": 0.11313819885253906 + }, + { + "epoch": 4.68292236328125e-06, + "model_forward_time": 0.026006698608398438, + "step": 3069 + }, + { + "epoch": 4.68292236328125e-06, + "step": 3069, + "training_step_time": 0.10953092575073242 + }, + { + "epoch": 4.6844482421875e-06, + "grad_norm": 0.6938896179199219, + "learning_rate": 9.925309544097078e-05, + "loss": 0.1133, + "step": 3070 + }, + { + "epoch": 4.6844482421875e-06, + "model_forward_time": 0.025972366333007812, + "step": 3070 + }, + { + "epoch": 4.6844482421875e-06, + "step": 3070, + "training_step_time": 0.10843706130981445 + }, + { + "epoch": 4.68597412109375e-06, + "model_forward_time": 0.025505542755126953, + "step": 3071 + }, + { + "epoch": 4.68597412109375e-06, + "step": 3071, + "training_step_time": 0.11654138565063477 + }, + { + "epoch": 4.6875e-06, + "model_forward_time": 0.025406599044799805, + "step": 3072 + }, + { + "epoch": 4.6875e-06, + "step": 3072, + "training_step_time": 0.10954761505126953 + }, + { + "epoch": 4.68902587890625e-06, + "model_forward_time": 0.025370121002197266, + "step": 3073 + }, + { + "epoch": 4.68902587890625e-06, + "step": 3073, + "training_step_time": 0.10794854164123535 + }, + { + "epoch": 4.6905517578125e-06, + "model_forward_time": 0.025587081909179688, + "step": 3074 + }, + { + "epoch": 4.6905517578125e-06, + "step": 3074, + "training_step_time": 0.10911726951599121 + }, + { + "epoch": 4.69207763671875e-06, + "model_forward_time": 0.02571702003479004, + "step": 3075 + }, + { + "epoch": 4.69207763671875e-06, + "step": 3075, + "training_step_time": 0.12769460678100586 + }, + { + "epoch": 4.693603515625e-06, + "model_forward_time": 0.028377532958984375, + "step": 3076 + }, + { + "epoch": 4.693603515625e-06, + "step": 3076, + "training_step_time": 0.1168069839477539 + }, + { + "epoch": 4.69512939453125e-06, + "model_forward_time": 0.025876522064208984, + "step": 3077 + }, + { + "epoch": 4.69512939453125e-06, + "step": 3077, + "training_step_time": 0.10981917381286621 + }, + { + "epoch": 4.6966552734375e-06, + "model_forward_time": 0.02604365348815918, + "step": 3078 + }, + { + "epoch": 4.6966552734375e-06, + "step": 3078, + "training_step_time": 0.10936856269836426 + }, + { + "epoch": 4.69818115234375e-06, + "model_forward_time": 0.02559041976928711, + "step": 3079 + }, + { + "epoch": 4.69818115234375e-06, + "step": 3079, + "training_step_time": 0.11368846893310547 + }, + { + "epoch": 4.69970703125e-06, + "grad_norm": 0.7649744153022766, + "learning_rate": 9.924357457122828e-05, + "loss": 0.1368, + "step": 3080 + }, + { + "epoch": 4.69970703125e-06, + "model_forward_time": 0.0251619815826416, + "step": 3080 + }, + { + "epoch": 4.69970703125e-06, + "step": 3080, + "training_step_time": 0.10979294776916504 + }, + { + "epoch": 4.70123291015625e-06, + "model_forward_time": 0.025305747985839844, + "step": 3081 + }, + { + "epoch": 4.70123291015625e-06, + "step": 3081, + "training_step_time": 0.10976910591125488 + }, + { + "epoch": 4.7027587890625e-06, + "model_forward_time": 0.02550363540649414, + "step": 3082 + }, + { + "epoch": 4.7027587890625e-06, + "step": 3082, + "training_step_time": 0.10929536819458008 + }, + { + "epoch": 4.70428466796875e-06, + "model_forward_time": 0.025512218475341797, + "step": 3083 + }, + { + "epoch": 4.70428466796875e-06, + "step": 3083, + "training_step_time": 0.11077189445495605 + }, + { + "epoch": 4.705810546875e-06, + "model_forward_time": 0.02516913414001465, + "step": 3084 + }, + { + "epoch": 4.705810546875e-06, + "step": 3084, + "training_step_time": 0.11489057540893555 + }, + { + "epoch": 4.70733642578125e-06, + "model_forward_time": 0.025218486785888672, + "step": 3085 + }, + { + "epoch": 4.70733642578125e-06, + "step": 3085, + "training_step_time": 0.10700464248657227 + }, + { + "epoch": 4.7088623046875e-06, + "model_forward_time": 0.025419950485229492, + "step": 3086 + }, + { + "epoch": 4.7088623046875e-06, + "step": 3086, + "training_step_time": 0.10664486885070801 + }, + { + "epoch": 4.71038818359375e-06, + "model_forward_time": 0.02564096450805664, + "step": 3087 + }, + { + "epoch": 4.71038818359375e-06, + "step": 3087, + "training_step_time": 0.10914230346679688 + }, + { + "epoch": 4.7119140625e-06, + "model_forward_time": 0.02474236488342285, + "step": 3088 + }, + { + "epoch": 4.7119140625e-06, + "step": 3088, + "training_step_time": 0.10856270790100098 + }, + { + "epoch": 4.71343994140625e-06, + "model_forward_time": 0.024720430374145508, + "step": 3089 + }, + { + "epoch": 4.71343994140625e-06, + "step": 3089, + "training_step_time": 0.10740780830383301 + }, + { + "epoch": 4.7149658203125e-06, + "grad_norm": 0.8241348266601562, + "learning_rate": 9.923399386589933e-05, + "loss": 0.1421, + "step": 3090 + }, + { + "epoch": 4.7149658203125e-06, + "model_forward_time": 0.025288105010986328, + "step": 3090 + }, + { + "epoch": 4.7149658203125e-06, + "step": 3090, + "training_step_time": 0.11110687255859375 + }, + { + "epoch": 4.71649169921875e-06, + "model_forward_time": 0.0258939266204834, + "step": 3091 + }, + { + "epoch": 4.71649169921875e-06, + "step": 3091, + "training_step_time": 0.11382651329040527 + }, + { + "epoch": 4.718017578125e-06, + "model_forward_time": 0.025494098663330078, + "step": 3092 + }, + { + "epoch": 4.718017578125e-06, + "step": 3092, + "training_step_time": 0.1103208065032959 + }, + { + "epoch": 4.71954345703125e-06, + "model_forward_time": 0.025516271591186523, + "step": 3093 + }, + { + "epoch": 4.71954345703125e-06, + "step": 3093, + "training_step_time": 0.20687389373779297 + }, + { + "epoch": 4.7210693359375e-06, + "model_forward_time": 0.025048494338989258, + "step": 3094 + }, + { + "epoch": 4.7210693359375e-06, + "step": 3094, + "training_step_time": 0.15758085250854492 + }, + { + "epoch": 4.72259521484375e-06, + "model_forward_time": 0.024439573287963867, + "step": 3095 + }, + { + "epoch": 4.72259521484375e-06, + "step": 3095, + "training_step_time": 0.1678307056427002 + }, + { + "epoch": 4.72412109375e-06, + "model_forward_time": 0.024590730667114258, + "step": 3096 + }, + { + "epoch": 4.72412109375e-06, + "step": 3096, + "training_step_time": 0.17733073234558105 + }, + { + "epoch": 4.72564697265625e-06, + "model_forward_time": 0.024485111236572266, + "step": 3097 + }, + { + "epoch": 4.72564697265625e-06, + "step": 3097, + "training_step_time": 0.18431401252746582 + }, + { + "epoch": 4.7271728515625e-06, + "model_forward_time": 0.024735450744628906, + "step": 3098 + }, + { + "epoch": 4.7271728515625e-06, + "step": 3098, + "training_step_time": 0.11357283592224121 + }, + { + "epoch": 4.72869873046875e-06, + "model_forward_time": 0.024533748626708984, + "step": 3099 + }, + { + "epoch": 4.72869873046875e-06, + "step": 3099, + "training_step_time": 0.10903763771057129 + }, + { + "epoch": 4.730224609375e-06, + "grad_norm": 0.7410247325897217, + "learning_rate": 9.922435333662536e-05, + "loss": 0.1088, + "step": 3100 + }, + { + "epoch": 4.730224609375e-06, + "model_forward_time": 0.025169849395751953, + "step": 3100 + }, + { + "epoch": 4.730224609375e-06, + "step": 3100, + "training_step_time": 0.11604928970336914 + }, + { + "epoch": 4.73175048828125e-06, + "model_forward_time": 0.025543928146362305, + "step": 3101 + }, + { + "epoch": 4.73175048828125e-06, + "step": 3101, + "training_step_time": 0.11035680770874023 + }, + { + "epoch": 4.7332763671875e-06, + "model_forward_time": 0.0251615047454834, + "step": 3102 + }, + { + "epoch": 4.7332763671875e-06, + "step": 3102, + "training_step_time": 0.17905545234680176 + }, + { + "epoch": 4.73480224609375e-06, + "model_forward_time": 0.023978471755981445, + "step": 3103 + }, + { + "epoch": 4.73480224609375e-06, + "step": 3103, + "training_step_time": 0.1168661117553711 + }, + { + "epoch": 4.736328125e-06, + "model_forward_time": 0.024658679962158203, + "step": 3104 + }, + { + "epoch": 4.736328125e-06, + "step": 3104, + "training_step_time": 0.10789823532104492 + }, + { + "epoch": 4.73785400390625e-06, + "model_forward_time": 0.02521228790283203, + "step": 3105 + }, + { + "epoch": 4.73785400390625e-06, + "step": 3105, + "training_step_time": 0.19896316528320312 + }, + { + "epoch": 4.7393798828125e-06, + "model_forward_time": 0.02454853057861328, + "step": 3106 + }, + { + "epoch": 4.7393798828125e-06, + "step": 3106, + "training_step_time": 0.16744017601013184 + }, + { + "epoch": 4.74090576171875e-06, + "model_forward_time": 0.024479150772094727, + "step": 3107 + }, + { + "epoch": 4.74090576171875e-06, + "step": 3107, + "training_step_time": 0.11704277992248535 + }, + { + "epoch": 4.742431640625e-06, + "model_forward_time": 0.025068044662475586, + "step": 3108 + }, + { + "epoch": 4.742431640625e-06, + "step": 3108, + "training_step_time": 0.11910486221313477 + }, + { + "epoch": 4.74395751953125e-06, + "model_forward_time": 0.025223970413208008, + "step": 3109 + }, + { + "epoch": 4.74395751953125e-06, + "step": 3109, + "training_step_time": 0.11467719078063965 + }, + { + "epoch": 4.7454833984375e-06, + "grad_norm": 0.8477486968040466, + "learning_rate": 9.921465299512054e-05, + "loss": 0.1006, + "step": 3110 + }, + { + "epoch": 4.7454833984375e-06, + "model_forward_time": 0.025619029998779297, + "step": 3110 + }, + { + "epoch": 4.7454833984375e-06, + "step": 3110, + "training_step_time": 0.20810341835021973 + }, + { + "epoch": 4.74700927734375e-06, + "model_forward_time": 0.024345874786376953, + "step": 3111 + }, + { + "epoch": 4.74700927734375e-06, + "step": 3111, + "training_step_time": 0.11537885665893555 + }, + { + "epoch": 4.74853515625e-06, + "model_forward_time": 0.026366710662841797, + "step": 3112 + }, + { + "epoch": 4.74853515625e-06, + "step": 3112, + "training_step_time": 0.11254429817199707 + }, + { + "epoch": 4.75006103515625e-06, + "model_forward_time": 0.025374889373779297, + "step": 3113 + }, + { + "epoch": 4.75006103515625e-06, + "step": 3113, + "training_step_time": 0.11008405685424805 + }, + { + "epoch": 4.7515869140625e-06, + "model_forward_time": 0.025318622589111328, + "step": 3114 + }, + { + "epoch": 4.7515869140625e-06, + "step": 3114, + "training_step_time": 0.11137890815734863 + }, + { + "epoch": 4.75311279296875e-06, + "model_forward_time": 0.024925708770751953, + "step": 3115 + }, + { + "epoch": 4.75311279296875e-06, + "step": 3115, + "training_step_time": 0.10834264755249023 + }, + { + "epoch": 4.754638671875e-06, + "model_forward_time": 0.025073766708374023, + "step": 3116 + }, + { + "epoch": 4.754638671875e-06, + "step": 3116, + "training_step_time": 0.10764598846435547 + }, + { + "epoch": 4.75616455078125e-06, + "model_forward_time": 0.025084257125854492, + "step": 3117 + }, + { + "epoch": 4.75616455078125e-06, + "step": 3117, + "training_step_time": 0.10880208015441895 + }, + { + "epoch": 4.7576904296875e-06, + "model_forward_time": 0.024461984634399414, + "step": 3118 + }, + { + "epoch": 4.7576904296875e-06, + "step": 3118, + "training_step_time": 0.11026763916015625 + }, + { + "epoch": 4.75921630859375e-06, + "model_forward_time": 0.024316072463989258, + "step": 3119 + }, + { + "epoch": 4.75921630859375e-06, + "step": 3119, + "training_step_time": 0.10907745361328125 + }, + { + "epoch": 4.7607421875e-06, + "grad_norm": 0.5950537323951721, + "learning_rate": 9.92048928531717e-05, + "loss": 0.1217, + "step": 3120 + }, + { + "epoch": 4.7607421875e-06, + "model_forward_time": 0.024172544479370117, + "step": 3120 + }, + { + "epoch": 4.7607421875e-06, + "step": 3120, + "training_step_time": 0.112945556640625 + }, + { + "epoch": 4.76226806640625e-06, + "model_forward_time": 0.02507925033569336, + "step": 3121 + }, + { + "epoch": 4.76226806640625e-06, + "step": 3121, + "training_step_time": 0.10976481437683105 + }, + { + "epoch": 4.7637939453125e-06, + "model_forward_time": 0.02536463737487793, + "step": 3122 + }, + { + "epoch": 4.7637939453125e-06, + "step": 3122, + "training_step_time": 0.10746359825134277 + }, + { + "epoch": 4.76531982421875e-06, + "model_forward_time": 0.025304079055786133, + "step": 3123 + }, + { + "epoch": 4.76531982421875e-06, + "step": 3123, + "training_step_time": 0.10760259628295898 + }, + { + "epoch": 4.766845703125e-06, + "model_forward_time": 0.02433013916015625, + "step": 3124 + }, + { + "epoch": 4.766845703125e-06, + "step": 3124, + "training_step_time": 0.10927248001098633 + }, + { + "epoch": 4.76837158203125e-06, + "model_forward_time": 0.02547144889831543, + "step": 3125 + }, + { + "epoch": 4.76837158203125e-06, + "step": 3125, + "training_step_time": 0.10896539688110352 + }, + { + "epoch": 4.7698974609375e-06, + "model_forward_time": 0.02511119842529297, + "step": 3126 + }, + { + "epoch": 4.7698974609375e-06, + "step": 3126, + "training_step_time": 0.10831022262573242 + }, + { + "epoch": 4.77142333984375e-06, + "model_forward_time": 0.025262117385864258, + "step": 3127 + }, + { + "epoch": 4.77142333984375e-06, + "step": 3127, + "training_step_time": 0.10940718650817871 + }, + { + "epoch": 4.77294921875e-06, + "model_forward_time": 0.02514195442199707, + "step": 3128 + }, + { + "epoch": 4.77294921875e-06, + "step": 3128, + "training_step_time": 0.10848808288574219 + }, + { + "epoch": 4.77447509765625e-06, + "model_forward_time": 0.025318384170532227, + "step": 3129 + }, + { + "epoch": 4.77447509765625e-06, + "step": 3129, + "training_step_time": 0.1079854965209961 + }, + { + "epoch": 4.7760009765625e-06, + "grad_norm": 0.4268414378166199, + "learning_rate": 9.91950729226383e-05, + "loss": 0.1098, + "step": 3130 + }, + { + "epoch": 4.7760009765625e-06, + "model_forward_time": 0.025234699249267578, + "step": 3130 + }, + { + "epoch": 4.7760009765625e-06, + "step": 3130, + "training_step_time": 0.10901474952697754 + }, + { + "epoch": 4.77752685546875e-06, + "model_forward_time": 0.02529597282409668, + "step": 3131 + }, + { + "epoch": 4.77752685546875e-06, + "step": 3131, + "training_step_time": 0.11394929885864258 + }, + { + "epoch": 4.779052734375e-06, + "model_forward_time": 0.02624988555908203, + "step": 3132 + }, + { + "epoch": 4.779052734375e-06, + "step": 3132, + "training_step_time": 0.10981297492980957 + }, + { + "epoch": 4.78057861328125e-06, + "model_forward_time": 0.025560379028320312, + "step": 3133 + }, + { + "epoch": 4.78057861328125e-06, + "step": 3133, + "training_step_time": 0.10924196243286133 + }, + { + "epoch": 4.7821044921875e-06, + "model_forward_time": 0.025394439697265625, + "step": 3134 + }, + { + "epoch": 4.7821044921875e-06, + "step": 3134, + "training_step_time": 0.10924434661865234 + }, + { + "epoch": 4.78363037109375e-06, + "model_forward_time": 0.0254213809967041, + "step": 3135 + }, + { + "epoch": 4.78363037109375e-06, + "step": 3135, + "training_step_time": 0.11183977127075195 + }, + { + "epoch": 4.78515625e-06, + "model_forward_time": 0.025263071060180664, + "step": 3136 + }, + { + "epoch": 4.78515625e-06, + "step": 3136, + "training_step_time": 0.1081991195678711 + }, + { + "epoch": 4.78668212890625e-06, + "model_forward_time": 0.025624513626098633, + "step": 3137 + }, + { + "epoch": 4.78668212890625e-06, + "step": 3137, + "training_step_time": 0.11359095573425293 + }, + { + "epoch": 4.7882080078125e-06, + "model_forward_time": 0.025541305541992188, + "step": 3138 + }, + { + "epoch": 4.7882080078125e-06, + "step": 3138, + "training_step_time": 0.10698437690734863 + }, + { + "epoch": 4.78973388671875e-06, + "model_forward_time": 0.02619338035583496, + "step": 3139 + }, + { + "epoch": 4.78973388671875e-06, + "step": 3139, + "training_step_time": 0.21125388145446777 + }, + { + "epoch": 4.791259765625e-06, + "grad_norm": 0.5600676536560059, + "learning_rate": 9.918519321545251e-05, + "loss": 0.1177, + "step": 3140 + }, + { + "epoch": 4.791259765625e-06, + "model_forward_time": 0.024468421936035156, + "step": 3140 + }, + { + "epoch": 4.791259765625e-06, + "step": 3140, + "training_step_time": 0.1807880401611328 + }, + { + "epoch": 4.79278564453125e-06, + "model_forward_time": 0.024535655975341797, + "step": 3141 + }, + { + "epoch": 4.79278564453125e-06, + "step": 3141, + "training_step_time": 0.1674511432647705 + }, + { + "epoch": 4.7943115234375e-06, + "model_forward_time": 0.02409529685974121, + "step": 3142 + }, + { + "epoch": 4.7943115234375e-06, + "step": 3142, + "training_step_time": 0.21997880935668945 + }, + { + "epoch": 4.79583740234375e-06, + "model_forward_time": 0.024723052978515625, + "step": 3143 + }, + { + "epoch": 4.79583740234375e-06, + "step": 3143, + "training_step_time": 0.1482689380645752 + }, + { + "epoch": 4.79736328125e-06, + "model_forward_time": 0.025604248046875, + "step": 3144 + }, + { + "epoch": 4.79736328125e-06, + "step": 3144, + "training_step_time": 0.17982006072998047 + }, + { + "epoch": 4.79888916015625e-06, + "model_forward_time": 0.024657249450683594, + "step": 3145 + }, + { + "epoch": 4.79888916015625e-06, + "step": 3145, + "training_step_time": 0.13733506202697754 + }, + { + "epoch": 4.8004150390625e-06, + "model_forward_time": 0.02406144142150879, + "step": 3146 + }, + { + "epoch": 4.8004150390625e-06, + "step": 3146, + "training_step_time": 0.11666083335876465 + }, + { + "epoch": 4.80194091796875e-06, + "model_forward_time": 0.025442123413085938, + "step": 3147 + }, + { + "epoch": 4.80194091796875e-06, + "step": 3147, + "training_step_time": 0.10765981674194336 + }, + { + "epoch": 4.803466796875e-06, + "model_forward_time": 0.0256044864654541, + "step": 3148 + }, + { + "epoch": 4.803466796875e-06, + "step": 3148, + "training_step_time": 0.1148371696472168 + }, + { + "epoch": 4.80499267578125e-06, + "model_forward_time": 0.02514481544494629, + "step": 3149 + }, + { + "epoch": 4.80499267578125e-06, + "step": 3149, + "training_step_time": 0.10923099517822266 + }, + { + "epoch": 4.8065185546875e-06, + "grad_norm": 0.7370355129241943, + "learning_rate": 9.917525374361912e-05, + "loss": 0.1256, + "step": 3150 + }, + { + "epoch": 4.8065185546875e-06, + "model_forward_time": 0.025751829147338867, + "step": 3150 + }, + { + "epoch": 4.8065185546875e-06, + "step": 3150, + "training_step_time": 0.19551610946655273 + }, + { + "epoch": 4.80804443359375e-06, + "model_forward_time": 0.02410435676574707, + "step": 3151 + }, + { + "epoch": 4.80804443359375e-06, + "step": 3151, + "training_step_time": 0.18570613861083984 + }, + { + "epoch": 4.8095703125e-06, + "model_forward_time": 0.024379730224609375, + "step": 3152 + }, + { + "epoch": 4.8095703125e-06, + "step": 3152, + "training_step_time": 0.12948274612426758 + }, + { + "epoch": 4.81109619140625e-06, + "model_forward_time": 0.02476191520690918, + "step": 3153 + }, + { + "epoch": 4.81109619140625e-06, + "step": 3153, + "training_step_time": 0.11074709892272949 + }, + { + "epoch": 4.8126220703125e-06, + "model_forward_time": 0.026234149932861328, + "step": 3154 + }, + { + "epoch": 4.8126220703125e-06, + "step": 3154, + "training_step_time": 0.11652326583862305 + }, + { + "epoch": 4.81414794921875e-06, + "model_forward_time": 0.025310516357421875, + "step": 3155 + }, + { + "epoch": 4.81414794921875e-06, + "step": 3155, + "training_step_time": 0.2198777198791504 + }, + { + "epoch": 4.815673828125e-06, + "model_forward_time": 0.024760007858276367, + "step": 3156 + }, + { + "epoch": 4.815673828125e-06, + "step": 3156, + "training_step_time": 0.10975384712219238 + }, + { + "epoch": 4.81719970703125e-06, + "model_forward_time": 0.025514841079711914, + "step": 3157 + }, + { + "epoch": 4.81719970703125e-06, + "step": 3157, + "training_step_time": 0.10725116729736328 + }, + { + "epoch": 4.8187255859375e-06, + "model_forward_time": 0.02529287338256836, + "step": 3158 + }, + { + "epoch": 4.8187255859375e-06, + "step": 3158, + "training_step_time": 0.11344408988952637 + }, + { + "epoch": 4.82025146484375e-06, + "model_forward_time": 0.025048494338989258, + "step": 3159 + }, + { + "epoch": 4.82025146484375e-06, + "step": 3159, + "training_step_time": 0.10832333564758301 + }, + { + "epoch": 4.82177734375e-06, + "grad_norm": 0.8812365531921387, + "learning_rate": 9.91652545192155e-05, + "loss": 0.14, + "step": 3160 + }, + { + "epoch": 4.82177734375e-06, + "model_forward_time": 0.02489018440246582, + "step": 3160 + }, + { + "epoch": 4.82177734375e-06, + "step": 3160, + "training_step_time": 0.11364555358886719 + }, + { + "epoch": 4.82330322265625e-06, + "model_forward_time": 0.02547287940979004, + "step": 3161 + }, + { + "epoch": 4.82330322265625e-06, + "step": 3161, + "training_step_time": 0.1086728572845459 + }, + { + "epoch": 4.8248291015625e-06, + "model_forward_time": 0.02527642250061035, + "step": 3162 + }, + { + "epoch": 4.8248291015625e-06, + "step": 3162, + "training_step_time": 0.10724067687988281 + }, + { + "epoch": 4.82635498046875e-06, + "model_forward_time": 0.025109529495239258, + "step": 3163 + }, + { + "epoch": 4.82635498046875e-06, + "step": 3163, + "training_step_time": 0.10861659049987793 + }, + { + "epoch": 4.827880859375e-06, + "model_forward_time": 0.025444746017456055, + "step": 3164 + }, + { + "epoch": 4.827880859375e-06, + "step": 3164, + "training_step_time": 0.1164860725402832 + }, + { + "epoch": 4.82940673828125e-06, + "model_forward_time": 0.02514934539794922, + "step": 3165 + }, + { + "epoch": 4.82940673828125e-06, + "step": 3165, + "training_step_time": 0.10828614234924316 + }, + { + "epoch": 4.8309326171875e-06, + "model_forward_time": 0.025427579879760742, + "step": 3166 + }, + { + "epoch": 4.8309326171875e-06, + "step": 3166, + "training_step_time": 0.11019372940063477 + }, + { + "epoch": 4.83245849609375e-06, + "model_forward_time": 0.025220870971679688, + "step": 3167 + }, + { + "epoch": 4.83245849609375e-06, + "step": 3167, + "training_step_time": 0.10805034637451172 + }, + { + "epoch": 4.833984375e-06, + "model_forward_time": 0.025301218032836914, + "step": 3168 + }, + { + "epoch": 4.833984375e-06, + "step": 3168, + "training_step_time": 0.11912965774536133 + }, + { + "epoch": 4.83551025390625e-06, + "model_forward_time": 0.025051355361938477, + "step": 3169 + }, + { + "epoch": 4.83551025390625e-06, + "step": 3169, + "training_step_time": 0.11191511154174805 + }, + { + "epoch": 4.8370361328125e-06, + "grad_norm": 0.9046509265899658, + "learning_rate": 9.915519555439166e-05, + "loss": 0.1481, + "step": 3170 + }, + { + "epoch": 4.8370361328125e-06, + "model_forward_time": 0.025673389434814453, + "step": 3170 + }, + { + "epoch": 4.8370361328125e-06, + "step": 3170, + "training_step_time": 0.11039066314697266 + }, + { + "epoch": 4.83856201171875e-06, + "model_forward_time": 0.025052547454833984, + "step": 3171 + }, + { + "epoch": 4.83856201171875e-06, + "step": 3171, + "training_step_time": 0.14138412475585938 + }, + { + "epoch": 4.840087890625e-06, + "model_forward_time": 0.025269031524658203, + "step": 3172 + }, + { + "epoch": 4.840087890625e-06, + "step": 3172, + "training_step_time": 0.16205191612243652 + }, + { + "epoch": 4.84161376953125e-06, + "model_forward_time": 0.02432107925415039, + "step": 3173 + }, + { + "epoch": 4.84161376953125e-06, + "step": 3173, + "training_step_time": 0.1522657871246338 + }, + { + "epoch": 4.8431396484375e-06, + "model_forward_time": 0.024186372756958008, + "step": 3174 + }, + { + "epoch": 4.8431396484375e-06, + "step": 3174, + "training_step_time": 0.13804411888122559 + }, + { + "epoch": 4.84466552734375e-06, + "model_forward_time": 0.024758100509643555, + "step": 3175 + }, + { + "epoch": 4.84466552734375e-06, + "step": 3175, + "training_step_time": 0.13001513481140137 + }, + { + "epoch": 4.84619140625e-06, + "model_forward_time": 0.024761199951171875, + "step": 3176 + }, + { + "epoch": 4.84619140625e-06, + "step": 3176, + "training_step_time": 0.12532424926757812 + }, + { + "epoch": 4.84771728515625e-06, + "model_forward_time": 0.02473616600036621, + "step": 3177 + }, + { + "epoch": 4.84771728515625e-06, + "step": 3177, + "training_step_time": 0.12359476089477539 + }, + { + "epoch": 4.8492431640625e-06, + "model_forward_time": 0.025469303131103516, + "step": 3178 + }, + { + "epoch": 4.8492431640625e-06, + "step": 3178, + "training_step_time": 0.12058568000793457 + }, + { + "epoch": 4.85076904296875e-06, + "model_forward_time": 0.025388002395629883, + "step": 3179 + }, + { + "epoch": 4.85076904296875e-06, + "step": 3179, + "training_step_time": 0.11426448822021484 + }, + { + "epoch": 4.852294921875e-06, + "grad_norm": 0.9116901755332947, + "learning_rate": 9.914507686137019e-05, + "loss": 0.1576, + "step": 3180 + }, + { + "epoch": 4.852294921875e-06, + "model_forward_time": 0.025117874145507812, + "step": 3180 + }, + { + "epoch": 4.852294921875e-06, + "step": 3180, + "training_step_time": 0.11200404167175293 + }, + { + "epoch": 4.85382080078125e-06, + "model_forward_time": 0.025031566619873047, + "step": 3181 + }, + { + "epoch": 4.85382080078125e-06, + "step": 3181, + "training_step_time": 0.2213914394378662 + }, + { + "epoch": 4.8553466796875e-06, + "model_forward_time": 0.024785995483398438, + "step": 3182 + }, + { + "epoch": 4.8553466796875e-06, + "step": 3182, + "training_step_time": 0.11844182014465332 + }, + { + "epoch": 4.85687255859375e-06, + "model_forward_time": 0.024105548858642578, + "step": 3183 + }, + { + "epoch": 4.85687255859375e-06, + "step": 3183, + "training_step_time": 0.18578124046325684 + }, + { + "epoch": 4.8583984375e-06, + "model_forward_time": 0.02423095703125, + "step": 3184 + }, + { + "epoch": 4.8583984375e-06, + "step": 3184, + "training_step_time": 0.13434386253356934 + }, + { + "epoch": 4.85992431640625e-06, + "model_forward_time": 0.024040937423706055, + "step": 3185 + }, + { + "epoch": 4.85992431640625e-06, + "step": 3185, + "training_step_time": 0.19899797439575195 + }, + { + "epoch": 4.8614501953125e-06, + "model_forward_time": 0.02435588836669922, + "step": 3186 + }, + { + "epoch": 4.8614501953125e-06, + "step": 3186, + "training_step_time": 0.16465091705322266 + }, + { + "epoch": 4.86297607421875e-06, + "model_forward_time": 0.024432659149169922, + "step": 3187 + }, + { + "epoch": 4.86297607421875e-06, + "step": 3187, + "training_step_time": 0.1498579978942871 + }, + { + "epoch": 4.864501953125e-06, + "model_forward_time": 0.02463388442993164, + "step": 3188 + }, + { + "epoch": 4.864501953125e-06, + "step": 3188, + "training_step_time": 0.1433238983154297 + }, + { + "epoch": 4.86602783203125e-06, + "model_forward_time": 0.0248563289642334, + "step": 3189 + }, + { + "epoch": 4.86602783203125e-06, + "step": 3189, + "training_step_time": 0.11121296882629395 + }, + { + "epoch": 4.8675537109375e-06, + "grad_norm": 0.5139617323875427, + "learning_rate": 9.913489845244626e-05, + "loss": 0.1522, + "step": 3190 + }, + { + "epoch": 4.8675537109375e-06, + "model_forward_time": 0.02484273910522461, + "step": 3190 + }, + { + "epoch": 4.8675537109375e-06, + "step": 3190, + "training_step_time": 0.11066246032714844 + }, + { + "epoch": 4.86907958984375e-06, + "model_forward_time": 0.024976730346679688, + "step": 3191 + }, + { + "epoch": 4.86907958984375e-06, + "step": 3191, + "training_step_time": 0.10774540901184082 + }, + { + "epoch": 4.87060546875e-06, + "model_forward_time": 0.025418519973754883, + "step": 3192 + }, + { + "epoch": 4.87060546875e-06, + "step": 3192, + "training_step_time": 0.11703896522521973 + }, + { + "epoch": 4.87213134765625e-06, + "model_forward_time": 0.025385379791259766, + "step": 3193 + }, + { + "epoch": 4.87213134765625e-06, + "step": 3193, + "training_step_time": 0.1956779956817627 + }, + { + "epoch": 4.8736572265625e-06, + "model_forward_time": 0.025589942932128906, + "step": 3194 + }, + { + "epoch": 4.8736572265625e-06, + "step": 3194, + "training_step_time": 0.1103067398071289 + }, + { + "epoch": 4.87518310546875e-06, + "model_forward_time": 0.024490833282470703, + "step": 3195 + }, + { + "epoch": 4.87518310546875e-06, + "step": 3195, + "training_step_time": 0.21574139595031738 + }, + { + "epoch": 4.876708984375e-06, + "model_forward_time": 0.02475595474243164, + "step": 3196 + }, + { + "epoch": 4.876708984375e-06, + "step": 3196, + "training_step_time": 0.11703276634216309 + }, + { + "epoch": 4.87823486328125e-06, + "model_forward_time": 0.025253772735595703, + "step": 3197 + }, + { + "epoch": 4.87823486328125e-06, + "step": 3197, + "training_step_time": 0.11318802833557129 + }, + { + "epoch": 4.8797607421875e-06, + "model_forward_time": 0.025367259979248047, + "step": 3198 + }, + { + "epoch": 4.8797607421875e-06, + "step": 3198, + "training_step_time": 0.2274341583251953 + }, + { + "epoch": 4.88128662109375e-06, + "model_forward_time": 0.024669170379638672, + "step": 3199 + }, + { + "epoch": 4.88128662109375e-06, + "step": 3199, + "training_step_time": 0.1598987579345703 + }, + { + "epoch": 4.8828125e-06, + "grad_norm": 0.4906257390975952, + "learning_rate": 9.912466033998757e-05, + "loss": 0.122, + "step": 3200 + }, + { + "epoch": 4.8828125e-06, + "model_forward_time": 0.02478766441345215, + "step": 3200 + }, + { + "epoch": 4.8828125e-06, + "step": 3200, + "training_step_time": 0.10839557647705078 + }, + { + "epoch": 4.88433837890625e-06, + "model_forward_time": 0.02462601661682129, + "step": 3201 + }, + { + "epoch": 4.88433837890625e-06, + "step": 3201, + "training_step_time": 0.11238646507263184 + }, + { + "epoch": 4.8858642578125e-06, + "model_forward_time": 0.025043487548828125, + "step": 3202 + }, + { + "epoch": 4.8858642578125e-06, + "step": 3202, + "training_step_time": 0.11549711227416992 + }, + { + "epoch": 4.88739013671875e-06, + "model_forward_time": 0.02493143081665039, + "step": 3203 + }, + { + "epoch": 4.88739013671875e-06, + "step": 3203, + "training_step_time": 0.1100616455078125 + }, + { + "epoch": 4.888916015625e-06, + "model_forward_time": 0.024981021881103516, + "step": 3204 + }, + { + "epoch": 4.888916015625e-06, + "step": 3204, + "training_step_time": 0.10721588134765625 + }, + { + "epoch": 4.89044189453125e-06, + "model_forward_time": 0.025205612182617188, + "step": 3205 + }, + { + "epoch": 4.89044189453125e-06, + "step": 3205, + "training_step_time": 0.11126208305358887 + }, + { + "epoch": 4.8919677734375e-06, + "model_forward_time": 0.025455474853515625, + "step": 3206 + }, + { + "epoch": 4.8919677734375e-06, + "step": 3206, + "training_step_time": 0.11132097244262695 + }, + { + "epoch": 4.89349365234375e-06, + "model_forward_time": 0.024722814559936523, + "step": 3207 + }, + { + "epoch": 4.89349365234375e-06, + "step": 3207, + "training_step_time": 0.10744476318359375 + }, + { + "epoch": 4.89501953125e-06, + "model_forward_time": 0.025031566619873047, + "step": 3208 + }, + { + "epoch": 4.89501953125e-06, + "step": 3208, + "training_step_time": 0.11104273796081543 + }, + { + "epoch": 4.89654541015625e-06, + "model_forward_time": 0.025067567825317383, + "step": 3209 + }, + { + "epoch": 4.89654541015625e-06, + "step": 3209, + "training_step_time": 0.10792708396911621 + }, + { + "epoch": 4.8980712890625e-06, + "grad_norm": 0.6585983037948608, + "learning_rate": 9.911436253643445e-05, + "loss": 0.1165, + "step": 3210 + }, + { + "epoch": 4.8980712890625e-06, + "model_forward_time": 0.02523493766784668, + "step": 3210 + }, + { + "epoch": 4.8980712890625e-06, + "step": 3210, + "training_step_time": 0.11326193809509277 + }, + { + "epoch": 4.89959716796875e-06, + "model_forward_time": 0.024994373321533203, + "step": 3211 + }, + { + "epoch": 4.89959716796875e-06, + "step": 3211, + "training_step_time": 0.10886335372924805 + }, + { + "epoch": 4.901123046875e-06, + "model_forward_time": 0.02552652359008789, + "step": 3212 + }, + { + "epoch": 4.901123046875e-06, + "step": 3212, + "training_step_time": 0.10810375213623047 + }, + { + "epoch": 4.90264892578125e-06, + "model_forward_time": 0.0249021053314209, + "step": 3213 + }, + { + "epoch": 4.90264892578125e-06, + "step": 3213, + "training_step_time": 0.10915923118591309 + }, + { + "epoch": 4.9041748046875e-06, + "model_forward_time": 0.024564027786254883, + "step": 3214 + }, + { + "epoch": 4.9041748046875e-06, + "step": 3214, + "training_step_time": 0.11595916748046875 + }, + { + "epoch": 4.90570068359375e-06, + "model_forward_time": 0.024170637130737305, + "step": 3215 + }, + { + "epoch": 4.90570068359375e-06, + "step": 3215, + "training_step_time": 0.11234235763549805 + }, + { + "epoch": 4.9072265625e-06, + "model_forward_time": 0.024499177932739258, + "step": 3216 + }, + { + "epoch": 4.9072265625e-06, + "step": 3216, + "training_step_time": 0.11677289009094238 + }, + { + "epoch": 4.90875244140625e-06, + "model_forward_time": 0.02436375617980957, + "step": 3217 + }, + { + "epoch": 4.90875244140625e-06, + "step": 3217, + "training_step_time": 0.10746884346008301 + }, + { + "epoch": 4.9102783203125e-06, + "model_forward_time": 0.02528524398803711, + "step": 3218 + }, + { + "epoch": 4.9102783203125e-06, + "step": 3218, + "training_step_time": 0.1084144115447998 + }, + { + "epoch": 4.91180419921875e-06, + "model_forward_time": 0.025277137756347656, + "step": 3219 + }, + { + "epoch": 4.91180419921875e-06, + "step": 3219, + "training_step_time": 0.1116180419921875 + }, + { + "epoch": 4.913330078125e-06, + "grad_norm": 0.9967507123947144, + "learning_rate": 9.910400505429965e-05, + "loss": 0.1518, + "step": 3220 + }, + { + "epoch": 4.913330078125e-06, + "model_forward_time": 0.02497410774230957, + "step": 3220 + }, + { + "epoch": 4.913330078125e-06, + "step": 3220, + "training_step_time": 0.10763144493103027 + }, + { + "epoch": 4.91485595703125e-06, + "model_forward_time": 0.025525808334350586, + "step": 3221 + }, + { + "epoch": 4.91485595703125e-06, + "step": 3221, + "training_step_time": 0.10751032829284668 + }, + { + "epoch": 4.9163818359375e-06, + "model_forward_time": 0.02528977394104004, + "step": 3222 + }, + { + "epoch": 4.9163818359375e-06, + "step": 3222, + "training_step_time": 0.10893654823303223 + }, + { + "epoch": 4.91790771484375e-06, + "model_forward_time": 0.025118112564086914, + "step": 3223 + }, + { + "epoch": 4.91790771484375e-06, + "step": 3223, + "training_step_time": 0.1071171760559082 + }, + { + "epoch": 4.91943359375e-06, + "model_forward_time": 0.025223255157470703, + "step": 3224 + }, + { + "epoch": 4.91943359375e-06, + "step": 3224, + "training_step_time": 0.1080465316772461 + }, + { + "epoch": 4.92095947265625e-06, + "model_forward_time": 0.02525639533996582, + "step": 3225 + }, + { + "epoch": 4.92095947265625e-06, + "step": 3225, + "training_step_time": 0.2277822494506836 + }, + { + "epoch": 4.9224853515625e-06, + "model_forward_time": 0.02449631690979004, + "step": 3226 + }, + { + "epoch": 4.9224853515625e-06, + "step": 3226, + "training_step_time": 0.15121841430664062 + }, + { + "epoch": 4.92401123046875e-06, + "model_forward_time": 0.024351119995117188, + "step": 3227 + }, + { + "epoch": 4.92401123046875e-06, + "step": 3227, + "training_step_time": 0.21167445182800293 + }, + { + "epoch": 4.925537109375e-06, + "model_forward_time": 0.024397850036621094, + "step": 3228 + }, + { + "epoch": 4.925537109375e-06, + "step": 3228, + "training_step_time": 0.139068603515625 + }, + { + "epoch": 4.92706298828125e-06, + "model_forward_time": 0.025906085968017578, + "step": 3229 + }, + { + "epoch": 4.92706298828125e-06, + "step": 3229, + "training_step_time": 0.19667959213256836 + }, + { + "epoch": 4.9285888671875e-06, + "grad_norm": 0.8460479378700256, + "learning_rate": 9.909358790616849e-05, + "loss": 0.1425, + "step": 3230 + }, + { + "epoch": 4.9285888671875e-06, + "model_forward_time": 0.025046586990356445, + "step": 3230 + }, + { + "epoch": 4.9285888671875e-06, + "step": 3230, + "training_step_time": 0.15529131889343262 + }, + { + "epoch": 4.93011474609375e-06, + "model_forward_time": 0.024851560592651367, + "step": 3231 + }, + { + "epoch": 4.93011474609375e-06, + "step": 3231, + "training_step_time": 0.14596843719482422 + }, + { + "epoch": 4.931640625e-06, + "model_forward_time": 0.02543354034423828, + "step": 3232 + }, + { + "epoch": 4.931640625e-06, + "step": 3232, + "training_step_time": 0.1572108268737793 + }, + { + "epoch": 4.93316650390625e-06, + "model_forward_time": 0.025016069412231445, + "step": 3233 + }, + { + "epoch": 4.93316650390625e-06, + "step": 3233, + "training_step_time": 0.22903752326965332 + }, + { + "epoch": 4.9346923828125e-06, + "model_forward_time": 0.025895357131958008, + "step": 3234 + }, + { + "epoch": 4.9346923828125e-06, + "step": 3234, + "training_step_time": 0.20109176635742188 + }, + { + "epoch": 4.93621826171875e-06, + "model_forward_time": 0.026651382446289062, + "step": 3235 + }, + { + "epoch": 4.93621826171875e-06, + "step": 3235, + "training_step_time": 0.19794034957885742 + }, + { + "epoch": 4.937744140625e-06, + "model_forward_time": 0.025330781936645508, + "step": 3236 + }, + { + "epoch": 4.937744140625e-06, + "step": 3236, + "training_step_time": 0.11130642890930176 + }, + { + "epoch": 4.93927001953125e-06, + "model_forward_time": 0.024691104888916016, + "step": 3237 + }, + { + "epoch": 4.93927001953125e-06, + "step": 3237, + "training_step_time": 0.2139301300048828 + }, + { + "epoch": 4.9407958984375e-06, + "model_forward_time": 0.024454832077026367, + "step": 3238 + }, + { + "epoch": 4.9407958984375e-06, + "step": 3238, + "training_step_time": 0.12315964698791504 + }, + { + "epoch": 4.94232177734375e-06, + "model_forward_time": 0.02460789680480957, + "step": 3239 + }, + { + "epoch": 4.94232177734375e-06, + "step": 3239, + "training_step_time": 0.11834836006164551 + }, + { + "epoch": 4.94384765625e-06, + "grad_norm": 0.49196356534957886, + "learning_rate": 9.90831111046988e-05, + "loss": 0.127, + "step": 3240 + }, + { + "epoch": 4.94384765625e-06, + "model_forward_time": 0.026029586791992188, + "step": 3240 + }, + { + "epoch": 4.94384765625e-06, + "step": 3240, + "training_step_time": 0.20206093788146973 + }, + { + "epoch": 4.94537353515625e-06, + "model_forward_time": 0.025101184844970703, + "step": 3241 + }, + { + "epoch": 4.94537353515625e-06, + "step": 3241, + "training_step_time": 0.11531448364257812 + }, + { + "epoch": 4.9468994140625e-06, + "model_forward_time": 0.025177717208862305, + "step": 3242 + }, + { + "epoch": 4.9468994140625e-06, + "step": 3242, + "training_step_time": 0.10837507247924805 + }, + { + "epoch": 4.94842529296875e-06, + "model_forward_time": 0.025664091110229492, + "step": 3243 + }, + { + "epoch": 4.94842529296875e-06, + "step": 3243, + "training_step_time": 0.10791540145874023 + }, + { + "epoch": 4.949951171875e-06, + "model_forward_time": 0.02532196044921875, + "step": 3244 + }, + { + "epoch": 4.949951171875e-06, + "step": 3244, + "training_step_time": 0.10921359062194824 + }, + { + "epoch": 4.95147705078125e-06, + "model_forward_time": 0.02577805519104004, + "step": 3245 + }, + { + "epoch": 4.95147705078125e-06, + "step": 3245, + "training_step_time": 0.11053252220153809 + }, + { + "epoch": 4.9530029296875e-06, + "model_forward_time": 0.025481700897216797, + "step": 3246 + }, + { + "epoch": 4.9530029296875e-06, + "step": 3246, + "training_step_time": 0.10992026329040527 + }, + { + "epoch": 4.95452880859375e-06, + "model_forward_time": 0.026688337326049805, + "step": 3247 + }, + { + "epoch": 4.95452880859375e-06, + "step": 3247, + "training_step_time": 0.11150455474853516 + }, + { + "epoch": 4.9560546875e-06, + "model_forward_time": 0.025789976119995117, + "step": 3248 + }, + { + "epoch": 4.9560546875e-06, + "step": 3248, + "training_step_time": 0.10913610458374023 + }, + { + "epoch": 4.95758056640625e-06, + "model_forward_time": 0.025461196899414062, + "step": 3249 + }, + { + "epoch": 4.95758056640625e-06, + "step": 3249, + "training_step_time": 0.10711050033569336 + }, + { + "epoch": 4.9591064453125e-06, + "grad_norm": 0.6129723191261292, + "learning_rate": 9.90725746626209e-05, + "loss": 0.1285, + "step": 3250 + }, + { + "epoch": 4.9591064453125e-06, + "model_forward_time": 0.025580406188964844, + "step": 3250 + }, + { + "epoch": 4.9591064453125e-06, + "step": 3250, + "training_step_time": 0.1126255989074707 + }, + { + "epoch": 4.96063232421875e-06, + "model_forward_time": 0.02577066421508789, + "step": 3251 + }, + { + "epoch": 4.96063232421875e-06, + "step": 3251, + "training_step_time": 0.10759377479553223 + }, + { + "epoch": 4.962158203125e-06, + "model_forward_time": 0.02594447135925293, + "step": 3252 + }, + { + "epoch": 4.962158203125e-06, + "step": 3252, + "training_step_time": 0.10802960395812988 + }, + { + "epoch": 4.96368408203125e-06, + "model_forward_time": 0.025814294815063477, + "step": 3253 + }, + { + "epoch": 4.96368408203125e-06, + "step": 3253, + "training_step_time": 0.10796785354614258 + }, + { + "epoch": 4.9652099609375e-06, + "model_forward_time": 0.025761127471923828, + "step": 3254 + }, + { + "epoch": 4.9652099609375e-06, + "step": 3254, + "training_step_time": 0.10899806022644043 + }, + { + "epoch": 4.96673583984375e-06, + "model_forward_time": 0.025426149368286133, + "step": 3255 + }, + { + "epoch": 4.96673583984375e-06, + "step": 3255, + "training_step_time": 0.11186838150024414 + }, + { + "epoch": 4.96826171875e-06, + "model_forward_time": 0.025588035583496094, + "step": 3256 + }, + { + "epoch": 4.96826171875e-06, + "step": 3256, + "training_step_time": 0.10788464546203613 + }, + { + "epoch": 4.96978759765625e-06, + "model_forward_time": 0.025451183319091797, + "step": 3257 + }, + { + "epoch": 4.96978759765625e-06, + "step": 3257, + "training_step_time": 0.11051535606384277 + }, + { + "epoch": 4.9713134765625e-06, + "model_forward_time": 0.026192188262939453, + "step": 3258 + }, + { + "epoch": 4.9713134765625e-06, + "step": 3258, + "training_step_time": 0.10768914222717285 + }, + { + "epoch": 4.97283935546875e-06, + "model_forward_time": 0.025746583938598633, + "step": 3259 + }, + { + "epoch": 4.97283935546875e-06, + "step": 3259, + "training_step_time": 0.106719970703125 + }, + { + "epoch": 4.974365234375e-06, + "grad_norm": 0.9966940879821777, + "learning_rate": 9.906197859273753e-05, + "loss": 0.145, + "step": 3260 + }, + { + "epoch": 4.974365234375e-06, + "model_forward_time": 0.025510787963867188, + "step": 3260 + }, + { + "epoch": 4.974365234375e-06, + "step": 3260, + "training_step_time": 0.10967254638671875 + }, + { + "epoch": 4.97589111328125e-06, + "model_forward_time": 0.025699853897094727, + "step": 3261 + }, + { + "epoch": 4.97589111328125e-06, + "step": 3261, + "training_step_time": 0.10957598686218262 + }, + { + "epoch": 4.9774169921875e-06, + "model_forward_time": 0.025473833084106445, + "step": 3262 + }, + { + "epoch": 4.9774169921875e-06, + "step": 3262, + "training_step_time": 0.10700035095214844 + }, + { + "epoch": 4.97894287109375e-06, + "model_forward_time": 0.02553582191467285, + "step": 3263 + }, + { + "epoch": 4.97894287109375e-06, + "step": 3263, + "training_step_time": 0.10734295845031738 + }, + { + "epoch": 4.98046875e-06, + "model_forward_time": 0.02573251724243164, + "step": 3264 + }, + { + "epoch": 4.98046875e-06, + "step": 3264, + "training_step_time": 0.11006784439086914 + }, + { + "epoch": 4.98199462890625e-06, + "model_forward_time": 0.02547168731689453, + "step": 3265 + }, + { + "epoch": 4.98199462890625e-06, + "step": 3265, + "training_step_time": 0.10898089408874512 + }, + { + "epoch": 4.9835205078125e-06, + "model_forward_time": 0.02541184425354004, + "step": 3266 + }, + { + "epoch": 4.9835205078125e-06, + "step": 3266, + "training_step_time": 0.10830068588256836 + }, + { + "epoch": 4.98504638671875e-06, + "model_forward_time": 0.025359630584716797, + "step": 3267 + }, + { + "epoch": 4.98504638671875e-06, + "step": 3267, + "training_step_time": 0.10808444023132324 + }, + { + "epoch": 4.986572265625e-06, + "model_forward_time": 0.02550220489501953, + "step": 3268 + }, + { + "epoch": 4.986572265625e-06, + "step": 3268, + "training_step_time": 0.1588141918182373 + }, + { + "epoch": 4.98809814453125e-06, + "model_forward_time": 0.025068044662475586, + "step": 3269 + }, + { + "epoch": 4.98809814453125e-06, + "step": 3269, + "training_step_time": 0.17066001892089844 + }, + { + "epoch": 4.9896240234375e-06, + "grad_norm": 0.5129582285881042, + "learning_rate": 9.905132290792394e-05, + "loss": 0.12, + "step": 3270 + }, + { + "epoch": 4.9896240234375e-06, + "model_forward_time": 0.024705886840820312, + "step": 3270 + }, + { + "epoch": 4.9896240234375e-06, + "step": 3270, + "training_step_time": 0.1843571662902832 + }, + { + "epoch": 4.99114990234375e-06, + "model_forward_time": 0.02533411979675293, + "step": 3271 + }, + { + "epoch": 4.99114990234375e-06, + "step": 3271, + "training_step_time": 0.18781065940856934 + }, + { + "epoch": 4.99267578125e-06, + "model_forward_time": 0.024753570556640625, + "step": 3272 + }, + { + "epoch": 4.99267578125e-06, + "step": 3272, + "training_step_time": 0.19375324249267578 + }, + { + "epoch": 4.99420166015625e-06, + "model_forward_time": 0.024758338928222656, + "step": 3273 + }, + { + "epoch": 4.99420166015625e-06, + "step": 3273, + "training_step_time": 0.13396215438842773 + }, + { + "epoch": 4.9957275390625e-06, + "model_forward_time": 0.02509164810180664, + "step": 3274 + }, + { + "epoch": 4.9957275390625e-06, + "step": 3274, + "training_step_time": 0.18749070167541504 + }, + { + "epoch": 4.99725341796875e-06, + "model_forward_time": 0.024580001831054688, + "step": 3275 + }, + { + "epoch": 4.99725341796875e-06, + "step": 3275, + "training_step_time": 0.18320822715759277 + }, + { + "epoch": 4.998779296875e-06, + "model_forward_time": 0.024519920349121094, + "step": 3276 + }, + { + "epoch": 4.998779296875e-06, + "step": 3276, + "training_step_time": 0.1153876781463623 + }, + { + "epoch": 5.00030517578125e-06, + "model_forward_time": 0.02528071403503418, + "step": 3277 + }, + { + "epoch": 5.00030517578125e-06, + "step": 3277, + "training_step_time": 0.1585848331451416 + }, + { + "epoch": 5.0018310546875e-06, + "model_forward_time": 0.024559736251831055, + "step": 3278 + }, + { + "epoch": 5.0018310546875e-06, + "step": 3278, + "training_step_time": 0.1093747615814209 + }, + { + "epoch": 5.00335693359375e-06, + "model_forward_time": 0.025209903717041016, + "step": 3279 + }, + { + "epoch": 5.00335693359375e-06, + "step": 3279, + "training_step_time": 0.10776019096374512 + }, + { + "epoch": 5.0048828125e-06, + "grad_norm": 0.7755047082901001, + "learning_rate": 9.904060762112777e-05, + "loss": 0.126, + "step": 3280 + }, + { + "epoch": 5.0048828125e-06, + "model_forward_time": 0.025447368621826172, + "step": 3280 + }, + { + "epoch": 5.0048828125e-06, + "step": 3280, + "training_step_time": 0.11076784133911133 + }, + { + "epoch": 5.00640869140625e-06, + "model_forward_time": 0.02446436882019043, + "step": 3281 + }, + { + "epoch": 5.00640869140625e-06, + "step": 3281, + "training_step_time": 0.16920948028564453 + }, + { + "epoch": 5.0079345703125e-06, + "model_forward_time": 0.02556920051574707, + "step": 3282 + }, + { + "epoch": 5.0079345703125e-06, + "step": 3282, + "training_step_time": 0.1642313003540039 + }, + { + "epoch": 5.00946044921875e-06, + "model_forward_time": 0.02524542808532715, + "step": 3283 + }, + { + "epoch": 5.00946044921875e-06, + "step": 3283, + "training_step_time": 0.13206839561462402 + }, + { + "epoch": 5.010986328125e-06, + "model_forward_time": 0.02482295036315918, + "step": 3284 + }, + { + "epoch": 5.010986328125e-06, + "step": 3284, + "training_step_time": 0.19943451881408691 + }, + { + "epoch": 5.01251220703125e-06, + "model_forward_time": 0.024399518966674805, + "step": 3285 + }, + { + "epoch": 5.01251220703125e-06, + "step": 3285, + "training_step_time": 0.12189745903015137 + }, + { + "epoch": 5.0140380859375e-06, + "model_forward_time": 0.02486872673034668, + "step": 3286 + }, + { + "epoch": 5.0140380859375e-06, + "step": 3286, + "training_step_time": 0.1141045093536377 + }, + { + "epoch": 5.01556396484375e-06, + "model_forward_time": 0.024568557739257812, + "step": 3287 + }, + { + "epoch": 5.01556396484375e-06, + "step": 3287, + "training_step_time": 0.11363387107849121 + }, + { + "epoch": 5.01708984375e-06, + "model_forward_time": 0.02406787872314453, + "step": 3288 + }, + { + "epoch": 5.01708984375e-06, + "step": 3288, + "training_step_time": 0.11143231391906738 + }, + { + "epoch": 5.01861572265625e-06, + "model_forward_time": 0.025574445724487305, + "step": 3289 + }, + { + "epoch": 5.01861572265625e-06, + "step": 3289, + "training_step_time": 0.11050891876220703 + }, + { + "epoch": 5.0201416015625e-06, + "grad_norm": 0.47359898686408997, + "learning_rate": 9.902983274536912e-05, + "loss": 0.1297, + "step": 3290 + }, + { + "epoch": 5.0201416015625e-06, + "model_forward_time": 0.02555561065673828, + "step": 3290 + }, + { + "epoch": 5.0201416015625e-06, + "step": 3290, + "training_step_time": 0.11321473121643066 + }, + { + "epoch": 5.02166748046875e-06, + "model_forward_time": 0.025382041931152344, + "step": 3291 + }, + { + "epoch": 5.02166748046875e-06, + "step": 3291, + "training_step_time": 0.11043715476989746 + }, + { + "epoch": 5.023193359375e-06, + "model_forward_time": 0.025308847427368164, + "step": 3292 + }, + { + "epoch": 5.023193359375e-06, + "step": 3292, + "training_step_time": 0.10826277732849121 + }, + { + "epoch": 5.02471923828125e-06, + "model_forward_time": 0.025557994842529297, + "step": 3293 + }, + { + "epoch": 5.02471923828125e-06, + "step": 3293, + "training_step_time": 0.11281871795654297 + }, + { + "epoch": 5.0262451171875e-06, + "model_forward_time": 0.025214195251464844, + "step": 3294 + }, + { + "epoch": 5.0262451171875e-06, + "step": 3294, + "training_step_time": 0.10975050926208496 + }, + { + "epoch": 5.02777099609375e-06, + "model_forward_time": 0.02509164810180664, + "step": 3295 + }, + { + "epoch": 5.02777099609375e-06, + "step": 3295, + "training_step_time": 0.10721588134765625 + }, + { + "epoch": 5.029296875e-06, + "model_forward_time": 0.02517104148864746, + "step": 3296 + }, + { + "epoch": 5.029296875e-06, + "step": 3296, + "training_step_time": 0.11140942573547363 + }, + { + "epoch": 5.03082275390625e-06, + "model_forward_time": 0.025516271591186523, + "step": 3297 + }, + { + "epoch": 5.03082275390625e-06, + "step": 3297, + "training_step_time": 0.10791349411010742 + }, + { + "epoch": 5.0323486328125e-06, + "model_forward_time": 0.02699875831604004, + "step": 3298 + }, + { + "epoch": 5.0323486328125e-06, + "step": 3298, + "training_step_time": 0.11146879196166992 + }, + { + "epoch": 5.03387451171875e-06, + "model_forward_time": 0.025566577911376953, + "step": 3299 + }, + { + "epoch": 5.03387451171875e-06, + "step": 3299, + "training_step_time": 0.11018252372741699 + }, + { + "epoch": 5.035400390625e-06, + "grad_norm": 0.6142375469207764, + "learning_rate": 9.901899829374047e-05, + "loss": 0.1135, + "step": 3300 + }, + { + "epoch": 5.035400390625e-06, + "model_forward_time": 0.025331497192382812, + "step": 3300 + }, + { + "epoch": 5.035400390625e-06, + "step": 3300, + "training_step_time": 0.1110227108001709 + }, + { + "epoch": 5.03692626953125e-06, + "model_forward_time": 0.025975465774536133, + "step": 3301 + }, + { + "epoch": 5.03692626953125e-06, + "step": 3301, + "training_step_time": 0.10928583145141602 + }, + { + "epoch": 5.0384521484375e-06, + "model_forward_time": 0.025483131408691406, + "step": 3302 + }, + { + "epoch": 5.0384521484375e-06, + "step": 3302, + "training_step_time": 0.1087038516998291 + }, + { + "epoch": 5.03997802734375e-06, + "model_forward_time": 0.025272130966186523, + "step": 3303 + }, + { + "epoch": 5.03997802734375e-06, + "step": 3303, + "training_step_time": 0.10715508460998535 + }, + { + "epoch": 5.04150390625e-06, + "model_forward_time": 0.025513887405395508, + "step": 3304 + }, + { + "epoch": 5.04150390625e-06, + "step": 3304, + "training_step_time": 0.11074662208557129 + }, + { + "epoch": 5.04302978515625e-06, + "model_forward_time": 0.02567005157470703, + "step": 3305 + }, + { + "epoch": 5.04302978515625e-06, + "step": 3305, + "training_step_time": 0.1072838306427002 + }, + { + "epoch": 5.0445556640625e-06, + "model_forward_time": 0.025232315063476562, + "step": 3306 + }, + { + "epoch": 5.0445556640625e-06, + "step": 3306, + "training_step_time": 0.10974931716918945 + }, + { + "epoch": 5.04608154296875e-06, + "model_forward_time": 0.025429248809814453, + "step": 3307 + }, + { + "epoch": 5.04608154296875e-06, + "step": 3307, + "training_step_time": 0.10987377166748047 + }, + { + "epoch": 5.047607421875e-06, + "model_forward_time": 0.025057554244995117, + "step": 3308 + }, + { + "epoch": 5.047607421875e-06, + "step": 3308, + "training_step_time": 0.10850024223327637 + }, + { + "epoch": 5.04913330078125e-06, + "model_forward_time": 0.025445222854614258, + "step": 3309 + }, + { + "epoch": 5.04913330078125e-06, + "step": 3309, + "training_step_time": 0.1112375259399414 + }, + { + "epoch": 5.0506591796875e-06, + "grad_norm": 0.8731642365455627, + "learning_rate": 9.90081042794067e-05, + "loss": 0.1424, + "step": 3310 + }, + { + "epoch": 5.0506591796875e-06, + "model_forward_time": 0.024201631546020508, + "step": 3310 + }, + { + "epoch": 5.0506591796875e-06, + "step": 3310, + "training_step_time": 0.11226630210876465 + }, + { + "epoch": 5.05218505859375e-06, + "model_forward_time": 0.025452852249145508, + "step": 3311 + }, + { + "epoch": 5.05218505859375e-06, + "step": 3311, + "training_step_time": 0.1145024299621582 + }, + { + "epoch": 5.0537109375e-06, + "model_forward_time": 0.02540302276611328, + "step": 3312 + }, + { + "epoch": 5.0537109375e-06, + "step": 3312, + "training_step_time": 0.12565064430236816 + }, + { + "epoch": 5.05523681640625e-06, + "model_forward_time": 0.025209426879882812, + "step": 3313 + }, + { + "epoch": 5.05523681640625e-06, + "step": 3313, + "training_step_time": 0.17058825492858887 + }, + { + "epoch": 5.0567626953125e-06, + "model_forward_time": 0.025848388671875, + "step": 3314 + }, + { + "epoch": 5.0567626953125e-06, + "step": 3314, + "training_step_time": 0.1715235710144043 + }, + { + "epoch": 5.05828857421875e-06, + "model_forward_time": 0.024121999740600586, + "step": 3315 + }, + { + "epoch": 5.05828857421875e-06, + "step": 3315, + "training_step_time": 0.16973376274108887 + }, + { + "epoch": 5.059814453125e-06, + "model_forward_time": 0.024633169174194336, + "step": 3316 + }, + { + "epoch": 5.059814453125e-06, + "step": 3316, + "training_step_time": 0.12722063064575195 + }, + { + "epoch": 5.06134033203125e-06, + "model_forward_time": 0.024718046188354492, + "step": 3317 + }, + { + "epoch": 5.06134033203125e-06, + "step": 3317, + "training_step_time": 0.15833187103271484 + }, + { + "epoch": 5.0628662109375e-06, + "model_forward_time": 0.024771928787231445, + "step": 3318 + }, + { + "epoch": 5.0628662109375e-06, + "step": 3318, + "training_step_time": 0.1680283546447754 + }, + { + "epoch": 5.06439208984375e-06, + "model_forward_time": 0.02459096908569336, + "step": 3319 + }, + { + "epoch": 5.06439208984375e-06, + "step": 3319, + "training_step_time": 0.18820762634277344 + }, + { + "epoch": 5.06591796875e-06, + "grad_norm": 0.7321391105651855, + "learning_rate": 9.899715071560508e-05, + "loss": 0.1516, + "step": 3320 + }, + { + "epoch": 5.06591796875e-06, + "model_forward_time": 0.025385618209838867, + "step": 3320 + }, + { + "epoch": 5.06591796875e-06, + "step": 3320, + "training_step_time": 0.16673731803894043 + }, + { + "epoch": 5.06744384765625e-06, + "model_forward_time": 0.024245023727416992, + "step": 3321 + }, + { + "epoch": 5.06744384765625e-06, + "step": 3321, + "training_step_time": 0.1881856918334961 + }, + { + "epoch": 5.0689697265625e-06, + "model_forward_time": 0.02424478530883789, + "step": 3322 + }, + { + "epoch": 5.0689697265625e-06, + "step": 3322, + "training_step_time": 0.17690134048461914 + }, + { + "epoch": 5.07049560546875e-06, + "model_forward_time": 0.024748802185058594, + "step": 3323 + }, + { + "epoch": 5.07049560546875e-06, + "step": 3323, + "training_step_time": 0.17329668998718262 + }, + { + "epoch": 5.072021484375e-06, + "model_forward_time": 0.02468585968017578, + "step": 3324 + }, + { + "epoch": 5.072021484375e-06, + "step": 3324, + "training_step_time": 0.15823149681091309 + }, + { + "epoch": 5.07354736328125e-06, + "model_forward_time": 0.024211883544921875, + "step": 3325 + }, + { + "epoch": 5.07354736328125e-06, + "step": 3325, + "training_step_time": 0.15304350852966309 + }, + { + "epoch": 5.0750732421875e-06, + "model_forward_time": 0.02490401268005371, + "step": 3326 + }, + { + "epoch": 5.0750732421875e-06, + "step": 3326, + "training_step_time": 0.18915510177612305 + }, + { + "epoch": 5.07659912109375e-06, + "model_forward_time": 0.02474522590637207, + "step": 3327 + }, + { + "epoch": 5.07659912109375e-06, + "step": 3327, + "training_step_time": 0.21829891204833984 + }, + { + "epoch": 5.078125e-06, + "model_forward_time": 0.024674415588378906, + "step": 3328 + }, + { + "epoch": 5.078125e-06, + "step": 3328, + "training_step_time": 0.11189699172973633 + }, + { + "epoch": 5.07965087890625e-06, + "model_forward_time": 0.024722814559936523, + "step": 3329 + }, + { + "epoch": 5.07965087890625e-06, + "step": 3329, + "training_step_time": 0.10694050788879395 + }, + { + "epoch": 5.0811767578125e-06, + "grad_norm": 0.4290498197078705, + "learning_rate": 9.89861376156452e-05, + "loss": 0.1248, + "step": 3330 + }, + { + "epoch": 5.0811767578125e-06, + "model_forward_time": 0.02578115463256836, + "step": 3330 + }, + { + "epoch": 5.0811767578125e-06, + "step": 3330, + "training_step_time": 0.10819220542907715 + }, + { + "epoch": 5.08270263671875e-06, + "model_forward_time": 0.025634288787841797, + "step": 3331 + }, + { + "epoch": 5.08270263671875e-06, + "step": 3331, + "training_step_time": 0.10854244232177734 + }, + { + "epoch": 5.084228515625e-06, + "model_forward_time": 0.025829553604125977, + "step": 3332 + }, + { + "epoch": 5.084228515625e-06, + "step": 3332, + "training_step_time": 0.11339330673217773 + }, + { + "epoch": 5.08575439453125e-06, + "model_forward_time": 0.025535106658935547, + "step": 3333 + }, + { + "epoch": 5.08575439453125e-06, + "step": 3333, + "training_step_time": 0.10898137092590332 + }, + { + "epoch": 5.0872802734375e-06, + "model_forward_time": 0.024979352951049805, + "step": 3334 + }, + { + "epoch": 5.0872802734375e-06, + "step": 3334, + "training_step_time": 0.10894775390625 + }, + { + "epoch": 5.08880615234375e-06, + "model_forward_time": 0.025241851806640625, + "step": 3335 + }, + { + "epoch": 5.08880615234375e-06, + "step": 3335, + "training_step_time": 0.1111454963684082 + }, + { + "epoch": 5.09033203125e-06, + "model_forward_time": 0.025610685348510742, + "step": 3336 + }, + { + "epoch": 5.09033203125e-06, + "step": 3336, + "training_step_time": 0.10805106163024902 + }, + { + "epoch": 5.09185791015625e-06, + "model_forward_time": 0.025163650512695312, + "step": 3337 + }, + { + "epoch": 5.09185791015625e-06, + "step": 3337, + "training_step_time": 0.10710334777832031 + }, + { + "epoch": 5.0933837890625e-06, + "model_forward_time": 0.025674104690551758, + "step": 3338 + }, + { + "epoch": 5.0933837890625e-06, + "step": 3338, + "training_step_time": 0.1103048324584961 + }, + { + "epoch": 5.09490966796875e-06, + "model_forward_time": 0.025478124618530273, + "step": 3339 + }, + { + "epoch": 5.09490966796875e-06, + "step": 3339, + "training_step_time": 0.10799813270568848 + }, + { + "epoch": 5.096435546875e-06, + "grad_norm": 0.654565691947937, + "learning_rate": 9.897506499290902e-05, + "loss": 0.1136, + "step": 3340 + }, + { + "epoch": 5.096435546875e-06, + "model_forward_time": 0.02530694007873535, + "step": 3340 + }, + { + "epoch": 5.096435546875e-06, + "step": 3340, + "training_step_time": 0.11374139785766602 + }, + { + "epoch": 5.09796142578125e-06, + "model_forward_time": 0.025504350662231445, + "step": 3341 + }, + { + "epoch": 5.09796142578125e-06, + "step": 3341, + "training_step_time": 0.10834240913391113 + }, + { + "epoch": 5.0994873046875e-06, + "model_forward_time": 0.025019407272338867, + "step": 3342 + }, + { + "epoch": 5.0994873046875e-06, + "step": 3342, + "training_step_time": 0.10620450973510742 + }, + { + "epoch": 5.10101318359375e-06, + "model_forward_time": 0.02547144889831543, + "step": 3343 + }, + { + "epoch": 5.10101318359375e-06, + "step": 3343, + "training_step_time": 0.10982608795166016 + }, + { + "epoch": 5.1025390625e-06, + "model_forward_time": 0.025662660598754883, + "step": 3344 + }, + { + "epoch": 5.1025390625e-06, + "step": 3344, + "training_step_time": 0.10815119743347168 + }, + { + "epoch": 5.10406494140625e-06, + "model_forward_time": 0.025660276412963867, + "step": 3345 + }, + { + "epoch": 5.10406494140625e-06, + "step": 3345, + "training_step_time": 0.10927367210388184 + }, + { + "epoch": 5.1055908203125e-06, + "model_forward_time": 0.02554917335510254, + "step": 3346 + }, + { + "epoch": 5.1055908203125e-06, + "step": 3346, + "training_step_time": 0.10837912559509277 + }, + { + "epoch": 5.10711669921875e-06, + "model_forward_time": 0.025473356246948242, + "step": 3347 + }, + { + "epoch": 5.10711669921875e-06, + "step": 3347, + "training_step_time": 0.10836935043334961 + }, + { + "epoch": 5.108642578125e-06, + "model_forward_time": 0.02554011344909668, + "step": 3348 + }, + { + "epoch": 5.108642578125e-06, + "step": 3348, + "training_step_time": 0.10989522933959961 + }, + { + "epoch": 5.11016845703125e-06, + "model_forward_time": 0.027683496475219727, + "step": 3349 + }, + { + "epoch": 5.11016845703125e-06, + "step": 3349, + "training_step_time": 0.1146082878112793 + }, + { + "epoch": 5.1116943359375e-06, + "grad_norm": 0.5085901618003845, + "learning_rate": 9.896393286085084e-05, + "loss": 0.1138, + "step": 3350 + }, + { + "epoch": 5.1116943359375e-06, + "model_forward_time": 0.026171445846557617, + "step": 3350 + }, + { + "epoch": 5.1116943359375e-06, + "step": 3350, + "training_step_time": 0.10760092735290527 + }, + { + "epoch": 5.11322021484375e-06, + "model_forward_time": 0.026284456253051758, + "step": 3351 + }, + { + "epoch": 5.11322021484375e-06, + "step": 3351, + "training_step_time": 0.11056876182556152 + }, + { + "epoch": 5.11474609375e-06, + "model_forward_time": 0.025292158126831055, + "step": 3352 + }, + { + "epoch": 5.11474609375e-06, + "step": 3352, + "training_step_time": 0.11232972145080566 + }, + { + "epoch": 5.11627197265625e-06, + "model_forward_time": 0.025601625442504883, + "step": 3353 + }, + { + "epoch": 5.11627197265625e-06, + "step": 3353, + "training_step_time": 0.10922074317932129 + }, + { + "epoch": 5.1177978515625e-06, + "model_forward_time": 0.025664806365966797, + "step": 3354 + }, + { + "epoch": 5.1177978515625e-06, + "step": 3354, + "training_step_time": 0.10700678825378418 + }, + { + "epoch": 5.11932373046875e-06, + "model_forward_time": 0.025015830993652344, + "step": 3355 + }, + { + "epoch": 5.11932373046875e-06, + "step": 3355, + "training_step_time": 0.1819911003112793 + }, + { + "epoch": 5.120849609375e-06, + "model_forward_time": 0.02496647834777832, + "step": 3356 + }, + { + "epoch": 5.120849609375e-06, + "step": 3356, + "training_step_time": 0.20984292030334473 + }, + { + "epoch": 5.12237548828125e-06, + "model_forward_time": 0.024555206298828125, + "step": 3357 + }, + { + "epoch": 5.12237548828125e-06, + "step": 3357, + "training_step_time": 0.14247846603393555 + }, + { + "epoch": 5.1239013671875e-06, + "model_forward_time": 0.024637222290039062, + "step": 3358 + }, + { + "epoch": 5.1239013671875e-06, + "step": 3358, + "training_step_time": 0.20355510711669922 + }, + { + "epoch": 5.12542724609375e-06, + "model_forward_time": 0.0248262882232666, + "step": 3359 + }, + { + "epoch": 5.12542724609375e-06, + "step": 3359, + "training_step_time": 0.15606474876403809 + }, + { + "epoch": 5.126953125e-06, + "grad_norm": 0.6300991773605347, + "learning_rate": 9.895274123299723e-05, + "loss": 0.1176, + "step": 3360 + }, + { + "epoch": 5.126953125e-06, + "model_forward_time": 0.024277448654174805, + "step": 3360 + }, + { + "epoch": 5.126953125e-06, + "step": 3360, + "training_step_time": 0.2021167278289795 + }, + { + "epoch": 5.12847900390625e-06, + "model_forward_time": 0.02524113655090332, + "step": 3361 + }, + { + "epoch": 5.12847900390625e-06, + "step": 3361, + "training_step_time": 0.11090779304504395 + }, + { + "epoch": 5.1300048828125e-06, + "model_forward_time": 0.027451038360595703, + "step": 3362 + }, + { + "epoch": 5.1300048828125e-06, + "step": 3362, + "training_step_time": 0.11035871505737305 + }, + { + "epoch": 5.13153076171875e-06, + "model_forward_time": 0.025591373443603516, + "step": 3363 + }, + { + "epoch": 5.13153076171875e-06, + "step": 3363, + "training_step_time": 0.11652016639709473 + }, + { + "epoch": 5.133056640625e-06, + "model_forward_time": 0.02537846565246582, + "step": 3364 + }, + { + "epoch": 5.133056640625e-06, + "step": 3364, + "training_step_time": 0.12974095344543457 + }, + { + "epoch": 5.13458251953125e-06, + "model_forward_time": 0.02446913719177246, + "step": 3365 + }, + { + "epoch": 5.13458251953125e-06, + "step": 3365, + "training_step_time": 0.18522334098815918 + }, + { + "epoch": 5.1361083984375e-06, + "model_forward_time": 0.024740934371948242, + "step": 3366 + }, + { + "epoch": 5.1361083984375e-06, + "step": 3366, + "training_step_time": 0.14810776710510254 + }, + { + "epoch": 5.13763427734375e-06, + "model_forward_time": 0.026396989822387695, + "step": 3367 + }, + { + "epoch": 5.13763427734375e-06, + "step": 3367, + "training_step_time": 0.13106060028076172 + }, + { + "epoch": 5.13916015625e-06, + "model_forward_time": 0.024644136428833008, + "step": 3368 + }, + { + "epoch": 5.13916015625e-06, + "step": 3368, + "training_step_time": 0.20798301696777344 + }, + { + "epoch": 5.14068603515625e-06, + "model_forward_time": 0.02512216567993164, + "step": 3369 + }, + { + "epoch": 5.14068603515625e-06, + "step": 3369, + "training_step_time": 0.13444972038269043 + }, + { + "epoch": 5.1422119140625e-06, + "grad_norm": 0.5910798907279968, + "learning_rate": 9.894149012294708e-05, + "loss": 0.1139, + "step": 3370 + }, + { + "epoch": 5.1422119140625e-06, + "model_forward_time": 0.024836063385009766, + "step": 3370 + }, + { + "epoch": 5.1422119140625e-06, + "step": 3370, + "training_step_time": 0.1796886920928955 + }, + { + "epoch": 5.14373779296875e-06, + "model_forward_time": 0.02502751350402832, + "step": 3371 + }, + { + "epoch": 5.14373779296875e-06, + "step": 3371, + "training_step_time": 0.13038921356201172 + }, + { + "epoch": 5.145263671875e-06, + "model_forward_time": 0.024905681610107422, + "step": 3372 + }, + { + "epoch": 5.145263671875e-06, + "step": 3372, + "training_step_time": 0.1158590316772461 + }, + { + "epoch": 5.14678955078125e-06, + "model_forward_time": 0.025545597076416016, + "step": 3373 + }, + { + "epoch": 5.14678955078125e-06, + "step": 3373, + "training_step_time": 0.1111290454864502 + }, + { + "epoch": 5.1483154296875e-06, + "model_forward_time": 0.025193452835083008, + "step": 3374 + }, + { + "epoch": 5.1483154296875e-06, + "step": 3374, + "training_step_time": 0.11071515083312988 + }, + { + "epoch": 5.14984130859375e-06, + "model_forward_time": 0.025051116943359375, + "step": 3375 + }, + { + "epoch": 5.14984130859375e-06, + "step": 3375, + "training_step_time": 0.1115727424621582 + }, + { + "epoch": 5.1513671875e-06, + "model_forward_time": 0.024564743041992188, + "step": 3376 + }, + { + "epoch": 5.1513671875e-06, + "step": 3376, + "training_step_time": 0.11530661582946777 + }, + { + "epoch": 5.15289306640625e-06, + "model_forward_time": 0.025089263916015625, + "step": 3377 + }, + { + "epoch": 5.15289306640625e-06, + "step": 3377, + "training_step_time": 0.1094825267791748 + }, + { + "epoch": 5.1544189453125e-06, + "model_forward_time": 0.026329755783081055, + "step": 3378 + }, + { + "epoch": 5.1544189453125e-06, + "step": 3378, + "training_step_time": 0.11051535606384277 + }, + { + "epoch": 5.15594482421875e-06, + "model_forward_time": 0.025543212890625, + "step": 3379 + }, + { + "epoch": 5.15594482421875e-06, + "step": 3379, + "training_step_time": 0.10906720161437988 + }, + { + "epoch": 5.157470703125e-06, + "grad_norm": 0.6157618165016174, + "learning_rate": 9.893017954437156e-05, + "loss": 0.158, + "step": 3380 + }, + { + "epoch": 5.157470703125e-06, + "model_forward_time": 0.025409936904907227, + "step": 3380 + }, + { + "epoch": 5.157470703125e-06, + "step": 3380, + "training_step_time": 0.11186718940734863 + }, + { + "epoch": 5.15899658203125e-06, + "model_forward_time": 0.0250246524810791, + "step": 3381 + }, + { + "epoch": 5.15899658203125e-06, + "step": 3381, + "training_step_time": 0.10830307006835938 + }, + { + "epoch": 5.1605224609375e-06, + "model_forward_time": 0.025045156478881836, + "step": 3382 + }, + { + "epoch": 5.1605224609375e-06, + "step": 3382, + "training_step_time": 0.11045622825622559 + }, + { + "epoch": 5.16204833984375e-06, + "model_forward_time": 0.025307893753051758, + "step": 3383 + }, + { + "epoch": 5.16204833984375e-06, + "step": 3383, + "training_step_time": 0.11243581771850586 + }, + { + "epoch": 5.16357421875e-06, + "model_forward_time": 0.025453567504882812, + "step": 3384 + }, + { + "epoch": 5.16357421875e-06, + "step": 3384, + "training_step_time": 0.11086130142211914 + }, + { + "epoch": 5.16510009765625e-06, + "model_forward_time": 0.025103330612182617, + "step": 3385 + }, + { + "epoch": 5.16510009765625e-06, + "step": 3385, + "training_step_time": 0.11155319213867188 + }, + { + "epoch": 5.1666259765625e-06, + "model_forward_time": 0.025478124618530273, + "step": 3386 + }, + { + "epoch": 5.1666259765625e-06, + "step": 3386, + "training_step_time": 0.10824084281921387 + }, + { + "epoch": 5.16815185546875e-06, + "model_forward_time": 0.025166749954223633, + "step": 3387 + }, + { + "epoch": 5.16815185546875e-06, + "step": 3387, + "training_step_time": 0.10843300819396973 + }, + { + "epoch": 5.169677734375e-06, + "model_forward_time": 0.025154590606689453, + "step": 3388 + }, + { + "epoch": 5.169677734375e-06, + "step": 3388, + "training_step_time": 0.1085653305053711 + }, + { + "epoch": 5.17120361328125e-06, + "model_forward_time": 0.025290966033935547, + "step": 3389 + }, + { + "epoch": 5.17120361328125e-06, + "step": 3389, + "training_step_time": 0.10765552520751953 + }, + { + "epoch": 5.1727294921875e-06, + "grad_norm": 0.667768657207489, + "learning_rate": 9.891880951101407e-05, + "loss": 0.1124, + "step": 3390 + }, + { + "epoch": 5.1727294921875e-06, + "model_forward_time": 0.02593064308166504, + "step": 3390 + }, + { + "epoch": 5.1727294921875e-06, + "step": 3390, + "training_step_time": 0.10717606544494629 + }, + { + "epoch": 5.17425537109375e-06, + "model_forward_time": 0.025372743606567383, + "step": 3391 + }, + { + "epoch": 5.17425537109375e-06, + "step": 3391, + "training_step_time": 0.10731053352355957 + }, + { + "epoch": 5.17578125e-06, + "model_forward_time": 0.027755260467529297, + "step": 3392 + }, + { + "epoch": 5.17578125e-06, + "step": 3392, + "training_step_time": 0.1120603084564209 + }, + { + "epoch": 5.17730712890625e-06, + "model_forward_time": 0.025792837142944336, + "step": 3393 + }, + { + "epoch": 5.17730712890625e-06, + "step": 3393, + "training_step_time": 0.10758590698242188 + }, + { + "epoch": 5.1788330078125e-06, + "model_forward_time": 0.0253143310546875, + "step": 3394 + }, + { + "epoch": 5.1788330078125e-06, + "step": 3394, + "training_step_time": 0.11064529418945312 + }, + { + "epoch": 5.18035888671875e-06, + "model_forward_time": 0.02527761459350586, + "step": 3395 + }, + { + "epoch": 5.18035888671875e-06, + "step": 3395, + "training_step_time": 0.10834145545959473 + }, + { + "epoch": 5.181884765625e-06, + "model_forward_time": 0.025114774703979492, + "step": 3396 + }, + { + "epoch": 5.181884765625e-06, + "step": 3396, + "training_step_time": 0.11182904243469238 + }, + { + "epoch": 5.18341064453125e-06, + "model_forward_time": 0.025284528732299805, + "step": 3397 + }, + { + "epoch": 5.18341064453125e-06, + "step": 3397, + "training_step_time": 0.10790872573852539 + }, + { + "epoch": 5.1849365234375e-06, + "model_forward_time": 0.025470495223999023, + "step": 3398 + }, + { + "epoch": 5.1849365234375e-06, + "step": 3398, + "training_step_time": 0.10769772529602051 + }, + { + "epoch": 5.18646240234375e-06, + "model_forward_time": 0.0251772403717041, + "step": 3399 + }, + { + "epoch": 5.18646240234375e-06, + "step": 3399, + "training_step_time": 0.11132264137268066 + }, + { + "epoch": 5.18798828125e-06, + "grad_norm": 0.4903802275657654, + "learning_rate": 9.890738003669029e-05, + "loss": 0.1303, + "step": 3400 + }, + { + "epoch": 5.18798828125e-06, + "model_forward_time": 0.02441859245300293, + "step": 3400 + }, + { + "epoch": 5.18798828125e-06, + "step": 3400, + "training_step_time": 0.15500235557556152 + }, + { + "epoch": 5.18951416015625e-06, + "model_forward_time": 0.02473282814025879, + "step": 3401 + }, + { + "epoch": 5.18951416015625e-06, + "step": 3401, + "training_step_time": 0.14902639389038086 + }, + { + "epoch": 5.1910400390625e-06, + "model_forward_time": 0.02466416358947754, + "step": 3402 + }, + { + "epoch": 5.1910400390625e-06, + "step": 3402, + "training_step_time": 0.11003684997558594 + }, + { + "epoch": 5.19256591796875e-06, + "model_forward_time": 0.024868011474609375, + "step": 3403 + }, + { + "epoch": 5.19256591796875e-06, + "step": 3403, + "training_step_time": 0.15742945671081543 + }, + { + "epoch": 5.194091796875e-06, + "model_forward_time": 0.024973630905151367, + "step": 3404 + }, + { + "epoch": 5.194091796875e-06, + "step": 3404, + "training_step_time": 0.21064186096191406 + }, + { + "epoch": 5.19561767578125e-06, + "model_forward_time": 0.0255889892578125, + "step": 3405 + }, + { + "epoch": 5.19561767578125e-06, + "step": 3405, + "training_step_time": 0.19760489463806152 + }, + { + "epoch": 5.1971435546875e-06, + "model_forward_time": 0.024436473846435547, + "step": 3406 + }, + { + "epoch": 5.1971435546875e-06, + "step": 3406, + "training_step_time": 0.13222670555114746 + }, + { + "epoch": 5.19866943359375e-06, + "model_forward_time": 0.02452397346496582, + "step": 3407 + }, + { + "epoch": 5.19866943359375e-06, + "step": 3407, + "training_step_time": 0.19873738288879395 + }, + { + "epoch": 5.2001953125e-06, + "model_forward_time": 0.02526688575744629, + "step": 3408 + }, + { + "epoch": 5.2001953125e-06, + "step": 3408, + "training_step_time": 0.11535024642944336 + }, + { + "epoch": 5.20172119140625e-06, + "model_forward_time": 0.024330854415893555, + "step": 3409 + }, + { + "epoch": 5.20172119140625e-06, + "step": 3409, + "training_step_time": 0.19217872619628906 + }, + { + "epoch": 5.2032470703125e-06, + "grad_norm": 0.5298376083374023, + "learning_rate": 9.889589113528809e-05, + "loss": 0.1192, + "step": 3410 + }, + { + "epoch": 5.2032470703125e-06, + "model_forward_time": 0.024539947509765625, + "step": 3410 + }, + { + "epoch": 5.2032470703125e-06, + "step": 3410, + "training_step_time": 0.10487961769104004 + }, + { + "epoch": 5.20477294921875e-06, + "model_forward_time": 0.0246279239654541, + "step": 3411 + }, + { + "epoch": 5.20477294921875e-06, + "step": 3411, + "training_step_time": 0.10378456115722656 + }, + { + "epoch": 5.206298828125e-06, + "model_forward_time": 0.02520275115966797, + "step": 3412 + }, + { + "epoch": 5.206298828125e-06, + "step": 3412, + "training_step_time": 0.10572314262390137 + }, + { + "epoch": 5.20782470703125e-06, + "model_forward_time": 0.026127338409423828, + "step": 3413 + }, + { + "epoch": 5.20782470703125e-06, + "step": 3413, + "training_step_time": 0.10959005355834961 + }, + { + "epoch": 5.2093505859375e-06, + "model_forward_time": 0.025098800659179688, + "step": 3414 + }, + { + "epoch": 5.2093505859375e-06, + "step": 3414, + "training_step_time": 0.17122721672058105 + }, + { + "epoch": 5.21087646484375e-06, + "model_forward_time": 0.02494668960571289, + "step": 3415 + }, + { + "epoch": 5.21087646484375e-06, + "step": 3415, + "training_step_time": 0.16518902778625488 + }, + { + "epoch": 5.21240234375e-06, + "model_forward_time": 0.024560928344726562, + "step": 3416 + }, + { + "epoch": 5.21240234375e-06, + "step": 3416, + "training_step_time": 0.10960793495178223 + }, + { + "epoch": 5.21392822265625e-06, + "model_forward_time": 0.025112628936767578, + "step": 3417 + }, + { + "epoch": 5.21392822265625e-06, + "step": 3417, + "training_step_time": 0.21888518333435059 + }, + { + "epoch": 5.2154541015625e-06, + "model_forward_time": 0.024595975875854492, + "step": 3418 + }, + { + "epoch": 5.2154541015625e-06, + "step": 3418, + "training_step_time": 0.10899710655212402 + }, + { + "epoch": 5.21697998046875e-06, + "model_forward_time": 0.02467513084411621, + "step": 3419 + }, + { + "epoch": 5.21697998046875e-06, + "step": 3419, + "training_step_time": 0.11002564430236816 + }, + { + "epoch": 5.218505859375e-06, + "grad_norm": 0.7561694979667664, + "learning_rate": 9.888434282076758e-05, + "loss": 0.1066, + "step": 3420 + }, + { + "epoch": 5.218505859375e-06, + "model_forward_time": 0.02538609504699707, + "step": 3420 + }, + { + "epoch": 5.218505859375e-06, + "step": 3420, + "training_step_time": 0.10863089561462402 + }, + { + "epoch": 5.22003173828125e-06, + "model_forward_time": 0.02506852149963379, + "step": 3421 + }, + { + "epoch": 5.22003173828125e-06, + "step": 3421, + "training_step_time": 0.1073920726776123 + }, + { + "epoch": 5.2215576171875e-06, + "model_forward_time": 0.025315046310424805, + "step": 3422 + }, + { + "epoch": 5.2215576171875e-06, + "step": 3422, + "training_step_time": 0.10891890525817871 + }, + { + "epoch": 5.22308349609375e-06, + "model_forward_time": 0.0248110294342041, + "step": 3423 + }, + { + "epoch": 5.22308349609375e-06, + "step": 3423, + "training_step_time": 0.10869002342224121 + }, + { + "epoch": 5.224609375e-06, + "model_forward_time": 0.025775432586669922, + "step": 3424 + }, + { + "epoch": 5.224609375e-06, + "step": 3424, + "training_step_time": 0.11115384101867676 + }, + { + "epoch": 5.22613525390625e-06, + "model_forward_time": 0.024389028549194336, + "step": 3425 + }, + { + "epoch": 5.22613525390625e-06, + "step": 3425, + "training_step_time": 0.11342382431030273 + }, + { + "epoch": 5.2276611328125e-06, + "model_forward_time": 0.02429056167602539, + "step": 3426 + }, + { + "epoch": 5.2276611328125e-06, + "step": 3426, + "training_step_time": 0.1109933853149414 + }, + { + "epoch": 5.22918701171875e-06, + "model_forward_time": 0.02416706085205078, + "step": 3427 + }, + { + "epoch": 5.22918701171875e-06, + "step": 3427, + "training_step_time": 0.10983943939208984 + }, + { + "epoch": 5.230712890625e-06, + "model_forward_time": 0.025521278381347656, + "step": 3428 + }, + { + "epoch": 5.230712890625e-06, + "step": 3428, + "training_step_time": 0.11211776733398438 + }, + { + "epoch": 5.23223876953125e-06, + "model_forward_time": 0.025494098663330078, + "step": 3429 + }, + { + "epoch": 5.23223876953125e-06, + "step": 3429, + "training_step_time": 0.11052846908569336 + }, + { + "epoch": 5.2337646484375e-06, + "grad_norm": 0.6977363228797913, + "learning_rate": 9.887273510716107e-05, + "loss": 0.1366, + "step": 3430 + }, + { + "epoch": 5.2337646484375e-06, + "model_forward_time": 0.025081157684326172, + "step": 3430 + }, + { + "epoch": 5.2337646484375e-06, + "step": 3430, + "training_step_time": 0.11130237579345703 + }, + { + "epoch": 5.23529052734375e-06, + "model_forward_time": 0.025034427642822266, + "step": 3431 + }, + { + "epoch": 5.23529052734375e-06, + "step": 3431, + "training_step_time": 0.1090855598449707 + }, + { + "epoch": 5.23681640625e-06, + "model_forward_time": 0.025266647338867188, + "step": 3432 + }, + { + "epoch": 5.23681640625e-06, + "step": 3432, + "training_step_time": 0.11012387275695801 + }, + { + "epoch": 5.23834228515625e-06, + "model_forward_time": 0.025238990783691406, + "step": 3433 + }, + { + "epoch": 5.23834228515625e-06, + "step": 3433, + "training_step_time": 0.11205697059631348 + }, + { + "epoch": 5.2398681640625e-06, + "model_forward_time": 0.02556133270263672, + "step": 3434 + }, + { + "epoch": 5.2398681640625e-06, + "step": 3434, + "training_step_time": 0.1110084056854248 + }, + { + "epoch": 5.24139404296875e-06, + "model_forward_time": 0.025668859481811523, + "step": 3435 + }, + { + "epoch": 5.24139404296875e-06, + "step": 3435, + "training_step_time": 0.10806846618652344 + }, + { + "epoch": 5.242919921875e-06, + "model_forward_time": 0.02575206756591797, + "step": 3436 + }, + { + "epoch": 5.242919921875e-06, + "step": 3436, + "training_step_time": 0.10979771614074707 + }, + { + "epoch": 5.24444580078125e-06, + "model_forward_time": 0.025369644165039062, + "step": 3437 + }, + { + "epoch": 5.24444580078125e-06, + "step": 3437, + "training_step_time": 0.11091947555541992 + }, + { + "epoch": 5.2459716796875e-06, + "model_forward_time": 0.025528669357299805, + "step": 3438 + }, + { + "epoch": 5.2459716796875e-06, + "step": 3438, + "training_step_time": 0.10814285278320312 + }, + { + "epoch": 5.24749755859375e-06, + "model_forward_time": 0.02489304542541504, + "step": 3439 + }, + { + "epoch": 5.24749755859375e-06, + "step": 3439, + "training_step_time": 0.10756397247314453 + }, + { + "epoch": 5.2490234375e-06, + "grad_norm": 0.8019386529922485, + "learning_rate": 9.886106800857298e-05, + "loss": 0.1324, + "step": 3440 + }, + { + "epoch": 5.2490234375e-06, + "model_forward_time": 0.025530338287353516, + "step": 3440 + }, + { + "epoch": 5.2490234375e-06, + "step": 3440, + "training_step_time": 0.10972380638122559 + }, + { + "epoch": 5.25054931640625e-06, + "model_forward_time": 0.025622844696044922, + "step": 3441 + }, + { + "epoch": 5.25054931640625e-06, + "step": 3441, + "training_step_time": 0.11240863800048828 + }, + { + "epoch": 5.2520751953125e-06, + "model_forward_time": 0.025661468505859375, + "step": 3442 + }, + { + "epoch": 5.2520751953125e-06, + "step": 3442, + "training_step_time": 0.10773324966430664 + }, + { + "epoch": 5.25360107421875e-06, + "model_forward_time": 0.025330781936645508, + "step": 3443 + }, + { + "epoch": 5.25360107421875e-06, + "step": 3443, + "training_step_time": 0.10838794708251953 + }, + { + "epoch": 5.255126953125e-06, + "model_forward_time": 0.02551722526550293, + "step": 3444 + }, + { + "epoch": 5.255126953125e-06, + "step": 3444, + "training_step_time": 0.20933842658996582 + }, + { + "epoch": 5.25665283203125e-06, + "model_forward_time": 0.02483987808227539, + "step": 3445 + }, + { + "epoch": 5.25665283203125e-06, + "step": 3445, + "training_step_time": 0.18695831298828125 + }, + { + "epoch": 5.2581787109375e-06, + "model_forward_time": 0.024812936782836914, + "step": 3446 + }, + { + "epoch": 5.2581787109375e-06, + "step": 3446, + "training_step_time": 0.15917062759399414 + }, + { + "epoch": 5.25970458984375e-06, + "model_forward_time": 0.025088071823120117, + "step": 3447 + }, + { + "epoch": 5.25970458984375e-06, + "step": 3447, + "training_step_time": 0.19579410552978516 + }, + { + "epoch": 5.26123046875e-06, + "model_forward_time": 0.02516031265258789, + "step": 3448 + }, + { + "epoch": 5.26123046875e-06, + "step": 3448, + "training_step_time": 0.1849205493927002 + }, + { + "epoch": 5.26275634765625e-06, + "model_forward_time": 0.024431705474853516, + "step": 3449 + }, + { + "epoch": 5.26275634765625e-06, + "step": 3449, + "training_step_time": 0.17187905311584473 + }, + { + "epoch": 5.2642822265625e-06, + "grad_norm": 0.5509032607078552, + "learning_rate": 9.884934153917997e-05, + "loss": 0.1291, + "step": 3450 + }, + { + "epoch": 5.2642822265625e-06, + "model_forward_time": 0.02470564842224121, + "step": 3450 + }, + { + "epoch": 5.2642822265625e-06, + "step": 3450, + "training_step_time": 0.18787860870361328 + }, + { + "epoch": 5.26580810546875e-06, + "model_forward_time": 0.024738788604736328, + "step": 3451 + }, + { + "epoch": 5.26580810546875e-06, + "step": 3451, + "training_step_time": 0.11341667175292969 + }, + { + "epoch": 5.267333984375e-06, + "model_forward_time": 0.02483677864074707, + "step": 3452 + }, + { + "epoch": 5.267333984375e-06, + "step": 3452, + "training_step_time": 0.10983419418334961 + }, + { + "epoch": 5.26885986328125e-06, + "model_forward_time": 0.02579498291015625, + "step": 3453 + }, + { + "epoch": 5.26885986328125e-06, + "step": 3453, + "training_step_time": 0.1969153881072998 + }, + { + "epoch": 5.2703857421875e-06, + "model_forward_time": 0.02483391761779785, + "step": 3454 + }, + { + "epoch": 5.2703857421875e-06, + "step": 3454, + "training_step_time": 0.11024641990661621 + }, + { + "epoch": 5.27191162109375e-06, + "model_forward_time": 0.024932861328125, + "step": 3455 + }, + { + "epoch": 5.27191162109375e-06, + "step": 3455, + "training_step_time": 0.10762500762939453 + }, + { + "epoch": 5.2734375e-06, + "model_forward_time": 0.02577996253967285, + "step": 3456 + }, + { + "epoch": 5.2734375e-06, + "step": 3456, + "training_step_time": 0.1114046573638916 + }, + { + "epoch": 5.27496337890625e-06, + "model_forward_time": 0.026149511337280273, + "step": 3457 + }, + { + "epoch": 5.27496337890625e-06, + "step": 3457, + "training_step_time": 0.10918879508972168 + }, + { + "epoch": 5.2764892578125e-06, + "model_forward_time": 0.02549576759338379, + "step": 3458 + }, + { + "epoch": 5.2764892578125e-06, + "step": 3458, + "training_step_time": 0.16745948791503906 + }, + { + "epoch": 5.27801513671875e-06, + "model_forward_time": 0.024671554565429688, + "step": 3459 + }, + { + "epoch": 5.27801513671875e-06, + "step": 3459, + "training_step_time": 0.16326236724853516 + }, + { + "epoch": 5.279541015625e-06, + "grad_norm": 0.6027151346206665, + "learning_rate": 9.88375557132308e-05, + "loss": 0.1113, + "step": 3460 + }, + { + "epoch": 5.279541015625e-06, + "model_forward_time": 0.024532079696655273, + "step": 3460 + }, + { + "epoch": 5.279541015625e-06, + "step": 3460, + "training_step_time": 0.10580945014953613 + }, + { + "epoch": 5.28106689453125e-06, + "model_forward_time": 0.024826526641845703, + "step": 3461 + }, + { + "epoch": 5.28106689453125e-06, + "step": 3461, + "training_step_time": 0.17293524742126465 + }, + { + "epoch": 5.2825927734375e-06, + "model_forward_time": 0.02475595474243164, + "step": 3462 + }, + { + "epoch": 5.2825927734375e-06, + "step": 3462, + "training_step_time": 0.17767882347106934 + }, + { + "epoch": 5.28411865234375e-06, + "model_forward_time": 0.025221586227416992, + "step": 3463 + }, + { + "epoch": 5.28411865234375e-06, + "step": 3463, + "training_step_time": 0.10974311828613281 + }, + { + "epoch": 5.28564453125e-06, + "model_forward_time": 0.025029420852661133, + "step": 3464 + }, + { + "epoch": 5.28564453125e-06, + "step": 3464, + "training_step_time": 0.10975122451782227 + }, + { + "epoch": 5.28717041015625e-06, + "model_forward_time": 0.025022268295288086, + "step": 3465 + }, + { + "epoch": 5.28717041015625e-06, + "step": 3465, + "training_step_time": 0.10857844352722168 + }, + { + "epoch": 5.2886962890625e-06, + "model_forward_time": 0.025325536727905273, + "step": 3466 + }, + { + "epoch": 5.2886962890625e-06, + "step": 3466, + "training_step_time": 0.11226963996887207 + }, + { + "epoch": 5.29022216796875e-06, + "model_forward_time": 0.025382518768310547, + "step": 3467 + }, + { + "epoch": 5.29022216796875e-06, + "step": 3467, + "training_step_time": 0.11269545555114746 + }, + { + "epoch": 5.291748046875e-06, + "model_forward_time": 0.0255889892578125, + "step": 3468 + }, + { + "epoch": 5.291748046875e-06, + "step": 3468, + "training_step_time": 0.11371660232543945 + }, + { + "epoch": 5.29327392578125e-06, + "model_forward_time": 0.025280475616455078, + "step": 3469 + }, + { + "epoch": 5.29327392578125e-06, + "step": 3469, + "training_step_time": 0.1140754222869873 + }, + { + "epoch": 5.2947998046875e-06, + "grad_norm": 0.8043185472488403, + "learning_rate": 9.882571054504636e-05, + "loss": 0.1424, + "step": 3470 + }, + { + "epoch": 5.2947998046875e-06, + "model_forward_time": 0.025569438934326172, + "step": 3470 + }, + { + "epoch": 5.2947998046875e-06, + "step": 3470, + "training_step_time": 0.11008691787719727 + }, + { + "epoch": 5.29632568359375e-06, + "model_forward_time": 0.0256044864654541, + "step": 3471 + }, + { + "epoch": 5.29632568359375e-06, + "step": 3471, + "training_step_time": 0.11122870445251465 + }, + { + "epoch": 5.2978515625e-06, + "model_forward_time": 0.02504444122314453, + "step": 3472 + }, + { + "epoch": 5.2978515625e-06, + "step": 3472, + "training_step_time": 0.11519360542297363 + }, + { + "epoch": 5.29937744140625e-06, + "model_forward_time": 0.025614261627197266, + "step": 3473 + }, + { + "epoch": 5.29937744140625e-06, + "step": 3473, + "training_step_time": 0.1095435619354248 + }, + { + "epoch": 5.3009033203125e-06, + "model_forward_time": 0.025815486907958984, + "step": 3474 + }, + { + "epoch": 5.3009033203125e-06, + "step": 3474, + "training_step_time": 0.1093592643737793 + }, + { + "epoch": 5.30242919921875e-06, + "model_forward_time": 0.024956464767456055, + "step": 3475 + }, + { + "epoch": 5.30242919921875e-06, + "step": 3475, + "training_step_time": 0.11590290069580078 + }, + { + "epoch": 5.303955078125e-06, + "model_forward_time": 0.025304317474365234, + "step": 3476 + }, + { + "epoch": 5.303955078125e-06, + "step": 3476, + "training_step_time": 0.11078190803527832 + }, + { + "epoch": 5.30548095703125e-06, + "model_forward_time": 0.025183677673339844, + "step": 3477 + }, + { + "epoch": 5.30548095703125e-06, + "step": 3477, + "training_step_time": 0.11088895797729492 + }, + { + "epoch": 5.3070068359375e-06, + "model_forward_time": 0.02527308464050293, + "step": 3478 + }, + { + "epoch": 5.3070068359375e-06, + "step": 3478, + "training_step_time": 0.11271524429321289 + }, + { + "epoch": 5.30853271484375e-06, + "model_forward_time": 0.025701522827148438, + "step": 3479 + }, + { + "epoch": 5.30853271484375e-06, + "step": 3479, + "training_step_time": 0.11104512214660645 + }, + { + "epoch": 5.31005859375e-06, + "grad_norm": 0.6473183035850525, + "learning_rate": 9.881380604901964e-05, + "loss": 0.1129, + "step": 3480 + }, + { + "epoch": 5.31005859375e-06, + "model_forward_time": 0.025483369827270508, + "step": 3480 + }, + { + "epoch": 5.31005859375e-06, + "step": 3480, + "training_step_time": 0.10886645317077637 + }, + { + "epoch": 5.31158447265625e-06, + "model_forward_time": 0.025574445724487305, + "step": 3481 + }, + { + "epoch": 5.31158447265625e-06, + "step": 3481, + "training_step_time": 0.11009550094604492 + }, + { + "epoch": 5.3131103515625e-06, + "model_forward_time": 0.02535390853881836, + "step": 3482 + }, + { + "epoch": 5.3131103515625e-06, + "step": 3482, + "training_step_time": 0.10774850845336914 + }, + { + "epoch": 5.31463623046875e-06, + "model_forward_time": 0.024837493896484375, + "step": 3483 + }, + { + "epoch": 5.31463623046875e-06, + "step": 3483, + "training_step_time": 0.11113739013671875 + }, + { + "epoch": 5.316162109375e-06, + "model_forward_time": 0.0252535343170166, + "step": 3484 + }, + { + "epoch": 5.316162109375e-06, + "step": 3484, + "training_step_time": 0.11003375053405762 + }, + { + "epoch": 5.31768798828125e-06, + "model_forward_time": 0.02564072608947754, + "step": 3485 + }, + { + "epoch": 5.31768798828125e-06, + "step": 3485, + "training_step_time": 0.10724639892578125 + }, + { + "epoch": 5.3192138671875e-06, + "model_forward_time": 0.025247573852539062, + "step": 3486 + }, + { + "epoch": 5.3192138671875e-06, + "step": 3486, + "training_step_time": 0.11706209182739258 + }, + { + "epoch": 5.32073974609375e-06, + "model_forward_time": 0.027644872665405273, + "step": 3487 + }, + { + "epoch": 5.32073974609375e-06, + "step": 3487, + "training_step_time": 0.11270380020141602 + }, + { + "epoch": 5.322265625e-06, + "model_forward_time": 0.025614023208618164, + "step": 3488 + }, + { + "epoch": 5.322265625e-06, + "step": 3488, + "training_step_time": 0.15154242515563965 + }, + { + "epoch": 5.32379150390625e-06, + "model_forward_time": 0.024924755096435547, + "step": 3489 + }, + { + "epoch": 5.32379150390625e-06, + "step": 3489, + "training_step_time": 0.20758414268493652 + }, + { + "epoch": 5.3253173828125e-06, + "grad_norm": 0.48119691014289856, + "learning_rate": 9.880184223961573e-05, + "loss": 0.1275, + "step": 3490 + }, + { + "epoch": 5.3253173828125e-06, + "model_forward_time": 0.025310754776000977, + "step": 3490 + }, + { + "epoch": 5.3253173828125e-06, + "step": 3490, + "training_step_time": 0.13411545753479004 + }, + { + "epoch": 5.32684326171875e-06, + "model_forward_time": 0.024502992630004883, + "step": 3491 + }, + { + "epoch": 5.32684326171875e-06, + "step": 3491, + "training_step_time": 0.20780038833618164 + }, + { + "epoch": 5.328369140625e-06, + "model_forward_time": 0.0245358943939209, + "step": 3492 + }, + { + "epoch": 5.328369140625e-06, + "step": 3492, + "training_step_time": 0.1842968463897705 + }, + { + "epoch": 5.32989501953125e-06, + "model_forward_time": 0.02517390251159668, + "step": 3493 + }, + { + "epoch": 5.32989501953125e-06, + "step": 3493, + "training_step_time": 0.17435359954833984 + }, + { + "epoch": 5.3314208984375e-06, + "model_forward_time": 0.024028778076171875, + "step": 3494 + }, + { + "epoch": 5.3314208984375e-06, + "step": 3494, + "training_step_time": 0.1887679100036621 + }, + { + "epoch": 5.33294677734375e-06, + "model_forward_time": 0.024264812469482422, + "step": 3495 + }, + { + "epoch": 5.33294677734375e-06, + "step": 3495, + "training_step_time": 0.11607527732849121 + }, + { + "epoch": 5.33447265625e-06, + "model_forward_time": 0.02495265007019043, + "step": 3496 + }, + { + "epoch": 5.33447265625e-06, + "step": 3496, + "training_step_time": 0.10937714576721191 + }, + { + "epoch": 5.33599853515625e-06, + "model_forward_time": 0.02533555030822754, + "step": 3497 + }, + { + "epoch": 5.33599853515625e-06, + "step": 3497, + "training_step_time": 0.19635248184204102 + }, + { + "epoch": 5.3375244140625e-06, + "model_forward_time": 0.024823665618896484, + "step": 3498 + }, + { + "epoch": 5.3375244140625e-06, + "step": 3498, + "training_step_time": 0.10367059707641602 + }, + { + "epoch": 5.33905029296875e-06, + "model_forward_time": 0.02450251579284668, + "step": 3499 + }, + { + "epoch": 5.33905029296875e-06, + "step": 3499, + "training_step_time": 0.11481595039367676 + }, + { + "epoch": 5.340576171875e-06, + "grad_norm": 0.6093948483467102, + "learning_rate": 9.878981913137179e-05, + "loss": 0.0988, + "step": 3500 + }, + { + "epoch": 5.340576171875e-06, + "model_forward_time": 0.024262666702270508, + "step": 3500 + }, + { + "epoch": 5.340576171875e-06, + "step": 3500, + "training_step_time": 0.10814404487609863 + }, + { + "epoch": 5.34210205078125e-06, + "model_forward_time": 0.025707244873046875, + "step": 3501 + }, + { + "epoch": 5.34210205078125e-06, + "step": 3501, + "training_step_time": 0.10902523994445801 + }, + { + "epoch": 5.3436279296875e-06, + "model_forward_time": 0.026643991470336914, + "step": 3502 + }, + { + "epoch": 5.3436279296875e-06, + "step": 3502, + "training_step_time": 0.11229538917541504 + }, + { + "epoch": 5.34515380859375e-06, + "model_forward_time": 0.025742530822753906, + "step": 3503 + }, + { + "epoch": 5.34515380859375e-06, + "step": 3503, + "training_step_time": 0.11125755310058594 + }, + { + "epoch": 5.3466796875e-06, + "model_forward_time": 0.025651216506958008, + "step": 3504 + }, + { + "epoch": 5.3466796875e-06, + "step": 3504, + "training_step_time": 0.12004995346069336 + }, + { + "epoch": 5.34820556640625e-06, + "model_forward_time": 0.02694225311279297, + "step": 3505 + }, + { + "epoch": 5.34820556640625e-06, + "step": 3505, + "training_step_time": 0.13196349143981934 + }, + { + "epoch": 5.3497314453125e-06, + "model_forward_time": 0.025164127349853516, + "step": 3506 + }, + { + "epoch": 5.3497314453125e-06, + "step": 3506, + "training_step_time": 0.18841218948364258 + }, + { + "epoch": 5.35125732421875e-06, + "model_forward_time": 0.024602890014648438, + "step": 3507 + }, + { + "epoch": 5.35125732421875e-06, + "step": 3507, + "training_step_time": 0.11376333236694336 + }, + { + "epoch": 5.352783203125e-06, + "model_forward_time": 0.024126291275024414, + "step": 3508 + }, + { + "epoch": 5.352783203125e-06, + "step": 3508, + "training_step_time": 0.11130547523498535 + }, + { + "epoch": 5.35430908203125e-06, + "model_forward_time": 0.025876998901367188, + "step": 3509 + }, + { + "epoch": 5.35430908203125e-06, + "step": 3509, + "training_step_time": 0.14697885513305664 + }, + { + "epoch": 5.3558349609375e-06, + "grad_norm": 0.5382101535797119, + "learning_rate": 9.877773673889701e-05, + "loss": 0.1, + "step": 3510 + }, + { + "epoch": 5.3558349609375e-06, + "model_forward_time": 0.02559971809387207, + "step": 3510 + }, + { + "epoch": 5.3558349609375e-06, + "step": 3510, + "training_step_time": 0.1353740692138672 + }, + { + "epoch": 5.35736083984375e-06, + "model_forward_time": 0.025170326232910156, + "step": 3511 + }, + { + "epoch": 5.35736083984375e-06, + "step": 3511, + "training_step_time": 0.1329174041748047 + }, + { + "epoch": 5.35888671875e-06, + "model_forward_time": 0.025002241134643555, + "step": 3512 + }, + { + "epoch": 5.35888671875e-06, + "step": 3512, + "training_step_time": 0.1267375946044922 + }, + { + "epoch": 5.36041259765625e-06, + "model_forward_time": 0.025343894958496094, + "step": 3513 + }, + { + "epoch": 5.36041259765625e-06, + "step": 3513, + "training_step_time": 0.1313920021057129 + }, + { + "epoch": 5.3619384765625e-06, + "model_forward_time": 0.02546834945678711, + "step": 3514 + }, + { + "epoch": 5.3619384765625e-06, + "step": 3514, + "training_step_time": 0.12313628196716309 + }, + { + "epoch": 5.36346435546875e-06, + "model_forward_time": 0.025356531143188477, + "step": 3515 + }, + { + "epoch": 5.36346435546875e-06, + "step": 3515, + "training_step_time": 0.11833810806274414 + }, + { + "epoch": 5.364990234375e-06, + "model_forward_time": 0.024939775466918945, + "step": 3516 + }, + { + "epoch": 5.364990234375e-06, + "step": 3516, + "training_step_time": 0.11993956565856934 + }, + { + "epoch": 5.36651611328125e-06, + "model_forward_time": 0.025254249572753906, + "step": 3517 + }, + { + "epoch": 5.36651611328125e-06, + "step": 3517, + "training_step_time": 0.1139516830444336 + }, + { + "epoch": 5.3680419921875e-06, + "model_forward_time": 0.025592565536499023, + "step": 3518 + }, + { + "epoch": 5.3680419921875e-06, + "step": 3518, + "training_step_time": 0.11178326606750488 + }, + { + "epoch": 5.36956787109375e-06, + "model_forward_time": 0.024964332580566406, + "step": 3519 + }, + { + "epoch": 5.36956787109375e-06, + "step": 3519, + "training_step_time": 0.11345934867858887 + }, + { + "epoch": 5.37109375e-06, + "grad_norm": 0.7704569101333618, + "learning_rate": 9.876559507687267e-05, + "loss": 0.1345, + "step": 3520 + }, + { + "epoch": 5.37109375e-06, + "model_forward_time": 0.024475574493408203, + "step": 3520 + }, + { + "epoch": 5.37109375e-06, + "step": 3520, + "training_step_time": 0.1101231575012207 + }, + { + "epoch": 5.37261962890625e-06, + "model_forward_time": 0.02429819107055664, + "step": 3521 + }, + { + "epoch": 5.37261962890625e-06, + "step": 3521, + "training_step_time": 0.11268067359924316 + }, + { + "epoch": 5.3741455078125e-06, + "model_forward_time": 0.02555990219116211, + "step": 3522 + }, + { + "epoch": 5.3741455078125e-06, + "step": 3522, + "training_step_time": 0.11799907684326172 + }, + { + "epoch": 5.37567138671875e-06, + "model_forward_time": 0.025043010711669922, + "step": 3523 + }, + { + "epoch": 5.37567138671875e-06, + "step": 3523, + "training_step_time": 0.11258649826049805 + }, + { + "epoch": 5.377197265625e-06, + "model_forward_time": 0.025302648544311523, + "step": 3524 + }, + { + "epoch": 5.377197265625e-06, + "step": 3524, + "training_step_time": 0.1079258918762207 + }, + { + "epoch": 5.37872314453125e-06, + "model_forward_time": 0.025361299514770508, + "step": 3525 + }, + { + "epoch": 5.37872314453125e-06, + "step": 3525, + "training_step_time": 0.11121892929077148 + }, + { + "epoch": 5.3802490234375e-06, + "model_forward_time": 0.0252227783203125, + "step": 3526 + }, + { + "epoch": 5.3802490234375e-06, + "step": 3526, + "training_step_time": 0.11141633987426758 + }, + { + "epoch": 5.38177490234375e-06, + "model_forward_time": 0.025115013122558594, + "step": 3527 + }, + { + "epoch": 5.38177490234375e-06, + "step": 3527, + "training_step_time": 0.10831260681152344 + }, + { + "epoch": 5.38330078125e-06, + "model_forward_time": 0.025998830795288086, + "step": 3528 + }, + { + "epoch": 5.38330078125e-06, + "step": 3528, + "training_step_time": 0.1098785400390625 + }, + { + "epoch": 5.38482666015625e-06, + "model_forward_time": 0.02529764175415039, + "step": 3529 + }, + { + "epoch": 5.38482666015625e-06, + "step": 3529, + "training_step_time": 0.10957956314086914 + }, + { + "epoch": 5.3863525390625e-06, + "grad_norm": 0.7169075608253479, + "learning_rate": 9.875339416005202e-05, + "loss": 0.1231, + "step": 3530 + }, + { + "epoch": 5.3863525390625e-06, + "model_forward_time": 0.027828216552734375, + "step": 3530 + }, + { + "epoch": 5.3863525390625e-06, + "step": 3530, + "training_step_time": 0.11369132995605469 + }, + { + "epoch": 5.38787841796875e-06, + "model_forward_time": 0.02522420883178711, + "step": 3531 + }, + { + "epoch": 5.38787841796875e-06, + "step": 3531, + "training_step_time": 0.11010885238647461 + }, + { + "epoch": 5.389404296875e-06, + "model_forward_time": 0.025552988052368164, + "step": 3532 + }, + { + "epoch": 5.389404296875e-06, + "step": 3532, + "training_step_time": 0.1084141731262207 + }, + { + "epoch": 5.39093017578125e-06, + "model_forward_time": 0.025548934936523438, + "step": 3533 + }, + { + "epoch": 5.39093017578125e-06, + "step": 3533, + "training_step_time": 0.17070627212524414 + }, + { + "epoch": 5.3924560546875e-06, + "model_forward_time": 0.024975061416625977, + "step": 3534 + }, + { + "epoch": 5.3924560546875e-06, + "step": 3534, + "training_step_time": 0.18741536140441895 + }, + { + "epoch": 5.39398193359375e-06, + "model_forward_time": 0.024165630340576172, + "step": 3535 + }, + { + "epoch": 5.39398193359375e-06, + "step": 3535, + "training_step_time": 0.15580201148986816 + }, + { + "epoch": 5.3955078125e-06, + "model_forward_time": 0.025069236755371094, + "step": 3536 + }, + { + "epoch": 5.3955078125e-06, + "step": 3536, + "training_step_time": 0.2123889923095703 + }, + { + "epoch": 5.39703369140625e-06, + "model_forward_time": 0.0242156982421875, + "step": 3537 + }, + { + "epoch": 5.39703369140625e-06, + "step": 3537, + "training_step_time": 0.1835479736328125 + }, + { + "epoch": 5.3985595703125e-06, + "model_forward_time": 0.02461862564086914, + "step": 3538 + }, + { + "epoch": 5.3985595703125e-06, + "step": 3538, + "training_step_time": 0.15091466903686523 + }, + { + "epoch": 5.40008544921875e-06, + "model_forward_time": 0.024627685546875, + "step": 3539 + }, + { + "epoch": 5.40008544921875e-06, + "step": 3539, + "training_step_time": 0.18666601181030273 + }, + { + "epoch": 5.401611328125e-06, + "grad_norm": 0.9182036519050598, + "learning_rate": 9.87411340032603e-05, + "loss": 0.1243, + "step": 3540 + }, + { + "epoch": 5.401611328125e-06, + "model_forward_time": 0.02465224266052246, + "step": 3540 + }, + { + "epoch": 5.401611328125e-06, + "step": 3540, + "training_step_time": 0.10913467407226562 + }, + { + "epoch": 5.40313720703125e-06, + "model_forward_time": 0.024746417999267578, + "step": 3541 + }, + { + "epoch": 5.40313720703125e-06, + "step": 3541, + "training_step_time": 0.19028735160827637 + }, + { + "epoch": 5.4046630859375e-06, + "model_forward_time": 0.024817943572998047, + "step": 3542 + }, + { + "epoch": 5.4046630859375e-06, + "step": 3542, + "training_step_time": 0.10945010185241699 + }, + { + "epoch": 5.40618896484375e-06, + "model_forward_time": 0.024428367614746094, + "step": 3543 + }, + { + "epoch": 5.40618896484375e-06, + "step": 3543, + "training_step_time": 0.10692286491394043 + }, + { + "epoch": 5.40771484375e-06, + "model_forward_time": 0.02686452865600586, + "step": 3544 + }, + { + "epoch": 5.40771484375e-06, + "step": 3544, + "training_step_time": 0.10947251319885254 + }, + { + "epoch": 5.40924072265625e-06, + "model_forward_time": 0.02622079849243164, + "step": 3545 + }, + { + "epoch": 5.40924072265625e-06, + "step": 3545, + "training_step_time": 0.1086728572845459 + }, + { + "epoch": 5.4107666015625e-06, + "model_forward_time": 0.026522159576416016, + "step": 3546 + }, + { + "epoch": 5.4107666015625e-06, + "step": 3546, + "training_step_time": 0.10749149322509766 + }, + { + "epoch": 5.41229248046875e-06, + "model_forward_time": 0.02649402618408203, + "step": 3547 + }, + { + "epoch": 5.41229248046875e-06, + "step": 3547, + "training_step_time": 0.10960555076599121 + }, + { + "epoch": 5.413818359375e-06, + "model_forward_time": 0.026070117950439453, + "step": 3548 + }, + { + "epoch": 5.413818359375e-06, + "step": 3548, + "training_step_time": 0.11558008193969727 + }, + { + "epoch": 5.41534423828125e-06, + "model_forward_time": 0.02567911148071289, + "step": 3549 + }, + { + "epoch": 5.41534423828125e-06, + "step": 3549, + "training_step_time": 0.1335005760192871 + }, + { + "epoch": 5.4168701171875e-06, + "grad_norm": 0.9292612075805664, + "learning_rate": 9.872881462139479e-05, + "loss": 0.1365, + "step": 3550 + }, + { + "epoch": 5.4168701171875e-06, + "model_forward_time": 0.025362730026245117, + "step": 3550 + }, + { + "epoch": 5.4168701171875e-06, + "step": 3550, + "training_step_time": 0.20851397514343262 + }, + { + "epoch": 5.41839599609375e-06, + "model_forward_time": 0.024652719497680664, + "step": 3551 + }, + { + "epoch": 5.41839599609375e-06, + "step": 3551, + "training_step_time": 0.14301204681396484 + }, + { + "epoch": 5.419921875e-06, + "model_forward_time": 0.024757862091064453, + "step": 3552 + }, + { + "epoch": 5.419921875e-06, + "step": 3552, + "training_step_time": 0.1331467628479004 + }, + { + "epoch": 5.42144775390625e-06, + "model_forward_time": 0.024792194366455078, + "step": 3553 + }, + { + "epoch": 5.42144775390625e-06, + "step": 3553, + "training_step_time": 0.1241154670715332 + }, + { + "epoch": 5.4229736328125e-06, + "model_forward_time": 0.02486109733581543, + "step": 3554 + }, + { + "epoch": 5.4229736328125e-06, + "step": 3554, + "training_step_time": 0.11758589744567871 + }, + { + "epoch": 5.42449951171875e-06, + "model_forward_time": 0.025557518005371094, + "step": 3555 + }, + { + "epoch": 5.42449951171875e-06, + "step": 3555, + "training_step_time": 0.12010407447814941 + }, + { + "epoch": 5.426025390625e-06, + "model_forward_time": 0.025249004364013672, + "step": 3556 + }, + { + "epoch": 5.426025390625e-06, + "step": 3556, + "training_step_time": 0.11636829376220703 + }, + { + "epoch": 5.42755126953125e-06, + "model_forward_time": 0.025246143341064453, + "step": 3557 + }, + { + "epoch": 5.42755126953125e-06, + "step": 3557, + "training_step_time": 0.1151571273803711 + }, + { + "epoch": 5.4290771484375e-06, + "model_forward_time": 0.025602340698242188, + "step": 3558 + }, + { + "epoch": 5.4290771484375e-06, + "step": 3558, + "training_step_time": 0.11262130737304688 + }, + { + "epoch": 5.43060302734375e-06, + "model_forward_time": 0.02532672882080078, + "step": 3559 + }, + { + "epoch": 5.43060302734375e-06, + "step": 3559, + "training_step_time": 0.11426997184753418 + }, + { + "epoch": 5.43212890625e-06, + "grad_norm": 0.7721486687660217, + "learning_rate": 9.871643602942469e-05, + "loss": 0.0937, + "step": 3560 + }, + { + "epoch": 5.43212890625e-06, + "model_forward_time": 0.025256633758544922, + "step": 3560 + }, + { + "epoch": 5.43212890625e-06, + "step": 3560, + "training_step_time": 0.10849881172180176 + }, + { + "epoch": 5.43365478515625e-06, + "model_forward_time": 0.025162935256958008, + "step": 3561 + }, + { + "epoch": 5.43365478515625e-06, + "step": 3561, + "training_step_time": 0.1158592700958252 + }, + { + "epoch": 5.4351806640625e-06, + "model_forward_time": 0.02406764030456543, + "step": 3562 + }, + { + "epoch": 5.4351806640625e-06, + "step": 3562, + "training_step_time": 0.11043691635131836 + }, + { + "epoch": 5.43670654296875e-06, + "model_forward_time": 0.025604724884033203, + "step": 3563 + }, + { + "epoch": 5.43670654296875e-06, + "step": 3563, + "training_step_time": 0.11426353454589844 + }, + { + "epoch": 5.438232421875e-06, + "model_forward_time": 0.02533245086669922, + "step": 3564 + }, + { + "epoch": 5.438232421875e-06, + "step": 3564, + "training_step_time": 0.11117672920227051 + }, + { + "epoch": 5.43975830078125e-06, + "model_forward_time": 0.025414466857910156, + "step": 3565 + }, + { + "epoch": 5.43975830078125e-06, + "step": 3565, + "training_step_time": 0.11493635177612305 + }, + { + "epoch": 5.4412841796875e-06, + "model_forward_time": 0.02533102035522461, + "step": 3566 + }, + { + "epoch": 5.4412841796875e-06, + "step": 3566, + "training_step_time": 0.10959219932556152 + }, + { + "epoch": 5.44281005859375e-06, + "model_forward_time": 0.025224924087524414, + "step": 3567 + }, + { + "epoch": 5.44281005859375e-06, + "step": 3567, + "training_step_time": 0.10678339004516602 + }, + { + "epoch": 5.4443359375e-06, + "model_forward_time": 0.025284290313720703, + "step": 3568 + }, + { + "epoch": 5.4443359375e-06, + "step": 3568, + "training_step_time": 0.10858750343322754 + }, + { + "epoch": 5.44586181640625e-06, + "model_forward_time": 0.024285554885864258, + "step": 3569 + }, + { + "epoch": 5.44586181640625e-06, + "step": 3569, + "training_step_time": 0.11386609077453613 + }, + { + "epoch": 5.4473876953125e-06, + "grad_norm": 0.5761857628822327, + "learning_rate": 9.870399824239117e-05, + "loss": 0.1072, + "step": 3570 + }, + { + "epoch": 5.4473876953125e-06, + "model_forward_time": 0.025664806365966797, + "step": 3570 + }, + { + "epoch": 5.4473876953125e-06, + "step": 3570, + "training_step_time": 0.10714960098266602 + }, + { + "epoch": 5.44891357421875e-06, + "model_forward_time": 0.024098634719848633, + "step": 3571 + }, + { + "epoch": 5.44891357421875e-06, + "step": 3571, + "training_step_time": 0.11076879501342773 + }, + { + "epoch": 5.450439453125e-06, + "model_forward_time": 0.02530503273010254, + "step": 3572 + }, + { + "epoch": 5.450439453125e-06, + "step": 3572, + "training_step_time": 0.10745954513549805 + }, + { + "epoch": 5.45196533203125e-06, + "model_forward_time": 0.02584552764892578, + "step": 3573 + }, + { + "epoch": 5.45196533203125e-06, + "step": 3573, + "training_step_time": 0.10828232765197754 + }, + { + "epoch": 5.4534912109375e-06, + "model_forward_time": 0.025543212890625, + "step": 3574 + }, + { + "epoch": 5.4534912109375e-06, + "step": 3574, + "training_step_time": 0.11190414428710938 + }, + { + "epoch": 5.45501708984375e-06, + "model_forward_time": 0.025513410568237305, + "step": 3575 + }, + { + "epoch": 5.45501708984375e-06, + "step": 3575, + "training_step_time": 0.10922098159790039 + }, + { + "epoch": 5.45654296875e-06, + "model_forward_time": 0.02511143684387207, + "step": 3576 + }, + { + "epoch": 5.45654296875e-06, + "step": 3576, + "training_step_time": 0.10804390907287598 + }, + { + "epoch": 5.45806884765625e-06, + "model_forward_time": 0.02501535415649414, + "step": 3577 + }, + { + "epoch": 5.45806884765625e-06, + "step": 3577, + "training_step_time": 0.1902482509613037 + }, + { + "epoch": 5.4595947265625e-06, + "model_forward_time": 0.02481818199157715, + "step": 3578 + }, + { + "epoch": 5.4595947265625e-06, + "step": 3578, + "training_step_time": 0.12123632431030273 + }, + { + "epoch": 5.46112060546875e-06, + "model_forward_time": 0.025467395782470703, + "step": 3579 + }, + { + "epoch": 5.46112060546875e-06, + "step": 3579, + "training_step_time": 0.11988615989685059 + }, + { + "epoch": 5.462646484375e-06, + "grad_norm": 0.8048961162567139, + "learning_rate": 9.869150127540727e-05, + "loss": 0.1586, + "step": 3580 + }, + { + "epoch": 5.462646484375e-06, + "model_forward_time": 0.026096820831298828, + "step": 3580 + }, + { + "epoch": 5.462646484375e-06, + "step": 3580, + "training_step_time": 0.2304058074951172 + }, + { + "epoch": 5.46417236328125e-06, + "model_forward_time": 0.024451017379760742, + "step": 3581 + }, + { + "epoch": 5.46417236328125e-06, + "step": 3581, + "training_step_time": 0.15493535995483398 + }, + { + "epoch": 5.4656982421875e-06, + "model_forward_time": 0.024720430374145508, + "step": 3582 + }, + { + "epoch": 5.4656982421875e-06, + "step": 3582, + "training_step_time": 0.20496821403503418 + }, + { + "epoch": 5.46722412109375e-06, + "model_forward_time": 0.024452924728393555, + "step": 3583 + }, + { + "epoch": 5.46722412109375e-06, + "step": 3583, + "training_step_time": 0.1382906436920166 + }, + { + "epoch": 5.46875e-06, + "model_forward_time": 0.024465084075927734, + "step": 3584 + }, + { + "epoch": 5.46875e-06, + "step": 3584, + "training_step_time": 0.12224531173706055 + }, + { + "epoch": 5.47027587890625e-06, + "model_forward_time": 0.024843215942382812, + "step": 3585 + }, + { + "epoch": 5.47027587890625e-06, + "step": 3585, + "training_step_time": 0.10573434829711914 + }, + { + "epoch": 5.4718017578125e-06, + "model_forward_time": 0.02535557746887207, + "step": 3586 + }, + { + "epoch": 5.4718017578125e-06, + "step": 3586, + "training_step_time": 0.20642828941345215 + }, + { + "epoch": 5.47332763671875e-06, + "model_forward_time": 0.024872541427612305, + "step": 3587 + }, + { + "epoch": 5.47332763671875e-06, + "step": 3587, + "training_step_time": 0.10535335540771484 + }, + { + "epoch": 5.474853515625e-06, + "model_forward_time": 0.024709701538085938, + "step": 3588 + }, + { + "epoch": 5.474853515625e-06, + "step": 3588, + "training_step_time": 0.10942673683166504 + }, + { + "epoch": 5.47637939453125e-06, + "model_forward_time": 0.025477886199951172, + "step": 3589 + }, + { + "epoch": 5.47637939453125e-06, + "step": 3589, + "training_step_time": 0.10938739776611328 + }, + { + "epoch": 5.4779052734375e-06, + "grad_norm": 0.9959975481033325, + "learning_rate": 9.867894514365802e-05, + "loss": 0.1271, + "step": 3590 + }, + { + "epoch": 5.4779052734375e-06, + "model_forward_time": 0.02608323097229004, + "step": 3590 + }, + { + "epoch": 5.4779052734375e-06, + "step": 3590, + "training_step_time": 0.11232542991638184 + }, + { + "epoch": 5.47943115234375e-06, + "model_forward_time": 0.025028467178344727, + "step": 3591 + }, + { + "epoch": 5.47943115234375e-06, + "step": 3591, + "training_step_time": 0.20829033851623535 + }, + { + "epoch": 5.48095703125e-06, + "model_forward_time": 0.025417566299438477, + "step": 3592 + }, + { + "epoch": 5.48095703125e-06, + "step": 3592, + "training_step_time": 0.13596153259277344 + }, + { + "epoch": 5.48248291015625e-06, + "model_forward_time": 0.02444148063659668, + "step": 3593 + }, + { + "epoch": 5.48248291015625e-06, + "step": 3593, + "training_step_time": 0.14188671112060547 + }, + { + "epoch": 5.4840087890625e-06, + "model_forward_time": 0.025122642517089844, + "step": 3594 + }, + { + "epoch": 5.4840087890625e-06, + "step": 3594, + "training_step_time": 0.18038082122802734 + }, + { + "epoch": 5.48553466796875e-06, + "model_forward_time": 0.026125192642211914, + "step": 3595 + }, + { + "epoch": 5.48553466796875e-06, + "step": 3595, + "training_step_time": 0.12324070930480957 + }, + { + "epoch": 5.487060546875e-06, + "model_forward_time": 0.026318788528442383, + "step": 3596 + }, + { + "epoch": 5.487060546875e-06, + "step": 3596, + "training_step_time": 0.1178884506225586 + }, + { + "epoch": 5.48858642578125e-06, + "model_forward_time": 0.025536537170410156, + "step": 3597 + }, + { + "epoch": 5.48858642578125e-06, + "step": 3597, + "training_step_time": 0.11551213264465332 + }, + { + "epoch": 5.4901123046875e-06, + "model_forward_time": 0.02524590492248535, + "step": 3598 + }, + { + "epoch": 5.4901123046875e-06, + "step": 3598, + "training_step_time": 0.11681175231933594 + }, + { + "epoch": 5.49163818359375e-06, + "model_forward_time": 0.025385379791259766, + "step": 3599 + }, + { + "epoch": 5.49163818359375e-06, + "step": 3599, + "training_step_time": 0.11041831970214844 + }, + { + "epoch": 5.4931640625e-06, + "grad_norm": 0.5958221554756165, + "learning_rate": 9.86663298624003e-05, + "loss": 0.127, + "step": 3600 + }, + { + "epoch": 5.4931640625e-06, + "model_forward_time": 0.02586841583251953, + "step": 3600 + }, + { + "epoch": 5.4931640625e-06, + "step": 3600, + "training_step_time": 0.11036968231201172 + }, + { + "epoch": 5.49468994140625e-06, + "model_forward_time": 0.025845050811767578, + "step": 3601 + }, + { + "epoch": 5.49468994140625e-06, + "step": 3601, + "training_step_time": 0.11132287979125977 + }, + { + "epoch": 5.4962158203125e-06, + "model_forward_time": 0.025480031967163086, + "step": 3602 + }, + { + "epoch": 5.4962158203125e-06, + "step": 3602, + "training_step_time": 0.10953688621520996 + }, + { + "epoch": 5.49774169921875e-06, + "model_forward_time": 0.0252835750579834, + "step": 3603 + }, + { + "epoch": 5.49774169921875e-06, + "step": 3603, + "training_step_time": 0.10858917236328125 + }, + { + "epoch": 5.499267578125e-06, + "model_forward_time": 0.02533864974975586, + "step": 3604 + }, + { + "epoch": 5.499267578125e-06, + "step": 3604, + "training_step_time": 0.11181068420410156 + }, + { + "epoch": 5.50079345703125e-06, + "model_forward_time": 0.02553248405456543, + "step": 3605 + }, + { + "epoch": 5.50079345703125e-06, + "step": 3605, + "training_step_time": 0.10823225975036621 + }, + { + "epoch": 5.5023193359375e-06, + "model_forward_time": 0.0254366397857666, + "step": 3606 + }, + { + "epoch": 5.5023193359375e-06, + "step": 3606, + "training_step_time": 0.10924696922302246 + }, + { + "epoch": 5.50384521484375e-06, + "model_forward_time": 0.025003671646118164, + "step": 3607 + }, + { + "epoch": 5.50384521484375e-06, + "step": 3607, + "training_step_time": 0.11053276062011719 + }, + { + "epoch": 5.50537109375e-06, + "model_forward_time": 0.0250704288482666, + "step": 3608 + }, + { + "epoch": 5.50537109375e-06, + "step": 3608, + "training_step_time": 0.10642480850219727 + }, + { + "epoch": 5.50689697265625e-06, + "model_forward_time": 0.024239540100097656, + "step": 3609 + }, + { + "epoch": 5.50689697265625e-06, + "step": 3609, + "training_step_time": 0.11119461059570312 + }, + { + "epoch": 5.5084228515625e-06, + "grad_norm": 0.7978876829147339, + "learning_rate": 9.865365544696285e-05, + "loss": 0.1218, + "step": 3610 + }, + { + "epoch": 5.5084228515625e-06, + "model_forward_time": 0.024227380752563477, + "step": 3610 + }, + { + "epoch": 5.5084228515625e-06, + "step": 3610, + "training_step_time": 0.10892510414123535 + }, + { + "epoch": 5.50994873046875e-06, + "model_forward_time": 0.024201393127441406, + "step": 3611 + }, + { + "epoch": 5.50994873046875e-06, + "step": 3611, + "training_step_time": 0.11036515235900879 + }, + { + "epoch": 5.511474609375e-06, + "model_forward_time": 0.024261951446533203, + "step": 3612 + }, + { + "epoch": 5.511474609375e-06, + "step": 3612, + "training_step_time": 0.11133956909179688 + }, + { + "epoch": 5.51300048828125e-06, + "model_forward_time": 0.025452375411987305, + "step": 3613 + }, + { + "epoch": 5.51300048828125e-06, + "step": 3613, + "training_step_time": 0.11284828186035156 + }, + { + "epoch": 5.5145263671875e-06, + "model_forward_time": 0.02516007423400879, + "step": 3614 + }, + { + "epoch": 5.5145263671875e-06, + "step": 3614, + "training_step_time": 0.11010622978210449 + }, + { + "epoch": 5.51605224609375e-06, + "model_forward_time": 0.025304794311523438, + "step": 3615 + }, + { + "epoch": 5.51605224609375e-06, + "step": 3615, + "training_step_time": 0.1081843376159668 + }, + { + "epoch": 5.517578125e-06, + "model_forward_time": 0.025708436965942383, + "step": 3616 + }, + { + "epoch": 5.517578125e-06, + "step": 3616, + "training_step_time": 0.11047792434692383 + }, + { + "epoch": 5.51910400390625e-06, + "model_forward_time": 0.025622844696044922, + "step": 3617 + }, + { + "epoch": 5.51910400390625e-06, + "step": 3617, + "training_step_time": 0.11481785774230957 + }, + { + "epoch": 5.5206298828125e-06, + "model_forward_time": 0.025615692138671875, + "step": 3618 + }, + { + "epoch": 5.5206298828125e-06, + "step": 3618, + "training_step_time": 0.11126589775085449 + }, + { + "epoch": 5.52215576171875e-06, + "model_forward_time": 0.02544879913330078, + "step": 3619 + }, + { + "epoch": 5.52215576171875e-06, + "step": 3619, + "training_step_time": 0.10785293579101562 + }, + { + "epoch": 5.523681640625e-06, + "grad_norm": 0.7753174304962158, + "learning_rate": 9.864092191274632e-05, + "loss": 0.1153, + "step": 3620 + }, + { + "epoch": 5.523681640625e-06, + "model_forward_time": 0.025243282318115234, + "step": 3620 + }, + { + "epoch": 5.523681640625e-06, + "step": 3620, + "training_step_time": 0.11166644096374512 + }, + { + "epoch": 5.52520751953125e-06, + "model_forward_time": 0.02500152587890625, + "step": 3621 + }, + { + "epoch": 5.52520751953125e-06, + "step": 3621, + "training_step_time": 0.22475814819335938 + }, + { + "epoch": 5.5267333984375e-06, + "model_forward_time": 0.024393796920776367, + "step": 3622 + }, + { + "epoch": 5.5267333984375e-06, + "step": 3622, + "training_step_time": 0.2043933868408203 + }, + { + "epoch": 5.52825927734375e-06, + "model_forward_time": 0.025471925735473633, + "step": 3623 + }, + { + "epoch": 5.52825927734375e-06, + "step": 3623, + "training_step_time": 0.14416766166687012 + }, + { + "epoch": 5.52978515625e-06, + "model_forward_time": 0.024672985076904297, + "step": 3624 + }, + { + "epoch": 5.52978515625e-06, + "step": 3624, + "training_step_time": 0.20978093147277832 + }, + { + "epoch": 5.53131103515625e-06, + "model_forward_time": 0.024283885955810547, + "step": 3625 + }, + { + "epoch": 5.53131103515625e-06, + "step": 3625, + "training_step_time": 0.18009114265441895 + }, + { + "epoch": 5.5328369140625e-06, + "model_forward_time": 0.02435612678527832, + "step": 3626 + }, + { + "epoch": 5.5328369140625e-06, + "step": 3626, + "training_step_time": 0.1759657859802246 + }, + { + "epoch": 5.53436279296875e-06, + "model_forward_time": 0.02431797981262207, + "step": 3627 + }, + { + "epoch": 5.53436279296875e-06, + "step": 3627, + "training_step_time": 0.14286208152770996 + }, + { + "epoch": 5.535888671875e-06, + "model_forward_time": 0.02447199821472168, + "step": 3628 + }, + { + "epoch": 5.535888671875e-06, + "step": 3628, + "training_step_time": 0.12184834480285645 + }, + { + "epoch": 5.53741455078125e-06, + "model_forward_time": 0.02477264404296875, + "step": 3629 + }, + { + "epoch": 5.53741455078125e-06, + "step": 3629, + "training_step_time": 0.10953545570373535 + }, + { + "epoch": 5.5389404296875e-06, + "grad_norm": 0.5163019895553589, + "learning_rate": 9.862812927522309e-05, + "loss": 0.0957, + "step": 3630 + }, + { + "epoch": 5.5389404296875e-06, + "model_forward_time": 0.02468729019165039, + "step": 3630 + }, + { + "epoch": 5.5389404296875e-06, + "step": 3630, + "training_step_time": 0.19427728652954102 + }, + { + "epoch": 5.54046630859375e-06, + "model_forward_time": 0.024411678314208984, + "step": 3631 + }, + { + "epoch": 5.54046630859375e-06, + "step": 3631, + "training_step_time": 0.10506319999694824 + }, + { + "epoch": 5.5419921875e-06, + "model_forward_time": 0.024384260177612305, + "step": 3632 + }, + { + "epoch": 5.5419921875e-06, + "step": 3632, + "training_step_time": 0.10987353324890137 + }, + { + "epoch": 5.54351806640625e-06, + "model_forward_time": 0.025397777557373047, + "step": 3633 + }, + { + "epoch": 5.54351806640625e-06, + "step": 3633, + "training_step_time": 0.11170649528503418 + }, + { + "epoch": 5.5450439453125e-06, + "model_forward_time": 0.025423049926757812, + "step": 3634 + }, + { + "epoch": 5.5450439453125e-06, + "step": 3634, + "training_step_time": 0.11060476303100586 + }, + { + "epoch": 5.54656982421875e-06, + "model_forward_time": 0.025203466415405273, + "step": 3635 + }, + { + "epoch": 5.54656982421875e-06, + "step": 3635, + "training_step_time": 0.21367692947387695 + }, + { + "epoch": 5.548095703125e-06, + "model_forward_time": 0.02469801902770996, + "step": 3636 + }, + { + "epoch": 5.548095703125e-06, + "step": 3636, + "training_step_time": 0.11488771438598633 + }, + { + "epoch": 5.54962158203125e-06, + "model_forward_time": 0.02514028549194336, + "step": 3637 + }, + { + "epoch": 5.54962158203125e-06, + "step": 3637, + "training_step_time": 0.10862874984741211 + }, + { + "epoch": 5.5511474609375e-06, + "model_forward_time": 0.0251615047454834, + "step": 3638 + }, + { + "epoch": 5.5511474609375e-06, + "step": 3638, + "training_step_time": 0.17350268363952637 + }, + { + "epoch": 5.55267333984375e-06, + "model_forward_time": 0.025249242782592773, + "step": 3639 + }, + { + "epoch": 5.55267333984375e-06, + "step": 3639, + "training_step_time": 0.16251111030578613 + }, + { + "epoch": 5.55419921875e-06, + "grad_norm": 0.40915581583976746, + "learning_rate": 9.861527754993749e-05, + "loss": 0.1081, + "step": 3640 + }, + { + "epoch": 5.55419921875e-06, + "model_forward_time": 0.02501988410949707, + "step": 3640 + }, + { + "epoch": 5.55419921875e-06, + "step": 3640, + "training_step_time": 0.10607171058654785 + }, + { + "epoch": 5.55572509765625e-06, + "model_forward_time": 0.025009632110595703, + "step": 3641 + }, + { + "epoch": 5.55572509765625e-06, + "step": 3641, + "training_step_time": 0.10691547393798828 + }, + { + "epoch": 5.5572509765625e-06, + "model_forward_time": 0.02519702911376953, + "step": 3642 + }, + { + "epoch": 5.5572509765625e-06, + "step": 3642, + "training_step_time": 0.10802531242370605 + }, + { + "epoch": 5.55877685546875e-06, + "model_forward_time": 0.026309967041015625, + "step": 3643 + }, + { + "epoch": 5.55877685546875e-06, + "step": 3643, + "training_step_time": 0.10938906669616699 + }, + { + "epoch": 5.560302734375e-06, + "model_forward_time": 0.026763916015625, + "step": 3644 + }, + { + "epoch": 5.560302734375e-06, + "step": 3644, + "training_step_time": 0.11163735389709473 + }, + { + "epoch": 5.56182861328125e-06, + "model_forward_time": 0.02516317367553711, + "step": 3645 + }, + { + "epoch": 5.56182861328125e-06, + "step": 3645, + "training_step_time": 0.11041426658630371 + }, + { + "epoch": 5.5633544921875e-06, + "model_forward_time": 0.02609705924987793, + "step": 3646 + }, + { + "epoch": 5.5633544921875e-06, + "step": 3646, + "training_step_time": 0.1114192008972168 + }, + { + "epoch": 5.56488037109375e-06, + "model_forward_time": 0.02512669563293457, + "step": 3647 + }, + { + "epoch": 5.56488037109375e-06, + "step": 3647, + "training_step_time": 0.11021924018859863 + }, + { + "epoch": 5.56640625e-06, + "model_forward_time": 0.02504587173461914, + "step": 3648 + }, + { + "epoch": 5.56640625e-06, + "step": 3648, + "training_step_time": 0.1109774112701416 + }, + { + "epoch": 5.56793212890625e-06, + "model_forward_time": 0.02516317367553711, + "step": 3649 + }, + { + "epoch": 5.56793212890625e-06, + "step": 3649, + "training_step_time": 0.10928463935852051 + }, + { + "epoch": 5.5694580078125e-06, + "grad_norm": 0.5303035378456116, + "learning_rate": 9.860236675250552e-05, + "loss": 0.1355, + "step": 3650 + }, + { + "epoch": 5.5694580078125e-06, + "model_forward_time": 0.025303363800048828, + "step": 3650 + }, + { + "epoch": 5.5694580078125e-06, + "step": 3650, + "training_step_time": 0.10702753067016602 + }, + { + "epoch": 5.57098388671875e-06, + "model_forward_time": 0.025582075119018555, + "step": 3651 + }, + { + "epoch": 5.57098388671875e-06, + "step": 3651, + "training_step_time": 0.10846829414367676 + }, + { + "epoch": 5.572509765625e-06, + "model_forward_time": 0.025735139846801758, + "step": 3652 + }, + { + "epoch": 5.572509765625e-06, + "step": 3652, + "training_step_time": 0.10896635055541992 + }, + { + "epoch": 5.57403564453125e-06, + "model_forward_time": 0.025242328643798828, + "step": 3653 + }, + { + "epoch": 5.57403564453125e-06, + "step": 3653, + "training_step_time": 0.10647916793823242 + }, + { + "epoch": 5.5755615234375e-06, + "model_forward_time": 0.025364160537719727, + "step": 3654 + }, + { + "epoch": 5.5755615234375e-06, + "step": 3654, + "training_step_time": 0.11582779884338379 + }, + { + "epoch": 5.57708740234375e-06, + "model_forward_time": 0.02546095848083496, + "step": 3655 + }, + { + "epoch": 5.57708740234375e-06, + "step": 3655, + "training_step_time": 0.10675239562988281 + }, + { + "epoch": 5.57861328125e-06, + "model_forward_time": 0.025429964065551758, + "step": 3656 + }, + { + "epoch": 5.57861328125e-06, + "step": 3656, + "training_step_time": 0.11009716987609863 + }, + { + "epoch": 5.58013916015625e-06, + "model_forward_time": 0.025098800659179688, + "step": 3657 + }, + { + "epoch": 5.58013916015625e-06, + "step": 3657, + "training_step_time": 0.10571575164794922 + }, + { + "epoch": 5.5816650390625e-06, + "model_forward_time": 0.025443315505981445, + "step": 3658 + }, + { + "epoch": 5.5816650390625e-06, + "step": 3658, + "training_step_time": 0.10824227333068848 + }, + { + "epoch": 5.58319091796875e-06, + "model_forward_time": 0.025310754776000977, + "step": 3659 + }, + { + "epoch": 5.58319091796875e-06, + "step": 3659, + "training_step_time": 0.1077120304107666 + }, + { + "epoch": 5.584716796875e-06, + "grad_norm": 0.4611889123916626, + "learning_rate": 9.858939689861506e-05, + "loss": 0.1111, + "step": 3660 + }, + { + "epoch": 5.584716796875e-06, + "model_forward_time": 0.02488565444946289, + "step": 3660 + }, + { + "epoch": 5.584716796875e-06, + "step": 3660, + "training_step_time": 0.10646867752075195 + }, + { + "epoch": 5.58624267578125e-06, + "model_forward_time": 0.027025461196899414, + "step": 3661 + }, + { + "epoch": 5.58624267578125e-06, + "step": 3661, + "training_step_time": 0.1088418960571289 + }, + { + "epoch": 5.5877685546875e-06, + "model_forward_time": 0.025281667709350586, + "step": 3662 + }, + { + "epoch": 5.5877685546875e-06, + "step": 3662, + "training_step_time": 0.10543608665466309 + }, + { + "epoch": 5.58929443359375e-06, + "model_forward_time": 0.02533245086669922, + "step": 3663 + }, + { + "epoch": 5.58929443359375e-06, + "step": 3663, + "training_step_time": 0.11092305183410645 + }, + { + "epoch": 5.5908203125e-06, + "model_forward_time": 0.02594447135925293, + "step": 3664 + }, + { + "epoch": 5.5908203125e-06, + "step": 3664, + "training_step_time": 0.10593390464782715 + }, + { + "epoch": 5.59234619140625e-06, + "model_forward_time": 0.025614261627197266, + "step": 3665 + }, + { + "epoch": 5.59234619140625e-06, + "step": 3665, + "training_step_time": 0.10619735717773438 + }, + { + "epoch": 5.5938720703125e-06, + "model_forward_time": 0.02579641342163086, + "step": 3666 + }, + { + "epoch": 5.5938720703125e-06, + "step": 3666, + "training_step_time": 0.12946867942810059 + }, + { + "epoch": 5.59539794921875e-06, + "model_forward_time": 0.02540898323059082, + "step": 3667 + }, + { + "epoch": 5.59539794921875e-06, + "step": 3667, + "training_step_time": 0.11614346504211426 + }, + { + "epoch": 5.596923828125e-06, + "model_forward_time": 0.02584099769592285, + "step": 3668 + }, + { + "epoch": 5.596923828125e-06, + "step": 3668, + "training_step_time": 0.15394020080566406 + }, + { + "epoch": 5.59844970703125e-06, + "model_forward_time": 0.025391578674316406, + "step": 3669 + }, + { + "epoch": 5.59844970703125e-06, + "step": 3669, + "training_step_time": 0.15090489387512207 + }, + { + "epoch": 5.5999755859375e-06, + "grad_norm": 0.5910298824310303, + "learning_rate": 9.857636800402568e-05, + "loss": 0.1179, + "step": 3670 + }, + { + "epoch": 5.5999755859375e-06, + "model_forward_time": 0.024151086807250977, + "step": 3670 + }, + { + "epoch": 5.5999755859375e-06, + "step": 3670, + "training_step_time": 0.2219538688659668 + }, + { + "epoch": 5.60150146484375e-06, + "model_forward_time": 0.02447652816772461, + "step": 3671 + }, + { + "epoch": 5.60150146484375e-06, + "step": 3671, + "training_step_time": 0.2072756290435791 + }, + { + "epoch": 5.60302734375e-06, + "model_forward_time": 0.02445077896118164, + "step": 3672 + }, + { + "epoch": 5.60302734375e-06, + "step": 3672, + "training_step_time": 0.13475346565246582 + }, + { + "epoch": 5.60455322265625e-06, + "model_forward_time": 0.024463653564453125, + "step": 3673 + }, + { + "epoch": 5.60455322265625e-06, + "step": 3673, + "training_step_time": 0.20469093322753906 + }, + { + "epoch": 5.6060791015625e-06, + "model_forward_time": 0.024940013885498047, + "step": 3674 + }, + { + "epoch": 5.6060791015625e-06, + "step": 3674, + "training_step_time": 0.1187291145324707 + }, + { + "epoch": 5.60760498046875e-06, + "model_forward_time": 0.024410486221313477, + "step": 3675 + }, + { + "epoch": 5.60760498046875e-06, + "step": 3675, + "training_step_time": 0.17871761322021484 + }, + { + "epoch": 5.609130859375e-06, + "model_forward_time": 0.024702072143554688, + "step": 3676 + }, + { + "epoch": 5.609130859375e-06, + "step": 3676, + "training_step_time": 0.11275410652160645 + }, + { + "epoch": 5.61065673828125e-06, + "model_forward_time": 0.024342775344848633, + "step": 3677 + }, + { + "epoch": 5.61065673828125e-06, + "step": 3677, + "training_step_time": 0.11335635185241699 + }, + { + "epoch": 5.6121826171875e-06, + "model_forward_time": 0.02553391456604004, + "step": 3678 + }, + { + "epoch": 5.6121826171875e-06, + "step": 3678, + "training_step_time": 0.11303186416625977 + }, + { + "epoch": 5.61370849609375e-06, + "model_forward_time": 0.02550482749938965, + "step": 3679 + }, + { + "epoch": 5.61370849609375e-06, + "step": 3679, + "training_step_time": 0.11442041397094727 + }, + { + "epoch": 5.615234375e-06, + "grad_norm": 0.9671534299850464, + "learning_rate": 9.856328008456872e-05, + "loss": 0.1244, + "step": 3680 + }, + { + "epoch": 5.615234375e-06, + "model_forward_time": 0.02512955665588379, + "step": 3680 + }, + { + "epoch": 5.615234375e-06, + "step": 3680, + "training_step_time": 0.21998977661132812 + }, + { + "epoch": 5.61676025390625e-06, + "model_forward_time": 0.02484583854675293, + "step": 3681 + }, + { + "epoch": 5.61676025390625e-06, + "step": 3681, + "training_step_time": 0.2035667896270752 + }, + { + "epoch": 5.6182861328125e-06, + "model_forward_time": 0.024566650390625, + "step": 3682 + }, + { + "epoch": 5.6182861328125e-06, + "step": 3682, + "training_step_time": 0.13980603218078613 + }, + { + "epoch": 5.61981201171875e-06, + "model_forward_time": 0.026850223541259766, + "step": 3683 + }, + { + "epoch": 5.61981201171875e-06, + "step": 3683, + "training_step_time": 0.10766053199768066 + }, + { + "epoch": 5.621337890625e-06, + "model_forward_time": 0.025412797927856445, + "step": 3684 + }, + { + "epoch": 5.621337890625e-06, + "step": 3684, + "training_step_time": 0.10948824882507324 + }, + { + "epoch": 5.62286376953125e-06, + "model_forward_time": 0.025035381317138672, + "step": 3685 + }, + { + "epoch": 5.62286376953125e-06, + "step": 3685, + "training_step_time": 0.10767197608947754 + }, + { + "epoch": 5.6243896484375e-06, + "model_forward_time": 0.025219202041625977, + "step": 3686 + }, + { + "epoch": 5.6243896484375e-06, + "step": 3686, + "training_step_time": 0.10927557945251465 + }, + { + "epoch": 5.62591552734375e-06, + "model_forward_time": 0.025241374969482422, + "step": 3687 + }, + { + "epoch": 5.62591552734375e-06, + "step": 3687, + "training_step_time": 0.10854768753051758 + }, + { + "epoch": 5.62744140625e-06, + "model_forward_time": 0.02672266960144043, + "step": 3688 + }, + { + "epoch": 5.62744140625e-06, + "step": 3688, + "training_step_time": 0.11433148384094238 + }, + { + "epoch": 5.62896728515625e-06, + "model_forward_time": 0.02520918846130371, + "step": 3689 + }, + { + "epoch": 5.62896728515625e-06, + "step": 3689, + "training_step_time": 0.10828781127929688 + }, + { + "epoch": 5.6304931640625e-06, + "grad_norm": 0.5670982599258423, + "learning_rate": 9.855013315614725e-05, + "loss": 0.1106, + "step": 3690 + }, + { + "epoch": 5.6304931640625e-06, + "model_forward_time": 0.025037765502929688, + "step": 3690 + }, + { + "epoch": 5.6304931640625e-06, + "step": 3690, + "training_step_time": 0.10764527320861816 + }, + { + "epoch": 5.63201904296875e-06, + "model_forward_time": 0.026458740234375, + "step": 3691 + }, + { + "epoch": 5.63201904296875e-06, + "step": 3691, + "training_step_time": 0.10952997207641602 + }, + { + "epoch": 5.633544921875e-06, + "model_forward_time": 0.02515268325805664, + "step": 3692 + }, + { + "epoch": 5.633544921875e-06, + "step": 3692, + "training_step_time": 0.10698437690734863 + }, + { + "epoch": 5.63507080078125e-06, + "model_forward_time": 0.025123119354248047, + "step": 3693 + }, + { + "epoch": 5.63507080078125e-06, + "step": 3693, + "training_step_time": 0.11066079139709473 + }, + { + "epoch": 5.6365966796875e-06, + "model_forward_time": 0.025208711624145508, + "step": 3694 + }, + { + "epoch": 5.6365966796875e-06, + "step": 3694, + "training_step_time": 0.10919427871704102 + }, + { + "epoch": 5.63812255859375e-06, + "model_forward_time": 0.02528691291809082, + "step": 3695 + }, + { + "epoch": 5.63812255859375e-06, + "step": 3695, + "training_step_time": 0.10968923568725586 + }, + { + "epoch": 5.6396484375e-06, + "model_forward_time": 0.025018930435180664, + "step": 3696 + }, + { + "epoch": 5.6396484375e-06, + "step": 3696, + "training_step_time": 0.10768294334411621 + }, + { + "epoch": 5.64117431640625e-06, + "model_forward_time": 0.02568364143371582, + "step": 3697 + }, + { + "epoch": 5.64117431640625e-06, + "step": 3697, + "training_step_time": 0.1093449592590332 + }, + { + "epoch": 5.6427001953125e-06, + "model_forward_time": 0.025207042694091797, + "step": 3698 + }, + { + "epoch": 5.6427001953125e-06, + "step": 3698, + "training_step_time": 0.11206626892089844 + }, + { + "epoch": 5.64422607421875e-06, + "model_forward_time": 0.02564263343811035, + "step": 3699 + }, + { + "epoch": 5.64422607421875e-06, + "step": 3699, + "training_step_time": 0.1087958812713623 + }, + { + "epoch": 5.645751953125e-06, + "grad_norm": 0.7125267386436462, + "learning_rate": 9.8536927234736e-05, + "loss": 0.1251, + "step": 3700 + }, + { + "epoch": 5.645751953125e-06, + "model_forward_time": 0.024998903274536133, + "step": 3700 + }, + { + "epoch": 5.645751953125e-06, + "step": 3700, + "training_step_time": 0.10839509963989258 + }, + { + "epoch": 5.64727783203125e-06, + "model_forward_time": 0.0266721248626709, + "step": 3701 + }, + { + "epoch": 5.64727783203125e-06, + "step": 3701, + "training_step_time": 0.10779452323913574 + }, + { + "epoch": 5.6488037109375e-06, + "model_forward_time": 0.025252342224121094, + "step": 3702 + }, + { + "epoch": 5.6488037109375e-06, + "step": 3702, + "training_step_time": 0.10919785499572754 + }, + { + "epoch": 5.65032958984375e-06, + "model_forward_time": 0.025144338607788086, + "step": 3703 + }, + { + "epoch": 5.65032958984375e-06, + "step": 3703, + "training_step_time": 0.10828804969787598 + }, + { + "epoch": 5.65185546875e-06, + "model_forward_time": 0.02506279945373535, + "step": 3704 + }, + { + "epoch": 5.65185546875e-06, + "step": 3704, + "training_step_time": 0.10794401168823242 + }, + { + "epoch": 5.65338134765625e-06, + "model_forward_time": 0.027426719665527344, + "step": 3705 + }, + { + "epoch": 5.65338134765625e-06, + "step": 3705, + "training_step_time": 0.11094355583190918 + }, + { + "epoch": 5.6549072265625e-06, + "model_forward_time": 0.025046348571777344, + "step": 3706 + }, + { + "epoch": 5.6549072265625e-06, + "step": 3706, + "training_step_time": 0.10791707038879395 + }, + { + "epoch": 5.65643310546875e-06, + "model_forward_time": 0.025409936904907227, + "step": 3707 + }, + { + "epoch": 5.65643310546875e-06, + "step": 3707, + "training_step_time": 0.11396574974060059 + }, + { + "epoch": 5.657958984375e-06, + "model_forward_time": 0.025937795639038086, + "step": 3708 + }, + { + "epoch": 5.657958984375e-06, + "step": 3708, + "training_step_time": 0.10824179649353027 + }, + { + "epoch": 5.65948486328125e-06, + "model_forward_time": 0.025185823440551758, + "step": 3709 + }, + { + "epoch": 5.65948486328125e-06, + "step": 3709, + "training_step_time": 0.1088399887084961 + }, + { + "epoch": 5.6610107421875e-06, + "grad_norm": 0.5925086140632629, + "learning_rate": 9.852366233638144e-05, + "loss": 0.1192, + "step": 3710 + }, + { + "epoch": 5.6610107421875e-06, + "model_forward_time": 0.0253448486328125, + "step": 3710 + }, + { + "epoch": 5.6610107421875e-06, + "step": 3710, + "training_step_time": 0.18607091903686523 + }, + { + "epoch": 5.66253662109375e-06, + "model_forward_time": 0.024074792861938477, + "step": 3711 + }, + { + "epoch": 5.66253662109375e-06, + "step": 3711, + "training_step_time": 0.11555075645446777 + }, + { + "epoch": 5.6640625e-06, + "model_forward_time": 0.024435043334960938, + "step": 3712 + }, + { + "epoch": 5.6640625e-06, + "step": 3712, + "training_step_time": 0.17337560653686523 + }, + { + "epoch": 5.66558837890625e-06, + "model_forward_time": 0.024907827377319336, + "step": 3713 + }, + { + "epoch": 5.66558837890625e-06, + "step": 3713, + "training_step_time": 0.13090872764587402 + }, + { + "epoch": 5.6671142578125e-06, + "model_forward_time": 0.02438497543334961, + "step": 3714 + }, + { + "epoch": 5.6671142578125e-06, + "step": 3714, + "training_step_time": 0.21389508247375488 + }, + { + "epoch": 5.66864013671875e-06, + "model_forward_time": 0.024282217025756836, + "step": 3715 + }, + { + "epoch": 5.66864013671875e-06, + "step": 3715, + "training_step_time": 0.13632965087890625 + }, + { + "epoch": 5.670166015625e-06, + "model_forward_time": 0.024297714233398438, + "step": 3716 + }, + { + "epoch": 5.670166015625e-06, + "step": 3716, + "training_step_time": 0.11049032211303711 + }, + { + "epoch": 5.67169189453125e-06, + "model_forward_time": 0.02519822120666504, + "step": 3717 + }, + { + "epoch": 5.67169189453125e-06, + "step": 3717, + "training_step_time": 0.20998024940490723 + }, + { + "epoch": 5.6732177734375e-06, + "model_forward_time": 0.02444624900817871, + "step": 3718 + }, + { + "epoch": 5.6732177734375e-06, + "step": 3718, + "training_step_time": 0.12879157066345215 + }, + { + "epoch": 5.67474365234375e-06, + "model_forward_time": 0.024034500122070312, + "step": 3719 + }, + { + "epoch": 5.67474365234375e-06, + "step": 3719, + "training_step_time": 0.18405604362487793 + }, + { + "epoch": 5.67626953125e-06, + "grad_norm": 0.6077547669410706, + "learning_rate": 9.851033847720166e-05, + "loss": 0.1196, + "step": 3720 + }, + { + "epoch": 5.67626953125e-06, + "model_forward_time": 0.024678945541381836, + "step": 3720 + }, + { + "epoch": 5.67626953125e-06, + "step": 3720, + "training_step_time": 0.12668275833129883 + }, + { + "epoch": 5.67779541015625e-06, + "model_forward_time": 0.023871421813964844, + "step": 3721 + }, + { + "epoch": 5.67779541015625e-06, + "step": 3721, + "training_step_time": 0.11775374412536621 + }, + { + "epoch": 5.6793212890625e-06, + "model_forward_time": 0.02523636817932129, + "step": 3722 + }, + { + "epoch": 5.6793212890625e-06, + "step": 3722, + "training_step_time": 0.1190195083618164 + }, + { + "epoch": 5.68084716796875e-06, + "model_forward_time": 0.025907278060913086, + "step": 3723 + }, + { + "epoch": 5.68084716796875e-06, + "step": 3723, + "training_step_time": 0.12054204940795898 + }, + { + "epoch": 5.682373046875e-06, + "model_forward_time": 0.028300046920776367, + "step": 3724 + }, + { + "epoch": 5.682373046875e-06, + "step": 3724, + "training_step_time": 0.11446428298950195 + }, + { + "epoch": 5.68389892578125e-06, + "model_forward_time": 0.02431631088256836, + "step": 3725 + }, + { + "epoch": 5.68389892578125e-06, + "step": 3725, + "training_step_time": 0.14249157905578613 + }, + { + "epoch": 5.6854248046875e-06, + "model_forward_time": 0.024969816207885742, + "step": 3726 + }, + { + "epoch": 5.6854248046875e-06, + "step": 3726, + "training_step_time": 0.1646106243133545 + }, + { + "epoch": 5.68695068359375e-06, + "model_forward_time": 0.024561166763305664, + "step": 3727 + }, + { + "epoch": 5.68695068359375e-06, + "step": 3727, + "training_step_time": 0.11510896682739258 + }, + { + "epoch": 5.6884765625e-06, + "model_forward_time": 0.025491714477539062, + "step": 3728 + }, + { + "epoch": 5.6884765625e-06, + "step": 3728, + "training_step_time": 0.1345815658569336 + }, + { + "epoch": 5.69000244140625e-06, + "model_forward_time": 0.025413990020751953, + "step": 3729 + }, + { + "epoch": 5.69000244140625e-06, + "step": 3729, + "training_step_time": 0.21203207969665527 + }, + { + "epoch": 5.6915283203125e-06, + "grad_norm": 0.7592687606811523, + "learning_rate": 9.849695567338639e-05, + "loss": 0.1035, + "step": 3730 + }, + { + "epoch": 5.6915283203125e-06, + "model_forward_time": 0.02472972869873047, + "step": 3730 + }, + { + "epoch": 5.6915283203125e-06, + "step": 3730, + "training_step_time": 0.10725569725036621 + }, + { + "epoch": 5.69305419921875e-06, + "model_forward_time": 0.024759292602539062, + "step": 3731 + }, + { + "epoch": 5.69305419921875e-06, + "step": 3731, + "training_step_time": 0.10465645790100098 + }, + { + "epoch": 5.694580078125e-06, + "model_forward_time": 0.025850296020507812, + "step": 3732 + }, + { + "epoch": 5.694580078125e-06, + "step": 3732, + "training_step_time": 0.10812211036682129 + }, + { + "epoch": 5.69610595703125e-06, + "model_forward_time": 0.025228500366210938, + "step": 3733 + }, + { + "epoch": 5.69610595703125e-06, + "step": 3733, + "training_step_time": 0.11033248901367188 + }, + { + "epoch": 5.6976318359375e-06, + "model_forward_time": 0.02506709098815918, + "step": 3734 + }, + { + "epoch": 5.6976318359375e-06, + "step": 3734, + "training_step_time": 0.10956788063049316 + }, + { + "epoch": 5.69915771484375e-06, + "model_forward_time": 0.024998188018798828, + "step": 3735 + }, + { + "epoch": 5.69915771484375e-06, + "step": 3735, + "training_step_time": 0.10901808738708496 + }, + { + "epoch": 5.70068359375e-06, + "model_forward_time": 0.025314807891845703, + "step": 3736 + }, + { + "epoch": 5.70068359375e-06, + "step": 3736, + "training_step_time": 0.10727381706237793 + }, + { + "epoch": 5.70220947265625e-06, + "model_forward_time": 0.025140762329101562, + "step": 3737 + }, + { + "epoch": 5.70220947265625e-06, + "step": 3737, + "training_step_time": 0.10681843757629395 + }, + { + "epoch": 5.7037353515625e-06, + "model_forward_time": 0.025690317153930664, + "step": 3738 + }, + { + "epoch": 5.7037353515625e-06, + "step": 3738, + "training_step_time": 0.11525821685791016 + }, + { + "epoch": 5.70526123046875e-06, + "model_forward_time": 0.0267941951751709, + "step": 3739 + }, + { + "epoch": 5.70526123046875e-06, + "step": 3739, + "training_step_time": 0.139298677444458 + }, + { + "epoch": 5.706787109375e-06, + "grad_norm": 0.6188333034515381, + "learning_rate": 9.848351394119704e-05, + "loss": 0.115, + "step": 3740 + }, + { + "epoch": 5.706787109375e-06, + "model_forward_time": 0.027582645416259766, + "step": 3740 + }, + { + "epoch": 5.706787109375e-06, + "step": 3740, + "training_step_time": 0.18263745307922363 + }, + { + "epoch": 5.70831298828125e-06, + "model_forward_time": 0.02722024917602539, + "step": 3741 + }, + { + "epoch": 5.70831298828125e-06, + "step": 3741, + "training_step_time": 0.21411895751953125 + }, + { + "epoch": 5.7098388671875e-06, + "model_forward_time": 0.028301715850830078, + "step": 3742 + }, + { + "epoch": 5.7098388671875e-06, + "step": 3742, + "training_step_time": 0.23530244827270508 + }, + { + "epoch": 5.71136474609375e-06, + "model_forward_time": 0.029034852981567383, + "step": 3743 + }, + { + "epoch": 5.71136474609375e-06, + "step": 3743, + "training_step_time": 0.2810180187225342 + }, + { + "epoch": 5.712890625e-06, + "model_forward_time": 0.02951979637145996, + "step": 3744 + }, + { + "epoch": 5.712890625e-06, + "step": 3744, + "training_step_time": 0.2805464267730713 + }, + { + "epoch": 5.71441650390625e-06, + "model_forward_time": 0.031998395919799805, + "step": 3745 + }, + { + "epoch": 5.71441650390625e-06, + "step": 3745, + "training_step_time": 0.30208683013916016 + }, + { + "epoch": 5.7159423828125e-06, + "model_forward_time": 0.028746366500854492, + "step": 3746 + }, + { + "epoch": 5.7159423828125e-06, + "step": 3746, + "training_step_time": 0.3071131706237793 + }, + { + "epoch": 5.71746826171875e-06, + "model_forward_time": 0.036649465560913086, + "step": 3747 + }, + { + "epoch": 5.71746826171875e-06, + "step": 3747, + "training_step_time": 0.3875277042388916 + }, + { + "epoch": 5.718994140625e-06, + "model_forward_time": 0.030651569366455078, + "step": 3748 + }, + { + "epoch": 5.718994140625e-06, + "step": 3748, + "training_step_time": 0.3340270519256592 + }, + { + "epoch": 5.72052001953125e-06, + "model_forward_time": 0.030715465545654297, + "step": 3749 + }, + { + "epoch": 5.72052001953125e-06, + "step": 3749, + "training_step_time": 0.3963041305541992 + }, + { + "epoch": 5.7220458984375e-06, + "grad_norm": 0.7434552907943726, + "learning_rate": 9.847001329696653e-05, + "loss": 0.1309, + "step": 3750 + }, + { + "epoch": 5.7220458984375e-06, + "model_forward_time": 0.031092405319213867, + "step": 3750 + }, + { + "epoch": 5.7220458984375e-06, + "step": 3750, + "training_step_time": 0.28721141815185547 + }, + { + "epoch": 5.72357177734375e-06, + "model_forward_time": 0.03343534469604492, + "step": 3751 + }, + { + "epoch": 5.72357177734375e-06, + "step": 3751, + "training_step_time": 0.2773294448852539 + }, + { + "epoch": 5.72509765625e-06, + "model_forward_time": 0.0326848030090332, + "step": 3752 + }, + { + "epoch": 5.72509765625e-06, + "step": 3752, + "training_step_time": 0.2819547653198242 + }, + { + "epoch": 5.72662353515625e-06, + "model_forward_time": 0.03075122833251953, + "step": 3753 + }, + { + "epoch": 5.72662353515625e-06, + "step": 3753, + "training_step_time": 0.14244890213012695 + }, + { + "epoch": 5.7281494140625e-06, + "model_forward_time": 0.030855417251586914, + "step": 3754 + }, + { + "epoch": 5.7281494140625e-06, + "step": 3754, + "training_step_time": 0.17639660835266113 + }, + { + "epoch": 5.72967529296875e-06, + "model_forward_time": 0.030652523040771484, + "step": 3755 + }, + { + "epoch": 5.72967529296875e-06, + "step": 3755, + "training_step_time": 0.17862939834594727 + }, + { + "epoch": 5.731201171875e-06, + "model_forward_time": 0.029019594192504883, + "step": 3756 + }, + { + "epoch": 5.731201171875e-06, + "step": 3756, + "training_step_time": 0.13964104652404785 + }, + { + "epoch": 5.73272705078125e-06, + "model_forward_time": 0.028353452682495117, + "step": 3757 + }, + { + "epoch": 5.73272705078125e-06, + "step": 3757, + "training_step_time": 0.15268564224243164 + }, + { + "epoch": 5.7342529296875e-06, + "model_forward_time": 0.027714252471923828, + "step": 3758 + }, + { + "epoch": 5.7342529296875e-06, + "step": 3758, + "training_step_time": 0.1486358642578125 + }, + { + "epoch": 5.73577880859375e-06, + "model_forward_time": 0.026096343994140625, + "step": 3759 + }, + { + "epoch": 5.73577880859375e-06, + "step": 3759, + "training_step_time": 0.1441047191619873 + }, + { + "epoch": 5.7373046875e-06, + "grad_norm": 0.40131035447120667, + "learning_rate": 9.845645375709945e-05, + "loss": 0.0955, + "step": 3760 + }, + { + "epoch": 5.7373046875e-06, + "model_forward_time": 0.026035547256469727, + "step": 3760 + }, + { + "epoch": 5.7373046875e-06, + "step": 3760, + "training_step_time": 0.13003277778625488 + }, + { + "epoch": 5.73883056640625e-06, + "model_forward_time": 0.02599048614501953, + "step": 3761 + }, + { + "epoch": 5.73883056640625e-06, + "step": 3761, + "training_step_time": 0.11291670799255371 + }, + { + "epoch": 5.7403564453125e-06, + "model_forward_time": 0.02551126480102539, + "step": 3762 + }, + { + "epoch": 5.7403564453125e-06, + "step": 3762, + "training_step_time": 0.1160287857055664 + }, + { + "epoch": 5.74188232421875e-06, + "model_forward_time": 0.0249636173248291, + "step": 3763 + }, + { + "epoch": 5.74188232421875e-06, + "step": 3763, + "training_step_time": 0.11399531364440918 + }, + { + "epoch": 5.743408203125e-06, + "model_forward_time": 0.02434396743774414, + "step": 3764 + }, + { + "epoch": 5.743408203125e-06, + "step": 3764, + "training_step_time": 0.11345434188842773 + }, + { + "epoch": 5.74493408203125e-06, + "model_forward_time": 0.02477431297302246, + "step": 3765 + }, + { + "epoch": 5.74493408203125e-06, + "step": 3765, + "training_step_time": 0.11107635498046875 + }, + { + "epoch": 5.7464599609375e-06, + "model_forward_time": 0.025433778762817383, + "step": 3766 + }, + { + "epoch": 5.7464599609375e-06, + "step": 3766, + "training_step_time": 0.11537432670593262 + }, + { + "epoch": 5.74798583984375e-06, + "model_forward_time": 0.02428722381591797, + "step": 3767 + }, + { + "epoch": 5.74798583984375e-06, + "step": 3767, + "training_step_time": 0.10775327682495117 + }, + { + "epoch": 5.74951171875e-06, + "model_forward_time": 0.025365591049194336, + "step": 3768 + }, + { + "epoch": 5.74951171875e-06, + "step": 3768, + "training_step_time": 0.11297845840454102 + }, + { + "epoch": 5.75103759765625e-06, + "model_forward_time": 0.024929046630859375, + "step": 3769 + }, + { + "epoch": 5.75103759765625e-06, + "step": 3769, + "training_step_time": 0.10987067222595215 + }, + { + "epoch": 5.7525634765625e-06, + "grad_norm": 0.6926906108856201, + "learning_rate": 9.84428353380719e-05, + "loss": 0.1233, + "step": 3770 + }, + { + "epoch": 5.7525634765625e-06, + "model_forward_time": 0.02530646324157715, + "step": 3770 + }, + { + "epoch": 5.7525634765625e-06, + "step": 3770, + "training_step_time": 0.11096477508544922 + }, + { + "epoch": 5.75408935546875e-06, + "model_forward_time": 0.025210857391357422, + "step": 3771 + }, + { + "epoch": 5.75408935546875e-06, + "step": 3771, + "training_step_time": 0.10978221893310547 + }, + { + "epoch": 5.755615234375e-06, + "model_forward_time": 0.02512955665588379, + "step": 3772 + }, + { + "epoch": 5.755615234375e-06, + "step": 3772, + "training_step_time": 0.11090469360351562 + }, + { + "epoch": 5.75714111328125e-06, + "model_forward_time": 0.025104045867919922, + "step": 3773 + }, + { + "epoch": 5.75714111328125e-06, + "step": 3773, + "training_step_time": 0.10835027694702148 + }, + { + "epoch": 5.7586669921875e-06, + "model_forward_time": 0.02537703514099121, + "step": 3774 + }, + { + "epoch": 5.7586669921875e-06, + "step": 3774, + "training_step_time": 0.10801815986633301 + }, + { + "epoch": 5.76019287109375e-06, + "model_forward_time": 0.025606632232666016, + "step": 3775 + }, + { + "epoch": 5.76019287109375e-06, + "step": 3775, + "training_step_time": 0.11304402351379395 + }, + { + "epoch": 5.76171875e-06, + "model_forward_time": 0.025969743728637695, + "step": 3776 + }, + { + "epoch": 5.76171875e-06, + "step": 3776, + "training_step_time": 0.11148881912231445 + }, + { + "epoch": 5.76324462890625e-06, + "model_forward_time": 0.029139041900634766, + "step": 3777 + }, + { + "epoch": 5.76324462890625e-06, + "step": 3777, + "training_step_time": 0.11367225646972656 + }, + { + "epoch": 5.7647705078125e-06, + "model_forward_time": 0.025634288787841797, + "step": 3778 + }, + { + "epoch": 5.7647705078125e-06, + "step": 3778, + "training_step_time": 0.11123108863830566 + }, + { + "epoch": 5.76629638671875e-06, + "model_forward_time": 0.0255887508392334, + "step": 3779 + }, + { + "epoch": 5.76629638671875e-06, + "step": 3779, + "training_step_time": 0.10942316055297852 + }, + { + "epoch": 5.767822265625e-06, + "grad_norm": 0.45874303579330444, + "learning_rate": 9.842915805643155e-05, + "loss": 0.111, + "step": 3780 + }, + { + "epoch": 5.767822265625e-06, + "model_forward_time": 0.02534317970275879, + "step": 3780 + }, + { + "epoch": 5.767822265625e-06, + "step": 3780, + "training_step_time": 0.10964488983154297 + }, + { + "epoch": 5.76934814453125e-06, + "model_forward_time": 0.025545835494995117, + "step": 3781 + }, + { + "epoch": 5.76934814453125e-06, + "step": 3781, + "training_step_time": 0.1132357120513916 + }, + { + "epoch": 5.7708740234375e-06, + "model_forward_time": 0.02583479881286621, + "step": 3782 + }, + { + "epoch": 5.7708740234375e-06, + "step": 3782, + "training_step_time": 0.11069869995117188 + }, + { + "epoch": 5.77239990234375e-06, + "model_forward_time": 0.02854323387145996, + "step": 3783 + }, + { + "epoch": 5.77239990234375e-06, + "step": 3783, + "training_step_time": 0.11237072944641113 + }, + { + "epoch": 5.77392578125e-06, + "model_forward_time": 0.025372743606567383, + "step": 3784 + }, + { + "epoch": 5.77392578125e-06, + "step": 3784, + "training_step_time": 0.10759711265563965 + }, + { + "epoch": 5.77545166015625e-06, + "model_forward_time": 0.025457382202148438, + "step": 3785 + }, + { + "epoch": 5.77545166015625e-06, + "step": 3785, + "training_step_time": 0.13559818267822266 + }, + { + "epoch": 5.7769775390625e-06, + "model_forward_time": 0.02561330795288086, + "step": 3786 + }, + { + "epoch": 5.7769775390625e-06, + "step": 3786, + "training_step_time": 0.10781168937683105 + }, + { + "epoch": 5.77850341796875e-06, + "model_forward_time": 0.025604724884033203, + "step": 3787 + }, + { + "epoch": 5.77850341796875e-06, + "step": 3787, + "training_step_time": 0.1326909065246582 + }, + { + "epoch": 5.780029296875e-06, + "model_forward_time": 0.025356531143188477, + "step": 3788 + }, + { + "epoch": 5.780029296875e-06, + "step": 3788, + "training_step_time": 0.19426393508911133 + }, + { + "epoch": 5.78155517578125e-06, + "model_forward_time": 0.02521491050720215, + "step": 3789 + }, + { + "epoch": 5.78155517578125e-06, + "step": 3789, + "training_step_time": 0.19021892547607422 + }, + { + "epoch": 5.7830810546875e-06, + "grad_norm": 0.3816434442996979, + "learning_rate": 9.841542192879762e-05, + "loss": 0.0942, + "step": 3790 + }, + { + "epoch": 5.7830810546875e-06, + "model_forward_time": 0.026805639266967773, + "step": 3790 + }, + { + "epoch": 5.7830810546875e-06, + "step": 3790, + "training_step_time": 0.1793689727783203 + }, + { + "epoch": 5.78460693359375e-06, + "model_forward_time": 0.02460956573486328, + "step": 3791 + }, + { + "epoch": 5.78460693359375e-06, + "step": 3791, + "training_step_time": 0.19040393829345703 + }, + { + "epoch": 5.7861328125e-06, + "model_forward_time": 0.025027990341186523, + "step": 3792 + }, + { + "epoch": 5.7861328125e-06, + "step": 3792, + "training_step_time": 0.13988256454467773 + }, + { + "epoch": 5.78765869140625e-06, + "model_forward_time": 0.02476334571838379, + "step": 3793 + }, + { + "epoch": 5.78765869140625e-06, + "step": 3793, + "training_step_time": 0.18755531311035156 + }, + { + "epoch": 5.7891845703125e-06, + "model_forward_time": 0.024788856506347656, + "step": 3794 + }, + { + "epoch": 5.7891845703125e-06, + "step": 3794, + "training_step_time": 0.1333937644958496 + }, + { + "epoch": 5.79071044921875e-06, + "model_forward_time": 0.02428746223449707, + "step": 3795 + }, + { + "epoch": 5.79071044921875e-06, + "step": 3795, + "training_step_time": 0.12100100517272949 + }, + { + "epoch": 5.792236328125e-06, + "model_forward_time": 0.025979042053222656, + "step": 3796 + }, + { + "epoch": 5.792236328125e-06, + "step": 3796, + "training_step_time": 0.11063575744628906 + }, + { + "epoch": 5.79376220703125e-06, + "model_forward_time": 0.025365591049194336, + "step": 3797 + }, + { + "epoch": 5.79376220703125e-06, + "step": 3797, + "training_step_time": 0.10877561569213867 + }, + { + "epoch": 5.7952880859375e-06, + "model_forward_time": 0.025317907333374023, + "step": 3798 + }, + { + "epoch": 5.7952880859375e-06, + "step": 3798, + "training_step_time": 0.11102175712585449 + }, + { + "epoch": 5.79681396484375e-06, + "model_forward_time": 0.024837017059326172, + "step": 3799 + }, + { + "epoch": 5.79681396484375e-06, + "step": 3799, + "training_step_time": 0.10570573806762695 + }, + { + "epoch": 5.79833984375e-06, + "grad_norm": 0.8299492001533508, + "learning_rate": 9.840162697186075e-05, + "loss": 0.1299, + "step": 3800 + }, + { + "epoch": 5.79833984375e-06, + "model_forward_time": 0.02722620964050293, + "step": 3800 + }, + { + "epoch": 5.79833984375e-06, + "step": 3800, + "training_step_time": 0.10987448692321777 + }, + { + "epoch": 5.79986572265625e-06, + "model_forward_time": 0.02478814125061035, + "step": 3801 + }, + { + "epoch": 5.79986572265625e-06, + "step": 3801, + "training_step_time": 0.10673379898071289 + }, + { + "epoch": 5.8013916015625e-06, + "model_forward_time": 0.025382041931152344, + "step": 3802 + }, + { + "epoch": 5.8013916015625e-06, + "step": 3802, + "training_step_time": 0.11433601379394531 + }, + { + "epoch": 5.80291748046875e-06, + "model_forward_time": 0.026944637298583984, + "step": 3803 + }, + { + "epoch": 5.80291748046875e-06, + "step": 3803, + "training_step_time": 0.11736917495727539 + }, + { + "epoch": 5.804443359375e-06, + "model_forward_time": 0.025789260864257812, + "step": 3804 + }, + { + "epoch": 5.804443359375e-06, + "step": 3804, + "training_step_time": 0.21269869804382324 + }, + { + "epoch": 5.80596923828125e-06, + "model_forward_time": 0.02446603775024414, + "step": 3805 + }, + { + "epoch": 5.80596923828125e-06, + "step": 3805, + "training_step_time": 0.11800098419189453 + }, + { + "epoch": 5.8074951171875e-06, + "model_forward_time": 0.024841785430908203, + "step": 3806 + }, + { + "epoch": 5.8074951171875e-06, + "step": 3806, + "training_step_time": 0.1126713752746582 + }, + { + "epoch": 5.80902099609375e-06, + "model_forward_time": 0.02555990219116211, + "step": 3807 + }, + { + "epoch": 5.80902099609375e-06, + "step": 3807, + "training_step_time": 0.10752463340759277 + }, + { + "epoch": 5.810546875e-06, + "model_forward_time": 0.025375843048095703, + "step": 3808 + }, + { + "epoch": 5.810546875e-06, + "step": 3808, + "training_step_time": 0.10749697685241699 + }, + { + "epoch": 5.81207275390625e-06, + "model_forward_time": 0.02549290657043457, + "step": 3809 + }, + { + "epoch": 5.81207275390625e-06, + "step": 3809, + "training_step_time": 0.11079144477844238 + }, + { + "epoch": 5.8135986328125e-06, + "grad_norm": 0.49644649028778076, + "learning_rate": 9.838777320238312e-05, + "loss": 0.0955, + "step": 3810 + }, + { + "epoch": 5.8135986328125e-06, + "model_forward_time": 0.02574944496154785, + "step": 3810 + }, + { + "epoch": 5.8135986328125e-06, + "step": 3810, + "training_step_time": 0.12449502944946289 + }, + { + "epoch": 5.81512451171875e-06, + "model_forward_time": 0.025311946868896484, + "step": 3811 + }, + { + "epoch": 5.81512451171875e-06, + "step": 3811, + "training_step_time": 0.1859893798828125 + }, + { + "epoch": 5.816650390625e-06, + "model_forward_time": 0.025127649307250977, + "step": 3812 + }, + { + "epoch": 5.816650390625e-06, + "step": 3812, + "training_step_time": 0.16502833366394043 + }, + { + "epoch": 5.81817626953125e-06, + "model_forward_time": 0.024425983428955078, + "step": 3813 + }, + { + "epoch": 5.81817626953125e-06, + "step": 3813, + "training_step_time": 0.13900041580200195 + }, + { + "epoch": 5.8197021484375e-06, + "model_forward_time": 0.02517104148864746, + "step": 3814 + }, + { + "epoch": 5.8197021484375e-06, + "step": 3814, + "training_step_time": 0.14062261581420898 + }, + { + "epoch": 5.82122802734375e-06, + "model_forward_time": 0.02514815330505371, + "step": 3815 + }, + { + "epoch": 5.82122802734375e-06, + "step": 3815, + "training_step_time": 0.12988805770874023 + }, + { + "epoch": 5.82275390625e-06, + "model_forward_time": 0.025272846221923828, + "step": 3816 + }, + { + "epoch": 5.82275390625e-06, + "step": 3816, + "training_step_time": 0.1257495880126953 + }, + { + "epoch": 5.82427978515625e-06, + "model_forward_time": 0.02889394760131836, + "step": 3817 + }, + { + "epoch": 5.82427978515625e-06, + "step": 3817, + "training_step_time": 0.11177563667297363 + }, + { + "epoch": 5.8258056640625e-06, + "model_forward_time": 0.025252342224121094, + "step": 3818 + }, + { + "epoch": 5.8258056640625e-06, + "step": 3818, + "training_step_time": 0.10793757438659668 + }, + { + "epoch": 5.82733154296875e-06, + "model_forward_time": 0.02523183822631836, + "step": 3819 + }, + { + "epoch": 5.82733154296875e-06, + "step": 3819, + "training_step_time": 0.1123511791229248 + }, + { + "epoch": 5.828857421875e-06, + "grad_norm": 0.6049166321754456, + "learning_rate": 9.83738606371984e-05, + "loss": 0.1118, + "step": 3820 + }, + { + "epoch": 5.828857421875e-06, + "model_forward_time": 0.024628877639770508, + "step": 3820 + }, + { + "epoch": 5.828857421875e-06, + "step": 3820, + "training_step_time": 0.10770964622497559 + }, + { + "epoch": 5.83038330078125e-06, + "model_forward_time": 0.02467513084411621, + "step": 3821 + }, + { + "epoch": 5.83038330078125e-06, + "step": 3821, + "training_step_time": 0.10818028450012207 + }, + { + "epoch": 5.8319091796875e-06, + "model_forward_time": 0.024209022521972656, + "step": 3822 + }, + { + "epoch": 5.8319091796875e-06, + "step": 3822, + "training_step_time": 0.11073660850524902 + }, + { + "epoch": 5.83343505859375e-06, + "model_forward_time": 0.024425983428955078, + "step": 3823 + }, + { + "epoch": 5.83343505859375e-06, + "step": 3823, + "training_step_time": 0.10981321334838867 + }, + { + "epoch": 5.8349609375e-06, + "model_forward_time": 0.02442169189453125, + "step": 3824 + }, + { + "epoch": 5.8349609375e-06, + "step": 3824, + "training_step_time": 0.10732245445251465 + }, + { + "epoch": 5.83648681640625e-06, + "model_forward_time": 0.024677753448486328, + "step": 3825 + }, + { + "epoch": 5.83648681640625e-06, + "step": 3825, + "training_step_time": 0.11009669303894043 + }, + { + "epoch": 5.8380126953125e-06, + "model_forward_time": 0.024405956268310547, + "step": 3826 + }, + { + "epoch": 5.8380126953125e-06, + "step": 3826, + "training_step_time": 0.11384105682373047 + }, + { + "epoch": 5.83953857421875e-06, + "model_forward_time": 0.024467945098876953, + "step": 3827 + }, + { + "epoch": 5.83953857421875e-06, + "step": 3827, + "training_step_time": 0.12632226943969727 + }, + { + "epoch": 5.841064453125e-06, + "model_forward_time": 0.024417400360107422, + "step": 3828 + }, + { + "epoch": 5.841064453125e-06, + "step": 3828, + "training_step_time": 0.15962743759155273 + }, + { + "epoch": 5.84259033203125e-06, + "model_forward_time": 0.023672819137573242, + "step": 3829 + }, + { + "epoch": 5.84259033203125e-06, + "step": 3829, + "training_step_time": 0.24622869491577148 + }, + { + "epoch": 5.8441162109375e-06, + "grad_norm": 0.5268808603286743, + "learning_rate": 9.835988929321165e-05, + "loss": 0.093, + "step": 3830 + }, + { + "epoch": 5.8441162109375e-06, + "model_forward_time": 0.02365422248840332, + "step": 3830 + }, + { + "epoch": 5.8441162109375e-06, + "step": 3830, + "training_step_time": 0.18918681144714355 + }, + { + "epoch": 5.84564208984375e-06, + "model_forward_time": 0.023884057998657227, + "step": 3831 + }, + { + "epoch": 5.84564208984375e-06, + "step": 3831, + "training_step_time": 0.273115873336792 + }, + { + "epoch": 5.84716796875e-06, + "model_forward_time": 0.02313375473022461, + "step": 3832 + }, + { + "epoch": 5.84716796875e-06, + "step": 3832, + "training_step_time": 0.19476556777954102 + }, + { + "epoch": 5.84869384765625e-06, + "model_forward_time": 0.02361607551574707, + "step": 3833 + }, + { + "epoch": 5.84869384765625e-06, + "step": 3833, + "training_step_time": 0.2110886573791504 + }, + { + "epoch": 5.8502197265625e-06, + "model_forward_time": 0.0235598087310791, + "step": 3834 + }, + { + "epoch": 5.8502197265625e-06, + "step": 3834, + "training_step_time": 0.1524813175201416 + }, + { + "epoch": 5.85174560546875e-06, + "model_forward_time": 0.02361130714416504, + "step": 3835 + }, + { + "epoch": 5.85174560546875e-06, + "step": 3835, + "training_step_time": 0.15381860733032227 + }, + { + "epoch": 5.853271484375e-06, + "model_forward_time": 0.02613091468811035, + "step": 3836 + }, + { + "epoch": 5.853271484375e-06, + "step": 3836, + "training_step_time": 0.11510372161865234 + }, + { + "epoch": 5.85479736328125e-06, + "model_forward_time": 0.02428603172302246, + "step": 3837 + }, + { + "epoch": 5.85479736328125e-06, + "step": 3837, + "training_step_time": 0.10776138305664062 + }, + { + "epoch": 5.8563232421875e-06, + "model_forward_time": 0.02475881576538086, + "step": 3838 + }, + { + "epoch": 5.8563232421875e-06, + "step": 3838, + "training_step_time": 0.10681581497192383 + }, + { + "epoch": 5.85784912109375e-06, + "model_forward_time": 0.026027441024780273, + "step": 3839 + }, + { + "epoch": 5.85784912109375e-06, + "step": 3839, + "training_step_time": 0.10921716690063477 + }, + { + "epoch": 5.859375e-06, + "grad_norm": 0.4340176582336426, + "learning_rate": 9.834585918739936e-05, + "loss": 0.0772, + "step": 3840 + }, + { + "epoch": 5.859375e-06, + "model_forward_time": 0.024509429931640625, + "step": 3840 + }, + { + "epoch": 5.859375e-06, + "step": 3840, + "training_step_time": 0.10770916938781738 + }, + { + "epoch": 5.86090087890625e-06, + "model_forward_time": 0.026287555694580078, + "step": 3841 + }, + { + "epoch": 5.86090087890625e-06, + "step": 3841, + "training_step_time": 0.10759139060974121 + }, + { + "epoch": 5.8624267578125e-06, + "model_forward_time": 0.024283885955810547, + "step": 3842 + }, + { + "epoch": 5.8624267578125e-06, + "step": 3842, + "training_step_time": 0.10953569412231445 + }, + { + "epoch": 5.86395263671875e-06, + "model_forward_time": 0.02478623390197754, + "step": 3843 + }, + { + "epoch": 5.86395263671875e-06, + "step": 3843, + "training_step_time": 0.11611151695251465 + }, + { + "epoch": 5.865478515625e-06, + "model_forward_time": 0.02423691749572754, + "step": 3844 + }, + { + "epoch": 5.865478515625e-06, + "step": 3844, + "training_step_time": 0.11401200294494629 + }, + { + "epoch": 5.86700439453125e-06, + "model_forward_time": 0.02481245994567871, + "step": 3845 + }, + { + "epoch": 5.86700439453125e-06, + "step": 3845, + "training_step_time": 0.1250908374786377 + }, + { + "epoch": 5.8685302734375e-06, + "model_forward_time": 0.025206804275512695, + "step": 3846 + }, + { + "epoch": 5.8685302734375e-06, + "step": 3846, + "training_step_time": 0.13276171684265137 + }, + { + "epoch": 5.87005615234375e-06, + "model_forward_time": 0.02493739128112793, + "step": 3847 + }, + { + "epoch": 5.87005615234375e-06, + "step": 3847, + "training_step_time": 0.10967016220092773 + }, + { + "epoch": 5.87158203125e-06, + "model_forward_time": 0.025101423263549805, + "step": 3848 + }, + { + "epoch": 5.87158203125e-06, + "step": 3848, + "training_step_time": 0.11724591255187988 + }, + { + "epoch": 5.87310791015625e-06, + "model_forward_time": 0.02477860450744629, + "step": 3849 + }, + { + "epoch": 5.87310791015625e-06, + "step": 3849, + "training_step_time": 0.10789847373962402 + }, + { + "epoch": 5.8746337890625e-06, + "grad_norm": 0.5973440408706665, + "learning_rate": 9.833177033680944e-05, + "loss": 0.1038, + "step": 3850 + }, + { + "epoch": 5.8746337890625e-06, + "model_forward_time": 0.024653196334838867, + "step": 3850 + }, + { + "epoch": 5.8746337890625e-06, + "step": 3850, + "training_step_time": 0.10861515998840332 + }, + { + "epoch": 5.87615966796875e-06, + "model_forward_time": 0.024451255798339844, + "step": 3851 + }, + { + "epoch": 5.87615966796875e-06, + "step": 3851, + "training_step_time": 0.1444549560546875 + }, + { + "epoch": 5.877685546875e-06, + "model_forward_time": 0.02789020538330078, + "step": 3852 + }, + { + "epoch": 5.877685546875e-06, + "step": 3852, + "training_step_time": 0.12726712226867676 + }, + { + "epoch": 5.87921142578125e-06, + "model_forward_time": 0.02399921417236328, + "step": 3853 + }, + { + "epoch": 5.87921142578125e-06, + "step": 3853, + "training_step_time": 0.1215059757232666 + }, + { + "epoch": 5.8807373046875e-06, + "model_forward_time": 0.024276256561279297, + "step": 3854 + }, + { + "epoch": 5.8807373046875e-06, + "step": 3854, + "training_step_time": 0.12537002563476562 + }, + { + "epoch": 5.88226318359375e-06, + "model_forward_time": 0.024267196655273438, + "step": 3855 + }, + { + "epoch": 5.88226318359375e-06, + "step": 3855, + "training_step_time": 0.11988329887390137 + }, + { + "epoch": 5.8837890625e-06, + "model_forward_time": 0.024638652801513672, + "step": 3856 + }, + { + "epoch": 5.8837890625e-06, + "step": 3856, + "training_step_time": 0.11623358726501465 + }, + { + "epoch": 5.88531494140625e-06, + "model_forward_time": 0.024324893951416016, + "step": 3857 + }, + { + "epoch": 5.88531494140625e-06, + "step": 3857, + "training_step_time": 0.1108846664428711 + }, + { + "epoch": 5.8868408203125e-06, + "model_forward_time": 0.02426743507385254, + "step": 3858 + }, + { + "epoch": 5.8868408203125e-06, + "step": 3858, + "training_step_time": 0.11066842079162598 + }, + { + "epoch": 5.88836669921875e-06, + "model_forward_time": 0.024240493774414062, + "step": 3859 + }, + { + "epoch": 5.88836669921875e-06, + "step": 3859, + "training_step_time": 0.11228013038635254 + }, + { + "epoch": 5.889892578125e-06, + "grad_norm": 0.4913652241230011, + "learning_rate": 9.831762275856118e-05, + "loss": 0.093, + "step": 3860 + }, + { + "epoch": 5.889892578125e-06, + "model_forward_time": 0.024737119674682617, + "step": 3860 + }, + { + "epoch": 5.889892578125e-06, + "step": 3860, + "training_step_time": 0.11471986770629883 + }, + { + "epoch": 5.89141845703125e-06, + "model_forward_time": 0.024394750595092773, + "step": 3861 + }, + { + "epoch": 5.89141845703125e-06, + "step": 3861, + "training_step_time": 0.10892915725708008 + }, + { + "epoch": 5.8929443359375e-06, + "model_forward_time": 0.024424076080322266, + "step": 3862 + }, + { + "epoch": 5.8929443359375e-06, + "step": 3862, + "training_step_time": 0.10992598533630371 + }, + { + "epoch": 5.89447021484375e-06, + "model_forward_time": 0.024509191513061523, + "step": 3863 + }, + { + "epoch": 5.89447021484375e-06, + "step": 3863, + "training_step_time": 0.10797333717346191 + }, + { + "epoch": 5.89599609375e-06, + "model_forward_time": 0.025216102600097656, + "step": 3864 + }, + { + "epoch": 5.89599609375e-06, + "step": 3864, + "training_step_time": 0.10838007926940918 + }, + { + "epoch": 5.89752197265625e-06, + "model_forward_time": 0.024764537811279297, + "step": 3865 + }, + { + "epoch": 5.89752197265625e-06, + "step": 3865, + "training_step_time": 0.11017775535583496 + }, + { + "epoch": 5.8990478515625e-06, + "model_forward_time": 0.024631738662719727, + "step": 3866 + }, + { + "epoch": 5.8990478515625e-06, + "step": 3866, + "training_step_time": 0.10675692558288574 + }, + { + "epoch": 5.90057373046875e-06, + "model_forward_time": 0.024269819259643555, + "step": 3867 + }, + { + "epoch": 5.90057373046875e-06, + "step": 3867, + "training_step_time": 0.10904502868652344 + }, + { + "epoch": 5.902099609375e-06, + "model_forward_time": 0.025847434997558594, + "step": 3868 + }, + { + "epoch": 5.902099609375e-06, + "step": 3868, + "training_step_time": 0.11874961853027344 + }, + { + "epoch": 5.90362548828125e-06, + "model_forward_time": 0.024465322494506836, + "step": 3869 + }, + { + "epoch": 5.90362548828125e-06, + "step": 3869, + "training_step_time": 0.10709261894226074 + }, + { + "epoch": 5.9051513671875e-06, + "grad_norm": 0.3654991686344147, + "learning_rate": 9.830341646984521e-05, + "loss": 0.0947, + "step": 3870 + }, + { + "epoch": 5.9051513671875e-06, + "model_forward_time": 0.024537324905395508, + "step": 3870 + }, + { + "epoch": 5.9051513671875e-06, + "step": 3870, + "training_step_time": 0.10721540451049805 + }, + { + "epoch": 5.90667724609375e-06, + "model_forward_time": 0.025421619415283203, + "step": 3871 + }, + { + "epoch": 5.90667724609375e-06, + "step": 3871, + "training_step_time": 0.10730719566345215 + }, + { + "epoch": 5.908203125e-06, + "model_forward_time": 0.02541041374206543, + "step": 3872 + }, + { + "epoch": 5.908203125e-06, + "step": 3872, + "training_step_time": 0.13470244407653809 + }, + { + "epoch": 5.90972900390625e-06, + "model_forward_time": 0.025674104690551758, + "step": 3873 + }, + { + "epoch": 5.90972900390625e-06, + "step": 3873, + "training_step_time": 0.11907005310058594 + }, + { + "epoch": 5.9112548828125e-06, + "model_forward_time": 0.025252342224121094, + "step": 3874 + }, + { + "epoch": 5.9112548828125e-06, + "step": 3874, + "training_step_time": 0.13324189186096191 + }, + { + "epoch": 5.91278076171875e-06, + "model_forward_time": 0.025063037872314453, + "step": 3875 + }, + { + "epoch": 5.91278076171875e-06, + "step": 3875, + "training_step_time": 0.13765406608581543 + }, + { + "epoch": 5.914306640625e-06, + "model_forward_time": 0.02518320083618164, + "step": 3876 + }, + { + "epoch": 5.914306640625e-06, + "step": 3876, + "training_step_time": 0.1413424015045166 + }, + { + "epoch": 5.91583251953125e-06, + "model_forward_time": 0.025068283081054688, + "step": 3877 + }, + { + "epoch": 5.91583251953125e-06, + "step": 3877, + "training_step_time": 0.20303750038146973 + }, + { + "epoch": 5.9173583984375e-06, + "model_forward_time": 0.024590492248535156, + "step": 3878 + }, + { + "epoch": 5.9173583984375e-06, + "step": 3878, + "training_step_time": 0.13485956192016602 + }, + { + "epoch": 5.91888427734375e-06, + "model_forward_time": 0.02463674545288086, + "step": 3879 + }, + { + "epoch": 5.91888427734375e-06, + "step": 3879, + "training_step_time": 0.11011409759521484 + }, + { + "epoch": 5.92041015625e-06, + "grad_norm": 0.286504864692688, + "learning_rate": 9.828915148792352e-05, + "loss": 0.0737, + "step": 3880 + }, + { + "epoch": 5.92041015625e-06, + "model_forward_time": 0.025462865829467773, + "step": 3880 + }, + { + "epoch": 5.92041015625e-06, + "step": 3880, + "training_step_time": 0.11486101150512695 + }, + { + "epoch": 5.92193603515625e-06, + "model_forward_time": 0.025619983673095703, + "step": 3881 + }, + { + "epoch": 5.92193603515625e-06, + "step": 3881, + "training_step_time": 0.10775518417358398 + }, + { + "epoch": 5.9234619140625e-06, + "model_forward_time": 0.02527928352355957, + "step": 3882 + }, + { + "epoch": 5.9234619140625e-06, + "step": 3882, + "training_step_time": 0.1199493408203125 + }, + { + "epoch": 5.92498779296875e-06, + "model_forward_time": 0.0256345272064209, + "step": 3883 + }, + { + "epoch": 5.92498779296875e-06, + "step": 3883, + "training_step_time": 0.1963350772857666 + }, + { + "epoch": 5.926513671875e-06, + "model_forward_time": 0.02484750747680664, + "step": 3884 + }, + { + "epoch": 5.926513671875e-06, + "step": 3884, + "training_step_time": 0.10986566543579102 + }, + { + "epoch": 5.92803955078125e-06, + "model_forward_time": 0.025430679321289062, + "step": 3885 + }, + { + "epoch": 5.92803955078125e-06, + "step": 3885, + "training_step_time": 0.1064310073852539 + }, + { + "epoch": 5.9295654296875e-06, + "model_forward_time": 0.025760650634765625, + "step": 3886 + }, + { + "epoch": 5.9295654296875e-06, + "step": 3886, + "training_step_time": 0.11071205139160156 + }, + { + "epoch": 5.93109130859375e-06, + "model_forward_time": 0.02566838264465332, + "step": 3887 + }, + { + "epoch": 5.93109130859375e-06, + "step": 3887, + "training_step_time": 0.10995817184448242 + }, + { + "epoch": 5.9326171875e-06, + "model_forward_time": 0.024211883544921875, + "step": 3888 + }, + { + "epoch": 5.9326171875e-06, + "step": 3888, + "training_step_time": 0.10772705078125 + }, + { + "epoch": 5.93414306640625e-06, + "model_forward_time": 0.02414703369140625, + "step": 3889 + }, + { + "epoch": 5.93414306640625e-06, + "step": 3889, + "training_step_time": 0.1589820384979248 + }, + { + "epoch": 5.9356689453125e-06, + "grad_norm": 0.4652571976184845, + "learning_rate": 9.82748278301294e-05, + "loss": 0.1036, + "step": 3890 + }, + { + "epoch": 5.9356689453125e-06, + "model_forward_time": 0.02413654327392578, + "step": 3890 + }, + { + "epoch": 5.9356689453125e-06, + "step": 3890, + "training_step_time": 0.1816256046295166 + }, + { + "epoch": 5.93719482421875e-06, + "model_forward_time": 0.023906946182250977, + "step": 3891 + }, + { + "epoch": 5.93719482421875e-06, + "step": 3891, + "training_step_time": 0.1257925033569336 + }, + { + "epoch": 5.938720703125e-06, + "model_forward_time": 0.023796558380126953, + "step": 3892 + }, + { + "epoch": 5.938720703125e-06, + "step": 3892, + "training_step_time": 0.13409900665283203 + }, + { + "epoch": 5.94024658203125e-06, + "model_forward_time": 0.024081707000732422, + "step": 3893 + }, + { + "epoch": 5.94024658203125e-06, + "step": 3893, + "training_step_time": 0.21071481704711914 + }, + { + "epoch": 5.9417724609375e-06, + "model_forward_time": 0.024678945541381836, + "step": 3894 + }, + { + "epoch": 5.9417724609375e-06, + "step": 3894, + "training_step_time": 0.11780500411987305 + }, + { + "epoch": 5.94329833984375e-06, + "model_forward_time": 0.025034427642822266, + "step": 3895 + }, + { + "epoch": 5.94329833984375e-06, + "step": 3895, + "training_step_time": 0.1179201602935791 + }, + { + "epoch": 5.94482421875e-06, + "model_forward_time": 0.025400638580322266, + "step": 3896 + }, + { + "epoch": 5.94482421875e-06, + "step": 3896, + "training_step_time": 0.11397099494934082 + }, + { + "epoch": 5.94635009765625e-06, + "model_forward_time": 0.0254518985748291, + "step": 3897 + }, + { + "epoch": 5.94635009765625e-06, + "step": 3897, + "training_step_time": 0.11244606971740723 + }, + { + "epoch": 5.9478759765625e-06, + "model_forward_time": 0.02551722526550293, + "step": 3898 + }, + { + "epoch": 5.9478759765625e-06, + "step": 3898, + "training_step_time": 0.11413455009460449 + }, + { + "epoch": 5.94940185546875e-06, + "model_forward_time": 0.02581644058227539, + "step": 3899 + }, + { + "epoch": 5.94940185546875e-06, + "step": 3899, + "training_step_time": 0.10853981971740723 + }, + { + "epoch": 5.950927734375e-06, + "grad_norm": 0.6859496235847473, + "learning_rate": 9.826044551386744e-05, + "loss": 0.1011, + "step": 3900 + }, + { + "epoch": 5.950927734375e-06, + "model_forward_time": 0.02557682991027832, + "step": 3900 + }, + { + "epoch": 5.950927734375e-06, + "step": 3900, + "training_step_time": 0.10816025733947754 + }, + { + "epoch": 5.95245361328125e-06, + "model_forward_time": 0.025686264038085938, + "step": 3901 + }, + { + "epoch": 5.95245361328125e-06, + "step": 3901, + "training_step_time": 0.10817980766296387 + }, + { + "epoch": 5.9539794921875e-06, + "model_forward_time": 0.02534937858581543, + "step": 3902 + }, + { + "epoch": 5.9539794921875e-06, + "step": 3902, + "training_step_time": 0.10775613784790039 + }, + { + "epoch": 5.95550537109375e-06, + "model_forward_time": 0.025724172592163086, + "step": 3903 + }, + { + "epoch": 5.95550537109375e-06, + "step": 3903, + "training_step_time": 0.10944342613220215 + }, + { + "epoch": 5.95703125e-06, + "model_forward_time": 0.026932239532470703, + "step": 3904 + }, + { + "epoch": 5.95703125e-06, + "step": 3904, + "training_step_time": 0.11585474014282227 + }, + { + "epoch": 5.95855712890625e-06, + "model_forward_time": 0.025278329849243164, + "step": 3905 + }, + { + "epoch": 5.95855712890625e-06, + "step": 3905, + "training_step_time": 0.11381697654724121 + }, + { + "epoch": 5.9600830078125e-06, + "model_forward_time": 0.025643587112426758, + "step": 3906 + }, + { + "epoch": 5.9600830078125e-06, + "step": 3906, + "training_step_time": 0.10903525352478027 + }, + { + "epoch": 5.96160888671875e-06, + "model_forward_time": 0.025649309158325195, + "step": 3907 + }, + { + "epoch": 5.96160888671875e-06, + "step": 3907, + "training_step_time": 0.11130738258361816 + }, + { + "epoch": 5.963134765625e-06, + "model_forward_time": 0.02526068687438965, + "step": 3908 + }, + { + "epoch": 5.963134765625e-06, + "step": 3908, + "training_step_time": 0.10825371742248535 + }, + { + "epoch": 5.96466064453125e-06, + "model_forward_time": 0.025218725204467773, + "step": 3909 + }, + { + "epoch": 5.96466064453125e-06, + "step": 3909, + "training_step_time": 0.10562324523925781 + }, + { + "epoch": 5.9661865234375e-06, + "grad_norm": 0.5915921330451965, + "learning_rate": 9.824600455661353e-05, + "loss": 0.0832, + "step": 3910 + }, + { + "epoch": 5.9661865234375e-06, + "model_forward_time": 0.025377511978149414, + "step": 3910 + }, + { + "epoch": 5.9661865234375e-06, + "step": 3910, + "training_step_time": 0.10834217071533203 + }, + { + "epoch": 5.96771240234375e-06, + "model_forward_time": 0.025218963623046875, + "step": 3911 + }, + { + "epoch": 5.96771240234375e-06, + "step": 3911, + "training_step_time": 0.10789036750793457 + }, + { + "epoch": 5.96923828125e-06, + "model_forward_time": 0.025722265243530273, + "step": 3912 + }, + { + "epoch": 5.96923828125e-06, + "step": 3912, + "training_step_time": 0.11448001861572266 + }, + { + "epoch": 5.97076416015625e-06, + "model_forward_time": 0.025814056396484375, + "step": 3913 + }, + { + "epoch": 5.97076416015625e-06, + "step": 3913, + "training_step_time": 0.10883164405822754 + }, + { + "epoch": 5.9722900390625e-06, + "model_forward_time": 0.02528858184814453, + "step": 3914 + }, + { + "epoch": 5.9722900390625e-06, + "step": 3914, + "training_step_time": 0.10640740394592285 + }, + { + "epoch": 5.97381591796875e-06, + "model_forward_time": 0.025534629821777344, + "step": 3915 + }, + { + "epoch": 5.97381591796875e-06, + "step": 3915, + "training_step_time": 0.11209321022033691 + }, + { + "epoch": 5.975341796875e-06, + "model_forward_time": 0.025533437728881836, + "step": 3916 + }, + { + "epoch": 5.975341796875e-06, + "step": 3916, + "training_step_time": 0.11191463470458984 + }, + { + "epoch": 5.97686767578125e-06, + "model_forward_time": 0.025388479232788086, + "step": 3917 + }, + { + "epoch": 5.97686767578125e-06, + "step": 3917, + "training_step_time": 0.1418931484222412 + }, + { + "epoch": 5.9783935546875e-06, + "model_forward_time": 0.024953842163085938, + "step": 3918 + }, + { + "epoch": 5.9783935546875e-06, + "step": 3918, + "training_step_time": 0.1179511547088623 + }, + { + "epoch": 5.97991943359375e-06, + "model_forward_time": 0.02536630630493164, + "step": 3919 + }, + { + "epoch": 5.97991943359375e-06, + "step": 3919, + "training_step_time": 0.12344479560852051 + }, + { + "epoch": 5.9814453125e-06, + "grad_norm": 0.3807622790336609, + "learning_rate": 9.823150497591476e-05, + "loss": 0.093, + "step": 3920 + }, + { + "epoch": 5.9814453125e-06, + "model_forward_time": 0.028074264526367188, + "step": 3920 + }, + { + "epoch": 5.9814453125e-06, + "step": 3920, + "training_step_time": 0.16993212699890137 + }, + { + "epoch": 5.98297119140625e-06, + "model_forward_time": 0.02316451072692871, + "step": 3921 + }, + { + "epoch": 5.98297119140625e-06, + "step": 3921, + "training_step_time": 0.21246814727783203 + }, + { + "epoch": 5.9844970703125e-06, + "model_forward_time": 0.024144649505615234, + "step": 3922 + }, + { + "epoch": 5.9844970703125e-06, + "step": 3922, + "training_step_time": 0.11600542068481445 + }, + { + "epoch": 5.98602294921875e-06, + "model_forward_time": 0.023773670196533203, + "step": 3923 + }, + { + "epoch": 5.98602294921875e-06, + "step": 3923, + "training_step_time": 0.13034272193908691 + }, + { + "epoch": 5.987548828125e-06, + "model_forward_time": 0.025002717971801758, + "step": 3924 + }, + { + "epoch": 5.987548828125e-06, + "step": 3924, + "training_step_time": 0.1120913028717041 + }, + { + "epoch": 5.98907470703125e-06, + "model_forward_time": 0.024188995361328125, + "step": 3925 + }, + { + "epoch": 5.98907470703125e-06, + "step": 3925, + "training_step_time": 0.12143492698669434 + }, + { + "epoch": 5.9906005859375e-06, + "model_forward_time": 0.024413585662841797, + "step": 3926 + }, + { + "epoch": 5.9906005859375e-06, + "step": 3926, + "training_step_time": 0.10945916175842285 + }, + { + "epoch": 5.99212646484375e-06, + "model_forward_time": 0.024651288986206055, + "step": 3927 + }, + { + "epoch": 5.99212646484375e-06, + "step": 3927, + "training_step_time": 0.18586015701293945 + }, + { + "epoch": 5.99365234375e-06, + "model_forward_time": 0.02423095703125, + "step": 3928 + }, + { + "epoch": 5.99365234375e-06, + "step": 3928, + "training_step_time": 0.12376689910888672 + }, + { + "epoch": 5.99517822265625e-06, + "model_forward_time": 0.0237886905670166, + "step": 3929 + }, + { + "epoch": 5.99517822265625e-06, + "step": 3929, + "training_step_time": 0.10692405700683594 + }, + { + "epoch": 5.9967041015625e-06, + "grad_norm": 0.431569367647171, + "learning_rate": 9.821694678938953e-05, + "loss": 0.0773, + "step": 3930 + }, + { + "epoch": 5.9967041015625e-06, + "model_forward_time": 0.024218320846557617, + "step": 3930 + }, + { + "epoch": 5.9967041015625e-06, + "step": 3930, + "training_step_time": 0.11264729499816895 + }, + { + "epoch": 5.99822998046875e-06, + "model_forward_time": 0.02421712875366211, + "step": 3931 + }, + { + "epoch": 5.99822998046875e-06, + "step": 3931, + "training_step_time": 0.1077718734741211 + }, + { + "epoch": 5.999755859375e-06, + "model_forward_time": 0.02498316764831543, + "step": 3932 + }, + { + "epoch": 5.999755859375e-06, + "step": 3932, + "training_step_time": 0.1098182201385498 + }, + { + "epoch": 6.00128173828125e-06, + "model_forward_time": 0.02467203140258789, + "step": 3933 + }, + { + "epoch": 6.00128173828125e-06, + "step": 3933, + "training_step_time": 0.10857200622558594 + }, + { + "epoch": 6.0028076171875e-06, + "model_forward_time": 0.02423882484436035, + "step": 3934 + }, + { + "epoch": 6.0028076171875e-06, + "step": 3934, + "training_step_time": 0.11143040657043457 + }, + { + "epoch": 6.00433349609375e-06, + "model_forward_time": 0.02446913719177246, + "step": 3935 + }, + { + "epoch": 6.00433349609375e-06, + "step": 3935, + "training_step_time": 0.10392570495605469 + }, + { + "epoch": 6.005859375e-06, + "model_forward_time": 0.0241239070892334, + "step": 3936 + }, + { + "epoch": 6.005859375e-06, + "step": 3936, + "training_step_time": 0.12522149085998535 + }, + { + "epoch": 6.00738525390625e-06, + "model_forward_time": 0.024126291275024414, + "step": 3937 + }, + { + "epoch": 6.00738525390625e-06, + "step": 3937, + "training_step_time": 0.12706422805786133 + }, + { + "epoch": 6.0089111328125e-06, + "model_forward_time": 0.024190425872802734, + "step": 3938 + }, + { + "epoch": 6.0089111328125e-06, + "step": 3938, + "training_step_time": 0.11108613014221191 + }, + { + "epoch": 6.01043701171875e-06, + "model_forward_time": 0.02630615234375, + "step": 3939 + }, + { + "epoch": 6.01043701171875e-06, + "step": 3939, + "training_step_time": 0.1137075424194336 + }, + { + "epoch": 6.011962890625e-06, + "grad_norm": 0.9006413817405701, + "learning_rate": 9.820233001472738e-05, + "loss": 0.117, + "step": 3940 + }, + { + "epoch": 6.011962890625e-06, + "model_forward_time": 0.024637460708618164, + "step": 3940 + }, + { + "epoch": 6.011962890625e-06, + "step": 3940, + "training_step_time": 0.11525750160217285 + }, + { + "epoch": 6.01348876953125e-06, + "model_forward_time": 0.02448296546936035, + "step": 3941 + }, + { + "epoch": 6.01348876953125e-06, + "step": 3941, + "training_step_time": 0.12529683113098145 + }, + { + "epoch": 6.0150146484375e-06, + "model_forward_time": 0.024582624435424805, + "step": 3942 + }, + { + "epoch": 6.0150146484375e-06, + "step": 3942, + "training_step_time": 0.12486410140991211 + }, + { + "epoch": 6.01654052734375e-06, + "model_forward_time": 0.024431228637695312, + "step": 3943 + }, + { + "epoch": 6.01654052734375e-06, + "step": 3943, + "training_step_time": 0.11432290077209473 + }, + { + "epoch": 6.01806640625e-06, + "model_forward_time": 0.02467179298400879, + "step": 3944 + }, + { + "epoch": 6.01806640625e-06, + "step": 3944, + "training_step_time": 0.11053967475891113 + }, + { + "epoch": 6.01959228515625e-06, + "model_forward_time": 0.02424025535583496, + "step": 3945 + }, + { + "epoch": 6.01959228515625e-06, + "step": 3945, + "training_step_time": 0.11434650421142578 + }, + { + "epoch": 6.0211181640625e-06, + "model_forward_time": 0.02490830421447754, + "step": 3946 + }, + { + "epoch": 6.0211181640625e-06, + "step": 3946, + "training_step_time": 0.10954451560974121 + }, + { + "epoch": 6.02264404296875e-06, + "model_forward_time": 0.024805307388305664, + "step": 3947 + }, + { + "epoch": 6.02264404296875e-06, + "step": 3947, + "training_step_time": 0.11148619651794434 + }, + { + "epoch": 6.024169921875e-06, + "model_forward_time": 0.024556636810302734, + "step": 3948 + }, + { + "epoch": 6.024169921875e-06, + "step": 3948, + "training_step_time": 0.11260271072387695 + }, + { + "epoch": 6.02569580078125e-06, + "model_forward_time": 0.02450084686279297, + "step": 3949 + }, + { + "epoch": 6.02569580078125e-06, + "step": 3949, + "training_step_time": 0.11630487442016602 + }, + { + "epoch": 6.0272216796875e-06, + "grad_norm": 0.5529266595840454, + "learning_rate": 9.818765466968909e-05, + "loss": 0.0928, + "step": 3950 + }, + { + "epoch": 6.0272216796875e-06, + "model_forward_time": 0.0244143009185791, + "step": 3950 + }, + { + "epoch": 6.0272216796875e-06, + "step": 3950, + "training_step_time": 0.11397242546081543 + }, + { + "epoch": 6.02874755859375e-06, + "model_forward_time": 0.0248258113861084, + "step": 3951 + }, + { + "epoch": 6.02874755859375e-06, + "step": 3951, + "training_step_time": 0.1118779182434082 + }, + { + "epoch": 6.0302734375e-06, + "model_forward_time": 0.024424314498901367, + "step": 3952 + }, + { + "epoch": 6.0302734375e-06, + "step": 3952, + "training_step_time": 0.11092114448547363 + }, + { + "epoch": 6.03179931640625e-06, + "model_forward_time": 0.024285078048706055, + "step": 3953 + }, + { + "epoch": 6.03179931640625e-06, + "step": 3953, + "training_step_time": 0.1101067066192627 + }, + { + "epoch": 6.0333251953125e-06, + "model_forward_time": 0.024440765380859375, + "step": 3954 + }, + { + "epoch": 6.0333251953125e-06, + "step": 3954, + "training_step_time": 0.11153817176818848 + }, + { + "epoch": 6.03485107421875e-06, + "model_forward_time": 0.024410486221313477, + "step": 3955 + }, + { + "epoch": 6.03485107421875e-06, + "step": 3955, + "training_step_time": 0.11780023574829102 + }, + { + "epoch": 6.036376953125e-06, + "model_forward_time": 0.024688720703125, + "step": 3956 + }, + { + "epoch": 6.036376953125e-06, + "step": 3956, + "training_step_time": 0.10722041130065918 + }, + { + "epoch": 6.03790283203125e-06, + "model_forward_time": 0.024570465087890625, + "step": 3957 + }, + { + "epoch": 6.03790283203125e-06, + "step": 3957, + "training_step_time": 0.11048007011413574 + }, + { + "epoch": 6.0394287109375e-06, + "model_forward_time": 0.024798870086669922, + "step": 3958 + }, + { + "epoch": 6.0394287109375e-06, + "step": 3958, + "training_step_time": 0.11306428909301758 + }, + { + "epoch": 6.04095458984375e-06, + "model_forward_time": 0.02511310577392578, + "step": 3959 + }, + { + "epoch": 6.04095458984375e-06, + "step": 3959, + "training_step_time": 0.10807299613952637 + }, + { + "epoch": 6.04248046875e-06, + "grad_norm": 0.26172083616256714, + "learning_rate": 9.817292077210659e-05, + "loss": 0.1002, + "step": 3960 + }, + { + "epoch": 6.04248046875e-06, + "model_forward_time": 0.024895906448364258, + "step": 3960 + }, + { + "epoch": 6.04248046875e-06, + "step": 3960, + "training_step_time": 0.11130285263061523 + }, + { + "epoch": 6.04400634765625e-06, + "model_forward_time": 0.02480936050415039, + "step": 3961 + }, + { + "epoch": 6.04400634765625e-06, + "step": 3961, + "training_step_time": 0.11148691177368164 + }, + { + "epoch": 6.0455322265625e-06, + "model_forward_time": 0.024486064910888672, + "step": 3962 + }, + { + "epoch": 6.0455322265625e-06, + "step": 3962, + "training_step_time": 0.10804605484008789 + }, + { + "epoch": 6.04705810546875e-06, + "model_forward_time": 0.024596452713012695, + "step": 3963 + }, + { + "epoch": 6.04705810546875e-06, + "step": 3963, + "training_step_time": 0.11092400550842285 + }, + { + "epoch": 6.048583984375e-06, + "model_forward_time": 0.02443718910217285, + "step": 3964 + }, + { + "epoch": 6.048583984375e-06, + "step": 3964, + "training_step_time": 0.11322522163391113 + }, + { + "epoch": 6.05010986328125e-06, + "model_forward_time": 0.024370670318603516, + "step": 3965 + }, + { + "epoch": 6.05010986328125e-06, + "step": 3965, + "training_step_time": 0.11672163009643555 + }, + { + "epoch": 6.0516357421875e-06, + "model_forward_time": 0.028784513473510742, + "step": 3966 + }, + { + "epoch": 6.0516357421875e-06, + "step": 3966, + "training_step_time": 0.11127829551696777 + }, + { + "epoch": 6.05316162109375e-06, + "model_forward_time": 0.024376869201660156, + "step": 3967 + }, + { + "epoch": 6.05316162109375e-06, + "step": 3967, + "training_step_time": 0.1616535186767578 + }, + { + "epoch": 6.0546875e-06, + "model_forward_time": 0.02426314353942871, + "step": 3968 + }, + { + "epoch": 6.0546875e-06, + "step": 3968, + "training_step_time": 0.1816425323486328 + }, + { + "epoch": 6.05621337890625e-06, + "model_forward_time": 0.02413773536682129, + "step": 3969 + }, + { + "epoch": 6.05621337890625e-06, + "step": 3969, + "training_step_time": 0.16640210151672363 + }, + { + "epoch": 6.0577392578125e-06, + "grad_norm": 0.6211804747581482, + "learning_rate": 9.815812833988291e-05, + "loss": 0.0915, + "step": 3970 + }, + { + "epoch": 6.0577392578125e-06, + "model_forward_time": 0.023595571517944336, + "step": 3970 + }, + { + "epoch": 6.0577392578125e-06, + "step": 3970, + "training_step_time": 0.12278890609741211 + }, + { + "epoch": 6.05926513671875e-06, + "model_forward_time": 0.023621320724487305, + "step": 3971 + }, + { + "epoch": 6.05926513671875e-06, + "step": 3971, + "training_step_time": 0.11238932609558105 + }, + { + "epoch": 6.060791015625e-06, + "model_forward_time": 0.02471137046813965, + "step": 3972 + }, + { + "epoch": 6.060791015625e-06, + "step": 3972, + "training_step_time": 0.12447094917297363 + }, + { + "epoch": 6.06231689453125e-06, + "model_forward_time": 0.024735212326049805, + "step": 3973 + }, + { + "epoch": 6.06231689453125e-06, + "step": 3973, + "training_step_time": 0.10941576957702637 + }, + { + "epoch": 6.0638427734375e-06, + "model_forward_time": 0.02462005615234375, + "step": 3974 + }, + { + "epoch": 6.0638427734375e-06, + "step": 3974, + "training_step_time": 0.19194483757019043 + }, + { + "epoch": 6.06536865234375e-06, + "model_forward_time": 0.023949146270751953, + "step": 3975 + }, + { + "epoch": 6.06536865234375e-06, + "step": 3975, + "training_step_time": 0.1175544261932373 + }, + { + "epoch": 6.06689453125e-06, + "model_forward_time": 0.024126529693603516, + "step": 3976 + }, + { + "epoch": 6.06689453125e-06, + "step": 3976, + "training_step_time": 0.10764050483703613 + }, + { + "epoch": 6.06842041015625e-06, + "model_forward_time": 0.024681568145751953, + "step": 3977 + }, + { + "epoch": 6.06842041015625e-06, + "step": 3977, + "training_step_time": 0.10711884498596191 + }, + { + "epoch": 6.0699462890625e-06, + "model_forward_time": 0.024898052215576172, + "step": 3978 + }, + { + "epoch": 6.0699462890625e-06, + "step": 3978, + "training_step_time": 0.10722756385803223 + }, + { + "epoch": 6.07147216796875e-06, + "model_forward_time": 0.02580738067626953, + "step": 3979 + }, + { + "epoch": 6.07147216796875e-06, + "step": 3979, + "training_step_time": 0.1071929931640625 + }, + { + "epoch": 6.072998046875e-06, + "grad_norm": 0.3808532655239105, + "learning_rate": 9.81432773909923e-05, + "loss": 0.0931, + "step": 3980 + }, + { + "epoch": 6.072998046875e-06, + "model_forward_time": 0.024674415588378906, + "step": 3980 + }, + { + "epoch": 6.072998046875e-06, + "step": 3980, + "training_step_time": 0.11172676086425781 + }, + { + "epoch": 6.07452392578125e-06, + "model_forward_time": 0.02452707290649414, + "step": 3981 + }, + { + "epoch": 6.07452392578125e-06, + "step": 3981, + "training_step_time": 0.10771393775939941 + }, + { + "epoch": 6.0760498046875e-06, + "model_forward_time": 0.025019168853759766, + "step": 3982 + }, + { + "epoch": 6.0760498046875e-06, + "step": 3982, + "training_step_time": 0.10748434066772461 + }, + { + "epoch": 6.07757568359375e-06, + "model_forward_time": 0.024686336517333984, + "step": 3983 + }, + { + "epoch": 6.07757568359375e-06, + "step": 3983, + "training_step_time": 0.10730838775634766 + }, + { + "epoch": 6.0791015625e-06, + "model_forward_time": 0.02414727210998535, + "step": 3984 + }, + { + "epoch": 6.0791015625e-06, + "step": 3984, + "training_step_time": 0.10560250282287598 + }, + { + "epoch": 6.08062744140625e-06, + "model_forward_time": 0.02419281005859375, + "step": 3985 + }, + { + "epoch": 6.08062744140625e-06, + "step": 3985, + "training_step_time": 0.11469411849975586 + }, + { + "epoch": 6.0821533203125e-06, + "model_forward_time": 0.025173187255859375, + "step": 3986 + }, + { + "epoch": 6.0821533203125e-06, + "step": 3986, + "training_step_time": 0.10912871360778809 + }, + { + "epoch": 6.08367919921875e-06, + "model_forward_time": 0.024566173553466797, + "step": 3987 + }, + { + "epoch": 6.08367919921875e-06, + "step": 3987, + "training_step_time": 0.1077573299407959 + }, + { + "epoch": 6.085205078125e-06, + "model_forward_time": 0.024798154830932617, + "step": 3988 + }, + { + "epoch": 6.085205078125e-06, + "step": 3988, + "training_step_time": 0.17505693435668945 + }, + { + "epoch": 6.08673095703125e-06, + "model_forward_time": 0.02389669418334961, + "step": 3989 + }, + { + "epoch": 6.08673095703125e-06, + "step": 3989, + "training_step_time": 0.16470623016357422 + }, + { + "epoch": 6.0882568359375e-06, + "grad_norm": 0.48456794023513794, + "learning_rate": 9.812836794348004e-05, + "loss": 0.0919, + "step": 3990 + }, + { + "epoch": 6.0882568359375e-06, + "model_forward_time": 0.02516961097717285, + "step": 3990 + }, + { + "epoch": 6.0882568359375e-06, + "step": 3990, + "training_step_time": 0.10391426086425781 + }, + { + "epoch": 6.08978271484375e-06, + "model_forward_time": 0.024981021881103516, + "step": 3991 + }, + { + "epoch": 6.08978271484375e-06, + "step": 3991, + "training_step_time": 0.10563421249389648 + }, + { + "epoch": 6.09130859375e-06, + "model_forward_time": 0.025179147720336914, + "step": 3992 + }, + { + "epoch": 6.09130859375e-06, + "step": 3992, + "training_step_time": 0.10694766044616699 + }, + { + "epoch": 6.09283447265625e-06, + "model_forward_time": 0.02774214744567871, + "step": 3993 + }, + { + "epoch": 6.09283447265625e-06, + "step": 3993, + "training_step_time": 0.11646103858947754 + }, + { + "epoch": 6.0943603515625e-06, + "model_forward_time": 0.025507211685180664, + "step": 3994 + }, + { + "epoch": 6.0943603515625e-06, + "step": 3994, + "training_step_time": 0.11000514030456543 + }, + { + "epoch": 6.09588623046875e-06, + "model_forward_time": 0.02602076530456543, + "step": 3995 + }, + { + "epoch": 6.09588623046875e-06, + "step": 3995, + "training_step_time": 0.1121220588684082 + }, + { + "epoch": 6.097412109375e-06, + "model_forward_time": 0.025761842727661133, + "step": 3996 + }, + { + "epoch": 6.097412109375e-06, + "step": 3996, + "training_step_time": 0.11238551139831543 + }, + { + "epoch": 6.09893798828125e-06, + "model_forward_time": 0.025362014770507812, + "step": 3997 + }, + { + "epoch": 6.09893798828125e-06, + "step": 3997, + "training_step_time": 0.10671401023864746 + }, + { + "epoch": 6.1004638671875e-06, + "model_forward_time": 0.02540135383605957, + "step": 3998 + }, + { + "epoch": 6.1004638671875e-06, + "step": 3998, + "training_step_time": 0.10659956932067871 + }, + { + "epoch": 6.10198974609375e-06, + "model_forward_time": 0.025347232818603516, + "step": 3999 + }, + { + "epoch": 6.10198974609375e-06, + "step": 3999, + "training_step_time": 0.10850644111633301 + }, + { + "epoch": 6.103515625e-06, + "grad_norm": 0.45659807324409485, + "learning_rate": 9.811340001546251e-05, + "loss": 0.0894, + "step": 4000 + }, + { + "epoch": 6.103515625e-06, + "model_forward_time": 0.024462461471557617, + "step": 4000 + }, + { + "epoch": 6.103515625e-06, + "step": 4000, + "training_step_time": 0.10233044624328613 + }, + { + "epoch": 6.10504150390625e-06, + "model_forward_time": 0.023958683013916016, + "step": 4001 + }, + { + "epoch": 6.10504150390625e-06, + "step": 4001, + "training_step_time": 0.10288405418395996 + }, + { + "epoch": 6.1065673828125e-06, + "model_forward_time": 0.023969650268554688, + "step": 4002 + }, + { + "epoch": 6.1065673828125e-06, + "step": 4002, + "training_step_time": 0.10452413558959961 + }, + { + "epoch": 6.10809326171875e-06, + "model_forward_time": 0.02502155303955078, + "step": 4003 + }, + { + "epoch": 6.10809326171875e-06, + "step": 4003, + "training_step_time": 0.10599780082702637 + }, + { + "epoch": 6.109619140625e-06, + "model_forward_time": 0.024352312088012695, + "step": 4004 + }, + { + "epoch": 6.109619140625e-06, + "step": 4004, + "training_step_time": 0.10656309127807617 + }, + { + "epoch": 6.11114501953125e-06, + "model_forward_time": 0.02449321746826172, + "step": 4005 + }, + { + "epoch": 6.11114501953125e-06, + "step": 4005, + "training_step_time": 0.11162996292114258 + }, + { + "epoch": 6.1126708984375e-06, + "model_forward_time": 0.02432084083557129, + "step": 4006 + }, + { + "epoch": 6.1126708984375e-06, + "step": 4006, + "training_step_time": 0.10883665084838867 + }, + { + "epoch": 6.11419677734375e-06, + "model_forward_time": 0.024701833724975586, + "step": 4007 + }, + { + "epoch": 6.11419677734375e-06, + "step": 4007, + "training_step_time": 0.10908937454223633 + }, + { + "epoch": 6.11572265625e-06, + "model_forward_time": 0.02397751808166504, + "step": 4008 + }, + { + "epoch": 6.11572265625e-06, + "step": 4008, + "training_step_time": 0.10703110694885254 + }, + { + "epoch": 6.11724853515625e-06, + "model_forward_time": 0.024418354034423828, + "step": 4009 + }, + { + "epoch": 6.11724853515625e-06, + "step": 4009, + "training_step_time": 0.10777568817138672 + }, + { + "epoch": 6.1187744140625e-06, + "grad_norm": 0.46199831366539, + "learning_rate": 9.80983736251272e-05, + "loss": 0.0817, + "step": 4010 + }, + { + "epoch": 6.1187744140625e-06, + "model_forward_time": 0.024737119674682617, + "step": 4010 + }, + { + "epoch": 6.1187744140625e-06, + "step": 4010, + "training_step_time": 0.10531497001647949 + }, + { + "epoch": 6.12030029296875e-06, + "model_forward_time": 0.024564743041992188, + "step": 4011 + }, + { + "epoch": 6.12030029296875e-06, + "step": 4011, + "training_step_time": 0.10743546485900879 + }, + { + "epoch": 6.121826171875e-06, + "model_forward_time": 0.02490520477294922, + "step": 4012 + }, + { + "epoch": 6.121826171875e-06, + "step": 4012, + "training_step_time": 0.10643982887268066 + }, + { + "epoch": 6.12335205078125e-06, + "model_forward_time": 0.024359703063964844, + "step": 4013 + }, + { + "epoch": 6.12335205078125e-06, + "step": 4013, + "training_step_time": 0.10345602035522461 + }, + { + "epoch": 6.1248779296875e-06, + "model_forward_time": 0.024936914443969727, + "step": 4014 + }, + { + "epoch": 6.1248779296875e-06, + "step": 4014, + "training_step_time": 0.11194539070129395 + }, + { + "epoch": 6.12640380859375e-06, + "model_forward_time": 0.02465963363647461, + "step": 4015 + }, + { + "epoch": 6.12640380859375e-06, + "step": 4015, + "training_step_time": 0.11056137084960938 + }, + { + "epoch": 6.1279296875e-06, + "model_forward_time": 0.024490833282470703, + "step": 4016 + }, + { + "epoch": 6.1279296875e-06, + "step": 4016, + "training_step_time": 0.10954999923706055 + }, + { + "epoch": 6.12945556640625e-06, + "model_forward_time": 0.02495884895324707, + "step": 4017 + }, + { + "epoch": 6.12945556640625e-06, + "step": 4017, + "training_step_time": 0.11131048202514648 + }, + { + "epoch": 6.1309814453125e-06, + "model_forward_time": 0.02453017234802246, + "step": 4018 + }, + { + "epoch": 6.1309814453125e-06, + "step": 4018, + "training_step_time": 0.1126103401184082 + }, + { + "epoch": 6.13250732421875e-06, + "model_forward_time": 0.02529168128967285, + "step": 4019 + }, + { + "epoch": 6.13250732421875e-06, + "step": 4019, + "training_step_time": 0.11054158210754395 + }, + { + "epoch": 6.134033203125e-06, + "grad_norm": 0.2944953143596649, + "learning_rate": 9.808328879073251e-05, + "loss": 0.095, + "step": 4020 + }, + { + "epoch": 6.134033203125e-06, + "model_forward_time": 0.024592161178588867, + "step": 4020 + }, + { + "epoch": 6.134033203125e-06, + "step": 4020, + "training_step_time": 0.1082160472869873 + }, + { + "epoch": 6.13555908203125e-06, + "model_forward_time": 0.024862051010131836, + "step": 4021 + }, + { + "epoch": 6.13555908203125e-06, + "step": 4021, + "training_step_time": 0.17844057083129883 + }, + { + "epoch": 6.1370849609375e-06, + "model_forward_time": 0.024178266525268555, + "step": 4022 + }, + { + "epoch": 6.1370849609375e-06, + "step": 4022, + "training_step_time": 0.1128687858581543 + }, + { + "epoch": 6.13861083984375e-06, + "model_forward_time": 0.02386164665222168, + "step": 4023 + }, + { + "epoch": 6.13861083984375e-06, + "step": 4023, + "training_step_time": 0.21379590034484863 + }, + { + "epoch": 6.14013671875e-06, + "model_forward_time": 0.02419304847717285, + "step": 4024 + }, + { + "epoch": 6.14013671875e-06, + "step": 4024, + "training_step_time": 0.18358683586120605 + }, + { + "epoch": 6.14166259765625e-06, + "model_forward_time": 0.02388167381286621, + "step": 4025 + }, + { + "epoch": 6.14166259765625e-06, + "step": 4025, + "training_step_time": 0.19825243949890137 + }, + { + "epoch": 6.1431884765625e-06, + "model_forward_time": 0.023534059524536133, + "step": 4026 + }, + { + "epoch": 6.1431884765625e-06, + "step": 4026, + "training_step_time": 0.19337844848632812 + }, + { + "epoch": 6.14471435546875e-06, + "model_forward_time": 0.0263822078704834, + "step": 4027 + }, + { + "epoch": 6.14471435546875e-06, + "step": 4027, + "training_step_time": 0.1702885627746582 + }, + { + "epoch": 6.146240234375e-06, + "model_forward_time": 0.023482561111450195, + "step": 4028 + }, + { + "epoch": 6.146240234375e-06, + "step": 4028, + "training_step_time": 0.16748619079589844 + }, + { + "epoch": 6.14776611328125e-06, + "model_forward_time": 0.023217439651489258, + "step": 4029 + }, + { + "epoch": 6.14776611328125e-06, + "step": 4029, + "training_step_time": 0.10916352272033691 + }, + { + "epoch": 6.1492919921875e-06, + "grad_norm": 0.5035075545310974, + "learning_rate": 9.806814553060801e-05, + "loss": 0.1049, + "step": 4030 + }, + { + "epoch": 6.1492919921875e-06, + "model_forward_time": 0.024348974227905273, + "step": 4030 + }, + { + "epoch": 6.1492919921875e-06, + "step": 4030, + "training_step_time": 0.11574149131774902 + }, + { + "epoch": 6.15081787109375e-06, + "model_forward_time": 0.02473759651184082, + "step": 4031 + }, + { + "epoch": 6.15081787109375e-06, + "step": 4031, + "training_step_time": 0.10732030868530273 + }, + { + "epoch": 6.15234375e-06, + "model_forward_time": 0.0256803035736084, + "step": 4032 + }, + { + "epoch": 6.15234375e-06, + "step": 4032, + "training_step_time": 0.1124420166015625 + }, + { + "epoch": 6.15386962890625e-06, + "model_forward_time": 0.025347471237182617, + "step": 4033 + }, + { + "epoch": 6.15386962890625e-06, + "step": 4033, + "training_step_time": 0.11317825317382812 + }, + { + "epoch": 6.1553955078125e-06, + "model_forward_time": 0.025418519973754883, + "step": 4034 + }, + { + "epoch": 6.1553955078125e-06, + "step": 4034, + "training_step_time": 0.11313104629516602 + }, + { + "epoch": 6.15692138671875e-06, + "model_forward_time": 0.024643659591674805, + "step": 4035 + }, + { + "epoch": 6.15692138671875e-06, + "step": 4035, + "training_step_time": 0.11036419868469238 + }, + { + "epoch": 6.158447265625e-06, + "model_forward_time": 0.02450418472290039, + "step": 4036 + }, + { + "epoch": 6.158447265625e-06, + "step": 4036, + "training_step_time": 0.1073148250579834 + }, + { + "epoch": 6.15997314453125e-06, + "model_forward_time": 0.024698734283447266, + "step": 4037 + }, + { + "epoch": 6.15997314453125e-06, + "step": 4037, + "training_step_time": 0.10583186149597168 + }, + { + "epoch": 6.1614990234375e-06, + "model_forward_time": 0.024637460708618164, + "step": 4038 + }, + { + "epoch": 6.1614990234375e-06, + "step": 4038, + "training_step_time": 0.1082921028137207 + }, + { + "epoch": 6.16302490234375e-06, + "model_forward_time": 0.024567604064941406, + "step": 4039 + }, + { + "epoch": 6.16302490234375e-06, + "step": 4039, + "training_step_time": 0.10560321807861328 + }, + { + "epoch": 6.16455078125e-06, + "grad_norm": 0.2992149293422699, + "learning_rate": 9.805294386315415e-05, + "loss": 0.0681, + "step": 4040 + }, + { + "epoch": 6.16455078125e-06, + "model_forward_time": 0.024450302124023438, + "step": 4040 + }, + { + "epoch": 6.16455078125e-06, + "step": 4040, + "training_step_time": 0.21399307250976562 + }, + { + "epoch": 6.16607666015625e-06, + "model_forward_time": 0.023947715759277344, + "step": 4041 + }, + { + "epoch": 6.16607666015625e-06, + "step": 4041, + "training_step_time": 0.10506224632263184 + }, + { + "epoch": 6.1676025390625e-06, + "model_forward_time": 0.027935028076171875, + "step": 4042 + }, + { + "epoch": 6.1676025390625e-06, + "step": 4042, + "training_step_time": 0.11345887184143066 + }, + { + "epoch": 6.16912841796875e-06, + "model_forward_time": 0.024759531021118164, + "step": 4043 + }, + { + "epoch": 6.16912841796875e-06, + "step": 4043, + "training_step_time": 0.1062314510345459 + }, + { + "epoch": 6.170654296875e-06, + "model_forward_time": 0.02488231658935547, + "step": 4044 + }, + { + "epoch": 6.170654296875e-06, + "step": 4044, + "training_step_time": 0.11951088905334473 + }, + { + "epoch": 6.17218017578125e-06, + "model_forward_time": 0.0244748592376709, + "step": 4045 + }, + { + "epoch": 6.17218017578125e-06, + "step": 4045, + "training_step_time": 0.10823583602905273 + }, + { + "epoch": 6.1737060546875e-06, + "model_forward_time": 0.02515125274658203, + "step": 4046 + }, + { + "epoch": 6.1737060546875e-06, + "step": 4046, + "training_step_time": 0.108154296875 + }, + { + "epoch": 6.17523193359375e-06, + "model_forward_time": 0.025255441665649414, + "step": 4047 + }, + { + "epoch": 6.17523193359375e-06, + "step": 4047, + "training_step_time": 0.11082696914672852 + }, + { + "epoch": 6.1767578125e-06, + "model_forward_time": 0.0252077579498291, + "step": 4048 + }, + { + "epoch": 6.1767578125e-06, + "step": 4048, + "training_step_time": 0.10903191566467285 + }, + { + "epoch": 6.17828369140625e-06, + "model_forward_time": 0.024511337280273438, + "step": 4049 + }, + { + "epoch": 6.17828369140625e-06, + "step": 4049, + "training_step_time": 0.10984539985656738 + }, + { + "epoch": 6.1798095703125e-06, + "grad_norm": 0.3301985561847687, + "learning_rate": 9.803768380684242e-05, + "loss": 0.1019, + "step": 4050 + }, + { + "epoch": 6.1798095703125e-06, + "model_forward_time": 0.02468109130859375, + "step": 4050 + }, + { + "epoch": 6.1798095703125e-06, + "step": 4050, + "training_step_time": 0.1150820255279541 + }, + { + "epoch": 6.18133544921875e-06, + "model_forward_time": 0.02499532699584961, + "step": 4051 + }, + { + "epoch": 6.18133544921875e-06, + "step": 4051, + "training_step_time": 0.1070556640625 + }, + { + "epoch": 6.182861328125e-06, + "model_forward_time": 0.024591684341430664, + "step": 4052 + }, + { + "epoch": 6.182861328125e-06, + "step": 4052, + "training_step_time": 0.11088418960571289 + }, + { + "epoch": 6.18438720703125e-06, + "model_forward_time": 0.02476358413696289, + "step": 4053 + }, + { + "epoch": 6.18438720703125e-06, + "step": 4053, + "training_step_time": 0.11005401611328125 + }, + { + "epoch": 6.1859130859375e-06, + "model_forward_time": 0.025078773498535156, + "step": 4054 + }, + { + "epoch": 6.1859130859375e-06, + "step": 4054, + "training_step_time": 0.11010098457336426 + }, + { + "epoch": 6.18743896484375e-06, + "model_forward_time": 0.026221036911010742, + "step": 4055 + }, + { + "epoch": 6.18743896484375e-06, + "step": 4055, + "training_step_time": 0.10863089561462402 + }, + { + "epoch": 6.18896484375e-06, + "model_forward_time": 0.02585911750793457, + "step": 4056 + }, + { + "epoch": 6.18896484375e-06, + "step": 4056, + "training_step_time": 0.10872244834899902 + }, + { + "epoch": 6.19049072265625e-06, + "model_forward_time": 0.02552509307861328, + "step": 4057 + }, + { + "epoch": 6.19049072265625e-06, + "step": 4057, + "training_step_time": 0.10986971855163574 + }, + { + "epoch": 6.1920166015625e-06, + "model_forward_time": 0.02529597282409668, + "step": 4058 + }, + { + "epoch": 6.1920166015625e-06, + "step": 4058, + "training_step_time": 0.10926675796508789 + }, + { + "epoch": 6.19354248046875e-06, + "model_forward_time": 0.02483081817626953, + "step": 4059 + }, + { + "epoch": 6.19354248046875e-06, + "step": 4059, + "training_step_time": 0.11312460899353027 + }, + { + "epoch": 6.195068359375e-06, + "grad_norm": 0.4493350386619568, + "learning_rate": 9.802236538021518e-05, + "loss": 0.0866, + "step": 4060 + }, + { + "epoch": 6.195068359375e-06, + "model_forward_time": 0.024946928024291992, + "step": 4060 + }, + { + "epoch": 6.195068359375e-06, + "step": 4060, + "training_step_time": 0.11127257347106934 + }, + { + "epoch": 6.19659423828125e-06, + "model_forward_time": 0.025792598724365234, + "step": 4061 + }, + { + "epoch": 6.19659423828125e-06, + "step": 4061, + "training_step_time": 0.1120147705078125 + }, + { + "epoch": 6.1981201171875e-06, + "model_forward_time": 0.025043964385986328, + "step": 4062 + }, + { + "epoch": 6.1981201171875e-06, + "step": 4062, + "training_step_time": 0.10994243621826172 + }, + { + "epoch": 6.19964599609375e-06, + "model_forward_time": 0.0253140926361084, + "step": 4063 + }, + { + "epoch": 6.19964599609375e-06, + "step": 4063, + "training_step_time": 0.11213088035583496 + }, + { + "epoch": 6.201171875e-06, + "model_forward_time": 0.025109052658081055, + "step": 4064 + }, + { + "epoch": 6.201171875e-06, + "step": 4064, + "training_step_time": 0.10786986351013184 + }, + { + "epoch": 6.20269775390625e-06, + "model_forward_time": 0.025150537490844727, + "step": 4065 + }, + { + "epoch": 6.20269775390625e-06, + "step": 4065, + "training_step_time": 0.11389446258544922 + }, + { + "epoch": 6.2042236328125e-06, + "model_forward_time": 0.02537369728088379, + "step": 4066 + }, + { + "epoch": 6.2042236328125e-06, + "step": 4066, + "training_step_time": 0.11095404624938965 + }, + { + "epoch": 6.20574951171875e-06, + "model_forward_time": 0.02577805519104004, + "step": 4067 + }, + { + "epoch": 6.20574951171875e-06, + "step": 4067, + "training_step_time": 0.14825224876403809 + }, + { + "epoch": 6.207275390625e-06, + "model_forward_time": 0.026848316192626953, + "step": 4068 + }, + { + "epoch": 6.207275390625e-06, + "step": 4068, + "training_step_time": 0.11350083351135254 + }, + { + "epoch": 6.20880126953125e-06, + "model_forward_time": 0.02527761459350586, + "step": 4069 + }, + { + "epoch": 6.20880126953125e-06, + "step": 4069, + "training_step_time": 0.13245630264282227 + }, + { + "epoch": 6.2103271484375e-06, + "grad_norm": 0.3537490963935852, + "learning_rate": 9.80069886018858e-05, + "loss": 0.0774, + "step": 4070 + }, + { + "epoch": 6.2103271484375e-06, + "model_forward_time": 0.02567887306213379, + "step": 4070 + }, + { + "epoch": 6.2103271484375e-06, + "step": 4070, + "training_step_time": 0.13516759872436523 + }, + { + "epoch": 6.21185302734375e-06, + "model_forward_time": 0.02460169792175293, + "step": 4071 + }, + { + "epoch": 6.21185302734375e-06, + "step": 4071, + "training_step_time": 0.1274712085723877 + }, + { + "epoch": 6.21337890625e-06, + "model_forward_time": 0.02482295036315918, + "step": 4072 + }, + { + "epoch": 6.21337890625e-06, + "step": 4072, + "training_step_time": 0.21318578720092773 + }, + { + "epoch": 6.21490478515625e-06, + "model_forward_time": 0.02386617660522461, + "step": 4073 + }, + { + "epoch": 6.21490478515625e-06, + "step": 4073, + "training_step_time": 0.17571640014648438 + }, + { + "epoch": 6.2164306640625e-06, + "model_forward_time": 0.02415299415588379, + "step": 4074 + }, + { + "epoch": 6.2164306640625e-06, + "step": 4074, + "training_step_time": 0.19040417671203613 + }, + { + "epoch": 6.21795654296875e-06, + "model_forward_time": 0.024153947830200195, + "step": 4075 + }, + { + "epoch": 6.21795654296875e-06, + "step": 4075, + "training_step_time": 0.14196443557739258 + }, + { + "epoch": 6.219482421875e-06, + "model_forward_time": 0.024690866470336914, + "step": 4076 + }, + { + "epoch": 6.219482421875e-06, + "step": 4076, + "training_step_time": 0.10529947280883789 + }, + { + "epoch": 6.22100830078125e-06, + "model_forward_time": 0.025324583053588867, + "step": 4077 + }, + { + "epoch": 6.22100830078125e-06, + "step": 4077, + "training_step_time": 0.10925817489624023 + }, + { + "epoch": 6.2225341796875e-06, + "model_forward_time": 0.02523040771484375, + "step": 4078 + }, + { + "epoch": 6.2225341796875e-06, + "step": 4078, + "training_step_time": 0.10924935340881348 + }, + { + "epoch": 6.22406005859375e-06, + "model_forward_time": 0.025370359420776367, + "step": 4079 + }, + { + "epoch": 6.22406005859375e-06, + "step": 4079, + "training_step_time": 0.10969829559326172 + }, + { + "epoch": 6.2255859375e-06, + "grad_norm": 0.38843709230422974, + "learning_rate": 9.799155349053851e-05, + "loss": 0.1, + "step": 4080 + }, + { + "epoch": 6.2255859375e-06, + "model_forward_time": 0.02525162696838379, + "step": 4080 + }, + { + "epoch": 6.2255859375e-06, + "step": 4080, + "training_step_time": 0.1104423999786377 + }, + { + "epoch": 6.22711181640625e-06, + "model_forward_time": 0.025877952575683594, + "step": 4081 + }, + { + "epoch": 6.22711181640625e-06, + "step": 4081, + "training_step_time": 0.1095893383026123 + }, + { + "epoch": 6.2286376953125e-06, + "model_forward_time": 0.025164365768432617, + "step": 4082 + }, + { + "epoch": 6.2286376953125e-06, + "step": 4082, + "training_step_time": 0.10735297203063965 + }, + { + "epoch": 6.23016357421875e-06, + "model_forward_time": 0.02541065216064453, + "step": 4083 + }, + { + "epoch": 6.23016357421875e-06, + "step": 4083, + "training_step_time": 0.1112053394317627 + }, + { + "epoch": 6.231689453125e-06, + "model_forward_time": 0.025327205657958984, + "step": 4084 + }, + { + "epoch": 6.231689453125e-06, + "step": 4084, + "training_step_time": 0.11129474639892578 + }, + { + "epoch": 6.23321533203125e-06, + "model_forward_time": 0.025369644165039062, + "step": 4085 + }, + { + "epoch": 6.23321533203125e-06, + "step": 4085, + "training_step_time": 0.11371779441833496 + }, + { + "epoch": 6.2347412109375e-06, + "model_forward_time": 0.025699853897094727, + "step": 4086 + }, + { + "epoch": 6.2347412109375e-06, + "step": 4086, + "training_step_time": 0.11709117889404297 + }, + { + "epoch": 6.23626708984375e-06, + "model_forward_time": 0.02540421485900879, + "step": 4087 + }, + { + "epoch": 6.23626708984375e-06, + "step": 4087, + "training_step_time": 0.22353315353393555 + }, + { + "epoch": 6.23779296875e-06, + "model_forward_time": 0.025333642959594727, + "step": 4088 + }, + { + "epoch": 6.23779296875e-06, + "step": 4088, + "training_step_time": 0.1325850486755371 + }, + { + "epoch": 6.23931884765625e-06, + "model_forward_time": 0.024460792541503906, + "step": 4089 + }, + { + "epoch": 6.23931884765625e-06, + "step": 4089, + "training_step_time": 0.18323898315429688 + }, + { + "epoch": 6.2408447265625e-06, + "grad_norm": 0.429690957069397, + "learning_rate": 9.797606006492841e-05, + "loss": 0.0834, + "step": 4090 + }, + { + "epoch": 6.2408447265625e-06, + "model_forward_time": 0.02521371841430664, + "step": 4090 + }, + { + "epoch": 6.2408447265625e-06, + "step": 4090, + "training_step_time": 0.1265251636505127 + }, + { + "epoch": 6.24237060546875e-06, + "model_forward_time": 0.025280237197875977, + "step": 4091 + }, + { + "epoch": 6.24237060546875e-06, + "step": 4091, + "training_step_time": 0.11684131622314453 + }, + { + "epoch": 6.243896484375e-06, + "model_forward_time": 0.024863243103027344, + "step": 4092 + }, + { + "epoch": 6.243896484375e-06, + "step": 4092, + "training_step_time": 0.11471700668334961 + }, + { + "epoch": 6.24542236328125e-06, + "model_forward_time": 0.025652408599853516, + "step": 4093 + }, + { + "epoch": 6.24542236328125e-06, + "step": 4093, + "training_step_time": 0.11330246925354004 + }, + { + "epoch": 6.2469482421875e-06, + "model_forward_time": 0.025325298309326172, + "step": 4094 + }, + { + "epoch": 6.2469482421875e-06, + "step": 4094, + "training_step_time": 0.11207771301269531 + }, + { + "epoch": 6.24847412109375e-06, + "model_forward_time": 0.029282331466674805, + "step": 4095 + }, + { + "epoch": 6.24847412109375e-06, + "step": 4095, + "training_step_time": 0.11543917655944824 + }, + { + "epoch": 6.25e-06, + "model_forward_time": 0.025379657745361328, + "step": 4096 + }, + { + "epoch": 6.25e-06, + "step": 4096, + "training_step_time": 0.11293745040893555 + }, + { + "epoch": 6.25152587890625e-06, + "model_forward_time": 0.025273561477661133, + "step": 4097 + }, + { + "epoch": 6.25152587890625e-06, + "step": 4097, + "training_step_time": 0.11040186882019043 + }, + { + "epoch": 6.2530517578125e-06, + "model_forward_time": 0.02525925636291504, + "step": 4098 + }, + { + "epoch": 6.2530517578125e-06, + "step": 4098, + "training_step_time": 0.10831952095031738 + }, + { + "epoch": 6.25457763671875e-06, + "model_forward_time": 0.02610492706298828, + "step": 4099 + }, + { + "epoch": 6.25457763671875e-06, + "step": 4099, + "training_step_time": 0.11008810997009277 + }, + { + "epoch": 6.256103515625e-06, + "grad_norm": 0.49703845381736755, + "learning_rate": 9.796050834388149e-05, + "loss": 0.0782, + "step": 4100 + }, + { + "epoch": 6.256103515625e-06, + "model_forward_time": 0.02568793296813965, + "step": 4100 + }, + { + "epoch": 6.256103515625e-06, + "step": 4100, + "training_step_time": 0.10692286491394043 + }, + { + "epoch": 6.25762939453125e-06, + "model_forward_time": 0.025857210159301758, + "step": 4101 + }, + { + "epoch": 6.25762939453125e-06, + "step": 4101, + "training_step_time": 0.10795879364013672 + }, + { + "epoch": 6.2591552734375e-06, + "model_forward_time": 0.0258638858795166, + "step": 4102 + }, + { + "epoch": 6.2591552734375e-06, + "step": 4102, + "training_step_time": 0.11413073539733887 + }, + { + "epoch": 6.26068115234375e-06, + "model_forward_time": 0.02537846565246582, + "step": 4103 + }, + { + "epoch": 6.26068115234375e-06, + "step": 4103, + "training_step_time": 0.11638569831848145 + }, + { + "epoch": 6.26220703125e-06, + "model_forward_time": 0.02515888214111328, + "step": 4104 + }, + { + "epoch": 6.26220703125e-06, + "step": 4104, + "training_step_time": 0.1065373420715332 + }, + { + "epoch": 6.26373291015625e-06, + "model_forward_time": 0.025812387466430664, + "step": 4105 + }, + { + "epoch": 6.26373291015625e-06, + "step": 4105, + "training_step_time": 0.1092829704284668 + }, + { + "epoch": 6.2652587890625e-06, + "model_forward_time": 0.02547621726989746, + "step": 4106 + }, + { + "epoch": 6.2652587890625e-06, + "step": 4106, + "training_step_time": 0.10764813423156738 + }, + { + "epoch": 6.26678466796875e-06, + "model_forward_time": 0.02538156509399414, + "step": 4107 + }, + { + "epoch": 6.26678466796875e-06, + "step": 4107, + "training_step_time": 0.10922741889953613 + }, + { + "epoch": 6.268310546875e-06, + "model_forward_time": 0.02568960189819336, + "step": 4108 + }, + { + "epoch": 6.268310546875e-06, + "step": 4108, + "training_step_time": 0.11070084571838379 + }, + { + "epoch": 6.26983642578125e-06, + "model_forward_time": 0.02570319175720215, + "step": 4109 + }, + { + "epoch": 6.26983642578125e-06, + "step": 4109, + "training_step_time": 0.10958242416381836 + }, + { + "epoch": 6.2713623046875e-06, + "grad_norm": 0.310067743062973, + "learning_rate": 9.794489834629455e-05, + "loss": 0.0907, + "step": 4110 + }, + { + "epoch": 6.2713623046875e-06, + "model_forward_time": 0.02605748176574707, + "step": 4110 + }, + { + "epoch": 6.2713623046875e-06, + "step": 4110, + "training_step_time": 0.10885930061340332 + }, + { + "epoch": 6.27288818359375e-06, + "model_forward_time": 0.025922298431396484, + "step": 4111 + }, + { + "epoch": 6.27288818359375e-06, + "step": 4111, + "training_step_time": 0.11516809463500977 + }, + { + "epoch": 6.2744140625e-06, + "model_forward_time": 0.026095151901245117, + "step": 4112 + }, + { + "epoch": 6.2744140625e-06, + "step": 4112, + "training_step_time": 0.18856191635131836 + }, + { + "epoch": 6.27593994140625e-06, + "model_forward_time": 0.02498459815979004, + "step": 4113 + }, + { + "epoch": 6.27593994140625e-06, + "step": 4113, + "training_step_time": 0.11460161209106445 + }, + { + "epoch": 6.2774658203125e-06, + "model_forward_time": 0.0253603458404541, + "step": 4114 + }, + { + "epoch": 6.2774658203125e-06, + "step": 4114, + "training_step_time": 0.14036059379577637 + }, + { + "epoch": 6.27899169921875e-06, + "model_forward_time": 0.02765345573425293, + "step": 4115 + }, + { + "epoch": 6.27899169921875e-06, + "step": 4115, + "training_step_time": 0.1616523265838623 + }, + { + "epoch": 6.280517578125e-06, + "model_forward_time": 0.02487802505493164, + "step": 4116 + }, + { + "epoch": 6.280517578125e-06, + "step": 4116, + "training_step_time": 0.21881699562072754 + }, + { + "epoch": 6.28204345703125e-06, + "model_forward_time": 0.024625539779663086, + "step": 4117 + }, + { + "epoch": 6.28204345703125e-06, + "step": 4117, + "training_step_time": 0.20145440101623535 + }, + { + "epoch": 6.2835693359375e-06, + "model_forward_time": 0.024894237518310547, + "step": 4118 + }, + { + "epoch": 6.2835693359375e-06, + "step": 4118, + "training_step_time": 0.13270807266235352 + }, + { + "epoch": 6.28509521484375e-06, + "model_forward_time": 0.02697134017944336, + "step": 4119 + }, + { + "epoch": 6.28509521484375e-06, + "step": 4119, + "training_step_time": 0.2041921615600586 + }, + { + "epoch": 6.28662109375e-06, + "grad_norm": 0.6953232884407043, + "learning_rate": 9.792923009113522e-05, + "loss": 0.0958, + "step": 4120 + }, + { + "epoch": 6.28662109375e-06, + "model_forward_time": 0.024796724319458008, + "step": 4120 + }, + { + "epoch": 6.28662109375e-06, + "step": 4120, + "training_step_time": 0.11383175849914551 + }, + { + "epoch": 6.28814697265625e-06, + "model_forward_time": 0.024889707565307617, + "step": 4121 + }, + { + "epoch": 6.28814697265625e-06, + "step": 4121, + "training_step_time": 0.10593914985656738 + }, + { + "epoch": 6.2896728515625e-06, + "model_forward_time": 0.025442838668823242, + "step": 4122 + }, + { + "epoch": 6.2896728515625e-06, + "step": 4122, + "training_step_time": 0.1990976333618164 + }, + { + "epoch": 6.29119873046875e-06, + "model_forward_time": 0.02472996711730957, + "step": 4123 + }, + { + "epoch": 6.29119873046875e-06, + "step": 4123, + "training_step_time": 0.10743141174316406 + }, + { + "epoch": 6.292724609375e-06, + "model_forward_time": 0.02549004554748535, + "step": 4124 + }, + { + "epoch": 6.292724609375e-06, + "step": 4124, + "training_step_time": 0.10535788536071777 + }, + { + "epoch": 6.29425048828125e-06, + "model_forward_time": 0.02464461326599121, + "step": 4125 + }, + { + "epoch": 6.29425048828125e-06, + "step": 4125, + "training_step_time": 0.11254453659057617 + }, + { + "epoch": 6.2957763671875e-06, + "model_forward_time": 0.025549888610839844, + "step": 4126 + }, + { + "epoch": 6.2957763671875e-06, + "step": 4126, + "training_step_time": 0.11060237884521484 + }, + { + "epoch": 6.29730224609375e-06, + "model_forward_time": 0.025659799575805664, + "step": 4127 + }, + { + "epoch": 6.29730224609375e-06, + "step": 4127, + "training_step_time": 0.109893798828125 + }, + { + "epoch": 6.298828125e-06, + "model_forward_time": 0.025371551513671875, + "step": 4128 + }, + { + "epoch": 6.298828125e-06, + "step": 4128, + "training_step_time": 0.10962367057800293 + }, + { + "epoch": 6.30035400390625e-06, + "model_forward_time": 0.025635957717895508, + "step": 4129 + }, + { + "epoch": 6.30035400390625e-06, + "step": 4129, + "training_step_time": 0.1079559326171875 + }, + { + "epoch": 6.3018798828125e-06, + "grad_norm": 0.3230479657649994, + "learning_rate": 9.791350359744189e-05, + "loss": 0.0931, + "step": 4130 + }, + { + "epoch": 6.3018798828125e-06, + "model_forward_time": 0.025505542755126953, + "step": 4130 + }, + { + "epoch": 6.3018798828125e-06, + "step": 4130, + "training_step_time": 0.10912442207336426 + }, + { + "epoch": 6.30340576171875e-06, + "model_forward_time": 0.025497913360595703, + "step": 4131 + }, + { + "epoch": 6.30340576171875e-06, + "step": 4131, + "training_step_time": 0.1731739044189453 + }, + { + "epoch": 6.304931640625e-06, + "model_forward_time": 0.025216102600097656, + "step": 4132 + }, + { + "epoch": 6.304931640625e-06, + "step": 4132, + "training_step_time": 0.15938234329223633 + }, + { + "epoch": 6.30645751953125e-06, + "model_forward_time": 0.024874210357666016, + "step": 4133 + }, + { + "epoch": 6.30645751953125e-06, + "step": 4133, + "training_step_time": 0.11167740821838379 + }, + { + "epoch": 6.3079833984375e-06, + "model_forward_time": 0.025465965270996094, + "step": 4134 + }, + { + "epoch": 6.3079833984375e-06, + "step": 4134, + "training_step_time": 0.1725161075592041 + }, + { + "epoch": 6.30950927734375e-06, + "model_forward_time": 0.02478313446044922, + "step": 4135 + }, + { + "epoch": 6.30950927734375e-06, + "step": 4135, + "training_step_time": 0.16680645942687988 + }, + { + "epoch": 6.31103515625e-06, + "model_forward_time": 0.024608612060546875, + "step": 4136 + }, + { + "epoch": 6.31103515625e-06, + "step": 4136, + "training_step_time": 0.10528993606567383 + }, + { + "epoch": 6.31256103515625e-06, + "model_forward_time": 0.025942087173461914, + "step": 4137 + }, + { + "epoch": 6.31256103515625e-06, + "step": 4137, + "training_step_time": 0.11053037643432617 + }, + { + "epoch": 6.3140869140625e-06, + "model_forward_time": 0.02563190460205078, + "step": 4138 + }, + { + "epoch": 6.3140869140625e-06, + "step": 4138, + "training_step_time": 0.11240792274475098 + }, + { + "epoch": 6.31561279296875e-06, + "model_forward_time": 0.02590155601501465, + "step": 4139 + }, + { + "epoch": 6.31561279296875e-06, + "step": 4139, + "training_step_time": 0.1082000732421875 + }, + { + "epoch": 6.317138671875e-06, + "grad_norm": 0.33710578083992004, + "learning_rate": 9.789771888432375e-05, + "loss": 0.0952, + "step": 4140 + }, + { + "epoch": 6.317138671875e-06, + "model_forward_time": 0.02593541145324707, + "step": 4140 + }, + { + "epoch": 6.317138671875e-06, + "step": 4140, + "training_step_time": 0.1146235466003418 + }, + { + "epoch": 6.31866455078125e-06, + "model_forward_time": 0.02570939064025879, + "step": 4141 + }, + { + "epoch": 6.31866455078125e-06, + "step": 4141, + "training_step_time": 0.10707736015319824 + }, + { + "epoch": 6.3201904296875e-06, + "model_forward_time": 0.025591611862182617, + "step": 4142 + }, + { + "epoch": 6.3201904296875e-06, + "step": 4142, + "training_step_time": 0.10786318778991699 + }, + { + "epoch": 6.32171630859375e-06, + "model_forward_time": 0.025484561920166016, + "step": 4143 + }, + { + "epoch": 6.32171630859375e-06, + "step": 4143, + "training_step_time": 0.10895299911499023 + }, + { + "epoch": 6.3232421875e-06, + "model_forward_time": 0.026085853576660156, + "step": 4144 + }, + { + "epoch": 6.3232421875e-06, + "step": 4144, + "training_step_time": 0.11407089233398438 + }, + { + "epoch": 6.32476806640625e-06, + "model_forward_time": 0.024844884872436523, + "step": 4145 + }, + { + "epoch": 6.32476806640625e-06, + "step": 4145, + "training_step_time": 0.11085891723632812 + }, + { + "epoch": 6.3262939453125e-06, + "model_forward_time": 0.02576899528503418, + "step": 4146 + }, + { + "epoch": 6.3262939453125e-06, + "step": 4146, + "training_step_time": 0.11180472373962402 + }, + { + "epoch": 6.32781982421875e-06, + "model_forward_time": 0.02591681480407715, + "step": 4147 + }, + { + "epoch": 6.32781982421875e-06, + "step": 4147, + "training_step_time": 0.11078500747680664 + }, + { + "epoch": 6.329345703125e-06, + "model_forward_time": 0.02558112144470215, + "step": 4148 + }, + { + "epoch": 6.329345703125e-06, + "step": 4148, + "training_step_time": 0.10840368270874023 + }, + { + "epoch": 6.33087158203125e-06, + "model_forward_time": 0.025533676147460938, + "step": 4149 + }, + { + "epoch": 6.33087158203125e-06, + "step": 4149, + "training_step_time": 0.10827779769897461 + }, + { + "epoch": 6.3323974609375e-06, + "grad_norm": 0.43281710147857666, + "learning_rate": 9.788187597096069e-05, + "loss": 0.1034, + "step": 4150 + }, + { + "epoch": 6.3323974609375e-06, + "model_forward_time": 0.025478601455688477, + "step": 4150 + }, + { + "epoch": 6.3323974609375e-06, + "step": 4150, + "training_step_time": 0.11036348342895508 + }, + { + "epoch": 6.33392333984375e-06, + "model_forward_time": 0.02601313591003418, + "step": 4151 + }, + { + "epoch": 6.33392333984375e-06, + "step": 4151, + "training_step_time": 0.1087794303894043 + }, + { + "epoch": 6.33544921875e-06, + "model_forward_time": 0.025410890579223633, + "step": 4152 + }, + { + "epoch": 6.33544921875e-06, + "step": 4152, + "training_step_time": 0.11059260368347168 + }, + { + "epoch": 6.33697509765625e-06, + "model_forward_time": 0.025416851043701172, + "step": 4153 + }, + { + "epoch": 6.33697509765625e-06, + "step": 4153, + "training_step_time": 0.145219087600708 + }, + { + "epoch": 6.3385009765625e-06, + "model_forward_time": 0.0247952938079834, + "step": 4154 + }, + { + "epoch": 6.3385009765625e-06, + "step": 4154, + "training_step_time": 0.1537766456604004 + }, + { + "epoch": 6.34002685546875e-06, + "model_forward_time": 0.024566173553466797, + "step": 4155 + }, + { + "epoch": 6.34002685546875e-06, + "step": 4155, + "training_step_time": 0.14045000076293945 + }, + { + "epoch": 6.341552734375e-06, + "model_forward_time": 0.024799108505249023, + "step": 4156 + }, + { + "epoch": 6.341552734375e-06, + "step": 4156, + "training_step_time": 0.12798380851745605 + }, + { + "epoch": 6.34307861328125e-06, + "model_forward_time": 0.025618314743041992, + "step": 4157 + }, + { + "epoch": 6.34307861328125e-06, + "step": 4157, + "training_step_time": 0.21854376792907715 + }, + { + "epoch": 6.3446044921875e-06, + "model_forward_time": 0.024955272674560547, + "step": 4158 + }, + { + "epoch": 6.3446044921875e-06, + "step": 4158, + "training_step_time": 0.172349214553833 + }, + { + "epoch": 6.34613037109375e-06, + "model_forward_time": 0.024769306182861328, + "step": 4159 + }, + { + "epoch": 6.34613037109375e-06, + "step": 4159, + "training_step_time": 0.21823573112487793 + }, + { + "epoch": 6.34765625e-06, + "grad_norm": 0.6297304630279541, + "learning_rate": 9.786597487660337e-05, + "loss": 0.0858, + "step": 4160 + }, + { + "epoch": 6.34765625e-06, + "model_forward_time": 0.02901601791381836, + "step": 4160 + }, + { + "epoch": 6.34765625e-06, + "step": 4160, + "training_step_time": 0.18529033660888672 + }, + { + "epoch": 6.34918212890625e-06, + "model_forward_time": 0.026053667068481445, + "step": 4161 + }, + { + "epoch": 6.34918212890625e-06, + "step": 4161, + "training_step_time": 0.2175889015197754 + }, + { + "epoch": 6.3507080078125e-06, + "model_forward_time": 0.02538609504699707, + "step": 4162 + }, + { + "epoch": 6.3507080078125e-06, + "step": 4162, + "training_step_time": 0.14953351020812988 + }, + { + "epoch": 6.35223388671875e-06, + "model_forward_time": 0.024985790252685547, + "step": 4163 + }, + { + "epoch": 6.35223388671875e-06, + "step": 4163, + "training_step_time": 0.1788921356201172 + }, + { + "epoch": 6.353759765625e-06, + "model_forward_time": 0.02517533302307129, + "step": 4164 + }, + { + "epoch": 6.353759765625e-06, + "step": 4164, + "training_step_time": 0.14028716087341309 + }, + { + "epoch": 6.35528564453125e-06, + "model_forward_time": 0.02542567253112793, + "step": 4165 + }, + { + "epoch": 6.35528564453125e-06, + "step": 4165, + "training_step_time": 0.11775922775268555 + }, + { + "epoch": 6.3568115234375e-06, + "model_forward_time": 0.02551102638244629, + "step": 4166 + }, + { + "epoch": 6.3568115234375e-06, + "step": 4166, + "training_step_time": 0.10731697082519531 + }, + { + "epoch": 6.35833740234375e-06, + "model_forward_time": 0.025676965713500977, + "step": 4167 + }, + { + "epoch": 6.35833740234375e-06, + "step": 4167, + "training_step_time": 0.10850191116333008 + }, + { + "epoch": 6.35986328125e-06, + "model_forward_time": 0.025908470153808594, + "step": 4168 + }, + { + "epoch": 6.35986328125e-06, + "step": 4168, + "training_step_time": 0.1078636646270752 + }, + { + "epoch": 6.36138916015625e-06, + "model_forward_time": 0.02579975128173828, + "step": 4169 + }, + { + "epoch": 6.36138916015625e-06, + "step": 4169, + "training_step_time": 0.10861635208129883 + }, + { + "epoch": 6.3629150390625e-06, + "grad_norm": 0.5773082375526428, + "learning_rate": 9.785001562057309e-05, + "loss": 0.0785, + "step": 4170 + }, + { + "epoch": 6.3629150390625e-06, + "model_forward_time": 0.025102853775024414, + "step": 4170 + }, + { + "epoch": 6.3629150390625e-06, + "step": 4170, + "training_step_time": 0.11014986038208008 + }, + { + "epoch": 6.36444091796875e-06, + "model_forward_time": 0.025821685791015625, + "step": 4171 + }, + { + "epoch": 6.36444091796875e-06, + "step": 4171, + "training_step_time": 0.10789680480957031 + }, + { + "epoch": 6.365966796875e-06, + "model_forward_time": 0.02501654624938965, + "step": 4172 + }, + { + "epoch": 6.365966796875e-06, + "step": 4172, + "training_step_time": 0.10932159423828125 + }, + { + "epoch": 6.36749267578125e-06, + "model_forward_time": 0.026610851287841797, + "step": 4173 + }, + { + "epoch": 6.36749267578125e-06, + "step": 4173, + "training_step_time": 0.10804438591003418 + }, + { + "epoch": 6.3690185546875e-06, + "model_forward_time": 0.029104232788085938, + "step": 4174 + }, + { + "epoch": 6.3690185546875e-06, + "step": 4174, + "training_step_time": 0.11399102210998535 + }, + { + "epoch": 6.37054443359375e-06, + "model_forward_time": 0.025867462158203125, + "step": 4175 + }, + { + "epoch": 6.37054443359375e-06, + "step": 4175, + "training_step_time": 0.10845398902893066 + }, + { + "epoch": 6.3720703125e-06, + "model_forward_time": 0.025876283645629883, + "step": 4176 + }, + { + "epoch": 6.3720703125e-06, + "step": 4176, + "training_step_time": 0.1079716682434082 + }, + { + "epoch": 6.37359619140625e-06, + "model_forward_time": 0.025553464889526367, + "step": 4177 + }, + { + "epoch": 6.37359619140625e-06, + "step": 4177, + "training_step_time": 0.11165976524353027 + }, + { + "epoch": 6.3751220703125e-06, + "model_forward_time": 0.025737285614013672, + "step": 4178 + }, + { + "epoch": 6.3751220703125e-06, + "step": 4178, + "training_step_time": 0.17055392265319824 + }, + { + "epoch": 6.37664794921875e-06, + "model_forward_time": 0.02431488037109375, + "step": 4179 + }, + { + "epoch": 6.37664794921875e-06, + "step": 4179, + "training_step_time": 0.17862343788146973 + }, + { + "epoch": 6.378173828125e-06, + "grad_norm": 0.38035139441490173, + "learning_rate": 9.783399822226189e-05, + "loss": 0.0876, + "step": 4180 + }, + { + "epoch": 6.378173828125e-06, + "model_forward_time": 0.024325847625732422, + "step": 4180 + }, + { + "epoch": 6.378173828125e-06, + "step": 4180, + "training_step_time": 0.10556364059448242 + }, + { + "epoch": 6.37969970703125e-06, + "model_forward_time": 0.02483963966369629, + "step": 4181 + }, + { + "epoch": 6.37969970703125e-06, + "step": 4181, + "training_step_time": 0.1257801055908203 + }, + { + "epoch": 6.3812255859375e-06, + "model_forward_time": 0.024977445602416992, + "step": 4182 + }, + { + "epoch": 6.3812255859375e-06, + "step": 4182, + "training_step_time": 0.12380051612854004 + }, + { + "epoch": 6.38275146484375e-06, + "model_forward_time": 0.025151968002319336, + "step": 4183 + }, + { + "epoch": 6.38275146484375e-06, + "step": 4183, + "training_step_time": 0.11729049682617188 + }, + { + "epoch": 6.38427734375e-06, + "model_forward_time": 0.02647566795349121, + "step": 4184 + }, + { + "epoch": 6.38427734375e-06, + "step": 4184, + "training_step_time": 0.11842012405395508 + }, + { + "epoch": 6.38580322265625e-06, + "model_forward_time": 0.025264501571655273, + "step": 4185 + }, + { + "epoch": 6.38580322265625e-06, + "step": 4185, + "training_step_time": 0.11367273330688477 + }, + { + "epoch": 6.3873291015625e-06, + "model_forward_time": 0.025666475296020508, + "step": 4186 + }, + { + "epoch": 6.3873291015625e-06, + "step": 4186, + "training_step_time": 0.11475920677185059 + }, + { + "epoch": 6.38885498046875e-06, + "model_forward_time": 0.02521038055419922, + "step": 4187 + }, + { + "epoch": 6.38885498046875e-06, + "step": 4187, + "training_step_time": 0.10945248603820801 + }, + { + "epoch": 6.390380859375e-06, + "model_forward_time": 0.025188207626342773, + "step": 4188 + }, + { + "epoch": 6.390380859375e-06, + "step": 4188, + "training_step_time": 0.10903692245483398 + }, + { + "epoch": 6.39190673828125e-06, + "model_forward_time": 0.025120019912719727, + "step": 4189 + }, + { + "epoch": 6.39190673828125e-06, + "step": 4189, + "training_step_time": 0.11234569549560547 + }, + { + "epoch": 6.3934326171875e-06, + "grad_norm": 0.5089605450630188, + "learning_rate": 9.781792270113241e-05, + "loss": 0.0875, + "step": 4190 + }, + { + "epoch": 6.3934326171875e-06, + "model_forward_time": 0.025568246841430664, + "step": 4190 + }, + { + "epoch": 6.3934326171875e-06, + "step": 4190, + "training_step_time": 0.10845589637756348 + }, + { + "epoch": 6.39495849609375e-06, + "model_forward_time": 0.025587081909179688, + "step": 4191 + }, + { + "epoch": 6.39495849609375e-06, + "step": 4191, + "training_step_time": 0.10786962509155273 + }, + { + "epoch": 6.396484375e-06, + "model_forward_time": 0.025639772415161133, + "step": 4192 + }, + { + "epoch": 6.396484375e-06, + "step": 4192, + "training_step_time": 0.10895895957946777 + }, + { + "epoch": 6.39801025390625e-06, + "model_forward_time": 0.025166749954223633, + "step": 4193 + }, + { + "epoch": 6.39801025390625e-06, + "step": 4193, + "training_step_time": 0.11327099800109863 + }, + { + "epoch": 6.3995361328125e-06, + "model_forward_time": 0.025104761123657227, + "step": 4194 + }, + { + "epoch": 6.3995361328125e-06, + "step": 4194, + "training_step_time": 0.10799121856689453 + }, + { + "epoch": 6.40106201171875e-06, + "model_forward_time": 0.025744199752807617, + "step": 4195 + }, + { + "epoch": 6.40106201171875e-06, + "step": 4195, + "training_step_time": 0.10897135734558105 + }, + { + "epoch": 6.402587890625e-06, + "model_forward_time": 0.026881933212280273, + "step": 4196 + }, + { + "epoch": 6.402587890625e-06, + "step": 4196, + "training_step_time": 0.10857796669006348 + }, + { + "epoch": 6.40411376953125e-06, + "model_forward_time": 0.024936676025390625, + "step": 4197 + }, + { + "epoch": 6.40411376953125e-06, + "step": 4197, + "training_step_time": 0.11081933975219727 + }, + { + "epoch": 6.4056396484375e-06, + "model_forward_time": 0.025452613830566406, + "step": 4198 + }, + { + "epoch": 6.4056396484375e-06, + "step": 4198, + "training_step_time": 0.11033487319946289 + }, + { + "epoch": 6.40716552734375e-06, + "model_forward_time": 0.025463342666625977, + "step": 4199 + }, + { + "epoch": 6.40716552734375e-06, + "step": 4199, + "training_step_time": 0.10696649551391602 + }, + { + "epoch": 6.40869140625e-06, + "grad_norm": 0.3083915710449219, + "learning_rate": 9.780178907671789e-05, + "loss": 0.0864, + "step": 4200 + }, + { + "epoch": 6.40869140625e-06, + "model_forward_time": 0.025232315063476562, + "step": 4200 + }, + { + "epoch": 6.40869140625e-06, + "step": 4200, + "training_step_time": 0.15863680839538574 + }, + { + "epoch": 6.41021728515625e-06, + "model_forward_time": 0.024700164794921875, + "step": 4201 + }, + { + "epoch": 6.41021728515625e-06, + "step": 4201, + "training_step_time": 0.11150646209716797 + }, + { + "epoch": 6.4117431640625e-06, + "model_forward_time": 0.024940013885498047, + "step": 4202 + }, + { + "epoch": 6.4117431640625e-06, + "step": 4202, + "training_step_time": 0.1314857006072998 + }, + { + "epoch": 6.41326904296875e-06, + "model_forward_time": 0.02516341209411621, + "step": 4203 + }, + { + "epoch": 6.41326904296875e-06, + "step": 4203, + "training_step_time": 0.14148187637329102 + }, + { + "epoch": 6.414794921875e-06, + "model_forward_time": 0.02490544319152832, + "step": 4204 + }, + { + "epoch": 6.414794921875e-06, + "step": 4204, + "training_step_time": 0.20806312561035156 + }, + { + "epoch": 6.41632080078125e-06, + "model_forward_time": 0.024341583251953125, + "step": 4205 + }, + { + "epoch": 6.41632080078125e-06, + "step": 4205, + "training_step_time": 0.16859102249145508 + }, + { + "epoch": 6.4178466796875e-06, + "model_forward_time": 0.024672985076904297, + "step": 4206 + }, + { + "epoch": 6.4178466796875e-06, + "step": 4206, + "training_step_time": 0.1752152442932129 + }, + { + "epoch": 6.41937255859375e-06, + "model_forward_time": 0.024674415588378906, + "step": 4207 + }, + { + "epoch": 6.41937255859375e-06, + "step": 4207, + "training_step_time": 0.12914180755615234 + }, + { + "epoch": 6.4208984375e-06, + "model_forward_time": 0.02481985092163086, + "step": 4208 + }, + { + "epoch": 6.4208984375e-06, + "step": 4208, + "training_step_time": 0.10686182975769043 + }, + { + "epoch": 6.42242431640625e-06, + "model_forward_time": 0.02546072006225586, + "step": 4209 + }, + { + "epoch": 6.42242431640625e-06, + "step": 4209, + "training_step_time": 0.12917852401733398 + }, + { + "epoch": 6.4239501953125e-06, + "grad_norm": 0.37601393461227417, + "learning_rate": 9.778559736862223e-05, + "loss": 0.0863, + "step": 4210 + }, + { + "epoch": 6.4239501953125e-06, + "model_forward_time": 0.02543807029724121, + "step": 4210 + }, + { + "epoch": 6.4239501953125e-06, + "step": 4210, + "training_step_time": 0.1768176555633545 + }, + { + "epoch": 6.42547607421875e-06, + "model_forward_time": 0.025008678436279297, + "step": 4211 + }, + { + "epoch": 6.42547607421875e-06, + "step": 4211, + "training_step_time": 0.10656189918518066 + }, + { + "epoch": 6.427001953125e-06, + "model_forward_time": 0.02434086799621582, + "step": 4212 + }, + { + "epoch": 6.427001953125e-06, + "step": 4212, + "training_step_time": 0.10739660263061523 + }, + { + "epoch": 6.42852783203125e-06, + "model_forward_time": 0.024610042572021484, + "step": 4213 + }, + { + "epoch": 6.42852783203125e-06, + "step": 4213, + "training_step_time": 0.10789179801940918 + }, + { + "epoch": 6.4300537109375e-06, + "model_forward_time": 0.025213956832885742, + "step": 4214 + }, + { + "epoch": 6.4300537109375e-06, + "step": 4214, + "training_step_time": 0.11526608467102051 + }, + { + "epoch": 6.43157958984375e-06, + "model_forward_time": 0.02553081512451172, + "step": 4215 + }, + { + "epoch": 6.43157958984375e-06, + "step": 4215, + "training_step_time": 0.11276459693908691 + }, + { + "epoch": 6.43310546875e-06, + "model_forward_time": 0.025235652923583984, + "step": 4216 + }, + { + "epoch": 6.43310546875e-06, + "step": 4216, + "training_step_time": 0.10721158981323242 + }, + { + "epoch": 6.43463134765625e-06, + "model_forward_time": 0.025502443313598633, + "step": 4217 + }, + { + "epoch": 6.43463134765625e-06, + "step": 4217, + "training_step_time": 0.10886263847351074 + }, + { + "epoch": 6.4361572265625e-06, + "model_forward_time": 0.02545952796936035, + "step": 4218 + }, + { + "epoch": 6.4361572265625e-06, + "step": 4218, + "training_step_time": 0.10973095893859863 + }, + { + "epoch": 6.43768310546875e-06, + "model_forward_time": 0.025874853134155273, + "step": 4219 + }, + { + "epoch": 6.43768310546875e-06, + "step": 4219, + "training_step_time": 0.11324596405029297 + }, + { + "epoch": 6.439208984375e-06, + "grad_norm": 0.38798025250434875, + "learning_rate": 9.776934759651988e-05, + "loss": 0.101, + "step": 4220 + }, + { + "epoch": 6.439208984375e-06, + "model_forward_time": 0.024994611740112305, + "step": 4220 + }, + { + "epoch": 6.439208984375e-06, + "step": 4220, + "training_step_time": 0.2174069881439209 + }, + { + "epoch": 6.44073486328125e-06, + "model_forward_time": 0.02433609962463379, + "step": 4221 + }, + { + "epoch": 6.44073486328125e-06, + "step": 4221, + "training_step_time": 0.12962818145751953 + }, + { + "epoch": 6.4422607421875e-06, + "model_forward_time": 0.02396702766418457, + "step": 4222 + }, + { + "epoch": 6.4422607421875e-06, + "step": 4222, + "training_step_time": 0.18463897705078125 + }, + { + "epoch": 6.44378662109375e-06, + "model_forward_time": 0.024483919143676758, + "step": 4223 + }, + { + "epoch": 6.44378662109375e-06, + "step": 4223, + "training_step_time": 0.13622665405273438 + }, + { + "epoch": 6.4453125e-06, + "model_forward_time": 0.02459263801574707, + "step": 4224 + }, + { + "epoch": 6.4453125e-06, + "step": 4224, + "training_step_time": 0.1140594482421875 + }, + { + "epoch": 6.44683837890625e-06, + "model_forward_time": 0.025010108947753906, + "step": 4225 + }, + { + "epoch": 6.44683837890625e-06, + "step": 4225, + "training_step_time": 0.11551117897033691 + }, + { + "epoch": 6.4483642578125e-06, + "model_forward_time": 0.025216102600097656, + "step": 4226 + }, + { + "epoch": 6.4483642578125e-06, + "step": 4226, + "training_step_time": 0.11089873313903809 + }, + { + "epoch": 6.44989013671875e-06, + "model_forward_time": 0.0251767635345459, + "step": 4227 + }, + { + "epoch": 6.44989013671875e-06, + "step": 4227, + "training_step_time": 0.10906553268432617 + }, + { + "epoch": 6.451416015625e-06, + "model_forward_time": 0.025974035263061523, + "step": 4228 + }, + { + "epoch": 6.451416015625e-06, + "step": 4228, + "training_step_time": 0.11062121391296387 + }, + { + "epoch": 6.45294189453125e-06, + "model_forward_time": 0.02478337287902832, + "step": 4229 + }, + { + "epoch": 6.45294189453125e-06, + "step": 4229, + "training_step_time": 0.10900616645812988 + }, + { + "epoch": 6.4544677734375e-06, + "grad_norm": 0.49198153614997864, + "learning_rate": 9.775303978015585e-05, + "loss": 0.0974, + "step": 4230 + }, + { + "epoch": 6.4544677734375e-06, + "model_forward_time": 0.02502274513244629, + "step": 4230 + }, + { + "epoch": 6.4544677734375e-06, + "step": 4230, + "training_step_time": 0.10780000686645508 + }, + { + "epoch": 6.45599365234375e-06, + "model_forward_time": 0.027882099151611328, + "step": 4231 + }, + { + "epoch": 6.45599365234375e-06, + "step": 4231, + "training_step_time": 0.11106705665588379 + }, + { + "epoch": 6.45751953125e-06, + "model_forward_time": 0.025277137756347656, + "step": 4232 + }, + { + "epoch": 6.45751953125e-06, + "step": 4232, + "training_step_time": 0.1104590892791748 + }, + { + "epoch": 6.45904541015625e-06, + "model_forward_time": 0.02507781982421875, + "step": 4233 + }, + { + "epoch": 6.45904541015625e-06, + "step": 4233, + "training_step_time": 0.11197614669799805 + }, + { + "epoch": 6.4605712890625e-06, + "model_forward_time": 0.02429938316345215, + "step": 4234 + }, + { + "epoch": 6.4605712890625e-06, + "step": 4234, + "training_step_time": 0.1090998649597168 + }, + { + "epoch": 6.46209716796875e-06, + "model_forward_time": 0.025304079055786133, + "step": 4235 + }, + { + "epoch": 6.46209716796875e-06, + "step": 4235, + "training_step_time": 0.10630369186401367 + }, + { + "epoch": 6.463623046875e-06, + "model_forward_time": 0.024771451950073242, + "step": 4236 + }, + { + "epoch": 6.463623046875e-06, + "step": 4236, + "training_step_time": 0.10446619987487793 + }, + { + "epoch": 6.46514892578125e-06, + "model_forward_time": 0.02440619468688965, + "step": 4237 + }, + { + "epoch": 6.46514892578125e-06, + "step": 4237, + "training_step_time": 0.10984206199645996 + }, + { + "epoch": 6.4666748046875e-06, + "model_forward_time": 0.0257875919342041, + "step": 4238 + }, + { + "epoch": 6.4666748046875e-06, + "step": 4238, + "training_step_time": 0.1082451343536377 + }, + { + "epoch": 6.46820068359375e-06, + "model_forward_time": 0.024472475051879883, + "step": 4239 + }, + { + "epoch": 6.46820068359375e-06, + "step": 4239, + "training_step_time": 0.1058354377746582 + }, + { + "epoch": 6.4697265625e-06, + "grad_norm": 0.5333660244941711, + "learning_rate": 9.773667393934567e-05, + "loss": 0.1021, + "step": 4240 + }, + { + "epoch": 6.4697265625e-06, + "model_forward_time": 0.024556875228881836, + "step": 4240 + }, + { + "epoch": 6.4697265625e-06, + "step": 4240, + "training_step_time": 0.10894632339477539 + }, + { + "epoch": 6.47125244140625e-06, + "model_forward_time": 0.024594783782958984, + "step": 4241 + }, + { + "epoch": 6.47125244140625e-06, + "step": 4241, + "training_step_time": 0.10696077346801758 + }, + { + "epoch": 6.4727783203125e-06, + "model_forward_time": 0.028829336166381836, + "step": 4242 + }, + { + "epoch": 6.4727783203125e-06, + "step": 4242, + "training_step_time": 0.11257076263427734 + }, + { + "epoch": 6.47430419921875e-06, + "model_forward_time": 0.024416208267211914, + "step": 4243 + }, + { + "epoch": 6.47430419921875e-06, + "step": 4243, + "training_step_time": 0.11062979698181152 + }, + { + "epoch": 6.475830078125e-06, + "model_forward_time": 0.024642229080200195, + "step": 4244 + }, + { + "epoch": 6.475830078125e-06, + "step": 4244, + "training_step_time": 0.10557866096496582 + }, + { + "epoch": 6.47735595703125e-06, + "model_forward_time": 0.024239540100097656, + "step": 4245 + }, + { + "epoch": 6.47735595703125e-06, + "step": 4245, + "training_step_time": 0.22206473350524902 + }, + { + "epoch": 6.4788818359375e-06, + "model_forward_time": 0.023090124130249023, + "step": 4246 + }, + { + "epoch": 6.4788818359375e-06, + "step": 4246, + "training_step_time": 0.10763239860534668 + }, + { + "epoch": 6.48040771484375e-06, + "model_forward_time": 0.02409815788269043, + "step": 4247 + }, + { + "epoch": 6.48040771484375e-06, + "step": 4247, + "training_step_time": 0.13665151596069336 + }, + { + "epoch": 6.48193359375e-06, + "model_forward_time": 0.02477860450744629, + "step": 4248 + }, + { + "epoch": 6.48193359375e-06, + "step": 4248, + "training_step_time": 0.1581439971923828 + }, + { + "epoch": 6.48345947265625e-06, + "model_forward_time": 0.026182889938354492, + "step": 4249 + }, + { + "epoch": 6.48345947265625e-06, + "step": 4249, + "training_step_time": 0.18472933769226074 + }, + { + "epoch": 6.4849853515625e-06, + "grad_norm": 0.686373770236969, + "learning_rate": 9.772025009397537e-05, + "loss": 0.0912, + "step": 4250 + }, + { + "epoch": 6.4849853515625e-06, + "model_forward_time": 0.024106502532958984, + "step": 4250 + }, + { + "epoch": 6.4849853515625e-06, + "step": 4250, + "training_step_time": 0.18533730506896973 + }, + { + "epoch": 6.48651123046875e-06, + "model_forward_time": 0.02342534065246582, + "step": 4251 + }, + { + "epoch": 6.48651123046875e-06, + "step": 4251, + "training_step_time": 0.15431475639343262 + }, + { + "epoch": 6.488037109375e-06, + "model_forward_time": 0.023751258850097656, + "step": 4252 + }, + { + "epoch": 6.488037109375e-06, + "step": 4252, + "training_step_time": 0.1993701457977295 + }, + { + "epoch": 6.48956298828125e-06, + "model_forward_time": 0.02353668212890625, + "step": 4253 + }, + { + "epoch": 6.48956298828125e-06, + "step": 4253, + "training_step_time": 0.17065000534057617 + }, + { + "epoch": 6.4910888671875e-06, + "model_forward_time": 0.023382902145385742, + "step": 4254 + }, + { + "epoch": 6.4910888671875e-06, + "step": 4254, + "training_step_time": 0.15351653099060059 + }, + { + "epoch": 6.49261474609375e-06, + "model_forward_time": 0.023538827896118164, + "step": 4255 + }, + { + "epoch": 6.49261474609375e-06, + "step": 4255, + "training_step_time": 0.10841250419616699 + }, + { + "epoch": 6.494140625e-06, + "model_forward_time": 0.023777246475219727, + "step": 4256 + }, + { + "epoch": 6.494140625e-06, + "step": 4256, + "training_step_time": 0.11066818237304688 + }, + { + "epoch": 6.49566650390625e-06, + "model_forward_time": 0.024583101272583008, + "step": 4257 + }, + { + "epoch": 6.49566650390625e-06, + "step": 4257, + "training_step_time": 0.11031103134155273 + }, + { + "epoch": 6.4971923828125e-06, + "model_forward_time": 0.024266481399536133, + "step": 4258 + }, + { + "epoch": 6.4971923828125e-06, + "step": 4258, + "training_step_time": 0.11453986167907715 + }, + { + "epoch": 6.49871826171875e-06, + "model_forward_time": 0.024438858032226562, + "step": 4259 + }, + { + "epoch": 6.49871826171875e-06, + "step": 4259, + "training_step_time": 0.10804557800292969 + }, + { + "epoch": 6.500244140625e-06, + "grad_norm": 0.7169239521026611, + "learning_rate": 9.77037682640015e-05, + "loss": 0.0958, + "step": 4260 + }, + { + "epoch": 6.500244140625e-06, + "model_forward_time": 0.024553537368774414, + "step": 4260 + }, + { + "epoch": 6.500244140625e-06, + "step": 4260, + "training_step_time": 0.11106705665588379 + }, + { + "epoch": 6.50177001953125e-06, + "model_forward_time": 0.02481675148010254, + "step": 4261 + }, + { + "epoch": 6.50177001953125e-06, + "step": 4261, + "training_step_time": 0.11205410957336426 + }, + { + "epoch": 6.5032958984375e-06, + "model_forward_time": 0.02467060089111328, + "step": 4262 + }, + { + "epoch": 6.5032958984375e-06, + "step": 4262, + "training_step_time": 0.11338663101196289 + }, + { + "epoch": 6.50482177734375e-06, + "model_forward_time": 0.025105714797973633, + "step": 4263 + }, + { + "epoch": 6.50482177734375e-06, + "step": 4263, + "training_step_time": 0.10671401023864746 + }, + { + "epoch": 6.50634765625e-06, + "model_forward_time": 0.024008512496948242, + "step": 4264 + }, + { + "epoch": 6.50634765625e-06, + "step": 4264, + "training_step_time": 0.10971808433532715 + }, + { + "epoch": 6.50787353515625e-06, + "model_forward_time": 0.024332046508789062, + "step": 4265 + }, + { + "epoch": 6.50787353515625e-06, + "step": 4265, + "training_step_time": 0.10995745658874512 + }, + { + "epoch": 6.5093994140625e-06, + "model_forward_time": 0.0247652530670166, + "step": 4266 + }, + { + "epoch": 6.5093994140625e-06, + "step": 4266, + "training_step_time": 0.11231565475463867 + }, + { + "epoch": 6.51092529296875e-06, + "model_forward_time": 0.02478504180908203, + "step": 4267 + }, + { + "epoch": 6.51092529296875e-06, + "step": 4267, + "training_step_time": 0.10709953308105469 + }, + { + "epoch": 6.512451171875e-06, + "model_forward_time": 0.02458953857421875, + "step": 4268 + }, + { + "epoch": 6.512451171875e-06, + "step": 4268, + "training_step_time": 0.10959029197692871 + }, + { + "epoch": 6.51397705078125e-06, + "model_forward_time": 0.024944067001342773, + "step": 4269 + }, + { + "epoch": 6.51397705078125e-06, + "step": 4269, + "training_step_time": 0.11844182014465332 + }, + { + "epoch": 6.5155029296875e-06, + "grad_norm": 0.5746374726295471, + "learning_rate": 9.7687228469451e-05, + "loss": 0.1016, + "step": 4270 + }, + { + "epoch": 6.5155029296875e-06, + "model_forward_time": 0.02433943748474121, + "step": 4270 + }, + { + "epoch": 6.5155029296875e-06, + "step": 4270, + "training_step_time": 0.11708927154541016 + }, + { + "epoch": 6.51702880859375e-06, + "model_forward_time": 0.024867773056030273, + "step": 4271 + }, + { + "epoch": 6.51702880859375e-06, + "step": 4271, + "training_step_time": 0.11280155181884766 + }, + { + "epoch": 6.5185546875e-06, + "model_forward_time": 0.025377273559570312, + "step": 4272 + }, + { + "epoch": 6.5185546875e-06, + "step": 4272, + "training_step_time": 0.11459827423095703 + }, + { + "epoch": 6.52008056640625e-06, + "model_forward_time": 0.024292707443237305, + "step": 4273 + }, + { + "epoch": 6.52008056640625e-06, + "step": 4273, + "training_step_time": 0.1119072437286377 + }, + { + "epoch": 6.5216064453125e-06, + "model_forward_time": 0.024770498275756836, + "step": 4274 + }, + { + "epoch": 6.5216064453125e-06, + "step": 4274, + "training_step_time": 0.10953712463378906 + }, + { + "epoch": 6.52313232421875e-06, + "model_forward_time": 0.024352550506591797, + "step": 4275 + }, + { + "epoch": 6.52313232421875e-06, + "step": 4275, + "training_step_time": 0.10800933837890625 + }, + { + "epoch": 6.524658203125e-06, + "model_forward_time": 0.025091171264648438, + "step": 4276 + }, + { + "epoch": 6.524658203125e-06, + "step": 4276, + "training_step_time": 0.1094820499420166 + }, + { + "epoch": 6.52618408203125e-06, + "model_forward_time": 0.024437665939331055, + "step": 4277 + }, + { + "epoch": 6.52618408203125e-06, + "step": 4277, + "training_step_time": 0.10814356803894043 + }, + { + "epoch": 6.5277099609375e-06, + "model_forward_time": 0.02449822425842285, + "step": 4278 + }, + { + "epoch": 6.5277099609375e-06, + "step": 4278, + "training_step_time": 0.11236715316772461 + }, + { + "epoch": 6.52923583984375e-06, + "model_forward_time": 0.024829387664794922, + "step": 4279 + }, + { + "epoch": 6.52923583984375e-06, + "step": 4279, + "training_step_time": 0.10782623291015625 + }, + { + "epoch": 6.53076171875e-06, + "grad_norm": 0.3435329496860504, + "learning_rate": 9.76706307304213e-05, + "loss": 0.084, + "step": 4280 + }, + { + "epoch": 6.53076171875e-06, + "model_forward_time": 0.0246884822845459, + "step": 4280 + }, + { + "epoch": 6.53076171875e-06, + "step": 4280, + "training_step_time": 0.10952377319335938 + }, + { + "epoch": 6.53228759765625e-06, + "model_forward_time": 0.024920940399169922, + "step": 4281 + }, + { + "epoch": 6.53228759765625e-06, + "step": 4281, + "training_step_time": 0.10822224617004395 + }, + { + "epoch": 6.5338134765625e-06, + "model_forward_time": 0.024636507034301758, + "step": 4282 + }, + { + "epoch": 6.5338134765625e-06, + "step": 4282, + "training_step_time": 0.10974931716918945 + }, + { + "epoch": 6.53533935546875e-06, + "model_forward_time": 0.02486419677734375, + "step": 4283 + }, + { + "epoch": 6.53533935546875e-06, + "step": 4283, + "training_step_time": 0.10721540451049805 + }, + { + "epoch": 6.536865234375e-06, + "model_forward_time": 0.024460315704345703, + "step": 4284 + }, + { + "epoch": 6.536865234375e-06, + "step": 4284, + "training_step_time": 0.10967063903808594 + }, + { + "epoch": 6.53839111328125e-06, + "model_forward_time": 0.024729490280151367, + "step": 4285 + }, + { + "epoch": 6.53839111328125e-06, + "step": 4285, + "training_step_time": 0.10894012451171875 + }, + { + "epoch": 6.5399169921875e-06, + "model_forward_time": 0.024495363235473633, + "step": 4286 + }, + { + "epoch": 6.5399169921875e-06, + "step": 4286, + "training_step_time": 0.10609817504882812 + }, + { + "epoch": 6.54144287109375e-06, + "model_forward_time": 0.0285031795501709, + "step": 4287 + }, + { + "epoch": 6.54144287109375e-06, + "step": 4287, + "training_step_time": 0.11117935180664062 + }, + { + "epoch": 6.54296875e-06, + "model_forward_time": 0.02460789680480957, + "step": 4288 + }, + { + "epoch": 6.54296875e-06, + "step": 4288, + "training_step_time": 0.1117103099822998 + }, + { + "epoch": 6.54449462890625e-06, + "model_forward_time": 0.024452686309814453, + "step": 4289 + }, + { + "epoch": 6.54449462890625e-06, + "step": 4289, + "training_step_time": 0.10788917541503906 + }, + { + "epoch": 6.5460205078125e-06, + "grad_norm": 0.6555113792419434, + "learning_rate": 9.765397506708023e-05, + "loss": 0.0917, + "step": 4290 + }, + { + "epoch": 6.5460205078125e-06, + "model_forward_time": 0.024636030197143555, + "step": 4290 + }, + { + "epoch": 6.5460205078125e-06, + "step": 4290, + "training_step_time": 0.10979056358337402 + }, + { + "epoch": 6.54754638671875e-06, + "model_forward_time": 0.0249631404876709, + "step": 4291 + }, + { + "epoch": 6.54754638671875e-06, + "step": 4291, + "training_step_time": 0.17656445503234863 + }, + { + "epoch": 6.549072265625e-06, + "model_forward_time": 0.023801803588867188, + "step": 4292 + }, + { + "epoch": 6.549072265625e-06, + "step": 4292, + "training_step_time": 0.12189984321594238 + }, + { + "epoch": 6.55059814453125e-06, + "model_forward_time": 0.02409076690673828, + "step": 4293 + }, + { + "epoch": 6.55059814453125e-06, + "step": 4293, + "training_step_time": 0.11351752281188965 + }, + { + "epoch": 6.5521240234375e-06, + "model_forward_time": 0.02458930015563965, + "step": 4294 + }, + { + "epoch": 6.5521240234375e-06, + "step": 4294, + "training_step_time": 0.1238248348236084 + }, + { + "epoch": 6.55364990234375e-06, + "model_forward_time": 0.02507162094116211, + "step": 4295 + }, + { + "epoch": 6.55364990234375e-06, + "step": 4295, + "training_step_time": 0.17083096504211426 + }, + { + "epoch": 6.55517578125e-06, + "model_forward_time": 0.024291515350341797, + "step": 4296 + }, + { + "epoch": 6.55517578125e-06, + "step": 4296, + "training_step_time": 0.18315649032592773 + }, + { + "epoch": 6.55670166015625e-06, + "model_forward_time": 0.023514509201049805, + "step": 4297 + }, + { + "epoch": 6.55670166015625e-06, + "step": 4297, + "training_step_time": 0.21477389335632324 + }, + { + "epoch": 6.5582275390625e-06, + "model_forward_time": 0.02351832389831543, + "step": 4298 + }, + { + "epoch": 6.5582275390625e-06, + "step": 4298, + "training_step_time": 0.1544947624206543 + }, + { + "epoch": 6.55975341796875e-06, + "model_forward_time": 0.02344512939453125, + "step": 4299 + }, + { + "epoch": 6.55975341796875e-06, + "step": 4299, + "training_step_time": 0.17835426330566406 + }, + { + "epoch": 6.561279296875e-06, + "grad_norm": 1.0753647089004517, + "learning_rate": 9.763726149966596e-05, + "loss": 0.0747, + "step": 4300 + }, + { + "epoch": 6.561279296875e-06, + "model_forward_time": 0.02583622932434082, + "step": 4300 + }, + { + "epoch": 6.561279296875e-06, + "step": 4300, + "training_step_time": 0.1500256061553955 + }, + { + "epoch": 6.56280517578125e-06, + "model_forward_time": 0.024130582809448242, + "step": 4301 + }, + { + "epoch": 6.56280517578125e-06, + "step": 4301, + "training_step_time": 0.1619892120361328 + }, + { + "epoch": 6.5643310546875e-06, + "model_forward_time": 0.023835420608520508, + "step": 4302 + }, + { + "epoch": 6.5643310546875e-06, + "step": 4302, + "training_step_time": 0.11077260971069336 + }, + { + "epoch": 6.56585693359375e-06, + "model_forward_time": 0.024020910263061523, + "step": 4303 + }, + { + "epoch": 6.56585693359375e-06, + "step": 4303, + "training_step_time": 0.11815953254699707 + }, + { + "epoch": 6.5673828125e-06, + "model_forward_time": 0.02437567710876465, + "step": 4304 + }, + { + "epoch": 6.5673828125e-06, + "step": 4304, + "training_step_time": 0.12411046028137207 + }, + { + "epoch": 6.56890869140625e-06, + "model_forward_time": 0.0261995792388916, + "step": 4305 + }, + { + "epoch": 6.56890869140625e-06, + "step": 4305, + "training_step_time": 0.12511682510375977 + }, + { + "epoch": 6.5704345703125e-06, + "model_forward_time": 0.024224519729614258, + "step": 4306 + }, + { + "epoch": 6.5704345703125e-06, + "step": 4306, + "training_step_time": 0.1197059154510498 + }, + { + "epoch": 6.57196044921875e-06, + "model_forward_time": 0.02500128746032715, + "step": 4307 + }, + { + "epoch": 6.57196044921875e-06, + "step": 4307, + "training_step_time": 0.12176656723022461 + }, + { + "epoch": 6.573486328125e-06, + "model_forward_time": 0.024811983108520508, + "step": 4308 + }, + { + "epoch": 6.573486328125e-06, + "step": 4308, + "training_step_time": 0.12289047241210938 + }, + { + "epoch": 6.57501220703125e-06, + "model_forward_time": 0.025588512420654297, + "step": 4309 + }, + { + "epoch": 6.57501220703125e-06, + "step": 4309, + "training_step_time": 0.1091775894165039 + }, + { + "epoch": 6.5765380859375e-06, + "grad_norm": 0.2607550323009491, + "learning_rate": 9.762049004848706e-05, + "loss": 0.0866, + "step": 4310 + }, + { + "epoch": 6.5765380859375e-06, + "model_forward_time": 0.024265766143798828, + "step": 4310 + }, + { + "epoch": 6.5765380859375e-06, + "step": 4310, + "training_step_time": 0.17173528671264648 + }, + { + "epoch": 6.57806396484375e-06, + "model_forward_time": 0.02408146858215332, + "step": 4311 + }, + { + "epoch": 6.57806396484375e-06, + "step": 4311, + "training_step_time": 0.16152739524841309 + }, + { + "epoch": 6.57958984375e-06, + "model_forward_time": 0.023791790008544922, + "step": 4312 + }, + { + "epoch": 6.57958984375e-06, + "step": 4312, + "training_step_time": 0.11660385131835938 + }, + { + "epoch": 6.58111572265625e-06, + "model_forward_time": 0.024196863174438477, + "step": 4313 + }, + { + "epoch": 6.58111572265625e-06, + "step": 4313, + "training_step_time": 0.22089767456054688 + }, + { + "epoch": 6.5826416015625e-06, + "model_forward_time": 0.02404475212097168, + "step": 4314 + }, + { + "epoch": 6.5826416015625e-06, + "step": 4314, + "training_step_time": 0.10869741439819336 + }, + { + "epoch": 6.58416748046875e-06, + "model_forward_time": 0.02373504638671875, + "step": 4315 + }, + { + "epoch": 6.58416748046875e-06, + "step": 4315, + "training_step_time": 0.10488748550415039 + }, + { + "epoch": 6.585693359375e-06, + "model_forward_time": 0.02417778968811035, + "step": 4316 + }, + { + "epoch": 6.585693359375e-06, + "step": 4316, + "training_step_time": 0.10684585571289062 + }, + { + "epoch": 6.58721923828125e-06, + "model_forward_time": 0.024859905242919922, + "step": 4317 + }, + { + "epoch": 6.58721923828125e-06, + "step": 4317, + "training_step_time": 0.10750150680541992 + }, + { + "epoch": 6.5887451171875e-06, + "model_forward_time": 0.024730205535888672, + "step": 4318 + }, + { + "epoch": 6.5887451171875e-06, + "step": 4318, + "training_step_time": 0.10858845710754395 + }, + { + "epoch": 6.59027099609375e-06, + "model_forward_time": 0.0245969295501709, + "step": 4319 + }, + { + "epoch": 6.59027099609375e-06, + "step": 4319, + "training_step_time": 0.10819029808044434 + }, + { + "epoch": 6.591796875e-06, + "grad_norm": 0.3992424011230469, + "learning_rate": 9.760366073392246e-05, + "loss": 0.1157, + "step": 4320 + }, + { + "epoch": 6.591796875e-06, + "model_forward_time": 0.0243532657623291, + "step": 4320 + }, + { + "epoch": 6.591796875e-06, + "step": 4320, + "training_step_time": 0.10850739479064941 + }, + { + "epoch": 6.59332275390625e-06, + "model_forward_time": 0.02482748031616211, + "step": 4321 + }, + { + "epoch": 6.59332275390625e-06, + "step": 4321, + "training_step_time": 0.10821819305419922 + }, + { + "epoch": 6.5948486328125e-06, + "model_forward_time": 0.02773761749267578, + "step": 4322 + }, + { + "epoch": 6.5948486328125e-06, + "step": 4322, + "training_step_time": 0.11365103721618652 + }, + { + "epoch": 6.59637451171875e-06, + "model_forward_time": 0.024251699447631836, + "step": 4323 + }, + { + "epoch": 6.59637451171875e-06, + "step": 4323, + "training_step_time": 0.10649824142456055 + }, + { + "epoch": 6.597900390625e-06, + "model_forward_time": 0.024547338485717773, + "step": 4324 + }, + { + "epoch": 6.597900390625e-06, + "step": 4324, + "training_step_time": 0.10802435874938965 + }, + { + "epoch": 6.59942626953125e-06, + "model_forward_time": 0.024929046630859375, + "step": 4325 + }, + { + "epoch": 6.59942626953125e-06, + "step": 4325, + "training_step_time": 0.10693168640136719 + }, + { + "epoch": 6.6009521484375e-06, + "model_forward_time": 0.024374008178710938, + "step": 4326 + }, + { + "epoch": 6.6009521484375e-06, + "step": 4326, + "training_step_time": 0.10775017738342285 + }, + { + "epoch": 6.60247802734375e-06, + "model_forward_time": 0.024434566497802734, + "step": 4327 + }, + { + "epoch": 6.60247802734375e-06, + "step": 4327, + "training_step_time": 0.11057329177856445 + }, + { + "epoch": 6.60400390625e-06, + "model_forward_time": 0.024326324462890625, + "step": 4328 + }, + { + "epoch": 6.60400390625e-06, + "step": 4328, + "training_step_time": 0.10746598243713379 + }, + { + "epoch": 6.60552978515625e-06, + "model_forward_time": 0.02614116668701172, + "step": 4329 + }, + { + "epoch": 6.60552978515625e-06, + "step": 4329, + "training_step_time": 0.1083993911743164 + }, + { + "epoch": 6.6070556640625e-06, + "grad_norm": 0.4989405572414398, + "learning_rate": 9.758677357642131e-05, + "loss": 0.076, + "step": 4330 + }, + { + "epoch": 6.6070556640625e-06, + "model_forward_time": 0.0244901180267334, + "step": 4330 + }, + { + "epoch": 6.6070556640625e-06, + "step": 4330, + "training_step_time": 0.11144828796386719 + }, + { + "epoch": 6.60858154296875e-06, + "model_forward_time": 0.025089740753173828, + "step": 4331 + }, + { + "epoch": 6.60858154296875e-06, + "step": 4331, + "training_step_time": 0.11182928085327148 + }, + { + "epoch": 6.610107421875e-06, + "model_forward_time": 0.02445054054260254, + "step": 4332 + }, + { + "epoch": 6.610107421875e-06, + "step": 4332, + "training_step_time": 0.10628414154052734 + }, + { + "epoch": 6.61163330078125e-06, + "model_forward_time": 0.02442789077758789, + "step": 4333 + }, + { + "epoch": 6.61163330078125e-06, + "step": 4333, + "training_step_time": 0.10907697677612305 + }, + { + "epoch": 6.6131591796875e-06, + "model_forward_time": 0.02432870864868164, + "step": 4334 + }, + { + "epoch": 6.6131591796875e-06, + "step": 4334, + "training_step_time": 0.10872054100036621 + }, + { + "epoch": 6.61468505859375e-06, + "model_forward_time": 0.024495363235473633, + "step": 4335 + }, + { + "epoch": 6.61468505859375e-06, + "step": 4335, + "training_step_time": 0.18063569068908691 + }, + { + "epoch": 6.6162109375e-06, + "model_forward_time": 0.023784637451171875, + "step": 4336 + }, + { + "epoch": 6.6162109375e-06, + "step": 4336, + "training_step_time": 0.10920238494873047 + }, + { + "epoch": 6.61773681640625e-06, + "model_forward_time": 0.024243831634521484, + "step": 4337 + }, + { + "epoch": 6.61773681640625e-06, + "step": 4337, + "training_step_time": 0.22081351280212402 + }, + { + "epoch": 6.6192626953125e-06, + "model_forward_time": 0.02374100685119629, + "step": 4338 + }, + { + "epoch": 6.6192626953125e-06, + "step": 4338, + "training_step_time": 0.10631680488586426 + }, + { + "epoch": 6.62078857421875e-06, + "model_forward_time": 0.023685693740844727, + "step": 4339 + }, + { + "epoch": 6.62078857421875e-06, + "step": 4339, + "training_step_time": 0.1123359203338623 + }, + { + "epoch": 6.622314453125e-06, + "grad_norm": 0.4247824549674988, + "learning_rate": 9.756982859650314e-05, + "loss": 0.0757, + "step": 4340 + }, + { + "epoch": 6.622314453125e-06, + "model_forward_time": 0.024617910385131836, + "step": 4340 + }, + { + "epoch": 6.622314453125e-06, + "step": 4340, + "training_step_time": 0.19577360153198242 + }, + { + "epoch": 6.62384033203125e-06, + "model_forward_time": 0.023773670196533203, + "step": 4341 + }, + { + "epoch": 6.62384033203125e-06, + "step": 4341, + "training_step_time": 0.20076966285705566 + }, + { + "epoch": 6.6253662109375e-06, + "model_forward_time": 0.023917675018310547, + "step": 4342 + }, + { + "epoch": 6.6253662109375e-06, + "step": 4342, + "training_step_time": 0.18840265274047852 + }, + { + "epoch": 6.62689208984375e-06, + "model_forward_time": 0.02486562728881836, + "step": 4343 + }, + { + "epoch": 6.62689208984375e-06, + "step": 4343, + "training_step_time": 0.15751051902770996 + }, + { + "epoch": 6.62841796875e-06, + "model_forward_time": 0.023736000061035156, + "step": 4344 + }, + { + "epoch": 6.62841796875e-06, + "step": 4344, + "training_step_time": 0.1656327247619629 + }, + { + "epoch": 6.62994384765625e-06, + "model_forward_time": 0.023743391036987305, + "step": 4345 + }, + { + "epoch": 6.62994384765625e-06, + "step": 4345, + "training_step_time": 0.1403505802154541 + }, + { + "epoch": 6.6314697265625e-06, + "model_forward_time": 0.023730993270874023, + "step": 4346 + }, + { + "epoch": 6.6314697265625e-06, + "step": 4346, + "training_step_time": 0.1095430850982666 + }, + { + "epoch": 6.63299560546875e-06, + "model_forward_time": 0.024067401885986328, + "step": 4347 + }, + { + "epoch": 6.63299560546875e-06, + "step": 4347, + "training_step_time": 0.1119375228881836 + }, + { + "epoch": 6.634521484375e-06, + "model_forward_time": 0.02450871467590332, + "step": 4348 + }, + { + "epoch": 6.634521484375e-06, + "step": 4348, + "training_step_time": 0.11150598526000977 + }, + { + "epoch": 6.63604736328125e-06, + "model_forward_time": 0.023972034454345703, + "step": 4349 + }, + { + "epoch": 6.63604736328125e-06, + "step": 4349, + "training_step_time": 0.11609840393066406 + }, + { + "epoch": 6.6375732421875e-06, + "grad_norm": 0.35964709520339966, + "learning_rate": 9.755282581475769e-05, + "loss": 0.0697, + "step": 4350 + }, + { + "epoch": 6.6375732421875e-06, + "model_forward_time": 0.02448415756225586, + "step": 4350 + }, + { + "epoch": 6.6375732421875e-06, + "step": 4350, + "training_step_time": 0.11177849769592285 + }, + { + "epoch": 6.63909912109375e-06, + "model_forward_time": 0.02499866485595703, + "step": 4351 + }, + { + "epoch": 6.63909912109375e-06, + "step": 4351, + "training_step_time": 0.1087958812713623 + }, + { + "epoch": 6.640625e-06, + "model_forward_time": 0.024538516998291016, + "step": 4352 + }, + { + "epoch": 6.640625e-06, + "step": 4352, + "training_step_time": 0.11056375503540039 + }, + { + "epoch": 6.64215087890625e-06, + "model_forward_time": 0.024620532989501953, + "step": 4353 + }, + { + "epoch": 6.64215087890625e-06, + "step": 4353, + "training_step_time": 0.11011075973510742 + }, + { + "epoch": 6.6436767578125e-06, + "model_forward_time": 0.02473735809326172, + "step": 4354 + }, + { + "epoch": 6.6436767578125e-06, + "step": 4354, + "training_step_time": 0.1092836856842041 + }, + { + "epoch": 6.64520263671875e-06, + "model_forward_time": 0.024943828582763672, + "step": 4355 + }, + { + "epoch": 6.64520263671875e-06, + "step": 4355, + "training_step_time": 0.2120351791381836 + }, + { + "epoch": 6.646728515625e-06, + "model_forward_time": 0.024372339248657227, + "step": 4356 + }, + { + "epoch": 6.646728515625e-06, + "step": 4356, + "training_step_time": 0.11501884460449219 + }, + { + "epoch": 6.64825439453125e-06, + "model_forward_time": 0.02452254295349121, + "step": 4357 + }, + { + "epoch": 6.64825439453125e-06, + "step": 4357, + "training_step_time": 0.10913777351379395 + }, + { + "epoch": 6.6497802734375e-06, + "model_forward_time": 0.024715662002563477, + "step": 4358 + }, + { + "epoch": 6.6497802734375e-06, + "step": 4358, + "training_step_time": 0.18436503410339355 + }, + { + "epoch": 6.65130615234375e-06, + "model_forward_time": 0.02393198013305664, + "step": 4359 + }, + { + "epoch": 6.65130615234375e-06, + "step": 4359, + "training_step_time": 0.1616814136505127 + }, + { + "epoch": 6.65283203125e-06, + "grad_norm": 0.49390262365341187, + "learning_rate": 9.753576525184492e-05, + "loss": 0.0804, + "step": 4360 + }, + { + "epoch": 6.65283203125e-06, + "model_forward_time": 0.02370905876159668, + "step": 4360 + }, + { + "epoch": 6.65283203125e-06, + "step": 4360, + "training_step_time": 0.13049793243408203 + }, + { + "epoch": 6.65435791015625e-06, + "model_forward_time": 0.023852109909057617, + "step": 4361 + }, + { + "epoch": 6.65435791015625e-06, + "step": 4361, + "training_step_time": 0.1284928321838379 + }, + { + "epoch": 6.6558837890625e-06, + "model_forward_time": 0.024065017700195312, + "step": 4362 + }, + { + "epoch": 6.6558837890625e-06, + "step": 4362, + "training_step_time": 0.12180685997009277 + }, + { + "epoch": 6.65740966796875e-06, + "model_forward_time": 0.023887157440185547, + "step": 4363 + }, + { + "epoch": 6.65740966796875e-06, + "step": 4363, + "training_step_time": 0.11612677574157715 + }, + { + "epoch": 6.658935546875e-06, + "model_forward_time": 0.024860858917236328, + "step": 4364 + }, + { + "epoch": 6.658935546875e-06, + "step": 4364, + "training_step_time": 0.11703896522521973 + }, + { + "epoch": 6.66046142578125e-06, + "model_forward_time": 0.028395891189575195, + "step": 4365 + }, + { + "epoch": 6.66046142578125e-06, + "step": 4365, + "training_step_time": 0.11362886428833008 + }, + { + "epoch": 6.6619873046875e-06, + "model_forward_time": 0.024801254272460938, + "step": 4366 + }, + { + "epoch": 6.6619873046875e-06, + "step": 4366, + "training_step_time": 0.11715197563171387 + }, + { + "epoch": 6.66351318359375e-06, + "model_forward_time": 0.024309873580932617, + "step": 4367 + }, + { + "epoch": 6.66351318359375e-06, + "step": 4367, + "training_step_time": 0.10759758949279785 + }, + { + "epoch": 6.6650390625e-06, + "model_forward_time": 0.024828195571899414, + "step": 4368 + }, + { + "epoch": 6.6650390625e-06, + "step": 4368, + "training_step_time": 0.11082744598388672 + }, + { + "epoch": 6.66656494140625e-06, + "model_forward_time": 0.02506875991821289, + "step": 4369 + }, + { + "epoch": 6.66656494140625e-06, + "step": 4369, + "training_step_time": 0.11076688766479492 + }, + { + "epoch": 6.6680908203125e-06, + "grad_norm": 0.580703854560852, + "learning_rate": 9.751864692849504e-05, + "loss": 0.084, + "step": 4370 + }, + { + "epoch": 6.6680908203125e-06, + "model_forward_time": 0.025594472885131836, + "step": 4370 + }, + { + "epoch": 6.6680908203125e-06, + "step": 4370, + "training_step_time": 0.1085672378540039 + }, + { + "epoch": 6.66961669921875e-06, + "model_forward_time": 0.02512383460998535, + "step": 4371 + }, + { + "epoch": 6.66961669921875e-06, + "step": 4371, + "training_step_time": 0.10904216766357422 + }, + { + "epoch": 6.671142578125e-06, + "model_forward_time": 0.025000810623168945, + "step": 4372 + }, + { + "epoch": 6.671142578125e-06, + "step": 4372, + "training_step_time": 0.12130069732666016 + }, + { + "epoch": 6.67266845703125e-06, + "model_forward_time": 0.024539470672607422, + "step": 4373 + }, + { + "epoch": 6.67266845703125e-06, + "step": 4373, + "training_step_time": 0.11635756492614746 + }, + { + "epoch": 6.6741943359375e-06, + "model_forward_time": 0.024425506591796875, + "step": 4374 + }, + { + "epoch": 6.6741943359375e-06, + "step": 4374, + "training_step_time": 0.10800528526306152 + }, + { + "epoch": 6.67572021484375e-06, + "model_forward_time": 0.02487349510192871, + "step": 4375 + }, + { + "epoch": 6.67572021484375e-06, + "step": 4375, + "training_step_time": 0.10786843299865723 + }, + { + "epoch": 6.67724609375e-06, + "model_forward_time": 0.024628400802612305, + "step": 4376 + }, + { + "epoch": 6.67724609375e-06, + "step": 4376, + "training_step_time": 0.11495780944824219 + }, + { + "epoch": 6.67877197265625e-06, + "model_forward_time": 0.024732112884521484, + "step": 4377 + }, + { + "epoch": 6.67877197265625e-06, + "step": 4377, + "training_step_time": 0.11054325103759766 + }, + { + "epoch": 6.6802978515625e-06, + "model_forward_time": 0.024713754653930664, + "step": 4378 + }, + { + "epoch": 6.6802978515625e-06, + "step": 4378, + "training_step_time": 0.10920143127441406 + }, + { + "epoch": 6.68182373046875e-06, + "model_forward_time": 0.02444314956665039, + "step": 4379 + }, + { + "epoch": 6.68182373046875e-06, + "step": 4379, + "training_step_time": 0.12654709815979004 + }, + { + "epoch": 6.683349609375e-06, + "grad_norm": 0.4432964324951172, + "learning_rate": 9.750147086550844e-05, + "loss": 0.0828, + "step": 4380 + }, + { + "epoch": 6.683349609375e-06, + "model_forward_time": 0.02497124671936035, + "step": 4380 + }, + { + "epoch": 6.683349609375e-06, + "step": 4380, + "training_step_time": 0.11072230339050293 + }, + { + "epoch": 6.68487548828125e-06, + "model_forward_time": 0.024817705154418945, + "step": 4381 + }, + { + "epoch": 6.68487548828125e-06, + "step": 4381, + "training_step_time": 0.22046279907226562 + }, + { + "epoch": 6.6864013671875e-06, + "model_forward_time": 0.023878812789916992, + "step": 4382 + }, + { + "epoch": 6.6864013671875e-06, + "step": 4382, + "training_step_time": 0.18476247787475586 + }, + { + "epoch": 6.68792724609375e-06, + "model_forward_time": 0.023889541625976562, + "step": 4383 + }, + { + "epoch": 6.68792724609375e-06, + "step": 4383, + "training_step_time": 0.1253042221069336 + }, + { + "epoch": 6.689453125e-06, + "model_forward_time": 0.02368640899658203, + "step": 4384 + }, + { + "epoch": 6.689453125e-06, + "step": 4384, + "training_step_time": 0.1708064079284668 + }, + { + "epoch": 6.69097900390625e-06, + "model_forward_time": 0.024415969848632812, + "step": 4385 + }, + { + "epoch": 6.69097900390625e-06, + "step": 4385, + "training_step_time": 0.14625191688537598 + }, + { + "epoch": 6.6925048828125e-06, + "model_forward_time": 0.023844003677368164, + "step": 4386 + }, + { + "epoch": 6.6925048828125e-06, + "step": 4386, + "training_step_time": 0.12918376922607422 + }, + { + "epoch": 6.69403076171875e-06, + "model_forward_time": 0.023836612701416016, + "step": 4387 + }, + { + "epoch": 6.69403076171875e-06, + "step": 4387, + "training_step_time": 0.2027883529663086 + }, + { + "epoch": 6.695556640625e-06, + "model_forward_time": 0.0232546329498291, + "step": 4388 + }, + { + "epoch": 6.695556640625e-06, + "step": 4388, + "training_step_time": 0.15815496444702148 + }, + { + "epoch": 6.69708251953125e-06, + "model_forward_time": 0.02433037757873535, + "step": 4389 + }, + { + "epoch": 6.69708251953125e-06, + "step": 4389, + "training_step_time": 0.15130901336669922 + }, + { + "epoch": 6.6986083984375e-06, + "grad_norm": 0.4692542254924774, + "learning_rate": 9.748423708375563e-05, + "loss": 0.0777, + "step": 4390 + }, + { + "epoch": 6.6986083984375e-06, + "model_forward_time": 0.023773670196533203, + "step": 4390 + }, + { + "epoch": 6.6986083984375e-06, + "step": 4390, + "training_step_time": 0.10547184944152832 + }, + { + "epoch": 6.70013427734375e-06, + "model_forward_time": 0.0239410400390625, + "step": 4391 + }, + { + "epoch": 6.70013427734375e-06, + "step": 4391, + "training_step_time": 0.10596251487731934 + }, + { + "epoch": 6.70166015625e-06, + "model_forward_time": 0.02483224868774414, + "step": 4392 + }, + { + "epoch": 6.70166015625e-06, + "step": 4392, + "training_step_time": 0.10634493827819824 + }, + { + "epoch": 6.70318603515625e-06, + "model_forward_time": 0.02451300621032715, + "step": 4393 + }, + { + "epoch": 6.70318603515625e-06, + "step": 4393, + "training_step_time": 0.11151385307312012 + }, + { + "epoch": 6.7047119140625e-06, + "model_forward_time": 0.024956226348876953, + "step": 4394 + }, + { + "epoch": 6.7047119140625e-06, + "step": 4394, + "training_step_time": 0.11178135871887207 + }, + { + "epoch": 6.70623779296875e-06, + "model_forward_time": 0.024352312088012695, + "step": 4395 + }, + { + "epoch": 6.70623779296875e-06, + "step": 4395, + "training_step_time": 0.1072533130645752 + }, + { + "epoch": 6.707763671875e-06, + "model_forward_time": 0.02488875389099121, + "step": 4396 + }, + { + "epoch": 6.707763671875e-06, + "step": 4396, + "training_step_time": 0.10666418075561523 + }, + { + "epoch": 6.70928955078125e-06, + "model_forward_time": 0.02468276023864746, + "step": 4397 + }, + { + "epoch": 6.70928955078125e-06, + "step": 4397, + "training_step_time": 0.11514735221862793 + }, + { + "epoch": 6.7108154296875e-06, + "model_forward_time": 0.02582573890686035, + "step": 4398 + }, + { + "epoch": 6.7108154296875e-06, + "step": 4398, + "training_step_time": 0.10753536224365234 + }, + { + "epoch": 6.71234130859375e-06, + "model_forward_time": 0.025196313858032227, + "step": 4399 + }, + { + "epoch": 6.71234130859375e-06, + "step": 4399, + "training_step_time": 0.10587358474731445 + }, + { + "epoch": 6.7138671875e-06, + "grad_norm": 0.3315945863723755, + "learning_rate": 9.746694560417731e-05, + "loss": 0.1073, + "step": 4400 + }, + { + "epoch": 6.7138671875e-06, + "model_forward_time": 0.025259733200073242, + "step": 4400 + }, + { + "epoch": 6.7138671875e-06, + "step": 4400, + "training_step_time": 0.1230614185333252 + }, + { + "epoch": 6.71539306640625e-06, + "model_forward_time": 0.025791406631469727, + "step": 4401 + }, + { + "epoch": 6.71539306640625e-06, + "step": 4401, + "training_step_time": 0.11361527442932129 + }, + { + "epoch": 6.7169189453125e-06, + "model_forward_time": 0.028496265411376953, + "step": 4402 + }, + { + "epoch": 6.7169189453125e-06, + "step": 4402, + "training_step_time": 0.11051273345947266 + }, + { + "epoch": 6.71844482421875e-06, + "model_forward_time": 0.02579665184020996, + "step": 4403 + }, + { + "epoch": 6.71844482421875e-06, + "step": 4403, + "training_step_time": 0.20893192291259766 + }, + { + "epoch": 6.719970703125e-06, + "model_forward_time": 0.02483367919921875, + "step": 4404 + }, + { + "epoch": 6.719970703125e-06, + "step": 4404, + "training_step_time": 0.13214373588562012 + }, + { + "epoch": 6.72149658203125e-06, + "model_forward_time": 0.024660348892211914, + "step": 4405 + }, + { + "epoch": 6.72149658203125e-06, + "step": 4405, + "training_step_time": 0.12876605987548828 + }, + { + "epoch": 6.7230224609375e-06, + "model_forward_time": 0.024568557739257812, + "step": 4406 + }, + { + "epoch": 6.7230224609375e-06, + "step": 4406, + "training_step_time": 0.12694811820983887 + }, + { + "epoch": 6.72454833984375e-06, + "model_forward_time": 0.025042057037353516, + "step": 4407 + }, + { + "epoch": 6.72454833984375e-06, + "step": 4407, + "training_step_time": 0.12434959411621094 + }, + { + "epoch": 6.72607421875e-06, + "model_forward_time": 0.025457382202148438, + "step": 4408 + }, + { + "epoch": 6.72607421875e-06, + "step": 4408, + "training_step_time": 0.1208031177520752 + }, + { + "epoch": 6.72760009765625e-06, + "model_forward_time": 0.028746843338012695, + "step": 4409 + }, + { + "epoch": 6.72760009765625e-06, + "step": 4409, + "training_step_time": 0.11565351486206055 + }, + { + "epoch": 6.7291259765625e-06, + "grad_norm": 0.44205155968666077, + "learning_rate": 9.744959644778422e-05, + "loss": 0.0855, + "step": 4410 + }, + { + "epoch": 6.7291259765625e-06, + "model_forward_time": 0.025493621826171875, + "step": 4410 + }, + { + "epoch": 6.7291259765625e-06, + "step": 4410, + "training_step_time": 0.11760306358337402 + }, + { + "epoch": 6.73065185546875e-06, + "model_forward_time": 0.025439023971557617, + "step": 4411 + }, + { + "epoch": 6.73065185546875e-06, + "step": 4411, + "training_step_time": 0.11013936996459961 + }, + { + "epoch": 6.732177734375e-06, + "model_forward_time": 0.025485754013061523, + "step": 4412 + }, + { + "epoch": 6.732177734375e-06, + "step": 4412, + "training_step_time": 0.10869908332824707 + }, + { + "epoch": 6.73370361328125e-06, + "model_forward_time": 0.025135517120361328, + "step": 4413 + }, + { + "epoch": 6.73370361328125e-06, + "step": 4413, + "training_step_time": 0.11286568641662598 + }, + { + "epoch": 6.7352294921875e-06, + "model_forward_time": 0.025252103805541992, + "step": 4414 + }, + { + "epoch": 6.7352294921875e-06, + "step": 4414, + "training_step_time": 0.10966968536376953 + }, + { + "epoch": 6.73675537109375e-06, + "model_forward_time": 0.025339126586914062, + "step": 4415 + }, + { + "epoch": 6.73675537109375e-06, + "step": 4415, + "training_step_time": 0.10991954803466797 + }, + { + "epoch": 6.73828125e-06, + "model_forward_time": 0.024278879165649414, + "step": 4416 + }, + { + "epoch": 6.73828125e-06, + "step": 4416, + "training_step_time": 0.11007261276245117 + }, + { + "epoch": 6.73980712890625e-06, + "model_forward_time": 0.024446964263916016, + "step": 4417 + }, + { + "epoch": 6.73980712890625e-06, + "step": 4417, + "training_step_time": 0.11290287971496582 + }, + { + "epoch": 6.7413330078125e-06, + "model_forward_time": 0.025385379791259766, + "step": 4418 + }, + { + "epoch": 6.7413330078125e-06, + "step": 4418, + "training_step_time": 0.10982131958007812 + }, + { + "epoch": 6.74285888671875e-06, + "model_forward_time": 0.025242328643798828, + "step": 4419 + }, + { + "epoch": 6.74285888671875e-06, + "step": 4419, + "training_step_time": 0.1099998950958252 + }, + { + "epoch": 6.744384765625e-06, + "grad_norm": 0.6597705483436584, + "learning_rate": 9.743218963565725e-05, + "loss": 0.1024, + "step": 4420 + }, + { + "epoch": 6.744384765625e-06, + "model_forward_time": 0.02510809898376465, + "step": 4420 + }, + { + "epoch": 6.744384765625e-06, + "step": 4420, + "training_step_time": 0.11147475242614746 + }, + { + "epoch": 6.74591064453125e-06, + "model_forward_time": 0.027329444885253906, + "step": 4421 + }, + { + "epoch": 6.74591064453125e-06, + "step": 4421, + "training_step_time": 0.11153841018676758 + }, + { + "epoch": 6.7474365234375e-06, + "model_forward_time": 0.02553081512451172, + "step": 4422 + }, + { + "epoch": 6.7474365234375e-06, + "step": 4422, + "training_step_time": 0.10847020149230957 + }, + { + "epoch": 6.74896240234375e-06, + "model_forward_time": 0.02446913719177246, + "step": 4423 + }, + { + "epoch": 6.74896240234375e-06, + "step": 4423, + "training_step_time": 0.23080039024353027 + }, + { + "epoch": 6.75048828125e-06, + "model_forward_time": 0.02478313446044922, + "step": 4424 + }, + { + "epoch": 6.75048828125e-06, + "step": 4424, + "training_step_time": 0.12435793876647949 + }, + { + "epoch": 6.75201416015625e-06, + "model_forward_time": 0.024983644485473633, + "step": 4425 + }, + { + "epoch": 6.75201416015625e-06, + "step": 4425, + "training_step_time": 0.12787890434265137 + }, + { + "epoch": 6.7535400390625e-06, + "model_forward_time": 0.025197744369506836, + "step": 4426 + }, + { + "epoch": 6.7535400390625e-06, + "step": 4426, + "training_step_time": 0.11344385147094727 + }, + { + "epoch": 6.75506591796875e-06, + "model_forward_time": 0.025916099548339844, + "step": 4427 + }, + { + "epoch": 6.75506591796875e-06, + "step": 4427, + "training_step_time": 0.16034555435180664 + }, + { + "epoch": 6.756591796875e-06, + "model_forward_time": 0.024791955947875977, + "step": 4428 + }, + { + "epoch": 6.756591796875e-06, + "step": 4428, + "training_step_time": 0.12233471870422363 + }, + { + "epoch": 6.75811767578125e-06, + "model_forward_time": 0.02547454833984375, + "step": 4429 + }, + { + "epoch": 6.75811767578125e-06, + "step": 4429, + "training_step_time": 0.1455223560333252 + }, + { + "epoch": 6.7596435546875e-06, + "grad_norm": 0.3992781639099121, + "learning_rate": 9.74147251889473e-05, + "loss": 0.0782, + "step": 4430 + }, + { + "epoch": 6.7596435546875e-06, + "model_forward_time": 0.025011777877807617, + "step": 4430 + }, + { + "epoch": 6.7596435546875e-06, + "step": 4430, + "training_step_time": 0.17708420753479004 + }, + { + "epoch": 6.76116943359375e-06, + "model_forward_time": 0.02461719512939453, + "step": 4431 + }, + { + "epoch": 6.76116943359375e-06, + "step": 4431, + "training_step_time": 0.18003058433532715 + }, + { + "epoch": 6.7626953125e-06, + "model_forward_time": 0.024953365325927734, + "step": 4432 + }, + { + "epoch": 6.7626953125e-06, + "step": 4432, + "training_step_time": 0.1970045566558838 + }, + { + "epoch": 6.76422119140625e-06, + "model_forward_time": 0.025161266326904297, + "step": 4433 + }, + { + "epoch": 6.76422119140625e-06, + "step": 4433, + "training_step_time": 0.11963486671447754 + }, + { + "epoch": 6.7657470703125e-06, + "model_forward_time": 0.023435115814208984, + "step": 4434 + }, + { + "epoch": 6.7657470703125e-06, + "step": 4434, + "training_step_time": 0.11079859733581543 + }, + { + "epoch": 6.76727294921875e-06, + "model_forward_time": 0.024517297744750977, + "step": 4435 + }, + { + "epoch": 6.76727294921875e-06, + "step": 4435, + "training_step_time": 0.1070866584777832 + }, + { + "epoch": 6.768798828125e-06, + "model_forward_time": 0.025110244750976562, + "step": 4436 + }, + { + "epoch": 6.768798828125e-06, + "step": 4436, + "training_step_time": 0.10853815078735352 + }, + { + "epoch": 6.77032470703125e-06, + "model_forward_time": 0.025333404541015625, + "step": 4437 + }, + { + "epoch": 6.77032470703125e-06, + "step": 4437, + "training_step_time": 0.1096348762512207 + }, + { + "epoch": 6.7718505859375e-06, + "model_forward_time": 0.025403261184692383, + "step": 4438 + }, + { + "epoch": 6.7718505859375e-06, + "step": 4438, + "training_step_time": 0.16763520240783691 + }, + { + "epoch": 6.77337646484375e-06, + "model_forward_time": 0.02462315559387207, + "step": 4439 + }, + { + "epoch": 6.77337646484375e-06, + "step": 4439, + "training_step_time": 0.1921398639678955 + }, + { + "epoch": 6.77490234375e-06, + "grad_norm": 0.3353877067565918, + "learning_rate": 9.739720312887535e-05, + "loss": 0.0888, + "step": 4440 + }, + { + "epoch": 6.77490234375e-06, + "model_forward_time": 0.02477550506591797, + "step": 4440 + }, + { + "epoch": 6.77490234375e-06, + "step": 4440, + "training_step_time": 0.18576407432556152 + }, + { + "epoch": 6.77642822265625e-06, + "model_forward_time": 0.024771690368652344, + "step": 4441 + }, + { + "epoch": 6.77642822265625e-06, + "step": 4441, + "training_step_time": 0.1255650520324707 + }, + { + "epoch": 6.7779541015625e-06, + "model_forward_time": 0.024149417877197266, + "step": 4442 + }, + { + "epoch": 6.7779541015625e-06, + "step": 4442, + "training_step_time": 0.16859841346740723 + }, + { + "epoch": 6.77947998046875e-06, + "model_forward_time": 0.024303913116455078, + "step": 4443 + }, + { + "epoch": 6.77947998046875e-06, + "step": 4443, + "training_step_time": 0.1571030616760254 + }, + { + "epoch": 6.781005859375e-06, + "model_forward_time": 0.025760173797607422, + "step": 4444 + }, + { + "epoch": 6.781005859375e-06, + "step": 4444, + "training_step_time": 0.10898613929748535 + }, + { + "epoch": 6.78253173828125e-06, + "model_forward_time": 0.025356054306030273, + "step": 4445 + }, + { + "epoch": 6.78253173828125e-06, + "step": 4445, + "training_step_time": 0.13817977905273438 + }, + { + "epoch": 6.7840576171875e-06, + "model_forward_time": 0.025652647018432617, + "step": 4446 + }, + { + "epoch": 6.7840576171875e-06, + "step": 4446, + "training_step_time": 0.11881566047668457 + }, + { + "epoch": 6.78558349609375e-06, + "model_forward_time": 0.02527785301208496, + "step": 4447 + }, + { + "epoch": 6.78558349609375e-06, + "step": 4447, + "training_step_time": 0.11393356323242188 + }, + { + "epoch": 6.787109375e-06, + "model_forward_time": 0.02528858184814453, + "step": 4448 + }, + { + "epoch": 6.787109375e-06, + "step": 4448, + "training_step_time": 0.10541319847106934 + }, + { + "epoch": 6.78863525390625e-06, + "model_forward_time": 0.025516986846923828, + "step": 4449 + }, + { + "epoch": 6.78863525390625e-06, + "step": 4449, + "training_step_time": 0.10833263397216797 + }, + { + "epoch": 6.7901611328125e-06, + "grad_norm": 0.5004896521568298, + "learning_rate": 9.737962347673231e-05, + "loss": 0.1129, + "step": 4450 + }, + { + "epoch": 6.7901611328125e-06, + "model_forward_time": 0.025115013122558594, + "step": 4450 + }, + { + "epoch": 6.7901611328125e-06, + "step": 4450, + "training_step_time": 0.10771918296813965 + }, + { + "epoch": 6.79168701171875e-06, + "model_forward_time": 0.025381803512573242, + "step": 4451 + }, + { + "epoch": 6.79168701171875e-06, + "step": 4451, + "training_step_time": 0.11132264137268066 + }, + { + "epoch": 6.793212890625e-06, + "model_forward_time": 0.025698423385620117, + "step": 4452 + }, + { + "epoch": 6.793212890625e-06, + "step": 4452, + "training_step_time": 0.1142113208770752 + }, + { + "epoch": 6.79473876953125e-06, + "model_forward_time": 0.02529311180114746, + "step": 4453 + }, + { + "epoch": 6.79473876953125e-06, + "step": 4453, + "training_step_time": 0.10810112953186035 + }, + { + "epoch": 6.7962646484375e-06, + "model_forward_time": 0.02525639533996582, + "step": 4454 + }, + { + "epoch": 6.7962646484375e-06, + "step": 4454, + "training_step_time": 0.10873818397521973 + }, + { + "epoch": 6.79779052734375e-06, + "model_forward_time": 0.025655269622802734, + "step": 4455 + }, + { + "epoch": 6.79779052734375e-06, + "step": 4455, + "training_step_time": 0.11636924743652344 + }, + { + "epoch": 6.79931640625e-06, + "model_forward_time": 0.024880170822143555, + "step": 4456 + }, + { + "epoch": 6.79931640625e-06, + "step": 4456, + "training_step_time": 0.12848615646362305 + }, + { + "epoch": 6.80084228515625e-06, + "model_forward_time": 0.025758981704711914, + "step": 4457 + }, + { + "epoch": 6.80084228515625e-06, + "step": 4457, + "training_step_time": 0.16585397720336914 + }, + { + "epoch": 6.8023681640625e-06, + "model_forward_time": 0.024266481399536133, + "step": 4458 + }, + { + "epoch": 6.8023681640625e-06, + "step": 4458, + "training_step_time": 0.15198183059692383 + }, + { + "epoch": 6.80389404296875e-06, + "model_forward_time": 0.025344371795654297, + "step": 4459 + }, + { + "epoch": 6.80389404296875e-06, + "step": 4459, + "training_step_time": 0.15577244758605957 + }, + { + "epoch": 6.805419921875e-06, + "grad_norm": 0.4311482012271881, + "learning_rate": 9.736198625387916e-05, + "loss": 0.0864, + "step": 4460 + }, + { + "epoch": 6.805419921875e-06, + "model_forward_time": 0.024741172790527344, + "step": 4460 + }, + { + "epoch": 6.805419921875e-06, + "step": 4460, + "training_step_time": 0.1317591667175293 + }, + { + "epoch": 6.80694580078125e-06, + "model_forward_time": 0.02438831329345703, + "step": 4461 + }, + { + "epoch": 6.80694580078125e-06, + "step": 4461, + "training_step_time": 0.1337747573852539 + }, + { + "epoch": 6.8084716796875e-06, + "model_forward_time": 0.024546146392822266, + "step": 4462 + }, + { + "epoch": 6.8084716796875e-06, + "step": 4462, + "training_step_time": 0.1287229061126709 + }, + { + "epoch": 6.80999755859375e-06, + "model_forward_time": 0.02500629425048828, + "step": 4463 + }, + { + "epoch": 6.80999755859375e-06, + "step": 4463, + "training_step_time": 0.11901473999023438 + }, + { + "epoch": 6.8115234375e-06, + "model_forward_time": 0.02517843246459961, + "step": 4464 + }, + { + "epoch": 6.8115234375e-06, + "step": 4464, + "training_step_time": 0.12088823318481445 + }, + { + "epoch": 6.81304931640625e-06, + "model_forward_time": 0.025204181671142578, + "step": 4465 + }, + { + "epoch": 6.81304931640625e-06, + "step": 4465, + "training_step_time": 0.18205976486206055 + }, + { + "epoch": 6.8145751953125e-06, + "model_forward_time": 0.0249481201171875, + "step": 4466 + }, + { + "epoch": 6.8145751953125e-06, + "step": 4466, + "training_step_time": 0.10940718650817871 + }, + { + "epoch": 6.81610107421875e-06, + "model_forward_time": 0.0248568058013916, + "step": 4467 + }, + { + "epoch": 6.81610107421875e-06, + "step": 4467, + "training_step_time": 0.138472318649292 + }, + { + "epoch": 6.817626953125e-06, + "model_forward_time": 0.025551795959472656, + "step": 4468 + }, + { + "epoch": 6.817626953125e-06, + "step": 4468, + "training_step_time": 0.16276073455810547 + }, + { + "epoch": 6.81915283203125e-06, + "model_forward_time": 0.024810075759887695, + "step": 4469 + }, + { + "epoch": 6.81915283203125e-06, + "step": 4469, + "training_step_time": 0.21833467483520508 + }, + { + "epoch": 6.8206787109375e-06, + "grad_norm": 0.43905940651893616, + "learning_rate": 9.734429148174675e-05, + "loss": 0.0785, + "step": 4470 + }, + { + "epoch": 6.8206787109375e-06, + "model_forward_time": 0.02467513084411621, + "step": 4470 + }, + { + "epoch": 6.8206787109375e-06, + "step": 4470, + "training_step_time": 0.1417839527130127 + }, + { + "epoch": 6.82220458984375e-06, + "model_forward_time": 0.024777889251708984, + "step": 4471 + }, + { + "epoch": 6.82220458984375e-06, + "step": 4471, + "training_step_time": 0.13990020751953125 + }, + { + "epoch": 6.82373046875e-06, + "model_forward_time": 0.026890039443969727, + "step": 4472 + }, + { + "epoch": 6.82373046875e-06, + "step": 4472, + "training_step_time": 0.20887351036071777 + }, + { + "epoch": 6.82525634765625e-06, + "model_forward_time": 0.02489614486694336, + "step": 4473 + }, + { + "epoch": 6.82525634765625e-06, + "step": 4473, + "training_step_time": 0.13181209564208984 + }, + { + "epoch": 6.8267822265625e-06, + "model_forward_time": 0.024866580963134766, + "step": 4474 + }, + { + "epoch": 6.8267822265625e-06, + "step": 4474, + "training_step_time": 0.1098332405090332 + }, + { + "epoch": 6.82830810546875e-06, + "model_forward_time": 0.025820255279541016, + "step": 4475 + }, + { + "epoch": 6.82830810546875e-06, + "step": 4475, + "training_step_time": 0.19210577011108398 + }, + { + "epoch": 6.829833984375e-06, + "model_forward_time": 0.024728775024414062, + "step": 4476 + }, + { + "epoch": 6.829833984375e-06, + "step": 4476, + "training_step_time": 0.1051633358001709 + }, + { + "epoch": 6.83135986328125e-06, + "model_forward_time": 0.024882078170776367, + "step": 4477 + }, + { + "epoch": 6.83135986328125e-06, + "step": 4477, + "training_step_time": 0.10464668273925781 + }, + { + "epoch": 6.8328857421875e-06, + "model_forward_time": 0.027993202209472656, + "step": 4478 + }, + { + "epoch": 6.8328857421875e-06, + "step": 4478, + "training_step_time": 0.11600089073181152 + }, + { + "epoch": 6.83441162109375e-06, + "model_forward_time": 0.027083396911621094, + "step": 4479 + }, + { + "epoch": 6.83441162109375e-06, + "step": 4479, + "training_step_time": 0.11164259910583496 + }, + { + "epoch": 6.8359375e-06, + "grad_norm": 0.5180590748786926, + "learning_rate": 9.732653918183592e-05, + "loss": 0.0885, + "step": 4480 + }, + { + "epoch": 6.8359375e-06, + "model_forward_time": 0.02594733238220215, + "step": 4480 + }, + { + "epoch": 6.8359375e-06, + "step": 4480, + "training_step_time": 0.11564159393310547 + }, + { + "epoch": 6.83746337890625e-06, + "model_forward_time": 0.026409626007080078, + "step": 4481 + }, + { + "epoch": 6.83746337890625e-06, + "step": 4481, + "training_step_time": 0.11275362968444824 + }, + { + "epoch": 6.8389892578125e-06, + "model_forward_time": 0.025445938110351562, + "step": 4482 + }, + { + "epoch": 6.8389892578125e-06, + "step": 4482, + "training_step_time": 0.11760067939758301 + }, + { + "epoch": 6.84051513671875e-06, + "model_forward_time": 0.025847673416137695, + "step": 4483 + }, + { + "epoch": 6.84051513671875e-06, + "step": 4483, + "training_step_time": 0.11334729194641113 + }, + { + "epoch": 6.842041015625e-06, + "model_forward_time": 0.02576470375061035, + "step": 4484 + }, + { + "epoch": 6.842041015625e-06, + "step": 4484, + "training_step_time": 0.10874128341674805 + }, + { + "epoch": 6.84356689453125e-06, + "model_forward_time": 0.025194644927978516, + "step": 4485 + }, + { + "epoch": 6.84356689453125e-06, + "step": 4485, + "training_step_time": 0.11578106880187988 + }, + { + "epoch": 6.8450927734375e-06, + "model_forward_time": 0.028726577758789062, + "step": 4486 + }, + { + "epoch": 6.8450927734375e-06, + "step": 4486, + "training_step_time": 0.12242484092712402 + }, + { + "epoch": 6.84661865234375e-06, + "model_forward_time": 0.026030302047729492, + "step": 4487 + }, + { + "epoch": 6.84661865234375e-06, + "step": 4487, + "training_step_time": 0.11379861831665039 + }, + { + "epoch": 6.84814453125e-06, + "model_forward_time": 0.025763988494873047, + "step": 4488 + }, + { + "epoch": 6.84814453125e-06, + "step": 4488, + "training_step_time": 0.1197657585144043 + }, + { + "epoch": 6.84967041015625e-06, + "model_forward_time": 0.02585458755493164, + "step": 4489 + }, + { + "epoch": 6.84967041015625e-06, + "step": 4489, + "training_step_time": 0.11082053184509277 + }, + { + "epoch": 6.8511962890625e-06, + "grad_norm": 0.5689246654510498, + "learning_rate": 9.730872937571739e-05, + "loss": 0.0898, + "step": 4490 + }, + { + "epoch": 6.8511962890625e-06, + "model_forward_time": 0.025580644607543945, + "step": 4490 + }, + { + "epoch": 6.8511962890625e-06, + "step": 4490, + "training_step_time": 0.11792278289794922 + }, + { + "epoch": 6.85272216796875e-06, + "model_forward_time": 0.02572941780090332, + "step": 4491 + }, + { + "epoch": 6.85272216796875e-06, + "step": 4491, + "training_step_time": 0.10931515693664551 + }, + { + "epoch": 6.854248046875e-06, + "model_forward_time": 0.02576756477355957, + "step": 4492 + }, + { + "epoch": 6.854248046875e-06, + "step": 4492, + "training_step_time": 0.10883283615112305 + }, + { + "epoch": 6.85577392578125e-06, + "model_forward_time": 0.02566671371459961, + "step": 4493 + }, + { + "epoch": 6.85577392578125e-06, + "step": 4493, + "training_step_time": 0.10997486114501953 + }, + { + "epoch": 6.8572998046875e-06, + "model_forward_time": 0.0287020206451416, + "step": 4494 + }, + { + "epoch": 6.8572998046875e-06, + "step": 4494, + "training_step_time": 0.11341476440429688 + }, + { + "epoch": 6.85882568359375e-06, + "model_forward_time": 0.025387048721313477, + "step": 4495 + }, + { + "epoch": 6.85882568359375e-06, + "step": 4495, + "training_step_time": 0.1075284481048584 + }, + { + "epoch": 6.8603515625e-06, + "model_forward_time": 0.025438308715820312, + "step": 4496 + }, + { + "epoch": 6.8603515625e-06, + "step": 4496, + "training_step_time": 0.11192560195922852 + }, + { + "epoch": 6.86187744140625e-06, + "model_forward_time": 0.025542020797729492, + "step": 4497 + }, + { + "epoch": 6.86187744140625e-06, + "step": 4497, + "training_step_time": 0.10827088356018066 + }, + { + "epoch": 6.8634033203125e-06, + "model_forward_time": 0.025399446487426758, + "step": 4498 + }, + { + "epoch": 6.8634033203125e-06, + "step": 4498, + "training_step_time": 0.10962748527526855 + }, + { + "epoch": 6.86492919921875e-06, + "model_forward_time": 0.02547430992126465, + "step": 4499 + }, + { + "epoch": 6.86492919921875e-06, + "step": 4499, + "training_step_time": 0.11098170280456543 + }, + { + "epoch": 6.866455078125e-06, + "grad_norm": 0.5847257971763611, + "learning_rate": 9.729086208503174e-05, + "loss": 0.0886, + "step": 4500 + }, + { + "epoch": 6.866455078125e-06, + "model_forward_time": 0.024441003799438477, + "step": 4500 + }, + { + "epoch": 6.866455078125e-06, + "step": 4500, + "training_step_time": 0.10938405990600586 + }, + { + "epoch": 6.86798095703125e-06, + "model_forward_time": 0.024552345275878906, + "step": 4501 + }, + { + "epoch": 6.86798095703125e-06, + "step": 4501, + "training_step_time": 0.10780000686645508 + }, + { + "epoch": 6.8695068359375e-06, + "model_forward_time": 0.025714635848999023, + "step": 4502 + }, + { + "epoch": 6.8695068359375e-06, + "step": 4502, + "training_step_time": 0.11142683029174805 + }, + { + "epoch": 6.87103271484375e-06, + "model_forward_time": 0.025624513626098633, + "step": 4503 + }, + { + "epoch": 6.87103271484375e-06, + "step": 4503, + "training_step_time": 0.1113278865814209 + }, + { + "epoch": 6.87255859375e-06, + "model_forward_time": 0.02548956871032715, + "step": 4504 + }, + { + "epoch": 6.87255859375e-06, + "step": 4504, + "training_step_time": 0.1092061996459961 + }, + { + "epoch": 6.87408447265625e-06, + "model_forward_time": 0.025415658950805664, + "step": 4505 + }, + { + "epoch": 6.87408447265625e-06, + "step": 4505, + "training_step_time": 0.10877656936645508 + }, + { + "epoch": 6.8756103515625e-06, + "model_forward_time": 0.025763988494873047, + "step": 4506 + }, + { + "epoch": 6.8756103515625e-06, + "step": 4506, + "training_step_time": 0.11118531227111816 + }, + { + "epoch": 6.87713623046875e-06, + "model_forward_time": 0.02573561668395996, + "step": 4507 + }, + { + "epoch": 6.87713623046875e-06, + "step": 4507, + "training_step_time": 0.10768461227416992 + }, + { + "epoch": 6.878662109375e-06, + "model_forward_time": 0.025513887405395508, + "step": 4508 + }, + { + "epoch": 6.878662109375e-06, + "step": 4508, + "training_step_time": 0.1124882698059082 + }, + { + "epoch": 6.88018798828125e-06, + "model_forward_time": 0.025415897369384766, + "step": 4509 + }, + { + "epoch": 6.88018798828125e-06, + "step": 4509, + "training_step_time": 0.10879039764404297 + }, + { + "epoch": 6.8817138671875e-06, + "grad_norm": 0.30061620473861694, + "learning_rate": 9.727293733148942e-05, + "loss": 0.0993, + "step": 4510 + }, + { + "epoch": 6.8817138671875e-06, + "model_forward_time": 0.025926589965820312, + "step": 4510 + }, + { + "epoch": 6.8817138671875e-06, + "step": 4510, + "training_step_time": 0.11031818389892578 + }, + { + "epoch": 6.88323974609375e-06, + "model_forward_time": 0.02551746368408203, + "step": 4511 + }, + { + "epoch": 6.88323974609375e-06, + "step": 4511, + "training_step_time": 0.12284135818481445 + }, + { + "epoch": 6.884765625e-06, + "model_forward_time": 0.025502681732177734, + "step": 4512 + }, + { + "epoch": 6.884765625e-06, + "step": 4512, + "training_step_time": 0.1155092716217041 + }, + { + "epoch": 6.88629150390625e-06, + "model_forward_time": 0.025554180145263672, + "step": 4513 + }, + { + "epoch": 6.88629150390625e-06, + "step": 4513, + "training_step_time": 0.13530898094177246 + }, + { + "epoch": 6.8878173828125e-06, + "model_forward_time": 0.02549600601196289, + "step": 4514 + }, + { + "epoch": 6.8878173828125e-06, + "step": 4514, + "training_step_time": 0.1698307991027832 + }, + { + "epoch": 6.88934326171875e-06, + "model_forward_time": 0.024968385696411133, + "step": 4515 + }, + { + "epoch": 6.88934326171875e-06, + "step": 4515, + "training_step_time": 0.2276754379272461 + }, + { + "epoch": 6.890869140625e-06, + "model_forward_time": 0.024751901626586914, + "step": 4516 + }, + { + "epoch": 6.890869140625e-06, + "step": 4516, + "training_step_time": 0.18165802955627441 + }, + { + "epoch": 6.89239501953125e-06, + "model_forward_time": 0.025082826614379883, + "step": 4517 + }, + { + "epoch": 6.89239501953125e-06, + "step": 4517, + "training_step_time": 0.15519428253173828 + }, + { + "epoch": 6.8939208984375e-06, + "model_forward_time": 0.024975299835205078, + "step": 4518 + }, + { + "epoch": 6.8939208984375e-06, + "step": 4518, + "training_step_time": 0.19071102142333984 + }, + { + "epoch": 6.89544677734375e-06, + "model_forward_time": 0.024646282196044922, + "step": 4519 + }, + { + "epoch": 6.89544677734375e-06, + "step": 4519, + "training_step_time": 0.14542627334594727 + }, + { + "epoch": 6.89697265625e-06, + "grad_norm": 0.556941568851471, + "learning_rate": 9.72549551368707e-05, + "loss": 0.0898, + "step": 4520 + }, + { + "epoch": 6.89697265625e-06, + "model_forward_time": 0.02476954460144043, + "step": 4520 + }, + { + "epoch": 6.89697265625e-06, + "step": 4520, + "training_step_time": 0.1619255542755127 + }, + { + "epoch": 6.89849853515625e-06, + "model_forward_time": 0.024382591247558594, + "step": 4521 + }, + { + "epoch": 6.89849853515625e-06, + "step": 4521, + "training_step_time": 0.16274809837341309 + }, + { + "epoch": 6.9000244140625e-06, + "model_forward_time": 0.025071382522583008, + "step": 4522 + }, + { + "epoch": 6.9000244140625e-06, + "step": 4522, + "training_step_time": 0.1696312427520752 + }, + { + "epoch": 6.90155029296875e-06, + "model_forward_time": 0.024303674697875977, + "step": 4523 + }, + { + "epoch": 6.90155029296875e-06, + "step": 4523, + "training_step_time": 0.15805792808532715 + }, + { + "epoch": 6.903076171875e-06, + "model_forward_time": 0.024580717086791992, + "step": 4524 + }, + { + "epoch": 6.903076171875e-06, + "step": 4524, + "training_step_time": 0.14712285995483398 + }, + { + "epoch": 6.90460205078125e-06, + "model_forward_time": 0.024869203567504883, + "step": 4525 + }, + { + "epoch": 6.90460205078125e-06, + "step": 4525, + "training_step_time": 0.12766027450561523 + }, + { + "epoch": 6.9061279296875e-06, + "model_forward_time": 0.026020050048828125, + "step": 4526 + }, + { + "epoch": 6.9061279296875e-06, + "step": 4526, + "training_step_time": 0.12654900550842285 + }, + { + "epoch": 6.90765380859375e-06, + "model_forward_time": 0.02553701400756836, + "step": 4527 + }, + { + "epoch": 6.90765380859375e-06, + "step": 4527, + "training_step_time": 0.1243131160736084 + }, + { + "epoch": 6.9091796875e-06, + "model_forward_time": 0.02545905113220215, + "step": 4528 + }, + { + "epoch": 6.9091796875e-06, + "step": 4528, + "training_step_time": 0.12047910690307617 + }, + { + "epoch": 6.91070556640625e-06, + "model_forward_time": 0.02570509910583496, + "step": 4529 + }, + { + "epoch": 6.91070556640625e-06, + "step": 4529, + "training_step_time": 0.1938011646270752 + }, + { + "epoch": 6.9122314453125e-06, + "grad_norm": 0.3187704384326935, + "learning_rate": 9.723691552302562e-05, + "loss": 0.0912, + "step": 4530 + }, + { + "epoch": 6.9122314453125e-06, + "model_forward_time": 0.024973154067993164, + "step": 4530 + }, + { + "epoch": 6.9122314453125e-06, + "step": 4530, + "training_step_time": 0.12613987922668457 + }, + { + "epoch": 6.91375732421875e-06, + "model_forward_time": 0.025076866149902344, + "step": 4531 + }, + { + "epoch": 6.91375732421875e-06, + "step": 4531, + "training_step_time": 0.10954952239990234 + }, + { + "epoch": 6.915283203125e-06, + "model_forward_time": 0.02588176727294922, + "step": 4532 + }, + { + "epoch": 6.915283203125e-06, + "step": 4532, + "training_step_time": 0.11190438270568848 + }, + { + "epoch": 6.91680908203125e-06, + "model_forward_time": 0.025758981704711914, + "step": 4533 + }, + { + "epoch": 6.91680908203125e-06, + "step": 4533, + "training_step_time": 0.2253270149230957 + }, + { + "epoch": 6.9183349609375e-06, + "model_forward_time": 0.024959325790405273, + "step": 4534 + }, + { + "epoch": 6.9183349609375e-06, + "step": 4534, + "training_step_time": 0.10701322555541992 + }, + { + "epoch": 6.91986083984375e-06, + "model_forward_time": 0.024904727935791016, + "step": 4535 + }, + { + "epoch": 6.91986083984375e-06, + "step": 4535, + "training_step_time": 0.1057734489440918 + }, + { + "epoch": 6.92138671875e-06, + "model_forward_time": 0.027437448501586914, + "step": 4536 + }, + { + "epoch": 6.92138671875e-06, + "step": 4536, + "training_step_time": 0.11645674705505371 + }, + { + "epoch": 6.92291259765625e-06, + "model_forward_time": 0.025565385818481445, + "step": 4537 + }, + { + "epoch": 6.92291259765625e-06, + "step": 4537, + "training_step_time": 0.1094202995300293 + }, + { + "epoch": 6.9244384765625e-06, + "model_forward_time": 0.025213241577148438, + "step": 4538 + }, + { + "epoch": 6.9244384765625e-06, + "step": 4538, + "training_step_time": 0.11330199241638184 + }, + { + "epoch": 6.92596435546875e-06, + "model_forward_time": 0.026393651962280273, + "step": 4539 + }, + { + "epoch": 6.92596435546875e-06, + "step": 4539, + "training_step_time": 0.11123394966125488 + }, + { + "epoch": 6.927490234375e-06, + "grad_norm": 0.38522833585739136, + "learning_rate": 9.721881851187406e-05, + "loss": 0.1091, + "step": 4540 + }, + { + "epoch": 6.927490234375e-06, + "model_forward_time": 0.0257723331451416, + "step": 4540 + }, + { + "epoch": 6.927490234375e-06, + "step": 4540, + "training_step_time": 0.11416125297546387 + }, + { + "epoch": 6.92901611328125e-06, + "model_forward_time": 0.02500319480895996, + "step": 4541 + }, + { + "epoch": 6.92901611328125e-06, + "step": 4541, + "training_step_time": 0.11039328575134277 + }, + { + "epoch": 6.9305419921875e-06, + "model_forward_time": 0.025172710418701172, + "step": 4542 + }, + { + "epoch": 6.9305419921875e-06, + "step": 4542, + "training_step_time": 0.10945510864257812 + }, + { + "epoch": 6.93206787109375e-06, + "model_forward_time": 0.02553391456604004, + "step": 4543 + }, + { + "epoch": 6.93206787109375e-06, + "step": 4543, + "training_step_time": 0.11800885200500488 + }, + { + "epoch": 6.93359375e-06, + "model_forward_time": 0.02550029754638672, + "step": 4544 + }, + { + "epoch": 6.93359375e-06, + "step": 4544, + "training_step_time": 0.11364555358886719 + }, + { + "epoch": 6.93511962890625e-06, + "model_forward_time": 0.025170326232910156, + "step": 4545 + }, + { + "epoch": 6.93511962890625e-06, + "step": 4545, + "training_step_time": 0.1092677116394043 + }, + { + "epoch": 6.9366455078125e-06, + "model_forward_time": 0.025568485260009766, + "step": 4546 + }, + { + "epoch": 6.9366455078125e-06, + "step": 4546, + "training_step_time": 0.11096715927124023 + }, + { + "epoch": 6.93817138671875e-06, + "model_forward_time": 0.025572776794433594, + "step": 4547 + }, + { + "epoch": 6.93817138671875e-06, + "step": 4547, + "training_step_time": 0.10884761810302734 + }, + { + "epoch": 6.939697265625e-06, + "model_forward_time": 0.025199174880981445, + "step": 4548 + }, + { + "epoch": 6.939697265625e-06, + "step": 4548, + "training_step_time": 0.10773277282714844 + }, + { + "epoch": 6.94122314453125e-06, + "model_forward_time": 0.025654315948486328, + "step": 4549 + }, + { + "epoch": 6.94122314453125e-06, + "step": 4549, + "training_step_time": 0.11041855812072754 + }, + { + "epoch": 6.9427490234375e-06, + "grad_norm": 0.3061712086200714, + "learning_rate": 9.720066412540554e-05, + "loss": 0.1097, + "step": 4550 + }, + { + "epoch": 6.9427490234375e-06, + "model_forward_time": 0.02569866180419922, + "step": 4550 + }, + { + "epoch": 6.9427490234375e-06, + "step": 4550, + "training_step_time": 0.10866689682006836 + }, + { + "epoch": 6.94427490234375e-06, + "model_forward_time": 0.025941133499145508, + "step": 4551 + }, + { + "epoch": 6.94427490234375e-06, + "step": 4551, + "training_step_time": 0.10921406745910645 + }, + { + "epoch": 6.94580078125e-06, + "model_forward_time": 0.025366544723510742, + "step": 4552 + }, + { + "epoch": 6.94580078125e-06, + "step": 4552, + "training_step_time": 0.11448407173156738 + }, + { + "epoch": 6.94732666015625e-06, + "model_forward_time": 0.02539515495300293, + "step": 4553 + }, + { + "epoch": 6.94732666015625e-06, + "step": 4553, + "training_step_time": 0.2008509635925293 + }, + { + "epoch": 6.9488525390625e-06, + "model_forward_time": 0.024308204650878906, + "step": 4554 + }, + { + "epoch": 6.9488525390625e-06, + "step": 4554, + "training_step_time": 0.1146554946899414 + }, + { + "epoch": 6.95037841796875e-06, + "model_forward_time": 0.024580955505371094, + "step": 4555 + }, + { + "epoch": 6.95037841796875e-06, + "step": 4555, + "training_step_time": 0.1324918270111084 + }, + { + "epoch": 6.951904296875e-06, + "model_forward_time": 0.025465726852416992, + "step": 4556 + }, + { + "epoch": 6.951904296875e-06, + "step": 4556, + "training_step_time": 0.16433191299438477 + }, + { + "epoch": 6.95343017578125e-06, + "model_forward_time": 0.02474212646484375, + "step": 4557 + }, + { + "epoch": 6.95343017578125e-06, + "step": 4557, + "training_step_time": 0.21648859977722168 + }, + { + "epoch": 6.9549560546875e-06, + "model_forward_time": 0.024992704391479492, + "step": 4558 + }, + { + "epoch": 6.9549560546875e-06, + "step": 4558, + "training_step_time": 0.10788702964782715 + }, + { + "epoch": 6.95648193359375e-06, + "model_forward_time": 0.025012731552124023, + "step": 4559 + }, + { + "epoch": 6.95648193359375e-06, + "step": 4559, + "training_step_time": 0.14208555221557617 + }, + { + "epoch": 6.9580078125e-06, + "grad_norm": 0.4172661602497101, + "learning_rate": 9.718245238567939e-05, + "loss": 0.0835, + "step": 4560 + }, + { + "epoch": 6.9580078125e-06, + "model_forward_time": 0.025568008422851562, + "step": 4560 + }, + { + "epoch": 6.9580078125e-06, + "step": 4560, + "training_step_time": 0.10921645164489746 + }, + { + "epoch": 6.95953369140625e-06, + "model_forward_time": 0.02555108070373535, + "step": 4561 + }, + { + "epoch": 6.95953369140625e-06, + "step": 4561, + "training_step_time": 0.1126859188079834 + }, + { + "epoch": 6.9610595703125e-06, + "model_forward_time": 0.026616811752319336, + "step": 4562 + }, + { + "epoch": 6.9610595703125e-06, + "step": 4562, + "training_step_time": 0.12203764915466309 + }, + { + "epoch": 6.96258544921875e-06, + "model_forward_time": 0.027092456817626953, + "step": 4563 + }, + { + "epoch": 6.96258544921875e-06, + "step": 4563, + "training_step_time": 0.18268942832946777 + }, + { + "epoch": 6.964111328125e-06, + "model_forward_time": 0.02462482452392578, + "step": 4564 + }, + { + "epoch": 6.964111328125e-06, + "step": 4564, + "training_step_time": 0.18396282196044922 + }, + { + "epoch": 6.96563720703125e-06, + "model_forward_time": 0.024209260940551758, + "step": 4565 + }, + { + "epoch": 6.96563720703125e-06, + "step": 4565, + "training_step_time": 0.113128662109375 + }, + { + "epoch": 6.9671630859375e-06, + "model_forward_time": 0.024965286254882812, + "step": 4566 + }, + { + "epoch": 6.9671630859375e-06, + "step": 4566, + "training_step_time": 0.10502767562866211 + }, + { + "epoch": 6.96868896484375e-06, + "model_forward_time": 0.025495052337646484, + "step": 4567 + }, + { + "epoch": 6.96868896484375e-06, + "step": 4567, + "training_step_time": 0.1083064079284668 + }, + { + "epoch": 6.97021484375e-06, + "model_forward_time": 0.02565455436706543, + "step": 4568 + }, + { + "epoch": 6.97021484375e-06, + "step": 4568, + "training_step_time": 0.1105339527130127 + }, + { + "epoch": 6.97174072265625e-06, + "model_forward_time": 0.026070356369018555, + "step": 4569 + }, + { + "epoch": 6.97174072265625e-06, + "step": 4569, + "training_step_time": 0.10763764381408691 + }, + { + "epoch": 6.9732666015625e-06, + "grad_norm": 0.6116529703140259, + "learning_rate": 9.716418331482458e-05, + "loss": 0.0924, + "step": 4570 + }, + { + "epoch": 6.9732666015625e-06, + "model_forward_time": 0.025993824005126953, + "step": 4570 + }, + { + "epoch": 6.9732666015625e-06, + "step": 4570, + "training_step_time": 0.11006593704223633 + }, + { + "epoch": 6.97479248046875e-06, + "model_forward_time": 0.02561354637145996, + "step": 4571 + }, + { + "epoch": 6.97479248046875e-06, + "step": 4571, + "training_step_time": 0.11244559288024902 + }, + { + "epoch": 6.976318359375e-06, + "model_forward_time": 0.02550506591796875, + "step": 4572 + }, + { + "epoch": 6.976318359375e-06, + "step": 4572, + "training_step_time": 0.10854148864746094 + }, + { + "epoch": 6.97784423828125e-06, + "model_forward_time": 0.024932146072387695, + "step": 4573 + }, + { + "epoch": 6.97784423828125e-06, + "step": 4573, + "training_step_time": 0.11133050918579102 + }, + { + "epoch": 6.9793701171875e-06, + "model_forward_time": 0.025921106338500977, + "step": 4574 + }, + { + "epoch": 6.9793701171875e-06, + "step": 4574, + "training_step_time": 0.11167144775390625 + }, + { + "epoch": 6.98089599609375e-06, + "model_forward_time": 0.02514338493347168, + "step": 4575 + }, + { + "epoch": 6.98089599609375e-06, + "step": 4575, + "training_step_time": 0.21238327026367188 + }, + { + "epoch": 6.982421875e-06, + "model_forward_time": 0.02506232261657715, + "step": 4576 + }, + { + "epoch": 6.982421875e-06, + "step": 4576, + "training_step_time": 0.11548423767089844 + }, + { + "epoch": 6.98394775390625e-06, + "model_forward_time": 0.024883747100830078, + "step": 4577 + }, + { + "epoch": 6.98394775390625e-06, + "step": 4577, + "training_step_time": 0.11183857917785645 + }, + { + "epoch": 6.9854736328125e-06, + "model_forward_time": 0.025638103485107422, + "step": 4578 + }, + { + "epoch": 6.9854736328125e-06, + "step": 4578, + "training_step_time": 0.17650318145751953 + }, + { + "epoch": 6.98699951171875e-06, + "model_forward_time": 0.02788567543029785, + "step": 4579 + }, + { + "epoch": 6.98699951171875e-06, + "step": 4579, + "training_step_time": 0.16304922103881836 + }, + { + "epoch": 6.988525390625e-06, + "grad_norm": 0.46140211820602417, + "learning_rate": 9.714585693503974e-05, + "loss": 0.0848, + "step": 4580 + }, + { + "epoch": 6.988525390625e-06, + "model_forward_time": 0.025172710418701172, + "step": 4580 + }, + { + "epoch": 6.988525390625e-06, + "step": 4580, + "training_step_time": 0.11041736602783203 + }, + { + "epoch": 6.99005126953125e-06, + "model_forward_time": 0.026709318161010742, + "step": 4581 + }, + { + "epoch": 6.99005126953125e-06, + "step": 4581, + "training_step_time": 0.11118030548095703 + }, + { + "epoch": 6.9915771484375e-06, + "model_forward_time": 0.025292634963989258, + "step": 4582 + }, + { + "epoch": 6.9915771484375e-06, + "step": 4582, + "training_step_time": 0.1097104549407959 + }, + { + "epoch": 6.99310302734375e-06, + "model_forward_time": 0.02509617805480957, + "step": 4583 + }, + { + "epoch": 6.99310302734375e-06, + "step": 4583, + "training_step_time": 0.1124885082244873 + }, + { + "epoch": 6.99462890625e-06, + "model_forward_time": 0.025370359420776367, + "step": 4584 + }, + { + "epoch": 6.99462890625e-06, + "step": 4584, + "training_step_time": 0.10934329032897949 + }, + { + "epoch": 6.99615478515625e-06, + "model_forward_time": 0.025284290313720703, + "step": 4585 + }, + { + "epoch": 6.99615478515625e-06, + "step": 4585, + "training_step_time": 0.11519885063171387 + }, + { + "epoch": 6.9976806640625e-06, + "model_forward_time": 0.025420665740966797, + "step": 4586 + }, + { + "epoch": 6.9976806640625e-06, + "step": 4586, + "training_step_time": 0.16600513458251953 + }, + { + "epoch": 6.99920654296875e-06, + "model_forward_time": 0.023598432540893555, + "step": 4587 + }, + { + "epoch": 6.99920654296875e-06, + "step": 4587, + "training_step_time": 0.18465733528137207 + }, + { + "epoch": 7.000732421875e-06, + "model_forward_time": 0.024540185928344727, + "step": 4588 + }, + { + "epoch": 7.000732421875e-06, + "step": 4588, + "training_step_time": 0.18309807777404785 + }, + { + "epoch": 7.00225830078125e-06, + "model_forward_time": 0.024094343185424805, + "step": 4589 + }, + { + "epoch": 7.00225830078125e-06, + "step": 4589, + "training_step_time": 0.16628265380859375 + }, + { + "epoch": 7.0037841796875e-06, + "grad_norm": 0.5488386750221252, + "learning_rate": 9.712747326859315e-05, + "loss": 0.0904, + "step": 4590 + }, + { + "epoch": 7.0037841796875e-06, + "model_forward_time": 0.024530649185180664, + "step": 4590 + }, + { + "epoch": 7.0037841796875e-06, + "step": 4590, + "training_step_time": 0.15399384498596191 + }, + { + "epoch": 7.00531005859375e-06, + "model_forward_time": 0.024194002151489258, + "step": 4591 + }, + { + "epoch": 7.00531005859375e-06, + "step": 4591, + "training_step_time": 0.14029884338378906 + }, + { + "epoch": 7.0068359375e-06, + "model_forward_time": 0.024872541427612305, + "step": 4592 + }, + { + "epoch": 7.0068359375e-06, + "step": 4592, + "training_step_time": 0.1476421356201172 + }, + { + "epoch": 7.00836181640625e-06, + "model_forward_time": 0.024680614471435547, + "step": 4593 + }, + { + "epoch": 7.00836181640625e-06, + "step": 4593, + "training_step_time": 0.12543892860412598 + }, + { + "epoch": 7.0098876953125e-06, + "model_forward_time": 0.024805068969726562, + "step": 4594 + }, + { + "epoch": 7.0098876953125e-06, + "step": 4594, + "training_step_time": 0.12495279312133789 + }, + { + "epoch": 7.01141357421875e-06, + "model_forward_time": 0.02508997917175293, + "step": 4595 + }, + { + "epoch": 7.01141357421875e-06, + "step": 4595, + "training_step_time": 0.12061452865600586 + }, + { + "epoch": 7.012939453125e-06, + "model_forward_time": 0.024953126907348633, + "step": 4596 + }, + { + "epoch": 7.012939453125e-06, + "step": 4596, + "training_step_time": 0.11956357955932617 + }, + { + "epoch": 7.01446533203125e-06, + "model_forward_time": 0.02517080307006836, + "step": 4597 + }, + { + "epoch": 7.01446533203125e-06, + "step": 4597, + "training_step_time": 0.12378978729248047 + }, + { + "epoch": 7.0159912109375e-06, + "model_forward_time": 0.025837421417236328, + "step": 4598 + }, + { + "epoch": 7.0159912109375e-06, + "step": 4598, + "training_step_time": 0.15790629386901855 + }, + { + "epoch": 7.01751708984375e-06, + "model_forward_time": 0.025115013122558594, + "step": 4599 + }, + { + "epoch": 7.01751708984375e-06, + "step": 4599, + "training_step_time": 0.1769266128540039 + }, + { + "epoch": 7.01904296875e-06, + "grad_norm": 0.4443153142929077, + "learning_rate": 9.710903233782272e-05, + "loss": 0.0917, + "step": 4600 + }, + { + "epoch": 7.01904296875e-06, + "model_forward_time": 0.02614116668701172, + "step": 4600 + }, + { + "epoch": 7.01904296875e-06, + "step": 4600, + "training_step_time": 0.1532728672027588 + }, + { + "epoch": 7.02056884765625e-06, + "model_forward_time": 0.02469921112060547, + "step": 4601 + }, + { + "epoch": 7.02056884765625e-06, + "step": 4601, + "training_step_time": 0.10748171806335449 + }, + { + "epoch": 7.0220947265625e-06, + "model_forward_time": 0.025840044021606445, + "step": 4602 + }, + { + "epoch": 7.0220947265625e-06, + "step": 4602, + "training_step_time": 0.11820530891418457 + }, + { + "epoch": 7.02362060546875e-06, + "model_forward_time": 0.025635480880737305, + "step": 4603 + }, + { + "epoch": 7.02362060546875e-06, + "step": 4603, + "training_step_time": 0.1128084659576416 + }, + { + "epoch": 7.025146484375e-06, + "model_forward_time": 0.02547478675842285, + "step": 4604 + }, + { + "epoch": 7.025146484375e-06, + "step": 4604, + "training_step_time": 0.17396831512451172 + }, + { + "epoch": 7.02667236328125e-06, + "model_forward_time": 0.025224924087524414, + "step": 4605 + }, + { + "epoch": 7.02667236328125e-06, + "step": 4605, + "training_step_time": 0.17707014083862305 + }, + { + "epoch": 7.0281982421875e-06, + "model_forward_time": 0.024532556533813477, + "step": 4606 + }, + { + "epoch": 7.0281982421875e-06, + "step": 4606, + "training_step_time": 0.11643719673156738 + }, + { + "epoch": 7.02972412109375e-06, + "model_forward_time": 0.028357267379760742, + "step": 4607 + }, + { + "epoch": 7.02972412109375e-06, + "step": 4607, + "training_step_time": 0.1205284595489502 + }, + { + "epoch": 7.03125e-06, + "model_forward_time": 0.02562546730041504, + "step": 4608 + }, + { + "epoch": 7.03125e-06, + "step": 4608, + "training_step_time": 0.10737967491149902 + }, + { + "epoch": 7.03277587890625e-06, + "model_forward_time": 0.025376558303833008, + "step": 4609 + }, + { + "epoch": 7.03277587890625e-06, + "step": 4609, + "training_step_time": 0.1080939769744873 + }, + { + "epoch": 7.0343017578125e-06, + "grad_norm": 0.5178048610687256, + "learning_rate": 9.709053416513592e-05, + "loss": 0.0848, + "step": 4610 + }, + { + "epoch": 7.0343017578125e-06, + "model_forward_time": 0.025567054748535156, + "step": 4610 + }, + { + "epoch": 7.0343017578125e-06, + "step": 4610, + "training_step_time": 0.14335012435913086 + }, + { + "epoch": 7.03582763671875e-06, + "model_forward_time": 0.025458574295043945, + "step": 4611 + }, + { + "epoch": 7.03582763671875e-06, + "step": 4611, + "training_step_time": 0.16852974891662598 + }, + { + "epoch": 7.037353515625e-06, + "model_forward_time": 0.024663686752319336, + "step": 4612 + }, + { + "epoch": 7.037353515625e-06, + "step": 4612, + "training_step_time": 0.15189242362976074 + }, + { + "epoch": 7.03887939453125e-06, + "model_forward_time": 0.024314403533935547, + "step": 4613 + }, + { + "epoch": 7.03887939453125e-06, + "step": 4613, + "training_step_time": 0.1353294849395752 + }, + { + "epoch": 7.0404052734375e-06, + "model_forward_time": 0.028273820877075195, + "step": 4614 + }, + { + "epoch": 7.0404052734375e-06, + "step": 4614, + "training_step_time": 0.13442587852478027 + }, + { + "epoch": 7.04193115234375e-06, + "model_forward_time": 0.025510787963867188, + "step": 4615 + }, + { + "epoch": 7.04193115234375e-06, + "step": 4615, + "training_step_time": 0.11014699935913086 + }, + { + "epoch": 7.04345703125e-06, + "model_forward_time": 0.025209903717041016, + "step": 4616 + }, + { + "epoch": 7.04345703125e-06, + "step": 4616, + "training_step_time": 0.1221158504486084 + }, + { + "epoch": 7.04498291015625e-06, + "model_forward_time": 0.02506852149963379, + "step": 4617 + }, + { + "epoch": 7.04498291015625e-06, + "step": 4617, + "training_step_time": 0.11951327323913574 + }, + { + "epoch": 7.0465087890625e-06, + "model_forward_time": 0.02617192268371582, + "step": 4618 + }, + { + "epoch": 7.0465087890625e-06, + "step": 4618, + "training_step_time": 0.19681954383850098 + }, + { + "epoch": 7.04803466796875e-06, + "model_forward_time": 0.02478480339050293, + "step": 4619 + }, + { + "epoch": 7.04803466796875e-06, + "step": 4619, + "training_step_time": 0.1119844913482666 + }, + { + "epoch": 7.049560546875e-06, + "grad_norm": 0.37912517786026, + "learning_rate": 9.707197877300974e-05, + "loss": 0.0726, + "step": 4620 + }, + { + "epoch": 7.049560546875e-06, + "model_forward_time": 0.025426626205444336, + "step": 4620 + }, + { + "epoch": 7.049560546875e-06, + "step": 4620, + "training_step_time": 0.11473941802978516 + }, + { + "epoch": 7.05108642578125e-06, + "model_forward_time": 0.02573418617248535, + "step": 4621 + }, + { + "epoch": 7.05108642578125e-06, + "step": 4621, + "training_step_time": 0.1118619441986084 + }, + { + "epoch": 7.0526123046875e-06, + "model_forward_time": 0.025964021682739258, + "step": 4622 + }, + { + "epoch": 7.0526123046875e-06, + "step": 4622, + "training_step_time": 0.11052846908569336 + }, + { + "epoch": 7.05413818359375e-06, + "model_forward_time": 0.025844573974609375, + "step": 4623 + }, + { + "epoch": 7.05413818359375e-06, + "step": 4623, + "training_step_time": 0.11656045913696289 + }, + { + "epoch": 7.0556640625e-06, + "model_forward_time": 0.02527451515197754, + "step": 4624 + }, + { + "epoch": 7.0556640625e-06, + "step": 4624, + "training_step_time": 0.11308622360229492 + }, + { + "epoch": 7.05718994140625e-06, + "model_forward_time": 0.025414705276489258, + "step": 4625 + }, + { + "epoch": 7.05718994140625e-06, + "step": 4625, + "training_step_time": 0.11145639419555664 + }, + { + "epoch": 7.0587158203125e-06, + "model_forward_time": 0.026253700256347656, + "step": 4626 + }, + { + "epoch": 7.0587158203125e-06, + "step": 4626, + "training_step_time": 0.11387109756469727 + }, + { + "epoch": 7.06024169921875e-06, + "model_forward_time": 0.025023221969604492, + "step": 4627 + }, + { + "epoch": 7.06024169921875e-06, + "step": 4627, + "training_step_time": 0.11048150062561035 + }, + { + "epoch": 7.061767578125e-06, + "model_forward_time": 0.025444746017456055, + "step": 4628 + }, + { + "epoch": 7.061767578125e-06, + "step": 4628, + "training_step_time": 0.11424756050109863 + }, + { + "epoch": 7.06329345703125e-06, + "model_forward_time": 0.025431156158447266, + "step": 4629 + }, + { + "epoch": 7.06329345703125e-06, + "step": 4629, + "training_step_time": 0.11282157897949219 + }, + { + "epoch": 7.0648193359375e-06, + "grad_norm": 0.703524649143219, + "learning_rate": 9.705336618399077e-05, + "loss": 0.0832, + "step": 4630 + }, + { + "epoch": 7.0648193359375e-06, + "model_forward_time": 0.025510787963867188, + "step": 4630 + }, + { + "epoch": 7.0648193359375e-06, + "step": 4630, + "training_step_time": 0.10958480834960938 + }, + { + "epoch": 7.06634521484375e-06, + "model_forward_time": 0.025275468826293945, + "step": 4631 + }, + { + "epoch": 7.06634521484375e-06, + "step": 4631, + "training_step_time": 0.11008763313293457 + }, + { + "epoch": 7.06787109375e-06, + "model_forward_time": 0.025235891342163086, + "step": 4632 + }, + { + "epoch": 7.06787109375e-06, + "step": 4632, + "training_step_time": 0.10924983024597168 + }, + { + "epoch": 7.06939697265625e-06, + "model_forward_time": 0.02527475357055664, + "step": 4633 + }, + { + "epoch": 7.06939697265625e-06, + "step": 4633, + "training_step_time": 0.10968446731567383 + }, + { + "epoch": 7.0709228515625e-06, + "model_forward_time": 0.0252687931060791, + "step": 4634 + }, + { + "epoch": 7.0709228515625e-06, + "step": 4634, + "training_step_time": 0.1123650074005127 + }, + { + "epoch": 7.07244873046875e-06, + "model_forward_time": 0.025458097457885742, + "step": 4635 + }, + { + "epoch": 7.07244873046875e-06, + "step": 4635, + "training_step_time": 0.1084136962890625 + }, + { + "epoch": 7.073974609375e-06, + "model_forward_time": 0.02536296844482422, + "step": 4636 + }, + { + "epoch": 7.073974609375e-06, + "step": 4636, + "training_step_time": 0.11239242553710938 + }, + { + "epoch": 7.07550048828125e-06, + "model_forward_time": 0.029045820236206055, + "step": 4637 + }, + { + "epoch": 7.07550048828125e-06, + "step": 4637, + "training_step_time": 0.11421704292297363 + }, + { + "epoch": 7.0770263671875e-06, + "model_forward_time": 0.025288820266723633, + "step": 4638 + }, + { + "epoch": 7.0770263671875e-06, + "step": 4638, + "training_step_time": 0.11121821403503418 + }, + { + "epoch": 7.07855224609375e-06, + "model_forward_time": 0.026355981826782227, + "step": 4639 + }, + { + "epoch": 7.07855224609375e-06, + "step": 4639, + "training_step_time": 0.11396312713623047 + }, + { + "epoch": 7.080078125e-06, + "grad_norm": 0.5431896448135376, + "learning_rate": 9.703469642069503e-05, + "loss": 0.0866, + "step": 4640 + }, + { + "epoch": 7.080078125e-06, + "model_forward_time": 0.025588512420654297, + "step": 4640 + }, + { + "epoch": 7.080078125e-06, + "step": 4640, + "training_step_time": 0.11531591415405273 + }, + { + "epoch": 7.08160400390625e-06, + "model_forward_time": 0.0265805721282959, + "step": 4641 + }, + { + "epoch": 7.08160400390625e-06, + "step": 4641, + "training_step_time": 0.11524343490600586 + }, + { + "epoch": 7.0831298828125e-06, + "model_forward_time": 0.02535414695739746, + "step": 4642 + }, + { + "epoch": 7.0831298828125e-06, + "step": 4642, + "training_step_time": 0.11068892478942871 + }, + { + "epoch": 7.08465576171875e-06, + "model_forward_time": 0.024876117706298828, + "step": 4643 + }, + { + "epoch": 7.08465576171875e-06, + "step": 4643, + "training_step_time": 0.12270236015319824 + }, + { + "epoch": 7.086181640625e-06, + "model_forward_time": 0.02570343017578125, + "step": 4644 + }, + { + "epoch": 7.086181640625e-06, + "step": 4644, + "training_step_time": 0.11148619651794434 + }, + { + "epoch": 7.08770751953125e-06, + "model_forward_time": 0.02771162986755371, + "step": 4645 + }, + { + "epoch": 7.08770751953125e-06, + "step": 4645, + "training_step_time": 0.11736011505126953 + }, + { + "epoch": 7.0892333984375e-06, + "model_forward_time": 0.02540302276611328, + "step": 4646 + }, + { + "epoch": 7.0892333984375e-06, + "step": 4646, + "training_step_time": 0.1275320053100586 + }, + { + "epoch": 7.09075927734375e-06, + "model_forward_time": 0.026554346084594727, + "step": 4647 + }, + { + "epoch": 7.09075927734375e-06, + "step": 4647, + "training_step_time": 0.12313508987426758 + }, + { + "epoch": 7.09228515625e-06, + "model_forward_time": 0.02524876594543457, + "step": 4648 + }, + { + "epoch": 7.09228515625e-06, + "step": 4648, + "training_step_time": 0.11437582969665527 + }, + { + "epoch": 7.09381103515625e-06, + "model_forward_time": 0.025593042373657227, + "step": 4649 + }, + { + "epoch": 7.09381103515625e-06, + "step": 4649, + "training_step_time": 0.20876097679138184 + }, + { + "epoch": 7.0953369140625e-06, + "grad_norm": 0.30929675698280334, + "learning_rate": 9.701596950580806e-05, + "loss": 0.0714, + "step": 4650 + }, + { + "epoch": 7.0953369140625e-06, + "model_forward_time": 0.02449512481689453, + "step": 4650 + }, + { + "epoch": 7.0953369140625e-06, + "step": 4650, + "training_step_time": 0.1154944896697998 + }, + { + "epoch": 7.09686279296875e-06, + "model_forward_time": 0.024818897247314453, + "step": 4651 + }, + { + "epoch": 7.09686279296875e-06, + "step": 4651, + "training_step_time": 0.22406482696533203 + }, + { + "epoch": 7.098388671875e-06, + "model_forward_time": 0.026859521865844727, + "step": 4652 + }, + { + "epoch": 7.098388671875e-06, + "step": 4652, + "training_step_time": 0.13698959350585938 + }, + { + "epoch": 7.09991455078125e-06, + "model_forward_time": 0.024960041046142578, + "step": 4653 + }, + { + "epoch": 7.09991455078125e-06, + "step": 4653, + "training_step_time": 0.11091470718383789 + }, + { + "epoch": 7.1014404296875e-06, + "model_forward_time": 0.02565598487854004, + "step": 4654 + }, + { + "epoch": 7.1014404296875e-06, + "step": 4654, + "training_step_time": 0.11881065368652344 + }, + { + "epoch": 7.10296630859375e-06, + "model_forward_time": 0.025463581085205078, + "step": 4655 + }, + { + "epoch": 7.10296630859375e-06, + "step": 4655, + "training_step_time": 0.1113440990447998 + }, + { + "epoch": 7.1044921875e-06, + "model_forward_time": 0.025736570358276367, + "step": 4656 + }, + { + "epoch": 7.1044921875e-06, + "step": 4656, + "training_step_time": 0.11044645309448242 + }, + { + "epoch": 7.10601806640625e-06, + "model_forward_time": 0.025641202926635742, + "step": 4657 + }, + { + "epoch": 7.10601806640625e-06, + "step": 4657, + "training_step_time": 0.11413908004760742 + }, + { + "epoch": 7.1075439453125e-06, + "model_forward_time": 0.025508880615234375, + "step": 4658 + }, + { + "epoch": 7.1075439453125e-06, + "step": 4658, + "training_step_time": 0.11565136909484863 + }, + { + "epoch": 7.10906982421875e-06, + "model_forward_time": 0.02566838264465332, + "step": 4659 + }, + { + "epoch": 7.10906982421875e-06, + "step": 4659, + "training_step_time": 0.11598777770996094 + }, + { + "epoch": 7.110595703125e-06, + "grad_norm": 0.4373496472835541, + "learning_rate": 9.699718546208484e-05, + "loss": 0.1117, + "step": 4660 + }, + { + "epoch": 7.110595703125e-06, + "model_forward_time": 0.02506399154663086, + "step": 4660 + }, + { + "epoch": 7.110595703125e-06, + "step": 4660, + "training_step_time": 0.1157388687133789 + }, + { + "epoch": 7.11212158203125e-06, + "model_forward_time": 0.0249176025390625, + "step": 4661 + }, + { + "epoch": 7.11212158203125e-06, + "step": 4661, + "training_step_time": 0.10822415351867676 + }, + { + "epoch": 7.1136474609375e-06, + "model_forward_time": 0.02518630027770996, + "step": 4662 + }, + { + "epoch": 7.1136474609375e-06, + "step": 4662, + "training_step_time": 0.17522430419921875 + }, + { + "epoch": 7.11517333984375e-06, + "model_forward_time": 0.024921894073486328, + "step": 4663 + }, + { + "epoch": 7.11517333984375e-06, + "step": 4663, + "training_step_time": 0.15344667434692383 + }, + { + "epoch": 7.11669921875e-06, + "model_forward_time": 0.02471017837524414, + "step": 4664 + }, + { + "epoch": 7.11669921875e-06, + "step": 4664, + "training_step_time": 0.10583686828613281 + }, + { + "epoch": 7.11822509765625e-06, + "model_forward_time": 0.02735304832458496, + "step": 4665 + }, + { + "epoch": 7.11822509765625e-06, + "step": 4665, + "training_step_time": 0.10798430442810059 + }, + { + "epoch": 7.1197509765625e-06, + "model_forward_time": 0.02536320686340332, + "step": 4666 + }, + { + "epoch": 7.1197509765625e-06, + "step": 4666, + "training_step_time": 0.21842479705810547 + }, + { + "epoch": 7.12127685546875e-06, + "model_forward_time": 0.024820804595947266, + "step": 4667 + }, + { + "epoch": 7.12127685546875e-06, + "step": 4667, + "training_step_time": 0.12018632888793945 + }, + { + "epoch": 7.122802734375e-06, + "model_forward_time": 0.02475118637084961, + "step": 4668 + }, + { + "epoch": 7.122802734375e-06, + "step": 4668, + "training_step_time": 0.10997509956359863 + }, + { + "epoch": 7.12432861328125e-06, + "model_forward_time": 0.025513648986816406, + "step": 4669 + }, + { + "epoch": 7.12432861328125e-06, + "step": 4669, + "training_step_time": 0.10856842994689941 + }, + { + "epoch": 7.1258544921875e-06, + "grad_norm": 0.5405192375183105, + "learning_rate": 9.697834431234973e-05, + "loss": 0.0827, + "step": 4670 + }, + { + "epoch": 7.1258544921875e-06, + "model_forward_time": 0.025429725646972656, + "step": 4670 + }, + { + "epoch": 7.1258544921875e-06, + "step": 4670, + "training_step_time": 0.1075296401977539 + }, + { + "epoch": 7.12738037109375e-06, + "model_forward_time": 0.025585651397705078, + "step": 4671 + }, + { + "epoch": 7.12738037109375e-06, + "step": 4671, + "training_step_time": 0.10703492164611816 + }, + { + "epoch": 7.12890625e-06, + "model_forward_time": 0.02559947967529297, + "step": 4672 + }, + { + "epoch": 7.12890625e-06, + "step": 4672, + "training_step_time": 0.11008214950561523 + }, + { + "epoch": 7.13043212890625e-06, + "model_forward_time": 0.026738882064819336, + "step": 4673 + }, + { + "epoch": 7.13043212890625e-06, + "step": 4673, + "training_step_time": 0.10806107521057129 + }, + { + "epoch": 7.1319580078125e-06, + "model_forward_time": 0.025689125061035156, + "step": 4674 + }, + { + "epoch": 7.1319580078125e-06, + "step": 4674, + "training_step_time": 0.1119537353515625 + }, + { + "epoch": 7.13348388671875e-06, + "model_forward_time": 0.025330066680908203, + "step": 4675 + }, + { + "epoch": 7.13348388671875e-06, + "step": 4675, + "training_step_time": 0.10892415046691895 + }, + { + "epoch": 7.135009765625e-06, + "model_forward_time": 0.02545022964477539, + "step": 4676 + }, + { + "epoch": 7.135009765625e-06, + "step": 4676, + "training_step_time": 0.12022924423217773 + }, + { + "epoch": 7.13653564453125e-06, + "model_forward_time": 0.0248873233795166, + "step": 4677 + }, + { + "epoch": 7.13653564453125e-06, + "step": 4677, + "training_step_time": 0.11647558212280273 + }, + { + "epoch": 7.1380615234375e-06, + "model_forward_time": 0.025716304779052734, + "step": 4678 + }, + { + "epoch": 7.1380615234375e-06, + "step": 4678, + "training_step_time": 0.1166234016418457 + }, + { + "epoch": 7.13958740234375e-06, + "model_forward_time": 0.025435209274291992, + "step": 4679 + }, + { + "epoch": 7.13958740234375e-06, + "step": 4679, + "training_step_time": 0.11140942573547363 + }, + { + "epoch": 7.14111328125e-06, + "grad_norm": 0.4129830300807953, + "learning_rate": 9.695944607949649e-05, + "loss": 0.0891, + "step": 4680 + }, + { + "epoch": 7.14111328125e-06, + "model_forward_time": 0.025557994842529297, + "step": 4680 + }, + { + "epoch": 7.14111328125e-06, + "step": 4680, + "training_step_time": 0.11357522010803223 + }, + { + "epoch": 7.14263916015625e-06, + "model_forward_time": 0.025053739547729492, + "step": 4681 + }, + { + "epoch": 7.14263916015625e-06, + "step": 4681, + "training_step_time": 0.11266279220581055 + }, + { + "epoch": 7.1441650390625e-06, + "model_forward_time": 0.025322914123535156, + "step": 4682 + }, + { + "epoch": 7.1441650390625e-06, + "step": 4682, + "training_step_time": 0.11516499519348145 + }, + { + "epoch": 7.14569091796875e-06, + "model_forward_time": 0.025264501571655273, + "step": 4683 + }, + { + "epoch": 7.14569091796875e-06, + "step": 4683, + "training_step_time": 0.11101269721984863 + }, + { + "epoch": 7.147216796875e-06, + "model_forward_time": 0.025271177291870117, + "step": 4684 + }, + { + "epoch": 7.147216796875e-06, + "step": 4684, + "training_step_time": 0.11160826683044434 + }, + { + "epoch": 7.14874267578125e-06, + "model_forward_time": 0.025448322296142578, + "step": 4685 + }, + { + "epoch": 7.14874267578125e-06, + "step": 4685, + "training_step_time": 0.11337518692016602 + }, + { + "epoch": 7.1502685546875e-06, + "model_forward_time": 0.025115013122558594, + "step": 4686 + }, + { + "epoch": 7.1502685546875e-06, + "step": 4686, + "training_step_time": 0.2000284194946289 + }, + { + "epoch": 7.15179443359375e-06, + "model_forward_time": 0.024370193481445312, + "step": 4687 + }, + { + "epoch": 7.15179443359375e-06, + "step": 4687, + "training_step_time": 0.10823369026184082 + }, + { + "epoch": 7.1533203125e-06, + "model_forward_time": 0.024539947509765625, + "step": 4688 + }, + { + "epoch": 7.1533203125e-06, + "step": 4688, + "training_step_time": 0.1290898323059082 + }, + { + "epoch": 7.15484619140625e-06, + "model_forward_time": 0.02523493766784668, + "step": 4689 + }, + { + "epoch": 7.15484619140625e-06, + "step": 4689, + "training_step_time": 0.10941481590270996 + }, + { + "epoch": 7.1563720703125e-06, + "grad_norm": 0.34348055720329285, + "learning_rate": 9.69404907864883e-05, + "loss": 0.0967, + "step": 4690 + }, + { + "epoch": 7.1563720703125e-06, + "model_forward_time": 0.02564263343811035, + "step": 4690 + }, + { + "epoch": 7.1563720703125e-06, + "step": 4690, + "training_step_time": 0.17798900604248047 + }, + { + "epoch": 7.15789794921875e-06, + "model_forward_time": 0.02455449104309082, + "step": 4691 + }, + { + "epoch": 7.15789794921875e-06, + "step": 4691, + "training_step_time": 0.14079976081848145 + }, + { + "epoch": 7.159423828125e-06, + "model_forward_time": 0.02468395233154297, + "step": 4692 + }, + { + "epoch": 7.159423828125e-06, + "step": 4692, + "training_step_time": 0.1131138801574707 + }, + { + "epoch": 7.16094970703125e-06, + "model_forward_time": 0.024825572967529297, + "step": 4693 + }, + { + "epoch": 7.16094970703125e-06, + "step": 4693, + "training_step_time": 0.16120600700378418 + }, + { + "epoch": 7.1624755859375e-06, + "model_forward_time": 0.02475738525390625, + "step": 4694 + }, + { + "epoch": 7.1624755859375e-06, + "step": 4694, + "training_step_time": 0.18631601333618164 + }, + { + "epoch": 7.16400146484375e-06, + "model_forward_time": 0.024166345596313477, + "step": 4695 + }, + { + "epoch": 7.16400146484375e-06, + "step": 4695, + "training_step_time": 0.18218517303466797 + }, + { + "epoch": 7.16552734375e-06, + "model_forward_time": 0.02521371841430664, + "step": 4696 + }, + { + "epoch": 7.16552734375e-06, + "step": 4696, + "training_step_time": 0.1628427505493164 + }, + { + "epoch": 7.16705322265625e-06, + "model_forward_time": 0.024338722229003906, + "step": 4697 + }, + { + "epoch": 7.16705322265625e-06, + "step": 4697, + "training_step_time": 0.1306002140045166 + }, + { + "epoch": 7.1685791015625e-06, + "model_forward_time": 0.024640321731567383, + "step": 4698 + }, + { + "epoch": 7.1685791015625e-06, + "step": 4698, + "training_step_time": 0.12343811988830566 + }, + { + "epoch": 7.17010498046875e-06, + "model_forward_time": 0.02477407455444336, + "step": 4699 + }, + { + "epoch": 7.17010498046875e-06, + "step": 4699, + "training_step_time": 0.13312363624572754 + }, + { + "epoch": 7.171630859375e-06, + "grad_norm": 0.4429035186767578, + "learning_rate": 9.692147845635761e-05, + "loss": 0.085, + "step": 4700 + }, + { + "epoch": 7.171630859375e-06, + "model_forward_time": 0.02480316162109375, + "step": 4700 + }, + { + "epoch": 7.171630859375e-06, + "step": 4700, + "training_step_time": 0.12159991264343262 + }, + { + "epoch": 7.17315673828125e-06, + "model_forward_time": 0.025419235229492188, + "step": 4701 + }, + { + "epoch": 7.17315673828125e-06, + "step": 4701, + "training_step_time": 0.11999964714050293 + }, + { + "epoch": 7.1746826171875e-06, + "model_forward_time": 0.025213956832885742, + "step": 4702 + }, + { + "epoch": 7.1746826171875e-06, + "step": 4702, + "training_step_time": 0.11420083045959473 + }, + { + "epoch": 7.17620849609375e-06, + "model_forward_time": 0.02863621711730957, + "step": 4703 + }, + { + "epoch": 7.17620849609375e-06, + "step": 4703, + "training_step_time": 0.11843442916870117 + }, + { + "epoch": 7.177734375e-06, + "model_forward_time": 0.025918245315551758, + "step": 4704 + }, + { + "epoch": 7.177734375e-06, + "step": 4704, + "training_step_time": 0.10933852195739746 + }, + { + "epoch": 7.17926025390625e-06, + "model_forward_time": 0.02541327476501465, + "step": 4705 + }, + { + "epoch": 7.17926025390625e-06, + "step": 4705, + "training_step_time": 0.11254739761352539 + }, + { + "epoch": 7.1807861328125e-06, + "model_forward_time": 0.02548956871032715, + "step": 4706 + }, + { + "epoch": 7.1807861328125e-06, + "step": 4706, + "training_step_time": 0.10585522651672363 + }, + { + "epoch": 7.18231201171875e-06, + "model_forward_time": 0.02466583251953125, + "step": 4707 + }, + { + "epoch": 7.18231201171875e-06, + "step": 4707, + "training_step_time": 0.10611462593078613 + }, + { + "epoch": 7.183837890625e-06, + "model_forward_time": 0.024873733520507812, + "step": 4708 + }, + { + "epoch": 7.183837890625e-06, + "step": 4708, + "training_step_time": 0.10912036895751953 + }, + { + "epoch": 7.18536376953125e-06, + "model_forward_time": 0.02541208267211914, + "step": 4709 + }, + { + "epoch": 7.18536376953125e-06, + "step": 4709, + "training_step_time": 0.11274433135986328 + }, + { + "epoch": 7.1868896484375e-06, + "grad_norm": 0.34829989075660706, + "learning_rate": 9.690240911220618e-05, + "loss": 0.0911, + "step": 4710 + }, + { + "epoch": 7.1868896484375e-06, + "model_forward_time": 0.025482654571533203, + "step": 4710 + }, + { + "epoch": 7.1868896484375e-06, + "step": 4710, + "training_step_time": 0.11333894729614258 + }, + { + "epoch": 7.18841552734375e-06, + "model_forward_time": 0.025823116302490234, + "step": 4711 + }, + { + "epoch": 7.18841552734375e-06, + "step": 4711, + "training_step_time": 0.21737074851989746 + }, + { + "epoch": 7.18994140625e-06, + "model_forward_time": 0.024890422821044922, + "step": 4712 + }, + { + "epoch": 7.18994140625e-06, + "step": 4712, + "training_step_time": 0.11287426948547363 + }, + { + "epoch": 7.19146728515625e-06, + "model_forward_time": 0.02470231056213379, + "step": 4713 + }, + { + "epoch": 7.19146728515625e-06, + "step": 4713, + "training_step_time": 0.10760116577148438 + }, + { + "epoch": 7.1929931640625e-06, + "model_forward_time": 0.025428056716918945, + "step": 4714 + }, + { + "epoch": 7.1929931640625e-06, + "step": 4714, + "training_step_time": 0.10931229591369629 + }, + { + "epoch": 7.19451904296875e-06, + "model_forward_time": 0.025420188903808594, + "step": 4715 + }, + { + "epoch": 7.19451904296875e-06, + "step": 4715, + "training_step_time": 0.11054682731628418 + }, + { + "epoch": 7.196044921875e-06, + "model_forward_time": 0.025307178497314453, + "step": 4716 + }, + { + "epoch": 7.196044921875e-06, + "step": 4716, + "training_step_time": 0.10775995254516602 + }, + { + "epoch": 7.19757080078125e-06, + "model_forward_time": 0.025296926498413086, + "step": 4717 + }, + { + "epoch": 7.19757080078125e-06, + "step": 4717, + "training_step_time": 0.10805392265319824 + }, + { + "epoch": 7.1990966796875e-06, + "model_forward_time": 0.025362253189086914, + "step": 4718 + }, + { + "epoch": 7.1990966796875e-06, + "step": 4718, + "training_step_time": 0.11155128479003906 + }, + { + "epoch": 7.20062255859375e-06, + "model_forward_time": 0.02552938461303711, + "step": 4719 + }, + { + "epoch": 7.20062255859375e-06, + "step": 4719, + "training_step_time": 0.11119389533996582 + }, + { + "epoch": 7.2021484375e-06, + "grad_norm": 0.4955514669418335, + "learning_rate": 9.688328277720507e-05, + "loss": 0.0818, + "step": 4720 + }, + { + "epoch": 7.2021484375e-06, + "model_forward_time": 0.024996280670166016, + "step": 4720 + }, + { + "epoch": 7.2021484375e-06, + "step": 4720, + "training_step_time": 0.1117095947265625 + }, + { + "epoch": 7.20367431640625e-06, + "model_forward_time": 0.02483820915222168, + "step": 4721 + }, + { + "epoch": 7.20367431640625e-06, + "step": 4721, + "training_step_time": 0.10961270332336426 + }, + { + "epoch": 7.2052001953125e-06, + "model_forward_time": 0.02516341209411621, + "step": 4722 + }, + { + "epoch": 7.2052001953125e-06, + "step": 4722, + "training_step_time": 0.10739707946777344 + }, + { + "epoch": 7.20672607421875e-06, + "model_forward_time": 0.024984359741210938, + "step": 4723 + }, + { + "epoch": 7.20672607421875e-06, + "step": 4723, + "training_step_time": 0.11313128471374512 + }, + { + "epoch": 7.208251953125e-06, + "model_forward_time": 0.025177955627441406, + "step": 4724 + }, + { + "epoch": 7.208251953125e-06, + "step": 4724, + "training_step_time": 0.10971808433532715 + }, + { + "epoch": 7.20977783203125e-06, + "model_forward_time": 0.024903535842895508, + "step": 4725 + }, + { + "epoch": 7.20977783203125e-06, + "step": 4725, + "training_step_time": 0.10666942596435547 + }, + { + "epoch": 7.2113037109375e-06, + "model_forward_time": 0.02503657341003418, + "step": 4726 + }, + { + "epoch": 7.2113037109375e-06, + "step": 4726, + "training_step_time": 0.11020350456237793 + }, + { + "epoch": 7.21282958984375e-06, + "model_forward_time": 0.025382280349731445, + "step": 4727 + }, + { + "epoch": 7.21282958984375e-06, + "step": 4727, + "training_step_time": 0.10953283309936523 + }, + { + "epoch": 7.21435546875e-06, + "model_forward_time": 0.025365352630615234, + "step": 4728 + }, + { + "epoch": 7.21435546875e-06, + "step": 4728, + "training_step_time": 0.10731363296508789 + }, + { + "epoch": 7.21588134765625e-06, + "model_forward_time": 0.025560379028320312, + "step": 4729 + }, + { + "epoch": 7.21588134765625e-06, + "step": 4729, + "training_step_time": 0.10726451873779297 + }, + { + "epoch": 7.2174072265625e-06, + "grad_norm": 0.2986734211444855, + "learning_rate": 9.686409947459458e-05, + "loss": 0.0894, + "step": 4730 + }, + { + "epoch": 7.2174072265625e-06, + "model_forward_time": 0.025249958038330078, + "step": 4730 + }, + { + "epoch": 7.2174072265625e-06, + "step": 4730, + "training_step_time": 0.11115741729736328 + }, + { + "epoch": 7.21893310546875e-06, + "model_forward_time": 0.025550365447998047, + "step": 4731 + }, + { + "epoch": 7.21893310546875e-06, + "step": 4731, + "training_step_time": 0.20619535446166992 + }, + { + "epoch": 7.220458984375e-06, + "model_forward_time": 0.024132966995239258, + "step": 4732 + }, + { + "epoch": 7.220458984375e-06, + "step": 4732, + "training_step_time": 0.11654090881347656 + }, + { + "epoch": 7.22198486328125e-06, + "model_forward_time": 0.0243375301361084, + "step": 4733 + }, + { + "epoch": 7.22198486328125e-06, + "step": 4733, + "training_step_time": 0.14156818389892578 + }, + { + "epoch": 7.2235107421875e-06, + "model_forward_time": 0.025305747985839844, + "step": 4734 + }, + { + "epoch": 7.2235107421875e-06, + "step": 4734, + "training_step_time": 0.16042780876159668 + }, + { + "epoch": 7.22503662109375e-06, + "model_forward_time": 0.024780750274658203, + "step": 4735 + }, + { + "epoch": 7.22503662109375e-06, + "step": 4735, + "training_step_time": 0.22370696067810059 + }, + { + "epoch": 7.2265625e-06, + "model_forward_time": 0.02442646026611328, + "step": 4736 + }, + { + "epoch": 7.2265625e-06, + "step": 4736, + "training_step_time": 0.11294078826904297 + }, + { + "epoch": 7.22808837890625e-06, + "model_forward_time": 0.02524280548095703, + "step": 4737 + }, + { + "epoch": 7.22808837890625e-06, + "step": 4737, + "training_step_time": 0.10611438751220703 + }, + { + "epoch": 7.2296142578125e-06, + "model_forward_time": 0.025254249572753906, + "step": 4738 + }, + { + "epoch": 7.2296142578125e-06, + "step": 4738, + "training_step_time": 0.10901308059692383 + }, + { + "epoch": 7.23114013671875e-06, + "model_forward_time": 0.025461912155151367, + "step": 4739 + }, + { + "epoch": 7.23114013671875e-06, + "step": 4739, + "training_step_time": 0.17905044555664062 + }, + { + "epoch": 7.232666015625e-06, + "grad_norm": 0.4050094783306122, + "learning_rate": 9.684485922768422e-05, + "loss": 0.0831, + "step": 4740 + }, + { + "epoch": 7.232666015625e-06, + "model_forward_time": 0.025724411010742188, + "step": 4740 + }, + { + "epoch": 7.232666015625e-06, + "step": 4740, + "training_step_time": 0.14004945755004883 + }, + { + "epoch": 7.23419189453125e-06, + "model_forward_time": 0.024344682693481445, + "step": 4741 + }, + { + "epoch": 7.23419189453125e-06, + "step": 4741, + "training_step_time": 0.10919427871704102 + }, + { + "epoch": 7.2357177734375e-06, + "model_forward_time": 0.02638101577758789, + "step": 4742 + }, + { + "epoch": 7.2357177734375e-06, + "step": 4742, + "training_step_time": 0.12328267097473145 + }, + { + "epoch": 7.23724365234375e-06, + "model_forward_time": 0.025105953216552734, + "step": 4743 + }, + { + "epoch": 7.23724365234375e-06, + "step": 4743, + "training_step_time": 0.12960028648376465 + }, + { + "epoch": 7.23876953125e-06, + "model_forward_time": 0.025790929794311523, + "step": 4744 + }, + { + "epoch": 7.23876953125e-06, + "step": 4744, + "training_step_time": 0.11344027519226074 + }, + { + "epoch": 7.24029541015625e-06, + "model_forward_time": 0.025326013565063477, + "step": 4745 + }, + { + "epoch": 7.24029541015625e-06, + "step": 4745, + "training_step_time": 0.12403416633605957 + }, + { + "epoch": 7.2418212890625e-06, + "model_forward_time": 0.025507688522338867, + "step": 4746 + }, + { + "epoch": 7.2418212890625e-06, + "step": 4746, + "training_step_time": 0.10900402069091797 + }, + { + "epoch": 7.24334716796875e-06, + "model_forward_time": 0.02517104148864746, + "step": 4747 + }, + { + "epoch": 7.24334716796875e-06, + "step": 4747, + "training_step_time": 0.10666775703430176 + }, + { + "epoch": 7.244873046875e-06, + "model_forward_time": 0.025615930557250977, + "step": 4748 + }, + { + "epoch": 7.244873046875e-06, + "step": 4748, + "training_step_time": 0.11224555969238281 + }, + { + "epoch": 7.24639892578125e-06, + "model_forward_time": 0.025099515914916992, + "step": 4749 + }, + { + "epoch": 7.24639892578125e-06, + "step": 4749, + "training_step_time": 0.14161157608032227 + }, + { + "epoch": 7.2479248046875e-06, + "grad_norm": 0.49814528226852417, + "learning_rate": 9.682556205985274e-05, + "loss": 0.0765, + "step": 4750 + }, + { + "epoch": 7.2479248046875e-06, + "model_forward_time": 0.0240936279296875, + "step": 4750 + }, + { + "epoch": 7.2479248046875e-06, + "step": 4750, + "training_step_time": 0.170487642288208 + }, + { + "epoch": 7.24945068359375e-06, + "model_forward_time": 0.024842500686645508, + "step": 4751 + }, + { + "epoch": 7.24945068359375e-06, + "step": 4751, + "training_step_time": 0.11417365074157715 + }, + { + "epoch": 7.2509765625e-06, + "model_forward_time": 0.0250244140625, + "step": 4752 + }, + { + "epoch": 7.2509765625e-06, + "step": 4752, + "training_step_time": 0.2241358757019043 + }, + { + "epoch": 7.25250244140625e-06, + "model_forward_time": 0.024251222610473633, + "step": 4753 + }, + { + "epoch": 7.25250244140625e-06, + "step": 4753, + "training_step_time": 0.14681768417358398 + }, + { + "epoch": 7.2540283203125e-06, + "model_forward_time": 0.024530887603759766, + "step": 4754 + }, + { + "epoch": 7.2540283203125e-06, + "step": 4754, + "training_step_time": 0.1694498062133789 + }, + { + "epoch": 7.25555419921875e-06, + "model_forward_time": 0.024786710739135742, + "step": 4755 + }, + { + "epoch": 7.25555419921875e-06, + "step": 4755, + "training_step_time": 0.1360621452331543 + }, + { + "epoch": 7.257080078125e-06, + "model_forward_time": 0.025052785873413086, + "step": 4756 + }, + { + "epoch": 7.257080078125e-06, + "step": 4756, + "training_step_time": 0.12085866928100586 + }, + { + "epoch": 7.25860595703125e-06, + "model_forward_time": 0.024811744689941406, + "step": 4757 + }, + { + "epoch": 7.25860595703125e-06, + "step": 4757, + "training_step_time": 0.11929082870483398 + }, + { + "epoch": 7.2601318359375e-06, + "model_forward_time": 0.024414777755737305, + "step": 4758 + }, + { + "epoch": 7.2601318359375e-06, + "step": 4758, + "training_step_time": 0.1154484748840332 + }, + { + "epoch": 7.26165771484375e-06, + "model_forward_time": 0.024258136749267578, + "step": 4759 + }, + { + "epoch": 7.26165771484375e-06, + "step": 4759, + "training_step_time": 0.11262106895446777 + }, + { + "epoch": 7.26318359375e-06, + "grad_norm": 0.5395764112472534, + "learning_rate": 9.6806207994548e-05, + "loss": 0.0837, + "step": 4760 + }, + { + "epoch": 7.26318359375e-06, + "model_forward_time": 0.024302244186401367, + "step": 4760 + }, + { + "epoch": 7.26318359375e-06, + "step": 4760, + "training_step_time": 0.10857677459716797 + }, + { + "epoch": 7.26470947265625e-06, + "model_forward_time": 0.02565765380859375, + "step": 4761 + }, + { + "epoch": 7.26470947265625e-06, + "step": 4761, + "training_step_time": 0.11458134651184082 + }, + { + "epoch": 7.2662353515625e-06, + "model_forward_time": 0.025076627731323242, + "step": 4762 + }, + { + "epoch": 7.2662353515625e-06, + "step": 4762, + "training_step_time": 0.11231398582458496 + }, + { + "epoch": 7.26776123046875e-06, + "model_forward_time": 0.02521228790283203, + "step": 4763 + }, + { + "epoch": 7.26776123046875e-06, + "step": 4763, + "training_step_time": 0.1078341007232666 + }, + { + "epoch": 7.269287109375e-06, + "model_forward_time": 0.025334596633911133, + "step": 4764 + }, + { + "epoch": 7.269287109375e-06, + "step": 4764, + "training_step_time": 0.11000871658325195 + }, + { + "epoch": 7.27081298828125e-06, + "model_forward_time": 0.024931907653808594, + "step": 4765 + }, + { + "epoch": 7.27081298828125e-06, + "step": 4765, + "training_step_time": 0.1079704761505127 + }, + { + "epoch": 7.2723388671875e-06, + "model_forward_time": 0.025057554244995117, + "step": 4766 + }, + { + "epoch": 7.2723388671875e-06, + "step": 4766, + "training_step_time": 0.11118173599243164 + }, + { + "epoch": 7.27386474609375e-06, + "model_forward_time": 0.025484561920166016, + "step": 4767 + }, + { + "epoch": 7.27386474609375e-06, + "step": 4767, + "training_step_time": 0.11023402214050293 + }, + { + "epoch": 7.275390625e-06, + "model_forward_time": 0.024859189987182617, + "step": 4768 + }, + { + "epoch": 7.275390625e-06, + "step": 4768, + "training_step_time": 0.10971426963806152 + }, + { + "epoch": 7.27691650390625e-06, + "model_forward_time": 0.025653600692749023, + "step": 4769 + }, + { + "epoch": 7.27691650390625e-06, + "step": 4769, + "training_step_time": 0.11468052864074707 + }, + { + "epoch": 7.2784423828125e-06, + "grad_norm": 0.4616451859474182, + "learning_rate": 9.6786797055287e-05, + "loss": 0.0988, + "step": 4770 + }, + { + "epoch": 7.2784423828125e-06, + "model_forward_time": 0.02511739730834961, + "step": 4770 + }, + { + "epoch": 7.2784423828125e-06, + "step": 4770, + "training_step_time": 0.10827922821044922 + }, + { + "epoch": 7.27996826171875e-06, + "model_forward_time": 0.025227785110473633, + "step": 4771 + }, + { + "epoch": 7.27996826171875e-06, + "step": 4771, + "training_step_time": 0.10714459419250488 + }, + { + "epoch": 7.281494140625e-06, + "model_forward_time": 0.024961471557617188, + "step": 4772 + }, + { + "epoch": 7.281494140625e-06, + "step": 4772, + "training_step_time": 0.11166548728942871 + }, + { + "epoch": 7.28302001953125e-06, + "model_forward_time": 0.02535867691040039, + "step": 4773 + }, + { + "epoch": 7.28302001953125e-06, + "step": 4773, + "training_step_time": 0.10982370376586914 + }, + { + "epoch": 7.2845458984375e-06, + "model_forward_time": 0.0250699520111084, + "step": 4774 + }, + { + "epoch": 7.2845458984375e-06, + "step": 4774, + "training_step_time": 0.10844683647155762 + }, + { + "epoch": 7.28607177734375e-06, + "model_forward_time": 0.025260210037231445, + "step": 4775 + }, + { + "epoch": 7.28607177734375e-06, + "step": 4775, + "training_step_time": 0.10875964164733887 + }, + { + "epoch": 7.28759765625e-06, + "model_forward_time": 0.025423288345336914, + "step": 4776 + }, + { + "epoch": 7.28759765625e-06, + "step": 4776, + "training_step_time": 0.16981196403503418 + }, + { + "epoch": 7.28912353515625e-06, + "model_forward_time": 0.02450847625732422, + "step": 4777 + }, + { + "epoch": 7.28912353515625e-06, + "step": 4777, + "training_step_time": 0.12093997001647949 + }, + { + "epoch": 7.2906494140625e-06, + "model_forward_time": 0.025540828704833984, + "step": 4778 + }, + { + "epoch": 7.2906494140625e-06, + "step": 4778, + "training_step_time": 0.127671480178833 + }, + { + "epoch": 7.29217529296875e-06, + "model_forward_time": 0.025681734085083008, + "step": 4779 + }, + { + "epoch": 7.29217529296875e-06, + "step": 4779, + "training_step_time": 0.10622692108154297 + }, + { + "epoch": 7.293701171875e-06, + "grad_norm": 0.4182220697402954, + "learning_rate": 9.676732926565585e-05, + "loss": 0.0882, + "step": 4780 + }, + { + "epoch": 7.293701171875e-06, + "model_forward_time": 0.025564908981323242, + "step": 4780 + }, + { + "epoch": 7.293701171875e-06, + "step": 4780, + "training_step_time": 0.1468055248260498 + }, + { + "epoch": 7.29522705078125e-06, + "model_forward_time": 0.02535271644592285, + "step": 4781 + }, + { + "epoch": 7.29522705078125e-06, + "step": 4781, + "training_step_time": 0.1356792449951172 + }, + { + "epoch": 7.2967529296875e-06, + "model_forward_time": 0.026181697845458984, + "step": 4782 + }, + { + "epoch": 7.2967529296875e-06, + "step": 4782, + "training_step_time": 0.11040091514587402 + }, + { + "epoch": 7.29827880859375e-06, + "model_forward_time": 0.024973154067993164, + "step": 4783 + }, + { + "epoch": 7.29827880859375e-06, + "step": 4783, + "training_step_time": 0.1093595027923584 + }, + { + "epoch": 7.2998046875e-06, + "model_forward_time": 0.02541971206665039, + "step": 4784 + }, + { + "epoch": 7.2998046875e-06, + "step": 4784, + "training_step_time": 0.1130976676940918 + }, + { + "epoch": 7.30133056640625e-06, + "model_forward_time": 0.025498390197753906, + "step": 4785 + }, + { + "epoch": 7.30133056640625e-06, + "step": 4785, + "training_step_time": 0.17387175559997559 + }, + { + "epoch": 7.3028564453125e-06, + "model_forward_time": 0.024752140045166016, + "step": 4786 + }, + { + "epoch": 7.3028564453125e-06, + "step": 4786, + "training_step_time": 0.13927507400512695 + }, + { + "epoch": 7.30438232421875e-06, + "model_forward_time": 0.025344371795654297, + "step": 4787 + }, + { + "epoch": 7.30438232421875e-06, + "step": 4787, + "training_step_time": 0.20563936233520508 + }, + { + "epoch": 7.305908203125e-06, + "model_forward_time": 0.024556636810302734, + "step": 4788 + }, + { + "epoch": 7.305908203125e-06, + "step": 4788, + "training_step_time": 0.13536930084228516 + }, + { + "epoch": 7.30743408203125e-06, + "model_forward_time": 0.02483654022216797, + "step": 4789 + }, + { + "epoch": 7.30743408203125e-06, + "step": 4789, + "training_step_time": 0.12777328491210938 + }, + { + "epoch": 7.3089599609375e-06, + "grad_norm": 0.6424593329429626, + "learning_rate": 9.674780464930979e-05, + "loss": 0.085, + "step": 4790 + }, + { + "epoch": 7.3089599609375e-06, + "model_forward_time": 0.024939298629760742, + "step": 4790 + }, + { + "epoch": 7.3089599609375e-06, + "step": 4790, + "training_step_time": 0.18844985961914062 + }, + { + "epoch": 7.31048583984375e-06, + "model_forward_time": 0.024781465530395508, + "step": 4791 + }, + { + "epoch": 7.31048583984375e-06, + "step": 4791, + "training_step_time": 0.1173393726348877 + }, + { + "epoch": 7.31201171875e-06, + "model_forward_time": 0.02458333969116211, + "step": 4792 + }, + { + "epoch": 7.31201171875e-06, + "step": 4792, + "training_step_time": 0.11635994911193848 + }, + { + "epoch": 7.31353759765625e-06, + "model_forward_time": 0.025293588638305664, + "step": 4793 + }, + { + "epoch": 7.31353759765625e-06, + "step": 4793, + "training_step_time": 0.10970664024353027 + }, + { + "epoch": 7.3150634765625e-06, + "model_forward_time": 0.025192975997924805, + "step": 4794 + }, + { + "epoch": 7.3150634765625e-06, + "step": 4794, + "training_step_time": 0.11306452751159668 + }, + { + "epoch": 7.31658935546875e-06, + "model_forward_time": 0.02521681785583496, + "step": 4795 + }, + { + "epoch": 7.31658935546875e-06, + "step": 4795, + "training_step_time": 0.10897374153137207 + }, + { + "epoch": 7.318115234375e-06, + "model_forward_time": 0.025452136993408203, + "step": 4796 + }, + { + "epoch": 7.318115234375e-06, + "step": 4796, + "training_step_time": 0.1086111068725586 + }, + { + "epoch": 7.31964111328125e-06, + "model_forward_time": 0.025265216827392578, + "step": 4797 + }, + { + "epoch": 7.31964111328125e-06, + "step": 4797, + "training_step_time": 0.21549034118652344 + }, + { + "epoch": 7.3211669921875e-06, + "model_forward_time": 0.025507450103759766, + "step": 4798 + }, + { + "epoch": 7.3211669921875e-06, + "step": 4798, + "training_step_time": 0.12057685852050781 + }, + { + "epoch": 7.32269287109375e-06, + "model_forward_time": 0.02481245994567871, + "step": 4799 + }, + { + "epoch": 7.32269287109375e-06, + "step": 4799, + "training_step_time": 0.1065824031829834 + }, + { + "epoch": 7.32421875e-06, + "grad_norm": 0.7688208222389221, + "learning_rate": 9.672822322997305e-05, + "loss": 0.0851, + "step": 4800 + }, + { + "epoch": 7.32421875e-06, + "model_forward_time": 0.025484085083007812, + "step": 4800 + }, + { + "epoch": 7.32421875e-06, + "step": 4800, + "training_step_time": 0.21938180923461914 + }, + { + "epoch": 7.32574462890625e-06, + "model_forward_time": 0.024980545043945312, + "step": 4801 + }, + { + "epoch": 7.32574462890625e-06, + "step": 4801, + "training_step_time": 0.10861349105834961 + }, + { + "epoch": 7.3272705078125e-06, + "model_forward_time": 0.024562597274780273, + "step": 4802 + }, + { + "epoch": 7.3272705078125e-06, + "step": 4802, + "training_step_time": 0.1035916805267334 + }, + { + "epoch": 7.32879638671875e-06, + "model_forward_time": 0.02525925636291504, + "step": 4803 + }, + { + "epoch": 7.32879638671875e-06, + "step": 4803, + "training_step_time": 0.11091804504394531 + }, + { + "epoch": 7.330322265625e-06, + "model_forward_time": 0.025448322296142578, + "step": 4804 + }, + { + "epoch": 7.330322265625e-06, + "step": 4804, + "training_step_time": 0.10921454429626465 + }, + { + "epoch": 7.33184814453125e-06, + "model_forward_time": 0.025190353393554688, + "step": 4805 + }, + { + "epoch": 7.33184814453125e-06, + "step": 4805, + "training_step_time": 0.1101534366607666 + }, + { + "epoch": 7.3333740234375e-06, + "model_forward_time": 0.025453805923461914, + "step": 4806 + }, + { + "epoch": 7.3333740234375e-06, + "step": 4806, + "training_step_time": 0.1105642318725586 + }, + { + "epoch": 7.33489990234375e-06, + "model_forward_time": 0.02539229393005371, + "step": 4807 + }, + { + "epoch": 7.33489990234375e-06, + "step": 4807, + "training_step_time": 0.10724973678588867 + }, + { + "epoch": 7.33642578125e-06, + "model_forward_time": 0.02509903907775879, + "step": 4808 + }, + { + "epoch": 7.33642578125e-06, + "step": 4808, + "training_step_time": 0.10509634017944336 + }, + { + "epoch": 7.33795166015625e-06, + "model_forward_time": 0.025491952896118164, + "step": 4809 + }, + { + "epoch": 7.33795166015625e-06, + "step": 4809, + "training_step_time": 0.10988926887512207 + }, + { + "epoch": 7.3394775390625e-06, + "grad_norm": 0.3652362525463104, + "learning_rate": 9.67085850314389e-05, + "loss": 0.0785, + "step": 4810 + }, + { + "epoch": 7.3394775390625e-06, + "model_forward_time": 0.025037288665771484, + "step": 4810 + }, + { + "epoch": 7.3394775390625e-06, + "step": 4810, + "training_step_time": 0.10843038558959961 + }, + { + "epoch": 7.34100341796875e-06, + "model_forward_time": 0.025537729263305664, + "step": 4811 + }, + { + "epoch": 7.34100341796875e-06, + "step": 4811, + "training_step_time": 0.10916590690612793 + }, + { + "epoch": 7.342529296875e-06, + "model_forward_time": 0.02484130859375, + "step": 4812 + }, + { + "epoch": 7.342529296875e-06, + "step": 4812, + "training_step_time": 0.10716819763183594 + }, + { + "epoch": 7.34405517578125e-06, + "model_forward_time": 0.02549004554748535, + "step": 4813 + }, + { + "epoch": 7.34405517578125e-06, + "step": 4813, + "training_step_time": 0.10799288749694824 + }, + { + "epoch": 7.3455810546875e-06, + "model_forward_time": 0.026071548461914062, + "step": 4814 + }, + { + "epoch": 7.3455810546875e-06, + "step": 4814, + "training_step_time": 0.10906171798706055 + }, + { + "epoch": 7.34710693359375e-06, + "model_forward_time": 0.025719642639160156, + "step": 4815 + }, + { + "epoch": 7.34710693359375e-06, + "step": 4815, + "training_step_time": 0.10698795318603516 + }, + { + "epoch": 7.3486328125e-06, + "model_forward_time": 0.025721073150634766, + "step": 4816 + }, + { + "epoch": 7.3486328125e-06, + "step": 4816, + "training_step_time": 0.10698890686035156 + }, + { + "epoch": 7.35015869140625e-06, + "model_forward_time": 0.025378942489624023, + "step": 4817 + }, + { + "epoch": 7.35015869140625e-06, + "step": 4817, + "training_step_time": 0.10692906379699707 + }, + { + "epoch": 7.3516845703125e-06, + "model_forward_time": 0.025424957275390625, + "step": 4818 + }, + { + "epoch": 7.3516845703125e-06, + "step": 4818, + "training_step_time": 0.10910320281982422 + }, + { + "epoch": 7.35321044921875e-06, + "model_forward_time": 0.025832653045654297, + "step": 4819 + }, + { + "epoch": 7.35321044921875e-06, + "step": 4819, + "training_step_time": 0.10638761520385742 + }, + { + "epoch": 7.354736328125e-06, + "grad_norm": 0.6113207936286926, + "learning_rate": 9.668889007756961e-05, + "loss": 0.0911, + "step": 4820 + }, + { + "epoch": 7.354736328125e-06, + "model_forward_time": 0.025240182876586914, + "step": 4820 + }, + { + "epoch": 7.354736328125e-06, + "step": 4820, + "training_step_time": 0.10725903511047363 + }, + { + "epoch": 7.35626220703125e-06, + "model_forward_time": 0.025260448455810547, + "step": 4821 + }, + { + "epoch": 7.35626220703125e-06, + "step": 4821, + "training_step_time": 0.10784530639648438 + }, + { + "epoch": 7.3577880859375e-06, + "model_forward_time": 0.025668859481811523, + "step": 4822 + }, + { + "epoch": 7.3577880859375e-06, + "step": 4822, + "training_step_time": 0.1378769874572754 + }, + { + "epoch": 7.35931396484375e-06, + "model_forward_time": 0.025341272354125977, + "step": 4823 + }, + { + "epoch": 7.35931396484375e-06, + "step": 4823, + "training_step_time": 0.1064598560333252 + }, + { + "epoch": 7.36083984375e-06, + "model_forward_time": 0.025493860244750977, + "step": 4824 + }, + { + "epoch": 7.36083984375e-06, + "step": 4824, + "training_step_time": 0.13395953178405762 + }, + { + "epoch": 7.36236572265625e-06, + "model_forward_time": 0.025141477584838867, + "step": 4825 + }, + { + "epoch": 7.36236572265625e-06, + "step": 4825, + "training_step_time": 0.10646414756774902 + }, + { + "epoch": 7.3638916015625e-06, + "model_forward_time": 0.025498151779174805, + "step": 4826 + }, + { + "epoch": 7.3638916015625e-06, + "step": 4826, + "training_step_time": 0.15715265274047852 + }, + { + "epoch": 7.36541748046875e-06, + "model_forward_time": 0.02454066276550293, + "step": 4827 + }, + { + "epoch": 7.36541748046875e-06, + "step": 4827, + "training_step_time": 0.1482715606689453 + }, + { + "epoch": 7.366943359375e-06, + "model_forward_time": 0.02443099021911621, + "step": 4828 + }, + { + "epoch": 7.366943359375e-06, + "step": 4828, + "training_step_time": 0.21246743202209473 + }, + { + "epoch": 7.36846923828125e-06, + "model_forward_time": 0.025068998336791992, + "step": 4829 + }, + { + "epoch": 7.36846923828125e-06, + "step": 4829, + "training_step_time": 0.1546189785003662 + }, + { + "epoch": 7.3699951171875e-06, + "grad_norm": 0.329088419675827, + "learning_rate": 9.66691383922964e-05, + "loss": 0.0792, + "step": 4830 + }, + { + "epoch": 7.3699951171875e-06, + "model_forward_time": 0.024265289306640625, + "step": 4830 + }, + { + "epoch": 7.3699951171875e-06, + "step": 4830, + "training_step_time": 0.1656179428100586 + }, + { + "epoch": 7.37152099609375e-06, + "model_forward_time": 0.024873971939086914, + "step": 4831 + }, + { + "epoch": 7.37152099609375e-06, + "step": 4831, + "training_step_time": 0.18037652969360352 + }, + { + "epoch": 7.373046875e-06, + "model_forward_time": 0.025984764099121094, + "step": 4832 + }, + { + "epoch": 7.373046875e-06, + "step": 4832, + "training_step_time": 0.14254474639892578 + }, + { + "epoch": 7.37457275390625e-06, + "model_forward_time": 0.028557300567626953, + "step": 4833 + }, + { + "epoch": 7.37457275390625e-06, + "step": 4833, + "training_step_time": 0.12417340278625488 + }, + { + "epoch": 7.3760986328125e-06, + "model_forward_time": 0.024771928787231445, + "step": 4834 + }, + { + "epoch": 7.3760986328125e-06, + "step": 4834, + "training_step_time": 0.11800670623779297 + }, + { + "epoch": 7.37762451171875e-06, + "model_forward_time": 0.025444984436035156, + "step": 4835 + }, + { + "epoch": 7.37762451171875e-06, + "step": 4835, + "training_step_time": 0.16401004791259766 + }, + { + "epoch": 7.379150390625e-06, + "model_forward_time": 0.024940013885498047, + "step": 4836 + }, + { + "epoch": 7.379150390625e-06, + "step": 4836, + "training_step_time": 0.1127314567565918 + }, + { + "epoch": 7.38067626953125e-06, + "model_forward_time": 0.024539709091186523, + "step": 4837 + }, + { + "epoch": 7.38067626953125e-06, + "step": 4837, + "training_step_time": 0.11633038520812988 + }, + { + "epoch": 7.3822021484375e-06, + "model_forward_time": 0.02541804313659668, + "step": 4838 + }, + { + "epoch": 7.3822021484375e-06, + "step": 4838, + "training_step_time": 0.11622262001037598 + }, + { + "epoch": 7.38372802734375e-06, + "model_forward_time": 0.025007963180541992, + "step": 4839 + }, + { + "epoch": 7.38372802734375e-06, + "step": 4839, + "training_step_time": 0.11131119728088379 + }, + { + "epoch": 7.38525390625e-06, + "grad_norm": 0.32527342438697815, + "learning_rate": 9.664932999961942e-05, + "loss": 0.0816, + "step": 4840 + }, + { + "epoch": 7.38525390625e-06, + "model_forward_time": 0.025327682495117188, + "step": 4840 + }, + { + "epoch": 7.38525390625e-06, + "step": 4840, + "training_step_time": 0.11251616477966309 + }, + { + "epoch": 7.38677978515625e-06, + "model_forward_time": 0.025928735733032227, + "step": 4841 + }, + { + "epoch": 7.38677978515625e-06, + "step": 4841, + "training_step_time": 0.10884833335876465 + }, + { + "epoch": 7.3883056640625e-06, + "model_forward_time": 0.02536630630493164, + "step": 4842 + }, + { + "epoch": 7.3883056640625e-06, + "step": 4842, + "training_step_time": 0.21310710906982422 + }, + { + "epoch": 7.38983154296875e-06, + "model_forward_time": 0.02477860450744629, + "step": 4843 + }, + { + "epoch": 7.38983154296875e-06, + "step": 4843, + "training_step_time": 0.11549973487854004 + }, + { + "epoch": 7.391357421875e-06, + "model_forward_time": 0.0248260498046875, + "step": 4844 + }, + { + "epoch": 7.391357421875e-06, + "step": 4844, + "training_step_time": 0.11368012428283691 + }, + { + "epoch": 7.39288330078125e-06, + "model_forward_time": 0.02546381950378418, + "step": 4845 + }, + { + "epoch": 7.39288330078125e-06, + "step": 4845, + "training_step_time": 0.17196416854858398 + }, + { + "epoch": 7.3944091796875e-06, + "model_forward_time": 0.025854110717773438, + "step": 4846 + }, + { + "epoch": 7.3944091796875e-06, + "step": 4846, + "training_step_time": 0.1599137783050537 + }, + { + "epoch": 7.39593505859375e-06, + "model_forward_time": 0.025415897369384766, + "step": 4847 + }, + { + "epoch": 7.39593505859375e-06, + "step": 4847, + "training_step_time": 0.10622811317443848 + }, + { + "epoch": 7.3974609375e-06, + "model_forward_time": 0.02519702911376953, + "step": 4848 + }, + { + "epoch": 7.3974609375e-06, + "step": 4848, + "training_step_time": 0.10799169540405273 + }, + { + "epoch": 7.39898681640625e-06, + "model_forward_time": 0.025924205780029297, + "step": 4849 + }, + { + "epoch": 7.39898681640625e-06, + "step": 4849, + "training_step_time": 0.11356973648071289 + }, + { + "epoch": 7.4005126953125e-06, + "grad_norm": 0.4368501901626587, + "learning_rate": 9.662946492360776e-05, + "loss": 0.0825, + "step": 4850 + }, + { + "epoch": 7.4005126953125e-06, + "model_forward_time": 0.025371551513671875, + "step": 4850 + }, + { + "epoch": 7.4005126953125e-06, + "step": 4850, + "training_step_time": 0.1088564395904541 + }, + { + "epoch": 7.40203857421875e-06, + "model_forward_time": 0.025156736373901367, + "step": 4851 + }, + { + "epoch": 7.40203857421875e-06, + "step": 4851, + "training_step_time": 0.10562515258789062 + }, + { + "epoch": 7.403564453125e-06, + "model_forward_time": 0.025761842727661133, + "step": 4852 + }, + { + "epoch": 7.403564453125e-06, + "step": 4852, + "training_step_time": 0.10777449607849121 + }, + { + "epoch": 7.40509033203125e-06, + "model_forward_time": 0.02581501007080078, + "step": 4853 + }, + { + "epoch": 7.40509033203125e-06, + "step": 4853, + "training_step_time": 0.10723352432250977 + }, + { + "epoch": 7.4066162109375e-06, + "model_forward_time": 0.02555227279663086, + "step": 4854 + }, + { + "epoch": 7.4066162109375e-06, + "step": 4854, + "training_step_time": 0.10753989219665527 + }, + { + "epoch": 7.40814208984375e-06, + "model_forward_time": 0.025534629821777344, + "step": 4855 + }, + { + "epoch": 7.40814208984375e-06, + "step": 4855, + "training_step_time": 0.10834622383117676 + }, + { + "epoch": 7.40966796875e-06, + "model_forward_time": 0.025435924530029297, + "step": 4856 + }, + { + "epoch": 7.40966796875e-06, + "step": 4856, + "training_step_time": 0.10753273963928223 + }, + { + "epoch": 7.41119384765625e-06, + "model_forward_time": 0.02552199363708496, + "step": 4857 + }, + { + "epoch": 7.41119384765625e-06, + "step": 4857, + "training_step_time": 0.10619354248046875 + }, + { + "epoch": 7.4127197265625e-06, + "model_forward_time": 0.025669574737548828, + "step": 4858 + }, + { + "epoch": 7.4127197265625e-06, + "step": 4858, + "training_step_time": 0.11019539833068848 + }, + { + "epoch": 7.41424560546875e-06, + "model_forward_time": 0.02552938461303711, + "step": 4859 + }, + { + "epoch": 7.41424560546875e-06, + "step": 4859, + "training_step_time": 0.10848450660705566 + }, + { + "epoch": 7.415771484375e-06, + "grad_norm": 0.32892781496047974, + "learning_rate": 9.660954318839933e-05, + "loss": 0.078, + "step": 4860 + }, + { + "epoch": 7.415771484375e-06, + "model_forward_time": 0.02518939971923828, + "step": 4860 + }, + { + "epoch": 7.415771484375e-06, + "step": 4860, + "training_step_time": 0.10766482353210449 + }, + { + "epoch": 7.41729736328125e-06, + "model_forward_time": 0.025138378143310547, + "step": 4861 + }, + { + "epoch": 7.41729736328125e-06, + "step": 4861, + "training_step_time": 0.10631537437438965 + }, + { + "epoch": 7.4188232421875e-06, + "model_forward_time": 0.025055408477783203, + "step": 4862 + }, + { + "epoch": 7.4188232421875e-06, + "step": 4862, + "training_step_time": 0.10762691497802734 + }, + { + "epoch": 7.42034912109375e-06, + "model_forward_time": 0.025050878524780273, + "step": 4863 + }, + { + "epoch": 7.42034912109375e-06, + "step": 4863, + "training_step_time": 0.1134636402130127 + }, + { + "epoch": 7.421875e-06, + "model_forward_time": 0.025411605834960938, + "step": 4864 + }, + { + "epoch": 7.421875e-06, + "step": 4864, + "training_step_time": 0.11272764205932617 + }, + { + "epoch": 7.42340087890625e-06, + "model_forward_time": 0.02487945556640625, + "step": 4865 + }, + { + "epoch": 7.42340087890625e-06, + "step": 4865, + "training_step_time": 0.10802841186523438 + }, + { + "epoch": 7.4249267578125e-06, + "model_forward_time": 0.025154829025268555, + "step": 4866 + }, + { + "epoch": 7.4249267578125e-06, + "step": 4866, + "training_step_time": 0.11296749114990234 + }, + { + "epoch": 7.42645263671875e-06, + "model_forward_time": 0.02513742446899414, + "step": 4867 + }, + { + "epoch": 7.42645263671875e-06, + "step": 4867, + "training_step_time": 0.133314847946167 + }, + { + "epoch": 7.427978515625e-06, + "model_forward_time": 0.02507185935974121, + "step": 4868 + }, + { + "epoch": 7.427978515625e-06, + "step": 4868, + "training_step_time": 0.12149262428283691 + }, + { + "epoch": 7.42950439453125e-06, + "model_forward_time": 0.025061845779418945, + "step": 4869 + }, + { + "epoch": 7.42950439453125e-06, + "step": 4869, + "training_step_time": 0.11511659622192383 + }, + { + "epoch": 7.4310302734375e-06, + "grad_norm": 0.5905614495277405, + "learning_rate": 9.658956481820094e-05, + "loss": 0.085, + "step": 4870 + }, + { + "epoch": 7.4310302734375e-06, + "model_forward_time": 0.025396108627319336, + "step": 4870 + }, + { + "epoch": 7.4310302734375e-06, + "step": 4870, + "training_step_time": 0.11768198013305664 + }, + { + "epoch": 7.43255615234375e-06, + "model_forward_time": 0.025496482849121094, + "step": 4871 + }, + { + "epoch": 7.43255615234375e-06, + "step": 4871, + "training_step_time": 0.18574738502502441 + }, + { + "epoch": 7.43408203125e-06, + "model_forward_time": 0.025285720825195312, + "step": 4872 + }, + { + "epoch": 7.43408203125e-06, + "step": 4872, + "training_step_time": 0.14091873168945312 + }, + { + "epoch": 7.43560791015625e-06, + "model_forward_time": 0.024801969528198242, + "step": 4873 + }, + { + "epoch": 7.43560791015625e-06, + "step": 4873, + "training_step_time": 0.11728286743164062 + }, + { + "epoch": 7.4371337890625e-06, + "model_forward_time": 0.025107145309448242, + "step": 4874 + }, + { + "epoch": 7.4371337890625e-06, + "step": 4874, + "training_step_time": 0.11075830459594727 + }, + { + "epoch": 7.43865966796875e-06, + "model_forward_time": 0.02520155906677246, + "step": 4875 + }, + { + "epoch": 7.43865966796875e-06, + "step": 4875, + "training_step_time": 0.11309528350830078 + }, + { + "epoch": 7.440185546875e-06, + "model_forward_time": 0.025040864944458008, + "step": 4876 + }, + { + "epoch": 7.440185546875e-06, + "step": 4876, + "training_step_time": 0.19763612747192383 + }, + { + "epoch": 7.44171142578125e-06, + "model_forward_time": 0.0266268253326416, + "step": 4877 + }, + { + "epoch": 7.44171142578125e-06, + "step": 4877, + "training_step_time": 0.15999817848205566 + }, + { + "epoch": 7.4432373046875e-06, + "model_forward_time": 0.024660348892211914, + "step": 4878 + }, + { + "epoch": 7.4432373046875e-06, + "step": 4878, + "training_step_time": 0.1298222541809082 + }, + { + "epoch": 7.44476318359375e-06, + "model_forward_time": 0.024340391159057617, + "step": 4879 + }, + { + "epoch": 7.44476318359375e-06, + "step": 4879, + "training_step_time": 0.1301717758178711 + }, + { + "epoch": 7.4462890625e-06, + "grad_norm": 0.3333793878555298, + "learning_rate": 9.65695298372882e-05, + "loss": 0.0809, + "step": 4880 + }, + { + "epoch": 7.4462890625e-06, + "model_forward_time": 0.024929046630859375, + "step": 4880 + }, + { + "epoch": 7.4462890625e-06, + "step": 4880, + "training_step_time": 0.11731147766113281 + }, + { + "epoch": 7.44781494140625e-06, + "model_forward_time": 0.025187969207763672, + "step": 4881 + }, + { + "epoch": 7.44781494140625e-06, + "step": 4881, + "training_step_time": 0.1163172721862793 + }, + { + "epoch": 7.4493408203125e-06, + "model_forward_time": 0.025133132934570312, + "step": 4882 + }, + { + "epoch": 7.4493408203125e-06, + "step": 4882, + "training_step_time": 0.11022019386291504 + }, + { + "epoch": 7.45086669921875e-06, + "model_forward_time": 0.026172876358032227, + "step": 4883 + }, + { + "epoch": 7.45086669921875e-06, + "step": 4883, + "training_step_time": 0.1091313362121582 + }, + { + "epoch": 7.452392578125e-06, + "model_forward_time": 0.025336503982543945, + "step": 4884 + }, + { + "epoch": 7.452392578125e-06, + "step": 4884, + "training_step_time": 0.10798287391662598 + }, + { + "epoch": 7.45391845703125e-06, + "model_forward_time": 0.025363445281982422, + "step": 4885 + }, + { + "epoch": 7.45391845703125e-06, + "step": 4885, + "training_step_time": 0.10889506340026855 + }, + { + "epoch": 7.4554443359375e-06, + "model_forward_time": 0.025667667388916016, + "step": 4886 + }, + { + "epoch": 7.4554443359375e-06, + "step": 4886, + "training_step_time": 0.10933065414428711 + }, + { + "epoch": 7.45697021484375e-06, + "model_forward_time": 0.025874614715576172, + "step": 4887 + }, + { + "epoch": 7.45697021484375e-06, + "step": 4887, + "training_step_time": 0.10619497299194336 + }, + { + "epoch": 7.45849609375e-06, + "model_forward_time": 0.0258944034576416, + "step": 4888 + }, + { + "epoch": 7.45849609375e-06, + "step": 4888, + "training_step_time": 0.11079168319702148 + }, + { + "epoch": 7.46002197265625e-06, + "model_forward_time": 0.025641679763793945, + "step": 4889 + }, + { + "epoch": 7.46002197265625e-06, + "step": 4889, + "training_step_time": 0.11134529113769531 + }, + { + "epoch": 7.4615478515625e-06, + "grad_norm": 0.5133267641067505, + "learning_rate": 9.654943827000548e-05, + "loss": 0.1024, + "step": 4890 + }, + { + "epoch": 7.4615478515625e-06, + "model_forward_time": 0.02568507194519043, + "step": 4890 + }, + { + "epoch": 7.4615478515625e-06, + "step": 4890, + "training_step_time": 0.10720133781433105 + }, + { + "epoch": 7.46307373046875e-06, + "model_forward_time": 0.02544999122619629, + "step": 4891 + }, + { + "epoch": 7.46307373046875e-06, + "step": 4891, + "training_step_time": 0.10991883277893066 + }, + { + "epoch": 7.464599609375e-06, + "model_forward_time": 0.025607824325561523, + "step": 4892 + }, + { + "epoch": 7.464599609375e-06, + "step": 4892, + "training_step_time": 0.10703444480895996 + }, + { + "epoch": 7.46612548828125e-06, + "model_forward_time": 0.025355100631713867, + "step": 4893 + }, + { + "epoch": 7.46612548828125e-06, + "step": 4893, + "training_step_time": 0.10680198669433594 + }, + { + "epoch": 7.4676513671875e-06, + "model_forward_time": 0.025754928588867188, + "step": 4894 + }, + { + "epoch": 7.4676513671875e-06, + "step": 4894, + "training_step_time": 0.12208080291748047 + }, + { + "epoch": 7.46917724609375e-06, + "model_forward_time": 0.025475502014160156, + "step": 4895 + }, + { + "epoch": 7.46917724609375e-06, + "step": 4895, + "training_step_time": 0.10849285125732422 + }, + { + "epoch": 7.470703125e-06, + "model_forward_time": 0.02537083625793457, + "step": 4896 + }, + { + "epoch": 7.470703125e-06, + "step": 4896, + "training_step_time": 0.11135053634643555 + }, + { + "epoch": 7.47222900390625e-06, + "model_forward_time": 0.02474355697631836, + "step": 4897 + }, + { + "epoch": 7.47222900390625e-06, + "step": 4897, + "training_step_time": 0.11593842506408691 + }, + { + "epoch": 7.4737548828125e-06, + "model_forward_time": 0.0256803035736084, + "step": 4898 + }, + { + "epoch": 7.4737548828125e-06, + "step": 4898, + "training_step_time": 0.13811230659484863 + }, + { + "epoch": 7.47528076171875e-06, + "model_forward_time": 0.024883031845092773, + "step": 4899 + }, + { + "epoch": 7.47528076171875e-06, + "step": 4899, + "training_step_time": 0.1265702247619629 + }, + { + "epoch": 7.476806640625e-06, + "grad_norm": 0.44653648138046265, + "learning_rate": 9.652929014076593e-05, + "loss": 0.0724, + "step": 4900 + }, + { + "epoch": 7.476806640625e-06, + "model_forward_time": 0.02468109130859375, + "step": 4900 + }, + { + "epoch": 7.476806640625e-06, + "step": 4900, + "training_step_time": 0.1215810775756836 + }, + { + "epoch": 7.47833251953125e-06, + "model_forward_time": 0.02508831024169922, + "step": 4901 + }, + { + "epoch": 7.47833251953125e-06, + "step": 4901, + "training_step_time": 0.1208188533782959 + }, + { + "epoch": 7.4798583984375e-06, + "model_forward_time": 0.02523493766784668, + "step": 4902 + }, + { + "epoch": 7.4798583984375e-06, + "step": 4902, + "training_step_time": 0.11416935920715332 + }, + { + "epoch": 7.48138427734375e-06, + "model_forward_time": 0.025150060653686523, + "step": 4903 + }, + { + "epoch": 7.48138427734375e-06, + "step": 4903, + "training_step_time": 0.11278820037841797 + }, + { + "epoch": 7.48291015625e-06, + "model_forward_time": 0.027357816696166992, + "step": 4904 + }, + { + "epoch": 7.48291015625e-06, + "step": 4904, + "training_step_time": 0.11539578437805176 + }, + { + "epoch": 7.48443603515625e-06, + "model_forward_time": 0.024977445602416992, + "step": 4905 + }, + { + "epoch": 7.48443603515625e-06, + "step": 4905, + "training_step_time": 0.1111152172088623 + }, + { + "epoch": 7.4859619140625e-06, + "model_forward_time": 0.025313854217529297, + "step": 4906 + }, + { + "epoch": 7.4859619140625e-06, + "step": 4906, + "training_step_time": 0.11122775077819824 + }, + { + "epoch": 7.48748779296875e-06, + "model_forward_time": 0.02551102638244629, + "step": 4907 + }, + { + "epoch": 7.48748779296875e-06, + "step": 4907, + "training_step_time": 0.10899519920349121 + }, + { + "epoch": 7.489013671875e-06, + "model_forward_time": 0.02572178840637207, + "step": 4908 + }, + { + "epoch": 7.489013671875e-06, + "step": 4908, + "training_step_time": 0.11268043518066406 + }, + { + "epoch": 7.49053955078125e-06, + "model_forward_time": 0.0253603458404541, + "step": 4909 + }, + { + "epoch": 7.49053955078125e-06, + "step": 4909, + "training_step_time": 0.11006927490234375 + }, + { + "epoch": 7.4920654296875e-06, + "grad_norm": 0.41987279057502747, + "learning_rate": 9.650908547405144e-05, + "loss": 0.0763, + "step": 4910 + }, + { + "epoch": 7.4920654296875e-06, + "model_forward_time": 0.025511980056762695, + "step": 4910 + }, + { + "epoch": 7.4920654296875e-06, + "step": 4910, + "training_step_time": 0.11098289489746094 + }, + { + "epoch": 7.49359130859375e-06, + "model_forward_time": 0.02529740333557129, + "step": 4911 + }, + { + "epoch": 7.49359130859375e-06, + "step": 4911, + "training_step_time": 0.10816621780395508 + }, + { + "epoch": 7.4951171875e-06, + "model_forward_time": 0.02529740333557129, + "step": 4912 + }, + { + "epoch": 7.4951171875e-06, + "step": 4912, + "training_step_time": 0.11208987236022949 + }, + { + "epoch": 7.49664306640625e-06, + "model_forward_time": 0.025233745574951172, + "step": 4913 + }, + { + "epoch": 7.49664306640625e-06, + "step": 4913, + "training_step_time": 0.10738110542297363 + }, + { + "epoch": 7.4981689453125e-06, + "model_forward_time": 0.02541375160217285, + "step": 4914 + }, + { + "epoch": 7.4981689453125e-06, + "step": 4914, + "training_step_time": 0.14761805534362793 + }, + { + "epoch": 7.49969482421875e-06, + "model_forward_time": 0.025133371353149414, + "step": 4915 + }, + { + "epoch": 7.49969482421875e-06, + "step": 4915, + "training_step_time": 0.10699057579040527 + }, + { + "epoch": 7.501220703125e-06, + "model_forward_time": 0.02520298957824707, + "step": 4916 + }, + { + "epoch": 7.501220703125e-06, + "step": 4916, + "training_step_time": 0.10644841194152832 + }, + { + "epoch": 7.50274658203125e-06, + "model_forward_time": 0.0252382755279541, + "step": 4917 + }, + { + "epoch": 7.50274658203125e-06, + "step": 4917, + "training_step_time": 0.1490633487701416 + }, + { + "epoch": 7.5042724609375e-06, + "model_forward_time": 0.024939775466918945, + "step": 4918 + }, + { + "epoch": 7.5042724609375e-06, + "step": 4918, + "training_step_time": 0.18489670753479004 + }, + { + "epoch": 7.50579833984375e-06, + "model_forward_time": 0.024770736694335938, + "step": 4919 + }, + { + "epoch": 7.50579833984375e-06, + "step": 4919, + "training_step_time": 0.18448424339294434 + }, + { + "epoch": 7.50732421875e-06, + "grad_norm": 0.23898987472057343, + "learning_rate": 9.648882429441257e-05, + "loss": 0.0837, + "step": 4920 + }, + { + "epoch": 7.50732421875e-06, + "model_forward_time": 0.024801254272460938, + "step": 4920 + }, + { + "epoch": 7.50732421875e-06, + "step": 4920, + "training_step_time": 0.11690568923950195 + }, + { + "epoch": 7.50885009765625e-06, + "model_forward_time": 0.02475285530090332, + "step": 4921 + }, + { + "epoch": 7.50885009765625e-06, + "step": 4921, + "training_step_time": 0.11158609390258789 + }, + { + "epoch": 7.5103759765625e-06, + "model_forward_time": 0.02538895606994629, + "step": 4922 + }, + { + "epoch": 7.5103759765625e-06, + "step": 4922, + "training_step_time": 0.11053919792175293 + }, + { + "epoch": 7.51190185546875e-06, + "model_forward_time": 0.025562047958374023, + "step": 4923 + }, + { + "epoch": 7.51190185546875e-06, + "step": 4923, + "training_step_time": 0.19273734092712402 + }, + { + "epoch": 7.513427734375e-06, + "model_forward_time": 0.024800777435302734, + "step": 4924 + }, + { + "epoch": 7.513427734375e-06, + "step": 4924, + "training_step_time": 0.13716959953308105 + }, + { + "epoch": 7.51495361328125e-06, + "model_forward_time": 0.02483987808227539, + "step": 4925 + }, + { + "epoch": 7.51495361328125e-06, + "step": 4925, + "training_step_time": 0.12864446640014648 + }, + { + "epoch": 7.5164794921875e-06, + "model_forward_time": 0.025580644607543945, + "step": 4926 + }, + { + "epoch": 7.5164794921875e-06, + "step": 4926, + "training_step_time": 0.1304795742034912 + }, + { + "epoch": 7.51800537109375e-06, + "model_forward_time": 0.0253751277923584, + "step": 4927 + }, + { + "epoch": 7.51800537109375e-06, + "step": 4927, + "training_step_time": 0.11570143699645996 + }, + { + "epoch": 7.51953125e-06, + "model_forward_time": 0.025242090225219727, + "step": 4928 + }, + { + "epoch": 7.51953125e-06, + "step": 4928, + "training_step_time": 0.11199760437011719 + }, + { + "epoch": 7.52105712890625e-06, + "model_forward_time": 0.02543330192565918, + "step": 4929 + }, + { + "epoch": 7.52105712890625e-06, + "step": 4929, + "training_step_time": 0.1078336238861084 + }, + { + "epoch": 7.5225830078125e-06, + "grad_norm": 0.28251850605010986, + "learning_rate": 9.646850662646859e-05, + "loss": 0.0887, + "step": 4930 + }, + { + "epoch": 7.5225830078125e-06, + "model_forward_time": 0.02592945098876953, + "step": 4930 + }, + { + "epoch": 7.5225830078125e-06, + "step": 4930, + "training_step_time": 0.11133694648742676 + }, + { + "epoch": 7.52410888671875e-06, + "model_forward_time": 0.026216506958007812, + "step": 4931 + }, + { + "epoch": 7.52410888671875e-06, + "step": 4931, + "training_step_time": 0.11084461212158203 + }, + { + "epoch": 7.525634765625e-06, + "model_forward_time": 0.02541375160217285, + "step": 4932 + }, + { + "epoch": 7.525634765625e-06, + "step": 4932, + "training_step_time": 0.11019587516784668 + }, + { + "epoch": 7.52716064453125e-06, + "model_forward_time": 0.025500774383544922, + "step": 4933 + }, + { + "epoch": 7.52716064453125e-06, + "step": 4933, + "training_step_time": 0.11139369010925293 + }, + { + "epoch": 7.5286865234375e-06, + "model_forward_time": 0.02955341339111328, + "step": 4934 + }, + { + "epoch": 7.5286865234375e-06, + "step": 4934, + "training_step_time": 0.11224579811096191 + }, + { + "epoch": 7.53021240234375e-06, + "model_forward_time": 0.02563309669494629, + "step": 4935 + }, + { + "epoch": 7.53021240234375e-06, + "step": 4935, + "training_step_time": 0.2149190902709961 + }, + { + "epoch": 7.53173828125e-06, + "model_forward_time": 0.02507948875427246, + "step": 4936 + }, + { + "epoch": 7.53173828125e-06, + "step": 4936, + "training_step_time": 0.11234664916992188 + }, + { + "epoch": 7.53326416015625e-06, + "model_forward_time": 0.02517414093017578, + "step": 4937 + }, + { + "epoch": 7.53326416015625e-06, + "step": 4937, + "training_step_time": 0.10972094535827637 + }, + { + "epoch": 7.5347900390625e-06, + "model_forward_time": 0.025571346282958984, + "step": 4938 + }, + { + "epoch": 7.5347900390625e-06, + "step": 4938, + "training_step_time": 0.10860347747802734 + }, + { + "epoch": 7.53631591796875e-06, + "model_forward_time": 0.025551319122314453, + "step": 4939 + }, + { + "epoch": 7.53631591796875e-06, + "step": 4939, + "training_step_time": 0.10878515243530273 + }, + { + "epoch": 7.537841796875e-06, + "grad_norm": 0.39003708958625793, + "learning_rate": 9.644813249490735e-05, + "loss": 0.0709, + "step": 4940 + }, + { + "epoch": 7.537841796875e-06, + "model_forward_time": 0.025496244430541992, + "step": 4940 + }, + { + "epoch": 7.537841796875e-06, + "step": 4940, + "training_step_time": 0.1107637882232666 + }, + { + "epoch": 7.53936767578125e-06, + "model_forward_time": 0.02537083625793457, + "step": 4941 + }, + { + "epoch": 7.53936767578125e-06, + "step": 4941, + "training_step_time": 0.11152291297912598 + }, + { + "epoch": 7.5408935546875e-06, + "model_forward_time": 0.02521800994873047, + "step": 4942 + }, + { + "epoch": 7.5408935546875e-06, + "step": 4942, + "training_step_time": 0.10835933685302734 + }, + { + "epoch": 7.54241943359375e-06, + "model_forward_time": 0.02482748031616211, + "step": 4943 + }, + { + "epoch": 7.54241943359375e-06, + "step": 4943, + "training_step_time": 0.10524463653564453 + }, + { + "epoch": 7.5439453125e-06, + "model_forward_time": 0.025475263595581055, + "step": 4944 + }, + { + "epoch": 7.5439453125e-06, + "step": 4944, + "training_step_time": 0.1106879711151123 + }, + { + "epoch": 7.54547119140625e-06, + "model_forward_time": 0.025432348251342773, + "step": 4945 + }, + { + "epoch": 7.54547119140625e-06, + "step": 4945, + "training_step_time": 0.10875391960144043 + }, + { + "epoch": 7.5469970703125e-06, + "model_forward_time": 0.025447607040405273, + "step": 4946 + }, + { + "epoch": 7.5469970703125e-06, + "step": 4946, + "training_step_time": 0.10958719253540039 + }, + { + "epoch": 7.54852294921875e-06, + "model_forward_time": 0.025499582290649414, + "step": 4947 + }, + { + "epoch": 7.54852294921875e-06, + "step": 4947, + "training_step_time": 0.11153674125671387 + }, + { + "epoch": 7.550048828125e-06, + "model_forward_time": 0.02516341209411621, + "step": 4948 + }, + { + "epoch": 7.550048828125e-06, + "step": 4948, + "training_step_time": 0.10888934135437012 + }, + { + "epoch": 7.55157470703125e-06, + "model_forward_time": 0.025346755981445312, + "step": 4949 + }, + { + "epoch": 7.55157470703125e-06, + "step": 4949, + "training_step_time": 0.11162805557250977 + }, + { + "epoch": 7.5531005859375e-06, + "grad_norm": 0.47882208228111267, + "learning_rate": 9.642770192448536e-05, + "loss": 0.0885, + "step": 4950 + }, + { + "epoch": 7.5531005859375e-06, + "model_forward_time": 0.025487184524536133, + "step": 4950 + }, + { + "epoch": 7.5531005859375e-06, + "step": 4950, + "training_step_time": 0.11057901382446289 + }, + { + "epoch": 7.55462646484375e-06, + "model_forward_time": 0.02506732940673828, + "step": 4951 + }, + { + "epoch": 7.55462646484375e-06, + "step": 4951, + "training_step_time": 0.10875964164733887 + }, + { + "epoch": 7.55615234375e-06, + "model_forward_time": 0.024926424026489258, + "step": 4952 + }, + { + "epoch": 7.55615234375e-06, + "step": 4952, + "training_step_time": 0.10720324516296387 + }, + { + "epoch": 7.55767822265625e-06, + "model_forward_time": 0.025419950485229492, + "step": 4953 + }, + { + "epoch": 7.55767822265625e-06, + "step": 4953, + "training_step_time": 0.10667800903320312 + }, + { + "epoch": 7.5592041015625e-06, + "model_forward_time": 0.02504587173461914, + "step": 4954 + }, + { + "epoch": 7.5592041015625e-06, + "step": 4954, + "training_step_time": 0.10863447189331055 + }, + { + "epoch": 7.56072998046875e-06, + "model_forward_time": 0.025823354721069336, + "step": 4955 + }, + { + "epoch": 7.56072998046875e-06, + "step": 4955, + "training_step_time": 0.10754513740539551 + }, + { + "epoch": 7.562255859375e-06, + "model_forward_time": 0.025046110153198242, + "step": 4956 + }, + { + "epoch": 7.562255859375e-06, + "step": 4956, + "training_step_time": 0.14478564262390137 + }, + { + "epoch": 7.56378173828125e-06, + "model_forward_time": 0.02461862564086914, + "step": 4957 + }, + { + "epoch": 7.56378173828125e-06, + "step": 4957, + "training_step_time": 0.16135287284851074 + }, + { + "epoch": 7.5653076171875e-06, + "model_forward_time": 0.024411439895629883, + "step": 4958 + }, + { + "epoch": 7.5653076171875e-06, + "step": 4958, + "training_step_time": 0.15050053596496582 + }, + { + "epoch": 7.56683349609375e-06, + "model_forward_time": 0.02342987060546875, + "step": 4959 + }, + { + "epoch": 7.56683349609375e-06, + "step": 4959, + "training_step_time": 0.15452027320861816 + }, + { + "epoch": 7.568359375e-06, + "grad_norm": 0.31477200984954834, + "learning_rate": 9.640721494002769e-05, + "loss": 0.0724, + "step": 4960 + }, + { + "epoch": 7.568359375e-06, + "model_forward_time": 0.02475118637084961, + "step": 4960 + }, + { + "epoch": 7.568359375e-06, + "step": 4960, + "training_step_time": 0.16670513153076172 + }, + { + "epoch": 7.56988525390625e-06, + "model_forward_time": 0.02426624298095703, + "step": 4961 + }, + { + "epoch": 7.56988525390625e-06, + "step": 4961, + "training_step_time": 0.14691925048828125 + }, + { + "epoch": 7.5714111328125e-06, + "model_forward_time": 0.02520608901977539, + "step": 4962 + }, + { + "epoch": 7.5714111328125e-06, + "step": 4962, + "training_step_time": 0.17913126945495605 + }, + { + "epoch": 7.57293701171875e-06, + "model_forward_time": 0.02399444580078125, + "step": 4963 + }, + { + "epoch": 7.57293701171875e-06, + "step": 4963, + "training_step_time": 0.11542296409606934 + }, + { + "epoch": 7.574462890625e-06, + "model_forward_time": 0.023713350296020508, + "step": 4964 + }, + { + "epoch": 7.574462890625e-06, + "step": 4964, + "training_step_time": 0.11583471298217773 + }, + { + "epoch": 7.57598876953125e-06, + "model_forward_time": 0.024901151657104492, + "step": 4965 + }, + { + "epoch": 7.57598876953125e-06, + "step": 4965, + "training_step_time": 0.11467123031616211 + }, + { + "epoch": 7.5775146484375e-06, + "model_forward_time": 0.02547764778137207, + "step": 4966 + }, + { + "epoch": 7.5775146484375e-06, + "step": 4966, + "training_step_time": 0.11422514915466309 + }, + { + "epoch": 7.57904052734375e-06, + "model_forward_time": 0.025173187255859375, + "step": 4967 + }, + { + "epoch": 7.57904052734375e-06, + "step": 4967, + "training_step_time": 0.12285208702087402 + }, + { + "epoch": 7.58056640625e-06, + "model_forward_time": 0.025578737258911133, + "step": 4968 + }, + { + "epoch": 7.58056640625e-06, + "step": 4968, + "training_step_time": 0.19126558303833008 + }, + { + "epoch": 7.58209228515625e-06, + "model_forward_time": 0.023888111114501953, + "step": 4969 + }, + { + "epoch": 7.58209228515625e-06, + "step": 4969, + "training_step_time": 0.15446972846984863 + }, + { + "epoch": 7.5836181640625e-06, + "grad_norm": 0.3941463530063629, + "learning_rate": 9.638667156642794e-05, + "loss": 0.0949, + "step": 4970 + }, + { + "epoch": 7.5836181640625e-06, + "model_forward_time": 0.02462029457092285, + "step": 4970 + }, + { + "epoch": 7.5836181640625e-06, + "step": 4970, + "training_step_time": 0.12002444267272949 + }, + { + "epoch": 7.58514404296875e-06, + "model_forward_time": 0.024794816970825195, + "step": 4971 + }, + { + "epoch": 7.58514404296875e-06, + "step": 4971, + "training_step_time": 0.11831355094909668 + }, + { + "epoch": 7.586669921875e-06, + "model_forward_time": 0.025547266006469727, + "step": 4972 + }, + { + "epoch": 7.586669921875e-06, + "step": 4972, + "training_step_time": 0.11830830574035645 + }, + { + "epoch": 7.58819580078125e-06, + "model_forward_time": 0.025634765625, + "step": 4973 + }, + { + "epoch": 7.58819580078125e-06, + "step": 4973, + "training_step_time": 0.11304569244384766 + }, + { + "epoch": 7.5897216796875e-06, + "model_forward_time": 0.02518630027770996, + "step": 4974 + }, + { + "epoch": 7.5897216796875e-06, + "step": 4974, + "training_step_time": 0.11473965644836426 + }, + { + "epoch": 7.59124755859375e-06, + "model_forward_time": 0.025035858154296875, + "step": 4975 + }, + { + "epoch": 7.59124755859375e-06, + "step": 4975, + "training_step_time": 0.11049509048461914 + }, + { + "epoch": 7.5927734375e-06, + "model_forward_time": 0.025081157684326172, + "step": 4976 + }, + { + "epoch": 7.5927734375e-06, + "step": 4976, + "training_step_time": 0.11325383186340332 + }, + { + "epoch": 7.59429931640625e-06, + "model_forward_time": 0.02512669563293457, + "step": 4977 + }, + { + "epoch": 7.59429931640625e-06, + "step": 4977, + "training_step_time": 0.10912799835205078 + }, + { + "epoch": 7.5958251953125e-06, + "model_forward_time": 0.025065183639526367, + "step": 4978 + }, + { + "epoch": 7.5958251953125e-06, + "step": 4978, + "training_step_time": 0.10924196243286133 + }, + { + "epoch": 7.59735107421875e-06, + "model_forward_time": 0.025452613830566406, + "step": 4979 + }, + { + "epoch": 7.59735107421875e-06, + "step": 4979, + "training_step_time": 0.1107790470123291 + }, + { + "epoch": 7.598876953125e-06, + "grad_norm": 0.42142805457115173, + "learning_rate": 9.636607182864827e-05, + "loss": 0.0862, + "step": 4980 + }, + { + "epoch": 7.598876953125e-06, + "model_forward_time": 0.025710344314575195, + "step": 4980 + }, + { + "epoch": 7.598876953125e-06, + "step": 4980, + "training_step_time": 0.10665369033813477 + }, + { + "epoch": 7.60040283203125e-06, + "model_forward_time": 0.02704477310180664, + "step": 4981 + }, + { + "epoch": 7.60040283203125e-06, + "step": 4981, + "training_step_time": 0.10802435874938965 + }, + { + "epoch": 7.6019287109375e-06, + "model_forward_time": 0.025354385375976562, + "step": 4982 + }, + { + "epoch": 7.6019287109375e-06, + "step": 4982, + "training_step_time": 0.11356186866760254 + }, + { + "epoch": 7.60345458984375e-06, + "model_forward_time": 0.025492429733276367, + "step": 4983 + }, + { + "epoch": 7.60345458984375e-06, + "step": 4983, + "training_step_time": 0.11075949668884277 + }, + { + "epoch": 7.60498046875e-06, + "model_forward_time": 0.025390625, + "step": 4984 + }, + { + "epoch": 7.60498046875e-06, + "step": 4984, + "training_step_time": 0.10998678207397461 + }, + { + "epoch": 7.60650634765625e-06, + "model_forward_time": 0.027068376541137695, + "step": 4985 + }, + { + "epoch": 7.60650634765625e-06, + "step": 4985, + "training_step_time": 0.1377875804901123 + }, + { + "epoch": 7.6080322265625e-06, + "model_forward_time": 0.02533698081970215, + "step": 4986 + }, + { + "epoch": 7.6080322265625e-06, + "step": 4986, + "training_step_time": 0.2097783088684082 + }, + { + "epoch": 7.60955810546875e-06, + "model_forward_time": 0.0262300968170166, + "step": 4987 + }, + { + "epoch": 7.60955810546875e-06, + "step": 4987, + "training_step_time": 0.1419978141784668 + }, + { + "epoch": 7.611083984375e-06, + "model_forward_time": 0.02506232261657715, + "step": 4988 + }, + { + "epoch": 7.611083984375e-06, + "step": 4988, + "training_step_time": 0.13737893104553223 + }, + { + "epoch": 7.61260986328125e-06, + "model_forward_time": 0.024457693099975586, + "step": 4989 + }, + { + "epoch": 7.61260986328125e-06, + "step": 4989, + "training_step_time": 0.12887787818908691 + }, + { + "epoch": 7.6141357421875e-06, + "grad_norm": 0.49523815512657166, + "learning_rate": 9.634541575171929e-05, + "loss": 0.0783, + "step": 4990 + }, + { + "epoch": 7.6141357421875e-06, + "model_forward_time": 0.02469038963317871, + "step": 4990 + }, + { + "epoch": 7.6141357421875e-06, + "step": 4990, + "training_step_time": 0.1218104362487793 + }, + { + "epoch": 7.61566162109375e-06, + "model_forward_time": 0.02471137046813965, + "step": 4991 + }, + { + "epoch": 7.61566162109375e-06, + "step": 4991, + "training_step_time": 0.11813116073608398 + }, + { + "epoch": 7.6171875e-06, + "model_forward_time": 0.02520465850830078, + "step": 4992 + }, + { + "epoch": 7.6171875e-06, + "step": 4992, + "training_step_time": 0.12045454978942871 + }, + { + "epoch": 7.61871337890625e-06, + "model_forward_time": 0.02532052993774414, + "step": 4993 + }, + { + "epoch": 7.61871337890625e-06, + "step": 4993, + "training_step_time": 0.11654019355773926 + }, + { + "epoch": 7.6202392578125e-06, + "model_forward_time": 0.025274038314819336, + "step": 4994 + }, + { + "epoch": 7.6202392578125e-06, + "step": 4994, + "training_step_time": 0.1146087646484375 + }, + { + "epoch": 7.62176513671875e-06, + "model_forward_time": 0.025234460830688477, + "step": 4995 + }, + { + "epoch": 7.62176513671875e-06, + "step": 4995, + "training_step_time": 0.11065053939819336 + }, + { + "epoch": 7.623291015625e-06, + "model_forward_time": 0.025374174118041992, + "step": 4996 + }, + { + "epoch": 7.623291015625e-06, + "step": 4996, + "training_step_time": 0.10798358917236328 + }, + { + "epoch": 7.62481689453125e-06, + "model_forward_time": 0.026602745056152344, + "step": 4997 + }, + { + "epoch": 7.62481689453125e-06, + "step": 4997, + "training_step_time": 0.11116313934326172 + }, + { + "epoch": 7.6263427734375e-06, + "model_forward_time": 0.025016069412231445, + "step": 4998 + }, + { + "epoch": 7.6263427734375e-06, + "step": 4998, + "training_step_time": 0.10956907272338867 + }, + { + "epoch": 7.62786865234375e-06, + "model_forward_time": 0.025268077850341797, + "step": 4999 + }, + { + "epoch": 7.62786865234375e-06, + "step": 4999, + "training_step_time": 0.10773944854736328 + }, + { + "epoch": 7.62939453125e-06, + "grad_norm": 0.5361136794090271, + "learning_rate": 9.632470336074009e-05, + "loss": 0.0978, + "step": 5000 + }, + { + "epoch": 7.62939453125e-06, + "model_forward_time": 0.026927947998046875, + "step": 5000 + }, + { + "epoch": 7.62939453125e-06, + "step": 5000, + "training_step_time": 0.10859799385070801 + }, + { + "epoch": 7.63092041015625e-06, + "model_forward_time": 0.023974895477294922, + "step": 5001 + }, + { + "epoch": 7.63092041015625e-06, + "step": 5001, + "training_step_time": 0.10182738304138184 + }, + { + "epoch": 7.6324462890625e-06, + "model_forward_time": 0.02519059181213379, + "step": 5002 + }, + { + "epoch": 7.6324462890625e-06, + "step": 5002, + "training_step_time": 0.10440278053283691 + }, + { + "epoch": 7.63397216796875e-06, + "model_forward_time": 0.024693965911865234, + "step": 5003 + }, + { + "epoch": 7.63397216796875e-06, + "step": 5003, + "training_step_time": 0.11014342308044434 + }, + { + "epoch": 7.635498046875e-06, + "model_forward_time": 0.025823354721069336, + "step": 5004 + }, + { + "epoch": 7.635498046875e-06, + "step": 5004, + "training_step_time": 0.10577702522277832 + }, + { + "epoch": 7.63702392578125e-06, + "model_forward_time": 0.025117158889770508, + "step": 5005 + }, + { + "epoch": 7.63702392578125e-06, + "step": 5005, + "training_step_time": 0.10631299018859863 + }, + { + "epoch": 7.6385498046875e-06, + "model_forward_time": 0.02579355239868164, + "step": 5006 + }, + { + "epoch": 7.6385498046875e-06, + "step": 5006, + "training_step_time": 0.10712122917175293 + }, + { + "epoch": 7.64007568359375e-06, + "model_forward_time": 0.02536940574645996, + "step": 5007 + }, + { + "epoch": 7.64007568359375e-06, + "step": 5007, + "training_step_time": 0.10812020301818848 + }, + { + "epoch": 7.6416015625e-06, + "model_forward_time": 0.028054475784301758, + "step": 5008 + }, + { + "epoch": 7.6416015625e-06, + "step": 5008, + "training_step_time": 0.11114716529846191 + }, + { + "epoch": 7.64312744140625e-06, + "model_forward_time": 0.02658390998840332, + "step": 5009 + }, + { + "epoch": 7.64312744140625e-06, + "step": 5009, + "training_step_time": 0.10692739486694336 + }, + { + "epoch": 7.6446533203125e-06, + "grad_norm": 0.3657020926475525, + "learning_rate": 9.630393468087818e-05, + "loss": 0.0795, + "step": 5010 + }, + { + "epoch": 7.6446533203125e-06, + "model_forward_time": 0.025216341018676758, + "step": 5010 + }, + { + "epoch": 7.6446533203125e-06, + "step": 5010, + "training_step_time": 0.10995364189147949 + }, + { + "epoch": 7.64617919921875e-06, + "model_forward_time": 0.025510549545288086, + "step": 5011 + }, + { + "epoch": 7.64617919921875e-06, + "step": 5011, + "training_step_time": 0.11351227760314941 + }, + { + "epoch": 7.647705078125e-06, + "model_forward_time": 0.0254669189453125, + "step": 5012 + }, + { + "epoch": 7.647705078125e-06, + "step": 5012, + "training_step_time": 0.14704322814941406 + }, + { + "epoch": 7.64923095703125e-06, + "model_forward_time": 0.025402545928955078, + "step": 5013 + }, + { + "epoch": 7.64923095703125e-06, + "step": 5013, + "training_step_time": 0.12297320365905762 + }, + { + "epoch": 7.6507568359375e-06, + "model_forward_time": 0.025186777114868164, + "step": 5014 + }, + { + "epoch": 7.6507568359375e-06, + "step": 5014, + "training_step_time": 0.12395977973937988 + }, + { + "epoch": 7.65228271484375e-06, + "model_forward_time": 0.025627613067626953, + "step": 5015 + }, + { + "epoch": 7.65228271484375e-06, + "step": 5015, + "training_step_time": 0.14246249198913574 + }, + { + "epoch": 7.65380859375e-06, + "model_forward_time": 0.02500748634338379, + "step": 5016 + }, + { + "epoch": 7.65380859375e-06, + "step": 5016, + "training_step_time": 0.11869072914123535 + }, + { + "epoch": 7.65533447265625e-06, + "model_forward_time": 0.02583789825439453, + "step": 5017 + }, + { + "epoch": 7.65533447265625e-06, + "step": 5017, + "training_step_time": 0.12934136390686035 + }, + { + "epoch": 7.6568603515625e-06, + "model_forward_time": 0.024960756301879883, + "step": 5018 + }, + { + "epoch": 7.6568603515625e-06, + "step": 5018, + "training_step_time": 0.18588614463806152 + }, + { + "epoch": 7.65838623046875e-06, + "model_forward_time": 0.025450468063354492, + "step": 5019 + }, + { + "epoch": 7.65838623046875e-06, + "step": 5019, + "training_step_time": 0.12991809844970703 + }, + { + "epoch": 7.659912109375e-06, + "grad_norm": 0.38604509830474854, + "learning_rate": 9.628310973736943e-05, + "loss": 0.0876, + "step": 5020 + }, + { + "epoch": 7.659912109375e-06, + "model_forward_time": 0.023944616317749023, + "step": 5020 + }, + { + "epoch": 7.659912109375e-06, + "step": 5020, + "training_step_time": 0.1101841926574707 + }, + { + "epoch": 7.66143798828125e-06, + "model_forward_time": 0.025321483612060547, + "step": 5021 + }, + { + "epoch": 7.66143798828125e-06, + "step": 5021, + "training_step_time": 0.11579370498657227 + }, + { + "epoch": 7.6629638671875e-06, + "model_forward_time": 0.025134801864624023, + "step": 5022 + }, + { + "epoch": 7.6629638671875e-06, + "step": 5022, + "training_step_time": 0.11626219749450684 + }, + { + "epoch": 7.66448974609375e-06, + "model_forward_time": 0.024925708770751953, + "step": 5023 + }, + { + "epoch": 7.66448974609375e-06, + "step": 5023, + "training_step_time": 0.1073770523071289 + }, + { + "epoch": 7.666015625e-06, + "model_forward_time": 0.025336027145385742, + "step": 5024 + }, + { + "epoch": 7.666015625e-06, + "step": 5024, + "training_step_time": 0.19783616065979004 + }, + { + "epoch": 7.66754150390625e-06, + "model_forward_time": 0.02418208122253418, + "step": 5025 + }, + { + "epoch": 7.66754150390625e-06, + "step": 5025, + "training_step_time": 0.1741950511932373 + }, + { + "epoch": 7.6690673828125e-06, + "model_forward_time": 0.02390313148498535, + "step": 5026 + }, + { + "epoch": 7.6690673828125e-06, + "step": 5026, + "training_step_time": 0.1154928207397461 + }, + { + "epoch": 7.67059326171875e-06, + "model_forward_time": 0.02483677864074707, + "step": 5027 + }, + { + "epoch": 7.67059326171875e-06, + "step": 5027, + "training_step_time": 0.11839485168457031 + }, + { + "epoch": 7.672119140625e-06, + "model_forward_time": 0.024872779846191406, + "step": 5028 + }, + { + "epoch": 7.672119140625e-06, + "step": 5028, + "training_step_time": 0.14141178131103516 + }, + { + "epoch": 7.67364501953125e-06, + "model_forward_time": 0.02516937255859375, + "step": 5029 + }, + { + "epoch": 7.67364501953125e-06, + "step": 5029, + "training_step_time": 0.13424110412597656 + }, + { + "epoch": 7.6751708984375e-06, + "grad_norm": 0.6332946419715881, + "learning_rate": 9.626222855551816e-05, + "loss": 0.0744, + "step": 5030 + }, + { + "epoch": 7.6751708984375e-06, + "model_forward_time": 0.024690628051757812, + "step": 5030 + }, + { + "epoch": 7.6751708984375e-06, + "step": 5030, + "training_step_time": 0.1775212287902832 + }, + { + "epoch": 7.67669677734375e-06, + "model_forward_time": 0.02417445182800293, + "step": 5031 + }, + { + "epoch": 7.67669677734375e-06, + "step": 5031, + "training_step_time": 0.1160440444946289 + }, + { + "epoch": 7.67822265625e-06, + "model_forward_time": 0.023741960525512695, + "step": 5032 + }, + { + "epoch": 7.67822265625e-06, + "step": 5032, + "training_step_time": 0.11839485168457031 + }, + { + "epoch": 7.67974853515625e-06, + "model_forward_time": 0.024202823638916016, + "step": 5033 + }, + { + "epoch": 7.67974853515625e-06, + "step": 5033, + "training_step_time": 0.11390423774719238 + }, + { + "epoch": 7.6812744140625e-06, + "model_forward_time": 0.025263309478759766, + "step": 5034 + }, + { + "epoch": 7.6812744140625e-06, + "step": 5034, + "training_step_time": 0.11560988426208496 + }, + { + "epoch": 7.68280029296875e-06, + "model_forward_time": 0.025209426879882812, + "step": 5035 + }, + { + "epoch": 7.68280029296875e-06, + "step": 5035, + "training_step_time": 0.10874438285827637 + }, + { + "epoch": 7.684326171875e-06, + "model_forward_time": 0.02582263946533203, + "step": 5036 + }, + { + "epoch": 7.684326171875e-06, + "step": 5036, + "training_step_time": 0.2162775993347168 + }, + { + "epoch": 7.68585205078125e-06, + "model_forward_time": 0.025000572204589844, + "step": 5037 + }, + { + "epoch": 7.68585205078125e-06, + "step": 5037, + "training_step_time": 0.10718274116516113 + }, + { + "epoch": 7.6873779296875e-06, + "model_forward_time": 0.024698495864868164, + "step": 5038 + }, + { + "epoch": 7.6873779296875e-06, + "step": 5038, + "training_step_time": 0.10795450210571289 + }, + { + "epoch": 7.68890380859375e-06, + "model_forward_time": 0.02606987953186035, + "step": 5039 + }, + { + "epoch": 7.68890380859375e-06, + "step": 5039, + "training_step_time": 0.10921597480773926 + }, + { + "epoch": 7.6904296875e-06, + "grad_norm": 0.3652441203594208, + "learning_rate": 9.624129116069694e-05, + "loss": 0.0681, + "step": 5040 + }, + { + "epoch": 7.6904296875e-06, + "model_forward_time": 0.025727510452270508, + "step": 5040 + }, + { + "epoch": 7.6904296875e-06, + "step": 5040, + "training_step_time": 0.11357378959655762 + }, + { + "epoch": 7.69195556640625e-06, + "model_forward_time": 0.02573108673095703, + "step": 5041 + }, + { + "epoch": 7.69195556640625e-06, + "step": 5041, + "training_step_time": 0.11369466781616211 + }, + { + "epoch": 7.6934814453125e-06, + "model_forward_time": 0.024309873580932617, + "step": 5042 + }, + { + "epoch": 7.6934814453125e-06, + "step": 5042, + "training_step_time": 0.10545206069946289 + }, + { + "epoch": 7.69500732421875e-06, + "model_forward_time": 0.02532052993774414, + "step": 5043 + }, + { + "epoch": 7.69500732421875e-06, + "step": 5043, + "training_step_time": 0.10808062553405762 + }, + { + "epoch": 7.696533203125e-06, + "model_forward_time": 0.025396347045898438, + "step": 5044 + }, + { + "epoch": 7.696533203125e-06, + "step": 5044, + "training_step_time": 0.10766434669494629 + }, + { + "epoch": 7.69805908203125e-06, + "model_forward_time": 0.025324106216430664, + "step": 5045 + }, + { + "epoch": 7.69805908203125e-06, + "step": 5045, + "training_step_time": 0.10772562026977539 + }, + { + "epoch": 7.6995849609375e-06, + "model_forward_time": 0.025367259979248047, + "step": 5046 + }, + { + "epoch": 7.6995849609375e-06, + "step": 5046, + "training_step_time": 0.10894322395324707 + }, + { + "epoch": 7.70111083984375e-06, + "model_forward_time": 0.025348663330078125, + "step": 5047 + }, + { + "epoch": 7.70111083984375e-06, + "step": 5047, + "training_step_time": 0.10654807090759277 + }, + { + "epoch": 7.70263671875e-06, + "model_forward_time": 0.025162220001220703, + "step": 5048 + }, + { + "epoch": 7.70263671875e-06, + "step": 5048, + "training_step_time": 0.11753630638122559 + }, + { + "epoch": 7.70416259765625e-06, + "model_forward_time": 0.02490830421447754, + "step": 5049 + }, + { + "epoch": 7.70416259765625e-06, + "step": 5049, + "training_step_time": 0.10760307312011719 + }, + { + "epoch": 7.7056884765625e-06, + "grad_norm": 0.3991225063800812, + "learning_rate": 9.62202975783467e-05, + "loss": 0.0871, + "step": 5050 + }, + { + "epoch": 7.7056884765625e-06, + "model_forward_time": 0.024593353271484375, + "step": 5050 + }, + { + "epoch": 7.7056884765625e-06, + "step": 5050, + "training_step_time": 0.10759758949279785 + }, + { + "epoch": 7.70721435546875e-06, + "model_forward_time": 0.025439739227294922, + "step": 5051 + }, + { + "epoch": 7.70721435546875e-06, + "step": 5051, + "training_step_time": 0.10791921615600586 + }, + { + "epoch": 7.708740234375e-06, + "model_forward_time": 0.025187253952026367, + "step": 5052 + }, + { + "epoch": 7.708740234375e-06, + "step": 5052, + "training_step_time": 0.10755205154418945 + }, + { + "epoch": 7.71026611328125e-06, + "model_forward_time": 0.025600194931030273, + "step": 5053 + }, + { + "epoch": 7.71026611328125e-06, + "step": 5053, + "training_step_time": 0.11006999015808105 + }, + { + "epoch": 7.7117919921875e-06, + "model_forward_time": 0.024882078170776367, + "step": 5054 + }, + { + "epoch": 7.7117919921875e-06, + "step": 5054, + "training_step_time": 0.10697746276855469 + }, + { + "epoch": 7.71331787109375e-06, + "model_forward_time": 0.025122880935668945, + "step": 5055 + }, + { + "epoch": 7.71331787109375e-06, + "step": 5055, + "training_step_time": 0.10991668701171875 + }, + { + "epoch": 7.71484375e-06, + "model_forward_time": 0.024379968643188477, + "step": 5056 + }, + { + "epoch": 7.71484375e-06, + "step": 5056, + "training_step_time": 0.11037611961364746 + }, + { + "epoch": 7.71636962890625e-06, + "model_forward_time": 0.02559494972229004, + "step": 5057 + }, + { + "epoch": 7.71636962890625e-06, + "step": 5057, + "training_step_time": 0.21707653999328613 + }, + { + "epoch": 7.7178955078125e-06, + "model_forward_time": 0.024936199188232422, + "step": 5058 + }, + { + "epoch": 7.7178955078125e-06, + "step": 5058, + "training_step_time": 0.1100008487701416 + }, + { + "epoch": 7.71942138671875e-06, + "model_forward_time": 0.024688005447387695, + "step": 5059 + }, + { + "epoch": 7.71942138671875e-06, + "step": 5059, + "training_step_time": 0.12002801895141602 + }, + { + "epoch": 7.720947265625e-06, + "grad_norm": 0.46479085087776184, + "learning_rate": 9.619924783397661e-05, + "loss": 0.0693, + "step": 5060 + }, + { + "epoch": 7.720947265625e-06, + "model_forward_time": 0.025546789169311523, + "step": 5060 + }, + { + "epoch": 7.720947265625e-06, + "step": 5060, + "training_step_time": 0.1540844440460205 + }, + { + "epoch": 7.72247314453125e-06, + "model_forward_time": 0.02441120147705078, + "step": 5061 + }, + { + "epoch": 7.72247314453125e-06, + "step": 5061, + "training_step_time": 0.21178293228149414 + }, + { + "epoch": 7.7239990234375e-06, + "model_forward_time": 0.02568340301513672, + "step": 5062 + }, + { + "epoch": 7.7239990234375e-06, + "step": 5062, + "training_step_time": 0.13126587867736816 + }, + { + "epoch": 7.72552490234375e-06, + "model_forward_time": 0.024960041046142578, + "step": 5063 + }, + { + "epoch": 7.72552490234375e-06, + "step": 5063, + "training_step_time": 0.12482142448425293 + }, + { + "epoch": 7.72705078125e-06, + "model_forward_time": 0.026921987533569336, + "step": 5064 + }, + { + "epoch": 7.72705078125e-06, + "step": 5064, + "training_step_time": 0.135298490524292 + }, + { + "epoch": 7.72857666015625e-06, + "model_forward_time": 0.02526688575744629, + "step": 5065 + }, + { + "epoch": 7.72857666015625e-06, + "step": 5065, + "training_step_time": 0.10916519165039062 + }, + { + "epoch": 7.7301025390625e-06, + "model_forward_time": 0.025776386260986328, + "step": 5066 + }, + { + "epoch": 7.7301025390625e-06, + "step": 5066, + "training_step_time": 0.11279296875 + }, + { + "epoch": 7.73162841796875e-06, + "model_forward_time": 0.025534868240356445, + "step": 5067 + }, + { + "epoch": 7.73162841796875e-06, + "step": 5067, + "training_step_time": 0.11626148223876953 + }, + { + "epoch": 7.733154296875e-06, + "model_forward_time": 0.02498793601989746, + "step": 5068 + }, + { + "epoch": 7.733154296875e-06, + "step": 5068, + "training_step_time": 0.1060945987701416 + }, + { + "epoch": 7.73468017578125e-06, + "model_forward_time": 0.02555680274963379, + "step": 5069 + }, + { + "epoch": 7.73468017578125e-06, + "step": 5069, + "training_step_time": 0.1988391876220703 + }, + { + "epoch": 7.7362060546875e-06, + "grad_norm": 0.4381895959377289, + "learning_rate": 9.617814195316411e-05, + "loss": 0.1059, + "step": 5070 + }, + { + "epoch": 7.7362060546875e-06, + "model_forward_time": 0.026225805282592773, + "step": 5070 + }, + { + "epoch": 7.7362060546875e-06, + "step": 5070, + "training_step_time": 0.1167140007019043 + }, + { + "epoch": 7.73773193359375e-06, + "model_forward_time": 0.02488231658935547, + "step": 5071 + }, + { + "epoch": 7.73773193359375e-06, + "step": 5071, + "training_step_time": 0.15398955345153809 + }, + { + "epoch": 7.7392578125e-06, + "model_forward_time": 0.02504110336303711, + "step": 5072 + }, + { + "epoch": 7.7392578125e-06, + "step": 5072, + "training_step_time": 0.22089600563049316 + }, + { + "epoch": 7.74078369140625e-06, + "model_forward_time": 0.024670839309692383, + "step": 5073 + }, + { + "epoch": 7.74078369140625e-06, + "step": 5073, + "training_step_time": 0.17334461212158203 + }, + { + "epoch": 7.7423095703125e-06, + "model_forward_time": 0.02419114112854004, + "step": 5074 + }, + { + "epoch": 7.7423095703125e-06, + "step": 5074, + "training_step_time": 0.1717381477355957 + }, + { + "epoch": 7.74383544921875e-06, + "model_forward_time": 0.025037050247192383, + "step": 5075 + }, + { + "epoch": 7.74383544921875e-06, + "step": 5075, + "training_step_time": 0.1658949851989746 + }, + { + "epoch": 7.745361328125e-06, + "model_forward_time": 0.024564027786254883, + "step": 5076 + }, + { + "epoch": 7.745361328125e-06, + "step": 5076, + "training_step_time": 0.14272284507751465 + }, + { + "epoch": 7.74688720703125e-06, + "model_forward_time": 0.024655580520629883, + "step": 5077 + }, + { + "epoch": 7.74688720703125e-06, + "step": 5077, + "training_step_time": 0.1324291229248047 + }, + { + "epoch": 7.7484130859375e-06, + "model_forward_time": 0.025058507919311523, + "step": 5078 + }, + { + "epoch": 7.7484130859375e-06, + "step": 5078, + "training_step_time": 0.11436796188354492 + }, + { + "epoch": 7.74993896484375e-06, + "model_forward_time": 0.024914026260375977, + "step": 5079 + }, + { + "epoch": 7.74993896484375e-06, + "step": 5079, + "training_step_time": 0.1658475399017334 + }, + { + "epoch": 7.75146484375e-06, + "grad_norm": 0.3558763265609741, + "learning_rate": 9.61569799615548e-05, + "loss": 0.0959, + "step": 5080 + }, + { + "epoch": 7.75146484375e-06, + "model_forward_time": 0.024217605590820312, + "step": 5080 + }, + { + "epoch": 7.75146484375e-06, + "step": 5080, + "training_step_time": 0.16362261772155762 + }, + { + "epoch": 7.75299072265625e-06, + "model_forward_time": 0.024431228637695312, + "step": 5081 + }, + { + "epoch": 7.75299072265625e-06, + "step": 5081, + "training_step_time": 0.10477352142333984 + }, + { + "epoch": 7.7545166015625e-06, + "model_forward_time": 0.02491021156311035, + "step": 5082 + }, + { + "epoch": 7.7545166015625e-06, + "step": 5082, + "training_step_time": 0.10980844497680664 + }, + { + "epoch": 7.75604248046875e-06, + "model_forward_time": 0.025683879852294922, + "step": 5083 + }, + { + "epoch": 7.75604248046875e-06, + "step": 5083, + "training_step_time": 0.1112060546875 + }, + { + "epoch": 7.757568359375e-06, + "model_forward_time": 0.025236845016479492, + "step": 5084 + }, + { + "epoch": 7.757568359375e-06, + "step": 5084, + "training_step_time": 0.11798906326293945 + }, + { + "epoch": 7.75909423828125e-06, + "model_forward_time": 0.025272846221923828, + "step": 5085 + }, + { + "epoch": 7.75909423828125e-06, + "step": 5085, + "training_step_time": 0.10834503173828125 + }, + { + "epoch": 7.7606201171875e-06, + "model_forward_time": 0.025069475173950195, + "step": 5086 + }, + { + "epoch": 7.7606201171875e-06, + "step": 5086, + "training_step_time": 0.10696101188659668 + }, + { + "epoch": 7.76214599609375e-06, + "model_forward_time": 0.025365591049194336, + "step": 5087 + }, + { + "epoch": 7.76214599609375e-06, + "step": 5087, + "training_step_time": 0.10917925834655762 + }, + { + "epoch": 7.763671875e-06, + "model_forward_time": 0.02533578872680664, + "step": 5088 + }, + { + "epoch": 7.763671875e-06, + "step": 5088, + "training_step_time": 0.11208510398864746 + }, + { + "epoch": 7.76519775390625e-06, + "model_forward_time": 0.02526998519897461, + "step": 5089 + }, + { + "epoch": 7.76519775390625e-06, + "step": 5089, + "training_step_time": 0.10644888877868652 + }, + { + "epoch": 7.7667236328125e-06, + "grad_norm": 0.4785032272338867, + "learning_rate": 9.613576188486253e-05, + "loss": 0.08, + "step": 5090 + }, + { + "epoch": 7.7667236328125e-06, + "model_forward_time": 0.025149822235107422, + "step": 5090 + }, + { + "epoch": 7.7667236328125e-06, + "step": 5090, + "training_step_time": 0.11103463172912598 + }, + { + "epoch": 7.76824951171875e-06, + "model_forward_time": 0.025137662887573242, + "step": 5091 + }, + { + "epoch": 7.76824951171875e-06, + "step": 5091, + "training_step_time": 0.1049797534942627 + }, + { + "epoch": 7.769775390625e-06, + "model_forward_time": 0.025326967239379883, + "step": 5092 + }, + { + "epoch": 7.769775390625e-06, + "step": 5092, + "training_step_time": 0.11314988136291504 + }, + { + "epoch": 7.77130126953125e-06, + "model_forward_time": 0.024926424026489258, + "step": 5093 + }, + { + "epoch": 7.77130126953125e-06, + "step": 5093, + "training_step_time": 0.1066884994506836 + }, + { + "epoch": 7.7728271484375e-06, + "model_forward_time": 0.025122404098510742, + "step": 5094 + }, + { + "epoch": 7.7728271484375e-06, + "step": 5094, + "training_step_time": 0.10620594024658203 + }, + { + "epoch": 7.77435302734375e-06, + "model_forward_time": 0.02527022361755371, + "step": 5095 + }, + { + "epoch": 7.77435302734375e-06, + "step": 5095, + "training_step_time": 0.1057744026184082 + }, + { + "epoch": 7.77587890625e-06, + "model_forward_time": 0.02557539939880371, + "step": 5096 + }, + { + "epoch": 7.77587890625e-06, + "step": 5096, + "training_step_time": 0.10718941688537598 + }, + { + "epoch": 7.77740478515625e-06, + "model_forward_time": 0.02547931671142578, + "step": 5097 + }, + { + "epoch": 7.77740478515625e-06, + "step": 5097, + "training_step_time": 0.1082770824432373 + }, + { + "epoch": 7.7789306640625e-06, + "model_forward_time": 0.02510833740234375, + "step": 5098 + }, + { + "epoch": 7.7789306640625e-06, + "step": 5098, + "training_step_time": 0.1084136962890625 + }, + { + "epoch": 7.78045654296875e-06, + "model_forward_time": 0.02668619155883789, + "step": 5099 + }, + { + "epoch": 7.78045654296875e-06, + "step": 5099, + "training_step_time": 0.11053085327148438 + }, + { + "epoch": 7.781982421875e-06, + "grad_norm": 0.4167155921459198, + "learning_rate": 9.611448774886924e-05, + "loss": 0.0953, + "step": 5100 + }, + { + "epoch": 7.781982421875e-06, + "model_forward_time": 0.025283098220825195, + "step": 5100 + }, + { + "epoch": 7.781982421875e-06, + "step": 5100, + "training_step_time": 0.14626860618591309 + }, + { + "epoch": 7.78350830078125e-06, + "model_forward_time": 0.025179147720336914, + "step": 5101 + }, + { + "epoch": 7.78350830078125e-06, + "step": 5101, + "training_step_time": 0.17770838737487793 + }, + { + "epoch": 7.7850341796875e-06, + "model_forward_time": 0.026099681854248047, + "step": 5102 + }, + { + "epoch": 7.7850341796875e-06, + "step": 5102, + "training_step_time": 0.17916131019592285 + }, + { + "epoch": 7.78656005859375e-06, + "model_forward_time": 0.02507758140563965, + "step": 5103 + }, + { + "epoch": 7.78656005859375e-06, + "step": 5103, + "training_step_time": 0.16151666641235352 + }, + { + "epoch": 7.7880859375e-06, + "model_forward_time": 0.02428889274597168, + "step": 5104 + }, + { + "epoch": 7.7880859375e-06, + "step": 5104, + "training_step_time": 0.15611696243286133 + }, + { + "epoch": 7.78961181640625e-06, + "model_forward_time": 0.026854991912841797, + "step": 5105 + }, + { + "epoch": 7.78961181640625e-06, + "step": 5105, + "training_step_time": 0.14175748825073242 + }, + { + "epoch": 7.7911376953125e-06, + "model_forward_time": 0.024842500686645508, + "step": 5106 + }, + { + "epoch": 7.7911376953125e-06, + "step": 5106, + "training_step_time": 0.14162063598632812 + }, + { + "epoch": 7.79266357421875e-06, + "model_forward_time": 0.026187419891357422, + "step": 5107 + }, + { + "epoch": 7.79266357421875e-06, + "step": 5107, + "training_step_time": 0.1306629180908203 + }, + { + "epoch": 7.794189453125e-06, + "model_forward_time": 0.02481532096862793, + "step": 5108 + }, + { + "epoch": 7.794189453125e-06, + "step": 5108, + "training_step_time": 0.11838006973266602 + }, + { + "epoch": 7.79571533203125e-06, + "model_forward_time": 0.025243282318115234, + "step": 5109 + }, + { + "epoch": 7.79571533203125e-06, + "step": 5109, + "training_step_time": 0.11831212043762207 + }, + { + "epoch": 7.7972412109375e-06, + "grad_norm": 0.30426469445228577, + "learning_rate": 9.609315757942503e-05, + "loss": 0.0968, + "step": 5110 + }, + { + "epoch": 7.7972412109375e-06, + "model_forward_time": 0.02549910545349121, + "step": 5110 + }, + { + "epoch": 7.7972412109375e-06, + "step": 5110, + "training_step_time": 0.1936659812927246 + }, + { + "epoch": 7.79876708984375e-06, + "model_forward_time": 0.024448871612548828, + "step": 5111 + }, + { + "epoch": 7.79876708984375e-06, + "step": 5111, + "training_step_time": 0.10869193077087402 + }, + { + "epoch": 7.80029296875e-06, + "model_forward_time": 0.024936676025390625, + "step": 5112 + }, + { + "epoch": 7.80029296875e-06, + "step": 5112, + "training_step_time": 0.11344146728515625 + }, + { + "epoch": 7.80181884765625e-06, + "model_forward_time": 0.025222063064575195, + "step": 5113 + }, + { + "epoch": 7.80181884765625e-06, + "step": 5113, + "training_step_time": 0.11933016777038574 + }, + { + "epoch": 7.8033447265625e-06, + "model_forward_time": 0.02550339698791504, + "step": 5114 + }, + { + "epoch": 7.8033447265625e-06, + "step": 5114, + "training_step_time": 0.11156916618347168 + }, + { + "epoch": 7.80487060546875e-06, + "model_forward_time": 0.025215625762939453, + "step": 5115 + }, + { + "epoch": 7.80487060546875e-06, + "step": 5115, + "training_step_time": 0.11398673057556152 + }, + { + "epoch": 7.806396484375e-06, + "model_forward_time": 0.02557086944580078, + "step": 5116 + }, + { + "epoch": 7.806396484375e-06, + "step": 5116, + "training_step_time": 0.11448311805725098 + }, + { + "epoch": 7.80792236328125e-06, + "model_forward_time": 0.02543020248413086, + "step": 5117 + }, + { + "epoch": 7.80792236328125e-06, + "step": 5117, + "training_step_time": 0.13334393501281738 + }, + { + "epoch": 7.8094482421875e-06, + "model_forward_time": 0.024976730346679688, + "step": 5118 + }, + { + "epoch": 7.8094482421875e-06, + "step": 5118, + "training_step_time": 0.11498093605041504 + }, + { + "epoch": 7.81097412109375e-06, + "model_forward_time": 0.025333881378173828, + "step": 5119 + }, + { + "epoch": 7.81097412109375e-06, + "step": 5119, + "training_step_time": 0.11364006996154785 + }, + { + "epoch": 7.8125e-06, + "grad_norm": 0.47002390027046204, + "learning_rate": 9.607177140244806e-05, + "loss": 0.0971, + "step": 5120 + }, + { + "epoch": 7.8125e-06, + "model_forward_time": 0.02544999122619629, + "step": 5120 + }, + { + "epoch": 7.8125e-06, + "step": 5120, + "training_step_time": 0.11374187469482422 + }, + { + "epoch": 7.81402587890625e-06, + "model_forward_time": 0.0249786376953125, + "step": 5121 + }, + { + "epoch": 7.81402587890625e-06, + "step": 5121, + "training_step_time": 0.11122488975524902 + }, + { + "epoch": 7.8155517578125e-06, + "model_forward_time": 0.025346994400024414, + "step": 5122 + }, + { + "epoch": 7.8155517578125e-06, + "step": 5122, + "training_step_time": 0.10609054565429688 + }, + { + "epoch": 7.81707763671875e-06, + "model_forward_time": 0.025271177291870117, + "step": 5123 + }, + { + "epoch": 7.81707763671875e-06, + "step": 5123, + "training_step_time": 0.10923027992248535 + }, + { + "epoch": 7.818603515625e-06, + "model_forward_time": 0.026297569274902344, + "step": 5124 + }, + { + "epoch": 7.818603515625e-06, + "step": 5124, + "training_step_time": 0.1091604232788086 + }, + { + "epoch": 7.82012939453125e-06, + "model_forward_time": 0.025348186492919922, + "step": 5125 + }, + { + "epoch": 7.82012939453125e-06, + "step": 5125, + "training_step_time": 0.17068076133728027 + }, + { + "epoch": 7.8216552734375e-06, + "model_forward_time": 0.024475574493408203, + "step": 5126 + }, + { + "epoch": 7.8216552734375e-06, + "step": 5126, + "training_step_time": 0.16425800323486328 + }, + { + "epoch": 7.82318115234375e-06, + "model_forward_time": 0.025218486785888672, + "step": 5127 + }, + { + "epoch": 7.82318115234375e-06, + "step": 5127, + "training_step_time": 0.10821413993835449 + }, + { + "epoch": 7.82470703125e-06, + "model_forward_time": 0.024888992309570312, + "step": 5128 + }, + { + "epoch": 7.82470703125e-06, + "step": 5128, + "training_step_time": 0.10391950607299805 + }, + { + "epoch": 7.82623291015625e-06, + "model_forward_time": 0.025673866271972656, + "step": 5129 + }, + { + "epoch": 7.82623291015625e-06, + "step": 5129, + "training_step_time": 0.1188809871673584 + }, + { + "epoch": 7.8277587890625e-06, + "grad_norm": 0.5986289381980896, + "learning_rate": 9.605032924392457e-05, + "loss": 0.0978, + "step": 5130 + }, + { + "epoch": 7.8277587890625e-06, + "model_forward_time": 0.025788307189941406, + "step": 5130 + }, + { + "epoch": 7.8277587890625e-06, + "step": 5130, + "training_step_time": 0.11152005195617676 + }, + { + "epoch": 7.82928466796875e-06, + "model_forward_time": 0.025192975997924805, + "step": 5131 + }, + { + "epoch": 7.82928466796875e-06, + "step": 5131, + "training_step_time": 0.10466122627258301 + }, + { + "epoch": 7.830810546875e-06, + "model_forward_time": 0.02535700798034668, + "step": 5132 + }, + { + "epoch": 7.830810546875e-06, + "step": 5132, + "training_step_time": 0.10646581649780273 + }, + { + "epoch": 7.83233642578125e-06, + "model_forward_time": 0.025621652603149414, + "step": 5133 + }, + { + "epoch": 7.83233642578125e-06, + "step": 5133, + "training_step_time": 0.13620877265930176 + }, + { + "epoch": 7.8338623046875e-06, + "model_forward_time": 0.02577829360961914, + "step": 5134 + }, + { + "epoch": 7.8338623046875e-06, + "step": 5134, + "training_step_time": 0.1522979736328125 + }, + { + "epoch": 7.83538818359375e-06, + "model_forward_time": 0.025319337844848633, + "step": 5135 + }, + { + "epoch": 7.83538818359375e-06, + "step": 5135, + "training_step_time": 0.1520693302154541 + }, + { + "epoch": 7.8369140625e-06, + "model_forward_time": 0.024809598922729492, + "step": 5136 + }, + { + "epoch": 7.8369140625e-06, + "step": 5136, + "training_step_time": 0.13495230674743652 + }, + { + "epoch": 7.83843994140625e-06, + "model_forward_time": 0.02466726303100586, + "step": 5137 + }, + { + "epoch": 7.83843994140625e-06, + "step": 5137, + "training_step_time": 0.12909626960754395 + }, + { + "epoch": 7.8399658203125e-06, + "model_forward_time": 0.024352073669433594, + "step": 5138 + }, + { + "epoch": 7.8399658203125e-06, + "step": 5138, + "training_step_time": 0.12425851821899414 + }, + { + "epoch": 7.84149169921875e-06, + "model_forward_time": 0.025299549102783203, + "step": 5139 + }, + { + "epoch": 7.84149169921875e-06, + "step": 5139, + "training_step_time": 0.12037348747253418 + }, + { + "epoch": 7.843017578125e-06, + "grad_norm": 0.3774958550930023, + "learning_rate": 9.602883112990875e-05, + "loss": 0.0798, + "step": 5140 + }, + { + "epoch": 7.843017578125e-06, + "model_forward_time": 0.02543473243713379, + "step": 5140 + }, + { + "epoch": 7.843017578125e-06, + "step": 5140, + "training_step_time": 0.10543036460876465 + }, + { + "epoch": 7.84454345703125e-06, + "model_forward_time": 0.025210857391357422, + "step": 5141 + }, + { + "epoch": 7.84454345703125e-06, + "step": 5141, + "training_step_time": 0.1053779125213623 + }, + { + "epoch": 7.8460693359375e-06, + "model_forward_time": 0.025737524032592773, + "step": 5142 + }, + { + "epoch": 7.8460693359375e-06, + "step": 5142, + "training_step_time": 0.11282730102539062 + }, + { + "epoch": 7.84759521484375e-06, + "model_forward_time": 0.025147438049316406, + "step": 5143 + }, + { + "epoch": 7.84759521484375e-06, + "step": 5143, + "training_step_time": 0.10622644424438477 + }, + { + "epoch": 7.84912109375e-06, + "model_forward_time": 0.024768829345703125, + "step": 5144 + }, + { + "epoch": 7.84912109375e-06, + "step": 5144, + "training_step_time": 0.1564195156097412 + }, + { + "epoch": 7.85064697265625e-06, + "model_forward_time": 0.024646520614624023, + "step": 5145 + }, + { + "epoch": 7.85064697265625e-06, + "step": 5145, + "training_step_time": 0.10473775863647461 + }, + { + "epoch": 7.8521728515625e-06, + "model_forward_time": 0.024808645248413086, + "step": 5146 + }, + { + "epoch": 7.8521728515625e-06, + "step": 5146, + "training_step_time": 0.11070871353149414 + }, + { + "epoch": 7.85369873046875e-06, + "model_forward_time": 0.027230501174926758, + "step": 5147 + }, + { + "epoch": 7.85369873046875e-06, + "step": 5147, + "training_step_time": 0.1368732452392578 + }, + { + "epoch": 7.855224609375e-06, + "model_forward_time": 0.025020599365234375, + "step": 5148 + }, + { + "epoch": 7.855224609375e-06, + "step": 5148, + "training_step_time": 0.18535470962524414 + }, + { + "epoch": 7.85675048828125e-06, + "model_forward_time": 0.026046276092529297, + "step": 5149 + }, + { + "epoch": 7.85675048828125e-06, + "step": 5149, + "training_step_time": 0.10913944244384766 + }, + { + "epoch": 7.8582763671875e-06, + "grad_norm": 0.3100931942462921, + "learning_rate": 9.600727708652289e-05, + "loss": 0.0715, + "step": 5150 + }, + { + "epoch": 7.8582763671875e-06, + "model_forward_time": 0.02467179298400879, + "step": 5150 + }, + { + "epoch": 7.8582763671875e-06, + "step": 5150, + "training_step_time": 0.10547018051147461 + }, + { + "epoch": 7.85980224609375e-06, + "model_forward_time": 0.025094032287597656, + "step": 5151 + }, + { + "epoch": 7.85980224609375e-06, + "step": 5151, + "training_step_time": 0.1842031478881836 + }, + { + "epoch": 7.861328125e-06, + "model_forward_time": 0.024509906768798828, + "step": 5152 + }, + { + "epoch": 7.861328125e-06, + "step": 5152, + "training_step_time": 0.14297962188720703 + }, + { + "epoch": 7.86285400390625e-06, + "model_forward_time": 0.024283170700073242, + "step": 5153 + }, + { + "epoch": 7.86285400390625e-06, + "step": 5153, + "training_step_time": 0.10908865928649902 + }, + { + "epoch": 7.8643798828125e-06, + "model_forward_time": 0.024906396865844727, + "step": 5154 + }, + { + "epoch": 7.8643798828125e-06, + "step": 5154, + "training_step_time": 0.11689114570617676 + }, + { + "epoch": 7.86590576171875e-06, + "model_forward_time": 0.025175809860229492, + "step": 5155 + }, + { + "epoch": 7.86590576171875e-06, + "step": 5155, + "training_step_time": 0.11237907409667969 + }, + { + "epoch": 7.867431640625e-06, + "model_forward_time": 0.025460481643676758, + "step": 5156 + }, + { + "epoch": 7.867431640625e-06, + "step": 5156, + "training_step_time": 0.10687518119812012 + }, + { + "epoch": 7.86895751953125e-06, + "model_forward_time": 0.026054859161376953, + "step": 5157 + }, + { + "epoch": 7.86895751953125e-06, + "step": 5157, + "training_step_time": 0.1987161636352539 + }, + { + "epoch": 7.8704833984375e-06, + "model_forward_time": 0.024463891983032227, + "step": 5158 + }, + { + "epoch": 7.8704833984375e-06, + "step": 5158, + "training_step_time": 0.20881032943725586 + }, + { + "epoch": 7.87200927734375e-06, + "model_forward_time": 0.024082422256469727, + "step": 5159 + }, + { + "epoch": 7.87200927734375e-06, + "step": 5159, + "training_step_time": 0.11146974563598633 + }, + { + "epoch": 7.87353515625e-06, + "grad_norm": 0.48995721340179443, + "learning_rate": 9.598566713995718e-05, + "loss": 0.091, + "step": 5160 + }, + { + "epoch": 7.87353515625e-06, + "model_forward_time": 0.024123430252075195, + "step": 5160 + }, + { + "epoch": 7.87353515625e-06, + "step": 5160, + "training_step_time": 0.17784380912780762 + }, + { + "epoch": 7.87506103515625e-06, + "model_forward_time": 0.024363279342651367, + "step": 5161 + }, + { + "epoch": 7.87506103515625e-06, + "step": 5161, + "training_step_time": 0.17849969863891602 + }, + { + "epoch": 7.8765869140625e-06, + "model_forward_time": 0.023779630661010742, + "step": 5162 + }, + { + "epoch": 7.8765869140625e-06, + "step": 5162, + "training_step_time": 0.1200704574584961 + }, + { + "epoch": 7.87811279296875e-06, + "model_forward_time": 0.023793458938598633, + "step": 5163 + }, + { + "epoch": 7.87811279296875e-06, + "step": 5163, + "training_step_time": 0.12340831756591797 + }, + { + "epoch": 7.879638671875e-06, + "model_forward_time": 0.025189876556396484, + "step": 5164 + }, + { + "epoch": 7.879638671875e-06, + "step": 5164, + "training_step_time": 0.11617374420166016 + }, + { + "epoch": 7.88116455078125e-06, + "model_forward_time": 0.02508544921875, + "step": 5165 + }, + { + "epoch": 7.88116455078125e-06, + "step": 5165, + "training_step_time": 0.11361527442932129 + }, + { + "epoch": 7.8826904296875e-06, + "model_forward_time": 0.025226116180419922, + "step": 5166 + }, + { + "epoch": 7.8826904296875e-06, + "step": 5166, + "training_step_time": 0.12002849578857422 + }, + { + "epoch": 7.88421630859375e-06, + "model_forward_time": 0.025399446487426758, + "step": 5167 + }, + { + "epoch": 7.88421630859375e-06, + "step": 5167, + "training_step_time": 0.11835718154907227 + }, + { + "epoch": 7.8857421875e-06, + "model_forward_time": 0.025426387786865234, + "step": 5168 + }, + { + "epoch": 7.8857421875e-06, + "step": 5168, + "training_step_time": 0.11257052421569824 + }, + { + "epoch": 7.88726806640625e-06, + "model_forward_time": 0.025269746780395508, + "step": 5169 + }, + { + "epoch": 7.88726806640625e-06, + "step": 5169, + "training_step_time": 0.10788369178771973 + }, + { + "epoch": 7.8887939453125e-06, + "grad_norm": 0.33801034092903137, + "learning_rate": 9.596400131646972e-05, + "loss": 0.0808, + "step": 5170 + }, + { + "epoch": 7.8887939453125e-06, + "model_forward_time": 0.025659561157226562, + "step": 5170 + }, + { + "epoch": 7.8887939453125e-06, + "step": 5170, + "training_step_time": 0.21669483184814453 + }, + { + "epoch": 7.89031982421875e-06, + "model_forward_time": 0.024983882904052734, + "step": 5171 + }, + { + "epoch": 7.89031982421875e-06, + "step": 5171, + "training_step_time": 0.1127464771270752 + }, + { + "epoch": 7.891845703125e-06, + "model_forward_time": 0.025427818298339844, + "step": 5172 + }, + { + "epoch": 7.891845703125e-06, + "step": 5172, + "training_step_time": 0.10818171501159668 + }, + { + "epoch": 7.89337158203125e-06, + "model_forward_time": 0.025274276733398438, + "step": 5173 + }, + { + "epoch": 7.89337158203125e-06, + "step": 5173, + "training_step_time": 0.17029953002929688 + }, + { + "epoch": 7.8948974609375e-06, + "model_forward_time": 0.025063514709472656, + "step": 5174 + }, + { + "epoch": 7.8948974609375e-06, + "step": 5174, + "training_step_time": 0.1663830280303955 + }, + { + "epoch": 7.89642333984375e-06, + "model_forward_time": 0.0246732234954834, + "step": 5175 + }, + { + "epoch": 7.89642333984375e-06, + "step": 5175, + "training_step_time": 0.11016058921813965 + }, + { + "epoch": 7.89794921875e-06, + "model_forward_time": 0.025157451629638672, + "step": 5176 + }, + { + "epoch": 7.89794921875e-06, + "step": 5176, + "training_step_time": 0.11135053634643555 + }, + { + "epoch": 7.89947509765625e-06, + "model_forward_time": 0.025400400161743164, + "step": 5177 + }, + { + "epoch": 7.89947509765625e-06, + "step": 5177, + "training_step_time": 0.11173701286315918 + }, + { + "epoch": 7.9010009765625e-06, + "model_forward_time": 0.025698423385620117, + "step": 5178 + }, + { + "epoch": 7.9010009765625e-06, + "step": 5178, + "training_step_time": 0.10787200927734375 + }, + { + "epoch": 7.90252685546875e-06, + "model_forward_time": 0.025220870971679688, + "step": 5179 + }, + { + "epoch": 7.90252685546875e-06, + "step": 5179, + "training_step_time": 0.10729575157165527 + }, + { + "epoch": 7.904052734375e-06, + "grad_norm": 0.3467779755592346, + "learning_rate": 9.594227964238653e-05, + "loss": 0.0726, + "step": 5180 + }, + { + "epoch": 7.904052734375e-06, + "model_forward_time": 0.02515554428100586, + "step": 5180 + }, + { + "epoch": 7.904052734375e-06, + "step": 5180, + "training_step_time": 0.11008381843566895 + }, + { + "epoch": 7.90557861328125e-06, + "model_forward_time": 0.02536320686340332, + "step": 5181 + }, + { + "epoch": 7.90557861328125e-06, + "step": 5181, + "training_step_time": 0.10834002494812012 + }, + { + "epoch": 7.9071044921875e-06, + "model_forward_time": 0.02501201629638672, + "step": 5182 + }, + { + "epoch": 7.9071044921875e-06, + "step": 5182, + "training_step_time": 0.10660028457641602 + }, + { + "epoch": 7.90863037109375e-06, + "model_forward_time": 0.025293588638305664, + "step": 5183 + }, + { + "epoch": 7.90863037109375e-06, + "step": 5183, + "training_step_time": 0.10933327674865723 + }, + { + "epoch": 7.91015625e-06, + "model_forward_time": 0.02571725845336914, + "step": 5184 + }, + { + "epoch": 7.91015625e-06, + "step": 5184, + "training_step_time": 0.10790586471557617 + }, + { + "epoch": 7.91168212890625e-06, + "model_forward_time": 0.02548980712890625, + "step": 5185 + }, + { + "epoch": 7.91168212890625e-06, + "step": 5185, + "training_step_time": 0.12157034873962402 + }, + { + "epoch": 7.9132080078125e-06, + "model_forward_time": 0.02502894401550293, + "step": 5186 + }, + { + "epoch": 7.9132080078125e-06, + "step": 5186, + "training_step_time": 0.11047649383544922 + }, + { + "epoch": 7.91473388671875e-06, + "model_forward_time": 0.02510666847229004, + "step": 5187 + }, + { + "epoch": 7.91473388671875e-06, + "step": 5187, + "training_step_time": 0.130279541015625 + }, + { + "epoch": 7.916259765625e-06, + "model_forward_time": 0.02514505386352539, + "step": 5188 + }, + { + "epoch": 7.916259765625e-06, + "step": 5188, + "training_step_time": 0.1640608310699463 + }, + { + "epoch": 7.91778564453125e-06, + "model_forward_time": 0.02451491355895996, + "step": 5189 + }, + { + "epoch": 7.91778564453125e-06, + "step": 5189, + "training_step_time": 0.125579833984375 + }, + { + "epoch": 7.9193115234375e-06, + "grad_norm": 0.44719254970550537, + "learning_rate": 9.59205021441015e-05, + "loss": 0.1019, + "step": 5190 + }, + { + "epoch": 7.9193115234375e-06, + "model_forward_time": 0.024780988693237305, + "step": 5190 + }, + { + "epoch": 7.9193115234375e-06, + "step": 5190, + "training_step_time": 0.13920855522155762 + }, + { + "epoch": 7.92083740234375e-06, + "model_forward_time": 0.025107145309448242, + "step": 5191 + }, + { + "epoch": 7.92083740234375e-06, + "step": 5191, + "training_step_time": 0.10826945304870605 + }, + { + "epoch": 7.92236328125e-06, + "model_forward_time": 0.024735450744628906, + "step": 5192 + }, + { + "epoch": 7.92236328125e-06, + "step": 5192, + "training_step_time": 0.10696792602539062 + }, + { + "epoch": 7.92388916015625e-06, + "model_forward_time": 0.02563309669494629, + "step": 5193 + }, + { + "epoch": 7.92388916015625e-06, + "step": 5193, + "training_step_time": 0.10863161087036133 + }, + { + "epoch": 7.9254150390625e-06, + "model_forward_time": 0.025914430618286133, + "step": 5194 + }, + { + "epoch": 7.9254150390625e-06, + "step": 5194, + "training_step_time": 0.10904383659362793 + }, + { + "epoch": 7.92694091796875e-06, + "model_forward_time": 0.025487184524536133, + "step": 5195 + }, + { + "epoch": 7.92694091796875e-06, + "step": 5195, + "training_step_time": 0.20251774787902832 + }, + { + "epoch": 7.928466796875e-06, + "model_forward_time": 0.024526596069335938, + "step": 5196 + }, + { + "epoch": 7.928466796875e-06, + "step": 5196, + "training_step_time": 0.13445353507995605 + }, + { + "epoch": 7.92999267578125e-06, + "model_forward_time": 0.0247802734375, + "step": 5197 + }, + { + "epoch": 7.92999267578125e-06, + "step": 5197, + "training_step_time": 0.11343836784362793 + }, + { + "epoch": 7.9315185546875e-06, + "model_forward_time": 0.02491021156311035, + "step": 5198 + }, + { + "epoch": 7.9315185546875e-06, + "step": 5198, + "training_step_time": 0.11269283294677734 + }, + { + "epoch": 7.93304443359375e-06, + "model_forward_time": 0.025566577911376953, + "step": 5199 + }, + { + "epoch": 7.93304443359375e-06, + "step": 5199, + "training_step_time": 0.11288022994995117 + }, + { + "epoch": 7.9345703125e-06, + "grad_norm": 0.23744435608386993, + "learning_rate": 9.589866884807635e-05, + "loss": 0.0593, + "step": 5200 + }, + { + "epoch": 7.9345703125e-06, + "model_forward_time": 0.025332927703857422, + "step": 5200 + }, + { + "epoch": 7.9345703125e-06, + "step": 5200, + "training_step_time": 0.1583256721496582 + }, + { + "epoch": 7.93609619140625e-06, + "model_forward_time": 0.024845600128173828, + "step": 5201 + }, + { + "epoch": 7.93609619140625e-06, + "step": 5201, + "training_step_time": 0.14405035972595215 + }, + { + "epoch": 7.9376220703125e-06, + "model_forward_time": 0.024580001831054688, + "step": 5202 + }, + { + "epoch": 7.9376220703125e-06, + "step": 5202, + "training_step_time": 0.10844230651855469 + }, + { + "epoch": 7.93914794921875e-06, + "model_forward_time": 0.0248720645904541, + "step": 5203 + }, + { + "epoch": 7.93914794921875e-06, + "step": 5203, + "training_step_time": 0.13251066207885742 + }, + { + "epoch": 7.940673828125e-06, + "model_forward_time": 0.026002168655395508, + "step": 5204 + }, + { + "epoch": 7.940673828125e-06, + "step": 5204, + "training_step_time": 0.10731244087219238 + }, + { + "epoch": 7.94219970703125e-06, + "model_forward_time": 0.025651216506958008, + "step": 5205 + }, + { + "epoch": 7.94219970703125e-06, + "step": 5205, + "training_step_time": 0.11096382141113281 + }, + { + "epoch": 7.9437255859375e-06, + "model_forward_time": 0.02507925033569336, + "step": 5206 + }, + { + "epoch": 7.9437255859375e-06, + "step": 5206, + "training_step_time": 0.13328027725219727 + }, + { + "epoch": 7.94525146484375e-06, + "model_forward_time": 0.025351285934448242, + "step": 5207 + }, + { + "epoch": 7.94525146484375e-06, + "step": 5207, + "training_step_time": 0.12077665328979492 + }, + { + "epoch": 7.94677734375e-06, + "model_forward_time": 0.025285959243774414, + "step": 5208 + }, + { + "epoch": 7.94677734375e-06, + "step": 5208, + "training_step_time": 0.11151623725891113 + }, + { + "epoch": 7.94830322265625e-06, + "model_forward_time": 0.024895191192626953, + "step": 5209 + }, + { + "epoch": 7.94830322265625e-06, + "step": 5209, + "training_step_time": 0.12568116188049316 + }, + { + "epoch": 7.9498291015625e-06, + "grad_norm": 0.38819506764411926, + "learning_rate": 9.58767797808406e-05, + "loss": 0.0832, + "step": 5210 + }, + { + "epoch": 7.9498291015625e-06, + "model_forward_time": 0.02763080596923828, + "step": 5210 + }, + { + "epoch": 7.9498291015625e-06, + "step": 5210, + "training_step_time": 0.1424870491027832 + }, + { + "epoch": 7.95135498046875e-06, + "model_forward_time": 0.025747060775756836, + "step": 5211 + }, + { + "epoch": 7.95135498046875e-06, + "step": 5211, + "training_step_time": 0.16529154777526855 + }, + { + "epoch": 7.952880859375e-06, + "model_forward_time": 0.024414539337158203, + "step": 5212 + }, + { + "epoch": 7.952880859375e-06, + "step": 5212, + "training_step_time": 0.1477675437927246 + }, + { + "epoch": 7.95440673828125e-06, + "model_forward_time": 0.02502131462097168, + "step": 5213 + }, + { + "epoch": 7.95440673828125e-06, + "step": 5213, + "training_step_time": 0.10902667045593262 + }, + { + "epoch": 7.9559326171875e-06, + "model_forward_time": 0.0247499942779541, + "step": 5214 + }, + { + "epoch": 7.9559326171875e-06, + "step": 5214, + "training_step_time": 0.13261079788208008 + }, + { + "epoch": 7.95745849609375e-06, + "model_forward_time": 0.024192094802856445, + "step": 5215 + }, + { + "epoch": 7.95745849609375e-06, + "step": 5215, + "training_step_time": 0.1263728141784668 + }, + { + "epoch": 7.958984375e-06, + "model_forward_time": 0.02495098114013672, + "step": 5216 + }, + { + "epoch": 7.958984375e-06, + "step": 5216, + "training_step_time": 0.18819475173950195 + }, + { + "epoch": 7.96051025390625e-06, + "model_forward_time": 0.02496051788330078, + "step": 5217 + }, + { + "epoch": 7.96051025390625e-06, + "step": 5217, + "training_step_time": 0.11850810050964355 + }, + { + "epoch": 7.9620361328125e-06, + "model_forward_time": 0.026095151901245117, + "step": 5218 + }, + { + "epoch": 7.9620361328125e-06, + "step": 5218, + "training_step_time": 0.11518359184265137 + }, + { + "epoch": 7.96356201171875e-06, + "model_forward_time": 0.02638387680053711, + "step": 5219 + }, + { + "epoch": 7.96356201171875e-06, + "step": 5219, + "training_step_time": 0.11182236671447754 + }, + { + "epoch": 7.965087890625e-06, + "grad_norm": 0.6575888991355896, + "learning_rate": 9.58548349689915e-05, + "loss": 0.0811, + "step": 5220 + }, + { + "epoch": 7.965087890625e-06, + "model_forward_time": 0.025304555892944336, + "step": 5220 + }, + { + "epoch": 7.965087890625e-06, + "step": 5220, + "training_step_time": 0.10832977294921875 + }, + { + "epoch": 7.96661376953125e-06, + "model_forward_time": 0.029742956161499023, + "step": 5221 + }, + { + "epoch": 7.96661376953125e-06, + "step": 5221, + "training_step_time": 0.1139075756072998 + }, + { + "epoch": 7.9681396484375e-06, + "model_forward_time": 0.02559828758239746, + "step": 5222 + }, + { + "epoch": 7.9681396484375e-06, + "step": 5222, + "training_step_time": 0.10848140716552734 + }, + { + "epoch": 7.96966552734375e-06, + "model_forward_time": 0.023992061614990234, + "step": 5223 + }, + { + "epoch": 7.96966552734375e-06, + "step": 5223, + "training_step_time": 0.10816287994384766 + }, + { + "epoch": 7.97119140625e-06, + "model_forward_time": 0.02505040168762207, + "step": 5224 + }, + { + "epoch": 7.97119140625e-06, + "step": 5224, + "training_step_time": 0.11077737808227539 + }, + { + "epoch": 7.97271728515625e-06, + "model_forward_time": 0.02503061294555664, + "step": 5225 + }, + { + "epoch": 7.97271728515625e-06, + "step": 5225, + "training_step_time": 0.10807394981384277 + }, + { + "epoch": 7.9742431640625e-06, + "model_forward_time": 0.02598428726196289, + "step": 5226 + }, + { + "epoch": 7.9742431640625e-06, + "step": 5226, + "training_step_time": 0.10855960845947266 + }, + { + "epoch": 7.97576904296875e-06, + "model_forward_time": 0.025916099548339844, + "step": 5227 + }, + { + "epoch": 7.97576904296875e-06, + "step": 5227, + "training_step_time": 0.11346054077148438 + }, + { + "epoch": 7.977294921875e-06, + "model_forward_time": 0.027048110961914062, + "step": 5228 + }, + { + "epoch": 7.977294921875e-06, + "step": 5228, + "training_step_time": 0.11012005805969238 + }, + { + "epoch": 7.97882080078125e-06, + "model_forward_time": 0.025899648666381836, + "step": 5229 + }, + { + "epoch": 7.97882080078125e-06, + "step": 5229, + "training_step_time": 0.11141538619995117 + }, + { + "epoch": 7.9803466796875e-06, + "grad_norm": 0.48782312870025635, + "learning_rate": 9.583283443919409e-05, + "loss": 0.0736, + "step": 5230 + }, + { + "epoch": 7.9803466796875e-06, + "model_forward_time": 0.024985313415527344, + "step": 5230 + }, + { + "epoch": 7.9803466796875e-06, + "step": 5230, + "training_step_time": 0.11250972747802734 + }, + { + "epoch": 7.98187255859375e-06, + "model_forward_time": 0.025539159774780273, + "step": 5231 + }, + { + "epoch": 7.98187255859375e-06, + "step": 5231, + "training_step_time": 0.11655855178833008 + }, + { + "epoch": 7.9833984375e-06, + "model_forward_time": 0.025284528732299805, + "step": 5232 + }, + { + "epoch": 7.9833984375e-06, + "step": 5232, + "training_step_time": 0.11226630210876465 + }, + { + "epoch": 7.98492431640625e-06, + "model_forward_time": 0.02663254737854004, + "step": 5233 + }, + { + "epoch": 7.98492431640625e-06, + "step": 5233, + "training_step_time": 0.11646366119384766 + }, + { + "epoch": 7.9864501953125e-06, + "model_forward_time": 0.025320768356323242, + "step": 5234 + }, + { + "epoch": 7.9864501953125e-06, + "step": 5234, + "training_step_time": 0.11366105079650879 + }, + { + "epoch": 7.98797607421875e-06, + "model_forward_time": 0.026799917221069336, + "step": 5235 + }, + { + "epoch": 7.98797607421875e-06, + "step": 5235, + "training_step_time": 0.11258387565612793 + }, + { + "epoch": 7.989501953125e-06, + "model_forward_time": 0.026203155517578125, + "step": 5236 + }, + { + "epoch": 7.989501953125e-06, + "step": 5236, + "training_step_time": 0.11353731155395508 + }, + { + "epoch": 7.99102783203125e-06, + "model_forward_time": 0.02542257308959961, + "step": 5237 + }, + { + "epoch": 7.99102783203125e-06, + "step": 5237, + "training_step_time": 0.11251950263977051 + }, + { + "epoch": 7.9925537109375e-06, + "model_forward_time": 0.025846481323242188, + "step": 5238 + }, + { + "epoch": 7.9925537109375e-06, + "step": 5238, + "training_step_time": 0.10818099975585938 + }, + { + "epoch": 7.99407958984375e-06, + "model_forward_time": 0.024787187576293945, + "step": 5239 + }, + { + "epoch": 7.99407958984375e-06, + "step": 5239, + "training_step_time": 0.11342144012451172 + }, + { + "epoch": 7.99560546875e-06, + "grad_norm": 0.3838462829589844, + "learning_rate": 9.581077821818109e-05, + "loss": 0.0776, + "step": 5240 + }, + { + "epoch": 7.99560546875e-06, + "model_forward_time": 0.025307893753051758, + "step": 5240 + }, + { + "epoch": 7.99560546875e-06, + "step": 5240, + "training_step_time": 0.11136531829833984 + }, + { + "epoch": 7.99713134765625e-06, + "model_forward_time": 0.025649309158325195, + "step": 5241 + }, + { + "epoch": 7.99713134765625e-06, + "step": 5241, + "training_step_time": 0.15327787399291992 + }, + { + "epoch": 7.9986572265625e-06, + "model_forward_time": 0.025033235549926758, + "step": 5242 + }, + { + "epoch": 7.9986572265625e-06, + "step": 5242, + "training_step_time": 0.12426638603210449 + }, + { + "epoch": 8.00018310546875e-06, + "model_forward_time": 0.024860858917236328, + "step": 5243 + }, + { + "epoch": 8.00018310546875e-06, + "step": 5243, + "training_step_time": 0.177994966506958 + }, + { + "epoch": 8.001708984375e-06, + "model_forward_time": 0.025020837783813477, + "step": 5244 + }, + { + "epoch": 8.001708984375e-06, + "step": 5244, + "training_step_time": 0.22289562225341797 + }, + { + "epoch": 8.00323486328125e-06, + "model_forward_time": 0.024318695068359375, + "step": 5245 + }, + { + "epoch": 8.00323486328125e-06, + "step": 5245, + "training_step_time": 0.20108366012573242 + }, + { + "epoch": 8.0047607421875e-06, + "model_forward_time": 0.02483081817626953, + "step": 5246 + }, + { + "epoch": 8.0047607421875e-06, + "step": 5246, + "training_step_time": 0.17573904991149902 + }, + { + "epoch": 8.00628662109375e-06, + "model_forward_time": 0.024463415145874023, + "step": 5247 + }, + { + "epoch": 8.00628662109375e-06, + "step": 5247, + "training_step_time": 0.21791696548461914 + }, + { + "epoch": 8.0078125e-06, + "model_forward_time": 0.024885892868041992, + "step": 5248 + }, + { + "epoch": 8.0078125e-06, + "step": 5248, + "training_step_time": 0.2028646469116211 + }, + { + "epoch": 8.00933837890625e-06, + "model_forward_time": 0.02528524398803711, + "step": 5249 + }, + { + "epoch": 8.00933837890625e-06, + "step": 5249, + "training_step_time": 0.1425306797027588 + }, + { + "epoch": 8.0108642578125e-06, + "grad_norm": 0.4634803235530853, + "learning_rate": 9.578866633275288e-05, + "loss": 0.0977, + "step": 5250 + }, + { + "epoch": 8.0108642578125e-06, + "model_forward_time": 0.025180339813232422, + "step": 5250 + }, + { + "epoch": 8.0108642578125e-06, + "step": 5250, + "training_step_time": 0.13477802276611328 + }, + { + "epoch": 8.01239013671875e-06, + "model_forward_time": 0.024420499801635742, + "step": 5251 + }, + { + "epoch": 8.01239013671875e-06, + "step": 5251, + "training_step_time": 0.20290207862854004 + }, + { + "epoch": 8.013916015625e-06, + "model_forward_time": 0.024542808532714844, + "step": 5252 + }, + { + "epoch": 8.013916015625e-06, + "step": 5252, + "training_step_time": 0.10658621788024902 + }, + { + "epoch": 8.01544189453125e-06, + "model_forward_time": 0.026471376419067383, + "step": 5253 + }, + { + "epoch": 8.01544189453125e-06, + "step": 5253, + "training_step_time": 0.10831499099731445 + }, + { + "epoch": 8.0169677734375e-06, + "model_forward_time": 0.025778770446777344, + "step": 5254 + }, + { + "epoch": 8.0169677734375e-06, + "step": 5254, + "training_step_time": 0.10617899894714355 + }, + { + "epoch": 8.01849365234375e-06, + "model_forward_time": 0.02537679672241211, + "step": 5255 + }, + { + "epoch": 8.01849365234375e-06, + "step": 5255, + "training_step_time": 0.10796642303466797 + }, + { + "epoch": 8.02001953125e-06, + "model_forward_time": 0.029983043670654297, + "step": 5256 + }, + { + "epoch": 8.02001953125e-06, + "step": 5256, + "training_step_time": 0.11199426651000977 + }, + { + "epoch": 8.02154541015625e-06, + "model_forward_time": 0.025526046752929688, + "step": 5257 + }, + { + "epoch": 8.02154541015625e-06, + "step": 5257, + "training_step_time": 0.10637068748474121 + }, + { + "epoch": 8.0230712890625e-06, + "model_forward_time": 0.02505326271057129, + "step": 5258 + }, + { + "epoch": 8.0230712890625e-06, + "step": 5258, + "training_step_time": 0.1094517707824707 + }, + { + "epoch": 8.02459716796875e-06, + "model_forward_time": 0.025861501693725586, + "step": 5259 + }, + { + "epoch": 8.02459716796875e-06, + "step": 5259, + "training_step_time": 0.1114358901977539 + }, + { + "epoch": 8.026123046875e-06, + "grad_norm": 0.5310544371604919, + "learning_rate": 9.576649880977748e-05, + "loss": 0.0694, + "step": 5260 + }, + { + "epoch": 8.026123046875e-06, + "model_forward_time": 0.025437355041503906, + "step": 5260 + }, + { + "epoch": 8.026123046875e-06, + "step": 5260, + "training_step_time": 0.11064004898071289 + }, + { + "epoch": 8.02764892578125e-06, + "model_forward_time": 0.026332378387451172, + "step": 5261 + }, + { + "epoch": 8.02764892578125e-06, + "step": 5261, + "training_step_time": 0.2164137363433838 + }, + { + "epoch": 8.0291748046875e-06, + "model_forward_time": 0.026065587997436523, + "step": 5262 + }, + { + "epoch": 8.0291748046875e-06, + "step": 5262, + "training_step_time": 0.13115239143371582 + }, + { + "epoch": 8.03070068359375e-06, + "model_forward_time": 0.024517297744750977, + "step": 5263 + }, + { + "epoch": 8.03070068359375e-06, + "step": 5263, + "training_step_time": 0.11920166015625 + }, + { + "epoch": 8.0322265625e-06, + "model_forward_time": 0.025888442993164062, + "step": 5264 + }, + { + "epoch": 8.0322265625e-06, + "step": 5264, + "training_step_time": 0.11392378807067871 + }, + { + "epoch": 8.03375244140625e-06, + "model_forward_time": 0.025484323501586914, + "step": 5265 + }, + { + "epoch": 8.03375244140625e-06, + "step": 5265, + "training_step_time": 0.11995553970336914 + }, + { + "epoch": 8.0352783203125e-06, + "model_forward_time": 0.025064706802368164, + "step": 5266 + }, + { + "epoch": 8.0352783203125e-06, + "step": 5266, + "training_step_time": 0.11385583877563477 + }, + { + "epoch": 8.03680419921875e-06, + "model_forward_time": 0.02621316909790039, + "step": 5267 + }, + { + "epoch": 8.03680419921875e-06, + "step": 5267, + "training_step_time": 0.11371660232543945 + }, + { + "epoch": 8.038330078125e-06, + "model_forward_time": 0.026324987411499023, + "step": 5268 + }, + { + "epoch": 8.038330078125e-06, + "step": 5268, + "training_step_time": 0.11027073860168457 + }, + { + "epoch": 8.03985595703125e-06, + "model_forward_time": 0.025281906127929688, + "step": 5269 + }, + { + "epoch": 8.03985595703125e-06, + "step": 5269, + "training_step_time": 0.11310696601867676 + }, + { + "epoch": 8.0413818359375e-06, + "grad_norm": 0.3431408107280731, + "learning_rate": 9.574427567619053e-05, + "loss": 0.0789, + "step": 5270 + }, + { + "epoch": 8.0413818359375e-06, + "model_forward_time": 0.025124073028564453, + "step": 5270 + }, + { + "epoch": 8.0413818359375e-06, + "step": 5270, + "training_step_time": 0.1110532283782959 + }, + { + "epoch": 8.04290771484375e-06, + "model_forward_time": 0.02528524398803711, + "step": 5271 + }, + { + "epoch": 8.04290771484375e-06, + "step": 5271, + "training_step_time": 0.11463022232055664 + }, + { + "epoch": 8.04443359375e-06, + "model_forward_time": 0.02519965171813965, + "step": 5272 + }, + { + "epoch": 8.04443359375e-06, + "step": 5272, + "training_step_time": 0.11262750625610352 + }, + { + "epoch": 8.04595947265625e-06, + "model_forward_time": 0.025567054748535156, + "step": 5273 + }, + { + "epoch": 8.04595947265625e-06, + "step": 5273, + "training_step_time": 0.11079263687133789 + }, + { + "epoch": 8.0474853515625e-06, + "model_forward_time": 0.02654719352722168, + "step": 5274 + }, + { + "epoch": 8.0474853515625e-06, + "step": 5274, + "training_step_time": 0.12954401969909668 + }, + { + "epoch": 8.04901123046875e-06, + "model_forward_time": 0.02549910545349121, + "step": 5275 + }, + { + "epoch": 8.04901123046875e-06, + "step": 5275, + "training_step_time": 0.12763690948486328 + }, + { + "epoch": 8.050537109375e-06, + "model_forward_time": 0.02565908432006836, + "step": 5276 + }, + { + "epoch": 8.050537109375e-06, + "step": 5276, + "training_step_time": 0.10661649703979492 + }, + { + "epoch": 8.05206298828125e-06, + "model_forward_time": 0.02559041976928711, + "step": 5277 + }, + { + "epoch": 8.05206298828125e-06, + "step": 5277, + "training_step_time": 0.11897683143615723 + }, + { + "epoch": 8.0535888671875e-06, + "model_forward_time": 0.02492809295654297, + "step": 5278 + }, + { + "epoch": 8.0535888671875e-06, + "step": 5278, + "training_step_time": 0.11183881759643555 + }, + { + "epoch": 8.05511474609375e-06, + "model_forward_time": 0.02506256103515625, + "step": 5279 + }, + { + "epoch": 8.05511474609375e-06, + "step": 5279, + "training_step_time": 0.11780691146850586 + }, + { + "epoch": 8.056640625e-06, + "grad_norm": 0.4310734272003174, + "learning_rate": 9.572199695899522e-05, + "loss": 0.0882, + "step": 5280 + }, + { + "epoch": 8.056640625e-06, + "model_forward_time": 0.026833295822143555, + "step": 5280 + }, + { + "epoch": 8.056640625e-06, + "step": 5280, + "training_step_time": 0.1896214485168457 + }, + { + "epoch": 8.05816650390625e-06, + "model_forward_time": 0.025113344192504883, + "step": 5281 + }, + { + "epoch": 8.05816650390625e-06, + "step": 5281, + "training_step_time": 0.10593342781066895 + }, + { + "epoch": 8.0596923828125e-06, + "model_forward_time": 0.024857282638549805, + "step": 5282 + }, + { + "epoch": 8.0596923828125e-06, + "step": 5282, + "training_step_time": 0.1719036102294922 + }, + { + "epoch": 8.06121826171875e-06, + "model_forward_time": 0.025017738342285156, + "step": 5283 + }, + { + "epoch": 8.06121826171875e-06, + "step": 5283, + "training_step_time": 0.16849613189697266 + }, + { + "epoch": 8.062744140625e-06, + "model_forward_time": 0.024463653564453125, + "step": 5284 + }, + { + "epoch": 8.062744140625e-06, + "step": 5284, + "training_step_time": 0.10752320289611816 + }, + { + "epoch": 8.06427001953125e-06, + "model_forward_time": 0.02465677261352539, + "step": 5285 + }, + { + "epoch": 8.06427001953125e-06, + "step": 5285, + "training_step_time": 0.11778140068054199 + }, + { + "epoch": 8.0657958984375e-06, + "model_forward_time": 0.025216341018676758, + "step": 5286 + }, + { + "epoch": 8.0657958984375e-06, + "step": 5286, + "training_step_time": 0.11623454093933105 + }, + { + "epoch": 8.06732177734375e-06, + "model_forward_time": 0.025198698043823242, + "step": 5287 + }, + { + "epoch": 8.06732177734375e-06, + "step": 5287, + "training_step_time": 0.11030411720275879 + }, + { + "epoch": 8.06884765625e-06, + "model_forward_time": 0.02516913414001465, + "step": 5288 + }, + { + "epoch": 8.06884765625e-06, + "step": 5288, + "training_step_time": 0.19890332221984863 + }, + { + "epoch": 8.07037353515625e-06, + "model_forward_time": 0.024277687072753906, + "step": 5289 + }, + { + "epoch": 8.07037353515625e-06, + "step": 5289, + "training_step_time": 0.11043524742126465 + }, + { + "epoch": 8.0718994140625e-06, + "grad_norm": 0.43320193886756897, + "learning_rate": 9.569966268526232e-05, + "loss": 0.0858, + "step": 5290 + }, + { + "epoch": 8.0718994140625e-06, + "model_forward_time": 0.023698091506958008, + "step": 5290 + }, + { + "epoch": 8.0718994140625e-06, + "step": 5290, + "training_step_time": 0.10822796821594238 + }, + { + "epoch": 8.07342529296875e-06, + "model_forward_time": 0.024959564208984375, + "step": 5291 + }, + { + "epoch": 8.07342529296875e-06, + "step": 5291, + "training_step_time": 0.1333456039428711 + }, + { + "epoch": 8.074951171875e-06, + "model_forward_time": 0.025366783142089844, + "step": 5292 + }, + { + "epoch": 8.074951171875e-06, + "step": 5292, + "training_step_time": 0.11459040641784668 + }, + { + "epoch": 8.07647705078125e-06, + "model_forward_time": 0.024825572967529297, + "step": 5293 + }, + { + "epoch": 8.07647705078125e-06, + "step": 5293, + "training_step_time": 0.11894011497497559 + }, + { + "epoch": 8.0780029296875e-06, + "model_forward_time": 0.025238513946533203, + "step": 5294 + }, + { + "epoch": 8.0780029296875e-06, + "step": 5294, + "training_step_time": 0.12187957763671875 + }, + { + "epoch": 8.07952880859375e-06, + "model_forward_time": 0.025052547454833984, + "step": 5295 + }, + { + "epoch": 8.07952880859375e-06, + "step": 5295, + "training_step_time": 0.11922979354858398 + }, + { + "epoch": 8.0810546875e-06, + "model_forward_time": 0.025362491607666016, + "step": 5296 + }, + { + "epoch": 8.0810546875e-06, + "step": 5296, + "training_step_time": 0.1137394905090332 + }, + { + "epoch": 8.08258056640625e-06, + "model_forward_time": 0.024750471115112305, + "step": 5297 + }, + { + "epoch": 8.08258056640625e-06, + "step": 5297, + "training_step_time": 0.1154181957244873 + }, + { + "epoch": 8.0841064453125e-06, + "model_forward_time": 0.025093793869018555, + "step": 5298 + }, + { + "epoch": 8.0841064453125e-06, + "step": 5298, + "training_step_time": 0.11117410659790039 + }, + { + "epoch": 8.08563232421875e-06, + "model_forward_time": 0.02474522590637207, + "step": 5299 + }, + { + "epoch": 8.08563232421875e-06, + "step": 5299, + "training_step_time": 0.1108696460723877 + }, + { + "epoch": 8.087158203125e-06, + "grad_norm": 0.3055928647518158, + "learning_rate": 9.567727288213005e-05, + "loss": 0.0681, + "step": 5300 + }, + { + "epoch": 8.087158203125e-06, + "model_forward_time": 0.025014638900756836, + "step": 5300 + }, + { + "epoch": 8.087158203125e-06, + "step": 5300, + "training_step_time": 0.11122751235961914 + }, + { + "epoch": 8.08868408203125e-06, + "model_forward_time": 0.02488851547241211, + "step": 5301 + }, + { + "epoch": 8.08868408203125e-06, + "step": 5301, + "training_step_time": 0.11302781105041504 + }, + { + "epoch": 8.0902099609375e-06, + "model_forward_time": 0.026043415069580078, + "step": 5302 + }, + { + "epoch": 8.0902099609375e-06, + "step": 5302, + "training_step_time": 0.10748934745788574 + }, + { + "epoch": 8.09173583984375e-06, + "model_forward_time": 0.025462627410888672, + "step": 5303 + }, + { + "epoch": 8.09173583984375e-06, + "step": 5303, + "training_step_time": 0.10917472839355469 + }, + { + "epoch": 8.09326171875e-06, + "model_forward_time": 0.025010347366333008, + "step": 5304 + }, + { + "epoch": 8.09326171875e-06, + "step": 5304, + "training_step_time": 0.10982155799865723 + }, + { + "epoch": 8.09478759765625e-06, + "model_forward_time": 0.025212764739990234, + "step": 5305 + }, + { + "epoch": 8.09478759765625e-06, + "step": 5305, + "training_step_time": 0.1089167594909668 + }, + { + "epoch": 8.0963134765625e-06, + "model_forward_time": 0.025351524353027344, + "step": 5306 + }, + { + "epoch": 8.0963134765625e-06, + "step": 5306, + "training_step_time": 0.11288189888000488 + }, + { + "epoch": 8.09783935546875e-06, + "model_forward_time": 0.024886369705200195, + "step": 5307 + }, + { + "epoch": 8.09783935546875e-06, + "step": 5307, + "training_step_time": 0.224470853805542 + }, + { + "epoch": 8.099365234375e-06, + "model_forward_time": 0.024619579315185547, + "step": 5308 + }, + { + "epoch": 8.099365234375e-06, + "step": 5308, + "training_step_time": 0.12902545928955078 + }, + { + "epoch": 8.10089111328125e-06, + "model_forward_time": 0.025023698806762695, + "step": 5309 + }, + { + "epoch": 8.10089111328125e-06, + "step": 5309, + "training_step_time": 0.12845373153686523 + }, + { + "epoch": 8.1024169921875e-06, + "grad_norm": 0.5125011205673218, + "learning_rate": 9.565482757680415e-05, + "loss": 0.0722, + "step": 5310 + }, + { + "epoch": 8.1024169921875e-06, + "model_forward_time": 0.02520895004272461, + "step": 5310 + }, + { + "epoch": 8.1024169921875e-06, + "step": 5310, + "training_step_time": 0.12505292892456055 + }, + { + "epoch": 8.10394287109375e-06, + "model_forward_time": 0.02446269989013672, + "step": 5311 + }, + { + "epoch": 8.10394287109375e-06, + "step": 5311, + "training_step_time": 0.11622929573059082 + }, + { + "epoch": 8.10546875e-06, + "model_forward_time": 0.02521371841430664, + "step": 5312 + }, + { + "epoch": 8.10546875e-06, + "step": 5312, + "training_step_time": 0.11935734748840332 + }, + { + "epoch": 8.10699462890625e-06, + "model_forward_time": 0.025005102157592773, + "step": 5313 + }, + { + "epoch": 8.10699462890625e-06, + "step": 5313, + "training_step_time": 0.11413335800170898 + }, + { + "epoch": 8.1085205078125e-06, + "model_forward_time": 0.025180816650390625, + "step": 5314 + }, + { + "epoch": 8.1085205078125e-06, + "step": 5314, + "training_step_time": 0.11236166954040527 + }, + { + "epoch": 8.11004638671875e-06, + "model_forward_time": 0.02509593963623047, + "step": 5315 + }, + { + "epoch": 8.11004638671875e-06, + "step": 5315, + "training_step_time": 0.11250638961791992 + }, + { + "epoch": 8.111572265625e-06, + "model_forward_time": 0.02527141571044922, + "step": 5316 + }, + { + "epoch": 8.111572265625e-06, + "step": 5316, + "training_step_time": 0.11560392379760742 + }, + { + "epoch": 8.11309814453125e-06, + "model_forward_time": 0.025257587432861328, + "step": 5317 + }, + { + "epoch": 8.11309814453125e-06, + "step": 5317, + "training_step_time": 0.1089942455291748 + }, + { + "epoch": 8.1146240234375e-06, + "model_forward_time": 0.0250852108001709, + "step": 5318 + }, + { + "epoch": 8.1146240234375e-06, + "step": 5318, + "training_step_time": 0.11086845397949219 + }, + { + "epoch": 8.11614990234375e-06, + "model_forward_time": 0.025179386138916016, + "step": 5319 + }, + { + "epoch": 8.11614990234375e-06, + "step": 5319, + "training_step_time": 0.10838532447814941 + }, + { + "epoch": 8.11767578125e-06, + "grad_norm": 0.49064013361930847, + "learning_rate": 9.563232679655776e-05, + "loss": 0.0706, + "step": 5320 + }, + { + "epoch": 8.11767578125e-06, + "model_forward_time": 0.024863004684448242, + "step": 5320 + }, + { + "epoch": 8.11767578125e-06, + "step": 5320, + "training_step_time": 0.1320352554321289 + }, + { + "epoch": 8.11920166015625e-06, + "model_forward_time": 0.02538919448852539, + "step": 5321 + }, + { + "epoch": 8.11920166015625e-06, + "step": 5321, + "training_step_time": 0.12091398239135742 + }, + { + "epoch": 8.1207275390625e-06, + "model_forward_time": 0.025599956512451172, + "step": 5322 + }, + { + "epoch": 8.1207275390625e-06, + "step": 5322, + "training_step_time": 0.12876605987548828 + }, + { + "epoch": 8.12225341796875e-06, + "model_forward_time": 0.025117158889770508, + "step": 5323 + }, + { + "epoch": 8.12225341796875e-06, + "step": 5323, + "training_step_time": 0.11061525344848633 + }, + { + "epoch": 8.123779296875e-06, + "model_forward_time": 0.02523040771484375, + "step": 5324 + }, + { + "epoch": 8.123779296875e-06, + "step": 5324, + "training_step_time": 0.12949824333190918 + }, + { + "epoch": 8.12530517578125e-06, + "model_forward_time": 0.024835586547851562, + "step": 5325 + }, + { + "epoch": 8.12530517578125e-06, + "step": 5325, + "training_step_time": 0.2041008472442627 + }, + { + "epoch": 8.1268310546875e-06, + "model_forward_time": 0.024351119995117188, + "step": 5326 + }, + { + "epoch": 8.1268310546875e-06, + "step": 5326, + "training_step_time": 0.11992979049682617 + }, + { + "epoch": 8.12835693359375e-06, + "model_forward_time": 0.024280309677124023, + "step": 5327 + }, + { + "epoch": 8.12835693359375e-06, + "step": 5327, + "training_step_time": 0.1040639877319336 + }, + { + "epoch": 8.1298828125e-06, + "model_forward_time": 0.025501728057861328, + "step": 5328 + }, + { + "epoch": 8.1298828125e-06, + "step": 5328, + "training_step_time": 0.1516432762145996 + }, + { + "epoch": 8.13140869140625e-06, + "model_forward_time": 0.02527022361755371, + "step": 5329 + }, + { + "epoch": 8.13140869140625e-06, + "step": 5329, + "training_step_time": 0.11720442771911621 + }, + { + "epoch": 8.1329345703125e-06, + "grad_norm": 0.43517252802848816, + "learning_rate": 9.560977056873149e-05, + "loss": 0.0971, + "step": 5330 + }, + { + "epoch": 8.1329345703125e-06, + "model_forward_time": 0.024717092514038086, + "step": 5330 + }, + { + "epoch": 8.1329345703125e-06, + "step": 5330, + "training_step_time": 0.1860368251800537 + }, + { + "epoch": 8.13446044921875e-06, + "model_forward_time": 0.02491474151611328, + "step": 5331 + }, + { + "epoch": 8.13446044921875e-06, + "step": 5331, + "training_step_time": 0.17110538482666016 + }, + { + "epoch": 8.135986328125e-06, + "model_forward_time": 0.024711132049560547, + "step": 5332 + }, + { + "epoch": 8.135986328125e-06, + "step": 5332, + "training_step_time": 0.17906785011291504 + }, + { + "epoch": 8.13751220703125e-06, + "model_forward_time": 0.02483224868774414, + "step": 5333 + }, + { + "epoch": 8.13751220703125e-06, + "step": 5333, + "training_step_time": 0.1056206226348877 + }, + { + "epoch": 8.1390380859375e-06, + "model_forward_time": 0.02440476417541504, + "step": 5334 + }, + { + "epoch": 8.1390380859375e-06, + "step": 5334, + "training_step_time": 0.10475826263427734 + }, + { + "epoch": 8.14056396484375e-06, + "model_forward_time": 0.02509284019470215, + "step": 5335 + }, + { + "epoch": 8.14056396484375e-06, + "step": 5335, + "training_step_time": 0.10658097267150879 + }, + { + "epoch": 8.14208984375e-06, + "model_forward_time": 0.025140047073364258, + "step": 5336 + }, + { + "epoch": 8.14208984375e-06, + "step": 5336, + "training_step_time": 0.17899727821350098 + }, + { + "epoch": 8.14361572265625e-06, + "model_forward_time": 0.024337053298950195, + "step": 5337 + }, + { + "epoch": 8.14361572265625e-06, + "step": 5337, + "training_step_time": 0.14462876319885254 + }, + { + "epoch": 8.1451416015625e-06, + "model_forward_time": 0.02455282211303711, + "step": 5338 + }, + { + "epoch": 8.1451416015625e-06, + "step": 5338, + "training_step_time": 0.11198210716247559 + }, + { + "epoch": 8.14666748046875e-06, + "model_forward_time": 0.024847030639648438, + "step": 5339 + }, + { + "epoch": 8.14666748046875e-06, + "step": 5339, + "training_step_time": 0.12465620040893555 + }, + { + "epoch": 8.148193359375e-06, + "grad_norm": 0.5305927991867065, + "learning_rate": 9.558715892073323e-05, + "loss": 0.0793, + "step": 5340 + }, + { + "epoch": 8.148193359375e-06, + "model_forward_time": 0.025543689727783203, + "step": 5340 + }, + { + "epoch": 8.148193359375e-06, + "step": 5340, + "training_step_time": 0.12249422073364258 + }, + { + "epoch": 8.14971923828125e-06, + "model_forward_time": 0.024969100952148438, + "step": 5341 + }, + { + "epoch": 8.14971923828125e-06, + "step": 5341, + "training_step_time": 0.11046671867370605 + }, + { + "epoch": 8.1512451171875e-06, + "model_forward_time": 0.026295900344848633, + "step": 5342 + }, + { + "epoch": 8.1512451171875e-06, + "step": 5342, + "training_step_time": 0.12152767181396484 + }, + { + "epoch": 8.15277099609375e-06, + "model_forward_time": 0.025275468826293945, + "step": 5343 + }, + { + "epoch": 8.15277099609375e-06, + "step": 5343, + "training_step_time": 0.11175131797790527 + }, + { + "epoch": 8.154296875e-06, + "model_forward_time": 0.026798248291015625, + "step": 5344 + }, + { + "epoch": 8.154296875e-06, + "step": 5344, + "training_step_time": 0.10860991477966309 + }, + { + "epoch": 8.15582275390625e-06, + "model_forward_time": 0.028039932250976562, + "step": 5345 + }, + { + "epoch": 8.15582275390625e-06, + "step": 5345, + "training_step_time": 0.1104886531829834 + }, + { + "epoch": 8.1573486328125e-06, + "model_forward_time": 0.025432109832763672, + "step": 5346 + }, + { + "epoch": 8.1573486328125e-06, + "step": 5346, + "training_step_time": 0.11380195617675781 + }, + { + "epoch": 8.15887451171875e-06, + "model_forward_time": 0.024781465530395508, + "step": 5347 + }, + { + "epoch": 8.15887451171875e-06, + "step": 5347, + "training_step_time": 0.12786364555358887 + }, + { + "epoch": 8.160400390625e-06, + "model_forward_time": 0.025107383728027344, + "step": 5348 + }, + { + "epoch": 8.160400390625e-06, + "step": 5348, + "training_step_time": 0.17631149291992188 + }, + { + "epoch": 8.16192626953125e-06, + "model_forward_time": 0.025258541107177734, + "step": 5349 + }, + { + "epoch": 8.16192626953125e-06, + "step": 5349, + "training_step_time": 0.14897370338439941 + }, + { + "epoch": 8.1634521484375e-06, + "grad_norm": 0.8434357643127441, + "learning_rate": 9.556449188003831e-05, + "loss": 0.0726, + "step": 5350 + }, + { + "epoch": 8.1634521484375e-06, + "model_forward_time": 0.024253368377685547, + "step": 5350 + }, + { + "epoch": 8.1634521484375e-06, + "step": 5350, + "training_step_time": 0.21562600135803223 + }, + { + "epoch": 8.16497802734375e-06, + "model_forward_time": 0.024890899658203125, + "step": 5351 + }, + { + "epoch": 8.16497802734375e-06, + "step": 5351, + "training_step_time": 0.12376523017883301 + }, + { + "epoch": 8.16650390625e-06, + "model_forward_time": 0.02453136444091797, + "step": 5352 + }, + { + "epoch": 8.16650390625e-06, + "step": 5352, + "training_step_time": 0.11982059478759766 + }, + { + "epoch": 8.16802978515625e-06, + "model_forward_time": 0.024935245513916016, + "step": 5353 + }, + { + "epoch": 8.16802978515625e-06, + "step": 5353, + "training_step_time": 0.11817359924316406 + }, + { + "epoch": 8.1695556640625e-06, + "model_forward_time": 0.025550127029418945, + "step": 5354 + }, + { + "epoch": 8.1695556640625e-06, + "step": 5354, + "training_step_time": 0.11632728576660156 + }, + { + "epoch": 8.17108154296875e-06, + "model_forward_time": 0.02511143684387207, + "step": 5355 + }, + { + "epoch": 8.17108154296875e-06, + "step": 5355, + "training_step_time": 0.11466741561889648 + }, + { + "epoch": 8.172607421875e-06, + "model_forward_time": 0.02514052391052246, + "step": 5356 + }, + { + "epoch": 8.172607421875e-06, + "step": 5356, + "training_step_time": 0.11188411712646484 + }, + { + "epoch": 8.17413330078125e-06, + "model_forward_time": 0.024961233139038086, + "step": 5357 + }, + { + "epoch": 8.17413330078125e-06, + "step": 5357, + "training_step_time": 0.11230134963989258 + }, + { + "epoch": 8.1756591796875e-06, + "model_forward_time": 0.025269269943237305, + "step": 5358 + }, + { + "epoch": 8.1756591796875e-06, + "step": 5358, + "training_step_time": 0.1098630428314209 + }, + { + "epoch": 8.17718505859375e-06, + "model_forward_time": 0.025458574295043945, + "step": 5359 + }, + { + "epoch": 8.17718505859375e-06, + "step": 5359, + "training_step_time": 0.11239409446716309 + }, + { + "epoch": 8.1787109375e-06, + "grad_norm": 0.4748266637325287, + "learning_rate": 9.554176947418931e-05, + "loss": 0.0804, + "step": 5360 + }, + { + "epoch": 8.1787109375e-06, + "model_forward_time": 0.025731801986694336, + "step": 5360 + }, + { + "epoch": 8.1787109375e-06, + "step": 5360, + "training_step_time": 0.11655783653259277 + }, + { + "epoch": 8.18023681640625e-06, + "model_forward_time": 0.025305986404418945, + "step": 5361 + }, + { + "epoch": 8.18023681640625e-06, + "step": 5361, + "training_step_time": 0.11531305313110352 + }, + { + "epoch": 8.1817626953125e-06, + "model_forward_time": 0.02523660659790039, + "step": 5362 + }, + { + "epoch": 8.1817626953125e-06, + "step": 5362, + "training_step_time": 0.11222195625305176 + }, + { + "epoch": 8.18328857421875e-06, + "model_forward_time": 0.02534031867980957, + "step": 5363 + }, + { + "epoch": 8.18328857421875e-06, + "step": 5363, + "training_step_time": 0.22997641563415527 + }, + { + "epoch": 8.184814453125e-06, + "model_forward_time": 0.024592161178588867, + "step": 5364 + }, + { + "epoch": 8.184814453125e-06, + "step": 5364, + "training_step_time": 0.10897541046142578 + }, + { + "epoch": 8.18634033203125e-06, + "model_forward_time": 0.024296283721923828, + "step": 5365 + }, + { + "epoch": 8.18634033203125e-06, + "step": 5365, + "training_step_time": 0.14478683471679688 + }, + { + "epoch": 8.1878662109375e-06, + "model_forward_time": 0.025159597396850586, + "step": 5366 + }, + { + "epoch": 8.1878662109375e-06, + "step": 5366, + "training_step_time": 0.16909313201904297 + }, + { + "epoch": 8.18939208984375e-06, + "model_forward_time": 0.024129390716552734, + "step": 5367 + }, + { + "epoch": 8.18939208984375e-06, + "step": 5367, + "training_step_time": 0.17233514785766602 + }, + { + "epoch": 8.19091796875e-06, + "model_forward_time": 0.024161577224731445, + "step": 5368 + }, + { + "epoch": 8.19091796875e-06, + "step": 5368, + "training_step_time": 0.17670559883117676 + }, + { + "epoch": 8.19244384765625e-06, + "model_forward_time": 0.024163007736206055, + "step": 5369 + }, + { + "epoch": 8.19244384765625e-06, + "step": 5369, + "training_step_time": 0.11319446563720703 + }, + { + "epoch": 8.1939697265625e-06, + "grad_norm": 0.3221302330493927, + "learning_rate": 9.551899173079607e-05, + "loss": 0.0664, + "step": 5370 + }, + { + "epoch": 8.1939697265625e-06, + "model_forward_time": 0.024658203125, + "step": 5370 + }, + { + "epoch": 8.1939697265625e-06, + "step": 5370, + "training_step_time": 0.13960003852844238 + }, + { + "epoch": 8.19549560546875e-06, + "model_forward_time": 0.025228023529052734, + "step": 5371 + }, + { + "epoch": 8.19549560546875e-06, + "step": 5371, + "training_step_time": 0.10997891426086426 + }, + { + "epoch": 8.197021484375e-06, + "model_forward_time": 0.025026321411132812, + "step": 5372 + }, + { + "epoch": 8.197021484375e-06, + "step": 5372, + "training_step_time": 0.12163496017456055 + }, + { + "epoch": 8.19854736328125e-06, + "model_forward_time": 0.024892330169677734, + "step": 5373 + }, + { + "epoch": 8.19854736328125e-06, + "step": 5373, + "training_step_time": 0.1429598331451416 + }, + { + "epoch": 8.2000732421875e-06, + "model_forward_time": 0.02488541603088379, + "step": 5374 + }, + { + "epoch": 8.2000732421875e-06, + "step": 5374, + "training_step_time": 0.17635488510131836 + }, + { + "epoch": 8.20159912109375e-06, + "model_forward_time": 0.02484607696533203, + "step": 5375 + }, + { + "epoch": 8.20159912109375e-06, + "step": 5375, + "training_step_time": 0.14194035530090332 + }, + { + "epoch": 8.203125e-06, + "model_forward_time": 0.023650169372558594, + "step": 5376 + }, + { + "epoch": 8.203125e-06, + "step": 5376, + "training_step_time": 0.12967753410339355 + }, + { + "epoch": 8.20465087890625e-06, + "model_forward_time": 0.023450136184692383, + "step": 5377 + }, + { + "epoch": 8.20465087890625e-06, + "step": 5377, + "training_step_time": 0.12477755546569824 + }, + { + "epoch": 8.2061767578125e-06, + "model_forward_time": 0.023659944534301758, + "step": 5378 + }, + { + "epoch": 8.2061767578125e-06, + "step": 5378, + "training_step_time": 0.1084284782409668 + }, + { + "epoch": 8.20770263671875e-06, + "model_forward_time": 0.024930715560913086, + "step": 5379 + }, + { + "epoch": 8.20770263671875e-06, + "step": 5379, + "training_step_time": 0.18893170356750488 + }, + { + "epoch": 8.209228515625e-06, + "grad_norm": 0.34595009684562683, + "learning_rate": 9.549615867753573e-05, + "loss": 0.0756, + "step": 5380 + }, + { + "epoch": 8.209228515625e-06, + "model_forward_time": 0.02407526969909668, + "step": 5380 + }, + { + "epoch": 8.209228515625e-06, + "step": 5380, + "training_step_time": 0.11600518226623535 + }, + { + "epoch": 8.21075439453125e-06, + "model_forward_time": 0.02679133415222168, + "step": 5381 + }, + { + "epoch": 8.21075439453125e-06, + "step": 5381, + "training_step_time": 0.15187382698059082 + }, + { + "epoch": 8.2122802734375e-06, + "model_forward_time": 0.02459883689880371, + "step": 5382 + }, + { + "epoch": 8.2122802734375e-06, + "step": 5382, + "training_step_time": 0.206329345703125 + }, + { + "epoch": 8.21380615234375e-06, + "model_forward_time": 0.02478647232055664, + "step": 5383 + }, + { + "epoch": 8.21380615234375e-06, + "step": 5383, + "training_step_time": 0.10579752922058105 + }, + { + "epoch": 8.21533203125e-06, + "model_forward_time": 0.02439594268798828, + "step": 5384 + }, + { + "epoch": 8.21533203125e-06, + "step": 5384, + "training_step_time": 0.12673449516296387 + }, + { + "epoch": 8.21685791015625e-06, + "model_forward_time": 0.025148391723632812, + "step": 5385 + }, + { + "epoch": 8.21685791015625e-06, + "step": 5385, + "training_step_time": 0.10904955863952637 + }, + { + "epoch": 8.2183837890625e-06, + "model_forward_time": 0.02585768699645996, + "step": 5386 + }, + { + "epoch": 8.2183837890625e-06, + "step": 5386, + "training_step_time": 0.11176323890686035 + }, + { + "epoch": 8.21990966796875e-06, + "model_forward_time": 0.024792909622192383, + "step": 5387 + }, + { + "epoch": 8.21990966796875e-06, + "step": 5387, + "training_step_time": 0.11025881767272949 + }, + { + "epoch": 8.221435546875e-06, + "model_forward_time": 0.02603769302368164, + "step": 5388 + }, + { + "epoch": 8.221435546875e-06, + "step": 5388, + "training_step_time": 0.10681033134460449 + }, + { + "epoch": 8.22296142578125e-06, + "model_forward_time": 0.025087356567382812, + "step": 5389 + }, + { + "epoch": 8.22296142578125e-06, + "step": 5389, + "training_step_time": 0.10564279556274414 + }, + { + "epoch": 8.2244873046875e-06, + "grad_norm": 0.4957561194896698, + "learning_rate": 9.54732703421526e-05, + "loss": 0.0693, + "step": 5390 + }, + { + "epoch": 8.2244873046875e-06, + "model_forward_time": 0.025244474411010742, + "step": 5390 + }, + { + "epoch": 8.2244873046875e-06, + "step": 5390, + "training_step_time": 0.10712337493896484 + }, + { + "epoch": 8.22601318359375e-06, + "model_forward_time": 0.02538919448852539, + "step": 5391 + }, + { + "epoch": 8.22601318359375e-06, + "step": 5391, + "training_step_time": 0.11452078819274902 + }, + { + "epoch": 8.2275390625e-06, + "model_forward_time": 0.025253772735595703, + "step": 5392 + }, + { + "epoch": 8.2275390625e-06, + "step": 5392, + "training_step_time": 0.11937642097473145 + }, + { + "epoch": 8.22906494140625e-06, + "model_forward_time": 0.02517557144165039, + "step": 5393 + }, + { + "epoch": 8.22906494140625e-06, + "step": 5393, + "training_step_time": 0.25300049781799316 + }, + { + "epoch": 8.2305908203125e-06, + "model_forward_time": 0.0240328311920166, + "step": 5394 + }, + { + "epoch": 8.2305908203125e-06, + "step": 5394, + "training_step_time": 0.2064368724822998 + }, + { + "epoch": 8.23211669921875e-06, + "model_forward_time": 0.023857593536376953, + "step": 5395 + }, + { + "epoch": 8.23211669921875e-06, + "step": 5395, + "training_step_time": 0.19422578811645508 + }, + { + "epoch": 8.233642578125e-06, + "model_forward_time": 0.02392411231994629, + "step": 5396 + }, + { + "epoch": 8.233642578125e-06, + "step": 5396, + "training_step_time": 0.1833946704864502 + }, + { + "epoch": 8.23516845703125e-06, + "model_forward_time": 0.02422046661376953, + "step": 5397 + }, + { + "epoch": 8.23516845703125e-06, + "step": 5397, + "training_step_time": 0.17303013801574707 + }, + { + "epoch": 8.2366943359375e-06, + "model_forward_time": 0.02458477020263672, + "step": 5398 + }, + { + "epoch": 8.2366943359375e-06, + "step": 5398, + "training_step_time": 0.16502642631530762 + }, + { + "epoch": 8.23822021484375e-06, + "model_forward_time": 0.02414679527282715, + "step": 5399 + }, + { + "epoch": 8.23822021484375e-06, + "step": 5399, + "training_step_time": 0.10382938385009766 + }, + { + "epoch": 8.23974609375e-06, + "grad_norm": 0.2264564335346222, + "learning_rate": 9.545032675245813e-05, + "loss": 0.0814, + "step": 5400 + }, + { + "epoch": 8.23974609375e-06, + "model_forward_time": 0.024617433547973633, + "step": 5400 + }, + { + "epoch": 8.23974609375e-06, + "step": 5400, + "training_step_time": 0.10681724548339844 + }, + { + "epoch": 8.24127197265625e-06, + "model_forward_time": 0.025135040283203125, + "step": 5401 + }, + { + "epoch": 8.24127197265625e-06, + "step": 5401, + "training_step_time": 0.11032557487487793 + }, + { + "epoch": 8.2427978515625e-06, + "model_forward_time": 0.025371789932250977, + "step": 5402 + }, + { + "epoch": 8.2427978515625e-06, + "step": 5402, + "training_step_time": 0.10884857177734375 + }, + { + "epoch": 8.24432373046875e-06, + "model_forward_time": 0.0252377986907959, + "step": 5403 + }, + { + "epoch": 8.24432373046875e-06, + "step": 5403, + "training_step_time": 0.1079401969909668 + }, + { + "epoch": 8.245849609375e-06, + "model_forward_time": 0.02488398551940918, + "step": 5404 + }, + { + "epoch": 8.245849609375e-06, + "step": 5404, + "training_step_time": 0.15450048446655273 + }, + { + "epoch": 8.24737548828125e-06, + "model_forward_time": 0.024698495864868164, + "step": 5405 + }, + { + "epoch": 8.24737548828125e-06, + "step": 5405, + "training_step_time": 0.11373424530029297 + }, + { + "epoch": 8.2489013671875e-06, + "model_forward_time": 0.02431511878967285, + "step": 5406 + }, + { + "epoch": 8.2489013671875e-06, + "step": 5406, + "training_step_time": 0.12069821357727051 + }, + { + "epoch": 8.25042724609375e-06, + "model_forward_time": 0.02542257308959961, + "step": 5407 + }, + { + "epoch": 8.25042724609375e-06, + "step": 5407, + "training_step_time": 0.1241157054901123 + }, + { + "epoch": 8.251953125e-06, + "model_forward_time": 0.026100873947143555, + "step": 5408 + }, + { + "epoch": 8.251953125e-06, + "step": 5408, + "training_step_time": 0.16297125816345215 + }, + { + "epoch": 8.25347900390625e-06, + "model_forward_time": 0.02475762367248535, + "step": 5409 + }, + { + "epoch": 8.25347900390625e-06, + "step": 5409, + "training_step_time": 0.13051819801330566 + }, + { + "epoch": 8.2550048828125e-06, + "grad_norm": 0.3755182921886444, + "learning_rate": 9.542732793633098e-05, + "loss": 0.074, + "step": 5410 + }, + { + "epoch": 8.2550048828125e-06, + "model_forward_time": 0.024434328079223633, + "step": 5410 + }, + { + "epoch": 8.2550048828125e-06, + "step": 5410, + "training_step_time": 0.19635677337646484 + }, + { + "epoch": 8.25653076171875e-06, + "model_forward_time": 0.024559736251831055, + "step": 5411 + }, + { + "epoch": 8.25653076171875e-06, + "step": 5411, + "training_step_time": 0.14107203483581543 + }, + { + "epoch": 8.258056640625e-06, + "model_forward_time": 0.024749279022216797, + "step": 5412 + }, + { + "epoch": 8.258056640625e-06, + "step": 5412, + "training_step_time": 0.10939240455627441 + }, + { + "epoch": 8.25958251953125e-06, + "model_forward_time": 0.02518486976623535, + "step": 5413 + }, + { + "epoch": 8.25958251953125e-06, + "step": 5413, + "training_step_time": 0.11303281784057617 + }, + { + "epoch": 8.2611083984375e-06, + "model_forward_time": 0.025122880935668945, + "step": 5414 + }, + { + "epoch": 8.2611083984375e-06, + "step": 5414, + "training_step_time": 0.11872005462646484 + }, + { + "epoch": 8.26263427734375e-06, + "model_forward_time": 0.02526712417602539, + "step": 5415 + }, + { + "epoch": 8.26263427734375e-06, + "step": 5415, + "training_step_time": 0.10767006874084473 + }, + { + "epoch": 8.26416015625e-06, + "model_forward_time": 0.02498149871826172, + "step": 5416 + }, + { + "epoch": 8.26416015625e-06, + "step": 5416, + "training_step_time": 0.19707155227661133 + }, + { + "epoch": 8.26568603515625e-06, + "model_forward_time": 0.023816347122192383, + "step": 5417 + }, + { + "epoch": 8.26568603515625e-06, + "step": 5417, + "training_step_time": 0.10553479194641113 + }, + { + "epoch": 8.2672119140625e-06, + "model_forward_time": 0.024667024612426758, + "step": 5418 + }, + { + "epoch": 8.2672119140625e-06, + "step": 5418, + "training_step_time": 0.10673952102661133 + }, + { + "epoch": 8.26873779296875e-06, + "model_forward_time": 0.025097370147705078, + "step": 5419 + }, + { + "epoch": 8.26873779296875e-06, + "step": 5419, + "training_step_time": 0.11037850379943848 + }, + { + "epoch": 8.270263671875e-06, + "grad_norm": 0.3703526258468628, + "learning_rate": 9.540427392171688e-05, + "loss": 0.0823, + "step": 5420 + }, + { + "epoch": 8.270263671875e-06, + "model_forward_time": 0.025620222091674805, + "step": 5420 + }, + { + "epoch": 8.270263671875e-06, + "step": 5420, + "training_step_time": 0.11850118637084961 + }, + { + "epoch": 8.27178955078125e-06, + "model_forward_time": 0.025363683700561523, + "step": 5421 + }, + { + "epoch": 8.27178955078125e-06, + "step": 5421, + "training_step_time": 0.11427664756774902 + }, + { + "epoch": 8.2733154296875e-06, + "model_forward_time": 0.02570056915283203, + "step": 5422 + }, + { + "epoch": 8.2733154296875e-06, + "step": 5422, + "training_step_time": 0.11615395545959473 + }, + { + "epoch": 8.27484130859375e-06, + "model_forward_time": 0.02510213851928711, + "step": 5423 + }, + { + "epoch": 8.27484130859375e-06, + "step": 5423, + "training_step_time": 0.11487960815429688 + }, + { + "epoch": 8.2763671875e-06, + "model_forward_time": 0.024814844131469727, + "step": 5424 + }, + { + "epoch": 8.2763671875e-06, + "step": 5424, + "training_step_time": 0.2116868495941162 + }, + { + "epoch": 8.27789306640625e-06, + "model_forward_time": 0.023987293243408203, + "step": 5425 + }, + { + "epoch": 8.27789306640625e-06, + "step": 5425, + "training_step_time": 0.13030052185058594 + }, + { + "epoch": 8.2794189453125e-06, + "model_forward_time": 0.023725509643554688, + "step": 5426 + }, + { + "epoch": 8.2794189453125e-06, + "step": 5426, + "training_step_time": 0.12003946304321289 + }, + { + "epoch": 8.28094482421875e-06, + "model_forward_time": 0.024985313415527344, + "step": 5427 + }, + { + "epoch": 8.28094482421875e-06, + "step": 5427, + "training_step_time": 0.11780261993408203 + }, + { + "epoch": 8.282470703125e-06, + "model_forward_time": 0.02527785301208496, + "step": 5428 + }, + { + "epoch": 8.282470703125e-06, + "step": 5428, + "training_step_time": 0.10997509956359863 + }, + { + "epoch": 8.28399658203125e-06, + "model_forward_time": 0.025172948837280273, + "step": 5429 + }, + { + "epoch": 8.28399658203125e-06, + "step": 5429, + "training_step_time": 0.11288118362426758 + }, + { + "epoch": 8.2855224609375e-06, + "grad_norm": 0.38096684217453003, + "learning_rate": 9.538116473662861e-05, + "loss": 0.0823, + "step": 5430 + }, + { + "epoch": 8.2855224609375e-06, + "model_forward_time": 0.02511882781982422, + "step": 5430 + }, + { + "epoch": 8.2855224609375e-06, + "step": 5430, + "training_step_time": 0.10942912101745605 + }, + { + "epoch": 8.28704833984375e-06, + "model_forward_time": 0.024660110473632812, + "step": 5431 + }, + { + "epoch": 8.28704833984375e-06, + "step": 5431, + "training_step_time": 0.10489344596862793 + }, + { + "epoch": 8.28857421875e-06, + "model_forward_time": 0.024590253829956055, + "step": 5432 + }, + { + "epoch": 8.28857421875e-06, + "step": 5432, + "training_step_time": 0.11569452285766602 + }, + { + "epoch": 8.29010009765625e-06, + "model_forward_time": 0.02505016326904297, + "step": 5433 + }, + { + "epoch": 8.29010009765625e-06, + "step": 5433, + "training_step_time": 0.1094369888305664 + }, + { + "epoch": 8.2916259765625e-06, + "model_forward_time": 0.025224685668945312, + "step": 5434 + }, + { + "epoch": 8.2916259765625e-06, + "step": 5434, + "training_step_time": 0.10945534706115723 + }, + { + "epoch": 8.29315185546875e-06, + "model_forward_time": 0.025134563446044922, + "step": 5435 + }, + { + "epoch": 8.29315185546875e-06, + "step": 5435, + "training_step_time": 0.17635011672973633 + }, + { + "epoch": 8.294677734375e-06, + "model_forward_time": 0.02438831329345703, + "step": 5436 + }, + { + "epoch": 8.294677734375e-06, + "step": 5436, + "training_step_time": 0.15650248527526855 + }, + { + "epoch": 8.29620361328125e-06, + "model_forward_time": 0.024444103240966797, + "step": 5437 + }, + { + "epoch": 8.29620361328125e-06, + "step": 5437, + "training_step_time": 0.10532855987548828 + }, + { + "epoch": 8.2977294921875e-06, + "model_forward_time": 0.024596452713012695, + "step": 5438 + }, + { + "epoch": 8.2977294921875e-06, + "step": 5438, + "training_step_time": 0.1057741641998291 + }, + { + "epoch": 8.29925537109375e-06, + "model_forward_time": 0.025053977966308594, + "step": 5439 + }, + { + "epoch": 8.29925537109375e-06, + "step": 5439, + "training_step_time": 0.11162543296813965 + }, + { + "epoch": 8.30078125e-06, + "grad_norm": 0.45108526945114136, + "learning_rate": 9.535800040914601e-05, + "loss": 0.0933, + "step": 5440 + }, + { + "epoch": 8.30078125e-06, + "model_forward_time": 0.025286436080932617, + "step": 5440 + }, + { + "epoch": 8.30078125e-06, + "step": 5440, + "training_step_time": 0.10928082466125488 + }, + { + "epoch": 8.30230712890625e-06, + "model_forward_time": 0.025066852569580078, + "step": 5441 + }, + { + "epoch": 8.30230712890625e-06, + "step": 5441, + "training_step_time": 0.10747909545898438 + }, + { + "epoch": 8.3038330078125e-06, + "model_forward_time": 0.02475118637084961, + "step": 5442 + }, + { + "epoch": 8.3038330078125e-06, + "step": 5442, + "training_step_time": 0.10696268081665039 + }, + { + "epoch": 8.30535888671875e-06, + "model_forward_time": 0.025704145431518555, + "step": 5443 + }, + { + "epoch": 8.30535888671875e-06, + "step": 5443, + "training_step_time": 0.1087336540222168 + }, + { + "epoch": 8.306884765625e-06, + "model_forward_time": 0.024983882904052734, + "step": 5444 + }, + { + "epoch": 8.306884765625e-06, + "step": 5444, + "training_step_time": 0.10955119132995605 + }, + { + "epoch": 8.30841064453125e-06, + "model_forward_time": 0.024893760681152344, + "step": 5445 + }, + { + "epoch": 8.30841064453125e-06, + "step": 5445, + "training_step_time": 0.11089968681335449 + }, + { + "epoch": 8.3099365234375e-06, + "model_forward_time": 0.02477264404296875, + "step": 5446 + }, + { + "epoch": 8.3099365234375e-06, + "step": 5446, + "training_step_time": 0.11092591285705566 + }, + { + "epoch": 8.31146240234375e-06, + "model_forward_time": 0.025128602981567383, + "step": 5447 + }, + { + "epoch": 8.31146240234375e-06, + "step": 5447, + "training_step_time": 0.11223387718200684 + }, + { + "epoch": 8.31298828125e-06, + "model_forward_time": 0.025223493576049805, + "step": 5448 + }, + { + "epoch": 8.31298828125e-06, + "step": 5448, + "training_step_time": 0.11142301559448242 + }, + { + "epoch": 8.31451416015625e-06, + "model_forward_time": 0.024811267852783203, + "step": 5449 + }, + { + "epoch": 8.31451416015625e-06, + "step": 5449, + "training_step_time": 0.17998123168945312 + }, + { + "epoch": 8.3160400390625e-06, + "grad_norm": 0.5918139219284058, + "learning_rate": 9.533478096741597e-05, + "loss": 0.0578, + "step": 5450 + }, + { + "epoch": 8.3160400390625e-06, + "model_forward_time": 0.024448156356811523, + "step": 5450 + }, + { + "epoch": 8.3160400390625e-06, + "step": 5450, + "training_step_time": 0.1714034080505371 + }, + { + "epoch": 8.31756591796875e-06, + "model_forward_time": 0.024703025817871094, + "step": 5451 + }, + { + "epoch": 8.31756591796875e-06, + "step": 5451, + "training_step_time": 0.10742902755737305 + }, + { + "epoch": 8.319091796875e-06, + "model_forward_time": 0.024678945541381836, + "step": 5452 + }, + { + "epoch": 8.319091796875e-06, + "step": 5452, + "training_step_time": 0.11219930648803711 + }, + { + "epoch": 8.32061767578125e-06, + "model_forward_time": 0.024983644485473633, + "step": 5453 + }, + { + "epoch": 8.32061767578125e-06, + "step": 5453, + "training_step_time": 0.10629034042358398 + }, + { + "epoch": 8.3221435546875e-06, + "model_forward_time": 0.025378942489624023, + "step": 5454 + }, + { + "epoch": 8.3221435546875e-06, + "step": 5454, + "training_step_time": 0.11167120933532715 + }, + { + "epoch": 8.32366943359375e-06, + "model_forward_time": 0.02494645118713379, + "step": 5455 + }, + { + "epoch": 8.32366943359375e-06, + "step": 5455, + "training_step_time": 0.1153714656829834 + }, + { + "epoch": 8.3251953125e-06, + "model_forward_time": 0.02495431900024414, + "step": 5456 + }, + { + "epoch": 8.3251953125e-06, + "step": 5456, + "training_step_time": 0.16915011405944824 + }, + { + "epoch": 8.32672119140625e-06, + "model_forward_time": 0.024498462677001953, + "step": 5457 + }, + { + "epoch": 8.32672119140625e-06, + "step": 5457, + "training_step_time": 0.1372990608215332 + }, + { + "epoch": 8.3282470703125e-06, + "model_forward_time": 0.024845600128173828, + "step": 5458 + }, + { + "epoch": 8.3282470703125e-06, + "step": 5458, + "training_step_time": 0.11327838897705078 + }, + { + "epoch": 8.32977294921875e-06, + "model_forward_time": 0.025232791900634766, + "step": 5459 + }, + { + "epoch": 8.32977294921875e-06, + "step": 5459, + "training_step_time": 0.1132197380065918 + }, + { + "epoch": 8.331298828125e-06, + "grad_norm": 0.5240521430969238, + "learning_rate": 9.531150643965223e-05, + "loss": 0.0678, + "step": 5460 + }, + { + "epoch": 8.331298828125e-06, + "model_forward_time": 0.024753093719482422, + "step": 5460 + }, + { + "epoch": 8.331298828125e-06, + "step": 5460, + "training_step_time": 0.11138176918029785 + }, + { + "epoch": 8.33282470703125e-06, + "model_forward_time": 0.02484750747680664, + "step": 5461 + }, + { + "epoch": 8.33282470703125e-06, + "step": 5461, + "training_step_time": 0.1871335506439209 + }, + { + "epoch": 8.3343505859375e-06, + "model_forward_time": 0.024805307388305664, + "step": 5462 + }, + { + "epoch": 8.3343505859375e-06, + "step": 5462, + "training_step_time": 0.11164474487304688 + }, + { + "epoch": 8.33587646484375e-06, + "model_forward_time": 0.024579286575317383, + "step": 5463 + }, + { + "epoch": 8.33587646484375e-06, + "step": 5463, + "training_step_time": 0.10885429382324219 + }, + { + "epoch": 8.33740234375e-06, + "model_forward_time": 0.02498483657836914, + "step": 5464 + }, + { + "epoch": 8.33740234375e-06, + "step": 5464, + "training_step_time": 0.10873198509216309 + }, + { + "epoch": 8.33892822265625e-06, + "model_forward_time": 0.025329113006591797, + "step": 5465 + }, + { + "epoch": 8.33892822265625e-06, + "step": 5465, + "training_step_time": 0.11266231536865234 + }, + { + "epoch": 8.3404541015625e-06, + "model_forward_time": 0.026561260223388672, + "step": 5466 + }, + { + "epoch": 8.3404541015625e-06, + "step": 5466, + "training_step_time": 0.10968375205993652 + }, + { + "epoch": 8.34197998046875e-06, + "model_forward_time": 0.025075197219848633, + "step": 5467 + }, + { + "epoch": 8.34197998046875e-06, + "step": 5467, + "training_step_time": 0.10701274871826172 + }, + { + "epoch": 8.343505859375e-06, + "model_forward_time": 0.0249021053314209, + "step": 5468 + }, + { + "epoch": 8.343505859375e-06, + "step": 5468, + "training_step_time": 0.14616847038269043 + }, + { + "epoch": 8.34503173828125e-06, + "model_forward_time": 0.02481365203857422, + "step": 5469 + }, + { + "epoch": 8.34503173828125e-06, + "step": 5469, + "training_step_time": 0.11165738105773926 + }, + { + "epoch": 8.3465576171875e-06, + "grad_norm": 0.28891709446907043, + "learning_rate": 9.528817685413558e-05, + "loss": 0.0659, + "step": 5470 + }, + { + "epoch": 8.3465576171875e-06, + "model_forward_time": 0.024705171585083008, + "step": 5470 + }, + { + "epoch": 8.3465576171875e-06, + "step": 5470, + "training_step_time": 0.22222423553466797 + }, + { + "epoch": 8.34808349609375e-06, + "model_forward_time": 0.024585723876953125, + "step": 5471 + }, + { + "epoch": 8.34808349609375e-06, + "step": 5471, + "training_step_time": 0.13445329666137695 + }, + { + "epoch": 8.349609375e-06, + "model_forward_time": 0.02397942543029785, + "step": 5472 + }, + { + "epoch": 8.349609375e-06, + "step": 5472, + "training_step_time": 0.11018991470336914 + }, + { + "epoch": 8.35113525390625e-06, + "model_forward_time": 0.025214433670043945, + "step": 5473 + }, + { + "epoch": 8.35113525390625e-06, + "step": 5473, + "training_step_time": 0.1356046199798584 + }, + { + "epoch": 8.3526611328125e-06, + "model_forward_time": 0.025006771087646484, + "step": 5474 + }, + { + "epoch": 8.3526611328125e-06, + "step": 5474, + "training_step_time": 0.15095186233520508 + }, + { + "epoch": 8.35418701171875e-06, + "model_forward_time": 0.024355173110961914, + "step": 5475 + }, + { + "epoch": 8.35418701171875e-06, + "step": 5475, + "training_step_time": 0.10936498641967773 + }, + { + "epoch": 8.355712890625e-06, + "model_forward_time": 0.024937152862548828, + "step": 5476 + }, + { + "epoch": 8.355712890625e-06, + "step": 5476, + "training_step_time": 0.13046550750732422 + }, + { + "epoch": 8.35723876953125e-06, + "model_forward_time": 0.024749755859375, + "step": 5477 + }, + { + "epoch": 8.35723876953125e-06, + "step": 5477, + "training_step_time": 0.19885826110839844 + }, + { + "epoch": 8.3587646484375e-06, + "model_forward_time": 0.024549245834350586, + "step": 5478 + }, + { + "epoch": 8.3587646484375e-06, + "step": 5478, + "training_step_time": 0.12249517440795898 + }, + { + "epoch": 8.36029052734375e-06, + "model_forward_time": 0.024651050567626953, + "step": 5479 + }, + { + "epoch": 8.36029052734375e-06, + "step": 5479, + "training_step_time": 0.21123838424682617 + }, + { + "epoch": 8.36181640625e-06, + "grad_norm": 0.5504735112190247, + "learning_rate": 9.526479223921366e-05, + "loss": 0.0731, + "step": 5480 + }, + { + "epoch": 8.36181640625e-06, + "model_forward_time": 0.024690628051757812, + "step": 5480 + }, + { + "epoch": 8.36181640625e-06, + "step": 5480, + "training_step_time": 0.11457467079162598 + }, + { + "epoch": 8.36334228515625e-06, + "model_forward_time": 0.024468660354614258, + "step": 5481 + }, + { + "epoch": 8.36334228515625e-06, + "step": 5481, + "training_step_time": 0.10823631286621094 + }, + { + "epoch": 8.3648681640625e-06, + "model_forward_time": 0.02501821517944336, + "step": 5482 + }, + { + "epoch": 8.3648681640625e-06, + "step": 5482, + "training_step_time": 0.10824394226074219 + }, + { + "epoch": 8.36639404296875e-06, + "model_forward_time": 0.025504112243652344, + "step": 5483 + }, + { + "epoch": 8.36639404296875e-06, + "step": 5483, + "training_step_time": 0.1091454029083252 + }, + { + "epoch": 8.367919921875e-06, + "model_forward_time": 0.025091886520385742, + "step": 5484 + }, + { + "epoch": 8.367919921875e-06, + "step": 5484, + "training_step_time": 0.10916900634765625 + }, + { + "epoch": 8.36944580078125e-06, + "model_forward_time": 0.025087833404541016, + "step": 5485 + }, + { + "epoch": 8.36944580078125e-06, + "step": 5485, + "training_step_time": 0.10770893096923828 + }, + { + "epoch": 8.3709716796875e-06, + "model_forward_time": 0.02560257911682129, + "step": 5486 + }, + { + "epoch": 8.3709716796875e-06, + "step": 5486, + "training_step_time": 0.11017322540283203 + }, + { + "epoch": 8.37249755859375e-06, + "model_forward_time": 0.02578258514404297, + "step": 5487 + }, + { + "epoch": 8.37249755859375e-06, + "step": 5487, + "training_step_time": 0.11110877990722656 + }, + { + "epoch": 8.3740234375e-06, + "model_forward_time": 0.024693965911865234, + "step": 5488 + }, + { + "epoch": 8.3740234375e-06, + "step": 5488, + "training_step_time": 0.11010169982910156 + }, + { + "epoch": 8.37554931640625e-06, + "model_forward_time": 0.025443553924560547, + "step": 5489 + }, + { + "epoch": 8.37554931640625e-06, + "step": 5489, + "training_step_time": 0.11905550956726074 + }, + { + "epoch": 8.3770751953125e-06, + "grad_norm": 0.4239727258682251, + "learning_rate": 9.524135262330098e-05, + "loss": 0.071, + "step": 5490 + }, + { + "epoch": 8.3770751953125e-06, + "model_forward_time": 0.025124549865722656, + "step": 5490 + }, + { + "epoch": 8.3770751953125e-06, + "step": 5490, + "training_step_time": 0.10994172096252441 + }, + { + "epoch": 8.37860107421875e-06, + "model_forward_time": 0.02520608901977539, + "step": 5491 + }, + { + "epoch": 8.37860107421875e-06, + "step": 5491, + "training_step_time": 0.1077728271484375 + }, + { + "epoch": 8.380126953125e-06, + "model_forward_time": 0.025278568267822266, + "step": 5492 + }, + { + "epoch": 8.380126953125e-06, + "step": 5492, + "training_step_time": 0.1076817512512207 + }, + { + "epoch": 8.38165283203125e-06, + "model_forward_time": 0.025472640991210938, + "step": 5493 + }, + { + "epoch": 8.38165283203125e-06, + "step": 5493, + "training_step_time": 0.10670733451843262 + }, + { + "epoch": 8.3831787109375e-06, + "model_forward_time": 0.025128602981567383, + "step": 5494 + }, + { + "epoch": 8.3831787109375e-06, + "step": 5494, + "training_step_time": 0.14341998100280762 + }, + { + "epoch": 8.38470458984375e-06, + "model_forward_time": 0.02534651756286621, + "step": 5495 + }, + { + "epoch": 8.38470458984375e-06, + "step": 5495, + "training_step_time": 0.1097710132598877 + }, + { + "epoch": 8.38623046875e-06, + "model_forward_time": 0.0252532958984375, + "step": 5496 + }, + { + "epoch": 8.38623046875e-06, + "step": 5496, + "training_step_time": 0.13894319534301758 + }, + { + "epoch": 8.38775634765625e-06, + "model_forward_time": 0.02528667449951172, + "step": 5497 + }, + { + "epoch": 8.38775634765625e-06, + "step": 5497, + "training_step_time": 0.11038494110107422 + }, + { + "epoch": 8.3892822265625e-06, + "model_forward_time": 0.025043487548828125, + "step": 5498 + }, + { + "epoch": 8.3892822265625e-06, + "step": 5498, + "training_step_time": 0.17298007011413574 + }, + { + "epoch": 8.39080810546875e-06, + "model_forward_time": 0.024107933044433594, + "step": 5499 + }, + { + "epoch": 8.39080810546875e-06, + "step": 5499, + "training_step_time": 0.13412261009216309 + }, + { + "epoch": 8.392333984375e-06, + "grad_norm": 0.356250137090683, + "learning_rate": 9.521785803487889e-05, + "loss": 0.1008, + "step": 5500 + }, + { + "epoch": 8.392333984375e-06, + "model_forward_time": 0.024596214294433594, + "step": 5500 + }, + { + "epoch": 8.392333984375e-06, + "step": 5500, + "training_step_time": 0.13425779342651367 + }, + { + "epoch": 8.39385986328125e-06, + "model_forward_time": 0.024463891983032227, + "step": 5501 + }, + { + "epoch": 8.39385986328125e-06, + "step": 5501, + "training_step_time": 0.1372542381286621 + }, + { + "epoch": 8.3953857421875e-06, + "model_forward_time": 0.02429485321044922, + "step": 5502 + }, + { + "epoch": 8.3953857421875e-06, + "step": 5502, + "training_step_time": 0.11211919784545898 + }, + { + "epoch": 8.39691162109375e-06, + "model_forward_time": 0.024601221084594727, + "step": 5503 + }, + { + "epoch": 8.39691162109375e-06, + "step": 5503, + "training_step_time": 0.11571311950683594 + }, + { + "epoch": 8.3984375e-06, + "model_forward_time": 0.025115251541137695, + "step": 5504 + }, + { + "epoch": 8.3984375e-06, + "step": 5504, + "training_step_time": 0.11552047729492188 + }, + { + "epoch": 8.39996337890625e-06, + "model_forward_time": 0.025099992752075195, + "step": 5505 + }, + { + "epoch": 8.39996337890625e-06, + "step": 5505, + "training_step_time": 0.15906310081481934 + }, + { + "epoch": 8.4014892578125e-06, + "model_forward_time": 0.024593830108642578, + "step": 5506 + }, + { + "epoch": 8.4014892578125e-06, + "step": 5506, + "training_step_time": 0.14723944664001465 + }, + { + "epoch": 8.40301513671875e-06, + "model_forward_time": 0.024686098098754883, + "step": 5507 + }, + { + "epoch": 8.40301513671875e-06, + "step": 5507, + "training_step_time": 0.10294818878173828 + }, + { + "epoch": 8.404541015625e-06, + "model_forward_time": 0.024959564208984375, + "step": 5508 + }, + { + "epoch": 8.404541015625e-06, + "step": 5508, + "training_step_time": 0.10719561576843262 + }, + { + "epoch": 8.40606689453125e-06, + "model_forward_time": 0.025610923767089844, + "step": 5509 + }, + { + "epoch": 8.40606689453125e-06, + "step": 5509, + "training_step_time": 0.10536742210388184 + }, + { + "epoch": 8.4075927734375e-06, + "grad_norm": 0.6870419979095459, + "learning_rate": 9.51943085024955e-05, + "loss": 0.0899, + "step": 5510 + }, + { + "epoch": 8.4075927734375e-06, + "model_forward_time": 0.024843692779541016, + "step": 5510 + }, + { + "epoch": 8.4075927734375e-06, + "step": 5510, + "training_step_time": 0.11047911643981934 + }, + { + "epoch": 8.40911865234375e-06, + "model_forward_time": 0.0254213809967041, + "step": 5511 + }, + { + "epoch": 8.40911865234375e-06, + "step": 5511, + "training_step_time": 0.11006999015808105 + }, + { + "epoch": 8.41064453125e-06, + "model_forward_time": 0.025430679321289062, + "step": 5512 + }, + { + "epoch": 8.41064453125e-06, + "step": 5512, + "training_step_time": 0.18528962135314941 + }, + { + "epoch": 8.41217041015625e-06, + "model_forward_time": 0.025561094284057617, + "step": 5513 + }, + { + "epoch": 8.41217041015625e-06, + "step": 5513, + "training_step_time": 0.11009383201599121 + }, + { + "epoch": 8.4136962890625e-06, + "model_forward_time": 0.02498793601989746, + "step": 5514 + }, + { + "epoch": 8.4136962890625e-06, + "step": 5514, + "training_step_time": 0.10881209373474121 + }, + { + "epoch": 8.41522216796875e-06, + "model_forward_time": 0.02525949478149414, + "step": 5515 + }, + { + "epoch": 8.41522216796875e-06, + "step": 5515, + "training_step_time": 0.11917877197265625 + }, + { + "epoch": 8.416748046875e-06, + "model_forward_time": 0.025124073028564453, + "step": 5516 + }, + { + "epoch": 8.416748046875e-06, + "step": 5516, + "training_step_time": 0.1311483383178711 + }, + { + "epoch": 8.41827392578125e-06, + "model_forward_time": 0.025047779083251953, + "step": 5517 + }, + { + "epoch": 8.41827392578125e-06, + "step": 5517, + "training_step_time": 0.1192178726196289 + }, + { + "epoch": 8.4197998046875e-06, + "model_forward_time": 0.02488422393798828, + "step": 5518 + }, + { + "epoch": 8.4197998046875e-06, + "step": 5518, + "training_step_time": 0.1235342025756836 + }, + { + "epoch": 8.42132568359375e-06, + "model_forward_time": 0.02543044090270996, + "step": 5519 + }, + { + "epoch": 8.42132568359375e-06, + "step": 5519, + "training_step_time": 0.11320638656616211 + }, + { + "epoch": 8.4228515625e-06, + "grad_norm": 0.36433157324790955, + "learning_rate": 9.517070405476575e-05, + "loss": 0.0688, + "step": 5520 + }, + { + "epoch": 8.4228515625e-06, + "model_forward_time": 0.025127410888671875, + "step": 5520 + }, + { + "epoch": 8.4228515625e-06, + "step": 5520, + "training_step_time": 0.1095728874206543 + }, + { + "epoch": 8.42437744140625e-06, + "model_forward_time": 0.024392366409301758, + "step": 5521 + }, + { + "epoch": 8.42437744140625e-06, + "step": 5521, + "training_step_time": 0.16790127754211426 + }, + { + "epoch": 8.4259033203125e-06, + "model_forward_time": 0.02427506446838379, + "step": 5522 + }, + { + "epoch": 8.4259033203125e-06, + "step": 5522, + "training_step_time": 0.16735625267028809 + }, + { + "epoch": 8.42742919921875e-06, + "model_forward_time": 0.024598121643066406, + "step": 5523 + }, + { + "epoch": 8.42742919921875e-06, + "step": 5523, + "training_step_time": 0.10605931282043457 + }, + { + "epoch": 8.428955078125e-06, + "model_forward_time": 0.024770498275756836, + "step": 5524 + }, + { + "epoch": 8.428955078125e-06, + "step": 5524, + "training_step_time": 0.172194242477417 + }, + { + "epoch": 8.43048095703125e-06, + "model_forward_time": 0.024447202682495117, + "step": 5525 + }, + { + "epoch": 8.43048095703125e-06, + "step": 5525, + "training_step_time": 0.16829872131347656 + }, + { + "epoch": 8.4320068359375e-06, + "model_forward_time": 0.02463245391845703, + "step": 5526 + }, + { + "epoch": 8.4320068359375e-06, + "step": 5526, + "training_step_time": 0.10637164115905762 + }, + { + "epoch": 8.43353271484375e-06, + "model_forward_time": 0.02500438690185547, + "step": 5527 + }, + { + "epoch": 8.43353271484375e-06, + "step": 5527, + "training_step_time": 0.10713505744934082 + }, + { + "epoch": 8.43505859375e-06, + "model_forward_time": 0.02534627914428711, + "step": 5528 + }, + { + "epoch": 8.43505859375e-06, + "step": 5528, + "training_step_time": 0.10852265357971191 + }, + { + "epoch": 8.43658447265625e-06, + "model_forward_time": 0.027458667755126953, + "step": 5529 + }, + { + "epoch": 8.43658447265625e-06, + "step": 5529, + "training_step_time": 0.1109466552734375 + }, + { + "epoch": 8.4381103515625e-06, + "grad_norm": 0.5929972529411316, + "learning_rate": 9.514704472037123e-05, + "loss": 0.0789, + "step": 5530 + }, + { + "epoch": 8.4381103515625e-06, + "model_forward_time": 0.024906635284423828, + "step": 5530 + }, + { + "epoch": 8.4381103515625e-06, + "step": 5530, + "training_step_time": 0.11322665214538574 + }, + { + "epoch": 8.43963623046875e-06, + "model_forward_time": 0.026383638381958008, + "step": 5531 + }, + { + "epoch": 8.43963623046875e-06, + "step": 5531, + "training_step_time": 0.1084601879119873 + }, + { + "epoch": 8.441162109375e-06, + "model_forward_time": 0.025345325469970703, + "step": 5532 + }, + { + "epoch": 8.441162109375e-06, + "step": 5532, + "training_step_time": 0.1101071834564209 + }, + { + "epoch": 8.44268798828125e-06, + "model_forward_time": 0.024779319763183594, + "step": 5533 + }, + { + "epoch": 8.44268798828125e-06, + "step": 5533, + "training_step_time": 0.10842418670654297 + }, + { + "epoch": 8.4442138671875e-06, + "model_forward_time": 0.02519989013671875, + "step": 5534 + }, + { + "epoch": 8.4442138671875e-06, + "step": 5534, + "training_step_time": 0.10825228691101074 + }, + { + "epoch": 8.44573974609375e-06, + "model_forward_time": 0.02506279945373535, + "step": 5535 + }, + { + "epoch": 8.44573974609375e-06, + "step": 5535, + "training_step_time": 0.1068274974822998 + }, + { + "epoch": 8.447265625e-06, + "model_forward_time": 0.024892091751098633, + "step": 5536 + }, + { + "epoch": 8.447265625e-06, + "step": 5536, + "training_step_time": 0.11709976196289062 + }, + { + "epoch": 8.44879150390625e-06, + "model_forward_time": 0.025391101837158203, + "step": 5537 + }, + { + "epoch": 8.44879150390625e-06, + "step": 5537, + "training_step_time": 0.10761404037475586 + }, + { + "epoch": 8.4503173828125e-06, + "model_forward_time": 0.025211334228515625, + "step": 5538 + }, + { + "epoch": 8.4503173828125e-06, + "step": 5538, + "training_step_time": 0.10755133628845215 + }, + { + "epoch": 8.45184326171875e-06, + "model_forward_time": 0.025348663330078125, + "step": 5539 + }, + { + "epoch": 8.45184326171875e-06, + "step": 5539, + "training_step_time": 0.11601042747497559 + }, + { + "epoch": 8.453369140625e-06, + "grad_norm": 0.5573577284812927, + "learning_rate": 9.512333052806033e-05, + "loss": 0.0686, + "step": 5540 + }, + { + "epoch": 8.453369140625e-06, + "model_forward_time": 0.025214195251464844, + "step": 5540 + }, + { + "epoch": 8.453369140625e-06, + "step": 5540, + "training_step_time": 0.11416077613830566 + }, + { + "epoch": 8.45489501953125e-06, + "model_forward_time": 0.025387048721313477, + "step": 5541 + }, + { + "epoch": 8.45489501953125e-06, + "step": 5541, + "training_step_time": 0.11627912521362305 + }, + { + "epoch": 8.4564208984375e-06, + "model_forward_time": 0.024802207946777344, + "step": 5542 + }, + { + "epoch": 8.4564208984375e-06, + "step": 5542, + "training_step_time": 0.13913702964782715 + }, + { + "epoch": 8.45794677734375e-06, + "model_forward_time": 0.025142431259155273, + "step": 5543 + }, + { + "epoch": 8.45794677734375e-06, + "step": 5543, + "training_step_time": 0.15565252304077148 + }, + { + "epoch": 8.45947265625e-06, + "model_forward_time": 0.02472662925720215, + "step": 5544 + }, + { + "epoch": 8.45947265625e-06, + "step": 5544, + "training_step_time": 0.20890450477600098 + }, + { + "epoch": 8.46099853515625e-06, + "model_forward_time": 0.024294376373291016, + "step": 5545 + }, + { + "epoch": 8.46099853515625e-06, + "step": 5545, + "training_step_time": 0.14517688751220703 + }, + { + "epoch": 8.4625244140625e-06, + "model_forward_time": 0.02404952049255371, + "step": 5546 + }, + { + "epoch": 8.4625244140625e-06, + "step": 5546, + "training_step_time": 0.134260892868042 + }, + { + "epoch": 8.46405029296875e-06, + "model_forward_time": 0.02417159080505371, + "step": 5547 + }, + { + "epoch": 8.46405029296875e-06, + "step": 5547, + "training_step_time": 0.11555767059326172 + }, + { + "epoch": 8.465576171875e-06, + "model_forward_time": 0.02479243278503418, + "step": 5548 + }, + { + "epoch": 8.465576171875e-06, + "step": 5548, + "training_step_time": 0.12091636657714844 + }, + { + "epoch": 8.46710205078125e-06, + "model_forward_time": 0.025285959243774414, + "step": 5549 + }, + { + "epoch": 8.46710205078125e-06, + "step": 5549, + "training_step_time": 0.11031556129455566 + }, + { + "epoch": 8.4686279296875e-06, + "grad_norm": 0.539337158203125, + "learning_rate": 9.509956150664796e-05, + "loss": 0.0709, + "step": 5550 + }, + { + "epoch": 8.4686279296875e-06, + "model_forward_time": 0.025218486785888672, + "step": 5550 + }, + { + "epoch": 8.4686279296875e-06, + "step": 5550, + "training_step_time": 0.15806031227111816 + }, + { + "epoch": 8.47015380859375e-06, + "model_forward_time": 0.024510622024536133, + "step": 5551 + }, + { + "epoch": 8.47015380859375e-06, + "step": 5551, + "training_step_time": 0.15035152435302734 + }, + { + "epoch": 8.4716796875e-06, + "model_forward_time": 0.025373220443725586, + "step": 5552 + }, + { + "epoch": 8.4716796875e-06, + "step": 5552, + "training_step_time": 0.11540603637695312 + }, + { + "epoch": 8.47320556640625e-06, + "model_forward_time": 0.024956703186035156, + "step": 5553 + }, + { + "epoch": 8.47320556640625e-06, + "step": 5553, + "training_step_time": 0.10674762725830078 + }, + { + "epoch": 8.4747314453125e-06, + "model_forward_time": 0.02509284019470215, + "step": 5554 + }, + { + "epoch": 8.4747314453125e-06, + "step": 5554, + "training_step_time": 0.11182975769042969 + }, + { + "epoch": 8.47625732421875e-06, + "model_forward_time": 0.025166988372802734, + "step": 5555 + }, + { + "epoch": 8.47625732421875e-06, + "step": 5555, + "training_step_time": 0.10920238494873047 + }, + { + "epoch": 8.477783203125e-06, + "model_forward_time": 0.024697065353393555, + "step": 5556 + }, + { + "epoch": 8.477783203125e-06, + "step": 5556, + "training_step_time": 0.10861468315124512 + }, + { + "epoch": 8.47930908203125e-06, + "model_forward_time": 0.025240659713745117, + "step": 5557 + }, + { + "epoch": 8.47930908203125e-06, + "step": 5557, + "training_step_time": 0.1769580841064453 + }, + { + "epoch": 8.4808349609375e-06, + "model_forward_time": 0.02511906623840332, + "step": 5558 + }, + { + "epoch": 8.4808349609375e-06, + "step": 5558, + "training_step_time": 0.14783430099487305 + }, + { + "epoch": 8.48236083984375e-06, + "model_forward_time": 0.02468705177307129, + "step": 5559 + }, + { + "epoch": 8.48236083984375e-06, + "step": 5559, + "training_step_time": 0.10929179191589355 + }, + { + "epoch": 8.48388671875e-06, + "grad_norm": 0.40480631589889526, + "learning_rate": 9.507573768501574e-05, + "loss": 0.0729, + "step": 5560 + }, + { + "epoch": 8.48388671875e-06, + "model_forward_time": 0.028118371963500977, + "step": 5560 + }, + { + "epoch": 8.48388671875e-06, + "step": 5560, + "training_step_time": 0.11645936965942383 + }, + { + "epoch": 8.48541259765625e-06, + "model_forward_time": 0.025391101837158203, + "step": 5561 + }, + { + "epoch": 8.48541259765625e-06, + "step": 5561, + "training_step_time": 0.13700103759765625 + }, + { + "epoch": 8.4869384765625e-06, + "model_forward_time": 0.025828838348388672, + "step": 5562 + }, + { + "epoch": 8.4869384765625e-06, + "step": 5562, + "training_step_time": 0.10600495338439941 + }, + { + "epoch": 8.48846435546875e-06, + "model_forward_time": 0.025227069854736328, + "step": 5563 + }, + { + "epoch": 8.48846435546875e-06, + "step": 5563, + "training_step_time": 0.1769402027130127 + }, + { + "epoch": 8.489990234375e-06, + "model_forward_time": 0.02561020851135254, + "step": 5564 + }, + { + "epoch": 8.489990234375e-06, + "step": 5564, + "training_step_time": 0.10607075691223145 + }, + { + "epoch": 8.49151611328125e-06, + "model_forward_time": 0.02473282814025879, + "step": 5565 + }, + { + "epoch": 8.49151611328125e-06, + "step": 5565, + "training_step_time": 0.10555696487426758 + }, + { + "epoch": 8.4930419921875e-06, + "model_forward_time": 0.02570319175720215, + "step": 5566 + }, + { + "epoch": 8.4930419921875e-06, + "step": 5566, + "training_step_time": 0.1150972843170166 + }, + { + "epoch": 8.49456787109375e-06, + "model_forward_time": 0.028559446334838867, + "step": 5567 + }, + { + "epoch": 8.49456787109375e-06, + "step": 5567, + "training_step_time": 0.11337924003601074 + }, + { + "epoch": 8.49609375e-06, + "model_forward_time": 0.025995731353759766, + "step": 5568 + }, + { + "epoch": 8.49609375e-06, + "step": 5568, + "training_step_time": 0.10977768898010254 + }, + { + "epoch": 8.49761962890625e-06, + "model_forward_time": 0.025331497192382812, + "step": 5569 + }, + { + "epoch": 8.49761962890625e-06, + "step": 5569, + "training_step_time": 0.21548223495483398 + }, + { + "epoch": 8.4991455078125e-06, + "grad_norm": 0.43420183658599854, + "learning_rate": 9.505185909211188e-05, + "loss": 0.0852, + "step": 5570 + }, + { + "epoch": 8.4991455078125e-06, + "model_forward_time": 0.02463364601135254, + "step": 5570 + }, + { + "epoch": 8.4991455078125e-06, + "step": 5570, + "training_step_time": 0.1160881519317627 + }, + { + "epoch": 8.50067138671875e-06, + "model_forward_time": 0.024601459503173828, + "step": 5571 + }, + { + "epoch": 8.50067138671875e-06, + "step": 5571, + "training_step_time": 0.10824370384216309 + }, + { + "epoch": 8.502197265625e-06, + "model_forward_time": 0.02516961097717285, + "step": 5572 + }, + { + "epoch": 8.502197265625e-06, + "step": 5572, + "training_step_time": 0.11001777648925781 + }, + { + "epoch": 8.50372314453125e-06, + "model_forward_time": 0.025398969650268555, + "step": 5573 + }, + { + "epoch": 8.50372314453125e-06, + "step": 5573, + "training_step_time": 0.10730361938476562 + }, + { + "epoch": 8.5052490234375e-06, + "model_forward_time": 0.025559663772583008, + "step": 5574 + }, + { + "epoch": 8.5052490234375e-06, + "step": 5574, + "training_step_time": 0.11347579956054688 + }, + { + "epoch": 8.50677490234375e-06, + "model_forward_time": 0.025579452514648438, + "step": 5575 + }, + { + "epoch": 8.50677490234375e-06, + "step": 5575, + "training_step_time": 0.10850358009338379 + }, + { + "epoch": 8.50830078125e-06, + "model_forward_time": 0.025150060653686523, + "step": 5576 + }, + { + "epoch": 8.50830078125e-06, + "step": 5576, + "training_step_time": 0.11190223693847656 + }, + { + "epoch": 8.50982666015625e-06, + "model_forward_time": 0.025427579879760742, + "step": 5577 + }, + { + "epoch": 8.50982666015625e-06, + "step": 5577, + "training_step_time": 0.10918688774108887 + }, + { + "epoch": 8.5113525390625e-06, + "model_forward_time": 0.02529621124267578, + "step": 5578 + }, + { + "epoch": 8.5113525390625e-06, + "step": 5578, + "training_step_time": 0.11261510848999023 + }, + { + "epoch": 8.51287841796875e-06, + "model_forward_time": 0.025478839874267578, + "step": 5579 + }, + { + "epoch": 8.51287841796875e-06, + "step": 5579, + "training_step_time": 0.1089320182800293 + }, + { + "epoch": 8.514404296875e-06, + "grad_norm": 0.5152722001075745, + "learning_rate": 9.502792575695112e-05, + "loss": 0.096, + "step": 5580 + }, + { + "epoch": 8.514404296875e-06, + "model_forward_time": 0.02508234977722168, + "step": 5580 + }, + { + "epoch": 8.514404296875e-06, + "step": 5580, + "training_step_time": 0.11014628410339355 + }, + { + "epoch": 8.51593017578125e-06, + "model_forward_time": 0.02529740333557129, + "step": 5581 + }, + { + "epoch": 8.51593017578125e-06, + "step": 5581, + "training_step_time": 0.11190176010131836 + }, + { + "epoch": 8.5174560546875e-06, + "model_forward_time": 0.025408267974853516, + "step": 5582 + }, + { + "epoch": 8.5174560546875e-06, + "step": 5582, + "training_step_time": 0.10768556594848633 + }, + { + "epoch": 8.51898193359375e-06, + "model_forward_time": 0.02550339698791504, + "step": 5583 + }, + { + "epoch": 8.51898193359375e-06, + "step": 5583, + "training_step_time": 0.10976409912109375 + }, + { + "epoch": 8.5205078125e-06, + "model_forward_time": 0.025527000427246094, + "step": 5584 + }, + { + "epoch": 8.5205078125e-06, + "step": 5584, + "training_step_time": 0.10895490646362305 + }, + { + "epoch": 8.52203369140625e-06, + "model_forward_time": 0.025280237197875977, + "step": 5585 + }, + { + "epoch": 8.52203369140625e-06, + "step": 5585, + "training_step_time": 0.13655710220336914 + }, + { + "epoch": 8.5235595703125e-06, + "model_forward_time": 0.025352001190185547, + "step": 5586 + }, + { + "epoch": 8.5235595703125e-06, + "step": 5586, + "training_step_time": 0.12762689590454102 + }, + { + "epoch": 8.52508544921875e-06, + "model_forward_time": 0.024984359741210938, + "step": 5587 + }, + { + "epoch": 8.52508544921875e-06, + "step": 5587, + "training_step_time": 0.11026358604431152 + }, + { + "epoch": 8.526611328125e-06, + "model_forward_time": 0.024921655654907227, + "step": 5588 + }, + { + "epoch": 8.526611328125e-06, + "step": 5588, + "training_step_time": 0.11644244194030762 + }, + { + "epoch": 8.52813720703125e-06, + "model_forward_time": 0.025429248809814453, + "step": 5589 + }, + { + "epoch": 8.52813720703125e-06, + "step": 5589, + "training_step_time": 0.10667204856872559 + }, + { + "epoch": 8.5296630859375e-06, + "grad_norm": 0.4931207001209259, + "learning_rate": 9.50039377086147e-05, + "loss": 0.0733, + "step": 5590 + }, + { + "epoch": 8.5296630859375e-06, + "model_forward_time": 0.02527475357055664, + "step": 5590 + }, + { + "epoch": 8.5296630859375e-06, + "step": 5590, + "training_step_time": 0.1112833023071289 + }, + { + "epoch": 8.53118896484375e-06, + "model_forward_time": 0.027823209762573242, + "step": 5591 + }, + { + "epoch": 8.53118896484375e-06, + "step": 5591, + "training_step_time": 0.11296486854553223 + }, + { + "epoch": 8.53271484375e-06, + "model_forward_time": 0.025278568267822266, + "step": 5592 + }, + { + "epoch": 8.53271484375e-06, + "step": 5592, + "training_step_time": 0.11172080039978027 + }, + { + "epoch": 8.53424072265625e-06, + "model_forward_time": 0.02546834945678711, + "step": 5593 + }, + { + "epoch": 8.53424072265625e-06, + "step": 5593, + "training_step_time": 0.12639856338500977 + }, + { + "epoch": 8.5357666015625e-06, + "model_forward_time": 0.02545332908630371, + "step": 5594 + }, + { + "epoch": 8.5357666015625e-06, + "step": 5594, + "training_step_time": 0.11121511459350586 + }, + { + "epoch": 8.53729248046875e-06, + "model_forward_time": 0.02931523323059082, + "step": 5595 + }, + { + "epoch": 8.53729248046875e-06, + "step": 5595, + "training_step_time": 0.11070609092712402 + }, + { + "epoch": 8.538818359375e-06, + "model_forward_time": 0.025269031524658203, + "step": 5596 + }, + { + "epoch": 8.538818359375e-06, + "step": 5596, + "training_step_time": 0.10912632942199707 + }, + { + "epoch": 8.54034423828125e-06, + "model_forward_time": 0.025653600692749023, + "step": 5597 + }, + { + "epoch": 8.54034423828125e-06, + "step": 5597, + "training_step_time": 0.12185859680175781 + }, + { + "epoch": 8.5418701171875e-06, + "model_forward_time": 0.025304317474365234, + "step": 5598 + }, + { + "epoch": 8.5418701171875e-06, + "step": 5598, + "training_step_time": 0.11203265190124512 + }, + { + "epoch": 8.54339599609375e-06, + "model_forward_time": 0.025623321533203125, + "step": 5599 + }, + { + "epoch": 8.54339599609375e-06, + "step": 5599, + "training_step_time": 0.19733309745788574 + }, + { + "epoch": 8.544921875e-06, + "grad_norm": 0.6653024554252625, + "learning_rate": 9.497989497625035e-05, + "loss": 0.0807, + "step": 5600 + }, + { + "epoch": 8.544921875e-06, + "model_forward_time": 0.02411341667175293, + "step": 5600 + }, + { + "epoch": 8.544921875e-06, + "step": 5600, + "training_step_time": 0.1137847900390625 + }, + { + "epoch": 8.54644775390625e-06, + "model_forward_time": 0.02620530128479004, + "step": 5601 + }, + { + "epoch": 8.54644775390625e-06, + "step": 5601, + "training_step_time": 0.11612081527709961 + }, + { + "epoch": 8.5479736328125e-06, + "model_forward_time": 0.025931119918823242, + "step": 5602 + }, + { + "epoch": 8.5479736328125e-06, + "step": 5602, + "training_step_time": 0.11466312408447266 + }, + { + "epoch": 8.54949951171875e-06, + "model_forward_time": 0.025912046432495117, + "step": 5603 + }, + { + "epoch": 8.54949951171875e-06, + "step": 5603, + "training_step_time": 0.11445784568786621 + }, + { + "epoch": 8.551025390625e-06, + "model_forward_time": 0.02537846565246582, + "step": 5604 + }, + { + "epoch": 8.551025390625e-06, + "step": 5604, + "training_step_time": 0.14040780067443848 + }, + { + "epoch": 8.55255126953125e-06, + "model_forward_time": 0.025027751922607422, + "step": 5605 + }, + { + "epoch": 8.55255126953125e-06, + "step": 5605, + "training_step_time": 0.1156473159790039 + }, + { + "epoch": 8.5540771484375e-06, + "model_forward_time": 0.02823472023010254, + "step": 5606 + }, + { + "epoch": 8.5540771484375e-06, + "step": 5606, + "training_step_time": 0.21980500221252441 + }, + { + "epoch": 8.55560302734375e-06, + "model_forward_time": 0.024739503860473633, + "step": 5607 + }, + { + "epoch": 8.55560302734375e-06, + "step": 5607, + "training_step_time": 0.1320197582244873 + }, + { + "epoch": 8.55712890625e-06, + "model_forward_time": 0.026064395904541016, + "step": 5608 + }, + { + "epoch": 8.55712890625e-06, + "step": 5608, + "training_step_time": 0.1114811897277832 + }, + { + "epoch": 8.55865478515625e-06, + "model_forward_time": 0.025989532470703125, + "step": 5609 + }, + { + "epoch": 8.55865478515625e-06, + "step": 5609, + "training_step_time": 0.11773967742919922 + }, + { + "epoch": 8.5601806640625e-06, + "grad_norm": 0.3101156949996948, + "learning_rate": 9.49557975890723e-05, + "loss": 0.0811, + "step": 5610 + }, + { + "epoch": 8.5601806640625e-06, + "model_forward_time": 0.02553248405456543, + "step": 5610 + }, + { + "epoch": 8.5601806640625e-06, + "step": 5610, + "training_step_time": 0.11394095420837402 + }, + { + "epoch": 8.56170654296875e-06, + "model_forward_time": 0.027122020721435547, + "step": 5611 + }, + { + "epoch": 8.56170654296875e-06, + "step": 5611, + "training_step_time": 0.1120908260345459 + }, + { + "epoch": 8.563232421875e-06, + "model_forward_time": 0.025753498077392578, + "step": 5612 + }, + { + "epoch": 8.563232421875e-06, + "step": 5612, + "training_step_time": 0.21460676193237305 + }, + { + "epoch": 8.56475830078125e-06, + "model_forward_time": 0.024910449981689453, + "step": 5613 + }, + { + "epoch": 8.56475830078125e-06, + "step": 5613, + "training_step_time": 0.11077499389648438 + }, + { + "epoch": 8.5662841796875e-06, + "model_forward_time": 0.02472543716430664, + "step": 5614 + }, + { + "epoch": 8.5662841796875e-06, + "step": 5614, + "training_step_time": 0.10970449447631836 + }, + { + "epoch": 8.56781005859375e-06, + "model_forward_time": 0.025194644927978516, + "step": 5615 + }, + { + "epoch": 8.56781005859375e-06, + "step": 5615, + "training_step_time": 0.16884398460388184 + }, + { + "epoch": 8.5693359375e-06, + "model_forward_time": 0.02461838722229004, + "step": 5616 + }, + { + "epoch": 8.5693359375e-06, + "step": 5616, + "training_step_time": 0.16986918449401855 + }, + { + "epoch": 8.57086181640625e-06, + "model_forward_time": 0.024666786193847656, + "step": 5617 + }, + { + "epoch": 8.57086181640625e-06, + "step": 5617, + "training_step_time": 0.10829949378967285 + }, + { + "epoch": 8.5723876953125e-06, + "model_forward_time": 0.024996519088745117, + "step": 5618 + }, + { + "epoch": 8.5723876953125e-06, + "step": 5618, + "training_step_time": 0.10442185401916504 + }, + { + "epoch": 8.57391357421875e-06, + "model_forward_time": 0.025464296340942383, + "step": 5619 + }, + { + "epoch": 8.57391357421875e-06, + "step": 5619, + "training_step_time": 0.11213421821594238 + }, + { + "epoch": 8.575439453125e-06, + "grad_norm": 0.4704367518424988, + "learning_rate": 9.493164557636112e-05, + "loss": 0.0747, + "step": 5620 + }, + { + "epoch": 8.575439453125e-06, + "model_forward_time": 0.025829076766967773, + "step": 5620 + }, + { + "epoch": 8.575439453125e-06, + "step": 5620, + "training_step_time": 0.10870933532714844 + }, + { + "epoch": 8.57696533203125e-06, + "model_forward_time": 0.025187969207763672, + "step": 5621 + }, + { + "epoch": 8.57696533203125e-06, + "step": 5621, + "training_step_time": 0.10694003105163574 + }, + { + "epoch": 8.5784912109375e-06, + "model_forward_time": 0.025395631790161133, + "step": 5622 + }, + { + "epoch": 8.5784912109375e-06, + "step": 5622, + "training_step_time": 0.10631585121154785 + }, + { + "epoch": 8.58001708984375e-06, + "model_forward_time": 0.0256650447845459, + "step": 5623 + }, + { + "epoch": 8.58001708984375e-06, + "step": 5623, + "training_step_time": 0.10675477981567383 + }, + { + "epoch": 8.58154296875e-06, + "model_forward_time": 0.026307344436645508, + "step": 5624 + }, + { + "epoch": 8.58154296875e-06, + "step": 5624, + "training_step_time": 0.10687804222106934 + }, + { + "epoch": 8.58306884765625e-06, + "model_forward_time": 0.025311946868896484, + "step": 5625 + }, + { + "epoch": 8.58306884765625e-06, + "step": 5625, + "training_step_time": 0.10824322700500488 + }, + { + "epoch": 8.5845947265625e-06, + "model_forward_time": 0.02512216567993164, + "step": 5626 + }, + { + "epoch": 8.5845947265625e-06, + "step": 5626, + "training_step_time": 0.1092989444732666 + }, + { + "epoch": 8.58612060546875e-06, + "model_forward_time": 0.025203943252563477, + "step": 5627 + }, + { + "epoch": 8.58612060546875e-06, + "step": 5627, + "training_step_time": 0.11082839965820312 + }, + { + "epoch": 8.587646484375e-06, + "model_forward_time": 0.02580547332763672, + "step": 5628 + }, + { + "epoch": 8.587646484375e-06, + "step": 5628, + "training_step_time": 0.11706662178039551 + }, + { + "epoch": 8.58917236328125e-06, + "model_forward_time": 0.0254366397857666, + "step": 5629 + }, + { + "epoch": 8.58917236328125e-06, + "step": 5629, + "training_step_time": 0.11065983772277832 + }, + { + "epoch": 8.5906982421875e-06, + "grad_norm": 0.3832702040672302, + "learning_rate": 9.49074389674638e-05, + "loss": 0.0623, + "step": 5630 + }, + { + "epoch": 8.5906982421875e-06, + "model_forward_time": 0.0255584716796875, + "step": 5630 + }, + { + "epoch": 8.5906982421875e-06, + "step": 5630, + "training_step_time": 0.1160573959350586 + }, + { + "epoch": 8.59222412109375e-06, + "model_forward_time": 0.025298118591308594, + "step": 5631 + }, + { + "epoch": 8.59222412109375e-06, + "step": 5631, + "training_step_time": 0.11720824241638184 + }, + { + "epoch": 8.59375e-06, + "model_forward_time": 0.02544856071472168, + "step": 5632 + }, + { + "epoch": 8.59375e-06, + "step": 5632, + "training_step_time": 0.11803793907165527 + }, + { + "epoch": 8.59527587890625e-06, + "model_forward_time": 0.026454687118530273, + "step": 5633 + }, + { + "epoch": 8.59527587890625e-06, + "step": 5633, + "training_step_time": 0.12260031700134277 + }, + { + "epoch": 8.5968017578125e-06, + "model_forward_time": 0.024752378463745117, + "step": 5634 + }, + { + "epoch": 8.5968017578125e-06, + "step": 5634, + "training_step_time": 0.13315796852111816 + }, + { + "epoch": 8.59832763671875e-06, + "model_forward_time": 0.025404691696166992, + "step": 5635 + }, + { + "epoch": 8.59832763671875e-06, + "step": 5635, + "training_step_time": 0.11161470413208008 + }, + { + "epoch": 8.599853515625e-06, + "model_forward_time": 0.026782751083374023, + "step": 5636 + }, + { + "epoch": 8.599853515625e-06, + "step": 5636, + "training_step_time": 0.18033337593078613 + }, + { + "epoch": 8.60137939453125e-06, + "model_forward_time": 0.02684950828552246, + "step": 5637 + }, + { + "epoch": 8.60137939453125e-06, + "step": 5637, + "training_step_time": 0.1273043155670166 + }, + { + "epoch": 8.6029052734375e-06, + "model_forward_time": 0.02421855926513672, + "step": 5638 + }, + { + "epoch": 8.6029052734375e-06, + "step": 5638, + "training_step_time": 0.1807572841644287 + }, + { + "epoch": 8.60443115234375e-06, + "model_forward_time": 0.025712013244628906, + "step": 5639 + }, + { + "epoch": 8.60443115234375e-06, + "step": 5639, + "training_step_time": 0.14128756523132324 + }, + { + "epoch": 8.60595703125e-06, + "grad_norm": 0.6627846956253052, + "learning_rate": 9.488317779179361e-05, + "loss": 0.0708, + "step": 5640 + }, + { + "epoch": 8.60595703125e-06, + "model_forward_time": 0.024538755416870117, + "step": 5640 + }, + { + "epoch": 8.60595703125e-06, + "step": 5640, + "training_step_time": 0.11018824577331543 + }, + { + "epoch": 8.60748291015625e-06, + "model_forward_time": 0.0245513916015625, + "step": 5641 + }, + { + "epoch": 8.60748291015625e-06, + "step": 5641, + "training_step_time": 0.10836195945739746 + }, + { + "epoch": 8.6090087890625e-06, + "model_forward_time": 0.025039196014404297, + "step": 5642 + }, + { + "epoch": 8.6090087890625e-06, + "step": 5642, + "training_step_time": 0.11410284042358398 + }, + { + "epoch": 8.61053466796875e-06, + "model_forward_time": 0.0258638858795166, + "step": 5643 + }, + { + "epoch": 8.61053466796875e-06, + "step": 5643, + "training_step_time": 0.1118018627166748 + }, + { + "epoch": 8.612060546875e-06, + "model_forward_time": 0.025200605392456055, + "step": 5644 + }, + { + "epoch": 8.612060546875e-06, + "step": 5644, + "training_step_time": 0.20527362823486328 + }, + { + "epoch": 8.61358642578125e-06, + "model_forward_time": 0.024642229080200195, + "step": 5645 + }, + { + "epoch": 8.61358642578125e-06, + "step": 5645, + "training_step_time": 0.11366033554077148 + }, + { + "epoch": 8.6151123046875e-06, + "model_forward_time": 0.024674177169799805, + "step": 5646 + }, + { + "epoch": 8.6151123046875e-06, + "step": 5646, + "training_step_time": 0.11691999435424805 + }, + { + "epoch": 8.61663818359375e-06, + "model_forward_time": 0.025403738021850586, + "step": 5647 + }, + { + "epoch": 8.61663818359375e-06, + "step": 5647, + "training_step_time": 0.1130514144897461 + }, + { + "epoch": 8.6181640625e-06, + "model_forward_time": 0.02541184425354004, + "step": 5648 + }, + { + "epoch": 8.6181640625e-06, + "step": 5648, + "training_step_time": 0.11801934242248535 + }, + { + "epoch": 8.61968994140625e-06, + "model_forward_time": 0.025005102157592773, + "step": 5649 + }, + { + "epoch": 8.61968994140625e-06, + "step": 5649, + "training_step_time": 0.1329195499420166 + }, + { + "epoch": 8.6212158203125e-06, + "grad_norm": 0.34214547276496887, + "learning_rate": 9.485886207883022e-05, + "loss": 0.089, + "step": 5650 + }, + { + "epoch": 8.6212158203125e-06, + "model_forward_time": 0.025602340698242188, + "step": 5650 + }, + { + "epoch": 8.6212158203125e-06, + "step": 5650, + "training_step_time": 0.12142086029052734 + }, + { + "epoch": 8.62274169921875e-06, + "model_forward_time": 0.025153160095214844, + "step": 5651 + }, + { + "epoch": 8.62274169921875e-06, + "step": 5651, + "training_step_time": 0.21445465087890625 + }, + { + "epoch": 8.624267578125e-06, + "model_forward_time": 0.02443838119506836, + "step": 5652 + }, + { + "epoch": 8.624267578125e-06, + "step": 5652, + "training_step_time": 0.1307835578918457 + }, + { + "epoch": 8.62579345703125e-06, + "model_forward_time": 0.024609804153442383, + "step": 5653 + }, + { + "epoch": 8.62579345703125e-06, + "step": 5653, + "training_step_time": 0.11268973350524902 + }, + { + "epoch": 8.6273193359375e-06, + "model_forward_time": 0.02524113655090332, + "step": 5654 + }, + { + "epoch": 8.6273193359375e-06, + "step": 5654, + "training_step_time": 0.11936759948730469 + }, + { + "epoch": 8.62884521484375e-06, + "model_forward_time": 0.025458812713623047, + "step": 5655 + }, + { + "epoch": 8.62884521484375e-06, + "step": 5655, + "training_step_time": 0.10752582550048828 + }, + { + "epoch": 8.63037109375e-06, + "model_forward_time": 0.025011062622070312, + "step": 5656 + }, + { + "epoch": 8.63037109375e-06, + "step": 5656, + "training_step_time": 0.11252474784851074 + }, + { + "epoch": 8.63189697265625e-06, + "model_forward_time": 0.02499675750732422, + "step": 5657 + }, + { + "epoch": 8.63189697265625e-06, + "step": 5657, + "training_step_time": 0.1105186939239502 + }, + { + "epoch": 8.6334228515625e-06, + "model_forward_time": 0.025557756423950195, + "step": 5658 + }, + { + "epoch": 8.6334228515625e-06, + "step": 5658, + "training_step_time": 0.11146974563598633 + }, + { + "epoch": 8.63494873046875e-06, + "model_forward_time": 0.025761127471923828, + "step": 5659 + }, + { + "epoch": 8.63494873046875e-06, + "step": 5659, + "training_step_time": 0.11534976959228516 + }, + { + "epoch": 8.636474609375e-06, + "grad_norm": 0.6344464421272278, + "learning_rate": 9.483449185811948e-05, + "loss": 0.0694, + "step": 5660 + }, + { + "epoch": 8.636474609375e-06, + "model_forward_time": 0.025650739669799805, + "step": 5660 + }, + { + "epoch": 8.636474609375e-06, + "step": 5660, + "training_step_time": 0.20753979682922363 + }, + { + "epoch": 8.63800048828125e-06, + "model_forward_time": 0.02511882781982422, + "step": 5661 + }, + { + "epoch": 8.63800048828125e-06, + "step": 5661, + "training_step_time": 0.12133455276489258 + }, + { + "epoch": 8.6395263671875e-06, + "model_forward_time": 0.024400711059570312, + "step": 5662 + }, + { + "epoch": 8.6395263671875e-06, + "step": 5662, + "training_step_time": 0.11008977890014648 + }, + { + "epoch": 8.64105224609375e-06, + "model_forward_time": 0.025488615036010742, + "step": 5663 + }, + { + "epoch": 8.64105224609375e-06, + "step": 5663, + "training_step_time": 0.10864996910095215 + }, + { + "epoch": 8.642578125e-06, + "model_forward_time": 0.0258331298828125, + "step": 5664 + }, + { + "epoch": 8.642578125e-06, + "step": 5664, + "training_step_time": 0.11136102676391602 + }, + { + "epoch": 8.64410400390625e-06, + "model_forward_time": 0.02547621726989746, + "step": 5665 + }, + { + "epoch": 8.64410400390625e-06, + "step": 5665, + "training_step_time": 0.11194777488708496 + }, + { + "epoch": 8.6456298828125e-06, + "model_forward_time": 0.025681257247924805, + "step": 5666 + }, + { + "epoch": 8.6456298828125e-06, + "step": 5666, + "training_step_time": 0.11451148986816406 + }, + { + "epoch": 8.64715576171875e-06, + "model_forward_time": 0.025636911392211914, + "step": 5667 + }, + { + "epoch": 8.64715576171875e-06, + "step": 5667, + "training_step_time": 0.11452174186706543 + }, + { + "epoch": 8.648681640625e-06, + "model_forward_time": 0.025289535522460938, + "step": 5668 + }, + { + "epoch": 8.648681640625e-06, + "step": 5668, + "training_step_time": 0.10977911949157715 + }, + { + "epoch": 8.65020751953125e-06, + "model_forward_time": 0.025005817413330078, + "step": 5669 + }, + { + "epoch": 8.65020751953125e-06, + "step": 5669, + "training_step_time": 0.10927271842956543 + }, + { + "epoch": 8.6517333984375e-06, + "grad_norm": 0.5223129987716675, + "learning_rate": 9.481006715927351e-05, + "loss": 0.081, + "step": 5670 + }, + { + "epoch": 8.6517333984375e-06, + "model_forward_time": 0.02517533302307129, + "step": 5670 + }, + { + "epoch": 8.6517333984375e-06, + "step": 5670, + "training_step_time": 0.10946846008300781 + }, + { + "epoch": 8.65325927734375e-06, + "model_forward_time": 0.025597572326660156, + "step": 5671 + }, + { + "epoch": 8.65325927734375e-06, + "step": 5671, + "training_step_time": 0.11063480377197266 + }, + { + "epoch": 8.65478515625e-06, + "model_forward_time": 0.028668642044067383, + "step": 5672 + }, + { + "epoch": 8.65478515625e-06, + "step": 5672, + "training_step_time": 0.11253213882446289 + }, + { + "epoch": 8.65631103515625e-06, + "model_forward_time": 0.025168180465698242, + "step": 5673 + }, + { + "epoch": 8.65631103515625e-06, + "step": 5673, + "training_step_time": 0.10771870613098145 + }, + { + "epoch": 8.6578369140625e-06, + "model_forward_time": 0.02535867691040039, + "step": 5674 + }, + { + "epoch": 8.6578369140625e-06, + "step": 5674, + "training_step_time": 0.10741615295410156 + }, + { + "epoch": 8.65936279296875e-06, + "model_forward_time": 0.025580167770385742, + "step": 5675 + }, + { + "epoch": 8.65936279296875e-06, + "step": 5675, + "training_step_time": 0.1098787784576416 + }, + { + "epoch": 8.660888671875e-06, + "model_forward_time": 0.02544879913330078, + "step": 5676 + }, + { + "epoch": 8.660888671875e-06, + "step": 5676, + "training_step_time": 0.1420128345489502 + }, + { + "epoch": 8.66241455078125e-06, + "model_forward_time": 0.024190664291381836, + "step": 5677 + }, + { + "epoch": 8.66241455078125e-06, + "step": 5677, + "training_step_time": 0.17170953750610352 + }, + { + "epoch": 8.6639404296875e-06, + "model_forward_time": 0.023317813873291016, + "step": 5678 + }, + { + "epoch": 8.6639404296875e-06, + "step": 5678, + "training_step_time": 0.22379803657531738 + }, + { + "epoch": 8.66546630859375e-06, + "model_forward_time": 0.02490377426147461, + "step": 5679 + }, + { + "epoch": 8.66546630859375e-06, + "step": 5679, + "training_step_time": 0.16246676445007324 + }, + { + "epoch": 8.6669921875e-06, + "grad_norm": 0.5693213939666748, + "learning_rate": 9.478558801197065e-05, + "loss": 0.079, + "step": 5680 + }, + { + "epoch": 8.6669921875e-06, + "model_forward_time": 0.024120807647705078, + "step": 5680 + }, + { + "epoch": 8.6669921875e-06, + "step": 5680, + "training_step_time": 0.20720553398132324 + }, + { + "epoch": 8.66851806640625e-06, + "model_forward_time": 0.024667024612426758, + "step": 5681 + }, + { + "epoch": 8.66851806640625e-06, + "step": 5681, + "training_step_time": 0.1292276382446289 + }, + { + "epoch": 8.6700439453125e-06, + "model_forward_time": 0.024073362350463867, + "step": 5682 + }, + { + "epoch": 8.6700439453125e-06, + "step": 5682, + "training_step_time": 0.12343025207519531 + }, + { + "epoch": 8.67156982421875e-06, + "model_forward_time": 0.024791955947875977, + "step": 5683 + }, + { + "epoch": 8.67156982421875e-06, + "step": 5683, + "training_step_time": 0.12181878089904785 + }, + { + "epoch": 8.673095703125e-06, + "model_forward_time": 0.02552652359008789, + "step": 5684 + }, + { + "epoch": 8.673095703125e-06, + "step": 5684, + "training_step_time": 0.16749215126037598 + }, + { + "epoch": 8.67462158203125e-06, + "model_forward_time": 0.02756333351135254, + "step": 5685 + }, + { + "epoch": 8.67462158203125e-06, + "step": 5685, + "training_step_time": 0.16584062576293945 + }, + { + "epoch": 8.6761474609375e-06, + "model_forward_time": 0.024524211883544922, + "step": 5686 + }, + { + "epoch": 8.6761474609375e-06, + "step": 5686, + "training_step_time": 0.11107349395751953 + }, + { + "epoch": 8.67767333984375e-06, + "model_forward_time": 0.024565696716308594, + "step": 5687 + }, + { + "epoch": 8.67767333984375e-06, + "step": 5687, + "training_step_time": 0.20196819305419922 + }, + { + "epoch": 8.67919921875e-06, + "model_forward_time": 0.024936437606811523, + "step": 5688 + }, + { + "epoch": 8.67919921875e-06, + "step": 5688, + "training_step_time": 0.10818862915039062 + }, + { + "epoch": 8.68072509765625e-06, + "model_forward_time": 0.024222612380981445, + "step": 5689 + }, + { + "epoch": 8.68072509765625e-06, + "step": 5689, + "training_step_time": 0.11050271987915039 + }, + { + "epoch": 8.6822509765625e-06, + "grad_norm": 0.3727593421936035, + "learning_rate": 9.476105444595534e-05, + "loss": 0.071, + "step": 5690 + }, + { + "epoch": 8.6822509765625e-06, + "model_forward_time": 0.02541327476501465, + "step": 5690 + }, + { + "epoch": 8.6822509765625e-06, + "step": 5690, + "training_step_time": 0.11319899559020996 + }, + { + "epoch": 8.68377685546875e-06, + "model_forward_time": 0.02712249755859375, + "step": 5691 + }, + { + "epoch": 8.68377685546875e-06, + "step": 5691, + "training_step_time": 0.12575364112854004 + }, + { + "epoch": 8.685302734375e-06, + "model_forward_time": 0.027634859085083008, + "step": 5692 + }, + { + "epoch": 8.685302734375e-06, + "step": 5692, + "training_step_time": 0.1254730224609375 + }, + { + "epoch": 8.68682861328125e-06, + "model_forward_time": 0.025128602981567383, + "step": 5693 + }, + { + "epoch": 8.68682861328125e-06, + "step": 5693, + "training_step_time": 0.10976171493530273 + }, + { + "epoch": 8.6883544921875e-06, + "model_forward_time": 0.02542734146118164, + "step": 5694 + }, + { + "epoch": 8.6883544921875e-06, + "step": 5694, + "training_step_time": 0.21788573265075684 + }, + { + "epoch": 8.68988037109375e-06, + "model_forward_time": 0.024222135543823242, + "step": 5695 + }, + { + "epoch": 8.68988037109375e-06, + "step": 5695, + "training_step_time": 0.13775038719177246 + }, + { + "epoch": 8.69140625e-06, + "model_forward_time": 0.024397611618041992, + "step": 5696 + }, + { + "epoch": 8.69140625e-06, + "step": 5696, + "training_step_time": 0.1179811954498291 + }, + { + "epoch": 8.69293212890625e-06, + "model_forward_time": 0.024003028869628906, + "step": 5697 + }, + { + "epoch": 8.69293212890625e-06, + "step": 5697, + "training_step_time": 0.11922788619995117 + }, + { + "epoch": 8.6944580078125e-06, + "model_forward_time": 0.025225400924682617, + "step": 5698 + }, + { + "epoch": 8.6944580078125e-06, + "step": 5698, + "training_step_time": 0.11105680465698242 + }, + { + "epoch": 8.69598388671875e-06, + "model_forward_time": 0.024890899658203125, + "step": 5699 + }, + { + "epoch": 8.69598388671875e-06, + "step": 5699, + "training_step_time": 0.21383905410766602 + }, + { + "epoch": 8.697509765625e-06, + "grad_norm": 0.1906253546476364, + "learning_rate": 9.473646649103818e-05, + "loss": 0.0659, + "step": 5700 + }, + { + "epoch": 8.697509765625e-06, + "model_forward_time": 0.02528548240661621, + "step": 5700 + }, + { + "epoch": 8.697509765625e-06, + "step": 5700, + "training_step_time": 0.10573935508728027 + }, + { + "epoch": 8.69903564453125e-06, + "model_forward_time": 0.02491307258605957, + "step": 5701 + }, + { + "epoch": 8.69903564453125e-06, + "step": 5701, + "training_step_time": 0.11289453506469727 + }, + { + "epoch": 8.7005615234375e-06, + "model_forward_time": 0.024271011352539062, + "step": 5702 + }, + { + "epoch": 8.7005615234375e-06, + "step": 5702, + "training_step_time": 0.2180488109588623 + }, + { + "epoch": 8.70208740234375e-06, + "model_forward_time": 0.02500176429748535, + "step": 5703 + }, + { + "epoch": 8.70208740234375e-06, + "step": 5703, + "training_step_time": 0.11678266525268555 + }, + { + "epoch": 8.70361328125e-06, + "model_forward_time": 0.025105953216552734, + "step": 5704 + }, + { + "epoch": 8.70361328125e-06, + "step": 5704, + "training_step_time": 0.10588240623474121 + }, + { + "epoch": 8.70513916015625e-06, + "model_forward_time": 0.02847003936767578, + "step": 5705 + }, + { + "epoch": 8.70513916015625e-06, + "step": 5705, + "training_step_time": 0.11067557334899902 + }, + { + "epoch": 8.7066650390625e-06, + "model_forward_time": 0.0252685546875, + "step": 5706 + }, + { + "epoch": 8.7066650390625e-06, + "step": 5706, + "training_step_time": 0.10599088668823242 + }, + { + "epoch": 8.70819091796875e-06, + "model_forward_time": 0.02571725845336914, + "step": 5707 + }, + { + "epoch": 8.70819091796875e-06, + "step": 5707, + "training_step_time": 0.11007094383239746 + }, + { + "epoch": 8.709716796875e-06, + "model_forward_time": 0.025586843490600586, + "step": 5708 + }, + { + "epoch": 8.709716796875e-06, + "step": 5708, + "training_step_time": 0.10913658142089844 + }, + { + "epoch": 8.71124267578125e-06, + "model_forward_time": 0.025495052337646484, + "step": 5709 + }, + { + "epoch": 8.71124267578125e-06, + "step": 5709, + "training_step_time": 0.10552549362182617 + }, + { + "epoch": 8.7127685546875e-06, + "grad_norm": 0.3626631200313568, + "learning_rate": 9.471182417709587e-05, + "loss": 0.0793, + "step": 5710 + }, + { + "epoch": 8.7127685546875e-06, + "model_forward_time": 0.02524399757385254, + "step": 5710 + }, + { + "epoch": 8.7127685546875e-06, + "step": 5710, + "training_step_time": 0.11177921295166016 + }, + { + "epoch": 8.71429443359375e-06, + "model_forward_time": 0.025163888931274414, + "step": 5711 + }, + { + "epoch": 8.71429443359375e-06, + "step": 5711, + "training_step_time": 0.1053006649017334 + }, + { + "epoch": 8.7158203125e-06, + "model_forward_time": 0.025257349014282227, + "step": 5712 + }, + { + "epoch": 8.7158203125e-06, + "step": 5712, + "training_step_time": 0.10671877861022949 + }, + { + "epoch": 8.71734619140625e-06, + "model_forward_time": 0.025287866592407227, + "step": 5713 + }, + { + "epoch": 8.71734619140625e-06, + "step": 5713, + "training_step_time": 0.11122894287109375 + }, + { + "epoch": 8.7188720703125e-06, + "model_forward_time": 0.025355100631713867, + "step": 5714 + }, + { + "epoch": 8.7188720703125e-06, + "step": 5714, + "training_step_time": 0.10686850547790527 + }, + { + "epoch": 8.72039794921875e-06, + "model_forward_time": 0.02519392967224121, + "step": 5715 + }, + { + "epoch": 8.72039794921875e-06, + "step": 5715, + "training_step_time": 0.1063542366027832 + }, + { + "epoch": 8.721923828125e-06, + "model_forward_time": 0.025996685028076172, + "step": 5716 + }, + { + "epoch": 8.721923828125e-06, + "step": 5716, + "training_step_time": 0.10877251625061035 + }, + { + "epoch": 8.72344970703125e-06, + "model_forward_time": 0.026959896087646484, + "step": 5717 + }, + { + "epoch": 8.72344970703125e-06, + "step": 5717, + "training_step_time": 0.10953927040100098 + }, + { + "epoch": 8.7249755859375e-06, + "model_forward_time": 0.025590896606445312, + "step": 5718 + }, + { + "epoch": 8.7249755859375e-06, + "step": 5718, + "training_step_time": 0.11183333396911621 + }, + { + "epoch": 8.72650146484375e-06, + "model_forward_time": 0.025211095809936523, + "step": 5719 + }, + { + "epoch": 8.72650146484375e-06, + "step": 5719, + "training_step_time": 0.18321776390075684 + }, + { + "epoch": 8.72802734375e-06, + "grad_norm": 0.4794430136680603, + "learning_rate": 9.468712753407112e-05, + "loss": 0.0699, + "step": 5720 + }, + { + "epoch": 8.72802734375e-06, + "model_forward_time": 0.025931596755981445, + "step": 5720 + }, + { + "epoch": 8.72802734375e-06, + "step": 5720, + "training_step_time": 0.10962438583374023 + }, + { + "epoch": 8.72955322265625e-06, + "model_forward_time": 0.024333477020263672, + "step": 5721 + }, + { + "epoch": 8.72955322265625e-06, + "step": 5721, + "training_step_time": 0.1341235637664795 + }, + { + "epoch": 8.7310791015625e-06, + "model_forward_time": 0.025473594665527344, + "step": 5722 + }, + { + "epoch": 8.7310791015625e-06, + "step": 5722, + "training_step_time": 0.16264986991882324 + }, + { + "epoch": 8.73260498046875e-06, + "model_forward_time": 0.024927616119384766, + "step": 5723 + }, + { + "epoch": 8.73260498046875e-06, + "step": 5723, + "training_step_time": 0.22059941291809082 + }, + { + "epoch": 8.734130859375e-06, + "model_forward_time": 0.024748563766479492, + "step": 5724 + }, + { + "epoch": 8.734130859375e-06, + "step": 5724, + "training_step_time": 0.11192679405212402 + }, + { + "epoch": 8.73565673828125e-06, + "model_forward_time": 0.024431228637695312, + "step": 5725 + }, + { + "epoch": 8.73565673828125e-06, + "step": 5725, + "training_step_time": 0.14302587509155273 + }, + { + "epoch": 8.7371826171875e-06, + "model_forward_time": 0.024878978729248047, + "step": 5726 + }, + { + "epoch": 8.7371826171875e-06, + "step": 5726, + "training_step_time": 0.14807486534118652 + }, + { + "epoch": 8.73870849609375e-06, + "model_forward_time": 0.024555683135986328, + "step": 5727 + }, + { + "epoch": 8.73870849609375e-06, + "step": 5727, + "training_step_time": 0.11493229866027832 + }, + { + "epoch": 8.740234375e-06, + "model_forward_time": 0.025223970413208008, + "step": 5728 + }, + { + "epoch": 8.740234375e-06, + "step": 5728, + "training_step_time": 0.11080574989318848 + }, + { + "epoch": 8.74176025390625e-06, + "model_forward_time": 0.02585911750793457, + "step": 5729 + }, + { + "epoch": 8.74176025390625e-06, + "step": 5729, + "training_step_time": 0.1092691421508789 + }, + { + "epoch": 8.7432861328125e-06, + "grad_norm": 0.3267877399921417, + "learning_rate": 9.46623765919727e-05, + "loss": 0.0777, + "step": 5730 + }, + { + "epoch": 8.7432861328125e-06, + "model_forward_time": 0.02543020248413086, + "step": 5730 + }, + { + "epoch": 8.7432861328125e-06, + "step": 5730, + "training_step_time": 0.16281914710998535 + }, + { + "epoch": 8.74481201171875e-06, + "model_forward_time": 0.025120019912719727, + "step": 5731 + }, + { + "epoch": 8.74481201171875e-06, + "step": 5731, + "training_step_time": 0.14694476127624512 + }, + { + "epoch": 8.746337890625e-06, + "model_forward_time": 0.024566650390625, + "step": 5732 + }, + { + "epoch": 8.746337890625e-06, + "step": 5732, + "training_step_time": 0.1152794361114502 + }, + { + "epoch": 8.74786376953125e-06, + "model_forward_time": 0.02460002899169922, + "step": 5733 + }, + { + "epoch": 8.74786376953125e-06, + "step": 5733, + "training_step_time": 0.10911345481872559 + }, + { + "epoch": 8.7493896484375e-06, + "model_forward_time": 0.025265932083129883, + "step": 5734 + }, + { + "epoch": 8.7493896484375e-06, + "step": 5734, + "training_step_time": 0.17486906051635742 + }, + { + "epoch": 8.75091552734375e-06, + "model_forward_time": 0.024076223373413086, + "step": 5735 + }, + { + "epoch": 8.75091552734375e-06, + "step": 5735, + "training_step_time": 0.23529672622680664 + }, + { + "epoch": 8.75244140625e-06, + "model_forward_time": 0.024236679077148438, + "step": 5736 + }, + { + "epoch": 8.75244140625e-06, + "step": 5736, + "training_step_time": 0.2153477668762207 + }, + { + "epoch": 8.75396728515625e-06, + "model_forward_time": 0.024945497512817383, + "step": 5737 + }, + { + "epoch": 8.75396728515625e-06, + "step": 5737, + "training_step_time": 0.24265789985656738 + }, + { + "epoch": 8.7554931640625e-06, + "model_forward_time": 0.02516007423400879, + "step": 5738 + }, + { + "epoch": 8.7554931640625e-06, + "step": 5738, + "training_step_time": 0.20834684371948242 + }, + { + "epoch": 8.75701904296875e-06, + "model_forward_time": 0.028120756149291992, + "step": 5739 + }, + { + "epoch": 8.75701904296875e-06, + "step": 5739, + "training_step_time": 0.10858917236328125 + }, + { + "epoch": 8.758544921875e-06, + "grad_norm": 0.37637773156166077, + "learning_rate": 9.463757138087535e-05, + "loss": 0.0874, + "step": 5740 + }, + { + "epoch": 8.758544921875e-06, + "model_forward_time": 0.025181055068969727, + "step": 5740 + }, + { + "epoch": 8.758544921875e-06, + "step": 5740, + "training_step_time": 0.21411871910095215 + }, + { + "epoch": 8.76007080078125e-06, + "model_forward_time": 0.024881839752197266, + "step": 5741 + }, + { + "epoch": 8.76007080078125e-06, + "step": 5741, + "training_step_time": 0.11036086082458496 + }, + { + "epoch": 8.7615966796875e-06, + "model_forward_time": 0.02480912208557129, + "step": 5742 + }, + { + "epoch": 8.7615966796875e-06, + "step": 5742, + "training_step_time": 0.10637235641479492 + }, + { + "epoch": 8.76312255859375e-06, + "model_forward_time": 0.025555849075317383, + "step": 5743 + }, + { + "epoch": 8.76312255859375e-06, + "step": 5743, + "training_step_time": 0.10836005210876465 + }, + { + "epoch": 8.7646484375e-06, + "model_forward_time": 0.02655792236328125, + "step": 5744 + }, + { + "epoch": 8.7646484375e-06, + "step": 5744, + "training_step_time": 0.11038470268249512 + }, + { + "epoch": 8.76617431640625e-06, + "model_forward_time": 0.025667667388916016, + "step": 5745 + }, + { + "epoch": 8.76617431640625e-06, + "step": 5745, + "training_step_time": 0.11467695236206055 + }, + { + "epoch": 8.7677001953125e-06, + "model_forward_time": 0.025905370712280273, + "step": 5746 + }, + { + "epoch": 8.7677001953125e-06, + "step": 5746, + "training_step_time": 0.11172795295715332 + }, + { + "epoch": 8.76922607421875e-06, + "model_forward_time": 0.025571107864379883, + "step": 5747 + }, + { + "epoch": 8.76922607421875e-06, + "step": 5747, + "training_step_time": 0.11497855186462402 + }, + { + "epoch": 8.770751953125e-06, + "model_forward_time": 0.025565385818481445, + "step": 5748 + }, + { + "epoch": 8.770751953125e-06, + "step": 5748, + "training_step_time": 0.11220669746398926 + }, + { + "epoch": 8.77227783203125e-06, + "model_forward_time": 0.025272607803344727, + "step": 5749 + }, + { + "epoch": 8.77227783203125e-06, + "step": 5749, + "training_step_time": 0.12531065940856934 + }, + { + "epoch": 8.7738037109375e-06, + "grad_norm": 0.38166099786758423, + "learning_rate": 9.46127119309197e-05, + "loss": 0.0651, + "step": 5750 + }, + { + "epoch": 8.7738037109375e-06, + "model_forward_time": 0.024326324462890625, + "step": 5750 + }, + { + "epoch": 8.7738037109375e-06, + "step": 5750, + "training_step_time": 0.1891329288482666 + }, + { + "epoch": 8.77532958984375e-06, + "model_forward_time": 0.024070024490356445, + "step": 5751 + }, + { + "epoch": 8.77532958984375e-06, + "step": 5751, + "training_step_time": 0.21283817291259766 + }, + { + "epoch": 8.77685546875e-06, + "model_forward_time": 0.024276018142700195, + "step": 5752 + }, + { + "epoch": 8.77685546875e-06, + "step": 5752, + "training_step_time": 0.21341156959533691 + }, + { + "epoch": 8.77838134765625e-06, + "model_forward_time": 0.024446964263916016, + "step": 5753 + }, + { + "epoch": 8.77838134765625e-06, + "step": 5753, + "training_step_time": 0.21104049682617188 + }, + { + "epoch": 8.7799072265625e-06, + "model_forward_time": 0.024374723434448242, + "step": 5754 + }, + { + "epoch": 8.7799072265625e-06, + "step": 5754, + "training_step_time": 0.2086327075958252 + }, + { + "epoch": 8.78143310546875e-06, + "model_forward_time": 0.024730205535888672, + "step": 5755 + }, + { + "epoch": 8.78143310546875e-06, + "step": 5755, + "training_step_time": 0.20209670066833496 + }, + { + "epoch": 8.782958984375e-06, + "model_forward_time": 0.024913787841796875, + "step": 5756 + }, + { + "epoch": 8.782958984375e-06, + "step": 5756, + "training_step_time": 0.20732712745666504 + }, + { + "epoch": 8.78448486328125e-06, + "model_forward_time": 0.024971485137939453, + "step": 5757 + }, + { + "epoch": 8.78448486328125e-06, + "step": 5757, + "training_step_time": 0.11805987358093262 + }, + { + "epoch": 8.7860107421875e-06, + "model_forward_time": 0.024193525314331055, + "step": 5758 + }, + { + "epoch": 8.7860107421875e-06, + "step": 5758, + "training_step_time": 0.13003110885620117 + }, + { + "epoch": 8.78753662109375e-06, + "model_forward_time": 0.025430679321289062, + "step": 5759 + }, + { + "epoch": 8.78753662109375e-06, + "step": 5759, + "training_step_time": 0.15594959259033203 + }, + { + "epoch": 8.7890625e-06, + "grad_norm": 0.35037916898727417, + "learning_rate": 9.458779827231237e-05, + "loss": 0.0699, + "step": 5760 + }, + { + "epoch": 8.7890625e-06, + "model_forward_time": 0.024963855743408203, + "step": 5760 + }, + { + "epoch": 8.7890625e-06, + "step": 5760, + "training_step_time": 0.21167707443237305 + }, + { + "epoch": 8.79058837890625e-06, + "model_forward_time": 0.02439093589782715, + "step": 5761 + }, + { + "epoch": 8.79058837890625e-06, + "step": 5761, + "training_step_time": 0.1234140396118164 + }, + { + "epoch": 8.7921142578125e-06, + "model_forward_time": 0.024369239807128906, + "step": 5762 + }, + { + "epoch": 8.7921142578125e-06, + "step": 5762, + "training_step_time": 0.13631796836853027 + }, + { + "epoch": 8.79364013671875e-06, + "model_forward_time": 0.02476978302001953, + "step": 5763 + }, + { + "epoch": 8.79364013671875e-06, + "step": 5763, + "training_step_time": 0.14091110229492188 + }, + { + "epoch": 8.795166015625e-06, + "model_forward_time": 0.025115966796875, + "step": 5764 + }, + { + "epoch": 8.795166015625e-06, + "step": 5764, + "training_step_time": 0.11105227470397949 + }, + { + "epoch": 8.79669189453125e-06, + "model_forward_time": 0.025090932846069336, + "step": 5765 + }, + { + "epoch": 8.79669189453125e-06, + "step": 5765, + "training_step_time": 0.11116409301757812 + }, + { + "epoch": 8.7982177734375e-06, + "model_forward_time": 0.025231122970581055, + "step": 5766 + }, + { + "epoch": 8.7982177734375e-06, + "step": 5766, + "training_step_time": 0.130354642868042 + }, + { + "epoch": 8.79974365234375e-06, + "model_forward_time": 0.025452136993408203, + "step": 5767 + }, + { + "epoch": 8.79974365234375e-06, + "step": 5767, + "training_step_time": 0.20346641540527344 + }, + { + "epoch": 8.80126953125e-06, + "model_forward_time": 0.024509668350219727, + "step": 5768 + }, + { + "epoch": 8.80126953125e-06, + "step": 5768, + "training_step_time": 0.12837600708007812 + }, + { + "epoch": 8.80279541015625e-06, + "model_forward_time": 0.0244295597076416, + "step": 5769 + }, + { + "epoch": 8.80279541015625e-06, + "step": 5769, + "training_step_time": 0.12457752227783203 + }, + { + "epoch": 8.8043212890625e-06, + "grad_norm": 0.4511484205722809, + "learning_rate": 9.456283043532576e-05, + "loss": 0.0775, + "step": 5770 + }, + { + "epoch": 8.8043212890625e-06, + "model_forward_time": 0.02500605583190918, + "step": 5770 + }, + { + "epoch": 8.8043212890625e-06, + "step": 5770, + "training_step_time": 0.12194275856018066 + }, + { + "epoch": 8.80584716796875e-06, + "model_forward_time": 0.025419950485229492, + "step": 5771 + }, + { + "epoch": 8.80584716796875e-06, + "step": 5771, + "training_step_time": 0.11929988861083984 + }, + { + "epoch": 8.807373046875e-06, + "model_forward_time": 0.02536153793334961, + "step": 5772 + }, + { + "epoch": 8.807373046875e-06, + "step": 5772, + "training_step_time": 0.17539334297180176 + }, + { + "epoch": 8.80889892578125e-06, + "model_forward_time": 0.024729013442993164, + "step": 5773 + }, + { + "epoch": 8.80889892578125e-06, + "step": 5773, + "training_step_time": 0.11239981651306152 + }, + { + "epoch": 8.8104248046875e-06, + "model_forward_time": 0.024279356002807617, + "step": 5774 + }, + { + "epoch": 8.8104248046875e-06, + "step": 5774, + "training_step_time": 0.11043047904968262 + }, + { + "epoch": 8.81195068359375e-06, + "model_forward_time": 0.02524590492248535, + "step": 5775 + }, + { + "epoch": 8.81195068359375e-06, + "step": 5775, + "training_step_time": 0.12261509895324707 + }, + { + "epoch": 8.8134765625e-06, + "model_forward_time": 0.025134563446044922, + "step": 5776 + }, + { + "epoch": 8.8134765625e-06, + "step": 5776, + "training_step_time": 0.12656331062316895 + }, + { + "epoch": 8.81500244140625e-06, + "model_forward_time": 0.025783777236938477, + "step": 5777 + }, + { + "epoch": 8.81500244140625e-06, + "step": 5777, + "training_step_time": 0.1118016242980957 + }, + { + "epoch": 8.8165283203125e-06, + "model_forward_time": 0.026101350784301758, + "step": 5778 + }, + { + "epoch": 8.8165283203125e-06, + "step": 5778, + "training_step_time": 0.12193107604980469 + }, + { + "epoch": 8.81805419921875e-06, + "model_forward_time": 0.025826454162597656, + "step": 5779 + }, + { + "epoch": 8.81805419921875e-06, + "step": 5779, + "training_step_time": 0.10889267921447754 + }, + { + "epoch": 8.819580078125e-06, + "grad_norm": 0.3592827022075653, + "learning_rate": 9.453780845029821e-05, + "loss": 0.0823, + "step": 5780 + }, + { + "epoch": 8.819580078125e-06, + "model_forward_time": 0.024648189544677734, + "step": 5780 + }, + { + "epoch": 8.819580078125e-06, + "step": 5780, + "training_step_time": 0.14451336860656738 + }, + { + "epoch": 8.82110595703125e-06, + "model_forward_time": 0.025223493576049805, + "step": 5781 + }, + { + "epoch": 8.82110595703125e-06, + "step": 5781, + "training_step_time": 0.10784506797790527 + }, + { + "epoch": 8.8226318359375e-06, + "model_forward_time": 0.025252342224121094, + "step": 5782 + }, + { + "epoch": 8.8226318359375e-06, + "step": 5782, + "training_step_time": 0.2090592384338379 + }, + { + "epoch": 8.82415771484375e-06, + "model_forward_time": 0.024483680725097656, + "step": 5783 + }, + { + "epoch": 8.82415771484375e-06, + "step": 5783, + "training_step_time": 0.13753032684326172 + }, + { + "epoch": 8.82568359375e-06, + "model_forward_time": 0.024658679962158203, + "step": 5784 + }, + { + "epoch": 8.82568359375e-06, + "step": 5784, + "training_step_time": 0.11122369766235352 + }, + { + "epoch": 8.82720947265625e-06, + "model_forward_time": 0.025246381759643555, + "step": 5785 + }, + { + "epoch": 8.82720947265625e-06, + "step": 5785, + "training_step_time": 0.11168313026428223 + }, + { + "epoch": 8.8287353515625e-06, + "model_forward_time": 0.02494335174560547, + "step": 5786 + }, + { + "epoch": 8.8287353515625e-06, + "step": 5786, + "training_step_time": 0.10713315010070801 + }, + { + "epoch": 8.83026123046875e-06, + "model_forward_time": 0.025080442428588867, + "step": 5787 + }, + { + "epoch": 8.83026123046875e-06, + "step": 5787, + "training_step_time": 0.10864138603210449 + }, + { + "epoch": 8.831787109375e-06, + "model_forward_time": 0.025205135345458984, + "step": 5788 + }, + { + "epoch": 8.831787109375e-06, + "step": 5788, + "training_step_time": 0.10888957977294922 + }, + { + "epoch": 8.83331298828125e-06, + "model_forward_time": 0.025120258331298828, + "step": 5789 + }, + { + "epoch": 8.83331298828125e-06, + "step": 5789, + "training_step_time": 0.1076960563659668 + }, + { + "epoch": 8.8348388671875e-06, + "grad_norm": 0.4227290153503418, + "learning_rate": 9.451273234763371e-05, + "loss": 0.0719, + "step": 5790 + }, + { + "epoch": 8.8348388671875e-06, + "model_forward_time": 0.02511739730834961, + "step": 5790 + }, + { + "epoch": 8.8348388671875e-06, + "step": 5790, + "training_step_time": 0.10841917991638184 + }, + { + "epoch": 8.83636474609375e-06, + "model_forward_time": 0.024907827377319336, + "step": 5791 + }, + { + "epoch": 8.83636474609375e-06, + "step": 5791, + "training_step_time": 0.11310505867004395 + }, + { + "epoch": 8.837890625e-06, + "model_forward_time": 0.025264978408813477, + "step": 5792 + }, + { + "epoch": 8.837890625e-06, + "step": 5792, + "training_step_time": 0.10797905921936035 + }, + { + "epoch": 8.83941650390625e-06, + "model_forward_time": 0.026732206344604492, + "step": 5793 + }, + { + "epoch": 8.83941650390625e-06, + "step": 5793, + "training_step_time": 0.11080360412597656 + }, + { + "epoch": 8.8409423828125e-06, + "model_forward_time": 0.025563478469848633, + "step": 5794 + }, + { + "epoch": 8.8409423828125e-06, + "step": 5794, + "training_step_time": 0.11052417755126953 + }, + { + "epoch": 8.84246826171875e-06, + "model_forward_time": 0.025222063064575195, + "step": 5795 + }, + { + "epoch": 8.84246826171875e-06, + "step": 5795, + "training_step_time": 0.10729503631591797 + }, + { + "epoch": 8.843994140625e-06, + "model_forward_time": 0.025298595428466797, + "step": 5796 + }, + { + "epoch": 8.843994140625e-06, + "step": 5796, + "training_step_time": 0.10972142219543457 + }, + { + "epoch": 8.84552001953125e-06, + "model_forward_time": 0.0251924991607666, + "step": 5797 + }, + { + "epoch": 8.84552001953125e-06, + "step": 5797, + "training_step_time": 0.10793375968933105 + }, + { + "epoch": 8.8470458984375e-06, + "model_forward_time": 0.025262832641601562, + "step": 5798 + }, + { + "epoch": 8.8470458984375e-06, + "step": 5798, + "training_step_time": 0.11197161674499512 + }, + { + "epoch": 8.84857177734375e-06, + "model_forward_time": 0.025038480758666992, + "step": 5799 + }, + { + "epoch": 8.84857177734375e-06, + "step": 5799, + "training_step_time": 0.10808491706848145 + }, + { + "epoch": 8.85009765625e-06, + "grad_norm": 0.3732927739620209, + "learning_rate": 9.448760215780217e-05, + "loss": 0.0976, + "step": 5800 + }, + { + "epoch": 8.85009765625e-06, + "model_forward_time": 0.025250911712646484, + "step": 5800 + }, + { + "epoch": 8.85009765625e-06, + "step": 5800, + "training_step_time": 0.21102237701416016 + }, + { + "epoch": 8.85162353515625e-06, + "model_forward_time": 0.024827003479003906, + "step": 5801 + }, + { + "epoch": 8.85162353515625e-06, + "step": 5801, + "training_step_time": 0.14337944984436035 + }, + { + "epoch": 8.8531494140625e-06, + "model_forward_time": 0.02445507049560547, + "step": 5802 + }, + { + "epoch": 8.8531494140625e-06, + "step": 5802, + "training_step_time": 0.1565699577331543 + }, + { + "epoch": 8.85467529296875e-06, + "model_forward_time": 0.024457454681396484, + "step": 5803 + }, + { + "epoch": 8.85467529296875e-06, + "step": 5803, + "training_step_time": 0.15153717994689941 + }, + { + "epoch": 8.856201171875e-06, + "model_forward_time": 0.024574995040893555, + "step": 5804 + }, + { + "epoch": 8.856201171875e-06, + "step": 5804, + "training_step_time": 0.18435430526733398 + }, + { + "epoch": 8.85772705078125e-06, + "model_forward_time": 0.02444624900817871, + "step": 5805 + }, + { + "epoch": 8.85772705078125e-06, + "step": 5805, + "training_step_time": 0.12979626655578613 + }, + { + "epoch": 8.8592529296875e-06, + "model_forward_time": 0.024531841278076172, + "step": 5806 + }, + { + "epoch": 8.8592529296875e-06, + "step": 5806, + "training_step_time": 0.14025068283081055 + }, + { + "epoch": 8.86077880859375e-06, + "model_forward_time": 0.025538206100463867, + "step": 5807 + }, + { + "epoch": 8.86077880859375e-06, + "step": 5807, + "training_step_time": 0.13111233711242676 + }, + { + "epoch": 8.8623046875e-06, + "model_forward_time": 0.02507638931274414, + "step": 5808 + }, + { + "epoch": 8.8623046875e-06, + "step": 5808, + "training_step_time": 0.1113440990447998 + }, + { + "epoch": 8.86383056640625e-06, + "model_forward_time": 0.025632381439208984, + "step": 5809 + }, + { + "epoch": 8.86383056640625e-06, + "step": 5809, + "training_step_time": 0.11174201965332031 + }, + { + "epoch": 8.8653564453125e-06, + "grad_norm": 0.3051137328147888, + "learning_rate": 9.446241791133907e-05, + "loss": 0.072, + "step": 5810 + }, + { + "epoch": 8.8653564453125e-06, + "model_forward_time": 0.02534627914428711, + "step": 5810 + }, + { + "epoch": 8.8653564453125e-06, + "step": 5810, + "training_step_time": 0.11006307601928711 + }, + { + "epoch": 8.86688232421875e-06, + "model_forward_time": 0.025818347930908203, + "step": 5811 + }, + { + "epoch": 8.86688232421875e-06, + "step": 5811, + "training_step_time": 0.11287665367126465 + }, + { + "epoch": 8.868408203125e-06, + "model_forward_time": 0.025643587112426758, + "step": 5812 + }, + { + "epoch": 8.868408203125e-06, + "step": 5812, + "training_step_time": 0.189713716506958 + }, + { + "epoch": 8.86993408203125e-06, + "model_forward_time": 0.026393651962280273, + "step": 5813 + }, + { + "epoch": 8.86993408203125e-06, + "step": 5813, + "training_step_time": 0.10646700859069824 + }, + { + "epoch": 8.8714599609375e-06, + "model_forward_time": 0.024766206741333008, + "step": 5814 + }, + { + "epoch": 8.8714599609375e-06, + "step": 5814, + "training_step_time": 0.10984349250793457 + }, + { + "epoch": 8.87298583984375e-06, + "model_forward_time": 0.025118350982666016, + "step": 5815 + }, + { + "epoch": 8.87298583984375e-06, + "step": 5815, + "training_step_time": 0.10932445526123047 + }, + { + "epoch": 8.87451171875e-06, + "model_forward_time": 0.026819944381713867, + "step": 5816 + }, + { + "epoch": 8.87451171875e-06, + "step": 5816, + "training_step_time": 0.10876083374023438 + }, + { + "epoch": 8.87603759765625e-06, + "model_forward_time": 0.025341033935546875, + "step": 5817 + }, + { + "epoch": 8.87603759765625e-06, + "step": 5817, + "training_step_time": 0.15482735633850098 + }, + { + "epoch": 8.8775634765625e-06, + "model_forward_time": 0.02510356903076172, + "step": 5818 + }, + { + "epoch": 8.8775634765625e-06, + "step": 5818, + "training_step_time": 0.11521434783935547 + }, + { + "epoch": 8.87908935546875e-06, + "model_forward_time": 0.02530646324157715, + "step": 5819 + }, + { + "epoch": 8.87908935546875e-06, + "step": 5819, + "training_step_time": 0.18747901916503906 + }, + { + "epoch": 8.880615234375e-06, + "grad_norm": 0.5610553026199341, + "learning_rate": 9.443717963884569e-05, + "loss": 0.0839, + "step": 5820 + }, + { + "epoch": 8.880615234375e-06, + "model_forward_time": 0.024465560913085938, + "step": 5820 + }, + { + "epoch": 8.880615234375e-06, + "step": 5820, + "training_step_time": 0.18909311294555664 + }, + { + "epoch": 8.88214111328125e-06, + "model_forward_time": 0.026720523834228516, + "step": 5821 + }, + { + "epoch": 8.88214111328125e-06, + "step": 5821, + "training_step_time": 0.17702102661132812 + }, + { + "epoch": 8.8836669921875e-06, + "model_forward_time": 0.024750709533691406, + "step": 5822 + }, + { + "epoch": 8.8836669921875e-06, + "step": 5822, + "training_step_time": 0.15208792686462402 + }, + { + "epoch": 8.88519287109375e-06, + "model_forward_time": 0.025708675384521484, + "step": 5823 + }, + { + "epoch": 8.88519287109375e-06, + "step": 5823, + "training_step_time": 0.11994004249572754 + }, + { + "epoch": 8.88671875e-06, + "model_forward_time": 0.02420353889465332, + "step": 5824 + }, + { + "epoch": 8.88671875e-06, + "step": 5824, + "training_step_time": 0.12740635871887207 + }, + { + "epoch": 8.88824462890625e-06, + "model_forward_time": 0.025762319564819336, + "step": 5825 + }, + { + "epoch": 8.88824462890625e-06, + "step": 5825, + "training_step_time": 0.11667156219482422 + }, + { + "epoch": 8.8897705078125e-06, + "model_forward_time": 0.025492429733276367, + "step": 5826 + }, + { + "epoch": 8.8897705078125e-06, + "step": 5826, + "training_step_time": 0.17093229293823242 + }, + { + "epoch": 8.89129638671875e-06, + "model_forward_time": 0.024620771408081055, + "step": 5827 + }, + { + "epoch": 8.89129638671875e-06, + "step": 5827, + "training_step_time": 0.16476893424987793 + }, + { + "epoch": 8.892822265625e-06, + "model_forward_time": 0.025287628173828125, + "step": 5828 + }, + { + "epoch": 8.892822265625e-06, + "step": 5828, + "training_step_time": 0.1212470531463623 + }, + { + "epoch": 8.89434814453125e-06, + "model_forward_time": 0.02477407455444336, + "step": 5829 + }, + { + "epoch": 8.89434814453125e-06, + "step": 5829, + "training_step_time": 0.11799836158752441 + }, + { + "epoch": 8.8958740234375e-06, + "grad_norm": 0.5057721138000488, + "learning_rate": 9.441188737098889e-05, + "loss": 0.0899, + "step": 5830 + }, + { + "epoch": 8.8958740234375e-06, + "model_forward_time": 0.02567291259765625, + "step": 5830 + }, + { + "epoch": 8.8958740234375e-06, + "step": 5830, + "training_step_time": 0.11262321472167969 + }, + { + "epoch": 8.89739990234375e-06, + "model_forward_time": 0.02634739875793457, + "step": 5831 + }, + { + "epoch": 8.89739990234375e-06, + "step": 5831, + "training_step_time": 0.11351990699768066 + }, + { + "epoch": 8.89892578125e-06, + "model_forward_time": 0.0253145694732666, + "step": 5832 + }, + { + "epoch": 8.89892578125e-06, + "step": 5832, + "training_step_time": 0.1112065315246582 + }, + { + "epoch": 8.90045166015625e-06, + "model_forward_time": 0.02509927749633789, + "step": 5833 + }, + { + "epoch": 8.90045166015625e-06, + "step": 5833, + "training_step_time": 0.10861444473266602 + }, + { + "epoch": 8.9019775390625e-06, + "model_forward_time": 0.02516460418701172, + "step": 5834 + }, + { + "epoch": 8.9019775390625e-06, + "step": 5834, + "training_step_time": 0.10914993286132812 + }, + { + "epoch": 8.90350341796875e-06, + "model_forward_time": 0.0250394344329834, + "step": 5835 + }, + { + "epoch": 8.90350341796875e-06, + "step": 5835, + "training_step_time": 0.11852455139160156 + }, + { + "epoch": 8.905029296875e-06, + "model_forward_time": 0.02480459213256836, + "step": 5836 + }, + { + "epoch": 8.905029296875e-06, + "step": 5836, + "training_step_time": 0.11158275604248047 + }, + { + "epoch": 8.90655517578125e-06, + "model_forward_time": 0.02518296241760254, + "step": 5837 + }, + { + "epoch": 8.90655517578125e-06, + "step": 5837, + "training_step_time": 0.10818243026733398 + }, + { + "epoch": 8.9080810546875e-06, + "model_forward_time": 0.025329113006591797, + "step": 5838 + }, + { + "epoch": 8.9080810546875e-06, + "step": 5838, + "training_step_time": 0.11196494102478027 + }, + { + "epoch": 8.90960693359375e-06, + "model_forward_time": 0.02407217025756836, + "step": 5839 + }, + { + "epoch": 8.90960693359375e-06, + "step": 5839, + "training_step_time": 0.10945892333984375 + }, + { + "epoch": 8.9111328125e-06, + "grad_norm": 0.31711509823799133, + "learning_rate": 9.438654113850118e-05, + "loss": 0.0723, + "step": 5840 + }, + { + "epoch": 8.9111328125e-06, + "model_forward_time": 0.024309158325195312, + "step": 5840 + }, + { + "epoch": 8.9111328125e-06, + "step": 5840, + "training_step_time": 0.11232709884643555 + }, + { + "epoch": 8.91265869140625e-06, + "model_forward_time": 0.025162458419799805, + "step": 5841 + }, + { + "epoch": 8.91265869140625e-06, + "step": 5841, + "training_step_time": 0.10844254493713379 + }, + { + "epoch": 8.9141845703125e-06, + "model_forward_time": 0.024079084396362305, + "step": 5842 + }, + { + "epoch": 8.9141845703125e-06, + "step": 5842, + "training_step_time": 0.10884809494018555 + }, + { + "epoch": 8.91571044921875e-06, + "model_forward_time": 0.025310516357421875, + "step": 5843 + }, + { + "epoch": 8.91571044921875e-06, + "step": 5843, + "training_step_time": 0.22782158851623535 + }, + { + "epoch": 8.917236328125e-06, + "model_forward_time": 0.024475574493408203, + "step": 5844 + }, + { + "epoch": 8.917236328125e-06, + "step": 5844, + "training_step_time": 0.11497926712036133 + }, + { + "epoch": 8.91876220703125e-06, + "model_forward_time": 0.023504972457885742, + "step": 5845 + }, + { + "epoch": 8.91876220703125e-06, + "step": 5845, + "training_step_time": 0.13498163223266602 + }, + { + "epoch": 8.9202880859375e-06, + "model_forward_time": 0.024995088577270508, + "step": 5846 + }, + { + "epoch": 8.9202880859375e-06, + "step": 5846, + "training_step_time": 0.1605982780456543 + }, + { + "epoch": 8.92181396484375e-06, + "model_forward_time": 0.024432897567749023, + "step": 5847 + }, + { + "epoch": 8.92181396484375e-06, + "step": 5847, + "training_step_time": 0.22236394882202148 + }, + { + "epoch": 8.92333984375e-06, + "model_forward_time": 0.024111270904541016, + "step": 5848 + }, + { + "epoch": 8.92333984375e-06, + "step": 5848, + "training_step_time": 0.11499524116516113 + }, + { + "epoch": 8.92486572265625e-06, + "model_forward_time": 0.024158716201782227, + "step": 5849 + }, + { + "epoch": 8.92486572265625e-06, + "step": 5849, + "training_step_time": 0.15272808074951172 + }, + { + "epoch": 8.9263916015625e-06, + "grad_norm": 0.5319364070892334, + "learning_rate": 9.43611409721806e-05, + "loss": 0.0633, + "step": 5850 + }, + { + "epoch": 8.9263916015625e-06, + "model_forward_time": 0.0243685245513916, + "step": 5850 + }, + { + "epoch": 8.9263916015625e-06, + "step": 5850, + "training_step_time": 0.13464713096618652 + }, + { + "epoch": 8.92791748046875e-06, + "model_forward_time": 0.0245208740234375, + "step": 5851 + }, + { + "epoch": 8.92791748046875e-06, + "step": 5851, + "training_step_time": 0.10843944549560547 + }, + { + "epoch": 8.929443359375e-06, + "model_forward_time": 0.024949312210083008, + "step": 5852 + }, + { + "epoch": 8.929443359375e-06, + "step": 5852, + "training_step_time": 0.11367082595825195 + }, + { + "epoch": 8.93096923828125e-06, + "model_forward_time": 0.024866342544555664, + "step": 5853 + }, + { + "epoch": 8.93096923828125e-06, + "step": 5853, + "training_step_time": 0.12192869186401367 + }, + { + "epoch": 8.9324951171875e-06, + "model_forward_time": 0.02535223960876465, + "step": 5854 + }, + { + "epoch": 8.9324951171875e-06, + "step": 5854, + "training_step_time": 0.10739850997924805 + }, + { + "epoch": 8.93402099609375e-06, + "model_forward_time": 0.025511741638183594, + "step": 5855 + }, + { + "epoch": 8.93402099609375e-06, + "step": 5855, + "training_step_time": 0.21050477027893066 + }, + { + "epoch": 8.935546875e-06, + "model_forward_time": 0.03985714912414551, + "step": 5856 + }, + { + "epoch": 8.935546875e-06, + "step": 5856, + "training_step_time": 0.13285589218139648 + }, + { + "epoch": 8.93707275390625e-06, + "model_forward_time": 0.025583744049072266, + "step": 5857 + }, + { + "epoch": 8.93707275390625e-06, + "step": 5857, + "training_step_time": 0.10788893699645996 + }, + { + "epoch": 8.9385986328125e-06, + "model_forward_time": 0.026898860931396484, + "step": 5858 + }, + { + "epoch": 8.9385986328125e-06, + "step": 5858, + "training_step_time": 0.10962200164794922 + }, + { + "epoch": 8.94012451171875e-06, + "model_forward_time": 0.026428937911987305, + "step": 5859 + }, + { + "epoch": 8.94012451171875e-06, + "step": 5859, + "training_step_time": 0.11249017715454102 + }, + { + "epoch": 8.941650390625e-06, + "grad_norm": 0.5092139840126038, + "learning_rate": 9.433568690289075e-05, + "loss": 0.0842, + "step": 5860 + }, + { + "epoch": 8.941650390625e-06, + "model_forward_time": 0.026331424713134766, + "step": 5860 + }, + { + "epoch": 8.941650390625e-06, + "step": 5860, + "training_step_time": 0.14335322380065918 + }, + { + "epoch": 8.94317626953125e-06, + "model_forward_time": 0.026547670364379883, + "step": 5861 + }, + { + "epoch": 8.94317626953125e-06, + "step": 5861, + "training_step_time": 0.11163830757141113 + }, + { + "epoch": 8.9447021484375e-06, + "model_forward_time": 0.025946378707885742, + "step": 5862 + }, + { + "epoch": 8.9447021484375e-06, + "step": 5862, + "training_step_time": 0.11416816711425781 + }, + { + "epoch": 8.94622802734375e-06, + "model_forward_time": 0.02638864517211914, + "step": 5863 + }, + { + "epoch": 8.94622802734375e-06, + "step": 5863, + "training_step_time": 0.11352372169494629 + }, + { + "epoch": 8.94775390625e-06, + "model_forward_time": 0.02850794792175293, + "step": 5864 + }, + { + "epoch": 8.94775390625e-06, + "step": 5864, + "training_step_time": 0.1380002498626709 + }, + { + "epoch": 8.94927978515625e-06, + "model_forward_time": 0.026858806610107422, + "step": 5865 + }, + { + "epoch": 8.94927978515625e-06, + "step": 5865, + "training_step_time": 0.11621904373168945 + }, + { + "epoch": 8.9508056640625e-06, + "model_forward_time": 0.025970935821533203, + "step": 5866 + }, + { + "epoch": 8.9508056640625e-06, + "step": 5866, + "training_step_time": 0.11582231521606445 + }, + { + "epoch": 8.95233154296875e-06, + "model_forward_time": 0.026712894439697266, + "step": 5867 + }, + { + "epoch": 8.95233154296875e-06, + "step": 5867, + "training_step_time": 0.11066222190856934 + }, + { + "epoch": 8.953857421875e-06, + "model_forward_time": 0.02659916877746582, + "step": 5868 + }, + { + "epoch": 8.953857421875e-06, + "step": 5868, + "training_step_time": 0.11015057563781738 + }, + { + "epoch": 8.95538330078125e-06, + "model_forward_time": 0.026492595672607422, + "step": 5869 + }, + { + "epoch": 8.95538330078125e-06, + "step": 5869, + "training_step_time": 0.21688055992126465 + }, + { + "epoch": 8.9569091796875e-06, + "grad_norm": 0.36725756525993347, + "learning_rate": 9.431017896156074e-05, + "loss": 0.0792, + "step": 5870 + }, + { + "epoch": 8.9569091796875e-06, + "model_forward_time": 0.02571249008178711, + "step": 5870 + }, + { + "epoch": 8.9569091796875e-06, + "step": 5870, + "training_step_time": 0.11063218116760254 + }, + { + "epoch": 8.95843505859375e-06, + "model_forward_time": 0.025835752487182617, + "step": 5871 + }, + { + "epoch": 8.95843505859375e-06, + "step": 5871, + "training_step_time": 0.10873532295227051 + }, + { + "epoch": 8.9599609375e-06, + "model_forward_time": 0.026432514190673828, + "step": 5872 + }, + { + "epoch": 8.9599609375e-06, + "step": 5872, + "training_step_time": 0.1101069450378418 + }, + { + "epoch": 8.96148681640625e-06, + "model_forward_time": 0.026633501052856445, + "step": 5873 + }, + { + "epoch": 8.96148681640625e-06, + "step": 5873, + "training_step_time": 0.12423205375671387 + }, + { + "epoch": 8.9630126953125e-06, + "model_forward_time": 0.026635169982910156, + "step": 5874 + }, + { + "epoch": 8.9630126953125e-06, + "step": 5874, + "training_step_time": 0.13721823692321777 + }, + { + "epoch": 8.96453857421875e-06, + "model_forward_time": 0.02652716636657715, + "step": 5875 + }, + { + "epoch": 8.96453857421875e-06, + "step": 5875, + "training_step_time": 0.13985204696655273 + }, + { + "epoch": 8.966064453125e-06, + "model_forward_time": 0.025879383087158203, + "step": 5876 + }, + { + "epoch": 8.966064453125e-06, + "step": 5876, + "training_step_time": 0.12676477432250977 + }, + { + "epoch": 8.96759033203125e-06, + "model_forward_time": 0.025313615798950195, + "step": 5877 + }, + { + "epoch": 8.96759033203125e-06, + "step": 5877, + "training_step_time": 0.12378740310668945 + }, + { + "epoch": 8.9691162109375e-06, + "model_forward_time": 0.0267791748046875, + "step": 5878 + }, + { + "epoch": 8.9691162109375e-06, + "step": 5878, + "training_step_time": 0.12459158897399902 + }, + { + "epoch": 8.97064208984375e-06, + "model_forward_time": 0.0261538028717041, + "step": 5879 + }, + { + "epoch": 8.97064208984375e-06, + "step": 5879, + "training_step_time": 0.12115287780761719 + }, + { + "epoch": 8.97216796875e-06, + "grad_norm": 0.27330896258354187, + "learning_rate": 9.428461717918511e-05, + "loss": 0.0756, + "step": 5880 + }, + { + "epoch": 8.97216796875e-06, + "model_forward_time": 0.026023149490356445, + "step": 5880 + }, + { + "epoch": 8.97216796875e-06, + "step": 5880, + "training_step_time": 0.11267638206481934 + }, + { + "epoch": 8.97369384765625e-06, + "model_forward_time": 0.028307437896728516, + "step": 5881 + }, + { + "epoch": 8.97369384765625e-06, + "step": 5881, + "training_step_time": 0.11522889137268066 + }, + { + "epoch": 8.9752197265625e-06, + "model_forward_time": 0.026381254196166992, + "step": 5882 + }, + { + "epoch": 8.9752197265625e-06, + "step": 5882, + "training_step_time": 0.11294960975646973 + }, + { + "epoch": 8.97674560546875e-06, + "model_forward_time": 0.025607824325561523, + "step": 5883 + }, + { + "epoch": 8.97674560546875e-06, + "step": 5883, + "training_step_time": 0.1104576587677002 + }, + { + "epoch": 8.978271484375e-06, + "model_forward_time": 0.026769638061523438, + "step": 5884 + }, + { + "epoch": 8.978271484375e-06, + "step": 5884, + "training_step_time": 0.11121988296508789 + }, + { + "epoch": 8.97979736328125e-06, + "model_forward_time": 0.026705503463745117, + "step": 5885 + }, + { + "epoch": 8.97979736328125e-06, + "step": 5885, + "training_step_time": 0.11024928092956543 + }, + { + "epoch": 8.9813232421875e-06, + "model_forward_time": 0.026415586471557617, + "step": 5886 + }, + { + "epoch": 8.9813232421875e-06, + "step": 5886, + "training_step_time": 0.11356520652770996 + }, + { + "epoch": 8.98284912109375e-06, + "model_forward_time": 0.025795936584472656, + "step": 5887 + }, + { + "epoch": 8.98284912109375e-06, + "step": 5887, + "training_step_time": 0.21764588356018066 + }, + { + "epoch": 8.984375e-06, + "model_forward_time": 0.025852441787719727, + "step": 5888 + }, + { + "epoch": 8.984375e-06, + "step": 5888, + "training_step_time": 0.11114215850830078 + }, + { + "epoch": 8.98590087890625e-06, + "model_forward_time": 0.025300025939941406, + "step": 5889 + }, + { + "epoch": 8.98590087890625e-06, + "step": 5889, + "training_step_time": 0.13242411613464355 + }, + { + "epoch": 8.9874267578125e-06, + "grad_norm": 0.3354921042919159, + "learning_rate": 9.425900158682385e-05, + "loss": 0.0779, + "step": 5890 + }, + { + "epoch": 8.9874267578125e-06, + "model_forward_time": 0.025885820388793945, + "step": 5890 + }, + { + "epoch": 8.9874267578125e-06, + "step": 5890, + "training_step_time": 0.1098470687866211 + }, + { + "epoch": 8.98895263671875e-06, + "model_forward_time": 0.02617645263671875, + "step": 5891 + }, + { + "epoch": 8.98895263671875e-06, + "step": 5891, + "training_step_time": 0.16656708717346191 + }, + { + "epoch": 8.990478515625e-06, + "model_forward_time": 0.02542567253112793, + "step": 5892 + }, + { + "epoch": 8.990478515625e-06, + "step": 5892, + "training_step_time": 0.16001534461975098 + }, + { + "epoch": 8.99200439453125e-06, + "model_forward_time": 0.02576136589050293, + "step": 5893 + }, + { + "epoch": 8.99200439453125e-06, + "step": 5893, + "training_step_time": 0.11061358451843262 + }, + { + "epoch": 8.9935302734375e-06, + "model_forward_time": 0.025977611541748047, + "step": 5894 + }, + { + "epoch": 8.9935302734375e-06, + "step": 5894, + "training_step_time": 0.10731363296508789 + }, + { + "epoch": 8.99505615234375e-06, + "model_forward_time": 0.026017427444458008, + "step": 5895 + }, + { + "epoch": 8.99505615234375e-06, + "step": 5895, + "training_step_time": 0.14051508903503418 + }, + { + "epoch": 8.99658203125e-06, + "model_forward_time": 0.026168107986450195, + "step": 5896 + }, + { + "epoch": 8.99658203125e-06, + "step": 5896, + "training_step_time": 0.11793398857116699 + }, + { + "epoch": 8.99810791015625e-06, + "model_forward_time": 0.026320695877075195, + "step": 5897 + }, + { + "epoch": 8.99810791015625e-06, + "step": 5897, + "training_step_time": 0.11550617218017578 + }, + { + "epoch": 8.9996337890625e-06, + "model_forward_time": 0.02654266357421875, + "step": 5898 + }, + { + "epoch": 8.9996337890625e-06, + "step": 5898, + "training_step_time": 0.10700750350952148 + }, + { + "epoch": 9.00115966796875e-06, + "model_forward_time": 0.02664351463317871, + "step": 5899 + }, + { + "epoch": 9.00115966796875e-06, + "step": 5899, + "training_step_time": 0.10949516296386719 + }, + { + "epoch": 9.002685546875e-06, + "grad_norm": 0.5841869115829468, + "learning_rate": 9.42333322156023e-05, + "loss": 0.0963, + "step": 5900 + }, + { + "epoch": 9.002685546875e-06, + "model_forward_time": 0.026638031005859375, + "step": 5900 + }, + { + "epoch": 9.002685546875e-06, + "step": 5900, + "training_step_time": 0.205946683883667 + }, + { + "epoch": 9.00421142578125e-06, + "model_forward_time": 0.025427579879760742, + "step": 5901 + }, + { + "epoch": 9.00421142578125e-06, + "step": 5901, + "training_step_time": 0.11169576644897461 + }, + { + "epoch": 9.0057373046875e-06, + "model_forward_time": 0.02533698081970215, + "step": 5902 + }, + { + "epoch": 9.0057373046875e-06, + "step": 5902, + "training_step_time": 0.10987305641174316 + }, + { + "epoch": 9.00726318359375e-06, + "model_forward_time": 0.026617050170898438, + "step": 5903 + }, + { + "epoch": 9.00726318359375e-06, + "step": 5903, + "training_step_time": 0.10966324806213379 + }, + { + "epoch": 9.0087890625e-06, + "model_forward_time": 0.026145458221435547, + "step": 5904 + }, + { + "epoch": 9.0087890625e-06, + "step": 5904, + "training_step_time": 0.10892581939697266 + }, + { + "epoch": 9.01031494140625e-06, + "model_forward_time": 0.026487112045288086, + "step": 5905 + }, + { + "epoch": 9.01031494140625e-06, + "step": 5905, + "training_step_time": 0.2060689926147461 + }, + { + "epoch": 9.0118408203125e-06, + "model_forward_time": 0.02399754524230957, + "step": 5906 + }, + { + "epoch": 9.0118408203125e-06, + "step": 5906, + "training_step_time": 0.10992431640625 + }, + { + "epoch": 9.01336669921875e-06, + "model_forward_time": 0.025414705276489258, + "step": 5907 + }, + { + "epoch": 9.01336669921875e-06, + "step": 5907, + "training_step_time": 0.10914397239685059 + }, + { + "epoch": 9.014892578125e-06, + "model_forward_time": 0.026842832565307617, + "step": 5908 + }, + { + "epoch": 9.014892578125e-06, + "step": 5908, + "training_step_time": 0.11783885955810547 + }, + { + "epoch": 9.01641845703125e-06, + "model_forward_time": 0.028972864151000977, + "step": 5909 + }, + { + "epoch": 9.01641845703125e-06, + "step": 5909, + "training_step_time": 0.13008832931518555 + }, + { + "epoch": 9.0179443359375e-06, + "grad_norm": 0.42277806997299194, + "learning_rate": 9.420760909671118e-05, + "loss": 0.0828, + "step": 5910 + }, + { + "epoch": 9.0179443359375e-06, + "model_forward_time": 0.02595686912536621, + "step": 5910 + }, + { + "epoch": 9.0179443359375e-06, + "step": 5910, + "training_step_time": 0.11119413375854492 + }, + { + "epoch": 9.01947021484375e-06, + "model_forward_time": 0.026522159576416016, + "step": 5911 + }, + { + "epoch": 9.01947021484375e-06, + "step": 5911, + "training_step_time": 0.11870765686035156 + }, + { + "epoch": 9.02099609375e-06, + "model_forward_time": 0.02681422233581543, + "step": 5912 + }, + { + "epoch": 9.02099609375e-06, + "step": 5912, + "training_step_time": 0.10959482192993164 + }, + { + "epoch": 9.02252197265625e-06, + "model_forward_time": 0.025684595108032227, + "step": 5913 + }, + { + "epoch": 9.02252197265625e-06, + "step": 5913, + "training_step_time": 0.10828876495361328 + }, + { + "epoch": 9.0240478515625e-06, + "model_forward_time": 0.02603745460510254, + "step": 5914 + }, + { + "epoch": 9.0240478515625e-06, + "step": 5914, + "training_step_time": 0.18803858757019043 + }, + { + "epoch": 9.02557373046875e-06, + "model_forward_time": 0.025936126708984375, + "step": 5915 + }, + { + "epoch": 9.02557373046875e-06, + "step": 5915, + "training_step_time": 0.2149946689605713 + }, + { + "epoch": 9.027099609375e-06, + "model_forward_time": 0.025760650634765625, + "step": 5916 + }, + { + "epoch": 9.027099609375e-06, + "step": 5916, + "training_step_time": 0.2101743221282959 + }, + { + "epoch": 9.02862548828125e-06, + "model_forward_time": 0.025112152099609375, + "step": 5917 + }, + { + "epoch": 9.02862548828125e-06, + "step": 5917, + "training_step_time": 0.1929612159729004 + }, + { + "epoch": 9.0301513671875e-06, + "model_forward_time": 0.025174856185913086, + "step": 5918 + }, + { + "epoch": 9.0301513671875e-06, + "step": 5918, + "training_step_time": 0.1795511245727539 + }, + { + "epoch": 9.03167724609375e-06, + "model_forward_time": 0.025325298309326172, + "step": 5919 + }, + { + "epoch": 9.03167724609375e-06, + "step": 5919, + "training_step_time": 0.10358929634094238 + }, + { + "epoch": 9.033203125e-06, + "grad_norm": 0.571058452129364, + "learning_rate": 9.41818322614065e-05, + "loss": 0.0717, + "step": 5920 + }, + { + "epoch": 9.033203125e-06, + "model_forward_time": 0.0256803035736084, + "step": 5920 + }, + { + "epoch": 9.033203125e-06, + "step": 5920, + "training_step_time": 0.11001372337341309 + }, + { + "epoch": 9.03472900390625e-06, + "model_forward_time": 0.026933908462524414, + "step": 5921 + }, + { + "epoch": 9.03472900390625e-06, + "step": 5921, + "training_step_time": 0.1099863052368164 + }, + { + "epoch": 9.0362548828125e-06, + "model_forward_time": 0.02663588523864746, + "step": 5922 + }, + { + "epoch": 9.0362548828125e-06, + "step": 5922, + "training_step_time": 0.11111044883728027 + }, + { + "epoch": 9.03778076171875e-06, + "model_forward_time": 0.02638530731201172, + "step": 5923 + }, + { + "epoch": 9.03778076171875e-06, + "step": 5923, + "training_step_time": 0.109222412109375 + }, + { + "epoch": 9.039306640625e-06, + "model_forward_time": 0.026906967163085938, + "step": 5924 + }, + { + "epoch": 9.039306640625e-06, + "step": 5924, + "training_step_time": 0.10845232009887695 + }, + { + "epoch": 9.04083251953125e-06, + "model_forward_time": 0.026295900344848633, + "step": 5925 + }, + { + "epoch": 9.04083251953125e-06, + "step": 5925, + "training_step_time": 0.1078338623046875 + }, + { + "epoch": 9.0423583984375e-06, + "model_forward_time": 0.02643752098083496, + "step": 5926 + }, + { + "epoch": 9.0423583984375e-06, + "step": 5926, + "training_step_time": 0.11005520820617676 + }, + { + "epoch": 9.04388427734375e-06, + "model_forward_time": 0.026426076889038086, + "step": 5927 + }, + { + "epoch": 9.04388427734375e-06, + "step": 5927, + "training_step_time": 0.10824060440063477 + }, + { + "epoch": 9.04541015625e-06, + "model_forward_time": 0.02659749984741211, + "step": 5928 + }, + { + "epoch": 9.04541015625e-06, + "step": 5928, + "training_step_time": 0.12475919723510742 + }, + { + "epoch": 9.04693603515625e-06, + "model_forward_time": 0.026145458221435547, + "step": 5929 + }, + { + "epoch": 9.04693603515625e-06, + "step": 5929, + "training_step_time": 0.10899567604064941 + }, + { + "epoch": 9.0484619140625e-06, + "grad_norm": 0.5669389963150024, + "learning_rate": 9.415600174100956e-05, + "loss": 0.077, + "step": 5930 + }, + { + "epoch": 9.0484619140625e-06, + "model_forward_time": 0.0261080265045166, + "step": 5930 + }, + { + "epoch": 9.0484619140625e-06, + "step": 5930, + "training_step_time": 0.13007378578186035 + }, + { + "epoch": 9.04998779296875e-06, + "model_forward_time": 0.026097774505615234, + "step": 5931 + }, + { + "epoch": 9.04998779296875e-06, + "step": 5931, + "training_step_time": 0.18753290176391602 + }, + { + "epoch": 9.051513671875e-06, + "model_forward_time": 0.025613069534301758, + "step": 5932 + }, + { + "epoch": 9.051513671875e-06, + "step": 5932, + "training_step_time": 0.19725966453552246 + }, + { + "epoch": 9.05303955078125e-06, + "model_forward_time": 0.026819229125976562, + "step": 5933 + }, + { + "epoch": 9.05303955078125e-06, + "step": 5933, + "training_step_time": 0.17894816398620605 + }, + { + "epoch": 9.0545654296875e-06, + "model_forward_time": 0.025414228439331055, + "step": 5934 + }, + { + "epoch": 9.0545654296875e-06, + "step": 5934, + "training_step_time": 0.15322375297546387 + }, + { + "epoch": 9.05609130859375e-06, + "model_forward_time": 0.026224851608276367, + "step": 5935 + }, + { + "epoch": 9.05609130859375e-06, + "step": 5935, + "training_step_time": 0.20427942276000977 + }, + { + "epoch": 9.0576171875e-06, + "model_forward_time": 0.024692296981811523, + "step": 5936 + }, + { + "epoch": 9.0576171875e-06, + "step": 5936, + "training_step_time": 0.17146587371826172 + }, + { + "epoch": 9.05914306640625e-06, + "model_forward_time": 0.025773286819458008, + "step": 5937 + }, + { + "epoch": 9.05914306640625e-06, + "step": 5937, + "training_step_time": 0.19130420684814453 + }, + { + "epoch": 9.0606689453125e-06, + "model_forward_time": 0.02550816535949707, + "step": 5938 + }, + { + "epoch": 9.0606689453125e-06, + "step": 5938, + "training_step_time": 0.1665341854095459 + }, + { + "epoch": 9.06219482421875e-06, + "model_forward_time": 0.0258636474609375, + "step": 5939 + }, + { + "epoch": 9.06219482421875e-06, + "step": 5939, + "training_step_time": 0.18404364585876465 + }, + { + "epoch": 9.063720703125e-06, + "grad_norm": 0.3362850844860077, + "learning_rate": 9.413011756690685e-05, + "loss": 0.0751, + "step": 5940 + }, + { + "epoch": 9.063720703125e-06, + "model_forward_time": 0.025766611099243164, + "step": 5940 + }, + { + "epoch": 9.063720703125e-06, + "step": 5940, + "training_step_time": 0.10847282409667969 + }, + { + "epoch": 9.06524658203125e-06, + "model_forward_time": 0.025864839553833008, + "step": 5941 + }, + { + "epoch": 9.06524658203125e-06, + "step": 5941, + "training_step_time": 0.1095890998840332 + }, + { + "epoch": 9.0667724609375e-06, + "model_forward_time": 0.026218414306640625, + "step": 5942 + }, + { + "epoch": 9.0667724609375e-06, + "step": 5942, + "training_step_time": 0.1092383861541748 + }, + { + "epoch": 9.06829833984375e-06, + "model_forward_time": 0.02784132957458496, + "step": 5943 + }, + { + "epoch": 9.06829833984375e-06, + "step": 5943, + "training_step_time": 0.1113736629486084 + }, + { + "epoch": 9.06982421875e-06, + "model_forward_time": 0.026187658309936523, + "step": 5944 + }, + { + "epoch": 9.06982421875e-06, + "step": 5944, + "training_step_time": 0.1093747615814209 + }, + { + "epoch": 9.07135009765625e-06, + "model_forward_time": 0.02668285369873047, + "step": 5945 + }, + { + "epoch": 9.07135009765625e-06, + "step": 5945, + "training_step_time": 0.11614322662353516 + }, + { + "epoch": 9.0728759765625e-06, + "model_forward_time": 0.02617955207824707, + "step": 5946 + }, + { + "epoch": 9.0728759765625e-06, + "step": 5946, + "training_step_time": 0.14415621757507324 + }, + { + "epoch": 9.07440185546875e-06, + "model_forward_time": 0.02759552001953125, + "step": 5947 + }, + { + "epoch": 9.07440185546875e-06, + "step": 5947, + "training_step_time": 0.11200642585754395 + }, + { + "epoch": 9.075927734375e-06, + "model_forward_time": 0.02657628059387207, + "step": 5948 + }, + { + "epoch": 9.075927734375e-06, + "step": 5948, + "training_step_time": 0.11479640007019043 + }, + { + "epoch": 9.07745361328125e-06, + "model_forward_time": 0.026170015335083008, + "step": 5949 + }, + { + "epoch": 9.07745361328125e-06, + "step": 5949, + "training_step_time": 0.1187889575958252 + }, + { + "epoch": 9.0789794921875e-06, + "grad_norm": 0.4865238070487976, + "learning_rate": 9.410417977055011e-05, + "loss": 0.0747, + "step": 5950 + }, + { + "epoch": 9.0789794921875e-06, + "model_forward_time": 0.02617192268371582, + "step": 5950 + }, + { + "epoch": 9.0789794921875e-06, + "step": 5950, + "training_step_time": 0.1213524341583252 + }, + { + "epoch": 9.08050537109375e-06, + "model_forward_time": 0.026523351669311523, + "step": 5951 + }, + { + "epoch": 9.08050537109375e-06, + "step": 5951, + "training_step_time": 0.11065912246704102 + }, + { + "epoch": 9.08203125e-06, + "model_forward_time": 0.02640986442565918, + "step": 5952 + }, + { + "epoch": 9.08203125e-06, + "step": 5952, + "training_step_time": 0.11548829078674316 + }, + { + "epoch": 9.08355712890625e-06, + "model_forward_time": 0.027084827423095703, + "step": 5953 + }, + { + "epoch": 9.08355712890625e-06, + "step": 5953, + "training_step_time": 0.11052417755126953 + }, + { + "epoch": 9.0850830078125e-06, + "model_forward_time": 0.026594161987304688, + "step": 5954 + }, + { + "epoch": 9.0850830078125e-06, + "step": 5954, + "training_step_time": 0.17098760604858398 + }, + { + "epoch": 9.08660888671875e-06, + "model_forward_time": 0.02605438232421875, + "step": 5955 + }, + { + "epoch": 9.08660888671875e-06, + "step": 5955, + "training_step_time": 0.16991281509399414 + }, + { + "epoch": 9.088134765625e-06, + "model_forward_time": 0.025337696075439453, + "step": 5956 + }, + { + "epoch": 9.088134765625e-06, + "step": 5956, + "training_step_time": 0.11057281494140625 + }, + { + "epoch": 9.08966064453125e-06, + "model_forward_time": 0.025789737701416016, + "step": 5957 + }, + { + "epoch": 9.08966064453125e-06, + "step": 5957, + "training_step_time": 0.21548056602478027 + }, + { + "epoch": 9.0911865234375e-06, + "model_forward_time": 0.02542424201965332, + "step": 5958 + }, + { + "epoch": 9.0911865234375e-06, + "step": 5958, + "training_step_time": 0.12054777145385742 + }, + { + "epoch": 9.09271240234375e-06, + "model_forward_time": 0.025702714920043945, + "step": 5959 + }, + { + "epoch": 9.09271240234375e-06, + "step": 5959, + "training_step_time": 0.11264419555664062 + }, + { + "epoch": 9.09423828125e-06, + "grad_norm": 0.5005446672439575, + "learning_rate": 9.407818838345619e-05, + "loss": 0.0752, + "step": 5960 + }, + { + "epoch": 9.09423828125e-06, + "model_forward_time": 0.02633190155029297, + "step": 5960 + }, + { + "epoch": 9.09423828125e-06, + "step": 5960, + "training_step_time": 0.11399555206298828 + }, + { + "epoch": 9.09576416015625e-06, + "model_forward_time": 0.02656078338623047, + "step": 5961 + }, + { + "epoch": 9.09576416015625e-06, + "step": 5961, + "training_step_time": 0.11565780639648438 + }, + { + "epoch": 9.0972900390625e-06, + "model_forward_time": 0.027135848999023438, + "step": 5962 + }, + { + "epoch": 9.0972900390625e-06, + "step": 5962, + "training_step_time": 0.11146664619445801 + }, + { + "epoch": 9.09881591796875e-06, + "model_forward_time": 0.02624654769897461, + "step": 5963 + }, + { + "epoch": 9.09881591796875e-06, + "step": 5963, + "training_step_time": 0.10973644256591797 + }, + { + "epoch": 9.100341796875e-06, + "model_forward_time": 0.02635788917541504, + "step": 5964 + }, + { + "epoch": 9.100341796875e-06, + "step": 5964, + "training_step_time": 0.1099846363067627 + }, + { + "epoch": 9.10186767578125e-06, + "model_forward_time": 0.026107311248779297, + "step": 5965 + }, + { + "epoch": 9.10186767578125e-06, + "step": 5965, + "training_step_time": 0.10917329788208008 + }, + { + "epoch": 9.1033935546875e-06, + "model_forward_time": 0.026184558868408203, + "step": 5966 + }, + { + "epoch": 9.1033935546875e-06, + "step": 5966, + "training_step_time": 0.11481904983520508 + }, + { + "epoch": 9.10491943359375e-06, + "model_forward_time": 0.026482582092285156, + "step": 5967 + }, + { + "epoch": 9.10491943359375e-06, + "step": 5967, + "training_step_time": 0.10909175872802734 + }, + { + "epoch": 9.1064453125e-06, + "model_forward_time": 0.026105642318725586, + "step": 5968 + }, + { + "epoch": 9.1064453125e-06, + "step": 5968, + "training_step_time": 0.11098051071166992 + }, + { + "epoch": 9.10797119140625e-06, + "model_forward_time": 0.0263974666595459, + "step": 5969 + }, + { + "epoch": 9.10797119140625e-06, + "step": 5969, + "training_step_time": 0.11201834678649902 + }, + { + "epoch": 9.1094970703125e-06, + "grad_norm": 0.39406514167785645, + "learning_rate": 9.405214343720707e-05, + "loss": 0.0861, + "step": 5970 + }, + { + "epoch": 9.1094970703125e-06, + "model_forward_time": 0.0286407470703125, + "step": 5970 + }, + { + "epoch": 9.1094970703125e-06, + "step": 5970, + "training_step_time": 0.11314868927001953 + }, + { + "epoch": 9.11102294921875e-06, + "model_forward_time": 0.026851177215576172, + "step": 5971 + }, + { + "epoch": 9.11102294921875e-06, + "step": 5971, + "training_step_time": 0.11033892631530762 + }, + { + "epoch": 9.112548828125e-06, + "model_forward_time": 0.026098251342773438, + "step": 5972 + }, + { + "epoch": 9.112548828125e-06, + "step": 5972, + "training_step_time": 0.11026859283447266 + }, + { + "epoch": 9.11407470703125e-06, + "model_forward_time": 0.02621006965637207, + "step": 5973 + }, + { + "epoch": 9.11407470703125e-06, + "step": 5973, + "training_step_time": 0.1108860969543457 + }, + { + "epoch": 9.1156005859375e-06, + "model_forward_time": 0.026166439056396484, + "step": 5974 + }, + { + "epoch": 9.1156005859375e-06, + "step": 5974, + "training_step_time": 0.1348400115966797 + }, + { + "epoch": 9.11712646484375e-06, + "model_forward_time": 0.026229143142700195, + "step": 5975 + }, + { + "epoch": 9.11712646484375e-06, + "step": 5975, + "training_step_time": 0.11576271057128906 + }, + { + "epoch": 9.11865234375e-06, + "model_forward_time": 0.026325702667236328, + "step": 5976 + }, + { + "epoch": 9.11865234375e-06, + "step": 5976, + "training_step_time": 0.13353204727172852 + }, + { + "epoch": 9.12017822265625e-06, + "model_forward_time": 0.026549816131591797, + "step": 5977 + }, + { + "epoch": 9.12017822265625e-06, + "step": 5977, + "training_step_time": 0.15739655494689941 + }, + { + "epoch": 9.1217041015625e-06, + "model_forward_time": 0.025455713272094727, + "step": 5978 + }, + { + "epoch": 9.1217041015625e-06, + "step": 5978, + "training_step_time": 0.21822071075439453 + }, + { + "epoch": 9.12322998046875e-06, + "model_forward_time": 0.03693079948425293, + "step": 5979 + }, + { + "epoch": 9.12322998046875e-06, + "step": 5979, + "training_step_time": 0.1488494873046875 + }, + { + "epoch": 9.124755859375e-06, + "grad_norm": 0.29772940278053284, + "learning_rate": 9.402604496344984e-05, + "loss": 0.087, + "step": 5980 + }, + { + "epoch": 9.124755859375e-06, + "model_forward_time": 0.024530649185180664, + "step": 5980 + }, + { + "epoch": 9.124755859375e-06, + "step": 5980, + "training_step_time": 0.10814857482910156 + }, + { + "epoch": 9.12628173828125e-06, + "model_forward_time": 0.02520585060119629, + "step": 5981 + }, + { + "epoch": 9.12628173828125e-06, + "step": 5981, + "training_step_time": 0.11840581893920898 + }, + { + "epoch": 9.1278076171875e-06, + "model_forward_time": 0.025279760360717773, + "step": 5982 + }, + { + "epoch": 9.1278076171875e-06, + "step": 5982, + "training_step_time": 0.11430883407592773 + }, + { + "epoch": 9.12933349609375e-06, + "model_forward_time": 0.025073528289794922, + "step": 5983 + }, + { + "epoch": 9.12933349609375e-06, + "step": 5983, + "training_step_time": 0.1108250617980957 + }, + { + "epoch": 9.130859375e-06, + "model_forward_time": 0.025407075881958008, + "step": 5984 + }, + { + "epoch": 9.130859375e-06, + "step": 5984, + "training_step_time": 0.19687366485595703 + }, + { + "epoch": 9.13238525390625e-06, + "model_forward_time": 0.024477005004882812, + "step": 5985 + }, + { + "epoch": 9.13238525390625e-06, + "step": 5985, + "training_step_time": 0.10642647743225098 + }, + { + "epoch": 9.1339111328125e-06, + "model_forward_time": 0.024410486221313477, + "step": 5986 + }, + { + "epoch": 9.1339111328125e-06, + "step": 5986, + "training_step_time": 0.10787487030029297 + }, + { + "epoch": 9.13543701171875e-06, + "model_forward_time": 0.02472543716430664, + "step": 5987 + }, + { + "epoch": 9.13543701171875e-06, + "step": 5987, + "training_step_time": 0.11516404151916504 + }, + { + "epoch": 9.136962890625e-06, + "model_forward_time": 0.025516986846923828, + "step": 5988 + }, + { + "epoch": 9.136962890625e-06, + "step": 5988, + "training_step_time": 0.10868453979492188 + }, + { + "epoch": 9.13848876953125e-06, + "model_forward_time": 0.02478313446044922, + "step": 5989 + }, + { + "epoch": 9.13848876953125e-06, + "step": 5989, + "training_step_time": 0.11188912391662598 + }, + { + "epoch": 9.1400146484375e-06, + "grad_norm": 0.4380154311656952, + "learning_rate": 9.399989299389661e-05, + "loss": 0.089, + "step": 5990 + }, + { + "epoch": 9.1400146484375e-06, + "model_forward_time": 0.025496244430541992, + "step": 5990 + }, + { + "epoch": 9.1400146484375e-06, + "step": 5990, + "training_step_time": 0.11003255844116211 + }, + { + "epoch": 9.14154052734375e-06, + "model_forward_time": 0.025023698806762695, + "step": 5991 + }, + { + "epoch": 9.14154052734375e-06, + "step": 5991, + "training_step_time": 0.19466519355773926 + }, + { + "epoch": 9.14306640625e-06, + "model_forward_time": 0.024448156356811523, + "step": 5992 + }, + { + "epoch": 9.14306640625e-06, + "step": 5992, + "training_step_time": 0.1272737979888916 + }, + { + "epoch": 9.14459228515625e-06, + "model_forward_time": 0.02480459213256836, + "step": 5993 + }, + { + "epoch": 9.14459228515625e-06, + "step": 5993, + "training_step_time": 0.21369647979736328 + }, + { + "epoch": 9.1461181640625e-06, + "model_forward_time": 0.024152755737304688, + "step": 5994 + }, + { + "epoch": 9.1461181640625e-06, + "step": 5994, + "training_step_time": 0.13400959968566895 + }, + { + "epoch": 9.14764404296875e-06, + "model_forward_time": 0.02422952651977539, + "step": 5995 + }, + { + "epoch": 9.14764404296875e-06, + "step": 5995, + "training_step_time": 0.11555933952331543 + }, + { + "epoch": 9.149169921875e-06, + "model_forward_time": 0.025062084197998047, + "step": 5996 + }, + { + "epoch": 9.149169921875e-06, + "step": 5996, + "training_step_time": 0.11402225494384766 + }, + { + "epoch": 9.15069580078125e-06, + "model_forward_time": 0.025150299072265625, + "step": 5997 + }, + { + "epoch": 9.15069580078125e-06, + "step": 5997, + "training_step_time": 0.11268353462219238 + }, + { + "epoch": 9.1522216796875e-06, + "model_forward_time": 0.025678634643554688, + "step": 5998 + }, + { + "epoch": 9.1522216796875e-06, + "step": 5998, + "training_step_time": 0.2115478515625 + }, + { + "epoch": 9.15374755859375e-06, + "model_forward_time": 0.024707794189453125, + "step": 5999 + }, + { + "epoch": 9.15374755859375e-06, + "step": 5999, + "training_step_time": 0.11007881164550781 + }, + { + "epoch": 9.1552734375e-06, + "grad_norm": 0.3040682077407837, + "learning_rate": 9.397368756032445e-05, + "loss": 0.0881, + "step": 6000 + }, + { + "epoch": 9.1552734375e-06, + "model_forward_time": 0.026244163513183594, + "step": 6000 + }, + { + "epoch": 9.1552734375e-06, + "step": 6000, + "training_step_time": 0.11234521865844727 + }, + { + "epoch": 9.15679931640625e-06, + "model_forward_time": 0.02359604835510254, + "step": 6001 + }, + { + "epoch": 9.15679931640625e-06, + "step": 6001, + "training_step_time": 0.1065206527709961 + }, + { + "epoch": 9.1583251953125e-06, + "model_forward_time": 0.023903846740722656, + "step": 6002 + }, + { + "epoch": 9.1583251953125e-06, + "step": 6002, + "training_step_time": 0.10554099082946777 + }, + { + "epoch": 9.15985107421875e-06, + "model_forward_time": 0.024934053421020508, + "step": 6003 + }, + { + "epoch": 9.15985107421875e-06, + "step": 6003, + "training_step_time": 0.11041474342346191 + }, + { + "epoch": 9.161376953125e-06, + "model_forward_time": 0.025054454803466797, + "step": 6004 + }, + { + "epoch": 9.161376953125e-06, + "step": 6004, + "training_step_time": 0.12824249267578125 + }, + { + "epoch": 9.16290283203125e-06, + "model_forward_time": 0.02612471580505371, + "step": 6005 + }, + { + "epoch": 9.16290283203125e-06, + "step": 6005, + "training_step_time": 0.12433052062988281 + }, + { + "epoch": 9.1644287109375e-06, + "model_forward_time": 0.024792909622192383, + "step": 6006 + }, + { + "epoch": 9.1644287109375e-06, + "step": 6006, + "training_step_time": 0.11235356330871582 + }, + { + "epoch": 9.16595458984375e-06, + "model_forward_time": 0.025784969329833984, + "step": 6007 + }, + { + "epoch": 9.16595458984375e-06, + "step": 6007, + "training_step_time": 0.1082925796508789 + }, + { + "epoch": 9.16748046875e-06, + "model_forward_time": 0.030033111572265625, + "step": 6008 + }, + { + "epoch": 9.16748046875e-06, + "step": 6008, + "training_step_time": 0.17102766036987305 + }, + { + "epoch": 9.16900634765625e-06, + "model_forward_time": 0.025413990020751953, + "step": 6009 + }, + { + "epoch": 9.16900634765625e-06, + "step": 6009, + "training_step_time": 0.15809226036071777 + }, + { + "epoch": 9.1705322265625e-06, + "grad_norm": 0.5795313119888306, + "learning_rate": 9.394742869457547e-05, + "loss": 0.0773, + "step": 6010 + }, + { + "epoch": 9.1705322265625e-06, + "model_forward_time": 0.02652120590209961, + "step": 6010 + }, + { + "epoch": 9.1705322265625e-06, + "step": 6010, + "training_step_time": 0.11453890800476074 + }, + { + "epoch": 9.17205810546875e-06, + "model_forward_time": 0.02512526512145996, + "step": 6011 + }, + { + "epoch": 9.17205810546875e-06, + "step": 6011, + "training_step_time": 0.2179718017578125 + }, + { + "epoch": 9.173583984375e-06, + "model_forward_time": 0.025237560272216797, + "step": 6012 + }, + { + "epoch": 9.173583984375e-06, + "step": 6012, + "training_step_time": 0.11551570892333984 + }, + { + "epoch": 9.17510986328125e-06, + "model_forward_time": 0.02501392364501953, + "step": 6013 + }, + { + "epoch": 9.17510986328125e-06, + "step": 6013, + "training_step_time": 0.10555505752563477 + }, + { + "epoch": 9.1766357421875e-06, + "model_forward_time": 0.025770187377929688, + "step": 6014 + }, + { + "epoch": 9.1766357421875e-06, + "step": 6014, + "training_step_time": 0.11014151573181152 + }, + { + "epoch": 9.17816162109375e-06, + "model_forward_time": 0.025701522827148438, + "step": 6015 + }, + { + "epoch": 9.17816162109375e-06, + "step": 6015, + "training_step_time": 0.10884952545166016 + }, + { + "epoch": 9.1796875e-06, + "model_forward_time": 0.02527022361755371, + "step": 6016 + }, + { + "epoch": 9.1796875e-06, + "step": 6016, + "training_step_time": 0.11094999313354492 + }, + { + "epoch": 9.18121337890625e-06, + "model_forward_time": 0.02512502670288086, + "step": 6017 + }, + { + "epoch": 9.18121337890625e-06, + "step": 6017, + "training_step_time": 0.11036252975463867 + }, + { + "epoch": 9.1827392578125e-06, + "model_forward_time": 0.02505016326904297, + "step": 6018 + }, + { + "epoch": 9.1827392578125e-06, + "step": 6018, + "training_step_time": 0.1149897575378418 + }, + { + "epoch": 9.18426513671875e-06, + "model_forward_time": 0.025131702423095703, + "step": 6019 + }, + { + "epoch": 9.18426513671875e-06, + "step": 6019, + "training_step_time": 0.11183929443359375 + }, + { + "epoch": 9.185791015625e-06, + "grad_norm": 0.3081001341342926, + "learning_rate": 9.392111642855665e-05, + "loss": 0.0758, + "step": 6020 + }, + { + "epoch": 9.185791015625e-06, + "model_forward_time": 0.025185108184814453, + "step": 6020 + }, + { + "epoch": 9.185791015625e-06, + "step": 6020, + "training_step_time": 0.11016368865966797 + }, + { + "epoch": 9.18731689453125e-06, + "model_forward_time": 0.025304079055786133, + "step": 6021 + }, + { + "epoch": 9.18731689453125e-06, + "step": 6021, + "training_step_time": 0.11100363731384277 + }, + { + "epoch": 9.1888427734375e-06, + "model_forward_time": 0.02507615089416504, + "step": 6022 + }, + { + "epoch": 9.1888427734375e-06, + "step": 6022, + "training_step_time": 0.10936427116394043 + }, + { + "epoch": 9.19036865234375e-06, + "model_forward_time": 0.026483774185180664, + "step": 6023 + }, + { + "epoch": 9.19036865234375e-06, + "step": 6023, + "training_step_time": 0.11307716369628906 + }, + { + "epoch": 9.19189453125e-06, + "model_forward_time": 0.025040864944458008, + "step": 6024 + }, + { + "epoch": 9.19189453125e-06, + "step": 6024, + "training_step_time": 0.10874485969543457 + }, + { + "epoch": 9.19342041015625e-06, + "model_forward_time": 0.026123523712158203, + "step": 6025 + }, + { + "epoch": 9.19342041015625e-06, + "step": 6025, + "training_step_time": 0.10905599594116211 + }, + { + "epoch": 9.1949462890625e-06, + "model_forward_time": 0.025483369827270508, + "step": 6026 + }, + { + "epoch": 9.1949462890625e-06, + "step": 6026, + "training_step_time": 0.10969758033752441 + }, + { + "epoch": 9.19647216796875e-06, + "model_forward_time": 0.025699377059936523, + "step": 6027 + }, + { + "epoch": 9.19647216796875e-06, + "step": 6027, + "training_step_time": 0.21152830123901367 + }, + { + "epoch": 9.197998046875e-06, + "model_forward_time": 0.024741172790527344, + "step": 6028 + }, + { + "epoch": 9.197998046875e-06, + "step": 6028, + "training_step_time": 0.117645263671875 + }, + { + "epoch": 9.19952392578125e-06, + "model_forward_time": 0.024483203887939453, + "step": 6029 + }, + { + "epoch": 9.19952392578125e-06, + "step": 6029, + "training_step_time": 0.12608861923217773 + }, + { + "epoch": 9.2010498046875e-06, + "grad_norm": 0.2689782679080963, + "learning_rate": 9.389475079423988e-05, + "loss": 0.0849, + "step": 6030 + }, + { + "epoch": 9.2010498046875e-06, + "model_forward_time": 0.02559518814086914, + "step": 6030 + }, + { + "epoch": 9.2010498046875e-06, + "step": 6030, + "training_step_time": 0.15968847274780273 + }, + { + "epoch": 9.20257568359375e-06, + "model_forward_time": 0.025335311889648438, + "step": 6031 + }, + { + "epoch": 9.20257568359375e-06, + "step": 6031, + "training_step_time": 0.17377400398254395 + }, + { + "epoch": 9.2041015625e-06, + "model_forward_time": 0.024378538131713867, + "step": 6032 + }, + { + "epoch": 9.2041015625e-06, + "step": 6032, + "training_step_time": 0.21812844276428223 + }, + { + "epoch": 9.20562744140625e-06, + "model_forward_time": 0.024892568588256836, + "step": 6033 + }, + { + "epoch": 9.20562744140625e-06, + "step": 6033, + "training_step_time": 0.11520123481750488 + }, + { + "epoch": 9.2071533203125e-06, + "model_forward_time": 0.02422475814819336, + "step": 6034 + }, + { + "epoch": 9.2071533203125e-06, + "step": 6034, + "training_step_time": 0.11438488960266113 + }, + { + "epoch": 9.20867919921875e-06, + "model_forward_time": 0.025313615798950195, + "step": 6035 + }, + { + "epoch": 9.20867919921875e-06, + "step": 6035, + "training_step_time": 0.1146860122680664 + }, + { + "epoch": 9.210205078125e-06, + "model_forward_time": 0.025505542755126953, + "step": 6036 + }, + { + "epoch": 9.210205078125e-06, + "step": 6036, + "training_step_time": 0.1875319480895996 + }, + { + "epoch": 9.21173095703125e-06, + "model_forward_time": 0.0250699520111084, + "step": 6037 + }, + { + "epoch": 9.21173095703125e-06, + "step": 6037, + "training_step_time": 0.11597681045532227 + }, + { + "epoch": 9.2132568359375e-06, + "model_forward_time": 0.024235010147094727, + "step": 6038 + }, + { + "epoch": 9.2132568359375e-06, + "step": 6038, + "training_step_time": 0.10536479949951172 + }, + { + "epoch": 9.21478271484375e-06, + "model_forward_time": 0.02575087547302246, + "step": 6039 + }, + { + "epoch": 9.21478271484375e-06, + "step": 6039, + "training_step_time": 0.11037302017211914 + }, + { + "epoch": 9.21630859375e-06, + "grad_norm": 0.48524272441864014, + "learning_rate": 9.38683318236619e-05, + "loss": 0.0829, + "step": 6040 + }, + { + "epoch": 9.21630859375e-06, + "model_forward_time": 0.025158405303955078, + "step": 6040 + }, + { + "epoch": 9.21630859375e-06, + "step": 6040, + "training_step_time": 0.11300539970397949 + }, + { + "epoch": 9.21783447265625e-06, + "model_forward_time": 0.025410175323486328, + "step": 6041 + }, + { + "epoch": 9.21783447265625e-06, + "step": 6041, + "training_step_time": 0.11036491394042969 + }, + { + "epoch": 9.2193603515625e-06, + "model_forward_time": 0.025224685668945312, + "step": 6042 + }, + { + "epoch": 9.2193603515625e-06, + "step": 6042, + "training_step_time": 0.10720705986022949 + }, + { + "epoch": 9.22088623046875e-06, + "model_forward_time": 0.025285720825195312, + "step": 6043 + }, + { + "epoch": 9.22088623046875e-06, + "step": 6043, + "training_step_time": 0.11042618751525879 + }, + { + "epoch": 9.222412109375e-06, + "model_forward_time": 0.025348901748657227, + "step": 6044 + }, + { + "epoch": 9.222412109375e-06, + "step": 6044, + "training_step_time": 0.11624026298522949 + }, + { + "epoch": 9.22393798828125e-06, + "model_forward_time": 0.025226116180419922, + "step": 6045 + }, + { + "epoch": 9.22393798828125e-06, + "step": 6045, + "training_step_time": 0.11383605003356934 + }, + { + "epoch": 9.2254638671875e-06, + "model_forward_time": 0.0254974365234375, + "step": 6046 + }, + { + "epoch": 9.2254638671875e-06, + "step": 6046, + "training_step_time": 0.12353253364562988 + }, + { + "epoch": 9.22698974609375e-06, + "model_forward_time": 0.0258944034576416, + "step": 6047 + }, + { + "epoch": 9.22698974609375e-06, + "step": 6047, + "training_step_time": 0.11751556396484375 + }, + { + "epoch": 9.228515625e-06, + "model_forward_time": 0.02533435821533203, + "step": 6048 + }, + { + "epoch": 9.228515625e-06, + "step": 6048, + "training_step_time": 0.17653131484985352 + }, + { + "epoch": 9.23004150390625e-06, + "model_forward_time": 0.024591684341430664, + "step": 6049 + }, + { + "epoch": 9.23004150390625e-06, + "step": 6049, + "training_step_time": 0.1816096305847168 + }, + { + "epoch": 9.2315673828125e-06, + "grad_norm": 0.4617111086845398, + "learning_rate": 9.384185954892422e-05, + "loss": 0.0872, + "step": 6050 + }, + { + "epoch": 9.2315673828125e-06, + "model_forward_time": 0.024418115615844727, + "step": 6050 + }, + { + "epoch": 9.2315673828125e-06, + "step": 6050, + "training_step_time": 0.11177706718444824 + }, + { + "epoch": 9.23309326171875e-06, + "model_forward_time": 0.029127836227416992, + "step": 6051 + }, + { + "epoch": 9.23309326171875e-06, + "step": 6051, + "training_step_time": 0.11348795890808105 + }, + { + "epoch": 9.234619140625e-06, + "model_forward_time": 0.025887012481689453, + "step": 6052 + }, + { + "epoch": 9.234619140625e-06, + "step": 6052, + "training_step_time": 0.216156005859375 + }, + { + "epoch": 9.23614501953125e-06, + "model_forward_time": 0.025029420852661133, + "step": 6053 + }, + { + "epoch": 9.23614501953125e-06, + "step": 6053, + "training_step_time": 0.11359930038452148 + }, + { + "epoch": 9.2376708984375e-06, + "model_forward_time": 0.025159835815429688, + "step": 6054 + }, + { + "epoch": 9.2376708984375e-06, + "step": 6054, + "training_step_time": 0.10709095001220703 + }, + { + "epoch": 9.23919677734375e-06, + "model_forward_time": 0.025363922119140625, + "step": 6055 + }, + { + "epoch": 9.23919677734375e-06, + "step": 6055, + "training_step_time": 0.183363676071167 + }, + { + "epoch": 9.24072265625e-06, + "model_forward_time": 0.024658918380737305, + "step": 6056 + }, + { + "epoch": 9.24072265625e-06, + "step": 6056, + "training_step_time": 0.13089203834533691 + }, + { + "epoch": 9.24224853515625e-06, + "model_forward_time": 0.02485799789428711, + "step": 6057 + }, + { + "epoch": 9.24224853515625e-06, + "step": 6057, + "training_step_time": 0.10773396492004395 + }, + { + "epoch": 9.2437744140625e-06, + "model_forward_time": 0.025780677795410156, + "step": 6058 + }, + { + "epoch": 9.2437744140625e-06, + "step": 6058, + "training_step_time": 0.10868597030639648 + }, + { + "epoch": 9.24530029296875e-06, + "model_forward_time": 0.025675296783447266, + "step": 6059 + }, + { + "epoch": 9.24530029296875e-06, + "step": 6059, + "training_step_time": 0.11011481285095215 + }, + { + "epoch": 9.246826171875e-06, + "grad_norm": 0.436614066362381, + "learning_rate": 9.381533400219318e-05, + "loss": 0.0644, + "step": 6060 + }, + { + "epoch": 9.246826171875e-06, + "model_forward_time": 0.025234222412109375, + "step": 6060 + }, + { + "epoch": 9.246826171875e-06, + "step": 6060, + "training_step_time": 0.11311459541320801 + }, + { + "epoch": 9.24835205078125e-06, + "model_forward_time": 0.025025367736816406, + "step": 6061 + }, + { + "epoch": 9.24835205078125e-06, + "step": 6061, + "training_step_time": 0.10574030876159668 + }, + { + "epoch": 9.2498779296875e-06, + "model_forward_time": 0.025058269500732422, + "step": 6062 + }, + { + "epoch": 9.2498779296875e-06, + "step": 6062, + "training_step_time": 0.10557246208190918 + }, + { + "epoch": 9.25140380859375e-06, + "model_forward_time": 0.025112390518188477, + "step": 6063 + }, + { + "epoch": 9.25140380859375e-06, + "step": 6063, + "training_step_time": 0.10891342163085938 + }, + { + "epoch": 9.2529296875e-06, + "model_forward_time": 0.02516961097717285, + "step": 6064 + }, + { + "epoch": 9.2529296875e-06, + "step": 6064, + "training_step_time": 0.10945272445678711 + }, + { + "epoch": 9.25445556640625e-06, + "model_forward_time": 0.025258541107177734, + "step": 6065 + }, + { + "epoch": 9.25445556640625e-06, + "step": 6065, + "training_step_time": 0.11072397232055664 + }, + { + "epoch": 9.2559814453125e-06, + "model_forward_time": 0.026082754135131836, + "step": 6066 + }, + { + "epoch": 9.2559814453125e-06, + "step": 6066, + "training_step_time": 0.11956906318664551 + }, + { + "epoch": 9.25750732421875e-06, + "model_forward_time": 0.024955272674560547, + "step": 6067 + }, + { + "epoch": 9.25750732421875e-06, + "step": 6067, + "training_step_time": 0.10808420181274414 + }, + { + "epoch": 9.259033203125e-06, + "model_forward_time": 0.02546095848083496, + "step": 6068 + }, + { + "epoch": 9.259033203125e-06, + "step": 6068, + "training_step_time": 0.10856771469116211 + }, + { + "epoch": 9.26055908203125e-06, + "model_forward_time": 0.024938583374023438, + "step": 6069 + }, + { + "epoch": 9.26055908203125e-06, + "step": 6069, + "training_step_time": 0.1090707778930664 + }, + { + "epoch": 9.2620849609375e-06, + "grad_norm": 0.5905839204788208, + "learning_rate": 9.378875521569981e-05, + "loss": 0.0834, + "step": 6070 + }, + { + "epoch": 9.2620849609375e-06, + "model_forward_time": 0.025115966796875, + "step": 6070 + }, + { + "epoch": 9.2620849609375e-06, + "step": 6070, + "training_step_time": 0.18505430221557617 + }, + { + "epoch": 9.26361083984375e-06, + "model_forward_time": 0.024486303329467773, + "step": 6071 + }, + { + "epoch": 9.26361083984375e-06, + "step": 6071, + "training_step_time": 0.20850515365600586 + }, + { + "epoch": 9.26513671875e-06, + "model_forward_time": 0.024213314056396484, + "step": 6072 + }, + { + "epoch": 9.26513671875e-06, + "step": 6072, + "training_step_time": 0.20346498489379883 + }, + { + "epoch": 9.26666259765625e-06, + "model_forward_time": 0.023839235305786133, + "step": 6073 + }, + { + "epoch": 9.26666259765625e-06, + "step": 6073, + "training_step_time": 0.20498895645141602 + }, + { + "epoch": 9.2681884765625e-06, + "model_forward_time": 0.02411675453186035, + "step": 6074 + }, + { + "epoch": 9.2681884765625e-06, + "step": 6074, + "training_step_time": 0.11251711845397949 + }, + { + "epoch": 9.26971435546875e-06, + "model_forward_time": 0.025182723999023438, + "step": 6075 + }, + { + "epoch": 9.26971435546875e-06, + "step": 6075, + "training_step_time": 0.10746002197265625 + }, + { + "epoch": 9.271240234375e-06, + "model_forward_time": 0.02534770965576172, + "step": 6076 + }, + { + "epoch": 9.271240234375e-06, + "step": 6076, + "training_step_time": 0.15038490295410156 + }, + { + "epoch": 9.27276611328125e-06, + "model_forward_time": 0.02530503273010254, + "step": 6077 + }, + { + "epoch": 9.27276611328125e-06, + "step": 6077, + "training_step_time": 0.10592985153198242 + }, + { + "epoch": 9.2742919921875e-06, + "model_forward_time": 0.025371551513671875, + "step": 6078 + }, + { + "epoch": 9.2742919921875e-06, + "step": 6078, + "training_step_time": 0.10715937614440918 + }, + { + "epoch": 9.27581787109375e-06, + "model_forward_time": 0.025108814239501953, + "step": 6079 + }, + { + "epoch": 9.27581787109375e-06, + "step": 6079, + "training_step_time": 0.11736893653869629 + }, + { + "epoch": 9.27734375e-06, + "grad_norm": 0.40063944458961487, + "learning_rate": 9.376212322173985e-05, + "loss": 0.0914, + "step": 6080 + }, + { + "epoch": 9.27734375e-06, + "model_forward_time": 0.026326894760131836, + "step": 6080 + }, + { + "epoch": 9.27734375e-06, + "step": 6080, + "training_step_time": 0.10839056968688965 + }, + { + "epoch": 9.27886962890625e-06, + "model_forward_time": 0.02506279945373535, + "step": 6081 + }, + { + "epoch": 9.27886962890625e-06, + "step": 6081, + "training_step_time": 0.1984419822692871 + }, + { + "epoch": 9.2803955078125e-06, + "model_forward_time": 0.024677753448486328, + "step": 6082 + }, + { + "epoch": 9.2803955078125e-06, + "step": 6082, + "training_step_time": 0.1103506088256836 + }, + { + "epoch": 9.28192138671875e-06, + "model_forward_time": 0.0246737003326416, + "step": 6083 + }, + { + "epoch": 9.28192138671875e-06, + "step": 6083, + "training_step_time": 0.10547709465026855 + }, + { + "epoch": 9.283447265625e-06, + "model_forward_time": 0.02567601203918457, + "step": 6084 + }, + { + "epoch": 9.283447265625e-06, + "step": 6084, + "training_step_time": 0.11008095741271973 + }, + { + "epoch": 9.28497314453125e-06, + "model_forward_time": 0.025330543518066406, + "step": 6085 + }, + { + "epoch": 9.28497314453125e-06, + "step": 6085, + "training_step_time": 0.10984945297241211 + }, + { + "epoch": 9.2864990234375e-06, + "model_forward_time": 0.025125503540039062, + "step": 6086 + }, + { + "epoch": 9.2864990234375e-06, + "step": 6086, + "training_step_time": 0.11048316955566406 + }, + { + "epoch": 9.28802490234375e-06, + "model_forward_time": 0.024862289428710938, + "step": 6087 + }, + { + "epoch": 9.28802490234375e-06, + "step": 6087, + "training_step_time": 0.11214327812194824 + }, + { + "epoch": 9.28955078125e-06, + "model_forward_time": 0.025130271911621094, + "step": 6088 + }, + { + "epoch": 9.28955078125e-06, + "step": 6088, + "training_step_time": 0.11055469512939453 + }, + { + "epoch": 9.29107666015625e-06, + "model_forward_time": 0.025161027908325195, + "step": 6089 + }, + { + "epoch": 9.29107666015625e-06, + "step": 6089, + "training_step_time": 0.10976648330688477 + }, + { + "epoch": 9.2926025390625e-06, + "grad_norm": 0.5862994194030762, + "learning_rate": 9.373543805267368e-05, + "loss": 0.0908, + "step": 6090 + }, + { + "epoch": 9.2926025390625e-06, + "model_forward_time": 0.02575206756591797, + "step": 6090 + }, + { + "epoch": 9.2926025390625e-06, + "step": 6090, + "training_step_time": 0.17469048500061035 + }, + { + "epoch": 9.29412841796875e-06, + "model_forward_time": 0.024547576904296875, + "step": 6091 + }, + { + "epoch": 9.29412841796875e-06, + "step": 6091, + "training_step_time": 0.10777735710144043 + }, + { + "epoch": 9.295654296875e-06, + "model_forward_time": 0.02457904815673828, + "step": 6092 + }, + { + "epoch": 9.295654296875e-06, + "step": 6092, + "training_step_time": 0.11010622978210449 + }, + { + "epoch": 9.29718017578125e-06, + "model_forward_time": 0.025714874267578125, + "step": 6093 + }, + { + "epoch": 9.29718017578125e-06, + "step": 6093, + "training_step_time": 0.11882877349853516 + }, + { + "epoch": 9.2987060546875e-06, + "model_forward_time": 0.026005029678344727, + "step": 6094 + }, + { + "epoch": 9.2987060546875e-06, + "step": 6094, + "training_step_time": 0.13192081451416016 + }, + { + "epoch": 9.30023193359375e-06, + "model_forward_time": 0.02557516098022461, + "step": 6095 + }, + { + "epoch": 9.30023193359375e-06, + "step": 6095, + "training_step_time": 0.1109921932220459 + }, + { + "epoch": 9.3017578125e-06, + "model_forward_time": 0.02533245086669922, + "step": 6096 + }, + { + "epoch": 9.3017578125e-06, + "step": 6096, + "training_step_time": 0.10853457450866699 + }, + { + "epoch": 9.30328369140625e-06, + "model_forward_time": 0.024608373641967773, + "step": 6097 + }, + { + "epoch": 9.30328369140625e-06, + "step": 6097, + "training_step_time": 0.10795736312866211 + }, + { + "epoch": 9.3048095703125e-06, + "model_forward_time": 0.025374174118041992, + "step": 6098 + }, + { + "epoch": 9.3048095703125e-06, + "step": 6098, + "training_step_time": 0.11507225036621094 + }, + { + "epoch": 9.30633544921875e-06, + "model_forward_time": 0.02528977394104004, + "step": 6099 + }, + { + "epoch": 9.30633544921875e-06, + "step": 6099, + "training_step_time": 0.11120748519897461 + }, + { + "epoch": 9.307861328125e-06, + "grad_norm": 0.621437132358551, + "learning_rate": 9.370869974092629e-05, + "loss": 0.1028, + "step": 6100 + }, + { + "epoch": 9.307861328125e-06, + "model_forward_time": 0.02544379234313965, + "step": 6100 + }, + { + "epoch": 9.307861328125e-06, + "step": 6100, + "training_step_time": 0.11160445213317871 + }, + { + "epoch": 9.30938720703125e-06, + "model_forward_time": 0.025341033935546875, + "step": 6101 + }, + { + "epoch": 9.30938720703125e-06, + "step": 6101, + "training_step_time": 0.21409273147583008 + }, + { + "epoch": 9.3109130859375e-06, + "model_forward_time": 0.025267362594604492, + "step": 6102 + }, + { + "epoch": 9.3109130859375e-06, + "step": 6102, + "training_step_time": 0.12935900688171387 + }, + { + "epoch": 9.31243896484375e-06, + "model_forward_time": 0.024766921997070312, + "step": 6103 + }, + { + "epoch": 9.31243896484375e-06, + "step": 6103, + "training_step_time": 0.1256544589996338 + }, + { + "epoch": 9.31396484375e-06, + "model_forward_time": 0.024988174438476562, + "step": 6104 + }, + { + "epoch": 9.31396484375e-06, + "step": 6104, + "training_step_time": 0.12459301948547363 + }, + { + "epoch": 9.31549072265625e-06, + "model_forward_time": 0.025059223175048828, + "step": 6105 + }, + { + "epoch": 9.31549072265625e-06, + "step": 6105, + "training_step_time": 0.12719130516052246 + }, + { + "epoch": 9.3170166015625e-06, + "model_forward_time": 0.025389671325683594, + "step": 6106 + }, + { + "epoch": 9.3170166015625e-06, + "step": 6106, + "training_step_time": 0.1249997615814209 + }, + { + "epoch": 9.31854248046875e-06, + "model_forward_time": 0.024181842803955078, + "step": 6107 + }, + { + "epoch": 9.31854248046875e-06, + "step": 6107, + "training_step_time": 0.12222790718078613 + }, + { + "epoch": 9.320068359375e-06, + "model_forward_time": 0.02405714988708496, + "step": 6108 + }, + { + "epoch": 9.320068359375e-06, + "step": 6108, + "training_step_time": 0.12341189384460449 + }, + { + "epoch": 9.32159423828125e-06, + "model_forward_time": 0.024137020111083984, + "step": 6109 + }, + { + "epoch": 9.32159423828125e-06, + "step": 6109, + "training_step_time": 0.11735177040100098 + }, + { + "epoch": 9.3231201171875e-06, + "grad_norm": 0.6244925856590271, + "learning_rate": 9.368190831898724e-05, + "loss": 0.0705, + "step": 6110 + }, + { + "epoch": 9.3231201171875e-06, + "model_forward_time": 0.024169921875, + "step": 6110 + }, + { + "epoch": 9.3231201171875e-06, + "step": 6110, + "training_step_time": 0.11231660842895508 + }, + { + "epoch": 9.32464599609375e-06, + "model_forward_time": 0.025360822677612305, + "step": 6111 + }, + { + "epoch": 9.32464599609375e-06, + "step": 6111, + "training_step_time": 0.1119542121887207 + }, + { + "epoch": 9.326171875e-06, + "model_forward_time": 0.02526235580444336, + "step": 6112 + }, + { + "epoch": 9.326171875e-06, + "step": 6112, + "training_step_time": 0.12076354026794434 + }, + { + "epoch": 9.32769775390625e-06, + "model_forward_time": 0.025669574737548828, + "step": 6113 + }, + { + "epoch": 9.32769775390625e-06, + "step": 6113, + "training_step_time": 0.11294817924499512 + }, + { + "epoch": 9.3292236328125e-06, + "model_forward_time": 0.025708436965942383, + "step": 6114 + }, + { + "epoch": 9.3292236328125e-06, + "step": 6114, + "training_step_time": 0.11118698120117188 + }, + { + "epoch": 9.33074951171875e-06, + "model_forward_time": 0.025431156158447266, + "step": 6115 + }, + { + "epoch": 9.33074951171875e-06, + "step": 6115, + "training_step_time": 0.11300110816955566 + }, + { + "epoch": 9.332275390625e-06, + "model_forward_time": 0.02508091926574707, + "step": 6116 + }, + { + "epoch": 9.332275390625e-06, + "step": 6116, + "training_step_time": 0.14181160926818848 + }, + { + "epoch": 9.33380126953125e-06, + "model_forward_time": 0.028625011444091797, + "step": 6117 + }, + { + "epoch": 9.33380126953125e-06, + "step": 6117, + "training_step_time": 0.1242678165435791 + }, + { + "epoch": 9.3353271484375e-06, + "model_forward_time": 0.02494978904724121, + "step": 6118 + }, + { + "epoch": 9.3353271484375e-06, + "step": 6118, + "training_step_time": 0.13138628005981445 + }, + { + "epoch": 9.33685302734375e-06, + "model_forward_time": 0.025508642196655273, + "step": 6119 + }, + { + "epoch": 9.33685302734375e-06, + "step": 6119, + "training_step_time": 0.1087806224822998 + }, + { + "epoch": 9.33837890625e-06, + "grad_norm": 0.3642801344394684, + "learning_rate": 9.365506381941066e-05, + "loss": 0.0719, + "step": 6120 + }, + { + "epoch": 9.33837890625e-06, + "model_forward_time": 0.02524399757385254, + "step": 6120 + }, + { + "epoch": 9.33837890625e-06, + "step": 6120, + "training_step_time": 0.17859911918640137 + }, + { + "epoch": 9.33990478515625e-06, + "model_forward_time": 0.025277137756347656, + "step": 6121 + }, + { + "epoch": 9.33990478515625e-06, + "step": 6121, + "training_step_time": 0.21158599853515625 + }, + { + "epoch": 9.3414306640625e-06, + "model_forward_time": 0.025077104568481445, + "step": 6122 + }, + { + "epoch": 9.3414306640625e-06, + "step": 6122, + "training_step_time": 0.10510659217834473 + }, + { + "epoch": 9.34295654296875e-06, + "model_forward_time": 0.026541471481323242, + "step": 6123 + }, + { + "epoch": 9.34295654296875e-06, + "step": 6123, + "training_step_time": 0.11869454383850098 + }, + { + "epoch": 9.344482421875e-06, + "model_forward_time": 0.025813817977905273, + "step": 6124 + }, + { + "epoch": 9.344482421875e-06, + "step": 6124, + "training_step_time": 0.11863088607788086 + }, + { + "epoch": 9.34600830078125e-06, + "model_forward_time": 0.025994300842285156, + "step": 6125 + }, + { + "epoch": 9.34600830078125e-06, + "step": 6125, + "training_step_time": 0.10991549491882324 + }, + { + "epoch": 9.3475341796875e-06, + "model_forward_time": 0.02545905113220215, + "step": 6126 + }, + { + "epoch": 9.3475341796875e-06, + "step": 6126, + "training_step_time": 0.19866538047790527 + }, + { + "epoch": 9.34906005859375e-06, + "model_forward_time": 0.02476978302001953, + "step": 6127 + }, + { + "epoch": 9.34906005859375e-06, + "step": 6127, + "training_step_time": 0.10454964637756348 + }, + { + "epoch": 9.3505859375e-06, + "model_forward_time": 0.02542424201965332, + "step": 6128 + }, + { + "epoch": 9.3505859375e-06, + "step": 6128, + "training_step_time": 0.11229300498962402 + }, + { + "epoch": 9.35211181640625e-06, + "model_forward_time": 0.025456905364990234, + "step": 6129 + }, + { + "epoch": 9.35211181640625e-06, + "step": 6129, + "training_step_time": 0.10899591445922852 + }, + { + "epoch": 9.3536376953125e-06, + "grad_norm": 0.30002570152282715, + "learning_rate": 9.362816627481512e-05, + "loss": 0.0783, + "step": 6130 + }, + { + "epoch": 9.3536376953125e-06, + "model_forward_time": 0.02555537223815918, + "step": 6130 + }, + { + "epoch": 9.3536376953125e-06, + "step": 6130, + "training_step_time": 0.10988306999206543 + }, + { + "epoch": 9.35516357421875e-06, + "model_forward_time": 0.02541518211364746, + "step": 6131 + }, + { + "epoch": 9.35516357421875e-06, + "step": 6131, + "training_step_time": 0.1168522834777832 + }, + { + "epoch": 9.356689453125e-06, + "model_forward_time": 0.025153636932373047, + "step": 6132 + }, + { + "epoch": 9.356689453125e-06, + "step": 6132, + "training_step_time": 0.10937237739562988 + }, + { + "epoch": 9.35821533203125e-06, + "model_forward_time": 0.025398969650268555, + "step": 6133 + }, + { + "epoch": 9.35821533203125e-06, + "step": 6133, + "training_step_time": 0.1102149486541748 + }, + { + "epoch": 9.3597412109375e-06, + "model_forward_time": 0.02559661865234375, + "step": 6134 + }, + { + "epoch": 9.3597412109375e-06, + "step": 6134, + "training_step_time": 0.10921001434326172 + }, + { + "epoch": 9.36126708984375e-06, + "model_forward_time": 0.025552749633789062, + "step": 6135 + }, + { + "epoch": 9.36126708984375e-06, + "step": 6135, + "training_step_time": 0.18670964241027832 + }, + { + "epoch": 9.36279296875e-06, + "model_forward_time": 0.02482128143310547, + "step": 6136 + }, + { + "epoch": 9.36279296875e-06, + "step": 6136, + "training_step_time": 0.10925698280334473 + }, + { + "epoch": 9.36431884765625e-06, + "model_forward_time": 0.0248110294342041, + "step": 6137 + }, + { + "epoch": 9.36431884765625e-06, + "step": 6137, + "training_step_time": 0.10845065116882324 + }, + { + "epoch": 9.3658447265625e-06, + "model_forward_time": 0.025116920471191406, + "step": 6138 + }, + { + "epoch": 9.3658447265625e-06, + "step": 6138, + "training_step_time": 0.12160754203796387 + }, + { + "epoch": 9.36737060546875e-06, + "model_forward_time": 0.025736331939697266, + "step": 6139 + }, + { + "epoch": 9.36737060546875e-06, + "step": 6139, + "training_step_time": 0.13090991973876953 + }, + { + "epoch": 9.368896484375e-06, + "grad_norm": 0.35604357719421387, + "learning_rate": 9.360121571788371e-05, + "loss": 0.0863, + "step": 6140 + }, + { + "epoch": 9.368896484375e-06, + "model_forward_time": 0.02489185333251953, + "step": 6140 + }, + { + "epoch": 9.368896484375e-06, + "step": 6140, + "training_step_time": 0.1155397891998291 + }, + { + "epoch": 9.37042236328125e-06, + "model_forward_time": 0.025765657424926758, + "step": 6141 + }, + { + "epoch": 9.37042236328125e-06, + "step": 6141, + "training_step_time": 0.10877752304077148 + }, + { + "epoch": 9.3719482421875e-06, + "model_forward_time": 0.025333404541015625, + "step": 6142 + }, + { + "epoch": 9.3719482421875e-06, + "step": 6142, + "training_step_time": 0.21905922889709473 + }, + { + "epoch": 9.37347412109375e-06, + "model_forward_time": 0.02503657341003418, + "step": 6143 + }, + { + "epoch": 9.37347412109375e-06, + "step": 6143, + "training_step_time": 0.11533713340759277 + }, + { + "epoch": 9.375e-06, + "model_forward_time": 0.024666786193847656, + "step": 6144 + }, + { + "epoch": 9.375e-06, + "step": 6144, + "training_step_time": 0.1097874641418457 + }, + { + "epoch": 9.37652587890625e-06, + "model_forward_time": 0.025460243225097656, + "step": 6145 + }, + { + "epoch": 9.37652587890625e-06, + "step": 6145, + "training_step_time": 0.21968793869018555 + }, + { + "epoch": 9.3780517578125e-06, + "model_forward_time": 0.02520298957824707, + "step": 6146 + }, + { + "epoch": 9.3780517578125e-06, + "step": 6146, + "training_step_time": 0.11222696304321289 + }, + { + "epoch": 9.37957763671875e-06, + "model_forward_time": 0.0250399112701416, + "step": 6147 + }, + { + "epoch": 9.37957763671875e-06, + "step": 6147, + "training_step_time": 0.10654711723327637 + }, + { + "epoch": 9.381103515625e-06, + "model_forward_time": 0.025681018829345703, + "step": 6148 + }, + { + "epoch": 9.381103515625e-06, + "step": 6148, + "training_step_time": 0.11099672317504883 + }, + { + "epoch": 9.38262939453125e-06, + "model_forward_time": 0.02589869499206543, + "step": 6149 + }, + { + "epoch": 9.38262939453125e-06, + "step": 6149, + "training_step_time": 0.10991072654724121 + }, + { + "epoch": 9.3841552734375e-06, + "grad_norm": 0.37343111634254456, + "learning_rate": 9.357421218136386e-05, + "loss": 0.0946, + "step": 6150 + }, + { + "epoch": 9.3841552734375e-06, + "model_forward_time": 0.02591109275817871, + "step": 6150 + }, + { + "epoch": 9.3841552734375e-06, + "step": 6150, + "training_step_time": 0.11036872863769531 + }, + { + "epoch": 9.38568115234375e-06, + "model_forward_time": 0.025465011596679688, + "step": 6151 + }, + { + "epoch": 9.38568115234375e-06, + "step": 6151, + "training_step_time": 0.11012673377990723 + }, + { + "epoch": 9.38720703125e-06, + "model_forward_time": 0.025464296340942383, + "step": 6152 + }, + { + "epoch": 9.38720703125e-06, + "step": 6152, + "training_step_time": 0.10990166664123535 + }, + { + "epoch": 9.38873291015625e-06, + "model_forward_time": 0.02550649642944336, + "step": 6153 + }, + { + "epoch": 9.38873291015625e-06, + "step": 6153, + "training_step_time": 0.11498093605041504 + }, + { + "epoch": 9.3902587890625e-06, + "model_forward_time": 0.025923728942871094, + "step": 6154 + }, + { + "epoch": 9.3902587890625e-06, + "step": 6154, + "training_step_time": 0.1114034652709961 + }, + { + "epoch": 9.39178466796875e-06, + "model_forward_time": 0.025403499603271484, + "step": 6155 + }, + { + "epoch": 9.39178466796875e-06, + "step": 6155, + "training_step_time": 0.11058354377746582 + }, + { + "epoch": 9.393310546875e-06, + "model_forward_time": 0.025498151779174805, + "step": 6156 + }, + { + "epoch": 9.393310546875e-06, + "step": 6156, + "training_step_time": 0.10887289047241211 + }, + { + "epoch": 9.39483642578125e-06, + "model_forward_time": 0.025255203247070312, + "step": 6157 + }, + { + "epoch": 9.39483642578125e-06, + "step": 6157, + "training_step_time": 0.10874533653259277 + }, + { + "epoch": 9.3963623046875e-06, + "model_forward_time": 0.025488615036010742, + "step": 6158 + }, + { + "epoch": 9.3963623046875e-06, + "step": 6158, + "training_step_time": 0.10801100730895996 + }, + { + "epoch": 9.39788818359375e-06, + "model_forward_time": 0.025959491729736328, + "step": 6159 + }, + { + "epoch": 9.39788818359375e-06, + "step": 6159, + "training_step_time": 0.11063981056213379 + }, + { + "epoch": 9.3994140625e-06, + "grad_norm": 0.564619779586792, + "learning_rate": 9.354715569806744e-05, + "loss": 0.0981, + "step": 6160 + }, + { + "epoch": 9.3994140625e-06, + "model_forward_time": 0.02518439292907715, + "step": 6160 + }, + { + "epoch": 9.3994140625e-06, + "step": 6160, + "training_step_time": 0.11025691032409668 + }, + { + "epoch": 9.40093994140625e-06, + "model_forward_time": 0.025241851806640625, + "step": 6161 + }, + { + "epoch": 9.40093994140625e-06, + "step": 6161, + "training_step_time": 0.20263051986694336 + }, + { + "epoch": 9.4024658203125e-06, + "model_forward_time": 0.02475738525390625, + "step": 6162 + }, + { + "epoch": 9.4024658203125e-06, + "step": 6162, + "training_step_time": 0.11385011672973633 + }, + { + "epoch": 9.40399169921875e-06, + "model_forward_time": 0.0248873233795166, + "step": 6163 + }, + { + "epoch": 9.40399169921875e-06, + "step": 6163, + "training_step_time": 0.12442660331726074 + }, + { + "epoch": 9.405517578125e-06, + "model_forward_time": 0.025155305862426758, + "step": 6164 + }, + { + "epoch": 9.405517578125e-06, + "step": 6164, + "training_step_time": 0.14322829246520996 + }, + { + "epoch": 9.40704345703125e-06, + "model_forward_time": 0.0251619815826416, + "step": 6165 + }, + { + "epoch": 9.40704345703125e-06, + "step": 6165, + "training_step_time": 0.11675572395324707 + }, + { + "epoch": 9.4085693359375e-06, + "model_forward_time": 0.02509331703186035, + "step": 6166 + }, + { + "epoch": 9.4085693359375e-06, + "step": 6166, + "training_step_time": 0.13042712211608887 + }, + { + "epoch": 9.41009521484375e-06, + "model_forward_time": 0.025488853454589844, + "step": 6167 + }, + { + "epoch": 9.41009521484375e-06, + "step": 6167, + "training_step_time": 0.1405041217803955 + }, + { + "epoch": 9.41162109375e-06, + "model_forward_time": 0.024792194366455078, + "step": 6168 + }, + { + "epoch": 9.41162109375e-06, + "step": 6168, + "training_step_time": 0.11320900917053223 + }, + { + "epoch": 9.41314697265625e-06, + "model_forward_time": 0.02507495880126953, + "step": 6169 + }, + { + "epoch": 9.41314697265625e-06, + "step": 6169, + "training_step_time": 0.11163187026977539 + }, + { + "epoch": 9.4146728515625e-06, + "grad_norm": 0.583834171295166, + "learning_rate": 9.352004630087062e-05, + "loss": 0.0866, + "step": 6170 + }, + { + "epoch": 9.4146728515625e-06, + "model_forward_time": 0.02593088150024414, + "step": 6170 + }, + { + "epoch": 9.4146728515625e-06, + "step": 6170, + "training_step_time": 0.1136016845703125 + }, + { + "epoch": 9.41619873046875e-06, + "model_forward_time": 0.025444507598876953, + "step": 6171 + }, + { + "epoch": 9.41619873046875e-06, + "step": 6171, + "training_step_time": 0.1195216178894043 + }, + { + "epoch": 9.417724609375e-06, + "model_forward_time": 0.025081396102905273, + "step": 6172 + }, + { + "epoch": 9.417724609375e-06, + "step": 6172, + "training_step_time": 0.12510013580322266 + }, + { + "epoch": 9.41925048828125e-06, + "model_forward_time": 0.025089740753173828, + "step": 6173 + }, + { + "epoch": 9.41925048828125e-06, + "step": 6173, + "training_step_time": 0.13684582710266113 + }, + { + "epoch": 9.4207763671875e-06, + "model_forward_time": 0.024561643600463867, + "step": 6174 + }, + { + "epoch": 9.4207763671875e-06, + "step": 6174, + "training_step_time": 0.13260364532470703 + }, + { + "epoch": 9.42230224609375e-06, + "model_forward_time": 0.02380514144897461, + "step": 6175 + }, + { + "epoch": 9.42230224609375e-06, + "step": 6175, + "training_step_time": 0.12708663940429688 + }, + { + "epoch": 9.423828125e-06, + "model_forward_time": 0.024834156036376953, + "step": 6176 + }, + { + "epoch": 9.423828125e-06, + "step": 6176, + "training_step_time": 0.12423372268676758 + }, + { + "epoch": 9.42535400390625e-06, + "model_forward_time": 0.025018692016601562, + "step": 6177 + }, + { + "epoch": 9.42535400390625e-06, + "step": 6177, + "training_step_time": 0.11737179756164551 + }, + { + "epoch": 9.4268798828125e-06, + "model_forward_time": 0.02531266212463379, + "step": 6178 + }, + { + "epoch": 9.4268798828125e-06, + "step": 6178, + "training_step_time": 0.1186058521270752 + }, + { + "epoch": 9.42840576171875e-06, + "model_forward_time": 0.025248050689697266, + "step": 6179 + }, + { + "epoch": 9.42840576171875e-06, + "step": 6179, + "training_step_time": 0.11556363105773926 + }, + { + "epoch": 9.429931640625e-06, + "grad_norm": 0.41108426451683044, + "learning_rate": 9.349288402271388e-05, + "loss": 0.0706, + "step": 6180 + }, + { + "epoch": 9.429931640625e-06, + "model_forward_time": 0.02515387535095215, + "step": 6180 + }, + { + "epoch": 9.429931640625e-06, + "step": 6180, + "training_step_time": 0.11102104187011719 + }, + { + "epoch": 9.43145751953125e-06, + "model_forward_time": 0.02567458152770996, + "step": 6181 + }, + { + "epoch": 9.43145751953125e-06, + "step": 6181, + "training_step_time": 0.12255430221557617 + }, + { + "epoch": 9.4329833984375e-06, + "model_forward_time": 0.02473163604736328, + "step": 6182 + }, + { + "epoch": 9.4329833984375e-06, + "step": 6182, + "training_step_time": 0.1137089729309082 + }, + { + "epoch": 9.43450927734375e-06, + "model_forward_time": 0.024974346160888672, + "step": 6183 + }, + { + "epoch": 9.43450927734375e-06, + "step": 6183, + "training_step_time": 0.23195481300354004 + }, + { + "epoch": 9.43603515625e-06, + "model_forward_time": 0.024399518966674805, + "step": 6184 + }, + { + "epoch": 9.43603515625e-06, + "step": 6184, + "training_step_time": 0.12193107604980469 + }, + { + "epoch": 9.43756103515625e-06, + "model_forward_time": 0.024416446685791016, + "step": 6185 + }, + { + "epoch": 9.43756103515625e-06, + "step": 6185, + "training_step_time": 0.11677813529968262 + }, + { + "epoch": 9.4390869140625e-06, + "model_forward_time": 0.025165557861328125, + "step": 6186 + }, + { + "epoch": 9.4390869140625e-06, + "step": 6186, + "training_step_time": 0.10760855674743652 + }, + { + "epoch": 9.44061279296875e-06, + "model_forward_time": 0.024599790573120117, + "step": 6187 + }, + { + "epoch": 9.44061279296875e-06, + "step": 6187, + "training_step_time": 0.1706552505493164 + }, + { + "epoch": 9.442138671875e-06, + "model_forward_time": 0.024727821350097656, + "step": 6188 + }, + { + "epoch": 9.442138671875e-06, + "step": 6188, + "training_step_time": 0.15900611877441406 + }, + { + "epoch": 9.44366455078125e-06, + "model_forward_time": 0.02461409568786621, + "step": 6189 + }, + { + "epoch": 9.44366455078125e-06, + "step": 6189, + "training_step_time": 0.12340497970581055 + }, + { + "epoch": 9.4451904296875e-06, + "grad_norm": 0.30063387751579285, + "learning_rate": 9.346566889660193e-05, + "loss": 0.0882, + "step": 6190 + }, + { + "epoch": 9.4451904296875e-06, + "model_forward_time": 0.024756908416748047, + "step": 6190 + }, + { + "epoch": 9.4451904296875e-06, + "step": 6190, + "training_step_time": 0.2031841278076172 + }, + { + "epoch": 9.44671630859375e-06, + "model_forward_time": 0.024410724639892578, + "step": 6191 + }, + { + "epoch": 9.44671630859375e-06, + "step": 6191, + "training_step_time": 0.1258711814880371 + }, + { + "epoch": 9.4482421875e-06, + "model_forward_time": 0.024598360061645508, + "step": 6192 + }, + { + "epoch": 9.4482421875e-06, + "step": 6192, + "training_step_time": 0.10519027709960938 + }, + { + "epoch": 9.44976806640625e-06, + "model_forward_time": 0.025116920471191406, + "step": 6193 + }, + { + "epoch": 9.44976806640625e-06, + "step": 6193, + "training_step_time": 0.10613679885864258 + }, + { + "epoch": 9.4512939453125e-06, + "model_forward_time": 0.025054931640625, + "step": 6194 + }, + { + "epoch": 9.4512939453125e-06, + "step": 6194, + "training_step_time": 0.10699892044067383 + }, + { + "epoch": 9.45281982421875e-06, + "model_forward_time": 0.025023221969604492, + "step": 6195 + }, + { + "epoch": 9.45281982421875e-06, + "step": 6195, + "training_step_time": 0.1083076000213623 + }, + { + "epoch": 9.454345703125e-06, + "model_forward_time": 0.025154829025268555, + "step": 6196 + }, + { + "epoch": 9.454345703125e-06, + "step": 6196, + "training_step_time": 0.1218252182006836 + }, + { + "epoch": 9.45587158203125e-06, + "model_forward_time": 0.025255441665649414, + "step": 6197 + }, + { + "epoch": 9.45587158203125e-06, + "step": 6197, + "training_step_time": 0.13233566284179688 + }, + { + "epoch": 9.4573974609375e-06, + "model_forward_time": 0.0247042179107666, + "step": 6198 + }, + { + "epoch": 9.4573974609375e-06, + "step": 6198, + "training_step_time": 0.12758111953735352 + }, + { + "epoch": 9.45892333984375e-06, + "model_forward_time": 0.02450275421142578, + "step": 6199 + }, + { + "epoch": 9.45892333984375e-06, + "step": 6199, + "training_step_time": 0.1211395263671875 + }, + { + "epoch": 9.46044921875e-06, + "grad_norm": 0.42109575867652893, + "learning_rate": 9.343840095560372e-05, + "loss": 0.0643, + "step": 6200 + }, + { + "epoch": 9.46044921875e-06, + "model_forward_time": 0.02542853355407715, + "step": 6200 + }, + { + "epoch": 9.46044921875e-06, + "step": 6200, + "training_step_time": 0.11516880989074707 + }, + { + "epoch": 9.46197509765625e-06, + "model_forward_time": 0.025655269622802734, + "step": 6201 + }, + { + "epoch": 9.46197509765625e-06, + "step": 6201, + "training_step_time": 0.11709213256835938 + }, + { + "epoch": 9.4635009765625e-06, + "model_forward_time": 0.024913311004638672, + "step": 6202 + }, + { + "epoch": 9.4635009765625e-06, + "step": 6202, + "training_step_time": 0.11503815650939941 + }, + { + "epoch": 9.46502685546875e-06, + "model_forward_time": 0.0254209041595459, + "step": 6203 + }, + { + "epoch": 9.46502685546875e-06, + "step": 6203, + "training_step_time": 0.11393857002258301 + }, + { + "epoch": 9.466552734375e-06, + "model_forward_time": 0.024818897247314453, + "step": 6204 + }, + { + "epoch": 9.466552734375e-06, + "step": 6204, + "training_step_time": 0.1094815731048584 + }, + { + "epoch": 9.46807861328125e-06, + "model_forward_time": 0.02553558349609375, + "step": 6205 + }, + { + "epoch": 9.46807861328125e-06, + "step": 6205, + "training_step_time": 0.11114192008972168 + }, + { + "epoch": 9.4696044921875e-06, + "model_forward_time": 0.024440288543701172, + "step": 6206 + }, + { + "epoch": 9.4696044921875e-06, + "step": 6206, + "training_step_time": 0.10841751098632812 + }, + { + "epoch": 9.47113037109375e-06, + "model_forward_time": 0.02457880973815918, + "step": 6207 + }, + { + "epoch": 9.47113037109375e-06, + "step": 6207, + "training_step_time": 0.17777109146118164 + }, + { + "epoch": 9.47265625e-06, + "model_forward_time": 0.024361848831176758, + "step": 6208 + }, + { + "epoch": 9.47265625e-06, + "step": 6208, + "training_step_time": 0.1142737865447998 + }, + { + "epoch": 9.47418212890625e-06, + "model_forward_time": 0.024552345275878906, + "step": 6209 + }, + { + "epoch": 9.47418212890625e-06, + "step": 6209, + "training_step_time": 0.13236141204833984 + }, + { + "epoch": 9.4757080078125e-06, + "grad_norm": 0.3996535837650299, + "learning_rate": 9.341108023285238e-05, + "loss": 0.0814, + "step": 6210 + }, + { + "epoch": 9.4757080078125e-06, + "model_forward_time": 0.025156259536743164, + "step": 6210 + }, + { + "epoch": 9.4757080078125e-06, + "step": 6210, + "training_step_time": 0.1558079719543457 + }, + { + "epoch": 9.47723388671875e-06, + "model_forward_time": 0.024362802505493164, + "step": 6211 + }, + { + "epoch": 9.47723388671875e-06, + "step": 6211, + "training_step_time": 0.22547006607055664 + }, + { + "epoch": 9.478759765625e-06, + "model_forward_time": 0.02451014518737793, + "step": 6212 + }, + { + "epoch": 9.478759765625e-06, + "step": 6212, + "training_step_time": 0.11100244522094727 + }, + { + "epoch": 9.48028564453125e-06, + "model_forward_time": 0.025341510772705078, + "step": 6213 + }, + { + "epoch": 9.48028564453125e-06, + "step": 6213, + "training_step_time": 0.10994076728820801 + }, + { + "epoch": 9.4818115234375e-06, + "model_forward_time": 0.02511906623840332, + "step": 6214 + }, + { + "epoch": 9.4818115234375e-06, + "step": 6214, + "training_step_time": 0.1165621280670166 + }, + { + "epoch": 9.48333740234375e-06, + "model_forward_time": 0.024886608123779297, + "step": 6215 + }, + { + "epoch": 9.48333740234375e-06, + "step": 6215, + "training_step_time": 0.10929584503173828 + }, + { + "epoch": 9.48486328125e-06, + "model_forward_time": 0.02535867691040039, + "step": 6216 + }, + { + "epoch": 9.48486328125e-06, + "step": 6216, + "training_step_time": 0.19078516960144043 + }, + { + "epoch": 9.48638916015625e-06, + "model_forward_time": 0.02449178695678711, + "step": 6217 + }, + { + "epoch": 9.48638916015625e-06, + "step": 6217, + "training_step_time": 0.1040031909942627 + }, + { + "epoch": 9.4879150390625e-06, + "model_forward_time": 0.023343324661254883, + "step": 6218 + }, + { + "epoch": 9.4879150390625e-06, + "step": 6218, + "training_step_time": 0.10399699211120605 + }, + { + "epoch": 9.48944091796875e-06, + "model_forward_time": 0.025333404541015625, + "step": 6219 + }, + { + "epoch": 9.48944091796875e-06, + "step": 6219, + "training_step_time": 0.11214232444763184 + }, + { + "epoch": 9.490966796875e-06, + "grad_norm": 0.6419973969459534, + "learning_rate": 9.338370676154516e-05, + "loss": 0.0668, + "step": 6220 + }, + { + "epoch": 9.490966796875e-06, + "model_forward_time": 0.025419235229492188, + "step": 6220 + }, + { + "epoch": 9.490966796875e-06, + "step": 6220, + "training_step_time": 0.10859036445617676 + }, + { + "epoch": 9.49249267578125e-06, + "model_forward_time": 0.02519702911376953, + "step": 6221 + }, + { + "epoch": 9.49249267578125e-06, + "step": 6221, + "training_step_time": 0.1115415096282959 + }, + { + "epoch": 9.4940185546875e-06, + "model_forward_time": 0.025003910064697266, + "step": 6222 + }, + { + "epoch": 9.4940185546875e-06, + "step": 6222, + "training_step_time": 0.12275409698486328 + }, + { + "epoch": 9.49554443359375e-06, + "model_forward_time": 0.02475595474243164, + "step": 6223 + }, + { + "epoch": 9.49554443359375e-06, + "step": 6223, + "training_step_time": 0.11707901954650879 + }, + { + "epoch": 9.4970703125e-06, + "model_forward_time": 0.0244448184967041, + "step": 6224 + }, + { + "epoch": 9.4970703125e-06, + "step": 6224, + "training_step_time": 0.11331820487976074 + }, + { + "epoch": 9.49859619140625e-06, + "model_forward_time": 0.025983810424804688, + "step": 6225 + }, + { + "epoch": 9.49859619140625e-06, + "step": 6225, + "training_step_time": 0.167982816696167 + }, + { + "epoch": 9.5001220703125e-06, + "model_forward_time": 0.02460169792175293, + "step": 6226 + }, + { + "epoch": 9.5001220703125e-06, + "step": 6226, + "training_step_time": 0.11571502685546875 + }, + { + "epoch": 9.50164794921875e-06, + "model_forward_time": 0.02713298797607422, + "step": 6227 + }, + { + "epoch": 9.50164794921875e-06, + "step": 6227, + "training_step_time": 0.17585968971252441 + }, + { + "epoch": 9.503173828125e-06, + "model_forward_time": 0.024763822555541992, + "step": 6228 + }, + { + "epoch": 9.503173828125e-06, + "step": 6228, + "training_step_time": 0.18323731422424316 + }, + { + "epoch": 9.50469970703125e-06, + "model_forward_time": 0.023694515228271484, + "step": 6229 + }, + { + "epoch": 9.50469970703125e-06, + "step": 6229, + "training_step_time": 0.11025428771972656 + }, + { + "epoch": 9.5062255859375e-06, + "grad_norm": 0.36760419607162476, + "learning_rate": 9.335628057494341e-05, + "loss": 0.0705, + "step": 6230 + }, + { + "epoch": 9.5062255859375e-06, + "model_forward_time": 0.024565458297729492, + "step": 6230 + }, + { + "epoch": 9.5062255859375e-06, + "step": 6230, + "training_step_time": 0.18889498710632324 + }, + { + "epoch": 9.50775146484375e-06, + "model_forward_time": 0.026060104370117188, + "step": 6231 + }, + { + "epoch": 9.50775146484375e-06, + "step": 6231, + "training_step_time": 0.12735915184020996 + }, + { + "epoch": 9.50927734375e-06, + "model_forward_time": 0.02496933937072754, + "step": 6232 + }, + { + "epoch": 9.50927734375e-06, + "step": 6232, + "training_step_time": 0.10544681549072266 + }, + { + "epoch": 9.51080322265625e-06, + "model_forward_time": 0.025466203689575195, + "step": 6233 + }, + { + "epoch": 9.51080322265625e-06, + "step": 6233, + "training_step_time": 0.10987114906311035 + }, + { + "epoch": 9.5123291015625e-06, + "model_forward_time": 0.025264501571655273, + "step": 6234 + }, + { + "epoch": 9.5123291015625e-06, + "step": 6234, + "training_step_time": 0.17144393920898438 + }, + { + "epoch": 9.51385498046875e-06, + "model_forward_time": 0.024371862411499023, + "step": 6235 + }, + { + "epoch": 9.51385498046875e-06, + "step": 6235, + "training_step_time": 0.16897082328796387 + }, + { + "epoch": 9.515380859375e-06, + "model_forward_time": 0.02393341064453125, + "step": 6236 + }, + { + "epoch": 9.515380859375e-06, + "step": 6236, + "training_step_time": 0.10353326797485352 + }, + { + "epoch": 9.51690673828125e-06, + "model_forward_time": 0.024433374404907227, + "step": 6237 + }, + { + "epoch": 9.51690673828125e-06, + "step": 6237, + "training_step_time": 0.10612702369689941 + }, + { + "epoch": 9.5184326171875e-06, + "model_forward_time": 0.024900197982788086, + "step": 6238 + }, + { + "epoch": 9.5184326171875e-06, + "step": 6238, + "training_step_time": 0.10895013809204102 + }, + { + "epoch": 9.51995849609375e-06, + "model_forward_time": 0.02509784698486328, + "step": 6239 + }, + { + "epoch": 9.51995849609375e-06, + "step": 6239, + "training_step_time": 0.10925722122192383 + }, + { + "epoch": 9.521484375e-06, + "grad_norm": 0.4853487014770508, + "learning_rate": 9.332880170637252e-05, + "loss": 0.0732, + "step": 6240 + }, + { + "epoch": 9.521484375e-06, + "model_forward_time": 0.02447199821472168, + "step": 6240 + }, + { + "epoch": 9.521484375e-06, + "step": 6240, + "training_step_time": 0.1110687255859375 + }, + { + "epoch": 9.52301025390625e-06, + "model_forward_time": 0.0251615047454834, + "step": 6241 + }, + { + "epoch": 9.52301025390625e-06, + "step": 6241, + "training_step_time": 0.10802507400512695 + }, + { + "epoch": 9.5245361328125e-06, + "model_forward_time": 0.024557113647460938, + "step": 6242 + }, + { + "epoch": 9.5245361328125e-06, + "step": 6242, + "training_step_time": 0.10723423957824707 + }, + { + "epoch": 9.52606201171875e-06, + "model_forward_time": 0.025428295135498047, + "step": 6243 + }, + { + "epoch": 9.52606201171875e-06, + "step": 6243, + "training_step_time": 0.10991764068603516 + }, + { + "epoch": 9.527587890625e-06, + "model_forward_time": 0.024806976318359375, + "step": 6244 + }, + { + "epoch": 9.527587890625e-06, + "step": 6244, + "training_step_time": 0.11141562461853027 + }, + { + "epoch": 9.52911376953125e-06, + "model_forward_time": 0.024971485137939453, + "step": 6245 + }, + { + "epoch": 9.52911376953125e-06, + "step": 6245, + "training_step_time": 0.10636734962463379 + }, + { + "epoch": 9.5306396484375e-06, + "model_forward_time": 0.024776458740234375, + "step": 6246 + }, + { + "epoch": 9.5306396484375e-06, + "step": 6246, + "training_step_time": 0.13183140754699707 + }, + { + "epoch": 9.53216552734375e-06, + "model_forward_time": 0.025059223175048828, + "step": 6247 + }, + { + "epoch": 9.53216552734375e-06, + "step": 6247, + "training_step_time": 0.15092754364013672 + }, + { + "epoch": 9.53369140625e-06, + "model_forward_time": 0.024275541305541992, + "step": 6248 + }, + { + "epoch": 9.53369140625e-06, + "step": 6248, + "training_step_time": 0.142836332321167 + }, + { + "epoch": 9.53521728515625e-06, + "model_forward_time": 0.024199485778808594, + "step": 6249 + }, + { + "epoch": 9.53521728515625e-06, + "step": 6249, + "training_step_time": 0.12726211547851562 + }, + { + "epoch": 9.5367431640625e-06, + "grad_norm": 0.4221380949020386, + "learning_rate": 9.330127018922194e-05, + "loss": 0.0843, + "step": 6250 + }, + { + "epoch": 9.5367431640625e-06, + "model_forward_time": 0.02413344383239746, + "step": 6250 + }, + { + "epoch": 9.5367431640625e-06, + "step": 6250, + "training_step_time": 0.12211441993713379 + }, + { + "epoch": 9.53826904296875e-06, + "model_forward_time": 0.024712085723876953, + "step": 6251 + }, + { + "epoch": 9.53826904296875e-06, + "step": 6251, + "training_step_time": 0.19897794723510742 + }, + { + "epoch": 9.539794921875e-06, + "model_forward_time": 0.024135828018188477, + "step": 6252 + }, + { + "epoch": 9.539794921875e-06, + "step": 6252, + "training_step_time": 0.11450624465942383 + }, + { + "epoch": 9.54132080078125e-06, + "model_forward_time": 0.024530887603759766, + "step": 6253 + }, + { + "epoch": 9.54132080078125e-06, + "step": 6253, + "training_step_time": 0.1367018222808838 + }, + { + "epoch": 9.5428466796875e-06, + "model_forward_time": 0.02434539794921875, + "step": 6254 + }, + { + "epoch": 9.5428466796875e-06, + "step": 6254, + "training_step_time": 0.16207027435302734 + }, + { + "epoch": 9.54437255859375e-06, + "model_forward_time": 0.024758100509643555, + "step": 6255 + }, + { + "epoch": 9.54437255859375e-06, + "step": 6255, + "training_step_time": 0.2168560028076172 + }, + { + "epoch": 9.5458984375e-06, + "model_forward_time": 0.02465987205505371, + "step": 6256 + }, + { + "epoch": 9.5458984375e-06, + "step": 6256, + "training_step_time": 0.11516547203063965 + }, + { + "epoch": 9.54742431640625e-06, + "model_forward_time": 0.024525880813598633, + "step": 6257 + }, + { + "epoch": 9.54742431640625e-06, + "step": 6257, + "training_step_time": 0.11011433601379395 + }, + { + "epoch": 9.5489501953125e-06, + "model_forward_time": 0.024698495864868164, + "step": 6258 + }, + { + "epoch": 9.5489501953125e-06, + "step": 6258, + "training_step_time": 0.11081743240356445 + }, + { + "epoch": 9.55047607421875e-06, + "model_forward_time": 0.025320053100585938, + "step": 6259 + }, + { + "epoch": 9.55047607421875e-06, + "step": 6259, + "training_step_time": 0.1095888614654541 + }, + { + "epoch": 9.552001953125e-06, + "grad_norm": 0.3353863060474396, + "learning_rate": 9.327368605694502e-05, + "loss": 0.0624, + "step": 6260 + }, + { + "epoch": 9.552001953125e-06, + "model_forward_time": 0.02522420883178711, + "step": 6260 + }, + { + "epoch": 9.552001953125e-06, + "step": 6260, + "training_step_time": 0.1951580047607422 + }, + { + "epoch": 9.55352783203125e-06, + "model_forward_time": 0.02455282211303711, + "step": 6261 + }, + { + "epoch": 9.55352783203125e-06, + "step": 6261, + "training_step_time": 0.10666394233703613 + }, + { + "epoch": 9.5550537109375e-06, + "model_forward_time": 0.02463984489440918, + "step": 6262 + }, + { + "epoch": 9.5550537109375e-06, + "step": 6262, + "training_step_time": 0.1113584041595459 + }, + { + "epoch": 9.55657958984375e-06, + "model_forward_time": 0.024731874465942383, + "step": 6263 + }, + { + "epoch": 9.55657958984375e-06, + "step": 6263, + "training_step_time": 0.11738872528076172 + }, + { + "epoch": 9.55810546875e-06, + "model_forward_time": 0.025014877319335938, + "step": 6264 + }, + { + "epoch": 9.55810546875e-06, + "step": 6264, + "training_step_time": 0.11180663108825684 + }, + { + "epoch": 9.55963134765625e-06, + "model_forward_time": 0.025120019912719727, + "step": 6265 + }, + { + "epoch": 9.55963134765625e-06, + "step": 6265, + "training_step_time": 0.1087641716003418 + }, + { + "epoch": 9.5611572265625e-06, + "model_forward_time": 0.025112390518188477, + "step": 6266 + }, + { + "epoch": 9.5611572265625e-06, + "step": 6266, + "training_step_time": 0.11778473854064941 + }, + { + "epoch": 9.56268310546875e-06, + "model_forward_time": 0.0248105525970459, + "step": 6267 + }, + { + "epoch": 9.56268310546875e-06, + "step": 6267, + "training_step_time": 0.1143186092376709 + }, + { + "epoch": 9.564208984375e-06, + "model_forward_time": 0.02477717399597168, + "step": 6268 + }, + { + "epoch": 9.564208984375e-06, + "step": 6268, + "training_step_time": 0.11207222938537598 + }, + { + "epoch": 9.56573486328125e-06, + "model_forward_time": 0.025023937225341797, + "step": 6269 + }, + { + "epoch": 9.56573486328125e-06, + "step": 6269, + "training_step_time": 0.14268803596496582 + }, + { + "epoch": 9.5672607421875e-06, + "grad_norm": 0.38684239983558655, + "learning_rate": 9.32460493430591e-05, + "loss": 0.0619, + "step": 6270 + }, + { + "epoch": 9.5672607421875e-06, + "model_forward_time": 0.024422883987426758, + "step": 6270 + }, + { + "epoch": 9.5672607421875e-06, + "step": 6270, + "training_step_time": 0.11173820495605469 + }, + { + "epoch": 9.56878662109375e-06, + "model_forward_time": 0.024866580963134766, + "step": 6271 + }, + { + "epoch": 9.56878662109375e-06, + "step": 6271, + "training_step_time": 0.11045074462890625 + }, + { + "epoch": 9.5703125e-06, + "model_forward_time": 0.02553415298461914, + "step": 6272 + }, + { + "epoch": 9.5703125e-06, + "step": 6272, + "training_step_time": 0.11159992218017578 + }, + { + "epoch": 9.57183837890625e-06, + "model_forward_time": 0.02513861656188965, + "step": 6273 + }, + { + "epoch": 9.57183837890625e-06, + "step": 6273, + "training_step_time": 0.13217711448669434 + }, + { + "epoch": 9.5733642578125e-06, + "model_forward_time": 0.02485799789428711, + "step": 6274 + }, + { + "epoch": 9.5733642578125e-06, + "step": 6274, + "training_step_time": 0.10873818397521973 + }, + { + "epoch": 9.57489013671875e-06, + "model_forward_time": 0.024221420288085938, + "step": 6275 + }, + { + "epoch": 9.57489013671875e-06, + "step": 6275, + "training_step_time": 0.16686153411865234 + }, + { + "epoch": 9.576416015625e-06, + "model_forward_time": 0.02460002899169922, + "step": 6276 + }, + { + "epoch": 9.576416015625e-06, + "step": 6276, + "training_step_time": 0.17150664329528809 + }, + { + "epoch": 9.57794189453125e-06, + "model_forward_time": 0.02502131462097168, + "step": 6277 + }, + { + "epoch": 9.57794189453125e-06, + "step": 6277, + "training_step_time": 0.11208891868591309 + }, + { + "epoch": 9.5794677734375e-06, + "model_forward_time": 0.024988412857055664, + "step": 6278 + }, + { + "epoch": 9.5794677734375e-06, + "step": 6278, + "training_step_time": 0.2189488410949707 + }, + { + "epoch": 9.58099365234375e-06, + "model_forward_time": 0.024121761322021484, + "step": 6279 + }, + { + "epoch": 9.58099365234375e-06, + "step": 6279, + "training_step_time": 0.1135091781616211 + }, + { + "epoch": 9.58251953125e-06, + "grad_norm": 0.4543576240539551, + "learning_rate": 9.321836008114539e-05, + "loss": 0.0786, + "step": 6280 + }, + { + "epoch": 9.58251953125e-06, + "model_forward_time": 0.02466416358947754, + "step": 6280 + }, + { + "epoch": 9.58251953125e-06, + "step": 6280, + "training_step_time": 0.1085960865020752 + }, + { + "epoch": 9.58404541015625e-06, + "model_forward_time": 0.02508854866027832, + "step": 6281 + }, + { + "epoch": 9.58404541015625e-06, + "step": 6281, + "training_step_time": 0.10630202293395996 + }, + { + "epoch": 9.5855712890625e-06, + "model_forward_time": 0.024652957916259766, + "step": 6282 + }, + { + "epoch": 9.5855712890625e-06, + "step": 6282, + "training_step_time": 0.11135077476501465 + }, + { + "epoch": 9.58709716796875e-06, + "model_forward_time": 0.02559351921081543, + "step": 6283 + }, + { + "epoch": 9.58709716796875e-06, + "step": 6283, + "training_step_time": 0.1100010871887207 + }, + { + "epoch": 9.588623046875e-06, + "model_forward_time": 0.025285959243774414, + "step": 6284 + }, + { + "epoch": 9.588623046875e-06, + "step": 6284, + "training_step_time": 0.11015105247497559 + }, + { + "epoch": 9.59014892578125e-06, + "model_forward_time": 0.025254011154174805, + "step": 6285 + }, + { + "epoch": 9.59014892578125e-06, + "step": 6285, + "training_step_time": 0.11470675468444824 + }, + { + "epoch": 9.5916748046875e-06, + "model_forward_time": 0.02506422996520996, + "step": 6286 + }, + { + "epoch": 9.5916748046875e-06, + "step": 6286, + "training_step_time": 0.11157894134521484 + }, + { + "epoch": 9.59320068359375e-06, + "model_forward_time": 0.025619983673095703, + "step": 6287 + }, + { + "epoch": 9.59320068359375e-06, + "step": 6287, + "training_step_time": 0.10990262031555176 + }, + { + "epoch": 9.5947265625e-06, + "model_forward_time": 0.025167465209960938, + "step": 6288 + }, + { + "epoch": 9.5947265625e-06, + "step": 6288, + "training_step_time": 0.16965055465698242 + }, + { + "epoch": 9.59625244140625e-06, + "model_forward_time": 0.024462223052978516, + "step": 6289 + }, + { + "epoch": 9.59625244140625e-06, + "step": 6289, + "training_step_time": 0.15226197242736816 + }, + { + "epoch": 9.5977783203125e-06, + "grad_norm": 0.428813099861145, + "learning_rate": 9.319061830484898e-05, + "loss": 0.0796, + "step": 6290 + }, + { + "epoch": 9.5977783203125e-06, + "model_forward_time": 0.024161100387573242, + "step": 6290 + }, + { + "epoch": 9.5977783203125e-06, + "step": 6290, + "training_step_time": 0.13245582580566406 + }, + { + "epoch": 9.59930419921875e-06, + "model_forward_time": 0.024697303771972656, + "step": 6291 + }, + { + "epoch": 9.59930419921875e-06, + "step": 6291, + "training_step_time": 0.12979745864868164 + }, + { + "epoch": 9.600830078125e-06, + "model_forward_time": 0.024670124053955078, + "step": 6292 + }, + { + "epoch": 9.600830078125e-06, + "step": 6292, + "training_step_time": 0.12748312950134277 + }, + { + "epoch": 9.60235595703125e-06, + "model_forward_time": 0.024752378463745117, + "step": 6293 + }, + { + "epoch": 9.60235595703125e-06, + "step": 6293, + "training_step_time": 0.11943197250366211 + }, + { + "epoch": 9.6038818359375e-06, + "model_forward_time": 0.02444005012512207, + "step": 6294 + }, + { + "epoch": 9.6038818359375e-06, + "step": 6294, + "training_step_time": 0.11942100524902344 + }, + { + "epoch": 9.60540771484375e-06, + "model_forward_time": 0.0250093936920166, + "step": 6295 + }, + { + "epoch": 9.60540771484375e-06, + "step": 6295, + "training_step_time": 0.13805007934570312 + }, + { + "epoch": 9.60693359375e-06, + "model_forward_time": 0.02575373649597168, + "step": 6296 + }, + { + "epoch": 9.60693359375e-06, + "step": 6296, + "training_step_time": 0.11794281005859375 + }, + { + "epoch": 9.60845947265625e-06, + "model_forward_time": 0.0251615047454834, + "step": 6297 + }, + { + "epoch": 9.60845947265625e-06, + "step": 6297, + "training_step_time": 0.12678050994873047 + }, + { + "epoch": 9.6099853515625e-06, + "model_forward_time": 0.02752089500427246, + "step": 6298 + }, + { + "epoch": 9.6099853515625e-06, + "step": 6298, + "training_step_time": 0.14595413208007812 + }, + { + "epoch": 9.61151123046875e-06, + "model_forward_time": 0.024409055709838867, + "step": 6299 + }, + { + "epoch": 9.61151123046875e-06, + "step": 6299, + "training_step_time": 0.22737455368041992 + }, + { + "epoch": 9.613037109375e-06, + "grad_norm": 0.7419989109039307, + "learning_rate": 9.316282404787871e-05, + "loss": 0.067, + "step": 6300 + }, + { + "epoch": 9.613037109375e-06, + "model_forward_time": 0.024438142776489258, + "step": 6300 + }, + { + "epoch": 9.613037109375e-06, + "step": 6300, + "training_step_time": 0.13497567176818848 + }, + { + "epoch": 9.61456298828125e-06, + "model_forward_time": 0.02456068992614746, + "step": 6301 + }, + { + "epoch": 9.61456298828125e-06, + "step": 6301, + "training_step_time": 0.189544677734375 + }, + { + "epoch": 9.6160888671875e-06, + "model_forward_time": 0.024581432342529297, + "step": 6302 + }, + { + "epoch": 9.6160888671875e-06, + "step": 6302, + "training_step_time": 0.11003494262695312 + }, + { + "epoch": 9.61761474609375e-06, + "model_forward_time": 0.024633169174194336, + "step": 6303 + }, + { + "epoch": 9.61761474609375e-06, + "step": 6303, + "training_step_time": 0.15676069259643555 + }, + { + "epoch": 9.619140625e-06, + "model_forward_time": 0.024286746978759766, + "step": 6304 + }, + { + "epoch": 9.619140625e-06, + "step": 6304, + "training_step_time": 0.11244535446166992 + }, + { + "epoch": 9.62066650390625e-06, + "model_forward_time": 0.02470254898071289, + "step": 6305 + }, + { + "epoch": 9.62066650390625e-06, + "step": 6305, + "training_step_time": 0.1061868667602539 + }, + { + "epoch": 9.6221923828125e-06, + "model_forward_time": 0.0251009464263916, + "step": 6306 + }, + { + "epoch": 9.6221923828125e-06, + "step": 6306, + "training_step_time": 0.11033749580383301 + }, + { + "epoch": 9.62371826171875e-06, + "model_forward_time": 0.024740219116210938, + "step": 6307 + }, + { + "epoch": 9.62371826171875e-06, + "step": 6307, + "training_step_time": 0.10691952705383301 + }, + { + "epoch": 9.625244140625e-06, + "model_forward_time": 0.024940967559814453, + "step": 6308 + }, + { + "epoch": 9.625244140625e-06, + "step": 6308, + "training_step_time": 0.10649633407592773 + }, + { + "epoch": 9.62677001953125e-06, + "model_forward_time": 0.025206327438354492, + "step": 6309 + }, + { + "epoch": 9.62677001953125e-06, + "step": 6309, + "training_step_time": 0.11043024063110352 + }, + { + "epoch": 9.6282958984375e-06, + "grad_norm": 0.7096858620643616, + "learning_rate": 9.313497734400722e-05, + "loss": 0.074, + "step": 6310 + }, + { + "epoch": 9.6282958984375e-06, + "model_forward_time": 0.024983882904052734, + "step": 6310 + }, + { + "epoch": 9.6282958984375e-06, + "step": 6310, + "training_step_time": 0.11170029640197754 + }, + { + "epoch": 9.62982177734375e-06, + "model_forward_time": 0.025534868240356445, + "step": 6311 + }, + { + "epoch": 9.62982177734375e-06, + "step": 6311, + "training_step_time": 0.1065671443939209 + }, + { + "epoch": 9.63134765625e-06, + "model_forward_time": 0.024755477905273438, + "step": 6312 + }, + { + "epoch": 9.63134765625e-06, + "step": 6312, + "training_step_time": 0.10960102081298828 + }, + { + "epoch": 9.63287353515625e-06, + "model_forward_time": 0.025238990783691406, + "step": 6313 + }, + { + "epoch": 9.63287353515625e-06, + "step": 6313, + "training_step_time": 0.17888092994689941 + }, + { + "epoch": 9.6343994140625e-06, + "model_forward_time": 0.024356842041015625, + "step": 6314 + }, + { + "epoch": 9.6343994140625e-06, + "step": 6314, + "training_step_time": 0.11677289009094238 + }, + { + "epoch": 9.63592529296875e-06, + "model_forward_time": 0.024043798446655273, + "step": 6315 + }, + { + "epoch": 9.63592529296875e-06, + "step": 6315, + "training_step_time": 0.1807842254638672 + }, + { + "epoch": 9.637451171875e-06, + "model_forward_time": 0.02550673484802246, + "step": 6316 + }, + { + "epoch": 9.637451171875e-06, + "step": 6316, + "training_step_time": 0.17399907112121582 + }, + { + "epoch": 9.63897705078125e-06, + "model_forward_time": 0.025082826614379883, + "step": 6317 + }, + { + "epoch": 9.63897705078125e-06, + "step": 6317, + "training_step_time": 0.10758781433105469 + }, + { + "epoch": 9.6405029296875e-06, + "model_forward_time": 0.02474212646484375, + "step": 6318 + }, + { + "epoch": 9.6405029296875e-06, + "step": 6318, + "training_step_time": 0.2243356704711914 + }, + { + "epoch": 9.64202880859375e-06, + "model_forward_time": 0.0251007080078125, + "step": 6319 + }, + { + "epoch": 9.64202880859375e-06, + "step": 6319, + "training_step_time": 0.10608887672424316 + }, + { + "epoch": 9.6435546875e-06, + "grad_norm": 0.40498948097229004, + "learning_rate": 9.31070782270709e-05, + "loss": 0.0725, + "step": 6320 + }, + { + "epoch": 9.6435546875e-06, + "model_forward_time": 0.02472209930419922, + "step": 6320 + }, + { + "epoch": 9.6435546875e-06, + "step": 6320, + "training_step_time": 0.11642622947692871 + }, + { + "epoch": 9.64508056640625e-06, + "model_forward_time": 0.02520298957824707, + "step": 6321 + }, + { + "epoch": 9.64508056640625e-06, + "step": 6321, + "training_step_time": 0.2090015411376953 + }, + { + "epoch": 9.6466064453125e-06, + "model_forward_time": 0.02429819107055664, + "step": 6322 + }, + { + "epoch": 9.6466064453125e-06, + "step": 6322, + "training_step_time": 0.11702394485473633 + }, + { + "epoch": 9.64813232421875e-06, + "model_forward_time": 0.024440765380859375, + "step": 6323 + }, + { + "epoch": 9.64813232421875e-06, + "step": 6323, + "training_step_time": 0.10973167419433594 + }, + { + "epoch": 9.649658203125e-06, + "model_forward_time": 0.024803876876831055, + "step": 6324 + }, + { + "epoch": 9.649658203125e-06, + "step": 6324, + "training_step_time": 0.10746574401855469 + }, + { + "epoch": 9.65118408203125e-06, + "model_forward_time": 0.02493906021118164, + "step": 6325 + }, + { + "epoch": 9.65118408203125e-06, + "step": 6325, + "training_step_time": 0.10668706893920898 + }, + { + "epoch": 9.6527099609375e-06, + "model_forward_time": 0.025269508361816406, + "step": 6326 + }, + { + "epoch": 9.6527099609375e-06, + "step": 6326, + "training_step_time": 0.11152958869934082 + }, + { + "epoch": 9.65423583984375e-06, + "model_forward_time": 0.02511453628540039, + "step": 6327 + }, + { + "epoch": 9.65423583984375e-06, + "step": 6327, + "training_step_time": 0.10652971267700195 + }, + { + "epoch": 9.65576171875e-06, + "model_forward_time": 0.024953603744506836, + "step": 6328 + }, + { + "epoch": 9.65576171875e-06, + "step": 6328, + "training_step_time": 0.10905265808105469 + }, + { + "epoch": 9.65728759765625e-06, + "model_forward_time": 0.02517414093017578, + "step": 6329 + }, + { + "epoch": 9.65728759765625e-06, + "step": 6329, + "training_step_time": 0.1081702709197998 + }, + { + "epoch": 9.6588134765625e-06, + "grad_norm": 0.38989371061325073, + "learning_rate": 9.30791267309698e-05, + "loss": 0.0732, + "step": 6330 + }, + { + "epoch": 9.6588134765625e-06, + "model_forward_time": 0.02578139305114746, + "step": 6330 + }, + { + "epoch": 9.6588134765625e-06, + "step": 6330, + "training_step_time": 0.10853290557861328 + }, + { + "epoch": 9.66033935546875e-06, + "model_forward_time": 0.02470541000366211, + "step": 6331 + }, + { + "epoch": 9.66033935546875e-06, + "step": 6331, + "training_step_time": 0.1062326431274414 + }, + { + "epoch": 9.661865234375e-06, + "model_forward_time": 0.025282621383666992, + "step": 6332 + }, + { + "epoch": 9.661865234375e-06, + "step": 6332, + "training_step_time": 0.1095738410949707 + }, + { + "epoch": 9.66339111328125e-06, + "model_forward_time": 0.024968385696411133, + "step": 6333 + }, + { + "epoch": 9.66339111328125e-06, + "step": 6333, + "training_step_time": 0.10760498046875 + }, + { + "epoch": 9.6649169921875e-06, + "model_forward_time": 0.025264501571655273, + "step": 6334 + }, + { + "epoch": 9.6649169921875e-06, + "step": 6334, + "training_step_time": 0.11597156524658203 + }, + { + "epoch": 9.66644287109375e-06, + "model_forward_time": 0.025235414505004883, + "step": 6335 + }, + { + "epoch": 9.66644287109375e-06, + "step": 6335, + "training_step_time": 0.1047053337097168 + }, + { + "epoch": 9.66796875e-06, + "model_forward_time": 0.025152921676635742, + "step": 6336 + }, + { + "epoch": 9.66796875e-06, + "step": 6336, + "training_step_time": 0.10790801048278809 + }, + { + "epoch": 9.66949462890625e-06, + "model_forward_time": 0.025397300720214844, + "step": 6337 + }, + { + "epoch": 9.66949462890625e-06, + "step": 6337, + "training_step_time": 0.10804605484008789 + }, + { + "epoch": 9.6710205078125e-06, + "model_forward_time": 0.025082826614379883, + "step": 6338 + }, + { + "epoch": 9.6710205078125e-06, + "step": 6338, + "training_step_time": 0.1320970058441162 + }, + { + "epoch": 9.67254638671875e-06, + "model_forward_time": 0.024398326873779297, + "step": 6339 + }, + { + "epoch": 9.67254638671875e-06, + "step": 6339, + "training_step_time": 0.20021557807922363 + }, + { + "epoch": 9.674072265625e-06, + "grad_norm": 0.4955099821090698, + "learning_rate": 9.305112288966761e-05, + "loss": 0.0828, + "step": 6340 + }, + { + "epoch": 9.674072265625e-06, + "model_forward_time": 0.024440765380859375, + "step": 6340 + }, + { + "epoch": 9.674072265625e-06, + "step": 6340, + "training_step_time": 0.16709351539611816 + }, + { + "epoch": 9.67559814453125e-06, + "model_forward_time": 0.0248868465423584, + "step": 6341 + }, + { + "epoch": 9.67559814453125e-06, + "step": 6341, + "training_step_time": 0.1698284149169922 + }, + { + "epoch": 9.6771240234375e-06, + "model_forward_time": 0.024354219436645508, + "step": 6342 + }, + { + "epoch": 9.6771240234375e-06, + "step": 6342, + "training_step_time": 0.19667887687683105 + }, + { + "epoch": 9.67864990234375e-06, + "model_forward_time": 0.024588823318481445, + "step": 6343 + }, + { + "epoch": 9.67864990234375e-06, + "step": 6343, + "training_step_time": 0.19199872016906738 + }, + { + "epoch": 9.68017578125e-06, + "model_forward_time": 0.024130582809448242, + "step": 6344 + }, + { + "epoch": 9.68017578125e-06, + "step": 6344, + "training_step_time": 0.1731119155883789 + }, + { + "epoch": 9.68170166015625e-06, + "model_forward_time": 0.024015188217163086, + "step": 6345 + }, + { + "epoch": 9.68170166015625e-06, + "step": 6345, + "training_step_time": 0.12573671340942383 + }, + { + "epoch": 9.6832275390625e-06, + "model_forward_time": 0.024234771728515625, + "step": 6346 + }, + { + "epoch": 9.6832275390625e-06, + "step": 6346, + "training_step_time": 0.18442654609680176 + }, + { + "epoch": 9.68475341796875e-06, + "model_forward_time": 0.024148941040039062, + "step": 6347 + }, + { + "epoch": 9.68475341796875e-06, + "step": 6347, + "training_step_time": 0.11178421974182129 + }, + { + "epoch": 9.686279296875e-06, + "model_forward_time": 0.024275779724121094, + "step": 6348 + }, + { + "epoch": 9.686279296875e-06, + "step": 6348, + "training_step_time": 0.1101071834564209 + }, + { + "epoch": 9.68780517578125e-06, + "model_forward_time": 0.024898290634155273, + "step": 6349 + }, + { + "epoch": 9.68780517578125e-06, + "step": 6349, + "training_step_time": 0.10902762413024902 + }, + { + "epoch": 9.6893310546875e-06, + "grad_norm": 0.5339387655258179, + "learning_rate": 9.30230667371917e-05, + "loss": 0.0752, + "step": 6350 + }, + { + "epoch": 9.6893310546875e-06, + "model_forward_time": 0.02393794059753418, + "step": 6350 + }, + { + "epoch": 9.6893310546875e-06, + "step": 6350, + "training_step_time": 0.11030197143554688 + }, + { + "epoch": 9.69085693359375e-06, + "model_forward_time": 0.025090694427490234, + "step": 6351 + }, + { + "epoch": 9.69085693359375e-06, + "step": 6351, + "training_step_time": 0.11490869522094727 + }, + { + "epoch": 9.6923828125e-06, + "model_forward_time": 0.025071382522583008, + "step": 6352 + }, + { + "epoch": 9.6923828125e-06, + "step": 6352, + "training_step_time": 0.10852313041687012 + }, + { + "epoch": 9.69390869140625e-06, + "model_forward_time": 0.025237321853637695, + "step": 6353 + }, + { + "epoch": 9.69390869140625e-06, + "step": 6353, + "training_step_time": 0.10785460472106934 + }, + { + "epoch": 9.6954345703125e-06, + "model_forward_time": 0.025052309036254883, + "step": 6354 + }, + { + "epoch": 9.6954345703125e-06, + "step": 6354, + "training_step_time": 0.11221599578857422 + }, + { + "epoch": 9.69696044921875e-06, + "model_forward_time": 0.025387048721313477, + "step": 6355 + }, + { + "epoch": 9.69696044921875e-06, + "step": 6355, + "training_step_time": 0.1149601936340332 + }, + { + "epoch": 9.698486328125e-06, + "model_forward_time": 0.025333881378173828, + "step": 6356 + }, + { + "epoch": 9.698486328125e-06, + "step": 6356, + "training_step_time": 0.20598673820495605 + }, + { + "epoch": 9.70001220703125e-06, + "model_forward_time": 0.024985074996948242, + "step": 6357 + }, + { + "epoch": 9.70001220703125e-06, + "step": 6357, + "training_step_time": 0.11120963096618652 + }, + { + "epoch": 9.7015380859375e-06, + "model_forward_time": 0.024375200271606445, + "step": 6358 + }, + { + "epoch": 9.7015380859375e-06, + "step": 6358, + "training_step_time": 0.10765981674194336 + }, + { + "epoch": 9.70306396484375e-06, + "model_forward_time": 0.025217533111572266, + "step": 6359 + }, + { + "epoch": 9.70306396484375e-06, + "step": 6359, + "training_step_time": 0.12080144882202148 + }, + { + "epoch": 9.70458984375e-06, + "grad_norm": 0.39799192547798157, + "learning_rate": 9.299495830763286e-05, + "loss": 0.0784, + "step": 6360 + }, + { + "epoch": 9.70458984375e-06, + "model_forward_time": 0.02470684051513672, + "step": 6360 + }, + { + "epoch": 9.70458984375e-06, + "step": 6360, + "training_step_time": 0.10517120361328125 + }, + { + "epoch": 9.70611572265625e-06, + "model_forward_time": 0.02449965476989746, + "step": 6361 + }, + { + "epoch": 9.70611572265625e-06, + "step": 6361, + "training_step_time": 0.1679856777191162 + }, + { + "epoch": 9.7076416015625e-06, + "model_forward_time": 0.02473163604736328, + "step": 6362 + }, + { + "epoch": 9.7076416015625e-06, + "step": 6362, + "training_step_time": 0.17008018493652344 + }, + { + "epoch": 9.70916748046875e-06, + "model_forward_time": 0.02400684356689453, + "step": 6363 + }, + { + "epoch": 9.70916748046875e-06, + "step": 6363, + "training_step_time": 0.11502933502197266 + }, + { + "epoch": 9.710693359375e-06, + "model_forward_time": 0.025376081466674805, + "step": 6364 + }, + { + "epoch": 9.710693359375e-06, + "step": 6364, + "training_step_time": 0.1728818416595459 + }, + { + "epoch": 9.71221923828125e-06, + "model_forward_time": 0.02463531494140625, + "step": 6365 + }, + { + "epoch": 9.71221923828125e-06, + "step": 6365, + "training_step_time": 0.1679515838623047 + }, + { + "epoch": 9.7137451171875e-06, + "model_forward_time": 0.025004863739013672, + "step": 6366 + }, + { + "epoch": 9.7137451171875e-06, + "step": 6366, + "training_step_time": 0.1052093505859375 + }, + { + "epoch": 9.71527099609375e-06, + "model_forward_time": 0.02472066879272461, + "step": 6367 + }, + { + "epoch": 9.71527099609375e-06, + "step": 6367, + "training_step_time": 0.10825943946838379 + }, + { + "epoch": 9.716796875e-06, + "model_forward_time": 0.025316953659057617, + "step": 6368 + }, + { + "epoch": 9.716796875e-06, + "step": 6368, + "training_step_time": 0.11265206336975098 + }, + { + "epoch": 9.71832275390625e-06, + "model_forward_time": 0.02568817138671875, + "step": 6369 + }, + { + "epoch": 9.71832275390625e-06, + "step": 6369, + "training_step_time": 0.11026954650878906 + }, + { + "epoch": 9.7198486328125e-06, + "grad_norm": 0.3123718500137329, + "learning_rate": 9.296679763514552e-05, + "loss": 0.0696, + "step": 6370 + }, + { + "epoch": 9.7198486328125e-06, + "model_forward_time": 0.02479076385498047, + "step": 6370 + }, + { + "epoch": 9.7198486328125e-06, + "step": 6370, + "training_step_time": 0.10735440254211426 + }, + { + "epoch": 9.72137451171875e-06, + "model_forward_time": 0.02542257308959961, + "step": 6371 + }, + { + "epoch": 9.72137451171875e-06, + "step": 6371, + "training_step_time": 0.11055397987365723 + }, + { + "epoch": 9.722900390625e-06, + "model_forward_time": 0.02499079704284668, + "step": 6372 + }, + { + "epoch": 9.722900390625e-06, + "step": 6372, + "training_step_time": 0.11044192314147949 + }, + { + "epoch": 9.72442626953125e-06, + "model_forward_time": 0.025032758712768555, + "step": 6373 + }, + { + "epoch": 9.72442626953125e-06, + "step": 6373, + "training_step_time": 0.10766029357910156 + }, + { + "epoch": 9.7259521484375e-06, + "model_forward_time": 0.024792909622192383, + "step": 6374 + }, + { + "epoch": 9.7259521484375e-06, + "step": 6374, + "training_step_time": 0.10824060440063477 + }, + { + "epoch": 9.72747802734375e-06, + "model_forward_time": 0.02501845359802246, + "step": 6375 + }, + { + "epoch": 9.72747802734375e-06, + "step": 6375, + "training_step_time": 0.11243414878845215 + }, + { + "epoch": 9.72900390625e-06, + "model_forward_time": 0.02621150016784668, + "step": 6376 + }, + { + "epoch": 9.72900390625e-06, + "step": 6376, + "training_step_time": 0.1194758415222168 + }, + { + "epoch": 9.73052978515625e-06, + "model_forward_time": 0.02504897117614746, + "step": 6377 + }, + { + "epoch": 9.73052978515625e-06, + "step": 6377, + "training_step_time": 0.10839653015136719 + }, + { + "epoch": 9.7320556640625e-06, + "model_forward_time": 0.02498626708984375, + "step": 6378 + }, + { + "epoch": 9.7320556640625e-06, + "step": 6378, + "training_step_time": 0.1061544418334961 + }, + { + "epoch": 9.73358154296875e-06, + "model_forward_time": 0.02496051788330078, + "step": 6379 + }, + { + "epoch": 9.73358154296875e-06, + "step": 6379, + "training_step_time": 0.11384892463684082 + }, + { + "epoch": 9.735107421875e-06, + "grad_norm": 0.3744323253631592, + "learning_rate": 9.293858475394754e-05, + "loss": 0.0723, + "step": 6380 + }, + { + "epoch": 9.735107421875e-06, + "model_forward_time": 0.02501201629638672, + "step": 6380 + }, + { + "epoch": 9.735107421875e-06, + "step": 6380, + "training_step_time": 0.10827994346618652 + }, + { + "epoch": 9.73663330078125e-06, + "model_forward_time": 0.02487468719482422, + "step": 6381 + }, + { + "epoch": 9.73663330078125e-06, + "step": 6381, + "training_step_time": 0.10662484169006348 + }, + { + "epoch": 9.7381591796875e-06, + "model_forward_time": 0.02529740333557129, + "step": 6382 + }, + { + "epoch": 9.7381591796875e-06, + "step": 6382, + "training_step_time": 0.1964564323425293 + }, + { + "epoch": 9.73968505859375e-06, + "model_forward_time": 0.024744510650634766, + "step": 6383 + }, + { + "epoch": 9.73968505859375e-06, + "step": 6383, + "training_step_time": 0.1131277084350586 + }, + { + "epoch": 9.7412109375e-06, + "model_forward_time": 0.02750420570373535, + "step": 6384 + }, + { + "epoch": 9.7412109375e-06, + "step": 6384, + "training_step_time": 0.1359543800354004 + }, + { + "epoch": 9.74273681640625e-06, + "model_forward_time": 0.025117874145507812, + "step": 6385 + }, + { + "epoch": 9.74273681640625e-06, + "step": 6385, + "training_step_time": 0.16062355041503906 + }, + { + "epoch": 9.7442626953125e-06, + "model_forward_time": 0.024236202239990234, + "step": 6386 + }, + { + "epoch": 9.7442626953125e-06, + "step": 6386, + "training_step_time": 0.20752811431884766 + }, + { + "epoch": 9.74578857421875e-06, + "model_forward_time": 0.02451610565185547, + "step": 6387 + }, + { + "epoch": 9.74578857421875e-06, + "step": 6387, + "training_step_time": 0.14438366889953613 + }, + { + "epoch": 9.747314453125e-06, + "model_forward_time": 0.024376630783081055, + "step": 6388 + }, + { + "epoch": 9.747314453125e-06, + "step": 6388, + "training_step_time": 0.11850690841674805 + }, + { + "epoch": 9.74884033203125e-06, + "model_forward_time": 0.02434372901916504, + "step": 6389 + }, + { + "epoch": 9.74884033203125e-06, + "step": 6389, + "training_step_time": 0.1134023666381836 + }, + { + "epoch": 9.7503662109375e-06, + "grad_norm": 0.6585614085197449, + "learning_rate": 9.291031969832026e-05, + "loss": 0.088, + "step": 6390 + }, + { + "epoch": 9.7503662109375e-06, + "model_forward_time": 0.02542710304260254, + "step": 6390 + }, + { + "epoch": 9.7503662109375e-06, + "step": 6390, + "training_step_time": 0.11016464233398438 + }, + { + "epoch": 9.75189208984375e-06, + "model_forward_time": 0.025087833404541016, + "step": 6391 + }, + { + "epoch": 9.75189208984375e-06, + "step": 6391, + "training_step_time": 0.19121432304382324 + }, + { + "epoch": 9.75341796875e-06, + "model_forward_time": 0.024791240692138672, + "step": 6392 + }, + { + "epoch": 9.75341796875e-06, + "step": 6392, + "training_step_time": 0.10965704917907715 + }, + { + "epoch": 9.75494384765625e-06, + "model_forward_time": 0.0244753360748291, + "step": 6393 + }, + { + "epoch": 9.75494384765625e-06, + "step": 6393, + "training_step_time": 0.11123037338256836 + }, + { + "epoch": 9.7564697265625e-06, + "model_forward_time": 0.025005340576171875, + "step": 6394 + }, + { + "epoch": 9.7564697265625e-06, + "step": 6394, + "training_step_time": 0.11133456230163574 + }, + { + "epoch": 9.75799560546875e-06, + "model_forward_time": 0.025513887405395508, + "step": 6395 + }, + { + "epoch": 9.75799560546875e-06, + "step": 6395, + "training_step_time": 0.11153912544250488 + }, + { + "epoch": 9.759521484375e-06, + "model_forward_time": 0.025590896606445312, + "step": 6396 + }, + { + "epoch": 9.759521484375e-06, + "step": 6396, + "training_step_time": 0.11120057106018066 + }, + { + "epoch": 9.76104736328125e-06, + "model_forward_time": 0.025491952896118164, + "step": 6397 + }, + { + "epoch": 9.76104736328125e-06, + "step": 6397, + "training_step_time": 0.11074995994567871 + }, + { + "epoch": 9.7625732421875e-06, + "model_forward_time": 0.025948524475097656, + "step": 6398 + }, + { + "epoch": 9.7625732421875e-06, + "step": 6398, + "training_step_time": 0.11573386192321777 + }, + { + "epoch": 9.76409912109375e-06, + "model_forward_time": 0.024886131286621094, + "step": 6399 + }, + { + "epoch": 9.76409912109375e-06, + "step": 6399, + "training_step_time": 0.11252522468566895 + }, + { + "epoch": 9.765625e-06, + "grad_norm": 0.4097931981086731, + "learning_rate": 9.288200250260836e-05, + "loss": 0.0853, + "step": 6400 + }, + { + "epoch": 9.765625e-06, + "model_forward_time": 0.02485060691833496, + "step": 6400 + }, + { + "epoch": 9.765625e-06, + "step": 6400, + "training_step_time": 0.1148531436920166 + }, + { + "epoch": 9.76715087890625e-06, + "model_forward_time": 0.025554180145263672, + "step": 6401 + }, + { + "epoch": 9.76715087890625e-06, + "step": 6401, + "training_step_time": 0.12636327743530273 + }, + { + "epoch": 9.7686767578125e-06, + "model_forward_time": 0.02586197853088379, + "step": 6402 + }, + { + "epoch": 9.7686767578125e-06, + "step": 6402, + "training_step_time": 0.11365318298339844 + }, + { + "epoch": 9.77020263671875e-06, + "model_forward_time": 0.025874853134155273, + "step": 6403 + }, + { + "epoch": 9.77020263671875e-06, + "step": 6403, + "training_step_time": 0.21921563148498535 + }, + { + "epoch": 9.771728515625e-06, + "model_forward_time": 0.02479720115661621, + "step": 6404 + }, + { + "epoch": 9.771728515625e-06, + "step": 6404, + "training_step_time": 0.10559582710266113 + }, + { + "epoch": 9.77325439453125e-06, + "model_forward_time": 0.024697542190551758, + "step": 6405 + }, + { + "epoch": 9.77325439453125e-06, + "step": 6405, + "training_step_time": 0.1693267822265625 + }, + { + "epoch": 9.7747802734375e-06, + "model_forward_time": 0.024753570556640625, + "step": 6406 + }, + { + "epoch": 9.7747802734375e-06, + "step": 6406, + "training_step_time": 0.16687965393066406 + }, + { + "epoch": 9.77630615234375e-06, + "model_forward_time": 0.024847745895385742, + "step": 6407 + }, + { + "epoch": 9.77630615234375e-06, + "step": 6407, + "training_step_time": 0.10904407501220703 + }, + { + "epoch": 9.77783203125e-06, + "model_forward_time": 0.025545597076416016, + "step": 6408 + }, + { + "epoch": 9.77783203125e-06, + "step": 6408, + "training_step_time": 0.18384981155395508 + }, + { + "epoch": 9.77935791015625e-06, + "model_forward_time": 0.024743318557739258, + "step": 6409 + }, + { + "epoch": 9.77935791015625e-06, + "step": 6409, + "training_step_time": 0.1584453582763672 + }, + { + "epoch": 9.7808837890625e-06, + "grad_norm": 0.3660637438297272, + "learning_rate": 9.285363320121992e-05, + "loss": 0.0619, + "step": 6410 + }, + { + "epoch": 9.7808837890625e-06, + "model_forward_time": 0.02489757537841797, + "step": 6410 + }, + { + "epoch": 9.7808837890625e-06, + "step": 6410, + "training_step_time": 0.11624026298522949 + }, + { + "epoch": 9.78240966796875e-06, + "model_forward_time": 0.02508831024169922, + "step": 6411 + }, + { + "epoch": 9.78240966796875e-06, + "step": 6411, + "training_step_time": 0.1113882064819336 + }, + { + "epoch": 9.783935546875e-06, + "model_forward_time": 0.02545452117919922, + "step": 6412 + }, + { + "epoch": 9.783935546875e-06, + "step": 6412, + "training_step_time": 0.10992646217346191 + }, + { + "epoch": 9.78546142578125e-06, + "model_forward_time": 0.02508544921875, + "step": 6413 + }, + { + "epoch": 9.78546142578125e-06, + "step": 6413, + "training_step_time": 0.10876345634460449 + }, + { + "epoch": 9.7869873046875e-06, + "model_forward_time": 0.025025606155395508, + "step": 6414 + }, + { + "epoch": 9.7869873046875e-06, + "step": 6414, + "training_step_time": 0.11002326011657715 + }, + { + "epoch": 9.78851318359375e-06, + "model_forward_time": 0.025349855422973633, + "step": 6415 + }, + { + "epoch": 9.78851318359375e-06, + "step": 6415, + "training_step_time": 0.10778021812438965 + }, + { + "epoch": 9.7900390625e-06, + "model_forward_time": 0.02498459815979004, + "step": 6416 + }, + { + "epoch": 9.7900390625e-06, + "step": 6416, + "training_step_time": 0.10670876502990723 + }, + { + "epoch": 9.79156494140625e-06, + "model_forward_time": 0.02531147003173828, + "step": 6417 + }, + { + "epoch": 9.79156494140625e-06, + "step": 6417, + "training_step_time": 0.10906839370727539 + }, + { + "epoch": 9.7930908203125e-06, + "model_forward_time": 0.025927305221557617, + "step": 6418 + }, + { + "epoch": 9.7930908203125e-06, + "step": 6418, + "training_step_time": 0.10847711563110352 + }, + { + "epoch": 9.79461669921875e-06, + "model_forward_time": 0.025086402893066406, + "step": 6419 + }, + { + "epoch": 9.79461669921875e-06, + "step": 6419, + "training_step_time": 0.1091775894165039 + }, + { + "epoch": 9.796142578125e-06, + "grad_norm": 0.2623238265514374, + "learning_rate": 9.282521182862629e-05, + "loss": 0.0743, + "step": 6420 + }, + { + "epoch": 9.796142578125e-06, + "model_forward_time": 0.02553248405456543, + "step": 6420 + }, + { + "epoch": 9.796142578125e-06, + "step": 6420, + "training_step_time": 0.11011600494384766 + }, + { + "epoch": 9.79766845703125e-06, + "model_forward_time": 0.02510547637939453, + "step": 6421 + }, + { + "epoch": 9.79766845703125e-06, + "step": 6421, + "training_step_time": 0.10996389389038086 + }, + { + "epoch": 9.7991943359375e-06, + "model_forward_time": 0.02573561668395996, + "step": 6422 + }, + { + "epoch": 9.7991943359375e-06, + "step": 6422, + "training_step_time": 0.10902929306030273 + }, + { + "epoch": 9.80072021484375e-06, + "model_forward_time": 0.025168180465698242, + "step": 6423 + }, + { + "epoch": 9.80072021484375e-06, + "step": 6423, + "training_step_time": 0.10947895050048828 + }, + { + "epoch": 9.80224609375e-06, + "model_forward_time": 0.0266265869140625, + "step": 6424 + }, + { + "epoch": 9.80224609375e-06, + "step": 6424, + "training_step_time": 0.11413717269897461 + }, + { + "epoch": 9.80377197265625e-06, + "model_forward_time": 0.025608301162719727, + "step": 6425 + }, + { + "epoch": 9.80377197265625e-06, + "step": 6425, + "training_step_time": 0.10844206809997559 + }, + { + "epoch": 9.8052978515625e-06, + "model_forward_time": 0.025483131408691406, + "step": 6426 + }, + { + "epoch": 9.8052978515625e-06, + "step": 6426, + "training_step_time": 0.17925524711608887 + }, + { + "epoch": 9.80682373046875e-06, + "model_forward_time": 0.024312257766723633, + "step": 6427 + }, + { + "epoch": 9.80682373046875e-06, + "step": 6427, + "training_step_time": 0.12234783172607422 + }, + { + "epoch": 9.808349609375e-06, + "model_forward_time": 0.02454996109008789, + "step": 6428 + }, + { + "epoch": 9.808349609375e-06, + "step": 6428, + "training_step_time": 0.1272134780883789 + }, + { + "epoch": 9.80987548828125e-06, + "model_forward_time": 0.025434494018554688, + "step": 6429 + }, + { + "epoch": 9.80987548828125e-06, + "step": 6429, + "training_step_time": 0.15738320350646973 + }, + { + "epoch": 9.8114013671875e-06, + "grad_norm": 0.4843827784061432, + "learning_rate": 9.279673841936214e-05, + "loss": 0.0752, + "step": 6430 + }, + { + "epoch": 9.8114013671875e-06, + "model_forward_time": 0.024613618850708008, + "step": 6430 + }, + { + "epoch": 9.8114013671875e-06, + "step": 6430, + "training_step_time": 0.22339153289794922 + }, + { + "epoch": 9.81292724609375e-06, + "model_forward_time": 0.024805307388305664, + "step": 6431 + }, + { + "epoch": 9.81292724609375e-06, + "step": 6431, + "training_step_time": 0.17174720764160156 + }, + { + "epoch": 9.814453125e-06, + "model_forward_time": 0.024548768997192383, + "step": 6432 + }, + { + "epoch": 9.814453125e-06, + "step": 6432, + "training_step_time": 0.11900544166564941 + }, + { + "epoch": 9.81597900390625e-06, + "model_forward_time": 0.022927522659301758, + "step": 6433 + }, + { + "epoch": 9.81597900390625e-06, + "step": 6433, + "training_step_time": 0.12465405464172363 + }, + { + "epoch": 9.8175048828125e-06, + "model_forward_time": 0.025338411331176758, + "step": 6434 + }, + { + "epoch": 9.8175048828125e-06, + "step": 6434, + "training_step_time": 0.18746566772460938 + }, + { + "epoch": 9.81903076171875e-06, + "model_forward_time": 0.024634122848510742, + "step": 6435 + }, + { + "epoch": 9.81903076171875e-06, + "step": 6435, + "training_step_time": 0.10889267921447754 + }, + { + "epoch": 9.820556640625e-06, + "model_forward_time": 0.024967432022094727, + "step": 6436 + }, + { + "epoch": 9.820556640625e-06, + "step": 6436, + "training_step_time": 0.11101484298706055 + }, + { + "epoch": 9.82208251953125e-06, + "model_forward_time": 0.025379419326782227, + "step": 6437 + }, + { + "epoch": 9.82208251953125e-06, + "step": 6437, + "training_step_time": 0.11189937591552734 + }, + { + "epoch": 9.8236083984375e-06, + "model_forward_time": 0.025327444076538086, + "step": 6438 + }, + { + "epoch": 9.8236083984375e-06, + "step": 6438, + "training_step_time": 0.11257028579711914 + }, + { + "epoch": 9.82513427734375e-06, + "model_forward_time": 0.02582383155822754, + "step": 6439 + }, + { + "epoch": 9.82513427734375e-06, + "step": 6439, + "training_step_time": 0.11294841766357422 + }, + { + "epoch": 9.82666015625e-06, + "grad_norm": 0.3442363142967224, + "learning_rate": 9.276821300802534e-05, + "loss": 0.0761, + "step": 6440 + }, + { + "epoch": 9.82666015625e-06, + "model_forward_time": 0.025915145874023438, + "step": 6440 + }, + { + "epoch": 9.82666015625e-06, + "step": 6440, + "training_step_time": 0.10849928855895996 + }, + { + "epoch": 9.82818603515625e-06, + "model_forward_time": 0.025716066360473633, + "step": 6441 + }, + { + "epoch": 9.82818603515625e-06, + "step": 6441, + "training_step_time": 0.11442422866821289 + }, + { + "epoch": 9.8297119140625e-06, + "model_forward_time": 0.02488231658935547, + "step": 6442 + }, + { + "epoch": 9.8297119140625e-06, + "step": 6442, + "training_step_time": 0.11401581764221191 + }, + { + "epoch": 9.83123779296875e-06, + "model_forward_time": 0.02488875389099121, + "step": 6443 + }, + { + "epoch": 9.83123779296875e-06, + "step": 6443, + "training_step_time": 0.10969710350036621 + }, + { + "epoch": 9.832763671875e-06, + "model_forward_time": 0.025012493133544922, + "step": 6444 + }, + { + "epoch": 9.832763671875e-06, + "step": 6444, + "training_step_time": 0.11367964744567871 + }, + { + "epoch": 9.83428955078125e-06, + "model_forward_time": 0.025678634643554688, + "step": 6445 + }, + { + "epoch": 9.83428955078125e-06, + "step": 6445, + "training_step_time": 0.1939716339111328 + }, + { + "epoch": 9.8358154296875e-06, + "model_forward_time": 0.024347543716430664, + "step": 6446 + }, + { + "epoch": 9.8358154296875e-06, + "step": 6446, + "training_step_time": 0.11507987976074219 + }, + { + "epoch": 9.83734130859375e-06, + "model_forward_time": 0.02462291717529297, + "step": 6447 + }, + { + "epoch": 9.83734130859375e-06, + "step": 6447, + "training_step_time": 0.17828774452209473 + }, + { + "epoch": 9.8388671875e-06, + "model_forward_time": 0.02552628517150879, + "step": 6448 + }, + { + "epoch": 9.8388671875e-06, + "step": 6448, + "training_step_time": 0.1516726016998291 + }, + { + "epoch": 9.84039306640625e-06, + "model_forward_time": 0.024512767791748047, + "step": 6449 + }, + { + "epoch": 9.84039306640625e-06, + "step": 6449, + "training_step_time": 0.18554449081420898 + }, + { + "epoch": 9.8419189453125e-06, + "grad_norm": 0.391694039106369, + "learning_rate": 9.273963562927695e-05, + "loss": 0.0723, + "step": 6450 + }, + { + "epoch": 9.8419189453125e-06, + "model_forward_time": 0.024385452270507812, + "step": 6450 + }, + { + "epoch": 9.8419189453125e-06, + "step": 6450, + "training_step_time": 0.14000201225280762 + }, + { + "epoch": 9.84344482421875e-06, + "model_forward_time": 0.024399757385253906, + "step": 6451 + }, + { + "epoch": 9.84344482421875e-06, + "step": 6451, + "training_step_time": 0.16803574562072754 + }, + { + "epoch": 9.844970703125e-06, + "model_forward_time": 0.024952173233032227, + "step": 6452 + }, + { + "epoch": 9.844970703125e-06, + "step": 6452, + "training_step_time": 0.13135313987731934 + }, + { + "epoch": 9.84649658203125e-06, + "model_forward_time": 0.025227069854736328, + "step": 6453 + }, + { + "epoch": 9.84649658203125e-06, + "step": 6453, + "training_step_time": 0.11016607284545898 + }, + { + "epoch": 9.8480224609375e-06, + "model_forward_time": 0.025941848754882812, + "step": 6454 + }, + { + "epoch": 9.8480224609375e-06, + "step": 6454, + "training_step_time": 0.11260294914245605 + }, + { + "epoch": 9.84954833984375e-06, + "model_forward_time": 0.02573370933532715, + "step": 6455 + }, + { + "epoch": 9.84954833984375e-06, + "step": 6455, + "training_step_time": 0.10840106010437012 + }, + { + "epoch": 9.85107421875e-06, + "model_forward_time": 0.025121688842773438, + "step": 6456 + }, + { + "epoch": 9.85107421875e-06, + "step": 6456, + "training_step_time": 0.11309599876403809 + }, + { + "epoch": 9.85260009765625e-06, + "model_forward_time": 0.025045156478881836, + "step": 6457 + }, + { + "epoch": 9.85260009765625e-06, + "step": 6457, + "training_step_time": 0.11169910430908203 + }, + { + "epoch": 9.8541259765625e-06, + "model_forward_time": 0.02533125877380371, + "step": 6458 + }, + { + "epoch": 9.8541259765625e-06, + "step": 6458, + "training_step_time": 0.11044979095458984 + }, + { + "epoch": 9.85565185546875e-06, + "model_forward_time": 0.024950027465820312, + "step": 6459 + }, + { + "epoch": 9.85565185546875e-06, + "step": 6459, + "training_step_time": 0.10910606384277344 + }, + { + "epoch": 9.857177734375e-06, + "grad_norm": 0.3379170298576355, + "learning_rate": 9.27110063178412e-05, + "loss": 0.0729, + "step": 6460 + }, + { + "epoch": 9.857177734375e-06, + "model_forward_time": 0.025556087493896484, + "step": 6460 + }, + { + "epoch": 9.857177734375e-06, + "step": 6460, + "training_step_time": 0.11236000061035156 + }, + { + "epoch": 9.85870361328125e-06, + "model_forward_time": 0.025185108184814453, + "step": 6461 + }, + { + "epoch": 9.85870361328125e-06, + "step": 6461, + "training_step_time": 0.10976648330688477 + }, + { + "epoch": 9.8602294921875e-06, + "model_forward_time": 0.02523016929626465, + "step": 6462 + }, + { + "epoch": 9.8602294921875e-06, + "step": 6462, + "training_step_time": 0.11237835884094238 + }, + { + "epoch": 9.86175537109375e-06, + "model_forward_time": 0.02500176429748535, + "step": 6463 + }, + { + "epoch": 9.86175537109375e-06, + "step": 6463, + "training_step_time": 0.11214208602905273 + }, + { + "epoch": 9.86328125e-06, + "model_forward_time": 0.025069236755371094, + "step": 6464 + }, + { + "epoch": 9.86328125e-06, + "step": 6464, + "training_step_time": 0.11614775657653809 + }, + { + "epoch": 9.86480712890625e-06, + "model_forward_time": 0.025235891342163086, + "step": 6465 + }, + { + "epoch": 9.86480712890625e-06, + "step": 6465, + "training_step_time": 0.11234688758850098 + }, + { + "epoch": 9.8663330078125e-06, + "model_forward_time": 0.025141000747680664, + "step": 6466 + }, + { + "epoch": 9.8663330078125e-06, + "step": 6466, + "training_step_time": 0.10916805267333984 + }, + { + "epoch": 9.86785888671875e-06, + "model_forward_time": 0.025653839111328125, + "step": 6467 + }, + { + "epoch": 9.86785888671875e-06, + "step": 6467, + "training_step_time": 0.10910892486572266 + }, + { + "epoch": 9.869384765625e-06, + "model_forward_time": 0.025215864181518555, + "step": 6468 + }, + { + "epoch": 9.869384765625e-06, + "step": 6468, + "training_step_time": 0.11259675025939941 + }, + { + "epoch": 9.87091064453125e-06, + "model_forward_time": 0.02528095245361328, + "step": 6469 + }, + { + "epoch": 9.87091064453125e-06, + "step": 6469, + "training_step_time": 0.10969185829162598 + }, + { + "epoch": 9.8724365234375e-06, + "grad_norm": 0.5433868169784546, + "learning_rate": 9.268232510850539e-05, + "loss": 0.0655, + "step": 6470 + }, + { + "epoch": 9.8724365234375e-06, + "model_forward_time": 0.02533721923828125, + "step": 6470 + }, + { + "epoch": 9.8724365234375e-06, + "step": 6470, + "training_step_time": 0.11114692687988281 + }, + { + "epoch": 9.87396240234375e-06, + "model_forward_time": 0.025383949279785156, + "step": 6471 + }, + { + "epoch": 9.87396240234375e-06, + "step": 6471, + "training_step_time": 0.18553638458251953 + }, + { + "epoch": 9.87548828125e-06, + "model_forward_time": 0.024715662002563477, + "step": 6472 + }, + { + "epoch": 9.87548828125e-06, + "step": 6472, + "training_step_time": 0.12156844139099121 + }, + { + "epoch": 9.87701416015625e-06, + "model_forward_time": 0.024167776107788086, + "step": 6473 + }, + { + "epoch": 9.87701416015625e-06, + "step": 6473, + "training_step_time": 0.1274569034576416 + }, + { + "epoch": 9.8785400390625e-06, + "model_forward_time": 0.024987459182739258, + "step": 6474 + }, + { + "epoch": 9.8785400390625e-06, + "step": 6474, + "training_step_time": 0.19722533226013184 + }, + { + "epoch": 9.88006591796875e-06, + "model_forward_time": 0.024302005767822266, + "step": 6475 + }, + { + "epoch": 9.88006591796875e-06, + "step": 6475, + "training_step_time": 0.1802213191986084 + }, + { + "epoch": 9.881591796875e-06, + "model_forward_time": 0.024502992630004883, + "step": 6476 + }, + { + "epoch": 9.881591796875e-06, + "step": 6476, + "training_step_time": 0.18155741691589355 + }, + { + "epoch": 9.88311767578125e-06, + "model_forward_time": 0.02438521385192871, + "step": 6477 + }, + { + "epoch": 9.88311767578125e-06, + "step": 6477, + "training_step_time": 0.11775541305541992 + }, + { + "epoch": 9.8846435546875e-06, + "model_forward_time": 0.02451181411743164, + "step": 6478 + }, + { + "epoch": 9.8846435546875e-06, + "step": 6478, + "training_step_time": 0.10991883277893066 + }, + { + "epoch": 9.88616943359375e-06, + "model_forward_time": 0.02526998519897461, + "step": 6479 + }, + { + "epoch": 9.88616943359375e-06, + "step": 6479, + "training_step_time": 0.20051026344299316 + }, + { + "epoch": 9.8876953125e-06, + "grad_norm": 0.45038890838623047, + "learning_rate": 9.265359203611987e-05, + "loss": 0.0694, + "step": 6480 + }, + { + "epoch": 9.8876953125e-06, + "model_forward_time": 0.024538040161132812, + "step": 6480 + }, + { + "epoch": 9.8876953125e-06, + "step": 6480, + "training_step_time": 0.1041109561920166 + }, + { + "epoch": 9.88922119140625e-06, + "model_forward_time": 0.025287151336669922, + "step": 6481 + }, + { + "epoch": 9.88922119140625e-06, + "step": 6481, + "training_step_time": 0.10660171508789062 + }, + { + "epoch": 9.8907470703125e-06, + "model_forward_time": 0.025284528732299805, + "step": 6482 + }, + { + "epoch": 9.8907470703125e-06, + "step": 6482, + "training_step_time": 0.10610008239746094 + }, + { + "epoch": 9.89227294921875e-06, + "model_forward_time": 0.025357484817504883, + "step": 6483 + }, + { + "epoch": 9.89227294921875e-06, + "step": 6483, + "training_step_time": 0.10704517364501953 + }, + { + "epoch": 9.893798828125e-06, + "model_forward_time": 0.028635740280151367, + "step": 6484 + }, + { + "epoch": 9.893798828125e-06, + "step": 6484, + "training_step_time": 0.11926507949829102 + }, + { + "epoch": 9.89532470703125e-06, + "model_forward_time": 0.025227069854736328, + "step": 6485 + }, + { + "epoch": 9.89532470703125e-06, + "step": 6485, + "training_step_time": 0.11953353881835938 + }, + { + "epoch": 9.8968505859375e-06, + "model_forward_time": 0.02532362937927246, + "step": 6486 + }, + { + "epoch": 9.8968505859375e-06, + "step": 6486, + "training_step_time": 0.10961198806762695 + }, + { + "epoch": 9.89837646484375e-06, + "model_forward_time": 0.025210857391357422, + "step": 6487 + }, + { + "epoch": 9.89837646484375e-06, + "step": 6487, + "training_step_time": 0.10686731338500977 + }, + { + "epoch": 9.89990234375e-06, + "model_forward_time": 0.025217533111572266, + "step": 6488 + }, + { + "epoch": 9.89990234375e-06, + "step": 6488, + "training_step_time": 0.1099550724029541 + }, + { + "epoch": 9.90142822265625e-06, + "model_forward_time": 0.02546525001525879, + "step": 6489 + }, + { + "epoch": 9.90142822265625e-06, + "step": 6489, + "training_step_time": 0.19519448280334473 + }, + { + "epoch": 9.9029541015625e-06, + "grad_norm": 0.39201900362968445, + "learning_rate": 9.262480713559808e-05, + "loss": 0.091, + "step": 6490 + }, + { + "epoch": 9.9029541015625e-06, + "model_forward_time": 0.024413108825683594, + "step": 6490 + }, + { + "epoch": 9.9029541015625e-06, + "step": 6490, + "training_step_time": 0.11302399635314941 + }, + { + "epoch": 9.90447998046875e-06, + "model_forward_time": 0.024483203887939453, + "step": 6491 + }, + { + "epoch": 9.90447998046875e-06, + "step": 6491, + "training_step_time": 0.10732817649841309 + }, + { + "epoch": 9.906005859375e-06, + "model_forward_time": 0.025440216064453125, + "step": 6492 + }, + { + "epoch": 9.906005859375e-06, + "step": 6492, + "training_step_time": 0.11038470268249512 + }, + { + "epoch": 9.90753173828125e-06, + "model_forward_time": 0.024969100952148438, + "step": 6493 + }, + { + "epoch": 9.90753173828125e-06, + "step": 6493, + "training_step_time": 0.19565677642822266 + }, + { + "epoch": 9.9090576171875e-06, + "model_forward_time": 0.02492666244506836, + "step": 6494 + }, + { + "epoch": 9.9090576171875e-06, + "step": 6494, + "training_step_time": 0.1360912322998047 + }, + { + "epoch": 9.91058349609375e-06, + "model_forward_time": 0.025622129440307617, + "step": 6495 + }, + { + "epoch": 9.91058349609375e-06, + "step": 6495, + "training_step_time": 0.11145210266113281 + }, + { + "epoch": 9.912109375e-06, + "model_forward_time": 0.026498079299926758, + "step": 6496 + }, + { + "epoch": 9.912109375e-06, + "step": 6496, + "training_step_time": 0.17135167121887207 + }, + { + "epoch": 9.91363525390625e-06, + "model_forward_time": 0.02469658851623535, + "step": 6497 + }, + { + "epoch": 9.91363525390625e-06, + "step": 6497, + "training_step_time": 0.17390871047973633 + }, + { + "epoch": 9.9151611328125e-06, + "model_forward_time": 0.0252382755279541, + "step": 6498 + }, + { + "epoch": 9.9151611328125e-06, + "step": 6498, + "training_step_time": 0.10881257057189941 + }, + { + "epoch": 9.91668701171875e-06, + "model_forward_time": 0.02506256103515625, + "step": 6499 + }, + { + "epoch": 9.91668701171875e-06, + "step": 6499, + "training_step_time": 0.11243844032287598 + }, + { + "epoch": 9.918212890625e-06, + "grad_norm": 0.5705287456512451, + "learning_rate": 9.259597044191636e-05, + "loss": 0.066, + "step": 6500 + }, + { + "epoch": 9.918212890625e-06, + "model_forward_time": 0.02579665184020996, + "step": 6500 + }, + { + "epoch": 9.918212890625e-06, + "step": 6500, + "training_step_time": 0.10969090461730957 + }, + { + "epoch": 9.91973876953125e-06, + "model_forward_time": 0.025682926177978516, + "step": 6501 + }, + { + "epoch": 9.91973876953125e-06, + "step": 6501, + "training_step_time": 0.10774374008178711 + }, + { + "epoch": 9.9212646484375e-06, + "model_forward_time": 0.027169227600097656, + "step": 6502 + }, + { + "epoch": 9.9212646484375e-06, + "step": 6502, + "training_step_time": 0.10848450660705566 + }, + { + "epoch": 9.92279052734375e-06, + "model_forward_time": 0.02458477020263672, + "step": 6503 + }, + { + "epoch": 9.92279052734375e-06, + "step": 6503, + "training_step_time": 0.10886192321777344 + }, + { + "epoch": 9.92431640625e-06, + "model_forward_time": 0.02572774887084961, + "step": 6504 + }, + { + "epoch": 9.92431640625e-06, + "step": 6504, + "training_step_time": 0.10667586326599121 + }, + { + "epoch": 9.92584228515625e-06, + "model_forward_time": 0.025885343551635742, + "step": 6505 + }, + { + "epoch": 9.92584228515625e-06, + "step": 6505, + "training_step_time": 0.11089348793029785 + }, + { + "epoch": 9.9273681640625e-06, + "model_forward_time": 0.025759458541870117, + "step": 6506 + }, + { + "epoch": 9.9273681640625e-06, + "step": 6506, + "training_step_time": 0.11117839813232422 + }, + { + "epoch": 9.92889404296875e-06, + "model_forward_time": 0.02552652359008789, + "step": 6507 + }, + { + "epoch": 9.92889404296875e-06, + "step": 6507, + "training_step_time": 0.10968399047851562 + }, + { + "epoch": 9.930419921875e-06, + "model_forward_time": 0.025144100189208984, + "step": 6508 + }, + { + "epoch": 9.930419921875e-06, + "step": 6508, + "training_step_time": 0.16504144668579102 + }, + { + "epoch": 9.93194580078125e-06, + "model_forward_time": 0.024913311004638672, + "step": 6509 + }, + { + "epoch": 9.93194580078125e-06, + "step": 6509, + "training_step_time": 0.17327237129211426 + }, + { + "epoch": 9.9334716796875e-06, + "grad_norm": 0.33714815974235535, + "learning_rate": 9.256708199011401e-05, + "loss": 0.0672, + "step": 6510 + }, + { + "epoch": 9.9334716796875e-06, + "model_forward_time": 0.024626493453979492, + "step": 6510 + }, + { + "epoch": 9.9334716796875e-06, + "step": 6510, + "training_step_time": 0.16625308990478516 + }, + { + "epoch": 9.93499755859375e-06, + "model_forward_time": 0.024190902709960938, + "step": 6511 + }, + { + "epoch": 9.93499755859375e-06, + "step": 6511, + "training_step_time": 0.16745352745056152 + }, + { + "epoch": 9.9365234375e-06, + "model_forward_time": 0.024586915969848633, + "step": 6512 + }, + { + "epoch": 9.9365234375e-06, + "step": 6512, + "training_step_time": 0.15546774864196777 + }, + { + "epoch": 9.93804931640625e-06, + "model_forward_time": 0.02459430694580078, + "step": 6513 + }, + { + "epoch": 9.93804931640625e-06, + "step": 6513, + "training_step_time": 0.17969632148742676 + }, + { + "epoch": 9.9395751953125e-06, + "model_forward_time": 0.024633407592773438, + "step": 6514 + }, + { + "epoch": 9.9395751953125e-06, + "step": 6514, + "training_step_time": 0.1299731731414795 + }, + { + "epoch": 9.94110107421875e-06, + "model_forward_time": 0.02414870262145996, + "step": 6515 + }, + { + "epoch": 9.94110107421875e-06, + "step": 6515, + "training_step_time": 0.12489056587219238 + }, + { + "epoch": 9.942626953125e-06, + "model_forward_time": 0.0241239070892334, + "step": 6516 + }, + { + "epoch": 9.942626953125e-06, + "step": 6516, + "training_step_time": 0.19248151779174805 + }, + { + "epoch": 9.94415283203125e-06, + "model_forward_time": 0.02426314353942871, + "step": 6517 + }, + { + "epoch": 9.94415283203125e-06, + "step": 6517, + "training_step_time": 0.1783006191253662 + }, + { + "epoch": 9.9456787109375e-06, + "model_forward_time": 0.024959325790405273, + "step": 6518 + }, + { + "epoch": 9.9456787109375e-06, + "step": 6518, + "training_step_time": 0.18056678771972656 + }, + { + "epoch": 9.94720458984375e-06, + "model_forward_time": 0.02393960952758789, + "step": 6519 + }, + { + "epoch": 9.94720458984375e-06, + "step": 6519, + "training_step_time": 0.12206649780273438 + }, + { + "epoch": 9.94873046875e-06, + "grad_norm": 0.2646665871143341, + "learning_rate": 9.253814181529323e-05, + "loss": 0.0717, + "step": 6520 + }, + { + "epoch": 9.94873046875e-06, + "model_forward_time": 0.024686813354492188, + "step": 6520 + }, + { + "epoch": 9.94873046875e-06, + "step": 6520, + "training_step_time": 0.15739727020263672 + }, + { + "epoch": 9.95025634765625e-06, + "model_forward_time": 0.02528524398803711, + "step": 6521 + }, + { + "epoch": 9.95025634765625e-06, + "step": 6521, + "training_step_time": 0.15546536445617676 + }, + { + "epoch": 9.9517822265625e-06, + "model_forward_time": 0.024883270263671875, + "step": 6522 + }, + { + "epoch": 9.9517822265625e-06, + "step": 6522, + "training_step_time": 0.10444355010986328 + }, + { + "epoch": 9.95330810546875e-06, + "model_forward_time": 0.0251767635345459, + "step": 6523 + }, + { + "epoch": 9.95330810546875e-06, + "step": 6523, + "training_step_time": 0.1049337387084961 + }, + { + "epoch": 9.954833984375e-06, + "model_forward_time": 0.02523517608642578, + "step": 6524 + }, + { + "epoch": 9.954833984375e-06, + "step": 6524, + "training_step_time": 0.10724020004272461 + }, + { + "epoch": 9.95635986328125e-06, + "model_forward_time": 0.025401592254638672, + "step": 6525 + }, + { + "epoch": 9.95635986328125e-06, + "step": 6525, + "training_step_time": 0.11230969429016113 + }, + { + "epoch": 9.9578857421875e-06, + "model_forward_time": 0.02580094337463379, + "step": 6526 + }, + { + "epoch": 9.9578857421875e-06, + "step": 6526, + "training_step_time": 0.10893130302429199 + }, + { + "epoch": 9.95941162109375e-06, + "model_forward_time": 0.025339841842651367, + "step": 6527 + }, + { + "epoch": 9.95941162109375e-06, + "step": 6527, + "training_step_time": 0.10904169082641602 + }, + { + "epoch": 9.9609375e-06, + "model_forward_time": 0.02503228187561035, + "step": 6528 + }, + { + "epoch": 9.9609375e-06, + "step": 6528, + "training_step_time": 0.11194920539855957 + }, + { + "epoch": 9.96246337890625e-06, + "model_forward_time": 0.025620698928833008, + "step": 6529 + }, + { + "epoch": 9.96246337890625e-06, + "step": 6529, + "training_step_time": 0.17990803718566895 + }, + { + "epoch": 9.9639892578125e-06, + "grad_norm": 0.42773929238319397, + "learning_rate": 9.250914995261905e-05, + "loss": 0.0704, + "step": 6530 + }, + { + "epoch": 9.9639892578125e-06, + "model_forward_time": 0.024761438369750977, + "step": 6530 + }, + { + "epoch": 9.9639892578125e-06, + "step": 6530, + "training_step_time": 0.19832634925842285 + }, + { + "epoch": 9.96551513671875e-06, + "model_forward_time": 0.02418804168701172, + "step": 6531 + }, + { + "epoch": 9.96551513671875e-06, + "step": 6531, + "training_step_time": 0.20506024360656738 + }, + { + "epoch": 9.967041015625e-06, + "model_forward_time": 0.024600505828857422, + "step": 6532 + }, + { + "epoch": 9.967041015625e-06, + "step": 6532, + "training_step_time": 0.19393205642700195 + }, + { + "epoch": 9.96856689453125e-06, + "model_forward_time": 0.02482318878173828, + "step": 6533 + }, + { + "epoch": 9.96856689453125e-06, + "step": 6533, + "training_step_time": 0.2174544334411621 + }, + { + "epoch": 9.9700927734375e-06, + "model_forward_time": 0.024936676025390625, + "step": 6534 + }, + { + "epoch": 9.9700927734375e-06, + "step": 6534, + "training_step_time": 0.19177937507629395 + }, + { + "epoch": 9.97161865234375e-06, + "model_forward_time": 0.02472662925720215, + "step": 6535 + }, + { + "epoch": 9.97161865234375e-06, + "step": 6535, + "training_step_time": 0.15485143661499023 + }, + { + "epoch": 9.97314453125e-06, + "model_forward_time": 0.02605581283569336, + "step": 6536 + }, + { + "epoch": 9.97314453125e-06, + "step": 6536, + "training_step_time": 0.18163514137268066 + }, + { + "epoch": 9.97467041015625e-06, + "model_forward_time": 0.02474212646484375, + "step": 6537 + }, + { + "epoch": 9.97467041015625e-06, + "step": 6537, + "training_step_time": 0.13070344924926758 + }, + { + "epoch": 9.9761962890625e-06, + "model_forward_time": 0.02474689483642578, + "step": 6538 + }, + { + "epoch": 9.9761962890625e-06, + "step": 6538, + "training_step_time": 0.12535762786865234 + }, + { + "epoch": 9.97772216796875e-06, + "model_forward_time": 0.025102853775024414, + "step": 6539 + }, + { + "epoch": 9.97772216796875e-06, + "step": 6539, + "training_step_time": 0.12402009963989258 + }, + { + "epoch": 9.979248046875e-06, + "grad_norm": 0.414929062128067, + "learning_rate": 9.248010643731935e-05, + "loss": 0.0835, + "step": 6540 + }, + { + "epoch": 9.979248046875e-06, + "model_forward_time": 0.024689435958862305, + "step": 6540 + }, + { + "epoch": 9.979248046875e-06, + "step": 6540, + "training_step_time": 0.11315751075744629 + }, + { + "epoch": 9.98077392578125e-06, + "model_forward_time": 0.025402069091796875, + "step": 6541 + }, + { + "epoch": 9.98077392578125e-06, + "step": 6541, + "training_step_time": 0.11596894264221191 + }, + { + "epoch": 9.9822998046875e-06, + "model_forward_time": 0.02563762664794922, + "step": 6542 + }, + { + "epoch": 9.9822998046875e-06, + "step": 6542, + "training_step_time": 0.11794137954711914 + }, + { + "epoch": 9.98382568359375e-06, + "model_forward_time": 0.025556325912475586, + "step": 6543 + }, + { + "epoch": 9.98382568359375e-06, + "step": 6543, + "training_step_time": 0.10900497436523438 + }, + { + "epoch": 9.9853515625e-06, + "model_forward_time": 0.02540898323059082, + "step": 6544 + }, + { + "epoch": 9.9853515625e-06, + "step": 6544, + "training_step_time": 0.11584973335266113 + }, + { + "epoch": 9.98687744140625e-06, + "model_forward_time": 0.025455951690673828, + "step": 6545 + }, + { + "epoch": 9.98687744140625e-06, + "step": 6545, + "training_step_time": 0.1203455924987793 + }, + { + "epoch": 9.9884033203125e-06, + "model_forward_time": 0.025952577590942383, + "step": 6546 + }, + { + "epoch": 9.9884033203125e-06, + "step": 6546, + "training_step_time": 0.10809850692749023 + }, + { + "epoch": 9.98992919921875e-06, + "model_forward_time": 0.025254249572753906, + "step": 6547 + }, + { + "epoch": 9.98992919921875e-06, + "step": 6547, + "training_step_time": 0.10940718650817871 + }, + { + "epoch": 9.991455078125e-06, + "model_forward_time": 0.02501678466796875, + "step": 6548 + }, + { + "epoch": 9.991455078125e-06, + "step": 6548, + "training_step_time": 0.10718178749084473 + }, + { + "epoch": 9.99298095703125e-06, + "model_forward_time": 0.025507211685180664, + "step": 6549 + }, + { + "epoch": 9.99298095703125e-06, + "step": 6549, + "training_step_time": 0.10961318016052246 + }, + { + "epoch": 9.9945068359375e-06, + "grad_norm": 0.34869563579559326, + "learning_rate": 9.24510113046847e-05, + "loss": 0.0621, + "step": 6550 + }, + { + "epoch": 9.9945068359375e-06, + "model_forward_time": 0.0255281925201416, + "step": 6550 + }, + { + "epoch": 9.9945068359375e-06, + "step": 6550, + "training_step_time": 0.11459684371948242 + }, + { + "epoch": 9.99603271484375e-06, + "model_forward_time": 0.02547907829284668, + "step": 6551 + }, + { + "epoch": 9.99603271484375e-06, + "step": 6551, + "training_step_time": 0.10655784606933594 + }, + { + "epoch": 9.99755859375e-06, + "model_forward_time": 0.025511741638183594, + "step": 6552 + }, + { + "epoch": 9.99755859375e-06, + "step": 6552, + "training_step_time": 0.11295914649963379 + }, + { + "epoch": 9.99908447265625e-06, + "model_forward_time": 0.025484561920166016, + "step": 6553 + }, + { + "epoch": 9.99908447265625e-06, + "step": 6553, + "training_step_time": 0.165130615234375 + }, + { + "epoch": 1.00006103515625e-05, + "model_forward_time": 0.02457904815673828, + "step": 6554 + }, + { + "epoch": 1.00006103515625e-05, + "step": 6554, + "training_step_time": 0.17719817161560059 + }, + { + "epoch": 1.000213623046875e-05, + "model_forward_time": 0.024677753448486328, + "step": 6555 + }, + { + "epoch": 1.000213623046875e-05, + "step": 6555, + "training_step_time": 0.22609853744506836 + }, + { + "epoch": 1.0003662109375e-05, + "model_forward_time": 0.025181293487548828, + "step": 6556 + }, + { + "epoch": 1.0003662109375e-05, + "step": 6556, + "training_step_time": 0.16385102272033691 + }, + { + "epoch": 1.000518798828125e-05, + "model_forward_time": 0.02461409568786621, + "step": 6557 + }, + { + "epoch": 1.000518798828125e-05, + "step": 6557, + "training_step_time": 0.20233583450317383 + }, + { + "epoch": 1.00067138671875e-05, + "model_forward_time": 0.025184154510498047, + "step": 6558 + }, + { + "epoch": 1.00067138671875e-05, + "step": 6558, + "training_step_time": 0.14763879776000977 + }, + { + "epoch": 1.000823974609375e-05, + "model_forward_time": 0.024624347686767578, + "step": 6559 + }, + { + "epoch": 1.000823974609375e-05, + "step": 6559, + "training_step_time": 0.10206198692321777 + }, + { + "epoch": 1.0009765625e-05, + "grad_norm": 0.5837518572807312, + "learning_rate": 9.242186459006845e-05, + "loss": 0.0912, + "step": 6560 + }, + { + "epoch": 1.0009765625e-05, + "model_forward_time": 0.024824857711791992, + "step": 6560 + }, + { + "epoch": 1.0009765625e-05, + "step": 6560, + "training_step_time": 0.11862015724182129 + }, + { + "epoch": 1.001129150390625e-05, + "model_forward_time": 0.02503180503845215, + "step": 6561 + }, + { + "epoch": 1.001129150390625e-05, + "step": 6561, + "training_step_time": 0.11403226852416992 + }, + { + "epoch": 1.00128173828125e-05, + "model_forward_time": 0.025346994400024414, + "step": 6562 + }, + { + "epoch": 1.00128173828125e-05, + "step": 6562, + "training_step_time": 0.19987177848815918 + }, + { + "epoch": 1.001434326171875e-05, + "model_forward_time": 0.024656295776367188, + "step": 6563 + }, + { + "epoch": 1.001434326171875e-05, + "step": 6563, + "training_step_time": 0.10623526573181152 + }, + { + "epoch": 1.0015869140625e-05, + "model_forward_time": 0.024497270584106445, + "step": 6564 + }, + { + "epoch": 1.0015869140625e-05, + "step": 6564, + "training_step_time": 0.10821032524108887 + }, + { + "epoch": 1.001739501953125e-05, + "model_forward_time": 0.025483131408691406, + "step": 6565 + }, + { + "epoch": 1.001739501953125e-05, + "step": 6565, + "training_step_time": 0.1135711669921875 + }, + { + "epoch": 1.00189208984375e-05, + "model_forward_time": 0.025506019592285156, + "step": 6566 + }, + { + "epoch": 1.00189208984375e-05, + "step": 6566, + "training_step_time": 0.10773110389709473 + }, + { + "epoch": 1.002044677734375e-05, + "model_forward_time": 0.025345802307128906, + "step": 6567 + }, + { + "epoch": 1.002044677734375e-05, + "step": 6567, + "training_step_time": 0.10838103294372559 + }, + { + "epoch": 1.002197265625e-05, + "model_forward_time": 0.025368452072143555, + "step": 6568 + }, + { + "epoch": 1.002197265625e-05, + "step": 6568, + "training_step_time": 0.11011695861816406 + }, + { + "epoch": 1.002349853515625e-05, + "model_forward_time": 0.02463841438293457, + "step": 6569 + }, + { + "epoch": 1.002349853515625e-05, + "step": 6569, + "training_step_time": 0.11744475364685059 + }, + { + "epoch": 1.00250244140625e-05, + "grad_norm": 0.6058138608932495, + "learning_rate": 9.239266632888659e-05, + "loss": 0.0873, + "step": 6570 + }, + { + "epoch": 1.00250244140625e-05, + "model_forward_time": 0.02502894401550293, + "step": 6570 + }, + { + "epoch": 1.00250244140625e-05, + "step": 6570, + "training_step_time": 0.11164259910583496 + }, + { + "epoch": 1.002655029296875e-05, + "model_forward_time": 0.025824308395385742, + "step": 6571 + }, + { + "epoch": 1.002655029296875e-05, + "step": 6571, + "training_step_time": 0.10733318328857422 + }, + { + "epoch": 1.0028076171875e-05, + "model_forward_time": 0.025097370147705078, + "step": 6572 + }, + { + "epoch": 1.0028076171875e-05, + "step": 6572, + "training_step_time": 0.1066129207611084 + }, + { + "epoch": 1.002960205078125e-05, + "model_forward_time": 0.02521491050720215, + "step": 6573 + }, + { + "epoch": 1.002960205078125e-05, + "step": 6573, + "training_step_time": 0.16582345962524414 + }, + { + "epoch": 1.00311279296875e-05, + "model_forward_time": 0.0251004695892334, + "step": 6574 + }, + { + "epoch": 1.00311279296875e-05, + "step": 6574, + "training_step_time": 0.11115622520446777 + }, + { + "epoch": 1.003265380859375e-05, + "model_forward_time": 0.02578258514404297, + "step": 6575 + }, + { + "epoch": 1.003265380859375e-05, + "step": 6575, + "training_step_time": 0.1152796745300293 + }, + { + "epoch": 1.00341796875e-05, + "model_forward_time": 0.02480173110961914, + "step": 6576 + }, + { + "epoch": 1.00341796875e-05, + "step": 6576, + "training_step_time": 0.21740508079528809 + }, + { + "epoch": 1.003570556640625e-05, + "model_forward_time": 0.024617910385131836, + "step": 6577 + }, + { + "epoch": 1.003570556640625e-05, + "step": 6577, + "training_step_time": 0.2051553726196289 + }, + { + "epoch": 1.00372314453125e-05, + "model_forward_time": 0.024477720260620117, + "step": 6578 + }, + { + "epoch": 1.00372314453125e-05, + "step": 6578, + "training_step_time": 0.11720871925354004 + }, + { + "epoch": 1.003875732421875e-05, + "model_forward_time": 0.02523183822631836, + "step": 6579 + }, + { + "epoch": 1.003875732421875e-05, + "step": 6579, + "training_step_time": 0.12525200843811035 + }, + { + "epoch": 1.0040283203125e-05, + "grad_norm": 0.6062305569648743, + "learning_rate": 9.236341655661778e-05, + "loss": 0.0628, + "step": 6580 + }, + { + "epoch": 1.0040283203125e-05, + "model_forward_time": 0.02523660659790039, + "step": 6580 + }, + { + "epoch": 1.0040283203125e-05, + "step": 6580, + "training_step_time": 0.11180639266967773 + }, + { + "epoch": 1.004180908203125e-05, + "model_forward_time": 0.025545120239257812, + "step": 6581 + }, + { + "epoch": 1.004180908203125e-05, + "step": 6581, + "training_step_time": 0.1099853515625 + }, + { + "epoch": 1.00433349609375e-05, + "model_forward_time": 0.02547430992126465, + "step": 6582 + }, + { + "epoch": 1.00433349609375e-05, + "step": 6582, + "training_step_time": 0.11046457290649414 + }, + { + "epoch": 1.004486083984375e-05, + "model_forward_time": 0.02477717399597168, + "step": 6583 + }, + { + "epoch": 1.004486083984375e-05, + "step": 6583, + "training_step_time": 0.1072385311126709 + }, + { + "epoch": 1.004638671875e-05, + "model_forward_time": 0.025068283081054688, + "step": 6584 + }, + { + "epoch": 1.004638671875e-05, + "step": 6584, + "training_step_time": 0.10965681076049805 + }, + { + "epoch": 1.004791259765625e-05, + "model_forward_time": 0.02513599395751953, + "step": 6585 + }, + { + "epoch": 1.004791259765625e-05, + "step": 6585, + "training_step_time": 0.10981392860412598 + }, + { + "epoch": 1.00494384765625e-05, + "model_forward_time": 0.0253293514251709, + "step": 6586 + }, + { + "epoch": 1.00494384765625e-05, + "step": 6586, + "training_step_time": 0.10671186447143555 + }, + { + "epoch": 1.005096435546875e-05, + "model_forward_time": 0.025168180465698242, + "step": 6587 + }, + { + "epoch": 1.005096435546875e-05, + "step": 6587, + "training_step_time": 0.11273598670959473 + }, + { + "epoch": 1.0052490234375e-05, + "model_forward_time": 0.025272846221923828, + "step": 6588 + }, + { + "epoch": 1.0052490234375e-05, + "step": 6588, + "training_step_time": 0.10802078247070312 + }, + { + "epoch": 1.005401611328125e-05, + "model_forward_time": 0.02510356903076172, + "step": 6589 + }, + { + "epoch": 1.005401611328125e-05, + "step": 6589, + "training_step_time": 0.11127281188964844 + }, + { + "epoch": 1.00555419921875e-05, + "grad_norm": 0.5910700559616089, + "learning_rate": 9.233411530880326e-05, + "loss": 0.0951, + "step": 6590 + }, + { + "epoch": 1.00555419921875e-05, + "model_forward_time": 0.0242159366607666, + "step": 6590 + }, + { + "epoch": 1.00555419921875e-05, + "step": 6590, + "training_step_time": 0.10978293418884277 + }, + { + "epoch": 1.005706787109375e-05, + "model_forward_time": 0.025275468826293945, + "step": 6591 + }, + { + "epoch": 1.005706787109375e-05, + "step": 6591, + "training_step_time": 0.10733270645141602 + }, + { + "epoch": 1.005859375e-05, + "model_forward_time": 0.025221586227416992, + "step": 6592 + }, + { + "epoch": 1.005859375e-05, + "step": 6592, + "training_step_time": 0.10641694068908691 + }, + { + "epoch": 1.006011962890625e-05, + "model_forward_time": 0.025393247604370117, + "step": 6593 + }, + { + "epoch": 1.006011962890625e-05, + "step": 6593, + "training_step_time": 0.10719633102416992 + }, + { + "epoch": 1.00616455078125e-05, + "model_forward_time": 0.025210142135620117, + "step": 6594 + }, + { + "epoch": 1.00616455078125e-05, + "step": 6594, + "training_step_time": 0.10864496231079102 + }, + { + "epoch": 1.006317138671875e-05, + "model_forward_time": 0.025278568267822266, + "step": 6595 + }, + { + "epoch": 1.006317138671875e-05, + "step": 6595, + "training_step_time": 0.1097726821899414 + }, + { + "epoch": 1.0064697265625e-05, + "model_forward_time": 0.02552509307861328, + "step": 6596 + }, + { + "epoch": 1.0064697265625e-05, + "step": 6596, + "training_step_time": 0.11342620849609375 + }, + { + "epoch": 1.006622314453125e-05, + "model_forward_time": 0.024968624114990234, + "step": 6597 + }, + { + "epoch": 1.006622314453125e-05, + "step": 6597, + "training_step_time": 0.10930562019348145 + }, + { + "epoch": 1.00677490234375e-05, + "model_forward_time": 0.027837753295898438, + "step": 6598 + }, + { + "epoch": 1.00677490234375e-05, + "step": 6598, + "training_step_time": 0.16094255447387695 + }, + { + "epoch": 1.006927490234375e-05, + "model_forward_time": 0.024677753448486328, + "step": 6599 + }, + { + "epoch": 1.006927490234375e-05, + "step": 6599, + "training_step_time": 0.1149604320526123 + }, + { + "epoch": 1.007080078125e-05, + "grad_norm": 0.3668150007724762, + "learning_rate": 9.230476262104677e-05, + "loss": 0.0887, + "step": 6600 + }, + { + "epoch": 1.007080078125e-05, + "model_forward_time": 0.0246431827545166, + "step": 6600 + }, + { + "epoch": 1.007080078125e-05, + "step": 6600, + "training_step_time": 0.12270092964172363 + }, + { + "epoch": 1.007232666015625e-05, + "model_forward_time": 0.025225400924682617, + "step": 6601 + }, + { + "epoch": 1.007232666015625e-05, + "step": 6601, + "training_step_time": 0.15770483016967773 + }, + { + "epoch": 1.00738525390625e-05, + "model_forward_time": 0.02454209327697754, + "step": 6602 + }, + { + "epoch": 1.00738525390625e-05, + "step": 6602, + "training_step_time": 0.17799139022827148 + }, + { + "epoch": 1.007537841796875e-05, + "model_forward_time": 0.024739503860473633, + "step": 6603 + }, + { + "epoch": 1.007537841796875e-05, + "step": 6603, + "training_step_time": 0.156998872756958 + }, + { + "epoch": 1.0076904296875e-05, + "model_forward_time": 0.024178266525268555, + "step": 6604 + }, + { + "epoch": 1.0076904296875e-05, + "step": 6604, + "training_step_time": 0.20427346229553223 + }, + { + "epoch": 1.007843017578125e-05, + "model_forward_time": 0.02445840835571289, + "step": 6605 + }, + { + "epoch": 1.007843017578125e-05, + "step": 6605, + "training_step_time": 0.11992216110229492 + }, + { + "epoch": 1.00799560546875e-05, + "model_forward_time": 0.02454543113708496, + "step": 6606 + }, + { + "epoch": 1.00799560546875e-05, + "step": 6606, + "training_step_time": 0.10874819755554199 + }, + { + "epoch": 1.008148193359375e-05, + "model_forward_time": 0.02534937858581543, + "step": 6607 + }, + { + "epoch": 1.008148193359375e-05, + "step": 6607, + "training_step_time": 0.19548892974853516 + }, + { + "epoch": 1.00830078125e-05, + "model_forward_time": 0.024621009826660156, + "step": 6608 + }, + { + "epoch": 1.00830078125e-05, + "step": 6608, + "training_step_time": 0.10390520095825195 + }, + { + "epoch": 1.008453369140625e-05, + "model_forward_time": 0.02482748031616211, + "step": 6609 + }, + { + "epoch": 1.008453369140625e-05, + "step": 6609, + "training_step_time": 0.11417055130004883 + }, + { + "epoch": 1.00860595703125e-05, + "grad_norm": 0.33050212264060974, + "learning_rate": 9.227535852901463e-05, + "loss": 0.0624, + "step": 6610 + }, + { + "epoch": 1.00860595703125e-05, + "model_forward_time": 0.0255126953125, + "step": 6610 + }, + { + "epoch": 1.00860595703125e-05, + "step": 6610, + "training_step_time": 0.10654592514038086 + }, + { + "epoch": 1.008758544921875e-05, + "model_forward_time": 0.025337696075439453, + "step": 6611 + }, + { + "epoch": 1.008758544921875e-05, + "step": 6611, + "training_step_time": 0.10657715797424316 + }, + { + "epoch": 1.0089111328125e-05, + "model_forward_time": 0.025542020797729492, + "step": 6612 + }, + { + "epoch": 1.0089111328125e-05, + "step": 6612, + "training_step_time": 0.10787487030029297 + }, + { + "epoch": 1.009063720703125e-05, + "model_forward_time": 0.026583194732666016, + "step": 6613 + }, + { + "epoch": 1.009063720703125e-05, + "step": 6613, + "training_step_time": 0.10704326629638672 + }, + { + "epoch": 1.00921630859375e-05, + "model_forward_time": 0.025339603424072266, + "step": 6614 + }, + { + "epoch": 1.00921630859375e-05, + "step": 6614, + "training_step_time": 0.1078641414642334 + }, + { + "epoch": 1.009368896484375e-05, + "model_forward_time": 0.025077104568481445, + "step": 6615 + }, + { + "epoch": 1.009368896484375e-05, + "step": 6615, + "training_step_time": 0.10654044151306152 + }, + { + "epoch": 1.009521484375e-05, + "model_forward_time": 0.025461435317993164, + "step": 6616 + }, + { + "epoch": 1.009521484375e-05, + "step": 6616, + "training_step_time": 0.10643291473388672 + }, + { + "epoch": 1.009674072265625e-05, + "model_forward_time": 0.026859760284423828, + "step": 6617 + }, + { + "epoch": 1.009674072265625e-05, + "step": 6617, + "training_step_time": 0.11926865577697754 + }, + { + "epoch": 1.00982666015625e-05, + "model_forward_time": 0.02557849884033203, + "step": 6618 + }, + { + "epoch": 1.00982666015625e-05, + "step": 6618, + "training_step_time": 0.10747122764587402 + }, + { + "epoch": 1.009979248046875e-05, + "model_forward_time": 0.02520585060119629, + "step": 6619 + }, + { + "epoch": 1.009979248046875e-05, + "step": 6619, + "training_step_time": 0.18399500846862793 + }, + { + "epoch": 1.0101318359375e-05, + "grad_norm": 0.31240516901016235, + "learning_rate": 9.224590306843558e-05, + "loss": 0.0566, + "step": 6620 + }, + { + "epoch": 1.0101318359375e-05, + "model_forward_time": 0.024890899658203125, + "step": 6620 + }, + { + "epoch": 1.0101318359375e-05, + "step": 6620, + "training_step_time": 0.2280712127685547 + }, + { + "epoch": 1.010284423828125e-05, + "model_forward_time": 0.024544239044189453, + "step": 6621 + }, + { + "epoch": 1.010284423828125e-05, + "step": 6621, + "training_step_time": 0.2417900562286377 + }, + { + "epoch": 1.01043701171875e-05, + "model_forward_time": 0.024407386779785156, + "step": 6622 + }, + { + "epoch": 1.01043701171875e-05, + "step": 6622, + "training_step_time": 0.23080086708068848 + }, + { + "epoch": 1.010589599609375e-05, + "model_forward_time": 0.024460315704345703, + "step": 6623 + }, + { + "epoch": 1.010589599609375e-05, + "step": 6623, + "training_step_time": 0.22805118560791016 + }, + { + "epoch": 1.0107421875e-05, + "model_forward_time": 0.024762868881225586, + "step": 6624 + }, + { + "epoch": 1.0107421875e-05, + "step": 6624, + "training_step_time": 0.17116379737854004 + }, + { + "epoch": 1.010894775390625e-05, + "model_forward_time": 0.024512290954589844, + "step": 6625 + }, + { + "epoch": 1.010894775390625e-05, + "step": 6625, + "training_step_time": 0.15546727180480957 + }, + { + "epoch": 1.01104736328125e-05, + "model_forward_time": 0.025127410888671875, + "step": 6626 + }, + { + "epoch": 1.01104736328125e-05, + "step": 6626, + "training_step_time": 0.10763359069824219 + }, + { + "epoch": 1.011199951171875e-05, + "model_forward_time": 0.02610635757446289, + "step": 6627 + }, + { + "epoch": 1.011199951171875e-05, + "step": 6627, + "training_step_time": 0.10792875289916992 + }, + { + "epoch": 1.0113525390625e-05, + "model_forward_time": 0.025419950485229492, + "step": 6628 + }, + { + "epoch": 1.0113525390625e-05, + "step": 6628, + "training_step_time": 0.11095404624938965 + }, + { + "epoch": 1.011505126953125e-05, + "model_forward_time": 0.024516582489013672, + "step": 6629 + }, + { + "epoch": 1.011505126953125e-05, + "step": 6629, + "training_step_time": 0.10919022560119629 + }, + { + "epoch": 1.01165771484375e-05, + "grad_norm": 0.29019224643707275, + "learning_rate": 9.221639627510076e-05, + "loss": 0.0638, + "step": 6630 + }, + { + "epoch": 1.01165771484375e-05, + "model_forward_time": 0.025113344192504883, + "step": 6630 + }, + { + "epoch": 1.01165771484375e-05, + "step": 6630, + "training_step_time": 0.10723304748535156 + }, + { + "epoch": 1.011810302734375e-05, + "model_forward_time": 0.025109529495239258, + "step": 6631 + }, + { + "epoch": 1.011810302734375e-05, + "step": 6631, + "training_step_time": 0.11158347129821777 + }, + { + "epoch": 1.011962890625e-05, + "model_forward_time": 0.02535247802734375, + "step": 6632 + }, + { + "epoch": 1.011962890625e-05, + "step": 6632, + "training_step_time": 0.11148571968078613 + }, + { + "epoch": 1.012115478515625e-05, + "model_forward_time": 0.025548219680786133, + "step": 6633 + }, + { + "epoch": 1.012115478515625e-05, + "step": 6633, + "training_step_time": 0.11294007301330566 + }, + { + "epoch": 1.01226806640625e-05, + "model_forward_time": 0.026001453399658203, + "step": 6634 + }, + { + "epoch": 1.01226806640625e-05, + "step": 6634, + "training_step_time": 0.1767728328704834 + }, + { + "epoch": 1.012420654296875e-05, + "model_forward_time": 0.02497553825378418, + "step": 6635 + }, + { + "epoch": 1.012420654296875e-05, + "step": 6635, + "training_step_time": 0.19539332389831543 + }, + { + "epoch": 1.0125732421875e-05, + "model_forward_time": 0.024483203887939453, + "step": 6636 + }, + { + "epoch": 1.0125732421875e-05, + "step": 6636, + "training_step_time": 0.1976184844970703 + }, + { + "epoch": 1.012725830078125e-05, + "model_forward_time": 0.02395033836364746, + "step": 6637 + }, + { + "epoch": 1.012725830078125e-05, + "step": 6637, + "training_step_time": 0.17644810676574707 + }, + { + "epoch": 1.01287841796875e-05, + "model_forward_time": 0.024905920028686523, + "step": 6638 + }, + { + "epoch": 1.01287841796875e-05, + "step": 6638, + "training_step_time": 0.17102837562561035 + }, + { + "epoch": 1.013031005859375e-05, + "model_forward_time": 0.024414539337158203, + "step": 6639 + }, + { + "epoch": 1.013031005859375e-05, + "step": 6639, + "training_step_time": 0.17267775535583496 + }, + { + "epoch": 1.01318359375e-05, + "grad_norm": 0.3535989224910736, + "learning_rate": 9.218683818486372e-05, + "loss": 0.0565, + "step": 6640 + }, + { + "epoch": 1.01318359375e-05, + "model_forward_time": 0.024838924407958984, + "step": 6640 + }, + { + "epoch": 1.01318359375e-05, + "step": 6640, + "training_step_time": 0.1535487174987793 + }, + { + "epoch": 1.013336181640625e-05, + "model_forward_time": 0.02460026741027832, + "step": 6641 + }, + { + "epoch": 1.013336181640625e-05, + "step": 6641, + "training_step_time": 0.17336249351501465 + }, + { + "epoch": 1.01348876953125e-05, + "model_forward_time": 0.02479839324951172, + "step": 6642 + }, + { + "epoch": 1.01348876953125e-05, + "step": 6642, + "training_step_time": 0.17738080024719238 + }, + { + "epoch": 1.013641357421875e-05, + "model_forward_time": 0.02465653419494629, + "step": 6643 + }, + { + "epoch": 1.013641357421875e-05, + "step": 6643, + "training_step_time": 0.10442662239074707 + }, + { + "epoch": 1.0137939453125e-05, + "model_forward_time": 0.02462911605834961, + "step": 6644 + }, + { + "epoch": 1.0137939453125e-05, + "step": 6644, + "training_step_time": 0.11913657188415527 + }, + { + "epoch": 1.013946533203125e-05, + "model_forward_time": 0.025424957275390625, + "step": 6645 + }, + { + "epoch": 1.013946533203125e-05, + "step": 6645, + "training_step_time": 0.11937212944030762 + }, + { + "epoch": 1.01409912109375e-05, + "model_forward_time": 0.02550530433654785, + "step": 6646 + }, + { + "epoch": 1.01409912109375e-05, + "step": 6646, + "training_step_time": 0.10855770111083984 + }, + { + "epoch": 1.014251708984375e-05, + "model_forward_time": 0.0254666805267334, + "step": 6647 + }, + { + "epoch": 1.014251708984375e-05, + "step": 6647, + "training_step_time": 0.2111661434173584 + }, + { + "epoch": 1.014404296875e-05, + "model_forward_time": 0.025254487991333008, + "step": 6648 + }, + { + "epoch": 1.014404296875e-05, + "step": 6648, + "training_step_time": 0.10758090019226074 + }, + { + "epoch": 1.014556884765625e-05, + "model_forward_time": 0.02412271499633789, + "step": 6649 + }, + { + "epoch": 1.014556884765625e-05, + "step": 6649, + "training_step_time": 0.10734963417053223 + }, + { + "epoch": 1.01470947265625e-05, + "grad_norm": 0.3252510130405426, + "learning_rate": 9.215722883364033e-05, + "loss": 0.0866, + "step": 6650 + }, + { + "epoch": 1.01470947265625e-05, + "model_forward_time": 0.02480626106262207, + "step": 6650 + }, + { + "epoch": 1.01470947265625e-05, + "step": 6650, + "training_step_time": 0.1477358341217041 + }, + { + "epoch": 1.014862060546875e-05, + "model_forward_time": 0.025113821029663086, + "step": 6651 + }, + { + "epoch": 1.014862060546875e-05, + "step": 6651, + "training_step_time": 0.14652562141418457 + }, + { + "epoch": 1.0150146484375e-05, + "model_forward_time": 0.024847984313964844, + "step": 6652 + }, + { + "epoch": 1.0150146484375e-05, + "step": 6652, + "training_step_time": 0.14195489883422852 + }, + { + "epoch": 1.015167236328125e-05, + "model_forward_time": 0.024892807006835938, + "step": 6653 + }, + { + "epoch": 1.015167236328125e-05, + "step": 6653, + "training_step_time": 0.12610769271850586 + }, + { + "epoch": 1.01531982421875e-05, + "model_forward_time": 0.02494668960571289, + "step": 6654 + }, + { + "epoch": 1.01531982421875e-05, + "step": 6654, + "training_step_time": 0.11930966377258301 + }, + { + "epoch": 1.015472412109375e-05, + "model_forward_time": 0.027321577072143555, + "step": 6655 + }, + { + "epoch": 1.015472412109375e-05, + "step": 6655, + "training_step_time": 0.12143421173095703 + }, + { + "epoch": 1.015625e-05, + "model_forward_time": 0.02546215057373047, + "step": 6656 + }, + { + "epoch": 1.015625e-05, + "step": 6656, + "training_step_time": 0.11566615104675293 + }, + { + "epoch": 1.015777587890625e-05, + "model_forward_time": 0.0250399112701416, + "step": 6657 + }, + { + "epoch": 1.015777587890625e-05, + "step": 6657, + "training_step_time": 0.17594289779663086 + }, + { + "epoch": 1.01593017578125e-05, + "model_forward_time": 0.025894641876220703, + "step": 6658 + }, + { + "epoch": 1.01593017578125e-05, + "step": 6658, + "training_step_time": 0.10976433753967285 + }, + { + "epoch": 1.016082763671875e-05, + "model_forward_time": 0.024956703186035156, + "step": 6659 + }, + { + "epoch": 1.016082763671875e-05, + "step": 6659, + "training_step_time": 0.21849536895751953 + }, + { + "epoch": 1.0162353515625e-05, + "grad_norm": 0.2549554109573364, + "learning_rate": 9.212756825740873e-05, + "loss": 0.0588, + "step": 6660 + }, + { + "epoch": 1.0162353515625e-05, + "model_forward_time": 0.024979829788208008, + "step": 6660 + }, + { + "epoch": 1.0162353515625e-05, + "step": 6660, + "training_step_time": 0.11991143226623535 + }, + { + "epoch": 1.016387939453125e-05, + "model_forward_time": 0.025058984756469727, + "step": 6661 + }, + { + "epoch": 1.016387939453125e-05, + "step": 6661, + "training_step_time": 0.15906882286071777 + }, + { + "epoch": 1.01654052734375e-05, + "model_forward_time": 0.025599002838134766, + "step": 6662 + }, + { + "epoch": 1.01654052734375e-05, + "step": 6662, + "training_step_time": 0.17911100387573242 + }, + { + "epoch": 1.016693115234375e-05, + "model_forward_time": 0.02510523796081543, + "step": 6663 + }, + { + "epoch": 1.016693115234375e-05, + "step": 6663, + "training_step_time": 0.10828399658203125 + }, + { + "epoch": 1.016845703125e-05, + "model_forward_time": 0.02490830421447754, + "step": 6664 + }, + { + "epoch": 1.016845703125e-05, + "step": 6664, + "training_step_time": 0.10892319679260254 + }, + { + "epoch": 1.016998291015625e-05, + "model_forward_time": 0.025702476501464844, + "step": 6665 + }, + { + "epoch": 1.016998291015625e-05, + "step": 6665, + "training_step_time": 0.10903215408325195 + }, + { + "epoch": 1.01715087890625e-05, + "model_forward_time": 0.025539636611938477, + "step": 6666 + }, + { + "epoch": 1.01715087890625e-05, + "step": 6666, + "training_step_time": 0.10861897468566895 + }, + { + "epoch": 1.017303466796875e-05, + "model_forward_time": 0.025807857513427734, + "step": 6667 + }, + { + "epoch": 1.017303466796875e-05, + "step": 6667, + "training_step_time": 0.11297774314880371 + }, + { + "epoch": 1.0174560546875e-05, + "model_forward_time": 0.0253751277923584, + "step": 6668 + }, + { + "epoch": 1.0174560546875e-05, + "step": 6668, + "training_step_time": 0.11098408699035645 + }, + { + "epoch": 1.017608642578125e-05, + "model_forward_time": 0.025780677795410156, + "step": 6669 + }, + { + "epoch": 1.017608642578125e-05, + "step": 6669, + "training_step_time": 0.10950779914855957 + }, + { + "epoch": 1.01776123046875e-05, + "grad_norm": 0.43726256489753723, + "learning_rate": 9.209785649220935e-05, + "loss": 0.0595, + "step": 6670 + }, + { + "epoch": 1.01776123046875e-05, + "model_forward_time": 0.025224685668945312, + "step": 6670 + }, + { + "epoch": 1.01776123046875e-05, + "step": 6670, + "training_step_time": 0.11468982696533203 + }, + { + "epoch": 1.017913818359375e-05, + "model_forward_time": 0.02541661262512207, + "step": 6671 + }, + { + "epoch": 1.017913818359375e-05, + "step": 6671, + "training_step_time": 0.11031198501586914 + }, + { + "epoch": 1.01806640625e-05, + "model_forward_time": 0.025704383850097656, + "step": 6672 + }, + { + "epoch": 1.01806640625e-05, + "step": 6672, + "training_step_time": 0.11162042617797852 + }, + { + "epoch": 1.018218994140625e-05, + "model_forward_time": 0.025445938110351562, + "step": 6673 + }, + { + "epoch": 1.018218994140625e-05, + "step": 6673, + "training_step_time": 0.11043286323547363 + }, + { + "epoch": 1.01837158203125e-05, + "model_forward_time": 0.025633573532104492, + "step": 6674 + }, + { + "epoch": 1.01837158203125e-05, + "step": 6674, + "training_step_time": 0.1081540584564209 + }, + { + "epoch": 1.018524169921875e-05, + "model_forward_time": 0.025513410568237305, + "step": 6675 + }, + { + "epoch": 1.018524169921875e-05, + "step": 6675, + "training_step_time": 0.10905671119689941 + }, + { + "epoch": 1.0186767578125e-05, + "model_forward_time": 0.025923490524291992, + "step": 6676 + }, + { + "epoch": 1.0186767578125e-05, + "step": 6676, + "training_step_time": 0.1147162914276123 + }, + { + "epoch": 1.018829345703125e-05, + "model_forward_time": 0.027506113052368164, + "step": 6677 + }, + { + "epoch": 1.018829345703125e-05, + "step": 6677, + "training_step_time": 0.11569929122924805 + }, + { + "epoch": 1.01898193359375e-05, + "model_forward_time": 0.02564239501953125, + "step": 6678 + }, + { + "epoch": 1.01898193359375e-05, + "step": 6678, + "training_step_time": 0.10983085632324219 + }, + { + "epoch": 1.019134521484375e-05, + "model_forward_time": 0.025661468505859375, + "step": 6679 + }, + { + "epoch": 1.019134521484375e-05, + "step": 6679, + "training_step_time": 0.10880470275878906 + }, + { + "epoch": 1.019287109375e-05, + "grad_norm": 0.593154788017273, + "learning_rate": 9.206809357414474e-05, + "loss": 0.0824, + "step": 6680 + }, + { + "epoch": 1.019287109375e-05, + "model_forward_time": 0.025086402893066406, + "step": 6680 + }, + { + "epoch": 1.019287109375e-05, + "step": 6680, + "training_step_time": 0.10748863220214844 + }, + { + "epoch": 1.019439697265625e-05, + "model_forward_time": 0.025897502899169922, + "step": 6681 + }, + { + "epoch": 1.019439697265625e-05, + "step": 6681, + "training_step_time": 0.18822598457336426 + }, + { + "epoch": 1.01959228515625e-05, + "model_forward_time": 0.02459859848022461, + "step": 6682 + }, + { + "epoch": 1.01959228515625e-05, + "step": 6682, + "training_step_time": 0.11741161346435547 + }, + { + "epoch": 1.019744873046875e-05, + "model_forward_time": 0.02447962760925293, + "step": 6683 + }, + { + "epoch": 1.019744873046875e-05, + "step": 6683, + "training_step_time": 0.12725615501403809 + }, + { + "epoch": 1.0198974609375e-05, + "model_forward_time": 0.025165796279907227, + "step": 6684 + }, + { + "epoch": 1.0198974609375e-05, + "step": 6684, + "training_step_time": 0.16658878326416016 + }, + { + "epoch": 1.020050048828125e-05, + "model_forward_time": 0.024655818939208984, + "step": 6685 + }, + { + "epoch": 1.020050048828125e-05, + "step": 6685, + "training_step_time": 0.21158623695373535 + }, + { + "epoch": 1.02020263671875e-05, + "model_forward_time": 0.02469038963317871, + "step": 6686 + }, + { + "epoch": 1.02020263671875e-05, + "step": 6686, + "training_step_time": 0.1145026683807373 + }, + { + "epoch": 1.020355224609375e-05, + "model_forward_time": 0.024611949920654297, + "step": 6687 + }, + { + "epoch": 1.020355224609375e-05, + "step": 6687, + "training_step_time": 0.11061739921569824 + }, + { + "epoch": 1.0205078125e-05, + "model_forward_time": 0.025577783584594727, + "step": 6688 + }, + { + "epoch": 1.0205078125e-05, + "step": 6688, + "training_step_time": 0.11490631103515625 + }, + { + "epoch": 1.020660400390625e-05, + "model_forward_time": 0.025063276290893555, + "step": 6689 + }, + { + "epoch": 1.020660400390625e-05, + "step": 6689, + "training_step_time": 0.11049604415893555 + }, + { + "epoch": 1.02081298828125e-05, + "grad_norm": 0.43632248044013977, + "learning_rate": 9.20382795393797e-05, + "loss": 0.0782, + "step": 6690 + }, + { + "epoch": 1.02081298828125e-05, + "model_forward_time": 0.025624513626098633, + "step": 6690 + }, + { + "epoch": 1.02081298828125e-05, + "step": 6690, + "training_step_time": 0.15600085258483887 + }, + { + "epoch": 1.020965576171875e-05, + "model_forward_time": 0.027201414108276367, + "step": 6691 + }, + { + "epoch": 1.020965576171875e-05, + "step": 6691, + "training_step_time": 0.1472001075744629 + }, + { + "epoch": 1.0211181640625e-05, + "model_forward_time": 0.025005817413330078, + "step": 6692 + }, + { + "epoch": 1.0211181640625e-05, + "step": 6692, + "training_step_time": 0.13500022888183594 + }, + { + "epoch": 1.021270751953125e-05, + "model_forward_time": 0.024970054626464844, + "step": 6693 + }, + { + "epoch": 1.021270751953125e-05, + "step": 6693, + "training_step_time": 0.18063092231750488 + }, + { + "epoch": 1.02142333984375e-05, + "model_forward_time": 0.02487635612487793, + "step": 6694 + }, + { + "epoch": 1.02142333984375e-05, + "step": 6694, + "training_step_time": 0.18192648887634277 + }, + { + "epoch": 1.021575927734375e-05, + "model_forward_time": 0.024602890014648438, + "step": 6695 + }, + { + "epoch": 1.021575927734375e-05, + "step": 6695, + "training_step_time": 0.17698454856872559 + }, + { + "epoch": 1.021728515625e-05, + "model_forward_time": 0.024635791778564453, + "step": 6696 + }, + { + "epoch": 1.021728515625e-05, + "step": 6696, + "training_step_time": 0.17298436164855957 + }, + { + "epoch": 1.021881103515625e-05, + "model_forward_time": 0.02493762969970703, + "step": 6697 + }, + { + "epoch": 1.021881103515625e-05, + "step": 6697, + "training_step_time": 0.15532946586608887 + }, + { + "epoch": 1.02203369140625e-05, + "model_forward_time": 0.024669647216796875, + "step": 6698 + }, + { + "epoch": 1.02203369140625e-05, + "step": 6698, + "training_step_time": 0.13764238357543945 + }, + { + "epoch": 1.022186279296875e-05, + "model_forward_time": 0.024527311325073242, + "step": 6699 + }, + { + "epoch": 1.022186279296875e-05, + "step": 6699, + "training_step_time": 0.13286733627319336 + }, + { + "epoch": 1.0223388671875e-05, + "grad_norm": 0.7008652687072754, + "learning_rate": 9.200841442414106e-05, + "loss": 0.0794, + "step": 6700 + }, + { + "epoch": 1.0223388671875e-05, + "model_forward_time": 0.024979591369628906, + "step": 6700 + }, + { + "epoch": 1.0223388671875e-05, + "step": 6700, + "training_step_time": 0.1236116886138916 + }, + { + "epoch": 1.022491455078125e-05, + "model_forward_time": 0.02466607093811035, + "step": 6701 + }, + { + "epoch": 1.022491455078125e-05, + "step": 6701, + "training_step_time": 0.1890411376953125 + }, + { + "epoch": 1.02264404296875e-05, + "model_forward_time": 0.024779081344604492, + "step": 6702 + }, + { + "epoch": 1.02264404296875e-05, + "step": 6702, + "training_step_time": 0.13500738143920898 + }, + { + "epoch": 1.022796630859375e-05, + "model_forward_time": 0.02444005012512207, + "step": 6703 + }, + { + "epoch": 1.022796630859375e-05, + "step": 6703, + "training_step_time": 0.13224196434020996 + }, + { + "epoch": 1.02294921875e-05, + "model_forward_time": 0.024773120880126953, + "step": 6704 + }, + { + "epoch": 1.02294921875e-05, + "step": 6704, + "training_step_time": 0.17796826362609863 + }, + { + "epoch": 1.023101806640625e-05, + "model_forward_time": 0.02533102035522461, + "step": 6705 + }, + { + "epoch": 1.023101806640625e-05, + "step": 6705, + "training_step_time": 0.14831233024597168 + }, + { + "epoch": 1.02325439453125e-05, + "model_forward_time": 0.025710582733154297, + "step": 6706 + }, + { + "epoch": 1.02325439453125e-05, + "step": 6706, + "training_step_time": 0.10755181312561035 + }, + { + "epoch": 1.023406982421875e-05, + "model_forward_time": 0.025962114334106445, + "step": 6707 + }, + { + "epoch": 1.023406982421875e-05, + "step": 6707, + "training_step_time": 0.11071038246154785 + }, + { + "epoch": 1.0235595703125e-05, + "model_forward_time": 0.025438785552978516, + "step": 6708 + }, + { + "epoch": 1.0235595703125e-05, + "step": 6708, + "training_step_time": 0.1090400218963623 + }, + { + "epoch": 1.023712158203125e-05, + "model_forward_time": 0.02598714828491211, + "step": 6709 + }, + { + "epoch": 1.023712158203125e-05, + "step": 6709, + "training_step_time": 0.1110529899597168 + }, + { + "epoch": 1.02386474609375e-05, + "grad_norm": 0.5296297669410706, + "learning_rate": 9.197849826471774e-05, + "loss": 0.0832, + "step": 6710 + }, + { + "epoch": 1.02386474609375e-05, + "model_forward_time": 0.025888442993164062, + "step": 6710 + }, + { + "epoch": 1.02386474609375e-05, + "step": 6710, + "training_step_time": 0.1121983528137207 + }, + { + "epoch": 1.024017333984375e-05, + "model_forward_time": 0.025605440139770508, + "step": 6711 + }, + { + "epoch": 1.024017333984375e-05, + "step": 6711, + "training_step_time": 0.11295032501220703 + }, + { + "epoch": 1.024169921875e-05, + "model_forward_time": 0.025949716567993164, + "step": 6712 + }, + { + "epoch": 1.024169921875e-05, + "step": 6712, + "training_step_time": 0.10803723335266113 + }, + { + "epoch": 1.024322509765625e-05, + "model_forward_time": 0.02553391456604004, + "step": 6713 + }, + { + "epoch": 1.024322509765625e-05, + "step": 6713, + "training_step_time": 0.10935521125793457 + }, + { + "epoch": 1.02447509765625e-05, + "model_forward_time": 0.02526545524597168, + "step": 6714 + }, + { + "epoch": 1.02447509765625e-05, + "step": 6714, + "training_step_time": 0.11267876625061035 + }, + { + "epoch": 1.024627685546875e-05, + "model_forward_time": 0.029499530792236328, + "step": 6715 + }, + { + "epoch": 1.024627685546875e-05, + "step": 6715, + "training_step_time": 0.11208701133728027 + }, + { + "epoch": 1.0247802734375e-05, + "model_forward_time": 0.025459766387939453, + "step": 6716 + }, + { + "epoch": 1.0247802734375e-05, + "step": 6716, + "training_step_time": 0.10940074920654297 + }, + { + "epoch": 1.024932861328125e-05, + "model_forward_time": 0.02602076530456543, + "step": 6717 + }, + { + "epoch": 1.024932861328125e-05, + "step": 6717, + "training_step_time": 0.11237454414367676 + }, + { + "epoch": 1.02508544921875e-05, + "model_forward_time": 0.024964570999145508, + "step": 6718 + }, + { + "epoch": 1.02508544921875e-05, + "step": 6718, + "training_step_time": 0.11144852638244629 + }, + { + "epoch": 1.025238037109375e-05, + "model_forward_time": 0.026040315628051758, + "step": 6719 + }, + { + "epoch": 1.025238037109375e-05, + "step": 6719, + "training_step_time": 0.11171674728393555 + }, + { + "epoch": 1.025390625e-05, + "grad_norm": 0.36391812562942505, + "learning_rate": 9.194853109746074e-05, + "loss": 0.0844, + "step": 6720 + }, + { + "epoch": 1.025390625e-05, + "model_forward_time": 0.025341272354125977, + "step": 6720 + }, + { + "epoch": 1.025390625e-05, + "step": 6720, + "training_step_time": 0.11127972602844238 + }, + { + "epoch": 1.025543212890625e-05, + "model_forward_time": 0.025410175323486328, + "step": 6721 + }, + { + "epoch": 1.025543212890625e-05, + "step": 6721, + "training_step_time": 0.10713624954223633 + }, + { + "epoch": 1.02569580078125e-05, + "model_forward_time": 0.02526688575744629, + "step": 6722 + }, + { + "epoch": 1.02569580078125e-05, + "step": 6722, + "training_step_time": 0.10897207260131836 + }, + { + "epoch": 1.025848388671875e-05, + "model_forward_time": 0.02539229393005371, + "step": 6723 + }, + { + "epoch": 1.025848388671875e-05, + "step": 6723, + "training_step_time": 0.10737085342407227 + }, + { + "epoch": 1.0260009765625e-05, + "model_forward_time": 0.02553582191467285, + "step": 6724 + }, + { + "epoch": 1.0260009765625e-05, + "step": 6724, + "training_step_time": 0.155792236328125 + }, + { + "epoch": 1.026153564453125e-05, + "model_forward_time": 0.02562546730041504, + "step": 6725 + }, + { + "epoch": 1.026153564453125e-05, + "step": 6725, + "training_step_time": 0.11450815200805664 + }, + { + "epoch": 1.02630615234375e-05, + "model_forward_time": 0.024576187133789062, + "step": 6726 + }, + { + "epoch": 1.02630615234375e-05, + "step": 6726, + "training_step_time": 0.13769054412841797 + }, + { + "epoch": 1.026458740234375e-05, + "model_forward_time": 0.0250091552734375, + "step": 6727 + }, + { + "epoch": 1.026458740234375e-05, + "step": 6727, + "training_step_time": 0.1526026725769043 + }, + { + "epoch": 1.026611328125e-05, + "model_forward_time": 0.025135517120361328, + "step": 6728 + }, + { + "epoch": 1.026611328125e-05, + "step": 6728, + "training_step_time": 0.19364094734191895 + }, + { + "epoch": 1.026763916015625e-05, + "model_forward_time": 0.025769472122192383, + "step": 6729 + }, + { + "epoch": 1.026763916015625e-05, + "step": 6729, + "training_step_time": 0.15213584899902344 + }, + { + "epoch": 1.02691650390625e-05, + "grad_norm": 0.37396296858787537, + "learning_rate": 9.191851295878295e-05, + "loss": 0.0766, + "step": 6730 + }, + { + "epoch": 1.02691650390625e-05, + "model_forward_time": 0.0246121883392334, + "step": 6730 + }, + { + "epoch": 1.02691650390625e-05, + "step": 6730, + "training_step_time": 0.20453286170959473 + }, + { + "epoch": 1.027069091796875e-05, + "model_forward_time": 0.024776458740234375, + "step": 6731 + }, + { + "epoch": 1.027069091796875e-05, + "step": 6731, + "training_step_time": 0.11318707466125488 + }, + { + "epoch": 1.0272216796875e-05, + "model_forward_time": 0.02440643310546875, + "step": 6732 + }, + { + "epoch": 1.0272216796875e-05, + "step": 6732, + "training_step_time": 0.10942816734313965 + }, + { + "epoch": 1.027374267578125e-05, + "model_forward_time": 0.025505542755126953, + "step": 6733 + }, + { + "epoch": 1.027374267578125e-05, + "step": 6733, + "training_step_time": 0.19609928131103516 + }, + { + "epoch": 1.02752685546875e-05, + "model_forward_time": 0.02491903305053711, + "step": 6734 + }, + { + "epoch": 1.02752685546875e-05, + "step": 6734, + "training_step_time": 0.1049954891204834 + }, + { + "epoch": 1.027679443359375e-05, + "model_forward_time": 0.02437138557434082, + "step": 6735 + }, + { + "epoch": 1.027679443359375e-05, + "step": 6735, + "training_step_time": 0.10743236541748047 + }, + { + "epoch": 1.02783203125e-05, + "model_forward_time": 0.02556777000427246, + "step": 6736 + }, + { + "epoch": 1.02783203125e-05, + "step": 6736, + "training_step_time": 0.1072227954864502 + }, + { + "epoch": 1.027984619140625e-05, + "model_forward_time": 0.02550029754638672, + "step": 6737 + }, + { + "epoch": 1.027984619140625e-05, + "step": 6737, + "training_step_time": 0.10777139663696289 + }, + { + "epoch": 1.02813720703125e-05, + "model_forward_time": 0.02562570571899414, + "step": 6738 + }, + { + "epoch": 1.02813720703125e-05, + "step": 6738, + "training_step_time": 0.10840296745300293 + }, + { + "epoch": 1.028289794921875e-05, + "model_forward_time": 0.025302648544311523, + "step": 6739 + }, + { + "epoch": 1.028289794921875e-05, + "step": 6739, + "training_step_time": 0.10816168785095215 + }, + { + "epoch": 1.0284423828125e-05, + "grad_norm": 0.42670416831970215, + "learning_rate": 9.188844388515926e-05, + "loss": 0.0931, + "step": 6740 + }, + { + "epoch": 1.0284423828125e-05, + "model_forward_time": 0.025083065032958984, + "step": 6740 + }, + { + "epoch": 1.0284423828125e-05, + "step": 6740, + "training_step_time": 0.10978221893310547 + }, + { + "epoch": 1.028594970703125e-05, + "model_forward_time": 0.025543212890625, + "step": 6741 + }, + { + "epoch": 1.028594970703125e-05, + "step": 6741, + "training_step_time": 0.10684990882873535 + }, + { + "epoch": 1.02874755859375e-05, + "model_forward_time": 0.025813579559326172, + "step": 6742 + }, + { + "epoch": 1.02874755859375e-05, + "step": 6742, + "training_step_time": 0.10875988006591797 + }, + { + "epoch": 1.028900146484375e-05, + "model_forward_time": 0.025463581085205078, + "step": 6743 + }, + { + "epoch": 1.028900146484375e-05, + "step": 6743, + "training_step_time": 0.1079552173614502 + }, + { + "epoch": 1.029052734375e-05, + "model_forward_time": 0.02527761459350586, + "step": 6744 + }, + { + "epoch": 1.029052734375e-05, + "step": 6744, + "training_step_time": 0.10619711875915527 + }, + { + "epoch": 1.029205322265625e-05, + "model_forward_time": 0.025319814682006836, + "step": 6745 + }, + { + "epoch": 1.029205322265625e-05, + "step": 6745, + "training_step_time": 0.19970440864562988 + }, + { + "epoch": 1.02935791015625e-05, + "model_forward_time": 0.02437901496887207, + "step": 6746 + }, + { + "epoch": 1.02935791015625e-05, + "step": 6746, + "training_step_time": 0.18084716796875 + }, + { + "epoch": 1.029510498046875e-05, + "model_forward_time": 0.02436995506286621, + "step": 6747 + }, + { + "epoch": 1.029510498046875e-05, + "step": 6747, + "training_step_time": 0.16920948028564453 + }, + { + "epoch": 1.0296630859375e-05, + "model_forward_time": 0.025168895721435547, + "step": 6748 + }, + { + "epoch": 1.0296630859375e-05, + "step": 6748, + "training_step_time": 0.17935466766357422 + }, + { + "epoch": 1.029815673828125e-05, + "model_forward_time": 0.025011777877807617, + "step": 6749 + }, + { + "epoch": 1.029815673828125e-05, + "step": 6749, + "training_step_time": 0.177933931350708 + }, + { + "epoch": 1.02996826171875e-05, + "grad_norm": 0.39730721712112427, + "learning_rate": 9.185832391312644e-05, + "loss": 0.0651, + "step": 6750 + }, + { + "epoch": 1.02996826171875e-05, + "model_forward_time": 0.024962186813354492, + "step": 6750 + }, + { + "epoch": 1.02996826171875e-05, + "step": 6750, + "training_step_time": 0.15660643577575684 + }, + { + "epoch": 1.030120849609375e-05, + "model_forward_time": 0.024784564971923828, + "step": 6751 + }, + { + "epoch": 1.030120849609375e-05, + "step": 6751, + "training_step_time": 0.10580635070800781 + }, + { + "epoch": 1.0302734375e-05, + "model_forward_time": 0.025832414627075195, + "step": 6752 + }, + { + "epoch": 1.0302734375e-05, + "step": 6752, + "training_step_time": 0.10579276084899902 + }, + { + "epoch": 1.030426025390625e-05, + "model_forward_time": 0.02552938461303711, + "step": 6753 + }, + { + "epoch": 1.030426025390625e-05, + "step": 6753, + "training_step_time": 0.10761046409606934 + }, + { + "epoch": 1.03057861328125e-05, + "model_forward_time": 0.02854323387145996, + "step": 6754 + }, + { + "epoch": 1.03057861328125e-05, + "step": 6754, + "training_step_time": 0.11634421348571777 + }, + { + "epoch": 1.030731201171875e-05, + "model_forward_time": 0.025571584701538086, + "step": 6755 + }, + { + "epoch": 1.030731201171875e-05, + "step": 6755, + "training_step_time": 0.1069326400756836 + }, + { + "epoch": 1.0308837890625e-05, + "model_forward_time": 0.025213003158569336, + "step": 6756 + }, + { + "epoch": 1.0308837890625e-05, + "step": 6756, + "training_step_time": 0.10791325569152832 + }, + { + "epoch": 1.031036376953125e-05, + "model_forward_time": 0.02558112144470215, + "step": 6757 + }, + { + "epoch": 1.031036376953125e-05, + "step": 6757, + "training_step_time": 0.10666370391845703 + }, + { + "epoch": 1.03118896484375e-05, + "model_forward_time": 0.02504253387451172, + "step": 6758 + }, + { + "epoch": 1.03118896484375e-05, + "step": 6758, + "training_step_time": 0.13380169868469238 + }, + { + "epoch": 1.031341552734375e-05, + "model_forward_time": 0.025493621826171875, + "step": 6759 + }, + { + "epoch": 1.031341552734375e-05, + "step": 6759, + "training_step_time": 0.15464329719543457 + }, + { + "epoch": 1.031494140625e-05, + "grad_norm": 0.5684757232666016, + "learning_rate": 9.182815307928307e-05, + "loss": 0.0703, + "step": 6760 + }, + { + "epoch": 1.031494140625e-05, + "model_forward_time": 0.024649620056152344, + "step": 6760 + }, + { + "epoch": 1.031494140625e-05, + "step": 6760, + "training_step_time": 0.15729761123657227 + }, + { + "epoch": 1.031646728515625e-05, + "model_forward_time": 0.024744510650634766, + "step": 6761 + }, + { + "epoch": 1.031646728515625e-05, + "step": 6761, + "training_step_time": 0.15435123443603516 + }, + { + "epoch": 1.03179931640625e-05, + "model_forward_time": 0.025151968002319336, + "step": 6762 + }, + { + "epoch": 1.03179931640625e-05, + "step": 6762, + "training_step_time": 0.13762664794921875 + }, + { + "epoch": 1.031951904296875e-05, + "model_forward_time": 0.024779796600341797, + "step": 6763 + }, + { + "epoch": 1.031951904296875e-05, + "step": 6763, + "training_step_time": 0.1277782917022705 + }, + { + "epoch": 1.0321044921875e-05, + "model_forward_time": 0.024873018264770508, + "step": 6764 + }, + { + "epoch": 1.0321044921875e-05, + "step": 6764, + "training_step_time": 0.12342333793640137 + }, + { + "epoch": 1.032257080078125e-05, + "model_forward_time": 0.02537393569946289, + "step": 6765 + }, + { + "epoch": 1.032257080078125e-05, + "step": 6765, + "training_step_time": 0.1369800567626953 + }, + { + "epoch": 1.03240966796875e-05, + "model_forward_time": 0.02530837059020996, + "step": 6766 + }, + { + "epoch": 1.03240966796875e-05, + "step": 6766, + "training_step_time": 0.19207310676574707 + }, + { + "epoch": 1.032562255859375e-05, + "model_forward_time": 0.024490833282470703, + "step": 6767 + }, + { + "epoch": 1.032562255859375e-05, + "step": 6767, + "training_step_time": 0.12000656127929688 + }, + { + "epoch": 1.03271484375e-05, + "model_forward_time": 0.0269777774810791, + "step": 6768 + }, + { + "epoch": 1.03271484375e-05, + "step": 6768, + "training_step_time": 0.12020754814147949 + }, + { + "epoch": 1.032867431640625e-05, + "model_forward_time": 0.025759220123291016, + "step": 6769 + }, + { + "epoch": 1.032867431640625e-05, + "step": 6769, + "training_step_time": 0.11267638206481934 + }, + { + "epoch": 1.03302001953125e-05, + "grad_norm": 0.38645869493484497, + "learning_rate": 9.179793142028959e-05, + "loss": 0.0655, + "step": 6770 + }, + { + "epoch": 1.03302001953125e-05, + "model_forward_time": 0.025322675704956055, + "step": 6770 + }, + { + "epoch": 1.03302001953125e-05, + "step": 6770, + "training_step_time": 0.1320514678955078 + }, + { + "epoch": 1.033172607421875e-05, + "model_forward_time": 0.0254361629486084, + "step": 6771 + }, + { + "epoch": 1.033172607421875e-05, + "step": 6771, + "training_step_time": 0.19279909133911133 + }, + { + "epoch": 1.0333251953125e-05, + "model_forward_time": 0.024619340896606445, + "step": 6772 + }, + { + "epoch": 1.0333251953125e-05, + "step": 6772, + "training_step_time": 0.156111478805542 + }, + { + "epoch": 1.033477783203125e-05, + "model_forward_time": 0.024389982223510742, + "step": 6773 + }, + { + "epoch": 1.033477783203125e-05, + "step": 6773, + "training_step_time": 0.12191057205200195 + }, + { + "epoch": 1.03363037109375e-05, + "model_forward_time": 0.024698972702026367, + "step": 6774 + }, + { + "epoch": 1.03363037109375e-05, + "step": 6774, + "training_step_time": 0.119110107421875 + }, + { + "epoch": 1.033782958984375e-05, + "model_forward_time": 0.025611162185668945, + "step": 6775 + }, + { + "epoch": 1.033782958984375e-05, + "step": 6775, + "training_step_time": 0.10812234878540039 + }, + { + "epoch": 1.033935546875e-05, + "model_forward_time": 0.025439977645874023, + "step": 6776 + }, + { + "epoch": 1.033935546875e-05, + "step": 6776, + "training_step_time": 0.19219470024108887 + }, + { + "epoch": 1.034088134765625e-05, + "model_forward_time": 0.024503231048583984, + "step": 6777 + }, + { + "epoch": 1.034088134765625e-05, + "step": 6777, + "training_step_time": 0.10391712188720703 + }, + { + "epoch": 1.03424072265625e-05, + "model_forward_time": 0.024680137634277344, + "step": 6778 + }, + { + "epoch": 1.03424072265625e-05, + "step": 6778, + "training_step_time": 0.10871386528015137 + }, + { + "epoch": 1.034393310546875e-05, + "model_forward_time": 0.024979114532470703, + "step": 6779 + }, + { + "epoch": 1.034393310546875e-05, + "step": 6779, + "training_step_time": 0.11033987998962402 + }, + { + "epoch": 1.0345458984375e-05, + "grad_norm": 0.36881861090660095, + "learning_rate": 9.176765897286813e-05, + "loss": 0.0735, + "step": 6780 + }, + { + "epoch": 1.0345458984375e-05, + "model_forward_time": 0.024012088775634766, + "step": 6780 + }, + { + "epoch": 1.0345458984375e-05, + "step": 6780, + "training_step_time": 0.10874104499816895 + }, + { + "epoch": 1.034698486328125e-05, + "model_forward_time": 0.02436971664428711, + "step": 6781 + }, + { + "epoch": 1.034698486328125e-05, + "step": 6781, + "training_step_time": 0.1107327938079834 + }, + { + "epoch": 1.03485107421875e-05, + "model_forward_time": 0.025145292282104492, + "step": 6782 + }, + { + "epoch": 1.03485107421875e-05, + "step": 6782, + "training_step_time": 0.10963177680969238 + }, + { + "epoch": 1.035003662109375e-05, + "model_forward_time": 0.025675296783447266, + "step": 6783 + }, + { + "epoch": 1.035003662109375e-05, + "step": 6783, + "training_step_time": 0.1100163459777832 + }, + { + "epoch": 1.03515625e-05, + "model_forward_time": 0.02542710304260254, + "step": 6784 + }, + { + "epoch": 1.03515625e-05, + "step": 6784, + "training_step_time": 0.11111569404602051 + }, + { + "epoch": 1.035308837890625e-05, + "model_forward_time": 0.025756359100341797, + "step": 6785 + }, + { + "epoch": 1.035308837890625e-05, + "step": 6785, + "training_step_time": 0.11312198638916016 + }, + { + "epoch": 1.03546142578125e-05, + "model_forward_time": 0.025445938110351562, + "step": 6786 + }, + { + "epoch": 1.03546142578125e-05, + "step": 6786, + "training_step_time": 0.1082923412322998 + }, + { + "epoch": 1.035614013671875e-05, + "model_forward_time": 0.02658820152282715, + "step": 6787 + }, + { + "epoch": 1.035614013671875e-05, + "step": 6787, + "training_step_time": 0.11227202415466309 + }, + { + "epoch": 1.0357666015625e-05, + "model_forward_time": 0.025177955627441406, + "step": 6788 + }, + { + "epoch": 1.0357666015625e-05, + "step": 6788, + "training_step_time": 0.19746804237365723 + }, + { + "epoch": 1.035919189453125e-05, + "model_forward_time": 0.024490833282470703, + "step": 6789 + }, + { + "epoch": 1.035919189453125e-05, + "step": 6789, + "training_step_time": 0.16613340377807617 + }, + { + "epoch": 1.03607177734375e-05, + "grad_norm": 0.5300776958465576, + "learning_rate": 9.173733577380258e-05, + "loss": 0.0643, + "step": 6790 + }, + { + "epoch": 1.03607177734375e-05, + "model_forward_time": 0.02492237091064453, + "step": 6790 + }, + { + "epoch": 1.03607177734375e-05, + "step": 6790, + "training_step_time": 0.17789149284362793 + }, + { + "epoch": 1.036224365234375e-05, + "model_forward_time": 0.024981260299682617, + "step": 6791 + }, + { + "epoch": 1.036224365234375e-05, + "step": 6791, + "training_step_time": 0.15999388694763184 + }, + { + "epoch": 1.036376953125e-05, + "model_forward_time": 0.02533888816833496, + "step": 6792 + }, + { + "epoch": 1.036376953125e-05, + "step": 6792, + "training_step_time": 0.20302867889404297 + }, + { + "epoch": 1.036529541015625e-05, + "model_forward_time": 0.024639368057250977, + "step": 6793 + }, + { + "epoch": 1.036529541015625e-05, + "step": 6793, + "training_step_time": 0.1438910961151123 + }, + { + "epoch": 1.03668212890625e-05, + "model_forward_time": 0.024744033813476562, + "step": 6794 + }, + { + "epoch": 1.03668212890625e-05, + "step": 6794, + "training_step_time": 0.10484528541564941 + }, + { + "epoch": 1.036834716796875e-05, + "model_forward_time": 0.025305509567260742, + "step": 6795 + }, + { + "epoch": 1.036834716796875e-05, + "step": 6795, + "training_step_time": 0.10884857177734375 + }, + { + "epoch": 1.0369873046875e-05, + "model_forward_time": 0.028499126434326172, + "step": 6796 + }, + { + "epoch": 1.0369873046875e-05, + "step": 6796, + "training_step_time": 0.11509394645690918 + }, + { + "epoch": 1.037139892578125e-05, + "model_forward_time": 0.025892257690429688, + "step": 6797 + }, + { + "epoch": 1.037139892578125e-05, + "step": 6797, + "training_step_time": 0.10813784599304199 + }, + { + "epoch": 1.03729248046875e-05, + "model_forward_time": 0.0252077579498291, + "step": 6798 + }, + { + "epoch": 1.03729248046875e-05, + "step": 6798, + "training_step_time": 0.11166524887084961 + }, + { + "epoch": 1.037445068359375e-05, + "model_forward_time": 0.02540755271911621, + "step": 6799 + }, + { + "epoch": 1.037445068359375e-05, + "step": 6799, + "training_step_time": 0.10682511329650879 + }, + { + "epoch": 1.03759765625e-05, + "grad_norm": 0.4445255994796753, + "learning_rate": 9.17069618599385e-05, + "loss": 0.083, + "step": 6800 + }, + { + "epoch": 1.03759765625e-05, + "model_forward_time": 0.026111841201782227, + "step": 6800 + }, + { + "epoch": 1.03759765625e-05, + "step": 6800, + "training_step_time": 0.11031579971313477 + }, + { + "epoch": 1.037750244140625e-05, + "model_forward_time": 0.025699615478515625, + "step": 6801 + }, + { + "epoch": 1.037750244140625e-05, + "step": 6801, + "training_step_time": 0.10898399353027344 + }, + { + "epoch": 1.03790283203125e-05, + "model_forward_time": 0.025450468063354492, + "step": 6802 + }, + { + "epoch": 1.03790283203125e-05, + "step": 6802, + "training_step_time": 0.10847973823547363 + }, + { + "epoch": 1.038055419921875e-05, + "model_forward_time": 0.025058746337890625, + "step": 6803 + }, + { + "epoch": 1.038055419921875e-05, + "step": 6803, + "training_step_time": 0.10778331756591797 + }, + { + "epoch": 1.0382080078125e-05, + "model_forward_time": 0.02533102035522461, + "step": 6804 + }, + { + "epoch": 1.0382080078125e-05, + "step": 6804, + "training_step_time": 0.11117935180664062 + }, + { + "epoch": 1.038360595703125e-05, + "model_forward_time": 0.024915456771850586, + "step": 6805 + }, + { + "epoch": 1.038360595703125e-05, + "step": 6805, + "training_step_time": 0.1074991226196289 + }, + { + "epoch": 1.03851318359375e-05, + "model_forward_time": 0.025461673736572266, + "step": 6806 + }, + { + "epoch": 1.03851318359375e-05, + "step": 6806, + "training_step_time": 0.10694098472595215 + }, + { + "epoch": 1.038665771484375e-05, + "model_forward_time": 0.025527238845825195, + "step": 6807 + }, + { + "epoch": 1.038665771484375e-05, + "step": 6807, + "training_step_time": 0.1068115234375 + }, + { + "epoch": 1.038818359375e-05, + "model_forward_time": 0.025472640991210938, + "step": 6808 + }, + { + "epoch": 1.038818359375e-05, + "step": 6808, + "training_step_time": 0.10971713066101074 + }, + { + "epoch": 1.038970947265625e-05, + "model_forward_time": 0.025383710861206055, + "step": 6809 + }, + { + "epoch": 1.038970947265625e-05, + "step": 6809, + "training_step_time": 0.10813021659851074 + }, + { + "epoch": 1.03912353515625e-05, + "grad_norm": 0.41912150382995605, + "learning_rate": 9.167653726818305e-05, + "loss": 0.0563, + "step": 6810 + }, + { + "epoch": 1.03912353515625e-05, + "model_forward_time": 0.02541637420654297, + "step": 6810 + }, + { + "epoch": 1.03912353515625e-05, + "step": 6810, + "training_step_time": 0.17542695999145508 + }, + { + "epoch": 1.039276123046875e-05, + "model_forward_time": 0.02513909339904785, + "step": 6811 + }, + { + "epoch": 1.039276123046875e-05, + "step": 6811, + "training_step_time": 0.2097764015197754 + }, + { + "epoch": 1.0394287109375e-05, + "model_forward_time": 0.024834156036376953, + "step": 6812 + }, + { + "epoch": 1.0394287109375e-05, + "step": 6812, + "training_step_time": 0.2441103458404541 + }, + { + "epoch": 1.039581298828125e-05, + "model_forward_time": 0.026102066040039062, + "step": 6813 + }, + { + "epoch": 1.039581298828125e-05, + "step": 6813, + "training_step_time": 0.1863689422607422 + }, + { + "epoch": 1.03973388671875e-05, + "model_forward_time": 0.02456831932067871, + "step": 6814 + }, + { + "epoch": 1.03973388671875e-05, + "step": 6814, + "training_step_time": 0.21054291725158691 + }, + { + "epoch": 1.039886474609375e-05, + "model_forward_time": 0.024891138076782227, + "step": 6815 + }, + { + "epoch": 1.039886474609375e-05, + "step": 6815, + "training_step_time": 0.14827871322631836 + }, + { + "epoch": 1.0400390625e-05, + "model_forward_time": 0.02440333366394043, + "step": 6816 + }, + { + "epoch": 1.0400390625e-05, + "step": 6816, + "training_step_time": 0.1231389045715332 + }, + { + "epoch": 1.040191650390625e-05, + "model_forward_time": 0.025834321975708008, + "step": 6817 + }, + { + "epoch": 1.040191650390625e-05, + "step": 6817, + "training_step_time": 0.11193180084228516 + }, + { + "epoch": 1.04034423828125e-05, + "model_forward_time": 0.025318384170532227, + "step": 6818 + }, + { + "epoch": 1.04034423828125e-05, + "step": 6818, + "training_step_time": 0.1058201789855957 + }, + { + "epoch": 1.040496826171875e-05, + "model_forward_time": 0.02589106559753418, + "step": 6819 + }, + { + "epoch": 1.040496826171875e-05, + "step": 6819, + "training_step_time": 0.19548344612121582 + }, + { + "epoch": 1.0406494140625e-05, + "grad_norm": 0.5922017693519592, + "learning_rate": 9.164606203550497e-05, + "loss": 0.0896, + "step": 6820 + }, + { + "epoch": 1.0406494140625e-05, + "model_forward_time": 0.025278806686401367, + "step": 6820 + }, + { + "epoch": 1.0406494140625e-05, + "step": 6820, + "training_step_time": 0.1424579620361328 + }, + { + "epoch": 1.040802001953125e-05, + "model_forward_time": 0.024781465530395508, + "step": 6821 + }, + { + "epoch": 1.040802001953125e-05, + "step": 6821, + "training_step_time": 0.16158580780029297 + }, + { + "epoch": 1.04095458984375e-05, + "model_forward_time": 0.02426433563232422, + "step": 6822 + }, + { + "epoch": 1.04095458984375e-05, + "step": 6822, + "training_step_time": 0.15207862854003906 + }, + { + "epoch": 1.041107177734375e-05, + "model_forward_time": 0.024457216262817383, + "step": 6823 + }, + { + "epoch": 1.041107177734375e-05, + "step": 6823, + "training_step_time": 0.13353395462036133 + }, + { + "epoch": 1.041259765625e-05, + "model_forward_time": 0.024419546127319336, + "step": 6824 + }, + { + "epoch": 1.041259765625e-05, + "step": 6824, + "training_step_time": 0.125596284866333 + }, + { + "epoch": 1.041412353515625e-05, + "model_forward_time": 0.024992704391479492, + "step": 6825 + }, + { + "epoch": 1.041412353515625e-05, + "step": 6825, + "training_step_time": 0.12492012977600098 + }, + { + "epoch": 1.04156494140625e-05, + "model_forward_time": 0.025506973266601562, + "step": 6826 + }, + { + "epoch": 1.04156494140625e-05, + "step": 6826, + "training_step_time": 0.12287163734436035 + }, + { + "epoch": 1.041717529296875e-05, + "model_forward_time": 0.025490760803222656, + "step": 6827 + }, + { + "epoch": 1.041717529296875e-05, + "step": 6827, + "training_step_time": 0.11313962936401367 + }, + { + "epoch": 1.0418701171875e-05, + "model_forward_time": 0.025393247604370117, + "step": 6828 + }, + { + "epoch": 1.0418701171875e-05, + "step": 6828, + "training_step_time": 0.1130528450012207 + }, + { + "epoch": 1.042022705078125e-05, + "model_forward_time": 0.025601625442504883, + "step": 6829 + }, + { + "epoch": 1.042022705078125e-05, + "step": 6829, + "training_step_time": 0.1140587329864502 + }, + { + "epoch": 1.04217529296875e-05, + "grad_norm": 0.5026640892028809, + "learning_rate": 9.161553619893457e-05, + "loss": 0.091, + "step": 6830 + }, + { + "epoch": 1.04217529296875e-05, + "model_forward_time": 0.025165796279907227, + "step": 6830 + }, + { + "epoch": 1.04217529296875e-05, + "step": 6830, + "training_step_time": 0.10871076583862305 + }, + { + "epoch": 1.042327880859375e-05, + "model_forward_time": 0.024888992309570312, + "step": 6831 + }, + { + "epoch": 1.042327880859375e-05, + "step": 6831, + "training_step_time": 0.19790339469909668 + }, + { + "epoch": 1.04248046875e-05, + "model_forward_time": 0.026005268096923828, + "step": 6832 + }, + { + "epoch": 1.04248046875e-05, + "step": 6832, + "training_step_time": 0.13216757774353027 + }, + { + "epoch": 1.042633056640625e-05, + "model_forward_time": 0.024621248245239258, + "step": 6833 + }, + { + "epoch": 1.042633056640625e-05, + "step": 6833, + "training_step_time": 0.12093043327331543 + }, + { + "epoch": 1.04278564453125e-05, + "model_forward_time": 0.02511286735534668, + "step": 6834 + }, + { + "epoch": 1.04278564453125e-05, + "step": 6834, + "training_step_time": 0.21353650093078613 + }, + { + "epoch": 1.042938232421875e-05, + "model_forward_time": 0.024446964263916016, + "step": 6835 + }, + { + "epoch": 1.042938232421875e-05, + "step": 6835, + "training_step_time": 0.1211555004119873 + }, + { + "epoch": 1.0430908203125e-05, + "model_forward_time": 0.024619579315185547, + "step": 6836 + }, + { + "epoch": 1.0430908203125e-05, + "step": 6836, + "training_step_time": 0.10722780227661133 + }, + { + "epoch": 1.043243408203125e-05, + "model_forward_time": 0.027858257293701172, + "step": 6837 + }, + { + "epoch": 1.043243408203125e-05, + "step": 6837, + "training_step_time": 0.11323404312133789 + }, + { + "epoch": 1.04339599609375e-05, + "model_forward_time": 0.025473833084106445, + "step": 6838 + }, + { + "epoch": 1.04339599609375e-05, + "step": 6838, + "training_step_time": 0.10867667198181152 + }, + { + "epoch": 1.043548583984375e-05, + "model_forward_time": 0.025340557098388672, + "step": 6839 + }, + { + "epoch": 1.043548583984375e-05, + "step": 6839, + "training_step_time": 0.10857868194580078 + }, + { + "epoch": 1.043701171875e-05, + "grad_norm": 0.6259903907775879, + "learning_rate": 9.158495979556358e-05, + "loss": 0.0976, + "step": 6840 + }, + { + "epoch": 1.043701171875e-05, + "model_forward_time": 0.02539229393005371, + "step": 6840 + }, + { + "epoch": 1.043701171875e-05, + "step": 6840, + "training_step_time": 0.11167383193969727 + }, + { + "epoch": 1.043853759765625e-05, + "model_forward_time": 0.02536606788635254, + "step": 6841 + }, + { + "epoch": 1.043853759765625e-05, + "step": 6841, + "training_step_time": 0.10842156410217285 + }, + { + "epoch": 1.04400634765625e-05, + "model_forward_time": 0.02516317367553711, + "step": 6842 + }, + { + "epoch": 1.04400634765625e-05, + "step": 6842, + "training_step_time": 0.10797691345214844 + }, + { + "epoch": 1.044158935546875e-05, + "model_forward_time": 0.025162458419799805, + "step": 6843 + }, + { + "epoch": 1.044158935546875e-05, + "step": 6843, + "training_step_time": 0.11037540435791016 + }, + { + "epoch": 1.0443115234375e-05, + "model_forward_time": 0.025346994400024414, + "step": 6844 + }, + { + "epoch": 1.0443115234375e-05, + "step": 6844, + "training_step_time": 0.1098787784576416 + }, + { + "epoch": 1.044464111328125e-05, + "model_forward_time": 0.025025367736816406, + "step": 6845 + }, + { + "epoch": 1.044464111328125e-05, + "step": 6845, + "training_step_time": 0.11203169822692871 + }, + { + "epoch": 1.04461669921875e-05, + "model_forward_time": 0.024965524673461914, + "step": 6846 + }, + { + "epoch": 1.04461669921875e-05, + "step": 6846, + "training_step_time": 0.11276793479919434 + }, + { + "epoch": 1.044769287109375e-05, + "model_forward_time": 0.025529861450195312, + "step": 6847 + }, + { + "epoch": 1.044769287109375e-05, + "step": 6847, + "training_step_time": 0.10617733001708984 + }, + { + "epoch": 1.044921875e-05, + "model_forward_time": 0.025123119354248047, + "step": 6848 + }, + { + "epoch": 1.044921875e-05, + "step": 6848, + "training_step_time": 0.10662651062011719 + }, + { + "epoch": 1.045074462890625e-05, + "model_forward_time": 0.025099992752075195, + "step": 6849 + }, + { + "epoch": 1.045074462890625e-05, + "step": 6849, + "training_step_time": 0.10772442817687988 + }, + { + "epoch": 1.04522705078125e-05, + "grad_norm": 0.3904078006744385, + "learning_rate": 9.155433286254525e-05, + "loss": 0.0869, + "step": 6850 + }, + { + "epoch": 1.04522705078125e-05, + "model_forward_time": 0.02514195442199707, + "step": 6850 + }, + { + "epoch": 1.04522705078125e-05, + "step": 6850, + "training_step_time": 0.10655665397644043 + }, + { + "epoch": 1.045379638671875e-05, + "model_forward_time": 0.025073528289794922, + "step": 6851 + }, + { + "epoch": 1.045379638671875e-05, + "step": 6851, + "training_step_time": 0.10752987861633301 + }, + { + "epoch": 1.0455322265625e-05, + "model_forward_time": 0.025771379470825195, + "step": 6852 + }, + { + "epoch": 1.0455322265625e-05, + "step": 6852, + "training_step_time": 0.11004471778869629 + }, + { + "epoch": 1.045684814453125e-05, + "model_forward_time": 0.02509140968322754, + "step": 6853 + }, + { + "epoch": 1.045684814453125e-05, + "step": 6853, + "training_step_time": 0.10846352577209473 + }, + { + "epoch": 1.04583740234375e-05, + "model_forward_time": 0.026437759399414062, + "step": 6854 + }, + { + "epoch": 1.04583740234375e-05, + "step": 6854, + "training_step_time": 0.1535472869873047 + }, + { + "epoch": 1.045989990234375e-05, + "model_forward_time": 0.025159358978271484, + "step": 6855 + }, + { + "epoch": 1.045989990234375e-05, + "step": 6855, + "training_step_time": 0.1153264045715332 + }, + { + "epoch": 1.046142578125e-05, + "model_forward_time": 0.025094985961914062, + "step": 6856 + }, + { + "epoch": 1.046142578125e-05, + "step": 6856, + "training_step_time": 0.1332230567932129 + }, + { + "epoch": 1.046295166015625e-05, + "model_forward_time": 0.02551579475402832, + "step": 6857 + }, + { + "epoch": 1.046295166015625e-05, + "step": 6857, + "training_step_time": 0.15721726417541504 + }, + { + "epoch": 1.04644775390625e-05, + "model_forward_time": 0.024110794067382812, + "step": 6858 + }, + { + "epoch": 1.04644775390625e-05, + "step": 6858, + "training_step_time": 0.17815899848937988 + }, + { + "epoch": 1.046600341796875e-05, + "model_forward_time": 0.024344205856323242, + "step": 6859 + }, + { + "epoch": 1.046600341796875e-05, + "step": 6859, + "training_step_time": 0.1632683277130127 + }, + { + "epoch": 1.0467529296875e-05, + "grad_norm": 0.49598586559295654, + "learning_rate": 9.152365543709416e-05, + "loss": 0.0621, + "step": 6860 + }, + { + "epoch": 1.0467529296875e-05, + "model_forward_time": 0.024263858795166016, + "step": 6860 + }, + { + "epoch": 1.0467529296875e-05, + "step": 6860, + "training_step_time": 0.16974520683288574 + }, + { + "epoch": 1.046905517578125e-05, + "model_forward_time": 0.0242769718170166, + "step": 6861 + }, + { + "epoch": 1.046905517578125e-05, + "step": 6861, + "training_step_time": 0.10838723182678223 + }, + { + "epoch": 1.04705810546875e-05, + "model_forward_time": 0.024440526962280273, + "step": 6862 + }, + { + "epoch": 1.04705810546875e-05, + "step": 6862, + "training_step_time": 0.18919014930725098 + }, + { + "epoch": 1.047210693359375e-05, + "model_forward_time": 0.02396702766418457, + "step": 6863 + }, + { + "epoch": 1.047210693359375e-05, + "step": 6863, + "training_step_time": 0.20218777656555176 + }, + { + "epoch": 1.04736328125e-05, + "model_forward_time": 0.02405071258544922, + "step": 6864 + }, + { + "epoch": 1.04736328125e-05, + "step": 6864, + "training_step_time": 0.19723796844482422 + }, + { + "epoch": 1.047515869140625e-05, + "model_forward_time": 0.023938417434692383, + "step": 6865 + }, + { + "epoch": 1.047515869140625e-05, + "step": 6865, + "training_step_time": 0.18705320358276367 + }, + { + "epoch": 1.04766845703125e-05, + "model_forward_time": 0.024021387100219727, + "step": 6866 + }, + { + "epoch": 1.04766845703125e-05, + "step": 6866, + "training_step_time": 0.1728525161743164 + }, + { + "epoch": 1.047821044921875e-05, + "model_forward_time": 0.024511337280273438, + "step": 6867 + }, + { + "epoch": 1.047821044921875e-05, + "step": 6867, + "training_step_time": 0.17067623138427734 + }, + { + "epoch": 1.0479736328125e-05, + "model_forward_time": 0.024166345596313477, + "step": 6868 + }, + { + "epoch": 1.0479736328125e-05, + "step": 6868, + "training_step_time": 0.11936259269714355 + }, + { + "epoch": 1.048126220703125e-05, + "model_forward_time": 0.024559497833251953, + "step": 6869 + }, + { + "epoch": 1.048126220703125e-05, + "step": 6869, + "training_step_time": 0.10615658760070801 + }, + { + "epoch": 1.04827880859375e-05, + "grad_norm": 0.5252742767333984, + "learning_rate": 9.14929275564863e-05, + "loss": 0.0848, + "step": 6870 + }, + { + "epoch": 1.04827880859375e-05, + "model_forward_time": 0.024663448333740234, + "step": 6870 + }, + { + "epoch": 1.04827880859375e-05, + "step": 6870, + "training_step_time": 0.10719823837280273 + }, + { + "epoch": 1.048431396484375e-05, + "model_forward_time": 0.02483534812927246, + "step": 6871 + }, + { + "epoch": 1.048431396484375e-05, + "step": 6871, + "training_step_time": 0.10860753059387207 + }, + { + "epoch": 1.048583984375e-05, + "model_forward_time": 0.024970531463623047, + "step": 6872 + }, + { + "epoch": 1.048583984375e-05, + "step": 6872, + "training_step_time": 0.11089205741882324 + }, + { + "epoch": 1.048736572265625e-05, + "model_forward_time": 0.024719715118408203, + "step": 6873 + }, + { + "epoch": 1.048736572265625e-05, + "step": 6873, + "training_step_time": 0.17366385459899902 + }, + { + "epoch": 1.04888916015625e-05, + "model_forward_time": 0.02436375617980957, + "step": 6874 + }, + { + "epoch": 1.04888916015625e-05, + "step": 6874, + "training_step_time": 0.1891021728515625 + }, + { + "epoch": 1.049041748046875e-05, + "model_forward_time": 0.02434372901916504, + "step": 6875 + }, + { + "epoch": 1.049041748046875e-05, + "step": 6875, + "training_step_time": 0.18416285514831543 + }, + { + "epoch": 1.0491943359375e-05, + "model_forward_time": 0.024489641189575195, + "step": 6876 + }, + { + "epoch": 1.0491943359375e-05, + "step": 6876, + "training_step_time": 0.17869210243225098 + }, + { + "epoch": 1.049346923828125e-05, + "model_forward_time": 0.024214744567871094, + "step": 6877 + }, + { + "epoch": 1.049346923828125e-05, + "step": 6877, + "training_step_time": 0.15127015113830566 + }, + { + "epoch": 1.04949951171875e-05, + "model_forward_time": 0.027262449264526367, + "step": 6878 + }, + { + "epoch": 1.04949951171875e-05, + "step": 6878, + "training_step_time": 0.11522150039672852 + }, + { + "epoch": 1.049652099609375e-05, + "model_forward_time": 0.023425579071044922, + "step": 6879 + }, + { + "epoch": 1.049652099609375e-05, + "step": 6879, + "training_step_time": 0.10960936546325684 + }, + { + "epoch": 1.0498046875e-05, + "grad_norm": 0.2893292307853699, + "learning_rate": 9.146214925805891e-05, + "loss": 0.0822, + "step": 6880 + }, + { + "epoch": 1.0498046875e-05, + "model_forward_time": 0.026083707809448242, + "step": 6880 + }, + { + "epoch": 1.0498046875e-05, + "step": 6880, + "training_step_time": 0.11514163017272949 + }, + { + "epoch": 1.049957275390625e-05, + "model_forward_time": 0.025149822235107422, + "step": 6881 + }, + { + "epoch": 1.049957275390625e-05, + "step": 6881, + "training_step_time": 0.10846257209777832 + }, + { + "epoch": 1.05010986328125e-05, + "model_forward_time": 0.024936914443969727, + "step": 6882 + }, + { + "epoch": 1.05010986328125e-05, + "step": 6882, + "training_step_time": 0.11062002182006836 + }, + { + "epoch": 1.050262451171875e-05, + "model_forward_time": 0.025113821029663086, + "step": 6883 + }, + { + "epoch": 1.050262451171875e-05, + "step": 6883, + "training_step_time": 0.10923910140991211 + }, + { + "epoch": 1.0504150390625e-05, + "model_forward_time": 0.024729013442993164, + "step": 6884 + }, + { + "epoch": 1.0504150390625e-05, + "step": 6884, + "training_step_time": 0.10837125778198242 + }, + { + "epoch": 1.050567626953125e-05, + "model_forward_time": 0.025251150131225586, + "step": 6885 + }, + { + "epoch": 1.050567626953125e-05, + "step": 6885, + "training_step_time": 0.10841155052185059 + }, + { + "epoch": 1.05072021484375e-05, + "model_forward_time": 0.025059223175048828, + "step": 6886 + }, + { + "epoch": 1.05072021484375e-05, + "step": 6886, + "training_step_time": 0.11151838302612305 + }, + { + "epoch": 1.050872802734375e-05, + "model_forward_time": 0.02523207664489746, + "step": 6887 + }, + { + "epoch": 1.050872802734375e-05, + "step": 6887, + "training_step_time": 0.11017608642578125 + }, + { + "epoch": 1.051025390625e-05, + "model_forward_time": 0.02539992332458496, + "step": 6888 + }, + { + "epoch": 1.051025390625e-05, + "step": 6888, + "training_step_time": 0.10846590995788574 + }, + { + "epoch": 1.051177978515625e-05, + "model_forward_time": 0.024913787841796875, + "step": 6889 + }, + { + "epoch": 1.051177978515625e-05, + "step": 6889, + "training_step_time": 0.10684013366699219 + }, + { + "epoch": 1.05133056640625e-05, + "grad_norm": 0.4509742558002472, + "learning_rate": 9.143132057921058e-05, + "loss": 0.0736, + "step": 6890 + }, + { + "epoch": 1.05133056640625e-05, + "model_forward_time": 0.025509357452392578, + "step": 6890 + }, + { + "epoch": 1.05133056640625e-05, + "step": 6890, + "training_step_time": 0.10841894149780273 + }, + { + "epoch": 1.051483154296875e-05, + "model_forward_time": 0.025149106979370117, + "step": 6891 + }, + { + "epoch": 1.051483154296875e-05, + "step": 6891, + "training_step_time": 0.10986566543579102 + }, + { + "epoch": 1.0516357421875e-05, + "model_forward_time": 0.024235963821411133, + "step": 6892 + }, + { + "epoch": 1.0516357421875e-05, + "step": 6892, + "training_step_time": 0.10765886306762695 + }, + { + "epoch": 1.051788330078125e-05, + "model_forward_time": 0.02395153045654297, + "step": 6893 + }, + { + "epoch": 1.051788330078125e-05, + "step": 6893, + "training_step_time": 0.1080479621887207 + }, + { + "epoch": 1.05194091796875e-05, + "model_forward_time": 0.024648189544677734, + "step": 6894 + }, + { + "epoch": 1.05194091796875e-05, + "step": 6894, + "training_step_time": 0.10867524147033691 + }, + { + "epoch": 1.052093505859375e-05, + "model_forward_time": 0.025477886199951172, + "step": 6895 + }, + { + "epoch": 1.052093505859375e-05, + "step": 6895, + "training_step_time": 0.21281933784484863 + }, + { + "epoch": 1.05224609375e-05, + "model_forward_time": 0.02380228042602539, + "step": 6896 + }, + { + "epoch": 1.05224609375e-05, + "step": 6896, + "training_step_time": 0.11900568008422852 + }, + { + "epoch": 1.052398681640625e-05, + "model_forward_time": 0.024505138397216797, + "step": 6897 + }, + { + "epoch": 1.052398681640625e-05, + "step": 6897, + "training_step_time": 0.13282513618469238 + }, + { + "epoch": 1.05255126953125e-05, + "model_forward_time": 0.02491140365600586, + "step": 6898 + }, + { + "epoch": 1.05255126953125e-05, + "step": 6898, + "training_step_time": 0.16254496574401855 + }, + { + "epoch": 1.052703857421875e-05, + "model_forward_time": 0.024431467056274414, + "step": 6899 + }, + { + "epoch": 1.052703857421875e-05, + "step": 6899, + "training_step_time": 0.21403288841247559 + }, + { + "epoch": 1.0528564453125e-05, + "grad_norm": 0.45346662402153015, + "learning_rate": 9.140044155740101e-05, + "loss": 0.0765, + "step": 6900 + }, + { + "epoch": 1.0528564453125e-05, + "model_forward_time": 0.024194002151489258, + "step": 6900 + }, + { + "epoch": 1.0528564453125e-05, + "step": 6900, + "training_step_time": 0.1487438678741455 + }, + { + "epoch": 1.053009033203125e-05, + "model_forward_time": 0.024287939071655273, + "step": 6901 + }, + { + "epoch": 1.053009033203125e-05, + "step": 6901, + "training_step_time": 0.11983561515808105 + }, + { + "epoch": 1.05316162109375e-05, + "model_forward_time": 0.024675607681274414, + "step": 6902 + }, + { + "epoch": 1.05316162109375e-05, + "step": 6902, + "training_step_time": 0.11600899696350098 + }, + { + "epoch": 1.053314208984375e-05, + "model_forward_time": 0.025053739547729492, + "step": 6903 + }, + { + "epoch": 1.053314208984375e-05, + "step": 6903, + "training_step_time": 0.10934877395629883 + }, + { + "epoch": 1.053466796875e-05, + "model_forward_time": 0.025212526321411133, + "step": 6904 + }, + { + "epoch": 1.053466796875e-05, + "step": 6904, + "training_step_time": 0.20821809768676758 + }, + { + "epoch": 1.053619384765625e-05, + "model_forward_time": 0.024994373321533203, + "step": 6905 + }, + { + "epoch": 1.053619384765625e-05, + "step": 6905, + "training_step_time": 0.11135983467102051 + }, + { + "epoch": 1.05377197265625e-05, + "model_forward_time": 0.024297714233398438, + "step": 6906 + }, + { + "epoch": 1.05377197265625e-05, + "step": 6906, + "training_step_time": 0.1109781265258789 + }, + { + "epoch": 1.053924560546875e-05, + "model_forward_time": 0.02540111541748047, + "step": 6907 + }, + { + "epoch": 1.053924560546875e-05, + "step": 6907, + "training_step_time": 0.11577272415161133 + }, + { + "epoch": 1.0540771484375e-05, + "model_forward_time": 0.02454376220703125, + "step": 6908 + }, + { + "epoch": 1.0540771484375e-05, + "step": 6908, + "training_step_time": 0.1179506778717041 + }, + { + "epoch": 1.054229736328125e-05, + "model_forward_time": 0.024923086166381836, + "step": 6909 + }, + { + "epoch": 1.054229736328125e-05, + "step": 6909, + "training_step_time": 0.10774946212768555 + }, + { + "epoch": 1.05438232421875e-05, + "grad_norm": 0.6097220778465271, + "learning_rate": 9.136951223015113e-05, + "loss": 0.0682, + "step": 6910 + }, + { + "epoch": 1.05438232421875e-05, + "model_forward_time": 0.02468109130859375, + "step": 6910 + }, + { + "epoch": 1.05438232421875e-05, + "step": 6910, + "training_step_time": 0.10899472236633301 + }, + { + "epoch": 1.054534912109375e-05, + "model_forward_time": 0.02492666244506836, + "step": 6911 + }, + { + "epoch": 1.054534912109375e-05, + "step": 6911, + "training_step_time": 0.10904860496520996 + }, + { + "epoch": 1.0546875e-05, + "model_forward_time": 0.025363683700561523, + "step": 6912 + }, + { + "epoch": 1.0546875e-05, + "step": 6912, + "training_step_time": 0.11029481887817383 + }, + { + "epoch": 1.054840087890625e-05, + "model_forward_time": 0.024826526641845703, + "step": 6913 + }, + { + "epoch": 1.054840087890625e-05, + "step": 6913, + "training_step_time": 0.10861897468566895 + }, + { + "epoch": 1.05499267578125e-05, + "model_forward_time": 0.024974346160888672, + "step": 6914 + }, + { + "epoch": 1.05499267578125e-05, + "step": 6914, + "training_step_time": 0.10750460624694824 + }, + { + "epoch": 1.055145263671875e-05, + "model_forward_time": 0.02500009536743164, + "step": 6915 + }, + { + "epoch": 1.055145263671875e-05, + "step": 6915, + "training_step_time": 0.10692739486694336 + }, + { + "epoch": 1.0552978515625e-05, + "model_forward_time": 0.028398513793945312, + "step": 6916 + }, + { + "epoch": 1.0552978515625e-05, + "step": 6916, + "training_step_time": 0.11512517929077148 + }, + { + "epoch": 1.055450439453125e-05, + "model_forward_time": 0.025870561599731445, + "step": 6917 + }, + { + "epoch": 1.055450439453125e-05, + "step": 6917, + "training_step_time": 0.10952019691467285 + }, + { + "epoch": 1.05560302734375e-05, + "model_forward_time": 0.025049209594726562, + "step": 6918 + }, + { + "epoch": 1.05560302734375e-05, + "step": 6918, + "training_step_time": 0.20692658424377441 + }, + { + "epoch": 1.055755615234375e-05, + "model_forward_time": 0.024576663970947266, + "step": 6919 + }, + { + "epoch": 1.055755615234375e-05, + "step": 6919, + "training_step_time": 0.20017027854919434 + }, + { + "epoch": 1.055908203125e-05, + "grad_norm": 0.37026605010032654, + "learning_rate": 9.133853263504302e-05, + "loss": 0.0919, + "step": 6920 + }, + { + "epoch": 1.055908203125e-05, + "model_forward_time": 0.02448415756225586, + "step": 6920 + }, + { + "epoch": 1.055908203125e-05, + "step": 6920, + "training_step_time": 0.18847084045410156 + }, + { + "epoch": 1.056060791015625e-05, + "model_forward_time": 0.0244293212890625, + "step": 6921 + }, + { + "epoch": 1.056060791015625e-05, + "step": 6921, + "training_step_time": 0.1820390224456787 + }, + { + "epoch": 1.05621337890625e-05, + "model_forward_time": 0.024730205535888672, + "step": 6922 + }, + { + "epoch": 1.05621337890625e-05, + "step": 6922, + "training_step_time": 0.18834781646728516 + }, + { + "epoch": 1.056365966796875e-05, + "model_forward_time": 0.024193763732910156, + "step": 6923 + }, + { + "epoch": 1.056365966796875e-05, + "step": 6923, + "training_step_time": 0.10442686080932617 + }, + { + "epoch": 1.0565185546875e-05, + "model_forward_time": 0.02419447898864746, + "step": 6924 + }, + { + "epoch": 1.0565185546875e-05, + "step": 6924, + "training_step_time": 0.11005306243896484 + }, + { + "epoch": 1.056671142578125e-05, + "model_forward_time": 0.024891138076782227, + "step": 6925 + }, + { + "epoch": 1.056671142578125e-05, + "step": 6925, + "training_step_time": 0.10966086387634277 + }, + { + "epoch": 1.05682373046875e-05, + "model_forward_time": 0.025177955627441406, + "step": 6926 + }, + { + "epoch": 1.05682373046875e-05, + "step": 6926, + "training_step_time": 0.11630845069885254 + }, + { + "epoch": 1.056976318359375e-05, + "model_forward_time": 0.025228023529052734, + "step": 6927 + }, + { + "epoch": 1.056976318359375e-05, + "step": 6927, + "training_step_time": 0.10709166526794434 + }, + { + "epoch": 1.05712890625e-05, + "model_forward_time": 0.0254361629486084, + "step": 6928 + }, + { + "epoch": 1.05712890625e-05, + "step": 6928, + "training_step_time": 0.1087031364440918 + }, + { + "epoch": 1.057281494140625e-05, + "model_forward_time": 0.025231361389160156, + "step": 6929 + }, + { + "epoch": 1.057281494140625e-05, + "step": 6929, + "training_step_time": 0.10891246795654297 + }, + { + "epoch": 1.05743408203125e-05, + "grad_norm": 0.45468053221702576, + "learning_rate": 9.130750280971978e-05, + "loss": 0.0978, + "step": 6930 + }, + { + "epoch": 1.05743408203125e-05, + "model_forward_time": 0.025106191635131836, + "step": 6930 + }, + { + "epoch": 1.05743408203125e-05, + "step": 6930, + "training_step_time": 0.10860562324523926 + }, + { + "epoch": 1.057586669921875e-05, + "model_forward_time": 0.025057554244995117, + "step": 6931 + }, + { + "epoch": 1.057586669921875e-05, + "step": 6931, + "training_step_time": 0.10863113403320312 + }, + { + "epoch": 1.0577392578125e-05, + "model_forward_time": 0.026504039764404297, + "step": 6932 + }, + { + "epoch": 1.0577392578125e-05, + "step": 6932, + "training_step_time": 0.10920596122741699 + }, + { + "epoch": 1.057891845703125e-05, + "model_forward_time": 0.024993896484375, + "step": 6933 + }, + { + "epoch": 1.057891845703125e-05, + "step": 6933, + "training_step_time": 0.10929608345031738 + }, + { + "epoch": 1.05804443359375e-05, + "model_forward_time": 0.025240182876586914, + "step": 6934 + }, + { + "epoch": 1.05804443359375e-05, + "step": 6934, + "training_step_time": 0.10766887664794922 + }, + { + "epoch": 1.058197021484375e-05, + "model_forward_time": 0.025053024291992188, + "step": 6935 + }, + { + "epoch": 1.058197021484375e-05, + "step": 6935, + "training_step_time": 0.1256873607635498 + }, + { + "epoch": 1.058349609375e-05, + "model_forward_time": 0.024502992630004883, + "step": 6936 + }, + { + "epoch": 1.058349609375e-05, + "step": 6936, + "training_step_time": 0.14051365852355957 + }, + { + "epoch": 1.058502197265625e-05, + "model_forward_time": 0.02467799186706543, + "step": 6937 + }, + { + "epoch": 1.058502197265625e-05, + "step": 6937, + "training_step_time": 0.13616371154785156 + }, + { + "epoch": 1.05865478515625e-05, + "model_forward_time": 0.02424454689025879, + "step": 6938 + }, + { + "epoch": 1.05865478515625e-05, + "step": 6938, + "training_step_time": 0.12135004997253418 + }, + { + "epoch": 1.058807373046875e-05, + "model_forward_time": 0.02514815330505371, + "step": 6939 + }, + { + "epoch": 1.058807373046875e-05, + "step": 6939, + "training_step_time": 0.13232088088989258 + }, + { + "epoch": 1.0589599609375e-05, + "grad_norm": 0.47305402159690857, + "learning_rate": 9.127642279188558e-05, + "loss": 0.0641, + "step": 6940 + }, + { + "epoch": 1.0589599609375e-05, + "model_forward_time": 0.025082111358642578, + "step": 6940 + }, + { + "epoch": 1.0589599609375e-05, + "step": 6940, + "training_step_time": 0.11889147758483887 + }, + { + "epoch": 1.059112548828125e-05, + "model_forward_time": 0.024838685989379883, + "step": 6941 + }, + { + "epoch": 1.059112548828125e-05, + "step": 6941, + "training_step_time": 0.12120175361633301 + }, + { + "epoch": 1.05926513671875e-05, + "model_forward_time": 0.02487969398498535, + "step": 6942 + }, + { + "epoch": 1.05926513671875e-05, + "step": 6942, + "training_step_time": 0.15819287300109863 + }, + { + "epoch": 1.059417724609375e-05, + "model_forward_time": 0.0242156982421875, + "step": 6943 + }, + { + "epoch": 1.059417724609375e-05, + "step": 6943, + "training_step_time": 0.2102794647216797 + }, + { + "epoch": 1.0595703125e-05, + "model_forward_time": 0.02479720115661621, + "step": 6944 + }, + { + "epoch": 1.0595703125e-05, + "step": 6944, + "training_step_time": 0.1717240810394287 + }, + { + "epoch": 1.059722900390625e-05, + "model_forward_time": 0.0239102840423584, + "step": 6945 + }, + { + "epoch": 1.059722900390625e-05, + "step": 6945, + "training_step_time": 0.12453269958496094 + }, + { + "epoch": 1.05987548828125e-05, + "model_forward_time": 0.024402141571044922, + "step": 6946 + }, + { + "epoch": 1.05987548828125e-05, + "step": 6946, + "training_step_time": 0.11365866661071777 + }, + { + "epoch": 1.060028076171875e-05, + "model_forward_time": 0.02542901039123535, + "step": 6947 + }, + { + "epoch": 1.060028076171875e-05, + "step": 6947, + "training_step_time": 0.11139893531799316 + }, + { + "epoch": 1.0601806640625e-05, + "model_forward_time": 0.024979829788208008, + "step": 6948 + }, + { + "epoch": 1.0601806640625e-05, + "step": 6948, + "training_step_time": 0.19571137428283691 + }, + { + "epoch": 1.060333251953125e-05, + "model_forward_time": 0.024342060089111328, + "step": 6949 + }, + { + "epoch": 1.060333251953125e-05, + "step": 6949, + "training_step_time": 0.10512733459472656 + }, + { + "epoch": 1.06048583984375e-05, + "grad_norm": 0.5234485864639282, + "learning_rate": 9.124529261930559e-05, + "loss": 0.0716, + "step": 6950 + }, + { + "epoch": 1.06048583984375e-05, + "model_forward_time": 0.024337053298950195, + "step": 6950 + }, + { + "epoch": 1.06048583984375e-05, + "step": 6950, + "training_step_time": 0.10312104225158691 + }, + { + "epoch": 1.060638427734375e-05, + "model_forward_time": 0.02665567398071289, + "step": 6951 + }, + { + "epoch": 1.060638427734375e-05, + "step": 6951, + "training_step_time": 0.1150050163269043 + }, + { + "epoch": 1.060791015625e-05, + "model_forward_time": 0.023973941802978516, + "step": 6952 + }, + { + "epoch": 1.060791015625e-05, + "step": 6952, + "training_step_time": 0.1068732738494873 + }, + { + "epoch": 1.060943603515625e-05, + "model_forward_time": 0.02396416664123535, + "step": 6953 + }, + { + "epoch": 1.060943603515625e-05, + "step": 6953, + "training_step_time": 0.10594987869262695 + }, + { + "epoch": 1.06109619140625e-05, + "model_forward_time": 0.024998188018798828, + "step": 6954 + }, + { + "epoch": 1.06109619140625e-05, + "step": 6954, + "training_step_time": 0.11004972457885742 + }, + { + "epoch": 1.061248779296875e-05, + "model_forward_time": 0.024957895278930664, + "step": 6955 + }, + { + "epoch": 1.061248779296875e-05, + "step": 6955, + "training_step_time": 0.10573744773864746 + }, + { + "epoch": 1.0614013671875e-05, + "model_forward_time": 0.025057077407836914, + "step": 6956 + }, + { + "epoch": 1.0614013671875e-05, + "step": 6956, + "training_step_time": 0.11009478569030762 + }, + { + "epoch": 1.061553955078125e-05, + "model_forward_time": 0.02498912811279297, + "step": 6957 + }, + { + "epoch": 1.061553955078125e-05, + "step": 6957, + "training_step_time": 0.10709834098815918 + }, + { + "epoch": 1.06170654296875e-05, + "model_forward_time": 0.025204896926879883, + "step": 6958 + }, + { + "epoch": 1.06170654296875e-05, + "step": 6958, + "training_step_time": 0.11341285705566406 + }, + { + "epoch": 1.061859130859375e-05, + "model_forward_time": 0.02502584457397461, + "step": 6959 + }, + { + "epoch": 1.061859130859375e-05, + "step": 6959, + "training_step_time": 0.14218664169311523 + }, + { + "epoch": 1.06201171875e-05, + "grad_norm": 0.556512713432312, + "learning_rate": 9.121411232980588e-05, + "loss": 0.0746, + "step": 6960 + }, + { + "epoch": 1.06201171875e-05, + "model_forward_time": 0.025212764739990234, + "step": 6960 + }, + { + "epoch": 1.06201171875e-05, + "step": 6960, + "training_step_time": 0.11321234703063965 + }, + { + "epoch": 1.062164306640625e-05, + "model_forward_time": 0.024501800537109375, + "step": 6961 + }, + { + "epoch": 1.062164306640625e-05, + "step": 6961, + "training_step_time": 0.13434553146362305 + }, + { + "epoch": 1.06231689453125e-05, + "model_forward_time": 0.02434539794921875, + "step": 6962 + }, + { + "epoch": 1.06231689453125e-05, + "step": 6962, + "training_step_time": 0.2034461498260498 + }, + { + "epoch": 1.062469482421875e-05, + "model_forward_time": 0.024527788162231445, + "step": 6963 + }, + { + "epoch": 1.062469482421875e-05, + "step": 6963, + "training_step_time": 0.1347362995147705 + }, + { + "epoch": 1.0626220703125e-05, + "model_forward_time": 0.02398228645324707, + "step": 6964 + }, + { + "epoch": 1.0626220703125e-05, + "step": 6964, + "training_step_time": 0.20968294143676758 + }, + { + "epoch": 1.062774658203125e-05, + "model_forward_time": 0.0247194766998291, + "step": 6965 + }, + { + "epoch": 1.062774658203125e-05, + "step": 6965, + "training_step_time": 0.13364553451538086 + }, + { + "epoch": 1.06292724609375e-05, + "model_forward_time": 0.02466583251953125, + "step": 6966 + }, + { + "epoch": 1.06292724609375e-05, + "step": 6966, + "training_step_time": 0.11752462387084961 + }, + { + "epoch": 1.063079833984375e-05, + "model_forward_time": 0.025668859481811523, + "step": 6967 + }, + { + "epoch": 1.063079833984375e-05, + "step": 6967, + "training_step_time": 0.11880373954772949 + }, + { + "epoch": 1.063232421875e-05, + "model_forward_time": 0.025139808654785156, + "step": 6968 + }, + { + "epoch": 1.063232421875e-05, + "step": 6968, + "training_step_time": 0.11307787895202637 + }, + { + "epoch": 1.063385009765625e-05, + "model_forward_time": 0.025094032287597656, + "step": 6969 + }, + { + "epoch": 1.063385009765625e-05, + "step": 6969, + "training_step_time": 0.11216187477111816 + }, + { + "epoch": 1.06353759765625e-05, + "grad_norm": 0.5842373371124268, + "learning_rate": 9.118288196127345e-05, + "loss": 0.084, + "step": 6970 + }, + { + "epoch": 1.06353759765625e-05, + "model_forward_time": 0.025012969970703125, + "step": 6970 + }, + { + "epoch": 1.06353759765625e-05, + "step": 6970, + "training_step_time": 0.11043500900268555 + }, + { + "epoch": 1.063690185546875e-05, + "model_forward_time": 0.024838685989379883, + "step": 6971 + }, + { + "epoch": 1.063690185546875e-05, + "step": 6971, + "training_step_time": 0.10912013053894043 + }, + { + "epoch": 1.0638427734375e-05, + "model_forward_time": 0.024827957153320312, + "step": 6972 + }, + { + "epoch": 1.0638427734375e-05, + "step": 6972, + "training_step_time": 0.10993099212646484 + }, + { + "epoch": 1.063995361328125e-05, + "model_forward_time": 0.025302886962890625, + "step": 6973 + }, + { + "epoch": 1.063995361328125e-05, + "step": 6973, + "training_step_time": 0.11309957504272461 + }, + { + "epoch": 1.06414794921875e-05, + "model_forward_time": 0.025162458419799805, + "step": 6974 + }, + { + "epoch": 1.06414794921875e-05, + "step": 6974, + "training_step_time": 0.10836243629455566 + }, + { + "epoch": 1.064300537109375e-05, + "model_forward_time": 0.025098085403442383, + "step": 6975 + }, + { + "epoch": 1.064300537109375e-05, + "step": 6975, + "training_step_time": 0.10652303695678711 + }, + { + "epoch": 1.064453125e-05, + "model_forward_time": 0.027965545654296875, + "step": 6976 + }, + { + "epoch": 1.064453125e-05, + "step": 6976, + "training_step_time": 0.1163339614868164 + }, + { + "epoch": 1.064605712890625e-05, + "model_forward_time": 0.024166584014892578, + "step": 6977 + }, + { + "epoch": 1.064605712890625e-05, + "step": 6977, + "training_step_time": 0.11110782623291016 + }, + { + "epoch": 1.06475830078125e-05, + "model_forward_time": 0.023968935012817383, + "step": 6978 + }, + { + "epoch": 1.06475830078125e-05, + "step": 6978, + "training_step_time": 0.10535955429077148 + }, + { + "epoch": 1.064910888671875e-05, + "model_forward_time": 0.02504110336303711, + "step": 6979 + }, + { + "epoch": 1.064910888671875e-05, + "step": 6979, + "training_step_time": 0.10996055603027344 + }, + { + "epoch": 1.0650634765625e-05, + "grad_norm": 0.5641760230064392, + "learning_rate": 9.115160155165614e-05, + "loss": 0.0789, + "step": 6980 + }, + { + "epoch": 1.0650634765625e-05, + "model_forward_time": 0.02630448341369629, + "step": 6980 + }, + { + "epoch": 1.0650634765625e-05, + "step": 6980, + "training_step_time": 0.1129293441772461 + }, + { + "epoch": 1.065216064453125e-05, + "model_forward_time": 0.027592182159423828, + "step": 6981 + }, + { + "epoch": 1.065216064453125e-05, + "step": 6981, + "training_step_time": 0.11294436454772949 + }, + { + "epoch": 1.06536865234375e-05, + "model_forward_time": 0.02497243881225586, + "step": 6982 + }, + { + "epoch": 1.06536865234375e-05, + "step": 6982, + "training_step_time": 0.11072969436645508 + }, + { + "epoch": 1.065521240234375e-05, + "model_forward_time": 0.02498459815979004, + "step": 6983 + }, + { + "epoch": 1.065521240234375e-05, + "step": 6983, + "training_step_time": 0.19438672065734863 + }, + { + "epoch": 1.065673828125e-05, + "model_forward_time": 0.024617671966552734, + "step": 6984 + }, + { + "epoch": 1.065673828125e-05, + "step": 6984, + "training_step_time": 0.11960458755493164 + }, + { + "epoch": 1.065826416015625e-05, + "model_forward_time": 0.024527549743652344, + "step": 6985 + }, + { + "epoch": 1.065826416015625e-05, + "step": 6985, + "training_step_time": 0.12544631958007812 + }, + { + "epoch": 1.06597900390625e-05, + "model_forward_time": 0.02514934539794922, + "step": 6986 + }, + { + "epoch": 1.06597900390625e-05, + "step": 6986, + "training_step_time": 0.15999865531921387 + }, + { + "epoch": 1.066131591796875e-05, + "model_forward_time": 0.02427983283996582, + "step": 6987 + }, + { + "epoch": 1.066131591796875e-05, + "step": 6987, + "training_step_time": 0.17305231094360352 + }, + { + "epoch": 1.0662841796875e-05, + "model_forward_time": 0.02447342872619629, + "step": 6988 + }, + { + "epoch": 1.0662841796875e-05, + "step": 6988, + "training_step_time": 0.17518877983093262 + }, + { + "epoch": 1.066436767578125e-05, + "model_forward_time": 0.0241849422454834, + "step": 6989 + }, + { + "epoch": 1.066436767578125e-05, + "step": 6989, + "training_step_time": 0.10628914833068848 + }, + { + "epoch": 1.06658935546875e-05, + "grad_norm": 0.38509079813957214, + "learning_rate": 9.112027113896262e-05, + "loss": 0.0674, + "step": 6990 + }, + { + "epoch": 1.06658935546875e-05, + "model_forward_time": 0.024780750274658203, + "step": 6990 + }, + { + "epoch": 1.06658935546875e-05, + "step": 6990, + "training_step_time": 0.11836552619934082 + }, + { + "epoch": 1.066741943359375e-05, + "model_forward_time": 0.025345325469970703, + "step": 6991 + }, + { + "epoch": 1.066741943359375e-05, + "step": 6991, + "training_step_time": 0.11522483825683594 + }, + { + "epoch": 1.06689453125e-05, + "model_forward_time": 0.025555133819580078, + "step": 6992 + }, + { + "epoch": 1.06689453125e-05, + "step": 6992, + "training_step_time": 0.1119081974029541 + }, + { + "epoch": 1.067047119140625e-05, + "model_forward_time": 0.02512502670288086, + "step": 6993 + }, + { + "epoch": 1.067047119140625e-05, + "step": 6993, + "training_step_time": 0.19302845001220703 + }, + { + "epoch": 1.06719970703125e-05, + "model_forward_time": 0.024240970611572266, + "step": 6994 + }, + { + "epoch": 1.06719970703125e-05, + "step": 6994, + "training_step_time": 0.1082918643951416 + }, + { + "epoch": 1.067352294921875e-05, + "model_forward_time": 0.024778366088867188, + "step": 6995 + }, + { + "epoch": 1.067352294921875e-05, + "step": 6995, + "training_step_time": 0.11216950416564941 + }, + { + "epoch": 1.0675048828125e-05, + "model_forward_time": 0.02550816535949707, + "step": 6996 + }, + { + "epoch": 1.0675048828125e-05, + "step": 6996, + "training_step_time": 0.1091001033782959 + }, + { + "epoch": 1.067657470703125e-05, + "model_forward_time": 0.025396108627319336, + "step": 6997 + }, + { + "epoch": 1.067657470703125e-05, + "step": 6997, + "training_step_time": 0.10845518112182617 + }, + { + "epoch": 1.06781005859375e-05, + "model_forward_time": 0.025339365005493164, + "step": 6998 + }, + { + "epoch": 1.06781005859375e-05, + "step": 6998, + "training_step_time": 0.10641217231750488 + }, + { + "epoch": 1.067962646484375e-05, + "model_forward_time": 0.025295257568359375, + "step": 6999 + }, + { + "epoch": 1.067962646484375e-05, + "step": 6999, + "training_step_time": 0.10755085945129395 + }, + { + "epoch": 1.068115234375e-05, + "grad_norm": 0.41960352659225464, + "learning_rate": 9.108889076126226e-05, + "loss": 0.0711, + "step": 7000 + }, + { + "epoch": 1.068115234375e-05, + "model_forward_time": 0.02619338035583496, + "step": 7000 + }, + { + "epoch": 1.068115234375e-05, + "step": 7000, + "training_step_time": 0.10446834564208984 + }, + { + "epoch": 1.068267822265625e-05, + "model_forward_time": 0.023566722869873047, + "step": 7001 + }, + { + "epoch": 1.068267822265625e-05, + "step": 7001, + "training_step_time": 0.1801450252532959 + }, + { + "epoch": 1.06842041015625e-05, + "model_forward_time": 0.024695634841918945, + "step": 7002 + }, + { + "epoch": 1.06842041015625e-05, + "step": 7002, + "training_step_time": 0.10543465614318848 + }, + { + "epoch": 1.068572998046875e-05, + "model_forward_time": 0.02454376220703125, + "step": 7003 + }, + { + "epoch": 1.068572998046875e-05, + "step": 7003, + "training_step_time": 0.19732403755187988 + }, + { + "epoch": 1.0687255859375e-05, + "model_forward_time": 0.02497696876525879, + "step": 7004 + }, + { + "epoch": 1.0687255859375e-05, + "step": 7004, + "training_step_time": 0.10558462142944336 + }, + { + "epoch": 1.068878173828125e-05, + "model_forward_time": 0.02477264404296875, + "step": 7005 + }, + { + "epoch": 1.068878173828125e-05, + "step": 7005, + "training_step_time": 0.10427308082580566 + }, + { + "epoch": 1.06903076171875e-05, + "model_forward_time": 0.02538156509399414, + "step": 7006 + }, + { + "epoch": 1.06903076171875e-05, + "step": 7006, + "training_step_time": 0.10601615905761719 + }, + { + "epoch": 1.069183349609375e-05, + "model_forward_time": 0.025115013122558594, + "step": 7007 + }, + { + "epoch": 1.069183349609375e-05, + "step": 7007, + "training_step_time": 0.1067347526550293 + }, + { + "epoch": 1.0693359375e-05, + "model_forward_time": 0.025571823120117188, + "step": 7008 + }, + { + "epoch": 1.0693359375e-05, + "step": 7008, + "training_step_time": 0.10509395599365234 + }, + { + "epoch": 1.069488525390625e-05, + "model_forward_time": 0.026918888092041016, + "step": 7009 + }, + { + "epoch": 1.069488525390625e-05, + "step": 7009, + "training_step_time": 0.10778617858886719 + }, + { + "epoch": 1.06964111328125e-05, + "grad_norm": 0.6406276822090149, + "learning_rate": 9.105746045668521e-05, + "loss": 0.0804, + "step": 7010 + }, + { + "epoch": 1.06964111328125e-05, + "model_forward_time": 0.025259017944335938, + "step": 7010 + }, + { + "epoch": 1.06964111328125e-05, + "step": 7010, + "training_step_time": 0.10605144500732422 + }, + { + "epoch": 1.069793701171875e-05, + "model_forward_time": 0.02529311180114746, + "step": 7011 + }, + { + "epoch": 1.069793701171875e-05, + "step": 7011, + "training_step_time": 0.10574507713317871 + }, + { + "epoch": 1.0699462890625e-05, + "model_forward_time": 0.024935483932495117, + "step": 7012 + }, + { + "epoch": 1.0699462890625e-05, + "step": 7012, + "training_step_time": 0.11019563674926758 + }, + { + "epoch": 1.070098876953125e-05, + "model_forward_time": 0.024545669555664062, + "step": 7013 + }, + { + "epoch": 1.070098876953125e-05, + "step": 7013, + "training_step_time": 0.11034584045410156 + }, + { + "epoch": 1.07025146484375e-05, + "model_forward_time": 0.024191617965698242, + "step": 7014 + }, + { + "epoch": 1.07025146484375e-05, + "step": 7014, + "training_step_time": 0.10854029655456543 + }, + { + "epoch": 1.070404052734375e-05, + "model_forward_time": 0.02518010139465332, + "step": 7015 + }, + { + "epoch": 1.070404052734375e-05, + "step": 7015, + "training_step_time": 0.1059732437133789 + }, + { + "epoch": 1.070556640625e-05, + "model_forward_time": 0.024617910385131836, + "step": 7016 + }, + { + "epoch": 1.070556640625e-05, + "step": 7016, + "training_step_time": 0.12343764305114746 + }, + { + "epoch": 1.070709228515625e-05, + "model_forward_time": 0.024818897247314453, + "step": 7017 + }, + { + "epoch": 1.070709228515625e-05, + "step": 7017, + "training_step_time": 0.17769408226013184 + }, + { + "epoch": 1.07086181640625e-05, + "model_forward_time": 0.02481675148010254, + "step": 7018 + }, + { + "epoch": 1.07086181640625e-05, + "step": 7018, + "training_step_time": 0.1714038848876953 + }, + { + "epoch": 1.071014404296875e-05, + "model_forward_time": 0.027878761291503906, + "step": 7019 + }, + { + "epoch": 1.071014404296875e-05, + "step": 7019, + "training_step_time": 0.2144160270690918 + }, + { + "epoch": 1.0711669921875e-05, + "grad_norm": 0.5380091667175293, + "learning_rate": 9.102598026342222e-05, + "loss": 0.0731, + "step": 7020 + }, + { + "epoch": 1.0711669921875e-05, + "model_forward_time": 0.02478504180908203, + "step": 7020 + }, + { + "epoch": 1.0711669921875e-05, + "step": 7020, + "training_step_time": 0.12555789947509766 + }, + { + "epoch": 1.071319580078125e-05, + "model_forward_time": 0.025376558303833008, + "step": 7021 + }, + { + "epoch": 1.071319580078125e-05, + "step": 7021, + "training_step_time": 0.10780143737792969 + }, + { + "epoch": 1.07147216796875e-05, + "model_forward_time": 0.025652647018432617, + "step": 7022 + }, + { + "epoch": 1.07147216796875e-05, + "step": 7022, + "training_step_time": 0.12540078163146973 + }, + { + "epoch": 1.071624755859375e-05, + "model_forward_time": 0.025504112243652344, + "step": 7023 + }, + { + "epoch": 1.071624755859375e-05, + "step": 7023, + "training_step_time": 0.10656952857971191 + }, + { + "epoch": 1.07177734375e-05, + "model_forward_time": 0.02558159828186035, + "step": 7024 + }, + { + "epoch": 1.07177734375e-05, + "step": 7024, + "training_step_time": 0.10676693916320801 + }, + { + "epoch": 1.071929931640625e-05, + "model_forward_time": 0.025756359100341797, + "step": 7025 + }, + { + "epoch": 1.071929931640625e-05, + "step": 7025, + "training_step_time": 0.11246752738952637 + }, + { + "epoch": 1.07208251953125e-05, + "model_forward_time": 0.02526712417602539, + "step": 7026 + }, + { + "epoch": 1.07208251953125e-05, + "step": 7026, + "training_step_time": 0.11186075210571289 + }, + { + "epoch": 1.072235107421875e-05, + "model_forward_time": 0.025817394256591797, + "step": 7027 + }, + { + "epoch": 1.072235107421875e-05, + "step": 7027, + "training_step_time": 0.10851478576660156 + }, + { + "epoch": 1.0723876953125e-05, + "model_forward_time": 0.025493383407592773, + "step": 7028 + }, + { + "epoch": 1.0723876953125e-05, + "step": 7028, + "training_step_time": 0.11330008506774902 + }, + { + "epoch": 1.072540283203125e-05, + "model_forward_time": 0.02581787109375, + "step": 7029 + }, + { + "epoch": 1.072540283203125e-05, + "step": 7029, + "training_step_time": 0.10928082466125488 + }, + { + "epoch": 1.07269287109375e-05, + "grad_norm": 0.5369435548782349, + "learning_rate": 9.099445021972473e-05, + "loss": 0.0878, + "step": 7030 + }, + { + "epoch": 1.07269287109375e-05, + "model_forward_time": 0.025064706802368164, + "step": 7030 + }, + { + "epoch": 1.07269287109375e-05, + "step": 7030, + "training_step_time": 0.11098766326904297 + }, + { + "epoch": 1.072845458984375e-05, + "model_forward_time": 0.02506113052368164, + "step": 7031 + }, + { + "epoch": 1.072845458984375e-05, + "step": 7031, + "training_step_time": 0.10520696640014648 + }, + { + "epoch": 1.072998046875e-05, + "model_forward_time": 0.02485370635986328, + "step": 7032 + }, + { + "epoch": 1.072998046875e-05, + "step": 7032, + "training_step_time": 0.10779285430908203 + }, + { + "epoch": 1.073150634765625e-05, + "model_forward_time": 0.02497410774230957, + "step": 7033 + }, + { + "epoch": 1.073150634765625e-05, + "step": 7033, + "training_step_time": 0.10523414611816406 + }, + { + "epoch": 1.07330322265625e-05, + "model_forward_time": 0.025293588638305664, + "step": 7034 + }, + { + "epoch": 1.07330322265625e-05, + "step": 7034, + "training_step_time": 0.1077272891998291 + }, + { + "epoch": 1.073455810546875e-05, + "model_forward_time": 0.025935649871826172, + "step": 7035 + }, + { + "epoch": 1.073455810546875e-05, + "step": 7035, + "training_step_time": 0.11211967468261719 + }, + { + "epoch": 1.0736083984375e-05, + "model_forward_time": 0.025684595108032227, + "step": 7036 + }, + { + "epoch": 1.0736083984375e-05, + "step": 7036, + "training_step_time": 0.10788726806640625 + }, + { + "epoch": 1.073760986328125e-05, + "model_forward_time": 0.02520465850830078, + "step": 7037 + }, + { + "epoch": 1.073760986328125e-05, + "step": 7037, + "training_step_time": 0.10820150375366211 + }, + { + "epoch": 1.07391357421875e-05, + "model_forward_time": 0.025285959243774414, + "step": 7038 + }, + { + "epoch": 1.07391357421875e-05, + "step": 7038, + "training_step_time": 0.1331157684326172 + }, + { + "epoch": 1.074066162109375e-05, + "model_forward_time": 0.02513742446899414, + "step": 7039 + }, + { + "epoch": 1.074066162109375e-05, + "step": 7039, + "training_step_time": 0.12142276763916016 + }, + { + "epoch": 1.07421875e-05, + "grad_norm": 0.2710757851600647, + "learning_rate": 9.09628703639047e-05, + "loss": 0.0669, + "step": 7040 + }, + { + "epoch": 1.07421875e-05, + "model_forward_time": 0.0250089168548584, + "step": 7040 + }, + { + "epoch": 1.07421875e-05, + "step": 7040, + "training_step_time": 0.12996506690979004 + }, + { + "epoch": 1.074371337890625e-05, + "model_forward_time": 0.02472829818725586, + "step": 7041 + }, + { + "epoch": 1.074371337890625e-05, + "step": 7041, + "training_step_time": 0.15635967254638672 + }, + { + "epoch": 1.07452392578125e-05, + "model_forward_time": 0.024196863174438477, + "step": 7042 + }, + { + "epoch": 1.07452392578125e-05, + "step": 7042, + "training_step_time": 0.10655665397644043 + }, + { + "epoch": 1.074676513671875e-05, + "model_forward_time": 0.025248289108276367, + "step": 7043 + }, + { + "epoch": 1.074676513671875e-05, + "step": 7043, + "training_step_time": 0.1185302734375 + }, + { + "epoch": 1.0748291015625e-05, + "model_forward_time": 0.02496814727783203, + "step": 7044 + }, + { + "epoch": 1.0748291015625e-05, + "step": 7044, + "training_step_time": 0.12171816825866699 + }, + { + "epoch": 1.074981689453125e-05, + "model_forward_time": 0.027051448822021484, + "step": 7045 + }, + { + "epoch": 1.074981689453125e-05, + "step": 7045, + "training_step_time": 0.13951444625854492 + }, + { + "epoch": 1.07513427734375e-05, + "model_forward_time": 0.02491307258605957, + "step": 7046 + }, + { + "epoch": 1.07513427734375e-05, + "step": 7046, + "training_step_time": 0.11390924453735352 + }, + { + "epoch": 1.075286865234375e-05, + "model_forward_time": 0.025099992752075195, + "step": 7047 + }, + { + "epoch": 1.075286865234375e-05, + "step": 7047, + "training_step_time": 0.11038398742675781 + }, + { + "epoch": 1.075439453125e-05, + "model_forward_time": 0.024669647216796875, + "step": 7048 + }, + { + "epoch": 1.075439453125e-05, + "step": 7048, + "training_step_time": 0.11325502395629883 + }, + { + "epoch": 1.075592041015625e-05, + "model_forward_time": 0.02504706382751465, + "step": 7049 + }, + { + "epoch": 1.075592041015625e-05, + "step": 7049, + "training_step_time": 0.11198687553405762 + }, + { + "epoch": 1.07574462890625e-05, + "grad_norm": 0.49168747663497925, + "learning_rate": 9.093124073433463e-05, + "loss": 0.0635, + "step": 7050 + }, + { + "epoch": 1.07574462890625e-05, + "model_forward_time": 0.025922298431396484, + "step": 7050 + }, + { + "epoch": 1.07574462890625e-05, + "step": 7050, + "training_step_time": 0.19176888465881348 + }, + { + "epoch": 1.075897216796875e-05, + "model_forward_time": 0.02443718910217285, + "step": 7051 + }, + { + "epoch": 1.075897216796875e-05, + "step": 7051, + "training_step_time": 0.10614514350891113 + }, + { + "epoch": 1.0760498046875e-05, + "model_forward_time": 0.02424764633178711, + "step": 7052 + }, + { + "epoch": 1.0760498046875e-05, + "step": 7052, + "training_step_time": 0.10701131820678711 + }, + { + "epoch": 1.076202392578125e-05, + "model_forward_time": 0.02578139305114746, + "step": 7053 + }, + { + "epoch": 1.076202392578125e-05, + "step": 7053, + "training_step_time": 0.10794782638549805 + }, + { + "epoch": 1.07635498046875e-05, + "model_forward_time": 0.02535414695739746, + "step": 7054 + }, + { + "epoch": 1.07635498046875e-05, + "step": 7054, + "training_step_time": 0.10931730270385742 + }, + { + "epoch": 1.076507568359375e-05, + "model_forward_time": 0.025298595428466797, + "step": 7055 + }, + { + "epoch": 1.076507568359375e-05, + "step": 7055, + "training_step_time": 0.10750102996826172 + }, + { + "epoch": 1.07666015625e-05, + "model_forward_time": 0.02509927749633789, + "step": 7056 + }, + { + "epoch": 1.07666015625e-05, + "step": 7056, + "training_step_time": 0.11254096031188965 + }, + { + "epoch": 1.076812744140625e-05, + "model_forward_time": 0.02480292320251465, + "step": 7057 + }, + { + "epoch": 1.076812744140625e-05, + "step": 7057, + "training_step_time": 0.10697579383850098 + }, + { + "epoch": 1.07696533203125e-05, + "model_forward_time": 0.02809000015258789, + "step": 7058 + }, + { + "epoch": 1.07696533203125e-05, + "step": 7058, + "training_step_time": 0.1109776496887207 + }, + { + "epoch": 1.077117919921875e-05, + "model_forward_time": 0.02514171600341797, + "step": 7059 + }, + { + "epoch": 1.077117919921875e-05, + "step": 7059, + "training_step_time": 0.10888910293579102 + }, + { + "epoch": 1.0772705078125e-05, + "grad_norm": 0.30953270196914673, + "learning_rate": 9.089956136944751e-05, + "loss": 0.0626, + "step": 7060 + }, + { + "epoch": 1.0772705078125e-05, + "model_forward_time": 0.025087594985961914, + "step": 7060 + }, + { + "epoch": 1.0772705078125e-05, + "step": 7060, + "training_step_time": 0.10701370239257812 + }, + { + "epoch": 1.077423095703125e-05, + "model_forward_time": 0.025014638900756836, + "step": 7061 + }, + { + "epoch": 1.077423095703125e-05, + "step": 7061, + "training_step_time": 0.10825872421264648 + }, + { + "epoch": 1.07757568359375e-05, + "model_forward_time": 0.025869131088256836, + "step": 7062 + }, + { + "epoch": 1.07757568359375e-05, + "step": 7062, + "training_step_time": 0.10701322555541992 + }, + { + "epoch": 1.077728271484375e-05, + "model_forward_time": 0.02526402473449707, + "step": 7063 + }, + { + "epoch": 1.077728271484375e-05, + "step": 7063, + "training_step_time": 0.17486119270324707 + }, + { + "epoch": 1.077880859375e-05, + "model_forward_time": 0.02432727813720703, + "step": 7064 + }, + { + "epoch": 1.077880859375e-05, + "step": 7064, + "training_step_time": 0.1943204402923584 + }, + { + "epoch": 1.078033447265625e-05, + "model_forward_time": 0.024588823318481445, + "step": 7065 + }, + { + "epoch": 1.078033447265625e-05, + "step": 7065, + "training_step_time": 0.2124476432800293 + }, + { + "epoch": 1.07818603515625e-05, + "model_forward_time": 0.024348020553588867, + "step": 7066 + }, + { + "epoch": 1.07818603515625e-05, + "step": 7066, + "training_step_time": 0.1244359016418457 + }, + { + "epoch": 1.078338623046875e-05, + "model_forward_time": 0.02512812614440918, + "step": 7067 + }, + { + "epoch": 1.078338623046875e-05, + "step": 7067, + "training_step_time": 0.13160943984985352 + }, + { + "epoch": 1.0784912109375e-05, + "model_forward_time": 0.025207042694091797, + "step": 7068 + }, + { + "epoch": 1.0784912109375e-05, + "step": 7068, + "training_step_time": 0.10900139808654785 + }, + { + "epoch": 1.078643798828125e-05, + "model_forward_time": 0.025530338287353516, + "step": 7069 + }, + { + "epoch": 1.078643798828125e-05, + "step": 7069, + "training_step_time": 0.1302180290222168 + }, + { + "epoch": 1.07879638671875e-05, + "grad_norm": 0.5272168517112732, + "learning_rate": 9.086783230773672e-05, + "loss": 0.0733, + "step": 7070 + }, + { + "epoch": 1.07879638671875e-05, + "model_forward_time": 0.025542020797729492, + "step": 7070 + }, + { + "epoch": 1.07879638671875e-05, + "step": 7070, + "training_step_time": 0.10872650146484375 + }, + { + "epoch": 1.078948974609375e-05, + "model_forward_time": 0.025114774703979492, + "step": 7071 + }, + { + "epoch": 1.078948974609375e-05, + "step": 7071, + "training_step_time": 0.10934185981750488 + }, + { + "epoch": 1.0791015625e-05, + "model_forward_time": 0.025164127349853516, + "step": 7072 + }, + { + "epoch": 1.0791015625e-05, + "step": 7072, + "training_step_time": 0.11220622062683105 + }, + { + "epoch": 1.079254150390625e-05, + "model_forward_time": 0.02472090721130371, + "step": 7073 + }, + { + "epoch": 1.079254150390625e-05, + "step": 7073, + "training_step_time": 0.11298203468322754 + }, + { + "epoch": 1.07940673828125e-05, + "model_forward_time": 0.02496957778930664, + "step": 7074 + }, + { + "epoch": 1.07940673828125e-05, + "step": 7074, + "training_step_time": 0.1113898754119873 + }, + { + "epoch": 1.079559326171875e-05, + "model_forward_time": 0.025299787521362305, + "step": 7075 + }, + { + "epoch": 1.079559326171875e-05, + "step": 7075, + "training_step_time": 0.10976552963256836 + }, + { + "epoch": 1.0797119140625e-05, + "model_forward_time": 0.025315523147583008, + "step": 7076 + }, + { + "epoch": 1.0797119140625e-05, + "step": 7076, + "training_step_time": 0.1083989143371582 + }, + { + "epoch": 1.079864501953125e-05, + "model_forward_time": 0.02558302879333496, + "step": 7077 + }, + { + "epoch": 1.079864501953125e-05, + "step": 7077, + "training_step_time": 0.10982394218444824 + }, + { + "epoch": 1.08001708984375e-05, + "model_forward_time": 0.024932861328125, + "step": 7078 + }, + { + "epoch": 1.08001708984375e-05, + "step": 7078, + "training_step_time": 0.11085820198059082 + }, + { + "epoch": 1.080169677734375e-05, + "model_forward_time": 0.024733543395996094, + "step": 7079 + }, + { + "epoch": 1.080169677734375e-05, + "step": 7079, + "training_step_time": 0.10879302024841309 + }, + { + "epoch": 1.080322265625e-05, + "grad_norm": 0.26838913559913635, + "learning_rate": 9.083605358775612e-05, + "loss": 0.0949, + "step": 7080 + }, + { + "epoch": 1.080322265625e-05, + "model_forward_time": 0.025464534759521484, + "step": 7080 + }, + { + "epoch": 1.080322265625e-05, + "step": 7080, + "training_step_time": 0.11103987693786621 + }, + { + "epoch": 1.080474853515625e-05, + "model_forward_time": 0.02545452117919922, + "step": 7081 + }, + { + "epoch": 1.080474853515625e-05, + "step": 7081, + "training_step_time": 0.11041712760925293 + }, + { + "epoch": 1.08062744140625e-05, + "model_forward_time": 0.025461196899414062, + "step": 7082 + }, + { + "epoch": 1.08062744140625e-05, + "step": 7082, + "training_step_time": 0.10801887512207031 + }, + { + "epoch": 1.080780029296875e-05, + "model_forward_time": 0.02534961700439453, + "step": 7083 + }, + { + "epoch": 1.080780029296875e-05, + "step": 7083, + "training_step_time": 0.11166858673095703 + }, + { + "epoch": 1.0809326171875e-05, + "model_forward_time": 0.025315523147583008, + "step": 7084 + }, + { + "epoch": 1.0809326171875e-05, + "step": 7084, + "training_step_time": 0.22837352752685547 + }, + { + "epoch": 1.081085205078125e-05, + "model_forward_time": 0.024243831634521484, + "step": 7085 + }, + { + "epoch": 1.081085205078125e-05, + "step": 7085, + "training_step_time": 0.1143944263458252 + }, + { + "epoch": 1.08123779296875e-05, + "model_forward_time": 0.024402141571044922, + "step": 7086 + }, + { + "epoch": 1.08123779296875e-05, + "step": 7086, + "training_step_time": 0.13496732711791992 + }, + { + "epoch": 1.081390380859375e-05, + "model_forward_time": 0.025428295135498047, + "step": 7087 + }, + { + "epoch": 1.081390380859375e-05, + "step": 7087, + "training_step_time": 0.14142107963562012 + }, + { + "epoch": 1.08154296875e-05, + "model_forward_time": 0.02503490447998047, + "step": 7088 + }, + { + "epoch": 1.08154296875e-05, + "step": 7088, + "training_step_time": 0.1221470832824707 + }, + { + "epoch": 1.081695556640625e-05, + "model_forward_time": 0.02440476417541504, + "step": 7089 + }, + { + "epoch": 1.081695556640625e-05, + "step": 7089, + "training_step_time": 0.12145471572875977 + }, + { + "epoch": 1.08184814453125e-05, + "grad_norm": 0.45701801776885986, + "learning_rate": 9.080422524811982e-05, + "loss": 0.0807, + "step": 7090 + }, + { + "epoch": 1.08184814453125e-05, + "model_forward_time": 0.025293827056884766, + "step": 7090 + }, + { + "epoch": 1.08184814453125e-05, + "step": 7090, + "training_step_time": 0.11421799659729004 + }, + { + "epoch": 1.082000732421875e-05, + "model_forward_time": 0.025104522705078125, + "step": 7091 + }, + { + "epoch": 1.082000732421875e-05, + "step": 7091, + "training_step_time": 0.14574623107910156 + }, + { + "epoch": 1.0821533203125e-05, + "model_forward_time": 0.024726390838623047, + "step": 7092 + }, + { + "epoch": 1.0821533203125e-05, + "step": 7092, + "training_step_time": 0.11138057708740234 + }, + { + "epoch": 1.082305908203125e-05, + "model_forward_time": 0.024866580963134766, + "step": 7093 + }, + { + "epoch": 1.082305908203125e-05, + "step": 7093, + "training_step_time": 0.1107630729675293 + }, + { + "epoch": 1.08245849609375e-05, + "model_forward_time": 0.02533745765686035, + "step": 7094 + }, + { + "epoch": 1.08245849609375e-05, + "step": 7094, + "training_step_time": 0.11240482330322266 + }, + { + "epoch": 1.082611083984375e-05, + "model_forward_time": 0.025426387786865234, + "step": 7095 + }, + { + "epoch": 1.082611083984375e-05, + "step": 7095, + "training_step_time": 0.10952520370483398 + }, + { + "epoch": 1.082763671875e-05, + "model_forward_time": 0.025590896606445312, + "step": 7096 + }, + { + "epoch": 1.082763671875e-05, + "step": 7096, + "training_step_time": 0.19913673400878906 + }, + { + "epoch": 1.082916259765625e-05, + "model_forward_time": 0.02445077896118164, + "step": 7097 + }, + { + "epoch": 1.082916259765625e-05, + "step": 7097, + "training_step_time": 0.10300159454345703 + }, + { + "epoch": 1.08306884765625e-05, + "model_forward_time": 0.024631977081298828, + "step": 7098 + }, + { + "epoch": 1.08306884765625e-05, + "step": 7098, + "training_step_time": 0.10685181617736816 + }, + { + "epoch": 1.083221435546875e-05, + "model_forward_time": 0.025111913681030273, + "step": 7099 + }, + { + "epoch": 1.083221435546875e-05, + "step": 7099, + "training_step_time": 0.10706782341003418 + }, + { + "epoch": 1.0833740234375e-05, + "grad_norm": 0.531728208065033, + "learning_rate": 9.077234732750224e-05, + "loss": 0.0637, + "step": 7100 + }, + { + "epoch": 1.0833740234375e-05, + "model_forward_time": 0.025095224380493164, + "step": 7100 + }, + { + "epoch": 1.0833740234375e-05, + "step": 7100, + "training_step_time": 0.1746680736541748 + }, + { + "epoch": 1.083526611328125e-05, + "model_forward_time": 0.024361610412597656, + "step": 7101 + }, + { + "epoch": 1.083526611328125e-05, + "step": 7101, + "training_step_time": 0.19654178619384766 + }, + { + "epoch": 1.08367919921875e-05, + "model_forward_time": 0.024446487426757812, + "step": 7102 + }, + { + "epoch": 1.08367919921875e-05, + "step": 7102, + "training_step_time": 0.1746366024017334 + }, + { + "epoch": 1.083831787109375e-05, + "model_forward_time": 0.02370762825012207, + "step": 7103 + }, + { + "epoch": 1.083831787109375e-05, + "step": 7103, + "training_step_time": 0.1680285930633545 + }, + { + "epoch": 1.083984375e-05, + "model_forward_time": 0.024488449096679688, + "step": 7104 + }, + { + "epoch": 1.083984375e-05, + "step": 7104, + "training_step_time": 0.14678120613098145 + }, + { + "epoch": 1.084136962890625e-05, + "model_forward_time": 0.025556325912475586, + "step": 7105 + }, + { + "epoch": 1.084136962890625e-05, + "step": 7105, + "training_step_time": 0.10350632667541504 + }, + { + "epoch": 1.08428955078125e-05, + "model_forward_time": 0.02471780776977539, + "step": 7106 + }, + { + "epoch": 1.08428955078125e-05, + "step": 7106, + "training_step_time": 0.11081433296203613 + }, + { + "epoch": 1.084442138671875e-05, + "model_forward_time": 0.024974584579467773, + "step": 7107 + }, + { + "epoch": 1.084442138671875e-05, + "step": 7107, + "training_step_time": 0.10358548164367676 + }, + { + "epoch": 1.0845947265625e-05, + "model_forward_time": 0.02560710906982422, + "step": 7108 + }, + { + "epoch": 1.0845947265625e-05, + "step": 7108, + "training_step_time": 0.20617341995239258 + }, + { + "epoch": 1.084747314453125e-05, + "model_forward_time": 0.02498650550842285, + "step": 7109 + }, + { + "epoch": 1.084747314453125e-05, + "step": 7109, + "training_step_time": 0.1423053741455078 + }, + { + "epoch": 1.08489990234375e-05, + "grad_norm": 0.38871708512306213, + "learning_rate": 9.074041986463808e-05, + "loss": 0.0789, + "step": 7110 + }, + { + "epoch": 1.08489990234375e-05, + "model_forward_time": 0.024988174438476562, + "step": 7110 + }, + { + "epoch": 1.08489990234375e-05, + "step": 7110, + "training_step_time": 0.20393824577331543 + }, + { + "epoch": 1.085052490234375e-05, + "model_forward_time": 0.024208545684814453, + "step": 7111 + }, + { + "epoch": 1.085052490234375e-05, + "step": 7111, + "training_step_time": 0.12516546249389648 + }, + { + "epoch": 1.085205078125e-05, + "model_forward_time": 0.024608373641967773, + "step": 7112 + }, + { + "epoch": 1.085205078125e-05, + "step": 7112, + "training_step_time": 0.11145186424255371 + }, + { + "epoch": 1.085357666015625e-05, + "model_forward_time": 0.025238752365112305, + "step": 7113 + }, + { + "epoch": 1.085357666015625e-05, + "step": 7113, + "training_step_time": 0.12151646614074707 + }, + { + "epoch": 1.08551025390625e-05, + "model_forward_time": 0.025341272354125977, + "step": 7114 + }, + { + "epoch": 1.08551025390625e-05, + "step": 7114, + "training_step_time": 0.1088559627532959 + }, + { + "epoch": 1.085662841796875e-05, + "model_forward_time": 0.02471137046813965, + "step": 7115 + }, + { + "epoch": 1.085662841796875e-05, + "step": 7115, + "training_step_time": 0.10973572731018066 + }, + { + "epoch": 1.0858154296875e-05, + "model_forward_time": 0.025571584701538086, + "step": 7116 + }, + { + "epoch": 1.0858154296875e-05, + "step": 7116, + "training_step_time": 0.10637497901916504 + }, + { + "epoch": 1.085968017578125e-05, + "model_forward_time": 0.024418115615844727, + "step": 7117 + }, + { + "epoch": 1.085968017578125e-05, + "step": 7117, + "training_step_time": 0.10831713676452637 + }, + { + "epoch": 1.08612060546875e-05, + "model_forward_time": 0.02518916130065918, + "step": 7118 + }, + { + "epoch": 1.08612060546875e-05, + "step": 7118, + "training_step_time": 0.10754871368408203 + }, + { + "epoch": 1.086273193359375e-05, + "model_forward_time": 0.02527475357055664, + "step": 7119 + }, + { + "epoch": 1.086273193359375e-05, + "step": 7119, + "training_step_time": 0.10670089721679688 + }, + { + "epoch": 1.08642578125e-05, + "grad_norm": 0.33641284704208374, + "learning_rate": 9.070844289832224e-05, + "loss": 0.0542, + "step": 7120 + }, + { + "epoch": 1.08642578125e-05, + "model_forward_time": 0.024882793426513672, + "step": 7120 + }, + { + "epoch": 1.08642578125e-05, + "step": 7120, + "training_step_time": 0.10865974426269531 + }, + { + "epoch": 1.086578369140625e-05, + "model_forward_time": 0.02531719207763672, + "step": 7121 + }, + { + "epoch": 1.086578369140625e-05, + "step": 7121, + "training_step_time": 0.10607171058654785 + }, + { + "epoch": 1.08673095703125e-05, + "model_forward_time": 0.02509021759033203, + "step": 7122 + }, + { + "epoch": 1.08673095703125e-05, + "step": 7122, + "training_step_time": 0.10604357719421387 + }, + { + "epoch": 1.086883544921875e-05, + "model_forward_time": 0.028946399688720703, + "step": 7123 + }, + { + "epoch": 1.086883544921875e-05, + "step": 7123, + "training_step_time": 0.11289429664611816 + }, + { + "epoch": 1.0870361328125e-05, + "model_forward_time": 0.02551126480102539, + "step": 7124 + }, + { + "epoch": 1.0870361328125e-05, + "step": 7124, + "training_step_time": 0.11545443534851074 + }, + { + "epoch": 1.087188720703125e-05, + "model_forward_time": 0.024940013885498047, + "step": 7125 + }, + { + "epoch": 1.087188720703125e-05, + "step": 7125, + "training_step_time": 0.10780644416809082 + }, + { + "epoch": 1.08734130859375e-05, + "model_forward_time": 0.025427579879760742, + "step": 7126 + }, + { + "epoch": 1.08734130859375e-05, + "step": 7126, + "training_step_time": 0.1098470687866211 + }, + { + "epoch": 1.087493896484375e-05, + "model_forward_time": 0.025162458419799805, + "step": 7127 + }, + { + "epoch": 1.087493896484375e-05, + "step": 7127, + "training_step_time": 0.10780072212219238 + }, + { + "epoch": 1.087646484375e-05, + "model_forward_time": 0.025557994842529297, + "step": 7128 + }, + { + "epoch": 1.087646484375e-05, + "step": 7128, + "training_step_time": 0.15156078338623047 + }, + { + "epoch": 1.087799072265625e-05, + "model_forward_time": 0.0252382755279541, + "step": 7129 + }, + { + "epoch": 1.087799072265625e-05, + "step": 7129, + "training_step_time": 0.12155938148498535 + }, + { + "epoch": 1.08795166015625e-05, + "grad_norm": 0.6400363445281982, + "learning_rate": 9.067641646740968e-05, + "loss": 0.0668, + "step": 7130 + }, + { + "epoch": 1.08795166015625e-05, + "model_forward_time": 0.02494978904724121, + "step": 7130 + }, + { + "epoch": 1.08795166015625e-05, + "step": 7130, + "training_step_time": 0.10848093032836914 + }, + { + "epoch": 1.088104248046875e-05, + "model_forward_time": 0.02555108070373535, + "step": 7131 + }, + { + "epoch": 1.088104248046875e-05, + "step": 7131, + "training_step_time": 0.11040353775024414 + }, + { + "epoch": 1.0882568359375e-05, + "model_forward_time": 0.024983882904052734, + "step": 7132 + }, + { + "epoch": 1.0882568359375e-05, + "step": 7132, + "training_step_time": 0.10930585861206055 + }, + { + "epoch": 1.088409423828125e-05, + "model_forward_time": 0.025233030319213867, + "step": 7133 + }, + { + "epoch": 1.088409423828125e-05, + "step": 7133, + "training_step_time": 0.12082839012145996 + }, + { + "epoch": 1.08856201171875e-05, + "model_forward_time": 0.02501654624938965, + "step": 7134 + }, + { + "epoch": 1.08856201171875e-05, + "step": 7134, + "training_step_time": 0.1089637279510498 + }, + { + "epoch": 1.088714599609375e-05, + "model_forward_time": 0.025309085845947266, + "step": 7135 + }, + { + "epoch": 1.088714599609375e-05, + "step": 7135, + "training_step_time": 0.11163830757141113 + }, + { + "epoch": 1.0888671875e-05, + "model_forward_time": 0.024944543838500977, + "step": 7136 + }, + { + "epoch": 1.0888671875e-05, + "step": 7136, + "training_step_time": 0.1242983341217041 + }, + { + "epoch": 1.089019775390625e-05, + "model_forward_time": 0.025161027908325195, + "step": 7137 + }, + { + "epoch": 1.089019775390625e-05, + "step": 7137, + "training_step_time": 0.10615706443786621 + }, + { + "epoch": 1.08917236328125e-05, + "model_forward_time": 0.025165319442749023, + "step": 7138 + }, + { + "epoch": 1.08917236328125e-05, + "step": 7138, + "training_step_time": 0.13146185874938965 + }, + { + "epoch": 1.089324951171875e-05, + "model_forward_time": 0.0262148380279541, + "step": 7139 + }, + { + "epoch": 1.089324951171875e-05, + "step": 7139, + "training_step_time": 0.11148381233215332 + }, + { + "epoch": 1.0894775390625e-05, + "grad_norm": 0.36455848813056946, + "learning_rate": 9.064434061081562e-05, + "loss": 0.089, + "step": 7140 + }, + { + "epoch": 1.0894775390625e-05, + "model_forward_time": 0.025578737258911133, + "step": 7140 + }, + { + "epoch": 1.0894775390625e-05, + "step": 7140, + "training_step_time": 0.18718838691711426 + }, + { + "epoch": 1.089630126953125e-05, + "model_forward_time": 0.02448248863220215, + "step": 7141 + }, + { + "epoch": 1.089630126953125e-05, + "step": 7141, + "training_step_time": 0.11860108375549316 + }, + { + "epoch": 1.08978271484375e-05, + "model_forward_time": 0.02464127540588379, + "step": 7142 + }, + { + "epoch": 1.08978271484375e-05, + "step": 7142, + "training_step_time": 0.10653901100158691 + }, + { + "epoch": 1.089935302734375e-05, + "model_forward_time": 0.025600910186767578, + "step": 7143 + }, + { + "epoch": 1.089935302734375e-05, + "step": 7143, + "training_step_time": 0.10887455940246582 + }, + { + "epoch": 1.090087890625e-05, + "model_forward_time": 0.025521516799926758, + "step": 7144 + }, + { + "epoch": 1.090087890625e-05, + "step": 7144, + "training_step_time": 0.10848307609558105 + }, + { + "epoch": 1.090240478515625e-05, + "model_forward_time": 0.02540445327758789, + "step": 7145 + }, + { + "epoch": 1.090240478515625e-05, + "step": 7145, + "training_step_time": 0.1065664291381836 + }, + { + "epoch": 1.09039306640625e-05, + "model_forward_time": 0.02531719207763672, + "step": 7146 + }, + { + "epoch": 1.09039306640625e-05, + "step": 7146, + "training_step_time": 0.1082918643951416 + }, + { + "epoch": 1.090545654296875e-05, + "model_forward_time": 0.02496337890625, + "step": 7147 + }, + { + "epoch": 1.090545654296875e-05, + "step": 7147, + "training_step_time": 0.11219668388366699 + }, + { + "epoch": 1.0906982421875e-05, + "model_forward_time": 0.025003671646118164, + "step": 7148 + }, + { + "epoch": 1.0906982421875e-05, + "step": 7148, + "training_step_time": 0.10953950881958008 + }, + { + "epoch": 1.090850830078125e-05, + "model_forward_time": 0.025734424591064453, + "step": 7149 + }, + { + "epoch": 1.090850830078125e-05, + "step": 7149, + "training_step_time": 0.10737252235412598 + }, + { + "epoch": 1.09100341796875e-05, + "grad_norm": 0.3468288481235504, + "learning_rate": 9.061221536751517e-05, + "loss": 0.0823, + "step": 7150 + }, + { + "epoch": 1.09100341796875e-05, + "model_forward_time": 0.02486252784729004, + "step": 7150 + }, + { + "epoch": 1.09100341796875e-05, + "step": 7150, + "training_step_time": 0.10548138618469238 + }, + { + "epoch": 1.091156005859375e-05, + "model_forward_time": 0.02509617805480957, + "step": 7151 + }, + { + "epoch": 1.091156005859375e-05, + "step": 7151, + "training_step_time": 0.1064445972442627 + }, + { + "epoch": 1.09130859375e-05, + "model_forward_time": 0.02535843849182129, + "step": 7152 + }, + { + "epoch": 1.09130859375e-05, + "step": 7152, + "training_step_time": 0.10901260375976562 + }, + { + "epoch": 1.091461181640625e-05, + "model_forward_time": 0.02548670768737793, + "step": 7153 + }, + { + "epoch": 1.091461181640625e-05, + "step": 7153, + "training_step_time": 0.1082603931427002 + }, + { + "epoch": 1.09161376953125e-05, + "model_forward_time": 0.025157928466796875, + "step": 7154 + }, + { + "epoch": 1.09161376953125e-05, + "step": 7154, + "training_step_time": 0.10576987266540527 + }, + { + "epoch": 1.091766357421875e-05, + "model_forward_time": 0.028718233108520508, + "step": 7155 + }, + { + "epoch": 1.091766357421875e-05, + "step": 7155, + "training_step_time": 0.11563873291015625 + }, + { + "epoch": 1.0919189453125e-05, + "model_forward_time": 0.025705575942993164, + "step": 7156 + }, + { + "epoch": 1.0919189453125e-05, + "step": 7156, + "training_step_time": 0.10599136352539062 + }, + { + "epoch": 1.092071533203125e-05, + "model_forward_time": 0.025582313537597656, + "step": 7157 + }, + { + "epoch": 1.092071533203125e-05, + "step": 7157, + "training_step_time": 0.12227606773376465 + }, + { + "epoch": 1.09222412109375e-05, + "model_forward_time": 0.02516913414001465, + "step": 7158 + }, + { + "epoch": 1.09222412109375e-05, + "step": 7158, + "training_step_time": 0.20772910118103027 + }, + { + "epoch": 1.092376708984375e-05, + "model_forward_time": 0.025328397750854492, + "step": 7159 + }, + { + "epoch": 1.092376708984375e-05, + "step": 7159, + "training_step_time": 0.12943530082702637 + }, + { + "epoch": 1.092529296875e-05, + "grad_norm": 0.4154020845890045, + "learning_rate": 9.058004077654359e-05, + "loss": 0.0731, + "step": 7160 + }, + { + "epoch": 1.092529296875e-05, + "model_forward_time": 0.024350404739379883, + "step": 7160 + }, + { + "epoch": 1.092529296875e-05, + "step": 7160, + "training_step_time": 0.12670683860778809 + }, + { + "epoch": 1.092681884765625e-05, + "model_forward_time": 0.024982213973999023, + "step": 7161 + }, + { + "epoch": 1.092681884765625e-05, + "step": 7161, + "training_step_time": 0.1072075366973877 + }, + { + "epoch": 1.09283447265625e-05, + "model_forward_time": 0.026461362838745117, + "step": 7162 + }, + { + "epoch": 1.09283447265625e-05, + "step": 7162, + "training_step_time": 0.12483382225036621 + }, + { + "epoch": 1.092987060546875e-05, + "model_forward_time": 0.025067806243896484, + "step": 7163 + }, + { + "epoch": 1.092987060546875e-05, + "step": 7163, + "training_step_time": 0.10476303100585938 + }, + { + "epoch": 1.0931396484375e-05, + "model_forward_time": 0.025358200073242188, + "step": 7164 + }, + { + "epoch": 1.0931396484375e-05, + "step": 7164, + "training_step_time": 0.104888916015625 + }, + { + "epoch": 1.093292236328125e-05, + "model_forward_time": 0.02443671226501465, + "step": 7165 + }, + { + "epoch": 1.093292236328125e-05, + "step": 7165, + "training_step_time": 0.10396409034729004 + }, + { + "epoch": 1.09344482421875e-05, + "model_forward_time": 0.02460932731628418, + "step": 7166 + }, + { + "epoch": 1.09344482421875e-05, + "step": 7166, + "training_step_time": 0.10518002510070801 + }, + { + "epoch": 1.093597412109375e-05, + "model_forward_time": 0.025338411331176758, + "step": 7167 + }, + { + "epoch": 1.093597412109375e-05, + "step": 7167, + "training_step_time": 0.10565471649169922 + }, + { + "epoch": 1.09375e-05, + "model_forward_time": 0.025322914123535156, + "step": 7168 + }, + { + "epoch": 1.09375e-05, + "step": 7168, + "training_step_time": 0.10481095314025879 + }, + { + "epoch": 1.093902587890625e-05, + "model_forward_time": 0.0251772403717041, + "step": 7169 + }, + { + "epoch": 1.093902587890625e-05, + "step": 7169, + "training_step_time": 0.10966682434082031 + }, + { + "epoch": 1.09405517578125e-05, + "grad_norm": 0.46047261357307434, + "learning_rate": 9.0547816876996e-05, + "loss": 0.0658, + "step": 7170 + }, + { + "epoch": 1.09405517578125e-05, + "model_forward_time": 0.024871110916137695, + "step": 7170 + }, + { + "epoch": 1.09405517578125e-05, + "step": 7170, + "training_step_time": 0.1083979606628418 + }, + { + "epoch": 1.094207763671875e-05, + "model_forward_time": 0.02695012092590332, + "step": 7171 + }, + { + "epoch": 1.094207763671875e-05, + "step": 7171, + "training_step_time": 0.11578702926635742 + }, + { + "epoch": 1.0943603515625e-05, + "model_forward_time": 0.02516651153564453, + "step": 7172 + }, + { + "epoch": 1.0943603515625e-05, + "step": 7172, + "training_step_time": 0.11121439933776855 + }, + { + "epoch": 1.094512939453125e-05, + "model_forward_time": 0.025144338607788086, + "step": 7173 + }, + { + "epoch": 1.094512939453125e-05, + "step": 7173, + "training_step_time": 0.13921475410461426 + }, + { + "epoch": 1.09466552734375e-05, + "model_forward_time": 0.024189233779907227, + "step": 7174 + }, + { + "epoch": 1.09466552734375e-05, + "step": 7174, + "training_step_time": 0.1520841121673584 + }, + { + "epoch": 1.094818115234375e-05, + "model_forward_time": 0.02350020408630371, + "step": 7175 + }, + { + "epoch": 1.094818115234375e-05, + "step": 7175, + "training_step_time": 0.13956379890441895 + }, + { + "epoch": 1.094970703125e-05, + "model_forward_time": 0.02353835105895996, + "step": 7176 + }, + { + "epoch": 1.094970703125e-05, + "step": 7176, + "training_step_time": 0.1563112735748291 + }, + { + "epoch": 1.095123291015625e-05, + "model_forward_time": 0.023143768310546875, + "step": 7177 + }, + { + "epoch": 1.095123291015625e-05, + "step": 7177, + "training_step_time": 0.18925261497497559 + }, + { + "epoch": 1.09527587890625e-05, + "model_forward_time": 0.024778366088867188, + "step": 7178 + }, + { + "epoch": 1.09527587890625e-05, + "step": 7178, + "training_step_time": 0.12831521034240723 + }, + { + "epoch": 1.095428466796875e-05, + "model_forward_time": 0.024120569229125977, + "step": 7179 + }, + { + "epoch": 1.095428466796875e-05, + "step": 7179, + "training_step_time": 0.21085309982299805 + }, + { + "epoch": 1.0955810546875e-05, + "grad_norm": 0.38073450326919556, + "learning_rate": 9.05155437080275e-05, + "loss": 0.0638, + "step": 7180 + }, + { + "epoch": 1.0955810546875e-05, + "model_forward_time": 0.024722814559936523, + "step": 7180 + }, + { + "epoch": 1.0955810546875e-05, + "step": 7180, + "training_step_time": 0.16522479057312012 + }, + { + "epoch": 1.095733642578125e-05, + "model_forward_time": 0.024158954620361328, + "step": 7181 + }, + { + "epoch": 1.095733642578125e-05, + "step": 7181, + "training_step_time": 0.22240757942199707 + }, + { + "epoch": 1.09588623046875e-05, + "model_forward_time": 0.02434706687927246, + "step": 7182 + }, + { + "epoch": 1.09588623046875e-05, + "step": 7182, + "training_step_time": 0.10975885391235352 + }, + { + "epoch": 1.096038818359375e-05, + "model_forward_time": 0.02623581886291504, + "step": 7183 + }, + { + "epoch": 1.096038818359375e-05, + "step": 7183, + "training_step_time": 0.11231613159179688 + }, + { + "epoch": 1.09619140625e-05, + "model_forward_time": 0.02514505386352539, + "step": 7184 + }, + { + "epoch": 1.09619140625e-05, + "step": 7184, + "training_step_time": 0.11184501647949219 + }, + { + "epoch": 1.096343994140625e-05, + "model_forward_time": 0.02528095245361328, + "step": 7185 + }, + { + "epoch": 1.096343994140625e-05, + "step": 7185, + "training_step_time": 0.11185789108276367 + }, + { + "epoch": 1.09649658203125e-05, + "model_forward_time": 0.025191068649291992, + "step": 7186 + }, + { + "epoch": 1.09649658203125e-05, + "step": 7186, + "training_step_time": 0.19948053359985352 + }, + { + "epoch": 1.096649169921875e-05, + "model_forward_time": 0.024756193161010742, + "step": 7187 + }, + { + "epoch": 1.096649169921875e-05, + "step": 7187, + "training_step_time": 0.10447001457214355 + }, + { + "epoch": 1.0968017578125e-05, + "model_forward_time": 0.02440953254699707, + "step": 7188 + }, + { + "epoch": 1.0968017578125e-05, + "step": 7188, + "training_step_time": 0.10707473754882812 + }, + { + "epoch": 1.096954345703125e-05, + "model_forward_time": 0.025114059448242188, + "step": 7189 + }, + { + "epoch": 1.096954345703125e-05, + "step": 7189, + "training_step_time": 0.11580729484558105 + }, + { + "epoch": 1.09710693359375e-05, + "grad_norm": 0.34728512167930603, + "learning_rate": 9.048322130885305e-05, + "loss": 0.0823, + "step": 7190 + }, + { + "epoch": 1.09710693359375e-05, + "model_forward_time": 0.02399611473083496, + "step": 7190 + }, + { + "epoch": 1.09710693359375e-05, + "step": 7190, + "training_step_time": 0.10837864875793457 + }, + { + "epoch": 1.097259521484375e-05, + "model_forward_time": 0.024109601974487305, + "step": 7191 + }, + { + "epoch": 1.097259521484375e-05, + "step": 7191, + "training_step_time": 0.116119384765625 + }, + { + "epoch": 1.097412109375e-05, + "model_forward_time": 0.024892568588256836, + "step": 7192 + }, + { + "epoch": 1.097412109375e-05, + "step": 7192, + "training_step_time": 0.1058342456817627 + }, + { + "epoch": 1.097564697265625e-05, + "model_forward_time": 0.024745702743530273, + "step": 7193 + }, + { + "epoch": 1.097564697265625e-05, + "step": 7193, + "training_step_time": 0.1080021858215332 + }, + { + "epoch": 1.09771728515625e-05, + "model_forward_time": 0.025022506713867188, + "step": 7194 + }, + { + "epoch": 1.09771728515625e-05, + "step": 7194, + "training_step_time": 0.11544251441955566 + }, + { + "epoch": 1.097869873046875e-05, + "model_forward_time": 0.0252535343170166, + "step": 7195 + }, + { + "epoch": 1.097869873046875e-05, + "step": 7195, + "training_step_time": 0.1085367202758789 + }, + { + "epoch": 1.0980224609375e-05, + "model_forward_time": 0.025187969207763672, + "step": 7196 + }, + { + "epoch": 1.0980224609375e-05, + "step": 7196, + "training_step_time": 0.10837101936340332 + }, + { + "epoch": 1.098175048828125e-05, + "model_forward_time": 0.025034666061401367, + "step": 7197 + }, + { + "epoch": 1.098175048828125e-05, + "step": 7197, + "training_step_time": 0.10655403137207031 + }, + { + "epoch": 1.09832763671875e-05, + "model_forward_time": 0.02567267417907715, + "step": 7198 + }, + { + "epoch": 1.09832763671875e-05, + "step": 7198, + "training_step_time": 0.10844230651855469 + }, + { + "epoch": 1.098480224609375e-05, + "model_forward_time": 0.028069257736206055, + "step": 7199 + }, + { + "epoch": 1.098480224609375e-05, + "step": 7199, + "training_step_time": 0.1746354103088379 + }, + { + "epoch": 1.0986328125e-05, + "grad_norm": 0.4247555732727051, + "learning_rate": 9.045084971874738e-05, + "loss": 0.0616, + "step": 7200 + }, + { + "epoch": 1.0986328125e-05, + "model_forward_time": 0.024900436401367188, + "step": 7200 + }, + { + "epoch": 1.0986328125e-05, + "step": 7200, + "training_step_time": 0.1531538963317871 + }, + { + "epoch": 1.098785400390625e-05, + "model_forward_time": 0.024562835693359375, + "step": 7201 + }, + { + "epoch": 1.098785400390625e-05, + "step": 7201, + "training_step_time": 0.16017651557922363 + }, + { + "epoch": 1.09893798828125e-05, + "model_forward_time": 0.024556398391723633, + "step": 7202 + }, + { + "epoch": 1.09893798828125e-05, + "step": 7202, + "training_step_time": 0.17667675018310547 + }, + { + "epoch": 1.099090576171875e-05, + "model_forward_time": 0.02463364601135254, + "step": 7203 + }, + { + "epoch": 1.099090576171875e-05, + "step": 7203, + "training_step_time": 0.1712172031402588 + }, + { + "epoch": 1.0992431640625e-05, + "model_forward_time": 0.025115966796875, + "step": 7204 + }, + { + "epoch": 1.0992431640625e-05, + "step": 7204, + "training_step_time": 0.1172933578491211 + }, + { + "epoch": 1.099395751953125e-05, + "model_forward_time": 0.02686285972595215, + "step": 7205 + }, + { + "epoch": 1.099395751953125e-05, + "step": 7205, + "training_step_time": 0.11657524108886719 + }, + { + "epoch": 1.09954833984375e-05, + "model_forward_time": 0.025010108947753906, + "step": 7206 + }, + { + "epoch": 1.09954833984375e-05, + "step": 7206, + "training_step_time": 0.11490726470947266 + }, + { + "epoch": 1.099700927734375e-05, + "model_forward_time": 0.025209665298461914, + "step": 7207 + }, + { + "epoch": 1.099700927734375e-05, + "step": 7207, + "training_step_time": 0.1080636978149414 + }, + { + "epoch": 1.099853515625e-05, + "model_forward_time": 0.025360584259033203, + "step": 7208 + }, + { + "epoch": 1.099853515625e-05, + "step": 7208, + "training_step_time": 0.10703182220458984 + }, + { + "epoch": 1.100006103515625e-05, + "model_forward_time": 0.025326251983642578, + "step": 7209 + }, + { + "epoch": 1.100006103515625e-05, + "step": 7209, + "training_step_time": 0.10778617858886719 + }, + { + "epoch": 1.10015869140625e-05, + "grad_norm": 0.3541938364505768, + "learning_rate": 9.041842897704502e-05, + "loss": 0.0654, + "step": 7210 + }, + { + "epoch": 1.10015869140625e-05, + "model_forward_time": 0.024917125701904297, + "step": 7210 + }, + { + "epoch": 1.10015869140625e-05, + "step": 7210, + "training_step_time": 0.11451578140258789 + }, + { + "epoch": 1.100311279296875e-05, + "model_forward_time": 0.025231122970581055, + "step": 7211 + }, + { + "epoch": 1.100311279296875e-05, + "step": 7211, + "training_step_time": 0.17651700973510742 + }, + { + "epoch": 1.1004638671875e-05, + "model_forward_time": 0.024654865264892578, + "step": 7212 + }, + { + "epoch": 1.1004638671875e-05, + "step": 7212, + "training_step_time": 0.1771717071533203 + }, + { + "epoch": 1.100616455078125e-05, + "model_forward_time": 0.02397465705871582, + "step": 7213 + }, + { + "epoch": 1.100616455078125e-05, + "step": 7213, + "training_step_time": 0.18460941314697266 + }, + { + "epoch": 1.10076904296875e-05, + "model_forward_time": 0.024483680725097656, + "step": 7214 + }, + { + "epoch": 1.10076904296875e-05, + "step": 7214, + "training_step_time": 0.18198108673095703 + }, + { + "epoch": 1.100921630859375e-05, + "model_forward_time": 0.023537635803222656, + "step": 7215 + }, + { + "epoch": 1.100921630859375e-05, + "step": 7215, + "training_step_time": 0.15772032737731934 + }, + { + "epoch": 1.10107421875e-05, + "model_forward_time": 0.02441263198852539, + "step": 7216 + }, + { + "epoch": 1.10107421875e-05, + "step": 7216, + "training_step_time": 0.14050745964050293 + }, + { + "epoch": 1.101226806640625e-05, + "model_forward_time": 0.024580001831054688, + "step": 7217 + }, + { + "epoch": 1.101226806640625e-05, + "step": 7217, + "training_step_time": 0.14451980590820312 + }, + { + "epoch": 1.10137939453125e-05, + "model_forward_time": 0.024553537368774414, + "step": 7218 + }, + { + "epoch": 1.10137939453125e-05, + "step": 7218, + "training_step_time": 0.12962627410888672 + }, + { + "epoch": 1.101531982421875e-05, + "model_forward_time": 0.024059295654296875, + "step": 7219 + }, + { + "epoch": 1.101531982421875e-05, + "step": 7219, + "training_step_time": 0.18365931510925293 + }, + { + "epoch": 1.1016845703125e-05, + "grad_norm": 0.3829857110977173, + "learning_rate": 9.038595912314027e-05, + "loss": 0.0736, + "step": 7220 + }, + { + "epoch": 1.1016845703125e-05, + "model_forward_time": 0.024135589599609375, + "step": 7220 + }, + { + "epoch": 1.1016845703125e-05, + "step": 7220, + "training_step_time": 0.11563467979431152 + }, + { + "epoch": 1.101837158203125e-05, + "model_forward_time": 0.024232864379882812, + "step": 7221 + }, + { + "epoch": 1.101837158203125e-05, + "step": 7221, + "training_step_time": 0.11539173126220703 + }, + { + "epoch": 1.10198974609375e-05, + "model_forward_time": 0.02488255500793457, + "step": 7222 + }, + { + "epoch": 1.10198974609375e-05, + "step": 7222, + "training_step_time": 0.18532681465148926 + }, + { + "epoch": 1.102142333984375e-05, + "model_forward_time": 0.024862289428710938, + "step": 7223 + }, + { + "epoch": 1.102142333984375e-05, + "step": 7223, + "training_step_time": 0.11067676544189453 + }, + { + "epoch": 1.102294921875e-05, + "model_forward_time": 0.024369001388549805, + "step": 7224 + }, + { + "epoch": 1.102294921875e-05, + "step": 7224, + "training_step_time": 0.12307024002075195 + }, + { + "epoch": 1.102447509765625e-05, + "model_forward_time": 0.024908781051635742, + "step": 7225 + }, + { + "epoch": 1.102447509765625e-05, + "step": 7225, + "training_step_time": 0.11423397064208984 + }, + { + "epoch": 1.10260009765625e-05, + "model_forward_time": 0.025310516357421875, + "step": 7226 + }, + { + "epoch": 1.10260009765625e-05, + "step": 7226, + "training_step_time": 0.12051892280578613 + }, + { + "epoch": 1.102752685546875e-05, + "model_forward_time": 0.025153398513793945, + "step": 7227 + }, + { + "epoch": 1.102752685546875e-05, + "step": 7227, + "training_step_time": 0.12036991119384766 + }, + { + "epoch": 1.1029052734375e-05, + "model_forward_time": 0.025138139724731445, + "step": 7228 + }, + { + "epoch": 1.1029052734375e-05, + "step": 7228, + "training_step_time": 0.10861778259277344 + }, + { + "epoch": 1.103057861328125e-05, + "model_forward_time": 0.025153398513793945, + "step": 7229 + }, + { + "epoch": 1.103057861328125e-05, + "step": 7229, + "training_step_time": 0.1465158462524414 + }, + { + "epoch": 1.10321044921875e-05, + "grad_norm": 0.3377252221107483, + "learning_rate": 9.035344019648702e-05, + "loss": 0.0636, + "step": 7230 + }, + { + "epoch": 1.10321044921875e-05, + "model_forward_time": 0.024282217025756836, + "step": 7230 + }, + { + "epoch": 1.10321044921875e-05, + "step": 7230, + "training_step_time": 0.11136412620544434 + }, + { + "epoch": 1.103363037109375e-05, + "model_forward_time": 0.02481389045715332, + "step": 7231 + }, + { + "epoch": 1.103363037109375e-05, + "step": 7231, + "training_step_time": 0.10992431640625 + }, + { + "epoch": 1.103515625e-05, + "model_forward_time": 0.02512955665588379, + "step": 7232 + }, + { + "epoch": 1.103515625e-05, + "step": 7232, + "training_step_time": 0.1082766056060791 + }, + { + "epoch": 1.103668212890625e-05, + "model_forward_time": 0.025579452514648438, + "step": 7233 + }, + { + "epoch": 1.103668212890625e-05, + "step": 7233, + "training_step_time": 0.10912609100341797 + }, + { + "epoch": 1.10382080078125e-05, + "model_forward_time": 0.024631023406982422, + "step": 7234 + }, + { + "epoch": 1.10382080078125e-05, + "step": 7234, + "training_step_time": 0.10748863220214844 + }, + { + "epoch": 1.103973388671875e-05, + "model_forward_time": 0.025499343872070312, + "step": 7235 + }, + { + "epoch": 1.103973388671875e-05, + "step": 7235, + "training_step_time": 0.10778069496154785 + }, + { + "epoch": 1.1041259765625e-05, + "model_forward_time": 0.025264739990234375, + "step": 7236 + }, + { + "epoch": 1.1041259765625e-05, + "step": 7236, + "training_step_time": 0.10705137252807617 + }, + { + "epoch": 1.104278564453125e-05, + "model_forward_time": 0.025964975357055664, + "step": 7237 + }, + { + "epoch": 1.104278564453125e-05, + "step": 7237, + "training_step_time": 0.10864996910095215 + }, + { + "epoch": 1.10443115234375e-05, + "model_forward_time": 0.02418231964111328, + "step": 7238 + }, + { + "epoch": 1.10443115234375e-05, + "step": 7238, + "training_step_time": 0.10707879066467285 + }, + { + "epoch": 1.104583740234375e-05, + "model_forward_time": 0.025104522705078125, + "step": 7239 + }, + { + "epoch": 1.104583740234375e-05, + "step": 7239, + "training_step_time": 0.1099095344543457 + }, + { + "epoch": 1.104736328125e-05, + "grad_norm": 0.3963189423084259, + "learning_rate": 9.032087223659885e-05, + "loss": 0.0695, + "step": 7240 + }, + { + "epoch": 1.104736328125e-05, + "model_forward_time": 0.025391101837158203, + "step": 7240 + }, + { + "epoch": 1.104736328125e-05, + "step": 7240, + "training_step_time": 0.10815811157226562 + }, + { + "epoch": 1.104888916015625e-05, + "model_forward_time": 0.02583026885986328, + "step": 7241 + }, + { + "epoch": 1.104888916015625e-05, + "step": 7241, + "training_step_time": 0.10962152481079102 + }, + { + "epoch": 1.10504150390625e-05, + "model_forward_time": 0.025299787521362305, + "step": 7242 + }, + { + "epoch": 1.10504150390625e-05, + "step": 7242, + "training_step_time": 0.17074108123779297 + }, + { + "epoch": 1.105194091796875e-05, + "model_forward_time": 0.02467966079711914, + "step": 7243 + }, + { + "epoch": 1.105194091796875e-05, + "step": 7243, + "training_step_time": 0.16531896591186523 + }, + { + "epoch": 1.1053466796875e-05, + "model_forward_time": 0.024398326873779297, + "step": 7244 + }, + { + "epoch": 1.1053466796875e-05, + "step": 7244, + "training_step_time": 0.1087045669555664 + }, + { + "epoch": 1.105499267578125e-05, + "model_forward_time": 0.02461719512939453, + "step": 7245 + }, + { + "epoch": 1.105499267578125e-05, + "step": 7245, + "training_step_time": 0.1721200942993164 + }, + { + "epoch": 1.10565185546875e-05, + "model_forward_time": 0.024678468704223633, + "step": 7246 + }, + { + "epoch": 1.10565185546875e-05, + "step": 7246, + "training_step_time": 0.18065905570983887 + }, + { + "epoch": 1.105804443359375e-05, + "model_forward_time": 0.024240493774414062, + "step": 7247 + }, + { + "epoch": 1.105804443359375e-05, + "step": 7247, + "training_step_time": 0.14234304428100586 + }, + { + "epoch": 1.10595703125e-05, + "model_forward_time": 0.024916648864746094, + "step": 7248 + }, + { + "epoch": 1.10595703125e-05, + "step": 7248, + "training_step_time": 0.11143255233764648 + }, + { + "epoch": 1.106109619140625e-05, + "model_forward_time": 0.025053024291992188, + "step": 7249 + }, + { + "epoch": 1.106109619140625e-05, + "step": 7249, + "training_step_time": 0.11568427085876465 + }, + { + "epoch": 1.10626220703125e-05, + "grad_norm": 0.4576609134674072, + "learning_rate": 9.028825528304892e-05, + "loss": 0.0818, + "step": 7250 + }, + { + "epoch": 1.10626220703125e-05, + "model_forward_time": 0.024919748306274414, + "step": 7250 + }, + { + "epoch": 1.10626220703125e-05, + "step": 7250, + "training_step_time": 0.11223173141479492 + }, + { + "epoch": 1.106414794921875e-05, + "model_forward_time": 0.02493453025817871, + "step": 7251 + }, + { + "epoch": 1.106414794921875e-05, + "step": 7251, + "training_step_time": 0.10937166213989258 + }, + { + "epoch": 1.1065673828125e-05, + "model_forward_time": 0.024809837341308594, + "step": 7252 + }, + { + "epoch": 1.1065673828125e-05, + "step": 7252, + "training_step_time": 0.10766124725341797 + }, + { + "epoch": 1.106719970703125e-05, + "model_forward_time": 0.025313138961791992, + "step": 7253 + }, + { + "epoch": 1.106719970703125e-05, + "step": 7253, + "training_step_time": 0.10795712471008301 + }, + { + "epoch": 1.10687255859375e-05, + "model_forward_time": 0.025242328643798828, + "step": 7254 + }, + { + "epoch": 1.10687255859375e-05, + "step": 7254, + "training_step_time": 0.10759353637695312 + }, + { + "epoch": 1.107025146484375e-05, + "model_forward_time": 0.02611231803894043, + "step": 7255 + }, + { + "epoch": 1.107025146484375e-05, + "step": 7255, + "training_step_time": 0.10763049125671387 + }, + { + "epoch": 1.107177734375e-05, + "model_forward_time": 0.02528858184814453, + "step": 7256 + }, + { + "epoch": 1.107177734375e-05, + "step": 7256, + "training_step_time": 0.1089181900024414 + }, + { + "epoch": 1.107330322265625e-05, + "model_forward_time": 0.025150537490844727, + "step": 7257 + }, + { + "epoch": 1.107330322265625e-05, + "step": 7257, + "training_step_time": 0.1080312728881836 + }, + { + "epoch": 1.10748291015625e-05, + "model_forward_time": 0.025082826614379883, + "step": 7258 + }, + { + "epoch": 1.10748291015625e-05, + "step": 7258, + "training_step_time": 0.10687136650085449 + }, + { + "epoch": 1.107635498046875e-05, + "model_forward_time": 0.0253293514251709, + "step": 7259 + }, + { + "epoch": 1.107635498046875e-05, + "step": 7259, + "training_step_time": 0.10645031929016113 + }, + { + "epoch": 1.1077880859375e-05, + "grad_norm": 0.47237181663513184, + "learning_rate": 9.025558937546988e-05, + "loss": 0.0551, + "step": 7260 + }, + { + "epoch": 1.1077880859375e-05, + "model_forward_time": 0.0248565673828125, + "step": 7260 + }, + { + "epoch": 1.1077880859375e-05, + "step": 7260, + "training_step_time": 0.10957765579223633 + }, + { + "epoch": 1.107940673828125e-05, + "model_forward_time": 0.024677753448486328, + "step": 7261 + }, + { + "epoch": 1.107940673828125e-05, + "step": 7261, + "training_step_time": 0.10764074325561523 + }, + { + "epoch": 1.10809326171875e-05, + "model_forward_time": 0.025378704071044922, + "step": 7262 + }, + { + "epoch": 1.10809326171875e-05, + "step": 7262, + "training_step_time": 0.11346673965454102 + }, + { + "epoch": 1.108245849609375e-05, + "model_forward_time": 0.024579286575317383, + "step": 7263 + }, + { + "epoch": 1.108245849609375e-05, + "step": 7263, + "training_step_time": 0.16894245147705078 + }, + { + "epoch": 1.1083984375e-05, + "model_forward_time": 0.02456212043762207, + "step": 7264 + }, + { + "epoch": 1.1083984375e-05, + "step": 7264, + "training_step_time": 0.10973334312438965 + }, + { + "epoch": 1.108551025390625e-05, + "model_forward_time": 0.024558544158935547, + "step": 7265 + }, + { + "epoch": 1.108551025390625e-05, + "step": 7265, + "training_step_time": 0.21234774589538574 + }, + { + "epoch": 1.10870361328125e-05, + "model_forward_time": 0.023995637893676758, + "step": 7266 + }, + { + "epoch": 1.10870361328125e-05, + "step": 7266, + "training_step_time": 0.1076805591583252 + }, + { + "epoch": 1.108856201171875e-05, + "model_forward_time": 0.024391651153564453, + "step": 7267 + }, + { + "epoch": 1.108856201171875e-05, + "step": 7267, + "training_step_time": 0.11236882209777832 + }, + { + "epoch": 1.1090087890625e-05, + "model_forward_time": 0.02426934242248535, + "step": 7268 + }, + { + "epoch": 1.1090087890625e-05, + "step": 7268, + "training_step_time": 0.20067310333251953 + }, + { + "epoch": 1.109161376953125e-05, + "model_forward_time": 0.0243222713470459, + "step": 7269 + }, + { + "epoch": 1.109161376953125e-05, + "step": 7269, + "training_step_time": 0.20014715194702148 + }, + { + "epoch": 1.10931396484375e-05, + "grad_norm": 0.25583454966545105, + "learning_rate": 9.022287455355387e-05, + "loss": 0.053, + "step": 7270 + }, + { + "epoch": 1.10931396484375e-05, + "model_forward_time": 0.025159358978271484, + "step": 7270 + }, + { + "epoch": 1.10931396484375e-05, + "step": 7270, + "training_step_time": 0.10805583000183105 + }, + { + "epoch": 1.109466552734375e-05, + "model_forward_time": 0.024270057678222656, + "step": 7271 + }, + { + "epoch": 1.109466552734375e-05, + "step": 7271, + "training_step_time": 0.10420513153076172 + }, + { + "epoch": 1.109619140625e-05, + "model_forward_time": 0.02562427520751953, + "step": 7272 + }, + { + "epoch": 1.109619140625e-05, + "step": 7272, + "training_step_time": 0.10866403579711914 + }, + { + "epoch": 1.109771728515625e-05, + "model_forward_time": 0.025391817092895508, + "step": 7273 + }, + { + "epoch": 1.109771728515625e-05, + "step": 7273, + "training_step_time": 0.1334521770477295 + }, + { + "epoch": 1.10992431640625e-05, + "model_forward_time": 0.025745153427124023, + "step": 7274 + }, + { + "epoch": 1.10992431640625e-05, + "step": 7274, + "training_step_time": 0.10922694206237793 + }, + { + "epoch": 1.110076904296875e-05, + "model_forward_time": 0.025252103805541992, + "step": 7275 + }, + { + "epoch": 1.110076904296875e-05, + "step": 7275, + "training_step_time": 0.10742950439453125 + }, + { + "epoch": 1.1102294921875e-05, + "model_forward_time": 0.024979829788208008, + "step": 7276 + }, + { + "epoch": 1.1102294921875e-05, + "step": 7276, + "training_step_time": 0.11502671241760254 + }, + { + "epoch": 1.110382080078125e-05, + "model_forward_time": 0.025252819061279297, + "step": 7277 + }, + { + "epoch": 1.110382080078125e-05, + "step": 7277, + "training_step_time": 0.10231256484985352 + }, + { + "epoch": 1.11053466796875e-05, + "model_forward_time": 0.02502894401550293, + "step": 7278 + }, + { + "epoch": 1.11053466796875e-05, + "step": 7278, + "training_step_time": 0.10949850082397461 + }, + { + "epoch": 1.110687255859375e-05, + "model_forward_time": 0.02550649642944336, + "step": 7279 + }, + { + "epoch": 1.110687255859375e-05, + "step": 7279, + "training_step_time": 0.10681366920471191 + }, + { + "epoch": 1.11083984375e-05, + "grad_norm": 0.5158094763755798, + "learning_rate": 9.019011085705253e-05, + "loss": 0.0631, + "step": 7280 + }, + { + "epoch": 1.11083984375e-05, + "model_forward_time": 0.025570392608642578, + "step": 7280 + }, + { + "epoch": 1.11083984375e-05, + "step": 7280, + "training_step_time": 0.10640549659729004 + }, + { + "epoch": 1.110992431640625e-05, + "model_forward_time": 0.025530338287353516, + "step": 7281 + }, + { + "epoch": 1.110992431640625e-05, + "step": 7281, + "training_step_time": 0.10541176795959473 + }, + { + "epoch": 1.11114501953125e-05, + "model_forward_time": 0.025226116180419922, + "step": 7282 + }, + { + "epoch": 1.11114501953125e-05, + "step": 7282, + "training_step_time": 0.10879158973693848 + }, + { + "epoch": 1.111297607421875e-05, + "model_forward_time": 0.025523662567138672, + "step": 7283 + }, + { + "epoch": 1.111297607421875e-05, + "step": 7283, + "training_step_time": 0.10608220100402832 + }, + { + "epoch": 1.1114501953125e-05, + "model_forward_time": 0.025322914123535156, + "step": 7284 + }, + { + "epoch": 1.1114501953125e-05, + "step": 7284, + "training_step_time": 0.10627937316894531 + }, + { + "epoch": 1.111602783203125e-05, + "model_forward_time": 0.025860071182250977, + "step": 7285 + }, + { + "epoch": 1.111602783203125e-05, + "step": 7285, + "training_step_time": 0.10731673240661621 + }, + { + "epoch": 1.11175537109375e-05, + "model_forward_time": 0.025129079818725586, + "step": 7286 + }, + { + "epoch": 1.11175537109375e-05, + "step": 7286, + "training_step_time": 0.10641765594482422 + }, + { + "epoch": 1.111907958984375e-05, + "model_forward_time": 0.02591252326965332, + "step": 7287 + }, + { + "epoch": 1.111907958984375e-05, + "step": 7287, + "training_step_time": 0.1089174747467041 + }, + { + "epoch": 1.112060546875e-05, + "model_forward_time": 0.025355100631713867, + "step": 7288 + }, + { + "epoch": 1.112060546875e-05, + "step": 7288, + "training_step_time": 0.10734844207763672 + }, + { + "epoch": 1.112213134765625e-05, + "model_forward_time": 0.02515864372253418, + "step": 7289 + }, + { + "epoch": 1.112213134765625e-05, + "step": 7289, + "training_step_time": 0.11225128173828125 + }, + { + "epoch": 1.11236572265625e-05, + "grad_norm": 0.49643757939338684, + "learning_rate": 9.015729832577681e-05, + "loss": 0.0804, + "step": 7290 + }, + { + "epoch": 1.11236572265625e-05, + "model_forward_time": 0.025411367416381836, + "step": 7290 + }, + { + "epoch": 1.11236572265625e-05, + "step": 7290, + "training_step_time": 0.10703182220458984 + }, + { + "epoch": 1.112518310546875e-05, + "model_forward_time": 0.0251920223236084, + "step": 7291 + }, + { + "epoch": 1.112518310546875e-05, + "step": 7291, + "training_step_time": 0.10680437088012695 + }, + { + "epoch": 1.1126708984375e-05, + "model_forward_time": 0.025351524353027344, + "step": 7292 + }, + { + "epoch": 1.1126708984375e-05, + "step": 7292, + "training_step_time": 0.17006254196166992 + }, + { + "epoch": 1.112823486328125e-05, + "model_forward_time": 0.024634599685668945, + "step": 7293 + }, + { + "epoch": 1.112823486328125e-05, + "step": 7293, + "training_step_time": 0.16485595703125 + }, + { + "epoch": 1.11297607421875e-05, + "model_forward_time": 0.025214672088623047, + "step": 7294 + }, + { + "epoch": 1.11297607421875e-05, + "step": 7294, + "training_step_time": 0.18205595016479492 + }, + { + "epoch": 1.113128662109375e-05, + "model_forward_time": 0.024909019470214844, + "step": 7295 + }, + { + "epoch": 1.113128662109375e-05, + "step": 7295, + "training_step_time": 0.13115262985229492 + }, + { + "epoch": 1.11328125e-05, + "model_forward_time": 0.024552106857299805, + "step": 7296 + }, + { + "epoch": 1.11328125e-05, + "step": 7296, + "training_step_time": 0.11386394500732422 + }, + { + "epoch": 1.113433837890625e-05, + "model_forward_time": 0.025427579879760742, + "step": 7297 + }, + { + "epoch": 1.113433837890625e-05, + "step": 7297, + "training_step_time": 0.12169647216796875 + }, + { + "epoch": 1.11358642578125e-05, + "model_forward_time": 0.025649547576904297, + "step": 7298 + }, + { + "epoch": 1.11358642578125e-05, + "step": 7298, + "training_step_time": 0.1115255355834961 + }, + { + "epoch": 1.113739013671875e-05, + "model_forward_time": 0.02454400062561035, + "step": 7299 + }, + { + "epoch": 1.113739013671875e-05, + "step": 7299, + "training_step_time": 0.11632871627807617 + }, + { + "epoch": 1.1138916015625e-05, + "grad_norm": 0.6585777997970581, + "learning_rate": 9.012443699959705e-05, + "loss": 0.0746, + "step": 7300 + }, + { + "epoch": 1.1138916015625e-05, + "model_forward_time": 0.024686098098754883, + "step": 7300 + }, + { + "epoch": 1.1138916015625e-05, + "step": 7300, + "training_step_time": 0.10910964012145996 + }, + { + "epoch": 1.114044189453125e-05, + "model_forward_time": 0.024530887603759766, + "step": 7301 + }, + { + "epoch": 1.114044189453125e-05, + "step": 7301, + "training_step_time": 0.11162185668945312 + }, + { + "epoch": 1.11419677734375e-05, + "model_forward_time": 0.02512073516845703, + "step": 7302 + }, + { + "epoch": 1.11419677734375e-05, + "step": 7302, + "training_step_time": 0.10822820663452148 + }, + { + "epoch": 1.114349365234375e-05, + "model_forward_time": 0.025602102279663086, + "step": 7303 + }, + { + "epoch": 1.114349365234375e-05, + "step": 7303, + "training_step_time": 0.10930347442626953 + }, + { + "epoch": 1.114501953125e-05, + "model_forward_time": 0.025483369827270508, + "step": 7304 + }, + { + "epoch": 1.114501953125e-05, + "step": 7304, + "training_step_time": 0.10873675346374512 + }, + { + "epoch": 1.114654541015625e-05, + "model_forward_time": 0.025824785232543945, + "step": 7305 + }, + { + "epoch": 1.114654541015625e-05, + "step": 7305, + "training_step_time": 0.10851573944091797 + }, + { + "epoch": 1.11480712890625e-05, + "model_forward_time": 0.025628089904785156, + "step": 7306 + }, + { + "epoch": 1.11480712890625e-05, + "step": 7306, + "training_step_time": 0.10759186744689941 + }, + { + "epoch": 1.114959716796875e-05, + "model_forward_time": 0.02558445930480957, + "step": 7307 + }, + { + "epoch": 1.114959716796875e-05, + "step": 7307, + "training_step_time": 0.1086263656616211 + }, + { + "epoch": 1.1151123046875e-05, + "model_forward_time": 0.025326013565063477, + "step": 7308 + }, + { + "epoch": 1.1151123046875e-05, + "step": 7308, + "training_step_time": 0.1112678050994873 + }, + { + "epoch": 1.115264892578125e-05, + "model_forward_time": 0.025253772735595703, + "step": 7309 + }, + { + "epoch": 1.115264892578125e-05, + "step": 7309, + "training_step_time": 0.11048650741577148 + }, + { + "epoch": 1.11541748046875e-05, + "grad_norm": 0.5398983359336853, + "learning_rate": 9.009152691844285e-05, + "loss": 0.0816, + "step": 7310 + }, + { + "epoch": 1.11541748046875e-05, + "model_forward_time": 0.02472066879272461, + "step": 7310 + }, + { + "epoch": 1.11541748046875e-05, + "step": 7310, + "training_step_time": 0.19739842414855957 + }, + { + "epoch": 1.115570068359375e-05, + "model_forward_time": 0.024675369262695312, + "step": 7311 + }, + { + "epoch": 1.115570068359375e-05, + "step": 7311, + "training_step_time": 0.20471549034118652 + }, + { + "epoch": 1.11572265625e-05, + "model_forward_time": 0.02423691749572754, + "step": 7312 + }, + { + "epoch": 1.11572265625e-05, + "step": 7312, + "training_step_time": 0.10540318489074707 + }, + { + "epoch": 1.115875244140625e-05, + "model_forward_time": 0.025855302810668945, + "step": 7313 + }, + { + "epoch": 1.115875244140625e-05, + "step": 7313, + "training_step_time": 0.12966465950012207 + }, + { + "epoch": 1.11602783203125e-05, + "model_forward_time": 0.025696516036987305, + "step": 7314 + }, + { + "epoch": 1.11602783203125e-05, + "step": 7314, + "training_step_time": 0.19605040550231934 + }, + { + "epoch": 1.116180419921875e-05, + "model_forward_time": 0.024641990661621094, + "step": 7315 + }, + { + "epoch": 1.116180419921875e-05, + "step": 7315, + "training_step_time": 0.10182046890258789 + }, + { + "epoch": 1.1163330078125e-05, + "model_forward_time": 0.024964094161987305, + "step": 7316 + }, + { + "epoch": 1.1163330078125e-05, + "step": 7316, + "training_step_time": 0.10999584197998047 + }, + { + "epoch": 1.116485595703125e-05, + "model_forward_time": 0.02523064613342285, + "step": 7317 + }, + { + "epoch": 1.116485595703125e-05, + "step": 7317, + "training_step_time": 0.1361536979675293 + }, + { + "epoch": 1.11663818359375e-05, + "model_forward_time": 0.025173664093017578, + "step": 7318 + }, + { + "epoch": 1.11663818359375e-05, + "step": 7318, + "training_step_time": 0.1111748218536377 + }, + { + "epoch": 1.116790771484375e-05, + "model_forward_time": 0.025420427322387695, + "step": 7319 + }, + { + "epoch": 1.116790771484375e-05, + "step": 7319, + "training_step_time": 0.11510467529296875 + }, + { + "epoch": 1.116943359375e-05, + "grad_norm": 0.48828309774398804, + "learning_rate": 9.005856812230304e-05, + "loss": 0.0583, + "step": 7320 + }, + { + "epoch": 1.116943359375e-05, + "model_forward_time": 0.025263309478759766, + "step": 7320 + }, + { + "epoch": 1.116943359375e-05, + "step": 7320, + "training_step_time": 0.11714482307434082 + }, + { + "epoch": 1.117095947265625e-05, + "model_forward_time": 0.025460481643676758, + "step": 7321 + }, + { + "epoch": 1.117095947265625e-05, + "step": 7321, + "training_step_time": 0.1083533763885498 + }, + { + "epoch": 1.11724853515625e-05, + "model_forward_time": 0.02569723129272461, + "step": 7322 + }, + { + "epoch": 1.11724853515625e-05, + "step": 7322, + "training_step_time": 0.19159507751464844 + }, + { + "epoch": 1.117401123046875e-05, + "model_forward_time": 0.024570226669311523, + "step": 7323 + }, + { + "epoch": 1.117401123046875e-05, + "step": 7323, + "training_step_time": 0.10987544059753418 + }, + { + "epoch": 1.1175537109375e-05, + "model_forward_time": 0.02426934242248535, + "step": 7324 + }, + { + "epoch": 1.1175537109375e-05, + "step": 7324, + "training_step_time": 0.10293054580688477 + }, + { + "epoch": 1.117706298828125e-05, + "model_forward_time": 0.024903297424316406, + "step": 7325 + }, + { + "epoch": 1.117706298828125e-05, + "step": 7325, + "training_step_time": 0.10445642471313477 + }, + { + "epoch": 1.11785888671875e-05, + "model_forward_time": 0.024820327758789062, + "step": 7326 + }, + { + "epoch": 1.11785888671875e-05, + "step": 7326, + "training_step_time": 0.10518026351928711 + }, + { + "epoch": 1.118011474609375e-05, + "model_forward_time": 0.025187015533447266, + "step": 7327 + }, + { + "epoch": 1.118011474609375e-05, + "step": 7327, + "training_step_time": 0.10549211502075195 + }, + { + "epoch": 1.1181640625e-05, + "model_forward_time": 0.024790525436401367, + "step": 7328 + }, + { + "epoch": 1.1181640625e-05, + "step": 7328, + "training_step_time": 0.10471796989440918 + }, + { + "epoch": 1.118316650390625e-05, + "model_forward_time": 0.025397300720214844, + "step": 7329 + }, + { + "epoch": 1.118316650390625e-05, + "step": 7329, + "training_step_time": 0.10778355598449707 + }, + { + "epoch": 1.11846923828125e-05, + "grad_norm": 0.3651945888996124, + "learning_rate": 9.002556065122571e-05, + "loss": 0.0765, + "step": 7330 + }, + { + "epoch": 1.11846923828125e-05, + "model_forward_time": 0.024899721145629883, + "step": 7330 + }, + { + "epoch": 1.11846923828125e-05, + "step": 7330, + "training_step_time": 0.10371065139770508 + }, + { + "epoch": 1.118621826171875e-05, + "model_forward_time": 0.02550196647644043, + "step": 7331 + }, + { + "epoch": 1.118621826171875e-05, + "step": 7331, + "training_step_time": 0.10548162460327148 + }, + { + "epoch": 1.1187744140625e-05, + "model_forward_time": 0.025576114654541016, + "step": 7332 + }, + { + "epoch": 1.1187744140625e-05, + "step": 7332, + "training_step_time": 0.11353325843811035 + }, + { + "epoch": 1.118927001953125e-05, + "model_forward_time": 0.025925159454345703, + "step": 7333 + }, + { + "epoch": 1.118927001953125e-05, + "step": 7333, + "training_step_time": 0.11351132392883301 + }, + { + "epoch": 1.11907958984375e-05, + "model_forward_time": 0.025793790817260742, + "step": 7334 + }, + { + "epoch": 1.11907958984375e-05, + "step": 7334, + "training_step_time": 0.2113487720489502 + }, + { + "epoch": 1.119232177734375e-05, + "model_forward_time": 0.024778366088867188, + "step": 7335 + }, + { + "epoch": 1.119232177734375e-05, + "step": 7335, + "training_step_time": 0.11678290367126465 + }, + { + "epoch": 1.119384765625e-05, + "model_forward_time": 0.024832725524902344, + "step": 7336 + }, + { + "epoch": 1.119384765625e-05, + "step": 7336, + "training_step_time": 0.11150908470153809 + }, + { + "epoch": 1.119537353515625e-05, + "model_forward_time": 0.02576613426208496, + "step": 7337 + }, + { + "epoch": 1.119537353515625e-05, + "step": 7337, + "training_step_time": 0.2151024341583252 + }, + { + "epoch": 1.11968994140625e-05, + "model_forward_time": 0.024915695190429688, + "step": 7338 + }, + { + "epoch": 1.11968994140625e-05, + "step": 7338, + "training_step_time": 0.13201141357421875 + }, + { + "epoch": 1.119842529296875e-05, + "model_forward_time": 0.02493453025817871, + "step": 7339 + }, + { + "epoch": 1.119842529296875e-05, + "step": 7339, + "training_step_time": 0.1182854175567627 + }, + { + "epoch": 1.1199951171875e-05, + "grad_norm": 0.331667959690094, + "learning_rate": 8.999250454531802e-05, + "loss": 0.072, + "step": 7340 + }, + { + "epoch": 1.1199951171875e-05, + "model_forward_time": 0.0255734920501709, + "step": 7340 + }, + { + "epoch": 1.1199951171875e-05, + "step": 7340, + "training_step_time": 0.1233818531036377 + }, + { + "epoch": 1.120147705078125e-05, + "model_forward_time": 0.02521491050720215, + "step": 7341 + }, + { + "epoch": 1.120147705078125e-05, + "step": 7341, + "training_step_time": 0.11857318878173828 + }, + { + "epoch": 1.12030029296875e-05, + "model_forward_time": 0.02529120445251465, + "step": 7342 + }, + { + "epoch": 1.12030029296875e-05, + "step": 7342, + "training_step_time": 0.11760401725769043 + }, + { + "epoch": 1.120452880859375e-05, + "model_forward_time": 0.027306556701660156, + "step": 7343 + }, + { + "epoch": 1.120452880859375e-05, + "step": 7343, + "training_step_time": 0.11627554893493652 + }, + { + "epoch": 1.12060546875e-05, + "model_forward_time": 0.025255680084228516, + "step": 7344 + }, + { + "epoch": 1.12060546875e-05, + "step": 7344, + "training_step_time": 0.11273479461669922 + }, + { + "epoch": 1.120758056640625e-05, + "model_forward_time": 0.02567267417907715, + "step": 7345 + }, + { + "epoch": 1.120758056640625e-05, + "step": 7345, + "training_step_time": 0.11033272743225098 + }, + { + "epoch": 1.12091064453125e-05, + "model_forward_time": 0.025693416595458984, + "step": 7346 + }, + { + "epoch": 1.12091064453125e-05, + "step": 7346, + "training_step_time": 0.11128449440002441 + }, + { + "epoch": 1.121063232421875e-05, + "model_forward_time": 0.0255124568939209, + "step": 7347 + }, + { + "epoch": 1.121063232421875e-05, + "step": 7347, + "training_step_time": 0.11049604415893555 + }, + { + "epoch": 1.1212158203125e-05, + "model_forward_time": 0.02553534507751465, + "step": 7348 + }, + { + "epoch": 1.1212158203125e-05, + "step": 7348, + "training_step_time": 0.10732245445251465 + }, + { + "epoch": 1.121368408203125e-05, + "model_forward_time": 0.025444507598876953, + "step": 7349 + }, + { + "epoch": 1.121368408203125e-05, + "step": 7349, + "training_step_time": 0.10709452629089355 + }, + { + "epoch": 1.12152099609375e-05, + "grad_norm": 0.7745429277420044, + "learning_rate": 8.995939984474624e-05, + "loss": 0.0599, + "step": 7350 + }, + { + "epoch": 1.12152099609375e-05, + "model_forward_time": 0.02503824234008789, + "step": 7350 + }, + { + "epoch": 1.12152099609375e-05, + "step": 7350, + "training_step_time": 0.10704851150512695 + }, + { + "epoch": 1.121673583984375e-05, + "model_forward_time": 0.025383949279785156, + "step": 7351 + }, + { + "epoch": 1.121673583984375e-05, + "step": 7351, + "training_step_time": 0.10941624641418457 + }, + { + "epoch": 1.121826171875e-05, + "model_forward_time": 0.025880813598632812, + "step": 7352 + }, + { + "epoch": 1.121826171875e-05, + "step": 7352, + "training_step_time": 0.10666179656982422 + }, + { + "epoch": 1.121978759765625e-05, + "model_forward_time": 0.025503158569335938, + "step": 7353 + }, + { + "epoch": 1.121978759765625e-05, + "step": 7353, + "training_step_time": 0.10626888275146484 + }, + { + "epoch": 1.12213134765625e-05, + "model_forward_time": 0.025522470474243164, + "step": 7354 + }, + { + "epoch": 1.12213134765625e-05, + "step": 7354, + "training_step_time": 0.11079096794128418 + }, + { + "epoch": 1.122283935546875e-05, + "model_forward_time": 0.025725603103637695, + "step": 7355 + }, + { + "epoch": 1.122283935546875e-05, + "step": 7355, + "training_step_time": 0.13513851165771484 + }, + { + "epoch": 1.1224365234375e-05, + "model_forward_time": 0.025693893432617188, + "step": 7356 + }, + { + "epoch": 1.1224365234375e-05, + "step": 7356, + "training_step_time": 0.10869884490966797 + }, + { + "epoch": 1.122589111328125e-05, + "model_forward_time": 0.02541661262512207, + "step": 7357 + }, + { + "epoch": 1.122589111328125e-05, + "step": 7357, + "training_step_time": 0.21927452087402344 + }, + { + "epoch": 1.12274169921875e-05, + "model_forward_time": 0.024211883544921875, + "step": 7358 + }, + { + "epoch": 1.12274169921875e-05, + "step": 7358, + "training_step_time": 0.10668706893920898 + }, + { + "epoch": 1.122894287109375e-05, + "model_forward_time": 0.0242002010345459, + "step": 7359 + }, + { + "epoch": 1.122894287109375e-05, + "step": 7359, + "training_step_time": 0.11103606224060059 + }, + { + "epoch": 1.123046875e-05, + "grad_norm": 0.6871018409729004, + "learning_rate": 8.992624658973574e-05, + "loss": 0.0662, + "step": 7360 + }, + { + "epoch": 1.123046875e-05, + "model_forward_time": 0.024962902069091797, + "step": 7360 + }, + { + "epoch": 1.123046875e-05, + "step": 7360, + "training_step_time": 0.19650936126708984 + }, + { + "epoch": 1.123199462890625e-05, + "model_forward_time": 0.02691483497619629, + "step": 7361 + }, + { + "epoch": 1.123199462890625e-05, + "step": 7361, + "training_step_time": 0.11382222175598145 + }, + { + "epoch": 1.12335205078125e-05, + "model_forward_time": 0.024154186248779297, + "step": 7362 + }, + { + "epoch": 1.12335205078125e-05, + "step": 7362, + "training_step_time": 0.11993145942687988 + }, + { + "epoch": 1.123504638671875e-05, + "model_forward_time": 0.025316238403320312, + "step": 7363 + }, + { + "epoch": 1.123504638671875e-05, + "step": 7363, + "training_step_time": 0.1404893398284912 + }, + { + "epoch": 1.1236572265625e-05, + "model_forward_time": 0.024767637252807617, + "step": 7364 + }, + { + "epoch": 1.1236572265625e-05, + "step": 7364, + "training_step_time": 0.12512588500976562 + }, + { + "epoch": 1.123809814453125e-05, + "model_forward_time": 0.023523330688476562, + "step": 7365 + }, + { + "epoch": 1.123809814453125e-05, + "step": 7365, + "training_step_time": 0.11969208717346191 + }, + { + "epoch": 1.12396240234375e-05, + "model_forward_time": 0.024266481399536133, + "step": 7366 + }, + { + "epoch": 1.12396240234375e-05, + "step": 7366, + "training_step_time": 0.1976485252380371 + }, + { + "epoch": 1.124114990234375e-05, + "model_forward_time": 0.023970842361450195, + "step": 7367 + }, + { + "epoch": 1.124114990234375e-05, + "step": 7367, + "training_step_time": 0.11065149307250977 + }, + { + "epoch": 1.124267578125e-05, + "model_forward_time": 0.02421259880065918, + "step": 7368 + }, + { + "epoch": 1.124267578125e-05, + "step": 7368, + "training_step_time": 0.10947299003601074 + }, + { + "epoch": 1.124420166015625e-05, + "model_forward_time": 0.02505636215209961, + "step": 7369 + }, + { + "epoch": 1.124420166015625e-05, + "step": 7369, + "training_step_time": 0.11038732528686523 + }, + { + "epoch": 1.12457275390625e-05, + "grad_norm": 0.41827675700187683, + "learning_rate": 8.989304482057084e-05, + "loss": 0.0782, + "step": 7370 + }, + { + "epoch": 1.12457275390625e-05, + "model_forward_time": 0.025074005126953125, + "step": 7370 + }, + { + "epoch": 1.12457275390625e-05, + "step": 7370, + "training_step_time": 0.10799884796142578 + }, + { + "epoch": 1.124725341796875e-05, + "model_forward_time": 0.02555251121520996, + "step": 7371 + }, + { + "epoch": 1.124725341796875e-05, + "step": 7371, + "training_step_time": 0.10899734497070312 + }, + { + "epoch": 1.1248779296875e-05, + "model_forward_time": 0.025128841400146484, + "step": 7372 + }, + { + "epoch": 1.1248779296875e-05, + "step": 7372, + "training_step_time": 0.10998129844665527 + }, + { + "epoch": 1.125030517578125e-05, + "model_forward_time": 0.02503371238708496, + "step": 7373 + }, + { + "epoch": 1.125030517578125e-05, + "step": 7373, + "training_step_time": 0.10831117630004883 + }, + { + "epoch": 1.12518310546875e-05, + "model_forward_time": 0.025280237197875977, + "step": 7374 + }, + { + "epoch": 1.12518310546875e-05, + "step": 7374, + "training_step_time": 0.1067967414855957 + }, + { + "epoch": 1.125335693359375e-05, + "model_forward_time": 0.025223731994628906, + "step": 7375 + }, + { + "epoch": 1.125335693359375e-05, + "step": 7375, + "training_step_time": 0.10846924781799316 + }, + { + "epoch": 1.12548828125e-05, + "model_forward_time": 0.024911880493164062, + "step": 7376 + }, + { + "epoch": 1.12548828125e-05, + "step": 7376, + "training_step_time": 0.11301136016845703 + }, + { + "epoch": 1.125640869140625e-05, + "model_forward_time": 0.025600433349609375, + "step": 7377 + }, + { + "epoch": 1.125640869140625e-05, + "step": 7377, + "training_step_time": 0.11171364784240723 + }, + { + "epoch": 1.12579345703125e-05, + "model_forward_time": 0.026075124740600586, + "step": 7378 + }, + { + "epoch": 1.12579345703125e-05, + "step": 7378, + "training_step_time": 0.1078798770904541 + }, + { + "epoch": 1.125946044921875e-05, + "model_forward_time": 0.025047779083251953, + "step": 7379 + }, + { + "epoch": 1.125946044921875e-05, + "step": 7379, + "training_step_time": 0.1105952262878418 + }, + { + "epoch": 1.1260986328125e-05, + "grad_norm": 0.6497596502304077, + "learning_rate": 8.98597945775948e-05, + "loss": 0.0556, + "step": 7380 + }, + { + "epoch": 1.1260986328125e-05, + "model_forward_time": 0.024868011474609375, + "step": 7380 + }, + { + "epoch": 1.1260986328125e-05, + "step": 7380, + "training_step_time": 0.10617375373840332 + }, + { + "epoch": 1.126251220703125e-05, + "model_forward_time": 0.025554180145263672, + "step": 7381 + }, + { + "epoch": 1.126251220703125e-05, + "step": 7381, + "training_step_time": 0.10843157768249512 + }, + { + "epoch": 1.12640380859375e-05, + "model_forward_time": 0.0257108211517334, + "step": 7382 + }, + { + "epoch": 1.12640380859375e-05, + "step": 7382, + "training_step_time": 0.12195611000061035 + }, + { + "epoch": 1.126556396484375e-05, + "model_forward_time": 0.025971412658691406, + "step": 7383 + }, + { + "epoch": 1.126556396484375e-05, + "step": 7383, + "training_step_time": 0.21953082084655762 + }, + { + "epoch": 1.126708984375e-05, + "model_forward_time": 0.025005340576171875, + "step": 7384 + }, + { + "epoch": 1.126708984375e-05, + "step": 7384, + "training_step_time": 0.17476272583007812 + }, + { + "epoch": 1.126861572265625e-05, + "model_forward_time": 0.024931669235229492, + "step": 7385 + }, + { + "epoch": 1.126861572265625e-05, + "step": 7385, + "training_step_time": 0.10980010032653809 + }, + { + "epoch": 1.12701416015625e-05, + "model_forward_time": 0.024990081787109375, + "step": 7386 + }, + { + "epoch": 1.12701416015625e-05, + "step": 7386, + "training_step_time": 0.12690210342407227 + }, + { + "epoch": 1.127166748046875e-05, + "model_forward_time": 0.025683164596557617, + "step": 7387 + }, + { + "epoch": 1.127166748046875e-05, + "step": 7387, + "training_step_time": 0.11972451210021973 + }, + { + "epoch": 1.1273193359375e-05, + "model_forward_time": 0.025560855865478516, + "step": 7388 + }, + { + "epoch": 1.1273193359375e-05, + "step": 7388, + "training_step_time": 0.16822552680969238 + }, + { + "epoch": 1.127471923828125e-05, + "model_forward_time": 0.0234375, + "step": 7389 + }, + { + "epoch": 1.127471923828125e-05, + "step": 7389, + "training_step_time": 0.19314885139465332 + }, + { + "epoch": 1.12762451171875e-05, + "grad_norm": 0.3705720603466034, + "learning_rate": 8.982649590120982e-05, + "loss": 0.061, + "step": 7390 + }, + { + "epoch": 1.12762451171875e-05, + "model_forward_time": 0.025278329849243164, + "step": 7390 + }, + { + "epoch": 1.12762451171875e-05, + "step": 7390, + "training_step_time": 0.16959571838378906 + }, + { + "epoch": 1.127777099609375e-05, + "model_forward_time": 0.024254560470581055, + "step": 7391 + }, + { + "epoch": 1.127777099609375e-05, + "step": 7391, + "training_step_time": 0.14531517028808594 + }, + { + "epoch": 1.1279296875e-05, + "model_forward_time": 0.02408909797668457, + "step": 7392 + }, + { + "epoch": 1.1279296875e-05, + "step": 7392, + "training_step_time": 0.14297008514404297 + }, + { + "epoch": 1.128082275390625e-05, + "model_forward_time": 0.0245211124420166, + "step": 7393 + }, + { + "epoch": 1.128082275390625e-05, + "step": 7393, + "training_step_time": 0.13319897651672363 + }, + { + "epoch": 1.12823486328125e-05, + "model_forward_time": 0.024317264556884766, + "step": 7394 + }, + { + "epoch": 1.12823486328125e-05, + "step": 7394, + "training_step_time": 0.11066365242004395 + }, + { + "epoch": 1.128387451171875e-05, + "model_forward_time": 0.02541065216064453, + "step": 7395 + }, + { + "epoch": 1.128387451171875e-05, + "step": 7395, + "training_step_time": 0.10962176322937012 + }, + { + "epoch": 1.1285400390625e-05, + "model_forward_time": 0.025240182876586914, + "step": 7396 + }, + { + "epoch": 1.1285400390625e-05, + "step": 7396, + "training_step_time": 0.10647153854370117 + }, + { + "epoch": 1.128692626953125e-05, + "model_forward_time": 0.025767087936401367, + "step": 7397 + }, + { + "epoch": 1.128692626953125e-05, + "step": 7397, + "training_step_time": 0.11550045013427734 + }, + { + "epoch": 1.12884521484375e-05, + "model_forward_time": 0.027737140655517578, + "step": 7398 + }, + { + "epoch": 1.12884521484375e-05, + "step": 7398, + "training_step_time": 0.14266657829284668 + }, + { + "epoch": 1.128997802734375e-05, + "model_forward_time": 0.0270388126373291, + "step": 7399 + }, + { + "epoch": 1.128997802734375e-05, + "step": 7399, + "training_step_time": 0.11533951759338379 + }, + { + "epoch": 1.129150390625e-05, + "grad_norm": 0.2715272903442383, + "learning_rate": 8.979314883187693e-05, + "loss": 0.0667, + "step": 7400 + }, + { + "epoch": 1.129150390625e-05, + "model_forward_time": 0.024865150451660156, + "step": 7400 + }, + { + "epoch": 1.129150390625e-05, + "step": 7400, + "training_step_time": 0.20172691345214844 + }, + { + "epoch": 1.129302978515625e-05, + "model_forward_time": 0.025548934936523438, + "step": 7401 + }, + { + "epoch": 1.129302978515625e-05, + "step": 7401, + "training_step_time": 0.10495829582214355 + }, + { + "epoch": 1.12945556640625e-05, + "model_forward_time": 0.02457904815673828, + "step": 7402 + }, + { + "epoch": 1.12945556640625e-05, + "step": 7402, + "training_step_time": 0.11523842811584473 + }, + { + "epoch": 1.129608154296875e-05, + "model_forward_time": 0.02513718605041504, + "step": 7403 + }, + { + "epoch": 1.129608154296875e-05, + "step": 7403, + "training_step_time": 0.1959671974182129 + }, + { + "epoch": 1.1297607421875e-05, + "model_forward_time": 0.02559828758239746, + "step": 7404 + }, + { + "epoch": 1.1297607421875e-05, + "step": 7404, + "training_step_time": 0.10901403427124023 + }, + { + "epoch": 1.129913330078125e-05, + "model_forward_time": 0.025177717208862305, + "step": 7405 + }, + { + "epoch": 1.129913330078125e-05, + "step": 7405, + "training_step_time": 0.18027710914611816 + }, + { + "epoch": 1.13006591796875e-05, + "model_forward_time": 0.024931907653808594, + "step": 7406 + }, + { + "epoch": 1.13006591796875e-05, + "step": 7406, + "training_step_time": 0.17157816886901855 + }, + { + "epoch": 1.130218505859375e-05, + "model_forward_time": 0.02465653419494629, + "step": 7407 + }, + { + "epoch": 1.130218505859375e-05, + "step": 7407, + "training_step_time": 0.20664453506469727 + }, + { + "epoch": 1.13037109375e-05, + "model_forward_time": 0.02470254898071289, + "step": 7408 + }, + { + "epoch": 1.13037109375e-05, + "step": 7408, + "training_step_time": 0.17580413818359375 + }, + { + "epoch": 1.130523681640625e-05, + "model_forward_time": 0.024215221405029297, + "step": 7409 + }, + { + "epoch": 1.130523681640625e-05, + "step": 7409, + "training_step_time": 0.15679097175598145 + }, + { + "epoch": 1.13067626953125e-05, + "grad_norm": 0.48039108514785767, + "learning_rate": 8.975975341011596e-05, + "loss": 0.0725, + "step": 7410 + }, + { + "epoch": 1.13067626953125e-05, + "model_forward_time": 0.024577856063842773, + "step": 7410 + }, + { + "epoch": 1.13067626953125e-05, + "step": 7410, + "training_step_time": 0.13814902305603027 + }, + { + "epoch": 1.130828857421875e-05, + "model_forward_time": 0.024631977081298828, + "step": 7411 + }, + { + "epoch": 1.130828857421875e-05, + "step": 7411, + "training_step_time": 0.13447833061218262 + }, + { + "epoch": 1.1309814453125e-05, + "model_forward_time": 0.024537086486816406, + "step": 7412 + }, + { + "epoch": 1.1309814453125e-05, + "step": 7412, + "training_step_time": 0.13095355033874512 + }, + { + "epoch": 1.131134033203125e-05, + "model_forward_time": 0.024105548858642578, + "step": 7413 + }, + { + "epoch": 1.131134033203125e-05, + "step": 7413, + "training_step_time": 0.12012648582458496 + }, + { + "epoch": 1.13128662109375e-05, + "model_forward_time": 0.0253603458404541, + "step": 7414 + }, + { + "epoch": 1.13128662109375e-05, + "step": 7414, + "training_step_time": 0.12164807319641113 + }, + { + "epoch": 1.131439208984375e-05, + "model_forward_time": 0.02573108673095703, + "step": 7415 + }, + { + "epoch": 1.131439208984375e-05, + "step": 7415, + "training_step_time": 0.11651396751403809 + }, + { + "epoch": 1.131591796875e-05, + "model_forward_time": 0.025417089462280273, + "step": 7416 + }, + { + "epoch": 1.131591796875e-05, + "step": 7416, + "training_step_time": 0.11426615715026855 + }, + { + "epoch": 1.131744384765625e-05, + "model_forward_time": 0.02568984031677246, + "step": 7417 + }, + { + "epoch": 1.131744384765625e-05, + "step": 7417, + "training_step_time": 0.11350584030151367 + }, + { + "epoch": 1.13189697265625e-05, + "model_forward_time": 0.024745702743530273, + "step": 7418 + }, + { + "epoch": 1.13189697265625e-05, + "step": 7418, + "training_step_time": 0.11293458938598633 + }, + { + "epoch": 1.132049560546875e-05, + "model_forward_time": 0.0253143310546875, + "step": 7419 + }, + { + "epoch": 1.132049560546875e-05, + "step": 7419, + "training_step_time": 0.10815167427062988 + }, + { + "epoch": 1.1322021484375e-05, + "grad_norm": 0.5846309661865234, + "learning_rate": 8.972630967650548e-05, + "loss": 0.0816, + "step": 7420 + }, + { + "epoch": 1.1322021484375e-05, + "model_forward_time": 0.024700403213500977, + "step": 7420 + }, + { + "epoch": 1.1322021484375e-05, + "step": 7420, + "training_step_time": 0.11052703857421875 + }, + { + "epoch": 1.132354736328125e-05, + "model_forward_time": 0.024944305419921875, + "step": 7421 + }, + { + "epoch": 1.132354736328125e-05, + "step": 7421, + "training_step_time": 0.10870623588562012 + }, + { + "epoch": 1.13250732421875e-05, + "model_forward_time": 0.025362730026245117, + "step": 7422 + }, + { + "epoch": 1.13250732421875e-05, + "step": 7422, + "training_step_time": 0.11077380180358887 + }, + { + "epoch": 1.132659912109375e-05, + "model_forward_time": 0.025944232940673828, + "step": 7423 + }, + { + "epoch": 1.132659912109375e-05, + "step": 7423, + "training_step_time": 0.11313629150390625 + }, + { + "epoch": 1.1328125e-05, + "model_forward_time": 0.025660037994384766, + "step": 7424 + }, + { + "epoch": 1.1328125e-05, + "step": 7424, + "training_step_time": 0.1739051342010498 + }, + { + "epoch": 1.132965087890625e-05, + "model_forward_time": 0.02476215362548828, + "step": 7425 + }, + { + "epoch": 1.132965087890625e-05, + "step": 7425, + "training_step_time": 0.16234040260314941 + }, + { + "epoch": 1.13311767578125e-05, + "model_forward_time": 0.02507305145263672, + "step": 7426 + }, + { + "epoch": 1.13311767578125e-05, + "step": 7426, + "training_step_time": 0.11198234558105469 + }, + { + "epoch": 1.133270263671875e-05, + "model_forward_time": 0.02494359016418457, + "step": 7427 + }, + { + "epoch": 1.133270263671875e-05, + "step": 7427, + "training_step_time": 0.10856127738952637 + }, + { + "epoch": 1.1334228515625e-05, + "model_forward_time": 0.025786876678466797, + "step": 7428 + }, + { + "epoch": 1.1334228515625e-05, + "step": 7428, + "training_step_time": 0.11350059509277344 + }, + { + "epoch": 1.133575439453125e-05, + "model_forward_time": 0.0250699520111084, + "step": 7429 + }, + { + "epoch": 1.133575439453125e-05, + "step": 7429, + "training_step_time": 0.12893414497375488 + }, + { + "epoch": 1.13372802734375e-05, + "grad_norm": 0.3795636296272278, + "learning_rate": 8.969281767168283e-05, + "loss": 0.0719, + "step": 7430 + }, + { + "epoch": 1.13372802734375e-05, + "model_forward_time": 0.025446176528930664, + "step": 7430 + }, + { + "epoch": 1.13372802734375e-05, + "step": 7430, + "training_step_time": 0.11540460586547852 + }, + { + "epoch": 1.133880615234375e-05, + "model_forward_time": 0.02591729164123535, + "step": 7431 + }, + { + "epoch": 1.133880615234375e-05, + "step": 7431, + "training_step_time": 0.11378955841064453 + }, + { + "epoch": 1.134033203125e-05, + "model_forward_time": 0.025783300399780273, + "step": 7432 + }, + { + "epoch": 1.134033203125e-05, + "step": 7432, + "training_step_time": 0.11556839942932129 + }, + { + "epoch": 1.134185791015625e-05, + "model_forward_time": 0.0253293514251709, + "step": 7433 + }, + { + "epoch": 1.134185791015625e-05, + "step": 7433, + "training_step_time": 0.10606932640075684 + }, + { + "epoch": 1.13433837890625e-05, + "model_forward_time": 0.025543212890625, + "step": 7434 + }, + { + "epoch": 1.13433837890625e-05, + "step": 7434, + "training_step_time": 0.11037468910217285 + }, + { + "epoch": 1.134490966796875e-05, + "model_forward_time": 0.025588035583496094, + "step": 7435 + }, + { + "epoch": 1.134490966796875e-05, + "step": 7435, + "training_step_time": 0.10693669319152832 + }, + { + "epoch": 1.1346435546875e-05, + "model_forward_time": 0.025462865829467773, + "step": 7436 + }, + { + "epoch": 1.1346435546875e-05, + "step": 7436, + "training_step_time": 0.10628199577331543 + }, + { + "epoch": 1.134796142578125e-05, + "model_forward_time": 0.025536775588989258, + "step": 7437 + }, + { + "epoch": 1.134796142578125e-05, + "step": 7437, + "training_step_time": 0.10689353942871094 + }, + { + "epoch": 1.13494873046875e-05, + "model_forward_time": 0.025814533233642578, + "step": 7438 + }, + { + "epoch": 1.13494873046875e-05, + "step": 7438, + "training_step_time": 0.10968470573425293 + }, + { + "epoch": 1.135101318359375e-05, + "model_forward_time": 0.025473833084106445, + "step": 7439 + }, + { + "epoch": 1.135101318359375e-05, + "step": 7439, + "training_step_time": 0.10853457450866699 + }, + { + "epoch": 1.13525390625e-05, + "grad_norm": 0.3715420365333557, + "learning_rate": 8.965927743634391e-05, + "loss": 0.0674, + "step": 7440 + }, + { + "epoch": 1.13525390625e-05, + "model_forward_time": 0.025832653045654297, + "step": 7440 + }, + { + "epoch": 1.13525390625e-05, + "step": 7440, + "training_step_time": 0.10965991020202637 + }, + { + "epoch": 1.135406494140625e-05, + "model_forward_time": 0.025208234786987305, + "step": 7441 + }, + { + "epoch": 1.135406494140625e-05, + "step": 7441, + "training_step_time": 0.16036725044250488 + }, + { + "epoch": 1.13555908203125e-05, + "model_forward_time": 0.024778127670288086, + "step": 7442 + }, + { + "epoch": 1.13555908203125e-05, + "step": 7442, + "training_step_time": 0.10770535469055176 + }, + { + "epoch": 1.135711669921875e-05, + "model_forward_time": 0.024984121322631836, + "step": 7443 + }, + { + "epoch": 1.135711669921875e-05, + "step": 7443, + "training_step_time": 0.1269998550415039 + }, + { + "epoch": 1.1358642578125e-05, + "model_forward_time": 0.025672435760498047, + "step": 7444 + }, + { + "epoch": 1.1358642578125e-05, + "step": 7444, + "training_step_time": 0.1109006404876709 + }, + { + "epoch": 1.136016845703125e-05, + "model_forward_time": 0.02538323402404785, + "step": 7445 + }, + { + "epoch": 1.136016845703125e-05, + "step": 7445, + "training_step_time": 0.1919553279876709 + }, + { + "epoch": 1.13616943359375e-05, + "model_forward_time": 0.024901866912841797, + "step": 7446 + }, + { + "epoch": 1.13616943359375e-05, + "step": 7446, + "training_step_time": 0.12483000755310059 + }, + { + "epoch": 1.136322021484375e-05, + "model_forward_time": 0.024997711181640625, + "step": 7447 + }, + { + "epoch": 1.136322021484375e-05, + "step": 7447, + "training_step_time": 0.11973786354064941 + }, + { + "epoch": 1.136474609375e-05, + "model_forward_time": 0.025373458862304688, + "step": 7448 + }, + { + "epoch": 1.136474609375e-05, + "step": 7448, + "training_step_time": 0.1066751480102539 + }, + { + "epoch": 1.136627197265625e-05, + "model_forward_time": 0.025304079055786133, + "step": 7449 + }, + { + "epoch": 1.136627197265625e-05, + "step": 7449, + "training_step_time": 0.19579410552978516 + }, + { + "epoch": 1.13677978515625e-05, + "grad_norm": 0.541823148727417, + "learning_rate": 8.962568901124327e-05, + "loss": 0.0645, + "step": 7450 + }, + { + "epoch": 1.13677978515625e-05, + "model_forward_time": 0.024516582489013672, + "step": 7450 + }, + { + "epoch": 1.13677978515625e-05, + "step": 7450, + "training_step_time": 0.13893532752990723 + }, + { + "epoch": 1.136932373046875e-05, + "model_forward_time": 0.02518630027770996, + "step": 7451 + }, + { + "epoch": 1.136932373046875e-05, + "step": 7451, + "training_step_time": 0.11243033409118652 + }, + { + "epoch": 1.1370849609375e-05, + "model_forward_time": 0.025170326232910156, + "step": 7452 + }, + { + "epoch": 1.1370849609375e-05, + "step": 7452, + "training_step_time": 0.10991573333740234 + }, + { + "epoch": 1.137237548828125e-05, + "model_forward_time": 0.025527477264404297, + "step": 7453 + }, + { + "epoch": 1.137237548828125e-05, + "step": 7453, + "training_step_time": 0.1188039779663086 + }, + { + "epoch": 1.13739013671875e-05, + "model_forward_time": 0.025712966918945312, + "step": 7454 + }, + { + "epoch": 1.13739013671875e-05, + "step": 7454, + "training_step_time": 0.1149134635925293 + }, + { + "epoch": 1.137542724609375e-05, + "model_forward_time": 0.02524733543395996, + "step": 7455 + }, + { + "epoch": 1.137542724609375e-05, + "step": 7455, + "training_step_time": 0.18972492218017578 + }, + { + "epoch": 1.1376953125e-05, + "model_forward_time": 0.02466440200805664, + "step": 7456 + }, + { + "epoch": 1.1376953125e-05, + "step": 7456, + "training_step_time": 0.10737967491149902 + }, + { + "epoch": 1.137847900390625e-05, + "model_forward_time": 0.02499699592590332, + "step": 7457 + }, + { + "epoch": 1.137847900390625e-05, + "step": 7457, + "training_step_time": 0.10535192489624023 + }, + { + "epoch": 1.13800048828125e-05, + "model_forward_time": 0.025231599807739258, + "step": 7458 + }, + { + "epoch": 1.13800048828125e-05, + "step": 7458, + "training_step_time": 0.10739731788635254 + }, + { + "epoch": 1.138153076171875e-05, + "model_forward_time": 0.026720285415649414, + "step": 7459 + }, + { + "epoch": 1.138153076171875e-05, + "step": 7459, + "training_step_time": 0.10987997055053711 + }, + { + "epoch": 1.1383056640625e-05, + "grad_norm": 0.4487568140029907, + "learning_rate": 8.959205243719402e-05, + "loss": 0.0725, + "step": 7460 + }, + { + "epoch": 1.1383056640625e-05, + "model_forward_time": 0.02602219581604004, + "step": 7460 + }, + { + "epoch": 1.1383056640625e-05, + "step": 7460, + "training_step_time": 0.10877442359924316 + }, + { + "epoch": 1.138458251953125e-05, + "model_forward_time": 0.025221586227416992, + "step": 7461 + }, + { + "epoch": 1.138458251953125e-05, + "step": 7461, + "training_step_time": 0.10827088356018066 + }, + { + "epoch": 1.13861083984375e-05, + "model_forward_time": 0.02580404281616211, + "step": 7462 + }, + { + "epoch": 1.13861083984375e-05, + "step": 7462, + "training_step_time": 0.11054730415344238 + }, + { + "epoch": 1.138763427734375e-05, + "model_forward_time": 0.02606201171875, + "step": 7463 + }, + { + "epoch": 1.138763427734375e-05, + "step": 7463, + "training_step_time": 0.10782623291015625 + }, + { + "epoch": 1.138916015625e-05, + "model_forward_time": 0.025223731994628906, + "step": 7464 + }, + { + "epoch": 1.138916015625e-05, + "step": 7464, + "training_step_time": 0.10737371444702148 + }, + { + "epoch": 1.139068603515625e-05, + "model_forward_time": 0.025101661682128906, + "step": 7465 + }, + { + "epoch": 1.139068603515625e-05, + "step": 7465, + "training_step_time": 0.10848331451416016 + }, + { + "epoch": 1.13922119140625e-05, + "model_forward_time": 0.025310039520263672, + "step": 7466 + }, + { + "epoch": 1.13922119140625e-05, + "step": 7466, + "training_step_time": 0.11298537254333496 + }, + { + "epoch": 1.139373779296875e-05, + "model_forward_time": 0.025574922561645508, + "step": 7467 + }, + { + "epoch": 1.139373779296875e-05, + "step": 7467, + "training_step_time": 0.20866775512695312 + }, + { + "epoch": 1.1395263671875e-05, + "model_forward_time": 0.024865150451660156, + "step": 7468 + }, + { + "epoch": 1.1395263671875e-05, + "step": 7468, + "training_step_time": 0.1118307113647461 + }, + { + "epoch": 1.139678955078125e-05, + "model_forward_time": 0.024909019470214844, + "step": 7469 + }, + { + "epoch": 1.139678955078125e-05, + "step": 7469, + "training_step_time": 0.11139750480651855 + }, + { + "epoch": 1.13983154296875e-05, + "grad_norm": 0.827911913394928, + "learning_rate": 8.955836775506776e-05, + "loss": 0.0835, + "step": 7470 + }, + { + "epoch": 1.13983154296875e-05, + "model_forward_time": 0.025642871856689453, + "step": 7470 + }, + { + "epoch": 1.13983154296875e-05, + "step": 7470, + "training_step_time": 0.21308088302612305 + }, + { + "epoch": 1.139984130859375e-05, + "model_forward_time": 0.027009010314941406, + "step": 7471 + }, + { + "epoch": 1.139984130859375e-05, + "step": 7471, + "training_step_time": 0.1160123348236084 + }, + { + "epoch": 1.14013671875e-05, + "model_forward_time": 0.025164127349853516, + "step": 7472 + }, + { + "epoch": 1.14013671875e-05, + "step": 7472, + "training_step_time": 0.14982175827026367 + }, + { + "epoch": 1.140289306640625e-05, + "model_forward_time": 0.0259552001953125, + "step": 7473 + }, + { + "epoch": 1.140289306640625e-05, + "step": 7473, + "training_step_time": 0.10730361938476562 + }, + { + "epoch": 1.14044189453125e-05, + "model_forward_time": 0.025359392166137695, + "step": 7474 + }, + { + "epoch": 1.14044189453125e-05, + "step": 7474, + "training_step_time": 0.10889339447021484 + }, + { + "epoch": 1.140594482421875e-05, + "model_forward_time": 0.02538752555847168, + "step": 7475 + }, + { + "epoch": 1.140594482421875e-05, + "step": 7475, + "training_step_time": 0.13800287246704102 + }, + { + "epoch": 1.1407470703125e-05, + "model_forward_time": 0.0254974365234375, + "step": 7476 + }, + { + "epoch": 1.1407470703125e-05, + "step": 7476, + "training_step_time": 0.13433384895324707 + }, + { + "epoch": 1.140899658203125e-05, + "model_forward_time": 0.024883747100830078, + "step": 7477 + }, + { + "epoch": 1.140899658203125e-05, + "step": 7477, + "training_step_time": 0.11044049263000488 + }, + { + "epoch": 1.14105224609375e-05, + "model_forward_time": 0.025385379791259766, + "step": 7478 + }, + { + "epoch": 1.14105224609375e-05, + "step": 7478, + "training_step_time": 0.11016201972961426 + }, + { + "epoch": 1.141204833984375e-05, + "model_forward_time": 0.02512812614440918, + "step": 7479 + }, + { + "epoch": 1.141204833984375e-05, + "step": 7479, + "training_step_time": 0.10714221000671387 + }, + { + "epoch": 1.141357421875e-05, + "grad_norm": 0.44633209705352783, + "learning_rate": 8.95246350057946e-05, + "loss": 0.0739, + "step": 7480 + }, + { + "epoch": 1.141357421875e-05, + "model_forward_time": 0.025025129318237305, + "step": 7480 + }, + { + "epoch": 1.141357421875e-05, + "step": 7480, + "training_step_time": 0.11897420883178711 + }, + { + "epoch": 1.141510009765625e-05, + "model_forward_time": 0.02547144889831543, + "step": 7481 + }, + { + "epoch": 1.141510009765625e-05, + "step": 7481, + "training_step_time": 0.12049603462219238 + }, + { + "epoch": 1.14166259765625e-05, + "model_forward_time": 0.024299144744873047, + "step": 7482 + }, + { + "epoch": 1.14166259765625e-05, + "step": 7482, + "training_step_time": 0.11717367172241211 + }, + { + "epoch": 1.141815185546875e-05, + "model_forward_time": 0.0246732234954834, + "step": 7483 + }, + { + "epoch": 1.141815185546875e-05, + "step": 7483, + "training_step_time": 0.11414647102355957 + }, + { + "epoch": 1.1419677734375e-05, + "model_forward_time": 0.02502274513244629, + "step": 7484 + }, + { + "epoch": 1.1419677734375e-05, + "step": 7484, + "training_step_time": 0.11986017227172852 + }, + { + "epoch": 1.142120361328125e-05, + "model_forward_time": 0.024698734283447266, + "step": 7485 + }, + { + "epoch": 1.142120361328125e-05, + "step": 7485, + "training_step_time": 0.11498546600341797 + }, + { + "epoch": 1.14227294921875e-05, + "model_forward_time": 0.025289297103881836, + "step": 7486 + }, + { + "epoch": 1.14227294921875e-05, + "step": 7486, + "training_step_time": 0.1871654987335205 + }, + { + "epoch": 1.142425537109375e-05, + "model_forward_time": 0.02476048469543457, + "step": 7487 + }, + { + "epoch": 1.142425537109375e-05, + "step": 7487, + "training_step_time": 0.11253595352172852 + }, + { + "epoch": 1.142578125e-05, + "model_forward_time": 0.025255203247070312, + "step": 7488 + }, + { + "epoch": 1.142578125e-05, + "step": 7488, + "training_step_time": 0.1189577579498291 + }, + { + "epoch": 1.142730712890625e-05, + "model_forward_time": 0.027173280715942383, + "step": 7489 + }, + { + "epoch": 1.142730712890625e-05, + "step": 7489, + "training_step_time": 0.13217759132385254 + }, + { + "epoch": 1.14288330078125e-05, + "grad_norm": 0.4712039530277252, + "learning_rate": 8.949085423036296e-05, + "loss": 0.0736, + "step": 7490 + }, + { + "epoch": 1.14288330078125e-05, + "model_forward_time": 0.02729964256286621, + "step": 7490 + }, + { + "epoch": 1.14288330078125e-05, + "step": 7490, + "training_step_time": 0.2835516929626465 + }, + { + "epoch": 1.143035888671875e-05, + "model_forward_time": 0.02773261070251465, + "step": 7491 + }, + { + "epoch": 1.143035888671875e-05, + "step": 7491, + "training_step_time": 0.31546521186828613 + }, + { + "epoch": 1.1431884765625e-05, + "model_forward_time": 0.027684450149536133, + "step": 7492 + }, + { + "epoch": 1.1431884765625e-05, + "step": 7492, + "training_step_time": 0.23333311080932617 + }, + { + "epoch": 1.143341064453125e-05, + "model_forward_time": 0.029149770736694336, + "step": 7493 + }, + { + "epoch": 1.143341064453125e-05, + "step": 7493, + "training_step_time": 0.27862095832824707 + }, + { + "epoch": 1.14349365234375e-05, + "model_forward_time": 0.03225994110107422, + "step": 7494 + }, + { + "epoch": 1.14349365234375e-05, + "step": 7494, + "training_step_time": 0.38484764099121094 + }, + { + "epoch": 1.143646240234375e-05, + "model_forward_time": 0.032654762268066406, + "step": 7495 + }, + { + "epoch": 1.143646240234375e-05, + "step": 7495, + "training_step_time": 0.330456018447876 + }, + { + "epoch": 1.143798828125e-05, + "model_forward_time": 0.03200221061706543, + "step": 7496 + }, + { + "epoch": 1.143798828125e-05, + "step": 7496, + "training_step_time": 0.288083553314209 + }, + { + "epoch": 1.143951416015625e-05, + "model_forward_time": 0.03369498252868652, + "step": 7497 + }, + { + "epoch": 1.143951416015625e-05, + "step": 7497, + "training_step_time": 0.2920682430267334 + }, + { + "epoch": 1.14410400390625e-05, + "model_forward_time": 0.03727149963378906, + "step": 7498 + }, + { + "epoch": 1.14410400390625e-05, + "step": 7498, + "training_step_time": 0.2603771686553955 + }, + { + "epoch": 1.144256591796875e-05, + "model_forward_time": 0.044814109802246094, + "step": 7499 + }, + { + "epoch": 1.144256591796875e-05, + "step": 7499, + "training_step_time": 0.25127720832824707 + }, + { + "epoch": 1.1444091796875e-05, + "grad_norm": 0.6195780038833618, + "learning_rate": 8.945702546981969e-05, + "loss": 0.0586, + "step": 7500 + }, + { + "epoch": 1.1444091796875e-05, + "model_forward_time": 0.028694868087768555, + "step": 7500 + }, + { + "epoch": 1.1444091796875e-05, + "step": 7500, + "training_step_time": 0.27643513679504395 + }, + { + "epoch": 1.144561767578125e-05, + "model_forward_time": 0.03206181526184082, + "step": 7501 + }, + { + "epoch": 1.144561767578125e-05, + "step": 7501, + "training_step_time": 0.3108961582183838 + }, + { + "epoch": 1.14471435546875e-05, + "model_forward_time": 0.030891895294189453, + "step": 7502 + }, + { + "epoch": 1.14471435546875e-05, + "step": 7502, + "training_step_time": 0.2969017028808594 + }, + { + "epoch": 1.144866943359375e-05, + "model_forward_time": 0.03175759315490723, + "step": 7503 + }, + { + "epoch": 1.144866943359375e-05, + "step": 7503, + "training_step_time": 0.17866063117980957 + }, + { + "epoch": 1.14501953125e-05, + "model_forward_time": 0.03013467788696289, + "step": 7504 + }, + { + "epoch": 1.14501953125e-05, + "step": 7504, + "training_step_time": 0.3036816120147705 + }, + { + "epoch": 1.145172119140625e-05, + "model_forward_time": 0.028237581253051758, + "step": 7505 + }, + { + "epoch": 1.145172119140625e-05, + "step": 7505, + "training_step_time": 0.17996764183044434 + }, + { + "epoch": 1.14532470703125e-05, + "model_forward_time": 0.027724742889404297, + "step": 7506 + }, + { + "epoch": 1.14532470703125e-05, + "step": 7506, + "training_step_time": 0.15885686874389648 + }, + { + "epoch": 1.145477294921875e-05, + "model_forward_time": 0.02636861801147461, + "step": 7507 + }, + { + "epoch": 1.145477294921875e-05, + "step": 7507, + "training_step_time": 0.15992069244384766 + }, + { + "epoch": 1.1456298828125e-05, + "model_forward_time": 0.026248693466186523, + "step": 7508 + }, + { + "epoch": 1.1456298828125e-05, + "step": 7508, + "training_step_time": 0.11858963966369629 + }, + { + "epoch": 1.145782470703125e-05, + "model_forward_time": 0.025864124298095703, + "step": 7509 + }, + { + "epoch": 1.145782470703125e-05, + "step": 7509, + "training_step_time": 0.11260151863098145 + }, + { + "epoch": 1.14593505859375e-05, + "grad_norm": 0.463886022567749, + "learning_rate": 8.942314876526992e-05, + "loss": 0.0605, + "step": 7510 + }, + { + "epoch": 1.14593505859375e-05, + "model_forward_time": 0.025745391845703125, + "step": 7510 + }, + { + "epoch": 1.14593505859375e-05, + "step": 7510, + "training_step_time": 0.11325716972351074 + }, + { + "epoch": 1.146087646484375e-05, + "model_forward_time": 0.025552749633789062, + "step": 7511 + }, + { + "epoch": 1.146087646484375e-05, + "step": 7511, + "training_step_time": 0.11036968231201172 + }, + { + "epoch": 1.146240234375e-05, + "model_forward_time": 0.02500605583190918, + "step": 7512 + }, + { + "epoch": 1.146240234375e-05, + "step": 7512, + "training_step_time": 0.1109781265258789 + }, + { + "epoch": 1.146392822265625e-05, + "model_forward_time": 0.02545619010925293, + "step": 7513 + }, + { + "epoch": 1.146392822265625e-05, + "step": 7513, + "training_step_time": 0.1268160343170166 + }, + { + "epoch": 1.14654541015625e-05, + "model_forward_time": 0.025903940200805664, + "step": 7514 + }, + { + "epoch": 1.14654541015625e-05, + "step": 7514, + "training_step_time": 0.10970020294189453 + }, + { + "epoch": 1.146697998046875e-05, + "model_forward_time": 0.025619983673095703, + "step": 7515 + }, + { + "epoch": 1.146697998046875e-05, + "step": 7515, + "training_step_time": 0.21758794784545898 + }, + { + "epoch": 1.1468505859375e-05, + "model_forward_time": 0.02518916130065918, + "step": 7516 + }, + { + "epoch": 1.1468505859375e-05, + "step": 7516, + "training_step_time": 0.10937666893005371 + }, + { + "epoch": 1.147003173828125e-05, + "model_forward_time": 0.023174285888671875, + "step": 7517 + }, + { + "epoch": 1.147003173828125e-05, + "step": 7517, + "training_step_time": 0.10879373550415039 + }, + { + "epoch": 1.14715576171875e-05, + "model_forward_time": 0.024286985397338867, + "step": 7518 + }, + { + "epoch": 1.14715576171875e-05, + "step": 7518, + "training_step_time": 0.21215009689331055 + }, + { + "epoch": 1.147308349609375e-05, + "model_forward_time": 0.02474069595336914, + "step": 7519 + }, + { + "epoch": 1.147308349609375e-05, + "step": 7519, + "training_step_time": 0.10425758361816406 + }, + { + "epoch": 1.1474609375e-05, + "grad_norm": 0.6711480021476746, + "learning_rate": 8.938922415787703e-05, + "loss": 0.0565, + "step": 7520 + }, + { + "epoch": 1.1474609375e-05, + "model_forward_time": 0.024590015411376953, + "step": 7520 + }, + { + "epoch": 1.1474609375e-05, + "step": 7520, + "training_step_time": 0.10497379302978516 + }, + { + "epoch": 1.147613525390625e-05, + "model_forward_time": 0.025584936141967773, + "step": 7521 + }, + { + "epoch": 1.147613525390625e-05, + "step": 7521, + "training_step_time": 0.10781288146972656 + }, + { + "epoch": 1.14776611328125e-05, + "model_forward_time": 0.025742769241333008, + "step": 7522 + }, + { + "epoch": 1.14776611328125e-05, + "step": 7522, + "training_step_time": 0.15942955017089844 + }, + { + "epoch": 1.147918701171875e-05, + "model_forward_time": 0.026860713958740234, + "step": 7523 + }, + { + "epoch": 1.147918701171875e-05, + "step": 7523, + "training_step_time": 0.22969937324523926 + }, + { + "epoch": 1.1480712890625e-05, + "model_forward_time": 0.024640798568725586, + "step": 7524 + }, + { + "epoch": 1.1480712890625e-05, + "step": 7524, + "training_step_time": 0.15426349639892578 + }, + { + "epoch": 1.148223876953125e-05, + "model_forward_time": 0.024309158325195312, + "step": 7525 + }, + { + "epoch": 1.148223876953125e-05, + "step": 7525, + "training_step_time": 0.1756727695465088 + }, + { + "epoch": 1.14837646484375e-05, + "model_forward_time": 0.024947166442871094, + "step": 7526 + }, + { + "epoch": 1.14837646484375e-05, + "step": 7526, + "training_step_time": 0.13421344757080078 + }, + { + "epoch": 1.148529052734375e-05, + "model_forward_time": 0.023604154586791992, + "step": 7527 + }, + { + "epoch": 1.148529052734375e-05, + "step": 7527, + "training_step_time": 0.18221139907836914 + }, + { + "epoch": 1.148681640625e-05, + "model_forward_time": 0.024515151977539062, + "step": 7528 + }, + { + "epoch": 1.148681640625e-05, + "step": 7528, + "training_step_time": 0.12261390686035156 + }, + { + "epoch": 1.148834228515625e-05, + "model_forward_time": 0.0234529972076416, + "step": 7529 + }, + { + "epoch": 1.148834228515625e-05, + "step": 7529, + "training_step_time": 0.11481118202209473 + }, + { + "epoch": 1.14898681640625e-05, + "grad_norm": 0.31023868918418884, + "learning_rate": 8.935525168886262e-05, + "loss": 0.0513, + "step": 7530 + }, + { + "epoch": 1.14898681640625e-05, + "model_forward_time": 0.024337291717529297, + "step": 7530 + }, + { + "epoch": 1.14898681640625e-05, + "step": 7530, + "training_step_time": 0.11804342269897461 + }, + { + "epoch": 1.149139404296875e-05, + "model_forward_time": 0.02403569221496582, + "step": 7531 + }, + { + "epoch": 1.149139404296875e-05, + "step": 7531, + "training_step_time": 0.11221981048583984 + }, + { + "epoch": 1.1492919921875e-05, + "model_forward_time": 0.02452254295349121, + "step": 7532 + }, + { + "epoch": 1.1492919921875e-05, + "step": 7532, + "training_step_time": 0.11206412315368652 + }, + { + "epoch": 1.149444580078125e-05, + "model_forward_time": 0.025042295455932617, + "step": 7533 + }, + { + "epoch": 1.149444580078125e-05, + "step": 7533, + "training_step_time": 0.10973405838012695 + }, + { + "epoch": 1.14959716796875e-05, + "model_forward_time": 0.02460169792175293, + "step": 7534 + }, + { + "epoch": 1.14959716796875e-05, + "step": 7534, + "training_step_time": 0.10980796813964844 + }, + { + "epoch": 1.149749755859375e-05, + "model_forward_time": 0.025773048400878906, + "step": 7535 + }, + { + "epoch": 1.149749755859375e-05, + "step": 7535, + "training_step_time": 0.11005377769470215 + }, + { + "epoch": 1.14990234375e-05, + "model_forward_time": 0.027606725692749023, + "step": 7536 + }, + { + "epoch": 1.14990234375e-05, + "step": 7536, + "training_step_time": 0.1114509105682373 + }, + { + "epoch": 1.150054931640625e-05, + "model_forward_time": 0.025926828384399414, + "step": 7537 + }, + { + "epoch": 1.150054931640625e-05, + "step": 7537, + "training_step_time": 0.11589455604553223 + }, + { + "epoch": 1.15020751953125e-05, + "model_forward_time": 0.026102542877197266, + "step": 7538 + }, + { + "epoch": 1.15020751953125e-05, + "step": 7538, + "training_step_time": 0.1090848445892334 + }, + { + "epoch": 1.150360107421875e-05, + "model_forward_time": 0.025512218475341797, + "step": 7539 + }, + { + "epoch": 1.150360107421875e-05, + "step": 7539, + "training_step_time": 0.18982887268066406 + }, + { + "epoch": 1.1505126953125e-05, + "grad_norm": 0.34551236033439636, + "learning_rate": 8.932123139950648e-05, + "loss": 0.0543, + "step": 7540 + }, + { + "epoch": 1.1505126953125e-05, + "model_forward_time": 0.025347471237182617, + "step": 7540 + }, + { + "epoch": 1.1505126953125e-05, + "step": 7540, + "training_step_time": 0.10652685165405273 + }, + { + "epoch": 1.150665283203125e-05, + "model_forward_time": 0.024825334548950195, + "step": 7541 + }, + { + "epoch": 1.150665283203125e-05, + "step": 7541, + "training_step_time": 0.15387463569641113 + }, + { + "epoch": 1.15081787109375e-05, + "model_forward_time": 0.025041580200195312, + "step": 7542 + }, + { + "epoch": 1.15081787109375e-05, + "step": 7542, + "training_step_time": 0.15796875953674316 + }, + { + "epoch": 1.150970458984375e-05, + "model_forward_time": 0.024828433990478516, + "step": 7543 + }, + { + "epoch": 1.150970458984375e-05, + "step": 7543, + "training_step_time": 0.1181643009185791 + }, + { + "epoch": 1.151123046875e-05, + "model_forward_time": 0.02532029151916504, + "step": 7544 + }, + { + "epoch": 1.151123046875e-05, + "step": 7544, + "training_step_time": 0.10732269287109375 + }, + { + "epoch": 1.151275634765625e-05, + "model_forward_time": 0.025308609008789062, + "step": 7545 + }, + { + "epoch": 1.151275634765625e-05, + "step": 7545, + "training_step_time": 0.19997739791870117 + }, + { + "epoch": 1.15142822265625e-05, + "model_forward_time": 0.02466416358947754, + "step": 7546 + }, + { + "epoch": 1.15142822265625e-05, + "step": 7546, + "training_step_time": 0.10839080810546875 + }, + { + "epoch": 1.151580810546875e-05, + "model_forward_time": 0.02546095848083496, + "step": 7547 + }, + { + "epoch": 1.151580810546875e-05, + "step": 7547, + "training_step_time": 0.10600161552429199 + }, + { + "epoch": 1.1517333984375e-05, + "model_forward_time": 0.025327205657958984, + "step": 7548 + }, + { + "epoch": 1.1517333984375e-05, + "step": 7548, + "training_step_time": 0.11980986595153809 + }, + { + "epoch": 1.151885986328125e-05, + "model_forward_time": 0.025332927703857422, + "step": 7549 + }, + { + "epoch": 1.151885986328125e-05, + "step": 7549, + "training_step_time": 0.13177704811096191 + }, + { + "epoch": 1.15203857421875e-05, + "grad_norm": 0.3907056450843811, + "learning_rate": 8.928716333114643e-05, + "loss": 0.052, + "step": 7550 + }, + { + "epoch": 1.15203857421875e-05, + "model_forward_time": 0.0253298282623291, + "step": 7550 + }, + { + "epoch": 1.15203857421875e-05, + "step": 7550, + "training_step_time": 0.1103363037109375 + }, + { + "epoch": 1.152191162109375e-05, + "model_forward_time": 0.025565147399902344, + "step": 7551 + }, + { + "epoch": 1.152191162109375e-05, + "step": 7551, + "training_step_time": 0.11193203926086426 + }, + { + "epoch": 1.15234375e-05, + "model_forward_time": 0.025788307189941406, + "step": 7552 + }, + { + "epoch": 1.15234375e-05, + "step": 7552, + "training_step_time": 0.10695981979370117 + }, + { + "epoch": 1.152496337890625e-05, + "model_forward_time": 0.025403738021850586, + "step": 7553 + }, + { + "epoch": 1.152496337890625e-05, + "step": 7553, + "training_step_time": 0.10672807693481445 + }, + { + "epoch": 1.15264892578125e-05, + "model_forward_time": 0.025153398513793945, + "step": 7554 + }, + { + "epoch": 1.15264892578125e-05, + "step": 7554, + "training_step_time": 0.10427594184875488 + }, + { + "epoch": 1.152801513671875e-05, + "model_forward_time": 0.025589704513549805, + "step": 7555 + }, + { + "epoch": 1.152801513671875e-05, + "step": 7555, + "training_step_time": 0.10627865791320801 + }, + { + "epoch": 1.1529541015625e-05, + "model_forward_time": 0.025009870529174805, + "step": 7556 + }, + { + "epoch": 1.1529541015625e-05, + "step": 7556, + "training_step_time": 0.17627835273742676 + }, + { + "epoch": 1.153106689453125e-05, + "model_forward_time": 0.025344133377075195, + "step": 7557 + }, + { + "epoch": 1.153106689453125e-05, + "step": 7557, + "training_step_time": 0.1110372543334961 + }, + { + "epoch": 1.15325927734375e-05, + "model_forward_time": 0.024831533432006836, + "step": 7558 + }, + { + "epoch": 1.15325927734375e-05, + "step": 7558, + "training_step_time": 0.1956171989440918 + }, + { + "epoch": 1.153411865234375e-05, + "model_forward_time": 0.0248563289642334, + "step": 7559 + }, + { + "epoch": 1.153411865234375e-05, + "step": 7559, + "training_step_time": 0.18651485443115234 + }, + { + "epoch": 1.153564453125e-05, + "grad_norm": 0.20785319805145264, + "learning_rate": 8.92530475251784e-05, + "loss": 0.0476, + "step": 7560 + }, + { + "epoch": 1.153564453125e-05, + "model_forward_time": 0.024492502212524414, + "step": 7560 + }, + { + "epoch": 1.153564453125e-05, + "step": 7560, + "training_step_time": 0.12028884887695312 + }, + { + "epoch": 1.153717041015625e-05, + "model_forward_time": 0.02514028549194336, + "step": 7561 + }, + { + "epoch": 1.153717041015625e-05, + "step": 7561, + "training_step_time": 0.11856937408447266 + }, + { + "epoch": 1.15386962890625e-05, + "model_forward_time": 0.025243282318115234, + "step": 7562 + }, + { + "epoch": 1.15386962890625e-05, + "step": 7562, + "training_step_time": 0.10597634315490723 + }, + { + "epoch": 1.154022216796875e-05, + "model_forward_time": 0.026213407516479492, + "step": 7563 + }, + { + "epoch": 1.154022216796875e-05, + "step": 7563, + "training_step_time": 0.10567951202392578 + }, + { + "epoch": 1.1541748046875e-05, + "model_forward_time": 0.025548458099365234, + "step": 7564 + }, + { + "epoch": 1.1541748046875e-05, + "step": 7564, + "training_step_time": 0.10509753227233887 + }, + { + "epoch": 1.154327392578125e-05, + "model_forward_time": 0.025368213653564453, + "step": 7565 + }, + { + "epoch": 1.154327392578125e-05, + "step": 7565, + "training_step_time": 0.10883307456970215 + }, + { + "epoch": 1.15447998046875e-05, + "model_forward_time": 0.026121854782104492, + "step": 7566 + }, + { + "epoch": 1.15447998046875e-05, + "step": 7566, + "training_step_time": 0.13388895988464355 + }, + { + "epoch": 1.154632568359375e-05, + "model_forward_time": 0.025322675704956055, + "step": 7567 + }, + { + "epoch": 1.154632568359375e-05, + "step": 7567, + "training_step_time": 0.14200782775878906 + }, + { + "epoch": 1.15478515625e-05, + "model_forward_time": 0.024810791015625, + "step": 7568 + }, + { + "epoch": 1.15478515625e-05, + "step": 7568, + "training_step_time": 0.1113128662109375 + }, + { + "epoch": 1.154937744140625e-05, + "model_forward_time": 0.025634765625, + "step": 7569 + }, + { + "epoch": 1.154937744140625e-05, + "step": 7569, + "training_step_time": 0.11280298233032227 + }, + { + "epoch": 1.15509033203125e-05, + "grad_norm": 0.41076380014419556, + "learning_rate": 8.921888402305628e-05, + "loss": 0.057, + "step": 7570 + }, + { + "epoch": 1.15509033203125e-05, + "model_forward_time": 0.025046110153198242, + "step": 7570 + }, + { + "epoch": 1.15509033203125e-05, + "step": 7570, + "training_step_time": 0.11122369766235352 + }, + { + "epoch": 1.155242919921875e-05, + "model_forward_time": 0.0251309871673584, + "step": 7571 + }, + { + "epoch": 1.155242919921875e-05, + "step": 7571, + "training_step_time": 0.10721945762634277 + }, + { + "epoch": 1.1553955078125e-05, + "model_forward_time": 0.02491450309753418, + "step": 7572 + }, + { + "epoch": 1.1553955078125e-05, + "step": 7572, + "training_step_time": 0.19387412071228027 + }, + { + "epoch": 1.155548095703125e-05, + "model_forward_time": 0.024790525436401367, + "step": 7573 + }, + { + "epoch": 1.155548095703125e-05, + "step": 7573, + "training_step_time": 0.10835862159729004 + }, + { + "epoch": 1.15570068359375e-05, + "model_forward_time": 0.02400970458984375, + "step": 7574 + }, + { + "epoch": 1.15570068359375e-05, + "step": 7574, + "training_step_time": 0.10474824905395508 + }, + { + "epoch": 1.155853271484375e-05, + "model_forward_time": 0.02563929557800293, + "step": 7575 + }, + { + "epoch": 1.155853271484375e-05, + "step": 7575, + "training_step_time": 0.10742783546447754 + }, + { + "epoch": 1.156005859375e-05, + "model_forward_time": 0.025474071502685547, + "step": 7576 + }, + { + "epoch": 1.156005859375e-05, + "step": 7576, + "training_step_time": 0.10790824890136719 + }, + { + "epoch": 1.156158447265625e-05, + "model_forward_time": 0.025529861450195312, + "step": 7577 + }, + { + "epoch": 1.156158447265625e-05, + "step": 7577, + "training_step_time": 0.10888409614562988 + }, + { + "epoch": 1.15631103515625e-05, + "model_forward_time": 0.02574443817138672, + "step": 7578 + }, + { + "epoch": 1.15631103515625e-05, + "step": 7578, + "training_step_time": 0.10878586769104004 + }, + { + "epoch": 1.156463623046875e-05, + "model_forward_time": 0.025615930557250977, + "step": 7579 + }, + { + "epoch": 1.156463623046875e-05, + "step": 7579, + "training_step_time": 0.10616827011108398 + }, + { + "epoch": 1.1566162109375e-05, + "grad_norm": 0.4428868293762207, + "learning_rate": 8.9184672866292e-05, + "loss": 0.0636, + "step": 7580 + }, + { + "epoch": 1.1566162109375e-05, + "model_forward_time": 0.025633573532104492, + "step": 7580 + }, + { + "epoch": 1.1566162109375e-05, + "step": 7580, + "training_step_time": 0.11730504035949707 + }, + { + "epoch": 1.156768798828125e-05, + "model_forward_time": 0.025322437286376953, + "step": 7581 + }, + { + "epoch": 1.156768798828125e-05, + "step": 7581, + "training_step_time": 0.11594557762145996 + }, + { + "epoch": 1.15692138671875e-05, + "model_forward_time": 0.025382041931152344, + "step": 7582 + }, + { + "epoch": 1.15692138671875e-05, + "step": 7582, + "training_step_time": 0.11431550979614258 + }, + { + "epoch": 1.157073974609375e-05, + "model_forward_time": 0.02511906623840332, + "step": 7583 + }, + { + "epoch": 1.157073974609375e-05, + "step": 7583, + "training_step_time": 0.10933399200439453 + }, + { + "epoch": 1.1572265625e-05, + "model_forward_time": 0.025600433349609375, + "step": 7584 + }, + { + "epoch": 1.1572265625e-05, + "step": 7584, + "training_step_time": 0.11113572120666504 + }, + { + "epoch": 1.157379150390625e-05, + "model_forward_time": 0.025514841079711914, + "step": 7585 + }, + { + "epoch": 1.157379150390625e-05, + "step": 7585, + "training_step_time": 0.11262178421020508 + }, + { + "epoch": 1.15753173828125e-05, + "model_forward_time": 0.025684118270874023, + "step": 7586 + }, + { + "epoch": 1.15753173828125e-05, + "step": 7586, + "training_step_time": 0.11214947700500488 + }, + { + "epoch": 1.157684326171875e-05, + "model_forward_time": 0.025887727737426758, + "step": 7587 + }, + { + "epoch": 1.157684326171875e-05, + "step": 7587, + "training_step_time": 0.11516928672790527 + }, + { + "epoch": 1.1578369140625e-05, + "model_forward_time": 0.02554941177368164, + "step": 7588 + }, + { + "epoch": 1.1578369140625e-05, + "step": 7588, + "training_step_time": 0.22561955451965332 + }, + { + "epoch": 1.157989501953125e-05, + "model_forward_time": 0.025089263916015625, + "step": 7589 + }, + { + "epoch": 1.157989501953125e-05, + "step": 7589, + "training_step_time": 0.11248159408569336 + }, + { + "epoch": 1.15814208984375e-05, + "grad_norm": 0.497812420129776, + "learning_rate": 8.91504140964553e-05, + "loss": 0.06, + "step": 7590 + }, + { + "epoch": 1.15814208984375e-05, + "model_forward_time": 0.02458810806274414, + "step": 7590 + }, + { + "epoch": 1.15814208984375e-05, + "step": 7590, + "training_step_time": 0.1064901351928711 + }, + { + "epoch": 1.158294677734375e-05, + "model_forward_time": 0.025667190551757812, + "step": 7591 + }, + { + "epoch": 1.158294677734375e-05, + "step": 7591, + "training_step_time": 0.11013436317443848 + }, + { + "epoch": 1.158447265625e-05, + "model_forward_time": 0.02528834342956543, + "step": 7592 + }, + { + "epoch": 1.158447265625e-05, + "step": 7592, + "training_step_time": 0.11440348625183105 + }, + { + "epoch": 1.158599853515625e-05, + "model_forward_time": 0.025539636611938477, + "step": 7593 + }, + { + "epoch": 1.158599853515625e-05, + "step": 7593, + "training_step_time": 0.1176600456237793 + }, + { + "epoch": 1.15875244140625e-05, + "model_forward_time": 0.02505660057067871, + "step": 7594 + }, + { + "epoch": 1.15875244140625e-05, + "step": 7594, + "training_step_time": 0.2245039939880371 + }, + { + "epoch": 1.158905029296875e-05, + "model_forward_time": 0.02477741241455078, + "step": 7595 + }, + { + "epoch": 1.158905029296875e-05, + "step": 7595, + "training_step_time": 0.12923789024353027 + }, + { + "epoch": 1.1590576171875e-05, + "model_forward_time": 0.024675846099853516, + "step": 7596 + }, + { + "epoch": 1.1590576171875e-05, + "step": 7596, + "training_step_time": 0.11308979988098145 + }, + { + "epoch": 1.159210205078125e-05, + "model_forward_time": 0.02508544921875, + "step": 7597 + }, + { + "epoch": 1.159210205078125e-05, + "step": 7597, + "training_step_time": 0.11894893646240234 + }, + { + "epoch": 1.15936279296875e-05, + "model_forward_time": 0.025282621383666992, + "step": 7598 + }, + { + "epoch": 1.15936279296875e-05, + "step": 7598, + "training_step_time": 0.10695528984069824 + }, + { + "epoch": 1.159515380859375e-05, + "model_forward_time": 0.025716304779052734, + "step": 7599 + }, + { + "epoch": 1.159515380859375e-05, + "step": 7599, + "training_step_time": 0.1094503402709961 + }, + { + "epoch": 1.15966796875e-05, + "grad_norm": 0.4147672951221466, + "learning_rate": 8.911610775517382e-05, + "loss": 0.0681, + "step": 7600 + }, + { + "epoch": 1.15966796875e-05, + "model_forward_time": 0.02514934539794922, + "step": 7600 + }, + { + "epoch": 1.15966796875e-05, + "step": 7600, + "training_step_time": 0.10867762565612793 + }, + { + "epoch": 1.159820556640625e-05, + "model_forward_time": 0.026123523712158203, + "step": 7601 + }, + { + "epoch": 1.159820556640625e-05, + "step": 7601, + "training_step_time": 0.1785874366760254 + }, + { + "epoch": 1.15997314453125e-05, + "model_forward_time": 0.0252685546875, + "step": 7602 + }, + { + "epoch": 1.15997314453125e-05, + "step": 7602, + "training_step_time": 0.10990190505981445 + }, + { + "epoch": 1.160125732421875e-05, + "model_forward_time": 0.02790045738220215, + "step": 7603 + }, + { + "epoch": 1.160125732421875e-05, + "step": 7603, + "training_step_time": 0.21428894996643066 + }, + { + "epoch": 1.1602783203125e-05, + "model_forward_time": 0.024366378784179688, + "step": 7604 + }, + { + "epoch": 1.1602783203125e-05, + "step": 7604, + "training_step_time": 0.10966753959655762 + }, + { + "epoch": 1.160430908203125e-05, + "model_forward_time": 0.024645566940307617, + "step": 7605 + }, + { + "epoch": 1.160430908203125e-05, + "step": 7605, + "training_step_time": 0.10852789878845215 + }, + { + "epoch": 1.16058349609375e-05, + "model_forward_time": 0.025670766830444336, + "step": 7606 + }, + { + "epoch": 1.16058349609375e-05, + "step": 7606, + "training_step_time": 0.11270785331726074 + }, + { + "epoch": 1.160736083984375e-05, + "model_forward_time": 0.02571392059326172, + "step": 7607 + }, + { + "epoch": 1.160736083984375e-05, + "step": 7607, + "training_step_time": 0.11242079734802246 + }, + { + "epoch": 1.160888671875e-05, + "model_forward_time": 0.02544689178466797, + "step": 7608 + }, + { + "epoch": 1.160888671875e-05, + "step": 7608, + "training_step_time": 0.10724973678588867 + }, + { + "epoch": 1.161041259765625e-05, + "model_forward_time": 0.025478601455688477, + "step": 7609 + }, + { + "epoch": 1.161041259765625e-05, + "step": 7609, + "training_step_time": 0.10607290267944336 + }, + { + "epoch": 1.16119384765625e-05, + "grad_norm": 0.5737213492393494, + "learning_rate": 8.908175388413304e-05, + "loss": 0.0597, + "step": 7610 + }, + { + "epoch": 1.16119384765625e-05, + "model_forward_time": 0.025459766387939453, + "step": 7610 + }, + { + "epoch": 1.16119384765625e-05, + "step": 7610, + "training_step_time": 0.10748910903930664 + }, + { + "epoch": 1.161346435546875e-05, + "model_forward_time": 0.025223970413208008, + "step": 7611 + }, + { + "epoch": 1.161346435546875e-05, + "step": 7611, + "training_step_time": 0.10920405387878418 + }, + { + "epoch": 1.1614990234375e-05, + "model_forward_time": 0.024556875228881836, + "step": 7612 + }, + { + "epoch": 1.1614990234375e-05, + "step": 7612, + "training_step_time": 0.11302423477172852 + }, + { + "epoch": 1.161651611328125e-05, + "model_forward_time": 0.026060104370117188, + "step": 7613 + }, + { + "epoch": 1.161651611328125e-05, + "step": 7613, + "training_step_time": 0.12968683242797852 + }, + { + "epoch": 1.16180419921875e-05, + "model_forward_time": 0.025667190551757812, + "step": 7614 + }, + { + "epoch": 1.16180419921875e-05, + "step": 7614, + "training_step_time": 0.1146547794342041 + }, + { + "epoch": 1.161956787109375e-05, + "model_forward_time": 0.026381254196166992, + "step": 7615 + }, + { + "epoch": 1.161956787109375e-05, + "step": 7615, + "training_step_time": 0.10876011848449707 + }, + { + "epoch": 1.162109375e-05, + "model_forward_time": 0.025258779525756836, + "step": 7616 + }, + { + "epoch": 1.162109375e-05, + "step": 7616, + "training_step_time": 0.1150503158569336 + }, + { + "epoch": 1.162261962890625e-05, + "model_forward_time": 0.025096416473388672, + "step": 7617 + }, + { + "epoch": 1.162261962890625e-05, + "step": 7617, + "training_step_time": 0.10724091529846191 + }, + { + "epoch": 1.16241455078125e-05, + "model_forward_time": 0.02503824234008789, + "step": 7618 + }, + { + "epoch": 1.16241455078125e-05, + "step": 7618, + "training_step_time": 0.19698643684387207 + }, + { + "epoch": 1.162567138671875e-05, + "model_forward_time": 0.024817943572998047, + "step": 7619 + }, + { + "epoch": 1.162567138671875e-05, + "step": 7619, + "training_step_time": 0.1044609546661377 + }, + { + "epoch": 1.1627197265625e-05, + "grad_norm": 0.7217997908592224, + "learning_rate": 8.90473525250761e-05, + "loss": 0.0555, + "step": 7620 + }, + { + "epoch": 1.1627197265625e-05, + "model_forward_time": 0.024636268615722656, + "step": 7620 + }, + { + "epoch": 1.1627197265625e-05, + "step": 7620, + "training_step_time": 0.10553312301635742 + }, + { + "epoch": 1.162872314453125e-05, + "model_forward_time": 0.02508234977722168, + "step": 7621 + }, + { + "epoch": 1.162872314453125e-05, + "step": 7621, + "training_step_time": 0.10552549362182617 + }, + { + "epoch": 1.16302490234375e-05, + "model_forward_time": 0.025265932083129883, + "step": 7622 + }, + { + "epoch": 1.16302490234375e-05, + "step": 7622, + "training_step_time": 0.10860824584960938 + }, + { + "epoch": 1.163177490234375e-05, + "model_forward_time": 0.025378704071044922, + "step": 7623 + }, + { + "epoch": 1.163177490234375e-05, + "step": 7623, + "training_step_time": 0.1076810359954834 + }, + { + "epoch": 1.163330078125e-05, + "model_forward_time": 0.025180339813232422, + "step": 7624 + }, + { + "epoch": 1.163330078125e-05, + "step": 7624, + "training_step_time": 0.10910868644714355 + }, + { + "epoch": 1.163482666015625e-05, + "model_forward_time": 0.025206327438354492, + "step": 7625 + }, + { + "epoch": 1.163482666015625e-05, + "step": 7625, + "training_step_time": 0.10742974281311035 + }, + { + "epoch": 1.16363525390625e-05, + "model_forward_time": 0.02530074119567871, + "step": 7626 + }, + { + "epoch": 1.16363525390625e-05, + "step": 7626, + "training_step_time": 0.1073293685913086 + }, + { + "epoch": 1.163787841796875e-05, + "model_forward_time": 0.025426864624023438, + "step": 7627 + }, + { + "epoch": 1.163787841796875e-05, + "step": 7627, + "training_step_time": 0.11109209060668945 + }, + { + "epoch": 1.1639404296875e-05, + "model_forward_time": 0.025487184524536133, + "step": 7628 + }, + { + "epoch": 1.1639404296875e-05, + "step": 7628, + "training_step_time": 0.10695099830627441 + }, + { + "epoch": 1.164093017578125e-05, + "model_forward_time": 0.02542281150817871, + "step": 7629 + }, + { + "epoch": 1.164093017578125e-05, + "step": 7629, + "training_step_time": 0.10611367225646973 + }, + { + "epoch": 1.16424560546875e-05, + "grad_norm": 0.3115273416042328, + "learning_rate": 8.901290371980393e-05, + "loss": 0.0589, + "step": 7630 + }, + { + "epoch": 1.16424560546875e-05, + "model_forward_time": 0.025999069213867188, + "step": 7630 + }, + { + "epoch": 1.16424560546875e-05, + "step": 7630, + "training_step_time": 0.10917162895202637 + }, + { + "epoch": 1.164398193359375e-05, + "model_forward_time": 0.02528977394104004, + "step": 7631 + }, + { + "epoch": 1.164398193359375e-05, + "step": 7631, + "training_step_time": 0.16918063163757324 + }, + { + "epoch": 1.16455078125e-05, + "model_forward_time": 0.024712562561035156, + "step": 7632 + }, + { + "epoch": 1.16455078125e-05, + "step": 7632, + "training_step_time": 0.16158485412597656 + }, + { + "epoch": 1.164703369140625e-05, + "model_forward_time": 0.02495598793029785, + "step": 7633 + }, + { + "epoch": 1.164703369140625e-05, + "step": 7633, + "training_step_time": 0.11144614219665527 + }, + { + "epoch": 1.16485595703125e-05, + "model_forward_time": 0.025044918060302734, + "step": 7634 + }, + { + "epoch": 1.16485595703125e-05, + "step": 7634, + "training_step_time": 0.17116045951843262 + }, + { + "epoch": 1.165008544921875e-05, + "model_forward_time": 0.024498701095581055, + "step": 7635 + }, + { + "epoch": 1.165008544921875e-05, + "step": 7635, + "training_step_time": 0.1633284091949463 + }, + { + "epoch": 1.1651611328125e-05, + "model_forward_time": 0.0246734619140625, + "step": 7636 + }, + { + "epoch": 1.1651611328125e-05, + "step": 7636, + "training_step_time": 0.104644775390625 + }, + { + "epoch": 1.165313720703125e-05, + "model_forward_time": 0.025096893310546875, + "step": 7637 + }, + { + "epoch": 1.165313720703125e-05, + "step": 7637, + "training_step_time": 0.10618901252746582 + }, + { + "epoch": 1.16546630859375e-05, + "model_forward_time": 0.025556087493896484, + "step": 7638 + }, + { + "epoch": 1.16546630859375e-05, + "step": 7638, + "training_step_time": 0.13782405853271484 + }, + { + "epoch": 1.165618896484375e-05, + "model_forward_time": 0.024988651275634766, + "step": 7639 + }, + { + "epoch": 1.165618896484375e-05, + "step": 7639, + "training_step_time": 0.1111452579498291 + }, + { + "epoch": 1.165771484375e-05, + "grad_norm": 0.3155267834663391, + "learning_rate": 8.897840751017506e-05, + "loss": 0.0561, + "step": 7640 + }, + { + "epoch": 1.165771484375e-05, + "model_forward_time": 0.024981021881103516, + "step": 7640 + }, + { + "epoch": 1.165771484375e-05, + "step": 7640, + "training_step_time": 0.22199630737304688 + }, + { + "epoch": 1.165924072265625e-05, + "model_forward_time": 0.024469375610351562, + "step": 7641 + }, + { + "epoch": 1.165924072265625e-05, + "step": 7641, + "training_step_time": 0.1360480785369873 + }, + { + "epoch": 1.16607666015625e-05, + "model_forward_time": 0.024828672409057617, + "step": 7642 + }, + { + "epoch": 1.16607666015625e-05, + "step": 7642, + "training_step_time": 0.1051030158996582 + }, + { + "epoch": 1.166229248046875e-05, + "model_forward_time": 0.024886369705200195, + "step": 7643 + }, + { + "epoch": 1.166229248046875e-05, + "step": 7643, + "training_step_time": 0.12057209014892578 + }, + { + "epoch": 1.1663818359375e-05, + "model_forward_time": 0.02578878402709961, + "step": 7644 + }, + { + "epoch": 1.1663818359375e-05, + "step": 7644, + "training_step_time": 0.11903691291809082 + }, + { + "epoch": 1.166534423828125e-05, + "model_forward_time": 0.025133609771728516, + "step": 7645 + }, + { + "epoch": 1.166534423828125e-05, + "step": 7645, + "training_step_time": 0.12076044082641602 + }, + { + "epoch": 1.16668701171875e-05, + "model_forward_time": 0.022945404052734375, + "step": 7646 + }, + { + "epoch": 1.16668701171875e-05, + "step": 7646, + "training_step_time": 0.11647319793701172 + }, + { + "epoch": 1.166839599609375e-05, + "model_forward_time": 0.02434849739074707, + "step": 7647 + }, + { + "epoch": 1.166839599609375e-05, + "step": 7647, + "training_step_time": 0.16655898094177246 + }, + { + "epoch": 1.1669921875e-05, + "model_forward_time": 0.024731874465942383, + "step": 7648 + }, + { + "epoch": 1.1669921875e-05, + "step": 7648, + "training_step_time": 0.1195073127746582 + }, + { + "epoch": 1.167144775390625e-05, + "model_forward_time": 0.02491021156311035, + "step": 7649 + }, + { + "epoch": 1.167144775390625e-05, + "step": 7649, + "training_step_time": 0.13245201110839844 + }, + { + "epoch": 1.16729736328125e-05, + "grad_norm": 0.4117908775806427, + "learning_rate": 8.894386393810563e-05, + "loss": 0.0502, + "step": 7650 + }, + { + "epoch": 1.16729736328125e-05, + "model_forward_time": 0.025406360626220703, + "step": 7650 + }, + { + "epoch": 1.16729736328125e-05, + "step": 7650, + "training_step_time": 0.11232972145080566 + }, + { + "epoch": 1.167449951171875e-05, + "model_forward_time": 0.025336742401123047, + "step": 7651 + }, + { + "epoch": 1.167449951171875e-05, + "step": 7651, + "training_step_time": 0.17204499244689941 + }, + { + "epoch": 1.1676025390625e-05, + "model_forward_time": 0.024525880813598633, + "step": 7652 + }, + { + "epoch": 1.1676025390625e-05, + "step": 7652, + "training_step_time": 0.13030672073364258 + }, + { + "epoch": 1.167755126953125e-05, + "model_forward_time": 0.024316072463989258, + "step": 7653 + }, + { + "epoch": 1.167755126953125e-05, + "step": 7653, + "training_step_time": 0.12222647666931152 + }, + { + "epoch": 1.16790771484375e-05, + "model_forward_time": 0.024706125259399414, + "step": 7654 + }, + { + "epoch": 1.16790771484375e-05, + "step": 7654, + "training_step_time": 0.1091926097869873 + }, + { + "epoch": 1.168060302734375e-05, + "model_forward_time": 0.02550506591796875, + "step": 7655 + }, + { + "epoch": 1.168060302734375e-05, + "step": 7655, + "training_step_time": 0.1096808910369873 + }, + { + "epoch": 1.168212890625e-05, + "model_forward_time": 0.024821758270263672, + "step": 7656 + }, + { + "epoch": 1.168212890625e-05, + "step": 7656, + "training_step_time": 0.10886716842651367 + }, + { + "epoch": 1.168365478515625e-05, + "model_forward_time": 0.0246274471282959, + "step": 7657 + }, + { + "epoch": 1.168365478515625e-05, + "step": 7657, + "training_step_time": 0.14403724670410156 + }, + { + "epoch": 1.16851806640625e-05, + "model_forward_time": 0.02502918243408203, + "step": 7658 + }, + { + "epoch": 1.16851806640625e-05, + "step": 7658, + "training_step_time": 0.13561058044433594 + }, + { + "epoch": 1.168670654296875e-05, + "model_forward_time": 0.024547576904296875, + "step": 7659 + }, + { + "epoch": 1.168670654296875e-05, + "step": 7659, + "training_step_time": 0.1077275276184082 + }, + { + "epoch": 1.1688232421875e-05, + "grad_norm": 0.5045364499092102, + "learning_rate": 8.890927304556935e-05, + "loss": 0.054, + "step": 7660 + }, + { + "epoch": 1.1688232421875e-05, + "model_forward_time": 0.025346755981445312, + "step": 7660 + }, + { + "epoch": 1.1688232421875e-05, + "step": 7660, + "training_step_time": 0.11255931854248047 + }, + { + "epoch": 1.168975830078125e-05, + "model_forward_time": 0.02481698989868164, + "step": 7661 + }, + { + "epoch": 1.168975830078125e-05, + "step": 7661, + "training_step_time": 0.1192939281463623 + }, + { + "epoch": 1.16912841796875e-05, + "model_forward_time": 0.02497720718383789, + "step": 7662 + }, + { + "epoch": 1.16912841796875e-05, + "step": 7662, + "training_step_time": 0.10813689231872559 + }, + { + "epoch": 1.169281005859375e-05, + "model_forward_time": 0.025302410125732422, + "step": 7663 + }, + { + "epoch": 1.169281005859375e-05, + "step": 7663, + "training_step_time": 0.1939830780029297 + }, + { + "epoch": 1.16943359375e-05, + "model_forward_time": 0.024466991424560547, + "step": 7664 + }, + { + "epoch": 1.16943359375e-05, + "step": 7664, + "training_step_time": 0.10402035713195801 + }, + { + "epoch": 1.169586181640625e-05, + "model_forward_time": 0.025084733963012695, + "step": 7665 + }, + { + "epoch": 1.169586181640625e-05, + "step": 7665, + "training_step_time": 0.11043930053710938 + }, + { + "epoch": 1.16973876953125e-05, + "model_forward_time": 0.025323152542114258, + "step": 7666 + }, + { + "epoch": 1.16973876953125e-05, + "step": 7666, + "training_step_time": 0.11245870590209961 + }, + { + "epoch": 1.169891357421875e-05, + "model_forward_time": 0.025198698043823242, + "step": 7667 + }, + { + "epoch": 1.169891357421875e-05, + "step": 7667, + "training_step_time": 0.10982131958007812 + }, + { + "epoch": 1.1700439453125e-05, + "model_forward_time": 0.025183677673339844, + "step": 7668 + }, + { + "epoch": 1.1700439453125e-05, + "step": 7668, + "training_step_time": 0.10924649238586426 + }, + { + "epoch": 1.170196533203125e-05, + "model_forward_time": 0.02520608901977539, + "step": 7669 + }, + { + "epoch": 1.170196533203125e-05, + "step": 7669, + "training_step_time": 0.10728096961975098 + }, + { + "epoch": 1.17034912109375e-05, + "grad_norm": 0.5160402655601501, + "learning_rate": 8.887463487459742e-05, + "loss": 0.0618, + "step": 7670 + }, + { + "epoch": 1.17034912109375e-05, + "model_forward_time": 0.025020837783813477, + "step": 7670 + }, + { + "epoch": 1.17034912109375e-05, + "step": 7670, + "training_step_time": 0.10979628562927246 + }, + { + "epoch": 1.170501708984375e-05, + "model_forward_time": 0.025074005126953125, + "step": 7671 + }, + { + "epoch": 1.170501708984375e-05, + "step": 7671, + "training_step_time": 0.11176061630249023 + }, + { + "epoch": 1.170654296875e-05, + "model_forward_time": 0.025228261947631836, + "step": 7672 + }, + { + "epoch": 1.170654296875e-05, + "step": 7672, + "training_step_time": 0.1084904670715332 + }, + { + "epoch": 1.170806884765625e-05, + "model_forward_time": 0.025189876556396484, + "step": 7673 + }, + { + "epoch": 1.170806884765625e-05, + "step": 7673, + "training_step_time": 0.10861968994140625 + }, + { + "epoch": 1.17095947265625e-05, + "model_forward_time": 0.02464747428894043, + "step": 7674 + }, + { + "epoch": 1.17095947265625e-05, + "step": 7674, + "training_step_time": 0.11990880966186523 + }, + { + "epoch": 1.171112060546875e-05, + "model_forward_time": 0.024719953536987305, + "step": 7675 + }, + { + "epoch": 1.171112060546875e-05, + "step": 7675, + "training_step_time": 0.22686266899108887 + }, + { + "epoch": 1.1712646484375e-05, + "model_forward_time": 0.024109363555908203, + "step": 7676 + }, + { + "epoch": 1.1712646484375e-05, + "step": 7676, + "training_step_time": 0.1515214443206787 + }, + { + "epoch": 1.171417236328125e-05, + "model_forward_time": 0.024283647537231445, + "step": 7677 + }, + { + "epoch": 1.171417236328125e-05, + "step": 7677, + "training_step_time": 0.1644735336303711 + }, + { + "epoch": 1.17156982421875e-05, + "model_forward_time": 0.024814844131469727, + "step": 7678 + }, + { + "epoch": 1.17156982421875e-05, + "step": 7678, + "training_step_time": 0.13182544708251953 + }, + { + "epoch": 1.171722412109375e-05, + "model_forward_time": 0.025079727172851562, + "step": 7679 + }, + { + "epoch": 1.171722412109375e-05, + "step": 7679, + "training_step_time": 0.12630176544189453 + }, + { + "epoch": 1.171875e-05, + "grad_norm": 0.7796112298965454, + "learning_rate": 8.883994946727849e-05, + "loss": 0.0631, + "step": 7680 + }, + { + "epoch": 1.171875e-05, + "model_forward_time": 0.02504730224609375, + "step": 7680 + }, + { + "epoch": 1.171875e-05, + "step": 7680, + "training_step_time": 0.12173128128051758 + }, + { + "epoch": 1.172027587890625e-05, + "model_forward_time": 0.025322914123535156, + "step": 7681 + }, + { + "epoch": 1.172027587890625e-05, + "step": 7681, + "training_step_time": 0.11916947364807129 + }, + { + "epoch": 1.17218017578125e-05, + "model_forward_time": 0.024396419525146484, + "step": 7682 + }, + { + "epoch": 1.17218017578125e-05, + "step": 7682, + "training_step_time": 0.14658784866333008 + }, + { + "epoch": 1.172332763671875e-05, + "model_forward_time": 0.024469614028930664, + "step": 7683 + }, + { + "epoch": 1.172332763671875e-05, + "step": 7683, + "training_step_time": 0.11020231246948242 + }, + { + "epoch": 1.1724853515625e-05, + "model_forward_time": 0.025097370147705078, + "step": 7684 + }, + { + "epoch": 1.1724853515625e-05, + "step": 7684, + "training_step_time": 0.11536622047424316 + }, + { + "epoch": 1.172637939453125e-05, + "model_forward_time": 0.024798870086669922, + "step": 7685 + }, + { + "epoch": 1.172637939453125e-05, + "step": 7685, + "training_step_time": 0.11691856384277344 + }, + { + "epoch": 1.17279052734375e-05, + "model_forward_time": 0.025141239166259766, + "step": 7686 + }, + { + "epoch": 1.17279052734375e-05, + "step": 7686, + "training_step_time": 0.13804841041564941 + }, + { + "epoch": 1.172943115234375e-05, + "model_forward_time": 0.024703502655029297, + "step": 7687 + }, + { + "epoch": 1.172943115234375e-05, + "step": 7687, + "training_step_time": 0.11431169509887695 + }, + { + "epoch": 1.173095703125e-05, + "model_forward_time": 0.025166988372802734, + "step": 7688 + }, + { + "epoch": 1.173095703125e-05, + "step": 7688, + "training_step_time": 0.11120200157165527 + }, + { + "epoch": 1.173248291015625e-05, + "model_forward_time": 0.024980783462524414, + "step": 7689 + }, + { + "epoch": 1.173248291015625e-05, + "step": 7689, + "training_step_time": 0.10812258720397949 + }, + { + "epoch": 1.17340087890625e-05, + "grad_norm": 1.3057737350463867, + "learning_rate": 8.880521686575857e-05, + "loss": 0.0636, + "step": 7690 + }, + { + "epoch": 1.17340087890625e-05, + "model_forward_time": 0.02374267578125, + "step": 7690 + }, + { + "epoch": 1.17340087890625e-05, + "step": 7690, + "training_step_time": 0.10821175575256348 + }, + { + "epoch": 1.173553466796875e-05, + "model_forward_time": 0.02387523651123047, + "step": 7691 + }, + { + "epoch": 1.173553466796875e-05, + "step": 7691, + "training_step_time": 0.11259889602661133 + }, + { + "epoch": 1.1737060546875e-05, + "model_forward_time": 0.02520895004272461, + "step": 7692 + }, + { + "epoch": 1.1737060546875e-05, + "step": 7692, + "training_step_time": 0.20957446098327637 + }, + { + "epoch": 1.173858642578125e-05, + "model_forward_time": 0.024709701538085938, + "step": 7693 + }, + { + "epoch": 1.173858642578125e-05, + "step": 7693, + "training_step_time": 0.1916193962097168 + }, + { + "epoch": 1.17401123046875e-05, + "model_forward_time": 0.024043798446655273, + "step": 7694 + }, + { + "epoch": 1.17401123046875e-05, + "step": 7694, + "training_step_time": 0.17976117134094238 + }, + { + "epoch": 1.174163818359375e-05, + "model_forward_time": 0.024140119552612305, + "step": 7695 + }, + { + "epoch": 1.174163818359375e-05, + "step": 7695, + "training_step_time": 0.17760729789733887 + }, + { + "epoch": 1.17431640625e-05, + "model_forward_time": 0.024209260940551758, + "step": 7696 + }, + { + "epoch": 1.17431640625e-05, + "step": 7696, + "training_step_time": 0.18854856491088867 + }, + { + "epoch": 1.174468994140625e-05, + "model_forward_time": 0.025072097778320312, + "step": 7697 + }, + { + "epoch": 1.174468994140625e-05, + "step": 7697, + "training_step_time": 0.14582109451293945 + }, + { + "epoch": 1.17462158203125e-05, + "model_forward_time": 0.023628950119018555, + "step": 7698 + }, + { + "epoch": 1.17462158203125e-05, + "step": 7698, + "training_step_time": 0.12953686714172363 + }, + { + "epoch": 1.174774169921875e-05, + "model_forward_time": 0.023647546768188477, + "step": 7699 + }, + { + "epoch": 1.174774169921875e-05, + "step": 7699, + "training_step_time": 0.19784903526306152 + }, + { + "epoch": 1.1749267578125e-05, + "grad_norm": 0.541006326675415, + "learning_rate": 8.877043711224108e-05, + "loss": 0.0444, + "step": 7700 + }, + { + "epoch": 1.1749267578125e-05, + "model_forward_time": 0.024860858917236328, + "step": 7700 + }, + { + "epoch": 1.1749267578125e-05, + "step": 7700, + "training_step_time": 0.1337127685546875 + }, + { + "epoch": 1.175079345703125e-05, + "model_forward_time": 0.024201631546020508, + "step": 7701 + }, + { + "epoch": 1.175079345703125e-05, + "step": 7701, + "training_step_time": 0.11581540107727051 + }, + { + "epoch": 1.17523193359375e-05, + "model_forward_time": 0.026084423065185547, + "step": 7702 + }, + { + "epoch": 1.17523193359375e-05, + "step": 7702, + "training_step_time": 0.11082935333251953 + }, + { + "epoch": 1.175384521484375e-05, + "model_forward_time": 0.025341510772705078, + "step": 7703 + }, + { + "epoch": 1.175384521484375e-05, + "step": 7703, + "training_step_time": 0.11129283905029297 + }, + { + "epoch": 1.175537109375e-05, + "model_forward_time": 0.02535414695739746, + "step": 7704 + }, + { + "epoch": 1.175537109375e-05, + "step": 7704, + "training_step_time": 0.18598604202270508 + }, + { + "epoch": 1.175689697265625e-05, + "model_forward_time": 0.02468252182006836, + "step": 7705 + }, + { + "epoch": 1.175689697265625e-05, + "step": 7705, + "training_step_time": 0.12248086929321289 + }, + { + "epoch": 1.17584228515625e-05, + "model_forward_time": 0.024626970291137695, + "step": 7706 + }, + { + "epoch": 1.17584228515625e-05, + "step": 7706, + "training_step_time": 0.10551333427429199 + }, + { + "epoch": 1.175994873046875e-05, + "model_forward_time": 0.025740861892700195, + "step": 7707 + }, + { + "epoch": 1.175994873046875e-05, + "step": 7707, + "training_step_time": 0.10759902000427246 + }, + { + "epoch": 1.1761474609375e-05, + "model_forward_time": 0.025030136108398438, + "step": 7708 + }, + { + "epoch": 1.1761474609375e-05, + "step": 7708, + "training_step_time": 0.1120293140411377 + }, + { + "epoch": 1.176300048828125e-05, + "model_forward_time": 0.024737834930419922, + "step": 7709 + }, + { + "epoch": 1.176300048828125e-05, + "step": 7709, + "training_step_time": 0.10749530792236328 + }, + { + "epoch": 1.17645263671875e-05, + "grad_norm": 0.8377155065536499, + "learning_rate": 8.873561024898668e-05, + "loss": 0.0627, + "step": 7710 + }, + { + "epoch": 1.17645263671875e-05, + "model_forward_time": 0.025483369827270508, + "step": 7710 + }, + { + "epoch": 1.17645263671875e-05, + "step": 7710, + "training_step_time": 0.10772919654846191 + }, + { + "epoch": 1.176605224609375e-05, + "model_forward_time": 0.02534198760986328, + "step": 7711 + }, + { + "epoch": 1.176605224609375e-05, + "step": 7711, + "training_step_time": 0.11078071594238281 + }, + { + "epoch": 1.1767578125e-05, + "model_forward_time": 0.025656700134277344, + "step": 7712 + }, + { + "epoch": 1.1767578125e-05, + "step": 7712, + "training_step_time": 0.11186408996582031 + }, + { + "epoch": 1.176910400390625e-05, + "model_forward_time": 0.025247812271118164, + "step": 7713 + }, + { + "epoch": 1.176910400390625e-05, + "step": 7713, + "training_step_time": 0.10705709457397461 + }, + { + "epoch": 1.17706298828125e-05, + "model_forward_time": 0.02513599395751953, + "step": 7714 + }, + { + "epoch": 1.17706298828125e-05, + "step": 7714, + "training_step_time": 0.10616803169250488 + }, + { + "epoch": 1.177215576171875e-05, + "model_forward_time": 0.02587413787841797, + "step": 7715 + }, + { + "epoch": 1.177215576171875e-05, + "step": 7715, + "training_step_time": 0.10958504676818848 + }, + { + "epoch": 1.1773681640625e-05, + "model_forward_time": 0.02522873878479004, + "step": 7716 + }, + { + "epoch": 1.1773681640625e-05, + "step": 7716, + "training_step_time": 0.10745811462402344 + }, + { + "epoch": 1.177520751953125e-05, + "model_forward_time": 0.024819612503051758, + "step": 7717 + }, + { + "epoch": 1.177520751953125e-05, + "step": 7717, + "training_step_time": 0.10750555992126465 + }, + { + "epoch": 1.17767333984375e-05, + "model_forward_time": 0.025099515914916992, + "step": 7718 + }, + { + "epoch": 1.17767333984375e-05, + "step": 7718, + "training_step_time": 0.11246013641357422 + }, + { + "epoch": 1.177825927734375e-05, + "model_forward_time": 0.02562546730041504, + "step": 7719 + }, + { + "epoch": 1.177825927734375e-05, + "step": 7719, + "training_step_time": 0.1166541576385498 + }, + { + "epoch": 1.177978515625e-05, + "grad_norm": 0.4356689155101776, + "learning_rate": 8.87007363183133e-05, + "loss": 0.05, + "step": 7720 + }, + { + "epoch": 1.177978515625e-05, + "model_forward_time": 0.025404691696166992, + "step": 7720 + }, + { + "epoch": 1.177978515625e-05, + "step": 7720, + "training_step_time": 0.11284756660461426 + }, + { + "epoch": 1.178131103515625e-05, + "model_forward_time": 0.025376081466674805, + "step": 7721 + }, + { + "epoch": 1.178131103515625e-05, + "step": 7721, + "training_step_time": 0.211378812789917 + }, + { + "epoch": 1.17828369140625e-05, + "model_forward_time": 0.02438521385192871, + "step": 7722 + }, + { + "epoch": 1.17828369140625e-05, + "step": 7722, + "training_step_time": 0.11699438095092773 + }, + { + "epoch": 1.178436279296875e-05, + "model_forward_time": 0.02436351776123047, + "step": 7723 + }, + { + "epoch": 1.178436279296875e-05, + "step": 7723, + "training_step_time": 0.1054234504699707 + }, + { + "epoch": 1.1785888671875e-05, + "model_forward_time": 0.025081157684326172, + "step": 7724 + }, + { + "epoch": 1.1785888671875e-05, + "step": 7724, + "training_step_time": 0.10701322555541992 + }, + { + "epoch": 1.178741455078125e-05, + "model_forward_time": 0.025431156158447266, + "step": 7725 + }, + { + "epoch": 1.178741455078125e-05, + "step": 7725, + "training_step_time": 0.10744166374206543 + }, + { + "epoch": 1.17889404296875e-05, + "model_forward_time": 0.02550959587097168, + "step": 7726 + }, + { + "epoch": 1.17889404296875e-05, + "step": 7726, + "training_step_time": 0.16896700859069824 + }, + { + "epoch": 1.179046630859375e-05, + "model_forward_time": 0.024020671844482422, + "step": 7727 + }, + { + "epoch": 1.179046630859375e-05, + "step": 7727, + "training_step_time": 0.11316370964050293 + }, + { + "epoch": 1.17919921875e-05, + "model_forward_time": 0.02482128143310547, + "step": 7728 + }, + { + "epoch": 1.17919921875e-05, + "step": 7728, + "training_step_time": 0.10796618461608887 + }, + { + "epoch": 1.179351806640625e-05, + "model_forward_time": 0.02555537223815918, + "step": 7729 + }, + { + "epoch": 1.179351806640625e-05, + "step": 7729, + "training_step_time": 0.11862707138061523 + }, + { + "epoch": 1.17950439453125e-05, + "grad_norm": 0.318733274936676, + "learning_rate": 8.866581536259605e-05, + "loss": 0.0583, + "step": 7730 + }, + { + "epoch": 1.17950439453125e-05, + "model_forward_time": 0.02557849884033203, + "step": 7730 + }, + { + "epoch": 1.17950439453125e-05, + "step": 7730, + "training_step_time": 0.1253345012664795 + }, + { + "epoch": 1.179656982421875e-05, + "model_forward_time": 0.02507305145263672, + "step": 7731 + }, + { + "epoch": 1.179656982421875e-05, + "step": 7731, + "training_step_time": 0.1106255054473877 + }, + { + "epoch": 1.1798095703125e-05, + "model_forward_time": 0.02522444725036621, + "step": 7732 + }, + { + "epoch": 1.1798095703125e-05, + "step": 7732, + "training_step_time": 0.11439371109008789 + }, + { + "epoch": 1.179962158203125e-05, + "model_forward_time": 0.025400876998901367, + "step": 7733 + }, + { + "epoch": 1.179962158203125e-05, + "step": 7733, + "training_step_time": 0.11259913444519043 + }, + { + "epoch": 1.18011474609375e-05, + "model_forward_time": 0.024643421173095703, + "step": 7734 + }, + { + "epoch": 1.18011474609375e-05, + "step": 7734, + "training_step_time": 0.11178755760192871 + }, + { + "epoch": 1.180267333984375e-05, + "model_forward_time": 0.02538776397705078, + "step": 7735 + }, + { + "epoch": 1.180267333984375e-05, + "step": 7735, + "training_step_time": 0.10647177696228027 + }, + { + "epoch": 1.180419921875e-05, + "model_forward_time": 0.025136947631835938, + "step": 7736 + }, + { + "epoch": 1.180419921875e-05, + "step": 7736, + "training_step_time": 0.13620376586914062 + }, + { + "epoch": 1.180572509765625e-05, + "model_forward_time": 0.025172710418701172, + "step": 7737 + }, + { + "epoch": 1.180572509765625e-05, + "step": 7737, + "training_step_time": 0.10985875129699707 + }, + { + "epoch": 1.18072509765625e-05, + "model_forward_time": 0.024355649948120117, + "step": 7738 + }, + { + "epoch": 1.18072509765625e-05, + "step": 7738, + "training_step_time": 0.13913607597351074 + }, + { + "epoch": 1.180877685546875e-05, + "model_forward_time": 0.024984121322631836, + "step": 7739 + }, + { + "epoch": 1.180877685546875e-05, + "step": 7739, + "training_step_time": 0.1537153720855713 + }, + { + "epoch": 1.1810302734375e-05, + "grad_norm": 0.5249224901199341, + "learning_rate": 8.863084742426719e-05, + "loss": 0.0526, + "step": 7740 + }, + { + "epoch": 1.1810302734375e-05, + "model_forward_time": 0.024425029754638672, + "step": 7740 + }, + { + "epoch": 1.1810302734375e-05, + "step": 7740, + "training_step_time": 0.21947836875915527 + }, + { + "epoch": 1.181182861328125e-05, + "model_forward_time": 0.025855302810668945, + "step": 7741 + }, + { + "epoch": 1.181182861328125e-05, + "step": 7741, + "training_step_time": 0.12151646614074707 + }, + { + "epoch": 1.18133544921875e-05, + "model_forward_time": 0.023589611053466797, + "step": 7742 + }, + { + "epoch": 1.18133544921875e-05, + "step": 7742, + "training_step_time": 0.1079866886138916 + }, + { + "epoch": 1.181488037109375e-05, + "model_forward_time": 0.02535223960876465, + "step": 7743 + }, + { + "epoch": 1.181488037109375e-05, + "step": 7743, + "training_step_time": 0.10709810256958008 + }, + { + "epoch": 1.181640625e-05, + "model_forward_time": 0.027086257934570312, + "step": 7744 + }, + { + "epoch": 1.181640625e-05, + "step": 7744, + "training_step_time": 0.11165380477905273 + }, + { + "epoch": 1.181793212890625e-05, + "model_forward_time": 0.02668166160583496, + "step": 7745 + }, + { + "epoch": 1.181793212890625e-05, + "step": 7745, + "training_step_time": 0.12515664100646973 + }, + { + "epoch": 1.18194580078125e-05, + "model_forward_time": 0.024979114532470703, + "step": 7746 + }, + { + "epoch": 1.18194580078125e-05, + "step": 7746, + "training_step_time": 0.12214446067810059 + }, + { + "epoch": 1.182098388671875e-05, + "model_forward_time": 0.025182485580444336, + "step": 7747 + }, + { + "epoch": 1.182098388671875e-05, + "step": 7747, + "training_step_time": 0.12251853942871094 + }, + { + "epoch": 1.1822509765625e-05, + "model_forward_time": 0.02502608299255371, + "step": 7748 + }, + { + "epoch": 1.1822509765625e-05, + "step": 7748, + "training_step_time": 0.20865368843078613 + }, + { + "epoch": 1.182403564453125e-05, + "model_forward_time": 0.024486541748046875, + "step": 7749 + }, + { + "epoch": 1.182403564453125e-05, + "step": 7749, + "training_step_time": 0.11443901062011719 + }, + { + "epoch": 1.18255615234375e-05, + "grad_norm": 0.693023681640625, + "learning_rate": 8.859583254581605e-05, + "loss": 0.0467, + "step": 7750 + }, + { + "epoch": 1.18255615234375e-05, + "model_forward_time": 0.024216175079345703, + "step": 7750 + }, + { + "epoch": 1.18255615234375e-05, + "step": 7750, + "training_step_time": 0.1847379207611084 + }, + { + "epoch": 1.182708740234375e-05, + "model_forward_time": 0.024264812469482422, + "step": 7751 + }, + { + "epoch": 1.182708740234375e-05, + "step": 7751, + "training_step_time": 0.10838794708251953 + }, + { + "epoch": 1.182861328125e-05, + "model_forward_time": 0.024150609970092773, + "step": 7752 + }, + { + "epoch": 1.182861328125e-05, + "step": 7752, + "training_step_time": 0.10871672630310059 + }, + { + "epoch": 1.183013916015625e-05, + "model_forward_time": 0.025004148483276367, + "step": 7753 + }, + { + "epoch": 1.183013916015625e-05, + "step": 7753, + "training_step_time": 0.10836505889892578 + }, + { + "epoch": 1.18316650390625e-05, + "model_forward_time": 0.02517223358154297, + "step": 7754 + }, + { + "epoch": 1.18316650390625e-05, + "step": 7754, + "training_step_time": 0.10822820663452148 + }, + { + "epoch": 1.183319091796875e-05, + "model_forward_time": 0.024975061416625977, + "step": 7755 + }, + { + "epoch": 1.183319091796875e-05, + "step": 7755, + "training_step_time": 0.10958290100097656 + }, + { + "epoch": 1.1834716796875e-05, + "model_forward_time": 0.025902271270751953, + "step": 7756 + }, + { + "epoch": 1.1834716796875e-05, + "step": 7756, + "training_step_time": 0.11475801467895508 + }, + { + "epoch": 1.183624267578125e-05, + "model_forward_time": 0.02540278434753418, + "step": 7757 + }, + { + "epoch": 1.183624267578125e-05, + "step": 7757, + "training_step_time": 0.11082601547241211 + }, + { + "epoch": 1.18377685546875e-05, + "model_forward_time": 0.02541375160217285, + "step": 7758 + }, + { + "epoch": 1.18377685546875e-05, + "step": 7758, + "training_step_time": 0.10836911201477051 + }, + { + "epoch": 1.183929443359375e-05, + "model_forward_time": 0.025089740753173828, + "step": 7759 + }, + { + "epoch": 1.183929443359375e-05, + "step": 7759, + "training_step_time": 0.1086115837097168 + }, + { + "epoch": 1.18408203125e-05, + "grad_norm": 0.4121415317058563, + "learning_rate": 8.856077076978902e-05, + "loss": 0.0576, + "step": 7760 + }, + { + "epoch": 1.18408203125e-05, + "model_forward_time": 0.024975299835205078, + "step": 7760 + }, + { + "epoch": 1.18408203125e-05, + "step": 7760, + "training_step_time": 0.10617828369140625 + }, + { + "epoch": 1.184234619140625e-05, + "model_forward_time": 0.025573253631591797, + "step": 7761 + }, + { + "epoch": 1.184234619140625e-05, + "step": 7761, + "training_step_time": 0.10631155967712402 + }, + { + "epoch": 1.18438720703125e-05, + "model_forward_time": 0.025957584381103516, + "step": 7762 + }, + { + "epoch": 1.18438720703125e-05, + "step": 7762, + "training_step_time": 0.10744476318359375 + }, + { + "epoch": 1.184539794921875e-05, + "model_forward_time": 0.025270938873291016, + "step": 7763 + }, + { + "epoch": 1.184539794921875e-05, + "step": 7763, + "training_step_time": 0.10683321952819824 + }, + { + "epoch": 1.1846923828125e-05, + "model_forward_time": 0.025553226470947266, + "step": 7764 + }, + { + "epoch": 1.1846923828125e-05, + "step": 7764, + "training_step_time": 0.12814974784851074 + }, + { + "epoch": 1.184844970703125e-05, + "model_forward_time": 0.025400638580322266, + "step": 7765 + }, + { + "epoch": 1.184844970703125e-05, + "step": 7765, + "training_step_time": 0.11033129692077637 + }, + { + "epoch": 1.18499755859375e-05, + "model_forward_time": 0.02535390853881836, + "step": 7766 + }, + { + "epoch": 1.18499755859375e-05, + "step": 7766, + "training_step_time": 0.11047124862670898 + }, + { + "epoch": 1.185150146484375e-05, + "model_forward_time": 0.025304079055786133, + "step": 7767 + }, + { + "epoch": 1.185150146484375e-05, + "step": 7767, + "training_step_time": 0.215501070022583 + }, + { + "epoch": 1.185302734375e-05, + "model_forward_time": 0.02460789680480957, + "step": 7768 + }, + { + "epoch": 1.185302734375e-05, + "step": 7768, + "training_step_time": 0.11265802383422852 + }, + { + "epoch": 1.185455322265625e-05, + "model_forward_time": 0.024341821670532227, + "step": 7769 + }, + { + "epoch": 1.185455322265625e-05, + "step": 7769, + "training_step_time": 0.10685324668884277 + }, + { + "epoch": 1.18560791015625e-05, + "grad_norm": 0.3426803648471832, + "learning_rate": 8.852566213878947e-05, + "loss": 0.0496, + "step": 7770 + }, + { + "epoch": 1.18560791015625e-05, + "model_forward_time": 0.024798154830932617, + "step": 7770 + }, + { + "epoch": 1.18560791015625e-05, + "step": 7770, + "training_step_time": 0.16218066215515137 + }, + { + "epoch": 1.185760498046875e-05, + "model_forward_time": 0.024543285369873047, + "step": 7771 + }, + { + "epoch": 1.185760498046875e-05, + "step": 7771, + "training_step_time": 0.19092059135437012 + }, + { + "epoch": 1.1859130859375e-05, + "model_forward_time": 0.024196624755859375, + "step": 7772 + }, + { + "epoch": 1.1859130859375e-05, + "step": 7772, + "training_step_time": 0.17877650260925293 + }, + { + "epoch": 1.186065673828125e-05, + "model_forward_time": 0.024291515350341797, + "step": 7773 + }, + { + "epoch": 1.186065673828125e-05, + "step": 7773, + "training_step_time": 0.213623046875 + }, + { + "epoch": 1.18621826171875e-05, + "model_forward_time": 0.024327516555786133, + "step": 7774 + }, + { + "epoch": 1.18621826171875e-05, + "step": 7774, + "training_step_time": 0.12874150276184082 + }, + { + "epoch": 1.186370849609375e-05, + "model_forward_time": 0.024349212646484375, + "step": 7775 + }, + { + "epoch": 1.186370849609375e-05, + "step": 7775, + "training_step_time": 0.10973215103149414 + }, + { + "epoch": 1.1865234375e-05, + "model_forward_time": 0.025148391723632812, + "step": 7776 + }, + { + "epoch": 1.1865234375e-05, + "step": 7776, + "training_step_time": 0.10922837257385254 + }, + { + "epoch": 1.186676025390625e-05, + "model_forward_time": 0.026033401489257812, + "step": 7777 + }, + { + "epoch": 1.186676025390625e-05, + "step": 7777, + "training_step_time": 0.11815357208251953 + }, + { + "epoch": 1.18682861328125e-05, + "model_forward_time": 0.025121688842773438, + "step": 7778 + }, + { + "epoch": 1.18682861328125e-05, + "step": 7778, + "training_step_time": 0.10730695724487305 + }, + { + "epoch": 1.186981201171875e-05, + "model_forward_time": 0.025010347366333008, + "step": 7779 + }, + { + "epoch": 1.186981201171875e-05, + "step": 7779, + "training_step_time": 0.11321854591369629 + }, + { + "epoch": 1.1871337890625e-05, + "grad_norm": 0.4797683656215668, + "learning_rate": 8.849050669547768e-05, + "loss": 0.0631, + "step": 7780 + }, + { + "epoch": 1.1871337890625e-05, + "model_forward_time": 0.02532815933227539, + "step": 7780 + }, + { + "epoch": 1.1871337890625e-05, + "step": 7780, + "training_step_time": 0.1437220573425293 + }, + { + "epoch": 1.187286376953125e-05, + "model_forward_time": 0.025399446487426758, + "step": 7781 + }, + { + "epoch": 1.187286376953125e-05, + "step": 7781, + "training_step_time": 0.11267495155334473 + }, + { + "epoch": 1.18743896484375e-05, + "model_forward_time": 0.024983644485473633, + "step": 7782 + }, + { + "epoch": 1.18743896484375e-05, + "step": 7782, + "training_step_time": 0.19978928565979004 + }, + { + "epoch": 1.187591552734375e-05, + "model_forward_time": 0.024271011352539062, + "step": 7783 + }, + { + "epoch": 1.187591552734375e-05, + "step": 7783, + "training_step_time": 0.20637893676757812 + }, + { + "epoch": 1.187744140625e-05, + "model_forward_time": 0.024370670318603516, + "step": 7784 + }, + { + "epoch": 1.187744140625e-05, + "step": 7784, + "training_step_time": 0.20615744590759277 + }, + { + "epoch": 1.187896728515625e-05, + "model_forward_time": 0.023543357849121094, + "step": 7785 + }, + { + "epoch": 1.187896728515625e-05, + "step": 7785, + "training_step_time": 0.18914556503295898 + }, + { + "epoch": 1.18804931640625e-05, + "model_forward_time": 0.024684429168701172, + "step": 7786 + }, + { + "epoch": 1.18804931640625e-05, + "step": 7786, + "training_step_time": 0.10544657707214355 + }, + { + "epoch": 1.188201904296875e-05, + "model_forward_time": 0.024766206741333008, + "step": 7787 + }, + { + "epoch": 1.188201904296875e-05, + "step": 7787, + "training_step_time": 0.14011359214782715 + }, + { + "epoch": 1.1883544921875e-05, + "model_forward_time": 0.025393247604370117, + "step": 7788 + }, + { + "epoch": 1.1883544921875e-05, + "step": 7788, + "training_step_time": 0.1330416202545166 + }, + { + "epoch": 1.188507080078125e-05, + "model_forward_time": 0.024828672409057617, + "step": 7789 + }, + { + "epoch": 1.188507080078125e-05, + "step": 7789, + "training_step_time": 0.11042213439941406 + }, + { + "epoch": 1.18865966796875e-05, + "grad_norm": 0.6274198889732361, + "learning_rate": 8.845530448257085e-05, + "loss": 0.0545, + "step": 7790 + }, + { + "epoch": 1.18865966796875e-05, + "model_forward_time": 0.0285036563873291, + "step": 7790 + }, + { + "epoch": 1.18865966796875e-05, + "step": 7790, + "training_step_time": 0.11602091789245605 + }, + { + "epoch": 1.188812255859375e-05, + "model_forward_time": 0.02609086036682129, + "step": 7791 + }, + { + "epoch": 1.188812255859375e-05, + "step": 7791, + "training_step_time": 0.10823225975036621 + }, + { + "epoch": 1.18896484375e-05, + "model_forward_time": 0.026993274688720703, + "step": 7792 + }, + { + "epoch": 1.18896484375e-05, + "step": 7792, + "training_step_time": 0.17877626419067383 + }, + { + "epoch": 1.189117431640625e-05, + "model_forward_time": 0.025026321411132812, + "step": 7793 + }, + { + "epoch": 1.189117431640625e-05, + "step": 7793, + "training_step_time": 0.20397710800170898 + }, + { + "epoch": 1.18927001953125e-05, + "model_forward_time": 0.024626493453979492, + "step": 7794 + }, + { + "epoch": 1.18927001953125e-05, + "step": 7794, + "training_step_time": 0.19957852363586426 + }, + { + "epoch": 1.189422607421875e-05, + "model_forward_time": 0.02411341667175293, + "step": 7795 + }, + { + "epoch": 1.189422607421875e-05, + "step": 7795, + "training_step_time": 0.19173884391784668 + }, + { + "epoch": 1.1895751953125e-05, + "model_forward_time": 0.025067567825317383, + "step": 7796 + }, + { + "epoch": 1.1895751953125e-05, + "step": 7796, + "training_step_time": 0.17390775680541992 + }, + { + "epoch": 1.189727783203125e-05, + "model_forward_time": 0.02463245391845703, + "step": 7797 + }, + { + "epoch": 1.189727783203125e-05, + "step": 7797, + "training_step_time": 0.17256402969360352 + }, + { + "epoch": 1.18988037109375e-05, + "model_forward_time": 0.0251924991607666, + "step": 7798 + }, + { + "epoch": 1.18988037109375e-05, + "step": 7798, + "training_step_time": 0.11508059501647949 + }, + { + "epoch": 1.190032958984375e-05, + "model_forward_time": 0.025096654891967773, + "step": 7799 + }, + { + "epoch": 1.190032958984375e-05, + "step": 7799, + "training_step_time": 0.10608148574829102 + }, + { + "epoch": 1.190185546875e-05, + "grad_norm": 0.5842939615249634, + "learning_rate": 8.842005554284296e-05, + "loss": 0.0455, + "step": 7800 + }, + { + "epoch": 1.190185546875e-05, + "model_forward_time": 0.025037288665771484, + "step": 7800 + }, + { + "epoch": 1.190185546875e-05, + "step": 7800, + "training_step_time": 0.10821127891540527 + }, + { + "epoch": 1.190338134765625e-05, + "model_forward_time": 0.025350093841552734, + "step": 7801 + }, + { + "epoch": 1.190338134765625e-05, + "step": 7801, + "training_step_time": 0.10711550712585449 + }, + { + "epoch": 1.19049072265625e-05, + "model_forward_time": 0.02607274055480957, + "step": 7802 + }, + { + "epoch": 1.19049072265625e-05, + "step": 7802, + "training_step_time": 0.10849118232727051 + }, + { + "epoch": 1.190643310546875e-05, + "model_forward_time": 0.025176286697387695, + "step": 7803 + }, + { + "epoch": 1.190643310546875e-05, + "step": 7803, + "training_step_time": 0.10977053642272949 + }, + { + "epoch": 1.1907958984375e-05, + "model_forward_time": 0.0251772403717041, + "step": 7804 + }, + { + "epoch": 1.1907958984375e-05, + "step": 7804, + "training_step_time": 0.10600566864013672 + }, + { + "epoch": 1.190948486328125e-05, + "model_forward_time": 0.025789260864257812, + "step": 7805 + }, + { + "epoch": 1.190948486328125e-05, + "step": 7805, + "training_step_time": 0.1127021312713623 + }, + { + "epoch": 1.19110107421875e-05, + "model_forward_time": 0.02523040771484375, + "step": 7806 + }, + { + "epoch": 1.19110107421875e-05, + "step": 7806, + "training_step_time": 0.10910892486572266 + }, + { + "epoch": 1.191253662109375e-05, + "model_forward_time": 0.025269031524658203, + "step": 7807 + }, + { + "epoch": 1.191253662109375e-05, + "step": 7807, + "training_step_time": 0.1363232135772705 + }, + { + "epoch": 1.19140625e-05, + "model_forward_time": 0.02669501304626465, + "step": 7808 + }, + { + "epoch": 1.19140625e-05, + "step": 7808, + "training_step_time": 0.11250948905944824 + }, + { + "epoch": 1.191558837890625e-05, + "model_forward_time": 0.02630138397216797, + "step": 7809 + }, + { + "epoch": 1.191558837890625e-05, + "step": 7809, + "training_step_time": 0.11458873748779297 + }, + { + "epoch": 1.19171142578125e-05, + "grad_norm": 0.3187682330608368, + "learning_rate": 8.838475991912482e-05, + "loss": 0.051, + "step": 7810 + }, + { + "epoch": 1.19171142578125e-05, + "model_forward_time": 0.02519369125366211, + "step": 7810 + }, + { + "epoch": 1.19171142578125e-05, + "step": 7810, + "training_step_time": 0.10689353942871094 + }, + { + "epoch": 1.191864013671875e-05, + "model_forward_time": 0.026935338973999023, + "step": 7811 + }, + { + "epoch": 1.191864013671875e-05, + "step": 7811, + "training_step_time": 0.11625051498413086 + }, + { + "epoch": 1.1920166015625e-05, + "model_forward_time": 0.0253448486328125, + "step": 7812 + }, + { + "epoch": 1.1920166015625e-05, + "step": 7812, + "training_step_time": 0.1224210262298584 + }, + { + "epoch": 1.192169189453125e-05, + "model_forward_time": 0.0252840518951416, + "step": 7813 + }, + { + "epoch": 1.192169189453125e-05, + "step": 7813, + "training_step_time": 0.1571178436279297 + }, + { + "epoch": 1.19232177734375e-05, + "model_forward_time": 0.025163650512695312, + "step": 7814 + }, + { + "epoch": 1.19232177734375e-05, + "step": 7814, + "training_step_time": 0.11394858360290527 + }, + { + "epoch": 1.192474365234375e-05, + "model_forward_time": 0.024710416793823242, + "step": 7815 + }, + { + "epoch": 1.192474365234375e-05, + "step": 7815, + "training_step_time": 0.1816692352294922 + }, + { + "epoch": 1.192626953125e-05, + "model_forward_time": 0.024820327758789062, + "step": 7816 + }, + { + "epoch": 1.192626953125e-05, + "step": 7816, + "training_step_time": 0.173844575881958 + }, + { + "epoch": 1.192779541015625e-05, + "model_forward_time": 0.02471446990966797, + "step": 7817 + }, + { + "epoch": 1.192779541015625e-05, + "step": 7817, + "training_step_time": 0.11200594902038574 + }, + { + "epoch": 1.19293212890625e-05, + "model_forward_time": 0.024959564208984375, + "step": 7818 + }, + { + "epoch": 1.19293212890625e-05, + "step": 7818, + "training_step_time": 0.12364673614501953 + }, + { + "epoch": 1.193084716796875e-05, + "model_forward_time": 0.025573253631591797, + "step": 7819 + }, + { + "epoch": 1.193084716796875e-05, + "step": 7819, + "training_step_time": 0.10993337631225586 + }, + { + "epoch": 1.1932373046875e-05, + "grad_norm": 0.41468310356140137, + "learning_rate": 8.834941765430391e-05, + "loss": 0.0337, + "step": 7820 + }, + { + "epoch": 1.1932373046875e-05, + "model_forward_time": 0.025514841079711914, + "step": 7820 + }, + { + "epoch": 1.1932373046875e-05, + "step": 7820, + "training_step_time": 0.2273268699645996 + }, + { + "epoch": 1.193389892578125e-05, + "model_forward_time": 0.02413487434387207, + "step": 7821 + }, + { + "epoch": 1.193389892578125e-05, + "step": 7821, + "training_step_time": 0.12456512451171875 + }, + { + "epoch": 1.19354248046875e-05, + "model_forward_time": 0.02417469024658203, + "step": 7822 + }, + { + "epoch": 1.19354248046875e-05, + "step": 7822, + "training_step_time": 0.11142587661743164 + }, + { + "epoch": 1.193695068359375e-05, + "model_forward_time": 0.0249483585357666, + "step": 7823 + }, + { + "epoch": 1.193695068359375e-05, + "step": 7823, + "training_step_time": 0.12179327011108398 + }, + { + "epoch": 1.19384765625e-05, + "model_forward_time": 0.024895906448364258, + "step": 7824 + }, + { + "epoch": 1.19384765625e-05, + "step": 7824, + "training_step_time": 0.17485642433166504 + }, + { + "epoch": 1.194000244140625e-05, + "model_forward_time": 0.024909257888793945, + "step": 7825 + }, + { + "epoch": 1.194000244140625e-05, + "step": 7825, + "training_step_time": 0.12551093101501465 + }, + { + "epoch": 1.19415283203125e-05, + "model_forward_time": 0.0243227481842041, + "step": 7826 + }, + { + "epoch": 1.19415283203125e-05, + "step": 7826, + "training_step_time": 0.11665582656860352 + }, + { + "epoch": 1.194305419921875e-05, + "model_forward_time": 0.025130033493041992, + "step": 7827 + }, + { + "epoch": 1.194305419921875e-05, + "step": 7827, + "training_step_time": 0.10761833190917969 + }, + { + "epoch": 1.1944580078125e-05, + "model_forward_time": 0.025203943252563477, + "step": 7828 + }, + { + "epoch": 1.1944580078125e-05, + "step": 7828, + "training_step_time": 0.10996031761169434 + }, + { + "epoch": 1.194610595703125e-05, + "model_forward_time": 0.0250852108001709, + "step": 7829 + }, + { + "epoch": 1.194610595703125e-05, + "step": 7829, + "training_step_time": 0.19105124473571777 + }, + { + "epoch": 1.19476318359375e-05, + "grad_norm": 0.46187856793403625, + "learning_rate": 8.831402879132446e-05, + "loss": 0.0479, + "step": 7830 + }, + { + "epoch": 1.19476318359375e-05, + "model_forward_time": 0.02467966079711914, + "step": 7830 + }, + { + "epoch": 1.19476318359375e-05, + "step": 7830, + "training_step_time": 0.11877274513244629 + }, + { + "epoch": 1.194915771484375e-05, + "model_forward_time": 0.024508953094482422, + "step": 7831 + }, + { + "epoch": 1.194915771484375e-05, + "step": 7831, + "training_step_time": 0.11325526237487793 + }, + { + "epoch": 1.195068359375e-05, + "model_forward_time": 0.0252685546875, + "step": 7832 + }, + { + "epoch": 1.195068359375e-05, + "step": 7832, + "training_step_time": 0.11606311798095703 + }, + { + "epoch": 1.195220947265625e-05, + "model_forward_time": 0.025557994842529297, + "step": 7833 + }, + { + "epoch": 1.195220947265625e-05, + "step": 7833, + "training_step_time": 0.11269259452819824 + }, + { + "epoch": 1.19537353515625e-05, + "model_forward_time": 0.02548503875732422, + "step": 7834 + }, + { + "epoch": 1.19537353515625e-05, + "step": 7834, + "training_step_time": 0.10583639144897461 + }, + { + "epoch": 1.195526123046875e-05, + "model_forward_time": 0.02512812614440918, + "step": 7835 + }, + { + "epoch": 1.195526123046875e-05, + "step": 7835, + "training_step_time": 0.19674158096313477 + }, + { + "epoch": 1.1956787109375e-05, + "model_forward_time": 0.024318218231201172, + "step": 7836 + }, + { + "epoch": 1.1956787109375e-05, + "step": 7836, + "training_step_time": 0.1079404354095459 + }, + { + "epoch": 1.195831298828125e-05, + "model_forward_time": 0.02438068389892578, + "step": 7837 + }, + { + "epoch": 1.195831298828125e-05, + "step": 7837, + "training_step_time": 0.1055917739868164 + }, + { + "epoch": 1.19598388671875e-05, + "model_forward_time": 0.025700807571411133, + "step": 7838 + }, + { + "epoch": 1.19598388671875e-05, + "step": 7838, + "training_step_time": 0.10809850692749023 + }, + { + "epoch": 1.196136474609375e-05, + "model_forward_time": 0.026479244232177734, + "step": 7839 + }, + { + "epoch": 1.196136474609375e-05, + "step": 7839, + "training_step_time": 0.11246728897094727 + }, + { + "epoch": 1.1962890625e-05, + "grad_norm": 0.45973441004753113, + "learning_rate": 8.827859337318725e-05, + "loss": 0.0397, + "step": 7840 + }, + { + "epoch": 1.1962890625e-05, + "model_forward_time": 0.02504277229309082, + "step": 7840 + }, + { + "epoch": 1.1962890625e-05, + "step": 7840, + "training_step_time": 0.11144638061523438 + }, + { + "epoch": 1.196441650390625e-05, + "model_forward_time": 0.025012493133544922, + "step": 7841 + }, + { + "epoch": 1.196441650390625e-05, + "step": 7841, + "training_step_time": 0.10768508911132812 + }, + { + "epoch": 1.19659423828125e-05, + "model_forward_time": 0.025186538696289062, + "step": 7842 + }, + { + "epoch": 1.19659423828125e-05, + "step": 7842, + "training_step_time": 0.10861372947692871 + }, + { + "epoch": 1.196746826171875e-05, + "model_forward_time": 0.025285959243774414, + "step": 7843 + }, + { + "epoch": 1.196746826171875e-05, + "step": 7843, + "training_step_time": 0.10761666297912598 + }, + { + "epoch": 1.1968994140625e-05, + "model_forward_time": 0.024943113327026367, + "step": 7844 + }, + { + "epoch": 1.1968994140625e-05, + "step": 7844, + "training_step_time": 0.10673880577087402 + }, + { + "epoch": 1.197052001953125e-05, + "model_forward_time": 0.024587154388427734, + "step": 7845 + }, + { + "epoch": 1.197052001953125e-05, + "step": 7845, + "training_step_time": 0.1075749397277832 + }, + { + "epoch": 1.19720458984375e-05, + "model_forward_time": 0.024624109268188477, + "step": 7846 + }, + { + "epoch": 1.19720458984375e-05, + "step": 7846, + "training_step_time": 0.10741400718688965 + }, + { + "epoch": 1.197357177734375e-05, + "model_forward_time": 0.025069475173950195, + "step": 7847 + }, + { + "epoch": 1.197357177734375e-05, + "step": 7847, + "training_step_time": 0.11314702033996582 + }, + { + "epoch": 1.197509765625e-05, + "model_forward_time": 0.025876998901367188, + "step": 7848 + }, + { + "epoch": 1.197509765625e-05, + "step": 7848, + "training_step_time": 0.1083214282989502 + }, + { + "epoch": 1.197662353515625e-05, + "model_forward_time": 0.02499985694885254, + "step": 7849 + }, + { + "epoch": 1.197662353515625e-05, + "step": 7849, + "training_step_time": 0.10786318778991699 + }, + { + "epoch": 1.19781494140625e-05, + "grad_norm": 0.5109211206436157, + "learning_rate": 8.824311144294965e-05, + "loss": 0.0476, + "step": 7850 + }, + { + "epoch": 1.19781494140625e-05, + "model_forward_time": 0.025148391723632812, + "step": 7850 + }, + { + "epoch": 1.19781494140625e-05, + "step": 7850, + "training_step_time": 0.10919475555419922 + }, + { + "epoch": 1.197967529296875e-05, + "model_forward_time": 0.025360107421875, + "step": 7851 + }, + { + "epoch": 1.197967529296875e-05, + "step": 7851, + "training_step_time": 0.11489629745483398 + }, + { + "epoch": 1.1981201171875e-05, + "model_forward_time": 0.02649664878845215, + "step": 7852 + }, + { + "epoch": 1.1981201171875e-05, + "step": 7852, + "training_step_time": 0.11010479927062988 + }, + { + "epoch": 1.198272705078125e-05, + "model_forward_time": 0.02671504020690918, + "step": 7853 + }, + { + "epoch": 1.198272705078125e-05, + "step": 7853, + "training_step_time": 0.12367057800292969 + }, + { + "epoch": 1.19842529296875e-05, + "model_forward_time": 0.025665283203125, + "step": 7854 + }, + { + "epoch": 1.19842529296875e-05, + "step": 7854, + "training_step_time": 0.1161048412322998 + }, + { + "epoch": 1.198577880859375e-05, + "model_forward_time": 0.025107622146606445, + "step": 7855 + }, + { + "epoch": 1.198577880859375e-05, + "step": 7855, + "training_step_time": 0.12682318687438965 + }, + { + "epoch": 1.19873046875e-05, + "model_forward_time": 0.025748252868652344, + "step": 7856 + }, + { + "epoch": 1.19873046875e-05, + "step": 7856, + "training_step_time": 0.10806560516357422 + }, + { + "epoch": 1.198883056640625e-05, + "model_forward_time": 0.025358200073242188, + "step": 7857 + }, + { + "epoch": 1.198883056640625e-05, + "step": 7857, + "training_step_time": 0.10684776306152344 + }, + { + "epoch": 1.19903564453125e-05, + "model_forward_time": 0.0252227783203125, + "step": 7858 + }, + { + "epoch": 1.19903564453125e-05, + "step": 7858, + "training_step_time": 0.11107540130615234 + }, + { + "epoch": 1.199188232421875e-05, + "model_forward_time": 0.026148319244384766, + "step": 7859 + }, + { + "epoch": 1.199188232421875e-05, + "step": 7859, + "training_step_time": 0.1456146240234375 + }, + { + "epoch": 1.1993408203125e-05, + "grad_norm": 0.6275359392166138, + "learning_rate": 8.820758304372557e-05, + "loss": 0.0587, + "step": 7860 + }, + { + "epoch": 1.1993408203125e-05, + "model_forward_time": 0.030323028564453125, + "step": 7860 + }, + { + "epoch": 1.1993408203125e-05, + "step": 7860, + "training_step_time": 0.1139078140258789 + }, + { + "epoch": 1.199493408203125e-05, + "model_forward_time": 0.024778366088867188, + "step": 7861 + }, + { + "epoch": 1.199493408203125e-05, + "step": 7861, + "training_step_time": 0.17496585845947266 + }, + { + "epoch": 1.19964599609375e-05, + "model_forward_time": 0.02480459213256836, + "step": 7862 + }, + { + "epoch": 1.19964599609375e-05, + "step": 7862, + "training_step_time": 0.18030929565429688 + }, + { + "epoch": 1.199798583984375e-05, + "model_forward_time": 0.025185585021972656, + "step": 7863 + }, + { + "epoch": 1.199798583984375e-05, + "step": 7863, + "training_step_time": 0.11366701126098633 + }, + { + "epoch": 1.199951171875e-05, + "model_forward_time": 0.023967266082763672, + "step": 7864 + }, + { + "epoch": 1.199951171875e-05, + "step": 7864, + "training_step_time": 0.1141812801361084 + }, + { + "epoch": 1.200103759765625e-05, + "model_forward_time": 0.025478124618530273, + "step": 7865 + }, + { + "epoch": 1.200103759765625e-05, + "step": 7865, + "training_step_time": 0.11051058769226074 + }, + { + "epoch": 1.20025634765625e-05, + "model_forward_time": 0.02510213851928711, + "step": 7866 + }, + { + "epoch": 1.20025634765625e-05, + "step": 7866, + "training_step_time": 0.2274637222290039 + }, + { + "epoch": 1.200408935546875e-05, + "model_forward_time": 0.024494647979736328, + "step": 7867 + }, + { + "epoch": 1.200408935546875e-05, + "step": 7867, + "training_step_time": 0.1239631175994873 + }, + { + "epoch": 1.2005615234375e-05, + "model_forward_time": 0.025365114212036133, + "step": 7868 + }, + { + "epoch": 1.2005615234375e-05, + "step": 7868, + "training_step_time": 0.13744449615478516 + }, + { + "epoch": 1.200714111328125e-05, + "model_forward_time": 0.02488875389099121, + "step": 7869 + }, + { + "epoch": 1.200714111328125e-05, + "step": 7869, + "training_step_time": 0.11133718490600586 + }, + { + "epoch": 1.20086669921875e-05, + "grad_norm": 0.638238787651062, + "learning_rate": 8.817200821868533e-05, + "loss": 0.0617, + "step": 7870 + }, + { + "epoch": 1.20086669921875e-05, + "model_forward_time": 0.024882078170776367, + "step": 7870 + }, + { + "epoch": 1.20086669921875e-05, + "step": 7870, + "training_step_time": 0.15382027626037598 + }, + { + "epoch": 1.201019287109375e-05, + "model_forward_time": 0.026033401489257812, + "step": 7871 + }, + { + "epoch": 1.201019287109375e-05, + "step": 7871, + "training_step_time": 0.12656283378601074 + }, + { + "epoch": 1.201171875e-05, + "model_forward_time": 0.024245738983154297, + "step": 7872 + }, + { + "epoch": 1.201171875e-05, + "step": 7872, + "training_step_time": 0.11639904975891113 + }, + { + "epoch": 1.201324462890625e-05, + "model_forward_time": 0.025424718856811523, + "step": 7873 + }, + { + "epoch": 1.201324462890625e-05, + "step": 7873, + "training_step_time": 0.10869479179382324 + }, + { + "epoch": 1.20147705078125e-05, + "model_forward_time": 0.026077747344970703, + "step": 7874 + }, + { + "epoch": 1.20147705078125e-05, + "step": 7874, + "training_step_time": 0.10886263847351074 + }, + { + "epoch": 1.201629638671875e-05, + "model_forward_time": 0.02504420280456543, + "step": 7875 + }, + { + "epoch": 1.201629638671875e-05, + "step": 7875, + "training_step_time": 0.12500286102294922 + }, + { + "epoch": 1.2017822265625e-05, + "model_forward_time": 0.02523040771484375, + "step": 7876 + }, + { + "epoch": 1.2017822265625e-05, + "step": 7876, + "training_step_time": 0.11100172996520996 + }, + { + "epoch": 1.201934814453125e-05, + "model_forward_time": 0.029355287551879883, + "step": 7877 + }, + { + "epoch": 1.201934814453125e-05, + "step": 7877, + "training_step_time": 0.19710969924926758 + }, + { + "epoch": 1.20208740234375e-05, + "model_forward_time": 0.02463984489440918, + "step": 7878 + }, + { + "epoch": 1.20208740234375e-05, + "step": 7878, + "training_step_time": 0.17306756973266602 + }, + { + "epoch": 1.202239990234375e-05, + "model_forward_time": 0.02457261085510254, + "step": 7879 + }, + { + "epoch": 1.202239990234375e-05, + "step": 7879, + "training_step_time": 0.18588757514953613 + }, + { + "epoch": 1.202392578125e-05, + "grad_norm": 0.2985515594482422, + "learning_rate": 8.813638701105573e-05, + "loss": 0.0507, + "step": 7880 + }, + { + "epoch": 1.202392578125e-05, + "model_forward_time": 0.024352312088012695, + "step": 7880 + }, + { + "epoch": 1.202392578125e-05, + "step": 7880, + "training_step_time": 0.16162371635437012 + }, + { + "epoch": 1.202545166015625e-05, + "model_forward_time": 0.02441096305847168, + "step": 7881 + }, + { + "epoch": 1.202545166015625e-05, + "step": 7881, + "training_step_time": 0.18004107475280762 + }, + { + "epoch": 1.20269775390625e-05, + "model_forward_time": 0.023365020751953125, + "step": 7882 + }, + { + "epoch": 1.20269775390625e-05, + "step": 7882, + "training_step_time": 0.1710350513458252 + }, + { + "epoch": 1.202850341796875e-05, + "model_forward_time": 0.023360490798950195, + "step": 7883 + }, + { + "epoch": 1.202850341796875e-05, + "step": 7883, + "training_step_time": 0.1512889862060547 + }, + { + "epoch": 1.2030029296875e-05, + "model_forward_time": 0.024245500564575195, + "step": 7884 + }, + { + "epoch": 1.2030029296875e-05, + "step": 7884, + "training_step_time": 0.13660454750061035 + }, + { + "epoch": 1.203155517578125e-05, + "model_forward_time": 0.023772001266479492, + "step": 7885 + }, + { + "epoch": 1.203155517578125e-05, + "step": 7885, + "training_step_time": 0.12547993659973145 + }, + { + "epoch": 1.20330810546875e-05, + "model_forward_time": 0.02412724494934082, + "step": 7886 + }, + { + "epoch": 1.20330810546875e-05, + "step": 7886, + "training_step_time": 0.11126852035522461 + }, + { + "epoch": 1.203460693359375e-05, + "model_forward_time": 0.02537703514099121, + "step": 7887 + }, + { + "epoch": 1.203460693359375e-05, + "step": 7887, + "training_step_time": 0.10500478744506836 + }, + { + "epoch": 1.20361328125e-05, + "model_forward_time": 0.02559351921081543, + "step": 7888 + }, + { + "epoch": 1.20361328125e-05, + "step": 7888, + "training_step_time": 0.10680913925170898 + }, + { + "epoch": 1.203765869140625e-05, + "model_forward_time": 0.025312423706054688, + "step": 7889 + }, + { + "epoch": 1.203765869140625e-05, + "step": 7889, + "training_step_time": 0.10815954208374023 + }, + { + "epoch": 1.20391845703125e-05, + "grad_norm": 0.5479375720024109, + "learning_rate": 8.810071946411989e-05, + "loss": 0.0429, + "step": 7890 + }, + { + "epoch": 1.20391845703125e-05, + "model_forward_time": 0.02578592300415039, + "step": 7890 + }, + { + "epoch": 1.20391845703125e-05, + "step": 7890, + "training_step_time": 0.10888934135437012 + }, + { + "epoch": 1.204071044921875e-05, + "model_forward_time": 0.026696205139160156, + "step": 7891 + }, + { + "epoch": 1.204071044921875e-05, + "step": 7891, + "training_step_time": 0.10713934898376465 + }, + { + "epoch": 1.2042236328125e-05, + "model_forward_time": 0.025906801223754883, + "step": 7892 + }, + { + "epoch": 1.2042236328125e-05, + "step": 7892, + "training_step_time": 0.10468029975891113 + }, + { + "epoch": 1.204376220703125e-05, + "model_forward_time": 0.02537989616394043, + "step": 7893 + }, + { + "epoch": 1.204376220703125e-05, + "step": 7893, + "training_step_time": 0.11684536933898926 + }, + { + "epoch": 1.20452880859375e-05, + "model_forward_time": 0.025667428970336914, + "step": 7894 + }, + { + "epoch": 1.20452880859375e-05, + "step": 7894, + "training_step_time": 0.18823695182800293 + }, + { + "epoch": 1.204681396484375e-05, + "model_forward_time": 0.025701045989990234, + "step": 7895 + }, + { + "epoch": 1.204681396484375e-05, + "step": 7895, + "training_step_time": 0.14424347877502441 + }, + { + "epoch": 1.204833984375e-05, + "model_forward_time": 0.02483391761779785, + "step": 7896 + }, + { + "epoch": 1.204833984375e-05, + "step": 7896, + "training_step_time": 0.10968470573425293 + }, + { + "epoch": 1.204986572265625e-05, + "model_forward_time": 0.025405168533325195, + "step": 7897 + }, + { + "epoch": 1.204986572265625e-05, + "step": 7897, + "training_step_time": 0.11185812950134277 + }, + { + "epoch": 1.20513916015625e-05, + "model_forward_time": 0.025368928909301758, + "step": 7898 + }, + { + "epoch": 1.20513916015625e-05, + "step": 7898, + "training_step_time": 0.10683465003967285 + }, + { + "epoch": 1.205291748046875e-05, + "model_forward_time": 0.02557659149169922, + "step": 7899 + }, + { + "epoch": 1.205291748046875e-05, + "step": 7899, + "training_step_time": 0.10740208625793457 + }, + { + "epoch": 1.2054443359375e-05, + "grad_norm": 0.5200270414352417, + "learning_rate": 8.806500562121723e-05, + "loss": 0.0545, + "step": 7900 + }, + { + "epoch": 1.2054443359375e-05, + "model_forward_time": 0.02678084373474121, + "step": 7900 + }, + { + "epoch": 1.2054443359375e-05, + "step": 7900, + "training_step_time": 0.10790038108825684 + }, + { + "epoch": 1.205596923828125e-05, + "model_forward_time": 0.02628946304321289, + "step": 7901 + }, + { + "epoch": 1.205596923828125e-05, + "step": 7901, + "training_step_time": 0.16756153106689453 + }, + { + "epoch": 1.20574951171875e-05, + "model_forward_time": 0.024643421173095703, + "step": 7902 + }, + { + "epoch": 1.20574951171875e-05, + "step": 7902, + "training_step_time": 0.12151384353637695 + }, + { + "epoch": 1.205902099609375e-05, + "model_forward_time": 0.02806544303894043, + "step": 7903 + }, + { + "epoch": 1.205902099609375e-05, + "step": 7903, + "training_step_time": 0.22317838668823242 + }, + { + "epoch": 1.2060546875e-05, + "model_forward_time": 0.025924205780029297, + "step": 7904 + }, + { + "epoch": 1.2060546875e-05, + "step": 7904, + "training_step_time": 0.13548016548156738 + }, + { + "epoch": 1.206207275390625e-05, + "model_forward_time": 0.02471446990966797, + "step": 7905 + }, + { + "epoch": 1.206207275390625e-05, + "step": 7905, + "training_step_time": 0.11276721954345703 + }, + { + "epoch": 1.20635986328125e-05, + "model_forward_time": 0.024350881576538086, + "step": 7906 + }, + { + "epoch": 1.20635986328125e-05, + "step": 7906, + "training_step_time": 0.1176750659942627 + }, + { + "epoch": 1.206512451171875e-05, + "model_forward_time": 0.02517533302307129, + "step": 7907 + }, + { + "epoch": 1.206512451171875e-05, + "step": 7907, + "training_step_time": 0.11346554756164551 + }, + { + "epoch": 1.2066650390625e-05, + "model_forward_time": 0.026131153106689453, + "step": 7908 + }, + { + "epoch": 1.2066650390625e-05, + "step": 7908, + "training_step_time": 0.19850683212280273 + }, + { + "epoch": 1.206817626953125e-05, + "model_forward_time": 0.025051116943359375, + "step": 7909 + }, + { + "epoch": 1.206817626953125e-05, + "step": 7909, + "training_step_time": 0.11710286140441895 + }, + { + "epoch": 1.20697021484375e-05, + "grad_norm": 0.3905726671218872, + "learning_rate": 8.802924552574345e-05, + "loss": 0.0562, + "step": 7910 + }, + { + "epoch": 1.20697021484375e-05, + "model_forward_time": 0.02635955810546875, + "step": 7910 + }, + { + "epoch": 1.20697021484375e-05, + "step": 7910, + "training_step_time": 0.13576769828796387 + }, + { + "epoch": 1.207122802734375e-05, + "model_forward_time": 0.025088071823120117, + "step": 7911 + }, + { + "epoch": 1.207122802734375e-05, + "step": 7911, + "training_step_time": 0.15278387069702148 + }, + { + "epoch": 1.207275390625e-05, + "model_forward_time": 0.025600433349609375, + "step": 7912 + }, + { + "epoch": 1.207275390625e-05, + "step": 7912, + "training_step_time": 0.17139697074890137 + }, + { + "epoch": 1.207427978515625e-05, + "model_forward_time": 0.02432537078857422, + "step": 7913 + }, + { + "epoch": 1.207427978515625e-05, + "step": 7913, + "training_step_time": 0.17067360877990723 + }, + { + "epoch": 1.20758056640625e-05, + "model_forward_time": 0.024817466735839844, + "step": 7914 + }, + { + "epoch": 1.20758056640625e-05, + "step": 7914, + "training_step_time": 0.10875439643859863 + }, + { + "epoch": 1.207733154296875e-05, + "model_forward_time": 0.02481389045715332, + "step": 7915 + }, + { + "epoch": 1.207733154296875e-05, + "step": 7915, + "training_step_time": 0.1369922161102295 + }, + { + "epoch": 1.2078857421875e-05, + "model_forward_time": 0.02545762062072754, + "step": 7916 + }, + { + "epoch": 1.2078857421875e-05, + "step": 7916, + "training_step_time": 0.17713451385498047 + }, + { + "epoch": 1.208038330078125e-05, + "model_forward_time": 0.02508378028869629, + "step": 7917 + }, + { + "epoch": 1.208038330078125e-05, + "step": 7917, + "training_step_time": 0.10827136039733887 + }, + { + "epoch": 1.20819091796875e-05, + "model_forward_time": 0.024219036102294922, + "step": 7918 + }, + { + "epoch": 1.20819091796875e-05, + "step": 7918, + "training_step_time": 0.10509347915649414 + }, + { + "epoch": 1.208343505859375e-05, + "model_forward_time": 0.02587723731994629, + "step": 7919 + }, + { + "epoch": 1.208343505859375e-05, + "step": 7919, + "training_step_time": 0.19098520278930664 + }, + { + "epoch": 1.20849609375e-05, + "grad_norm": 0.4202629029750824, + "learning_rate": 8.799343922115044e-05, + "loss": 0.073, + "step": 7920 + }, + { + "epoch": 1.20849609375e-05, + "model_forward_time": 0.025252342224121094, + "step": 7920 + }, + { + "epoch": 1.20849609375e-05, + "step": 7920, + "training_step_time": 0.1090538501739502 + }, + { + "epoch": 1.208648681640625e-05, + "model_forward_time": 0.025035858154296875, + "step": 7921 + }, + { + "epoch": 1.208648681640625e-05, + "step": 7921, + "training_step_time": 0.10737061500549316 + }, + { + "epoch": 1.20880126953125e-05, + "model_forward_time": 0.025863170623779297, + "step": 7922 + }, + { + "epoch": 1.20880126953125e-05, + "step": 7922, + "training_step_time": 0.10735893249511719 + }, + { + "epoch": 1.208953857421875e-05, + "model_forward_time": 0.0262453556060791, + "step": 7923 + }, + { + "epoch": 1.208953857421875e-05, + "step": 7923, + "training_step_time": 0.10936737060546875 + }, + { + "epoch": 1.2091064453125e-05, + "model_forward_time": 0.026357650756835938, + "step": 7924 + }, + { + "epoch": 1.2091064453125e-05, + "step": 7924, + "training_step_time": 0.1101231575012207 + }, + { + "epoch": 1.209259033203125e-05, + "model_forward_time": 0.02567577362060547, + "step": 7925 + }, + { + "epoch": 1.209259033203125e-05, + "step": 7925, + "training_step_time": 0.1090693473815918 + }, + { + "epoch": 1.20941162109375e-05, + "model_forward_time": 0.02533435821533203, + "step": 7926 + }, + { + "epoch": 1.20941162109375e-05, + "step": 7926, + "training_step_time": 0.11175966262817383 + }, + { + "epoch": 1.209564208984375e-05, + "model_forward_time": 0.024616718292236328, + "step": 7927 + }, + { + "epoch": 1.209564208984375e-05, + "step": 7927, + "training_step_time": 0.10579705238342285 + }, + { + "epoch": 1.209716796875e-05, + "model_forward_time": 0.02611517906188965, + "step": 7928 + }, + { + "epoch": 1.209716796875e-05, + "step": 7928, + "training_step_time": 0.10750651359558105 + }, + { + "epoch": 1.209869384765625e-05, + "model_forward_time": 0.025504112243652344, + "step": 7929 + }, + { + "epoch": 1.209869384765625e-05, + "step": 7929, + "training_step_time": 0.11054706573486328 + }, + { + "epoch": 1.21002197265625e-05, + "grad_norm": 0.7044833302497864, + "learning_rate": 8.795758675094621e-05, + "loss": 0.044, + "step": 7930 + }, + { + "epoch": 1.21002197265625e-05, + "model_forward_time": 0.024985551834106445, + "step": 7930 + }, + { + "epoch": 1.21002197265625e-05, + "step": 7930, + "training_step_time": 0.10688519477844238 + }, + { + "epoch": 1.210174560546875e-05, + "model_forward_time": 0.02528691291809082, + "step": 7931 + }, + { + "epoch": 1.210174560546875e-05, + "step": 7931, + "training_step_time": 0.12502336502075195 + }, + { + "epoch": 1.2103271484375e-05, + "model_forward_time": 0.026308298110961914, + "step": 7932 + }, + { + "epoch": 1.2103271484375e-05, + "step": 7932, + "training_step_time": 0.13570427894592285 + }, + { + "epoch": 1.210479736328125e-05, + "model_forward_time": 0.02536296844482422, + "step": 7933 + }, + { + "epoch": 1.210479736328125e-05, + "step": 7933, + "training_step_time": 0.13477849960327148 + }, + { + "epoch": 1.21063232421875e-05, + "model_forward_time": 0.02510690689086914, + "step": 7934 + }, + { + "epoch": 1.21063232421875e-05, + "step": 7934, + "training_step_time": 0.12601327896118164 + }, + { + "epoch": 1.210784912109375e-05, + "model_forward_time": 0.024953603744506836, + "step": 7935 + }, + { + "epoch": 1.210784912109375e-05, + "step": 7935, + "training_step_time": 0.11086630821228027 + }, + { + "epoch": 1.2109375e-05, + "model_forward_time": 0.024572372436523438, + "step": 7936 + }, + { + "epoch": 1.2109375e-05, + "step": 7936, + "training_step_time": 0.21455979347229004 + }, + { + "epoch": 1.211090087890625e-05, + "model_forward_time": 0.02447366714477539, + "step": 7937 + }, + { + "epoch": 1.211090087890625e-05, + "step": 7937, + "training_step_time": 0.11578989028930664 + }, + { + "epoch": 1.21124267578125e-05, + "model_forward_time": 0.024437665939331055, + "step": 7938 + }, + { + "epoch": 1.21124267578125e-05, + "step": 7938, + "training_step_time": 0.11175775527954102 + }, + { + "epoch": 1.211395263671875e-05, + "model_forward_time": 0.02499246597290039, + "step": 7939 + }, + { + "epoch": 1.211395263671875e-05, + "step": 7939, + "training_step_time": 0.11520743370056152 + }, + { + "epoch": 1.2115478515625e-05, + "grad_norm": 0.6897473931312561, + "learning_rate": 8.792168815869493e-05, + "loss": 0.0529, + "step": 7940 + }, + { + "epoch": 1.2115478515625e-05, + "model_forward_time": 0.02551102638244629, + "step": 7940 + }, + { + "epoch": 1.2115478515625e-05, + "step": 7940, + "training_step_time": 0.10939192771911621 + }, + { + "epoch": 1.211700439453125e-05, + "model_forward_time": 0.02597832679748535, + "step": 7941 + }, + { + "epoch": 1.211700439453125e-05, + "step": 7941, + "training_step_time": 0.12726497650146484 + }, + { + "epoch": 1.21185302734375e-05, + "model_forward_time": 0.02549886703491211, + "step": 7942 + }, + { + "epoch": 1.21185302734375e-05, + "step": 7942, + "training_step_time": 0.10681533813476562 + }, + { + "epoch": 1.212005615234375e-05, + "model_forward_time": 0.02470541000366211, + "step": 7943 + }, + { + "epoch": 1.212005615234375e-05, + "step": 7943, + "training_step_time": 0.10898184776306152 + }, + { + "epoch": 1.212158203125e-05, + "model_forward_time": 0.02497553825378418, + "step": 7944 + }, + { + "epoch": 1.212158203125e-05, + "step": 7944, + "training_step_time": 0.10942411422729492 + }, + { + "epoch": 1.212310791015625e-05, + "model_forward_time": 0.024882793426513672, + "step": 7945 + }, + { + "epoch": 1.212310791015625e-05, + "step": 7945, + "training_step_time": 0.18957972526550293 + }, + { + "epoch": 1.21246337890625e-05, + "model_forward_time": 0.024564027786254883, + "step": 7946 + }, + { + "epoch": 1.21246337890625e-05, + "step": 7946, + "training_step_time": 0.10462427139282227 + }, + { + "epoch": 1.212615966796875e-05, + "model_forward_time": 0.025892972946166992, + "step": 7947 + }, + { + "epoch": 1.212615966796875e-05, + "step": 7947, + "training_step_time": 0.11996626853942871 + }, + { + "epoch": 1.2127685546875e-05, + "model_forward_time": 0.024728059768676758, + "step": 7948 + }, + { + "epoch": 1.2127685546875e-05, + "step": 7948, + "training_step_time": 0.1269516944885254 + }, + { + "epoch": 1.212921142578125e-05, + "model_forward_time": 0.02470993995666504, + "step": 7949 + }, + { + "epoch": 1.212921142578125e-05, + "step": 7949, + "training_step_time": 0.13221526145935059 + }, + { + "epoch": 1.21307373046875e-05, + "grad_norm": 0.8151080012321472, + "learning_rate": 8.788574348801675e-05, + "loss": 0.0419, + "step": 7950 + }, + { + "epoch": 1.21307373046875e-05, + "model_forward_time": 0.02478194236755371, + "step": 7950 + }, + { + "epoch": 1.21307373046875e-05, + "step": 7950, + "training_step_time": 0.11572003364562988 + }, + { + "epoch": 1.213226318359375e-05, + "model_forward_time": 0.025125741958618164, + "step": 7951 + }, + { + "epoch": 1.213226318359375e-05, + "step": 7951, + "training_step_time": 0.11235260963439941 + }, + { + "epoch": 1.21337890625e-05, + "model_forward_time": 0.024944067001342773, + "step": 7952 + }, + { + "epoch": 1.21337890625e-05, + "step": 7952, + "training_step_time": 0.20114636421203613 + }, + { + "epoch": 1.213531494140625e-05, + "model_forward_time": 0.024495363235473633, + "step": 7953 + }, + { + "epoch": 1.213531494140625e-05, + "step": 7953, + "training_step_time": 0.11314654350280762 + }, + { + "epoch": 1.21368408203125e-05, + "model_forward_time": 0.029157400131225586, + "step": 7954 + }, + { + "epoch": 1.21368408203125e-05, + "step": 7954, + "training_step_time": 0.13973259925842285 + }, + { + "epoch": 1.213836669921875e-05, + "model_forward_time": 0.02474045753479004, + "step": 7955 + }, + { + "epoch": 1.213836669921875e-05, + "step": 7955, + "training_step_time": 0.15376520156860352 + }, + { + "epoch": 1.2139892578125e-05, + "model_forward_time": 0.024340391159057617, + "step": 7956 + }, + { + "epoch": 1.2139892578125e-05, + "step": 7956, + "training_step_time": 0.21885466575622559 + }, + { + "epoch": 1.214141845703125e-05, + "model_forward_time": 0.024456262588500977, + "step": 7957 + }, + { + "epoch": 1.214141845703125e-05, + "step": 7957, + "training_step_time": 0.11707091331481934 + }, + { + "epoch": 1.21429443359375e-05, + "model_forward_time": 0.02457284927368164, + "step": 7958 + }, + { + "epoch": 1.21429443359375e-05, + "step": 7958, + "training_step_time": 0.18384027481079102 + }, + { + "epoch": 1.214447021484375e-05, + "model_forward_time": 0.024204730987548828, + "step": 7959 + }, + { + "epoch": 1.214447021484375e-05, + "step": 7959, + "training_step_time": 0.14910554885864258 + }, + { + "epoch": 1.214599609375e-05, + "grad_norm": 0.478555828332901, + "learning_rate": 8.784975278258783e-05, + "loss": 0.0437, + "step": 7960 + }, + { + "epoch": 1.214599609375e-05, + "model_forward_time": 0.024885892868041992, + "step": 7960 + }, + { + "epoch": 1.214599609375e-05, + "step": 7960, + "training_step_time": 0.11147737503051758 + }, + { + "epoch": 1.214752197265625e-05, + "model_forward_time": 0.024567127227783203, + "step": 7961 + }, + { + "epoch": 1.214752197265625e-05, + "step": 7961, + "training_step_time": 0.11080765724182129 + }, + { + "epoch": 1.21490478515625e-05, + "model_forward_time": 0.024933815002441406, + "step": 7962 + }, + { + "epoch": 1.21490478515625e-05, + "step": 7962, + "training_step_time": 0.12792205810546875 + }, + { + "epoch": 1.215057373046875e-05, + "model_forward_time": 0.025124549865722656, + "step": 7963 + }, + { + "epoch": 1.215057373046875e-05, + "step": 7963, + "training_step_time": 0.11346960067749023 + }, + { + "epoch": 1.2152099609375e-05, + "model_forward_time": 0.024829864501953125, + "step": 7964 + }, + { + "epoch": 1.2152099609375e-05, + "step": 7964, + "training_step_time": 0.1145787239074707 + }, + { + "epoch": 1.215362548828125e-05, + "model_forward_time": 0.024878501892089844, + "step": 7965 + }, + { + "epoch": 1.215362548828125e-05, + "step": 7965, + "training_step_time": 0.11946439743041992 + }, + { + "epoch": 1.21551513671875e-05, + "model_forward_time": 0.025093793869018555, + "step": 7966 + }, + { + "epoch": 1.21551513671875e-05, + "step": 7966, + "training_step_time": 0.10670924186706543 + }, + { + "epoch": 1.215667724609375e-05, + "model_forward_time": 0.02442169189453125, + "step": 7967 + }, + { + "epoch": 1.215667724609375e-05, + "step": 7967, + "training_step_time": 0.10758852958679199 + }, + { + "epoch": 1.2158203125e-05, + "model_forward_time": 0.02733469009399414, + "step": 7968 + }, + { + "epoch": 1.2158203125e-05, + "step": 7968, + "training_step_time": 0.11456847190856934 + }, + { + "epoch": 1.215972900390625e-05, + "model_forward_time": 0.025136232376098633, + "step": 7969 + }, + { + "epoch": 1.215972900390625e-05, + "step": 7969, + "training_step_time": 0.10651516914367676 + }, + { + "epoch": 1.21612548828125e-05, + "grad_norm": 0.5801077485084534, + "learning_rate": 8.781371608614029e-05, + "loss": 0.0444, + "step": 7970 + }, + { + "epoch": 1.21612548828125e-05, + "model_forward_time": 0.025835752487182617, + "step": 7970 + }, + { + "epoch": 1.21612548828125e-05, + "step": 7970, + "training_step_time": 0.10868954658508301 + }, + { + "epoch": 1.216278076171875e-05, + "model_forward_time": 0.025037765502929688, + "step": 7971 + }, + { + "epoch": 1.216278076171875e-05, + "step": 7971, + "training_step_time": 0.10644912719726562 + }, + { + "epoch": 1.2164306640625e-05, + "model_forward_time": 0.024999141693115234, + "step": 7972 + }, + { + "epoch": 1.2164306640625e-05, + "step": 7972, + "training_step_time": 0.10641980171203613 + }, + { + "epoch": 1.216583251953125e-05, + "model_forward_time": 0.02485489845275879, + "step": 7973 + }, + { + "epoch": 1.216583251953125e-05, + "step": 7973, + "training_step_time": 0.11394405364990234 + }, + { + "epoch": 1.21673583984375e-05, + "model_forward_time": 0.0245211124420166, + "step": 7974 + }, + { + "epoch": 1.21673583984375e-05, + "step": 7974, + "training_step_time": 0.1098182201385498 + }, + { + "epoch": 1.216888427734375e-05, + "model_forward_time": 0.02516317367553711, + "step": 7975 + }, + { + "epoch": 1.216888427734375e-05, + "step": 7975, + "training_step_time": 0.12448930740356445 + }, + { + "epoch": 1.217041015625e-05, + "model_forward_time": 0.024618864059448242, + "step": 7976 + }, + { + "epoch": 1.217041015625e-05, + "step": 7976, + "training_step_time": 0.12089371681213379 + }, + { + "epoch": 1.217193603515625e-05, + "model_forward_time": 0.025037527084350586, + "step": 7977 + }, + { + "epoch": 1.217193603515625e-05, + "step": 7977, + "training_step_time": 0.12023663520812988 + }, + { + "epoch": 1.21734619140625e-05, + "model_forward_time": 0.024635791778564453, + "step": 7978 + }, + { + "epoch": 1.21734619140625e-05, + "step": 7978, + "training_step_time": 0.1242685317993164 + }, + { + "epoch": 1.217498779296875e-05, + "model_forward_time": 0.024935007095336914, + "step": 7979 + }, + { + "epoch": 1.217498779296875e-05, + "step": 7979, + "training_step_time": 0.12067174911499023 + }, + { + "epoch": 1.2176513671875e-05, + "grad_norm": 0.8685769438743591, + "learning_rate": 8.77776334424621e-05, + "loss": 0.0452, + "step": 7980 + }, + { + "epoch": 1.2176513671875e-05, + "model_forward_time": 0.025543928146362305, + "step": 7980 + }, + { + "epoch": 1.2176513671875e-05, + "step": 7980, + "training_step_time": 0.10847306251525879 + }, + { + "epoch": 1.217803955078125e-05, + "model_forward_time": 0.025130748748779297, + "step": 7981 + }, + { + "epoch": 1.217803955078125e-05, + "step": 7981, + "training_step_time": 0.11330461502075195 + }, + { + "epoch": 1.21795654296875e-05, + "model_forward_time": 0.025539636611938477, + "step": 7982 + }, + { + "epoch": 1.21795654296875e-05, + "step": 7982, + "training_step_time": 0.11588811874389648 + }, + { + "epoch": 1.218109130859375e-05, + "model_forward_time": 0.02513432502746582, + "step": 7983 + }, + { + "epoch": 1.218109130859375e-05, + "step": 7983, + "training_step_time": 0.1141366958618164 + }, + { + "epoch": 1.21826171875e-05, + "model_forward_time": 0.02728724479675293, + "step": 7984 + }, + { + "epoch": 1.21826171875e-05, + "step": 7984, + "training_step_time": 0.11709713935852051 + }, + { + "epoch": 1.218414306640625e-05, + "model_forward_time": 0.025393962860107422, + "step": 7985 + }, + { + "epoch": 1.218414306640625e-05, + "step": 7985, + "training_step_time": 0.2055351734161377 + }, + { + "epoch": 1.21856689453125e-05, + "model_forward_time": 0.02438807487487793, + "step": 7986 + }, + { + "epoch": 1.21856689453125e-05, + "step": 7986, + "training_step_time": 0.11226773262023926 + }, + { + "epoch": 1.218719482421875e-05, + "model_forward_time": 0.024125099182128906, + "step": 7987 + }, + { + "epoch": 1.218719482421875e-05, + "step": 7987, + "training_step_time": 0.10791182518005371 + }, + { + "epoch": 1.2188720703125e-05, + "model_forward_time": 0.025109529495239258, + "step": 7988 + }, + { + "epoch": 1.2188720703125e-05, + "step": 7988, + "training_step_time": 0.10952353477478027 + }, + { + "epoch": 1.219024658203125e-05, + "model_forward_time": 0.024990081787109375, + "step": 7989 + }, + { + "epoch": 1.219024658203125e-05, + "step": 7989, + "training_step_time": 0.10594606399536133 + }, + { + "epoch": 1.21917724609375e-05, + "grad_norm": 0.6445547938346863, + "learning_rate": 8.774150489539707e-05, + "loss": 0.0384, + "step": 7990 + }, + { + "epoch": 1.21917724609375e-05, + "model_forward_time": 0.023876428604125977, + "step": 7990 + }, + { + "epoch": 1.21917724609375e-05, + "step": 7990, + "training_step_time": 0.10858273506164551 + }, + { + "epoch": 1.219329833984375e-05, + "model_forward_time": 0.024243831634521484, + "step": 7991 + }, + { + "epoch": 1.219329833984375e-05, + "step": 7991, + "training_step_time": 0.12700700759887695 + }, + { + "epoch": 1.219482421875e-05, + "model_forward_time": 0.02518320083618164, + "step": 7992 + }, + { + "epoch": 1.219482421875e-05, + "step": 7992, + "training_step_time": 0.11277341842651367 + }, + { + "epoch": 1.219635009765625e-05, + "model_forward_time": 0.025277137756347656, + "step": 7993 + }, + { + "epoch": 1.219635009765625e-05, + "step": 7993, + "training_step_time": 0.2225806713104248 + }, + { + "epoch": 1.21978759765625e-05, + "model_forward_time": 0.02420783042907715, + "step": 7994 + }, + { + "epoch": 1.21978759765625e-05, + "step": 7994, + "training_step_time": 0.12477612495422363 + }, + { + "epoch": 1.219940185546875e-05, + "model_forward_time": 0.024157047271728516, + "step": 7995 + }, + { + "epoch": 1.219940185546875e-05, + "step": 7995, + "training_step_time": 0.10909628868103027 + }, + { + "epoch": 1.2200927734375e-05, + "model_forward_time": 0.025439977645874023, + "step": 7996 + }, + { + "epoch": 1.2200927734375e-05, + "step": 7996, + "training_step_time": 0.12809038162231445 + }, + { + "epoch": 1.220245361328125e-05, + "model_forward_time": 0.024990558624267578, + "step": 7997 + }, + { + "epoch": 1.220245361328125e-05, + "step": 7997, + "training_step_time": 0.15242266654968262 + }, + { + "epoch": 1.22039794921875e-05, + "model_forward_time": 0.02489948272705078, + "step": 7998 + }, + { + "epoch": 1.22039794921875e-05, + "step": 7998, + "training_step_time": 0.12850666046142578 + }, + { + "epoch": 1.220550537109375e-05, + "model_forward_time": 0.024712800979614258, + "step": 7999 + }, + { + "epoch": 1.220550537109375e-05, + "step": 7999, + "training_step_time": 0.10808920860290527 + }, + { + "epoch": 1.220703125e-05, + "grad_norm": 0.42424267530441284, + "learning_rate": 8.770533048884482e-05, + "loss": 0.0393, + "step": 8000 + }, + { + "epoch": 1.220703125e-05, + "model_forward_time": 0.026155710220336914, + "step": 8000 + }, + { + "epoch": 1.220703125e-05, + "step": 8000, + "training_step_time": 0.10521769523620605 + }, + { + "epoch": 1.220855712890625e-05, + "model_forward_time": 0.02340412139892578, + "step": 8001 + }, + { + "epoch": 1.220855712890625e-05, + "step": 8001, + "training_step_time": 0.15229058265686035 + }, + { + "epoch": 1.22100830078125e-05, + "model_forward_time": 0.024644136428833008, + "step": 8002 + }, + { + "epoch": 1.22100830078125e-05, + "step": 8002, + "training_step_time": 0.11137056350708008 + }, + { + "epoch": 1.221160888671875e-05, + "model_forward_time": 0.0244448184967041, + "step": 8003 + }, + { + "epoch": 1.221160888671875e-05, + "step": 8003, + "training_step_time": 0.11009430885314941 + }, + { + "epoch": 1.2213134765625e-05, + "model_forward_time": 0.024834871292114258, + "step": 8004 + }, + { + "epoch": 1.2213134765625e-05, + "step": 8004, + "training_step_time": 0.12272524833679199 + }, + { + "epoch": 1.221466064453125e-05, + "model_forward_time": 0.025106191635131836, + "step": 8005 + }, + { + "epoch": 1.221466064453125e-05, + "step": 8005, + "training_step_time": 0.13160347938537598 + }, + { + "epoch": 1.22161865234375e-05, + "model_forward_time": 0.025022506713867188, + "step": 8006 + }, + { + "epoch": 1.22161865234375e-05, + "step": 8006, + "training_step_time": 0.10595393180847168 + }, + { + "epoch": 1.221771240234375e-05, + "model_forward_time": 0.024736881256103516, + "step": 8007 + }, + { + "epoch": 1.221771240234375e-05, + "step": 8007, + "training_step_time": 0.11640000343322754 + }, + { + "epoch": 1.221923828125e-05, + "model_forward_time": 0.02558732032775879, + "step": 8008 + }, + { + "epoch": 1.221923828125e-05, + "step": 8008, + "training_step_time": 0.209367036819458 + }, + { + "epoch": 1.222076416015625e-05, + "model_forward_time": 0.02422022819519043, + "step": 8009 + }, + { + "epoch": 1.222076416015625e-05, + "step": 8009, + "training_step_time": 0.12210321426391602 + }, + { + "epoch": 1.22222900390625e-05, + "grad_norm": 0.989590048789978, + "learning_rate": 8.766911026676064e-05, + "loss": 0.0385, + "step": 8010 + }, + { + "epoch": 1.22222900390625e-05, + "model_forward_time": 0.02434086799621582, + "step": 8010 + }, + { + "epoch": 1.22222900390625e-05, + "step": 8010, + "training_step_time": 0.10384702682495117 + }, + { + "epoch": 1.222381591796875e-05, + "model_forward_time": 0.027399778366088867, + "step": 8011 + }, + { + "epoch": 1.222381591796875e-05, + "step": 8011, + "training_step_time": 0.1770954132080078 + }, + { + "epoch": 1.2225341796875e-05, + "model_forward_time": 0.024376392364501953, + "step": 8012 + }, + { + "epoch": 1.2225341796875e-05, + "step": 8012, + "training_step_time": 0.13405919075012207 + }, + { + "epoch": 1.222686767578125e-05, + "model_forward_time": 0.023864269256591797, + "step": 8013 + }, + { + "epoch": 1.222686767578125e-05, + "step": 8013, + "training_step_time": 0.11847400665283203 + }, + { + "epoch": 1.22283935546875e-05, + "model_forward_time": 0.026166677474975586, + "step": 8014 + }, + { + "epoch": 1.22283935546875e-05, + "step": 8014, + "training_step_time": 0.10955429077148438 + }, + { + "epoch": 1.222991943359375e-05, + "model_forward_time": 0.025537967681884766, + "step": 8015 + }, + { + "epoch": 1.222991943359375e-05, + "step": 8015, + "training_step_time": 0.14167284965515137 + }, + { + "epoch": 1.22314453125e-05, + "model_forward_time": 0.024870634078979492, + "step": 8016 + }, + { + "epoch": 1.22314453125e-05, + "step": 8016, + "training_step_time": 0.11117029190063477 + }, + { + "epoch": 1.223297119140625e-05, + "model_forward_time": 0.024974346160888672, + "step": 8017 + }, + { + "epoch": 1.223297119140625e-05, + "step": 8017, + "training_step_time": 0.10834479331970215 + }, + { + "epoch": 1.22344970703125e-05, + "model_forward_time": 0.025220394134521484, + "step": 8018 + }, + { + "epoch": 1.22344970703125e-05, + "step": 8018, + "training_step_time": 0.11035656929016113 + }, + { + "epoch": 1.223602294921875e-05, + "model_forward_time": 0.025110244750976562, + "step": 8019 + }, + { + "epoch": 1.223602294921875e-05, + "step": 8019, + "training_step_time": 0.10979390144348145 + }, + { + "epoch": 1.2237548828125e-05, + "grad_norm": 0.5198809504508972, + "learning_rate": 8.763284427315551e-05, + "loss": 0.0455, + "step": 8020 + }, + { + "epoch": 1.2237548828125e-05, + "model_forward_time": 0.024939775466918945, + "step": 8020 + }, + { + "epoch": 1.2237548828125e-05, + "step": 8020, + "training_step_time": 0.19510436058044434 + }, + { + "epoch": 1.223907470703125e-05, + "model_forward_time": 0.024056434631347656, + "step": 8021 + }, + { + "epoch": 1.223907470703125e-05, + "step": 8021, + "training_step_time": 0.10980510711669922 + }, + { + "epoch": 1.22406005859375e-05, + "model_forward_time": 0.024538278579711914, + "step": 8022 + }, + { + "epoch": 1.22406005859375e-05, + "step": 8022, + "training_step_time": 0.10904335975646973 + }, + { + "epoch": 1.224212646484375e-05, + "model_forward_time": 0.025308847427368164, + "step": 8023 + }, + { + "epoch": 1.224212646484375e-05, + "step": 8023, + "training_step_time": 0.10912275314331055 + }, + { + "epoch": 1.224365234375e-05, + "model_forward_time": 0.024741411209106445, + "step": 8024 + }, + { + "epoch": 1.224365234375e-05, + "step": 8024, + "training_step_time": 0.10647392272949219 + }, + { + "epoch": 1.224517822265625e-05, + "model_forward_time": 0.02515244483947754, + "step": 8025 + }, + { + "epoch": 1.224517822265625e-05, + "step": 8025, + "training_step_time": 0.10667657852172852 + }, + { + "epoch": 1.22467041015625e-05, + "model_forward_time": 0.02509140968322754, + "step": 8026 + }, + { + "epoch": 1.22467041015625e-05, + "step": 8026, + "training_step_time": 0.10818696022033691 + }, + { + "epoch": 1.224822998046875e-05, + "model_forward_time": 0.02521347999572754, + "step": 8027 + }, + { + "epoch": 1.224822998046875e-05, + "step": 8027, + "training_step_time": 0.10985255241394043 + }, + { + "epoch": 1.2249755859375e-05, + "model_forward_time": 0.02496957778930664, + "step": 8028 + }, + { + "epoch": 1.2249755859375e-05, + "step": 8028, + "training_step_time": 0.10714507102966309 + }, + { + "epoch": 1.225128173828125e-05, + "model_forward_time": 0.025231599807739258, + "step": 8029 + }, + { + "epoch": 1.225128173828125e-05, + "step": 8029, + "training_step_time": 0.10595011711120605 + }, + { + "epoch": 1.22528076171875e-05, + "grad_norm": 0.6120517253875732, + "learning_rate": 8.759653255209606e-05, + "loss": 0.0384, + "step": 8030 + }, + { + "epoch": 1.22528076171875e-05, + "model_forward_time": 0.02554464340209961, + "step": 8030 + }, + { + "epoch": 1.22528076171875e-05, + "step": 8030, + "training_step_time": 0.10946106910705566 + }, + { + "epoch": 1.225433349609375e-05, + "model_forward_time": 0.02524113655090332, + "step": 8031 + }, + { + "epoch": 1.225433349609375e-05, + "step": 8031, + "training_step_time": 0.1143794059753418 + }, + { + "epoch": 1.2255859375e-05, + "model_forward_time": 0.02399444580078125, + "step": 8032 + }, + { + "epoch": 1.2255859375e-05, + "step": 8032, + "training_step_time": 0.1059868335723877 + }, + { + "epoch": 1.225738525390625e-05, + "model_forward_time": 0.024372339248657227, + "step": 8033 + }, + { + "epoch": 1.225738525390625e-05, + "step": 8033, + "training_step_time": 0.1079108715057373 + }, + { + "epoch": 1.22589111328125e-05, + "model_forward_time": 0.025270700454711914, + "step": 8034 + }, + { + "epoch": 1.22589111328125e-05, + "step": 8034, + "training_step_time": 0.10793447494506836 + }, + { + "epoch": 1.226043701171875e-05, + "model_forward_time": 0.025335311889648438, + "step": 8035 + }, + { + "epoch": 1.226043701171875e-05, + "step": 8035, + "training_step_time": 0.10961413383483887 + }, + { + "epoch": 1.2261962890625e-05, + "model_forward_time": 0.026239871978759766, + "step": 8036 + }, + { + "epoch": 1.2261962890625e-05, + "step": 8036, + "training_step_time": 0.10873675346374512 + }, + { + "epoch": 1.226348876953125e-05, + "model_forward_time": 0.025423049926757812, + "step": 8037 + }, + { + "epoch": 1.226348876953125e-05, + "step": 8037, + "training_step_time": 0.10605287551879883 + }, + { + "epoch": 1.22650146484375e-05, + "model_forward_time": 0.025758981704711914, + "step": 8038 + }, + { + "epoch": 1.22650146484375e-05, + "step": 8038, + "training_step_time": 0.11978030204772949 + }, + { + "epoch": 1.226654052734375e-05, + "model_forward_time": 0.02588939666748047, + "step": 8039 + }, + { + "epoch": 1.226654052734375e-05, + "step": 8039, + "training_step_time": 0.11473894119262695 + }, + { + "epoch": 1.226806640625e-05, + "grad_norm": 0.424248605966568, + "learning_rate": 8.756017514770443e-05, + "loss": 0.0406, + "step": 8040 + }, + { + "epoch": 1.226806640625e-05, + "model_forward_time": 0.025607585906982422, + "step": 8040 + }, + { + "epoch": 1.226806640625e-05, + "step": 8040, + "training_step_time": 0.11047554016113281 + }, + { + "epoch": 1.226959228515625e-05, + "model_forward_time": 0.025839805603027344, + "step": 8041 + }, + { + "epoch": 1.226959228515625e-05, + "step": 8041, + "training_step_time": 0.2095508575439453 + }, + { + "epoch": 1.22711181640625e-05, + "model_forward_time": 0.024628877639770508, + "step": 8042 + }, + { + "epoch": 1.22711181640625e-05, + "step": 8042, + "training_step_time": 0.11656570434570312 + }, + { + "epoch": 1.227264404296875e-05, + "model_forward_time": 0.02409815788269043, + "step": 8043 + }, + { + "epoch": 1.227264404296875e-05, + "step": 8043, + "training_step_time": 0.10518479347229004 + }, + { + "epoch": 1.2274169921875e-05, + "model_forward_time": 0.024974584579467773, + "step": 8044 + }, + { + "epoch": 1.2274169921875e-05, + "step": 8044, + "training_step_time": 0.10662031173706055 + }, + { + "epoch": 1.227569580078125e-05, + "model_forward_time": 0.02510857582092285, + "step": 8045 + }, + { + "epoch": 1.227569580078125e-05, + "step": 8045, + "training_step_time": 0.10593032836914062 + }, + { + "epoch": 1.22772216796875e-05, + "model_forward_time": 0.02510213851928711, + "step": 8046 + }, + { + "epoch": 1.22772216796875e-05, + "step": 8046, + "training_step_time": 0.11077117919921875 + }, + { + "epoch": 1.227874755859375e-05, + "model_forward_time": 0.025410890579223633, + "step": 8047 + }, + { + "epoch": 1.227874755859375e-05, + "step": 8047, + "training_step_time": 0.11119270324707031 + }, + { + "epoch": 1.22802734375e-05, + "model_forward_time": 0.025301456451416016, + "step": 8048 + }, + { + "epoch": 1.22802734375e-05, + "step": 8048, + "training_step_time": 0.21284008026123047 + }, + { + "epoch": 1.228179931640625e-05, + "model_forward_time": 0.02433490753173828, + "step": 8049 + }, + { + "epoch": 1.228179931640625e-05, + "step": 8049, + "training_step_time": 0.2016582489013672 + }, + { + "epoch": 1.22833251953125e-05, + "grad_norm": 0.25658440589904785, + "learning_rate": 8.75237721041583e-05, + "loss": 0.0494, + "step": 8050 + }, + { + "epoch": 1.22833251953125e-05, + "model_forward_time": 0.02441692352294922, + "step": 8050 + }, + { + "epoch": 1.22833251953125e-05, + "step": 8050, + "training_step_time": 0.18492412567138672 + }, + { + "epoch": 1.228485107421875e-05, + "model_forward_time": 0.0244448184967041, + "step": 8051 + }, + { + "epoch": 1.228485107421875e-05, + "step": 8051, + "training_step_time": 0.18129563331604004 + }, + { + "epoch": 1.2286376953125e-05, + "model_forward_time": 0.02532672882080078, + "step": 8052 + }, + { + "epoch": 1.2286376953125e-05, + "step": 8052, + "training_step_time": 0.21875810623168945 + }, + { + "epoch": 1.228790283203125e-05, + "model_forward_time": 0.024783849716186523, + "step": 8053 + }, + { + "epoch": 1.228790283203125e-05, + "step": 8053, + "training_step_time": 0.11652278900146484 + }, + { + "epoch": 1.22894287109375e-05, + "model_forward_time": 0.024639129638671875, + "step": 8054 + }, + { + "epoch": 1.22894287109375e-05, + "step": 8054, + "training_step_time": 0.13737082481384277 + }, + { + "epoch": 1.229095458984375e-05, + "model_forward_time": 0.025293827056884766, + "step": 8055 + }, + { + "epoch": 1.229095458984375e-05, + "step": 8055, + "training_step_time": 0.13673615455627441 + }, + { + "epoch": 1.229248046875e-05, + "model_forward_time": 0.02505016326904297, + "step": 8056 + }, + { + "epoch": 1.229248046875e-05, + "step": 8056, + "training_step_time": 0.12302613258361816 + }, + { + "epoch": 1.229400634765625e-05, + "model_forward_time": 0.024658679962158203, + "step": 8057 + }, + { + "epoch": 1.229400634765625e-05, + "step": 8057, + "training_step_time": 0.12527871131896973 + }, + { + "epoch": 1.22955322265625e-05, + "model_forward_time": 0.025033235549926758, + "step": 8058 + }, + { + "epoch": 1.22955322265625e-05, + "step": 8058, + "training_step_time": 0.11224842071533203 + }, + { + "epoch": 1.229705810546875e-05, + "model_forward_time": 0.025592803955078125, + "step": 8059 + }, + { + "epoch": 1.229705810546875e-05, + "step": 8059, + "training_step_time": 0.14414167404174805 + }, + { + "epoch": 1.2298583984375e-05, + "grad_norm": 0.47664836049079895, + "learning_rate": 8.74873234656908e-05, + "loss": 0.0374, + "step": 8060 + }, + { + "epoch": 1.2298583984375e-05, + "model_forward_time": 0.025231122970581055, + "step": 8060 + }, + { + "epoch": 1.2298583984375e-05, + "step": 8060, + "training_step_time": 0.13704943656921387 + }, + { + "epoch": 1.230010986328125e-05, + "model_forward_time": 0.024730443954467773, + "step": 8061 + }, + { + "epoch": 1.230010986328125e-05, + "step": 8061, + "training_step_time": 0.11207270622253418 + }, + { + "epoch": 1.23016357421875e-05, + "model_forward_time": 0.024997234344482422, + "step": 8062 + }, + { + "epoch": 1.23016357421875e-05, + "step": 8062, + "training_step_time": 0.10901474952697754 + }, + { + "epoch": 1.230316162109375e-05, + "model_forward_time": 0.02504754066467285, + "step": 8063 + }, + { + "epoch": 1.230316162109375e-05, + "step": 8063, + "training_step_time": 0.11756300926208496 + }, + { + "epoch": 1.23046875e-05, + "model_forward_time": 0.025216341018676758, + "step": 8064 + }, + { + "epoch": 1.23046875e-05, + "step": 8064, + "training_step_time": 0.11266231536865234 + }, + { + "epoch": 1.230621337890625e-05, + "model_forward_time": 0.025602102279663086, + "step": 8065 + }, + { + "epoch": 1.230621337890625e-05, + "step": 8065, + "training_step_time": 0.1920170783996582 + }, + { + "epoch": 1.23077392578125e-05, + "model_forward_time": 0.024467945098876953, + "step": 8066 + }, + { + "epoch": 1.23077392578125e-05, + "step": 8066, + "training_step_time": 0.11527323722839355 + }, + { + "epoch": 1.230926513671875e-05, + "model_forward_time": 0.024228334426879883, + "step": 8067 + }, + { + "epoch": 1.230926513671875e-05, + "step": 8067, + "training_step_time": 0.10783243179321289 + }, + { + "epoch": 1.2310791015625e-05, + "model_forward_time": 0.025952577590942383, + "step": 8068 + }, + { + "epoch": 1.2310791015625e-05, + "step": 8068, + "training_step_time": 0.11552977561950684 + }, + { + "epoch": 1.231231689453125e-05, + "model_forward_time": 0.025319814682006836, + "step": 8069 + }, + { + "epoch": 1.231231689453125e-05, + "step": 8069, + "training_step_time": 0.11143112182617188 + }, + { + "epoch": 1.23138427734375e-05, + "grad_norm": 0.5555617213249207, + "learning_rate": 8.745082927659047e-05, + "loss": 0.0395, + "step": 8070 + }, + { + "epoch": 1.23138427734375e-05, + "model_forward_time": 0.025371789932250977, + "step": 8070 + }, + { + "epoch": 1.23138427734375e-05, + "step": 8070, + "training_step_time": 0.1114654541015625 + }, + { + "epoch": 1.231536865234375e-05, + "model_forward_time": 0.02511310577392578, + "step": 8071 + }, + { + "epoch": 1.231536865234375e-05, + "step": 8071, + "training_step_time": 0.10948944091796875 + }, + { + "epoch": 1.231689453125e-05, + "model_forward_time": 0.025192737579345703, + "step": 8072 + }, + { + "epoch": 1.231689453125e-05, + "step": 8072, + "training_step_time": 0.11054372787475586 + }, + { + "epoch": 1.231842041015625e-05, + "model_forward_time": 0.02531290054321289, + "step": 8073 + }, + { + "epoch": 1.231842041015625e-05, + "step": 8073, + "training_step_time": 0.10719180107116699 + }, + { + "epoch": 1.23199462890625e-05, + "model_forward_time": 0.0253293514251709, + "step": 8074 + }, + { + "epoch": 1.23199462890625e-05, + "step": 8074, + "training_step_time": 0.11167025566101074 + }, + { + "epoch": 1.232147216796875e-05, + "model_forward_time": 0.02662825584411621, + "step": 8075 + }, + { + "epoch": 1.232147216796875e-05, + "step": 8075, + "training_step_time": 0.11392736434936523 + }, + { + "epoch": 1.2322998046875e-05, + "model_forward_time": 0.025458097457885742, + "step": 8076 + }, + { + "epoch": 1.2322998046875e-05, + "step": 8076, + "training_step_time": 0.10980010032653809 + }, + { + "epoch": 1.232452392578125e-05, + "model_forward_time": 0.02566814422607422, + "step": 8077 + }, + { + "epoch": 1.232452392578125e-05, + "step": 8077, + "training_step_time": 0.11444497108459473 + }, + { + "epoch": 1.23260498046875e-05, + "model_forward_time": 0.025586366653442383, + "step": 8078 + }, + { + "epoch": 1.23260498046875e-05, + "step": 8078, + "training_step_time": 0.1084909439086914 + }, + { + "epoch": 1.232757568359375e-05, + "model_forward_time": 0.025228261947631836, + "step": 8079 + }, + { + "epoch": 1.232757568359375e-05, + "step": 8079, + "training_step_time": 0.11124134063720703 + }, + { + "epoch": 1.23291015625e-05, + "grad_norm": 0.3775727152824402, + "learning_rate": 8.741428958120118e-05, + "loss": 0.0614, + "step": 8080 + }, + { + "epoch": 1.23291015625e-05, + "model_forward_time": 0.02603006362915039, + "step": 8080 + }, + { + "epoch": 1.23291015625e-05, + "step": 8080, + "training_step_time": 0.1084434986114502 + }, + { + "epoch": 1.233062744140625e-05, + "model_forward_time": 0.024833202362060547, + "step": 8081 + }, + { + "epoch": 1.233062744140625e-05, + "step": 8081, + "training_step_time": 0.11174130439758301 + }, + { + "epoch": 1.23321533203125e-05, + "model_forward_time": 0.025392770767211914, + "step": 8082 + }, + { + "epoch": 1.23321533203125e-05, + "step": 8082, + "training_step_time": 0.11500120162963867 + }, + { + "epoch": 1.233367919921875e-05, + "model_forward_time": 0.025792360305786133, + "step": 8083 + }, + { + "epoch": 1.233367919921875e-05, + "step": 8083, + "training_step_time": 0.11186480522155762 + }, + { + "epoch": 1.2335205078125e-05, + "model_forward_time": 0.025766849517822266, + "step": 8084 + }, + { + "epoch": 1.2335205078125e-05, + "step": 8084, + "training_step_time": 0.11236715316772461 + }, + { + "epoch": 1.233673095703125e-05, + "model_forward_time": 0.02521347999572754, + "step": 8085 + }, + { + "epoch": 1.233673095703125e-05, + "step": 8085, + "training_step_time": 0.2122054100036621 + }, + { + "epoch": 1.23382568359375e-05, + "model_forward_time": 0.025249958038330078, + "step": 8086 + }, + { + "epoch": 1.23382568359375e-05, + "step": 8086, + "training_step_time": 0.11527323722839355 + }, + { + "epoch": 1.233978271484375e-05, + "model_forward_time": 0.02473139762878418, + "step": 8087 + }, + { + "epoch": 1.233978271484375e-05, + "step": 8087, + "training_step_time": 0.11561703681945801 + }, + { + "epoch": 1.234130859375e-05, + "model_forward_time": 0.025363445281982422, + "step": 8088 + }, + { + "epoch": 1.234130859375e-05, + "step": 8088, + "training_step_time": 0.10826849937438965 + }, + { + "epoch": 1.234283447265625e-05, + "model_forward_time": 0.02518630027770996, + "step": 8089 + }, + { + "epoch": 1.234283447265625e-05, + "step": 8089, + "training_step_time": 0.1096034049987793 + }, + { + "epoch": 1.23443603515625e-05, + "grad_norm": 0.6591735482215881, + "learning_rate": 8.737770442392212e-05, + "loss": 0.0452, + "step": 8090 + }, + { + "epoch": 1.23443603515625e-05, + "model_forward_time": 0.02527308464050293, + "step": 8090 + }, + { + "epoch": 1.23443603515625e-05, + "step": 8090, + "training_step_time": 0.10939669609069824 + }, + { + "epoch": 1.234588623046875e-05, + "model_forward_time": 0.02529311180114746, + "step": 8091 + }, + { + "epoch": 1.234588623046875e-05, + "step": 8091, + "training_step_time": 0.11006808280944824 + }, + { + "epoch": 1.2347412109375e-05, + "model_forward_time": 0.025275707244873047, + "step": 8092 + }, + { + "epoch": 1.2347412109375e-05, + "step": 8092, + "training_step_time": 0.1374979019165039 + }, + { + "epoch": 1.234893798828125e-05, + "model_forward_time": 0.02574324607849121, + "step": 8093 + }, + { + "epoch": 1.234893798828125e-05, + "step": 8093, + "training_step_time": 0.11505842208862305 + }, + { + "epoch": 1.23504638671875e-05, + "model_forward_time": 0.025410890579223633, + "step": 8094 + }, + { + "epoch": 1.23504638671875e-05, + "step": 8094, + "training_step_time": 0.22623395919799805 + }, + { + "epoch": 1.235198974609375e-05, + "model_forward_time": 0.024717092514038086, + "step": 8095 + }, + { + "epoch": 1.235198974609375e-05, + "step": 8095, + "training_step_time": 0.13234162330627441 + }, + { + "epoch": 1.2353515625e-05, + "model_forward_time": 0.0240933895111084, + "step": 8096 + }, + { + "epoch": 1.2353515625e-05, + "step": 8096, + "training_step_time": 0.11337447166442871 + }, + { + "epoch": 1.235504150390625e-05, + "model_forward_time": 0.025170087814331055, + "step": 8097 + }, + { + "epoch": 1.235504150390625e-05, + "step": 8097, + "training_step_time": 0.11403489112854004 + }, + { + "epoch": 1.23565673828125e-05, + "model_forward_time": 0.025411128997802734, + "step": 8098 + }, + { + "epoch": 1.23565673828125e-05, + "step": 8098, + "training_step_time": 0.1303086280822754 + }, + { + "epoch": 1.235809326171875e-05, + "model_forward_time": 0.02536606788635254, + "step": 8099 + }, + { + "epoch": 1.235809326171875e-05, + "step": 8099, + "training_step_time": 0.12517261505126953 + }, + { + "epoch": 1.2359619140625e-05, + "grad_norm": 0.6788386106491089, + "learning_rate": 8.73410738492077e-05, + "loss": 0.047, + "step": 8100 + }, + { + "epoch": 1.2359619140625e-05, + "model_forward_time": 0.025327205657958984, + "step": 8100 + }, + { + "epoch": 1.2359619140625e-05, + "step": 8100, + "training_step_time": 0.11152124404907227 + }, + { + "epoch": 1.236114501953125e-05, + "model_forward_time": 0.025552988052368164, + "step": 8101 + }, + { + "epoch": 1.236114501953125e-05, + "step": 8101, + "training_step_time": 0.11388063430786133 + }, + { + "epoch": 1.23626708984375e-05, + "model_forward_time": 0.02545332908630371, + "step": 8102 + }, + { + "epoch": 1.23626708984375e-05, + "step": 8102, + "training_step_time": 0.10773468017578125 + }, + { + "epoch": 1.236419677734375e-05, + "model_forward_time": 0.02523183822631836, + "step": 8103 + }, + { + "epoch": 1.236419677734375e-05, + "step": 8103, + "training_step_time": 0.11584734916687012 + }, + { + "epoch": 1.236572265625e-05, + "model_forward_time": 0.024899721145629883, + "step": 8104 + }, + { + "epoch": 1.236572265625e-05, + "step": 8104, + "training_step_time": 0.15821528434753418 + }, + { + "epoch": 1.236724853515625e-05, + "model_forward_time": 0.02474188804626465, + "step": 8105 + }, + { + "epoch": 1.236724853515625e-05, + "step": 8105, + "training_step_time": 0.15355396270751953 + }, + { + "epoch": 1.23687744140625e-05, + "model_forward_time": 0.024437904357910156, + "step": 8106 + }, + { + "epoch": 1.23687744140625e-05, + "step": 8106, + "training_step_time": 0.16364073753356934 + }, + { + "epoch": 1.237030029296875e-05, + "model_forward_time": 0.02473592758178711, + "step": 8107 + }, + { + "epoch": 1.237030029296875e-05, + "step": 8107, + "training_step_time": 0.18017339706420898 + }, + { + "epoch": 1.2371826171875e-05, + "model_forward_time": 0.024404048919677734, + "step": 8108 + }, + { + "epoch": 1.2371826171875e-05, + "step": 8108, + "training_step_time": 0.14826321601867676 + }, + { + "epoch": 1.237335205078125e-05, + "model_forward_time": 0.02372574806213379, + "step": 8109 + }, + { + "epoch": 1.237335205078125e-05, + "step": 8109, + "training_step_time": 0.1697988510131836 + }, + { + "epoch": 1.23748779296875e-05, + "grad_norm": 0.8109952211380005, + "learning_rate": 8.730439790156752e-05, + "loss": 0.055, + "step": 8110 + }, + { + "epoch": 1.23748779296875e-05, + "model_forward_time": 0.025094032287597656, + "step": 8110 + }, + { + "epoch": 1.23748779296875e-05, + "step": 8110, + "training_step_time": 0.13260126113891602 + }, + { + "epoch": 1.237640380859375e-05, + "model_forward_time": 0.023451805114746094, + "step": 8111 + }, + { + "epoch": 1.237640380859375e-05, + "step": 8111, + "training_step_time": 0.12503528594970703 + }, + { + "epoch": 1.23779296875e-05, + "model_forward_time": 0.02404928207397461, + "step": 8112 + }, + { + "epoch": 1.23779296875e-05, + "step": 8112, + "training_step_time": 0.1221613883972168 + }, + { + "epoch": 1.237945556640625e-05, + "model_forward_time": 0.024060726165771484, + "step": 8113 + }, + { + "epoch": 1.237945556640625e-05, + "step": 8113, + "training_step_time": 0.11781787872314453 + }, + { + "epoch": 1.23809814453125e-05, + "model_forward_time": 0.024256467819213867, + "step": 8114 + }, + { + "epoch": 1.23809814453125e-05, + "step": 8114, + "training_step_time": 0.11264300346374512 + }, + { + "epoch": 1.238250732421875e-05, + "model_forward_time": 0.023929834365844727, + "step": 8115 + }, + { + "epoch": 1.238250732421875e-05, + "step": 8115, + "training_step_time": 0.11896085739135742 + }, + { + "epoch": 1.2384033203125e-05, + "model_forward_time": 0.026053905487060547, + "step": 8116 + }, + { + "epoch": 1.2384033203125e-05, + "step": 8116, + "training_step_time": 0.11521601676940918 + }, + { + "epoch": 1.238555908203125e-05, + "model_forward_time": 0.02526688575744629, + "step": 8117 + }, + { + "epoch": 1.238555908203125e-05, + "step": 8117, + "training_step_time": 0.11238217353820801 + }, + { + "epoch": 1.23870849609375e-05, + "model_forward_time": 0.02555108070373535, + "step": 8118 + }, + { + "epoch": 1.23870849609375e-05, + "step": 8118, + "training_step_time": 0.11261415481567383 + }, + { + "epoch": 1.238861083984375e-05, + "model_forward_time": 0.025472640991210938, + "step": 8119 + }, + { + "epoch": 1.238861083984375e-05, + "step": 8119, + "training_step_time": 0.11106419563293457 + }, + { + "epoch": 1.239013671875e-05, + "grad_norm": 0.28319039940834045, + "learning_rate": 8.72676766255663e-05, + "loss": 0.0446, + "step": 8120 + }, + { + "epoch": 1.239013671875e-05, + "model_forward_time": 0.02457404136657715, + "step": 8120 + }, + { + "epoch": 1.239013671875e-05, + "step": 8120, + "training_step_time": 0.10783815383911133 + }, + { + "epoch": 1.239166259765625e-05, + "model_forward_time": 0.024559736251831055, + "step": 8121 + }, + { + "epoch": 1.239166259765625e-05, + "step": 8121, + "training_step_time": 0.10813045501708984 + }, + { + "epoch": 1.23931884765625e-05, + "model_forward_time": 0.02554011344909668, + "step": 8122 + }, + { + "epoch": 1.23931884765625e-05, + "step": 8122, + "training_step_time": 0.10766816139221191 + }, + { + "epoch": 1.239471435546875e-05, + "model_forward_time": 0.025222301483154297, + "step": 8123 + }, + { + "epoch": 1.239471435546875e-05, + "step": 8123, + "training_step_time": 0.10807418823242188 + }, + { + "epoch": 1.2396240234375e-05, + "model_forward_time": 0.02599811553955078, + "step": 8124 + }, + { + "epoch": 1.2396240234375e-05, + "step": 8124, + "training_step_time": 0.11099028587341309 + }, + { + "epoch": 1.239776611328125e-05, + "model_forward_time": 0.02555370330810547, + "step": 8125 + }, + { + "epoch": 1.239776611328125e-05, + "step": 8125, + "training_step_time": 0.10835576057434082 + }, + { + "epoch": 1.23992919921875e-05, + "model_forward_time": 0.025142431259155273, + "step": 8126 + }, + { + "epoch": 1.23992919921875e-05, + "step": 8126, + "training_step_time": 0.10769772529602051 + }, + { + "epoch": 1.240081787109375e-05, + "model_forward_time": 0.02531909942626953, + "step": 8127 + }, + { + "epoch": 1.240081787109375e-05, + "step": 8127, + "training_step_time": 0.10702157020568848 + }, + { + "epoch": 1.240234375e-05, + "model_forward_time": 0.02565932273864746, + "step": 8128 + }, + { + "epoch": 1.240234375e-05, + "step": 8128, + "training_step_time": 0.11246895790100098 + }, + { + "epoch": 1.240386962890625e-05, + "model_forward_time": 0.02562236785888672, + "step": 8129 + }, + { + "epoch": 1.240386962890625e-05, + "step": 8129, + "training_step_time": 0.11068892478942871 + }, + { + "epoch": 1.24053955078125e-05, + "grad_norm": 0.7070178389549255, + "learning_rate": 8.723091006582389e-05, + "loss": 0.0599, + "step": 8130 + }, + { + "epoch": 1.24053955078125e-05, + "model_forward_time": 0.02548050880432129, + "step": 8130 + }, + { + "epoch": 1.24053955078125e-05, + "step": 8130, + "training_step_time": 0.21427464485168457 + }, + { + "epoch": 1.240692138671875e-05, + "model_forward_time": 0.025106430053710938, + "step": 8131 + }, + { + "epoch": 1.240692138671875e-05, + "step": 8131, + "training_step_time": 0.12172651290893555 + }, + { + "epoch": 1.2408447265625e-05, + "model_forward_time": 0.024637699127197266, + "step": 8132 + }, + { + "epoch": 1.2408447265625e-05, + "step": 8132, + "training_step_time": 0.10773253440856934 + }, + { + "epoch": 1.240997314453125e-05, + "model_forward_time": 0.025692224502563477, + "step": 8133 + }, + { + "epoch": 1.240997314453125e-05, + "step": 8133, + "training_step_time": 0.11088895797729492 + }, + { + "epoch": 1.24114990234375e-05, + "model_forward_time": 0.0256502628326416, + "step": 8134 + }, + { + "epoch": 1.24114990234375e-05, + "step": 8134, + "training_step_time": 0.10803699493408203 + }, + { + "epoch": 1.241302490234375e-05, + "model_forward_time": 0.0263674259185791, + "step": 8135 + }, + { + "epoch": 1.241302490234375e-05, + "step": 8135, + "training_step_time": 0.10741662979125977 + }, + { + "epoch": 1.241455078125e-05, + "model_forward_time": 0.025513410568237305, + "step": 8136 + }, + { + "epoch": 1.241455078125e-05, + "step": 8136, + "training_step_time": 0.10782289505004883 + }, + { + "epoch": 1.241607666015625e-05, + "model_forward_time": 0.025393009185791016, + "step": 8137 + }, + { + "epoch": 1.241607666015625e-05, + "step": 8137, + "training_step_time": 0.13406801223754883 + }, + { + "epoch": 1.24176025390625e-05, + "model_forward_time": 0.02581310272216797, + "step": 8138 + }, + { + "epoch": 1.24176025390625e-05, + "step": 8138, + "training_step_time": 0.10785317420959473 + }, + { + "epoch": 1.241912841796875e-05, + "model_forward_time": 0.025168418884277344, + "step": 8139 + }, + { + "epoch": 1.241912841796875e-05, + "step": 8139, + "training_step_time": 0.1128695011138916 + }, + { + "epoch": 1.2420654296875e-05, + "grad_norm": 0.4179186522960663, + "learning_rate": 8.719409826701508e-05, + "loss": 0.0562, + "step": 8140 + }, + { + "epoch": 1.2420654296875e-05, + "model_forward_time": 0.025157451629638672, + "step": 8140 + }, + { + "epoch": 1.2420654296875e-05, + "step": 8140, + "training_step_time": 0.12705111503601074 + }, + { + "epoch": 1.242218017578125e-05, + "model_forward_time": 0.025702953338623047, + "step": 8141 + }, + { + "epoch": 1.242218017578125e-05, + "step": 8141, + "training_step_time": 0.13133645057678223 + }, + { + "epoch": 1.24237060546875e-05, + "model_forward_time": 0.025308609008789062, + "step": 8142 + }, + { + "epoch": 1.24237060546875e-05, + "step": 8142, + "training_step_time": 0.11477875709533691 + }, + { + "epoch": 1.242523193359375e-05, + "model_forward_time": 0.025319337844848633, + "step": 8143 + }, + { + "epoch": 1.242523193359375e-05, + "step": 8143, + "training_step_time": 0.11365652084350586 + }, + { + "epoch": 1.24267578125e-05, + "model_forward_time": 0.02421736717224121, + "step": 8144 + }, + { + "epoch": 1.24267578125e-05, + "step": 8144, + "training_step_time": 0.11259293556213379 + }, + { + "epoch": 1.242828369140625e-05, + "model_forward_time": 0.025304079055786133, + "step": 8145 + }, + { + "epoch": 1.242828369140625e-05, + "step": 8145, + "training_step_time": 0.19833993911743164 + }, + { + "epoch": 1.24298095703125e-05, + "model_forward_time": 0.024834632873535156, + "step": 8146 + }, + { + "epoch": 1.24298095703125e-05, + "step": 8146, + "training_step_time": 0.11335206031799316 + }, + { + "epoch": 1.243133544921875e-05, + "model_forward_time": 0.024581432342529297, + "step": 8147 + }, + { + "epoch": 1.243133544921875e-05, + "step": 8147, + "training_step_time": 0.13666725158691406 + }, + { + "epoch": 1.2432861328125e-05, + "model_forward_time": 0.025397777557373047, + "step": 8148 + }, + { + "epoch": 1.2432861328125e-05, + "step": 8148, + "training_step_time": 0.11576175689697266 + }, + { + "epoch": 1.243438720703125e-05, + "model_forward_time": 0.025118589401245117, + "step": 8149 + }, + { + "epoch": 1.243438720703125e-05, + "step": 8149, + "training_step_time": 0.16438603401184082 + }, + { + "epoch": 1.24359130859375e-05, + "grad_norm": 0.5981489419937134, + "learning_rate": 8.715724127386972e-05, + "loss": 0.0463, + "step": 8150 + }, + { + "epoch": 1.24359130859375e-05, + "model_forward_time": 0.025103330612182617, + "step": 8150 + }, + { + "epoch": 1.24359130859375e-05, + "step": 8150, + "training_step_time": 0.1981360912322998 + }, + { + "epoch": 1.243743896484375e-05, + "model_forward_time": 0.027241945266723633, + "step": 8151 + }, + { + "epoch": 1.243743896484375e-05, + "step": 8151, + "training_step_time": 0.10811877250671387 + }, + { + "epoch": 1.243896484375e-05, + "model_forward_time": 0.02480626106262207, + "step": 8152 + }, + { + "epoch": 1.243896484375e-05, + "step": 8152, + "training_step_time": 0.12440848350524902 + }, + { + "epoch": 1.244049072265625e-05, + "model_forward_time": 0.025801658630371094, + "step": 8153 + }, + { + "epoch": 1.244049072265625e-05, + "step": 8153, + "training_step_time": 0.11080479621887207 + }, + { + "epoch": 1.24420166015625e-05, + "model_forward_time": 0.025527000427246094, + "step": 8154 + }, + { + "epoch": 1.24420166015625e-05, + "step": 8154, + "training_step_time": 0.10692644119262695 + }, + { + "epoch": 1.244354248046875e-05, + "model_forward_time": 0.025470495223999023, + "step": 8155 + }, + { + "epoch": 1.244354248046875e-05, + "step": 8155, + "training_step_time": 0.19858312606811523 + }, + { + "epoch": 1.2445068359375e-05, + "model_forward_time": 0.024739980697631836, + "step": 8156 + }, + { + "epoch": 1.2445068359375e-05, + "step": 8156, + "training_step_time": 0.10601687431335449 + }, + { + "epoch": 1.244659423828125e-05, + "model_forward_time": 0.024831295013427734, + "step": 8157 + }, + { + "epoch": 1.244659423828125e-05, + "step": 8157, + "training_step_time": 0.10599398612976074 + }, + { + "epoch": 1.24481201171875e-05, + "model_forward_time": 0.025536060333251953, + "step": 8158 + }, + { + "epoch": 1.24481201171875e-05, + "step": 8158, + "training_step_time": 0.10997915267944336 + }, + { + "epoch": 1.244964599609375e-05, + "model_forward_time": 0.025259971618652344, + "step": 8159 + }, + { + "epoch": 1.244964599609375e-05, + "step": 8159, + "training_step_time": 0.10826468467712402 + }, + { + "epoch": 1.2451171875e-05, + "grad_norm": 0.704897940158844, + "learning_rate": 8.71203391311725e-05, + "loss": 0.0522, + "step": 8160 + }, + { + "epoch": 1.2451171875e-05, + "model_forward_time": 0.0256044864654541, + "step": 8160 + }, + { + "epoch": 1.2451171875e-05, + "step": 8160, + "training_step_time": 0.11163139343261719 + }, + { + "epoch": 1.245269775390625e-05, + "model_forward_time": 0.025558948516845703, + "step": 8161 + }, + { + "epoch": 1.245269775390625e-05, + "step": 8161, + "training_step_time": 0.10732913017272949 + }, + { + "epoch": 1.24542236328125e-05, + "model_forward_time": 0.025450468063354492, + "step": 8162 + }, + { + "epoch": 1.24542236328125e-05, + "step": 8162, + "training_step_time": 0.10640478134155273 + }, + { + "epoch": 1.245574951171875e-05, + "model_forward_time": 0.02585744857788086, + "step": 8163 + }, + { + "epoch": 1.245574951171875e-05, + "step": 8163, + "training_step_time": 0.11344099044799805 + }, + { + "epoch": 1.2457275390625e-05, + "model_forward_time": 0.025229454040527344, + "step": 8164 + }, + { + "epoch": 1.2457275390625e-05, + "step": 8164, + "training_step_time": 0.10651540756225586 + }, + { + "epoch": 1.245880126953125e-05, + "model_forward_time": 0.02511882781982422, + "step": 8165 + }, + { + "epoch": 1.245880126953125e-05, + "step": 8165, + "training_step_time": 0.10824346542358398 + }, + { + "epoch": 1.24603271484375e-05, + "model_forward_time": 0.025422334671020508, + "step": 8166 + }, + { + "epoch": 1.24603271484375e-05, + "step": 8166, + "training_step_time": 0.10669851303100586 + }, + { + "epoch": 1.246185302734375e-05, + "model_forward_time": 0.025738239288330078, + "step": 8167 + }, + { + "epoch": 1.246185302734375e-05, + "step": 8167, + "training_step_time": 0.10750603675842285 + }, + { + "epoch": 1.246337890625e-05, + "model_forward_time": 0.02566385269165039, + "step": 8168 + }, + { + "epoch": 1.246337890625e-05, + "step": 8168, + "training_step_time": 0.1098947525024414 + }, + { + "epoch": 1.246490478515625e-05, + "model_forward_time": 0.025243520736694336, + "step": 8169 + }, + { + "epoch": 1.246490478515625e-05, + "step": 8169, + "training_step_time": 0.11076760292053223 + }, + { + "epoch": 1.24664306640625e-05, + "grad_norm": 0.17834721505641937, + "learning_rate": 8.708339188376302e-05, + "loss": 0.0466, + "step": 8170 + }, + { + "epoch": 1.24664306640625e-05, + "model_forward_time": 0.025246143341064453, + "step": 8170 + }, + { + "epoch": 1.24664306640625e-05, + "step": 8170, + "training_step_time": 0.10639619827270508 + }, + { + "epoch": 1.246795654296875e-05, + "model_forward_time": 0.02529597282409668, + "step": 8171 + }, + { + "epoch": 1.246795654296875e-05, + "step": 8171, + "training_step_time": 0.11070513725280762 + }, + { + "epoch": 1.2469482421875e-05, + "model_forward_time": 0.026114940643310547, + "step": 8172 + }, + { + "epoch": 1.2469482421875e-05, + "step": 8172, + "training_step_time": 0.10739803314208984 + }, + { + "epoch": 1.247100830078125e-05, + "model_forward_time": 0.027096033096313477, + "step": 8173 + }, + { + "epoch": 1.247100830078125e-05, + "step": 8173, + "training_step_time": 0.11214208602905273 + }, + { + "epoch": 1.24725341796875e-05, + "model_forward_time": 0.02624344825744629, + "step": 8174 + }, + { + "epoch": 1.24725341796875e-05, + "step": 8174, + "training_step_time": 0.1440715789794922 + }, + { + "epoch": 1.247406005859375e-05, + "model_forward_time": 0.025405168533325195, + "step": 8175 + }, + { + "epoch": 1.247406005859375e-05, + "step": 8175, + "training_step_time": 0.19591522216796875 + }, + { + "epoch": 1.24755859375e-05, + "model_forward_time": 0.024134159088134766, + "step": 8176 + }, + { + "epoch": 1.24755859375e-05, + "step": 8176, + "training_step_time": 0.15391206741333008 + }, + { + "epoch": 1.247711181640625e-05, + "model_forward_time": 0.025673627853393555, + "step": 8177 + }, + { + "epoch": 1.247711181640625e-05, + "step": 8177, + "training_step_time": 0.14055562019348145 + }, + { + "epoch": 1.24786376953125e-05, + "model_forward_time": 0.025114774703979492, + "step": 8178 + }, + { + "epoch": 1.24786376953125e-05, + "step": 8178, + "training_step_time": 0.13091659545898438 + }, + { + "epoch": 1.248016357421875e-05, + "model_forward_time": 0.02469491958618164, + "step": 8179 + }, + { + "epoch": 1.248016357421875e-05, + "step": 8179, + "training_step_time": 0.12354660034179688 + }, + { + "epoch": 1.2481689453125e-05, + "grad_norm": 0.37783750891685486, + "learning_rate": 8.704639957653567e-05, + "loss": 0.0421, + "step": 8180 + }, + { + "epoch": 1.2481689453125e-05, + "model_forward_time": 0.02525472640991211, + "step": 8180 + }, + { + "epoch": 1.2481689453125e-05, + "step": 8180, + "training_step_time": 0.11915230751037598 + }, + { + "epoch": 1.248321533203125e-05, + "model_forward_time": 0.025423765182495117, + "step": 8181 + }, + { + "epoch": 1.248321533203125e-05, + "step": 8181, + "training_step_time": 0.11789155006408691 + }, + { + "epoch": 1.24847412109375e-05, + "model_forward_time": 0.02539658546447754, + "step": 8182 + }, + { + "epoch": 1.24847412109375e-05, + "step": 8182, + "training_step_time": 0.15716838836669922 + }, + { + "epoch": 1.248626708984375e-05, + "model_forward_time": 0.0246734619140625, + "step": 8183 + }, + { + "epoch": 1.248626708984375e-05, + "step": 8183, + "training_step_time": 0.11026191711425781 + }, + { + "epoch": 1.248779296875e-05, + "model_forward_time": 0.025103092193603516, + "step": 8184 + }, + { + "epoch": 1.248779296875e-05, + "step": 8184, + "training_step_time": 0.11174130439758301 + }, + { + "epoch": 1.248931884765625e-05, + "model_forward_time": 0.02518439292907715, + "step": 8185 + }, + { + "epoch": 1.248931884765625e-05, + "step": 8185, + "training_step_time": 0.11531519889831543 + }, + { + "epoch": 1.24908447265625e-05, + "model_forward_time": 0.02520608901977539, + "step": 8186 + }, + { + "epoch": 1.24908447265625e-05, + "step": 8186, + "training_step_time": 0.1285996437072754 + }, + { + "epoch": 1.249237060546875e-05, + "model_forward_time": 0.02532052993774414, + "step": 8187 + }, + { + "epoch": 1.249237060546875e-05, + "step": 8187, + "training_step_time": 0.11246442794799805 + }, + { + "epoch": 1.2493896484375e-05, + "model_forward_time": 0.025151491165161133, + "step": 8188 + }, + { + "epoch": 1.2493896484375e-05, + "step": 8188, + "training_step_time": 0.12714242935180664 + }, + { + "epoch": 1.249542236328125e-05, + "model_forward_time": 0.025516033172607422, + "step": 8189 + }, + { + "epoch": 1.249542236328125e-05, + "step": 8189, + "training_step_time": 0.11196565628051758 + }, + { + "epoch": 1.24969482421875e-05, + "grad_norm": 0.38588637113571167, + "learning_rate": 8.700936225443959e-05, + "loss": 0.0417, + "step": 8190 + }, + { + "epoch": 1.24969482421875e-05, + "model_forward_time": 0.025374889373779297, + "step": 8190 + }, + { + "epoch": 1.24969482421875e-05, + "step": 8190, + "training_step_time": 0.21080279350280762 + }, + { + "epoch": 1.249847412109375e-05, + "model_forward_time": 0.024503469467163086, + "step": 8191 + }, + { + "epoch": 1.249847412109375e-05, + "step": 8191, + "training_step_time": 0.11725592613220215 + }, + { + "epoch": 1.25e-05, + "model_forward_time": 0.024637222290039062, + "step": 8192 + }, + { + "epoch": 1.25e-05, + "step": 8192, + "training_step_time": 0.14208698272705078 + }, + { + "epoch": 1.250152587890625e-05, + "model_forward_time": 0.024822235107421875, + "step": 8193 + }, + { + "epoch": 1.250152587890625e-05, + "step": 8193, + "training_step_time": 0.14830398559570312 + }, + { + "epoch": 1.25030517578125e-05, + "model_forward_time": 0.02435922622680664, + "step": 8194 + }, + { + "epoch": 1.25030517578125e-05, + "step": 8194, + "training_step_time": 0.21439456939697266 + }, + { + "epoch": 1.250457763671875e-05, + "model_forward_time": 0.02557516098022461, + "step": 8195 + }, + { + "epoch": 1.250457763671875e-05, + "step": 8195, + "training_step_time": 0.11522245407104492 + }, + { + "epoch": 1.2506103515625e-05, + "model_forward_time": 0.024249792098999023, + "step": 8196 + }, + { + "epoch": 1.2506103515625e-05, + "step": 8196, + "training_step_time": 0.10521292686462402 + }, + { + "epoch": 1.250762939453125e-05, + "model_forward_time": 0.025287389755249023, + "step": 8197 + }, + { + "epoch": 1.250762939453125e-05, + "step": 8197, + "training_step_time": 0.11505365371704102 + }, + { + "epoch": 1.25091552734375e-05, + "model_forward_time": 0.025240421295166016, + "step": 8198 + }, + { + "epoch": 1.25091552734375e-05, + "step": 8198, + "training_step_time": 0.11180973052978516 + }, + { + "epoch": 1.251068115234375e-05, + "model_forward_time": 0.025684833526611328, + "step": 8199 + }, + { + "epoch": 1.251068115234375e-05, + "step": 8199, + "training_step_time": 0.11319279670715332 + }, + { + "epoch": 1.251220703125e-05, + "grad_norm": 0.7702202796936035, + "learning_rate": 8.697227996247861e-05, + "loss": 0.0425, + "step": 8200 + }, + { + "epoch": 1.251220703125e-05, + "model_forward_time": 0.024726152420043945, + "step": 8200 + }, + { + "epoch": 1.251220703125e-05, + "step": 8200, + "training_step_time": 0.18407177925109863 + }, + { + "epoch": 1.251373291015625e-05, + "model_forward_time": 0.024876832962036133, + "step": 8201 + }, + { + "epoch": 1.251373291015625e-05, + "step": 8201, + "training_step_time": 0.10600090026855469 + }, + { + "epoch": 1.25152587890625e-05, + "model_forward_time": 0.024321317672729492, + "step": 8202 + }, + { + "epoch": 1.25152587890625e-05, + "step": 8202, + "training_step_time": 0.1080012321472168 + }, + { + "epoch": 1.251678466796875e-05, + "model_forward_time": 0.02572941780090332, + "step": 8203 + }, + { + "epoch": 1.251678466796875e-05, + "step": 8203, + "training_step_time": 0.11006355285644531 + }, + { + "epoch": 1.2518310546875e-05, + "model_forward_time": 0.024849414825439453, + "step": 8204 + }, + { + "epoch": 1.2518310546875e-05, + "step": 8204, + "training_step_time": 0.11139583587646484 + }, + { + "epoch": 1.251983642578125e-05, + "model_forward_time": 0.02514791488647461, + "step": 8205 + }, + { + "epoch": 1.251983642578125e-05, + "step": 8205, + "training_step_time": 0.18909645080566406 + }, + { + "epoch": 1.25213623046875e-05, + "model_forward_time": 0.024007081985473633, + "step": 8206 + }, + { + "epoch": 1.25213623046875e-05, + "step": 8206, + "training_step_time": 0.19478559494018555 + }, + { + "epoch": 1.252288818359375e-05, + "model_forward_time": 0.023978471755981445, + "step": 8207 + }, + { + "epoch": 1.252288818359375e-05, + "step": 8207, + "training_step_time": 0.18808794021606445 + }, + { + "epoch": 1.25244140625e-05, + "model_forward_time": 0.024257659912109375, + "step": 8208 + }, + { + "epoch": 1.25244140625e-05, + "step": 8208, + "training_step_time": 0.17804312705993652 + }, + { + "epoch": 1.252593994140625e-05, + "model_forward_time": 0.0243074893951416, + "step": 8209 + }, + { + "epoch": 1.252593994140625e-05, + "step": 8209, + "training_step_time": 0.16118907928466797 + }, + { + "epoch": 1.25274658203125e-05, + "grad_norm": 0.39683008193969727, + "learning_rate": 8.693515274571123e-05, + "loss": 0.0297, + "step": 8210 + }, + { + "epoch": 1.25274658203125e-05, + "model_forward_time": 0.024426698684692383, + "step": 8210 + }, + { + "epoch": 1.25274658203125e-05, + "step": 8210, + "training_step_time": 0.107757568359375 + }, + { + "epoch": 1.252899169921875e-05, + "model_forward_time": 0.02456212043762207, + "step": 8211 + }, + { + "epoch": 1.252899169921875e-05, + "step": 8211, + "training_step_time": 0.10657000541687012 + }, + { + "epoch": 1.2530517578125e-05, + "model_forward_time": 0.02526235580444336, + "step": 8212 + }, + { + "epoch": 1.2530517578125e-05, + "step": 8212, + "training_step_time": 0.11682629585266113 + }, + { + "epoch": 1.253204345703125e-05, + "model_forward_time": 0.02599048614501953, + "step": 8213 + }, + { + "epoch": 1.253204345703125e-05, + "step": 8213, + "training_step_time": 0.10712933540344238 + }, + { + "epoch": 1.25335693359375e-05, + "model_forward_time": 0.025165319442749023, + "step": 8214 + }, + { + "epoch": 1.25335693359375e-05, + "step": 8214, + "training_step_time": 0.21603631973266602 + }, + { + "epoch": 1.253509521484375e-05, + "model_forward_time": 0.02450108528137207, + "step": 8215 + }, + { + "epoch": 1.253509521484375e-05, + "step": 8215, + "training_step_time": 0.11315226554870605 + }, + { + "epoch": 1.253662109375e-05, + "model_forward_time": 0.024524688720703125, + "step": 8216 + }, + { + "epoch": 1.253662109375e-05, + "step": 8216, + "training_step_time": 0.11068987846374512 + }, + { + "epoch": 1.253814697265625e-05, + "model_forward_time": 0.025288820266723633, + "step": 8217 + }, + { + "epoch": 1.253814697265625e-05, + "step": 8217, + "training_step_time": 0.17493104934692383 + }, + { + "epoch": 1.25396728515625e-05, + "model_forward_time": 0.024574995040893555, + "step": 8218 + }, + { + "epoch": 1.25396728515625e-05, + "step": 8218, + "training_step_time": 0.16621136665344238 + }, + { + "epoch": 1.254119873046875e-05, + "model_forward_time": 0.02475452423095703, + "step": 8219 + }, + { + "epoch": 1.254119873046875e-05, + "step": 8219, + "training_step_time": 0.10582661628723145 + }, + { + "epoch": 1.2542724609375e-05, + "grad_norm": 0.6069748997688293, + "learning_rate": 8.689798064925049e-05, + "loss": 0.039, + "step": 8220 + }, + { + "epoch": 1.2542724609375e-05, + "model_forward_time": 0.02534794807434082, + "step": 8220 + }, + { + "epoch": 1.2542724609375e-05, + "step": 8220, + "training_step_time": 0.12897920608520508 + }, + { + "epoch": 1.254425048828125e-05, + "model_forward_time": 0.025569915771484375, + "step": 8221 + }, + { + "epoch": 1.254425048828125e-05, + "step": 8221, + "training_step_time": 0.1462569236755371 + }, + { + "epoch": 1.25457763671875e-05, + "model_forward_time": 0.024427175521850586, + "step": 8222 + }, + { + "epoch": 1.25457763671875e-05, + "step": 8222, + "training_step_time": 0.13944578170776367 + }, + { + "epoch": 1.254730224609375e-05, + "model_forward_time": 0.02634596824645996, + "step": 8223 + }, + { + "epoch": 1.254730224609375e-05, + "step": 8223, + "training_step_time": 0.12539887428283691 + }, + { + "epoch": 1.2548828125e-05, + "model_forward_time": 0.029315471649169922, + "step": 8224 + }, + { + "epoch": 1.2548828125e-05, + "step": 8224, + "training_step_time": 0.12380790710449219 + }, + { + "epoch": 1.255035400390625e-05, + "model_forward_time": 0.02501201629638672, + "step": 8225 + }, + { + "epoch": 1.255035400390625e-05, + "step": 8225, + "training_step_time": 0.11406183242797852 + }, + { + "epoch": 1.25518798828125e-05, + "model_forward_time": 0.025432348251342773, + "step": 8226 + }, + { + "epoch": 1.25518798828125e-05, + "step": 8226, + "training_step_time": 0.22187042236328125 + }, + { + "epoch": 1.255340576171875e-05, + "model_forward_time": 0.024504899978637695, + "step": 8227 + }, + { + "epoch": 1.255340576171875e-05, + "step": 8227, + "training_step_time": 0.12736296653747559 + }, + { + "epoch": 1.2554931640625e-05, + "model_forward_time": 0.024104833602905273, + "step": 8228 + }, + { + "epoch": 1.2554931640625e-05, + "step": 8228, + "training_step_time": 0.11540102958679199 + }, + { + "epoch": 1.255645751953125e-05, + "model_forward_time": 0.02513885498046875, + "step": 8229 + }, + { + "epoch": 1.255645751953125e-05, + "step": 8229, + "training_step_time": 0.12282252311706543 + }, + { + "epoch": 1.25579833984375e-05, + "grad_norm": 0.31822460889816284, + "learning_rate": 8.686076371826401e-05, + "loss": 0.0414, + "step": 8230 + }, + { + "epoch": 1.25579833984375e-05, + "model_forward_time": 0.02523970603942871, + "step": 8230 + }, + { + "epoch": 1.25579833984375e-05, + "step": 8230, + "training_step_time": 0.11207962036132812 + }, + { + "epoch": 1.255950927734375e-05, + "model_forward_time": 0.02538466453552246, + "step": 8231 + }, + { + "epoch": 1.255950927734375e-05, + "step": 8231, + "training_step_time": 0.19382572174072266 + }, + { + "epoch": 1.256103515625e-05, + "model_forward_time": 0.024582862854003906, + "step": 8232 + }, + { + "epoch": 1.256103515625e-05, + "step": 8232, + "training_step_time": 0.11188554763793945 + }, + { + "epoch": 1.256256103515625e-05, + "model_forward_time": 0.02440953254699707, + "step": 8233 + }, + { + "epoch": 1.256256103515625e-05, + "step": 8233, + "training_step_time": 0.13836240768432617 + }, + { + "epoch": 1.25640869140625e-05, + "model_forward_time": 0.025472640991210938, + "step": 8234 + }, + { + "epoch": 1.25640869140625e-05, + "step": 8234, + "training_step_time": 0.15541815757751465 + }, + { + "epoch": 1.256561279296875e-05, + "model_forward_time": 0.02447366714477539, + "step": 8235 + }, + { + "epoch": 1.256561279296875e-05, + "step": 8235, + "training_step_time": 0.17270278930664062 + }, + { + "epoch": 1.2567138671875e-05, + "model_forward_time": 0.02411031723022461, + "step": 8236 + }, + { + "epoch": 1.2567138671875e-05, + "step": 8236, + "training_step_time": 0.16235613822937012 + }, + { + "epoch": 1.256866455078125e-05, + "model_forward_time": 0.024165868759155273, + "step": 8237 + }, + { + "epoch": 1.256866455078125e-05, + "step": 8237, + "training_step_time": 0.15903329849243164 + }, + { + "epoch": 1.25701904296875e-05, + "model_forward_time": 0.024572134017944336, + "step": 8238 + }, + { + "epoch": 1.25701904296875e-05, + "step": 8238, + "training_step_time": 0.15021109580993652 + }, + { + "epoch": 1.257171630859375e-05, + "model_forward_time": 0.024484872817993164, + "step": 8239 + }, + { + "epoch": 1.257171630859375e-05, + "step": 8239, + "training_step_time": 0.12743806838989258 + }, + { + "epoch": 1.25732421875e-05, + "grad_norm": 0.36488208174705505, + "learning_rate": 8.682350199797388e-05, + "loss": 0.0382, + "step": 8240 + }, + { + "epoch": 1.25732421875e-05, + "model_forward_time": 0.025040864944458008, + "step": 8240 + }, + { + "epoch": 1.25732421875e-05, + "step": 8240, + "training_step_time": 0.17049050331115723 + }, + { + "epoch": 1.257476806640625e-05, + "model_forward_time": 0.024437427520751953, + "step": 8241 + }, + { + "epoch": 1.257476806640625e-05, + "step": 8241, + "training_step_time": 0.10408997535705566 + }, + { + "epoch": 1.25762939453125e-05, + "model_forward_time": 0.024277687072753906, + "step": 8242 + }, + { + "epoch": 1.25762939453125e-05, + "step": 8242, + "training_step_time": 0.11392378807067871 + }, + { + "epoch": 1.257781982421875e-05, + "model_forward_time": 0.024971723556518555, + "step": 8243 + }, + { + "epoch": 1.257781982421875e-05, + "step": 8243, + "training_step_time": 0.10902976989746094 + }, + { + "epoch": 1.2579345703125e-05, + "model_forward_time": 0.026078224182128906, + "step": 8244 + }, + { + "epoch": 1.2579345703125e-05, + "step": 8244, + "training_step_time": 0.11269617080688477 + }, + { + "epoch": 1.258087158203125e-05, + "model_forward_time": 0.025257110595703125, + "step": 8245 + }, + { + "epoch": 1.258087158203125e-05, + "step": 8245, + "training_step_time": 0.10592198371887207 + }, + { + "epoch": 1.25823974609375e-05, + "model_forward_time": 0.025606632232666016, + "step": 8246 + }, + { + "epoch": 1.25823974609375e-05, + "step": 8246, + "training_step_time": 0.11192727088928223 + }, + { + "epoch": 1.258392333984375e-05, + "model_forward_time": 0.025290489196777344, + "step": 8247 + }, + { + "epoch": 1.258392333984375e-05, + "step": 8247, + "training_step_time": 0.10774350166320801 + }, + { + "epoch": 1.258544921875e-05, + "model_forward_time": 0.025415658950805664, + "step": 8248 + }, + { + "epoch": 1.258544921875e-05, + "step": 8248, + "training_step_time": 0.10636353492736816 + }, + { + "epoch": 1.258697509765625e-05, + "model_forward_time": 0.02469491958618164, + "step": 8249 + }, + { + "epoch": 1.258697509765625e-05, + "step": 8249, + "training_step_time": 0.10709023475646973 + }, + { + "epoch": 1.25885009765625e-05, + "grad_norm": 0.365360289812088, + "learning_rate": 8.678619553365659e-05, + "loss": 0.0381, + "step": 8250 + }, + { + "epoch": 1.25885009765625e-05, + "model_forward_time": 0.024948835372924805, + "step": 8250 + }, + { + "epoch": 1.25885009765625e-05, + "step": 8250, + "training_step_time": 0.10601091384887695 + }, + { + "epoch": 1.259002685546875e-05, + "model_forward_time": 0.025475502014160156, + "step": 8251 + }, + { + "epoch": 1.259002685546875e-05, + "step": 8251, + "training_step_time": 0.1078944206237793 + }, + { + "epoch": 1.2591552734375e-05, + "model_forward_time": 0.024945735931396484, + "step": 8252 + }, + { + "epoch": 1.2591552734375e-05, + "step": 8252, + "training_step_time": 0.10906624794006348 + }, + { + "epoch": 1.259307861328125e-05, + "model_forward_time": 0.025359392166137695, + "step": 8253 + }, + { + "epoch": 1.259307861328125e-05, + "step": 8253, + "training_step_time": 0.11066246032714844 + }, + { + "epoch": 1.25946044921875e-05, + "model_forward_time": 0.02534031867980957, + "step": 8254 + }, + { + "epoch": 1.25946044921875e-05, + "step": 8254, + "training_step_time": 0.11054682731628418 + }, + { + "epoch": 1.259613037109375e-05, + "model_forward_time": 0.02546095848083496, + "step": 8255 + }, + { + "epoch": 1.259613037109375e-05, + "step": 8255, + "training_step_time": 0.11228585243225098 + }, + { + "epoch": 1.259765625e-05, + "model_forward_time": 0.026200532913208008, + "step": 8256 + }, + { + "epoch": 1.259765625e-05, + "step": 8256, + "training_step_time": 0.10856294631958008 + }, + { + "epoch": 1.259918212890625e-05, + "model_forward_time": 0.025425434112548828, + "step": 8257 + }, + { + "epoch": 1.259918212890625e-05, + "step": 8257, + "training_step_time": 0.1676044464111328 + }, + { + "epoch": 1.26007080078125e-05, + "model_forward_time": 0.024544239044189453, + "step": 8258 + }, + { + "epoch": 1.26007080078125e-05, + "step": 8258, + "training_step_time": 0.1586611270904541 + }, + { + "epoch": 1.260223388671875e-05, + "model_forward_time": 0.024640321731567383, + "step": 8259 + }, + { + "epoch": 1.260223388671875e-05, + "step": 8259, + "training_step_time": 0.11453056335449219 + }, + { + "epoch": 1.2603759765625e-05, + "grad_norm": 0.4181400239467621, + "learning_rate": 8.674884437064302e-05, + "loss": 0.0301, + "step": 8260 + }, + { + "epoch": 1.2603759765625e-05, + "model_forward_time": 0.02446269989013672, + "step": 8260 + }, + { + "epoch": 1.2603759765625e-05, + "step": 8260, + "training_step_time": 0.21543145179748535 + }, + { + "epoch": 1.260528564453125e-05, + "model_forward_time": 0.024779796600341797, + "step": 8261 + }, + { + "epoch": 1.260528564453125e-05, + "step": 8261, + "training_step_time": 0.11783599853515625 + }, + { + "epoch": 1.26068115234375e-05, + "model_forward_time": 0.024695873260498047, + "step": 8262 + }, + { + "epoch": 1.26068115234375e-05, + "step": 8262, + "training_step_time": 0.10457754135131836 + }, + { + "epoch": 1.260833740234375e-05, + "model_forward_time": 0.02546238899230957, + "step": 8263 + }, + { + "epoch": 1.260833740234375e-05, + "step": 8263, + "training_step_time": 0.10940814018249512 + }, + { + "epoch": 1.260986328125e-05, + "model_forward_time": 0.0253298282623291, + "step": 8264 + }, + { + "epoch": 1.260986328125e-05, + "step": 8264, + "training_step_time": 0.10827374458312988 + }, + { + "epoch": 1.261138916015625e-05, + "model_forward_time": 0.025241613388061523, + "step": 8265 + }, + { + "epoch": 1.261138916015625e-05, + "step": 8265, + "training_step_time": 0.10695338249206543 + }, + { + "epoch": 1.26129150390625e-05, + "model_forward_time": 0.02699732780456543, + "step": 8266 + }, + { + "epoch": 1.26129150390625e-05, + "step": 8266, + "training_step_time": 0.10977029800415039 + }, + { + "epoch": 1.261444091796875e-05, + "model_forward_time": 0.025312185287475586, + "step": 8267 + }, + { + "epoch": 1.261444091796875e-05, + "step": 8267, + "training_step_time": 0.10727429389953613 + }, + { + "epoch": 1.2615966796875e-05, + "model_forward_time": 0.02538156509399414, + "step": 8268 + }, + { + "epoch": 1.2615966796875e-05, + "step": 8268, + "training_step_time": 0.15910005569458008 + }, + { + "epoch": 1.261749267578125e-05, + "model_forward_time": 0.024812698364257812, + "step": 8269 + }, + { + "epoch": 1.261749267578125e-05, + "step": 8269, + "training_step_time": 0.11165642738342285 + }, + { + "epoch": 1.26190185546875e-05, + "grad_norm": 0.7219143509864807, + "learning_rate": 8.671144855431833e-05, + "loss": 0.0366, + "step": 8270 + }, + { + "epoch": 1.26190185546875e-05, + "model_forward_time": 0.02467656135559082, + "step": 8270 + }, + { + "epoch": 1.26190185546875e-05, + "step": 8270, + "training_step_time": 0.10815858840942383 + }, + { + "epoch": 1.262054443359375e-05, + "model_forward_time": 0.02594470977783203, + "step": 8271 + }, + { + "epoch": 1.262054443359375e-05, + "step": 8271, + "training_step_time": 0.11873769760131836 + }, + { + "epoch": 1.26220703125e-05, + "model_forward_time": 0.025385618209838867, + "step": 8272 + }, + { + "epoch": 1.26220703125e-05, + "step": 8272, + "training_step_time": 0.13196349143981934 + }, + { + "epoch": 1.262359619140625e-05, + "model_forward_time": 0.025077342987060547, + "step": 8273 + }, + { + "epoch": 1.262359619140625e-05, + "step": 8273, + "training_step_time": 0.11383843421936035 + }, + { + "epoch": 1.26251220703125e-05, + "model_forward_time": 0.02555704116821289, + "step": 8274 + }, + { + "epoch": 1.26251220703125e-05, + "step": 8274, + "training_step_time": 0.12500810623168945 + }, + { + "epoch": 1.262664794921875e-05, + "model_forward_time": 0.025110483169555664, + "step": 8275 + }, + { + "epoch": 1.262664794921875e-05, + "step": 8275, + "training_step_time": 0.1092996597290039 + }, + { + "epoch": 1.2628173828125e-05, + "model_forward_time": 0.025484561920166016, + "step": 8276 + }, + { + "epoch": 1.2628173828125e-05, + "step": 8276, + "training_step_time": 0.17926979064941406 + }, + { + "epoch": 1.262969970703125e-05, + "model_forward_time": 0.0254061222076416, + "step": 8277 + }, + { + "epoch": 1.262969970703125e-05, + "step": 8277, + "training_step_time": 0.10682964324951172 + }, + { + "epoch": 1.26312255859375e-05, + "model_forward_time": 0.024709701538085938, + "step": 8278 + }, + { + "epoch": 1.26312255859375e-05, + "step": 8278, + "training_step_time": 0.13444924354553223 + }, + { + "epoch": 1.263275146484375e-05, + "model_forward_time": 0.02516627311706543, + "step": 8279 + }, + { + "epoch": 1.263275146484375e-05, + "step": 8279, + "training_step_time": 0.13913321495056152 + }, + { + "epoch": 1.263427734375e-05, + "grad_norm": 0.5215271711349487, + "learning_rate": 8.6674008130122e-05, + "loss": 0.0463, + "step": 8280 + }, + { + "epoch": 1.263427734375e-05, + "model_forward_time": 0.02445363998413086, + "step": 8280 + }, + { + "epoch": 1.263427734375e-05, + "step": 8280, + "training_step_time": 0.11500382423400879 + }, + { + "epoch": 1.263580322265625e-05, + "model_forward_time": 0.025279998779296875, + "step": 8281 + }, + { + "epoch": 1.263580322265625e-05, + "step": 8281, + "training_step_time": 0.1218256950378418 + }, + { + "epoch": 1.26373291015625e-05, + "model_forward_time": 0.025266647338867188, + "step": 8282 + }, + { + "epoch": 1.26373291015625e-05, + "step": 8282, + "training_step_time": 0.1891002655029297 + }, + { + "epoch": 1.263885498046875e-05, + "model_forward_time": 0.02354741096496582, + "step": 8283 + }, + { + "epoch": 1.263885498046875e-05, + "step": 8283, + "training_step_time": 0.1746351718902588 + }, + { + "epoch": 1.2640380859375e-05, + "model_forward_time": 0.02398395538330078, + "step": 8284 + }, + { + "epoch": 1.2640380859375e-05, + "step": 8284, + "training_step_time": 0.17761015892028809 + }, + { + "epoch": 1.264190673828125e-05, + "model_forward_time": 0.02431941032409668, + "step": 8285 + }, + { + "epoch": 1.264190673828125e-05, + "step": 8285, + "training_step_time": 0.10540461540222168 + }, + { + "epoch": 1.26434326171875e-05, + "model_forward_time": 0.02440500259399414, + "step": 8286 + }, + { + "epoch": 1.26434326171875e-05, + "step": 8286, + "training_step_time": 0.10448575019836426 + }, + { + "epoch": 1.264495849609375e-05, + "model_forward_time": 0.025258541107177734, + "step": 8287 + }, + { + "epoch": 1.264495849609375e-05, + "step": 8287, + "training_step_time": 0.10839462280273438 + }, + { + "epoch": 1.2646484375e-05, + "model_forward_time": 0.025036096572875977, + "step": 8288 + }, + { + "epoch": 1.2646484375e-05, + "step": 8288, + "training_step_time": 0.1069180965423584 + }, + { + "epoch": 1.264801025390625e-05, + "model_forward_time": 0.02533578872680664, + "step": 8289 + }, + { + "epoch": 1.264801025390625e-05, + "step": 8289, + "training_step_time": 0.10826706886291504 + }, + { + "epoch": 1.26495361328125e-05, + "grad_norm": 0.5088498592376709, + "learning_rate": 8.663652314354765e-05, + "loss": 0.0305, + "step": 8290 + }, + { + "epoch": 1.26495361328125e-05, + "model_forward_time": 0.02423095703125, + "step": 8290 + }, + { + "epoch": 1.26495361328125e-05, + "step": 8290, + "training_step_time": 0.10860347747802734 + }, + { + "epoch": 1.265106201171875e-05, + "model_forward_time": 0.025333881378173828, + "step": 8291 + }, + { + "epoch": 1.265106201171875e-05, + "step": 8291, + "training_step_time": 0.11097002029418945 + }, + { + "epoch": 1.2652587890625e-05, + "model_forward_time": 0.0252230167388916, + "step": 8292 + }, + { + "epoch": 1.2652587890625e-05, + "step": 8292, + "training_step_time": 0.10833239555358887 + }, + { + "epoch": 1.265411376953125e-05, + "model_forward_time": 0.024645566940307617, + "step": 8293 + }, + { + "epoch": 1.265411376953125e-05, + "step": 8293, + "training_step_time": 0.1066901683807373 + }, + { + "epoch": 1.26556396484375e-05, + "model_forward_time": 0.025157690048217773, + "step": 8294 + }, + { + "epoch": 1.26556396484375e-05, + "step": 8294, + "training_step_time": 0.1079859733581543 + }, + { + "epoch": 1.265716552734375e-05, + "model_forward_time": 0.025233745574951172, + "step": 8295 + }, + { + "epoch": 1.265716552734375e-05, + "step": 8295, + "training_step_time": 0.1087646484375 + }, + { + "epoch": 1.265869140625e-05, + "model_forward_time": 0.024991750717163086, + "step": 8296 + }, + { + "epoch": 1.265869140625e-05, + "step": 8296, + "training_step_time": 0.10718011856079102 + }, + { + "epoch": 1.266021728515625e-05, + "model_forward_time": 0.024930715560913086, + "step": 8297 + }, + { + "epoch": 1.266021728515625e-05, + "step": 8297, + "training_step_time": 0.1100461483001709 + }, + { + "epoch": 1.26617431640625e-05, + "model_forward_time": 0.02480292320251465, + "step": 8298 + }, + { + "epoch": 1.26617431640625e-05, + "step": 8298, + "training_step_time": 0.1079554557800293 + }, + { + "epoch": 1.266326904296875e-05, + "model_forward_time": 0.025150775909423828, + "step": 8299 + }, + { + "epoch": 1.266326904296875e-05, + "step": 8299, + "training_step_time": 0.10934925079345703 + }, + { + "epoch": 1.2664794921875e-05, + "grad_norm": 0.2651950716972351, + "learning_rate": 8.659899364014309e-05, + "loss": 0.0443, + "step": 8300 + }, + { + "epoch": 1.2664794921875e-05, + "model_forward_time": 0.025262117385864258, + "step": 8300 + }, + { + "epoch": 1.2664794921875e-05, + "step": 8300, + "training_step_time": 0.11060929298400879 + }, + { + "epoch": 1.266632080078125e-05, + "model_forward_time": 0.0248870849609375, + "step": 8301 + }, + { + "epoch": 1.266632080078125e-05, + "step": 8301, + "training_step_time": 0.10633325576782227 + }, + { + "epoch": 1.26678466796875e-05, + "model_forward_time": 0.025025606155395508, + "step": 8302 + }, + { + "epoch": 1.26678466796875e-05, + "step": 8302, + "training_step_time": 0.10481858253479004 + }, + { + "epoch": 1.266937255859375e-05, + "model_forward_time": 0.025299787521362305, + "step": 8303 + }, + { + "epoch": 1.266937255859375e-05, + "step": 8303, + "training_step_time": 0.1071774959564209 + }, + { + "epoch": 1.26708984375e-05, + "model_forward_time": 0.025222063064575195, + "step": 8304 + }, + { + "epoch": 1.26708984375e-05, + "step": 8304, + "training_step_time": 0.1157834529876709 + }, + { + "epoch": 1.267242431640625e-05, + "model_forward_time": 0.025131702423095703, + "step": 8305 + }, + { + "epoch": 1.267242431640625e-05, + "step": 8305, + "training_step_time": 0.1990342140197754 + }, + { + "epoch": 1.26739501953125e-05, + "model_forward_time": 0.024380922317504883, + "step": 8306 + }, + { + "epoch": 1.26739501953125e-05, + "step": 8306, + "training_step_time": 0.12038278579711914 + }, + { + "epoch": 1.267547607421875e-05, + "model_forward_time": 0.024713754653930664, + "step": 8307 + }, + { + "epoch": 1.267547607421875e-05, + "step": 8307, + "training_step_time": 0.1348867416381836 + }, + { + "epoch": 1.2677001953125e-05, + "model_forward_time": 0.024938583374023438, + "step": 8308 + }, + { + "epoch": 1.2677001953125e-05, + "step": 8308, + "training_step_time": 0.13088464736938477 + }, + { + "epoch": 1.267852783203125e-05, + "model_forward_time": 0.024430274963378906, + "step": 8309 + }, + { + "epoch": 1.267852783203125e-05, + "step": 8309, + "training_step_time": 0.13077998161315918 + }, + { + "epoch": 1.26800537109375e-05, + "grad_norm": 0.40368762612342834, + "learning_rate": 8.656141966551019e-05, + "loss": 0.0333, + "step": 8310 + }, + { + "epoch": 1.26800537109375e-05, + "model_forward_time": 0.02498149871826172, + "step": 8310 + }, + { + "epoch": 1.26800537109375e-05, + "step": 8310, + "training_step_time": 0.12154078483581543 + }, + { + "epoch": 1.268157958984375e-05, + "model_forward_time": 0.024976491928100586, + "step": 8311 + }, + { + "epoch": 1.268157958984375e-05, + "step": 8311, + "training_step_time": 0.13335514068603516 + }, + { + "epoch": 1.268310546875e-05, + "model_forward_time": 0.024996519088745117, + "step": 8312 + }, + { + "epoch": 1.268310546875e-05, + "step": 8312, + "training_step_time": 0.1266331672668457 + }, + { + "epoch": 1.268463134765625e-05, + "model_forward_time": 0.024685144424438477, + "step": 8313 + }, + { + "epoch": 1.268463134765625e-05, + "step": 8313, + "training_step_time": 0.12082147598266602 + }, + { + "epoch": 1.26861572265625e-05, + "model_forward_time": 0.025316953659057617, + "step": 8314 + }, + { + "epoch": 1.26861572265625e-05, + "step": 8314, + "training_step_time": 0.13019490242004395 + }, + { + "epoch": 1.268768310546875e-05, + "model_forward_time": 0.02454090118408203, + "step": 8315 + }, + { + "epoch": 1.268768310546875e-05, + "step": 8315, + "training_step_time": 0.11512017250061035 + }, + { + "epoch": 1.2689208984375e-05, + "model_forward_time": 0.024876832962036133, + "step": 8316 + }, + { + "epoch": 1.2689208984375e-05, + "step": 8316, + "training_step_time": 0.220841646194458 + }, + { + "epoch": 1.269073486328125e-05, + "model_forward_time": 0.024767637252807617, + "step": 8317 + }, + { + "epoch": 1.269073486328125e-05, + "step": 8317, + "training_step_time": 0.13405799865722656 + }, + { + "epoch": 1.26922607421875e-05, + "model_forward_time": 0.023891448974609375, + "step": 8318 + }, + { + "epoch": 1.26922607421875e-05, + "step": 8318, + "training_step_time": 0.10958003997802734 + }, + { + "epoch": 1.269378662109375e-05, + "model_forward_time": 0.02476024627685547, + "step": 8319 + }, + { + "epoch": 1.269378662109375e-05, + "step": 8319, + "training_step_time": 0.12910246849060059 + }, + { + "epoch": 1.26953125e-05, + "grad_norm": 0.36668530106544495, + "learning_rate": 8.652380126530488e-05, + "loss": 0.0288, + "step": 8320 + }, + { + "epoch": 1.26953125e-05, + "model_forward_time": 0.025208711624145508, + "step": 8320 + }, + { + "epoch": 1.26953125e-05, + "step": 8320, + "training_step_time": 0.23127484321594238 + }, + { + "epoch": 1.269683837890625e-05, + "model_forward_time": 0.024553537368774414, + "step": 8321 + }, + { + "epoch": 1.269683837890625e-05, + "step": 8321, + "training_step_time": 0.11568999290466309 + }, + { + "epoch": 1.26983642578125e-05, + "model_forward_time": 0.024027585983276367, + "step": 8322 + }, + { + "epoch": 1.26983642578125e-05, + "step": 8322, + "training_step_time": 0.13327670097351074 + }, + { + "epoch": 1.269989013671875e-05, + "model_forward_time": 0.02466750144958496, + "step": 8323 + }, + { + "epoch": 1.269989013671875e-05, + "step": 8323, + "training_step_time": 0.19024968147277832 + }, + { + "epoch": 1.2701416015625e-05, + "model_forward_time": 0.024016141891479492, + "step": 8324 + }, + { + "epoch": 1.2701416015625e-05, + "step": 8324, + "training_step_time": 0.20044398307800293 + }, + { + "epoch": 1.270294189453125e-05, + "model_forward_time": 0.023749828338623047, + "step": 8325 + }, + { + "epoch": 1.270294189453125e-05, + "step": 8325, + "training_step_time": 0.17215275764465332 + }, + { + "epoch": 1.27044677734375e-05, + "model_forward_time": 0.025114059448242188, + "step": 8326 + }, + { + "epoch": 1.27044677734375e-05, + "step": 8326, + "training_step_time": 0.11306548118591309 + }, + { + "epoch": 1.270599365234375e-05, + "model_forward_time": 0.024098634719848633, + "step": 8327 + }, + { + "epoch": 1.270599365234375e-05, + "step": 8327, + "training_step_time": 0.1038053035736084 + }, + { + "epoch": 1.270751953125e-05, + "model_forward_time": 0.025075197219848633, + "step": 8328 + }, + { + "epoch": 1.270751953125e-05, + "step": 8328, + "training_step_time": 0.19225502014160156 + }, + { + "epoch": 1.270904541015625e-05, + "model_forward_time": 0.025210857391357422, + "step": 8329 + }, + { + "epoch": 1.270904541015625e-05, + "step": 8329, + "training_step_time": 0.10590791702270508 + }, + { + "epoch": 1.27105712890625e-05, + "grad_norm": 0.3765827715396881, + "learning_rate": 8.648613848523707e-05, + "loss": 0.0382, + "step": 8330 + }, + { + "epoch": 1.27105712890625e-05, + "model_forward_time": 0.0246429443359375, + "step": 8330 + }, + { + "epoch": 1.27105712890625e-05, + "step": 8330, + "training_step_time": 0.10933160781860352 + }, + { + "epoch": 1.271209716796875e-05, + "model_forward_time": 0.025256633758544922, + "step": 8331 + }, + { + "epoch": 1.271209716796875e-05, + "step": 8331, + "training_step_time": 0.11239266395568848 + }, + { + "epoch": 1.2713623046875e-05, + "model_forward_time": 0.02524590492248535, + "step": 8332 + }, + { + "epoch": 1.2713623046875e-05, + "step": 8332, + "training_step_time": 0.10671377182006836 + }, + { + "epoch": 1.271514892578125e-05, + "model_forward_time": 0.025117874145507812, + "step": 8333 + }, + { + "epoch": 1.271514892578125e-05, + "step": 8333, + "training_step_time": 0.10737824440002441 + }, + { + "epoch": 1.27166748046875e-05, + "model_forward_time": 0.025689125061035156, + "step": 8334 + }, + { + "epoch": 1.27166748046875e-05, + "step": 8334, + "training_step_time": 0.10934138298034668 + }, + { + "epoch": 1.271820068359375e-05, + "model_forward_time": 0.025018692016601562, + "step": 8335 + }, + { + "epoch": 1.271820068359375e-05, + "step": 8335, + "training_step_time": 0.11110591888427734 + }, + { + "epoch": 1.27197265625e-05, + "model_forward_time": 0.025036096572875977, + "step": 8336 + }, + { + "epoch": 1.27197265625e-05, + "step": 8336, + "training_step_time": 0.10603904724121094 + }, + { + "epoch": 1.272125244140625e-05, + "model_forward_time": 0.02524876594543457, + "step": 8337 + }, + { + "epoch": 1.272125244140625e-05, + "step": 8337, + "training_step_time": 0.10672140121459961 + }, + { + "epoch": 1.27227783203125e-05, + "model_forward_time": 0.025658369064331055, + "step": 8338 + }, + { + "epoch": 1.27227783203125e-05, + "step": 8338, + "training_step_time": 0.1067667007446289 + }, + { + "epoch": 1.272430419921875e-05, + "model_forward_time": 0.02503180503845215, + "step": 8339 + }, + { + "epoch": 1.272430419921875e-05, + "step": 8339, + "training_step_time": 0.1061711311340332 + }, + { + "epoch": 1.2725830078125e-05, + "grad_norm": 0.4698609709739685, + "learning_rate": 8.644843137107059e-05, + "loss": 0.029, + "step": 8340 + }, + { + "epoch": 1.2725830078125e-05, + "model_forward_time": 0.02399420738220215, + "step": 8340 + }, + { + "epoch": 1.2725830078125e-05, + "step": 8340, + "training_step_time": 0.10959410667419434 + }, + { + "epoch": 1.272735595703125e-05, + "model_forward_time": 0.024898290634155273, + "step": 8341 + }, + { + "epoch": 1.272735595703125e-05, + "step": 8341, + "training_step_time": 0.1078944206237793 + }, + { + "epoch": 1.27288818359375e-05, + "model_forward_time": 0.023999691009521484, + "step": 8342 + }, + { + "epoch": 1.27288818359375e-05, + "step": 8342, + "training_step_time": 0.11195588111877441 + }, + { + "epoch": 1.273040771484375e-05, + "model_forward_time": 0.0266265869140625, + "step": 8343 + }, + { + "epoch": 1.273040771484375e-05, + "step": 8343, + "training_step_time": 0.11050820350646973 + }, + { + "epoch": 1.273193359375e-05, + "model_forward_time": 0.02622532844543457, + "step": 8344 + }, + { + "epoch": 1.273193359375e-05, + "step": 8344, + "training_step_time": 0.10818195343017578 + }, + { + "epoch": 1.273345947265625e-05, + "model_forward_time": 0.02520275115966797, + "step": 8345 + }, + { + "epoch": 1.273345947265625e-05, + "step": 8345, + "training_step_time": 0.10703587532043457 + }, + { + "epoch": 1.27349853515625e-05, + "model_forward_time": 0.02508831024169922, + "step": 8346 + }, + { + "epoch": 1.27349853515625e-05, + "step": 8346, + "training_step_time": 0.10923075675964355 + }, + { + "epoch": 1.273651123046875e-05, + "model_forward_time": 0.026125431060791016, + "step": 8347 + }, + { + "epoch": 1.273651123046875e-05, + "step": 8347, + "training_step_time": 0.10979175567626953 + }, + { + "epoch": 1.2738037109375e-05, + "model_forward_time": 0.025430679321289062, + "step": 8348 + }, + { + "epoch": 1.2738037109375e-05, + "step": 8348, + "training_step_time": 0.1694927215576172 + }, + { + "epoch": 1.273956298828125e-05, + "model_forward_time": 0.024853229522705078, + "step": 8349 + }, + { + "epoch": 1.273956298828125e-05, + "step": 8349, + "training_step_time": 0.1736891269683838 + }, + { + "epoch": 1.27410888671875e-05, + "grad_norm": 0.3819833993911743, + "learning_rate": 8.641067996862311e-05, + "loss": 0.0439, + "step": 8350 + }, + { + "epoch": 1.27410888671875e-05, + "model_forward_time": 0.024979591369628906, + "step": 8350 + }, + { + "epoch": 1.27410888671875e-05, + "step": 8350, + "training_step_time": 0.10771965980529785 + }, + { + "epoch": 1.274261474609375e-05, + "model_forward_time": 0.024687528610229492, + "step": 8351 + }, + { + "epoch": 1.274261474609375e-05, + "step": 8351, + "training_step_time": 0.21317529678344727 + }, + { + "epoch": 1.2744140625e-05, + "model_forward_time": 0.024732112884521484, + "step": 8352 + }, + { + "epoch": 1.2744140625e-05, + "step": 8352, + "training_step_time": 0.11307048797607422 + }, + { + "epoch": 1.274566650390625e-05, + "model_forward_time": 0.024814605712890625, + "step": 8353 + }, + { + "epoch": 1.274566650390625e-05, + "step": 8353, + "training_step_time": 0.10328817367553711 + }, + { + "epoch": 1.27471923828125e-05, + "model_forward_time": 0.025638818740844727, + "step": 8354 + }, + { + "epoch": 1.27471923828125e-05, + "step": 8354, + "training_step_time": 0.10732221603393555 + }, + { + "epoch": 1.274871826171875e-05, + "model_forward_time": 0.024724483489990234, + "step": 8355 + }, + { + "epoch": 1.274871826171875e-05, + "step": 8355, + "training_step_time": 0.10698437690734863 + }, + { + "epoch": 1.2750244140625e-05, + "model_forward_time": 0.025170564651489258, + "step": 8356 + }, + { + "epoch": 1.2750244140625e-05, + "step": 8356, + "training_step_time": 0.10966038703918457 + }, + { + "epoch": 1.275177001953125e-05, + "model_forward_time": 0.02550983428955078, + "step": 8357 + }, + { + "epoch": 1.275177001953125e-05, + "step": 8357, + "training_step_time": 0.11053800582885742 + }, + { + "epoch": 1.27532958984375e-05, + "model_forward_time": 0.024966716766357422, + "step": 8358 + }, + { + "epoch": 1.27532958984375e-05, + "step": 8358, + "training_step_time": 0.15204310417175293 + }, + { + "epoch": 1.275482177734375e-05, + "model_forward_time": 0.025038480758666992, + "step": 8359 + }, + { + "epoch": 1.275482177734375e-05, + "step": 8359, + "training_step_time": 0.11052274703979492 + }, + { + "epoch": 1.275634765625e-05, + "grad_norm": 0.6609170436859131, + "learning_rate": 8.637288432376618e-05, + "loss": 0.0348, + "step": 8360 + }, + { + "epoch": 1.275634765625e-05, + "model_forward_time": 0.024238109588623047, + "step": 8360 + }, + { + "epoch": 1.275634765625e-05, + "step": 8360, + "training_step_time": 0.10915398597717285 + }, + { + "epoch": 1.275787353515625e-05, + "model_forward_time": 0.025017261505126953, + "step": 8361 + }, + { + "epoch": 1.275787353515625e-05, + "step": 8361, + "training_step_time": 0.12468624114990234 + }, + { + "epoch": 1.27593994140625e-05, + "model_forward_time": 0.024975061416625977, + "step": 8362 + }, + { + "epoch": 1.27593994140625e-05, + "step": 8362, + "training_step_time": 0.12370729446411133 + }, + { + "epoch": 1.276092529296875e-05, + "model_forward_time": 0.025257349014282227, + "step": 8363 + }, + { + "epoch": 1.276092529296875e-05, + "step": 8363, + "training_step_time": 0.11098742485046387 + }, + { + "epoch": 1.2762451171875e-05, + "model_forward_time": 0.025055646896362305, + "step": 8364 + }, + { + "epoch": 1.2762451171875e-05, + "step": 8364, + "training_step_time": 0.1190338134765625 + }, + { + "epoch": 1.276397705078125e-05, + "model_forward_time": 0.02498650550842285, + "step": 8365 + }, + { + "epoch": 1.276397705078125e-05, + "step": 8365, + "training_step_time": 0.19570350646972656 + }, + { + "epoch": 1.27655029296875e-05, + "model_forward_time": 0.02420806884765625, + "step": 8366 + }, + { + "epoch": 1.27655029296875e-05, + "step": 8366, + "training_step_time": 0.12624669075012207 + }, + { + "epoch": 1.276702880859375e-05, + "model_forward_time": 0.0243532657623291, + "step": 8367 + }, + { + "epoch": 1.276702880859375e-05, + "step": 8367, + "training_step_time": 0.10639429092407227 + }, + { + "epoch": 1.27685546875e-05, + "model_forward_time": 0.0251007080078125, + "step": 8368 + }, + { + "epoch": 1.27685546875e-05, + "step": 8368, + "training_step_time": 0.12567138671875 + }, + { + "epoch": 1.277008056640625e-05, + "model_forward_time": 0.024992942810058594, + "step": 8369 + }, + { + "epoch": 1.277008056640625e-05, + "step": 8369, + "training_step_time": 0.1861555576324463 + }, + { + "epoch": 1.27716064453125e-05, + "grad_norm": 0.4028451144695282, + "learning_rate": 8.633504448242505e-05, + "loss": 0.0316, + "step": 8370 + }, + { + "epoch": 1.27716064453125e-05, + "model_forward_time": 0.02384805679321289, + "step": 8370 + }, + { + "epoch": 1.27716064453125e-05, + "step": 8370, + "training_step_time": 0.18284320831298828 + }, + { + "epoch": 1.277313232421875e-05, + "model_forward_time": 0.025268077850341797, + "step": 8371 + }, + { + "epoch": 1.277313232421875e-05, + "step": 8371, + "training_step_time": 0.11041736602783203 + }, + { + "epoch": 1.2774658203125e-05, + "model_forward_time": 0.024677515029907227, + "step": 8372 + }, + { + "epoch": 1.2774658203125e-05, + "step": 8372, + "training_step_time": 0.1602156162261963 + }, + { + "epoch": 1.277618408203125e-05, + "model_forward_time": 0.02453160285949707, + "step": 8373 + }, + { + "epoch": 1.277618408203125e-05, + "step": 8373, + "training_step_time": 0.10850644111633301 + }, + { + "epoch": 1.27777099609375e-05, + "model_forward_time": 0.02485966682434082, + "step": 8374 + }, + { + "epoch": 1.27777099609375e-05, + "step": 8374, + "training_step_time": 0.11056017875671387 + }, + { + "epoch": 1.277923583984375e-05, + "model_forward_time": 0.025166034698486328, + "step": 8375 + }, + { + "epoch": 1.277923583984375e-05, + "step": 8375, + "training_step_time": 0.11009383201599121 + }, + { + "epoch": 1.278076171875e-05, + "model_forward_time": 0.025252103805541992, + "step": 8376 + }, + { + "epoch": 1.278076171875e-05, + "step": 8376, + "training_step_time": 0.10729479789733887 + }, + { + "epoch": 1.278228759765625e-05, + "model_forward_time": 0.025304079055786133, + "step": 8377 + }, + { + "epoch": 1.278228759765625e-05, + "step": 8377, + "training_step_time": 0.10678935050964355 + }, + { + "epoch": 1.27838134765625e-05, + "model_forward_time": 0.025885343551635742, + "step": 8378 + }, + { + "epoch": 1.27838134765625e-05, + "step": 8378, + "training_step_time": 0.11346578598022461 + }, + { + "epoch": 1.278533935546875e-05, + "model_forward_time": 0.02542734146118164, + "step": 8379 + }, + { + "epoch": 1.278533935546875e-05, + "step": 8379, + "training_step_time": 0.10952210426330566 + }, + { + "epoch": 1.2786865234375e-05, + "grad_norm": 0.44128119945526123, + "learning_rate": 8.629716049057872e-05, + "loss": 0.0422, + "step": 8380 + }, + { + "epoch": 1.2786865234375e-05, + "model_forward_time": 0.02500295639038086, + "step": 8380 + }, + { + "epoch": 1.2786865234375e-05, + "step": 8380, + "training_step_time": 0.10754776000976562 + }, + { + "epoch": 1.278839111328125e-05, + "model_forward_time": 0.02545166015625, + "step": 8381 + }, + { + "epoch": 1.278839111328125e-05, + "step": 8381, + "training_step_time": 0.10729312896728516 + }, + { + "epoch": 1.27899169921875e-05, + "model_forward_time": 0.024972200393676758, + "step": 8382 + }, + { + "epoch": 1.27899169921875e-05, + "step": 8382, + "training_step_time": 0.11115312576293945 + }, + { + "epoch": 1.279144287109375e-05, + "model_forward_time": 0.025452852249145508, + "step": 8383 + }, + { + "epoch": 1.279144287109375e-05, + "step": 8383, + "training_step_time": 0.10815024375915527 + }, + { + "epoch": 1.279296875e-05, + "model_forward_time": 0.025129079818725586, + "step": 8384 + }, + { + "epoch": 1.279296875e-05, + "step": 8384, + "training_step_time": 0.1091454029083252 + }, + { + "epoch": 1.279449462890625e-05, + "model_forward_time": 0.025385618209838867, + "step": 8385 + }, + { + "epoch": 1.279449462890625e-05, + "step": 8385, + "training_step_time": 0.10782790184020996 + }, + { + "epoch": 1.27960205078125e-05, + "model_forward_time": 0.025218486785888672, + "step": 8386 + }, + { + "epoch": 1.27960205078125e-05, + "step": 8386, + "training_step_time": 0.10896658897399902 + }, + { + "epoch": 1.279754638671875e-05, + "model_forward_time": 0.02565145492553711, + "step": 8387 + }, + { + "epoch": 1.279754638671875e-05, + "step": 8387, + "training_step_time": 0.11694836616516113 + }, + { + "epoch": 1.2799072265625e-05, + "model_forward_time": 0.02463507652282715, + "step": 8388 + }, + { + "epoch": 1.2799072265625e-05, + "step": 8388, + "training_step_time": 0.1114206314086914 + }, + { + "epoch": 1.280059814453125e-05, + "model_forward_time": 0.025370359420776367, + "step": 8389 + }, + { + "epoch": 1.280059814453125e-05, + "step": 8389, + "training_step_time": 0.10836124420166016 + }, + { + "epoch": 1.28021240234375e-05, + "grad_norm": 0.24307404458522797, + "learning_rate": 8.625923239425978e-05, + "loss": 0.032, + "step": 8390 + }, + { + "epoch": 1.28021240234375e-05, + "model_forward_time": 0.025161027908325195, + "step": 8390 + }, + { + "epoch": 1.28021240234375e-05, + "step": 8390, + "training_step_time": 0.11198854446411133 + }, + { + "epoch": 1.280364990234375e-05, + "model_forward_time": 0.025131940841674805, + "step": 8391 + }, + { + "epoch": 1.280364990234375e-05, + "step": 8391, + "training_step_time": 0.10939908027648926 + }, + { + "epoch": 1.280517578125e-05, + "model_forward_time": 0.025220394134521484, + "step": 8392 + }, + { + "epoch": 1.280517578125e-05, + "step": 8392, + "training_step_time": 0.11527609825134277 + }, + { + "epoch": 1.280670166015625e-05, + "model_forward_time": 0.025374889373779297, + "step": 8393 + }, + { + "epoch": 1.280670166015625e-05, + "step": 8393, + "training_step_time": 0.19013166427612305 + }, + { + "epoch": 1.28082275390625e-05, + "model_forward_time": 0.024819374084472656, + "step": 8394 + }, + { + "epoch": 1.28082275390625e-05, + "step": 8394, + "training_step_time": 0.13388395309448242 + }, + { + "epoch": 1.280975341796875e-05, + "model_forward_time": 0.02500772476196289, + "step": 8395 + }, + { + "epoch": 1.280975341796875e-05, + "step": 8395, + "training_step_time": 0.1089029312133789 + }, + { + "epoch": 1.2811279296875e-05, + "model_forward_time": 0.025564908981323242, + "step": 8396 + }, + { + "epoch": 1.2811279296875e-05, + "step": 8396, + "training_step_time": 0.11120963096618652 + }, + { + "epoch": 1.281280517578125e-05, + "model_forward_time": 0.025641679763793945, + "step": 8397 + }, + { + "epoch": 1.281280517578125e-05, + "step": 8397, + "training_step_time": 0.1466817855834961 + }, + { + "epoch": 1.28143310546875e-05, + "model_forward_time": 0.025205612182617188, + "step": 8398 + }, + { + "epoch": 1.28143310546875e-05, + "step": 8398, + "training_step_time": 0.1926419734954834 + }, + { + "epoch": 1.281585693359375e-05, + "model_forward_time": 0.024611949920654297, + "step": 8399 + }, + { + "epoch": 1.281585693359375e-05, + "step": 8399, + "training_step_time": 0.10730814933776855 + }, + { + "epoch": 1.28173828125e-05, + "grad_norm": 0.41981056332588196, + "learning_rate": 8.622126023955446e-05, + "loss": 0.0345, + "step": 8400 + }, + { + "epoch": 1.28173828125e-05, + "model_forward_time": 0.024862051010131836, + "step": 8400 + }, + { + "epoch": 1.28173828125e-05, + "step": 8400, + "training_step_time": 0.10840082168579102 + }, + { + "epoch": 1.281890869140625e-05, + "model_forward_time": 0.02514362335205078, + "step": 8401 + }, + { + "epoch": 1.281890869140625e-05, + "step": 8401, + "training_step_time": 0.10918140411376953 + }, + { + "epoch": 1.28204345703125e-05, + "model_forward_time": 0.025333404541015625, + "step": 8402 + }, + { + "epoch": 1.28204345703125e-05, + "step": 8402, + "training_step_time": 0.11403083801269531 + }, + { + "epoch": 1.282196044921875e-05, + "model_forward_time": 0.025063037872314453, + "step": 8403 + }, + { + "epoch": 1.282196044921875e-05, + "step": 8403, + "training_step_time": 0.10716056823730469 + }, + { + "epoch": 1.2823486328125e-05, + "model_forward_time": 0.025291919708251953, + "step": 8404 + }, + { + "epoch": 1.2823486328125e-05, + "step": 8404, + "training_step_time": 0.18341279029846191 + }, + { + "epoch": 1.282501220703125e-05, + "model_forward_time": 0.0243070125579834, + "step": 8405 + }, + { + "epoch": 1.282501220703125e-05, + "step": 8405, + "training_step_time": 0.10787129402160645 + }, + { + "epoch": 1.28265380859375e-05, + "model_forward_time": 0.02421879768371582, + "step": 8406 + }, + { + "epoch": 1.28265380859375e-05, + "step": 8406, + "training_step_time": 0.11003899574279785 + }, + { + "epoch": 1.282806396484375e-05, + "model_forward_time": 0.025466203689575195, + "step": 8407 + }, + { + "epoch": 1.282806396484375e-05, + "step": 8407, + "training_step_time": 0.12634658813476562 + }, + { + "epoch": 1.282958984375e-05, + "model_forward_time": 0.02593207359313965, + "step": 8408 + }, + { + "epoch": 1.282958984375e-05, + "step": 8408, + "training_step_time": 0.127305269241333 + }, + { + "epoch": 1.283111572265625e-05, + "model_forward_time": 0.025074481964111328, + "step": 8409 + }, + { + "epoch": 1.283111572265625e-05, + "step": 8409, + "training_step_time": 0.11005449295043945 + }, + { + "epoch": 1.28326416015625e-05, + "grad_norm": 0.44348084926605225, + "learning_rate": 8.61832440726025e-05, + "loss": 0.0366, + "step": 8410 + }, + { + "epoch": 1.28326416015625e-05, + "model_forward_time": 0.02497553825378418, + "step": 8410 + }, + { + "epoch": 1.28326416015625e-05, + "step": 8410, + "training_step_time": 0.11715388298034668 + }, + { + "epoch": 1.283416748046875e-05, + "model_forward_time": 0.024999618530273438, + "step": 8411 + }, + { + "epoch": 1.283416748046875e-05, + "step": 8411, + "training_step_time": 0.15167570114135742 + }, + { + "epoch": 1.2835693359375e-05, + "model_forward_time": 0.024689674377441406, + "step": 8412 + }, + { + "epoch": 1.2835693359375e-05, + "step": 8412, + "training_step_time": 0.11267805099487305 + }, + { + "epoch": 1.283721923828125e-05, + "model_forward_time": 0.02488994598388672, + "step": 8413 + }, + { + "epoch": 1.283721923828125e-05, + "step": 8413, + "training_step_time": 0.2178962230682373 + }, + { + "epoch": 1.28387451171875e-05, + "model_forward_time": 0.024739503860473633, + "step": 8414 + }, + { + "epoch": 1.28387451171875e-05, + "step": 8414, + "training_step_time": 0.1516273021697998 + }, + { + "epoch": 1.284027099609375e-05, + "model_forward_time": 0.024057388305664062, + "step": 8415 + }, + { + "epoch": 1.284027099609375e-05, + "step": 8415, + "training_step_time": 0.1739046573638916 + }, + { + "epoch": 1.2841796875e-05, + "model_forward_time": 0.02478790283203125, + "step": 8416 + }, + { + "epoch": 1.2841796875e-05, + "step": 8416, + "training_step_time": 0.16681957244873047 + }, + { + "epoch": 1.284332275390625e-05, + "model_forward_time": 0.024401426315307617, + "step": 8417 + }, + { + "epoch": 1.284332275390625e-05, + "step": 8417, + "training_step_time": 0.1191091537475586 + }, + { + "epoch": 1.28448486328125e-05, + "model_forward_time": 0.024442672729492188, + "step": 8418 + }, + { + "epoch": 1.28448486328125e-05, + "step": 8418, + "training_step_time": 0.1895139217376709 + }, + { + "epoch": 1.284637451171875e-05, + "model_forward_time": 0.02458930015563965, + "step": 8419 + }, + { + "epoch": 1.284637451171875e-05, + "step": 8419, + "training_step_time": 0.11278343200683594 + }, + { + "epoch": 1.2847900390625e-05, + "grad_norm": 0.3509846031665802, + "learning_rate": 8.614518393959714e-05, + "loss": 0.0304, + "step": 8420 + }, + { + "epoch": 1.2847900390625e-05, + "model_forward_time": 0.024610042572021484, + "step": 8420 + }, + { + "epoch": 1.2847900390625e-05, + "step": 8420, + "training_step_time": 0.11339807510375977 + }, + { + "epoch": 1.284942626953125e-05, + "model_forward_time": 0.024471521377563477, + "step": 8421 + }, + { + "epoch": 1.284942626953125e-05, + "step": 8421, + "training_step_time": 0.11230754852294922 + }, + { + "epoch": 1.28509521484375e-05, + "model_forward_time": 0.024585723876953125, + "step": 8422 + }, + { + "epoch": 1.28509521484375e-05, + "step": 8422, + "training_step_time": 0.11299920082092285 + }, + { + "epoch": 1.285247802734375e-05, + "model_forward_time": 0.025350093841552734, + "step": 8423 + }, + { + "epoch": 1.285247802734375e-05, + "step": 8423, + "training_step_time": 0.1131901741027832 + }, + { + "epoch": 1.285400390625e-05, + "model_forward_time": 0.025278568267822266, + "step": 8424 + }, + { + "epoch": 1.285400390625e-05, + "step": 8424, + "training_step_time": 0.11283087730407715 + }, + { + "epoch": 1.285552978515625e-05, + "model_forward_time": 0.02487659454345703, + "step": 8425 + }, + { + "epoch": 1.285552978515625e-05, + "step": 8425, + "training_step_time": 0.11358189582824707 + }, + { + "epoch": 1.28570556640625e-05, + "model_forward_time": 0.02543926239013672, + "step": 8426 + }, + { + "epoch": 1.28570556640625e-05, + "step": 8426, + "training_step_time": 0.10893869400024414 + }, + { + "epoch": 1.285858154296875e-05, + "model_forward_time": 0.02559661865234375, + "step": 8427 + }, + { + "epoch": 1.285858154296875e-05, + "step": 8427, + "training_step_time": 0.1089789867401123 + }, + { + "epoch": 1.2860107421875e-05, + "model_forward_time": 0.025310039520263672, + "step": 8428 + }, + { + "epoch": 1.2860107421875e-05, + "step": 8428, + "training_step_time": 0.11014556884765625 + }, + { + "epoch": 1.286163330078125e-05, + "model_forward_time": 0.02503824234008789, + "step": 8429 + }, + { + "epoch": 1.286163330078125e-05, + "step": 8429, + "training_step_time": 0.1107332706451416 + }, + { + "epoch": 1.28631591796875e-05, + "grad_norm": 0.5693546533584595, + "learning_rate": 8.610707988678503e-05, + "loss": 0.039, + "step": 8430 + }, + { + "epoch": 1.28631591796875e-05, + "model_forward_time": 0.0253448486328125, + "step": 8430 + }, + { + "epoch": 1.28631591796875e-05, + "step": 8430, + "training_step_time": 0.10806560516357422 + }, + { + "epoch": 1.286468505859375e-05, + "model_forward_time": 0.025567293167114258, + "step": 8431 + }, + { + "epoch": 1.286468505859375e-05, + "step": 8431, + "training_step_time": 0.11466693878173828 + }, + { + "epoch": 1.28662109375e-05, + "model_forward_time": 0.02409648895263672, + "step": 8432 + }, + { + "epoch": 1.28662109375e-05, + "step": 8432, + "training_step_time": 0.1093759536743164 + }, + { + "epoch": 1.286773681640625e-05, + "model_forward_time": 0.02469158172607422, + "step": 8433 + }, + { + "epoch": 1.286773681640625e-05, + "step": 8433, + "training_step_time": 0.10927915573120117 + }, + { + "epoch": 1.28692626953125e-05, + "model_forward_time": 0.025231599807739258, + "step": 8434 + }, + { + "epoch": 1.28692626953125e-05, + "step": 8434, + "training_step_time": 0.10896134376525879 + }, + { + "epoch": 1.287078857421875e-05, + "model_forward_time": 0.02518630027770996, + "step": 8435 + }, + { + "epoch": 1.287078857421875e-05, + "step": 8435, + "training_step_time": 0.10616827011108398 + }, + { + "epoch": 1.2872314453125e-05, + "model_forward_time": 0.025454044342041016, + "step": 8436 + }, + { + "epoch": 1.2872314453125e-05, + "step": 8436, + "training_step_time": 0.11049771308898926 + }, + { + "epoch": 1.287384033203125e-05, + "model_forward_time": 0.025554180145263672, + "step": 8437 + }, + { + "epoch": 1.287384033203125e-05, + "step": 8437, + "training_step_time": 0.10774898529052734 + }, + { + "epoch": 1.28753662109375e-05, + "model_forward_time": 0.025630474090576172, + "step": 8438 + }, + { + "epoch": 1.28753662109375e-05, + "step": 8438, + "training_step_time": 0.10767579078674316 + }, + { + "epoch": 1.287689208984375e-05, + "model_forward_time": 0.02550816535949707, + "step": 8439 + }, + { + "epoch": 1.287689208984375e-05, + "step": 8439, + "training_step_time": 0.2196333408355713 + }, + { + "epoch": 1.287841796875e-05, + "grad_norm": 0.3627856969833374, + "learning_rate": 8.606893196046619e-05, + "loss": 0.0378, + "step": 8440 + }, + { + "epoch": 1.287841796875e-05, + "model_forward_time": 0.0250399112701416, + "step": 8440 + }, + { + "epoch": 1.287841796875e-05, + "step": 8440, + "training_step_time": 0.1106717586517334 + }, + { + "epoch": 1.287994384765625e-05, + "model_forward_time": 0.02515411376953125, + "step": 8441 + }, + { + "epoch": 1.287994384765625e-05, + "step": 8441, + "training_step_time": 0.1080620288848877 + }, + { + "epoch": 1.28814697265625e-05, + "model_forward_time": 0.025762081146240234, + "step": 8442 + }, + { + "epoch": 1.28814697265625e-05, + "step": 8442, + "training_step_time": 0.1706404685974121 + }, + { + "epoch": 1.288299560546875e-05, + "model_forward_time": 0.02474498748779297, + "step": 8443 + }, + { + "epoch": 1.288299560546875e-05, + "step": 8443, + "training_step_time": 0.16864013671875 + }, + { + "epoch": 1.2884521484375e-05, + "model_forward_time": 0.02462148666381836, + "step": 8444 + }, + { + "epoch": 1.2884521484375e-05, + "step": 8444, + "training_step_time": 0.10453200340270996 + }, + { + "epoch": 1.288604736328125e-05, + "model_forward_time": 0.02480602264404297, + "step": 8445 + }, + { + "epoch": 1.288604736328125e-05, + "step": 8445, + "training_step_time": 0.10568714141845703 + }, + { + "epoch": 1.28875732421875e-05, + "model_forward_time": 0.025746822357177734, + "step": 8446 + }, + { + "epoch": 1.28875732421875e-05, + "step": 8446, + "training_step_time": 0.11220526695251465 + }, + { + "epoch": 1.288909912109375e-05, + "model_forward_time": 0.02534770965576172, + "step": 8447 + }, + { + "epoch": 1.288909912109375e-05, + "step": 8447, + "training_step_time": 0.1049954891204834 + }, + { + "epoch": 1.2890625e-05, + "model_forward_time": 0.025272607803344727, + "step": 8448 + }, + { + "epoch": 1.2890625e-05, + "step": 8448, + "training_step_time": 0.10831928253173828 + }, + { + "epoch": 1.289215087890625e-05, + "model_forward_time": 0.025092363357543945, + "step": 8449 + }, + { + "epoch": 1.289215087890625e-05, + "step": 8449, + "training_step_time": 0.10623335838317871 + }, + { + "epoch": 1.28936767578125e-05, + "grad_norm": 0.3822075128555298, + "learning_rate": 8.603074020699393e-05, + "loss": 0.0424, + "step": 8450 + }, + { + "epoch": 1.28936767578125e-05, + "model_forward_time": 0.025623559951782227, + "step": 8450 + }, + { + "epoch": 1.28936767578125e-05, + "step": 8450, + "training_step_time": 0.10686588287353516 + }, + { + "epoch": 1.289520263671875e-05, + "model_forward_time": 0.025102615356445312, + "step": 8451 + }, + { + "epoch": 1.289520263671875e-05, + "step": 8451, + "training_step_time": 0.14095354080200195 + }, + { + "epoch": 1.2896728515625e-05, + "model_forward_time": 0.025460243225097656, + "step": 8452 + }, + { + "epoch": 1.2896728515625e-05, + "step": 8452, + "training_step_time": 0.10820674896240234 + }, + { + "epoch": 1.289825439453125e-05, + "model_forward_time": 0.024969100952148438, + "step": 8453 + }, + { + "epoch": 1.289825439453125e-05, + "step": 8453, + "training_step_time": 0.11278438568115234 + }, + { + "epoch": 1.28997802734375e-05, + "model_forward_time": 0.025583267211914062, + "step": 8454 + }, + { + "epoch": 1.28997802734375e-05, + "step": 8454, + "training_step_time": 0.12532830238342285 + }, + { + "epoch": 1.290130615234375e-05, + "model_forward_time": 0.025507450103759766, + "step": 8455 + }, + { + "epoch": 1.290130615234375e-05, + "step": 8455, + "training_step_time": 0.1235501766204834 + }, + { + "epoch": 1.290283203125e-05, + "model_forward_time": 0.025557279586791992, + "step": 8456 + }, + { + "epoch": 1.290283203125e-05, + "step": 8456, + "training_step_time": 0.11019229888916016 + }, + { + "epoch": 1.290435791015625e-05, + "model_forward_time": 0.025714874267578125, + "step": 8457 + }, + { + "epoch": 1.290435791015625e-05, + "step": 8457, + "training_step_time": 0.1983785629272461 + }, + { + "epoch": 1.29058837890625e-05, + "model_forward_time": 0.02407550811767578, + "step": 8458 + }, + { + "epoch": 1.29058837890625e-05, + "step": 8458, + "training_step_time": 0.2275409698486328 + }, + { + "epoch": 1.290740966796875e-05, + "model_forward_time": 0.02306675910949707, + "step": 8459 + }, + { + "epoch": 1.290740966796875e-05, + "step": 8459, + "training_step_time": 0.22449660301208496 + }, + { + "epoch": 1.2908935546875e-05, + "grad_norm": 0.36279141902923584, + "learning_rate": 8.599250467277483e-05, + "loss": 0.0347, + "step": 8460 + }, + { + "epoch": 1.2908935546875e-05, + "model_forward_time": 0.02484583854675293, + "step": 8460 + }, + { + "epoch": 1.2908935546875e-05, + "step": 8460, + "training_step_time": 0.23877835273742676 + }, + { + "epoch": 1.291046142578125e-05, + "model_forward_time": 0.0248565673828125, + "step": 8461 + }, + { + "epoch": 1.291046142578125e-05, + "step": 8461, + "training_step_time": 0.19716477394104004 + }, + { + "epoch": 1.29119873046875e-05, + "model_forward_time": 0.024669408798217773, + "step": 8462 + }, + { + "epoch": 1.29119873046875e-05, + "step": 8462, + "training_step_time": 0.13439655303955078 + }, + { + "epoch": 1.291351318359375e-05, + "model_forward_time": 0.024426937103271484, + "step": 8463 + }, + { + "epoch": 1.291351318359375e-05, + "step": 8463, + "training_step_time": 0.10390996932983398 + }, + { + "epoch": 1.29150390625e-05, + "model_forward_time": 0.025187969207763672, + "step": 8464 + }, + { + "epoch": 1.29150390625e-05, + "step": 8464, + "training_step_time": 0.10418057441711426 + }, + { + "epoch": 1.291656494140625e-05, + "model_forward_time": 0.025497913360595703, + "step": 8465 + }, + { + "epoch": 1.291656494140625e-05, + "step": 8465, + "training_step_time": 0.10616683959960938 + }, + { + "epoch": 1.29180908203125e-05, + "model_forward_time": 0.025522947311401367, + "step": 8466 + }, + { + "epoch": 1.29180908203125e-05, + "step": 8466, + "training_step_time": 0.11188364028930664 + }, + { + "epoch": 1.291961669921875e-05, + "model_forward_time": 0.025270700454711914, + "step": 8467 + }, + { + "epoch": 1.291961669921875e-05, + "step": 8467, + "training_step_time": 0.10875916481018066 + }, + { + "epoch": 1.2921142578125e-05, + "model_forward_time": 0.025496482849121094, + "step": 8468 + }, + { + "epoch": 1.2921142578125e-05, + "step": 8468, + "training_step_time": 0.11177349090576172 + }, + { + "epoch": 1.292266845703125e-05, + "model_forward_time": 0.025326967239379883, + "step": 8469 + }, + { + "epoch": 1.292266845703125e-05, + "step": 8469, + "training_step_time": 0.10746955871582031 + }, + { + "epoch": 1.29241943359375e-05, + "grad_norm": 0.6637995839118958, + "learning_rate": 8.595422540426869e-05, + "loss": 0.0331, + "step": 8470 + }, + { + "epoch": 1.29241943359375e-05, + "model_forward_time": 0.02515864372253418, + "step": 8470 + }, + { + "epoch": 1.29241943359375e-05, + "step": 8470, + "training_step_time": 0.10781717300415039 + }, + { + "epoch": 1.292572021484375e-05, + "model_forward_time": 0.025058507919311523, + "step": 8471 + }, + { + "epoch": 1.292572021484375e-05, + "step": 8471, + "training_step_time": 0.11310625076293945 + }, + { + "epoch": 1.292724609375e-05, + "model_forward_time": 0.02503204345703125, + "step": 8472 + }, + { + "epoch": 1.292724609375e-05, + "step": 8472, + "training_step_time": 0.11124205589294434 + }, + { + "epoch": 1.292877197265625e-05, + "model_forward_time": 0.025274276733398438, + "step": 8473 + }, + { + "epoch": 1.292877197265625e-05, + "step": 8473, + "training_step_time": 0.1098930835723877 + }, + { + "epoch": 1.29302978515625e-05, + "model_forward_time": 0.025301218032836914, + "step": 8474 + }, + { + "epoch": 1.29302978515625e-05, + "step": 8474, + "training_step_time": 0.1124880313873291 + }, + { + "epoch": 1.293182373046875e-05, + "model_forward_time": 0.025021076202392578, + "step": 8475 + }, + { + "epoch": 1.293182373046875e-05, + "step": 8475, + "training_step_time": 0.11137151718139648 + }, + { + "epoch": 1.2933349609375e-05, + "model_forward_time": 0.025391101837158203, + "step": 8476 + }, + { + "epoch": 1.2933349609375e-05, + "step": 8476, + "training_step_time": 0.11142683029174805 + }, + { + "epoch": 1.293487548828125e-05, + "model_forward_time": 0.025598526000976562, + "step": 8477 + }, + { + "epoch": 1.293487548828125e-05, + "step": 8477, + "training_step_time": 0.1415262222290039 + }, + { + "epoch": 1.29364013671875e-05, + "model_forward_time": 0.024309635162353516, + "step": 8478 + }, + { + "epoch": 1.29364013671875e-05, + "step": 8478, + "training_step_time": 0.20037031173706055 + }, + { + "epoch": 1.293792724609375e-05, + "model_forward_time": 0.023020267486572266, + "step": 8479 + }, + { + "epoch": 1.293792724609375e-05, + "step": 8479, + "training_step_time": 0.19777727127075195 + }, + { + "epoch": 1.2939453125e-05, + "grad_norm": 0.3210027515888214, + "learning_rate": 8.591590244798844e-05, + "loss": 0.0318, + "step": 8480 + }, + { + "epoch": 1.2939453125e-05, + "model_forward_time": 0.023561477661132812, + "step": 8480 + }, + { + "epoch": 1.2939453125e-05, + "step": 8480, + "training_step_time": 0.18262767791748047 + }, + { + "epoch": 1.294097900390625e-05, + "model_forward_time": 0.02459430694580078, + "step": 8481 + }, + { + "epoch": 1.294097900390625e-05, + "step": 8481, + "training_step_time": 0.17492151260375977 + }, + { + "epoch": 1.29425048828125e-05, + "model_forward_time": 0.02434682846069336, + "step": 8482 + }, + { + "epoch": 1.29425048828125e-05, + "step": 8482, + "training_step_time": 0.18710875511169434 + }, + { + "epoch": 1.294403076171875e-05, + "model_forward_time": 0.024454116821289062, + "step": 8483 + }, + { + "epoch": 1.294403076171875e-05, + "step": 8483, + "training_step_time": 0.11809563636779785 + }, + { + "epoch": 1.2945556640625e-05, + "model_forward_time": 0.02505326271057129, + "step": 8484 + }, + { + "epoch": 1.2945556640625e-05, + "step": 8484, + "training_step_time": 0.10655808448791504 + }, + { + "epoch": 1.294708251953125e-05, + "model_forward_time": 0.027289867401123047, + "step": 8485 + }, + { + "epoch": 1.294708251953125e-05, + "step": 8485, + "training_step_time": 0.1136162281036377 + }, + { + "epoch": 1.29486083984375e-05, + "model_forward_time": 0.02543330192565918, + "step": 8486 + }, + { + "epoch": 1.29486083984375e-05, + "step": 8486, + "training_step_time": 0.1173396110534668 + }, + { + "epoch": 1.295013427734375e-05, + "model_forward_time": 0.025449275970458984, + "step": 8487 + }, + { + "epoch": 1.295013427734375e-05, + "step": 8487, + "training_step_time": 0.10909271240234375 + }, + { + "epoch": 1.295166015625e-05, + "model_forward_time": 0.025208711624145508, + "step": 8488 + }, + { + "epoch": 1.295166015625e-05, + "step": 8488, + "training_step_time": 0.11129260063171387 + }, + { + "epoch": 1.295318603515625e-05, + "model_forward_time": 0.02517533302307129, + "step": 8489 + }, + { + "epoch": 1.295318603515625e-05, + "step": 8489, + "training_step_time": 0.10646653175354004 + }, + { + "epoch": 1.29547119140625e-05, + "grad_norm": 0.6224812269210815, + "learning_rate": 8.587753585050004e-05, + "loss": 0.0403, + "step": 8490 + }, + { + "epoch": 1.29547119140625e-05, + "model_forward_time": 0.026526689529418945, + "step": 8490 + }, + { + "epoch": 1.29547119140625e-05, + "step": 8490, + "training_step_time": 0.10977602005004883 + }, + { + "epoch": 1.295623779296875e-05, + "model_forward_time": 0.025548696517944336, + "step": 8491 + }, + { + "epoch": 1.295623779296875e-05, + "step": 8491, + "training_step_time": 0.10744476318359375 + }, + { + "epoch": 1.2957763671875e-05, + "model_forward_time": 0.025397539138793945, + "step": 8492 + }, + { + "epoch": 1.2957763671875e-05, + "step": 8492, + "training_step_time": 0.10970592498779297 + }, + { + "epoch": 1.295928955078125e-05, + "model_forward_time": 0.025183916091918945, + "step": 8493 + }, + { + "epoch": 1.295928955078125e-05, + "step": 8493, + "training_step_time": 0.11104726791381836 + }, + { + "epoch": 1.29608154296875e-05, + "model_forward_time": 0.02545905113220215, + "step": 8494 + }, + { + "epoch": 1.29608154296875e-05, + "step": 8494, + "training_step_time": 0.1938161849975586 + }, + { + "epoch": 1.296234130859375e-05, + "model_forward_time": 0.024188995361328125, + "step": 8495 + }, + { + "epoch": 1.296234130859375e-05, + "step": 8495, + "training_step_time": 0.21196365356445312 + }, + { + "epoch": 1.29638671875e-05, + "model_forward_time": 0.024163246154785156, + "step": 8496 + }, + { + "epoch": 1.29638671875e-05, + "step": 8496, + "training_step_time": 0.24666523933410645 + }, + { + "epoch": 1.296539306640625e-05, + "model_forward_time": 0.024107694625854492, + "step": 8497 + }, + { + "epoch": 1.296539306640625e-05, + "step": 8497, + "training_step_time": 0.2194833755493164 + }, + { + "epoch": 1.29669189453125e-05, + "model_forward_time": 0.023825645446777344, + "step": 8498 + }, + { + "epoch": 1.29669189453125e-05, + "step": 8498, + "training_step_time": 0.19492840766906738 + }, + { + "epoch": 1.296844482421875e-05, + "model_forward_time": 0.024234294891357422, + "step": 8499 + }, + { + "epoch": 1.296844482421875e-05, + "step": 8499, + "training_step_time": 0.1612870693206787 + }, + { + "epoch": 1.2969970703125e-05, + "grad_norm": 0.7341414093971252, + "learning_rate": 8.583912565842257e-05, + "loss": 0.0487, + "step": 8500 + }, + { + "epoch": 1.2969970703125e-05, + "model_forward_time": 0.02478194236755371, + "step": 8500 + }, + { + "epoch": 1.2969970703125e-05, + "step": 8500, + "training_step_time": 0.1288461685180664 + }, + { + "epoch": 1.297149658203125e-05, + "model_forward_time": 0.024066448211669922, + "step": 8501 + }, + { + "epoch": 1.297149658203125e-05, + "step": 8501, + "training_step_time": 0.19371390342712402 + }, + { + "epoch": 1.29730224609375e-05, + "model_forward_time": 0.02440786361694336, + "step": 8502 + }, + { + "epoch": 1.29730224609375e-05, + "step": 8502, + "training_step_time": 0.10483098030090332 + }, + { + "epoch": 1.297454833984375e-05, + "model_forward_time": 0.024338483810424805, + "step": 8503 + }, + { + "epoch": 1.297454833984375e-05, + "step": 8503, + "training_step_time": 0.15780210494995117 + }, + { + "epoch": 1.297607421875e-05, + "model_forward_time": 0.02457714080810547, + "step": 8504 + }, + { + "epoch": 1.297607421875e-05, + "step": 8504, + "training_step_time": 0.11320710182189941 + }, + { + "epoch": 1.297760009765625e-05, + "model_forward_time": 0.024333477020263672, + "step": 8505 + }, + { + "epoch": 1.297760009765625e-05, + "step": 8505, + "training_step_time": 0.10988450050354004 + }, + { + "epoch": 1.29791259765625e-05, + "model_forward_time": 0.02524089813232422, + "step": 8506 + }, + { + "epoch": 1.29791259765625e-05, + "step": 8506, + "training_step_time": 0.1080312728881836 + }, + { + "epoch": 1.298065185546875e-05, + "model_forward_time": 0.025478363037109375, + "step": 8507 + }, + { + "epoch": 1.298065185546875e-05, + "step": 8507, + "training_step_time": 0.10982155799865723 + }, + { + "epoch": 1.2982177734375e-05, + "model_forward_time": 0.025667190551757812, + "step": 8508 + }, + { + "epoch": 1.2982177734375e-05, + "step": 8508, + "training_step_time": 0.10718989372253418 + }, + { + "epoch": 1.298370361328125e-05, + "model_forward_time": 0.025009870529174805, + "step": 8509 + }, + { + "epoch": 1.298370361328125e-05, + "step": 8509, + "training_step_time": 0.10791826248168945 + }, + { + "epoch": 1.29852294921875e-05, + "grad_norm": 0.3781100809574127, + "learning_rate": 8.5800671918428e-05, + "loss": 0.0376, + "step": 8510 + }, + { + "epoch": 1.29852294921875e-05, + "model_forward_time": 0.02417469024658203, + "step": 8510 + }, + { + "epoch": 1.29852294921875e-05, + "step": 8510, + "training_step_time": 0.10970616340637207 + }, + { + "epoch": 1.298675537109375e-05, + "model_forward_time": 0.024345874786376953, + "step": 8511 + }, + { + "epoch": 1.298675537109375e-05, + "step": 8511, + "training_step_time": 0.10900688171386719 + }, + { + "epoch": 1.298828125e-05, + "model_forward_time": 0.025331974029541016, + "step": 8512 + }, + { + "epoch": 1.298828125e-05, + "step": 8512, + "training_step_time": 0.1087641716003418 + }, + { + "epoch": 1.298980712890625e-05, + "model_forward_time": 0.025189638137817383, + "step": 8513 + }, + { + "epoch": 1.298980712890625e-05, + "step": 8513, + "training_step_time": 0.10828614234924316 + }, + { + "epoch": 1.29913330078125e-05, + "model_forward_time": 0.0250089168548584, + "step": 8514 + }, + { + "epoch": 1.29913330078125e-05, + "step": 8514, + "training_step_time": 0.11053824424743652 + }, + { + "epoch": 1.299285888671875e-05, + "model_forward_time": 0.0253143310546875, + "step": 8515 + }, + { + "epoch": 1.299285888671875e-05, + "step": 8515, + "training_step_time": 0.11462187767028809 + }, + { + "epoch": 1.2994384765625e-05, + "model_forward_time": 0.025487184524536133, + "step": 8516 + }, + { + "epoch": 1.2994384765625e-05, + "step": 8516, + "training_step_time": 0.11500740051269531 + }, + { + "epoch": 1.299591064453125e-05, + "model_forward_time": 0.025350570678710938, + "step": 8517 + }, + { + "epoch": 1.299591064453125e-05, + "step": 8517, + "training_step_time": 0.1097707748413086 + }, + { + "epoch": 1.29974365234375e-05, + "model_forward_time": 0.02521038055419922, + "step": 8518 + }, + { + "epoch": 1.29974365234375e-05, + "step": 8518, + "training_step_time": 0.1079108715057373 + }, + { + "epoch": 1.299896240234375e-05, + "model_forward_time": 0.025005102157592773, + "step": 8519 + }, + { + "epoch": 1.299896240234375e-05, + "step": 8519, + "training_step_time": 0.10697531700134277 + }, + { + "epoch": 1.300048828125e-05, + "grad_norm": 0.572238564491272, + "learning_rate": 8.576217467724128e-05, + "loss": 0.027, + "step": 8520 + }, + { + "epoch": 1.300048828125e-05, + "model_forward_time": 0.024984121322631836, + "step": 8520 + }, + { + "epoch": 1.300048828125e-05, + "step": 8520, + "training_step_time": 0.10987472534179688 + }, + { + "epoch": 1.300201416015625e-05, + "model_forward_time": 0.025002479553222656, + "step": 8521 + }, + { + "epoch": 1.300201416015625e-05, + "step": 8521, + "training_step_time": 0.108306884765625 + }, + { + "epoch": 1.30035400390625e-05, + "model_forward_time": 0.02526235580444336, + "step": 8522 + }, + { + "epoch": 1.30035400390625e-05, + "step": 8522, + "training_step_time": 0.10857057571411133 + }, + { + "epoch": 1.300506591796875e-05, + "model_forward_time": 0.025405406951904297, + "step": 8523 + }, + { + "epoch": 1.300506591796875e-05, + "step": 8523, + "training_step_time": 0.11063265800476074 + }, + { + "epoch": 1.3006591796875e-05, + "model_forward_time": 0.026181936264038086, + "step": 8524 + }, + { + "epoch": 1.3006591796875e-05, + "step": 8524, + "training_step_time": 0.1075129508972168 + }, + { + "epoch": 1.300811767578125e-05, + "model_forward_time": 0.025140047073364258, + "step": 8525 + }, + { + "epoch": 1.300811767578125e-05, + "step": 8525, + "training_step_time": 0.10638809204101562 + }, + { + "epoch": 1.30096435546875e-05, + "model_forward_time": 0.025313615798950195, + "step": 8526 + }, + { + "epoch": 1.30096435546875e-05, + "step": 8526, + "training_step_time": 0.11937713623046875 + }, + { + "epoch": 1.301116943359375e-05, + "model_forward_time": 0.02593517303466797, + "step": 8527 + }, + { + "epoch": 1.301116943359375e-05, + "step": 8527, + "training_step_time": 0.10895133018493652 + }, + { + "epoch": 1.30126953125e-05, + "model_forward_time": 0.02526116371154785, + "step": 8528 + }, + { + "epoch": 1.30126953125e-05, + "step": 8528, + "training_step_time": 0.10902953147888184 + }, + { + "epoch": 1.301422119140625e-05, + "model_forward_time": 0.02546834945678711, + "step": 8529 + }, + { + "epoch": 1.301422119140625e-05, + "step": 8529, + "training_step_time": 0.2134406566619873 + }, + { + "epoch": 1.30157470703125e-05, + "grad_norm": 0.36905935406684875, + "learning_rate": 8.572363398164017e-05, + "loss": 0.0383, + "step": 8530 + }, + { + "epoch": 1.30157470703125e-05, + "model_forward_time": 0.024584531784057617, + "step": 8530 + }, + { + "epoch": 1.30157470703125e-05, + "step": 8530, + "training_step_time": 0.12221026420593262 + }, + { + "epoch": 1.301727294921875e-05, + "model_forward_time": 0.024863481521606445, + "step": 8531 + }, + { + "epoch": 1.301727294921875e-05, + "step": 8531, + "training_step_time": 0.10729432106018066 + }, + { + "epoch": 1.3018798828125e-05, + "model_forward_time": 0.025046825408935547, + "step": 8532 + }, + { + "epoch": 1.3018798828125e-05, + "step": 8532, + "training_step_time": 0.10649347305297852 + }, + { + "epoch": 1.302032470703125e-05, + "model_forward_time": 0.024453401565551758, + "step": 8533 + }, + { + "epoch": 1.302032470703125e-05, + "step": 8533, + "training_step_time": 0.10722231864929199 + }, + { + "epoch": 1.30218505859375e-05, + "model_forward_time": 0.025127172470092773, + "step": 8534 + }, + { + "epoch": 1.30218505859375e-05, + "step": 8534, + "training_step_time": 0.10629034042358398 + }, + { + "epoch": 1.302337646484375e-05, + "model_forward_time": 0.02499103546142578, + "step": 8535 + }, + { + "epoch": 1.302337646484375e-05, + "step": 8535, + "training_step_time": 0.10706973075866699 + }, + { + "epoch": 1.302490234375e-05, + "model_forward_time": 0.025224685668945312, + "step": 8536 + }, + { + "epoch": 1.302490234375e-05, + "step": 8536, + "training_step_time": 0.11256575584411621 + }, + { + "epoch": 1.302642822265625e-05, + "model_forward_time": 0.024209260940551758, + "step": 8537 + }, + { + "epoch": 1.302642822265625e-05, + "step": 8537, + "training_step_time": 0.10873985290527344 + }, + { + "epoch": 1.30279541015625e-05, + "model_forward_time": 0.025450706481933594, + "step": 8538 + }, + { + "epoch": 1.30279541015625e-05, + "step": 8538, + "training_step_time": 0.14252734184265137 + }, + { + "epoch": 1.302947998046875e-05, + "model_forward_time": 0.025411128997802734, + "step": 8539 + }, + { + "epoch": 1.302947998046875e-05, + "step": 8539, + "training_step_time": 0.11145663261413574 + }, + { + "epoch": 1.3031005859375e-05, + "grad_norm": 0.5095164179801941, + "learning_rate": 8.568504987845525e-05, + "loss": 0.0406, + "step": 8540 + }, + { + "epoch": 1.3031005859375e-05, + "model_forward_time": 0.024390697479248047, + "step": 8540 + }, + { + "epoch": 1.3031005859375e-05, + "step": 8540, + "training_step_time": 0.17826604843139648 + }, + { + "epoch": 1.303253173828125e-05, + "model_forward_time": 0.02462172508239746, + "step": 8541 + }, + { + "epoch": 1.303253173828125e-05, + "step": 8541, + "training_step_time": 0.1736743450164795 + }, + { + "epoch": 1.30340576171875e-05, + "model_forward_time": 0.025616884231567383, + "step": 8542 + }, + { + "epoch": 1.30340576171875e-05, + "step": 8542, + "training_step_time": 0.15473222732543945 + }, + { + "epoch": 1.303558349609375e-05, + "model_forward_time": 0.02454996109008789, + "step": 8543 + }, + { + "epoch": 1.303558349609375e-05, + "step": 8543, + "training_step_time": 0.1759488582611084 + }, + { + "epoch": 1.3037109375e-05, + "model_forward_time": 0.024289369583129883, + "step": 8544 + }, + { + "epoch": 1.3037109375e-05, + "step": 8544, + "training_step_time": 0.12207674980163574 + }, + { + "epoch": 1.303863525390625e-05, + "model_forward_time": 0.025026559829711914, + "step": 8545 + }, + { + "epoch": 1.303863525390625e-05, + "step": 8545, + "training_step_time": 0.148789644241333 + }, + { + "epoch": 1.30401611328125e-05, + "model_forward_time": 0.024718046188354492, + "step": 8546 + }, + { + "epoch": 1.30401611328125e-05, + "step": 8546, + "training_step_time": 0.13233160972595215 + }, + { + "epoch": 1.304168701171875e-05, + "model_forward_time": 0.02458024024963379, + "step": 8547 + }, + { + "epoch": 1.304168701171875e-05, + "step": 8547, + "training_step_time": 0.1948089599609375 + }, + { + "epoch": 1.3043212890625e-05, + "model_forward_time": 0.024645090103149414, + "step": 8548 + }, + { + "epoch": 1.3043212890625e-05, + "step": 8548, + "training_step_time": 0.10531258583068848 + }, + { + "epoch": 1.304473876953125e-05, + "model_forward_time": 0.02452230453491211, + "step": 8549 + }, + { + "epoch": 1.304473876953125e-05, + "step": 8549, + "training_step_time": 0.10472559928894043 + }, + { + "epoch": 1.30462646484375e-05, + "grad_norm": 0.2935931980609894, + "learning_rate": 8.564642241456986e-05, + "loss": 0.0378, + "step": 8550 + }, + { + "epoch": 1.30462646484375e-05, + "model_forward_time": 0.025105714797973633, + "step": 8550 + }, + { + "epoch": 1.30462646484375e-05, + "step": 8550, + "training_step_time": 0.10613441467285156 + }, + { + "epoch": 1.304779052734375e-05, + "model_forward_time": 0.024910449981689453, + "step": 8551 + }, + { + "epoch": 1.304779052734375e-05, + "step": 8551, + "training_step_time": 0.10725283622741699 + }, + { + "epoch": 1.304931640625e-05, + "model_forward_time": 0.02523493766784668, + "step": 8552 + }, + { + "epoch": 1.304931640625e-05, + "step": 8552, + "training_step_time": 0.11110925674438477 + }, + { + "epoch": 1.305084228515625e-05, + "model_forward_time": 0.025808334350585938, + "step": 8553 + }, + { + "epoch": 1.305084228515625e-05, + "step": 8553, + "training_step_time": 0.10963058471679688 + }, + { + "epoch": 1.30523681640625e-05, + "model_forward_time": 0.026088953018188477, + "step": 8554 + }, + { + "epoch": 1.30523681640625e-05, + "step": 8554, + "training_step_time": 0.10961222648620605 + }, + { + "epoch": 1.305389404296875e-05, + "model_forward_time": 0.025322437286376953, + "step": 8555 + }, + { + "epoch": 1.305389404296875e-05, + "step": 8555, + "training_step_time": 0.11128902435302734 + }, + { + "epoch": 1.3055419921875e-05, + "model_forward_time": 0.02516913414001465, + "step": 8556 + }, + { + "epoch": 1.3055419921875e-05, + "step": 8556, + "training_step_time": 0.11033964157104492 + }, + { + "epoch": 1.305694580078125e-05, + "model_forward_time": 0.025201797485351562, + "step": 8557 + }, + { + "epoch": 1.305694580078125e-05, + "step": 8557, + "training_step_time": 0.10784649848937988 + }, + { + "epoch": 1.30584716796875e-05, + "model_forward_time": 0.025284767150878906, + "step": 8558 + }, + { + "epoch": 1.30584716796875e-05, + "step": 8558, + "training_step_time": 0.11011981964111328 + }, + { + "epoch": 1.305999755859375e-05, + "model_forward_time": 0.024981260299682617, + "step": 8559 + }, + { + "epoch": 1.305999755859375e-05, + "step": 8559, + "training_step_time": 0.10783076286315918 + }, + { + "epoch": 1.30615234375e-05, + "grad_norm": 0.7260056734085083, + "learning_rate": 8.560775163691999e-05, + "loss": 0.0386, + "step": 8560 + }, + { + "epoch": 1.30615234375e-05, + "model_forward_time": 0.025025606155395508, + "step": 8560 + }, + { + "epoch": 1.30615234375e-05, + "step": 8560, + "training_step_time": 0.11048340797424316 + }, + { + "epoch": 1.306304931640625e-05, + "model_forward_time": 0.02541828155517578, + "step": 8561 + }, + { + "epoch": 1.306304931640625e-05, + "step": 8561, + "training_step_time": 0.11759138107299805 + }, + { + "epoch": 1.30645751953125e-05, + "model_forward_time": 0.025322914123535156, + "step": 8562 + }, + { + "epoch": 1.30645751953125e-05, + "step": 8562, + "training_step_time": 0.11378288269042969 + }, + { + "epoch": 1.306610107421875e-05, + "model_forward_time": 0.025467634201049805, + "step": 8563 + }, + { + "epoch": 1.306610107421875e-05, + "step": 8563, + "training_step_time": 0.11336469650268555 + }, + { + "epoch": 1.3067626953125e-05, + "model_forward_time": 0.025119781494140625, + "step": 8564 + }, + { + "epoch": 1.3067626953125e-05, + "step": 8564, + "training_step_time": 0.10680818557739258 + }, + { + "epoch": 1.306915283203125e-05, + "model_forward_time": 0.024999141693115234, + "step": 8565 + }, + { + "epoch": 1.306915283203125e-05, + "step": 8565, + "training_step_time": 0.10766458511352539 + }, + { + "epoch": 1.30706787109375e-05, + "model_forward_time": 0.025144338607788086, + "step": 8566 + }, + { + "epoch": 1.30706787109375e-05, + "step": 8566, + "training_step_time": 0.11034584045410156 + }, + { + "epoch": 1.307220458984375e-05, + "model_forward_time": 0.02532362937927246, + "step": 8567 + }, + { + "epoch": 1.307220458984375e-05, + "step": 8567, + "training_step_time": 0.11063694953918457 + }, + { + "epoch": 1.307373046875e-05, + "model_forward_time": 0.02497720718383789, + "step": 8568 + }, + { + "epoch": 1.307373046875e-05, + "step": 8568, + "training_step_time": 0.11074018478393555 + }, + { + "epoch": 1.307525634765625e-05, + "model_forward_time": 0.025429487228393555, + "step": 8569 + }, + { + "epoch": 1.307525634765625e-05, + "step": 8569, + "training_step_time": 0.1141209602355957 + }, + { + "epoch": 1.30767822265625e-05, + "grad_norm": 0.5504429340362549, + "learning_rate": 8.556903759249428e-05, + "loss": 0.074, + "step": 8570 + }, + { + "epoch": 1.30767822265625e-05, + "model_forward_time": 0.025015830993652344, + "step": 8570 + }, + { + "epoch": 1.30767822265625e-05, + "step": 8570, + "training_step_time": 0.10707259178161621 + }, + { + "epoch": 1.307830810546875e-05, + "model_forward_time": 0.024809598922729492, + "step": 8571 + }, + { + "epoch": 1.307830810546875e-05, + "step": 8571, + "training_step_time": 0.1060035228729248 + }, + { + "epoch": 1.3079833984375e-05, + "model_forward_time": 0.025105953216552734, + "step": 8572 + }, + { + "epoch": 1.3079833984375e-05, + "step": 8572, + "training_step_time": 0.11037945747375488 + }, + { + "epoch": 1.308135986328125e-05, + "model_forward_time": 0.025478124618530273, + "step": 8573 + }, + { + "epoch": 1.308135986328125e-05, + "step": 8573, + "training_step_time": 0.11862349510192871 + }, + { + "epoch": 1.30828857421875e-05, + "model_forward_time": 0.025417804718017578, + "step": 8574 + }, + { + "epoch": 1.30828857421875e-05, + "step": 8574, + "training_step_time": 0.11163544654846191 + }, + { + "epoch": 1.308441162109375e-05, + "model_forward_time": 0.02540898323059082, + "step": 8575 + }, + { + "epoch": 1.308441162109375e-05, + "step": 8575, + "training_step_time": 0.2170274257659912 + }, + { + "epoch": 1.30859375e-05, + "model_forward_time": 0.025041580200195312, + "step": 8576 + }, + { + "epoch": 1.30859375e-05, + "step": 8576, + "training_step_time": 0.11631011962890625 + }, + { + "epoch": 1.308746337890625e-05, + "model_forward_time": 0.024708032608032227, + "step": 8577 + }, + { + "epoch": 1.308746337890625e-05, + "step": 8577, + "training_step_time": 0.10408782958984375 + }, + { + "epoch": 1.30889892578125e-05, + "model_forward_time": 0.025156736373901367, + "step": 8578 + }, + { + "epoch": 1.30889892578125e-05, + "step": 8578, + "training_step_time": 0.10798525810241699 + }, + { + "epoch": 1.309051513671875e-05, + "model_forward_time": 0.02511739730834961, + "step": 8579 + }, + { + "epoch": 1.309051513671875e-05, + "step": 8579, + "training_step_time": 0.1102294921875 + }, + { + "epoch": 1.3092041015625e-05, + "grad_norm": 0.33122915029525757, + "learning_rate": 8.553028032833397e-05, + "loss": 0.0478, + "step": 8580 + }, + { + "epoch": 1.3092041015625e-05, + "model_forward_time": 0.02475738525390625, + "step": 8580 + }, + { + "epoch": 1.3092041015625e-05, + "step": 8580, + "training_step_time": 0.10719585418701172 + }, + { + "epoch": 1.309356689453125e-05, + "model_forward_time": 0.025197982788085938, + "step": 8581 + }, + { + "epoch": 1.309356689453125e-05, + "step": 8581, + "training_step_time": 0.10858583450317383 + }, + { + "epoch": 1.30950927734375e-05, + "model_forward_time": 0.02564406394958496, + "step": 8582 + }, + { + "epoch": 1.30950927734375e-05, + "step": 8582, + "training_step_time": 0.10618829727172852 + }, + { + "epoch": 1.309661865234375e-05, + "model_forward_time": 0.025420665740966797, + "step": 8583 + }, + { + "epoch": 1.309661865234375e-05, + "step": 8583, + "training_step_time": 0.10611605644226074 + }, + { + "epoch": 1.309814453125e-05, + "model_forward_time": 0.025449752807617188, + "step": 8584 + }, + { + "epoch": 1.309814453125e-05, + "step": 8584, + "training_step_time": 0.20740389823913574 + }, + { + "epoch": 1.309967041015625e-05, + "model_forward_time": 0.024262666702270508, + "step": 8585 + }, + { + "epoch": 1.309967041015625e-05, + "step": 8585, + "training_step_time": 0.11228275299072266 + }, + { + "epoch": 1.31011962890625e-05, + "model_forward_time": 0.024262666702270508, + "step": 8586 + }, + { + "epoch": 1.31011962890625e-05, + "step": 8586, + "training_step_time": 0.10965204238891602 + }, + { + "epoch": 1.310272216796875e-05, + "model_forward_time": 0.02527642250061035, + "step": 8587 + }, + { + "epoch": 1.310272216796875e-05, + "step": 8587, + "training_step_time": 0.11970233917236328 + }, + { + "epoch": 1.3104248046875e-05, + "model_forward_time": 0.025374174118041992, + "step": 8588 + }, + { + "epoch": 1.3104248046875e-05, + "step": 8588, + "training_step_time": 0.126939058303833 + }, + { + "epoch": 1.310577392578125e-05, + "model_forward_time": 0.02526569366455078, + "step": 8589 + }, + { + "epoch": 1.310577392578125e-05, + "step": 8589, + "training_step_time": 0.12303614616394043 + }, + { + "epoch": 1.31072998046875e-05, + "grad_norm": 0.2752124071121216, + "learning_rate": 8.549147989153276e-05, + "loss": 0.0354, + "step": 8590 + }, + { + "epoch": 1.31072998046875e-05, + "model_forward_time": 0.025165319442749023, + "step": 8590 + }, + { + "epoch": 1.31072998046875e-05, + "step": 8590, + "training_step_time": 0.1118309497833252 + }, + { + "epoch": 1.310882568359375e-05, + "model_forward_time": 0.025021076202392578, + "step": 8591 + }, + { + "epoch": 1.310882568359375e-05, + "step": 8591, + "training_step_time": 0.1861588954925537 + }, + { + "epoch": 1.31103515625e-05, + "model_forward_time": 0.02464151382446289, + "step": 8592 + }, + { + "epoch": 1.31103515625e-05, + "step": 8592, + "training_step_time": 0.14913630485534668 + }, + { + "epoch": 1.311187744140625e-05, + "model_forward_time": 0.024680376052856445, + "step": 8593 + }, + { + "epoch": 1.311187744140625e-05, + "step": 8593, + "training_step_time": 0.1892564296722412 + }, + { + "epoch": 1.31134033203125e-05, + "model_forward_time": 0.024349212646484375, + "step": 8594 + }, + { + "epoch": 1.31134033203125e-05, + "step": 8594, + "training_step_time": 0.17212247848510742 + }, + { + "epoch": 1.311492919921875e-05, + "model_forward_time": 0.024448156356811523, + "step": 8595 + }, + { + "epoch": 1.311492919921875e-05, + "step": 8595, + "training_step_time": 0.10905838012695312 + }, + { + "epoch": 1.3116455078125e-05, + "model_forward_time": 0.024903297424316406, + "step": 8596 + }, + { + "epoch": 1.3116455078125e-05, + "step": 8596, + "training_step_time": 0.17607736587524414 + }, + { + "epoch": 1.311798095703125e-05, + "model_forward_time": 0.023910999298095703, + "step": 8597 + }, + { + "epoch": 1.311798095703125e-05, + "step": 8597, + "training_step_time": 0.1874864101409912 + }, + { + "epoch": 1.31195068359375e-05, + "model_forward_time": 0.024268388748168945, + "step": 8598 + }, + { + "epoch": 1.31195068359375e-05, + "step": 8598, + "training_step_time": 0.1785261631011963 + }, + { + "epoch": 1.312103271484375e-05, + "model_forward_time": 0.02351832389831543, + "step": 8599 + }, + { + "epoch": 1.312103271484375e-05, + "step": 8599, + "training_step_time": 0.16787362098693848 + }, + { + "epoch": 1.312255859375e-05, + "grad_norm": 0.571994423866272, + "learning_rate": 8.545263632923687e-05, + "loss": 0.0257, + "step": 8600 + }, + { + "epoch": 1.312255859375e-05, + "model_forward_time": 0.024059534072875977, + "step": 8600 + }, + { + "epoch": 1.312255859375e-05, + "step": 8600, + "training_step_time": 0.1477503776550293 + }, + { + "epoch": 1.312408447265625e-05, + "model_forward_time": 0.02431774139404297, + "step": 8601 + }, + { + "epoch": 1.312408447265625e-05, + "step": 8601, + "training_step_time": 0.14449715614318848 + }, + { + "epoch": 1.31256103515625e-05, + "model_forward_time": 0.02480936050415039, + "step": 8602 + }, + { + "epoch": 1.31256103515625e-05, + "step": 8602, + "training_step_time": 0.1284351348876953 + }, + { + "epoch": 1.312713623046875e-05, + "model_forward_time": 0.02422308921813965, + "step": 8603 + }, + { + "epoch": 1.312713623046875e-05, + "step": 8603, + "training_step_time": 0.12949728965759277 + }, + { + "epoch": 1.3128662109375e-05, + "model_forward_time": 0.024661779403686523, + "step": 8604 + }, + { + "epoch": 1.3128662109375e-05, + "step": 8604, + "training_step_time": 0.12489819526672363 + }, + { + "epoch": 1.313018798828125e-05, + "model_forward_time": 0.02506875991821289, + "step": 8605 + }, + { + "epoch": 1.313018798828125e-05, + "step": 8605, + "training_step_time": 0.12144923210144043 + }, + { + "epoch": 1.31317138671875e-05, + "model_forward_time": 0.024789810180664062, + "step": 8606 + }, + { + "epoch": 1.31317138671875e-05, + "step": 8606, + "training_step_time": 0.11706876754760742 + }, + { + "epoch": 1.313323974609375e-05, + "model_forward_time": 0.025183439254760742, + "step": 8607 + }, + { + "epoch": 1.313323974609375e-05, + "step": 8607, + "training_step_time": 0.11273503303527832 + }, + { + "epoch": 1.3134765625e-05, + "model_forward_time": 0.025107145309448242, + "step": 8608 + }, + { + "epoch": 1.3134765625e-05, + "step": 8608, + "training_step_time": 0.11113643646240234 + }, + { + "epoch": 1.313629150390625e-05, + "model_forward_time": 0.025447368621826172, + "step": 8609 + }, + { + "epoch": 1.313629150390625e-05, + "step": 8609, + "training_step_time": 0.10822701454162598 + }, + { + "epoch": 1.31378173828125e-05, + "grad_norm": 0.6709743142127991, + "learning_rate": 8.541374968864487e-05, + "loss": 0.0323, + "step": 8610 + }, + { + "epoch": 1.31378173828125e-05, + "model_forward_time": 0.025504350662231445, + "step": 8610 + }, + { + "epoch": 1.31378173828125e-05, + "step": 8610, + "training_step_time": 0.11101531982421875 + }, + { + "epoch": 1.313934326171875e-05, + "model_forward_time": 0.02473759651184082, + "step": 8611 + }, + { + "epoch": 1.313934326171875e-05, + "step": 8611, + "training_step_time": 0.11205029487609863 + }, + { + "epoch": 1.3140869140625e-05, + "model_forward_time": 0.02497267723083496, + "step": 8612 + }, + { + "epoch": 1.3140869140625e-05, + "step": 8612, + "training_step_time": 0.10936331748962402 + }, + { + "epoch": 1.314239501953125e-05, + "model_forward_time": 0.024730920791625977, + "step": 8613 + }, + { + "epoch": 1.314239501953125e-05, + "step": 8613, + "training_step_time": 0.10367202758789062 + }, + { + "epoch": 1.31439208984375e-05, + "model_forward_time": 0.024790048599243164, + "step": 8614 + }, + { + "epoch": 1.31439208984375e-05, + "step": 8614, + "training_step_time": 0.1121358871459961 + }, + { + "epoch": 1.314544677734375e-05, + "model_forward_time": 0.024821043014526367, + "step": 8615 + }, + { + "epoch": 1.314544677734375e-05, + "step": 8615, + "training_step_time": 0.11941695213317871 + }, + { + "epoch": 1.314697265625e-05, + "model_forward_time": 0.025316953659057617, + "step": 8616 + }, + { + "epoch": 1.314697265625e-05, + "step": 8616, + "training_step_time": 0.13284873962402344 + }, + { + "epoch": 1.314849853515625e-05, + "model_forward_time": 0.024974346160888672, + "step": 8617 + }, + { + "epoch": 1.314849853515625e-05, + "step": 8617, + "training_step_time": 0.18481779098510742 + }, + { + "epoch": 1.31500244140625e-05, + "model_forward_time": 0.024972915649414062, + "step": 8618 + }, + { + "epoch": 1.31500244140625e-05, + "step": 8618, + "training_step_time": 0.13886356353759766 + }, + { + "epoch": 1.315155029296875e-05, + "model_forward_time": 0.025105714797973633, + "step": 8619 + }, + { + "epoch": 1.315155029296875e-05, + "step": 8619, + "training_step_time": 0.12329864501953125 + }, + { + "epoch": 1.3153076171875e-05, + "grad_norm": 0.6751372218132019, + "learning_rate": 8.537482001700769e-05, + "loss": 0.0437, + "step": 8620 + }, + { + "epoch": 1.3153076171875e-05, + "model_forward_time": 0.024887800216674805, + "step": 8620 + }, + { + "epoch": 1.3153076171875e-05, + "step": 8620, + "training_step_time": 0.11719608306884766 + }, + { + "epoch": 1.315460205078125e-05, + "model_forward_time": 0.025247573852539062, + "step": 8621 + }, + { + "epoch": 1.315460205078125e-05, + "step": 8621, + "training_step_time": 0.11867380142211914 + }, + { + "epoch": 1.31561279296875e-05, + "model_forward_time": 0.025392532348632812, + "step": 8622 + }, + { + "epoch": 1.31561279296875e-05, + "step": 8622, + "training_step_time": 0.11298704147338867 + }, + { + "epoch": 1.315765380859375e-05, + "model_forward_time": 0.025484085083007812, + "step": 8623 + }, + { + "epoch": 1.315765380859375e-05, + "step": 8623, + "training_step_time": 0.11469030380249023 + }, + { + "epoch": 1.31591796875e-05, + "model_forward_time": 0.025179147720336914, + "step": 8624 + }, + { + "epoch": 1.31591796875e-05, + "step": 8624, + "training_step_time": 0.1128082275390625 + }, + { + "epoch": 1.316070556640625e-05, + "model_forward_time": 0.02524280548095703, + "step": 8625 + }, + { + "epoch": 1.316070556640625e-05, + "step": 8625, + "training_step_time": 0.10920953750610352 + }, + { + "epoch": 1.31622314453125e-05, + "model_forward_time": 0.02540898323059082, + "step": 8626 + }, + { + "epoch": 1.31622314453125e-05, + "step": 8626, + "training_step_time": 0.11580705642700195 + }, + { + "epoch": 1.316375732421875e-05, + "model_forward_time": 0.025336027145385742, + "step": 8627 + }, + { + "epoch": 1.316375732421875e-05, + "step": 8627, + "training_step_time": 0.14510464668273926 + }, + { + "epoch": 1.3165283203125e-05, + "model_forward_time": 0.024894237518310547, + "step": 8628 + }, + { + "epoch": 1.3165283203125e-05, + "step": 8628, + "training_step_time": 0.11376333236694336 + }, + { + "epoch": 1.316680908203125e-05, + "model_forward_time": 0.02507162094116211, + "step": 8629 + }, + { + "epoch": 1.316680908203125e-05, + "step": 8629, + "training_step_time": 0.11668038368225098 + }, + { + "epoch": 1.31683349609375e-05, + "grad_norm": 0.5390626788139343, + "learning_rate": 8.533584736162857e-05, + "loss": 0.0297, + "step": 8630 + }, + { + "epoch": 1.31683349609375e-05, + "model_forward_time": 0.025188207626342773, + "step": 8630 + }, + { + "epoch": 1.31683349609375e-05, + "step": 8630, + "training_step_time": 0.11632895469665527 + }, + { + "epoch": 1.316986083984375e-05, + "model_forward_time": 0.025362491607666016, + "step": 8631 + }, + { + "epoch": 1.316986083984375e-05, + "step": 8631, + "training_step_time": 0.12700271606445312 + }, + { + "epoch": 1.317138671875e-05, + "model_forward_time": 0.025690793991088867, + "step": 8632 + }, + { + "epoch": 1.317138671875e-05, + "step": 8632, + "training_step_time": 0.14475631713867188 + }, + { + "epoch": 1.317291259765625e-05, + "model_forward_time": 0.025162458419799805, + "step": 8633 + }, + { + "epoch": 1.317291259765625e-05, + "step": 8633, + "training_step_time": 0.18711352348327637 + }, + { + "epoch": 1.31744384765625e-05, + "model_forward_time": 0.02428150177001953, + "step": 8634 + }, + { + "epoch": 1.31744384765625e-05, + "step": 8634, + "training_step_time": 0.19015979766845703 + }, + { + "epoch": 1.317596435546875e-05, + "model_forward_time": 0.02457880973815918, + "step": 8635 + }, + { + "epoch": 1.317596435546875e-05, + "step": 8635, + "training_step_time": 0.17269039154052734 + }, + { + "epoch": 1.3177490234375e-05, + "model_forward_time": 0.024191856384277344, + "step": 8636 + }, + { + "epoch": 1.3177490234375e-05, + "step": 8636, + "training_step_time": 0.15909171104431152 + }, + { + "epoch": 1.317901611328125e-05, + "model_forward_time": 0.024516820907592773, + "step": 8637 + }, + { + "epoch": 1.317901611328125e-05, + "step": 8637, + "training_step_time": 0.10775113105773926 + }, + { + "epoch": 1.31805419921875e-05, + "model_forward_time": 0.02461385726928711, + "step": 8638 + }, + { + "epoch": 1.31805419921875e-05, + "step": 8638, + "training_step_time": 0.10728979110717773 + }, + { + "epoch": 1.318206787109375e-05, + "model_forward_time": 0.024155378341674805, + "step": 8639 + }, + { + "epoch": 1.318206787109375e-05, + "step": 8639, + "training_step_time": 0.1101679801940918 + }, + { + "epoch": 1.318359375e-05, + "grad_norm": 0.37394291162490845, + "learning_rate": 8.529683176986295e-05, + "loss": 0.0397, + "step": 8640 + }, + { + "epoch": 1.318359375e-05, + "model_forward_time": 0.0252838134765625, + "step": 8640 + }, + { + "epoch": 1.318359375e-05, + "step": 8640, + "training_step_time": 0.10839533805847168 + }, + { + "epoch": 1.318511962890625e-05, + "model_forward_time": 0.025204181671142578, + "step": 8641 + }, + { + "epoch": 1.318511962890625e-05, + "step": 8641, + "training_step_time": 0.10919642448425293 + }, + { + "epoch": 1.31866455078125e-05, + "model_forward_time": 0.025541305541992188, + "step": 8642 + }, + { + "epoch": 1.31866455078125e-05, + "step": 8642, + "training_step_time": 0.10925555229187012 + }, + { + "epoch": 1.318817138671875e-05, + "model_forward_time": 0.024201393127441406, + "step": 8643 + }, + { + "epoch": 1.318817138671875e-05, + "step": 8643, + "training_step_time": 0.10751581192016602 + }, + { + "epoch": 1.3189697265625e-05, + "model_forward_time": 0.025599002838134766, + "step": 8644 + }, + { + "epoch": 1.3189697265625e-05, + "step": 8644, + "training_step_time": 0.1433415412902832 + }, + { + "epoch": 1.319122314453125e-05, + "model_forward_time": 0.024764060974121094, + "step": 8645 + }, + { + "epoch": 1.319122314453125e-05, + "step": 8645, + "training_step_time": 0.16957402229309082 + }, + { + "epoch": 1.31927490234375e-05, + "model_forward_time": 0.025166988372802734, + "step": 8646 + }, + { + "epoch": 1.31927490234375e-05, + "step": 8646, + "training_step_time": 0.15799665451049805 + }, + { + "epoch": 1.319427490234375e-05, + "model_forward_time": 0.024077177047729492, + "step": 8647 + }, + { + "epoch": 1.319427490234375e-05, + "step": 8647, + "training_step_time": 0.14330744743347168 + }, + { + "epoch": 1.319580078125e-05, + "model_forward_time": 0.0247042179107666, + "step": 8648 + }, + { + "epoch": 1.319580078125e-05, + "step": 8648, + "training_step_time": 0.1289200782775879 + }, + { + "epoch": 1.319732666015625e-05, + "model_forward_time": 0.02465033531188965, + "step": 8649 + }, + { + "epoch": 1.319732666015625e-05, + "step": 8649, + "training_step_time": 0.12702298164367676 + }, + { + "epoch": 1.31988525390625e-05, + "grad_norm": 0.40195232629776, + "learning_rate": 8.525777328911846e-05, + "loss": 0.0419, + "step": 8650 + }, + { + "epoch": 1.31988525390625e-05, + "model_forward_time": 0.025113821029663086, + "step": 8650 + }, + { + "epoch": 1.31988525390625e-05, + "step": 8650, + "training_step_time": 0.12198662757873535 + }, + { + "epoch": 1.320037841796875e-05, + "model_forward_time": 0.025211572647094727, + "step": 8651 + }, + { + "epoch": 1.320037841796875e-05, + "step": 8651, + "training_step_time": 0.12036871910095215 + }, + { + "epoch": 1.3201904296875e-05, + "model_forward_time": 0.02510857582092285, + "step": 8652 + }, + { + "epoch": 1.3201904296875e-05, + "step": 8652, + "training_step_time": 0.11340117454528809 + }, + { + "epoch": 1.320343017578125e-05, + "model_forward_time": 0.025313377380371094, + "step": 8653 + }, + { + "epoch": 1.320343017578125e-05, + "step": 8653, + "training_step_time": 0.11277365684509277 + }, + { + "epoch": 1.32049560546875e-05, + "model_forward_time": 0.02543807029724121, + "step": 8654 + }, + { + "epoch": 1.32049560546875e-05, + "step": 8654, + "training_step_time": 0.11357522010803223 + }, + { + "epoch": 1.320648193359375e-05, + "model_forward_time": 0.02534794807434082, + "step": 8655 + }, + { + "epoch": 1.320648193359375e-05, + "step": 8655, + "training_step_time": 0.1105504035949707 + }, + { + "epoch": 1.32080078125e-05, + "model_forward_time": 0.0254976749420166, + "step": 8656 + }, + { + "epoch": 1.32080078125e-05, + "step": 8656, + "training_step_time": 0.10862159729003906 + }, + { + "epoch": 1.320953369140625e-05, + "model_forward_time": 0.025475263595581055, + "step": 8657 + }, + { + "epoch": 1.320953369140625e-05, + "step": 8657, + "training_step_time": 0.10529112815856934 + }, + { + "epoch": 1.32110595703125e-05, + "model_forward_time": 0.024819612503051758, + "step": 8658 + }, + { + "epoch": 1.32110595703125e-05, + "step": 8658, + "training_step_time": 0.10994410514831543 + }, + { + "epoch": 1.321258544921875e-05, + "model_forward_time": 0.02486133575439453, + "step": 8659 + }, + { + "epoch": 1.321258544921875e-05, + "step": 8659, + "training_step_time": 0.11730790138244629 + }, + { + "epoch": 1.3214111328125e-05, + "grad_norm": 0.42142099142074585, + "learning_rate": 8.521867196685482e-05, + "loss": 0.0327, + "step": 8660 + }, + { + "epoch": 1.3214111328125e-05, + "model_forward_time": 0.025101184844970703, + "step": 8660 + }, + { + "epoch": 1.3214111328125e-05, + "step": 8660, + "training_step_time": 0.11071372032165527 + }, + { + "epoch": 1.321563720703125e-05, + "model_forward_time": 0.025377750396728516, + "step": 8661 + }, + { + "epoch": 1.321563720703125e-05, + "step": 8661, + "training_step_time": 0.10681867599487305 + }, + { + "epoch": 1.32171630859375e-05, + "model_forward_time": 0.025609731674194336, + "step": 8662 + }, + { + "epoch": 1.32171630859375e-05, + "step": 8662, + "training_step_time": 0.17490720748901367 + }, + { + "epoch": 1.321868896484375e-05, + "model_forward_time": 0.024233341217041016, + "step": 8663 + }, + { + "epoch": 1.321868896484375e-05, + "step": 8663, + "training_step_time": 0.16441917419433594 + }, + { + "epoch": 1.322021484375e-05, + "model_forward_time": 0.024694442749023438, + "step": 8664 + }, + { + "epoch": 1.322021484375e-05, + "step": 8664, + "training_step_time": 0.10715794563293457 + }, + { + "epoch": 1.322174072265625e-05, + "model_forward_time": 0.02450084686279297, + "step": 8665 + }, + { + "epoch": 1.322174072265625e-05, + "step": 8665, + "training_step_time": 0.10627007484436035 + }, + { + "epoch": 1.32232666015625e-05, + "model_forward_time": 0.025232553482055664, + "step": 8666 + }, + { + "epoch": 1.32232666015625e-05, + "step": 8666, + "training_step_time": 0.11317014694213867 + }, + { + "epoch": 1.322479248046875e-05, + "model_forward_time": 0.025261640548706055, + "step": 8667 + }, + { + "epoch": 1.322479248046875e-05, + "step": 8667, + "training_step_time": 0.10953426361083984 + }, + { + "epoch": 1.3226318359375e-05, + "model_forward_time": 0.025219202041625977, + "step": 8668 + }, + { + "epoch": 1.3226318359375e-05, + "step": 8668, + "training_step_time": 0.11009407043457031 + }, + { + "epoch": 1.322784423828125e-05, + "model_forward_time": 0.02515721321105957, + "step": 8669 + }, + { + "epoch": 1.322784423828125e-05, + "step": 8669, + "training_step_time": 0.10850119590759277 + }, + { + "epoch": 1.32293701171875e-05, + "grad_norm": 0.41516929864883423, + "learning_rate": 8.517952785058385e-05, + "loss": 0.0286, + "step": 8670 + }, + { + "epoch": 1.32293701171875e-05, + "model_forward_time": 0.02478957176208496, + "step": 8670 + }, + { + "epoch": 1.32293701171875e-05, + "step": 8670, + "training_step_time": 0.10839557647705078 + }, + { + "epoch": 1.323089599609375e-05, + "model_forward_time": 0.02549576759338379, + "step": 8671 + }, + { + "epoch": 1.323089599609375e-05, + "step": 8671, + "training_step_time": 0.11282062530517578 + }, + { + "epoch": 1.3232421875e-05, + "model_forward_time": 0.02573084831237793, + "step": 8672 + }, + { + "epoch": 1.3232421875e-05, + "step": 8672, + "training_step_time": 0.15232276916503906 + }, + { + "epoch": 1.323394775390625e-05, + "model_forward_time": 0.025122880935668945, + "step": 8673 + }, + { + "epoch": 1.323394775390625e-05, + "step": 8673, + "training_step_time": 0.11089658737182617 + }, + { + "epoch": 1.32354736328125e-05, + "model_forward_time": 0.025050878524780273, + "step": 8674 + }, + { + "epoch": 1.32354736328125e-05, + "step": 8674, + "training_step_time": 0.11160063743591309 + }, + { + "epoch": 1.323699951171875e-05, + "model_forward_time": 0.02529430389404297, + "step": 8675 + }, + { + "epoch": 1.323699951171875e-05, + "step": 8675, + "training_step_time": 0.11767864227294922 + }, + { + "epoch": 1.3238525390625e-05, + "model_forward_time": 0.025238513946533203, + "step": 8676 + }, + { + "epoch": 1.3238525390625e-05, + "step": 8676, + "training_step_time": 0.15393757820129395 + }, + { + "epoch": 1.324005126953125e-05, + "model_forward_time": 0.025004148483276367, + "step": 8677 + }, + { + "epoch": 1.324005126953125e-05, + "step": 8677, + "training_step_time": 0.21161937713623047 + }, + { + "epoch": 1.32415771484375e-05, + "model_forward_time": 0.024626731872558594, + "step": 8678 + }, + { + "epoch": 1.32415771484375e-05, + "step": 8678, + "training_step_time": 0.14476418495178223 + }, + { + "epoch": 1.324310302734375e-05, + "model_forward_time": 0.024337291717529297, + "step": 8679 + }, + { + "epoch": 1.324310302734375e-05, + "step": 8679, + "training_step_time": 0.15688753128051758 + }, + { + "epoch": 1.324462890625e-05, + "grad_norm": 0.3715316653251648, + "learning_rate": 8.514034098786933e-05, + "loss": 0.0215, + "step": 8680 + }, + { + "epoch": 1.324462890625e-05, + "model_forward_time": 0.02440333366394043, + "step": 8680 + }, + { + "epoch": 1.324462890625e-05, + "step": 8680, + "training_step_time": 0.21669411659240723 + }, + { + "epoch": 1.324615478515625e-05, + "model_forward_time": 0.024186134338378906, + "step": 8681 + }, + { + "epoch": 1.324615478515625e-05, + "step": 8681, + "training_step_time": 0.16753268241882324 + }, + { + "epoch": 1.32476806640625e-05, + "model_forward_time": 0.02419304847717285, + "step": 8682 + }, + { + "epoch": 1.32476806640625e-05, + "step": 8682, + "training_step_time": 0.10979366302490234 + }, + { + "epoch": 1.324920654296875e-05, + "model_forward_time": 0.024457931518554688, + "step": 8683 + }, + { + "epoch": 1.324920654296875e-05, + "step": 8683, + "training_step_time": 0.10829734802246094 + }, + { + "epoch": 1.3250732421875e-05, + "model_forward_time": 0.02453303337097168, + "step": 8684 + }, + { + "epoch": 1.3250732421875e-05, + "step": 8684, + "training_step_time": 0.10763359069824219 + }, + { + "epoch": 1.325225830078125e-05, + "model_forward_time": 0.026613712310791016, + "step": 8685 + }, + { + "epoch": 1.325225830078125e-05, + "step": 8685, + "training_step_time": 0.11093807220458984 + }, + { + "epoch": 1.32537841796875e-05, + "model_forward_time": 0.02528095245361328, + "step": 8686 + }, + { + "epoch": 1.32537841796875e-05, + "step": 8686, + "training_step_time": 0.11257052421569824 + }, + { + "epoch": 1.325531005859375e-05, + "model_forward_time": 0.025196552276611328, + "step": 8687 + }, + { + "epoch": 1.325531005859375e-05, + "step": 8687, + "training_step_time": 0.11330842971801758 + }, + { + "epoch": 1.32568359375e-05, + "model_forward_time": 0.026841402053833008, + "step": 8688 + }, + { + "epoch": 1.32568359375e-05, + "step": 8688, + "training_step_time": 0.11173033714294434 + }, + { + "epoch": 1.325836181640625e-05, + "model_forward_time": 0.025418996810913086, + "step": 8689 + }, + { + "epoch": 1.325836181640625e-05, + "step": 8689, + "training_step_time": 0.11455345153808594 + }, + { + "epoch": 1.32598876953125e-05, + "grad_norm": 0.34135934710502625, + "learning_rate": 8.510111142632698e-05, + "loss": 0.0306, + "step": 8690 + }, + { + "epoch": 1.32598876953125e-05, + "model_forward_time": 0.025343894958496094, + "step": 8690 + }, + { + "epoch": 1.32598876953125e-05, + "step": 8690, + "training_step_time": 0.11251282691955566 + }, + { + "epoch": 1.326141357421875e-05, + "model_forward_time": 0.025675058364868164, + "step": 8691 + }, + { + "epoch": 1.326141357421875e-05, + "step": 8691, + "training_step_time": 0.11121821403503418 + }, + { + "epoch": 1.3262939453125e-05, + "model_forward_time": 0.02627873420715332, + "step": 8692 + }, + { + "epoch": 1.3262939453125e-05, + "step": 8692, + "training_step_time": 0.10932326316833496 + }, + { + "epoch": 1.326446533203125e-05, + "model_forward_time": 0.025389671325683594, + "step": 8693 + }, + { + "epoch": 1.326446533203125e-05, + "step": 8693, + "training_step_time": 0.10909795761108398 + }, + { + "epoch": 1.32659912109375e-05, + "model_forward_time": 0.025283336639404297, + "step": 8694 + }, + { + "epoch": 1.32659912109375e-05, + "step": 8694, + "training_step_time": 0.1109457015991211 + }, + { + "epoch": 1.326751708984375e-05, + "model_forward_time": 0.025147676467895508, + "step": 8695 + }, + { + "epoch": 1.326751708984375e-05, + "step": 8695, + "training_step_time": 0.11028432846069336 + }, + { + "epoch": 1.326904296875e-05, + "model_forward_time": 0.025182247161865234, + "step": 8696 + }, + { + "epoch": 1.326904296875e-05, + "step": 8696, + "training_step_time": 0.11253476142883301 + }, + { + "epoch": 1.327056884765625e-05, + "model_forward_time": 0.0254361629486084, + "step": 8697 + }, + { + "epoch": 1.327056884765625e-05, + "step": 8697, + "training_step_time": 0.11026787757873535 + }, + { + "epoch": 1.32720947265625e-05, + "model_forward_time": 0.02513408660888672, + "step": 8698 + }, + { + "epoch": 1.32720947265625e-05, + "step": 8698, + "training_step_time": 0.10847854614257812 + }, + { + "epoch": 1.327362060546875e-05, + "model_forward_time": 0.025403976440429688, + "step": 8699 + }, + { + "epoch": 1.327362060546875e-05, + "step": 8699, + "training_step_time": 0.11688709259033203 + }, + { + "epoch": 1.3275146484375e-05, + "grad_norm": 0.37964928150177, + "learning_rate": 8.506183921362443e-05, + "loss": 0.0327, + "step": 8700 + }, + { + "epoch": 1.3275146484375e-05, + "model_forward_time": 0.024176597595214844, + "step": 8700 + }, + { + "epoch": 1.3275146484375e-05, + "step": 8700, + "training_step_time": 0.1116485595703125 + }, + { + "epoch": 1.327667236328125e-05, + "model_forward_time": 0.0249631404876709, + "step": 8701 + }, + { + "epoch": 1.327667236328125e-05, + "step": 8701, + "training_step_time": 0.11012601852416992 + }, + { + "epoch": 1.32781982421875e-05, + "model_forward_time": 0.025253772735595703, + "step": 8702 + }, + { + "epoch": 1.32781982421875e-05, + "step": 8702, + "training_step_time": 0.11392855644226074 + }, + { + "epoch": 1.327972412109375e-05, + "model_forward_time": 0.02556443214416504, + "step": 8703 + }, + { + "epoch": 1.327972412109375e-05, + "step": 8703, + "training_step_time": 0.10684514045715332 + }, + { + "epoch": 1.328125e-05, + "model_forward_time": 0.02507495880126953, + "step": 8704 + }, + { + "epoch": 1.328125e-05, + "step": 8704, + "training_step_time": 0.1119847297668457 + }, + { + "epoch": 1.328277587890625e-05, + "model_forward_time": 0.025295019149780273, + "step": 8705 + }, + { + "epoch": 1.328277587890625e-05, + "step": 8705, + "training_step_time": 0.10932540893554688 + }, + { + "epoch": 1.32843017578125e-05, + "model_forward_time": 0.025536060333251953, + "step": 8706 + }, + { + "epoch": 1.32843017578125e-05, + "step": 8706, + "training_step_time": 0.11809372901916504 + }, + { + "epoch": 1.328582763671875e-05, + "model_forward_time": 0.02560710906982422, + "step": 8707 + }, + { + "epoch": 1.328582763671875e-05, + "step": 8707, + "training_step_time": 0.11478495597839355 + }, + { + "epoch": 1.3287353515625e-05, + "model_forward_time": 0.025247812271118164, + "step": 8708 + }, + { + "epoch": 1.3287353515625e-05, + "step": 8708, + "training_step_time": 0.2127077579498291 + }, + { + "epoch": 1.328887939453125e-05, + "model_forward_time": 0.024817466735839844, + "step": 8709 + }, + { + "epoch": 1.328887939453125e-05, + "step": 8709, + "training_step_time": 0.11666011810302734 + }, + { + "epoch": 1.32904052734375e-05, + "grad_norm": 0.3208135664463043, + "learning_rate": 8.502252439748113e-05, + "loss": 0.0414, + "step": 8710 + }, + { + "epoch": 1.32904052734375e-05, + "model_forward_time": 0.024654626846313477, + "step": 8710 + }, + { + "epoch": 1.32904052734375e-05, + "step": 8710, + "training_step_time": 0.10544872283935547 + }, + { + "epoch": 1.329193115234375e-05, + "model_forward_time": 0.025427579879760742, + "step": 8711 + }, + { + "epoch": 1.329193115234375e-05, + "step": 8711, + "training_step_time": 0.10595250129699707 + }, + { + "epoch": 1.329345703125e-05, + "model_forward_time": 0.024982690811157227, + "step": 8712 + }, + { + "epoch": 1.329345703125e-05, + "step": 8712, + "training_step_time": 0.10770392417907715 + }, + { + "epoch": 1.329498291015625e-05, + "model_forward_time": 0.02541828155517578, + "step": 8713 + }, + { + "epoch": 1.329498291015625e-05, + "step": 8713, + "training_step_time": 0.11406874656677246 + }, + { + "epoch": 1.32965087890625e-05, + "model_forward_time": 0.024962902069091797, + "step": 8714 + }, + { + "epoch": 1.32965087890625e-05, + "step": 8714, + "training_step_time": 0.10871672630310059 + }, + { + "epoch": 1.329803466796875e-05, + "model_forward_time": 0.02542710304260254, + "step": 8715 + }, + { + "epoch": 1.329803466796875e-05, + "step": 8715, + "training_step_time": 0.10850667953491211 + }, + { + "epoch": 1.3299560546875e-05, + "model_forward_time": 0.025145769119262695, + "step": 8716 + }, + { + "epoch": 1.3299560546875e-05, + "step": 8716, + "training_step_time": 0.1073160171508789 + }, + { + "epoch": 1.330108642578125e-05, + "model_forward_time": 0.02538895606994629, + "step": 8717 + }, + { + "epoch": 1.330108642578125e-05, + "step": 8717, + "training_step_time": 0.10846948623657227 + }, + { + "epoch": 1.33026123046875e-05, + "model_forward_time": 0.02521681785583496, + "step": 8718 + }, + { + "epoch": 1.33026123046875e-05, + "step": 8718, + "training_step_time": 0.15985345840454102 + }, + { + "epoch": 1.330413818359375e-05, + "model_forward_time": 0.02490091323852539, + "step": 8719 + }, + { + "epoch": 1.330413818359375e-05, + "step": 8719, + "training_step_time": 0.10992693901062012 + }, + { + "epoch": 1.33056640625e-05, + "grad_norm": 0.6753371953964233, + "learning_rate": 8.498316702566828e-05, + "loss": 0.0525, + "step": 8720 + }, + { + "epoch": 1.33056640625e-05, + "model_forward_time": 0.023951292037963867, + "step": 8720 + }, + { + "epoch": 1.33056640625e-05, + "step": 8720, + "training_step_time": 0.11025524139404297 + }, + { + "epoch": 1.330718994140625e-05, + "model_forward_time": 0.025206565856933594, + "step": 8721 + }, + { + "epoch": 1.330718994140625e-05, + "step": 8721, + "training_step_time": 0.11939597129821777 + }, + { + "epoch": 1.33087158203125e-05, + "model_forward_time": 0.02513599395751953, + "step": 8722 + }, + { + "epoch": 1.33087158203125e-05, + "step": 8722, + "training_step_time": 0.20740532875061035 + }, + { + "epoch": 1.331024169921875e-05, + "model_forward_time": 0.024218320846557617, + "step": 8723 + }, + { + "epoch": 1.331024169921875e-05, + "step": 8723, + "training_step_time": 0.15556645393371582 + }, + { + "epoch": 1.3311767578125e-05, + "model_forward_time": 0.024419069290161133, + "step": 8724 + }, + { + "epoch": 1.3311767578125e-05, + "step": 8724, + "training_step_time": 0.19742417335510254 + }, + { + "epoch": 1.331329345703125e-05, + "model_forward_time": 0.024473190307617188, + "step": 8725 + }, + { + "epoch": 1.331329345703125e-05, + "step": 8725, + "training_step_time": 0.14482855796813965 + }, + { + "epoch": 1.33148193359375e-05, + "model_forward_time": 0.02490520477294922, + "step": 8726 + }, + { + "epoch": 1.33148193359375e-05, + "step": 8726, + "training_step_time": 0.1974935531616211 + }, + { + "epoch": 1.331634521484375e-05, + "model_forward_time": 0.02417445182800293, + "step": 8727 + }, + { + "epoch": 1.331634521484375e-05, + "step": 8727, + "training_step_time": 0.11752486228942871 + }, + { + "epoch": 1.331787109375e-05, + "model_forward_time": 0.024597644805908203, + "step": 8728 + }, + { + "epoch": 1.331787109375e-05, + "step": 8728, + "training_step_time": 0.11045217514038086 + }, + { + "epoch": 1.331939697265625e-05, + "model_forward_time": 0.025583982467651367, + "step": 8729 + }, + { + "epoch": 1.331939697265625e-05, + "step": 8729, + "training_step_time": 0.12935280799865723 + }, + { + "epoch": 1.33209228515625e-05, + "grad_norm": 0.5615624785423279, + "learning_rate": 8.494376714600878e-05, + "loss": 0.0281, + "step": 8730 + }, + { + "epoch": 1.33209228515625e-05, + "model_forward_time": 0.025447607040405273, + "step": 8730 + }, + { + "epoch": 1.33209228515625e-05, + "step": 8730, + "training_step_time": 0.10720276832580566 + }, + { + "epoch": 1.332244873046875e-05, + "model_forward_time": 0.02550816535949707, + "step": 8731 + }, + { + "epoch": 1.332244873046875e-05, + "step": 8731, + "training_step_time": 0.10784912109375 + }, + { + "epoch": 1.3323974609375e-05, + "model_forward_time": 0.02492690086364746, + "step": 8732 + }, + { + "epoch": 1.3323974609375e-05, + "step": 8732, + "training_step_time": 0.10740041732788086 + }, + { + "epoch": 1.332550048828125e-05, + "model_forward_time": 0.025301694869995117, + "step": 8733 + }, + { + "epoch": 1.332550048828125e-05, + "step": 8733, + "training_step_time": 0.10611891746520996 + }, + { + "epoch": 1.33270263671875e-05, + "model_forward_time": 0.025489330291748047, + "step": 8734 + }, + { + "epoch": 1.33270263671875e-05, + "step": 8734, + "training_step_time": 0.10845470428466797 + }, + { + "epoch": 1.332855224609375e-05, + "model_forward_time": 0.02501225471496582, + "step": 8735 + }, + { + "epoch": 1.332855224609375e-05, + "step": 8735, + "training_step_time": 0.10717320442199707 + }, + { + "epoch": 1.3330078125e-05, + "model_forward_time": 0.025678157806396484, + "step": 8736 + }, + { + "epoch": 1.3330078125e-05, + "step": 8736, + "training_step_time": 0.11244511604309082 + }, + { + "epoch": 1.333160400390625e-05, + "model_forward_time": 0.02494645118713379, + "step": 8737 + }, + { + "epoch": 1.333160400390625e-05, + "step": 8737, + "training_step_time": 0.10598158836364746 + }, + { + "epoch": 1.33331298828125e-05, + "model_forward_time": 0.024924039840698242, + "step": 8738 + }, + { + "epoch": 1.33331298828125e-05, + "step": 8738, + "training_step_time": 0.10685133934020996 + }, + { + "epoch": 1.333465576171875e-05, + "model_forward_time": 0.025328874588012695, + "step": 8739 + }, + { + "epoch": 1.333465576171875e-05, + "step": 8739, + "training_step_time": 0.10676169395446777 + }, + { + "epoch": 1.3336181640625e-05, + "grad_norm": 0.405076801776886, + "learning_rate": 8.490432480637723e-05, + "loss": 0.0296, + "step": 8740 + }, + { + "epoch": 1.3336181640625e-05, + "model_forward_time": 0.025174617767333984, + "step": 8740 + }, + { + "epoch": 1.3336181640625e-05, + "step": 8740, + "training_step_time": 0.10813426971435547 + }, + { + "epoch": 1.333770751953125e-05, + "model_forward_time": 0.02494978904724121, + "step": 8741 + }, + { + "epoch": 1.333770751953125e-05, + "step": 8741, + "training_step_time": 0.10765790939331055 + }, + { + "epoch": 1.33392333984375e-05, + "model_forward_time": 0.024982452392578125, + "step": 8742 + }, + { + "epoch": 1.33392333984375e-05, + "step": 8742, + "training_step_time": 0.10985565185546875 + }, + { + "epoch": 1.334075927734375e-05, + "model_forward_time": 0.024590015411376953, + "step": 8743 + }, + { + "epoch": 1.334075927734375e-05, + "step": 8743, + "training_step_time": 0.10851693153381348 + }, + { + "epoch": 1.334228515625e-05, + "model_forward_time": 0.02555394172668457, + "step": 8744 + }, + { + "epoch": 1.334228515625e-05, + "step": 8744, + "training_step_time": 0.11751031875610352 + }, + { + "epoch": 1.334381103515625e-05, + "model_forward_time": 0.02538919448852539, + "step": 8745 + }, + { + "epoch": 1.334381103515625e-05, + "step": 8745, + "training_step_time": 0.10754060745239258 + }, + { + "epoch": 1.33453369140625e-05, + "model_forward_time": 0.025147199630737305, + "step": 8746 + }, + { + "epoch": 1.33453369140625e-05, + "step": 8746, + "training_step_time": 0.10901141166687012 + }, + { + "epoch": 1.334686279296875e-05, + "model_forward_time": 0.025341033935546875, + "step": 8747 + }, + { + "epoch": 1.334686279296875e-05, + "step": 8747, + "training_step_time": 0.10875320434570312 + }, + { + "epoch": 1.3348388671875e-05, + "model_forward_time": 0.02519392967224121, + "step": 8748 + }, + { + "epoch": 1.3348388671875e-05, + "step": 8748, + "training_step_time": 0.10841608047485352 + }, + { + "epoch": 1.334991455078125e-05, + "model_forward_time": 0.025355815887451172, + "step": 8749 + }, + { + "epoch": 1.334991455078125e-05, + "step": 8749, + "training_step_time": 0.10985255241394043 + }, + { + "epoch": 1.33514404296875e-05, + "grad_norm": 0.5718039274215698, + "learning_rate": 8.486484005469977e-05, + "loss": 0.0406, + "step": 8750 + }, + { + "epoch": 1.33514404296875e-05, + "model_forward_time": 0.02520751953125, + "step": 8750 + }, + { + "epoch": 1.33514404296875e-05, + "step": 8750, + "training_step_time": 0.11177849769592285 + }, + { + "epoch": 1.335296630859375e-05, + "model_forward_time": 0.025536537170410156, + "step": 8751 + }, + { + "epoch": 1.335296630859375e-05, + "step": 8751, + "training_step_time": 0.10800457000732422 + }, + { + "epoch": 1.33544921875e-05, + "model_forward_time": 0.02536749839782715, + "step": 8752 + }, + { + "epoch": 1.33544921875e-05, + "step": 8752, + "training_step_time": 0.11539387702941895 + }, + { + "epoch": 1.335601806640625e-05, + "model_forward_time": 0.025501728057861328, + "step": 8753 + }, + { + "epoch": 1.335601806640625e-05, + "step": 8753, + "training_step_time": 0.10890793800354004 + }, + { + "epoch": 1.33575439453125e-05, + "model_forward_time": 0.025465726852416992, + "step": 8754 + }, + { + "epoch": 1.33575439453125e-05, + "step": 8754, + "training_step_time": 0.11148905754089355 + }, + { + "epoch": 1.335906982421875e-05, + "model_forward_time": 0.02530384063720703, + "step": 8755 + }, + { + "epoch": 1.335906982421875e-05, + "step": 8755, + "training_step_time": 0.17321157455444336 + }, + { + "epoch": 1.3360595703125e-05, + "model_forward_time": 0.02442145347595215, + "step": 8756 + }, + { + "epoch": 1.3360595703125e-05, + "step": 8756, + "training_step_time": 0.1709728240966797 + }, + { + "epoch": 1.336212158203125e-05, + "model_forward_time": 0.02507185935974121, + "step": 8757 + }, + { + "epoch": 1.336212158203125e-05, + "step": 8757, + "training_step_time": 0.10523271560668945 + }, + { + "epoch": 1.33636474609375e-05, + "model_forward_time": 0.024764060974121094, + "step": 8758 + }, + { + "epoch": 1.33636474609375e-05, + "step": 8758, + "training_step_time": 0.10849165916442871 + }, + { + "epoch": 1.336517333984375e-05, + "model_forward_time": 0.0257565975189209, + "step": 8759 + }, + { + "epoch": 1.336517333984375e-05, + "step": 8759, + "training_step_time": 0.10713338851928711 + }, + { + "epoch": 1.336669921875e-05, + "grad_norm": 0.5659478306770325, + "learning_rate": 8.482531293895412e-05, + "loss": 0.0322, + "step": 8760 + }, + { + "epoch": 1.336669921875e-05, + "model_forward_time": 0.026951074600219727, + "step": 8760 + }, + { + "epoch": 1.336669921875e-05, + "step": 8760, + "training_step_time": 0.11285281181335449 + }, + { + "epoch": 1.336822509765625e-05, + "model_forward_time": 0.025627613067626953, + "step": 8761 + }, + { + "epoch": 1.336822509765625e-05, + "step": 8761, + "training_step_time": 0.11042428016662598 + }, + { + "epoch": 1.33697509765625e-05, + "model_forward_time": 0.02534198760986328, + "step": 8762 + }, + { + "epoch": 1.33697509765625e-05, + "step": 8762, + "training_step_time": 0.10664033889770508 + }, + { + "epoch": 1.337127685546875e-05, + "model_forward_time": 0.025066137313842773, + "step": 8763 + }, + { + "epoch": 1.337127685546875e-05, + "step": 8763, + "training_step_time": 0.10854625701904297 + }, + { + "epoch": 1.3372802734375e-05, + "model_forward_time": 0.02540898323059082, + "step": 8764 + }, + { + "epoch": 1.3372802734375e-05, + "step": 8764, + "training_step_time": 0.10821223258972168 + }, + { + "epoch": 1.337432861328125e-05, + "model_forward_time": 0.0286865234375, + "step": 8765 + }, + { + "epoch": 1.337432861328125e-05, + "step": 8765, + "training_step_time": 0.11848688125610352 + }, + { + "epoch": 1.33758544921875e-05, + "model_forward_time": 0.024961471557617188, + "step": 8766 + }, + { + "epoch": 1.33758544921875e-05, + "step": 8766, + "training_step_time": 0.11111664772033691 + }, + { + "epoch": 1.337738037109375e-05, + "model_forward_time": 0.024529695510864258, + "step": 8767 + }, + { + "epoch": 1.337738037109375e-05, + "step": 8767, + "training_step_time": 0.10783553123474121 + }, + { + "epoch": 1.337890625e-05, + "model_forward_time": 0.02606511116027832, + "step": 8768 + }, + { + "epoch": 1.337890625e-05, + "step": 8768, + "training_step_time": 0.1223287582397461 + }, + { + "epoch": 1.338043212890625e-05, + "model_forward_time": 0.02485513687133789, + "step": 8769 + }, + { + "epoch": 1.338043212890625e-05, + "step": 8769, + "training_step_time": 0.12665843963623047 + }, + { + "epoch": 1.33819580078125e-05, + "grad_norm": 0.5869755744934082, + "learning_rate": 8.478574350716941e-05, + "loss": 0.0364, + "step": 8770 + }, + { + "epoch": 1.33819580078125e-05, + "model_forward_time": 0.024556875228881836, + "step": 8770 + }, + { + "epoch": 1.33819580078125e-05, + "step": 8770, + "training_step_time": 0.11711621284484863 + }, + { + "epoch": 1.338348388671875e-05, + "model_forward_time": 0.024651288986206055, + "step": 8771 + }, + { + "epoch": 1.338348388671875e-05, + "step": 8771, + "training_step_time": 0.12674331665039062 + }, + { + "epoch": 1.3385009765625e-05, + "model_forward_time": 0.025304317474365234, + "step": 8772 + }, + { + "epoch": 1.3385009765625e-05, + "step": 8772, + "training_step_time": 0.13030409812927246 + }, + { + "epoch": 1.338653564453125e-05, + "model_forward_time": 0.024782896041870117, + "step": 8773 + }, + { + "epoch": 1.338653564453125e-05, + "step": 8773, + "training_step_time": 0.20790362358093262 + }, + { + "epoch": 1.33880615234375e-05, + "model_forward_time": 0.024371623992919922, + "step": 8774 + }, + { + "epoch": 1.33880615234375e-05, + "step": 8774, + "training_step_time": 0.10860562324523926 + }, + { + "epoch": 1.338958740234375e-05, + "model_forward_time": 0.024336814880371094, + "step": 8775 + }, + { + "epoch": 1.338958740234375e-05, + "step": 8775, + "training_step_time": 0.1091773509979248 + }, + { + "epoch": 1.339111328125e-05, + "model_forward_time": 0.02565598487854004, + "step": 8776 + }, + { + "epoch": 1.339111328125e-05, + "step": 8776, + "training_step_time": 0.12937617301940918 + }, + { + "epoch": 1.339263916015625e-05, + "model_forward_time": 0.025231599807739258, + "step": 8777 + }, + { + "epoch": 1.339263916015625e-05, + "step": 8777, + "training_step_time": 0.11984109878540039 + }, + { + "epoch": 1.33941650390625e-05, + "model_forward_time": 0.025063037872314453, + "step": 8778 + }, + { + "epoch": 1.33941650390625e-05, + "step": 8778, + "training_step_time": 0.12134313583374023 + }, + { + "epoch": 1.339569091796875e-05, + "model_forward_time": 0.025151729583740234, + "step": 8779 + }, + { + "epoch": 1.339569091796875e-05, + "step": 8779, + "training_step_time": 0.12621593475341797 + }, + { + "epoch": 1.3397216796875e-05, + "grad_norm": 0.333095908164978, + "learning_rate": 8.474613180742628e-05, + "loss": 0.0382, + "step": 8780 + }, + { + "epoch": 1.3397216796875e-05, + "model_forward_time": 0.024985313415527344, + "step": 8780 + }, + { + "epoch": 1.3397216796875e-05, + "step": 8780, + "training_step_time": 0.1232759952545166 + }, + { + "epoch": 1.339874267578125e-05, + "model_forward_time": 0.025370359420776367, + "step": 8781 + }, + { + "epoch": 1.339874267578125e-05, + "step": 8781, + "training_step_time": 0.1264786720275879 + }, + { + "epoch": 1.34002685546875e-05, + "model_forward_time": 0.024039745330810547, + "step": 8782 + }, + { + "epoch": 1.34002685546875e-05, + "step": 8782, + "training_step_time": 0.12488174438476562 + }, + { + "epoch": 1.340179443359375e-05, + "model_forward_time": 0.02418661117553711, + "step": 8783 + }, + { + "epoch": 1.340179443359375e-05, + "step": 8783, + "training_step_time": 0.12580490112304688 + }, + { + "epoch": 1.34033203125e-05, + "model_forward_time": 0.026244401931762695, + "step": 8784 + }, + { + "epoch": 1.34033203125e-05, + "step": 8784, + "training_step_time": 0.11731886863708496 + }, + { + "epoch": 1.340484619140625e-05, + "model_forward_time": 0.02543044090270996, + "step": 8785 + }, + { + "epoch": 1.340484619140625e-05, + "step": 8785, + "training_step_time": 0.11612439155578613 + }, + { + "epoch": 1.34063720703125e-05, + "model_forward_time": 0.025148391723632812, + "step": 8786 + }, + { + "epoch": 1.34063720703125e-05, + "step": 8786, + "training_step_time": 0.1104285717010498 + }, + { + "epoch": 1.340789794921875e-05, + "model_forward_time": 0.025269746780395508, + "step": 8787 + }, + { + "epoch": 1.340789794921875e-05, + "step": 8787, + "training_step_time": 0.10896492004394531 + }, + { + "epoch": 1.3409423828125e-05, + "model_forward_time": 0.02523326873779297, + "step": 8788 + }, + { + "epoch": 1.3409423828125e-05, + "step": 8788, + "training_step_time": 0.10929012298583984 + }, + { + "epoch": 1.341094970703125e-05, + "model_forward_time": 0.025089502334594727, + "step": 8789 + }, + { + "epoch": 1.341094970703125e-05, + "step": 8789, + "training_step_time": 0.11003828048706055 + }, + { + "epoch": 1.34124755859375e-05, + "grad_norm": 0.3114985227584839, + "learning_rate": 8.470647788785665e-05, + "loss": 0.0287, + "step": 8790 + }, + { + "epoch": 1.34124755859375e-05, + "model_forward_time": 0.025006532669067383, + "step": 8790 + }, + { + "epoch": 1.34124755859375e-05, + "step": 8790, + "training_step_time": 0.10836076736450195 + }, + { + "epoch": 1.341400146484375e-05, + "model_forward_time": 0.025517940521240234, + "step": 8791 + }, + { + "epoch": 1.341400146484375e-05, + "step": 8791, + "training_step_time": 0.11234045028686523 + }, + { + "epoch": 1.341552734375e-05, + "model_forward_time": 0.025081157684326172, + "step": 8792 + }, + { + "epoch": 1.341552734375e-05, + "step": 8792, + "training_step_time": 0.10860323905944824 + }, + { + "epoch": 1.341705322265625e-05, + "model_forward_time": 0.025372028350830078, + "step": 8793 + }, + { + "epoch": 1.341705322265625e-05, + "step": 8793, + "training_step_time": 0.10774827003479004 + }, + { + "epoch": 1.34185791015625e-05, + "model_forward_time": 0.025311946868896484, + "step": 8794 + }, + { + "epoch": 1.34185791015625e-05, + "step": 8794, + "training_step_time": 0.10869193077087402 + }, + { + "epoch": 1.342010498046875e-05, + "model_forward_time": 0.025477170944213867, + "step": 8795 + }, + { + "epoch": 1.342010498046875e-05, + "step": 8795, + "training_step_time": 0.10699176788330078 + }, + { + "epoch": 1.3421630859375e-05, + "model_forward_time": 0.025287866592407227, + "step": 8796 + }, + { + "epoch": 1.3421630859375e-05, + "step": 8796, + "training_step_time": 0.11208748817443848 + }, + { + "epoch": 1.342315673828125e-05, + "model_forward_time": 0.02572774887084961, + "step": 8797 + }, + { + "epoch": 1.342315673828125e-05, + "step": 8797, + "training_step_time": 0.10788488388061523 + }, + { + "epoch": 1.34246826171875e-05, + "model_forward_time": 0.025348663330078125, + "step": 8798 + }, + { + "epoch": 1.34246826171875e-05, + "step": 8798, + "training_step_time": 0.10809516906738281 + }, + { + "epoch": 1.342620849609375e-05, + "model_forward_time": 0.025090932846069336, + "step": 8799 + }, + { + "epoch": 1.342620849609375e-05, + "step": 8799, + "training_step_time": 0.1138010025024414 + }, + { + "epoch": 1.3427734375e-05, + "grad_norm": 0.3685428202152252, + "learning_rate": 8.466678179664379e-05, + "loss": 0.0232, + "step": 8800 + }, + { + "epoch": 1.3427734375e-05, + "model_forward_time": 0.025508403778076172, + "step": 8800 + }, + { + "epoch": 1.3427734375e-05, + "step": 8800, + "training_step_time": 0.10789299011230469 + }, + { + "epoch": 1.342926025390625e-05, + "model_forward_time": 0.025765419006347656, + "step": 8801 + }, + { + "epoch": 1.342926025390625e-05, + "step": 8801, + "training_step_time": 0.11353731155395508 + }, + { + "epoch": 1.34307861328125e-05, + "model_forward_time": 0.025545120239257812, + "step": 8802 + }, + { + "epoch": 1.34307861328125e-05, + "step": 8802, + "training_step_time": 0.17339706420898438 + }, + { + "epoch": 1.343231201171875e-05, + "model_forward_time": 0.024440288543701172, + "step": 8803 + }, + { + "epoch": 1.343231201171875e-05, + "step": 8803, + "training_step_time": 0.1670207977294922 + }, + { + "epoch": 1.3433837890625e-05, + "model_forward_time": 0.024464845657348633, + "step": 8804 + }, + { + "epoch": 1.3433837890625e-05, + "step": 8804, + "training_step_time": 0.10442662239074707 + }, + { + "epoch": 1.343536376953125e-05, + "model_forward_time": 0.025066852569580078, + "step": 8805 + }, + { + "epoch": 1.343536376953125e-05, + "step": 8805, + "training_step_time": 0.10520458221435547 + }, + { + "epoch": 1.34368896484375e-05, + "model_forward_time": 0.025597333908081055, + "step": 8806 + }, + { + "epoch": 1.34368896484375e-05, + "step": 8806, + "training_step_time": 0.11063241958618164 + }, + { + "epoch": 1.343841552734375e-05, + "model_forward_time": 0.025333881378173828, + "step": 8807 + }, + { + "epoch": 1.343841552734375e-05, + "step": 8807, + "training_step_time": 0.10731196403503418 + }, + { + "epoch": 1.343994140625e-05, + "model_forward_time": 0.025368213653564453, + "step": 8808 + }, + { + "epoch": 1.343994140625e-05, + "step": 8808, + "training_step_time": 0.10878777503967285 + }, + { + "epoch": 1.344146728515625e-05, + "model_forward_time": 0.02531743049621582, + "step": 8809 + }, + { + "epoch": 1.344146728515625e-05, + "step": 8809, + "training_step_time": 0.1060178279876709 + }, + { + "epoch": 1.34429931640625e-05, + "grad_norm": 0.2750084102153778, + "learning_rate": 8.462704358202216e-05, + "loss": 0.031, + "step": 8810 + }, + { + "epoch": 1.34429931640625e-05, + "model_forward_time": 0.025397539138793945, + "step": 8810 + }, + { + "epoch": 1.34429931640625e-05, + "step": 8810, + "training_step_time": 0.11011457443237305 + }, + { + "epoch": 1.344451904296875e-05, + "model_forward_time": 0.02520895004272461, + "step": 8811 + }, + { + "epoch": 1.344451904296875e-05, + "step": 8811, + "training_step_time": 0.1070108413696289 + }, + { + "epoch": 1.3446044921875e-05, + "model_forward_time": 0.025388002395629883, + "step": 8812 + }, + { + "epoch": 1.3446044921875e-05, + "step": 8812, + "training_step_time": 0.14338254928588867 + }, + { + "epoch": 1.344757080078125e-05, + "model_forward_time": 0.02521681785583496, + "step": 8813 + }, + { + "epoch": 1.344757080078125e-05, + "step": 8813, + "training_step_time": 0.11064887046813965 + }, + { + "epoch": 1.34490966796875e-05, + "model_forward_time": 0.02514195442199707, + "step": 8814 + }, + { + "epoch": 1.34490966796875e-05, + "step": 8814, + "training_step_time": 0.11353230476379395 + }, + { + "epoch": 1.345062255859375e-05, + "model_forward_time": 0.0254514217376709, + "step": 8815 + }, + { + "epoch": 1.345062255859375e-05, + "step": 8815, + "training_step_time": 0.11924171447753906 + }, + { + "epoch": 1.34521484375e-05, + "model_forward_time": 0.025285005569458008, + "step": 8816 + }, + { + "epoch": 1.34521484375e-05, + "step": 8816, + "training_step_time": 0.13036465644836426 + }, + { + "epoch": 1.345367431640625e-05, + "model_forward_time": 0.02561783790588379, + "step": 8817 + }, + { + "epoch": 1.345367431640625e-05, + "step": 8817, + "training_step_time": 0.12407898902893066 + }, + { + "epoch": 1.34552001953125e-05, + "model_forward_time": 0.02499532699584961, + "step": 8818 + }, + { + "epoch": 1.34552001953125e-05, + "step": 8818, + "training_step_time": 0.15128612518310547 + }, + { + "epoch": 1.345672607421875e-05, + "model_forward_time": 0.02417159080505371, + "step": 8819 + }, + { + "epoch": 1.345672607421875e-05, + "step": 8819, + "training_step_time": 0.11753034591674805 + }, + { + "epoch": 1.3458251953125e-05, + "grad_norm": 0.5003789067268372, + "learning_rate": 8.458726329227747e-05, + "loss": 0.0308, + "step": 8820 + }, + { + "epoch": 1.3458251953125e-05, + "model_forward_time": 0.02455902099609375, + "step": 8820 + }, + { + "epoch": 1.3458251953125e-05, + "step": 8820, + "training_step_time": 0.1599719524383545 + }, + { + "epoch": 1.345977783203125e-05, + "model_forward_time": 0.024831295013427734, + "step": 8821 + }, + { + "epoch": 1.345977783203125e-05, + "step": 8821, + "training_step_time": 0.18172764778137207 + }, + { + "epoch": 1.34613037109375e-05, + "model_forward_time": 0.024753332138061523, + "step": 8822 + }, + { + "epoch": 1.34613037109375e-05, + "step": 8822, + "training_step_time": 0.20117568969726562 + }, + { + "epoch": 1.346282958984375e-05, + "model_forward_time": 0.024095773696899414, + "step": 8823 + }, + { + "epoch": 1.346282958984375e-05, + "step": 8823, + "training_step_time": 0.12165379524230957 + }, + { + "epoch": 1.346435546875e-05, + "model_forward_time": 0.024319887161254883, + "step": 8824 + }, + { + "epoch": 1.346435546875e-05, + "step": 8824, + "training_step_time": 0.10489177703857422 + }, + { + "epoch": 1.346588134765625e-05, + "model_forward_time": 0.025298118591308594, + "step": 8825 + }, + { + "epoch": 1.346588134765625e-05, + "step": 8825, + "training_step_time": 0.1065514087677002 + }, + { + "epoch": 1.34674072265625e-05, + "model_forward_time": 0.025423765182495117, + "step": 8826 + }, + { + "epoch": 1.34674072265625e-05, + "step": 8826, + "training_step_time": 0.10610079765319824 + }, + { + "epoch": 1.346893310546875e-05, + "model_forward_time": 0.024874448776245117, + "step": 8827 + }, + { + "epoch": 1.346893310546875e-05, + "step": 8827, + "training_step_time": 0.1057438850402832 + }, + { + "epoch": 1.3470458984375e-05, + "model_forward_time": 0.025505542755126953, + "step": 8828 + }, + { + "epoch": 1.3470458984375e-05, + "step": 8828, + "training_step_time": 0.11060285568237305 + }, + { + "epoch": 1.347198486328125e-05, + "model_forward_time": 0.024786949157714844, + "step": 8829 + }, + { + "epoch": 1.347198486328125e-05, + "step": 8829, + "training_step_time": 0.11215806007385254 + }, + { + "epoch": 1.34735107421875e-05, + "grad_norm": 0.40606626868247986, + "learning_rate": 8.454744097574652e-05, + "loss": 0.0321, + "step": 8830 + }, + { + "epoch": 1.34735107421875e-05, + "model_forward_time": 0.025208473205566406, + "step": 8830 + }, + { + "epoch": 1.34735107421875e-05, + "step": 8830, + "training_step_time": 0.10625576972961426 + }, + { + "epoch": 1.347503662109375e-05, + "model_forward_time": 0.02485203742980957, + "step": 8831 + }, + { + "epoch": 1.347503662109375e-05, + "step": 8831, + "training_step_time": 0.1052711009979248 + }, + { + "epoch": 1.34765625e-05, + "model_forward_time": 0.02641606330871582, + "step": 8832 + }, + { + "epoch": 1.34765625e-05, + "step": 8832, + "training_step_time": 0.10939502716064453 + }, + { + "epoch": 1.347808837890625e-05, + "model_forward_time": 0.024899005889892578, + "step": 8833 + }, + { + "epoch": 1.347808837890625e-05, + "step": 8833, + "training_step_time": 0.11361575126647949 + }, + { + "epoch": 1.34796142578125e-05, + "model_forward_time": 0.02507781982421875, + "step": 8834 + }, + { + "epoch": 1.34796142578125e-05, + "step": 8834, + "training_step_time": 0.11864209175109863 + }, + { + "epoch": 1.348114013671875e-05, + "model_forward_time": 0.025072336196899414, + "step": 8835 + }, + { + "epoch": 1.348114013671875e-05, + "step": 8835, + "training_step_time": 0.12011456489562988 + }, + { + "epoch": 1.3482666015625e-05, + "model_forward_time": 0.025199174880981445, + "step": 8836 + }, + { + "epoch": 1.3482666015625e-05, + "step": 8836, + "training_step_time": 0.12028288841247559 + }, + { + "epoch": 1.348419189453125e-05, + "model_forward_time": 0.025249481201171875, + "step": 8837 + }, + { + "epoch": 1.348419189453125e-05, + "step": 8837, + "training_step_time": 0.11735177040100098 + }, + { + "epoch": 1.34857177734375e-05, + "model_forward_time": 0.025351762771606445, + "step": 8838 + }, + { + "epoch": 1.34857177734375e-05, + "step": 8838, + "training_step_time": 0.11881470680236816 + }, + { + "epoch": 1.348724365234375e-05, + "model_forward_time": 0.025122880935668945, + "step": 8839 + }, + { + "epoch": 1.348724365234375e-05, + "step": 8839, + "training_step_time": 0.1134951114654541 + }, + { + "epoch": 1.348876953125e-05, + "grad_norm": 0.5772373080253601, + "learning_rate": 8.450757668081716e-05, + "loss": 0.0312, + "step": 8840 + }, + { + "epoch": 1.348876953125e-05, + "model_forward_time": 0.024771928787231445, + "step": 8840 + }, + { + "epoch": 1.348876953125e-05, + "step": 8840, + "training_step_time": 0.11152887344360352 + }, + { + "epoch": 1.349029541015625e-05, + "model_forward_time": 0.024884939193725586, + "step": 8841 + }, + { + "epoch": 1.349029541015625e-05, + "step": 8841, + "training_step_time": 0.11375761032104492 + }, + { + "epoch": 1.34918212890625e-05, + "model_forward_time": 0.024939775466918945, + "step": 8842 + }, + { + "epoch": 1.34918212890625e-05, + "step": 8842, + "training_step_time": 0.11223506927490234 + }, + { + "epoch": 1.349334716796875e-05, + "model_forward_time": 0.025085926055908203, + "step": 8843 + }, + { + "epoch": 1.349334716796875e-05, + "step": 8843, + "training_step_time": 0.10805225372314453 + }, + { + "epoch": 1.3494873046875e-05, + "model_forward_time": 0.025293827056884766, + "step": 8844 + }, + { + "epoch": 1.3494873046875e-05, + "step": 8844, + "training_step_time": 0.11306881904602051 + }, + { + "epoch": 1.349639892578125e-05, + "model_forward_time": 0.025015592575073242, + "step": 8845 + }, + { + "epoch": 1.349639892578125e-05, + "step": 8845, + "training_step_time": 0.16548633575439453 + }, + { + "epoch": 1.34979248046875e-05, + "model_forward_time": 0.024149179458618164, + "step": 8846 + }, + { + "epoch": 1.34979248046875e-05, + "step": 8846, + "training_step_time": 0.16495609283447266 + }, + { + "epoch": 1.349945068359375e-05, + "model_forward_time": 0.02623605728149414, + "step": 8847 + }, + { + "epoch": 1.349945068359375e-05, + "step": 8847, + "training_step_time": 0.11311841011047363 + }, + { + "epoch": 1.35009765625e-05, + "model_forward_time": 0.024779796600341797, + "step": 8848 + }, + { + "epoch": 1.35009765625e-05, + "step": 8848, + "training_step_time": 0.17114758491516113 + }, + { + "epoch": 1.350250244140625e-05, + "model_forward_time": 0.024367570877075195, + "step": 8849 + }, + { + "epoch": 1.350250244140625e-05, + "step": 8849, + "training_step_time": 0.17211055755615234 + }, + { + "epoch": 1.35040283203125e-05, + "grad_norm": 0.4923705458641052, + "learning_rate": 8.44676704559283e-05, + "loss": 0.0257, + "step": 8850 + }, + { + "epoch": 1.35040283203125e-05, + "model_forward_time": 0.02602100372314453, + "step": 8850 + }, + { + "epoch": 1.35040283203125e-05, + "step": 8850, + "training_step_time": 0.11019659042358398 + }, + { + "epoch": 1.350555419921875e-05, + "model_forward_time": 0.02504277229309082, + "step": 8851 + }, + { + "epoch": 1.350555419921875e-05, + "step": 8851, + "training_step_time": 0.10940194129943848 + }, + { + "epoch": 1.3507080078125e-05, + "model_forward_time": 0.025532007217407227, + "step": 8852 + }, + { + "epoch": 1.3507080078125e-05, + "step": 8852, + "training_step_time": 0.1074824333190918 + }, + { + "epoch": 1.350860595703125e-05, + "model_forward_time": 0.02534031867980957, + "step": 8853 + }, + { + "epoch": 1.350860595703125e-05, + "step": 8853, + "training_step_time": 0.10865926742553711 + }, + { + "epoch": 1.35101318359375e-05, + "model_forward_time": 0.02662515640258789, + "step": 8854 + }, + { + "epoch": 1.35101318359375e-05, + "step": 8854, + "training_step_time": 0.10818910598754883 + }, + { + "epoch": 1.351165771484375e-05, + "model_forward_time": 0.025313854217529297, + "step": 8855 + }, + { + "epoch": 1.351165771484375e-05, + "step": 8855, + "training_step_time": 0.1082310676574707 + }, + { + "epoch": 1.351318359375e-05, + "model_forward_time": 0.025217533111572266, + "step": 8856 + }, + { + "epoch": 1.351318359375e-05, + "step": 8856, + "training_step_time": 0.10806703567504883 + }, + { + "epoch": 1.351470947265625e-05, + "model_forward_time": 0.024977445602416992, + "step": 8857 + }, + { + "epoch": 1.351470947265625e-05, + "step": 8857, + "training_step_time": 0.11097955703735352 + }, + { + "epoch": 1.35162353515625e-05, + "model_forward_time": 0.02490520477294922, + "step": 8858 + }, + { + "epoch": 1.35162353515625e-05, + "step": 8858, + "training_step_time": 0.11985993385314941 + }, + { + "epoch": 1.351776123046875e-05, + "model_forward_time": 0.026998519897460938, + "step": 8859 + }, + { + "epoch": 1.351776123046875e-05, + "step": 8859, + "training_step_time": 0.11333155632019043 + }, + { + "epoch": 1.3519287109375e-05, + "grad_norm": 0.3777911365032196, + "learning_rate": 8.442772234956972e-05, + "loss": 0.0313, + "step": 8860 + }, + { + "epoch": 1.3519287109375e-05, + "model_forward_time": 0.025001049041748047, + "step": 8860 + }, + { + "epoch": 1.3519287109375e-05, + "step": 8860, + "training_step_time": 0.21588969230651855 + }, + { + "epoch": 1.352081298828125e-05, + "model_forward_time": 0.024205923080444336, + "step": 8861 + }, + { + "epoch": 1.352081298828125e-05, + "step": 8861, + "training_step_time": 0.1356363296508789 + }, + { + "epoch": 1.35223388671875e-05, + "model_forward_time": 0.024274349212646484, + "step": 8862 + }, + { + "epoch": 1.35223388671875e-05, + "step": 8862, + "training_step_time": 0.11793041229248047 + }, + { + "epoch": 1.352386474609375e-05, + "model_forward_time": 0.024878978729248047, + "step": 8863 + }, + { + "epoch": 1.352386474609375e-05, + "step": 8863, + "training_step_time": 0.11830258369445801 + }, + { + "epoch": 1.3525390625e-05, + "model_forward_time": 0.02496194839477539, + "step": 8864 + }, + { + "epoch": 1.3525390625e-05, + "step": 8864, + "training_step_time": 0.19593119621276855 + }, + { + "epoch": 1.352691650390625e-05, + "model_forward_time": 0.02435016632080078, + "step": 8865 + }, + { + "epoch": 1.352691650390625e-05, + "step": 8865, + "training_step_time": 0.1910707950592041 + }, + { + "epoch": 1.35284423828125e-05, + "model_forward_time": 0.024585485458374023, + "step": 8866 + }, + { + "epoch": 1.35284423828125e-05, + "step": 8866, + "training_step_time": 0.18648099899291992 + }, + { + "epoch": 1.352996826171875e-05, + "model_forward_time": 0.024266958236694336, + "step": 8867 + }, + { + "epoch": 1.352996826171875e-05, + "step": 8867, + "training_step_time": 0.10962033271789551 + }, + { + "epoch": 1.3531494140625e-05, + "model_forward_time": 0.024217844009399414, + "step": 8868 + }, + { + "epoch": 1.3531494140625e-05, + "step": 8868, + "training_step_time": 0.1070561408996582 + }, + { + "epoch": 1.353302001953125e-05, + "model_forward_time": 0.02538013458251953, + "step": 8869 + }, + { + "epoch": 1.353302001953125e-05, + "step": 8869, + "training_step_time": 0.10833454132080078 + }, + { + "epoch": 1.35345458984375e-05, + "grad_norm": 0.2980242073535919, + "learning_rate": 8.438773241028219e-05, + "loss": 0.031, + "step": 8870 + }, + { + "epoch": 1.35345458984375e-05, + "model_forward_time": 0.025416851043701172, + "step": 8870 + }, + { + "epoch": 1.35345458984375e-05, + "step": 8870, + "training_step_time": 0.10670590400695801 + }, + { + "epoch": 1.353607177734375e-05, + "model_forward_time": 0.02506732940673828, + "step": 8871 + }, + { + "epoch": 1.353607177734375e-05, + "step": 8871, + "training_step_time": 0.10593295097351074 + }, + { + "epoch": 1.353759765625e-05, + "model_forward_time": 0.024930715560913086, + "step": 8872 + }, + { + "epoch": 1.353759765625e-05, + "step": 8872, + "training_step_time": 0.11065030097961426 + }, + { + "epoch": 1.353912353515625e-05, + "model_forward_time": 0.025054216384887695, + "step": 8873 + }, + { + "epoch": 1.353912353515625e-05, + "step": 8873, + "training_step_time": 0.1139369010925293 + }, + { + "epoch": 1.35406494140625e-05, + "model_forward_time": 0.0253293514251709, + "step": 8874 + }, + { + "epoch": 1.35406494140625e-05, + "step": 8874, + "training_step_time": 0.1081547737121582 + }, + { + "epoch": 1.354217529296875e-05, + "model_forward_time": 0.024956703186035156, + "step": 8875 + }, + { + "epoch": 1.354217529296875e-05, + "step": 8875, + "training_step_time": 0.10578417778015137 + }, + { + "epoch": 1.3543701171875e-05, + "model_forward_time": 0.025588274002075195, + "step": 8876 + }, + { + "epoch": 1.3543701171875e-05, + "step": 8876, + "training_step_time": 0.10856103897094727 + }, + { + "epoch": 1.354522705078125e-05, + "model_forward_time": 0.02507615089416504, + "step": 8877 + }, + { + "epoch": 1.354522705078125e-05, + "step": 8877, + "training_step_time": 0.11995530128479004 + }, + { + "epoch": 1.35467529296875e-05, + "model_forward_time": 0.025243520736694336, + "step": 8878 + }, + { + "epoch": 1.35467529296875e-05, + "step": 8878, + "training_step_time": 0.11559224128723145 + }, + { + "epoch": 1.354827880859375e-05, + "model_forward_time": 0.024836063385009766, + "step": 8879 + }, + { + "epoch": 1.354827880859375e-05, + "step": 8879, + "training_step_time": 0.1065518856048584 + }, + { + "epoch": 1.35498046875e-05, + "grad_norm": 0.20807787775993347, + "learning_rate": 8.434770068665723e-05, + "loss": 0.037, + "step": 8880 + }, + { + "epoch": 1.35498046875e-05, + "model_forward_time": 0.02470111846923828, + "step": 8880 + }, + { + "epoch": 1.35498046875e-05, + "step": 8880, + "training_step_time": 0.1107339859008789 + }, + { + "epoch": 1.355133056640625e-05, + "model_forward_time": 0.025808095932006836, + "step": 8881 + }, + { + "epoch": 1.355133056640625e-05, + "step": 8881, + "training_step_time": 0.10991358757019043 + }, + { + "epoch": 1.35528564453125e-05, + "model_forward_time": 0.024822711944580078, + "step": 8882 + }, + { + "epoch": 1.35528564453125e-05, + "step": 8882, + "training_step_time": 0.10769248008728027 + }, + { + "epoch": 1.355438232421875e-05, + "model_forward_time": 0.02543020248413086, + "step": 8883 + }, + { + "epoch": 1.355438232421875e-05, + "step": 8883, + "training_step_time": 0.10801458358764648 + }, + { + "epoch": 1.3555908203125e-05, + "model_forward_time": 0.0252230167388916, + "step": 8884 + }, + { + "epoch": 1.3555908203125e-05, + "step": 8884, + "training_step_time": 0.10470938682556152 + }, + { + "epoch": 1.355743408203125e-05, + "model_forward_time": 0.024959564208984375, + "step": 8885 + }, + { + "epoch": 1.355743408203125e-05, + "step": 8885, + "training_step_time": 0.10763239860534668 + }, + { + "epoch": 1.35589599609375e-05, + "model_forward_time": 0.02584075927734375, + "step": 8886 + }, + { + "epoch": 1.35589599609375e-05, + "step": 8886, + "training_step_time": 0.11389970779418945 + }, + { + "epoch": 1.356048583984375e-05, + "model_forward_time": 0.025709152221679688, + "step": 8887 + }, + { + "epoch": 1.356048583984375e-05, + "step": 8887, + "training_step_time": 0.14470815658569336 + }, + { + "epoch": 1.356201171875e-05, + "model_forward_time": 0.024810314178466797, + "step": 8888 + }, + { + "epoch": 1.356201171875e-05, + "step": 8888, + "training_step_time": 0.14928317070007324 + }, + { + "epoch": 1.356353759765625e-05, + "model_forward_time": 0.025288105010986328, + "step": 8889 + }, + { + "epoch": 1.356353759765625e-05, + "step": 8889, + "training_step_time": 0.11434555053710938 + }, + { + "epoch": 1.35650634765625e-05, + "grad_norm": 0.411801815032959, + "learning_rate": 8.430762722733714e-05, + "loss": 0.0329, + "step": 8890 + }, + { + "epoch": 1.35650634765625e-05, + "model_forward_time": 0.02453756332397461, + "step": 8890 + }, + { + "epoch": 1.35650634765625e-05, + "step": 8890, + "training_step_time": 0.21364188194274902 + }, + { + "epoch": 1.356658935546875e-05, + "model_forward_time": 0.02632451057434082, + "step": 8891 + }, + { + "epoch": 1.356658935546875e-05, + "step": 8891, + "training_step_time": 0.13043761253356934 + }, + { + "epoch": 1.3568115234375e-05, + "model_forward_time": 0.024518966674804688, + "step": 8892 + }, + { + "epoch": 1.3568115234375e-05, + "step": 8892, + "training_step_time": 0.18149518966674805 + }, + { + "epoch": 1.356964111328125e-05, + "model_forward_time": 0.024816036224365234, + "step": 8893 + }, + { + "epoch": 1.356964111328125e-05, + "step": 8893, + "training_step_time": 0.1370687484741211 + }, + { + "epoch": 1.35711669921875e-05, + "model_forward_time": 0.0244596004486084, + "step": 8894 + }, + { + "epoch": 1.35711669921875e-05, + "step": 8894, + "training_step_time": 0.11090683937072754 + }, + { + "epoch": 1.357269287109375e-05, + "model_forward_time": 0.02552318572998047, + "step": 8895 + }, + { + "epoch": 1.357269287109375e-05, + "step": 8895, + "training_step_time": 0.1131746768951416 + }, + { + "epoch": 1.357421875e-05, + "model_forward_time": 0.025241374969482422, + "step": 8896 + }, + { + "epoch": 1.357421875e-05, + "step": 8896, + "training_step_time": 0.10974407196044922 + }, + { + "epoch": 1.357574462890625e-05, + "model_forward_time": 0.02513599395751953, + "step": 8897 + }, + { + "epoch": 1.357574462890625e-05, + "step": 8897, + "training_step_time": 0.10878872871398926 + }, + { + "epoch": 1.35772705078125e-05, + "model_forward_time": 0.025089740753173828, + "step": 8898 + }, + { + "epoch": 1.35772705078125e-05, + "step": 8898, + "training_step_time": 0.10956001281738281 + }, + { + "epoch": 1.357879638671875e-05, + "model_forward_time": 0.024848222732543945, + "step": 8899 + }, + { + "epoch": 1.357879638671875e-05, + "step": 8899, + "training_step_time": 0.10918354988098145 + }, + { + "epoch": 1.3580322265625e-05, + "grad_norm": 0.37221142649650574, + "learning_rate": 8.4267512081015e-05, + "loss": 0.031, + "step": 8900 + }, + { + "epoch": 1.3580322265625e-05, + "model_forward_time": 0.02458977699279785, + "step": 8900 + }, + { + "epoch": 1.3580322265625e-05, + "step": 8900, + "training_step_time": 0.11006402969360352 + }, + { + "epoch": 1.358184814453125e-05, + "model_forward_time": 0.025192975997924805, + "step": 8901 + }, + { + "epoch": 1.358184814453125e-05, + "step": 8901, + "training_step_time": 0.1093902587890625 + }, + { + "epoch": 1.35833740234375e-05, + "model_forward_time": 0.02514815330505371, + "step": 8902 + }, + { + "epoch": 1.35833740234375e-05, + "step": 8902, + "training_step_time": 0.10878324508666992 + }, + { + "epoch": 1.358489990234375e-05, + "model_forward_time": 0.029021024703979492, + "step": 8903 + }, + { + "epoch": 1.358489990234375e-05, + "step": 8903, + "training_step_time": 0.14989256858825684 + }, + { + "epoch": 1.358642578125e-05, + "model_forward_time": 0.025372743606567383, + "step": 8904 + }, + { + "epoch": 1.358642578125e-05, + "step": 8904, + "training_step_time": 0.11182427406311035 + }, + { + "epoch": 1.358795166015625e-05, + "model_forward_time": 0.02456974983215332, + "step": 8905 + }, + { + "epoch": 1.358795166015625e-05, + "step": 8905, + "training_step_time": 0.2236781120300293 + }, + { + "epoch": 1.35894775390625e-05, + "model_forward_time": 0.024309873580932617, + "step": 8906 + }, + { + "epoch": 1.35894775390625e-05, + "step": 8906, + "training_step_time": 0.12743830680847168 + }, + { + "epoch": 1.359100341796875e-05, + "model_forward_time": 0.02399921417236328, + "step": 8907 + }, + { + "epoch": 1.359100341796875e-05, + "step": 8907, + "training_step_time": 0.10874700546264648 + }, + { + "epoch": 1.3592529296875e-05, + "model_forward_time": 0.025083303451538086, + "step": 8908 + }, + { + "epoch": 1.3592529296875e-05, + "step": 8908, + "training_step_time": 0.12138795852661133 + }, + { + "epoch": 1.359405517578125e-05, + "model_forward_time": 0.024996519088745117, + "step": 8909 + }, + { + "epoch": 1.359405517578125e-05, + "step": 8909, + "training_step_time": 0.1862947940826416 + }, + { + "epoch": 1.35955810546875e-05, + "grad_norm": 0.3871256411075592, + "learning_rate": 8.422735529643444e-05, + "loss": 0.0292, + "step": 8910 + }, + { + "epoch": 1.35955810546875e-05, + "model_forward_time": 0.024939298629760742, + "step": 8910 + }, + { + "epoch": 1.35955810546875e-05, + "step": 8910, + "training_step_time": 0.18158173561096191 + }, + { + "epoch": 1.359710693359375e-05, + "model_forward_time": 0.02435135841369629, + "step": 8911 + }, + { + "epoch": 1.359710693359375e-05, + "step": 8911, + "training_step_time": 0.15028619766235352 + }, + { + "epoch": 1.35986328125e-05, + "model_forward_time": 0.024403095245361328, + "step": 8912 + }, + { + "epoch": 1.35986328125e-05, + "step": 8912, + "training_step_time": 0.11498665809631348 + }, + { + "epoch": 1.360015869140625e-05, + "model_forward_time": 0.024422168731689453, + "step": 8913 + }, + { + "epoch": 1.360015869140625e-05, + "step": 8913, + "training_step_time": 0.10341835021972656 + }, + { + "epoch": 1.36016845703125e-05, + "model_forward_time": 0.025068283081054688, + "step": 8914 + }, + { + "epoch": 1.36016845703125e-05, + "step": 8914, + "training_step_time": 0.10617995262145996 + }, + { + "epoch": 1.360321044921875e-05, + "model_forward_time": 0.025139570236206055, + "step": 8915 + }, + { + "epoch": 1.360321044921875e-05, + "step": 8915, + "training_step_time": 0.10662174224853516 + }, + { + "epoch": 1.3604736328125e-05, + "model_forward_time": 0.025115013122558594, + "step": 8916 + }, + { + "epoch": 1.3604736328125e-05, + "step": 8916, + "training_step_time": 0.10822749137878418 + }, + { + "epoch": 1.360626220703125e-05, + "model_forward_time": 0.025452375411987305, + "step": 8917 + }, + { + "epoch": 1.360626220703125e-05, + "step": 8917, + "training_step_time": 0.11053967475891113 + }, + { + "epoch": 1.36077880859375e-05, + "model_forward_time": 0.025655746459960938, + "step": 8918 + }, + { + "epoch": 1.36077880859375e-05, + "step": 8918, + "training_step_time": 0.16332650184631348 + }, + { + "epoch": 1.360931396484375e-05, + "model_forward_time": 0.023495912551879883, + "step": 8919 + }, + { + "epoch": 1.360931396484375e-05, + "step": 8919, + "training_step_time": 0.19013428688049316 + }, + { + "epoch": 1.361083984375e-05, + "grad_norm": 0.7622633576393127, + "learning_rate": 8.418715692238978e-05, + "loss": 0.0412, + "step": 8920 + }, + { + "epoch": 1.361083984375e-05, + "model_forward_time": 0.02314591407775879, + "step": 8920 + }, + { + "epoch": 1.361083984375e-05, + "step": 8920, + "training_step_time": 0.17504215240478516 + }, + { + "epoch": 1.361236572265625e-05, + "model_forward_time": 0.023323535919189453, + "step": 8921 + }, + { + "epoch": 1.361236572265625e-05, + "step": 8921, + "training_step_time": 0.17570209503173828 + }, + { + "epoch": 1.36138916015625e-05, + "model_forward_time": 0.02346968650817871, + "step": 8922 + }, + { + "epoch": 1.36138916015625e-05, + "step": 8922, + "training_step_time": 0.1582016944885254 + }, + { + "epoch": 1.361541748046875e-05, + "model_forward_time": 0.023576021194458008, + "step": 8923 + }, + { + "epoch": 1.361541748046875e-05, + "step": 8923, + "training_step_time": 0.1454155445098877 + }, + { + "epoch": 1.3616943359375e-05, + "model_forward_time": 0.02311229705810547, + "step": 8924 + }, + { + "epoch": 1.3616943359375e-05, + "step": 8924, + "training_step_time": 0.13324904441833496 + }, + { + "epoch": 1.361846923828125e-05, + "model_forward_time": 0.023470640182495117, + "step": 8925 + }, + { + "epoch": 1.361846923828125e-05, + "step": 8925, + "training_step_time": 0.12807035446166992 + }, + { + "epoch": 1.36199951171875e-05, + "model_forward_time": 0.02347731590270996, + "step": 8926 + }, + { + "epoch": 1.36199951171875e-05, + "step": 8926, + "training_step_time": 0.12607288360595703 + }, + { + "epoch": 1.362152099609375e-05, + "model_forward_time": 0.02404475212097168, + "step": 8927 + }, + { + "epoch": 1.362152099609375e-05, + "step": 8927, + "training_step_time": 0.11951565742492676 + }, + { + "epoch": 1.3623046875e-05, + "model_forward_time": 0.024280548095703125, + "step": 8928 + }, + { + "epoch": 1.3623046875e-05, + "step": 8928, + "training_step_time": 0.11611819267272949 + }, + { + "epoch": 1.362457275390625e-05, + "model_forward_time": 0.02507328987121582, + "step": 8929 + }, + { + "epoch": 1.362457275390625e-05, + "step": 8929, + "training_step_time": 0.11864995956420898 + }, + { + "epoch": 1.36260986328125e-05, + "grad_norm": 0.6880233883857727, + "learning_rate": 8.41469170077258e-05, + "loss": 0.0331, + "step": 8930 + }, + { + "epoch": 1.36260986328125e-05, + "model_forward_time": 0.02788519859313965, + "step": 8930 + }, + { + "epoch": 1.36260986328125e-05, + "step": 8930, + "training_step_time": 0.11542701721191406 + }, + { + "epoch": 1.362762451171875e-05, + "model_forward_time": 0.024332284927368164, + "step": 8931 + }, + { + "epoch": 1.362762451171875e-05, + "step": 8931, + "training_step_time": 0.10256576538085938 + }, + { + "epoch": 1.3629150390625e-05, + "model_forward_time": 0.024483203887939453, + "step": 8932 + }, + { + "epoch": 1.3629150390625e-05, + "step": 8932, + "training_step_time": 0.10550951957702637 + }, + { + "epoch": 1.363067626953125e-05, + "model_forward_time": 0.024417638778686523, + "step": 8933 + }, + { + "epoch": 1.363067626953125e-05, + "step": 8933, + "training_step_time": 0.12224030494689941 + }, + { + "epoch": 1.36322021484375e-05, + "model_forward_time": 0.024944543838500977, + "step": 8934 + }, + { + "epoch": 1.36322021484375e-05, + "step": 8934, + "training_step_time": 0.10565376281738281 + }, + { + "epoch": 1.363372802734375e-05, + "model_forward_time": 0.02512955665588379, + "step": 8935 + }, + { + "epoch": 1.363372802734375e-05, + "step": 8935, + "training_step_time": 0.11670923233032227 + }, + { + "epoch": 1.363525390625e-05, + "model_forward_time": 0.025147199630737305, + "step": 8936 + }, + { + "epoch": 1.363525390625e-05, + "step": 8936, + "training_step_time": 0.16999244689941406 + }, + { + "epoch": 1.363677978515625e-05, + "model_forward_time": 0.026273012161254883, + "step": 8937 + }, + { + "epoch": 1.363677978515625e-05, + "step": 8937, + "training_step_time": 0.16988635063171387 + }, + { + "epoch": 1.36383056640625e-05, + "model_forward_time": 0.024311304092407227, + "step": 8938 + }, + { + "epoch": 1.36383056640625e-05, + "step": 8938, + "training_step_time": 0.10423874855041504 + }, + { + "epoch": 1.363983154296875e-05, + "model_forward_time": 0.024471521377563477, + "step": 8939 + }, + { + "epoch": 1.363983154296875e-05, + "step": 8939, + "training_step_time": 0.10640621185302734 + }, + { + "epoch": 1.3641357421875e-05, + "grad_norm": 0.35701218247413635, + "learning_rate": 8.410663560133784e-05, + "loss": 0.0357, + "step": 8940 + }, + { + "epoch": 1.3641357421875e-05, + "model_forward_time": 0.024985551834106445, + "step": 8940 + }, + { + "epoch": 1.3641357421875e-05, + "step": 8940, + "training_step_time": 0.10749626159667969 + }, + { + "epoch": 1.364288330078125e-05, + "model_forward_time": 0.02533245086669922, + "step": 8941 + }, + { + "epoch": 1.364288330078125e-05, + "step": 8941, + "training_step_time": 0.10916972160339355 + }, + { + "epoch": 1.36444091796875e-05, + "model_forward_time": 0.02486419677734375, + "step": 8942 + }, + { + "epoch": 1.36444091796875e-05, + "step": 8942, + "training_step_time": 0.10698938369750977 + }, + { + "epoch": 1.364593505859375e-05, + "model_forward_time": 0.02512335777282715, + "step": 8943 + }, + { + "epoch": 1.364593505859375e-05, + "step": 8943, + "training_step_time": 0.10818767547607422 + }, + { + "epoch": 1.36474609375e-05, + "model_forward_time": 0.024863719940185547, + "step": 8944 + }, + { + "epoch": 1.36474609375e-05, + "step": 8944, + "training_step_time": 0.10978889465332031 + }, + { + "epoch": 1.364898681640625e-05, + "model_forward_time": 0.027097463607788086, + "step": 8945 + }, + { + "epoch": 1.364898681640625e-05, + "step": 8945, + "training_step_time": 0.11330389976501465 + }, + { + "epoch": 1.36505126953125e-05, + "model_forward_time": 0.02515244483947754, + "step": 8946 + }, + { + "epoch": 1.36505126953125e-05, + "step": 8946, + "training_step_time": 0.13869380950927734 + }, + { + "epoch": 1.365203857421875e-05, + "model_forward_time": 0.02506566047668457, + "step": 8947 + }, + { + "epoch": 1.365203857421875e-05, + "step": 8947, + "training_step_time": 0.11571550369262695 + }, + { + "epoch": 1.3653564453125e-05, + "model_forward_time": 0.025188922882080078, + "step": 8948 + }, + { + "epoch": 1.3653564453125e-05, + "step": 8948, + "training_step_time": 0.1754453182220459 + }, + { + "epoch": 1.365509033203125e-05, + "model_forward_time": 0.024492263793945312, + "step": 8949 + }, + { + "epoch": 1.365509033203125e-05, + "step": 8949, + "training_step_time": 0.18662500381469727 + }, + { + "epoch": 1.36566162109375e-05, + "grad_norm": 0.4007699191570282, + "learning_rate": 8.406631275217156e-05, + "loss": 0.0303, + "step": 8950 + }, + { + "epoch": 1.36566162109375e-05, + "model_forward_time": 0.02387237548828125, + "step": 8950 + }, + { + "epoch": 1.36566162109375e-05, + "step": 8950, + "training_step_time": 0.11038994789123535 + }, + { + "epoch": 1.365814208984375e-05, + "model_forward_time": 0.024508953094482422, + "step": 8951 + }, + { + "epoch": 1.365814208984375e-05, + "step": 8951, + "training_step_time": 0.12679624557495117 + }, + { + "epoch": 1.365966796875e-05, + "model_forward_time": 0.02622532844543457, + "step": 8952 + }, + { + "epoch": 1.365966796875e-05, + "step": 8952, + "training_step_time": 0.11607217788696289 + }, + { + "epoch": 1.366119384765625e-05, + "model_forward_time": 0.025133848190307617, + "step": 8953 + }, + { + "epoch": 1.366119384765625e-05, + "step": 8953, + "training_step_time": 0.22102761268615723 + }, + { + "epoch": 1.36627197265625e-05, + "model_forward_time": 0.02434563636779785, + "step": 8954 + }, + { + "epoch": 1.36627197265625e-05, + "step": 8954, + "training_step_time": 0.23151087760925293 + }, + { + "epoch": 1.366424560546875e-05, + "model_forward_time": 0.024414539337158203, + "step": 8955 + }, + { + "epoch": 1.366424560546875e-05, + "step": 8955, + "training_step_time": 0.19629907608032227 + }, + { + "epoch": 1.3665771484375e-05, + "model_forward_time": 0.024863243103027344, + "step": 8956 + }, + { + "epoch": 1.3665771484375e-05, + "step": 8956, + "training_step_time": 0.18976831436157227 + }, + { + "epoch": 1.366729736328125e-05, + "model_forward_time": 0.0267641544342041, + "step": 8957 + }, + { + "epoch": 1.366729736328125e-05, + "step": 8957, + "training_step_time": 0.18341946601867676 + }, + { + "epoch": 1.36688232421875e-05, + "model_forward_time": 0.024255990982055664, + "step": 8958 + }, + { + "epoch": 1.36688232421875e-05, + "step": 8958, + "training_step_time": 0.16764545440673828 + }, + { + "epoch": 1.367034912109375e-05, + "model_forward_time": 0.024435997009277344, + "step": 8959 + }, + { + "epoch": 1.367034912109375e-05, + "step": 8959, + "training_step_time": 0.1043086051940918 + }, + { + "epoch": 1.3671875e-05, + "grad_norm": 0.30645623803138733, + "learning_rate": 8.402594850922305e-05, + "loss": 0.0256, + "step": 8960 + }, + { + "epoch": 1.3671875e-05, + "model_forward_time": 0.024538040161132812, + "step": 8960 + }, + { + "epoch": 1.3671875e-05, + "step": 8960, + "training_step_time": 0.10386252403259277 + }, + { + "epoch": 1.367340087890625e-05, + "model_forward_time": 0.025150775909423828, + "step": 8961 + }, + { + "epoch": 1.367340087890625e-05, + "step": 8961, + "training_step_time": 0.11051154136657715 + }, + { + "epoch": 1.36749267578125e-05, + "model_forward_time": 0.02490997314453125, + "step": 8962 + }, + { + "epoch": 1.36749267578125e-05, + "step": 8962, + "training_step_time": 0.11093902587890625 + }, + { + "epoch": 1.367645263671875e-05, + "model_forward_time": 0.025046586990356445, + "step": 8963 + }, + { + "epoch": 1.367645263671875e-05, + "step": 8963, + "training_step_time": 0.10650134086608887 + }, + { + "epoch": 1.3677978515625e-05, + "model_forward_time": 0.02500462532043457, + "step": 8964 + }, + { + "epoch": 1.3677978515625e-05, + "step": 8964, + "training_step_time": 0.109222412109375 + }, + { + "epoch": 1.367950439453125e-05, + "model_forward_time": 0.025159120559692383, + "step": 8965 + }, + { + "epoch": 1.367950439453125e-05, + "step": 8965, + "training_step_time": 0.10845494270324707 + }, + { + "epoch": 1.36810302734375e-05, + "model_forward_time": 0.024912595748901367, + "step": 8966 + }, + { + "epoch": 1.36810302734375e-05, + "step": 8966, + "training_step_time": 0.1050107479095459 + }, + { + "epoch": 1.368255615234375e-05, + "model_forward_time": 0.025229454040527344, + "step": 8967 + }, + { + "epoch": 1.368255615234375e-05, + "step": 8967, + "training_step_time": 0.10692524909973145 + }, + { + "epoch": 1.368408203125e-05, + "model_forward_time": 0.02490973472595215, + "step": 8968 + }, + { + "epoch": 1.368408203125e-05, + "step": 8968, + "training_step_time": 0.10834336280822754 + }, + { + "epoch": 1.368560791015625e-05, + "model_forward_time": 0.024918079376220703, + "step": 8969 + }, + { + "epoch": 1.368560791015625e-05, + "step": 8969, + "training_step_time": 0.1759345531463623 + }, + { + "epoch": 1.36871337890625e-05, + "grad_norm": 0.5372781753540039, + "learning_rate": 8.398554292153866e-05, + "loss": 0.024, + "step": 8970 + }, + { + "epoch": 1.36871337890625e-05, + "model_forward_time": 0.0234677791595459, + "step": 8970 + }, + { + "epoch": 1.36871337890625e-05, + "step": 8970, + "training_step_time": 0.18744730949401855 + }, + { + "epoch": 1.368865966796875e-05, + "model_forward_time": 0.023607492446899414, + "step": 8971 + }, + { + "epoch": 1.368865966796875e-05, + "step": 8971, + "training_step_time": 0.1913130283355713 + }, + { + "epoch": 1.3690185546875e-05, + "model_forward_time": 0.02420830726623535, + "step": 8972 + }, + { + "epoch": 1.3690185546875e-05, + "step": 8972, + "training_step_time": 0.17948675155639648 + }, + { + "epoch": 1.369171142578125e-05, + "model_forward_time": 0.024532556533813477, + "step": 8973 + }, + { + "epoch": 1.369171142578125e-05, + "step": 8973, + "training_step_time": 0.1673755645751953 + }, + { + "epoch": 1.36932373046875e-05, + "model_forward_time": 0.024538516998291016, + "step": 8974 + }, + { + "epoch": 1.36932373046875e-05, + "step": 8974, + "training_step_time": 0.17450714111328125 + }, + { + "epoch": 1.369476318359375e-05, + "model_forward_time": 0.024251937866210938, + "step": 8975 + }, + { + "epoch": 1.369476318359375e-05, + "step": 8975, + "training_step_time": 0.17681360244750977 + }, + { + "epoch": 1.36962890625e-05, + "model_forward_time": 0.02441859245300293, + "step": 8976 + }, + { + "epoch": 1.36962890625e-05, + "step": 8976, + "training_step_time": 0.1400141716003418 + }, + { + "epoch": 1.369781494140625e-05, + "model_forward_time": 0.028722763061523438, + "step": 8977 + }, + { + "epoch": 1.369781494140625e-05, + "step": 8977, + "training_step_time": 0.1135709285736084 + }, + { + "epoch": 1.36993408203125e-05, + "model_forward_time": 0.026620864868164062, + "step": 8978 + }, + { + "epoch": 1.36993408203125e-05, + "step": 8978, + "training_step_time": 0.10544180870056152 + }, + { + "epoch": 1.370086669921875e-05, + "model_forward_time": 0.02524733543395996, + "step": 8979 + }, + { + "epoch": 1.370086669921875e-05, + "step": 8979, + "training_step_time": 0.10431694984436035 + }, + { + "epoch": 1.3702392578125e-05, + "grad_norm": 0.24594636261463165, + "learning_rate": 8.394509603821499e-05, + "loss": 0.0368, + "step": 8980 + }, + { + "epoch": 1.3702392578125e-05, + "model_forward_time": 0.02498602867126465, + "step": 8980 + }, + { + "epoch": 1.3702392578125e-05, + "step": 8980, + "training_step_time": 0.10901856422424316 + }, + { + "epoch": 1.370391845703125e-05, + "model_forward_time": 0.025427579879760742, + "step": 8981 + }, + { + "epoch": 1.370391845703125e-05, + "step": 8981, + "training_step_time": 0.10844302177429199 + }, + { + "epoch": 1.37054443359375e-05, + "model_forward_time": 0.025048017501831055, + "step": 8982 + }, + { + "epoch": 1.37054443359375e-05, + "step": 8982, + "training_step_time": 0.10976195335388184 + }, + { + "epoch": 1.370697021484375e-05, + "model_forward_time": 0.025348901748657227, + "step": 8983 + }, + { + "epoch": 1.370697021484375e-05, + "step": 8983, + "training_step_time": 0.10935449600219727 + }, + { + "epoch": 1.370849609375e-05, + "model_forward_time": 0.024932861328125, + "step": 8984 + }, + { + "epoch": 1.370849609375e-05, + "step": 8984, + "training_step_time": 0.10789275169372559 + }, + { + "epoch": 1.371002197265625e-05, + "model_forward_time": 0.025131702423095703, + "step": 8985 + }, + { + "epoch": 1.371002197265625e-05, + "step": 8985, + "training_step_time": 0.11817669868469238 + }, + { + "epoch": 1.37115478515625e-05, + "model_forward_time": 0.024858474731445312, + "step": 8986 + }, + { + "epoch": 1.37115478515625e-05, + "step": 8986, + "training_step_time": 0.12210369110107422 + }, + { + "epoch": 1.371307373046875e-05, + "model_forward_time": 0.025006532669067383, + "step": 8987 + }, + { + "epoch": 1.371307373046875e-05, + "step": 8987, + "training_step_time": 0.1212015151977539 + }, + { + "epoch": 1.3714599609375e-05, + "model_forward_time": 0.02530956268310547, + "step": 8988 + }, + { + "epoch": 1.3714599609375e-05, + "step": 8988, + "training_step_time": 0.147416353225708 + }, + { + "epoch": 1.371612548828125e-05, + "model_forward_time": 0.02468132972717285, + "step": 8989 + }, + { + "epoch": 1.371612548828125e-05, + "step": 8989, + "training_step_time": 0.11413049697875977 + }, + { + "epoch": 1.37176513671875e-05, + "grad_norm": 0.7974529266357422, + "learning_rate": 8.390460790839882e-05, + "loss": 0.0594, + "step": 8990 + }, + { + "epoch": 1.37176513671875e-05, + "model_forward_time": 0.026628732681274414, + "step": 8990 + }, + { + "epoch": 1.37176513671875e-05, + "step": 8990, + "training_step_time": 0.21802783012390137 + }, + { + "epoch": 1.371917724609375e-05, + "model_forward_time": 0.02472090721130371, + "step": 8991 + }, + { + "epoch": 1.371917724609375e-05, + "step": 8991, + "training_step_time": 0.13001418113708496 + }, + { + "epoch": 1.3720703125e-05, + "model_forward_time": 0.02873969078063965, + "step": 8992 + }, + { + "epoch": 1.3720703125e-05, + "step": 8992, + "training_step_time": 0.17467117309570312 + }, + { + "epoch": 1.372222900390625e-05, + "model_forward_time": 0.024420976638793945, + "step": 8993 + }, + { + "epoch": 1.372222900390625e-05, + "step": 8993, + "training_step_time": 0.16365504264831543 + }, + { + "epoch": 1.37237548828125e-05, + "model_forward_time": 0.024140119552612305, + "step": 8994 + }, + { + "epoch": 1.37237548828125e-05, + "step": 8994, + "training_step_time": 0.14179706573486328 + }, + { + "epoch": 1.372528076171875e-05, + "model_forward_time": 0.02414226531982422, + "step": 8995 + }, + { + "epoch": 1.372528076171875e-05, + "step": 8995, + "training_step_time": 0.18182706832885742 + }, + { + "epoch": 1.3726806640625e-05, + "model_forward_time": 0.024464130401611328, + "step": 8996 + }, + { + "epoch": 1.3726806640625e-05, + "step": 8996, + "training_step_time": 0.10683941841125488 + }, + { + "epoch": 1.372833251953125e-05, + "model_forward_time": 0.025491952896118164, + "step": 8997 + }, + { + "epoch": 1.372833251953125e-05, + "step": 8997, + "training_step_time": 0.10645198822021484 + }, + { + "epoch": 1.37298583984375e-05, + "model_forward_time": 0.025068998336791992, + "step": 8998 + }, + { + "epoch": 1.37298583984375e-05, + "step": 8998, + "training_step_time": 0.10979843139648438 + }, + { + "epoch": 1.373138427734375e-05, + "model_forward_time": 0.025546789169311523, + "step": 8999 + }, + { + "epoch": 1.373138427734375e-05, + "step": 8999, + "training_step_time": 0.11343121528625488 + }, + { + "epoch": 1.373291015625e-05, + "grad_norm": 0.5483483672142029, + "learning_rate": 8.386407858128706e-05, + "loss": 0.037, + "step": 9000 + }, + { + "epoch": 1.373291015625e-05, + "model_forward_time": 0.0243685245513916, + "step": 9000 + }, + { + "epoch": 1.373291015625e-05, + "step": 9000, + "training_step_time": 0.1080172061920166 + }, + { + "epoch": 1.373443603515625e-05, + "model_forward_time": 0.023291826248168945, + "step": 9001 + }, + { + "epoch": 1.373443603515625e-05, + "step": 9001, + "training_step_time": 0.20213937759399414 + }, + { + "epoch": 1.37359619140625e-05, + "model_forward_time": 0.02447366714477539, + "step": 9002 + }, + { + "epoch": 1.37359619140625e-05, + "step": 9002, + "training_step_time": 0.18469715118408203 + }, + { + "epoch": 1.373748779296875e-05, + "model_forward_time": 0.024559736251831055, + "step": 9003 + }, + { + "epoch": 1.373748779296875e-05, + "step": 9003, + "training_step_time": 0.12102532386779785 + }, + { + "epoch": 1.3739013671875e-05, + "model_forward_time": 0.026051998138427734, + "step": 9004 + }, + { + "epoch": 1.3739013671875e-05, + "step": 9004, + "training_step_time": 0.10467028617858887 + }, + { + "epoch": 1.374053955078125e-05, + "model_forward_time": 0.0249025821685791, + "step": 9005 + }, + { + "epoch": 1.374053955078125e-05, + "step": 9005, + "training_step_time": 0.11005520820617676 + }, + { + "epoch": 1.37420654296875e-05, + "model_forward_time": 0.0251157283782959, + "step": 9006 + }, + { + "epoch": 1.37420654296875e-05, + "step": 9006, + "training_step_time": 0.11157417297363281 + }, + { + "epoch": 1.374359130859375e-05, + "model_forward_time": 0.025456666946411133, + "step": 9007 + }, + { + "epoch": 1.374359130859375e-05, + "step": 9007, + "training_step_time": 0.10640835762023926 + }, + { + "epoch": 1.37451171875e-05, + "model_forward_time": 0.025034427642822266, + "step": 9008 + }, + { + "epoch": 1.37451171875e-05, + "step": 9008, + "training_step_time": 0.10537266731262207 + }, + { + "epoch": 1.374664306640625e-05, + "model_forward_time": 0.025076627731323242, + "step": 9009 + }, + { + "epoch": 1.374664306640625e-05, + "step": 9009, + "training_step_time": 0.10428094863891602 + }, + { + "epoch": 1.37481689453125e-05, + "grad_norm": 0.5794000625610352, + "learning_rate": 8.382350810612663e-05, + "loss": 0.0313, + "step": 9010 + }, + { + "epoch": 1.37481689453125e-05, + "model_forward_time": 0.0251922607421875, + "step": 9010 + }, + { + "epoch": 1.37481689453125e-05, + "step": 9010, + "training_step_time": 0.11228704452514648 + }, + { + "epoch": 1.374969482421875e-05, + "model_forward_time": 0.025099515914916992, + "step": 9011 + }, + { + "epoch": 1.374969482421875e-05, + "step": 9011, + "training_step_time": 0.10986495018005371 + }, + { + "epoch": 1.3751220703125e-05, + "model_forward_time": 0.024966001510620117, + "step": 9012 + }, + { + "epoch": 1.3751220703125e-05, + "step": 9012, + "training_step_time": 0.10871124267578125 + }, + { + "epoch": 1.375274658203125e-05, + "model_forward_time": 0.02519845962524414, + "step": 9013 + }, + { + "epoch": 1.375274658203125e-05, + "step": 9013, + "training_step_time": 0.11205339431762695 + }, + { + "epoch": 1.37542724609375e-05, + "model_forward_time": 0.025275707244873047, + "step": 9014 + }, + { + "epoch": 1.37542724609375e-05, + "step": 9014, + "training_step_time": 0.11888885498046875 + }, + { + "epoch": 1.375579833984375e-05, + "model_forward_time": 0.026001453399658203, + "step": 9015 + }, + { + "epoch": 1.375579833984375e-05, + "step": 9015, + "training_step_time": 0.10770583152770996 + }, + { + "epoch": 1.375732421875e-05, + "model_forward_time": 0.02490091323852539, + "step": 9016 + }, + { + "epoch": 1.375732421875e-05, + "step": 9016, + "training_step_time": 0.10969948768615723 + }, + { + "epoch": 1.375885009765625e-05, + "model_forward_time": 0.025013446807861328, + "step": 9017 + }, + { + "epoch": 1.375885009765625e-05, + "step": 9017, + "training_step_time": 0.11002993583679199 + }, + { + "epoch": 1.37603759765625e-05, + "model_forward_time": 0.025025367736816406, + "step": 9018 + }, + { + "epoch": 1.37603759765625e-05, + "step": 9018, + "training_step_time": 0.10908889770507812 + }, + { + "epoch": 1.376190185546875e-05, + "model_forward_time": 0.02509284019470215, + "step": 9019 + }, + { + "epoch": 1.376190185546875e-05, + "step": 9019, + "training_step_time": 0.10874104499816895 + }, + { + "epoch": 1.3763427734375e-05, + "grad_norm": 0.5082548260688782, + "learning_rate": 8.378289653221452e-05, + "loss": 0.035, + "step": 9020 + }, + { + "epoch": 1.3763427734375e-05, + "model_forward_time": 0.025090932846069336, + "step": 9020 + }, + { + "epoch": 1.3763427734375e-05, + "step": 9020, + "training_step_time": 0.10783910751342773 + }, + { + "epoch": 1.376495361328125e-05, + "model_forward_time": 0.025123119354248047, + "step": 9021 + }, + { + "epoch": 1.376495361328125e-05, + "step": 9021, + "training_step_time": 0.1098184585571289 + }, + { + "epoch": 1.37664794921875e-05, + "model_forward_time": 0.027651548385620117, + "step": 9022 + }, + { + "epoch": 1.37664794921875e-05, + "step": 9022, + "training_step_time": 0.11696720123291016 + }, + { + "epoch": 1.376800537109375e-05, + "model_forward_time": 0.02509760856628418, + "step": 9023 + }, + { + "epoch": 1.376800537109375e-05, + "step": 9023, + "training_step_time": 0.11575770378112793 + }, + { + "epoch": 1.376953125e-05, + "model_forward_time": 0.0249483585357666, + "step": 9024 + }, + { + "epoch": 1.376953125e-05, + "step": 9024, + "training_step_time": 0.1095728874206543 + }, + { + "epoch": 1.377105712890625e-05, + "model_forward_time": 0.025665998458862305, + "step": 9025 + }, + { + "epoch": 1.377105712890625e-05, + "step": 9025, + "training_step_time": 0.1091766357421875 + }, + { + "epoch": 1.37725830078125e-05, + "model_forward_time": 0.024926185607910156, + "step": 9026 + }, + { + "epoch": 1.37725830078125e-05, + "step": 9026, + "training_step_time": 0.16724371910095215 + }, + { + "epoch": 1.377410888671875e-05, + "model_forward_time": 0.02477264404296875, + "step": 9027 + }, + { + "epoch": 1.377410888671875e-05, + "step": 9027, + "training_step_time": 0.16097402572631836 + }, + { + "epoch": 1.3775634765625e-05, + "model_forward_time": 0.02511882781982422, + "step": 9028 + }, + { + "epoch": 1.3775634765625e-05, + "step": 9028, + "training_step_time": 0.1185603141784668 + }, + { + "epoch": 1.377716064453125e-05, + "model_forward_time": 0.02625727653503418, + "step": 9029 + }, + { + "epoch": 1.377716064453125e-05, + "step": 9029, + "training_step_time": 0.17354798316955566 + }, + { + "epoch": 1.37786865234375e-05, + "grad_norm": 0.3936881721019745, + "learning_rate": 8.37422439088976e-05, + "loss": 0.0439, + "step": 9030 + }, + { + "epoch": 1.37786865234375e-05, + "model_forward_time": 0.024529218673706055, + "step": 9030 + }, + { + "epoch": 1.37786865234375e-05, + "step": 9030, + "training_step_time": 0.1577596664428711 + }, + { + "epoch": 1.378021240234375e-05, + "model_forward_time": 0.024654150009155273, + "step": 9031 + }, + { + "epoch": 1.378021240234375e-05, + "step": 9031, + "training_step_time": 0.10467743873596191 + }, + { + "epoch": 1.378173828125e-05, + "model_forward_time": 0.025350093841552734, + "step": 9032 + }, + { + "epoch": 1.378173828125e-05, + "step": 9032, + "training_step_time": 0.10571956634521484 + }, + { + "epoch": 1.378326416015625e-05, + "model_forward_time": 0.025458097457885742, + "step": 9033 + }, + { + "epoch": 1.378326416015625e-05, + "step": 9033, + "training_step_time": 0.10661530494689941 + }, + { + "epoch": 1.37847900390625e-05, + "model_forward_time": 0.025179147720336914, + "step": 9034 + }, + { + "epoch": 1.37847900390625e-05, + "step": 9034, + "training_step_time": 0.10761809349060059 + }, + { + "epoch": 1.378631591796875e-05, + "model_forward_time": 0.02529621124267578, + "step": 9035 + }, + { + "epoch": 1.378631591796875e-05, + "step": 9035, + "training_step_time": 0.1101384162902832 + }, + { + "epoch": 1.3787841796875e-05, + "model_forward_time": 0.02881479263305664, + "step": 9036 + }, + { + "epoch": 1.3787841796875e-05, + "step": 9036, + "training_step_time": 0.115692138671875 + }, + { + "epoch": 1.378936767578125e-05, + "model_forward_time": 0.02627873420715332, + "step": 9037 + }, + { + "epoch": 1.378936767578125e-05, + "step": 9037, + "training_step_time": 0.10987401008605957 + }, + { + "epoch": 1.37908935546875e-05, + "model_forward_time": 0.026580333709716797, + "step": 9038 + }, + { + "epoch": 1.37908935546875e-05, + "step": 9038, + "training_step_time": 0.10758852958679199 + }, + { + "epoch": 1.379241943359375e-05, + "model_forward_time": 0.025876998901367188, + "step": 9039 + }, + { + "epoch": 1.379241943359375e-05, + "step": 9039, + "training_step_time": 0.10732293128967285 + }, + { + "epoch": 1.37939453125e-05, + "grad_norm": 0.414567768573761, + "learning_rate": 8.370155028557265e-05, + "loss": 0.04, + "step": 9040 + }, + { + "epoch": 1.37939453125e-05, + "model_forward_time": 0.025490283966064453, + "step": 9040 + }, + { + "epoch": 1.37939453125e-05, + "step": 9040, + "training_step_time": 0.10637068748474121 + }, + { + "epoch": 1.379547119140625e-05, + "model_forward_time": 0.02532196044921875, + "step": 9041 + }, + { + "epoch": 1.379547119140625e-05, + "step": 9041, + "training_step_time": 0.15054082870483398 + }, + { + "epoch": 1.37969970703125e-05, + "model_forward_time": 0.02528524398803711, + "step": 9042 + }, + { + "epoch": 1.37969970703125e-05, + "step": 9042, + "training_step_time": 0.10934734344482422 + }, + { + "epoch": 1.379852294921875e-05, + "model_forward_time": 0.025161027908325195, + "step": 9043 + }, + { + "epoch": 1.379852294921875e-05, + "step": 9043, + "training_step_time": 0.10867977142333984 + }, + { + "epoch": 1.3800048828125e-05, + "model_forward_time": 0.02981734275817871, + "step": 9044 + }, + { + "epoch": 1.3800048828125e-05, + "step": 9044, + "training_step_time": 0.20467734336853027 + }, + { + "epoch": 1.380157470703125e-05, + "model_forward_time": 0.02487921714782715, + "step": 9045 + }, + { + "epoch": 1.380157470703125e-05, + "step": 9045, + "training_step_time": 0.17493224143981934 + }, + { + "epoch": 1.38031005859375e-05, + "model_forward_time": 0.024866342544555664, + "step": 9046 + }, + { + "epoch": 1.38031005859375e-05, + "step": 9046, + "training_step_time": 0.18550753593444824 + }, + { + "epoch": 1.380462646484375e-05, + "model_forward_time": 0.024600982666015625, + "step": 9047 + }, + { + "epoch": 1.380462646484375e-05, + "step": 9047, + "training_step_time": 0.1880788803100586 + }, + { + "epoch": 1.380615234375e-05, + "model_forward_time": 0.024859905242919922, + "step": 9048 + }, + { + "epoch": 1.380615234375e-05, + "step": 9048, + "training_step_time": 0.16760730743408203 + }, + { + "epoch": 1.380767822265625e-05, + "model_forward_time": 0.02560257911682129, + "step": 9049 + }, + { + "epoch": 1.380767822265625e-05, + "step": 9049, + "training_step_time": 0.10809445381164551 + }, + { + "epoch": 1.38092041015625e-05, + "grad_norm": 0.29646000266075134, + "learning_rate": 8.366081571168625e-05, + "loss": 0.0324, + "step": 9050 + }, + { + "epoch": 1.38092041015625e-05, + "model_forward_time": 0.02561783790588379, + "step": 9050 + }, + { + "epoch": 1.38092041015625e-05, + "step": 9050, + "training_step_time": 0.10949993133544922 + }, + { + "epoch": 1.381072998046875e-05, + "model_forward_time": 0.025775671005249023, + "step": 9051 + }, + { + "epoch": 1.381072998046875e-05, + "step": 9051, + "training_step_time": 0.1080939769744873 + }, + { + "epoch": 1.3812255859375e-05, + "model_forward_time": 0.025256633758544922, + "step": 9052 + }, + { + "epoch": 1.3812255859375e-05, + "step": 9052, + "training_step_time": 0.10902214050292969 + }, + { + "epoch": 1.381378173828125e-05, + "model_forward_time": 0.02559494972229004, + "step": 9053 + }, + { + "epoch": 1.381378173828125e-05, + "step": 9053, + "training_step_time": 0.10812759399414062 + }, + { + "epoch": 1.38153076171875e-05, + "model_forward_time": 0.02541947364807129, + "step": 9054 + }, + { + "epoch": 1.38153076171875e-05, + "step": 9054, + "training_step_time": 0.17336249351501465 + }, + { + "epoch": 1.381683349609375e-05, + "model_forward_time": 0.02556443214416504, + "step": 9055 + }, + { + "epoch": 1.381683349609375e-05, + "step": 9055, + "training_step_time": 0.16871213912963867 + }, + { + "epoch": 1.3818359375e-05, + "model_forward_time": 0.024761676788330078, + "step": 9056 + }, + { + "epoch": 1.3818359375e-05, + "step": 9056, + "training_step_time": 0.14667391777038574 + }, + { + "epoch": 1.381988525390625e-05, + "model_forward_time": 0.024607419967651367, + "step": 9057 + }, + { + "epoch": 1.381988525390625e-05, + "step": 9057, + "training_step_time": 0.14133715629577637 + }, + { + "epoch": 1.38214111328125e-05, + "model_forward_time": 0.024799823760986328, + "step": 9058 + }, + { + "epoch": 1.38214111328125e-05, + "step": 9058, + "training_step_time": 0.1506960391998291 + }, + { + "epoch": 1.382293701171875e-05, + "model_forward_time": 0.025743961334228516, + "step": 9059 + }, + { + "epoch": 1.382293701171875e-05, + "step": 9059, + "training_step_time": 0.14345669746398926 + }, + { + "epoch": 1.3824462890625e-05, + "grad_norm": 0.2952696979045868, + "learning_rate": 8.362004023673474e-05, + "loss": 0.0431, + "step": 9060 + }, + { + "epoch": 1.3824462890625e-05, + "model_forward_time": 0.02445816993713379, + "step": 9060 + }, + { + "epoch": 1.3824462890625e-05, + "step": 9060, + "training_step_time": 0.12764620780944824 + }, + { + "epoch": 1.382598876953125e-05, + "model_forward_time": 0.024692773818969727, + "step": 9061 + }, + { + "epoch": 1.382598876953125e-05, + "step": 9061, + "training_step_time": 0.1258397102355957 + }, + { + "epoch": 1.38275146484375e-05, + "model_forward_time": 0.025930166244506836, + "step": 9062 + }, + { + "epoch": 1.38275146484375e-05, + "step": 9062, + "training_step_time": 0.12148118019104004 + }, + { + "epoch": 1.382904052734375e-05, + "model_forward_time": 0.025757551193237305, + "step": 9063 + }, + { + "epoch": 1.382904052734375e-05, + "step": 9063, + "training_step_time": 0.1168966293334961 + }, + { + "epoch": 1.383056640625e-05, + "model_forward_time": 0.025410890579223633, + "step": 9064 + }, + { + "epoch": 1.383056640625e-05, + "step": 9064, + "training_step_time": 0.11532807350158691 + }, + { + "epoch": 1.383209228515625e-05, + "model_forward_time": 0.025713682174682617, + "step": 9065 + }, + { + "epoch": 1.383209228515625e-05, + "step": 9065, + "training_step_time": 0.11672115325927734 + }, + { + "epoch": 1.38336181640625e-05, + "model_forward_time": 0.025488615036010742, + "step": 9066 + }, + { + "epoch": 1.38336181640625e-05, + "step": 9066, + "training_step_time": 0.11321759223937988 + }, + { + "epoch": 1.383514404296875e-05, + "model_forward_time": 0.025453567504882812, + "step": 9067 + }, + { + "epoch": 1.383514404296875e-05, + "step": 9067, + "training_step_time": 0.10818338394165039 + }, + { + "epoch": 1.3836669921875e-05, + "model_forward_time": 0.02543354034423828, + "step": 9068 + }, + { + "epoch": 1.3836669921875e-05, + "step": 9068, + "training_step_time": 0.10792803764343262 + }, + { + "epoch": 1.383819580078125e-05, + "model_forward_time": 0.02462148666381836, + "step": 9069 + }, + { + "epoch": 1.383819580078125e-05, + "step": 9069, + "training_step_time": 0.1068258285522461 + }, + { + "epoch": 1.38397216796875e-05, + "grad_norm": 0.44075703620910645, + "learning_rate": 8.357922391026418e-05, + "loss": 0.0259, + "step": 9070 + }, + { + "epoch": 1.38397216796875e-05, + "model_forward_time": 0.02488565444946289, + "step": 9070 + }, + { + "epoch": 1.38397216796875e-05, + "step": 9070, + "training_step_time": 0.11024069786071777 + }, + { + "epoch": 1.384124755859375e-05, + "model_forward_time": 0.0266876220703125, + "step": 9071 + }, + { + "epoch": 1.384124755859375e-05, + "step": 9071, + "training_step_time": 0.11408400535583496 + }, + { + "epoch": 1.38427734375e-05, + "model_forward_time": 0.026940584182739258, + "step": 9072 + }, + { + "epoch": 1.38427734375e-05, + "step": 9072, + "training_step_time": 0.11243510246276855 + }, + { + "epoch": 1.384429931640625e-05, + "model_forward_time": 0.02596449851989746, + "step": 9073 + }, + { + "epoch": 1.384429931640625e-05, + "step": 9073, + "training_step_time": 0.1717221736907959 + }, + { + "epoch": 1.38458251953125e-05, + "model_forward_time": 0.024836301803588867, + "step": 9074 + }, + { + "epoch": 1.38458251953125e-05, + "step": 9074, + "training_step_time": 0.16862154006958008 + }, + { + "epoch": 1.384735107421875e-05, + "model_forward_time": 0.024775266647338867, + "step": 9075 + }, + { + "epoch": 1.384735107421875e-05, + "step": 9075, + "training_step_time": 0.1068415641784668 + }, + { + "epoch": 1.3848876953125e-05, + "model_forward_time": 0.025150775909423828, + "step": 9076 + }, + { + "epoch": 1.3848876953125e-05, + "step": 9076, + "training_step_time": 0.10566973686218262 + }, + { + "epoch": 1.385040283203125e-05, + "model_forward_time": 0.025618553161621094, + "step": 9077 + }, + { + "epoch": 1.385040283203125e-05, + "step": 9077, + "training_step_time": 0.10737299919128418 + }, + { + "epoch": 1.38519287109375e-05, + "model_forward_time": 0.0252382755279541, + "step": 9078 + }, + { + "epoch": 1.38519287109375e-05, + "step": 9078, + "training_step_time": 0.11012148857116699 + }, + { + "epoch": 1.385345458984375e-05, + "model_forward_time": 0.02565598487854004, + "step": 9079 + }, + { + "epoch": 1.385345458984375e-05, + "step": 9079, + "training_step_time": 0.11248564720153809 + }, + { + "epoch": 1.385498046875e-05, + "grad_norm": 0.3671765923500061, + "learning_rate": 8.353836678187027e-05, + "loss": 0.0294, + "step": 9080 + }, + { + "epoch": 1.385498046875e-05, + "model_forward_time": 0.025854825973510742, + "step": 9080 + }, + { + "epoch": 1.385498046875e-05, + "step": 9080, + "training_step_time": 0.10753488540649414 + }, + { + "epoch": 1.385650634765625e-05, + "model_forward_time": 0.0267331600189209, + "step": 9081 + }, + { + "epoch": 1.385650634765625e-05, + "step": 9081, + "training_step_time": 0.10996675491333008 + }, + { + "epoch": 1.38580322265625e-05, + "model_forward_time": 0.025294065475463867, + "step": 9082 + }, + { + "epoch": 1.38580322265625e-05, + "step": 9082, + "training_step_time": 0.10984110832214355 + }, + { + "epoch": 1.385955810546875e-05, + "model_forward_time": 0.025162935256958008, + "step": 9083 + }, + { + "epoch": 1.385955810546875e-05, + "step": 9083, + "training_step_time": 0.1065518856048584 + }, + { + "epoch": 1.3861083984375e-05, + "model_forward_time": 0.025650501251220703, + "step": 9084 + }, + { + "epoch": 1.3861083984375e-05, + "step": 9084, + "training_step_time": 0.1081686019897461 + }, + { + "epoch": 1.386260986328125e-05, + "model_forward_time": 0.02553391456604004, + "step": 9085 + }, + { + "epoch": 1.386260986328125e-05, + "step": 9085, + "training_step_time": 0.14331364631652832 + }, + { + "epoch": 1.38641357421875e-05, + "model_forward_time": 0.025303125381469727, + "step": 9086 + }, + { + "epoch": 1.38641357421875e-05, + "step": 9086, + "training_step_time": 0.1101534366607666 + }, + { + "epoch": 1.386566162109375e-05, + "model_forward_time": 0.026425838470458984, + "step": 9087 + }, + { + "epoch": 1.386566162109375e-05, + "step": 9087, + "training_step_time": 0.10869574546813965 + }, + { + "epoch": 1.38671875e-05, + "model_forward_time": 0.02586507797241211, + "step": 9088 + }, + { + "epoch": 1.38671875e-05, + "step": 9088, + "training_step_time": 0.1996002197265625 + }, + { + "epoch": 1.386871337890625e-05, + "model_forward_time": 0.025615453720092773, + "step": 9089 + }, + { + "epoch": 1.386871337890625e-05, + "step": 9089, + "training_step_time": 0.16156959533691406 + }, + { + "epoch": 1.38702392578125e-05, + "grad_norm": 0.40770334005355835, + "learning_rate": 8.349746890119826e-05, + "loss": 0.0403, + "step": 9090 + }, + { + "epoch": 1.38702392578125e-05, + "model_forward_time": 0.024502992630004883, + "step": 9090 + }, + { + "epoch": 1.38702392578125e-05, + "step": 9090, + "training_step_time": 0.17998361587524414 + }, + { + "epoch": 1.387176513671875e-05, + "model_forward_time": 0.025048494338989258, + "step": 9091 + }, + { + "epoch": 1.387176513671875e-05, + "step": 9091, + "training_step_time": 0.16448688507080078 + }, + { + "epoch": 1.3873291015625e-05, + "model_forward_time": 0.02584075927734375, + "step": 9092 + }, + { + "epoch": 1.3873291015625e-05, + "step": 9092, + "training_step_time": 0.20310330390930176 + }, + { + "epoch": 1.387481689453125e-05, + "model_forward_time": 0.02554035186767578, + "step": 9093 + }, + { + "epoch": 1.387481689453125e-05, + "step": 9093, + "training_step_time": 0.10418057441711426 + }, + { + "epoch": 1.38763427734375e-05, + "model_forward_time": 0.025744915008544922, + "step": 9094 + }, + { + "epoch": 1.38763427734375e-05, + "step": 9094, + "training_step_time": 0.10464262962341309 + }, + { + "epoch": 1.387786865234375e-05, + "model_forward_time": 0.026983022689819336, + "step": 9095 + }, + { + "epoch": 1.387786865234375e-05, + "step": 9095, + "training_step_time": 0.10904455184936523 + }, + { + "epoch": 1.387939453125e-05, + "model_forward_time": 0.025606870651245117, + "step": 9096 + }, + { + "epoch": 1.387939453125e-05, + "step": 9096, + "training_step_time": 0.10742735862731934 + }, + { + "epoch": 1.388092041015625e-05, + "model_forward_time": 0.025597333908081055, + "step": 9097 + }, + { + "epoch": 1.388092041015625e-05, + "step": 9097, + "training_step_time": 0.10663414001464844 + }, + { + "epoch": 1.38824462890625e-05, + "model_forward_time": 0.025463104248046875, + "step": 9098 + }, + { + "epoch": 1.38824462890625e-05, + "step": 9098, + "training_step_time": 0.10827159881591797 + }, + { + "epoch": 1.388397216796875e-05, + "model_forward_time": 0.025330543518066406, + "step": 9099 + }, + { + "epoch": 1.388397216796875e-05, + "step": 9099, + "training_step_time": 0.10693573951721191 + }, + { + "epoch": 1.3885498046875e-05, + "grad_norm": 0.626854658126831, + "learning_rate": 8.345653031794292e-05, + "loss": 0.0435, + "step": 9100 + }, + { + "epoch": 1.3885498046875e-05, + "model_forward_time": 0.025407075881958008, + "step": 9100 + }, + { + "epoch": 1.3885498046875e-05, + "step": 9100, + "training_step_time": 0.1093299388885498 + }, + { + "epoch": 1.388702392578125e-05, + "model_forward_time": 0.02592754364013672, + "step": 9101 + }, + { + "epoch": 1.388702392578125e-05, + "step": 9101, + "training_step_time": 0.10992121696472168 + }, + { + "epoch": 1.38885498046875e-05, + "model_forward_time": 0.025316953659057617, + "step": 9102 + }, + { + "epoch": 1.38885498046875e-05, + "step": 9102, + "training_step_time": 0.11553335189819336 + }, + { + "epoch": 1.389007568359375e-05, + "model_forward_time": 0.025146007537841797, + "step": 9103 + }, + { + "epoch": 1.389007568359375e-05, + "step": 9103, + "training_step_time": 0.12148523330688477 + }, + { + "epoch": 1.38916015625e-05, + "model_forward_time": 0.02622389793395996, + "step": 9104 + }, + { + "epoch": 1.38916015625e-05, + "step": 9104, + "training_step_time": 0.11942172050476074 + }, + { + "epoch": 1.389312744140625e-05, + "model_forward_time": 0.025426387786865234, + "step": 9105 + }, + { + "epoch": 1.389312744140625e-05, + "step": 9105, + "training_step_time": 0.11870241165161133 + }, + { + "epoch": 1.38946533203125e-05, + "model_forward_time": 0.025638580322265625, + "step": 9106 + }, + { + "epoch": 1.38946533203125e-05, + "step": 9106, + "training_step_time": 0.11709070205688477 + }, + { + "epoch": 1.389617919921875e-05, + "model_forward_time": 0.025388002395629883, + "step": 9107 + }, + { + "epoch": 1.389617919921875e-05, + "step": 9107, + "training_step_time": 0.11936521530151367 + }, + { + "epoch": 1.3897705078125e-05, + "model_forward_time": 0.02653026580810547, + "step": 9108 + }, + { + "epoch": 1.3897705078125e-05, + "step": 9108, + "training_step_time": 0.11499786376953125 + }, + { + "epoch": 1.389923095703125e-05, + "model_forward_time": 0.026556968688964844, + "step": 9109 + }, + { + "epoch": 1.389923095703125e-05, + "step": 9109, + "training_step_time": 0.1176598072052002 + }, + { + "epoch": 1.39007568359375e-05, + "grad_norm": 0.405610591173172, + "learning_rate": 8.34155510818485e-05, + "loss": 0.0362, + "step": 9110 + }, + { + "epoch": 1.39007568359375e-05, + "model_forward_time": 0.02557063102722168, + "step": 9110 + }, + { + "epoch": 1.39007568359375e-05, + "step": 9110, + "training_step_time": 0.10902833938598633 + }, + { + "epoch": 1.390228271484375e-05, + "model_forward_time": 0.02564859390258789, + "step": 9111 + }, + { + "epoch": 1.390228271484375e-05, + "step": 9111, + "training_step_time": 0.10829710960388184 + }, + { + "epoch": 1.390380859375e-05, + "model_forward_time": 0.025366783142089844, + "step": 9112 + }, + { + "epoch": 1.390380859375e-05, + "step": 9112, + "training_step_time": 0.10773205757141113 + }, + { + "epoch": 1.390533447265625e-05, + "model_forward_time": 0.025246143341064453, + "step": 9113 + }, + { + "epoch": 1.390533447265625e-05, + "step": 9113, + "training_step_time": 0.10864996910095215 + }, + { + "epoch": 1.39068603515625e-05, + "model_forward_time": 0.025406837463378906, + "step": 9114 + }, + { + "epoch": 1.39068603515625e-05, + "step": 9114, + "training_step_time": 0.11008143424987793 + }, + { + "epoch": 1.390838623046875e-05, + "model_forward_time": 0.026132822036743164, + "step": 9115 + }, + { + "epoch": 1.390838623046875e-05, + "step": 9115, + "training_step_time": 0.10822796821594238 + }, + { + "epoch": 1.3909912109375e-05, + "model_forward_time": 0.02559804916381836, + "step": 9116 + }, + { + "epoch": 1.3909912109375e-05, + "step": 9116, + "training_step_time": 0.10969376564025879 + }, + { + "epoch": 1.391143798828125e-05, + "model_forward_time": 0.02891993522644043, + "step": 9117 + }, + { + "epoch": 1.391143798828125e-05, + "step": 9117, + "training_step_time": 0.11565017700195312 + }, + { + "epoch": 1.39129638671875e-05, + "model_forward_time": 0.026072263717651367, + "step": 9118 + }, + { + "epoch": 1.39129638671875e-05, + "step": 9118, + "training_step_time": 0.10712265968322754 + }, + { + "epoch": 1.391448974609375e-05, + "model_forward_time": 0.02549600601196289, + "step": 9119 + }, + { + "epoch": 1.391448974609375e-05, + "step": 9119, + "training_step_time": 0.10980057716369629 + }, + { + "epoch": 1.3916015625e-05, + "grad_norm": 0.6752818822860718, + "learning_rate": 8.337453124270863e-05, + "loss": 0.0364, + "step": 9120 + }, + { + "epoch": 1.3916015625e-05, + "model_forward_time": 0.025489330291748047, + "step": 9120 + }, + { + "epoch": 1.3916015625e-05, + "step": 9120, + "training_step_time": 0.21808648109436035 + }, + { + "epoch": 1.391754150390625e-05, + "model_forward_time": 0.02497124671936035, + "step": 9121 + }, + { + "epoch": 1.391754150390625e-05, + "step": 9121, + "training_step_time": 0.11492919921875 + }, + { + "epoch": 1.39190673828125e-05, + "model_forward_time": 0.02472519874572754, + "step": 9122 + }, + { + "epoch": 1.39190673828125e-05, + "step": 9122, + "training_step_time": 0.10494709014892578 + }, + { + "epoch": 1.392059326171875e-05, + "model_forward_time": 0.025079727172851562, + "step": 9123 + }, + { + "epoch": 1.392059326171875e-05, + "step": 9123, + "training_step_time": 0.10689520835876465 + }, + { + "epoch": 1.3922119140625e-05, + "model_forward_time": 0.02539205551147461, + "step": 9124 + }, + { + "epoch": 1.3922119140625e-05, + "step": 9124, + "training_step_time": 0.11005568504333496 + }, + { + "epoch": 1.392364501953125e-05, + "model_forward_time": 0.026664257049560547, + "step": 9125 + }, + { + "epoch": 1.392364501953125e-05, + "step": 9125, + "training_step_time": 0.10702753067016602 + }, + { + "epoch": 1.39251708984375e-05, + "model_forward_time": 0.026546001434326172, + "step": 9126 + }, + { + "epoch": 1.39251708984375e-05, + "step": 9126, + "training_step_time": 0.11172747611999512 + }, + { + "epoch": 1.392669677734375e-05, + "model_forward_time": 0.026352405548095703, + "step": 9127 + }, + { + "epoch": 1.392669677734375e-05, + "step": 9127, + "training_step_time": 0.10736536979675293 + }, + { + "epoch": 1.392822265625e-05, + "model_forward_time": 0.026894330978393555, + "step": 9128 + }, + { + "epoch": 1.392822265625e-05, + "step": 9128, + "training_step_time": 0.10813117027282715 + }, + { + "epoch": 1.392974853515625e-05, + "model_forward_time": 0.02512502670288086, + "step": 9129 + }, + { + "epoch": 1.392974853515625e-05, + "step": 9129, + "training_step_time": 0.10827517509460449 + }, + { + "epoch": 1.39312744140625e-05, + "grad_norm": 0.6920375823974609, + "learning_rate": 8.33334708503663e-05, + "loss": 0.0353, + "step": 9130 + }, + { + "epoch": 1.39312744140625e-05, + "model_forward_time": 0.024845123291015625, + "step": 9130 + }, + { + "epoch": 1.39312744140625e-05, + "step": 9130, + "training_step_time": 0.10716629028320312 + }, + { + "epoch": 1.393280029296875e-05, + "model_forward_time": 0.025761127471923828, + "step": 9131 + }, + { + "epoch": 1.393280029296875e-05, + "step": 9131, + "training_step_time": 0.10730528831481934 + }, + { + "epoch": 1.3934326171875e-05, + "model_forward_time": 0.024785280227661133, + "step": 9132 + }, + { + "epoch": 1.3934326171875e-05, + "step": 9132, + "training_step_time": 0.12429213523864746 + }, + { + "epoch": 1.393585205078125e-05, + "model_forward_time": 0.025729894638061523, + "step": 9133 + }, + { + "epoch": 1.393585205078125e-05, + "step": 9133, + "training_step_time": 0.11242985725402832 + }, + { + "epoch": 1.39373779296875e-05, + "model_forward_time": 0.02620387077331543, + "step": 9134 + }, + { + "epoch": 1.39373779296875e-05, + "step": 9134, + "training_step_time": 0.18211984634399414 + }, + { + "epoch": 1.393890380859375e-05, + "model_forward_time": 0.025114774703979492, + "step": 9135 + }, + { + "epoch": 1.393890380859375e-05, + "step": 9135, + "training_step_time": 0.1727735996246338 + }, + { + "epoch": 1.39404296875e-05, + "model_forward_time": 0.024020671844482422, + "step": 9136 + }, + { + "epoch": 1.39404296875e-05, + "step": 9136, + "training_step_time": 0.20041346549987793 + }, + { + "epoch": 1.394195556640625e-05, + "model_forward_time": 0.024845600128173828, + "step": 9137 + }, + { + "epoch": 1.394195556640625e-05, + "step": 9137, + "training_step_time": 0.14423918724060059 + }, + { + "epoch": 1.39434814453125e-05, + "model_forward_time": 0.02481365203857422, + "step": 9138 + }, + { + "epoch": 1.39434814453125e-05, + "step": 9138, + "training_step_time": 0.22405028343200684 + }, + { + "epoch": 1.394500732421875e-05, + "model_forward_time": 0.02446913719177246, + "step": 9139 + }, + { + "epoch": 1.394500732421875e-05, + "step": 9139, + "training_step_time": 0.11087870597839355 + }, + { + "epoch": 1.3946533203125e-05, + "grad_norm": 0.41101816296577454, + "learning_rate": 8.329236995471373e-05, + "loss": 0.026, + "step": 9140 + }, + { + "epoch": 1.3946533203125e-05, + "model_forward_time": 0.025381088256835938, + "step": 9140 + }, + { + "epoch": 1.3946533203125e-05, + "step": 9140, + "training_step_time": 0.10801339149475098 + }, + { + "epoch": 1.394805908203125e-05, + "model_forward_time": 0.0258638858795166, + "step": 9141 + }, + { + "epoch": 1.394805908203125e-05, + "step": 9141, + "training_step_time": 0.10876870155334473 + }, + { + "epoch": 1.39495849609375e-05, + "model_forward_time": 0.025592327117919922, + "step": 9142 + }, + { + "epoch": 1.39495849609375e-05, + "step": 9142, + "training_step_time": 0.10953545570373535 + }, + { + "epoch": 1.395111083984375e-05, + "model_forward_time": 0.024933338165283203, + "step": 9143 + }, + { + "epoch": 1.395111083984375e-05, + "step": 9143, + "training_step_time": 0.1128995418548584 + }, + { + "epoch": 1.395263671875e-05, + "model_forward_time": 0.025464296340942383, + "step": 9144 + }, + { + "epoch": 1.395263671875e-05, + "step": 9144, + "training_step_time": 0.11025452613830566 + }, + { + "epoch": 1.395416259765625e-05, + "model_forward_time": 0.025427579879760742, + "step": 9145 + }, + { + "epoch": 1.395416259765625e-05, + "step": 9145, + "training_step_time": 0.11137962341308594 + }, + { + "epoch": 1.39556884765625e-05, + "model_forward_time": 0.025498628616333008, + "step": 9146 + }, + { + "epoch": 1.39556884765625e-05, + "step": 9146, + "training_step_time": 0.1098165512084961 + }, + { + "epoch": 1.395721435546875e-05, + "model_forward_time": 0.025551557540893555, + "step": 9147 + }, + { + "epoch": 1.395721435546875e-05, + "step": 9147, + "training_step_time": 0.11008858680725098 + }, + { + "epoch": 1.3958740234375e-05, + "model_forward_time": 0.02558588981628418, + "step": 9148 + }, + { + "epoch": 1.3958740234375e-05, + "step": 9148, + "training_step_time": 0.10902762413024902 + }, + { + "epoch": 1.396026611328125e-05, + "model_forward_time": 0.02511119842529297, + "step": 9149 + }, + { + "epoch": 1.396026611328125e-05, + "step": 9149, + "training_step_time": 0.10747361183166504 + }, + { + "epoch": 1.39617919921875e-05, + "grad_norm": 0.3406887948513031, + "learning_rate": 8.32512286056924e-05, + "loss": 0.0415, + "step": 9150 + }, + { + "epoch": 1.39617919921875e-05, + "model_forward_time": 0.026964902877807617, + "step": 9150 + }, + { + "epoch": 1.39617919921875e-05, + "step": 9150, + "training_step_time": 0.11081171035766602 + }, + { + "epoch": 1.396331787109375e-05, + "model_forward_time": 0.026320219039916992, + "step": 9151 + }, + { + "epoch": 1.396331787109375e-05, + "step": 9151, + "training_step_time": 0.11258196830749512 + }, + { + "epoch": 1.396484375e-05, + "model_forward_time": 0.025126218795776367, + "step": 9152 + }, + { + "epoch": 1.396484375e-05, + "step": 9152, + "training_step_time": 0.10746884346008301 + }, + { + "epoch": 1.396636962890625e-05, + "model_forward_time": 0.025794267654418945, + "step": 9153 + }, + { + "epoch": 1.396636962890625e-05, + "step": 9153, + "training_step_time": 0.10755252838134766 + }, + { + "epoch": 1.39678955078125e-05, + "model_forward_time": 0.025263309478759766, + "step": 9154 + }, + { + "epoch": 1.39678955078125e-05, + "step": 9154, + "training_step_time": 0.11017203330993652 + }, + { + "epoch": 1.396942138671875e-05, + "model_forward_time": 0.025174379348754883, + "step": 9155 + }, + { + "epoch": 1.396942138671875e-05, + "step": 9155, + "training_step_time": 0.10856223106384277 + }, + { + "epoch": 1.3970947265625e-05, + "model_forward_time": 0.025234460830688477, + "step": 9156 + }, + { + "epoch": 1.3970947265625e-05, + "step": 9156, + "training_step_time": 0.10779070854187012 + }, + { + "epoch": 1.397247314453125e-05, + "model_forward_time": 0.024880409240722656, + "step": 9157 + }, + { + "epoch": 1.397247314453125e-05, + "step": 9157, + "training_step_time": 0.10769534111022949 + }, + { + "epoch": 1.39739990234375e-05, + "model_forward_time": 0.024944782257080078, + "step": 9158 + }, + { + "epoch": 1.39739990234375e-05, + "step": 9158, + "training_step_time": 0.11530113220214844 + }, + { + "epoch": 1.397552490234375e-05, + "model_forward_time": 0.02593851089477539, + "step": 9159 + }, + { + "epoch": 1.397552490234375e-05, + "step": 9159, + "training_step_time": 0.10800385475158691 + }, + { + "epoch": 1.397705078125e-05, + "grad_norm": 0.5496638417243958, + "learning_rate": 8.321004685329296e-05, + "loss": 0.0275, + "step": 9160 + }, + { + "epoch": 1.397705078125e-05, + "model_forward_time": 0.02482914924621582, + "step": 9160 + }, + { + "epoch": 1.397705078125e-05, + "step": 9160, + "training_step_time": 0.11044430732727051 + }, + { + "epoch": 1.397857666015625e-05, + "model_forward_time": 0.0247955322265625, + "step": 9161 + }, + { + "epoch": 1.397857666015625e-05, + "step": 9161, + "training_step_time": 0.10543060302734375 + }, + { + "epoch": 1.39801025390625e-05, + "model_forward_time": 0.02610611915588379, + "step": 9162 + }, + { + "epoch": 1.39801025390625e-05, + "step": 9162, + "training_step_time": 0.10873651504516602 + }, + { + "epoch": 1.398162841796875e-05, + "model_forward_time": 0.02532482147216797, + "step": 9163 + }, + { + "epoch": 1.398162841796875e-05, + "step": 9163, + "training_step_time": 0.1094520092010498 + }, + { + "epoch": 1.3983154296875e-05, + "model_forward_time": 0.024999618530273438, + "step": 9164 + }, + { + "epoch": 1.3983154296875e-05, + "step": 9164, + "training_step_time": 0.12070369720458984 + }, + { + "epoch": 1.398468017578125e-05, + "model_forward_time": 0.025365114212036133, + "step": 9165 + }, + { + "epoch": 1.398468017578125e-05, + "step": 9165, + "training_step_time": 0.1388697624206543 + }, + { + "epoch": 1.39862060546875e-05, + "model_forward_time": 0.025928974151611328, + "step": 9166 + }, + { + "epoch": 1.39862060546875e-05, + "step": 9166, + "training_step_time": 0.1851944923400879 + }, + { + "epoch": 1.398773193359375e-05, + "model_forward_time": 0.024860858917236328, + "step": 9167 + }, + { + "epoch": 1.398773193359375e-05, + "step": 9167, + "training_step_time": 0.13228583335876465 + }, + { + "epoch": 1.39892578125e-05, + "model_forward_time": 0.024890661239624023, + "step": 9168 + }, + { + "epoch": 1.39892578125e-05, + "step": 9168, + "training_step_time": 0.12163019180297852 + }, + { + "epoch": 1.399078369140625e-05, + "model_forward_time": 0.0252230167388916, + "step": 9169 + }, + { + "epoch": 1.399078369140625e-05, + "step": 9169, + "training_step_time": 0.11821198463439941 + }, + { + "epoch": 1.39923095703125e-05, + "grad_norm": 0.3352798521518707, + "learning_rate": 8.316882474755507e-05, + "loss": 0.0271, + "step": 9170 + }, + { + "epoch": 1.39923095703125e-05, + "model_forward_time": 0.026145219802856445, + "step": 9170 + }, + { + "epoch": 1.39923095703125e-05, + "step": 9170, + "training_step_time": 0.1144561767578125 + }, + { + "epoch": 1.399383544921875e-05, + "model_forward_time": 0.025437355041503906, + "step": 9171 + }, + { + "epoch": 1.399383544921875e-05, + "step": 9171, + "training_step_time": 0.11360597610473633 + }, + { + "epoch": 1.3995361328125e-05, + "model_forward_time": 0.026082754135131836, + "step": 9172 + }, + { + "epoch": 1.3995361328125e-05, + "step": 9172, + "training_step_time": 0.11205363273620605 + }, + { + "epoch": 1.399688720703125e-05, + "model_forward_time": 0.026355981826782227, + "step": 9173 + }, + { + "epoch": 1.399688720703125e-05, + "step": 9173, + "training_step_time": 0.11150479316711426 + }, + { + "epoch": 1.39984130859375e-05, + "model_forward_time": 0.025547027587890625, + "step": 9174 + }, + { + "epoch": 1.39984130859375e-05, + "step": 9174, + "training_step_time": 0.10934567451477051 + }, + { + "epoch": 1.399993896484375e-05, + "model_forward_time": 0.025652170181274414, + "step": 9175 + }, + { + "epoch": 1.399993896484375e-05, + "step": 9175, + "training_step_time": 0.11099910736083984 + }, + { + "epoch": 1.400146484375e-05, + "model_forward_time": 0.025305509567260742, + "step": 9176 + }, + { + "epoch": 1.400146484375e-05, + "step": 9176, + "training_step_time": 0.11101102828979492 + }, + { + "epoch": 1.400299072265625e-05, + "model_forward_time": 0.025686979293823242, + "step": 9177 + }, + { + "epoch": 1.400299072265625e-05, + "step": 9177, + "training_step_time": 0.10844206809997559 + }, + { + "epoch": 1.40045166015625e-05, + "model_forward_time": 0.029117345809936523, + "step": 9178 + }, + { + "epoch": 1.40045166015625e-05, + "step": 9178, + "training_step_time": 0.17886686325073242 + }, + { + "epoch": 1.400604248046875e-05, + "model_forward_time": 0.024958372116088867, + "step": 9179 + }, + { + "epoch": 1.400604248046875e-05, + "step": 9179, + "training_step_time": 0.11199951171875 + }, + { + "epoch": 1.4007568359375e-05, + "grad_norm": 0.4306095540523529, + "learning_rate": 8.31275623385675e-05, + "loss": 0.0285, + "step": 9180 + }, + { + "epoch": 1.4007568359375e-05, + "model_forward_time": 0.024013280868530273, + "step": 9180 + }, + { + "epoch": 1.4007568359375e-05, + "step": 9180, + "training_step_time": 0.17786574363708496 + }, + { + "epoch": 1.400909423828125e-05, + "model_forward_time": 0.025191307067871094, + "step": 9181 + }, + { + "epoch": 1.400909423828125e-05, + "step": 9181, + "training_step_time": 0.18233585357666016 + }, + { + "epoch": 1.40106201171875e-05, + "model_forward_time": 0.02554178237915039, + "step": 9182 + }, + { + "epoch": 1.40106201171875e-05, + "step": 9182, + "training_step_time": 0.17758464813232422 + }, + { + "epoch": 1.401214599609375e-05, + "model_forward_time": 0.025226354598999023, + "step": 9183 + }, + { + "epoch": 1.401214599609375e-05, + "step": 9183, + "training_step_time": 0.14825940132141113 + }, + { + "epoch": 1.4013671875e-05, + "model_forward_time": 0.025041580200195312, + "step": 9184 + }, + { + "epoch": 1.4013671875e-05, + "step": 9184, + "training_step_time": 0.17253994941711426 + }, + { + "epoch": 1.401519775390625e-05, + "model_forward_time": 0.02605891227722168, + "step": 9185 + }, + { + "epoch": 1.401519775390625e-05, + "step": 9185, + "training_step_time": 0.16543936729431152 + }, + { + "epoch": 1.40167236328125e-05, + "model_forward_time": 0.024085521697998047, + "step": 9186 + }, + { + "epoch": 1.40167236328125e-05, + "step": 9186, + "training_step_time": 0.10842394828796387 + }, + { + "epoch": 1.401824951171875e-05, + "model_forward_time": 0.02480602264404297, + "step": 9187 + }, + { + "epoch": 1.401824951171875e-05, + "step": 9187, + "training_step_time": 0.10920119285583496 + }, + { + "epoch": 1.4019775390625e-05, + "model_forward_time": 0.025591611862182617, + "step": 9188 + }, + { + "epoch": 1.4019775390625e-05, + "step": 9188, + "training_step_time": 0.10972285270690918 + }, + { + "epoch": 1.402130126953125e-05, + "model_forward_time": 0.026254653930664062, + "step": 9189 + }, + { + "epoch": 1.402130126953125e-05, + "step": 9189, + "training_step_time": 0.11072182655334473 + }, + { + "epoch": 1.40228271484375e-05, + "grad_norm": 0.28492471575737, + "learning_rate": 8.308625967646795e-05, + "loss": 0.0214, + "step": 9190 + }, + { + "epoch": 1.40228271484375e-05, + "model_forward_time": 0.027051925659179688, + "step": 9190 + }, + { + "epoch": 1.40228271484375e-05, + "step": 9190, + "training_step_time": 0.11478042602539062 + }, + { + "epoch": 1.402435302734375e-05, + "model_forward_time": 0.026002168655395508, + "step": 9191 + }, + { + "epoch": 1.402435302734375e-05, + "step": 9191, + "training_step_time": 0.11104536056518555 + }, + { + "epoch": 1.402587890625e-05, + "model_forward_time": 0.025592327117919922, + "step": 9192 + }, + { + "epoch": 1.402587890625e-05, + "step": 9192, + "training_step_time": 0.10969018936157227 + }, + { + "epoch": 1.402740478515625e-05, + "model_forward_time": 0.025146007537841797, + "step": 9193 + }, + { + "epoch": 1.402740478515625e-05, + "step": 9193, + "training_step_time": 0.1098945140838623 + }, + { + "epoch": 1.40289306640625e-05, + "model_forward_time": 0.025857210159301758, + "step": 9194 + }, + { + "epoch": 1.40289306640625e-05, + "step": 9194, + "training_step_time": 0.11042046546936035 + }, + { + "epoch": 1.403045654296875e-05, + "model_forward_time": 0.02577948570251465, + "step": 9195 + }, + { + "epoch": 1.403045654296875e-05, + "step": 9195, + "training_step_time": 0.11013197898864746 + }, + { + "epoch": 1.4031982421875e-05, + "model_forward_time": 0.02564406394958496, + "step": 9196 + }, + { + "epoch": 1.4031982421875e-05, + "step": 9196, + "training_step_time": 0.11328887939453125 + }, + { + "epoch": 1.403350830078125e-05, + "model_forward_time": 0.025844573974609375, + "step": 9197 + }, + { + "epoch": 1.403350830078125e-05, + "step": 9197, + "training_step_time": 0.10879087448120117 + }, + { + "epoch": 1.40350341796875e-05, + "model_forward_time": 0.0251462459564209, + "step": 9198 + }, + { + "epoch": 1.40350341796875e-05, + "step": 9198, + "training_step_time": 0.1086575984954834 + }, + { + "epoch": 1.403656005859375e-05, + "model_forward_time": 0.02576446533203125, + "step": 9199 + }, + { + "epoch": 1.403656005859375e-05, + "step": 9199, + "training_step_time": 0.11162900924682617 + }, + { + "epoch": 1.40380859375e-05, + "grad_norm": 0.32366159558296204, + "learning_rate": 8.304491681144306e-05, + "loss": 0.0342, + "step": 9200 + }, + { + "epoch": 1.40380859375e-05, + "model_forward_time": 0.02669692039489746, + "step": 9200 + }, + { + "epoch": 1.40380859375e-05, + "step": 9200, + "training_step_time": 0.11670875549316406 + }, + { + "epoch": 1.403961181640625e-05, + "model_forward_time": 0.025883913040161133, + "step": 9201 + }, + { + "epoch": 1.403961181640625e-05, + "step": 9201, + "training_step_time": 0.11293911933898926 + }, + { + "epoch": 1.40411376953125e-05, + "model_forward_time": 0.0256350040435791, + "step": 9202 + }, + { + "epoch": 1.40411376953125e-05, + "step": 9202, + "training_step_time": 0.1109156608581543 + }, + { + "epoch": 1.404266357421875e-05, + "model_forward_time": 0.02554631233215332, + "step": 9203 + }, + { + "epoch": 1.404266357421875e-05, + "step": 9203, + "training_step_time": 0.11634659767150879 + }, + { + "epoch": 1.4044189453125e-05, + "model_forward_time": 0.025773048400878906, + "step": 9204 + }, + { + "epoch": 1.4044189453125e-05, + "step": 9204, + "training_step_time": 0.1144571304321289 + }, + { + "epoch": 1.404571533203125e-05, + "model_forward_time": 0.025533199310302734, + "step": 9205 + }, + { + "epoch": 1.404571533203125e-05, + "step": 9205, + "training_step_time": 0.12031698226928711 + }, + { + "epoch": 1.40472412109375e-05, + "model_forward_time": 0.02633833885192871, + "step": 9206 + }, + { + "epoch": 1.40472412109375e-05, + "step": 9206, + "training_step_time": 0.11600995063781738 + }, + { + "epoch": 1.404876708984375e-05, + "model_forward_time": 0.02688765525817871, + "step": 9207 + }, + { + "epoch": 1.404876708984375e-05, + "step": 9207, + "training_step_time": 0.1140131950378418 + }, + { + "epoch": 1.405029296875e-05, + "model_forward_time": 0.026072978973388672, + "step": 9208 + }, + { + "epoch": 1.405029296875e-05, + "step": 9208, + "training_step_time": 0.19865822792053223 + }, + { + "epoch": 1.405181884765625e-05, + "model_forward_time": 0.025192737579345703, + "step": 9209 + }, + { + "epoch": 1.405181884765625e-05, + "step": 9209, + "training_step_time": 0.12128019332885742 + }, + { + "epoch": 1.40533447265625e-05, + "grad_norm": 0.44884684681892395, + "learning_rate": 8.300353379372834e-05, + "loss": 0.0404, + "step": 9210 + }, + { + "epoch": 1.40533447265625e-05, + "model_forward_time": 0.024949073791503906, + "step": 9210 + }, + { + "epoch": 1.40533447265625e-05, + "step": 9210, + "training_step_time": 0.11033391952514648 + }, + { + "epoch": 1.405487060546875e-05, + "model_forward_time": 0.025771617889404297, + "step": 9211 + }, + { + "epoch": 1.405487060546875e-05, + "step": 9211, + "training_step_time": 0.11068487167358398 + }, + { + "epoch": 1.4056396484375e-05, + "model_forward_time": 0.025538921356201172, + "step": 9212 + }, + { + "epoch": 1.4056396484375e-05, + "step": 9212, + "training_step_time": 0.1698305606842041 + }, + { + "epoch": 1.405792236328125e-05, + "model_forward_time": 0.025423049926757812, + "step": 9213 + }, + { + "epoch": 1.405792236328125e-05, + "step": 9213, + "training_step_time": 0.16509389877319336 + }, + { + "epoch": 1.40594482421875e-05, + "model_forward_time": 0.026980161666870117, + "step": 9214 + }, + { + "epoch": 1.40594482421875e-05, + "step": 9214, + "training_step_time": 0.11147308349609375 + }, + { + "epoch": 1.406097412109375e-05, + "model_forward_time": 0.025206804275512695, + "step": 9215 + }, + { + "epoch": 1.406097412109375e-05, + "step": 9215, + "training_step_time": 0.10766005516052246 + }, + { + "epoch": 1.40625e-05, + "model_forward_time": 0.02593231201171875, + "step": 9216 + }, + { + "epoch": 1.40625e-05, + "step": 9216, + "training_step_time": 0.11008024215698242 + }, + { + "epoch": 1.406402587890625e-05, + "model_forward_time": 0.026243925094604492, + "step": 9217 + }, + { + "epoch": 1.406402587890625e-05, + "step": 9217, + "training_step_time": 0.11051106452941895 + }, + { + "epoch": 1.40655517578125e-05, + "model_forward_time": 0.025405168533325195, + "step": 9218 + }, + { + "epoch": 1.40655517578125e-05, + "step": 9218, + "training_step_time": 0.11015629768371582 + }, + { + "epoch": 1.406707763671875e-05, + "model_forward_time": 0.02550363540649414, + "step": 9219 + }, + { + "epoch": 1.406707763671875e-05, + "step": 9219, + "training_step_time": 0.11301898956298828 + }, + { + "epoch": 1.4068603515625e-05, + "grad_norm": 0.4171225428581238, + "learning_rate": 8.2962110673608e-05, + "loss": 0.0387, + "step": 9220 + }, + { + "epoch": 1.4068603515625e-05, + "model_forward_time": 0.02526092529296875, + "step": 9220 + }, + { + "epoch": 1.4068603515625e-05, + "step": 9220, + "training_step_time": 0.11416840553283691 + }, + { + "epoch": 1.407012939453125e-05, + "model_forward_time": 0.025185585021972656, + "step": 9221 + }, + { + "epoch": 1.407012939453125e-05, + "step": 9221, + "training_step_time": 0.11016988754272461 + }, + { + "epoch": 1.40716552734375e-05, + "model_forward_time": 0.025459766387939453, + "step": 9222 + }, + { + "epoch": 1.40716552734375e-05, + "step": 9222, + "training_step_time": 0.11258459091186523 + }, + { + "epoch": 1.407318115234375e-05, + "model_forward_time": 0.024973630905151367, + "step": 9223 + }, + { + "epoch": 1.407318115234375e-05, + "step": 9223, + "training_step_time": 0.11520266532897949 + }, + { + "epoch": 1.407470703125e-05, + "model_forward_time": 0.024943113327026367, + "step": 9224 + }, + { + "epoch": 1.407470703125e-05, + "step": 9224, + "training_step_time": 0.14126276969909668 + }, + { + "epoch": 1.407623291015625e-05, + "model_forward_time": 0.024735689163208008, + "step": 9225 + }, + { + "epoch": 1.407623291015625e-05, + "step": 9225, + "training_step_time": 0.18017315864562988 + }, + { + "epoch": 1.40777587890625e-05, + "model_forward_time": 0.025815963745117188, + "step": 9226 + }, + { + "epoch": 1.40777587890625e-05, + "step": 9226, + "training_step_time": 0.16603803634643555 + }, + { + "epoch": 1.407928466796875e-05, + "model_forward_time": 0.02499246597290039, + "step": 9227 + }, + { + "epoch": 1.407928466796875e-05, + "step": 9227, + "training_step_time": 0.14215469360351562 + }, + { + "epoch": 1.4080810546875e-05, + "model_forward_time": 0.024719953536987305, + "step": 9228 + }, + { + "epoch": 1.4080810546875e-05, + "step": 9228, + "training_step_time": 0.17731165885925293 + }, + { + "epoch": 1.408233642578125e-05, + "model_forward_time": 0.02483057975769043, + "step": 9229 + }, + { + "epoch": 1.408233642578125e-05, + "step": 9229, + "training_step_time": 0.17722320556640625 + }, + { + "epoch": 1.40838623046875e-05, + "grad_norm": 0.32784372568130493, + "learning_rate": 8.292064750141509e-05, + "loss": 0.0331, + "step": 9230 + }, + { + "epoch": 1.40838623046875e-05, + "model_forward_time": 0.024706363677978516, + "step": 9230 + }, + { + "epoch": 1.40838623046875e-05, + "step": 9230, + "training_step_time": 0.1236112117767334 + }, + { + "epoch": 1.408538818359375e-05, + "model_forward_time": 0.024143457412719727, + "step": 9231 + }, + { + "epoch": 1.408538818359375e-05, + "step": 9231, + "training_step_time": 0.11300969123840332 + }, + { + "epoch": 1.40869140625e-05, + "model_forward_time": 0.02503824234008789, + "step": 9232 + }, + { + "epoch": 1.40869140625e-05, + "step": 9232, + "training_step_time": 0.11308169364929199 + }, + { + "epoch": 1.408843994140625e-05, + "model_forward_time": 0.02443528175354004, + "step": 9233 + }, + { + "epoch": 1.408843994140625e-05, + "step": 9233, + "training_step_time": 0.12836909294128418 + }, + { + "epoch": 1.40899658203125e-05, + "model_forward_time": 0.02539229393005371, + "step": 9234 + }, + { + "epoch": 1.40899658203125e-05, + "step": 9234, + "training_step_time": 0.1230771541595459 + }, + { + "epoch": 1.409149169921875e-05, + "model_forward_time": 0.02460312843322754, + "step": 9235 + }, + { + "epoch": 1.409149169921875e-05, + "step": 9235, + "training_step_time": 0.11778998374938965 + }, + { + "epoch": 1.4093017578125e-05, + "model_forward_time": 0.02858901023864746, + "step": 9236 + }, + { + "epoch": 1.4093017578125e-05, + "step": 9236, + "training_step_time": 0.1146688461303711 + }, + { + "epoch": 1.409454345703125e-05, + "model_forward_time": 0.025128602981567383, + "step": 9237 + }, + { + "epoch": 1.409454345703125e-05, + "step": 9237, + "training_step_time": 0.11722755432128906 + }, + { + "epoch": 1.40960693359375e-05, + "model_forward_time": 0.025204181671142578, + "step": 9238 + }, + { + "epoch": 1.40960693359375e-05, + "step": 9238, + "training_step_time": 0.11149072647094727 + }, + { + "epoch": 1.409759521484375e-05, + "model_forward_time": 0.02494192123413086, + "step": 9239 + }, + { + "epoch": 1.409759521484375e-05, + "step": 9239, + "training_step_time": 0.10841727256774902 + }, + { + "epoch": 1.409912109375e-05, + "grad_norm": 0.25738635659217834, + "learning_rate": 8.287914432753123e-05, + "loss": 0.0373, + "step": 9240 + }, + { + "epoch": 1.409912109375e-05, + "model_forward_time": 0.02593207359313965, + "step": 9240 + }, + { + "epoch": 1.409912109375e-05, + "step": 9240, + "training_step_time": 0.10759234428405762 + }, + { + "epoch": 1.410064697265625e-05, + "model_forward_time": 0.025211334228515625, + "step": 9241 + }, + { + "epoch": 1.410064697265625e-05, + "step": 9241, + "training_step_time": 0.11351203918457031 + }, + { + "epoch": 1.41021728515625e-05, + "model_forward_time": 0.025452375411987305, + "step": 9242 + }, + { + "epoch": 1.41021728515625e-05, + "step": 9242, + "training_step_time": 0.10903167724609375 + }, + { + "epoch": 1.410369873046875e-05, + "model_forward_time": 0.02634739875793457, + "step": 9243 + }, + { + "epoch": 1.410369873046875e-05, + "step": 9243, + "training_step_time": 0.11479735374450684 + }, + { + "epoch": 1.4105224609375e-05, + "model_forward_time": 0.02508378028869629, + "step": 9244 + }, + { + "epoch": 1.4105224609375e-05, + "step": 9244, + "training_step_time": 0.10703349113464355 + }, + { + "epoch": 1.410675048828125e-05, + "model_forward_time": 0.025347232818603516, + "step": 9245 + }, + { + "epoch": 1.410675048828125e-05, + "step": 9245, + "training_step_time": 0.1081242561340332 + }, + { + "epoch": 1.41082763671875e-05, + "model_forward_time": 0.025745391845703125, + "step": 9246 + }, + { + "epoch": 1.41082763671875e-05, + "step": 9246, + "training_step_time": 0.10690927505493164 + }, + { + "epoch": 1.410980224609375e-05, + "model_forward_time": 0.025300979614257812, + "step": 9247 + }, + { + "epoch": 1.410980224609375e-05, + "step": 9247, + "training_step_time": 0.10891103744506836 + }, + { + "epoch": 1.4111328125e-05, + "model_forward_time": 0.025121212005615234, + "step": 9248 + }, + { + "epoch": 1.4111328125e-05, + "step": 9248, + "training_step_time": 0.10979747772216797 + }, + { + "epoch": 1.411285400390625e-05, + "model_forward_time": 0.026063919067382812, + "step": 9249 + }, + { + "epoch": 1.411285400390625e-05, + "step": 9249, + "training_step_time": 0.10776376724243164 + }, + { + "epoch": 1.41143798828125e-05, + "grad_norm": 0.28596746921539307, + "learning_rate": 8.283760120238672e-05, + "loss": 0.0208, + "step": 9250 + }, + { + "epoch": 1.41143798828125e-05, + "model_forward_time": 0.0256345272064209, + "step": 9250 + }, + { + "epoch": 1.41143798828125e-05, + "step": 9250, + "training_step_time": 0.10863256454467773 + }, + { + "epoch": 1.411590576171875e-05, + "model_forward_time": 0.025503873825073242, + "step": 9251 + }, + { + "epoch": 1.411590576171875e-05, + "step": 9251, + "training_step_time": 0.11173129081726074 + }, + { + "epoch": 1.4117431640625e-05, + "model_forward_time": 0.025211811065673828, + "step": 9252 + }, + { + "epoch": 1.4117431640625e-05, + "step": 9252, + "training_step_time": 0.1117558479309082 + }, + { + "epoch": 1.411895751953125e-05, + "model_forward_time": 0.02545022964477539, + "step": 9253 + }, + { + "epoch": 1.411895751953125e-05, + "step": 9253, + "training_step_time": 0.11340069770812988 + }, + { + "epoch": 1.41204833984375e-05, + "model_forward_time": 0.025596141815185547, + "step": 9254 + }, + { + "epoch": 1.41204833984375e-05, + "step": 9254, + "training_step_time": 0.10758757591247559 + }, + { + "epoch": 1.412200927734375e-05, + "model_forward_time": 0.025137901306152344, + "step": 9255 + }, + { + "epoch": 1.412200927734375e-05, + "step": 9255, + "training_step_time": 0.16773009300231934 + }, + { + "epoch": 1.412353515625e-05, + "model_forward_time": 0.025700092315673828, + "step": 9256 + }, + { + "epoch": 1.412353515625e-05, + "step": 9256, + "training_step_time": 0.16127872467041016 + }, + { + "epoch": 1.412506103515625e-05, + "model_forward_time": 0.02477860450744629, + "step": 9257 + }, + { + "epoch": 1.412506103515625e-05, + "step": 9257, + "training_step_time": 0.11895465850830078 + }, + { + "epoch": 1.41265869140625e-05, + "model_forward_time": 0.02507495880126953, + "step": 9258 + }, + { + "epoch": 1.41265869140625e-05, + "step": 9258, + "training_step_time": 0.16961288452148438 + }, + { + "epoch": 1.412811279296875e-05, + "model_forward_time": 0.024747610092163086, + "step": 9259 + }, + { + "epoch": 1.412811279296875e-05, + "step": 9259, + "training_step_time": 0.17305254936218262 + }, + { + "epoch": 1.4129638671875e-05, + "grad_norm": 0.5188223123550415, + "learning_rate": 8.279601817646036e-05, + "loss": 0.0239, + "step": 9260 + }, + { + "epoch": 1.4129638671875e-05, + "model_forward_time": 0.02499985694885254, + "step": 9260 + }, + { + "epoch": 1.4129638671875e-05, + "step": 9260, + "training_step_time": 0.10708951950073242 + }, + { + "epoch": 1.413116455078125e-05, + "model_forward_time": 0.024985074996948242, + "step": 9261 + }, + { + "epoch": 1.413116455078125e-05, + "step": 9261, + "training_step_time": 0.10597848892211914 + }, + { + "epoch": 1.41326904296875e-05, + "model_forward_time": 0.025522947311401367, + "step": 9262 + }, + { + "epoch": 1.41326904296875e-05, + "step": 9262, + "training_step_time": 0.10741066932678223 + }, + { + "epoch": 1.413421630859375e-05, + "model_forward_time": 0.02537369728088379, + "step": 9263 + }, + { + "epoch": 1.413421630859375e-05, + "step": 9263, + "training_step_time": 0.10847926139831543 + }, + { + "epoch": 1.41357421875e-05, + "model_forward_time": 0.02527141571044922, + "step": 9264 + }, + { + "epoch": 1.41357421875e-05, + "step": 9264, + "training_step_time": 0.10844779014587402 + }, + { + "epoch": 1.413726806640625e-05, + "model_forward_time": 0.025139808654785156, + "step": 9265 + }, + { + "epoch": 1.413726806640625e-05, + "step": 9265, + "training_step_time": 0.10712218284606934 + }, + { + "epoch": 1.41387939453125e-05, + "model_forward_time": 0.02492547035217285, + "step": 9266 + }, + { + "epoch": 1.41387939453125e-05, + "step": 9266, + "training_step_time": 0.10891890525817871 + }, + { + "epoch": 1.414031982421875e-05, + "model_forward_time": 0.025429725646972656, + "step": 9267 + }, + { + "epoch": 1.414031982421875e-05, + "step": 9267, + "training_step_time": 0.11018657684326172 + }, + { + "epoch": 1.4141845703125e-05, + "model_forward_time": 0.025316715240478516, + "step": 9268 + }, + { + "epoch": 1.4141845703125e-05, + "step": 9268, + "training_step_time": 0.11097121238708496 + }, + { + "epoch": 1.414337158203125e-05, + "model_forward_time": 0.02517247200012207, + "step": 9269 + }, + { + "epoch": 1.414337158203125e-05, + "step": 9269, + "training_step_time": 0.11122965812683105 + }, + { + "epoch": 1.41448974609375e-05, + "grad_norm": 0.32652047276496887, + "learning_rate": 8.275439530027948e-05, + "loss": 0.0294, + "step": 9270 + }, + { + "epoch": 1.41448974609375e-05, + "model_forward_time": 0.025018930435180664, + "step": 9270 + }, + { + "epoch": 1.41448974609375e-05, + "step": 9270, + "training_step_time": 0.1684269905090332 + }, + { + "epoch": 1.414642333984375e-05, + "model_forward_time": 0.024736881256103516, + "step": 9271 + }, + { + "epoch": 1.414642333984375e-05, + "step": 9271, + "training_step_time": 0.18860697746276855 + }, + { + "epoch": 1.414794921875e-05, + "model_forward_time": 0.02544856071472168, + "step": 9272 + }, + { + "epoch": 1.414794921875e-05, + "step": 9272, + "training_step_time": 0.12715482711791992 + }, + { + "epoch": 1.414947509765625e-05, + "model_forward_time": 0.02449941635131836, + "step": 9273 + }, + { + "epoch": 1.414947509765625e-05, + "step": 9273, + "training_step_time": 0.17458891868591309 + }, + { + "epoch": 1.41510009765625e-05, + "model_forward_time": 0.027659177780151367, + "step": 9274 + }, + { + "epoch": 1.41510009765625e-05, + "step": 9274, + "training_step_time": 0.19277024269104004 + }, + { + "epoch": 1.415252685546875e-05, + "model_forward_time": 0.02462458610534668, + "step": 9275 + }, + { + "epoch": 1.415252685546875e-05, + "step": 9275, + "training_step_time": 0.12173748016357422 + }, + { + "epoch": 1.4154052734375e-05, + "model_forward_time": 0.024233341217041016, + "step": 9276 + }, + { + "epoch": 1.4154052734375e-05, + "step": 9276, + "training_step_time": 0.1326615810394287 + }, + { + "epoch": 1.415557861328125e-05, + "model_forward_time": 0.025336027145385742, + "step": 9277 + }, + { + "epoch": 1.415557861328125e-05, + "step": 9277, + "training_step_time": 0.10873579978942871 + }, + { + "epoch": 1.41571044921875e-05, + "model_forward_time": 0.02552032470703125, + "step": 9278 + }, + { + "epoch": 1.41571044921875e-05, + "step": 9278, + "training_step_time": 0.10592365264892578 + }, + { + "epoch": 1.415863037109375e-05, + "model_forward_time": 0.025714397430419922, + "step": 9279 + }, + { + "epoch": 1.415863037109375e-05, + "step": 9279, + "training_step_time": 0.10602569580078125 + }, + { + "epoch": 1.416015625e-05, + "grad_norm": 0.46416255831718445, + "learning_rate": 8.271273262441975e-05, + "loss": 0.0323, + "step": 9280 + }, + { + "epoch": 1.416015625e-05, + "model_forward_time": 0.025510072708129883, + "step": 9280 + }, + { + "epoch": 1.416015625e-05, + "step": 9280, + "training_step_time": 0.11364531517028809 + }, + { + "epoch": 1.416168212890625e-05, + "model_forward_time": 0.02494359016418457, + "step": 9281 + }, + { + "epoch": 1.416168212890625e-05, + "step": 9281, + "training_step_time": 0.12385916709899902 + }, + { + "epoch": 1.41632080078125e-05, + "model_forward_time": 0.025298595428466797, + "step": 9282 + }, + { + "epoch": 1.41632080078125e-05, + "step": 9282, + "training_step_time": 0.11015081405639648 + }, + { + "epoch": 1.416473388671875e-05, + "model_forward_time": 0.02552175521850586, + "step": 9283 + }, + { + "epoch": 1.416473388671875e-05, + "step": 9283, + "training_step_time": 0.11682796478271484 + }, + { + "epoch": 1.4166259765625e-05, + "model_forward_time": 0.025438547134399414, + "step": 9284 + }, + { + "epoch": 1.4166259765625e-05, + "step": 9284, + "training_step_time": 0.1071324348449707 + }, + { + "epoch": 1.416778564453125e-05, + "model_forward_time": 0.025322914123535156, + "step": 9285 + }, + { + "epoch": 1.416778564453125e-05, + "step": 9285, + "training_step_time": 0.10787367820739746 + }, + { + "epoch": 1.41693115234375e-05, + "model_forward_time": 0.02552175521850586, + "step": 9286 + }, + { + "epoch": 1.41693115234375e-05, + "step": 9286, + "training_step_time": 0.10880208015441895 + }, + { + "epoch": 1.417083740234375e-05, + "model_forward_time": 0.02536606788635254, + "step": 9287 + }, + { + "epoch": 1.417083740234375e-05, + "step": 9287, + "training_step_time": 0.10973405838012695 + }, + { + "epoch": 1.417236328125e-05, + "model_forward_time": 0.025511503219604492, + "step": 9288 + }, + { + "epoch": 1.417236328125e-05, + "step": 9288, + "training_step_time": 0.10735821723937988 + }, + { + "epoch": 1.417388916015625e-05, + "model_forward_time": 0.02530694007873535, + "step": 9289 + }, + { + "epoch": 1.417388916015625e-05, + "step": 9289, + "training_step_time": 0.1080026626586914 + }, + { + "epoch": 1.41754150390625e-05, + "grad_norm": 0.46193185448646545, + "learning_rate": 8.267103019950529e-05, + "loss": 0.041, + "step": 9290 + }, + { + "epoch": 1.41754150390625e-05, + "model_forward_time": 0.025723934173583984, + "step": 9290 + }, + { + "epoch": 1.41754150390625e-05, + "step": 9290, + "training_step_time": 0.11262655258178711 + }, + { + "epoch": 1.417694091796875e-05, + "model_forward_time": 0.025437593460083008, + "step": 9291 + }, + { + "epoch": 1.417694091796875e-05, + "step": 9291, + "training_step_time": 0.10833311080932617 + }, + { + "epoch": 1.4178466796875e-05, + "model_forward_time": 0.025084495544433594, + "step": 9292 + }, + { + "epoch": 1.4178466796875e-05, + "step": 9292, + "training_step_time": 0.10884284973144531 + }, + { + "epoch": 1.417999267578125e-05, + "model_forward_time": 0.02580428123474121, + "step": 9293 + }, + { + "epoch": 1.417999267578125e-05, + "step": 9293, + "training_step_time": 0.11286258697509766 + }, + { + "epoch": 1.41815185546875e-05, + "model_forward_time": 0.0252532958984375, + "step": 9294 + }, + { + "epoch": 1.41815185546875e-05, + "step": 9294, + "training_step_time": 0.10817098617553711 + }, + { + "epoch": 1.418304443359375e-05, + "model_forward_time": 0.025254011154174805, + "step": 9295 + }, + { + "epoch": 1.418304443359375e-05, + "step": 9295, + "training_step_time": 0.10746407508850098 + }, + { + "epoch": 1.41845703125e-05, + "model_forward_time": 0.025269269943237305, + "step": 9296 + }, + { + "epoch": 1.41845703125e-05, + "step": 9296, + "training_step_time": 0.1064143180847168 + }, + { + "epoch": 1.418609619140625e-05, + "model_forward_time": 0.02565765380859375, + "step": 9297 + }, + { + "epoch": 1.418609619140625e-05, + "step": 9297, + "training_step_time": 0.1084902286529541 + }, + { + "epoch": 1.41876220703125e-05, + "model_forward_time": 0.026873350143432617, + "step": 9298 + }, + { + "epoch": 1.41876220703125e-05, + "step": 9298, + "training_step_time": 0.11151838302612305 + }, + { + "epoch": 1.418914794921875e-05, + "model_forward_time": 0.02538013458251953, + "step": 9299 + }, + { + "epoch": 1.418914794921875e-05, + "step": 9299, + "training_step_time": 0.10770058631896973 + }, + { + "epoch": 1.4190673828125e-05, + "grad_norm": 0.31563517451286316, + "learning_rate": 8.262928807620843e-05, + "loss": 0.0414, + "step": 9300 + }, + { + "epoch": 1.4190673828125e-05, + "model_forward_time": 0.025341510772705078, + "step": 9300 + }, + { + "epoch": 1.4190673828125e-05, + "step": 9300, + "training_step_time": 0.10755300521850586 + }, + { + "epoch": 1.419219970703125e-05, + "model_forward_time": 0.024741172790527344, + "step": 9301 + }, + { + "epoch": 1.419219970703125e-05, + "step": 9301, + "training_step_time": 0.1060018539428711 + }, + { + "epoch": 1.41937255859375e-05, + "model_forward_time": 0.02494335174560547, + "step": 9302 + }, + { + "epoch": 1.41937255859375e-05, + "step": 9302, + "training_step_time": 0.11619067192077637 + }, + { + "epoch": 1.419525146484375e-05, + "model_forward_time": 0.025476694107055664, + "step": 9303 + }, + { + "epoch": 1.419525146484375e-05, + "step": 9303, + "training_step_time": 0.11117291450500488 + }, + { + "epoch": 1.419677734375e-05, + "model_forward_time": 0.024979114532470703, + "step": 9304 + }, + { + "epoch": 1.419677734375e-05, + "step": 9304, + "training_step_time": 0.1080925464630127 + }, + { + "epoch": 1.419830322265625e-05, + "model_forward_time": 0.025685787200927734, + "step": 9305 + }, + { + "epoch": 1.419830322265625e-05, + "step": 9305, + "training_step_time": 0.1720445156097412 + }, + { + "epoch": 1.41998291015625e-05, + "model_forward_time": 0.024773120880126953, + "step": 9306 + }, + { + "epoch": 1.41998291015625e-05, + "step": 9306, + "training_step_time": 0.17114877700805664 + }, + { + "epoch": 1.420135498046875e-05, + "model_forward_time": 0.024457454681396484, + "step": 9307 + }, + { + "epoch": 1.420135498046875e-05, + "step": 9307, + "training_step_time": 0.10494470596313477 + }, + { + "epoch": 1.4202880859375e-05, + "model_forward_time": 0.0251007080078125, + "step": 9308 + }, + { + "epoch": 1.4202880859375e-05, + "step": 9308, + "training_step_time": 0.10710597038269043 + }, + { + "epoch": 1.420440673828125e-05, + "model_forward_time": 0.02538585662841797, + "step": 9309 + }, + { + "epoch": 1.420440673828125e-05, + "step": 9309, + "training_step_time": 0.10929417610168457 + }, + { + "epoch": 1.42059326171875e-05, + "grad_norm": 0.6427388191223145, + "learning_rate": 8.258750630524984e-05, + "loss": 0.0316, + "step": 9310 + }, + { + "epoch": 1.42059326171875e-05, + "model_forward_time": 0.025500059127807617, + "step": 9310 + }, + { + "epoch": 1.42059326171875e-05, + "step": 9310, + "training_step_time": 0.10837960243225098 + }, + { + "epoch": 1.420745849609375e-05, + "model_forward_time": 0.02531147003173828, + "step": 9311 + }, + { + "epoch": 1.420745849609375e-05, + "step": 9311, + "training_step_time": 0.10727143287658691 + }, + { + "epoch": 1.4208984375e-05, + "model_forward_time": 0.025129079818725586, + "step": 9312 + }, + { + "epoch": 1.4208984375e-05, + "step": 9312, + "training_step_time": 0.11274552345275879 + }, + { + "epoch": 1.421051025390625e-05, + "model_forward_time": 0.027581453323364258, + "step": 9313 + }, + { + "epoch": 1.421051025390625e-05, + "step": 9313, + "training_step_time": 0.10983538627624512 + }, + { + "epoch": 1.42120361328125e-05, + "model_forward_time": 0.024992704391479492, + "step": 9314 + }, + { + "epoch": 1.42120361328125e-05, + "step": 9314, + "training_step_time": 0.11027073860168457 + }, + { + "epoch": 1.421356201171875e-05, + "model_forward_time": 0.02499079704284668, + "step": 9315 + }, + { + "epoch": 1.421356201171875e-05, + "step": 9315, + "training_step_time": 0.10888934135437012 + }, + { + "epoch": 1.4215087890625e-05, + "model_forward_time": 0.02524089813232422, + "step": 9316 + }, + { + "epoch": 1.4215087890625e-05, + "step": 9316, + "training_step_time": 0.1559295654296875 + }, + { + "epoch": 1.421661376953125e-05, + "model_forward_time": 0.024919509887695312, + "step": 9317 + }, + { + "epoch": 1.421661376953125e-05, + "step": 9317, + "training_step_time": 0.17152619361877441 + }, + { + "epoch": 1.42181396484375e-05, + "model_forward_time": 0.02431011199951172, + "step": 9318 + }, + { + "epoch": 1.42181396484375e-05, + "step": 9318, + "training_step_time": 0.19185280799865723 + }, + { + "epoch": 1.421966552734375e-05, + "model_forward_time": 0.024252891540527344, + "step": 9319 + }, + { + "epoch": 1.421966552734375e-05, + "step": 9319, + "training_step_time": 0.19882965087890625 + }, + { + "epoch": 1.422119140625e-05, + "grad_norm": 0.3493986129760742, + "learning_rate": 8.254568493739828e-05, + "loss": 0.0503, + "step": 9320 + }, + { + "epoch": 1.422119140625e-05, + "model_forward_time": 0.023731231689453125, + "step": 9320 + }, + { + "epoch": 1.422119140625e-05, + "step": 9320, + "training_step_time": 0.1493396759033203 + }, + { + "epoch": 1.422271728515625e-05, + "model_forward_time": 0.024673938751220703, + "step": 9321 + }, + { + "epoch": 1.422271728515625e-05, + "step": 9321, + "training_step_time": 0.178023099899292 + }, + { + "epoch": 1.42242431640625e-05, + "model_forward_time": 0.024322509765625, + "step": 9322 + }, + { + "epoch": 1.42242431640625e-05, + "step": 9322, + "training_step_time": 0.15357208251953125 + }, + { + "epoch": 1.422576904296875e-05, + "model_forward_time": 0.02444744110107422, + "step": 9323 + }, + { + "epoch": 1.422576904296875e-05, + "step": 9323, + "training_step_time": 0.12020182609558105 + }, + { + "epoch": 1.4227294921875e-05, + "model_forward_time": 0.02468132972717285, + "step": 9324 + }, + { + "epoch": 1.4227294921875e-05, + "step": 9324, + "training_step_time": 0.10423636436462402 + }, + { + "epoch": 1.422882080078125e-05, + "model_forward_time": 0.02575063705444336, + "step": 9325 + }, + { + "epoch": 1.422882080078125e-05, + "step": 9325, + "training_step_time": 0.10520267486572266 + }, + { + "epoch": 1.42303466796875e-05, + "model_forward_time": 0.02524876594543457, + "step": 9326 + }, + { + "epoch": 1.42303466796875e-05, + "step": 9326, + "training_step_time": 0.10900163650512695 + }, + { + "epoch": 1.423187255859375e-05, + "model_forward_time": 0.0251312255859375, + "step": 9327 + }, + { + "epoch": 1.423187255859375e-05, + "step": 9327, + "training_step_time": 0.11357593536376953 + }, + { + "epoch": 1.42333984375e-05, + "model_forward_time": 0.02510833740234375, + "step": 9328 + }, + { + "epoch": 1.42333984375e-05, + "step": 9328, + "training_step_time": 0.10618352890014648 + }, + { + "epoch": 1.423492431640625e-05, + "model_forward_time": 0.0253140926361084, + "step": 9329 + }, + { + "epoch": 1.423492431640625e-05, + "step": 9329, + "training_step_time": 0.13569021224975586 + }, + { + "epoch": 1.42364501953125e-05, + "grad_norm": 0.4366268813610077, + "learning_rate": 8.250382402347065e-05, + "loss": 0.03, + "step": 9330 + }, + { + "epoch": 1.42364501953125e-05, + "model_forward_time": 0.024549245834350586, + "step": 9330 + }, + { + "epoch": 1.42364501953125e-05, + "step": 9330, + "training_step_time": 0.1672680377960205 + }, + { + "epoch": 1.423797607421875e-05, + "model_forward_time": 0.025560855865478516, + "step": 9331 + }, + { + "epoch": 1.423797607421875e-05, + "step": 9331, + "training_step_time": 0.17728209495544434 + }, + { + "epoch": 1.4239501953125e-05, + "model_forward_time": 0.024358749389648438, + "step": 9332 + }, + { + "epoch": 1.4239501953125e-05, + "step": 9332, + "training_step_time": 0.15977144241333008 + }, + { + "epoch": 1.424102783203125e-05, + "model_forward_time": 0.027455568313598633, + "step": 9333 + }, + { + "epoch": 1.424102783203125e-05, + "step": 9333, + "training_step_time": 0.139298677444458 + }, + { + "epoch": 1.42425537109375e-05, + "model_forward_time": 0.024326086044311523, + "step": 9334 + }, + { + "epoch": 1.42425537109375e-05, + "step": 9334, + "training_step_time": 0.14159107208251953 + }, + { + "epoch": 1.424407958984375e-05, + "model_forward_time": 0.024331331253051758, + "step": 9335 + }, + { + "epoch": 1.424407958984375e-05, + "step": 9335, + "training_step_time": 0.12559103965759277 + }, + { + "epoch": 1.424560546875e-05, + "model_forward_time": 0.02417588233947754, + "step": 9336 + }, + { + "epoch": 1.424560546875e-05, + "step": 9336, + "training_step_time": 0.1267712116241455 + }, + { + "epoch": 1.424713134765625e-05, + "model_forward_time": 0.024806499481201172, + "step": 9337 + }, + { + "epoch": 1.424713134765625e-05, + "step": 9337, + "training_step_time": 0.12349319458007812 + }, + { + "epoch": 1.42486572265625e-05, + "model_forward_time": 0.024956464767456055, + "step": 9338 + }, + { + "epoch": 1.42486572265625e-05, + "step": 9338, + "training_step_time": 0.11702919006347656 + }, + { + "epoch": 1.425018310546875e-05, + "model_forward_time": 0.025315284729003906, + "step": 9339 + }, + { + "epoch": 1.425018310546875e-05, + "step": 9339, + "training_step_time": 0.11564302444458008 + }, + { + "epoch": 1.4251708984375e-05, + "grad_norm": 0.38389018177986145, + "learning_rate": 8.246192361433196e-05, + "loss": 0.0361, + "step": 9340 + }, + { + "epoch": 1.4251708984375e-05, + "model_forward_time": 0.025294065475463867, + "step": 9340 + }, + { + "epoch": 1.4251708984375e-05, + "step": 9340, + "training_step_time": 0.1131129264831543 + }, + { + "epoch": 1.425323486328125e-05, + "model_forward_time": 0.02499079704284668, + "step": 9341 + }, + { + "epoch": 1.425323486328125e-05, + "step": 9341, + "training_step_time": 0.11035776138305664 + }, + { + "epoch": 1.42547607421875e-05, + "model_forward_time": 0.025301218032836914, + "step": 9342 + }, + { + "epoch": 1.42547607421875e-05, + "step": 9342, + "training_step_time": 0.10878896713256836 + }, + { + "epoch": 1.425628662109375e-05, + "model_forward_time": 0.025458097457885742, + "step": 9343 + }, + { + "epoch": 1.425628662109375e-05, + "step": 9343, + "training_step_time": 0.11203670501708984 + }, + { + "epoch": 1.42578125e-05, + "model_forward_time": 0.025565385818481445, + "step": 9344 + }, + { + "epoch": 1.42578125e-05, + "step": 9344, + "training_step_time": 0.10824418067932129 + }, + { + "epoch": 1.425933837890625e-05, + "model_forward_time": 0.02601933479309082, + "step": 9345 + }, + { + "epoch": 1.425933837890625e-05, + "step": 9345, + "training_step_time": 0.10953688621520996 + }, + { + "epoch": 1.42608642578125e-05, + "model_forward_time": 0.025738000869750977, + "step": 9346 + }, + { + "epoch": 1.42608642578125e-05, + "step": 9346, + "training_step_time": 0.11192870140075684 + }, + { + "epoch": 1.426239013671875e-05, + "model_forward_time": 0.025590181350708008, + "step": 9347 + }, + { + "epoch": 1.426239013671875e-05, + "step": 9347, + "training_step_time": 0.11047792434692383 + }, + { + "epoch": 1.4263916015625e-05, + "model_forward_time": 0.0254209041595459, + "step": 9348 + }, + { + "epoch": 1.4263916015625e-05, + "step": 9348, + "training_step_time": 0.11213278770446777 + }, + { + "epoch": 1.426544189453125e-05, + "model_forward_time": 0.02570486068725586, + "step": 9349 + }, + { + "epoch": 1.426544189453125e-05, + "step": 9349, + "training_step_time": 0.1730644702911377 + }, + { + "epoch": 1.42669677734375e-05, + "grad_norm": 0.41978979110717773, + "learning_rate": 8.241998376089508e-05, + "loss": 0.0325, + "step": 9350 + }, + { + "epoch": 1.42669677734375e-05, + "model_forward_time": 0.02469658851623535, + "step": 9350 + }, + { + "epoch": 1.42669677734375e-05, + "step": 9350, + "training_step_time": 0.161177396774292 + }, + { + "epoch": 1.426849365234375e-05, + "model_forward_time": 0.02526092529296875, + "step": 9351 + }, + { + "epoch": 1.426849365234375e-05, + "step": 9351, + "training_step_time": 0.10650777816772461 + }, + { + "epoch": 1.427001953125e-05, + "model_forward_time": 0.02555704116821289, + "step": 9352 + }, + { + "epoch": 1.427001953125e-05, + "step": 9352, + "training_step_time": 0.10751914978027344 + }, + { + "epoch": 1.427154541015625e-05, + "model_forward_time": 0.02543163299560547, + "step": 9353 + }, + { + "epoch": 1.427154541015625e-05, + "step": 9353, + "training_step_time": 0.10830354690551758 + }, + { + "epoch": 1.42730712890625e-05, + "model_forward_time": 0.024722576141357422, + "step": 9354 + }, + { + "epoch": 1.42730712890625e-05, + "step": 9354, + "training_step_time": 0.10813260078430176 + }, + { + "epoch": 1.427459716796875e-05, + "model_forward_time": 0.025714635848999023, + "step": 9355 + }, + { + "epoch": 1.427459716796875e-05, + "step": 9355, + "training_step_time": 0.10796904563903809 + }, + { + "epoch": 1.4276123046875e-05, + "model_forward_time": 0.025572776794433594, + "step": 9356 + }, + { + "epoch": 1.4276123046875e-05, + "step": 9356, + "training_step_time": 0.10874414443969727 + }, + { + "epoch": 1.427764892578125e-05, + "model_forward_time": 0.025202274322509766, + "step": 9357 + }, + { + "epoch": 1.427764892578125e-05, + "step": 9357, + "training_step_time": 0.10853314399719238 + }, + { + "epoch": 1.42791748046875e-05, + "model_forward_time": 0.0253293514251709, + "step": 9358 + }, + { + "epoch": 1.42791748046875e-05, + "step": 9358, + "training_step_time": 0.1074666976928711 + }, + { + "epoch": 1.428070068359375e-05, + "model_forward_time": 0.025534391403198242, + "step": 9359 + }, + { + "epoch": 1.428070068359375e-05, + "step": 9359, + "training_step_time": 0.1083376407623291 + }, + { + "epoch": 1.42822265625e-05, + "grad_norm": 0.34948891401290894, + "learning_rate": 8.237800451412095e-05, + "loss": 0.0298, + "step": 9360 + }, + { + "epoch": 1.42822265625e-05, + "model_forward_time": 0.025350093841552734, + "step": 9360 + }, + { + "epoch": 1.42822265625e-05, + "step": 9360, + "training_step_time": 0.12054991722106934 + }, + { + "epoch": 1.428375244140625e-05, + "model_forward_time": 0.02513599395751953, + "step": 9361 + }, + { + "epoch": 1.428375244140625e-05, + "step": 9361, + "training_step_time": 0.20360779762268066 + }, + { + "epoch": 1.42852783203125e-05, + "model_forward_time": 0.024605274200439453, + "step": 9362 + }, + { + "epoch": 1.42852783203125e-05, + "step": 9362, + "training_step_time": 0.16749978065490723 + }, + { + "epoch": 1.428680419921875e-05, + "model_forward_time": 0.024476289749145508, + "step": 9363 + }, + { + "epoch": 1.428680419921875e-05, + "step": 9363, + "training_step_time": 0.18401813507080078 + }, + { + "epoch": 1.4288330078125e-05, + "model_forward_time": 0.02448582649230957, + "step": 9364 + }, + { + "epoch": 1.4288330078125e-05, + "step": 9364, + "training_step_time": 0.17475533485412598 + }, + { + "epoch": 1.428985595703125e-05, + "model_forward_time": 0.024755001068115234, + "step": 9365 + }, + { + "epoch": 1.428985595703125e-05, + "step": 9365, + "training_step_time": 0.17977619171142578 + }, + { + "epoch": 1.42913818359375e-05, + "model_forward_time": 0.024433612823486328, + "step": 9366 + }, + { + "epoch": 1.42913818359375e-05, + "step": 9366, + "training_step_time": 0.1424570083618164 + }, + { + "epoch": 1.429290771484375e-05, + "model_forward_time": 0.024227619171142578, + "step": 9367 + }, + { + "epoch": 1.429290771484375e-05, + "step": 9367, + "training_step_time": 0.1109161376953125 + }, + { + "epoch": 1.429443359375e-05, + "model_forward_time": 0.024853944778442383, + "step": 9368 + }, + { + "epoch": 1.429443359375e-05, + "step": 9368, + "training_step_time": 0.1168217658996582 + }, + { + "epoch": 1.429595947265625e-05, + "model_forward_time": 0.02489781379699707, + "step": 9369 + }, + { + "epoch": 1.429595947265625e-05, + "step": 9369, + "training_step_time": 0.11917328834533691 + }, + { + "epoch": 1.42974853515625e-05, + "grad_norm": 0.33603987097740173, + "learning_rate": 8.233598592501828e-05, + "loss": 0.0455, + "step": 9370 + }, + { + "epoch": 1.42974853515625e-05, + "model_forward_time": 0.025384187698364258, + "step": 9370 + }, + { + "epoch": 1.42974853515625e-05, + "step": 9370, + "training_step_time": 0.11890053749084473 + }, + { + "epoch": 1.429901123046875e-05, + "model_forward_time": 0.025571107864379883, + "step": 9371 + }, + { + "epoch": 1.429901123046875e-05, + "step": 9371, + "training_step_time": 0.11262965202331543 + }, + { + "epoch": 1.4300537109375e-05, + "model_forward_time": 0.02538585662841797, + "step": 9372 + }, + { + "epoch": 1.4300537109375e-05, + "step": 9372, + "training_step_time": 0.11399149894714355 + }, + { + "epoch": 1.430206298828125e-05, + "model_forward_time": 0.025331735610961914, + "step": 9373 + }, + { + "epoch": 1.430206298828125e-05, + "step": 9373, + "training_step_time": 0.11225509643554688 + }, + { + "epoch": 1.43035888671875e-05, + "model_forward_time": 0.024239778518676758, + "step": 9374 + }, + { + "epoch": 1.43035888671875e-05, + "step": 9374, + "training_step_time": 0.10890555381774902 + }, + { + "epoch": 1.430511474609375e-05, + "model_forward_time": 0.024962186813354492, + "step": 9375 + }, + { + "epoch": 1.430511474609375e-05, + "step": 9375, + "training_step_time": 0.10840606689453125 + }, + { + "epoch": 1.4306640625e-05, + "model_forward_time": 0.025429725646972656, + "step": 9376 + }, + { + "epoch": 1.4306640625e-05, + "step": 9376, + "training_step_time": 0.11031770706176758 + }, + { + "epoch": 1.430816650390625e-05, + "model_forward_time": 0.02509927749633789, + "step": 9377 + }, + { + "epoch": 1.430816650390625e-05, + "step": 9377, + "training_step_time": 0.11021065711975098 + }, + { + "epoch": 1.43096923828125e-05, + "model_forward_time": 0.02535414695739746, + "step": 9378 + }, + { + "epoch": 1.43096923828125e-05, + "step": 9378, + "training_step_time": 0.11138367652893066 + }, + { + "epoch": 1.431121826171875e-05, + "model_forward_time": 0.025476932525634766, + "step": 9379 + }, + { + "epoch": 1.431121826171875e-05, + "step": 9379, + "training_step_time": 0.11031198501586914 + }, + { + "epoch": 1.4312744140625e-05, + "grad_norm": 0.3461342751979828, + "learning_rate": 8.229392804464362e-05, + "loss": 0.0308, + "step": 9380 + }, + { + "epoch": 1.4312744140625e-05, + "model_forward_time": 0.025185346603393555, + "step": 9380 + }, + { + "epoch": 1.4312744140625e-05, + "step": 9380, + "training_step_time": 0.11371135711669922 + }, + { + "epoch": 1.431427001953125e-05, + "model_forward_time": 0.024500608444213867, + "step": 9381 + }, + { + "epoch": 1.431427001953125e-05, + "step": 9381, + "training_step_time": 0.10595202445983887 + }, + { + "epoch": 1.43157958984375e-05, + "model_forward_time": 0.02512836456298828, + "step": 9382 + }, + { + "epoch": 1.43157958984375e-05, + "step": 9382, + "training_step_time": 0.1128995418548584 + }, + { + "epoch": 1.431732177734375e-05, + "model_forward_time": 0.027281522750854492, + "step": 9383 + }, + { + "epoch": 1.431732177734375e-05, + "step": 9383, + "training_step_time": 0.11076545715332031 + }, + { + "epoch": 1.431884765625e-05, + "model_forward_time": 0.025717496871948242, + "step": 9384 + }, + { + "epoch": 1.431884765625e-05, + "step": 9384, + "training_step_time": 0.10793733596801758 + }, + { + "epoch": 1.432037353515625e-05, + "model_forward_time": 0.02528095245361328, + "step": 9385 + }, + { + "epoch": 1.432037353515625e-05, + "step": 9385, + "training_step_time": 0.1065375804901123 + }, + { + "epoch": 1.43218994140625e-05, + "model_forward_time": 0.028439760208129883, + "step": 9386 + }, + { + "epoch": 1.43218994140625e-05, + "step": 9386, + "training_step_time": 0.10967803001403809 + }, + { + "epoch": 1.432342529296875e-05, + "model_forward_time": 0.025116920471191406, + "step": 9387 + }, + { + "epoch": 1.432342529296875e-05, + "step": 9387, + "training_step_time": 0.10731315612792969 + }, + { + "epoch": 1.4324951171875e-05, + "model_forward_time": 0.025457143783569336, + "step": 9388 + }, + { + "epoch": 1.4324951171875e-05, + "step": 9388, + "training_step_time": 0.10696244239807129 + }, + { + "epoch": 1.432647705078125e-05, + "model_forward_time": 0.025299549102783203, + "step": 9389 + }, + { + "epoch": 1.432647705078125e-05, + "step": 9389, + "training_step_time": 0.10823822021484375 + }, + { + "epoch": 1.43280029296875e-05, + "grad_norm": 0.37236952781677246, + "learning_rate": 8.225183092410128e-05, + "loss": 0.0302, + "step": 9390 + }, + { + "epoch": 1.43280029296875e-05, + "model_forward_time": 0.025687456130981445, + "step": 9390 + }, + { + "epoch": 1.43280029296875e-05, + "step": 9390, + "training_step_time": 0.10878229141235352 + }, + { + "epoch": 1.432952880859375e-05, + "model_forward_time": 0.02568960189819336, + "step": 9391 + }, + { + "epoch": 1.432952880859375e-05, + "step": 9391, + "training_step_time": 0.10934019088745117 + }, + { + "epoch": 1.43310546875e-05, + "model_forward_time": 0.025292634963989258, + "step": 9392 + }, + { + "epoch": 1.43310546875e-05, + "step": 9392, + "training_step_time": 0.1673116683959961 + }, + { + "epoch": 1.433258056640625e-05, + "model_forward_time": 0.02449512481689453, + "step": 9393 + }, + { + "epoch": 1.433258056640625e-05, + "step": 9393, + "training_step_time": 0.16326141357421875 + }, + { + "epoch": 1.43341064453125e-05, + "model_forward_time": 0.024966716766357422, + "step": 9394 + }, + { + "epoch": 1.43341064453125e-05, + "step": 9394, + "training_step_time": 0.11365890502929688 + }, + { + "epoch": 1.433563232421875e-05, + "model_forward_time": 0.025084495544433594, + "step": 9395 + }, + { + "epoch": 1.433563232421875e-05, + "step": 9395, + "training_step_time": 0.168412446975708 + }, + { + "epoch": 1.4337158203125e-05, + "model_forward_time": 0.024068832397460938, + "step": 9396 + }, + { + "epoch": 1.4337158203125e-05, + "step": 9396, + "training_step_time": 0.17074203491210938 + }, + { + "epoch": 1.433868408203125e-05, + "model_forward_time": 0.02477884292602539, + "step": 9397 + }, + { + "epoch": 1.433868408203125e-05, + "step": 9397, + "training_step_time": 0.11169052124023438 + }, + { + "epoch": 1.43402099609375e-05, + "model_forward_time": 0.024664878845214844, + "step": 9398 + }, + { + "epoch": 1.43402099609375e-05, + "step": 9398, + "training_step_time": 0.10529661178588867 + }, + { + "epoch": 1.434173583984375e-05, + "model_forward_time": 0.025512218475341797, + "step": 9399 + }, + { + "epoch": 1.434173583984375e-05, + "step": 9399, + "training_step_time": 0.10719561576843262 + }, + { + "epoch": 1.434326171875e-05, + "grad_norm": 0.42937326431274414, + "learning_rate": 8.220969461454322e-05, + "loss": 0.0448, + "step": 9400 + }, + { + "epoch": 1.434326171875e-05, + "model_forward_time": 0.02642822265625, + "step": 9400 + }, + { + "epoch": 1.434326171875e-05, + "step": 9400, + "training_step_time": 0.10843586921691895 + }, + { + "epoch": 1.434478759765625e-05, + "model_forward_time": 0.025964021682739258, + "step": 9401 + }, + { + "epoch": 1.434478759765625e-05, + "step": 9401, + "training_step_time": 0.11408257484436035 + }, + { + "epoch": 1.43463134765625e-05, + "model_forward_time": 0.025191545486450195, + "step": 9402 + }, + { + "epoch": 1.43463134765625e-05, + "step": 9402, + "training_step_time": 0.14751529693603516 + }, + { + "epoch": 1.434783935546875e-05, + "model_forward_time": 0.025240182876586914, + "step": 9403 + }, + { + "epoch": 1.434783935546875e-05, + "step": 9403, + "training_step_time": 0.1714925765991211 + }, + { + "epoch": 1.4349365234375e-05, + "model_forward_time": 0.024412870407104492, + "step": 9404 + }, + { + "epoch": 1.4349365234375e-05, + "step": 9404, + "training_step_time": 0.18913531303405762 + }, + { + "epoch": 1.435089111328125e-05, + "model_forward_time": 0.0242154598236084, + "step": 9405 + }, + { + "epoch": 1.435089111328125e-05, + "step": 9405, + "training_step_time": 0.14969229698181152 + }, + { + "epoch": 1.43524169921875e-05, + "model_forward_time": 0.024442434310913086, + "step": 9406 + }, + { + "epoch": 1.43524169921875e-05, + "step": 9406, + "training_step_time": 0.19721150398254395 + }, + { + "epoch": 1.435394287109375e-05, + "model_forward_time": 0.02468562126159668, + "step": 9407 + }, + { + "epoch": 1.435394287109375e-05, + "step": 9407, + "training_step_time": 0.1781773567199707 + }, + { + "epoch": 1.435546875e-05, + "model_forward_time": 0.024179935455322266, + "step": 9408 + }, + { + "epoch": 1.435546875e-05, + "step": 9408, + "training_step_time": 0.1791691780090332 + }, + { + "epoch": 1.435699462890625e-05, + "model_forward_time": 0.024804353713989258, + "step": 9409 + }, + { + "epoch": 1.435699462890625e-05, + "step": 9409, + "training_step_time": 0.15016937255859375 + }, + { + "epoch": 1.43585205078125e-05, + "grad_norm": 0.5000482201576233, + "learning_rate": 8.2167519167169e-05, + "loss": 0.0389, + "step": 9410 + }, + { + "epoch": 1.43585205078125e-05, + "model_forward_time": 0.02443671226501465, + "step": 9410 + }, + { + "epoch": 1.43585205078125e-05, + "step": 9410, + "training_step_time": 0.17367863655090332 + }, + { + "epoch": 1.436004638671875e-05, + "model_forward_time": 0.02401280403137207, + "step": 9411 + }, + { + "epoch": 1.436004638671875e-05, + "step": 9411, + "training_step_time": 0.17651033401489258 + }, + { + "epoch": 1.4361572265625e-05, + "model_forward_time": 0.024470806121826172, + "step": 9412 + }, + { + "epoch": 1.4361572265625e-05, + "step": 9412, + "training_step_time": 0.12769865989685059 + }, + { + "epoch": 1.436309814453125e-05, + "model_forward_time": 0.024325132369995117, + "step": 9413 + }, + { + "epoch": 1.436309814453125e-05, + "step": 9413, + "training_step_time": 0.10736441612243652 + }, + { + "epoch": 1.43646240234375e-05, + "model_forward_time": 0.025475025177001953, + "step": 9414 + }, + { + "epoch": 1.43646240234375e-05, + "step": 9414, + "training_step_time": 0.10654568672180176 + }, + { + "epoch": 1.436614990234375e-05, + "model_forward_time": 0.025160551071166992, + "step": 9415 + }, + { + "epoch": 1.436614990234375e-05, + "step": 9415, + "training_step_time": 0.11472940444946289 + }, + { + "epoch": 1.436767578125e-05, + "model_forward_time": 0.02504420280456543, + "step": 9416 + }, + { + "epoch": 1.436767578125e-05, + "step": 9416, + "training_step_time": 0.1070094108581543 + }, + { + "epoch": 1.436920166015625e-05, + "model_forward_time": 0.02505207061767578, + "step": 9417 + }, + { + "epoch": 1.436920166015625e-05, + "step": 9417, + "training_step_time": 0.15564656257629395 + }, + { + "epoch": 1.43707275390625e-05, + "model_forward_time": 0.025227069854736328, + "step": 9418 + }, + { + "epoch": 1.43707275390625e-05, + "step": 9418, + "training_step_time": 0.1701970100402832 + }, + { + "epoch": 1.437225341796875e-05, + "model_forward_time": 0.02423238754272461, + "step": 9419 + }, + { + "epoch": 1.437225341796875e-05, + "step": 9419, + "training_step_time": 0.1639697551727295 + }, + { + "epoch": 1.4373779296875e-05, + "grad_norm": 0.7076467871665955, + "learning_rate": 8.212530463322583e-05, + "loss": 0.0343, + "step": 9420 + }, + { + "epoch": 1.4373779296875e-05, + "model_forward_time": 0.02458977699279785, + "step": 9420 + }, + { + "epoch": 1.4373779296875e-05, + "step": 9420, + "training_step_time": 0.1418919563293457 + }, + { + "epoch": 1.437530517578125e-05, + "model_forward_time": 0.024471282958984375, + "step": 9421 + }, + { + "epoch": 1.437530517578125e-05, + "step": 9421, + "training_step_time": 0.14858531951904297 + }, + { + "epoch": 1.43768310546875e-05, + "model_forward_time": 0.024564743041992188, + "step": 9422 + }, + { + "epoch": 1.43768310546875e-05, + "step": 9422, + "training_step_time": 0.13172292709350586 + }, + { + "epoch": 1.437835693359375e-05, + "model_forward_time": 0.023899078369140625, + "step": 9423 + }, + { + "epoch": 1.437835693359375e-05, + "step": 9423, + "training_step_time": 0.13115763664245605 + }, + { + "epoch": 1.43798828125e-05, + "model_forward_time": 0.024674415588378906, + "step": 9424 + }, + { + "epoch": 1.43798828125e-05, + "step": 9424, + "training_step_time": 0.12712359428405762 + }, + { + "epoch": 1.438140869140625e-05, + "model_forward_time": 0.024802446365356445, + "step": 9425 + }, + { + "epoch": 1.438140869140625e-05, + "step": 9425, + "training_step_time": 0.12287735939025879 + }, + { + "epoch": 1.43829345703125e-05, + "model_forward_time": 0.02494072914123535, + "step": 9426 + }, + { + "epoch": 1.43829345703125e-05, + "step": 9426, + "training_step_time": 0.1213235855102539 + }, + { + "epoch": 1.438446044921875e-05, + "model_forward_time": 0.025444507598876953, + "step": 9427 + }, + { + "epoch": 1.438446044921875e-05, + "step": 9427, + "training_step_time": 0.11888933181762695 + }, + { + "epoch": 1.4385986328125e-05, + "model_forward_time": 0.025269508361816406, + "step": 9428 + }, + { + "epoch": 1.4385986328125e-05, + "step": 9428, + "training_step_time": 0.10966730117797852 + }, + { + "epoch": 1.438751220703125e-05, + "model_forward_time": 0.02518749237060547, + "step": 9429 + }, + { + "epoch": 1.438751220703125e-05, + "step": 9429, + "training_step_time": 0.11363387107849121 + }, + { + "epoch": 1.43890380859375e-05, + "grad_norm": 0.4879451394081116, + "learning_rate": 8.20830510640083e-05, + "loss": 0.0283, + "step": 9430 + }, + { + "epoch": 1.43890380859375e-05, + "model_forward_time": 0.025077342987060547, + "step": 9430 + }, + { + "epoch": 1.43890380859375e-05, + "step": 9430, + "training_step_time": 0.1096804141998291 + }, + { + "epoch": 1.439056396484375e-05, + "model_forward_time": 0.025364398956298828, + "step": 9431 + }, + { + "epoch": 1.439056396484375e-05, + "step": 9431, + "training_step_time": 0.11052846908569336 + }, + { + "epoch": 1.439208984375e-05, + "model_forward_time": 0.025980472564697266, + "step": 9432 + }, + { + "epoch": 1.439208984375e-05, + "step": 9432, + "training_step_time": 0.11096501350402832 + }, + { + "epoch": 1.439361572265625e-05, + "model_forward_time": 0.025103330612182617, + "step": 9433 + }, + { + "epoch": 1.439361572265625e-05, + "step": 9433, + "training_step_time": 0.1758592128753662 + }, + { + "epoch": 1.43951416015625e-05, + "model_forward_time": 0.02556014060974121, + "step": 9434 + }, + { + "epoch": 1.43951416015625e-05, + "step": 9434, + "training_step_time": 0.16201496124267578 + }, + { + "epoch": 1.439666748046875e-05, + "model_forward_time": 0.02410578727722168, + "step": 9435 + }, + { + "epoch": 1.439666748046875e-05, + "step": 9435, + "training_step_time": 0.10678410530090332 + }, + { + "epoch": 1.4398193359375e-05, + "model_forward_time": 0.024822235107421875, + "step": 9436 + }, + { + "epoch": 1.4398193359375e-05, + "step": 9436, + "training_step_time": 0.10857462882995605 + }, + { + "epoch": 1.439971923828125e-05, + "model_forward_time": 0.02538156509399414, + "step": 9437 + }, + { + "epoch": 1.439971923828125e-05, + "step": 9437, + "training_step_time": 0.11333513259887695 + }, + { + "epoch": 1.44012451171875e-05, + "model_forward_time": 0.025501012802124023, + "step": 9438 + }, + { + "epoch": 1.44012451171875e-05, + "step": 9438, + "training_step_time": 0.11481881141662598 + }, + { + "epoch": 1.440277099609375e-05, + "model_forward_time": 0.025145769119262695, + "step": 9439 + }, + { + "epoch": 1.440277099609375e-05, + "step": 9439, + "training_step_time": 0.11007142066955566 + }, + { + "epoch": 1.4404296875e-05, + "grad_norm": 0.6473632454872131, + "learning_rate": 8.204075851085849e-05, + "loss": 0.0311, + "step": 9440 + }, + { + "epoch": 1.4404296875e-05, + "model_forward_time": 0.025249481201171875, + "step": 9440 + }, + { + "epoch": 1.4404296875e-05, + "step": 9440, + "training_step_time": 0.10944151878356934 + }, + { + "epoch": 1.440582275390625e-05, + "model_forward_time": 0.025234222412109375, + "step": 9441 + }, + { + "epoch": 1.440582275390625e-05, + "step": 9441, + "training_step_time": 0.1064310073852539 + }, + { + "epoch": 1.44073486328125e-05, + "model_forward_time": 0.028178691864013672, + "step": 9442 + }, + { + "epoch": 1.44073486328125e-05, + "step": 9442, + "training_step_time": 0.1155390739440918 + }, + { + "epoch": 1.440887451171875e-05, + "model_forward_time": 0.024820804595947266, + "step": 9443 + }, + { + "epoch": 1.440887451171875e-05, + "step": 9443, + "training_step_time": 0.11186695098876953 + }, + { + "epoch": 1.4410400390625e-05, + "model_forward_time": 0.025270700454711914, + "step": 9444 + }, + { + "epoch": 1.4410400390625e-05, + "step": 9444, + "training_step_time": 0.10855865478515625 + }, + { + "epoch": 1.441192626953125e-05, + "model_forward_time": 0.025063514709472656, + "step": 9445 + }, + { + "epoch": 1.441192626953125e-05, + "step": 9445, + "training_step_time": 0.10862016677856445 + }, + { + "epoch": 1.44134521484375e-05, + "model_forward_time": 0.024934053421020508, + "step": 9446 + }, + { + "epoch": 1.44134521484375e-05, + "step": 9446, + "training_step_time": 0.10694766044616699 + }, + { + "epoch": 1.441497802734375e-05, + "model_forward_time": 0.025220870971679688, + "step": 9447 + }, + { + "epoch": 1.441497802734375e-05, + "step": 9447, + "training_step_time": 0.1406717300415039 + }, + { + "epoch": 1.441650390625e-05, + "model_forward_time": 0.0249941349029541, + "step": 9448 + }, + { + "epoch": 1.441650390625e-05, + "step": 9448, + "training_step_time": 0.14068269729614258 + }, + { + "epoch": 1.441802978515625e-05, + "model_forward_time": 0.024707794189453125, + "step": 9449 + }, + { + "epoch": 1.441802978515625e-05, + "step": 9449, + "training_step_time": 0.18294644355773926 + }, + { + "epoch": 1.44195556640625e-05, + "grad_norm": 0.567092776298523, + "learning_rate": 8.199842702516583e-05, + "loss": 0.0267, + "step": 9450 + }, + { + "epoch": 1.44195556640625e-05, + "model_forward_time": 0.02435922622680664, + "step": 9450 + }, + { + "epoch": 1.44195556640625e-05, + "step": 9450, + "training_step_time": 0.1739346981048584 + }, + { + "epoch": 1.442108154296875e-05, + "model_forward_time": 0.024892091751098633, + "step": 9451 + }, + { + "epoch": 1.442108154296875e-05, + "step": 9451, + "training_step_time": 0.1849069595336914 + }, + { + "epoch": 1.4422607421875e-05, + "model_forward_time": 0.024327754974365234, + "step": 9452 + }, + { + "epoch": 1.4422607421875e-05, + "step": 9452, + "training_step_time": 0.2120833396911621 + }, + { + "epoch": 1.442413330078125e-05, + "model_forward_time": 0.024076223373413086, + "step": 9453 + }, + { + "epoch": 1.442413330078125e-05, + "step": 9453, + "training_step_time": 0.13730549812316895 + }, + { + "epoch": 1.44256591796875e-05, + "model_forward_time": 0.02444624900817871, + "step": 9454 + }, + { + "epoch": 1.44256591796875e-05, + "step": 9454, + "training_step_time": 0.15231609344482422 + }, + { + "epoch": 1.442718505859375e-05, + "model_forward_time": 0.025484085083007812, + "step": 9455 + }, + { + "epoch": 1.442718505859375e-05, + "step": 9455, + "training_step_time": 0.12475156784057617 + }, + { + "epoch": 1.44287109375e-05, + "model_forward_time": 0.024611234664916992, + "step": 9456 + }, + { + "epoch": 1.44287109375e-05, + "step": 9456, + "training_step_time": 0.11552882194519043 + }, + { + "epoch": 1.443023681640625e-05, + "model_forward_time": 0.0254366397857666, + "step": 9457 + }, + { + "epoch": 1.443023681640625e-05, + "step": 9457, + "training_step_time": 0.10818004608154297 + }, + { + "epoch": 1.44317626953125e-05, + "model_forward_time": 0.02500462532043457, + "step": 9458 + }, + { + "epoch": 1.44317626953125e-05, + "step": 9458, + "training_step_time": 0.1084434986114502 + }, + { + "epoch": 1.443328857421875e-05, + "model_forward_time": 0.02553582191467285, + "step": 9459 + }, + { + "epoch": 1.443328857421875e-05, + "step": 9459, + "training_step_time": 0.10947966575622559 + }, + { + "epoch": 1.4434814453125e-05, + "grad_norm": 0.2936333417892456, + "learning_rate": 8.19560566583671e-05, + "loss": 0.029, + "step": 9460 + }, + { + "epoch": 1.4434814453125e-05, + "model_forward_time": 0.025308609008789062, + "step": 9460 + }, + { + "epoch": 1.4434814453125e-05, + "step": 9460, + "training_step_time": 0.11976122856140137 + }, + { + "epoch": 1.443634033203125e-05, + "model_forward_time": 0.02499842643737793, + "step": 9461 + }, + { + "epoch": 1.443634033203125e-05, + "step": 9461, + "training_step_time": 0.10711407661437988 + }, + { + "epoch": 1.44378662109375e-05, + "model_forward_time": 0.025167465209960938, + "step": 9462 + }, + { + "epoch": 1.44378662109375e-05, + "step": 9462, + "training_step_time": 0.10757589340209961 + }, + { + "epoch": 1.443939208984375e-05, + "model_forward_time": 0.025046110153198242, + "step": 9463 + }, + { + "epoch": 1.443939208984375e-05, + "step": 9463, + "training_step_time": 0.10794234275817871 + }, + { + "epoch": 1.444091796875e-05, + "model_forward_time": 0.025148630142211914, + "step": 9464 + }, + { + "epoch": 1.444091796875e-05, + "step": 9464, + "training_step_time": 0.10971498489379883 + }, + { + "epoch": 1.444244384765625e-05, + "model_forward_time": 0.026518583297729492, + "step": 9465 + }, + { + "epoch": 1.444244384765625e-05, + "step": 9465, + "training_step_time": 0.11096501350402832 + }, + { + "epoch": 1.44439697265625e-05, + "model_forward_time": 0.025455474853515625, + "step": 9466 + }, + { + "epoch": 1.44439697265625e-05, + "step": 9466, + "training_step_time": 0.11070609092712402 + }, + { + "epoch": 1.444549560546875e-05, + "model_forward_time": 0.025563955307006836, + "step": 9467 + }, + { + "epoch": 1.444549560546875e-05, + "step": 9467, + "training_step_time": 0.1109166145324707 + }, + { + "epoch": 1.4447021484375e-05, + "model_forward_time": 0.02547168731689453, + "step": 9468 + }, + { + "epoch": 1.4447021484375e-05, + "step": 9468, + "training_step_time": 0.11037540435791016 + }, + { + "epoch": 1.444854736328125e-05, + "model_forward_time": 0.024895191192626953, + "step": 9469 + }, + { + "epoch": 1.444854736328125e-05, + "step": 9469, + "training_step_time": 0.1079864501953125 + }, + { + "epoch": 1.44500732421875e-05, + "grad_norm": 0.46716660261154175, + "learning_rate": 8.191364746194625e-05, + "loss": 0.0505, + "step": 9470 + }, + { + "epoch": 1.44500732421875e-05, + "model_forward_time": 0.025061368942260742, + "step": 9470 + }, + { + "epoch": 1.44500732421875e-05, + "step": 9470, + "training_step_time": 0.10871458053588867 + }, + { + "epoch": 1.445159912109375e-05, + "model_forward_time": 0.02486395835876465, + "step": 9471 + }, + { + "epoch": 1.445159912109375e-05, + "step": 9471, + "training_step_time": 0.11480283737182617 + }, + { + "epoch": 1.4453125e-05, + "model_forward_time": 0.025530099868774414, + "step": 9472 + }, + { + "epoch": 1.4453125e-05, + "step": 9472, + "training_step_time": 0.11725187301635742 + }, + { + "epoch": 1.445465087890625e-05, + "model_forward_time": 0.025233983993530273, + "step": 9473 + }, + { + "epoch": 1.445465087890625e-05, + "step": 9473, + "training_step_time": 0.10801315307617188 + }, + { + "epoch": 1.44561767578125e-05, + "model_forward_time": 0.02501535415649414, + "step": 9474 + }, + { + "epoch": 1.44561767578125e-05, + "step": 9474, + "training_step_time": 0.10945701599121094 + }, + { + "epoch": 1.445770263671875e-05, + "model_forward_time": 0.025026559829711914, + "step": 9475 + }, + { + "epoch": 1.445770263671875e-05, + "step": 9475, + "training_step_time": 0.10894036293029785 + }, + { + "epoch": 1.4459228515625e-05, + "model_forward_time": 0.02521204948425293, + "step": 9476 + }, + { + "epoch": 1.4459228515625e-05, + "step": 9476, + "training_step_time": 0.10709834098815918 + }, + { + "epoch": 1.446075439453125e-05, + "model_forward_time": 0.025342702865600586, + "step": 9477 + }, + { + "epoch": 1.446075439453125e-05, + "step": 9477, + "training_step_time": 0.10789132118225098 + }, + { + "epoch": 1.44622802734375e-05, + "model_forward_time": 0.02583003044128418, + "step": 9478 + }, + { + "epoch": 1.44622802734375e-05, + "step": 9478, + "training_step_time": 0.10968708992004395 + }, + { + "epoch": 1.446380615234375e-05, + "model_forward_time": 0.025195598602294922, + "step": 9479 + }, + { + "epoch": 1.446380615234375e-05, + "step": 9479, + "training_step_time": 0.1731727123260498 + }, + { + "epoch": 1.446533203125e-05, + "grad_norm": 0.2792838215827942, + "learning_rate": 8.18711994874345e-05, + "loss": 0.0281, + "step": 9480 + }, + { + "epoch": 1.446533203125e-05, + "model_forward_time": 0.024681806564331055, + "step": 9480 + }, + { + "epoch": 1.446533203125e-05, + "step": 9480, + "training_step_time": 0.16242027282714844 + }, + { + "epoch": 1.446685791015625e-05, + "model_forward_time": 0.02485823631286621, + "step": 9481 + }, + { + "epoch": 1.446685791015625e-05, + "step": 9481, + "training_step_time": 0.10384654998779297 + }, + { + "epoch": 1.44683837890625e-05, + "model_forward_time": 0.025029420852661133, + "step": 9482 + }, + { + "epoch": 1.44683837890625e-05, + "step": 9482, + "training_step_time": 0.10553216934204102 + }, + { + "epoch": 1.446990966796875e-05, + "model_forward_time": 0.025967836380004883, + "step": 9483 + }, + { + "epoch": 1.446990966796875e-05, + "step": 9483, + "training_step_time": 0.12078142166137695 + }, + { + "epoch": 1.4471435546875e-05, + "model_forward_time": 0.025418996810913086, + "step": 9484 + }, + { + "epoch": 1.4471435546875e-05, + "step": 9484, + "training_step_time": 0.11324691772460938 + }, + { + "epoch": 1.447296142578125e-05, + "model_forward_time": 0.025324344635009766, + "step": 9485 + }, + { + "epoch": 1.447296142578125e-05, + "step": 9485, + "training_step_time": 0.10676264762878418 + }, + { + "epoch": 1.44744873046875e-05, + "model_forward_time": 0.026190757751464844, + "step": 9486 + }, + { + "epoch": 1.44744873046875e-05, + "step": 9486, + "training_step_time": 0.1081688404083252 + }, + { + "epoch": 1.447601318359375e-05, + "model_forward_time": 0.025905132293701172, + "step": 9487 + }, + { + "epoch": 1.447601318359375e-05, + "step": 9487, + "training_step_time": 0.10705804824829102 + }, + { + "epoch": 1.44775390625e-05, + "model_forward_time": 0.02496957778930664, + "step": 9488 + }, + { + "epoch": 1.44775390625e-05, + "step": 9488, + "training_step_time": 0.1114494800567627 + }, + { + "epoch": 1.447906494140625e-05, + "model_forward_time": 0.025019407272338867, + "step": 9489 + }, + { + "epoch": 1.447906494140625e-05, + "step": 9489, + "training_step_time": 0.10798192024230957 + }, + { + "epoch": 1.44805908203125e-05, + "grad_norm": 0.3415721356868744, + "learning_rate": 8.182871278641009e-05, + "loss": 0.0259, + "step": 9490 + }, + { + "epoch": 1.44805908203125e-05, + "model_forward_time": 0.02524852752685547, + "step": 9490 + }, + { + "epoch": 1.44805908203125e-05, + "step": 9490, + "training_step_time": 0.10957932472229004 + }, + { + "epoch": 1.448211669921875e-05, + "model_forward_time": 0.025458335876464844, + "step": 9491 + }, + { + "epoch": 1.448211669921875e-05, + "step": 9491, + "training_step_time": 0.1059732437133789 + }, + { + "epoch": 1.4483642578125e-05, + "model_forward_time": 0.025094985961914062, + "step": 9492 + }, + { + "epoch": 1.4483642578125e-05, + "step": 9492, + "training_step_time": 0.10744810104370117 + }, + { + "epoch": 1.448516845703125e-05, + "model_forward_time": 0.025411605834960938, + "step": 9493 + }, + { + "epoch": 1.448516845703125e-05, + "step": 9493, + "training_step_time": 0.1339278221130371 + }, + { + "epoch": 1.44866943359375e-05, + "model_forward_time": 0.025409221649169922, + "step": 9494 + }, + { + "epoch": 1.44866943359375e-05, + "step": 9494, + "training_step_time": 0.13221406936645508 + }, + { + "epoch": 1.448822021484375e-05, + "model_forward_time": 0.02527904510498047, + "step": 9495 + }, + { + "epoch": 1.448822021484375e-05, + "step": 9495, + "training_step_time": 0.10785627365112305 + }, + { + "epoch": 1.448974609375e-05, + "model_forward_time": 0.02569437026977539, + "step": 9496 + }, + { + "epoch": 1.448974609375e-05, + "step": 9496, + "training_step_time": 0.13548731803894043 + }, + { + "epoch": 1.449127197265625e-05, + "model_forward_time": 0.026509523391723633, + "step": 9497 + }, + { + "epoch": 1.449127197265625e-05, + "step": 9497, + "training_step_time": 0.17113208770751953 + }, + { + "epoch": 1.44927978515625e-05, + "model_forward_time": 0.02646946907043457, + "step": 9498 + }, + { + "epoch": 1.44927978515625e-05, + "step": 9498, + "training_step_time": 0.18021655082702637 + }, + { + "epoch": 1.449432373046875e-05, + "model_forward_time": 0.024465560913085938, + "step": 9499 + }, + { + "epoch": 1.449432373046875e-05, + "step": 9499, + "training_step_time": 0.19385623931884766 + }, + { + "epoch": 1.4495849609375e-05, + "grad_norm": 0.2710087299346924, + "learning_rate": 8.178618741049842e-05, + "loss": 0.0226, + "step": 9500 + }, + { + "epoch": 1.4495849609375e-05, + "model_forward_time": 0.024242877960205078, + "step": 9500 + }, + { + "epoch": 1.4495849609375e-05, + "step": 9500, + "training_step_time": 0.16071033477783203 + }, + { + "epoch": 1.449737548828125e-05, + "model_forward_time": 0.024139404296875, + "step": 9501 + }, + { + "epoch": 1.449737548828125e-05, + "step": 9501, + "training_step_time": 0.21726179122924805 + }, + { + "epoch": 1.44989013671875e-05, + "model_forward_time": 0.024756431579589844, + "step": 9502 + }, + { + "epoch": 1.44989013671875e-05, + "step": 9502, + "training_step_time": 0.11127328872680664 + }, + { + "epoch": 1.450042724609375e-05, + "model_forward_time": 0.02484607696533203, + "step": 9503 + }, + { + "epoch": 1.450042724609375e-05, + "step": 9503, + "training_step_time": 0.10360264778137207 + }, + { + "epoch": 1.4501953125e-05, + "model_forward_time": 0.025338411331176758, + "step": 9504 + }, + { + "epoch": 1.4501953125e-05, + "step": 9504, + "training_step_time": 0.10797262191772461 + }, + { + "epoch": 1.450347900390625e-05, + "model_forward_time": 0.02516627311706543, + "step": 9505 + }, + { + "epoch": 1.450347900390625e-05, + "step": 9505, + "training_step_time": 0.10799360275268555 + }, + { + "epoch": 1.45050048828125e-05, + "model_forward_time": 0.025120019912719727, + "step": 9506 + }, + { + "epoch": 1.45050048828125e-05, + "step": 9506, + "training_step_time": 0.11000633239746094 + }, + { + "epoch": 1.450653076171875e-05, + "model_forward_time": 0.024922609329223633, + "step": 9507 + }, + { + "epoch": 1.450653076171875e-05, + "step": 9507, + "training_step_time": 0.10812830924987793 + }, + { + "epoch": 1.4508056640625e-05, + "model_forward_time": 0.025143146514892578, + "step": 9508 + }, + { + "epoch": 1.4508056640625e-05, + "step": 9508, + "training_step_time": 0.11268377304077148 + }, + { + "epoch": 1.450958251953125e-05, + "model_forward_time": 0.02468419075012207, + "step": 9509 + }, + { + "epoch": 1.450958251953125e-05, + "step": 9509, + "training_step_time": 0.11316180229187012 + }, + { + "epoch": 1.45111083984375e-05, + "grad_norm": 0.21017690002918243, + "learning_rate": 8.174362341137177e-05, + "loss": 0.0256, + "step": 9510 + }, + { + "epoch": 1.45111083984375e-05, + "model_forward_time": 0.023894786834716797, + "step": 9510 + }, + { + "epoch": 1.45111083984375e-05, + "step": 9510, + "training_step_time": 0.1072230339050293 + }, + { + "epoch": 1.451263427734375e-05, + "model_forward_time": 0.023804664611816406, + "step": 9511 + }, + { + "epoch": 1.451263427734375e-05, + "step": 9511, + "training_step_time": 0.11113691329956055 + }, + { + "epoch": 1.451416015625e-05, + "model_forward_time": 0.02505660057067871, + "step": 9512 + }, + { + "epoch": 1.451416015625e-05, + "step": 9512, + "training_step_time": 0.10786795616149902 + }, + { + "epoch": 1.451568603515625e-05, + "model_forward_time": 0.025088071823120117, + "step": 9513 + }, + { + "epoch": 1.451568603515625e-05, + "step": 9513, + "training_step_time": 0.1077268123626709 + }, + { + "epoch": 1.45172119140625e-05, + "model_forward_time": 0.02546215057373047, + "step": 9514 + }, + { + "epoch": 1.45172119140625e-05, + "step": 9514, + "training_step_time": 0.10877370834350586 + }, + { + "epoch": 1.451873779296875e-05, + "model_forward_time": 0.02508997917175293, + "step": 9515 + }, + { + "epoch": 1.451873779296875e-05, + "step": 9515, + "training_step_time": 0.15111684799194336 + }, + { + "epoch": 1.4520263671875e-05, + "model_forward_time": 0.02387261390686035, + "step": 9516 + }, + { + "epoch": 1.4520263671875e-05, + "step": 9516, + "training_step_time": 0.17554235458374023 + }, + { + "epoch": 1.452178955078125e-05, + "model_forward_time": 0.024065732955932617, + "step": 9517 + }, + { + "epoch": 1.452178955078125e-05, + "step": 9517, + "training_step_time": 0.16174054145812988 + }, + { + "epoch": 1.45233154296875e-05, + "model_forward_time": 0.024174213409423828, + "step": 9518 + }, + { + "epoch": 1.45233154296875e-05, + "step": 9518, + "training_step_time": 0.14461040496826172 + }, + { + "epoch": 1.452484130859375e-05, + "model_forward_time": 0.024128198623657227, + "step": 9519 + }, + { + "epoch": 1.452484130859375e-05, + "step": 9519, + "training_step_time": 0.13492989540100098 + }, + { + "epoch": 1.45263671875e-05, + "grad_norm": 0.3300616443157196, + "learning_rate": 8.170102084074946e-05, + "loss": 0.0289, + "step": 9520 + }, + { + "epoch": 1.45263671875e-05, + "model_forward_time": 0.02454686164855957, + "step": 9520 + }, + { + "epoch": 1.45263671875e-05, + "step": 9520, + "training_step_time": 0.12767696380615234 + }, + { + "epoch": 1.452789306640625e-05, + "model_forward_time": 0.024207353591918945, + "step": 9521 + }, + { + "epoch": 1.452789306640625e-05, + "step": 9521, + "training_step_time": 0.1253831386566162 + }, + { + "epoch": 1.45294189453125e-05, + "model_forward_time": 0.024947166442871094, + "step": 9522 + }, + { + "epoch": 1.45294189453125e-05, + "step": 9522, + "training_step_time": 0.10431671142578125 + }, + { + "epoch": 1.453094482421875e-05, + "model_forward_time": 0.024843454360961914, + "step": 9523 + }, + { + "epoch": 1.453094482421875e-05, + "step": 9523, + "training_step_time": 0.11184525489807129 + }, + { + "epoch": 1.4532470703125e-05, + "model_forward_time": 0.024613618850708008, + "step": 9524 + }, + { + "epoch": 1.4532470703125e-05, + "step": 9524, + "training_step_time": 0.10435128211975098 + }, + { + "epoch": 1.453399658203125e-05, + "model_forward_time": 0.0252227783203125, + "step": 9525 + }, + { + "epoch": 1.453399658203125e-05, + "step": 9525, + "training_step_time": 0.1284794807434082 + }, + { + "epoch": 1.45355224609375e-05, + "model_forward_time": 0.02527022361755371, + "step": 9526 + }, + { + "epoch": 1.45355224609375e-05, + "step": 9526, + "training_step_time": 0.11169075965881348 + }, + { + "epoch": 1.453704833984375e-05, + "model_forward_time": 0.025441408157348633, + "step": 9527 + }, + { + "epoch": 1.453704833984375e-05, + "step": 9527, + "training_step_time": 0.2080228328704834 + }, + { + "epoch": 1.453857421875e-05, + "model_forward_time": 0.02463984489440918, + "step": 9528 + }, + { + "epoch": 1.453857421875e-05, + "step": 9528, + "training_step_time": 0.1180112361907959 + }, + { + "epoch": 1.454010009765625e-05, + "model_forward_time": 0.02492547035217285, + "step": 9529 + }, + { + "epoch": 1.454010009765625e-05, + "step": 9529, + "training_step_time": 0.10512661933898926 + }, + { + "epoch": 1.45416259765625e-05, + "grad_norm": 0.3115748465061188, + "learning_rate": 8.165837975039763e-05, + "loss": 0.031, + "step": 9530 + }, + { + "epoch": 1.45416259765625e-05, + "model_forward_time": 0.0252840518951416, + "step": 9530 + }, + { + "epoch": 1.45416259765625e-05, + "step": 9530, + "training_step_time": 0.10770344734191895 + }, + { + "epoch": 1.454315185546875e-05, + "model_forward_time": 0.0253448486328125, + "step": 9531 + }, + { + "epoch": 1.454315185546875e-05, + "step": 9531, + "training_step_time": 0.10718703269958496 + }, + { + "epoch": 1.4544677734375e-05, + "model_forward_time": 0.025614261627197266, + "step": 9532 + }, + { + "epoch": 1.4544677734375e-05, + "step": 9532, + "training_step_time": 0.11180472373962402 + }, + { + "epoch": 1.454620361328125e-05, + "model_forward_time": 0.02585291862487793, + "step": 9533 + }, + { + "epoch": 1.454620361328125e-05, + "step": 9533, + "training_step_time": 0.1879589557647705 + }, + { + "epoch": 1.45477294921875e-05, + "model_forward_time": 0.023278236389160156, + "step": 9534 + }, + { + "epoch": 1.45477294921875e-05, + "step": 9534, + "training_step_time": 0.20203685760498047 + }, + { + "epoch": 1.454925537109375e-05, + "model_forward_time": 0.02315831184387207, + "step": 9535 + }, + { + "epoch": 1.454925537109375e-05, + "step": 9535, + "training_step_time": 0.20372414588928223 + }, + { + "epoch": 1.455078125e-05, + "model_forward_time": 0.023410797119140625, + "step": 9536 + }, + { + "epoch": 1.455078125e-05, + "step": 9536, + "training_step_time": 0.21012592315673828 + }, + { + "epoch": 1.455230712890625e-05, + "model_forward_time": 0.02411794662475586, + "step": 9537 + }, + { + "epoch": 1.455230712890625e-05, + "step": 9537, + "training_step_time": 0.20641613006591797 + }, + { + "epoch": 1.45538330078125e-05, + "model_forward_time": 0.024140357971191406, + "step": 9538 + }, + { + "epoch": 1.45538330078125e-05, + "step": 9538, + "training_step_time": 0.22237777709960938 + }, + { + "epoch": 1.455535888671875e-05, + "model_forward_time": 0.02439117431640625, + "step": 9539 + }, + { + "epoch": 1.455535888671875e-05, + "step": 9539, + "training_step_time": 0.12183237075805664 + }, + { + "epoch": 1.4556884765625e-05, + "grad_norm": 0.6548312306404114, + "learning_rate": 8.161570019212921e-05, + "loss": 0.0393, + "step": 9540 + }, + { + "epoch": 1.4556884765625e-05, + "model_forward_time": 0.0243532657623291, + "step": 9540 + }, + { + "epoch": 1.4556884765625e-05, + "step": 9540, + "training_step_time": 0.1226193904876709 + }, + { + "epoch": 1.455841064453125e-05, + "model_forward_time": 0.02480626106262207, + "step": 9541 + }, + { + "epoch": 1.455841064453125e-05, + "step": 9541, + "training_step_time": 0.14112520217895508 + }, + { + "epoch": 1.45599365234375e-05, + "model_forward_time": 0.0285947322845459, + "step": 9542 + }, + { + "epoch": 1.45599365234375e-05, + "step": 9542, + "training_step_time": 0.17905378341674805 + }, + { + "epoch": 1.456146240234375e-05, + "model_forward_time": 0.024629831314086914, + "step": 9543 + }, + { + "epoch": 1.456146240234375e-05, + "step": 9543, + "training_step_time": 0.17879033088684082 + }, + { + "epoch": 1.456298828125e-05, + "model_forward_time": 0.024370193481445312, + "step": 9544 + }, + { + "epoch": 1.456298828125e-05, + "step": 9544, + "training_step_time": 0.10547280311584473 + }, + { + "epoch": 1.456451416015625e-05, + "model_forward_time": 0.024445056915283203, + "step": 9545 + }, + { + "epoch": 1.456451416015625e-05, + "step": 9545, + "training_step_time": 0.10920095443725586 + }, + { + "epoch": 1.45660400390625e-05, + "model_forward_time": 0.02520895004272461, + "step": 9546 + }, + { + "epoch": 1.45660400390625e-05, + "step": 9546, + "training_step_time": 0.11049532890319824 + }, + { + "epoch": 1.456756591796875e-05, + "model_forward_time": 0.025287866592407227, + "step": 9547 + }, + { + "epoch": 1.456756591796875e-05, + "step": 9547, + "training_step_time": 0.11164546012878418 + }, + { + "epoch": 1.4569091796875e-05, + "model_forward_time": 0.02517247200012207, + "step": 9548 + }, + { + "epoch": 1.4569091796875e-05, + "step": 9548, + "training_step_time": 0.11104512214660645 + }, + { + "epoch": 1.457061767578125e-05, + "model_forward_time": 0.025051355361938477, + "step": 9549 + }, + { + "epoch": 1.457061767578125e-05, + "step": 9549, + "training_step_time": 0.11538076400756836 + }, + { + "epoch": 1.45721435546875e-05, + "grad_norm": 0.331142783164978, + "learning_rate": 8.157298221780389e-05, + "loss": 0.0299, + "step": 9550 + }, + { + "epoch": 1.45721435546875e-05, + "model_forward_time": 0.025533676147460938, + "step": 9550 + }, + { + "epoch": 1.45721435546875e-05, + "step": 9550, + "training_step_time": 0.1122429370880127 + }, + { + "epoch": 1.457366943359375e-05, + "model_forward_time": 0.025920867919921875, + "step": 9551 + }, + { + "epoch": 1.457366943359375e-05, + "step": 9551, + "training_step_time": 0.11168408393859863 + }, + { + "epoch": 1.45751953125e-05, + "model_forward_time": 0.024968862533569336, + "step": 9552 + }, + { + "epoch": 1.45751953125e-05, + "step": 9552, + "training_step_time": 0.11013317108154297 + }, + { + "epoch": 1.457672119140625e-05, + "model_forward_time": 0.025202035903930664, + "step": 9553 + }, + { + "epoch": 1.457672119140625e-05, + "step": 9553, + "training_step_time": 0.1139991283416748 + }, + { + "epoch": 1.45782470703125e-05, + "model_forward_time": 0.026732444763183594, + "step": 9554 + }, + { + "epoch": 1.45782470703125e-05, + "step": 9554, + "training_step_time": 0.11099481582641602 + }, + { + "epoch": 1.457977294921875e-05, + "model_forward_time": 0.02541351318359375, + "step": 9555 + }, + { + "epoch": 1.457977294921875e-05, + "step": 9555, + "training_step_time": 0.10876035690307617 + }, + { + "epoch": 1.4581298828125e-05, + "model_forward_time": 0.025623559951782227, + "step": 9556 + }, + { + "epoch": 1.4581298828125e-05, + "step": 9556, + "training_step_time": 0.10907530784606934 + }, + { + "epoch": 1.458282470703125e-05, + "model_forward_time": 0.02522587776184082, + "step": 9557 + }, + { + "epoch": 1.458282470703125e-05, + "step": 9557, + "training_step_time": 0.11230945587158203 + }, + { + "epoch": 1.45843505859375e-05, + "model_forward_time": 0.024865150451660156, + "step": 9558 + }, + { + "epoch": 1.45843505859375e-05, + "step": 9558, + "training_step_time": 0.11005592346191406 + }, + { + "epoch": 1.458587646484375e-05, + "model_forward_time": 0.025716781616210938, + "step": 9559 + }, + { + "epoch": 1.458587646484375e-05, + "step": 9559, + "training_step_time": 0.11000323295593262 + }, + { + "epoch": 1.458740234375e-05, + "grad_norm": 0.5295901894569397, + "learning_rate": 8.153022587932803e-05, + "loss": 0.0347, + "step": 9560 + }, + { + "epoch": 1.458740234375e-05, + "model_forward_time": 0.02482891082763672, + "step": 9560 + }, + { + "epoch": 1.458740234375e-05, + "step": 9560, + "training_step_time": 0.11299443244934082 + }, + { + "epoch": 1.458892822265625e-05, + "model_forward_time": 0.025539636611938477, + "step": 9561 + }, + { + "epoch": 1.458892822265625e-05, + "step": 9561, + "training_step_time": 0.10791587829589844 + }, + { + "epoch": 1.45904541015625e-05, + "model_forward_time": 0.02529740333557129, + "step": 9562 + }, + { + "epoch": 1.45904541015625e-05, + "step": 9562, + "training_step_time": 0.1103065013885498 + }, + { + "epoch": 1.459197998046875e-05, + "model_forward_time": 0.025145530700683594, + "step": 9563 + }, + { + "epoch": 1.459197998046875e-05, + "step": 9563, + "training_step_time": 0.10874509811401367 + }, + { + "epoch": 1.4593505859375e-05, + "model_forward_time": 0.025287628173828125, + "step": 9564 + }, + { + "epoch": 1.4593505859375e-05, + "step": 9564, + "training_step_time": 0.10816621780395508 + }, + { + "epoch": 1.459503173828125e-05, + "model_forward_time": 0.025239229202270508, + "step": 9565 + }, + { + "epoch": 1.459503173828125e-05, + "step": 9565, + "training_step_time": 0.10697746276855469 + }, + { + "epoch": 1.45965576171875e-05, + "model_forward_time": 0.026006460189819336, + "step": 9566 + }, + { + "epoch": 1.45965576171875e-05, + "step": 9566, + "training_step_time": 0.10965323448181152 + }, + { + "epoch": 1.459808349609375e-05, + "model_forward_time": 0.025601625442504883, + "step": 9567 + }, + { + "epoch": 1.459808349609375e-05, + "step": 9567, + "training_step_time": 0.10748767852783203 + }, + { + "epoch": 1.4599609375e-05, + "model_forward_time": 0.02563023567199707, + "step": 9568 + }, + { + "epoch": 1.4599609375e-05, + "step": 9568, + "training_step_time": 0.10876321792602539 + }, + { + "epoch": 1.460113525390625e-05, + "model_forward_time": 0.02503347396850586, + "step": 9569 + }, + { + "epoch": 1.460113525390625e-05, + "step": 9569, + "training_step_time": 0.11268496513366699 + }, + { + "epoch": 1.46026611328125e-05, + "grad_norm": 0.5119488835334778, + "learning_rate": 8.148743122865463e-05, + "loss": 0.0484, + "step": 9570 + }, + { + "epoch": 1.46026611328125e-05, + "model_forward_time": 0.02516794204711914, + "step": 9570 + }, + { + "epoch": 1.46026611328125e-05, + "step": 9570, + "training_step_time": 0.10786795616149902 + }, + { + "epoch": 1.460418701171875e-05, + "model_forward_time": 0.02553105354309082, + "step": 9571 + }, + { + "epoch": 1.460418701171875e-05, + "step": 9571, + "training_step_time": 0.21245646476745605 + }, + { + "epoch": 1.4605712890625e-05, + "model_forward_time": 0.024172067642211914, + "step": 9572 + }, + { + "epoch": 1.4605712890625e-05, + "step": 9572, + "training_step_time": 0.1259140968322754 + }, + { + "epoch": 1.460723876953125e-05, + "model_forward_time": 0.02753901481628418, + "step": 9573 + }, + { + "epoch": 1.460723876953125e-05, + "step": 9573, + "training_step_time": 0.11209988594055176 + }, + { + "epoch": 1.46087646484375e-05, + "model_forward_time": 0.025592565536499023, + "step": 9574 + }, + { + "epoch": 1.46087646484375e-05, + "step": 9574, + "training_step_time": 0.1690821647644043 + }, + { + "epoch": 1.461029052734375e-05, + "model_forward_time": 0.02434682846069336, + "step": 9575 + }, + { + "epoch": 1.461029052734375e-05, + "step": 9575, + "training_step_time": 0.19556570053100586 + }, + { + "epoch": 1.461181640625e-05, + "model_forward_time": 0.0247499942779541, + "step": 9576 + }, + { + "epoch": 1.461181640625e-05, + "step": 9576, + "training_step_time": 0.19040489196777344 + }, + { + "epoch": 1.461334228515625e-05, + "model_forward_time": 0.02502751350402832, + "step": 9577 + }, + { + "epoch": 1.461334228515625e-05, + "step": 9577, + "training_step_time": 0.18045425415039062 + }, + { + "epoch": 1.46148681640625e-05, + "model_forward_time": 0.024462223052978516, + "step": 9578 + }, + { + "epoch": 1.46148681640625e-05, + "step": 9578, + "training_step_time": 0.2007603645324707 + }, + { + "epoch": 1.461639404296875e-05, + "model_forward_time": 0.024204254150390625, + "step": 9579 + }, + { + "epoch": 1.461639404296875e-05, + "step": 9579, + "training_step_time": 0.15765070915222168 + }, + { + "epoch": 1.4617919921875e-05, + "grad_norm": 0.45523786544799805, + "learning_rate": 8.14445983177832e-05, + "loss": 0.0373, + "step": 9580 + }, + { + "epoch": 1.4617919921875e-05, + "model_forward_time": 0.022441625595092773, + "step": 9580 + }, + { + "epoch": 1.4617919921875e-05, + "step": 9580, + "training_step_time": 0.18658185005187988 + }, + { + "epoch": 1.461944580078125e-05, + "model_forward_time": 0.02386331558227539, + "step": 9581 + }, + { + "epoch": 1.461944580078125e-05, + "step": 9581, + "training_step_time": 0.1789553165435791 + }, + { + "epoch": 1.46209716796875e-05, + "model_forward_time": 0.024242639541625977, + "step": 9582 + }, + { + "epoch": 1.46209716796875e-05, + "step": 9582, + "training_step_time": 0.16912627220153809 + }, + { + "epoch": 1.462249755859375e-05, + "model_forward_time": 0.024147748947143555, + "step": 9583 + }, + { + "epoch": 1.462249755859375e-05, + "step": 9583, + "training_step_time": 0.18231987953186035 + }, + { + "epoch": 1.46240234375e-05, + "model_forward_time": 0.027846813201904297, + "step": 9584 + }, + { + "epoch": 1.46240234375e-05, + "step": 9584, + "training_step_time": 0.12413930892944336 + }, + { + "epoch": 1.462554931640625e-05, + "model_forward_time": 0.024219751358032227, + "step": 9585 + }, + { + "epoch": 1.462554931640625e-05, + "step": 9585, + "training_step_time": 0.20951390266418457 + }, + { + "epoch": 1.46270751953125e-05, + "model_forward_time": 0.024255037307739258, + "step": 9586 + }, + { + "epoch": 1.46270751953125e-05, + "step": 9586, + "training_step_time": 0.11900210380554199 + }, + { + "epoch": 1.462860107421875e-05, + "model_forward_time": 0.02521204948425293, + "step": 9587 + }, + { + "epoch": 1.462860107421875e-05, + "step": 9587, + "training_step_time": 0.10596251487731934 + }, + { + "epoch": 1.4630126953125e-05, + "model_forward_time": 0.026239395141601562, + "step": 9588 + }, + { + "epoch": 1.4630126953125e-05, + "step": 9588, + "training_step_time": 0.10923075675964355 + }, + { + "epoch": 1.463165283203125e-05, + "model_forward_time": 0.025093555450439453, + "step": 9589 + }, + { + "epoch": 1.463165283203125e-05, + "step": 9589, + "training_step_time": 0.13231730461120605 + }, + { + "epoch": 1.46331787109375e-05, + "grad_norm": 0.46255525946617126, + "learning_rate": 8.140172719875979e-05, + "loss": 0.0298, + "step": 9590 + }, + { + "epoch": 1.46331787109375e-05, + "model_forward_time": 0.024947404861450195, + "step": 9590 + }, + { + "epoch": 1.46331787109375e-05, + "step": 9590, + "training_step_time": 0.14930367469787598 + }, + { + "epoch": 1.463470458984375e-05, + "model_forward_time": 0.024849414825439453, + "step": 9591 + }, + { + "epoch": 1.463470458984375e-05, + "step": 9591, + "training_step_time": 0.15892410278320312 + }, + { + "epoch": 1.463623046875e-05, + "model_forward_time": 0.024472951889038086, + "step": 9592 + }, + { + "epoch": 1.463623046875e-05, + "step": 9592, + "training_step_time": 0.1548452377319336 + }, + { + "epoch": 1.463775634765625e-05, + "model_forward_time": 0.02400827407836914, + "step": 9593 + }, + { + "epoch": 1.463775634765625e-05, + "step": 9593, + "training_step_time": 0.13833308219909668 + }, + { + "epoch": 1.46392822265625e-05, + "model_forward_time": 0.024338245391845703, + "step": 9594 + }, + { + "epoch": 1.46392822265625e-05, + "step": 9594, + "training_step_time": 0.1295173168182373 + }, + { + "epoch": 1.464080810546875e-05, + "model_forward_time": 0.024268150329589844, + "step": 9595 + }, + { + "epoch": 1.464080810546875e-05, + "step": 9595, + "training_step_time": 0.1271040439605713 + }, + { + "epoch": 1.4642333984375e-05, + "model_forward_time": 0.024587631225585938, + "step": 9596 + }, + { + "epoch": 1.4642333984375e-05, + "step": 9596, + "training_step_time": 0.12419867515563965 + }, + { + "epoch": 1.464385986328125e-05, + "model_forward_time": 0.025130748748779297, + "step": 9597 + }, + { + "epoch": 1.464385986328125e-05, + "step": 9597, + "training_step_time": 0.12128448486328125 + }, + { + "epoch": 1.46453857421875e-05, + "model_forward_time": 0.02537679672241211, + "step": 9598 + }, + { + "epoch": 1.46453857421875e-05, + "step": 9598, + "training_step_time": 0.1133875846862793 + }, + { + "epoch": 1.464691162109375e-05, + "model_forward_time": 0.02492356300354004, + "step": 9599 + }, + { + "epoch": 1.464691162109375e-05, + "step": 9599, + "training_step_time": 0.11612200736999512 + }, + { + "epoch": 1.46484375e-05, + "grad_norm": 0.3734428286552429, + "learning_rate": 8.135881792367686e-05, + "loss": 0.0259, + "step": 9600 + }, + { + "epoch": 1.46484375e-05, + "model_forward_time": 0.025065898895263672, + "step": 9600 + }, + { + "epoch": 1.46484375e-05, + "step": 9600, + "training_step_time": 0.11228609085083008 + }, + { + "epoch": 1.464996337890625e-05, + "model_forward_time": 0.025210857391357422, + "step": 9601 + }, + { + "epoch": 1.464996337890625e-05, + "step": 9601, + "training_step_time": 0.10897493362426758 + }, + { + "epoch": 1.46514892578125e-05, + "model_forward_time": 0.02472853660583496, + "step": 9602 + }, + { + "epoch": 1.46514892578125e-05, + "step": 9602, + "training_step_time": 0.11083149909973145 + }, + { + "epoch": 1.465301513671875e-05, + "model_forward_time": 0.025244951248168945, + "step": 9603 + }, + { + "epoch": 1.465301513671875e-05, + "step": 9603, + "training_step_time": 0.10868978500366211 + }, + { + "epoch": 1.4654541015625e-05, + "model_forward_time": 0.024953842163085938, + "step": 9604 + }, + { + "epoch": 1.4654541015625e-05, + "step": 9604, + "training_step_time": 0.10859179496765137 + }, + { + "epoch": 1.465606689453125e-05, + "model_forward_time": 0.0256350040435791, + "step": 9605 + }, + { + "epoch": 1.465606689453125e-05, + "step": 9605, + "training_step_time": 0.11030006408691406 + }, + { + "epoch": 1.46575927734375e-05, + "model_forward_time": 0.027234554290771484, + "step": 9606 + }, + { + "epoch": 1.46575927734375e-05, + "step": 9606, + "training_step_time": 0.1144266128540039 + }, + { + "epoch": 1.465911865234375e-05, + "model_forward_time": 0.025156259536743164, + "step": 9607 + }, + { + "epoch": 1.465911865234375e-05, + "step": 9607, + "training_step_time": 0.1060640811920166 + }, + { + "epoch": 1.466064453125e-05, + "model_forward_time": 0.024080276489257812, + "step": 9608 + }, + { + "epoch": 1.466064453125e-05, + "step": 9608, + "training_step_time": 0.14950919151306152 + }, + { + "epoch": 1.466217041015625e-05, + "model_forward_time": 0.025336742401123047, + "step": 9609 + }, + { + "epoch": 1.466217041015625e-05, + "step": 9609, + "training_step_time": 0.11146688461303711 + }, + { + "epoch": 1.46636962890625e-05, + "grad_norm": 0.3587009906768799, + "learning_rate": 8.13158705446732e-05, + "loss": 0.0266, + "step": 9610 + }, + { + "epoch": 1.46636962890625e-05, + "model_forward_time": 0.0251314640045166, + "step": 9610 + }, + { + "epoch": 1.46636962890625e-05, + "step": 9610, + "training_step_time": 0.20196771621704102 + }, + { + "epoch": 1.466522216796875e-05, + "model_forward_time": 0.02415013313293457, + "step": 9611 + }, + { + "epoch": 1.466522216796875e-05, + "step": 9611, + "training_step_time": 0.13425207138061523 + }, + { + "epoch": 1.4666748046875e-05, + "model_forward_time": 0.02660679817199707, + "step": 9612 + }, + { + "epoch": 1.4666748046875e-05, + "step": 9612, + "training_step_time": 0.11210155487060547 + }, + { + "epoch": 1.466827392578125e-05, + "model_forward_time": 0.025350570678710938, + "step": 9613 + }, + { + "epoch": 1.466827392578125e-05, + "step": 9613, + "training_step_time": 0.10577201843261719 + }, + { + "epoch": 1.46697998046875e-05, + "model_forward_time": 0.025360822677612305, + "step": 9614 + }, + { + "epoch": 1.46697998046875e-05, + "step": 9614, + "training_step_time": 0.10686516761779785 + }, + { + "epoch": 1.467132568359375e-05, + "model_forward_time": 0.025519132614135742, + "step": 9615 + }, + { + "epoch": 1.467132568359375e-05, + "step": 9615, + "training_step_time": 0.10672879219055176 + }, + { + "epoch": 1.46728515625e-05, + "model_forward_time": 0.025675058364868164, + "step": 9616 + }, + { + "epoch": 1.46728515625e-05, + "step": 9616, + "training_step_time": 0.10918760299682617 + }, + { + "epoch": 1.467437744140625e-05, + "model_forward_time": 0.02524590492248535, + "step": 9617 + }, + { + "epoch": 1.467437744140625e-05, + "step": 9617, + "training_step_time": 0.10837078094482422 + }, + { + "epoch": 1.46759033203125e-05, + "model_forward_time": 0.025150060653686523, + "step": 9618 + }, + { + "epoch": 1.46759033203125e-05, + "step": 9618, + "training_step_time": 0.10574865341186523 + }, + { + "epoch": 1.467742919921875e-05, + "model_forward_time": 0.02521800994873047, + "step": 9619 + }, + { + "epoch": 1.467742919921875e-05, + "step": 9619, + "training_step_time": 0.10458683967590332 + }, + { + "epoch": 1.4678955078125e-05, + "grad_norm": 0.283431738615036, + "learning_rate": 8.127288511393392e-05, + "loss": 0.0295, + "step": 9620 + }, + { + "epoch": 1.4678955078125e-05, + "model_forward_time": 0.02683424949645996, + "step": 9620 + }, + { + "epoch": 1.4678955078125e-05, + "step": 9620, + "training_step_time": 0.13709187507629395 + }, + { + "epoch": 1.468048095703125e-05, + "model_forward_time": 0.025423526763916016, + "step": 9621 + }, + { + "epoch": 1.468048095703125e-05, + "step": 9621, + "training_step_time": 0.13492822647094727 + }, + { + "epoch": 1.46820068359375e-05, + "model_forward_time": 0.024751901626586914, + "step": 9622 + }, + { + "epoch": 1.46820068359375e-05, + "step": 9622, + "training_step_time": 0.11292171478271484 + }, + { + "epoch": 1.468353271484375e-05, + "model_forward_time": 0.025397300720214844, + "step": 9623 + }, + { + "epoch": 1.468353271484375e-05, + "step": 9623, + "training_step_time": 0.10853195190429688 + }, + { + "epoch": 1.468505859375e-05, + "model_forward_time": 0.02549123764038086, + "step": 9624 + }, + { + "epoch": 1.468505859375e-05, + "step": 9624, + "training_step_time": 0.11530613899230957 + }, + { + "epoch": 1.468658447265625e-05, + "model_forward_time": 0.02539992332458496, + "step": 9625 + }, + { + "epoch": 1.468658447265625e-05, + "step": 9625, + "training_step_time": 0.22533321380615234 + }, + { + "epoch": 1.46881103515625e-05, + "model_forward_time": 0.024422407150268555, + "step": 9626 + }, + { + "epoch": 1.46881103515625e-05, + "step": 9626, + "training_step_time": 0.13031411170959473 + }, + { + "epoch": 1.468963623046875e-05, + "model_forward_time": 0.024100303649902344, + "step": 9627 + }, + { + "epoch": 1.468963623046875e-05, + "step": 9627, + "training_step_time": 0.13034868240356445 + }, + { + "epoch": 1.4691162109375e-05, + "model_forward_time": 0.024877071380615234, + "step": 9628 + }, + { + "epoch": 1.4691162109375e-05, + "step": 9628, + "training_step_time": 0.14666152000427246 + }, + { + "epoch": 1.469268798828125e-05, + "model_forward_time": 0.024692773818969727, + "step": 9629 + }, + { + "epoch": 1.469268798828125e-05, + "step": 9629, + "training_step_time": 0.21528410911560059 + }, + { + "epoch": 1.46942138671875e-05, + "grad_norm": 0.3566807210445404, + "learning_rate": 8.12298616836904e-05, + "loss": 0.0272, + "step": 9630 + }, + { + "epoch": 1.46942138671875e-05, + "model_forward_time": 0.024663448333740234, + "step": 9630 + }, + { + "epoch": 1.46942138671875e-05, + "step": 9630, + "training_step_time": 0.10884881019592285 + }, + { + "epoch": 1.469573974609375e-05, + "model_forward_time": 0.024942398071289062, + "step": 9631 + }, + { + "epoch": 1.469573974609375e-05, + "step": 9631, + "training_step_time": 0.10837674140930176 + }, + { + "epoch": 1.4697265625e-05, + "model_forward_time": 0.025676727294921875, + "step": 9632 + }, + { + "epoch": 1.4697265625e-05, + "step": 9632, + "training_step_time": 0.10961794853210449 + }, + { + "epoch": 1.469879150390625e-05, + "model_forward_time": 0.025337696075439453, + "step": 9633 + }, + { + "epoch": 1.469879150390625e-05, + "step": 9633, + "training_step_time": 0.11236691474914551 + }, + { + "epoch": 1.47003173828125e-05, + "model_forward_time": 0.02465367317199707, + "step": 9634 + }, + { + "epoch": 1.47003173828125e-05, + "step": 9634, + "training_step_time": 0.11215806007385254 + }, + { + "epoch": 1.470184326171875e-05, + "model_forward_time": 0.025501728057861328, + "step": 9635 + }, + { + "epoch": 1.470184326171875e-05, + "step": 9635, + "training_step_time": 0.1109151840209961 + }, + { + "epoch": 1.4703369140625e-05, + "model_forward_time": 0.025271177291870117, + "step": 9636 + }, + { + "epoch": 1.4703369140625e-05, + "step": 9636, + "training_step_time": 0.10974717140197754 + }, + { + "epoch": 1.470489501953125e-05, + "model_forward_time": 0.025774240493774414, + "step": 9637 + }, + { + "epoch": 1.470489501953125e-05, + "step": 9637, + "training_step_time": 0.10982036590576172 + }, + { + "epoch": 1.47064208984375e-05, + "model_forward_time": 0.0253756046295166, + "step": 9638 + }, + { + "epoch": 1.47064208984375e-05, + "step": 9638, + "training_step_time": 0.11546850204467773 + }, + { + "epoch": 1.470794677734375e-05, + "model_forward_time": 0.02523970603942871, + "step": 9639 + }, + { + "epoch": 1.470794677734375e-05, + "step": 9639, + "training_step_time": 0.10881662368774414 + }, + { + "epoch": 1.470947265625e-05, + "grad_norm": 0.39975979924201965, + "learning_rate": 8.118680030622014e-05, + "loss": 0.0246, + "step": 9640 + }, + { + "epoch": 1.470947265625e-05, + "model_forward_time": 0.024846792221069336, + "step": 9640 + }, + { + "epoch": 1.470947265625e-05, + "step": 9640, + "training_step_time": 0.11009716987609863 + }, + { + "epoch": 1.471099853515625e-05, + "model_forward_time": 0.02546072006225586, + "step": 9641 + }, + { + "epoch": 1.471099853515625e-05, + "step": 9641, + "training_step_time": 0.10982537269592285 + }, + { + "epoch": 1.47125244140625e-05, + "model_forward_time": 0.025167226791381836, + "step": 9642 + }, + { + "epoch": 1.47125244140625e-05, + "step": 9642, + "training_step_time": 0.11650824546813965 + }, + { + "epoch": 1.471405029296875e-05, + "model_forward_time": 0.02553534507751465, + "step": 9643 + }, + { + "epoch": 1.471405029296875e-05, + "step": 9643, + "training_step_time": 0.11020517349243164 + }, + { + "epoch": 1.4715576171875e-05, + "model_forward_time": 0.025464296340942383, + "step": 9644 + }, + { + "epoch": 1.4715576171875e-05, + "step": 9644, + "training_step_time": 0.10854196548461914 + }, + { + "epoch": 1.471710205078125e-05, + "model_forward_time": 0.02610325813293457, + "step": 9645 + }, + { + "epoch": 1.471710205078125e-05, + "step": 9645, + "training_step_time": 0.11237668991088867 + }, + { + "epoch": 1.47186279296875e-05, + "model_forward_time": 0.02532792091369629, + "step": 9646 + }, + { + "epoch": 1.47186279296875e-05, + "step": 9646, + "training_step_time": 0.11131787300109863 + }, + { + "epoch": 1.472015380859375e-05, + "model_forward_time": 0.02550053596496582, + "step": 9647 + }, + { + "epoch": 1.472015380859375e-05, + "step": 9647, + "training_step_time": 0.11355805397033691 + }, + { + "epoch": 1.47216796875e-05, + "model_forward_time": 0.025294065475463867, + "step": 9648 + }, + { + "epoch": 1.47216796875e-05, + "step": 9648, + "training_step_time": 0.11553692817687988 + }, + { + "epoch": 1.472320556640625e-05, + "model_forward_time": 0.025770187377929688, + "step": 9649 + }, + { + "epoch": 1.472320556640625e-05, + "step": 9649, + "training_step_time": 0.11240124702453613 + }, + { + "epoch": 1.47247314453125e-05, + "grad_norm": 0.4785434305667877, + "learning_rate": 8.114370103384681e-05, + "loss": 0.0316, + "step": 9650 + }, + { + "epoch": 1.47247314453125e-05, + "model_forward_time": 0.025299787521362305, + "step": 9650 + }, + { + "epoch": 1.47247314453125e-05, + "step": 9650, + "training_step_time": 0.10789299011230469 + }, + { + "epoch": 1.472625732421875e-05, + "model_forward_time": 0.025302886962890625, + "step": 9651 + }, + { + "epoch": 1.472625732421875e-05, + "step": 9651, + "training_step_time": 0.11245298385620117 + }, + { + "epoch": 1.4727783203125e-05, + "model_forward_time": 0.024153709411621094, + "step": 9652 + }, + { + "epoch": 1.4727783203125e-05, + "step": 9652, + "training_step_time": 0.16206693649291992 + }, + { + "epoch": 1.472930908203125e-05, + "model_forward_time": 0.025659799575805664, + "step": 9653 + }, + { + "epoch": 1.472930908203125e-05, + "step": 9653, + "training_step_time": 0.11801004409790039 + }, + { + "epoch": 1.47308349609375e-05, + "model_forward_time": 0.02515411376953125, + "step": 9654 + }, + { + "epoch": 1.47308349609375e-05, + "step": 9654, + "training_step_time": 0.17615747451782227 + }, + { + "epoch": 1.473236083984375e-05, + "model_forward_time": 0.02446889877319336, + "step": 9655 + }, + { + "epoch": 1.473236083984375e-05, + "step": 9655, + "training_step_time": 0.20401382446289062 + }, + { + "epoch": 1.473388671875e-05, + "model_forward_time": 0.025043249130249023, + "step": 9656 + }, + { + "epoch": 1.473388671875e-05, + "step": 9656, + "training_step_time": 0.11120223999023438 + }, + { + "epoch": 1.473541259765625e-05, + "model_forward_time": 0.024876117706298828, + "step": 9657 + }, + { + "epoch": 1.473541259765625e-05, + "step": 9657, + "training_step_time": 0.11959958076477051 + }, + { + "epoch": 1.47369384765625e-05, + "model_forward_time": 0.02533698081970215, + "step": 9658 + }, + { + "epoch": 1.47369384765625e-05, + "step": 9658, + "training_step_time": 0.11945939064025879 + }, + { + "epoch": 1.473846435546875e-05, + "model_forward_time": 0.025476932525634766, + "step": 9659 + }, + { + "epoch": 1.473846435546875e-05, + "step": 9659, + "training_step_time": 0.12511682510375977 + }, + { + "epoch": 1.4739990234375e-05, + "grad_norm": 0.4794352948665619, + "learning_rate": 8.110056391894005e-05, + "loss": 0.0303, + "step": 9660 + }, + { + "epoch": 1.4739990234375e-05, + "model_forward_time": 0.025388717651367188, + "step": 9660 + }, + { + "epoch": 1.4739990234375e-05, + "step": 9660, + "training_step_time": 0.10523366928100586 + }, + { + "epoch": 1.474151611328125e-05, + "model_forward_time": 0.025391101837158203, + "step": 9661 + }, + { + "epoch": 1.474151611328125e-05, + "step": 9661, + "training_step_time": 0.10731005668640137 + }, + { + "epoch": 1.47430419921875e-05, + "model_forward_time": 0.025214195251464844, + "step": 9662 + }, + { + "epoch": 1.47430419921875e-05, + "step": 9662, + "training_step_time": 0.10710453987121582 + }, + { + "epoch": 1.474456787109375e-05, + "model_forward_time": 0.02506256103515625, + "step": 9663 + }, + { + "epoch": 1.474456787109375e-05, + "step": 9663, + "training_step_time": 0.10680460929870605 + }, + { + "epoch": 1.474609375e-05, + "model_forward_time": 0.026640892028808594, + "step": 9664 + }, + { + "epoch": 1.474609375e-05, + "step": 9664, + "training_step_time": 0.11574983596801758 + }, + { + "epoch": 1.474761962890625e-05, + "model_forward_time": 0.02542591094970703, + "step": 9665 + }, + { + "epoch": 1.474761962890625e-05, + "step": 9665, + "training_step_time": 0.11315250396728516 + }, + { + "epoch": 1.47491455078125e-05, + "model_forward_time": 0.02511906623840332, + "step": 9666 + }, + { + "epoch": 1.47491455078125e-05, + "step": 9666, + "training_step_time": 0.1337742805480957 + }, + { + "epoch": 1.475067138671875e-05, + "model_forward_time": 0.0276944637298584, + "step": 9667 + }, + { + "epoch": 1.475067138671875e-05, + "step": 9667, + "training_step_time": 0.11696386337280273 + }, + { + "epoch": 1.4752197265625e-05, + "model_forward_time": 0.02507948875427246, + "step": 9668 + }, + { + "epoch": 1.4752197265625e-05, + "step": 9668, + "training_step_time": 0.10918450355529785 + }, + { + "epoch": 1.475372314453125e-05, + "model_forward_time": 0.024968624114990234, + "step": 9669 + }, + { + "epoch": 1.475372314453125e-05, + "step": 9669, + "training_step_time": 0.11141586303710938 + }, + { + "epoch": 1.47552490234375e-05, + "grad_norm": 0.5360229015350342, + "learning_rate": 8.105738901391552e-05, + "loss": 0.0403, + "step": 9670 + }, + { + "epoch": 1.47552490234375e-05, + "model_forward_time": 0.025995254516601562, + "step": 9670 + }, + { + "epoch": 1.47552490234375e-05, + "step": 9670, + "training_step_time": 0.18514609336853027 + }, + { + "epoch": 1.475677490234375e-05, + "model_forward_time": 0.024682998657226562, + "step": 9671 + }, + { + "epoch": 1.475677490234375e-05, + "step": 9671, + "training_step_time": 0.18868637084960938 + }, + { + "epoch": 1.475830078125e-05, + "model_forward_time": 0.024445533752441406, + "step": 9672 + }, + { + "epoch": 1.475830078125e-05, + "step": 9672, + "training_step_time": 0.21336054801940918 + }, + { + "epoch": 1.475982666015625e-05, + "model_forward_time": 0.025230884552001953, + "step": 9673 + }, + { + "epoch": 1.475982666015625e-05, + "step": 9673, + "training_step_time": 0.2315990924835205 + }, + { + "epoch": 1.47613525390625e-05, + "model_forward_time": 0.024277687072753906, + "step": 9674 + }, + { + "epoch": 1.47613525390625e-05, + "step": 9674, + "training_step_time": 0.22087931632995605 + }, + { + "epoch": 1.476287841796875e-05, + "model_forward_time": 0.0244596004486084, + "step": 9675 + }, + { + "epoch": 1.476287841796875e-05, + "step": 9675, + "training_step_time": 0.16072559356689453 + }, + { + "epoch": 1.4764404296875e-05, + "model_forward_time": 0.024389266967773438, + "step": 9676 + }, + { + "epoch": 1.4764404296875e-05, + "step": 9676, + "training_step_time": 0.13886690139770508 + }, + { + "epoch": 1.476593017578125e-05, + "model_forward_time": 0.024599552154541016, + "step": 9677 + }, + { + "epoch": 1.476593017578125e-05, + "step": 9677, + "training_step_time": 0.13137102127075195 + }, + { + "epoch": 1.47674560546875e-05, + "model_forward_time": 0.024481534957885742, + "step": 9678 + }, + { + "epoch": 1.47674560546875e-05, + "step": 9678, + "training_step_time": 0.12673735618591309 + }, + { + "epoch": 1.476898193359375e-05, + "model_forward_time": 0.024860143661499023, + "step": 9679 + }, + { + "epoch": 1.476898193359375e-05, + "step": 9679, + "training_step_time": 0.1255016326904297 + }, + { + "epoch": 1.47705078125e-05, + "grad_norm": 0.46959853172302246, + "learning_rate": 8.101417637123484e-05, + "loss": 0.0251, + "step": 9680 + }, + { + "epoch": 1.47705078125e-05, + "model_forward_time": 0.02493429183959961, + "step": 9680 + }, + { + "epoch": 1.47705078125e-05, + "step": 9680, + "training_step_time": 0.1185448169708252 + }, + { + "epoch": 1.477203369140625e-05, + "model_forward_time": 0.025189638137817383, + "step": 9681 + }, + { + "epoch": 1.477203369140625e-05, + "step": 9681, + "training_step_time": 0.11478567123413086 + }, + { + "epoch": 1.47735595703125e-05, + "model_forward_time": 0.024971485137939453, + "step": 9682 + }, + { + "epoch": 1.47735595703125e-05, + "step": 9682, + "training_step_time": 0.11745119094848633 + }, + { + "epoch": 1.477508544921875e-05, + "model_forward_time": 0.025211811065673828, + "step": 9683 + }, + { + "epoch": 1.477508544921875e-05, + "step": 9683, + "training_step_time": 0.11037755012512207 + }, + { + "epoch": 1.4776611328125e-05, + "model_forward_time": 0.024907350540161133, + "step": 9684 + }, + { + "epoch": 1.4776611328125e-05, + "step": 9684, + "training_step_time": 0.1110231876373291 + }, + { + "epoch": 1.477813720703125e-05, + "model_forward_time": 0.02500605583190918, + "step": 9685 + }, + { + "epoch": 1.477813720703125e-05, + "step": 9685, + "training_step_time": 0.11049795150756836 + }, + { + "epoch": 1.47796630859375e-05, + "model_forward_time": 0.025127649307250977, + "step": 9686 + }, + { + "epoch": 1.47796630859375e-05, + "step": 9686, + "training_step_time": 0.11315226554870605 + }, + { + "epoch": 1.478118896484375e-05, + "model_forward_time": 0.025789260864257812, + "step": 9687 + }, + { + "epoch": 1.478118896484375e-05, + "step": 9687, + "training_step_time": 0.11287832260131836 + }, + { + "epoch": 1.478271484375e-05, + "model_forward_time": 0.023795127868652344, + "step": 9688 + }, + { + "epoch": 1.478271484375e-05, + "step": 9688, + "training_step_time": 0.11110544204711914 + }, + { + "epoch": 1.478424072265625e-05, + "model_forward_time": 0.025890588760375977, + "step": 9689 + }, + { + "epoch": 1.478424072265625e-05, + "step": 9689, + "training_step_time": 0.11648321151733398 + }, + { + "epoch": 1.47857666015625e-05, + "grad_norm": 0.3298199772834778, + "learning_rate": 8.097092604340542e-05, + "loss": 0.0286, + "step": 9690 + }, + { + "epoch": 1.47857666015625e-05, + "model_forward_time": 0.025205612182617188, + "step": 9690 + }, + { + "epoch": 1.47857666015625e-05, + "step": 9690, + "training_step_time": 0.10801434516906738 + }, + { + "epoch": 1.478729248046875e-05, + "model_forward_time": 0.02547597885131836, + "step": 9691 + }, + { + "epoch": 1.478729248046875e-05, + "step": 9691, + "training_step_time": 0.11466455459594727 + }, + { + "epoch": 1.4788818359375e-05, + "model_forward_time": 0.02488875389099121, + "step": 9692 + }, + { + "epoch": 1.4788818359375e-05, + "step": 9692, + "training_step_time": 0.10933732986450195 + }, + { + "epoch": 1.479034423828125e-05, + "model_forward_time": 0.02526116371154785, + "step": 9693 + }, + { + "epoch": 1.479034423828125e-05, + "step": 9693, + "training_step_time": 0.11175751686096191 + }, + { + "epoch": 1.47918701171875e-05, + "model_forward_time": 0.025300025939941406, + "step": 9694 + }, + { + "epoch": 1.47918701171875e-05, + "step": 9694, + "training_step_time": 0.11196041107177734 + }, + { + "epoch": 1.479339599609375e-05, + "model_forward_time": 0.02659440040588379, + "step": 9695 + }, + { + "epoch": 1.479339599609375e-05, + "step": 9695, + "training_step_time": 0.10761713981628418 + }, + { + "epoch": 1.4794921875e-05, + "model_forward_time": 0.025219440460205078, + "step": 9696 + }, + { + "epoch": 1.4794921875e-05, + "step": 9696, + "training_step_time": 0.10684776306152344 + }, + { + "epoch": 1.479644775390625e-05, + "model_forward_time": 0.025108814239501953, + "step": 9697 + }, + { + "epoch": 1.479644775390625e-05, + "step": 9697, + "training_step_time": 0.10663199424743652 + }, + { + "epoch": 1.47979736328125e-05, + "model_forward_time": 0.024571895599365234, + "step": 9698 + }, + { + "epoch": 1.47979736328125e-05, + "step": 9698, + "training_step_time": 0.11971378326416016 + }, + { + "epoch": 1.479949951171875e-05, + "model_forward_time": 0.024518489837646484, + "step": 9699 + }, + { + "epoch": 1.479949951171875e-05, + "step": 9699, + "training_step_time": 0.12796521186828613 + }, + { + "epoch": 1.4801025390625e-05, + "grad_norm": 0.41840147972106934, + "learning_rate": 8.092763808298048e-05, + "loss": 0.0246, + "step": 9700 + }, + { + "epoch": 1.4801025390625e-05, + "model_forward_time": 0.027907371520996094, + "step": 9700 + }, + { + "epoch": 1.4801025390625e-05, + "step": 9700, + "training_step_time": 0.11349678039550781 + }, + { + "epoch": 1.480255126953125e-05, + "model_forward_time": 0.02542710304260254, + "step": 9701 + }, + { + "epoch": 1.480255126953125e-05, + "step": 9701, + "training_step_time": 0.10866808891296387 + }, + { + "epoch": 1.48040771484375e-05, + "model_forward_time": 0.0252993106842041, + "step": 9702 + }, + { + "epoch": 1.48040771484375e-05, + "step": 9702, + "training_step_time": 0.22054100036621094 + }, + { + "epoch": 1.480560302734375e-05, + "model_forward_time": 0.02471184730529785, + "step": 9703 + }, + { + "epoch": 1.480560302734375e-05, + "step": 9703, + "training_step_time": 0.11343932151794434 + }, + { + "epoch": 1.480712890625e-05, + "model_forward_time": 0.024181127548217773, + "step": 9704 + }, + { + "epoch": 1.480712890625e-05, + "step": 9704, + "training_step_time": 0.10850787162780762 + }, + { + "epoch": 1.480865478515625e-05, + "model_forward_time": 0.026212453842163086, + "step": 9705 + }, + { + "epoch": 1.480865478515625e-05, + "step": 9705, + "training_step_time": 0.11151647567749023 + }, + { + "epoch": 1.48101806640625e-05, + "model_forward_time": 0.02525639533996582, + "step": 9706 + }, + { + "epoch": 1.48101806640625e-05, + "step": 9706, + "training_step_time": 0.10954093933105469 + }, + { + "epoch": 1.481170654296875e-05, + "model_forward_time": 0.025243282318115234, + "step": 9707 + }, + { + "epoch": 1.481170654296875e-05, + "step": 9707, + "training_step_time": 0.11170077323913574 + }, + { + "epoch": 1.4813232421875e-05, + "model_forward_time": 0.025267601013183594, + "step": 9708 + }, + { + "epoch": 1.4813232421875e-05, + "step": 9708, + "training_step_time": 0.15646743774414062 + }, + { + "epoch": 1.481475830078125e-05, + "model_forward_time": 0.0247647762298584, + "step": 9709 + }, + { + "epoch": 1.481475830078125e-05, + "step": 9709, + "training_step_time": 0.13778305053710938 + }, + { + "epoch": 1.48162841796875e-05, + "grad_norm": 0.37349286675453186, + "learning_rate": 8.088431254255899e-05, + "loss": 0.0246, + "step": 9710 + }, + { + "epoch": 1.48162841796875e-05, + "model_forward_time": 0.024553775787353516, + "step": 9710 + }, + { + "epoch": 1.48162841796875e-05, + "step": 9710, + "training_step_time": 0.10950875282287598 + }, + { + "epoch": 1.481781005859375e-05, + "model_forward_time": 0.025505542755126953, + "step": 9711 + }, + { + "epoch": 1.481781005859375e-05, + "step": 9711, + "training_step_time": 0.11762428283691406 + }, + { + "epoch": 1.48193359375e-05, + "model_forward_time": 0.02509307861328125, + "step": 9712 + }, + { + "epoch": 1.48193359375e-05, + "step": 9712, + "training_step_time": 0.11009478569030762 + }, + { + "epoch": 1.482086181640625e-05, + "model_forward_time": 0.02518153190612793, + "step": 9713 + }, + { + "epoch": 1.482086181640625e-05, + "step": 9713, + "training_step_time": 0.11281967163085938 + }, + { + "epoch": 1.48223876953125e-05, + "model_forward_time": 0.025235891342163086, + "step": 9714 + }, + { + "epoch": 1.48223876953125e-05, + "step": 9714, + "training_step_time": 0.19005632400512695 + }, + { + "epoch": 1.482391357421875e-05, + "model_forward_time": 0.02453756332397461, + "step": 9715 + }, + { + "epoch": 1.482391357421875e-05, + "step": 9715, + "training_step_time": 0.14233946800231934 + }, + { + "epoch": 1.4825439453125e-05, + "model_forward_time": 0.02457141876220703, + "step": 9716 + }, + { + "epoch": 1.4825439453125e-05, + "step": 9716, + "training_step_time": 0.1924452781677246 + }, + { + "epoch": 1.482696533203125e-05, + "model_forward_time": 0.024697065353393555, + "step": 9717 + }, + { + "epoch": 1.482696533203125e-05, + "step": 9717, + "training_step_time": 0.19617819786071777 + }, + { + "epoch": 1.48284912109375e-05, + "model_forward_time": 0.0245821475982666, + "step": 9718 + }, + { + "epoch": 1.48284912109375e-05, + "step": 9718, + "training_step_time": 0.1259009838104248 + }, + { + "epoch": 1.483001708984375e-05, + "model_forward_time": 0.024280786514282227, + "step": 9719 + }, + { + "epoch": 1.483001708984375e-05, + "step": 9719, + "training_step_time": 0.10862994194030762 + }, + { + "epoch": 1.483154296875e-05, + "grad_norm": 0.4185222089290619, + "learning_rate": 8.084094947478556e-05, + "loss": 0.0314, + "step": 9720 + }, + { + "epoch": 1.483154296875e-05, + "model_forward_time": 0.025447368621826172, + "step": 9720 + }, + { + "epoch": 1.483154296875e-05, + "step": 9720, + "training_step_time": 0.10726070404052734 + }, + { + "epoch": 1.483306884765625e-05, + "model_forward_time": 0.02860116958618164, + "step": 9721 + }, + { + "epoch": 1.483306884765625e-05, + "step": 9721, + "training_step_time": 0.11400175094604492 + }, + { + "epoch": 1.48345947265625e-05, + "model_forward_time": 0.0255279541015625, + "step": 9722 + }, + { + "epoch": 1.48345947265625e-05, + "step": 9722, + "training_step_time": 0.10703349113464355 + }, + { + "epoch": 1.483612060546875e-05, + "model_forward_time": 0.02534770965576172, + "step": 9723 + }, + { + "epoch": 1.483612060546875e-05, + "step": 9723, + "training_step_time": 0.10702657699584961 + }, + { + "epoch": 1.4837646484375e-05, + "model_forward_time": 0.025156497955322266, + "step": 9724 + }, + { + "epoch": 1.4837646484375e-05, + "step": 9724, + "training_step_time": 0.10672497749328613 + }, + { + "epoch": 1.483917236328125e-05, + "model_forward_time": 0.02500605583190918, + "step": 9725 + }, + { + "epoch": 1.483917236328125e-05, + "step": 9725, + "training_step_time": 0.10639238357543945 + }, + { + "epoch": 1.48406982421875e-05, + "model_forward_time": 0.02504730224609375, + "step": 9726 + }, + { + "epoch": 1.48406982421875e-05, + "step": 9726, + "training_step_time": 0.10973453521728516 + }, + { + "epoch": 1.484222412109375e-05, + "model_forward_time": 0.02534317970275879, + "step": 9727 + }, + { + "epoch": 1.484222412109375e-05, + "step": 9727, + "training_step_time": 0.10865592956542969 + }, + { + "epoch": 1.484375e-05, + "model_forward_time": 0.02507638931274414, + "step": 9728 + }, + { + "epoch": 1.484375e-05, + "step": 9728, + "training_step_time": 0.11304926872253418 + }, + { + "epoch": 1.484527587890625e-05, + "model_forward_time": 0.02521800994873047, + "step": 9729 + }, + { + "epoch": 1.484527587890625e-05, + "step": 9729, + "training_step_time": 0.11442923545837402 + }, + { + "epoch": 1.48468017578125e-05, + "grad_norm": 0.499977171421051, + "learning_rate": 8.07975489323504e-05, + "loss": 0.0416, + "step": 9730 + }, + { + "epoch": 1.48468017578125e-05, + "model_forward_time": 0.024951934814453125, + "step": 9730 + }, + { + "epoch": 1.48468017578125e-05, + "step": 9730, + "training_step_time": 0.11084127426147461 + }, + { + "epoch": 1.484832763671875e-05, + "model_forward_time": 0.02566838264465332, + "step": 9731 + }, + { + "epoch": 1.484832763671875e-05, + "step": 9731, + "training_step_time": 0.11111211776733398 + }, + { + "epoch": 1.4849853515625e-05, + "model_forward_time": 0.02501225471496582, + "step": 9732 + }, + { + "epoch": 1.4849853515625e-05, + "step": 9732, + "training_step_time": 0.11335420608520508 + }, + { + "epoch": 1.485137939453125e-05, + "model_forward_time": 0.02534961700439453, + "step": 9733 + }, + { + "epoch": 1.485137939453125e-05, + "step": 9733, + "training_step_time": 0.10802817344665527 + }, + { + "epoch": 1.48529052734375e-05, + "model_forward_time": 0.024985313415527344, + "step": 9734 + }, + { + "epoch": 1.48529052734375e-05, + "step": 9734, + "training_step_time": 0.10816645622253418 + }, + { + "epoch": 1.485443115234375e-05, + "model_forward_time": 0.02543950080871582, + "step": 9735 + }, + { + "epoch": 1.485443115234375e-05, + "step": 9735, + "training_step_time": 0.11236882209777832 + }, + { + "epoch": 1.485595703125e-05, + "model_forward_time": 0.025061368942260742, + "step": 9736 + }, + { + "epoch": 1.485595703125e-05, + "step": 9736, + "training_step_time": 0.10949158668518066 + }, + { + "epoch": 1.485748291015625e-05, + "model_forward_time": 0.025288820266723633, + "step": 9737 + }, + { + "epoch": 1.485748291015625e-05, + "step": 9737, + "training_step_time": 0.1098475456237793 + }, + { + "epoch": 1.48590087890625e-05, + "model_forward_time": 0.02564835548400879, + "step": 9738 + }, + { + "epoch": 1.48590087890625e-05, + "step": 9738, + "training_step_time": 0.11377549171447754 + }, + { + "epoch": 1.486053466796875e-05, + "model_forward_time": 0.02519392967224121, + "step": 9739 + }, + { + "epoch": 1.486053466796875e-05, + "step": 9739, + "training_step_time": 0.10792064666748047 + }, + { + "epoch": 1.4862060546875e-05, + "grad_norm": 0.9022095203399658, + "learning_rate": 8.075411096798928e-05, + "loss": 0.0329, + "step": 9740 + }, + { + "epoch": 1.4862060546875e-05, + "model_forward_time": 0.024444103240966797, + "step": 9740 + }, + { + "epoch": 1.4862060546875e-05, + "step": 9740, + "training_step_time": 0.10895538330078125 + }, + { + "epoch": 1.486358642578125e-05, + "model_forward_time": 0.025072097778320312, + "step": 9741 + }, + { + "epoch": 1.486358642578125e-05, + "step": 9741, + "training_step_time": 0.1060342788696289 + }, + { + "epoch": 1.48651123046875e-05, + "model_forward_time": 0.02538323402404785, + "step": 9742 + }, + { + "epoch": 1.48651123046875e-05, + "step": 9742, + "training_step_time": 0.10661435127258301 + }, + { + "epoch": 1.486663818359375e-05, + "model_forward_time": 0.025185346603393555, + "step": 9743 + }, + { + "epoch": 1.486663818359375e-05, + "step": 9743, + "training_step_time": 0.10816645622253418 + }, + { + "epoch": 1.48681640625e-05, + "model_forward_time": 0.025403261184692383, + "step": 9744 + }, + { + "epoch": 1.48681640625e-05, + "step": 9744, + "training_step_time": 0.10636615753173828 + }, + { + "epoch": 1.486968994140625e-05, + "model_forward_time": 0.025394678115844727, + "step": 9745 + }, + { + "epoch": 1.486968994140625e-05, + "step": 9745, + "training_step_time": 0.1062932014465332 + }, + { + "epoch": 1.48712158203125e-05, + "model_forward_time": 0.02556467056274414, + "step": 9746 + }, + { + "epoch": 1.48712158203125e-05, + "step": 9746, + "training_step_time": 0.11341190338134766 + }, + { + "epoch": 1.487274169921875e-05, + "model_forward_time": 0.025713682174682617, + "step": 9747 + }, + { + "epoch": 1.487274169921875e-05, + "step": 9747, + "training_step_time": 0.11193251609802246 + }, + { + "epoch": 1.4874267578125e-05, + "model_forward_time": 0.02573561668395996, + "step": 9748 + }, + { + "epoch": 1.4874267578125e-05, + "step": 9748, + "training_step_time": 0.10944962501525879 + }, + { + "epoch": 1.487579345703125e-05, + "model_forward_time": 0.025519847869873047, + "step": 9749 + }, + { + "epoch": 1.487579345703125e-05, + "step": 9749, + "training_step_time": 0.10676288604736328 + }, + { + "epoch": 1.48773193359375e-05, + "grad_norm": 0.564054012298584, + "learning_rate": 8.07106356344834e-05, + "loss": 0.0277, + "step": 9750 + }, + { + "epoch": 1.48773193359375e-05, + "model_forward_time": 0.025513172149658203, + "step": 9750 + }, + { + "epoch": 1.48773193359375e-05, + "step": 9750, + "training_step_time": 0.12445688247680664 + }, + { + "epoch": 1.487884521484375e-05, + "model_forward_time": 0.02587270736694336, + "step": 9751 + }, + { + "epoch": 1.487884521484375e-05, + "step": 9751, + "training_step_time": 0.10859227180480957 + }, + { + "epoch": 1.488037109375e-05, + "model_forward_time": 0.02566218376159668, + "step": 9752 + }, + { + "epoch": 1.488037109375e-05, + "step": 9752, + "training_step_time": 0.10823416709899902 + }, + { + "epoch": 1.488189697265625e-05, + "model_forward_time": 0.02594304084777832, + "step": 9753 + }, + { + "epoch": 1.488189697265625e-05, + "step": 9753, + "training_step_time": 0.10813021659851074 + }, + { + "epoch": 1.48834228515625e-05, + "model_forward_time": 0.026610374450683594, + "step": 9754 + }, + { + "epoch": 1.48834228515625e-05, + "step": 9754, + "training_step_time": 0.11319208145141602 + }, + { + "epoch": 1.488494873046875e-05, + "model_forward_time": 0.025405406951904297, + "step": 9755 + }, + { + "epoch": 1.488494873046875e-05, + "step": 9755, + "training_step_time": 0.10914015769958496 + }, + { + "epoch": 1.4886474609375e-05, + "model_forward_time": 0.02536487579345703, + "step": 9756 + }, + { + "epoch": 1.4886474609375e-05, + "step": 9756, + "training_step_time": 0.16846585273742676 + }, + { + "epoch": 1.488800048828125e-05, + "model_forward_time": 0.02488994598388672, + "step": 9757 + }, + { + "epoch": 1.488800048828125e-05, + "step": 9757, + "training_step_time": 0.12117958068847656 + }, + { + "epoch": 1.48895263671875e-05, + "model_forward_time": 0.024817943572998047, + "step": 9758 + }, + { + "epoch": 1.48895263671875e-05, + "step": 9758, + "training_step_time": 0.10559487342834473 + }, + { + "epoch": 1.489105224609375e-05, + "model_forward_time": 0.02567577362060547, + "step": 9759 + }, + { + "epoch": 1.489105224609375e-05, + "step": 9759, + "training_step_time": 0.12911105155944824 + }, + { + "epoch": 1.4892578125e-05, + "grad_norm": 0.4195360541343689, + "learning_rate": 8.06671229846594e-05, + "loss": 0.033, + "step": 9760 + }, + { + "epoch": 1.4892578125e-05, + "model_forward_time": 0.02538752555847168, + "step": 9760 + }, + { + "epoch": 1.4892578125e-05, + "step": 9760, + "training_step_time": 0.10829472541809082 + }, + { + "epoch": 1.489410400390625e-05, + "model_forward_time": 0.025613784790039062, + "step": 9761 + }, + { + "epoch": 1.489410400390625e-05, + "step": 9761, + "training_step_time": 0.22438907623291016 + }, + { + "epoch": 1.48956298828125e-05, + "model_forward_time": 0.02418231964111328, + "step": 9762 + }, + { + "epoch": 1.48956298828125e-05, + "step": 9762, + "training_step_time": 0.17682647705078125 + }, + { + "epoch": 1.489715576171875e-05, + "model_forward_time": 0.024318456649780273, + "step": 9763 + }, + { + "epoch": 1.489715576171875e-05, + "step": 9763, + "training_step_time": 0.16802334785461426 + }, + { + "epoch": 1.4898681640625e-05, + "model_forward_time": 0.024797916412353516, + "step": 9764 + }, + { + "epoch": 1.4898681640625e-05, + "step": 9764, + "training_step_time": 0.13013696670532227 + }, + { + "epoch": 1.490020751953125e-05, + "model_forward_time": 0.024611949920654297, + "step": 9765 + }, + { + "epoch": 1.490020751953125e-05, + "step": 9765, + "training_step_time": 0.1895887851715088 + }, + { + "epoch": 1.49017333984375e-05, + "model_forward_time": 0.02512359619140625, + "step": 9766 + }, + { + "epoch": 1.49017333984375e-05, + "step": 9766, + "training_step_time": 0.11816024780273438 + }, + { + "epoch": 1.490325927734375e-05, + "model_forward_time": 0.02516770362854004, + "step": 9767 + }, + { + "epoch": 1.490325927734375e-05, + "step": 9767, + "training_step_time": 0.10843205451965332 + }, + { + "epoch": 1.490478515625e-05, + "model_forward_time": 0.026070833206176758, + "step": 9768 + }, + { + "epoch": 1.490478515625e-05, + "step": 9768, + "training_step_time": 0.10669851303100586 + }, + { + "epoch": 1.490631103515625e-05, + "model_forward_time": 0.025387287139892578, + "step": 9769 + }, + { + "epoch": 1.490631103515625e-05, + "step": 9769, + "training_step_time": 0.10902237892150879 + }, + { + "epoch": 1.49078369140625e-05, + "grad_norm": 0.4531439542770386, + "learning_rate": 8.062357307138926e-05, + "loss": 0.0321, + "step": 9770 + }, + { + "epoch": 1.49078369140625e-05, + "model_forward_time": 0.025746583938598633, + "step": 9770 + }, + { + "epoch": 1.49078369140625e-05, + "step": 9770, + "training_step_time": 0.10598921775817871 + }, + { + "epoch": 1.490936279296875e-05, + "model_forward_time": 0.02561497688293457, + "step": 9771 + }, + { + "epoch": 1.490936279296875e-05, + "step": 9771, + "training_step_time": 0.10606884956359863 + }, + { + "epoch": 1.4910888671875e-05, + "model_forward_time": 0.025025606155395508, + "step": 9772 + }, + { + "epoch": 1.4910888671875e-05, + "step": 9772, + "training_step_time": 0.10648107528686523 + }, + { + "epoch": 1.491241455078125e-05, + "model_forward_time": 0.025859355926513672, + "step": 9773 + }, + { + "epoch": 1.491241455078125e-05, + "step": 9773, + "training_step_time": 0.1109614372253418 + }, + { + "epoch": 1.49139404296875e-05, + "model_forward_time": 0.025506019592285156, + "step": 9774 + }, + { + "epoch": 1.49139404296875e-05, + "step": 9774, + "training_step_time": 0.10644173622131348 + }, + { + "epoch": 1.491546630859375e-05, + "model_forward_time": 0.024400949478149414, + "step": 9775 + }, + { + "epoch": 1.491546630859375e-05, + "step": 9775, + "training_step_time": 0.10740447044372559 + }, + { + "epoch": 1.49169921875e-05, + "model_forward_time": 0.024697542190551758, + "step": 9776 + }, + { + "epoch": 1.49169921875e-05, + "step": 9776, + "training_step_time": 0.10973525047302246 + }, + { + "epoch": 1.491851806640625e-05, + "model_forward_time": 0.025323867797851562, + "step": 9777 + }, + { + "epoch": 1.491851806640625e-05, + "step": 9777, + "training_step_time": 0.10542726516723633 + }, + { + "epoch": 1.49200439453125e-05, + "model_forward_time": 0.02647089958190918, + "step": 9778 + }, + { + "epoch": 1.49200439453125e-05, + "step": 9778, + "training_step_time": 0.10923576354980469 + }, + { + "epoch": 1.492156982421875e-05, + "model_forward_time": 0.02543354034423828, + "step": 9779 + }, + { + "epoch": 1.492156982421875e-05, + "step": 9779, + "training_step_time": 0.1098027229309082 + }, + { + "epoch": 1.4923095703125e-05, + "grad_norm": 0.4400675594806671, + "learning_rate": 8.057998594759022e-05, + "loss": 0.0327, + "step": 9780 + }, + { + "epoch": 1.4923095703125e-05, + "model_forward_time": 0.025960683822631836, + "step": 9780 + }, + { + "epoch": 1.4923095703125e-05, + "step": 9780, + "training_step_time": 0.11166071891784668 + }, + { + "epoch": 1.492462158203125e-05, + "model_forward_time": 0.026118755340576172, + "step": 9781 + }, + { + "epoch": 1.492462158203125e-05, + "step": 9781, + "training_step_time": 0.11455893516540527 + }, + { + "epoch": 1.49261474609375e-05, + "model_forward_time": 0.02448105812072754, + "step": 9782 + }, + { + "epoch": 1.49261474609375e-05, + "step": 9782, + "training_step_time": 0.11598944664001465 + }, + { + "epoch": 1.492767333984375e-05, + "model_forward_time": 0.024310588836669922, + "step": 9783 + }, + { + "epoch": 1.492767333984375e-05, + "step": 9783, + "training_step_time": 0.11236286163330078 + }, + { + "epoch": 1.492919921875e-05, + "model_forward_time": 0.02670598030090332, + "step": 9784 + }, + { + "epoch": 1.492919921875e-05, + "step": 9784, + "training_step_time": 0.11348271369934082 + }, + { + "epoch": 1.493072509765625e-05, + "model_forward_time": 0.024506568908691406, + "step": 9785 + }, + { + "epoch": 1.493072509765625e-05, + "step": 9785, + "training_step_time": 0.10960721969604492 + }, + { + "epoch": 1.49322509765625e-05, + "model_forward_time": 0.0262601375579834, + "step": 9786 + }, + { + "epoch": 1.49322509765625e-05, + "step": 9786, + "training_step_time": 0.10893511772155762 + }, + { + "epoch": 1.493377685546875e-05, + "model_forward_time": 0.025414228439331055, + "step": 9787 + }, + { + "epoch": 1.493377685546875e-05, + "step": 9787, + "training_step_time": 0.11485648155212402 + }, + { + "epoch": 1.4935302734375e-05, + "model_forward_time": 0.025536537170410156, + "step": 9788 + }, + { + "epoch": 1.4935302734375e-05, + "step": 9788, + "training_step_time": 0.11267232894897461 + }, + { + "epoch": 1.493682861328125e-05, + "model_forward_time": 0.02567768096923828, + "step": 9789 + }, + { + "epoch": 1.493682861328125e-05, + "step": 9789, + "training_step_time": 0.1086418628692627 + }, + { + "epoch": 1.49383544921875e-05, + "grad_norm": 0.3012600839138031, + "learning_rate": 8.053636166622476e-05, + "loss": 0.0253, + "step": 9790 + }, + { + "epoch": 1.49383544921875e-05, + "model_forward_time": 0.025525331497192383, + "step": 9790 + }, + { + "epoch": 1.49383544921875e-05, + "step": 9790, + "training_step_time": 0.10766363143920898 + }, + { + "epoch": 1.493988037109375e-05, + "model_forward_time": 0.02569866180419922, + "step": 9791 + }, + { + "epoch": 1.493988037109375e-05, + "step": 9791, + "training_step_time": 0.10968136787414551 + }, + { + "epoch": 1.494140625e-05, + "model_forward_time": 0.024981975555419922, + "step": 9792 + }, + { + "epoch": 1.494140625e-05, + "step": 9792, + "training_step_time": 0.11271810531616211 + }, + { + "epoch": 1.494293212890625e-05, + "model_forward_time": 0.02527451515197754, + "step": 9793 + }, + { + "epoch": 1.494293212890625e-05, + "step": 9793, + "training_step_time": 0.10753035545349121 + }, + { + "epoch": 1.49444580078125e-05, + "model_forward_time": 0.02565908432006836, + "step": 9794 + }, + { + "epoch": 1.49444580078125e-05, + "step": 9794, + "training_step_time": 0.11359643936157227 + }, + { + "epoch": 1.494598388671875e-05, + "model_forward_time": 0.026448965072631836, + "step": 9795 + }, + { + "epoch": 1.494598388671875e-05, + "step": 9795, + "training_step_time": 0.10906648635864258 + }, + { + "epoch": 1.4947509765625e-05, + "model_forward_time": 0.025510311126708984, + "step": 9796 + }, + { + "epoch": 1.4947509765625e-05, + "step": 9796, + "training_step_time": 0.2145400047302246 + }, + { + "epoch": 1.494903564453125e-05, + "model_forward_time": 0.024912357330322266, + "step": 9797 + }, + { + "epoch": 1.494903564453125e-05, + "step": 9797, + "training_step_time": 0.12426090240478516 + }, + { + "epoch": 1.49505615234375e-05, + "model_forward_time": 0.02521538734436035, + "step": 9798 + }, + { + "epoch": 1.49505615234375e-05, + "step": 9798, + "training_step_time": 0.1052711009979248 + }, + { + "epoch": 1.495208740234375e-05, + "model_forward_time": 0.025508403778076172, + "step": 9799 + }, + { + "epoch": 1.495208740234375e-05, + "step": 9799, + "training_step_time": 0.10692262649536133 + }, + { + "epoch": 1.495361328125e-05, + "grad_norm": 0.3177136480808258, + "learning_rate": 8.049270028030046e-05, + "loss": 0.0247, + "step": 9800 + }, + { + "epoch": 1.495361328125e-05, + "model_forward_time": 0.025712251663208008, + "step": 9800 + }, + { + "epoch": 1.495361328125e-05, + "step": 9800, + "training_step_time": 0.1069943904876709 + }, + { + "epoch": 1.495513916015625e-05, + "model_forward_time": 0.025146007537841797, + "step": 9801 + }, + { + "epoch": 1.495513916015625e-05, + "step": 9801, + "training_step_time": 0.11007094383239746 + }, + { + "epoch": 1.49566650390625e-05, + "model_forward_time": 0.025117874145507812, + "step": 9802 + }, + { + "epoch": 1.49566650390625e-05, + "step": 9802, + "training_step_time": 0.1951737403869629 + }, + { + "epoch": 1.495819091796875e-05, + "model_forward_time": 0.024478435516357422, + "step": 9803 + }, + { + "epoch": 1.495819091796875e-05, + "step": 9803, + "training_step_time": 0.14208459854125977 + }, + { + "epoch": 1.4959716796875e-05, + "model_forward_time": 0.024687767028808594, + "step": 9804 + }, + { + "epoch": 1.4959716796875e-05, + "step": 9804, + "training_step_time": 0.11280369758605957 + }, + { + "epoch": 1.496124267578125e-05, + "model_forward_time": 0.02464151382446289, + "step": 9805 + }, + { + "epoch": 1.496124267578125e-05, + "step": 9805, + "training_step_time": 0.1066131591796875 + }, + { + "epoch": 1.49627685546875e-05, + "model_forward_time": 0.02573990821838379, + "step": 9806 + }, + { + "epoch": 1.49627685546875e-05, + "step": 9806, + "training_step_time": 0.12056183815002441 + }, + { + "epoch": 1.496429443359375e-05, + "model_forward_time": 0.02540874481201172, + "step": 9807 + }, + { + "epoch": 1.496429443359375e-05, + "step": 9807, + "training_step_time": 0.20915937423706055 + }, + { + "epoch": 1.49658203125e-05, + "model_forward_time": 0.025008440017700195, + "step": 9808 + }, + { + "epoch": 1.49658203125e-05, + "step": 9808, + "training_step_time": 0.1241757869720459 + }, + { + "epoch": 1.496734619140625e-05, + "model_forward_time": 0.024881601333618164, + "step": 9809 + }, + { + "epoch": 1.496734619140625e-05, + "step": 9809, + "training_step_time": 0.21195316314697266 + }, + { + "epoch": 1.49688720703125e-05, + "grad_norm": 0.40076327323913574, + "learning_rate": 8.044900184287007e-05, + "loss": 0.0336, + "step": 9810 + }, + { + "epoch": 1.49688720703125e-05, + "model_forward_time": 0.02485489845275879, + "step": 9810 + }, + { + "epoch": 1.49688720703125e-05, + "step": 9810, + "training_step_time": 0.18085384368896484 + }, + { + "epoch": 1.497039794921875e-05, + "model_forward_time": 0.024980783462524414, + "step": 9811 + }, + { + "epoch": 1.497039794921875e-05, + "step": 9811, + "training_step_time": 0.17284893989562988 + }, + { + "epoch": 1.4971923828125e-05, + "model_forward_time": 0.025893449783325195, + "step": 9812 + }, + { + "epoch": 1.4971923828125e-05, + "step": 9812, + "training_step_time": 0.11108875274658203 + }, + { + "epoch": 1.497344970703125e-05, + "model_forward_time": 0.02360248565673828, + "step": 9813 + }, + { + "epoch": 1.497344970703125e-05, + "step": 9813, + "training_step_time": 0.10728788375854492 + }, + { + "epoch": 1.49749755859375e-05, + "model_forward_time": 0.025328397750854492, + "step": 9814 + }, + { + "epoch": 1.49749755859375e-05, + "step": 9814, + "training_step_time": 0.10765552520751953 + }, + { + "epoch": 1.497650146484375e-05, + "model_forward_time": 0.02597332000732422, + "step": 9815 + }, + { + "epoch": 1.497650146484375e-05, + "step": 9815, + "training_step_time": 0.10791397094726562 + }, + { + "epoch": 1.497802734375e-05, + "model_forward_time": 0.025432586669921875, + "step": 9816 + }, + { + "epoch": 1.497802734375e-05, + "step": 9816, + "training_step_time": 0.10934948921203613 + }, + { + "epoch": 1.497955322265625e-05, + "model_forward_time": 0.02537822723388672, + "step": 9817 + }, + { + "epoch": 1.497955322265625e-05, + "step": 9817, + "training_step_time": 0.11173439025878906 + }, + { + "epoch": 1.49810791015625e-05, + "model_forward_time": 0.025218963623046875, + "step": 9818 + }, + { + "epoch": 1.49810791015625e-05, + "step": 9818, + "training_step_time": 0.11391496658325195 + }, + { + "epoch": 1.498260498046875e-05, + "model_forward_time": 0.02529430389404297, + "step": 9819 + }, + { + "epoch": 1.498260498046875e-05, + "step": 9819, + "training_step_time": 0.11739540100097656 + }, + { + "epoch": 1.4984130859375e-05, + "grad_norm": 0.4028327465057373, + "learning_rate": 8.040526640703128e-05, + "loss": 0.0331, + "step": 9820 + }, + { + "epoch": 1.4984130859375e-05, + "model_forward_time": 0.025072097778320312, + "step": 9820 + }, + { + "epoch": 1.4984130859375e-05, + "step": 9820, + "training_step_time": 0.11196613311767578 + }, + { + "epoch": 1.498565673828125e-05, + "model_forward_time": 0.025506258010864258, + "step": 9821 + }, + { + "epoch": 1.498565673828125e-05, + "step": 9821, + "training_step_time": 0.11948871612548828 + }, + { + "epoch": 1.49871826171875e-05, + "model_forward_time": 0.024015188217163086, + "step": 9822 + }, + { + "epoch": 1.49871826171875e-05, + "step": 9822, + "training_step_time": 0.11364483833312988 + }, + { + "epoch": 1.498870849609375e-05, + "model_forward_time": 0.024483442306518555, + "step": 9823 + }, + { + "epoch": 1.498870849609375e-05, + "step": 9823, + "training_step_time": 0.1119391918182373 + }, + { + "epoch": 1.4990234375e-05, + "model_forward_time": 0.02518749237060547, + "step": 9824 + }, + { + "epoch": 1.4990234375e-05, + "step": 9824, + "training_step_time": 0.10871124267578125 + }, + { + "epoch": 1.499176025390625e-05, + "model_forward_time": 0.02550530433654785, + "step": 9825 + }, + { + "epoch": 1.499176025390625e-05, + "step": 9825, + "training_step_time": 0.1175835132598877 + }, + { + "epoch": 1.49932861328125e-05, + "model_forward_time": 0.02519702911376953, + "step": 9826 + }, + { + "epoch": 1.49932861328125e-05, + "step": 9826, + "training_step_time": 0.10766935348510742 + }, + { + "epoch": 1.499481201171875e-05, + "model_forward_time": 0.025609493255615234, + "step": 9827 + }, + { + "epoch": 1.499481201171875e-05, + "step": 9827, + "training_step_time": 0.10742592811584473 + }, + { + "epoch": 1.4996337890625e-05, + "model_forward_time": 0.025310516357421875, + "step": 9828 + }, + { + "epoch": 1.4996337890625e-05, + "step": 9828, + "training_step_time": 0.10732388496398926 + }, + { + "epoch": 1.499786376953125e-05, + "model_forward_time": 0.025436878204345703, + "step": 9829 + }, + { + "epoch": 1.499786376953125e-05, + "step": 9829, + "training_step_time": 0.11087250709533691 + }, + { + "epoch": 1.49993896484375e-05, + "grad_norm": 0.4979908764362335, + "learning_rate": 8.036149402592676e-05, + "loss": 0.0307, + "step": 9830 + }, + { + "epoch": 1.49993896484375e-05, + "model_forward_time": 0.025301694869995117, + "step": 9830 + }, + { + "epoch": 1.49993896484375e-05, + "step": 9830, + "training_step_time": 0.1091160774230957 + }, + { + "epoch": 1.500091552734375e-05, + "model_forward_time": 0.025782108306884766, + "step": 9831 + }, + { + "epoch": 1.500091552734375e-05, + "step": 9831, + "training_step_time": 0.10886120796203613 + }, + { + "epoch": 1.500244140625e-05, + "model_forward_time": 0.02517843246459961, + "step": 9832 + }, + { + "epoch": 1.500244140625e-05, + "step": 9832, + "training_step_time": 0.1075742244720459 + }, + { + "epoch": 1.500396728515625e-05, + "model_forward_time": 0.025171279907226562, + "step": 9833 + }, + { + "epoch": 1.500396728515625e-05, + "step": 9833, + "training_step_time": 0.10749101638793945 + }, + { + "epoch": 1.50054931640625e-05, + "model_forward_time": 0.028049230575561523, + "step": 9834 + }, + { + "epoch": 1.50054931640625e-05, + "step": 9834, + "training_step_time": 0.11225223541259766 + }, + { + "epoch": 1.500701904296875e-05, + "model_forward_time": 0.026309967041015625, + "step": 9835 + }, + { + "epoch": 1.500701904296875e-05, + "step": 9835, + "training_step_time": 0.1086430549621582 + }, + { + "epoch": 1.5008544921875e-05, + "model_forward_time": 0.025262117385864258, + "step": 9836 + }, + { + "epoch": 1.5008544921875e-05, + "step": 9836, + "training_step_time": 0.108184814453125 + }, + { + "epoch": 1.501007080078125e-05, + "model_forward_time": 0.025929689407348633, + "step": 9837 + }, + { + "epoch": 1.501007080078125e-05, + "step": 9837, + "training_step_time": 0.11485576629638672 + }, + { + "epoch": 1.50115966796875e-05, + "model_forward_time": 0.025205373764038086, + "step": 9838 + }, + { + "epoch": 1.50115966796875e-05, + "step": 9838, + "training_step_time": 0.16666173934936523 + }, + { + "epoch": 1.501312255859375e-05, + "model_forward_time": 0.024776697158813477, + "step": 9839 + }, + { + "epoch": 1.501312255859375e-05, + "step": 9839, + "training_step_time": 0.16666126251220703 + }, + { + "epoch": 1.50146484375e-05, + "grad_norm": 0.2715790271759033, + "learning_rate": 8.031768475274413e-05, + "loss": 0.0382, + "step": 9840 + }, + { + "epoch": 1.50146484375e-05, + "model_forward_time": 0.024921417236328125, + "step": 9840 + }, + { + "epoch": 1.50146484375e-05, + "step": 9840, + "training_step_time": 0.1100156307220459 + }, + { + "epoch": 1.501617431640625e-05, + "model_forward_time": 0.024936199188232422, + "step": 9841 + }, + { + "epoch": 1.501617431640625e-05, + "step": 9841, + "training_step_time": 0.21661925315856934 + }, + { + "epoch": 1.50177001953125e-05, + "model_forward_time": 0.02498340606689453, + "step": 9842 + }, + { + "epoch": 1.50177001953125e-05, + "step": 9842, + "training_step_time": 0.11571121215820312 + }, + { + "epoch": 1.501922607421875e-05, + "model_forward_time": 0.026135921478271484, + "step": 9843 + }, + { + "epoch": 1.501922607421875e-05, + "step": 9843, + "training_step_time": 0.10500335693359375 + }, + { + "epoch": 1.5020751953125e-05, + "model_forward_time": 0.02542281150817871, + "step": 9844 + }, + { + "epoch": 1.5020751953125e-05, + "step": 9844, + "training_step_time": 0.10891294479370117 + }, + { + "epoch": 1.502227783203125e-05, + "model_forward_time": 0.02547001838684082, + "step": 9845 + }, + { + "epoch": 1.502227783203125e-05, + "step": 9845, + "training_step_time": 0.10589766502380371 + }, + { + "epoch": 1.50238037109375e-05, + "model_forward_time": 0.025128841400146484, + "step": 9846 + }, + { + "epoch": 1.50238037109375e-05, + "step": 9846, + "training_step_time": 0.10641646385192871 + }, + { + "epoch": 1.502532958984375e-05, + "model_forward_time": 0.024691343307495117, + "step": 9847 + }, + { + "epoch": 1.502532958984375e-05, + "step": 9847, + "training_step_time": 0.1967618465423584 + }, + { + "epoch": 1.502685546875e-05, + "model_forward_time": 0.024727821350097656, + "step": 9848 + }, + { + "epoch": 1.502685546875e-05, + "step": 9848, + "training_step_time": 0.14198613166809082 + }, + { + "epoch": 1.502838134765625e-05, + "model_forward_time": 0.02498149871826172, + "step": 9849 + }, + { + "epoch": 1.502838134765625e-05, + "step": 9849, + "training_step_time": 0.10376548767089844 + }, + { + "epoch": 1.50299072265625e-05, + "grad_norm": 0.35556262731552124, + "learning_rate": 8.027383864071573e-05, + "loss": 0.0358, + "step": 9850 + }, + { + "epoch": 1.50299072265625e-05, + "model_forward_time": 0.025561809539794922, + "step": 9850 + }, + { + "epoch": 1.50299072265625e-05, + "step": 9850, + "training_step_time": 0.11064934730529785 + }, + { + "epoch": 1.503143310546875e-05, + "model_forward_time": 0.025798320770263672, + "step": 9851 + }, + { + "epoch": 1.503143310546875e-05, + "step": 9851, + "training_step_time": 0.11286473274230957 + }, + { + "epoch": 1.5032958984375e-05, + "model_forward_time": 0.025142908096313477, + "step": 9852 + }, + { + "epoch": 1.5032958984375e-05, + "step": 9852, + "training_step_time": 0.10771894454956055 + }, + { + "epoch": 1.503448486328125e-05, + "model_forward_time": 0.02587151527404785, + "step": 9853 + }, + { + "epoch": 1.503448486328125e-05, + "step": 9853, + "training_step_time": 0.1893303394317627 + }, + { + "epoch": 1.50360107421875e-05, + "model_forward_time": 0.02511453628540039, + "step": 9854 + }, + { + "epoch": 1.50360107421875e-05, + "step": 9854, + "training_step_time": 0.20250678062438965 + }, + { + "epoch": 1.503753662109375e-05, + "model_forward_time": 0.025141000747680664, + "step": 9855 + }, + { + "epoch": 1.503753662109375e-05, + "step": 9855, + "training_step_time": 0.1935136318206787 + }, + { + "epoch": 1.50390625e-05, + "model_forward_time": 0.02452254295349121, + "step": 9856 + }, + { + "epoch": 1.50390625e-05, + "step": 9856, + "training_step_time": 0.19210362434387207 + }, + { + "epoch": 1.504058837890625e-05, + "model_forward_time": 0.024485349655151367, + "step": 9857 + }, + { + "epoch": 1.504058837890625e-05, + "step": 9857, + "training_step_time": 0.14561009407043457 + }, + { + "epoch": 1.50421142578125e-05, + "model_forward_time": 0.0250241756439209, + "step": 9858 + }, + { + "epoch": 1.50421142578125e-05, + "step": 9858, + "training_step_time": 0.10615849494934082 + }, + { + "epoch": 1.504364013671875e-05, + "model_forward_time": 0.024925708770751953, + "step": 9859 + }, + { + "epoch": 1.504364013671875e-05, + "step": 9859, + "training_step_time": 0.10599160194396973 + }, + { + "epoch": 1.5045166015625e-05, + "grad_norm": 0.2787579596042633, + "learning_rate": 8.022995574311876e-05, + "loss": 0.0274, + "step": 9860 + }, + { + "epoch": 1.5045166015625e-05, + "model_forward_time": 0.025533676147460938, + "step": 9860 + }, + { + "epoch": 1.5045166015625e-05, + "step": 9860, + "training_step_time": 0.1068272590637207 + }, + { + "epoch": 1.504669189453125e-05, + "model_forward_time": 0.025585651397705078, + "step": 9861 + }, + { + "epoch": 1.504669189453125e-05, + "step": 9861, + "training_step_time": 0.10719132423400879 + }, + { + "epoch": 1.50482177734375e-05, + "model_forward_time": 0.025596141815185547, + "step": 9862 + }, + { + "epoch": 1.50482177734375e-05, + "step": 9862, + "training_step_time": 0.10933804512023926 + }, + { + "epoch": 1.504974365234375e-05, + "model_forward_time": 0.024993181228637695, + "step": 9863 + }, + { + "epoch": 1.504974365234375e-05, + "step": 9863, + "training_step_time": 0.10825157165527344 + }, + { + "epoch": 1.505126953125e-05, + "model_forward_time": 0.025699853897094727, + "step": 9864 + }, + { + "epoch": 1.505126953125e-05, + "step": 9864, + "training_step_time": 0.1085350513458252 + }, + { + "epoch": 1.505279541015625e-05, + "model_forward_time": 0.02520918846130371, + "step": 9865 + }, + { + "epoch": 1.505279541015625e-05, + "step": 9865, + "training_step_time": 0.11288762092590332 + }, + { + "epoch": 1.50543212890625e-05, + "model_forward_time": 0.024970054626464844, + "step": 9866 + }, + { + "epoch": 1.50543212890625e-05, + "step": 9866, + "training_step_time": 0.11149358749389648 + }, + { + "epoch": 1.505584716796875e-05, + "model_forward_time": 0.025483131408691406, + "step": 9867 + }, + { + "epoch": 1.505584716796875e-05, + "step": 9867, + "training_step_time": 0.18761181831359863 + }, + { + "epoch": 1.5057373046875e-05, + "model_forward_time": 0.02482438087463379, + "step": 9868 + }, + { + "epoch": 1.5057373046875e-05, + "step": 9868, + "training_step_time": 0.21346616744995117 + }, + { + "epoch": 1.505889892578125e-05, + "model_forward_time": 0.025168895721435547, + "step": 9869 + }, + { + "epoch": 1.505889892578125e-05, + "step": 9869, + "training_step_time": 0.21142888069152832 + }, + { + "epoch": 1.50604248046875e-05, + "grad_norm": 0.2760957181453705, + "learning_rate": 8.018603611327504e-05, + "loss": 0.0258, + "step": 9870 + }, + { + "epoch": 1.50604248046875e-05, + "model_forward_time": 0.024290800094604492, + "step": 9870 + }, + { + "epoch": 1.50604248046875e-05, + "step": 9870, + "training_step_time": 0.2133183479309082 + }, + { + "epoch": 1.506195068359375e-05, + "model_forward_time": 0.024898767471313477, + "step": 9871 + }, + { + "epoch": 1.506195068359375e-05, + "step": 9871, + "training_step_time": 0.20910954475402832 + }, + { + "epoch": 1.50634765625e-05, + "model_forward_time": 0.02498173713684082, + "step": 9872 + }, + { + "epoch": 1.50634765625e-05, + "step": 9872, + "training_step_time": 0.19759917259216309 + }, + { + "epoch": 1.506500244140625e-05, + "model_forward_time": 0.024593591690063477, + "step": 9873 + }, + { + "epoch": 1.506500244140625e-05, + "step": 9873, + "training_step_time": 0.17523527145385742 + }, + { + "epoch": 1.50665283203125e-05, + "model_forward_time": 0.024432897567749023, + "step": 9874 + }, + { + "epoch": 1.50665283203125e-05, + "step": 9874, + "training_step_time": 0.10233092308044434 + }, + { + "epoch": 1.506805419921875e-05, + "model_forward_time": 0.024423599243164062, + "step": 9875 + }, + { + "epoch": 1.506805419921875e-05, + "step": 9875, + "training_step_time": 0.10180854797363281 + }, + { + "epoch": 1.5069580078125e-05, + "model_forward_time": 0.026668310165405273, + "step": 9876 + }, + { + "epoch": 1.5069580078125e-05, + "step": 9876, + "training_step_time": 0.1063845157623291 + }, + { + "epoch": 1.507110595703125e-05, + "model_forward_time": 0.025010108947753906, + "step": 9877 + }, + { + "epoch": 1.507110595703125e-05, + "step": 9877, + "training_step_time": 0.1053462028503418 + }, + { + "epoch": 1.50726318359375e-05, + "model_forward_time": 0.02447199821472168, + "step": 9878 + }, + { + "epoch": 1.50726318359375e-05, + "step": 9878, + "training_step_time": 0.11030745506286621 + }, + { + "epoch": 1.507415771484375e-05, + "model_forward_time": 0.025578975677490234, + "step": 9879 + }, + { + "epoch": 1.507415771484375e-05, + "step": 9879, + "training_step_time": 0.1108407974243164 + }, + { + "epoch": 1.507568359375e-05, + "grad_norm": 0.3437744379043579, + "learning_rate": 8.01420798045511e-05, + "loss": 0.0353, + "step": 9880 + }, + { + "epoch": 1.507568359375e-05, + "model_forward_time": 0.02547287940979004, + "step": 9880 + }, + { + "epoch": 1.507568359375e-05, + "step": 9880, + "training_step_time": 0.10824370384216309 + }, + { + "epoch": 1.507720947265625e-05, + "model_forward_time": 0.025673866271972656, + "step": 9881 + }, + { + "epoch": 1.507720947265625e-05, + "step": 9881, + "training_step_time": 0.11393046379089355 + }, + { + "epoch": 1.50787353515625e-05, + "model_forward_time": 0.025342226028442383, + "step": 9882 + }, + { + "epoch": 1.50787353515625e-05, + "step": 9882, + "training_step_time": 0.1698153018951416 + }, + { + "epoch": 1.508026123046875e-05, + "model_forward_time": 0.0241544246673584, + "step": 9883 + }, + { + "epoch": 1.508026123046875e-05, + "step": 9883, + "training_step_time": 0.17233800888061523 + }, + { + "epoch": 1.5081787109375e-05, + "model_forward_time": 0.025681257247924805, + "step": 9884 + }, + { + "epoch": 1.5081787109375e-05, + "step": 9884, + "training_step_time": 0.1049811840057373 + }, + { + "epoch": 1.508331298828125e-05, + "model_forward_time": 0.02523517608642578, + "step": 9885 + }, + { + "epoch": 1.508331298828125e-05, + "step": 9885, + "training_step_time": 0.10669064521789551 + }, + { + "epoch": 1.50848388671875e-05, + "model_forward_time": 0.025859355926513672, + "step": 9886 + }, + { + "epoch": 1.50848388671875e-05, + "step": 9886, + "training_step_time": 0.10920238494873047 + }, + { + "epoch": 1.508636474609375e-05, + "model_forward_time": 0.02513909339904785, + "step": 9887 + }, + { + "epoch": 1.508636474609375e-05, + "step": 9887, + "training_step_time": 0.10944366455078125 + }, + { + "epoch": 1.5087890625e-05, + "model_forward_time": 0.025278568267822266, + "step": 9888 + }, + { + "epoch": 1.5087890625e-05, + "step": 9888, + "training_step_time": 0.11968684196472168 + }, + { + "epoch": 1.508941650390625e-05, + "model_forward_time": 0.025418758392333984, + "step": 9889 + }, + { + "epoch": 1.508941650390625e-05, + "step": 9889, + "training_step_time": 0.13589787483215332 + }, + { + "epoch": 1.50909423828125e-05, + "grad_norm": 0.2674311697483063, + "learning_rate": 8.009808687035798e-05, + "loss": 0.0212, + "step": 9890 + }, + { + "epoch": 1.50909423828125e-05, + "model_forward_time": 0.02550029754638672, + "step": 9890 + }, + { + "epoch": 1.50909423828125e-05, + "step": 9890, + "training_step_time": 0.11166596412658691 + }, + { + "epoch": 1.509246826171875e-05, + "model_forward_time": 0.025786638259887695, + "step": 9891 + }, + { + "epoch": 1.509246826171875e-05, + "step": 9891, + "training_step_time": 0.1151120662689209 + }, + { + "epoch": 1.5093994140625e-05, + "model_forward_time": 0.025448083877563477, + "step": 9892 + }, + { + "epoch": 1.5093994140625e-05, + "step": 9892, + "training_step_time": 0.11392068862915039 + }, + { + "epoch": 1.509552001953125e-05, + "model_forward_time": 0.02524280548095703, + "step": 9893 + }, + { + "epoch": 1.509552001953125e-05, + "step": 9893, + "training_step_time": 0.15471959114074707 + }, + { + "epoch": 1.50970458984375e-05, + "model_forward_time": 0.025466203689575195, + "step": 9894 + }, + { + "epoch": 1.50970458984375e-05, + "step": 9894, + "training_step_time": 0.20530128479003906 + }, + { + "epoch": 1.509857177734375e-05, + "model_forward_time": 0.025064706802368164, + "step": 9895 + }, + { + "epoch": 1.509857177734375e-05, + "step": 9895, + "training_step_time": 0.12743377685546875 + }, + { + "epoch": 1.510009765625e-05, + "model_forward_time": 0.024439573287963867, + "step": 9896 + }, + { + "epoch": 1.510009765625e-05, + "step": 9896, + "training_step_time": 0.15816116333007812 + }, + { + "epoch": 1.510162353515625e-05, + "model_forward_time": 0.024706125259399414, + "step": 9897 + }, + { + "epoch": 1.510162353515625e-05, + "step": 9897, + "training_step_time": 0.20281171798706055 + }, + { + "epoch": 1.51031494140625e-05, + "model_forward_time": 0.024506568908691406, + "step": 9898 + }, + { + "epoch": 1.51031494140625e-05, + "step": 9898, + "training_step_time": 0.1391308307647705 + }, + { + "epoch": 1.510467529296875e-05, + "model_forward_time": 0.02452540397644043, + "step": 9899 + }, + { + "epoch": 1.510467529296875e-05, + "step": 9899, + "training_step_time": 0.10457706451416016 + }, + { + "epoch": 1.5106201171875e-05, + "grad_norm": 0.35864633321762085, + "learning_rate": 8.005405736415126e-05, + "loss": 0.0309, + "step": 9900 + }, + { + "epoch": 1.5106201171875e-05, + "model_forward_time": 0.026026010513305664, + "step": 9900 + }, + { + "epoch": 1.5106201171875e-05, + "step": 9900, + "training_step_time": 0.10687041282653809 + }, + { + "epoch": 1.510772705078125e-05, + "model_forward_time": 0.025847196578979492, + "step": 9901 + }, + { + "epoch": 1.510772705078125e-05, + "step": 9901, + "training_step_time": 0.10817790031433105 + }, + { + "epoch": 1.51092529296875e-05, + "model_forward_time": 0.025614261627197266, + "step": 9902 + }, + { + "epoch": 1.51092529296875e-05, + "step": 9902, + "training_step_time": 0.10854530334472656 + }, + { + "epoch": 1.511077880859375e-05, + "model_forward_time": 0.02646040916442871, + "step": 9903 + }, + { + "epoch": 1.511077880859375e-05, + "step": 9903, + "training_step_time": 0.10735607147216797 + }, + { + "epoch": 1.51123046875e-05, + "model_forward_time": 0.02680349349975586, + "step": 9904 + }, + { + "epoch": 1.51123046875e-05, + "step": 9904, + "training_step_time": 0.11060953140258789 + }, + { + "epoch": 1.511383056640625e-05, + "model_forward_time": 0.026361465454101562, + "step": 9905 + }, + { + "epoch": 1.511383056640625e-05, + "step": 9905, + "training_step_time": 0.11171269416809082 + }, + { + "epoch": 1.51153564453125e-05, + "model_forward_time": 0.02573680877685547, + "step": 9906 + }, + { + "epoch": 1.51153564453125e-05, + "step": 9906, + "training_step_time": 0.10826635360717773 + }, + { + "epoch": 1.511688232421875e-05, + "model_forward_time": 0.02542853355407715, + "step": 9907 + }, + { + "epoch": 1.511688232421875e-05, + "step": 9907, + "training_step_time": 0.10780572891235352 + }, + { + "epoch": 1.5118408203125e-05, + "model_forward_time": 0.025533676147460938, + "step": 9908 + }, + { + "epoch": 1.5118408203125e-05, + "step": 9908, + "training_step_time": 0.10691094398498535 + }, + { + "epoch": 1.511993408203125e-05, + "model_forward_time": 0.025153160095214844, + "step": 9909 + }, + { + "epoch": 1.511993408203125e-05, + "step": 9909, + "training_step_time": 0.1094207763671875 + }, + { + "epoch": 1.51214599609375e-05, + "grad_norm": 0.36182212829589844, + "learning_rate": 8.000999133943093e-05, + "loss": 0.0314, + "step": 9910 + }, + { + "epoch": 1.51214599609375e-05, + "model_forward_time": 0.025544404983520508, + "step": 9910 + }, + { + "epoch": 1.51214599609375e-05, + "step": 9910, + "training_step_time": 0.12115240097045898 + }, + { + "epoch": 1.512298583984375e-05, + "model_forward_time": 0.0252838134765625, + "step": 9911 + }, + { + "epoch": 1.512298583984375e-05, + "step": 9911, + "training_step_time": 0.14619112014770508 + }, + { + "epoch": 1.512451171875e-05, + "model_forward_time": 0.024916887283325195, + "step": 9912 + }, + { + "epoch": 1.512451171875e-05, + "step": 9912, + "training_step_time": 0.11918830871582031 + }, + { + "epoch": 1.512603759765625e-05, + "model_forward_time": 0.025249719619750977, + "step": 9913 + }, + { + "epoch": 1.512603759765625e-05, + "step": 9913, + "training_step_time": 0.11227917671203613 + }, + { + "epoch": 1.51275634765625e-05, + "model_forward_time": 0.024380207061767578, + "step": 9914 + }, + { + "epoch": 1.51275634765625e-05, + "step": 9914, + "training_step_time": 0.1116933822631836 + }, + { + "epoch": 1.512908935546875e-05, + "model_forward_time": 0.025173187255859375, + "step": 9915 + }, + { + "epoch": 1.512908935546875e-05, + "step": 9915, + "training_step_time": 0.10962867736816406 + }, + { + "epoch": 1.5130615234375e-05, + "model_forward_time": 0.02461719512939453, + "step": 9916 + }, + { + "epoch": 1.5130615234375e-05, + "step": 9916, + "training_step_time": 0.1162116527557373 + }, + { + "epoch": 1.513214111328125e-05, + "model_forward_time": 0.02581024169921875, + "step": 9917 + }, + { + "epoch": 1.513214111328125e-05, + "step": 9917, + "training_step_time": 0.11242890357971191 + }, + { + "epoch": 1.51336669921875e-05, + "model_forward_time": 0.026114702224731445, + "step": 9918 + }, + { + "epoch": 1.51336669921875e-05, + "step": 9918, + "training_step_time": 0.11237239837646484 + }, + { + "epoch": 1.513519287109375e-05, + "model_forward_time": 0.02534627914428711, + "step": 9919 + }, + { + "epoch": 1.513519287109375e-05, + "step": 9919, + "training_step_time": 0.1073911190032959 + }, + { + "epoch": 1.513671875e-05, + "grad_norm": 0.394562304019928, + "learning_rate": 7.996588884974135e-05, + "loss": 0.0348, + "step": 9920 + }, + { + "epoch": 1.513671875e-05, + "model_forward_time": 0.02500319480895996, + "step": 9920 + }, + { + "epoch": 1.513671875e-05, + "step": 9920, + "training_step_time": 0.11501407623291016 + }, + { + "epoch": 1.513824462890625e-05, + "model_forward_time": 0.026851415634155273, + "step": 9921 + }, + { + "epoch": 1.513824462890625e-05, + "step": 9921, + "training_step_time": 0.11193370819091797 + }, + { + "epoch": 1.51397705078125e-05, + "model_forward_time": 0.025678396224975586, + "step": 9922 + }, + { + "epoch": 1.51397705078125e-05, + "step": 9922, + "training_step_time": 0.11248397827148438 + }, + { + "epoch": 1.514129638671875e-05, + "model_forward_time": 0.025379657745361328, + "step": 9923 + }, + { + "epoch": 1.514129638671875e-05, + "step": 9923, + "training_step_time": 0.10933828353881836 + }, + { + "epoch": 1.5142822265625e-05, + "model_forward_time": 0.024812936782836914, + "step": 9924 + }, + { + "epoch": 1.5142822265625e-05, + "step": 9924, + "training_step_time": 0.11638903617858887 + }, + { + "epoch": 1.514434814453125e-05, + "model_forward_time": 0.025019168853759766, + "step": 9925 + }, + { + "epoch": 1.514434814453125e-05, + "step": 9925, + "training_step_time": 0.12170553207397461 + }, + { + "epoch": 1.51458740234375e-05, + "model_forward_time": 0.02584052085876465, + "step": 9926 + }, + { + "epoch": 1.51458740234375e-05, + "step": 9926, + "training_step_time": 0.11509990692138672 + }, + { + "epoch": 1.514739990234375e-05, + "model_forward_time": 0.0254364013671875, + "step": 9927 + }, + { + "epoch": 1.514739990234375e-05, + "step": 9927, + "training_step_time": 0.1844475269317627 + }, + { + "epoch": 1.514892578125e-05, + "model_forward_time": 0.025149822235107422, + "step": 9928 + }, + { + "epoch": 1.514892578125e-05, + "step": 9928, + "training_step_time": 0.13775134086608887 + }, + { + "epoch": 1.515045166015625e-05, + "model_forward_time": 0.024923086166381836, + "step": 9929 + }, + { + "epoch": 1.515045166015625e-05, + "step": 9929, + "training_step_time": 0.1127326488494873 + }, + { + "epoch": 1.51519775390625e-05, + "grad_norm": 0.31549686193466187, + "learning_rate": 7.992174994867123e-05, + "loss": 0.0235, + "step": 9930 + }, + { + "epoch": 1.51519775390625e-05, + "model_forward_time": 0.02564716339111328, + "step": 9930 + }, + { + "epoch": 1.51519775390625e-05, + "step": 9930, + "training_step_time": 0.10958647727966309 + }, + { + "epoch": 1.515350341796875e-05, + "model_forward_time": 0.025803089141845703, + "step": 9931 + }, + { + "epoch": 1.515350341796875e-05, + "step": 9931, + "training_step_time": 0.11190533638000488 + }, + { + "epoch": 1.5155029296875e-05, + "model_forward_time": 0.025421619415283203, + "step": 9932 + }, + { + "epoch": 1.5155029296875e-05, + "step": 9932, + "training_step_time": 0.11043334007263184 + }, + { + "epoch": 1.515655517578125e-05, + "model_forward_time": 0.0252993106842041, + "step": 9933 + }, + { + "epoch": 1.515655517578125e-05, + "step": 9933, + "training_step_time": 0.16924571990966797 + }, + { + "epoch": 1.51580810546875e-05, + "model_forward_time": 0.024580001831054688, + "step": 9934 + }, + { + "epoch": 1.51580810546875e-05, + "step": 9934, + "training_step_time": 0.13338065147399902 + }, + { + "epoch": 1.515960693359375e-05, + "model_forward_time": 0.024840354919433594, + "step": 9935 + }, + { + "epoch": 1.515960693359375e-05, + "step": 9935, + "training_step_time": 0.11204123497009277 + }, + { + "epoch": 1.51611328125e-05, + "model_forward_time": 0.027257919311523438, + "step": 9936 + }, + { + "epoch": 1.51611328125e-05, + "step": 9936, + "training_step_time": 0.11745691299438477 + }, + { + "epoch": 1.516265869140625e-05, + "model_forward_time": 0.025320053100585938, + "step": 9937 + }, + { + "epoch": 1.516265869140625e-05, + "step": 9937, + "training_step_time": 0.10972094535827637 + }, + { + "epoch": 1.51641845703125e-05, + "model_forward_time": 0.025059223175048828, + "step": 9938 + }, + { + "epoch": 1.51641845703125e-05, + "step": 9938, + "training_step_time": 0.11083126068115234 + }, + { + "epoch": 1.516571044921875e-05, + "model_forward_time": 0.026792287826538086, + "step": 9939 + }, + { + "epoch": 1.516571044921875e-05, + "step": 9939, + "training_step_time": 0.2091670036315918 + }, + { + "epoch": 1.5167236328125e-05, + "grad_norm": 0.2688508927822113, + "learning_rate": 7.987757468985348e-05, + "loss": 0.0269, + "step": 9940 + }, + { + "epoch": 1.5167236328125e-05, + "model_forward_time": 0.02456188201904297, + "step": 9940 + }, + { + "epoch": 1.5167236328125e-05, + "step": 9940, + "training_step_time": 0.19881248474121094 + }, + { + "epoch": 1.516876220703125e-05, + "model_forward_time": 0.02462315559387207, + "step": 9941 + }, + { + "epoch": 1.516876220703125e-05, + "step": 9941, + "training_step_time": 0.1479027271270752 + }, + { + "epoch": 1.51702880859375e-05, + "model_forward_time": 0.025420427322387695, + "step": 9942 + }, + { + "epoch": 1.51702880859375e-05, + "step": 9942, + "training_step_time": 0.13161826133728027 + }, + { + "epoch": 1.517181396484375e-05, + "model_forward_time": 0.02457284927368164, + "step": 9943 + }, + { + "epoch": 1.517181396484375e-05, + "step": 9943, + "training_step_time": 0.11319947242736816 + }, + { + "epoch": 1.517333984375e-05, + "model_forward_time": 0.02525782585144043, + "step": 9944 + }, + { + "epoch": 1.517333984375e-05, + "step": 9944, + "training_step_time": 0.11025524139404297 + }, + { + "epoch": 1.517486572265625e-05, + "model_forward_time": 0.02533698081970215, + "step": 9945 + }, + { + "epoch": 1.517486572265625e-05, + "step": 9945, + "training_step_time": 0.10592961311340332 + }, + { + "epoch": 1.51763916015625e-05, + "model_forward_time": 0.025657176971435547, + "step": 9946 + }, + { + "epoch": 1.51763916015625e-05, + "step": 9946, + "training_step_time": 0.10996222496032715 + }, + { + "epoch": 1.517791748046875e-05, + "model_forward_time": 0.02540135383605957, + "step": 9947 + }, + { + "epoch": 1.517791748046875e-05, + "step": 9947, + "training_step_time": 0.10944628715515137 + }, + { + "epoch": 1.5179443359375e-05, + "model_forward_time": 0.025317668914794922, + "step": 9948 + }, + { + "epoch": 1.5179443359375e-05, + "step": 9948, + "training_step_time": 0.1083524227142334 + }, + { + "epoch": 1.518096923828125e-05, + "model_forward_time": 0.025221586227416992, + "step": 9949 + }, + { + "epoch": 1.518096923828125e-05, + "step": 9949, + "training_step_time": 0.11153769493103027 + }, + { + "epoch": 1.51824951171875e-05, + "grad_norm": 0.2855030596256256, + "learning_rate": 7.983336312696522e-05, + "loss": 0.02, + "step": 9950 + }, + { + "epoch": 1.51824951171875e-05, + "model_forward_time": 0.025673389434814453, + "step": 9950 + }, + { + "epoch": 1.51824951171875e-05, + "step": 9950, + "training_step_time": 0.11349177360534668 + }, + { + "epoch": 1.518402099609375e-05, + "model_forward_time": 0.02541661262512207, + "step": 9951 + }, + { + "epoch": 1.518402099609375e-05, + "step": 9951, + "training_step_time": 0.10981893539428711 + }, + { + "epoch": 1.5185546875e-05, + "model_forward_time": 0.025097131729125977, + "step": 9952 + }, + { + "epoch": 1.5185546875e-05, + "step": 9952, + "training_step_time": 0.10949063301086426 + }, + { + "epoch": 1.518707275390625e-05, + "model_forward_time": 0.025618553161621094, + "step": 9953 + }, + { + "epoch": 1.518707275390625e-05, + "step": 9953, + "training_step_time": 0.10845112800598145 + }, + { + "epoch": 1.51885986328125e-05, + "model_forward_time": 0.025267839431762695, + "step": 9954 + }, + { + "epoch": 1.51885986328125e-05, + "step": 9954, + "training_step_time": 0.11058163642883301 + }, + { + "epoch": 1.519012451171875e-05, + "model_forward_time": 0.02546525001525879, + "step": 9955 + }, + { + "epoch": 1.519012451171875e-05, + "step": 9955, + "training_step_time": 0.10924887657165527 + }, + { + "epoch": 1.5191650390625e-05, + "model_forward_time": 0.024658679962158203, + "step": 9956 + }, + { + "epoch": 1.5191650390625e-05, + "step": 9956, + "training_step_time": 0.10893774032592773 + }, + { + "epoch": 1.519317626953125e-05, + "model_forward_time": 0.025359153747558594, + "step": 9957 + }, + { + "epoch": 1.519317626953125e-05, + "step": 9957, + "training_step_time": 0.10691499710083008 + }, + { + "epoch": 1.51947021484375e-05, + "model_forward_time": 0.025293827056884766, + "step": 9958 + }, + { + "epoch": 1.51947021484375e-05, + "step": 9958, + "training_step_time": 0.10974264144897461 + }, + { + "epoch": 1.519622802734375e-05, + "model_forward_time": 0.02467942237854004, + "step": 9959 + }, + { + "epoch": 1.519622802734375e-05, + "step": 9959, + "training_step_time": 0.11054801940917969 + }, + { + "epoch": 1.519775390625e-05, + "grad_norm": 0.37216871976852417, + "learning_rate": 7.978911531372765e-05, + "loss": 0.0228, + "step": 9960 + }, + { + "epoch": 1.519775390625e-05, + "model_forward_time": 0.025072336196899414, + "step": 9960 + }, + { + "epoch": 1.519775390625e-05, + "step": 9960, + "training_step_time": 0.10569047927856445 + }, + { + "epoch": 1.519927978515625e-05, + "model_forward_time": 0.025372028350830078, + "step": 9961 + }, + { + "epoch": 1.519927978515625e-05, + "step": 9961, + "training_step_time": 0.10686922073364258 + }, + { + "epoch": 1.52008056640625e-05, + "model_forward_time": 0.02462172508239746, + "step": 9962 + }, + { + "epoch": 1.52008056640625e-05, + "step": 9962, + "training_step_time": 0.10532379150390625 + }, + { + "epoch": 1.520233154296875e-05, + "model_forward_time": 0.025685548782348633, + "step": 9963 + }, + { + "epoch": 1.520233154296875e-05, + "step": 9963, + "training_step_time": 0.10824155807495117 + }, + { + "epoch": 1.5203857421875e-05, + "model_forward_time": 0.025533676147460938, + "step": 9964 + }, + { + "epoch": 1.5203857421875e-05, + "step": 9964, + "training_step_time": 0.11055803298950195 + }, + { + "epoch": 1.520538330078125e-05, + "model_forward_time": 0.025403261184692383, + "step": 9965 + }, + { + "epoch": 1.520538330078125e-05, + "step": 9965, + "training_step_time": 0.10929131507873535 + }, + { + "epoch": 1.52069091796875e-05, + "model_forward_time": 0.02533578872680664, + "step": 9966 + }, + { + "epoch": 1.52069091796875e-05, + "step": 9966, + "training_step_time": 0.15046215057373047 + }, + { + "epoch": 1.520843505859375e-05, + "model_forward_time": 0.02427840232849121, + "step": 9967 + }, + { + "epoch": 1.520843505859375e-05, + "step": 9967, + "training_step_time": 0.16626191139221191 + }, + { + "epoch": 1.52099609375e-05, + "model_forward_time": 0.023575544357299805, + "step": 9968 + }, + { + "epoch": 1.52099609375e-05, + "step": 9968, + "training_step_time": 0.1519632339477539 + }, + { + "epoch": 1.521148681640625e-05, + "model_forward_time": 0.02497243881225586, + "step": 9969 + }, + { + "epoch": 1.521148681640625e-05, + "step": 9969, + "training_step_time": 0.10973238945007324 + }, + { + "epoch": 1.52130126953125e-05, + "grad_norm": 0.46723583340644836, + "learning_rate": 7.974483130390604e-05, + "loss": 0.0345, + "step": 9970 + }, + { + "epoch": 1.52130126953125e-05, + "model_forward_time": 0.02503824234008789, + "step": 9970 + }, + { + "epoch": 1.52130126953125e-05, + "step": 9970, + "training_step_time": 0.17492461204528809 + }, + { + "epoch": 1.521453857421875e-05, + "model_forward_time": 0.024873733520507812, + "step": 9971 + }, + { + "epoch": 1.521453857421875e-05, + "step": 9971, + "training_step_time": 0.15408539772033691 + }, + { + "epoch": 1.5216064453125e-05, + "model_forward_time": 0.02453756332397461, + "step": 9972 + }, + { + "epoch": 1.5216064453125e-05, + "step": 9972, + "training_step_time": 0.11650347709655762 + }, + { + "epoch": 1.521759033203125e-05, + "model_forward_time": 0.025258541107177734, + "step": 9973 + }, + { + "epoch": 1.521759033203125e-05, + "step": 9973, + "training_step_time": 0.17031502723693848 + }, + { + "epoch": 1.52191162109375e-05, + "model_forward_time": 0.027492284774780273, + "step": 9974 + }, + { + "epoch": 1.52191162109375e-05, + "step": 9974, + "training_step_time": 0.17324447631835938 + }, + { + "epoch": 1.522064208984375e-05, + "model_forward_time": 0.025945425033569336, + "step": 9975 + }, + { + "epoch": 1.522064208984375e-05, + "step": 9975, + "training_step_time": 0.10860991477966309 + }, + { + "epoch": 1.522216796875e-05, + "model_forward_time": 0.024903297424316406, + "step": 9976 + }, + { + "epoch": 1.522216796875e-05, + "step": 9976, + "training_step_time": 0.10824179649353027 + }, + { + "epoch": 1.522369384765625e-05, + "model_forward_time": 0.025647640228271484, + "step": 9977 + }, + { + "epoch": 1.522369384765625e-05, + "step": 9977, + "training_step_time": 0.10925769805908203 + }, + { + "epoch": 1.52252197265625e-05, + "model_forward_time": 0.025425434112548828, + "step": 9978 + }, + { + "epoch": 1.52252197265625e-05, + "step": 9978, + "training_step_time": 0.1094205379486084 + }, + { + "epoch": 1.522674560546875e-05, + "model_forward_time": 0.025199413299560547, + "step": 9979 + }, + { + "epoch": 1.522674560546875e-05, + "step": 9979, + "training_step_time": 0.11535406112670898 + }, + { + "epoch": 1.5228271484375e-05, + "grad_norm": 0.20894256234169006, + "learning_rate": 7.970051115130966e-05, + "loss": 0.0259, + "step": 9980 + }, + { + "epoch": 1.5228271484375e-05, + "model_forward_time": 0.026558637619018555, + "step": 9980 + }, + { + "epoch": 1.5228271484375e-05, + "step": 9980, + "training_step_time": 0.1382884979248047 + }, + { + "epoch": 1.522979736328125e-05, + "model_forward_time": 0.02524876594543457, + "step": 9981 + }, + { + "epoch": 1.522979736328125e-05, + "step": 9981, + "training_step_time": 0.10781216621398926 + }, + { + "epoch": 1.52313232421875e-05, + "model_forward_time": 0.025518417358398438, + "step": 9982 + }, + { + "epoch": 1.52313232421875e-05, + "step": 9982, + "training_step_time": 0.11473298072814941 + }, + { + "epoch": 1.523284912109375e-05, + "model_forward_time": 0.025401592254638672, + "step": 9983 + }, + { + "epoch": 1.523284912109375e-05, + "step": 9983, + "training_step_time": 0.10905647277832031 + }, + { + "epoch": 1.5234375e-05, + "model_forward_time": 0.025116682052612305, + "step": 9984 + }, + { + "epoch": 1.5234375e-05, + "step": 9984, + "training_step_time": 0.11001873016357422 + }, + { + "epoch": 1.523590087890625e-05, + "model_forward_time": 0.02494502067565918, + "step": 9985 + }, + { + "epoch": 1.523590087890625e-05, + "step": 9985, + "training_step_time": 0.2067408561706543 + }, + { + "epoch": 1.52374267578125e-05, + "model_forward_time": 0.023936748504638672, + "step": 9986 + }, + { + "epoch": 1.52374267578125e-05, + "step": 9986, + "training_step_time": 0.1898496150970459 + }, + { + "epoch": 1.523895263671875e-05, + "model_forward_time": 0.024956703186035156, + "step": 9987 + }, + { + "epoch": 1.523895263671875e-05, + "step": 9987, + "training_step_time": 0.16049599647521973 + }, + { + "epoch": 1.5240478515625e-05, + "model_forward_time": 0.024861812591552734, + "step": 9988 + }, + { + "epoch": 1.5240478515625e-05, + "step": 9988, + "training_step_time": 0.16083621978759766 + }, + { + "epoch": 1.524200439453125e-05, + "model_forward_time": 0.024207115173339844, + "step": 9989 + }, + { + "epoch": 1.524200439453125e-05, + "step": 9989, + "training_step_time": 0.1655561923980713 + }, + { + "epoch": 1.52435302734375e-05, + "grad_norm": 0.27305060625076294, + "learning_rate": 7.965615490979163e-05, + "loss": 0.0306, + "step": 9990 + }, + { + "epoch": 1.52435302734375e-05, + "model_forward_time": 0.025997638702392578, + "step": 9990 + }, + { + "epoch": 1.52435302734375e-05, + "step": 9990, + "training_step_time": 0.10743832588195801 + }, + { + "epoch": 1.524505615234375e-05, + "model_forward_time": 0.02498912811279297, + "step": 9991 + }, + { + "epoch": 1.524505615234375e-05, + "step": 9991, + "training_step_time": 0.10569429397583008 + }, + { + "epoch": 1.524658203125e-05, + "model_forward_time": 0.025407075881958008, + "step": 9992 + }, + { + "epoch": 1.524658203125e-05, + "step": 9992, + "training_step_time": 0.10815191268920898 + }, + { + "epoch": 1.524810791015625e-05, + "model_forward_time": 0.0259549617767334, + "step": 9993 + }, + { + "epoch": 1.524810791015625e-05, + "step": 9993, + "training_step_time": 0.1083524227142334 + }, + { + "epoch": 1.52496337890625e-05, + "model_forward_time": 0.024721622467041016, + "step": 9994 + }, + { + "epoch": 1.52496337890625e-05, + "step": 9994, + "training_step_time": 0.11060500144958496 + }, + { + "epoch": 1.525115966796875e-05, + "model_forward_time": 0.024374008178710938, + "step": 9995 + }, + { + "epoch": 1.525115966796875e-05, + "step": 9995, + "training_step_time": 0.11023306846618652 + }, + { + "epoch": 1.5252685546875e-05, + "model_forward_time": 0.025157928466796875, + "step": 9996 + }, + { + "epoch": 1.5252685546875e-05, + "step": 9996, + "training_step_time": 0.10677242279052734 + }, + { + "epoch": 1.525421142578125e-05, + "model_forward_time": 0.02511739730834961, + "step": 9997 + }, + { + "epoch": 1.525421142578125e-05, + "step": 9997, + "training_step_time": 0.11240077018737793 + }, + { + "epoch": 1.52557373046875e-05, + "model_forward_time": 0.02527141571044922, + "step": 9998 + }, + { + "epoch": 1.52557373046875e-05, + "step": 9998, + "training_step_time": 0.13327813148498535 + }, + { + "epoch": 1.525726318359375e-05, + "model_forward_time": 0.025368690490722656, + "step": 9999 + }, + { + "epoch": 1.525726318359375e-05, + "step": 9999, + "training_step_time": 0.14312124252319336 + }, + { + "epoch": 1.52587890625e-05, + "grad_norm": 0.351493775844574, + "learning_rate": 7.961176263324901e-05, + "loss": 0.0377, + "step": 10000 + }, + { + "epoch": 1.52587890625e-05, + "model_forward_time": 0.024866580963134766, + "step": 10000 + }, + { + "epoch": 1.52587890625e-05, + "step": 10000, + "training_step_time": 0.09874486923217773 + }, + { + "epoch": 1.526031494140625e-05, + "model_forward_time": 0.022858619689941406, + "step": 10001 + }, + { + "epoch": 1.526031494140625e-05, + "step": 10001, + "training_step_time": 0.10366487503051758 + }, + { + "epoch": 1.52618408203125e-05, + "model_forward_time": 0.02444171905517578, + "step": 10002 + }, + { + "epoch": 1.52618408203125e-05, + "step": 10002, + "training_step_time": 0.10867643356323242 + }, + { + "epoch": 1.526336669921875e-05, + "model_forward_time": 0.025006532669067383, + "step": 10003 + }, + { + "epoch": 1.526336669921875e-05, + "step": 10003, + "training_step_time": 0.1083674430847168 + }, + { + "epoch": 1.5264892578125e-05, + "model_forward_time": 0.025159597396850586, + "step": 10004 + }, + { + "epoch": 1.5264892578125e-05, + "step": 10004, + "training_step_time": 0.10979270935058594 + }, + { + "epoch": 1.526641845703125e-05, + "model_forward_time": 0.025738000869750977, + "step": 10005 + }, + { + "epoch": 1.526641845703125e-05, + "step": 10005, + "training_step_time": 0.10994672775268555 + }, + { + "epoch": 1.52679443359375e-05, + "model_forward_time": 0.025104045867919922, + "step": 10006 + }, + { + "epoch": 1.52679443359375e-05, + "step": 10006, + "training_step_time": 0.11083459854125977 + }, + { + "epoch": 1.526947021484375e-05, + "model_forward_time": 0.02450418472290039, + "step": 10007 + }, + { + "epoch": 1.526947021484375e-05, + "step": 10007, + "training_step_time": 0.10733461380004883 + }, + { + "epoch": 1.527099609375e-05, + "model_forward_time": 0.025336265563964844, + "step": 10008 + }, + { + "epoch": 1.527099609375e-05, + "step": 10008, + "training_step_time": 0.10805916786193848 + }, + { + "epoch": 1.527252197265625e-05, + "model_forward_time": 0.024996280670166016, + "step": 10009 + }, + { + "epoch": 1.527252197265625e-05, + "step": 10009, + "training_step_time": 0.10877299308776855 + }, + { + "epoch": 1.52740478515625e-05, + "grad_norm": 0.3605310618877411, + "learning_rate": 7.956733437562259e-05, + "loss": 0.0323, + "step": 10010 + }, + { + "epoch": 1.52740478515625e-05, + "model_forward_time": 0.02529454231262207, + "step": 10010 + }, + { + "epoch": 1.52740478515625e-05, + "step": 10010, + "training_step_time": 0.1079092025756836 + }, + { + "epoch": 1.527557373046875e-05, + "model_forward_time": 0.025272369384765625, + "step": 10011 + }, + { + "epoch": 1.527557373046875e-05, + "step": 10011, + "training_step_time": 0.10971426963806152 + }, + { + "epoch": 1.5277099609375e-05, + "model_forward_time": 0.025789737701416016, + "step": 10012 + }, + { + "epoch": 1.5277099609375e-05, + "step": 10012, + "training_step_time": 0.10787677764892578 + }, + { + "epoch": 1.527862548828125e-05, + "model_forward_time": 0.02490711212158203, + "step": 10013 + }, + { + "epoch": 1.527862548828125e-05, + "step": 10013, + "training_step_time": 0.11001753807067871 + }, + { + "epoch": 1.52801513671875e-05, + "model_forward_time": 0.02554798126220703, + "step": 10014 + }, + { + "epoch": 1.52801513671875e-05, + "step": 10014, + "training_step_time": 0.11288619041442871 + }, + { + "epoch": 1.528167724609375e-05, + "model_forward_time": 0.0250546932220459, + "step": 10015 + }, + { + "epoch": 1.528167724609375e-05, + "step": 10015, + "training_step_time": 0.10996460914611816 + }, + { + "epoch": 1.5283203125e-05, + "model_forward_time": 0.02510380744934082, + "step": 10016 + }, + { + "epoch": 1.5283203125e-05, + "step": 10016, + "training_step_time": 0.11495614051818848 + }, + { + "epoch": 1.528472900390625e-05, + "model_forward_time": 0.02476334571838379, + "step": 10017 + }, + { + "epoch": 1.528472900390625e-05, + "step": 10017, + "training_step_time": 0.11229825019836426 + }, + { + "epoch": 1.52862548828125e-05, + "model_forward_time": 0.025278568267822266, + "step": 10018 + }, + { + "epoch": 1.52862548828125e-05, + "step": 10018, + "training_step_time": 0.11230254173278809 + }, + { + "epoch": 1.528778076171875e-05, + "model_forward_time": 0.024647951126098633, + "step": 10019 + }, + { + "epoch": 1.528778076171875e-05, + "step": 10019, + "training_step_time": 0.17102670669555664 + }, + { + "epoch": 1.5289306640625e-05, + "grad_norm": 0.4322364330291748, + "learning_rate": 7.952287019089685e-05, + "loss": 0.0302, + "step": 10020 + }, + { + "epoch": 1.5289306640625e-05, + "model_forward_time": 0.024394750595092773, + "step": 10020 + }, + { + "epoch": 1.5289306640625e-05, + "step": 10020, + "training_step_time": 0.16081976890563965 + }, + { + "epoch": 1.529083251953125e-05, + "model_forward_time": 0.02476024627685547, + "step": 10021 + }, + { + "epoch": 1.529083251953125e-05, + "step": 10021, + "training_step_time": 0.11350131034851074 + }, + { + "epoch": 1.52923583984375e-05, + "model_forward_time": 0.02490711212158203, + "step": 10022 + }, + { + "epoch": 1.52923583984375e-05, + "step": 10022, + "training_step_time": 0.20881056785583496 + }, + { + "epoch": 1.529388427734375e-05, + "model_forward_time": 0.023776769638061523, + "step": 10023 + }, + { + "epoch": 1.529388427734375e-05, + "step": 10023, + "training_step_time": 0.12075257301330566 + }, + { + "epoch": 1.529541015625e-05, + "model_forward_time": 0.024541616439819336, + "step": 10024 + }, + { + "epoch": 1.529541015625e-05, + "step": 10024, + "training_step_time": 0.10636687278747559 + }, + { + "epoch": 1.529693603515625e-05, + "model_forward_time": 0.025691509246826172, + "step": 10025 + }, + { + "epoch": 1.529693603515625e-05, + "step": 10025, + "training_step_time": 0.10951018333435059 + }, + { + "epoch": 1.52984619140625e-05, + "model_forward_time": 0.024759531021118164, + "step": 10026 + }, + { + "epoch": 1.52984619140625e-05, + "step": 10026, + "training_step_time": 0.11280679702758789 + }, + { + "epoch": 1.529998779296875e-05, + "model_forward_time": 0.025245189666748047, + "step": 10027 + }, + { + "epoch": 1.529998779296875e-05, + "step": 10027, + "training_step_time": 0.1070256233215332 + }, + { + "epoch": 1.5301513671875e-05, + "model_forward_time": 0.026827096939086914, + "step": 10028 + }, + { + "epoch": 1.5301513671875e-05, + "step": 10028, + "training_step_time": 0.11000728607177734 + }, + { + "epoch": 1.530303955078125e-05, + "model_forward_time": 0.025188207626342773, + "step": 10029 + }, + { + "epoch": 1.530303955078125e-05, + "step": 10029, + "training_step_time": 0.1352078914642334 + }, + { + "epoch": 1.53045654296875e-05, + "grad_norm": 0.3623270094394684, + "learning_rate": 7.947837013310005e-05, + "loss": 0.0207, + "step": 10030 + }, + { + "epoch": 1.53045654296875e-05, + "model_forward_time": 0.024950742721557617, + "step": 10030 + }, + { + "epoch": 1.53045654296875e-05, + "step": 10030, + "training_step_time": 0.11240601539611816 + }, + { + "epoch": 1.530609130859375e-05, + "model_forward_time": 0.02467632293701172, + "step": 10031 + }, + { + "epoch": 1.530609130859375e-05, + "step": 10031, + "training_step_time": 0.11512279510498047 + }, + { + "epoch": 1.53076171875e-05, + "model_forward_time": 0.025052785873413086, + "step": 10032 + }, + { + "epoch": 1.53076171875e-05, + "step": 10032, + "training_step_time": 0.10787200927734375 + }, + { + "epoch": 1.530914306640625e-05, + "model_forward_time": 0.024851083755493164, + "step": 10033 + }, + { + "epoch": 1.530914306640625e-05, + "step": 10033, + "training_step_time": 0.1102304458618164 + }, + { + "epoch": 1.53106689453125e-05, + "model_forward_time": 0.025536298751831055, + "step": 10034 + }, + { + "epoch": 1.53106689453125e-05, + "step": 10034, + "training_step_time": 0.20285677909851074 + }, + { + "epoch": 1.531219482421875e-05, + "model_forward_time": 0.024356603622436523, + "step": 10035 + }, + { + "epoch": 1.531219482421875e-05, + "step": 10035, + "training_step_time": 0.16776418685913086 + }, + { + "epoch": 1.5313720703125e-05, + "model_forward_time": 0.02516317367553711, + "step": 10036 + }, + { + "epoch": 1.5313720703125e-05, + "step": 10036, + "training_step_time": 0.1768326759338379 + }, + { + "epoch": 1.531524658203125e-05, + "model_forward_time": 0.024338483810424805, + "step": 10037 + }, + { + "epoch": 1.531524658203125e-05, + "step": 10037, + "training_step_time": 0.19268107414245605 + }, + { + "epoch": 1.53167724609375e-05, + "model_forward_time": 0.0242307186126709, + "step": 10038 + }, + { + "epoch": 1.53167724609375e-05, + "step": 10038, + "training_step_time": 0.11486053466796875 + }, + { + "epoch": 1.531829833984375e-05, + "model_forward_time": 0.02393794059753418, + "step": 10039 + }, + { + "epoch": 1.531829833984375e-05, + "step": 10039, + "training_step_time": 0.11479878425598145 + }, + { + "epoch": 1.531982421875e-05, + "grad_norm": 0.3786519467830658, + "learning_rate": 7.943383425630387e-05, + "loss": 0.0351, + "step": 10040 + }, + { + "epoch": 1.531982421875e-05, + "model_forward_time": 0.025146961212158203, + "step": 10040 + }, + { + "epoch": 1.531982421875e-05, + "step": 10040, + "training_step_time": 0.10745835304260254 + }, + { + "epoch": 1.532135009765625e-05, + "model_forward_time": 0.02490544319152832, + "step": 10041 + }, + { + "epoch": 1.532135009765625e-05, + "step": 10041, + "training_step_time": 0.10728311538696289 + }, + { + "epoch": 1.53228759765625e-05, + "model_forward_time": 0.02537822723388672, + "step": 10042 + }, + { + "epoch": 1.53228759765625e-05, + "step": 10042, + "training_step_time": 0.10988092422485352 + }, + { + "epoch": 1.532440185546875e-05, + "model_forward_time": 0.024898052215576172, + "step": 10043 + }, + { + "epoch": 1.532440185546875e-05, + "step": 10043, + "training_step_time": 0.1100320816040039 + }, + { + "epoch": 1.5325927734375e-05, + "model_forward_time": 0.02429938316345215, + "step": 10044 + }, + { + "epoch": 1.5325927734375e-05, + "step": 10044, + "training_step_time": 0.1069495677947998 + }, + { + "epoch": 1.532745361328125e-05, + "model_forward_time": 0.024873733520507812, + "step": 10045 + }, + { + "epoch": 1.532745361328125e-05, + "step": 10045, + "training_step_time": 0.10822772979736328 + }, + { + "epoch": 1.53289794921875e-05, + "model_forward_time": 0.024940013885498047, + "step": 10046 + }, + { + "epoch": 1.53289794921875e-05, + "step": 10046, + "training_step_time": 0.1103048324584961 + }, + { + "epoch": 1.533050537109375e-05, + "model_forward_time": 0.02517247200012207, + "step": 10047 + }, + { + "epoch": 1.533050537109375e-05, + "step": 10047, + "training_step_time": 0.10545134544372559 + }, + { + "epoch": 1.533203125e-05, + "model_forward_time": 0.025310993194580078, + "step": 10048 + }, + { + "epoch": 1.533203125e-05, + "step": 10048, + "training_step_time": 0.10932755470275879 + }, + { + "epoch": 1.533355712890625e-05, + "model_forward_time": 0.02487039566040039, + "step": 10049 + }, + { + "epoch": 1.533355712890625e-05, + "step": 10049, + "training_step_time": 0.11080360412597656 + }, + { + "epoch": 1.53350830078125e-05, + "grad_norm": 0.2707097828388214, + "learning_rate": 7.938926261462366e-05, + "loss": 0.0289, + "step": 10050 + }, + { + "epoch": 1.53350830078125e-05, + "model_forward_time": 0.024997234344482422, + "step": 10050 + }, + { + "epoch": 1.53350830078125e-05, + "step": 10050, + "training_step_time": 0.1090240478515625 + }, + { + "epoch": 1.533660888671875e-05, + "model_forward_time": 0.02492213249206543, + "step": 10051 + }, + { + "epoch": 1.533660888671875e-05, + "step": 10051, + "training_step_time": 0.11160731315612793 + }, + { + "epoch": 1.5338134765625e-05, + "model_forward_time": 0.024927377700805664, + "step": 10052 + }, + { + "epoch": 1.5338134765625e-05, + "step": 10052, + "training_step_time": 0.10678601264953613 + }, + { + "epoch": 1.533966064453125e-05, + "model_forward_time": 0.025282621383666992, + "step": 10053 + }, + { + "epoch": 1.533966064453125e-05, + "step": 10053, + "training_step_time": 0.10588407516479492 + }, + { + "epoch": 1.53411865234375e-05, + "model_forward_time": 0.025478124618530273, + "step": 10054 + }, + { + "epoch": 1.53411865234375e-05, + "step": 10054, + "training_step_time": 0.10859227180480957 + }, + { + "epoch": 1.534271240234375e-05, + "model_forward_time": 0.025096893310546875, + "step": 10055 + }, + { + "epoch": 1.534271240234375e-05, + "step": 10055, + "training_step_time": 0.10732102394104004 + }, + { + "epoch": 1.534423828125e-05, + "model_forward_time": 0.02495598793029785, + "step": 10056 + }, + { + "epoch": 1.534423828125e-05, + "step": 10056, + "training_step_time": 0.10802960395812988 + }, + { + "epoch": 1.534576416015625e-05, + "model_forward_time": 0.02487945556640625, + "step": 10057 + }, + { + "epoch": 1.534576416015625e-05, + "step": 10057, + "training_step_time": 0.10804152488708496 + }, + { + "epoch": 1.53472900390625e-05, + "model_forward_time": 0.025394678115844727, + "step": 10058 + }, + { + "epoch": 1.53472900390625e-05, + "step": 10058, + "training_step_time": 0.10782098770141602 + }, + { + "epoch": 1.534881591796875e-05, + "model_forward_time": 0.025424480438232422, + "step": 10059 + }, + { + "epoch": 1.534881591796875e-05, + "step": 10059, + "training_step_time": 0.11058688163757324 + }, + { + "epoch": 1.5350341796875e-05, + "grad_norm": 0.2767201364040375, + "learning_rate": 7.934465526221815e-05, + "loss": 0.029, + "step": 10060 + }, + { + "epoch": 1.5350341796875e-05, + "model_forward_time": 0.025366783142089844, + "step": 10060 + }, + { + "epoch": 1.5350341796875e-05, + "step": 10060, + "training_step_time": 0.10860729217529297 + }, + { + "epoch": 1.535186767578125e-05, + "model_forward_time": 0.024857044219970703, + "step": 10061 + }, + { + "epoch": 1.535186767578125e-05, + "step": 10061, + "training_step_time": 0.10847115516662598 + }, + { + "epoch": 1.53533935546875e-05, + "model_forward_time": 0.02563166618347168, + "step": 10062 + }, + { + "epoch": 1.53533935546875e-05, + "step": 10062, + "training_step_time": 0.1102597713470459 + }, + { + "epoch": 1.535491943359375e-05, + "model_forward_time": 0.024884462356567383, + "step": 10063 + }, + { + "epoch": 1.535491943359375e-05, + "step": 10063, + "training_step_time": 0.11015748977661133 + }, + { + "epoch": 1.53564453125e-05, + "model_forward_time": 0.025075674057006836, + "step": 10064 + }, + { + "epoch": 1.53564453125e-05, + "step": 10064, + "training_step_time": 0.10977935791015625 + }, + { + "epoch": 1.535797119140625e-05, + "model_forward_time": 0.026086091995239258, + "step": 10065 + }, + { + "epoch": 1.535797119140625e-05, + "step": 10065, + "training_step_time": 0.11261773109436035 + }, + { + "epoch": 1.53594970703125e-05, + "model_forward_time": 0.025553226470947266, + "step": 10066 + }, + { + "epoch": 1.53594970703125e-05, + "step": 10066, + "training_step_time": 0.21470260620117188 + }, + { + "epoch": 1.536102294921875e-05, + "model_forward_time": 0.02509284019470215, + "step": 10067 + }, + { + "epoch": 1.536102294921875e-05, + "step": 10067, + "training_step_time": 0.11184287071228027 + }, + { + "epoch": 1.5362548828125e-05, + "model_forward_time": 0.02429676055908203, + "step": 10068 + }, + { + "epoch": 1.5362548828125e-05, + "step": 10068, + "training_step_time": 0.10765743255615234 + }, + { + "epoch": 1.536407470703125e-05, + "model_forward_time": 0.02538299560546875, + "step": 10069 + }, + { + "epoch": 1.536407470703125e-05, + "step": 10069, + "training_step_time": 0.10883116722106934 + }, + { + "epoch": 1.53656005859375e-05, + "grad_norm": 0.32040807604789734, + "learning_rate": 7.930001225328946e-05, + "loss": 0.0203, + "step": 10070 + }, + { + "epoch": 1.53656005859375e-05, + "model_forward_time": 0.025388240814208984, + "step": 10070 + }, + { + "epoch": 1.53656005859375e-05, + "step": 10070, + "training_step_time": 0.11775374412536621 + }, + { + "epoch": 1.536712646484375e-05, + "model_forward_time": 0.026128053665161133, + "step": 10071 + }, + { + "epoch": 1.536712646484375e-05, + "step": 10071, + "training_step_time": 0.11798739433288574 + }, + { + "epoch": 1.536865234375e-05, + "model_forward_time": 0.02519845962524414, + "step": 10072 + }, + { + "epoch": 1.536865234375e-05, + "step": 10072, + "training_step_time": 0.1072995662689209 + }, + { + "epoch": 1.537017822265625e-05, + "model_forward_time": 0.02544379234313965, + "step": 10073 + }, + { + "epoch": 1.537017822265625e-05, + "step": 10073, + "training_step_time": 0.10733842849731445 + }, + { + "epoch": 1.53717041015625e-05, + "model_forward_time": 0.025278091430664062, + "step": 10074 + }, + { + "epoch": 1.53717041015625e-05, + "step": 10074, + "training_step_time": 0.11060619354248047 + }, + { + "epoch": 1.537322998046875e-05, + "model_forward_time": 0.025327205657958984, + "step": 10075 + }, + { + "epoch": 1.537322998046875e-05, + "step": 10075, + "training_step_time": 0.15362310409545898 + }, + { + "epoch": 1.5374755859375e-05, + "model_forward_time": 0.024886131286621094, + "step": 10076 + }, + { + "epoch": 1.5374755859375e-05, + "step": 10076, + "training_step_time": 0.14186549186706543 + }, + { + "epoch": 1.537628173828125e-05, + "model_forward_time": 0.02475595474243164, + "step": 10077 + }, + { + "epoch": 1.537628173828125e-05, + "step": 10077, + "training_step_time": 0.11292743682861328 + }, + { + "epoch": 1.53778076171875e-05, + "model_forward_time": 0.024794340133666992, + "step": 10078 + }, + { + "epoch": 1.53778076171875e-05, + "step": 10078, + "training_step_time": 0.17627787590026855 + }, + { + "epoch": 1.537933349609375e-05, + "model_forward_time": 0.023310184478759766, + "step": 10079 + }, + { + "epoch": 1.537933349609375e-05, + "step": 10079, + "training_step_time": 0.20332908630371094 + }, + { + "epoch": 1.5380859375e-05, + "grad_norm": 0.22738216817378998, + "learning_rate": 7.925533364208309e-05, + "loss": 0.0283, + "step": 10080 + }, + { + "epoch": 1.5380859375e-05, + "model_forward_time": 0.0241391658782959, + "step": 10080 + }, + { + "epoch": 1.5380859375e-05, + "step": 10080, + "training_step_time": 0.8756346702575684 + }, + { + "epoch": 1.538238525390625e-05, + "model_forward_time": 0.022922277450561523, + "step": 10081 + }, + { + "epoch": 1.538238525390625e-05, + "step": 10081, + "training_step_time": 0.17417335510253906 + }, + { + "epoch": 1.53839111328125e-05, + "model_forward_time": 0.023819923400878906, + "step": 10082 + }, + { + "epoch": 1.53839111328125e-05, + "step": 10082, + "training_step_time": 0.10905218124389648 + }, + { + "epoch": 1.538543701171875e-05, + "model_forward_time": 0.02519083023071289, + "step": 10083 + }, + { + "epoch": 1.538543701171875e-05, + "step": 10083, + "training_step_time": 0.1093900203704834 + }, + { + "epoch": 1.5386962890625e-05, + "model_forward_time": 0.025296449661254883, + "step": 10084 + }, + { + "epoch": 1.5386962890625e-05, + "step": 10084, + "training_step_time": 0.10810303688049316 + }, + { + "epoch": 1.538848876953125e-05, + "model_forward_time": 0.02500319480895996, + "step": 10085 + }, + { + "epoch": 1.538848876953125e-05, + "step": 10085, + "training_step_time": 0.10812759399414062 + }, + { + "epoch": 1.53900146484375e-05, + "model_forward_time": 0.026134967803955078, + "step": 10086 + }, + { + "epoch": 1.53900146484375e-05, + "step": 10086, + "training_step_time": 0.10683560371398926 + }, + { + "epoch": 1.539154052734375e-05, + "model_forward_time": 0.025140762329101562, + "step": 10087 + }, + { + "epoch": 1.539154052734375e-05, + "step": 10087, + "training_step_time": 0.1069185733795166 + }, + { + "epoch": 1.539306640625e-05, + "model_forward_time": 0.025353193283081055, + "step": 10088 + }, + { + "epoch": 1.539306640625e-05, + "step": 10088, + "training_step_time": 0.10728216171264648 + }, + { + "epoch": 1.539459228515625e-05, + "model_forward_time": 0.025439977645874023, + "step": 10089 + }, + { + "epoch": 1.539459228515625e-05, + "step": 10089, + "training_step_time": 0.10678887367248535 + }, + { + "epoch": 1.53961181640625e-05, + "grad_norm": 0.504446268081665, + "learning_rate": 7.921061948288773e-05, + "loss": 0.0277, + "step": 10090 + }, + { + "epoch": 1.53961181640625e-05, + "model_forward_time": 0.024975061416625977, + "step": 10090 + }, + { + "epoch": 1.53961181640625e-05, + "step": 10090, + "training_step_time": 0.11394882202148438 + }, + { + "epoch": 1.539764404296875e-05, + "model_forward_time": 0.02552032470703125, + "step": 10091 + }, + { + "epoch": 1.539764404296875e-05, + "step": 10091, + "training_step_time": 0.11300373077392578 + }, + { + "epoch": 1.5399169921875e-05, + "model_forward_time": 0.024634599685668945, + "step": 10092 + }, + { + "epoch": 1.5399169921875e-05, + "step": 10092, + "training_step_time": 0.10746908187866211 + }, + { + "epoch": 1.540069580078125e-05, + "model_forward_time": 0.025124788284301758, + "step": 10093 + }, + { + "epoch": 1.540069580078125e-05, + "step": 10093, + "training_step_time": 0.1068117618560791 + }, + { + "epoch": 1.54022216796875e-05, + "model_forward_time": 0.025466442108154297, + "step": 10094 + }, + { + "epoch": 1.54022216796875e-05, + "step": 10094, + "training_step_time": 0.10998392105102539 + }, + { + "epoch": 1.540374755859375e-05, + "model_forward_time": 0.02523493766784668, + "step": 10095 + }, + { + "epoch": 1.540374755859375e-05, + "step": 10095, + "training_step_time": 0.1072075366973877 + }, + { + "epoch": 1.54052734375e-05, + "model_forward_time": 0.025516271591186523, + "step": 10096 + }, + { + "epoch": 1.54052734375e-05, + "step": 10096, + "training_step_time": 0.1092982292175293 + }, + { + "epoch": 1.540679931640625e-05, + "model_forward_time": 0.02397894859313965, + "step": 10097 + }, + { + "epoch": 1.540679931640625e-05, + "step": 10097, + "training_step_time": 0.10802984237670898 + }, + { + "epoch": 1.54083251953125e-05, + "model_forward_time": 0.025351762771606445, + "step": 10098 + }, + { + "epoch": 1.54083251953125e-05, + "step": 10098, + "training_step_time": 0.1107323169708252 + }, + { + "epoch": 1.540985107421875e-05, + "model_forward_time": 0.024886608123779297, + "step": 10099 + }, + { + "epoch": 1.540985107421875e-05, + "step": 10099, + "training_step_time": 0.10786557197570801 + }, + { + "epoch": 1.5411376953125e-05, + "grad_norm": 0.501907467842102, + "learning_rate": 7.916586983003533e-05, + "loss": 0.0291, + "step": 10100 + }, + { + "epoch": 1.5411376953125e-05, + "model_forward_time": 0.024957656860351562, + "step": 10100 + }, + { + "epoch": 1.5411376953125e-05, + "step": 10100, + "training_step_time": 0.11036181449890137 + }, + { + "epoch": 1.541290283203125e-05, + "model_forward_time": 0.024985313415527344, + "step": 10101 + }, + { + "epoch": 1.541290283203125e-05, + "step": 10101, + "training_step_time": 0.10629725456237793 + }, + { + "epoch": 1.54144287109375e-05, + "model_forward_time": 0.024733304977416992, + "step": 10102 + }, + { + "epoch": 1.54144287109375e-05, + "step": 10102, + "training_step_time": 0.1578047275543213 + }, + { + "epoch": 1.541595458984375e-05, + "model_forward_time": 0.025124311447143555, + "step": 10103 + }, + { + "epoch": 1.541595458984375e-05, + "step": 10103, + "training_step_time": 0.16382408142089844 + }, + { + "epoch": 1.541748046875e-05, + "model_forward_time": 0.023879289627075195, + "step": 10104 + }, + { + "epoch": 1.541748046875e-05, + "step": 10104, + "training_step_time": 0.14234328269958496 + }, + { + "epoch": 1.541900634765625e-05, + "model_forward_time": 0.02464437484741211, + "step": 10105 + }, + { + "epoch": 1.541900634765625e-05, + "step": 10105, + "training_step_time": 0.10982894897460938 + }, + { + "epoch": 1.54205322265625e-05, + "model_forward_time": 0.02779102325439453, + "step": 10106 + }, + { + "epoch": 1.54205322265625e-05, + "step": 10106, + "training_step_time": 0.1487720012664795 + }, + { + "epoch": 1.542205810546875e-05, + "model_forward_time": 0.024782657623291016, + "step": 10107 + }, + { + "epoch": 1.542205810546875e-05, + "step": 10107, + "training_step_time": 0.1222691535949707 + }, + { + "epoch": 1.5423583984375e-05, + "model_forward_time": 0.024914264678955078, + "step": 10108 + }, + { + "epoch": 1.5423583984375e-05, + "step": 10108, + "training_step_time": 0.18389654159545898 + }, + { + "epoch": 1.542510986328125e-05, + "model_forward_time": 0.024308204650878906, + "step": 10109 + }, + { + "epoch": 1.542510986328125e-05, + "step": 10109, + "training_step_time": 0.14718270301818848 + }, + { + "epoch": 1.54266357421875e-05, + "grad_norm": 0.3539285659790039, + "learning_rate": 7.912108473790092e-05, + "loss": 0.0199, + "step": 10110 + }, + { + "epoch": 1.54266357421875e-05, + "model_forward_time": 0.0243222713470459, + "step": 10110 + }, + { + "epoch": 1.54266357421875e-05, + "step": 10110, + "training_step_time": 0.21373891830444336 + }, + { + "epoch": 1.542816162109375e-05, + "model_forward_time": 0.024480819702148438, + "step": 10111 + }, + { + "epoch": 1.542816162109375e-05, + "step": 10111, + "training_step_time": 0.10843658447265625 + }, + { + "epoch": 1.54296875e-05, + "model_forward_time": 0.024720430374145508, + "step": 10112 + }, + { + "epoch": 1.54296875e-05, + "step": 10112, + "training_step_time": 0.11596989631652832 + }, + { + "epoch": 1.543121337890625e-05, + "model_forward_time": 0.02765369415283203, + "step": 10113 + }, + { + "epoch": 1.543121337890625e-05, + "step": 10113, + "training_step_time": 0.1116480827331543 + }, + { + "epoch": 1.54327392578125e-05, + "model_forward_time": 0.024901628494262695, + "step": 10114 + }, + { + "epoch": 1.54327392578125e-05, + "step": 10114, + "training_step_time": 0.11186599731445312 + }, + { + "epoch": 1.543426513671875e-05, + "model_forward_time": 0.025295019149780273, + "step": 10115 + }, + { + "epoch": 1.543426513671875e-05, + "step": 10115, + "training_step_time": 0.13511276245117188 + }, + { + "epoch": 1.5435791015625e-05, + "model_forward_time": 0.02487659454345703, + "step": 10116 + }, + { + "epoch": 1.5435791015625e-05, + "step": 10116, + "training_step_time": 0.11017584800720215 + }, + { + "epoch": 1.543731689453125e-05, + "model_forward_time": 0.02500152587890625, + "step": 10117 + }, + { + "epoch": 1.543731689453125e-05, + "step": 10117, + "training_step_time": 0.12564420700073242 + }, + { + "epoch": 1.54388427734375e-05, + "model_forward_time": 0.02510523796081543, + "step": 10118 + }, + { + "epoch": 1.54388427734375e-05, + "step": 10118, + "training_step_time": 0.10876727104187012 + }, + { + "epoch": 1.544036865234375e-05, + "model_forward_time": 0.0252838134765625, + "step": 10119 + }, + { + "epoch": 1.544036865234375e-05, + "step": 10119, + "training_step_time": 0.1938619613647461 + }, + { + "epoch": 1.544189453125e-05, + "grad_norm": 0.3355169892311096, + "learning_rate": 7.907626426090262e-05, + "loss": 0.024, + "step": 10120 + }, + { + "epoch": 1.544189453125e-05, + "model_forward_time": 0.024459362030029297, + "step": 10120 + }, + { + "epoch": 1.544189453125e-05, + "step": 10120, + "training_step_time": 0.12192940711975098 + }, + { + "epoch": 1.544342041015625e-05, + "model_forward_time": 0.02383112907409668, + "step": 10121 + }, + { + "epoch": 1.544342041015625e-05, + "step": 10121, + "training_step_time": 0.1436760425567627 + }, + { + "epoch": 1.54449462890625e-05, + "model_forward_time": 0.0248720645904541, + "step": 10122 + }, + { + "epoch": 1.54449462890625e-05, + "step": 10122, + "training_step_time": 0.1451125144958496 + }, + { + "epoch": 1.544647216796875e-05, + "model_forward_time": 0.02434086799621582, + "step": 10123 + }, + { + "epoch": 1.544647216796875e-05, + "step": 10123, + "training_step_time": 0.21503233909606934 + }, + { + "epoch": 1.5447998046875e-05, + "model_forward_time": 0.02450394630432129, + "step": 10124 + }, + { + "epoch": 1.5447998046875e-05, + "step": 10124, + "training_step_time": 0.11735272407531738 + }, + { + "epoch": 1.544952392578125e-05, + "model_forward_time": 0.023896217346191406, + "step": 10125 + }, + { + "epoch": 1.544952392578125e-05, + "step": 10125, + "training_step_time": 0.11086225509643555 + }, + { + "epoch": 1.54510498046875e-05, + "model_forward_time": 0.02550983428955078, + "step": 10126 + }, + { + "epoch": 1.54510498046875e-05, + "step": 10126, + "training_step_time": 0.1104276180267334 + }, + { + "epoch": 1.545257568359375e-05, + "model_forward_time": 0.025431156158447266, + "step": 10127 + }, + { + "epoch": 1.545257568359375e-05, + "step": 10127, + "training_step_time": 0.11063575744628906 + }, + { + "epoch": 1.54541015625e-05, + "model_forward_time": 0.02494668960571289, + "step": 10128 + }, + { + "epoch": 1.54541015625e-05, + "step": 10128, + "training_step_time": 0.11187005043029785 + }, + { + "epoch": 1.545562744140625e-05, + "model_forward_time": 0.024761676788330078, + "step": 10129 + }, + { + "epoch": 1.545562744140625e-05, + "step": 10129, + "training_step_time": 0.10711669921875 + }, + { + "epoch": 1.54571533203125e-05, + "grad_norm": 0.44187474250793457, + "learning_rate": 7.903140845350153e-05, + "loss": 0.0335, + "step": 10130 + }, + { + "epoch": 1.54571533203125e-05, + "model_forward_time": 0.025022029876708984, + "step": 10130 + }, + { + "epoch": 1.54571533203125e-05, + "step": 10130, + "training_step_time": 0.10806918144226074 + }, + { + "epoch": 1.545867919921875e-05, + "model_forward_time": 0.025018692016601562, + "step": 10131 + }, + { + "epoch": 1.545867919921875e-05, + "step": 10131, + "training_step_time": 0.10681724548339844 + }, + { + "epoch": 1.5460205078125e-05, + "model_forward_time": 0.025160789489746094, + "step": 10132 + }, + { + "epoch": 1.5460205078125e-05, + "step": 10132, + "training_step_time": 0.10776472091674805 + }, + { + "epoch": 1.546173095703125e-05, + "model_forward_time": 0.025214672088623047, + "step": 10133 + }, + { + "epoch": 1.546173095703125e-05, + "step": 10133, + "training_step_time": 0.10763883590698242 + }, + { + "epoch": 1.54632568359375e-05, + "model_forward_time": 0.024786710739135742, + "step": 10134 + }, + { + "epoch": 1.54632568359375e-05, + "step": 10134, + "training_step_time": 0.10617494583129883 + }, + { + "epoch": 1.546478271484375e-05, + "model_forward_time": 0.025217056274414062, + "step": 10135 + }, + { + "epoch": 1.546478271484375e-05, + "step": 10135, + "training_step_time": 0.11153984069824219 + }, + { + "epoch": 1.546630859375e-05, + "model_forward_time": 0.02510857582092285, + "step": 10136 + }, + { + "epoch": 1.546630859375e-05, + "step": 10136, + "training_step_time": 0.11200785636901855 + }, + { + "epoch": 1.546783447265625e-05, + "model_forward_time": 0.024888277053833008, + "step": 10137 + }, + { + "epoch": 1.546783447265625e-05, + "step": 10137, + "training_step_time": 0.10623979568481445 + }, + { + "epoch": 1.54693603515625e-05, + "model_forward_time": 0.02521514892578125, + "step": 10138 + }, + { + "epoch": 1.54693603515625e-05, + "step": 10138, + "training_step_time": 0.10669827461242676 + }, + { + "epoch": 1.547088623046875e-05, + "model_forward_time": 0.02492666244506836, + "step": 10139 + }, + { + "epoch": 1.547088623046875e-05, + "step": 10139, + "training_step_time": 0.1093299388885498 + }, + { + "epoch": 1.5472412109375e-05, + "grad_norm": 0.4617132544517517, + "learning_rate": 7.898651737020166e-05, + "loss": 0.0264, + "step": 10140 + }, + { + "epoch": 1.5472412109375e-05, + "model_forward_time": 0.024262428283691406, + "step": 10140 + }, + { + "epoch": 1.5472412109375e-05, + "step": 10140, + "training_step_time": 0.10864901542663574 + }, + { + "epoch": 1.547393798828125e-05, + "model_forward_time": 0.024130821228027344, + "step": 10141 + }, + { + "epoch": 1.547393798828125e-05, + "step": 10141, + "training_step_time": 0.12260818481445312 + }, + { + "epoch": 1.54754638671875e-05, + "model_forward_time": 0.024144411087036133, + "step": 10142 + }, + { + "epoch": 1.54754638671875e-05, + "step": 10142, + "training_step_time": 0.12442755699157715 + }, + { + "epoch": 1.547698974609375e-05, + "model_forward_time": 0.024448394775390625, + "step": 10143 + }, + { + "epoch": 1.547698974609375e-05, + "step": 10143, + "training_step_time": 0.12493252754211426 + }, + { + "epoch": 1.5478515625e-05, + "model_forward_time": 0.023976802825927734, + "step": 10144 + }, + { + "epoch": 1.5478515625e-05, + "step": 10144, + "training_step_time": 0.12100362777709961 + }, + { + "epoch": 1.548004150390625e-05, + "model_forward_time": 0.023984909057617188, + "step": 10145 + }, + { + "epoch": 1.548004150390625e-05, + "step": 10145, + "training_step_time": 0.11830854415893555 + }, + { + "epoch": 1.54815673828125e-05, + "model_forward_time": 0.02394866943359375, + "step": 10146 + }, + { + "epoch": 1.54815673828125e-05, + "step": 10146, + "training_step_time": 0.11339521408081055 + }, + { + "epoch": 1.548309326171875e-05, + "model_forward_time": 0.025025129318237305, + "step": 10147 + }, + { + "epoch": 1.548309326171875e-05, + "step": 10147, + "training_step_time": 0.1125185489654541 + }, + { + "epoch": 1.5484619140625e-05, + "model_forward_time": 0.025191783905029297, + "step": 10148 + }, + { + "epoch": 1.5484619140625e-05, + "step": 10148, + "training_step_time": 0.11363911628723145 + }, + { + "epoch": 1.548614501953125e-05, + "model_forward_time": 0.025202512741088867, + "step": 10149 + }, + { + "epoch": 1.548614501953125e-05, + "step": 10149, + "training_step_time": 0.11296248435974121 + }, + { + "epoch": 1.54876708984375e-05, + "grad_norm": 0.3276233673095703, + "learning_rate": 7.894159106554997e-05, + "loss": 0.0357, + "step": 10150 + }, + { + "epoch": 1.54876708984375e-05, + "model_forward_time": 0.024685382843017578, + "step": 10150 + }, + { + "epoch": 1.54876708984375e-05, + "step": 10150, + "training_step_time": 0.10923314094543457 + }, + { + "epoch": 1.548919677734375e-05, + "model_forward_time": 0.025930404663085938, + "step": 10151 + }, + { + "epoch": 1.548919677734375e-05, + "step": 10151, + "training_step_time": 0.10867023468017578 + }, + { + "epoch": 1.549072265625e-05, + "model_forward_time": 0.025136947631835938, + "step": 10152 + }, + { + "epoch": 1.549072265625e-05, + "step": 10152, + "training_step_time": 0.14772748947143555 + }, + { + "epoch": 1.549224853515625e-05, + "model_forward_time": 0.02504134178161621, + "step": 10153 + }, + { + "epoch": 1.549224853515625e-05, + "step": 10153, + "training_step_time": 0.15914702415466309 + }, + { + "epoch": 1.54937744140625e-05, + "model_forward_time": 0.024249792098999023, + "step": 10154 + }, + { + "epoch": 1.54937744140625e-05, + "step": 10154, + "training_step_time": 0.11568784713745117 + }, + { + "epoch": 1.549530029296875e-05, + "model_forward_time": 0.02451610565185547, + "step": 10155 + }, + { + "epoch": 1.549530029296875e-05, + "step": 10155, + "training_step_time": 0.13522982597351074 + }, + { + "epoch": 1.5496826171875e-05, + "model_forward_time": 0.02516031265258789, + "step": 10156 + }, + { + "epoch": 1.5496826171875e-05, + "step": 10156, + "training_step_time": 0.20038580894470215 + }, + { + "epoch": 1.549835205078125e-05, + "model_forward_time": 0.024537086486816406, + "step": 10157 + }, + { + "epoch": 1.549835205078125e-05, + "step": 10157, + "training_step_time": 0.107574462890625 + }, + { + "epoch": 1.54998779296875e-05, + "model_forward_time": 0.02442193031311035, + "step": 10158 + }, + { + "epoch": 1.54998779296875e-05, + "step": 10158, + "training_step_time": 0.10739016532897949 + }, + { + "epoch": 1.550140380859375e-05, + "model_forward_time": 0.025618553161621094, + "step": 10159 + }, + { + "epoch": 1.550140380859375e-05, + "step": 10159, + "training_step_time": 0.10900402069091797 + }, + { + "epoch": 1.55029296875e-05, + "grad_norm": 0.3725495934486389, + "learning_rate": 7.88966295941361e-05, + "loss": 0.0291, + "step": 10160 + }, + { + "epoch": 1.55029296875e-05, + "model_forward_time": 0.024832725524902344, + "step": 10160 + }, + { + "epoch": 1.55029296875e-05, + "step": 10160, + "training_step_time": 0.10961580276489258 + }, + { + "epoch": 1.550445556640625e-05, + "model_forward_time": 0.024940013885498047, + "step": 10161 + }, + { + "epoch": 1.550445556640625e-05, + "step": 10161, + "training_step_time": 0.12753939628601074 + }, + { + "epoch": 1.55059814453125e-05, + "model_forward_time": 0.024743318557739258, + "step": 10162 + }, + { + "epoch": 1.55059814453125e-05, + "step": 10162, + "training_step_time": 0.11195707321166992 + }, + { + "epoch": 1.550750732421875e-05, + "model_forward_time": 0.025281429290771484, + "step": 10163 + }, + { + "epoch": 1.550750732421875e-05, + "step": 10163, + "training_step_time": 0.11185574531555176 + }, + { + "epoch": 1.5509033203125e-05, + "model_forward_time": 0.024654626846313477, + "step": 10164 + }, + { + "epoch": 1.5509033203125e-05, + "step": 10164, + "training_step_time": 0.11767244338989258 + }, + { + "epoch": 1.551055908203125e-05, + "model_forward_time": 0.025166749954223633, + "step": 10165 + }, + { + "epoch": 1.551055908203125e-05, + "step": 10165, + "training_step_time": 0.21035242080688477 + }, + { + "epoch": 1.55120849609375e-05, + "model_forward_time": 0.024691343307495117, + "step": 10166 + }, + { + "epoch": 1.55120849609375e-05, + "step": 10166, + "training_step_time": 0.11823272705078125 + }, + { + "epoch": 1.551361083984375e-05, + "model_forward_time": 0.024360179901123047, + "step": 10167 + }, + { + "epoch": 1.551361083984375e-05, + "step": 10167, + "training_step_time": 0.16715764999389648 + }, + { + "epoch": 1.551513671875e-05, + "model_forward_time": 0.025134563446044922, + "step": 10168 + }, + { + "epoch": 1.551513671875e-05, + "step": 10168, + "training_step_time": 0.14338970184326172 + }, + { + "epoch": 1.551666259765625e-05, + "model_forward_time": 0.024564743041992188, + "step": 10169 + }, + { + "epoch": 1.551666259765625e-05, + "step": 10169, + "training_step_time": 0.22011566162109375 + }, + { + "epoch": 1.55181884765625e-05, + "grad_norm": 0.2825896739959717, + "learning_rate": 7.88516330105925e-05, + "loss": 0.0314, + "step": 10170 + }, + { + "epoch": 1.55181884765625e-05, + "model_forward_time": 0.024059534072875977, + "step": 10170 + }, + { + "epoch": 1.55181884765625e-05, + "step": 10170, + "training_step_time": 0.10860753059387207 + }, + { + "epoch": 1.551971435546875e-05, + "model_forward_time": 0.02482771873474121, + "step": 10171 + }, + { + "epoch": 1.551971435546875e-05, + "step": 10171, + "training_step_time": 0.10794401168823242 + }, + { + "epoch": 1.5521240234375e-05, + "model_forward_time": 0.024988889694213867, + "step": 10172 + }, + { + "epoch": 1.5521240234375e-05, + "step": 10172, + "training_step_time": 0.12369894981384277 + }, + { + "epoch": 1.552276611328125e-05, + "model_forward_time": 0.025028705596923828, + "step": 10173 + }, + { + "epoch": 1.552276611328125e-05, + "step": 10173, + "training_step_time": 0.10767269134521484 + }, + { + "epoch": 1.55242919921875e-05, + "model_forward_time": 0.025082111358642578, + "step": 10174 + }, + { + "epoch": 1.55242919921875e-05, + "step": 10174, + "training_step_time": 0.10709834098815918 + }, + { + "epoch": 1.552581787109375e-05, + "model_forward_time": 0.025279998779296875, + "step": 10175 + }, + { + "epoch": 1.552581787109375e-05, + "step": 10175, + "training_step_time": 0.10889506340026855 + }, + { + "epoch": 1.552734375e-05, + "model_forward_time": 0.024627685546875, + "step": 10176 + }, + { + "epoch": 1.552734375e-05, + "step": 10176, + "training_step_time": 0.11142921447753906 + }, + { + "epoch": 1.552886962890625e-05, + "model_forward_time": 0.02795696258544922, + "step": 10177 + }, + { + "epoch": 1.552886962890625e-05, + "step": 10177, + "training_step_time": 0.11047983169555664 + }, + { + "epoch": 1.55303955078125e-05, + "model_forward_time": 0.02510523796081543, + "step": 10178 + }, + { + "epoch": 1.55303955078125e-05, + "step": 10178, + "training_step_time": 0.11141157150268555 + }, + { + "epoch": 1.553192138671875e-05, + "model_forward_time": 0.025104284286499023, + "step": 10179 + }, + { + "epoch": 1.553192138671875e-05, + "step": 10179, + "training_step_time": 0.11028814315795898 + }, + { + "epoch": 1.5533447265625e-05, + "grad_norm": 0.43389415740966797, + "learning_rate": 7.880660136959428e-05, + "loss": 0.0361, + "step": 10180 + }, + { + "epoch": 1.5533447265625e-05, + "model_forward_time": 0.02452397346496582, + "step": 10180 + }, + { + "epoch": 1.5533447265625e-05, + "step": 10180, + "training_step_time": 0.10983538627624512 + }, + { + "epoch": 1.553497314453125e-05, + "model_forward_time": 0.025237321853637695, + "step": 10181 + }, + { + "epoch": 1.553497314453125e-05, + "step": 10181, + "training_step_time": 0.10766911506652832 + }, + { + "epoch": 1.55364990234375e-05, + "model_forward_time": 0.025011539459228516, + "step": 10182 + }, + { + "epoch": 1.55364990234375e-05, + "step": 10182, + "training_step_time": 0.1086127758026123 + }, + { + "epoch": 1.553802490234375e-05, + "model_forward_time": 0.025360107421875, + "step": 10183 + }, + { + "epoch": 1.553802490234375e-05, + "step": 10183, + "training_step_time": 0.11029529571533203 + }, + { + "epoch": 1.553955078125e-05, + "model_forward_time": 0.025107383728027344, + "step": 10184 + }, + { + "epoch": 1.553955078125e-05, + "step": 10184, + "training_step_time": 0.11351752281188965 + }, + { + "epoch": 1.554107666015625e-05, + "model_forward_time": 0.024702072143554688, + "step": 10185 + }, + { + "epoch": 1.554107666015625e-05, + "step": 10185, + "training_step_time": 0.10797858238220215 + }, + { + "epoch": 1.55426025390625e-05, + "model_forward_time": 0.02491450309753418, + "step": 10186 + }, + { + "epoch": 1.55426025390625e-05, + "step": 10186, + "training_step_time": 0.1082754135131836 + }, + { + "epoch": 1.554412841796875e-05, + "model_forward_time": 0.0250091552734375, + "step": 10187 + }, + { + "epoch": 1.554412841796875e-05, + "step": 10187, + "training_step_time": 0.10904169082641602 + }, + { + "epoch": 1.5545654296875e-05, + "model_forward_time": 0.025329113006591797, + "step": 10188 + }, + { + "epoch": 1.5545654296875e-05, + "step": 10188, + "training_step_time": 0.1121213436126709 + }, + { + "epoch": 1.554718017578125e-05, + "model_forward_time": 0.024982690811157227, + "step": 10189 + }, + { + "epoch": 1.554718017578125e-05, + "step": 10189, + "training_step_time": 0.10792684555053711 + }, + { + "epoch": 1.55487060546875e-05, + "grad_norm": 0.4379105865955353, + "learning_rate": 7.87615347258591e-05, + "loss": 0.0403, + "step": 10190 + }, + { + "epoch": 1.55487060546875e-05, + "model_forward_time": 0.02482151985168457, + "step": 10190 + }, + { + "epoch": 1.55487060546875e-05, + "step": 10190, + "training_step_time": 0.10898780822753906 + }, + { + "epoch": 1.555023193359375e-05, + "model_forward_time": 0.025065183639526367, + "step": 10191 + }, + { + "epoch": 1.555023193359375e-05, + "step": 10191, + "training_step_time": 0.11155295372009277 + }, + { + "epoch": 1.55517578125e-05, + "model_forward_time": 0.024828672409057617, + "step": 10192 + }, + { + "epoch": 1.55517578125e-05, + "step": 10192, + "training_step_time": 0.11032843589782715 + }, + { + "epoch": 1.555328369140625e-05, + "model_forward_time": 0.024771451950073242, + "step": 10193 + }, + { + "epoch": 1.555328369140625e-05, + "step": 10193, + "training_step_time": 0.1096959114074707 + }, + { + "epoch": 1.55548095703125e-05, + "model_forward_time": 0.024700164794921875, + "step": 10194 + }, + { + "epoch": 1.55548095703125e-05, + "step": 10194, + "training_step_time": 0.10805416107177734 + }, + { + "epoch": 1.555633544921875e-05, + "model_forward_time": 0.025272846221923828, + "step": 10195 + }, + { + "epoch": 1.555633544921875e-05, + "step": 10195, + "training_step_time": 0.10793685913085938 + }, + { + "epoch": 1.5557861328125e-05, + "model_forward_time": 0.025032520294189453, + "step": 10196 + }, + { + "epoch": 1.5557861328125e-05, + "step": 10196, + "training_step_time": 0.11159372329711914 + }, + { + "epoch": 1.555938720703125e-05, + "model_forward_time": 0.024997472763061523, + "step": 10197 + }, + { + "epoch": 1.555938720703125e-05, + "step": 10197, + "training_step_time": 0.1065070629119873 + }, + { + "epoch": 1.55609130859375e-05, + "model_forward_time": 0.024725675582885742, + "step": 10198 + }, + { + "epoch": 1.55609130859375e-05, + "step": 10198, + "training_step_time": 0.14882612228393555 + }, + { + "epoch": 1.556243896484375e-05, + "model_forward_time": 0.02486896514892578, + "step": 10199 + }, + { + "epoch": 1.556243896484375e-05, + "step": 10199, + "training_step_time": 0.1683499813079834 + }, + { + "epoch": 1.556396484375e-05, + "grad_norm": 0.33704426884651184, + "learning_rate": 7.871643313414718e-05, + "loss": 0.0319, + "step": 10200 + }, + { + "epoch": 1.556396484375e-05, + "model_forward_time": 0.024451494216918945, + "step": 10200 + }, + { + "epoch": 1.556396484375e-05, + "step": 10200, + "training_step_time": 0.10930633544921875 + }, + { + "epoch": 1.556549072265625e-05, + "model_forward_time": 0.024338722229003906, + "step": 10201 + }, + { + "epoch": 1.556549072265625e-05, + "step": 10201, + "training_step_time": 0.13043999671936035 + }, + { + "epoch": 1.55670166015625e-05, + "model_forward_time": 0.025401592254638672, + "step": 10202 + }, + { + "epoch": 1.55670166015625e-05, + "step": 10202, + "training_step_time": 0.20703792572021484 + }, + { + "epoch": 1.556854248046875e-05, + "model_forward_time": 0.024214982986450195, + "step": 10203 + }, + { + "epoch": 1.556854248046875e-05, + "step": 10203, + "training_step_time": 0.11046314239501953 + }, + { + "epoch": 1.5570068359375e-05, + "model_forward_time": 0.025408267974853516, + "step": 10204 + }, + { + "epoch": 1.5570068359375e-05, + "step": 10204, + "training_step_time": 0.1057283878326416 + }, + { + "epoch": 1.557159423828125e-05, + "model_forward_time": 0.025188922882080078, + "step": 10205 + }, + { + "epoch": 1.557159423828125e-05, + "step": 10205, + "training_step_time": 0.10866475105285645 + }, + { + "epoch": 1.55731201171875e-05, + "model_forward_time": 0.025022268295288086, + "step": 10206 + }, + { + "epoch": 1.55731201171875e-05, + "step": 10206, + "training_step_time": 0.1103060245513916 + }, + { + "epoch": 1.557464599609375e-05, + "model_forward_time": 0.024996280670166016, + "step": 10207 + }, + { + "epoch": 1.557464599609375e-05, + "step": 10207, + "training_step_time": 0.1293339729309082 + }, + { + "epoch": 1.5576171875e-05, + "model_forward_time": 0.0248410701751709, + "step": 10208 + }, + { + "epoch": 1.5576171875e-05, + "step": 10208, + "training_step_time": 0.1147916316986084 + }, + { + "epoch": 1.557769775390625e-05, + "model_forward_time": 0.0248110294342041, + "step": 10209 + }, + { + "epoch": 1.557769775390625e-05, + "step": 10209, + "training_step_time": 0.1178891658782959 + }, + { + "epoch": 1.55792236328125e-05, + "grad_norm": 0.41021430492401123, + "learning_rate": 7.867129664926123e-05, + "loss": 0.0298, + "step": 10210 + }, + { + "epoch": 1.55792236328125e-05, + "model_forward_time": 0.025182008743286133, + "step": 10210 + }, + { + "epoch": 1.55792236328125e-05, + "step": 10210, + "training_step_time": 0.16698932647705078 + }, + { + "epoch": 1.558074951171875e-05, + "model_forward_time": 0.024476289749145508, + "step": 10211 + }, + { + "epoch": 1.558074951171875e-05, + "step": 10211, + "training_step_time": 0.16120672225952148 + }, + { + "epoch": 1.5582275390625e-05, + "model_forward_time": 0.02428150177001953, + "step": 10212 + }, + { + "epoch": 1.5582275390625e-05, + "step": 10212, + "training_step_time": 0.2140662670135498 + }, + { + "epoch": 1.558380126953125e-05, + "model_forward_time": 0.02470541000366211, + "step": 10213 + }, + { + "epoch": 1.558380126953125e-05, + "step": 10213, + "training_step_time": 0.17453718185424805 + }, + { + "epoch": 1.55853271484375e-05, + "model_forward_time": 0.02372288703918457, + "step": 10214 + }, + { + "epoch": 1.55853271484375e-05, + "step": 10214, + "training_step_time": 0.11080789566040039 + }, + { + "epoch": 1.558685302734375e-05, + "model_forward_time": 0.0245511531829834, + "step": 10215 + }, + { + "epoch": 1.558685302734375e-05, + "step": 10215, + "training_step_time": 0.12552475929260254 + }, + { + "epoch": 1.558837890625e-05, + "model_forward_time": 0.02731609344482422, + "step": 10216 + }, + { + "epoch": 1.558837890625e-05, + "step": 10216, + "training_step_time": 0.13292694091796875 + }, + { + "epoch": 1.558990478515625e-05, + "model_forward_time": 0.024525880813598633, + "step": 10217 + }, + { + "epoch": 1.558990478515625e-05, + "step": 10217, + "training_step_time": 0.11364865303039551 + }, + { + "epoch": 1.55914306640625e-05, + "model_forward_time": 0.025045156478881836, + "step": 10218 + }, + { + "epoch": 1.55914306640625e-05, + "step": 10218, + "training_step_time": 0.11681747436523438 + }, + { + "epoch": 1.559295654296875e-05, + "model_forward_time": 0.02478194236755371, + "step": 10219 + }, + { + "epoch": 1.559295654296875e-05, + "step": 10219, + "training_step_time": 0.10770082473754883 + }, + { + "epoch": 1.5594482421875e-05, + "grad_norm": 0.3275561034679413, + "learning_rate": 7.862612532604632e-05, + "loss": 0.0239, + "step": 10220 + }, + { + "epoch": 1.5594482421875e-05, + "model_forward_time": 0.024823665618896484, + "step": 10220 + }, + { + "epoch": 1.5594482421875e-05, + "step": 10220, + "training_step_time": 0.10671210289001465 + }, + { + "epoch": 1.559600830078125e-05, + "model_forward_time": 0.02500629425048828, + "step": 10221 + }, + { + "epoch": 1.559600830078125e-05, + "step": 10221, + "training_step_time": 0.10480308532714844 + }, + { + "epoch": 1.55975341796875e-05, + "model_forward_time": 0.02762746810913086, + "step": 10222 + }, + { + "epoch": 1.55975341796875e-05, + "step": 10222, + "training_step_time": 0.10710310935974121 + }, + { + "epoch": 1.559906005859375e-05, + "model_forward_time": 0.024909019470214844, + "step": 10223 + }, + { + "epoch": 1.559906005859375e-05, + "step": 10223, + "training_step_time": 0.10817360877990723 + }, + { + "epoch": 1.56005859375e-05, + "model_forward_time": 0.024895906448364258, + "step": 10224 + }, + { + "epoch": 1.56005859375e-05, + "step": 10224, + "training_step_time": 0.11210775375366211 + }, + { + "epoch": 1.560211181640625e-05, + "model_forward_time": 0.025146484375, + "step": 10225 + }, + { + "epoch": 1.560211181640625e-05, + "step": 10225, + "training_step_time": 0.17537879943847656 + }, + { + "epoch": 1.56036376953125e-05, + "model_forward_time": 0.024442672729492188, + "step": 10226 + }, + { + "epoch": 1.56036376953125e-05, + "step": 10226, + "training_step_time": 0.18540072441101074 + }, + { + "epoch": 1.560516357421875e-05, + "model_forward_time": 0.024337053298950195, + "step": 10227 + }, + { + "epoch": 1.560516357421875e-05, + "step": 10227, + "training_step_time": 0.18052291870117188 + }, + { + "epoch": 1.5606689453125e-05, + "model_forward_time": 0.0257570743560791, + "step": 10228 + }, + { + "epoch": 1.5606689453125e-05, + "step": 10228, + "training_step_time": 0.16207289695739746 + }, + { + "epoch": 1.560821533203125e-05, + "model_forward_time": 0.024485111236572266, + "step": 10229 + }, + { + "epoch": 1.560821533203125e-05, + "step": 10229, + "training_step_time": 0.15619993209838867 + }, + { + "epoch": 1.56097412109375e-05, + "grad_norm": 0.5337497591972351, + "learning_rate": 7.858091921938988e-05, + "loss": 0.0314, + "step": 10230 + }, + { + "epoch": 1.56097412109375e-05, + "model_forward_time": 0.02418208122253418, + "step": 10230 + }, + { + "epoch": 1.56097412109375e-05, + "step": 10230, + "training_step_time": 0.14315152168273926 + }, + { + "epoch": 1.561126708984375e-05, + "model_forward_time": 0.023787736892700195, + "step": 10231 + }, + { + "epoch": 1.561126708984375e-05, + "step": 10231, + "training_step_time": 0.12587380409240723 + }, + { + "epoch": 1.561279296875e-05, + "model_forward_time": 0.024656295776367188, + "step": 10232 + }, + { + "epoch": 1.561279296875e-05, + "step": 10232, + "training_step_time": 0.1253657341003418 + }, + { + "epoch": 1.561431884765625e-05, + "model_forward_time": 0.02521514892578125, + "step": 10233 + }, + { + "epoch": 1.561431884765625e-05, + "step": 10233, + "training_step_time": 0.12489080429077148 + }, + { + "epoch": 1.56158447265625e-05, + "model_forward_time": 0.0247342586517334, + "step": 10234 + }, + { + "epoch": 1.56158447265625e-05, + "step": 10234, + "training_step_time": 0.11517691612243652 + }, + { + "epoch": 1.561737060546875e-05, + "model_forward_time": 0.02461695671081543, + "step": 10235 + }, + { + "epoch": 1.561737060546875e-05, + "step": 10235, + "training_step_time": 0.11486172676086426 + }, + { + "epoch": 1.5618896484375e-05, + "model_forward_time": 0.0251922607421875, + "step": 10236 + }, + { + "epoch": 1.5618896484375e-05, + "step": 10236, + "training_step_time": 0.11312198638916016 + }, + { + "epoch": 1.562042236328125e-05, + "model_forward_time": 0.02558135986328125, + "step": 10237 + }, + { + "epoch": 1.562042236328125e-05, + "step": 10237, + "training_step_time": 0.11275792121887207 + }, + { + "epoch": 1.56219482421875e-05, + "model_forward_time": 0.025443077087402344, + "step": 10238 + }, + { + "epoch": 1.56219482421875e-05, + "step": 10238, + "training_step_time": 0.11144113540649414 + }, + { + "epoch": 1.562347412109375e-05, + "model_forward_time": 0.02516460418701172, + "step": 10239 + }, + { + "epoch": 1.562347412109375e-05, + "step": 10239, + "training_step_time": 0.108642578125 + }, + { + "epoch": 1.5625e-05, + "grad_norm": 0.21214427053928375, + "learning_rate": 7.85356783842216e-05, + "loss": 0.0287, + "step": 10240 + }, + { + "epoch": 1.5625e-05, + "model_forward_time": 0.02578902244567871, + "step": 10240 + }, + { + "epoch": 1.5625e-05, + "step": 10240, + "training_step_time": 0.10879278182983398 + }, + { + "epoch": 1.562652587890625e-05, + "model_forward_time": 0.024303674697875977, + "step": 10241 + }, + { + "epoch": 1.562652587890625e-05, + "step": 10241, + "training_step_time": 0.1445763111114502 + }, + { + "epoch": 1.56280517578125e-05, + "model_forward_time": 0.02421712875366211, + "step": 10242 + }, + { + "epoch": 1.56280517578125e-05, + "step": 10242, + "training_step_time": 0.16765046119689941 + }, + { + "epoch": 1.562957763671875e-05, + "model_forward_time": 0.02426433563232422, + "step": 10243 + }, + { + "epoch": 1.562957763671875e-05, + "step": 10243, + "training_step_time": 0.11311125755310059 + }, + { + "epoch": 1.5631103515625e-05, + "model_forward_time": 0.024117469787597656, + "step": 10244 + }, + { + "epoch": 1.5631103515625e-05, + "step": 10244, + "training_step_time": 0.1351630687713623 + }, + { + "epoch": 1.563262939453125e-05, + "model_forward_time": 0.025215864181518555, + "step": 10245 + }, + { + "epoch": 1.563262939453125e-05, + "step": 10245, + "training_step_time": 0.20808887481689453 + }, + { + "epoch": 1.56341552734375e-05, + "model_forward_time": 0.02449488639831543, + "step": 10246 + }, + { + "epoch": 1.56341552734375e-05, + "step": 10246, + "training_step_time": 0.1065220832824707 + }, + { + "epoch": 1.563568115234375e-05, + "model_forward_time": 0.024907588958740234, + "step": 10247 + }, + { + "epoch": 1.563568115234375e-05, + "step": 10247, + "training_step_time": 0.10624432563781738 + }, + { + "epoch": 1.563720703125e-05, + "model_forward_time": 0.02440476417541504, + "step": 10248 + }, + { + "epoch": 1.563720703125e-05, + "step": 10248, + "training_step_time": 0.16401243209838867 + }, + { + "epoch": 1.563873291015625e-05, + "model_forward_time": 0.02478194236755371, + "step": 10249 + }, + { + "epoch": 1.563873291015625e-05, + "step": 10249, + "training_step_time": 0.12877440452575684 + }, + { + "epoch": 1.56402587890625e-05, + "grad_norm": 0.3400780260562897, + "learning_rate": 7.849040287551331e-05, + "loss": 0.0263, + "step": 10250 + }, + { + "epoch": 1.56402587890625e-05, + "model_forward_time": 0.024584293365478516, + "step": 10250 + }, + { + "epoch": 1.56402587890625e-05, + "step": 10250, + "training_step_time": 0.20516180992126465 + }, + { + "epoch": 1.564178466796875e-05, + "model_forward_time": 0.024913311004638672, + "step": 10251 + }, + { + "epoch": 1.564178466796875e-05, + "step": 10251, + "training_step_time": 0.10778260231018066 + }, + { + "epoch": 1.5643310546875e-05, + "model_forward_time": 0.02475714683532715, + "step": 10252 + }, + { + "epoch": 1.5643310546875e-05, + "step": 10252, + "training_step_time": 0.11064982414245605 + }, + { + "epoch": 1.564483642578125e-05, + "model_forward_time": 0.025543212890625, + "step": 10253 + }, + { + "epoch": 1.564483642578125e-05, + "step": 10253, + "training_step_time": 0.12283635139465332 + }, + { + "epoch": 1.56463623046875e-05, + "model_forward_time": 0.02518606185913086, + "step": 10254 + }, + { + "epoch": 1.56463623046875e-05, + "step": 10254, + "training_step_time": 0.1218266487121582 + }, + { + "epoch": 1.564788818359375e-05, + "model_forward_time": 0.025127887725830078, + "step": 10255 + }, + { + "epoch": 1.564788818359375e-05, + "step": 10255, + "training_step_time": 0.12517857551574707 + }, + { + "epoch": 1.56494140625e-05, + "model_forward_time": 0.025538921356201172, + "step": 10256 + }, + { + "epoch": 1.56494140625e-05, + "step": 10256, + "training_step_time": 0.12697267532348633 + }, + { + "epoch": 1.565093994140625e-05, + "model_forward_time": 0.0253446102142334, + "step": 10257 + }, + { + "epoch": 1.565093994140625e-05, + "step": 10257, + "training_step_time": 0.15961241722106934 + }, + { + "epoch": 1.56524658203125e-05, + "model_forward_time": 0.02763509750366211, + "step": 10258 + }, + { + "epoch": 1.56524658203125e-05, + "step": 10258, + "training_step_time": 0.17592763900756836 + }, + { + "epoch": 1.565399169921875e-05, + "model_forward_time": 0.024128198623657227, + "step": 10259 + }, + { + "epoch": 1.565399169921875e-05, + "step": 10259, + "training_step_time": 0.12496781349182129 + }, + { + "epoch": 1.5655517578125e-05, + "grad_norm": 0.3840779960155487, + "learning_rate": 7.844509274827907e-05, + "loss": 0.0207, + "step": 10260 + }, + { + "epoch": 1.5655517578125e-05, + "model_forward_time": 0.023936748504638672, + "step": 10260 + }, + { + "epoch": 1.5655517578125e-05, + "step": 10260, + "training_step_time": 0.10991787910461426 + }, + { + "epoch": 1.565704345703125e-05, + "model_forward_time": 0.024654865264892578, + "step": 10261 + }, + { + "epoch": 1.565704345703125e-05, + "step": 10261, + "training_step_time": 0.10660600662231445 + }, + { + "epoch": 1.56585693359375e-05, + "model_forward_time": 0.026385068893432617, + "step": 10262 + }, + { + "epoch": 1.56585693359375e-05, + "step": 10262, + "training_step_time": 0.11168503761291504 + }, + { + "epoch": 1.566009521484375e-05, + "model_forward_time": 0.02561783790588379, + "step": 10263 + }, + { + "epoch": 1.566009521484375e-05, + "step": 10263, + "training_step_time": 0.10788774490356445 + }, + { + "epoch": 1.566162109375e-05, + "model_forward_time": 0.025317907333374023, + "step": 10264 + }, + { + "epoch": 1.566162109375e-05, + "step": 10264, + "training_step_time": 0.10884642601013184 + }, + { + "epoch": 1.566314697265625e-05, + "model_forward_time": 0.027535438537597656, + "step": 10265 + }, + { + "epoch": 1.566314697265625e-05, + "step": 10265, + "training_step_time": 0.10944414138793945 + }, + { + "epoch": 1.56646728515625e-05, + "model_forward_time": 0.02522754669189453, + "step": 10266 + }, + { + "epoch": 1.56646728515625e-05, + "step": 10266, + "training_step_time": 0.11238646507263184 + }, + { + "epoch": 1.566619873046875e-05, + "model_forward_time": 0.025668621063232422, + "step": 10267 + }, + { + "epoch": 1.566619873046875e-05, + "step": 10267, + "training_step_time": 0.11189842224121094 + }, + { + "epoch": 1.5667724609375e-05, + "model_forward_time": 0.025130271911621094, + "step": 10268 + }, + { + "epoch": 1.5667724609375e-05, + "step": 10268, + "training_step_time": 0.10653567314147949 + }, + { + "epoch": 1.566925048828125e-05, + "model_forward_time": 0.025359153747558594, + "step": 10269 + }, + { + "epoch": 1.566925048828125e-05, + "step": 10269, + "training_step_time": 0.1083986759185791 + }, + { + "epoch": 1.56707763671875e-05, + "grad_norm": 0.5742058157920837, + "learning_rate": 7.839974805757496e-05, + "loss": 0.0311, + "step": 10270 + }, + { + "epoch": 1.56707763671875e-05, + "model_forward_time": 0.02496194839477539, + "step": 10270 + }, + { + "epoch": 1.56707763671875e-05, + "step": 10270, + "training_step_time": 0.11120343208312988 + }, + { + "epoch": 1.567230224609375e-05, + "model_forward_time": 0.02530956268310547, + "step": 10271 + }, + { + "epoch": 1.567230224609375e-05, + "step": 10271, + "training_step_time": 0.11691498756408691 + }, + { + "epoch": 1.5673828125e-05, + "model_forward_time": 0.02495884895324707, + "step": 10272 + }, + { + "epoch": 1.5673828125e-05, + "step": 10272, + "training_step_time": 0.11290431022644043 + }, + { + "epoch": 1.567535400390625e-05, + "model_forward_time": 0.025396347045898438, + "step": 10273 + }, + { + "epoch": 1.567535400390625e-05, + "step": 10273, + "training_step_time": 0.11287784576416016 + }, + { + "epoch": 1.56768798828125e-05, + "model_forward_time": 0.02756333351135254, + "step": 10274 + }, + { + "epoch": 1.56768798828125e-05, + "step": 10274, + "training_step_time": 0.11761093139648438 + }, + { + "epoch": 1.567840576171875e-05, + "model_forward_time": 0.024433374404907227, + "step": 10275 + }, + { + "epoch": 1.567840576171875e-05, + "step": 10275, + "training_step_time": 0.1141507625579834 + }, + { + "epoch": 1.5679931640625e-05, + "model_forward_time": 0.025264263153076172, + "step": 10276 + }, + { + "epoch": 1.5679931640625e-05, + "step": 10276, + "training_step_time": 0.11176371574401855 + }, + { + "epoch": 1.568145751953125e-05, + "model_forward_time": 0.024656057357788086, + "step": 10277 + }, + { + "epoch": 1.568145751953125e-05, + "step": 10277, + "training_step_time": 0.11355209350585938 + }, + { + "epoch": 1.56829833984375e-05, + "model_forward_time": 0.025318384170532227, + "step": 10278 + }, + { + "epoch": 1.56829833984375e-05, + "step": 10278, + "training_step_time": 0.11013364791870117 + }, + { + "epoch": 1.568450927734375e-05, + "model_forward_time": 0.025177478790283203, + "step": 10279 + }, + { + "epoch": 1.568450927734375e-05, + "step": 10279, + "training_step_time": 0.1130211353302002 + }, + { + "epoch": 1.568603515625e-05, + "grad_norm": 0.41737622022628784, + "learning_rate": 7.835436885849902e-05, + "loss": 0.0323, + "step": 10280 + }, + { + "epoch": 1.568603515625e-05, + "model_forward_time": 0.0251772403717041, + "step": 10280 + }, + { + "epoch": 1.568603515625e-05, + "step": 10280, + "training_step_time": 0.11031460762023926 + }, + { + "epoch": 1.568756103515625e-05, + "model_forward_time": 0.025476455688476562, + "step": 10281 + }, + { + "epoch": 1.568756103515625e-05, + "step": 10281, + "training_step_time": 0.10994696617126465 + }, + { + "epoch": 1.56890869140625e-05, + "model_forward_time": 0.02496170997619629, + "step": 10282 + }, + { + "epoch": 1.56890869140625e-05, + "step": 10282, + "training_step_time": 0.11352396011352539 + }, + { + "epoch": 1.569061279296875e-05, + "model_forward_time": 0.025426626205444336, + "step": 10283 + }, + { + "epoch": 1.569061279296875e-05, + "step": 10283, + "training_step_time": 0.11088180541992188 + }, + { + "epoch": 1.5692138671875e-05, + "model_forward_time": 0.025313138961791992, + "step": 10284 + }, + { + "epoch": 1.5692138671875e-05, + "step": 10284, + "training_step_time": 0.11724114418029785 + }, + { + "epoch": 1.569366455078125e-05, + "model_forward_time": 0.025242090225219727, + "step": 10285 + }, + { + "epoch": 1.569366455078125e-05, + "step": 10285, + "training_step_time": 0.10913515090942383 + }, + { + "epoch": 1.56951904296875e-05, + "model_forward_time": 0.02550983428955078, + "step": 10286 + }, + { + "epoch": 1.56951904296875e-05, + "step": 10286, + "training_step_time": 0.10813069343566895 + }, + { + "epoch": 1.569671630859375e-05, + "model_forward_time": 0.025197505950927734, + "step": 10287 + }, + { + "epoch": 1.569671630859375e-05, + "step": 10287, + "training_step_time": 0.201124906539917 + }, + { + "epoch": 1.56982421875e-05, + "model_forward_time": 0.024276018142700195, + "step": 10288 + }, + { + "epoch": 1.56982421875e-05, + "step": 10288, + "training_step_time": 0.18512201309204102 + }, + { + "epoch": 1.569976806640625e-05, + "model_forward_time": 0.028685569763183594, + "step": 10289 + }, + { + "epoch": 1.569976806640625e-05, + "step": 10289, + "training_step_time": 0.16069865226745605 + }, + { + "epoch": 1.57012939453125e-05, + "grad_norm": 0.26310300827026367, + "learning_rate": 7.830895520619128e-05, + "loss": 0.0373, + "step": 10290 + }, + { + "epoch": 1.57012939453125e-05, + "model_forward_time": 0.024645566940307617, + "step": 10290 + }, + { + "epoch": 1.57012939453125e-05, + "step": 10290, + "training_step_time": 0.15724515914916992 + }, + { + "epoch": 1.570281982421875e-05, + "model_forward_time": 0.02434682846069336, + "step": 10291 + }, + { + "epoch": 1.570281982421875e-05, + "step": 10291, + "training_step_time": 0.10386896133422852 + }, + { + "epoch": 1.5704345703125e-05, + "model_forward_time": 0.02530670166015625, + "step": 10292 + }, + { + "epoch": 1.5704345703125e-05, + "step": 10292, + "training_step_time": 0.10911059379577637 + }, + { + "epoch": 1.570587158203125e-05, + "model_forward_time": 0.02528858184814453, + "step": 10293 + }, + { + "epoch": 1.570587158203125e-05, + "step": 10293, + "training_step_time": 0.1091923713684082 + }, + { + "epoch": 1.57073974609375e-05, + "model_forward_time": 0.025514602661132812, + "step": 10294 + }, + { + "epoch": 1.57073974609375e-05, + "step": 10294, + "training_step_time": 0.1449434757232666 + }, + { + "epoch": 1.570892333984375e-05, + "model_forward_time": 0.024932384490966797, + "step": 10295 + }, + { + "epoch": 1.570892333984375e-05, + "step": 10295, + "training_step_time": 0.1384599208831787 + }, + { + "epoch": 1.571044921875e-05, + "model_forward_time": 0.024788856506347656, + "step": 10296 + }, + { + "epoch": 1.571044921875e-05, + "step": 10296, + "training_step_time": 0.1149301528930664 + }, + { + "epoch": 1.571197509765625e-05, + "model_forward_time": 0.02512359619140625, + "step": 10297 + }, + { + "epoch": 1.571197509765625e-05, + "step": 10297, + "training_step_time": 0.11466646194458008 + }, + { + "epoch": 1.57135009765625e-05, + "model_forward_time": 0.024535179138183594, + "step": 10298 + }, + { + "epoch": 1.57135009765625e-05, + "step": 10298, + "training_step_time": 0.11847686767578125 + }, + { + "epoch": 1.571502685546875e-05, + "model_forward_time": 0.025227069854736328, + "step": 10299 + }, + { + "epoch": 1.571502685546875e-05, + "step": 10299, + "training_step_time": 0.12407088279724121 + }, + { + "epoch": 1.5716552734375e-05, + "grad_norm": 0.6257118582725525, + "learning_rate": 7.82635071558336e-05, + "loss": 0.028, + "step": 10300 + }, + { + "epoch": 1.5716552734375e-05, + "model_forward_time": 0.025081872940063477, + "step": 10300 + }, + { + "epoch": 1.5716552734375e-05, + "step": 10300, + "training_step_time": 0.19926214218139648 + }, + { + "epoch": 1.571807861328125e-05, + "model_forward_time": 0.02441263198852539, + "step": 10301 + }, + { + "epoch": 1.571807861328125e-05, + "step": 10301, + "training_step_time": 0.12649202346801758 + }, + { + "epoch": 1.57196044921875e-05, + "model_forward_time": 0.0240018367767334, + "step": 10302 + }, + { + "epoch": 1.57196044921875e-05, + "step": 10302, + "training_step_time": 0.13016057014465332 + }, + { + "epoch": 1.572113037109375e-05, + "model_forward_time": 0.024486541748046875, + "step": 10303 + }, + { + "epoch": 1.572113037109375e-05, + "step": 10303, + "training_step_time": 0.17170429229736328 + }, + { + "epoch": 1.572265625e-05, + "model_forward_time": 0.024966955184936523, + "step": 10304 + }, + { + "epoch": 1.572265625e-05, + "step": 10304, + "training_step_time": 0.16825151443481445 + }, + { + "epoch": 1.572418212890625e-05, + "model_forward_time": 0.024628400802612305, + "step": 10305 + }, + { + "epoch": 1.572418212890625e-05, + "step": 10305, + "training_step_time": 0.13275146484375 + }, + { + "epoch": 1.57257080078125e-05, + "model_forward_time": 0.02416062355041504, + "step": 10306 + }, + { + "epoch": 1.57257080078125e-05, + "step": 10306, + "training_step_time": 0.10798788070678711 + }, + { + "epoch": 1.572723388671875e-05, + "model_forward_time": 0.025408267974853516, + "step": 10307 + }, + { + "epoch": 1.572723388671875e-05, + "step": 10307, + "training_step_time": 0.12199568748474121 + }, + { + "epoch": 1.5728759765625e-05, + "model_forward_time": 0.025058507919311523, + "step": 10308 + }, + { + "epoch": 1.5728759765625e-05, + "step": 10308, + "training_step_time": 0.10778594017028809 + }, + { + "epoch": 1.573028564453125e-05, + "model_forward_time": 0.025176525115966797, + "step": 10309 + }, + { + "epoch": 1.573028564453125e-05, + "step": 10309, + "training_step_time": 0.10720205307006836 + }, + { + "epoch": 1.57318115234375e-05, + "grad_norm": 0.36297407746315, + "learning_rate": 7.821802476264966e-05, + "loss": 0.0224, + "step": 10310 + }, + { + "epoch": 1.57318115234375e-05, + "model_forward_time": 0.02517247200012207, + "step": 10310 + }, + { + "epoch": 1.57318115234375e-05, + "step": 10310, + "training_step_time": 0.11198067665100098 + }, + { + "epoch": 1.573333740234375e-05, + "model_forward_time": 0.025423288345336914, + "step": 10311 + }, + { + "epoch": 1.573333740234375e-05, + "step": 10311, + "training_step_time": 0.10831046104431152 + }, + { + "epoch": 1.573486328125e-05, + "model_forward_time": 0.025219202041625977, + "step": 10312 + }, + { + "epoch": 1.573486328125e-05, + "step": 10312, + "training_step_time": 0.10748600959777832 + }, + { + "epoch": 1.573638916015625e-05, + "model_forward_time": 0.024961233139038086, + "step": 10313 + }, + { + "epoch": 1.573638916015625e-05, + "step": 10313, + "training_step_time": 0.1071629524230957 + }, + { + "epoch": 1.57379150390625e-05, + "model_forward_time": 0.024869203567504883, + "step": 10314 + }, + { + "epoch": 1.57379150390625e-05, + "step": 10314, + "training_step_time": 0.10721874237060547 + }, + { + "epoch": 1.573944091796875e-05, + "model_forward_time": 0.025130748748779297, + "step": 10315 + }, + { + "epoch": 1.573944091796875e-05, + "step": 10315, + "training_step_time": 0.10694360733032227 + }, + { + "epoch": 1.5740966796875e-05, + "model_forward_time": 0.025161027908325195, + "step": 10316 + }, + { + "epoch": 1.5740966796875e-05, + "step": 10316, + "training_step_time": 0.10783004760742188 + }, + { + "epoch": 1.574249267578125e-05, + "model_forward_time": 0.025210142135620117, + "step": 10317 + }, + { + "epoch": 1.574249267578125e-05, + "step": 10317, + "training_step_time": 0.1116178035736084 + }, + { + "epoch": 1.57440185546875e-05, + "model_forward_time": 0.024329185485839844, + "step": 10318 + }, + { + "epoch": 1.57440185546875e-05, + "step": 10318, + "training_step_time": 0.10855627059936523 + }, + { + "epoch": 1.574554443359375e-05, + "model_forward_time": 0.026468753814697266, + "step": 10319 + }, + { + "epoch": 1.574554443359375e-05, + "step": 10319, + "training_step_time": 0.11608552932739258 + }, + { + "epoch": 1.57470703125e-05, + "grad_norm": 0.5447708964347839, + "learning_rate": 7.817250808190483e-05, + "loss": 0.0283, + "step": 10320 + }, + { + "epoch": 1.57470703125e-05, + "model_forward_time": 0.024248838424682617, + "step": 10320 + }, + { + "epoch": 1.57470703125e-05, + "step": 10320, + "training_step_time": 0.12022972106933594 + }, + { + "epoch": 1.574859619140625e-05, + "model_forward_time": 0.02447676658630371, + "step": 10321 + }, + { + "epoch": 1.574859619140625e-05, + "step": 10321, + "training_step_time": 0.11070585250854492 + }, + { + "epoch": 1.57501220703125e-05, + "model_forward_time": 0.02437758445739746, + "step": 10322 + }, + { + "epoch": 1.57501220703125e-05, + "step": 10322, + "training_step_time": 0.1063680648803711 + }, + { + "epoch": 1.575164794921875e-05, + "model_forward_time": 0.02397918701171875, + "step": 10323 + }, + { + "epoch": 1.575164794921875e-05, + "step": 10323, + "training_step_time": 0.10804319381713867 + }, + { + "epoch": 1.5753173828125e-05, + "model_forward_time": 0.024433612823486328, + "step": 10324 + }, + { + "epoch": 1.5753173828125e-05, + "step": 10324, + "training_step_time": 0.1075127124786377 + }, + { + "epoch": 1.575469970703125e-05, + "model_forward_time": 0.024324417114257812, + "step": 10325 + }, + { + "epoch": 1.575469970703125e-05, + "step": 10325, + "training_step_time": 0.10645389556884766 + }, + { + "epoch": 1.57562255859375e-05, + "model_forward_time": 0.024509191513061523, + "step": 10326 + }, + { + "epoch": 1.57562255859375e-05, + "step": 10326, + "training_step_time": 0.10995078086853027 + }, + { + "epoch": 1.575775146484375e-05, + "model_forward_time": 0.024807214736938477, + "step": 10327 + }, + { + "epoch": 1.575775146484375e-05, + "step": 10327, + "training_step_time": 0.10920119285583496 + }, + { + "epoch": 1.575927734375e-05, + "model_forward_time": 0.02449512481689453, + "step": 10328 + }, + { + "epoch": 1.575927734375e-05, + "step": 10328, + "training_step_time": 0.1074056625366211 + }, + { + "epoch": 1.576080322265625e-05, + "model_forward_time": 0.02454543113708496, + "step": 10329 + }, + { + "epoch": 1.576080322265625e-05, + "step": 10329, + "training_step_time": 0.10706949234008789 + }, + { + "epoch": 1.57623291015625e-05, + "grad_norm": 0.4327964186668396, + "learning_rate": 7.81269571689062e-05, + "loss": 0.0286, + "step": 10330 + }, + { + "epoch": 1.57623291015625e-05, + "model_forward_time": 0.02394247055053711, + "step": 10330 + }, + { + "epoch": 1.57623291015625e-05, + "step": 10330, + "training_step_time": 0.10729646682739258 + }, + { + "epoch": 1.576385498046875e-05, + "model_forward_time": 0.024763107299804688, + "step": 10331 + }, + { + "epoch": 1.576385498046875e-05, + "step": 10331, + "training_step_time": 0.10574126243591309 + }, + { + "epoch": 1.5765380859375e-05, + "model_forward_time": 0.023756027221679688, + "step": 10332 + }, + { + "epoch": 1.5765380859375e-05, + "step": 10332, + "training_step_time": 0.10875678062438965 + }, + { + "epoch": 1.576690673828125e-05, + "model_forward_time": 0.023831844329833984, + "step": 10333 + }, + { + "epoch": 1.576690673828125e-05, + "step": 10333, + "training_step_time": 0.11569762229919434 + }, + { + "epoch": 1.57684326171875e-05, + "model_forward_time": 0.024391651153564453, + "step": 10334 + }, + { + "epoch": 1.57684326171875e-05, + "step": 10334, + "training_step_time": 0.11629223823547363 + }, + { + "epoch": 1.576995849609375e-05, + "model_forward_time": 0.024626970291137695, + "step": 10335 + }, + { + "epoch": 1.576995849609375e-05, + "step": 10335, + "training_step_time": 0.11095881462097168 + }, + { + "epoch": 1.5771484375e-05, + "model_forward_time": 0.024695634841918945, + "step": 10336 + }, + { + "epoch": 1.5771484375e-05, + "step": 10336, + "training_step_time": 0.22139835357666016 + }, + { + "epoch": 1.577301025390625e-05, + "model_forward_time": 0.023496627807617188, + "step": 10337 + }, + { + "epoch": 1.577301025390625e-05, + "step": 10337, + "training_step_time": 0.12123847007751465 + }, + { + "epoch": 1.57745361328125e-05, + "model_forward_time": 0.02317643165588379, + "step": 10338 + }, + { + "epoch": 1.57745361328125e-05, + "step": 10338, + "training_step_time": 0.10971307754516602 + }, + { + "epoch": 1.577606201171875e-05, + "model_forward_time": 0.02431178092956543, + "step": 10339 + }, + { + "epoch": 1.577606201171875e-05, + "step": 10339, + "training_step_time": 0.11378884315490723 + }, + { + "epoch": 1.5777587890625e-05, + "grad_norm": 0.6996987462043762, + "learning_rate": 7.808137207900241e-05, + "loss": 0.0295, + "step": 10340 + }, + { + "epoch": 1.5777587890625e-05, + "model_forward_time": 0.02449941635131836, + "step": 10340 + }, + { + "epoch": 1.5777587890625e-05, + "step": 10340, + "training_step_time": 0.10889911651611328 + }, + { + "epoch": 1.577911376953125e-05, + "model_forward_time": 0.024165630340576172, + "step": 10341 + }, + { + "epoch": 1.577911376953125e-05, + "step": 10341, + "training_step_time": 0.1344013214111328 + }, + { + "epoch": 1.57806396484375e-05, + "model_forward_time": 0.02767634391784668, + "step": 10342 + }, + { + "epoch": 1.57806396484375e-05, + "step": 10342, + "training_step_time": 0.1290912628173828 + }, + { + "epoch": 1.578216552734375e-05, + "model_forward_time": 0.023853302001953125, + "step": 10343 + }, + { + "epoch": 1.578216552734375e-05, + "step": 10343, + "training_step_time": 0.11023759841918945 + }, + { + "epoch": 1.578369140625e-05, + "model_forward_time": 0.02432560920715332, + "step": 10344 + }, + { + "epoch": 1.578369140625e-05, + "step": 10344, + "training_step_time": 0.11093735694885254 + }, + { + "epoch": 1.578521728515625e-05, + "model_forward_time": 0.024164676666259766, + "step": 10345 + }, + { + "epoch": 1.578521728515625e-05, + "step": 10345, + "training_step_time": 0.11302733421325684 + }, + { + "epoch": 1.57867431640625e-05, + "model_forward_time": 0.024091482162475586, + "step": 10346 + }, + { + "epoch": 1.57867431640625e-05, + "step": 10346, + "training_step_time": 0.15638375282287598 + }, + { + "epoch": 1.578826904296875e-05, + "model_forward_time": 0.023479461669921875, + "step": 10347 + }, + { + "epoch": 1.578826904296875e-05, + "step": 10347, + "training_step_time": 0.1535797119140625 + }, + { + "epoch": 1.5789794921875e-05, + "model_forward_time": 0.02494215965270996, + "step": 10348 + }, + { + "epoch": 1.5789794921875e-05, + "step": 10348, + "training_step_time": 0.1681196689605713 + }, + { + "epoch": 1.579132080078125e-05, + "model_forward_time": 0.027090787887573242, + "step": 10349 + }, + { + "epoch": 1.579132080078125e-05, + "step": 10349, + "training_step_time": 0.19069576263427734 + }, + { + "epoch": 1.57928466796875e-05, + "grad_norm": 0.40495797991752625, + "learning_rate": 7.803575286758364e-05, + "loss": 0.0371, + "step": 10350 + }, + { + "epoch": 1.57928466796875e-05, + "model_forward_time": 0.023637771606445312, + "step": 10350 + }, + { + "epoch": 1.57928466796875e-05, + "step": 10350, + "training_step_time": 0.1645183563232422 + }, + { + "epoch": 1.579437255859375e-05, + "model_forward_time": 0.023058652877807617, + "step": 10351 + }, + { + "epoch": 1.579437255859375e-05, + "step": 10351, + "training_step_time": 0.1358168125152588 + }, + { + "epoch": 1.57958984375e-05, + "model_forward_time": 0.023500442504882812, + "step": 10352 + }, + { + "epoch": 1.57958984375e-05, + "step": 10352, + "training_step_time": 0.1294393539428711 + }, + { + "epoch": 1.579742431640625e-05, + "model_forward_time": 0.02541637420654297, + "step": 10353 + }, + { + "epoch": 1.579742431640625e-05, + "step": 10353, + "training_step_time": 0.10862898826599121 + }, + { + "epoch": 1.57989501953125e-05, + "model_forward_time": 0.02476644515991211, + "step": 10354 + }, + { + "epoch": 1.57989501953125e-05, + "step": 10354, + "training_step_time": 0.10879230499267578 + }, + { + "epoch": 1.580047607421875e-05, + "model_forward_time": 0.024498462677001953, + "step": 10355 + }, + { + "epoch": 1.580047607421875e-05, + "step": 10355, + "training_step_time": 0.1165318489074707 + }, + { + "epoch": 1.5802001953125e-05, + "model_forward_time": 0.02446770668029785, + "step": 10356 + }, + { + "epoch": 1.5802001953125e-05, + "step": 10356, + "training_step_time": 0.1173095703125 + }, + { + "epoch": 1.580352783203125e-05, + "model_forward_time": 0.02425408363342285, + "step": 10357 + }, + { + "epoch": 1.580352783203125e-05, + "step": 10357, + "training_step_time": 0.12216401100158691 + }, + { + "epoch": 1.58050537109375e-05, + "model_forward_time": 0.02538323402404785, + "step": 10358 + }, + { + "epoch": 1.58050537109375e-05, + "step": 10358, + "training_step_time": 0.12340855598449707 + }, + { + "epoch": 1.580657958984375e-05, + "model_forward_time": 0.024383544921875, + "step": 10359 + }, + { + "epoch": 1.580657958984375e-05, + "step": 10359, + "training_step_time": 0.12047386169433594 + }, + { + "epoch": 1.580810546875e-05, + "grad_norm": 0.25256213545799255, + "learning_rate": 7.799009959008155e-05, + "loss": 0.0204, + "step": 10360 + }, + { + "epoch": 1.580810546875e-05, + "model_forward_time": 0.024165868759155273, + "step": 10360 + }, + { + "epoch": 1.580810546875e-05, + "step": 10360, + "training_step_time": 0.12019491195678711 + }, + { + "epoch": 1.580963134765625e-05, + "model_forward_time": 0.02428412437438965, + "step": 10361 + }, + { + "epoch": 1.580963134765625e-05, + "step": 10361, + "training_step_time": 0.11509919166564941 + }, + { + "epoch": 1.58111572265625e-05, + "model_forward_time": 0.02392101287841797, + "step": 10362 + }, + { + "epoch": 1.58111572265625e-05, + "step": 10362, + "training_step_time": 0.11330366134643555 + }, + { + "epoch": 1.581268310546875e-05, + "model_forward_time": 0.0244138240814209, + "step": 10363 + }, + { + "epoch": 1.581268310546875e-05, + "step": 10363, + "training_step_time": 0.11504054069519043 + }, + { + "epoch": 1.5814208984375e-05, + "model_forward_time": 0.024362802505493164, + "step": 10364 + }, + { + "epoch": 1.5814208984375e-05, + "step": 10364, + "training_step_time": 0.11533236503601074 + }, + { + "epoch": 1.581573486328125e-05, + "model_forward_time": 0.024304628372192383, + "step": 10365 + }, + { + "epoch": 1.581573486328125e-05, + "step": 10365, + "training_step_time": 0.10999441146850586 + }, + { + "epoch": 1.58172607421875e-05, + "model_forward_time": 0.024489402770996094, + "step": 10366 + }, + { + "epoch": 1.58172607421875e-05, + "step": 10366, + "training_step_time": 0.10736966133117676 + }, + { + "epoch": 1.581878662109375e-05, + "model_forward_time": 0.024550199508666992, + "step": 10367 + }, + { + "epoch": 1.581878662109375e-05, + "step": 10367, + "training_step_time": 0.11003804206848145 + }, + { + "epoch": 1.58203125e-05, + "model_forward_time": 0.02416515350341797, + "step": 10368 + }, + { + "epoch": 1.58203125e-05, + "step": 10368, + "training_step_time": 0.10773372650146484 + }, + { + "epoch": 1.582183837890625e-05, + "model_forward_time": 0.024495601654052734, + "step": 10369 + }, + { + "epoch": 1.582183837890625e-05, + "step": 10369, + "training_step_time": 0.11035394668579102 + }, + { + "epoch": 1.58233642578125e-05, + "grad_norm": 0.4363476037979126, + "learning_rate": 7.794441230196913e-05, + "loss": 0.0259, + "step": 10370 + }, + { + "epoch": 1.58233642578125e-05, + "model_forward_time": 0.02430438995361328, + "step": 10370 + }, + { + "epoch": 1.58233642578125e-05, + "step": 10370, + "training_step_time": 0.10989904403686523 + }, + { + "epoch": 1.582489013671875e-05, + "model_forward_time": 0.024333953857421875, + "step": 10371 + }, + { + "epoch": 1.582489013671875e-05, + "step": 10371, + "training_step_time": 0.10590648651123047 + }, + { + "epoch": 1.5826416015625e-05, + "model_forward_time": 0.02736210823059082, + "step": 10372 + }, + { + "epoch": 1.5826416015625e-05, + "step": 10372, + "training_step_time": 0.1126255989074707 + }, + { + "epoch": 1.582794189453125e-05, + "model_forward_time": 0.025012493133544922, + "step": 10373 + }, + { + "epoch": 1.582794189453125e-05, + "step": 10373, + "training_step_time": 0.10909485816955566 + }, + { + "epoch": 1.58294677734375e-05, + "model_forward_time": 0.024019718170166016, + "step": 10374 + }, + { + "epoch": 1.58294677734375e-05, + "step": 10374, + "training_step_time": 0.10557317733764648 + }, + { + "epoch": 1.583099365234375e-05, + "model_forward_time": 0.023888826370239258, + "step": 10375 + }, + { + "epoch": 1.583099365234375e-05, + "step": 10375, + "training_step_time": 0.10823607444763184 + }, + { + "epoch": 1.583251953125e-05, + "model_forward_time": 0.025122642517089844, + "step": 10376 + }, + { + "epoch": 1.583251953125e-05, + "step": 10376, + "training_step_time": 0.10625076293945312 + }, + { + "epoch": 1.583404541015625e-05, + "model_forward_time": 0.024137020111083984, + "step": 10377 + }, + { + "epoch": 1.583404541015625e-05, + "step": 10377, + "training_step_time": 0.10349607467651367 + }, + { + "epoch": 1.58355712890625e-05, + "model_forward_time": 0.02344989776611328, + "step": 10378 + }, + { + "epoch": 1.58355712890625e-05, + "step": 10378, + "training_step_time": 0.10759115219116211 + }, + { + "epoch": 1.583709716796875e-05, + "model_forward_time": 0.02417278289794922, + "step": 10379 + }, + { + "epoch": 1.583709716796875e-05, + "step": 10379, + "training_step_time": 0.11425375938415527 + }, + { + "epoch": 1.5838623046875e-05, + "grad_norm": 0.5339275598526001, + "learning_rate": 7.789869105876083e-05, + "loss": 0.0274, + "step": 10380 + }, + { + "epoch": 1.5838623046875e-05, + "model_forward_time": 0.024263858795166016, + "step": 10380 + }, + { + "epoch": 1.5838623046875e-05, + "step": 10380, + "training_step_time": 0.11874818801879883 + }, + { + "epoch": 1.584014892578125e-05, + "model_forward_time": 0.024433135986328125, + "step": 10381 + }, + { + "epoch": 1.584014892578125e-05, + "step": 10381, + "training_step_time": 0.10884571075439453 + }, + { + "epoch": 1.58416748046875e-05, + "model_forward_time": 0.0244140625, + "step": 10382 + }, + { + "epoch": 1.58416748046875e-05, + "step": 10382, + "training_step_time": 0.1699063777923584 + }, + { + "epoch": 1.584320068359375e-05, + "model_forward_time": 0.0238039493560791, + "step": 10383 + }, + { + "epoch": 1.584320068359375e-05, + "step": 10383, + "training_step_time": 0.17198586463928223 + }, + { + "epoch": 1.58447265625e-05, + "model_forward_time": 0.023403644561767578, + "step": 10384 + }, + { + "epoch": 1.58447265625e-05, + "step": 10384, + "training_step_time": 0.10480403900146484 + }, + { + "epoch": 1.584625244140625e-05, + "model_forward_time": 0.023782730102539062, + "step": 10385 + }, + { + "epoch": 1.584625244140625e-05, + "step": 10385, + "training_step_time": 0.10520076751708984 + }, + { + "epoch": 1.58477783203125e-05, + "model_forward_time": 0.024272680282592773, + "step": 10386 + }, + { + "epoch": 1.58477783203125e-05, + "step": 10386, + "training_step_time": 0.10866498947143555 + }, + { + "epoch": 1.584930419921875e-05, + "model_forward_time": 0.02450394630432129, + "step": 10387 + }, + { + "epoch": 1.584930419921875e-05, + "step": 10387, + "training_step_time": 0.124359130859375 + }, + { + "epoch": 1.5850830078125e-05, + "model_forward_time": 0.023942947387695312, + "step": 10388 + }, + { + "epoch": 1.5850830078125e-05, + "step": 10388, + "training_step_time": 0.12716412544250488 + }, + { + "epoch": 1.585235595703125e-05, + "model_forward_time": 0.02417159080505371, + "step": 10389 + }, + { + "epoch": 1.585235595703125e-05, + "step": 10389, + "training_step_time": 0.11841893196105957 + }, + { + "epoch": 1.58538818359375e-05, + "grad_norm": 0.3493765890598297, + "learning_rate": 7.785293591601217e-05, + "loss": 0.0258, + "step": 10390 + }, + { + "epoch": 1.58538818359375e-05, + "model_forward_time": 0.023992061614990234, + "step": 10390 + }, + { + "epoch": 1.58538818359375e-05, + "step": 10390, + "training_step_time": 0.1124420166015625 + }, + { + "epoch": 1.585540771484375e-05, + "model_forward_time": 0.023928403854370117, + "step": 10391 + }, + { + "epoch": 1.585540771484375e-05, + "step": 10391, + "training_step_time": 0.10845112800598145 + }, + { + "epoch": 1.585693359375e-05, + "model_forward_time": 0.024823904037475586, + "step": 10392 + }, + { + "epoch": 1.585693359375e-05, + "step": 10392, + "training_step_time": 0.19379806518554688 + }, + { + "epoch": 1.585845947265625e-05, + "model_forward_time": 0.02316141128540039, + "step": 10393 + }, + { + "epoch": 1.585845947265625e-05, + "step": 10393, + "training_step_time": 0.1700119972229004 + }, + { + "epoch": 1.58599853515625e-05, + "model_forward_time": 0.026870250701904297, + "step": 10394 + }, + { + "epoch": 1.58599853515625e-05, + "step": 10394, + "training_step_time": 0.1321122646331787 + }, + { + "epoch": 1.586151123046875e-05, + "model_forward_time": 0.023236513137817383, + "step": 10395 + }, + { + "epoch": 1.586151123046875e-05, + "step": 10395, + "training_step_time": 0.1626434326171875 + }, + { + "epoch": 1.5863037109375e-05, + "model_forward_time": 0.023585796356201172, + "step": 10396 + }, + { + "epoch": 1.5863037109375e-05, + "step": 10396, + "training_step_time": 0.17064833641052246 + }, + { + "epoch": 1.586456298828125e-05, + "model_forward_time": 0.023578166961669922, + "step": 10397 + }, + { + "epoch": 1.586456298828125e-05, + "step": 10397, + "training_step_time": 0.17374849319458008 + }, + { + "epoch": 1.58660888671875e-05, + "model_forward_time": 0.02345132827758789, + "step": 10398 + }, + { + "epoch": 1.58660888671875e-05, + "step": 10398, + "training_step_time": 0.1232905387878418 + }, + { + "epoch": 1.586761474609375e-05, + "model_forward_time": 0.02559041976928711, + "step": 10399 + }, + { + "epoch": 1.586761474609375e-05, + "step": 10399, + "training_step_time": 0.11759591102600098 + }, + { + "epoch": 1.5869140625e-05, + "grad_norm": 0.35689061880111694, + "learning_rate": 7.780714692932002e-05, + "loss": 0.031, + "step": 10400 + }, + { + "epoch": 1.5869140625e-05, + "model_forward_time": 0.024298906326293945, + "step": 10400 + }, + { + "epoch": 1.5869140625e-05, + "step": 10400, + "training_step_time": 0.1124122142791748 + }, + { + "epoch": 1.587066650390625e-05, + "model_forward_time": 0.024615049362182617, + "step": 10401 + }, + { + "epoch": 1.587066650390625e-05, + "step": 10401, + "training_step_time": 0.10725879669189453 + }, + { + "epoch": 1.58721923828125e-05, + "model_forward_time": 0.02423548698425293, + "step": 10402 + }, + { + "epoch": 1.58721923828125e-05, + "step": 10402, + "training_step_time": 0.10816550254821777 + }, + { + "epoch": 1.587371826171875e-05, + "model_forward_time": 0.024158954620361328, + "step": 10403 + }, + { + "epoch": 1.587371826171875e-05, + "step": 10403, + "training_step_time": 0.10678791999816895 + }, + { + "epoch": 1.5875244140625e-05, + "model_forward_time": 0.024281978607177734, + "step": 10404 + }, + { + "epoch": 1.5875244140625e-05, + "step": 10404, + "training_step_time": 0.10839986801147461 + }, + { + "epoch": 1.587677001953125e-05, + "model_forward_time": 0.024219036102294922, + "step": 10405 + }, + { + "epoch": 1.587677001953125e-05, + "step": 10405, + "training_step_time": 0.10494089126586914 + }, + { + "epoch": 1.58782958984375e-05, + "model_forward_time": 0.024111509323120117, + "step": 10406 + }, + { + "epoch": 1.58782958984375e-05, + "step": 10406, + "training_step_time": 0.10543084144592285 + }, + { + "epoch": 1.587982177734375e-05, + "model_forward_time": 0.024381637573242188, + "step": 10407 + }, + { + "epoch": 1.587982177734375e-05, + "step": 10407, + "training_step_time": 0.10554051399230957 + }, + { + "epoch": 1.588134765625e-05, + "model_forward_time": 0.02364516258239746, + "step": 10408 + }, + { + "epoch": 1.588134765625e-05, + "step": 10408, + "training_step_time": 0.10828971862792969 + }, + { + "epoch": 1.588287353515625e-05, + "model_forward_time": 0.024113178253173828, + "step": 10409 + }, + { + "epoch": 1.588287353515625e-05, + "step": 10409, + "training_step_time": 0.10925960540771484 + }, + { + "epoch": 1.58843994140625e-05, + "grad_norm": 0.3861542344093323, + "learning_rate": 7.776132415432234e-05, + "loss": 0.0279, + "step": 10410 + }, + { + "epoch": 1.58843994140625e-05, + "model_forward_time": 0.024335384368896484, + "step": 10410 + }, + { + "epoch": 1.58843994140625e-05, + "step": 10410, + "training_step_time": 0.1092679500579834 + }, + { + "epoch": 1.588592529296875e-05, + "model_forward_time": 0.024267196655273438, + "step": 10411 + }, + { + "epoch": 1.588592529296875e-05, + "step": 10411, + "training_step_time": 0.11186528205871582 + }, + { + "epoch": 1.5887451171875e-05, + "model_forward_time": 0.024225473403930664, + "step": 10412 + }, + { + "epoch": 1.5887451171875e-05, + "step": 10412, + "training_step_time": 0.11743903160095215 + }, + { + "epoch": 1.588897705078125e-05, + "model_forward_time": 0.02416825294494629, + "step": 10413 + }, + { + "epoch": 1.588897705078125e-05, + "step": 10413, + "training_step_time": 0.11883044242858887 + }, + { + "epoch": 1.58905029296875e-05, + "model_forward_time": 0.024515628814697266, + "step": 10414 + }, + { + "epoch": 1.58905029296875e-05, + "step": 10414, + "training_step_time": 0.11916422843933105 + }, + { + "epoch": 1.589202880859375e-05, + "model_forward_time": 0.02464914321899414, + "step": 10415 + }, + { + "epoch": 1.589202880859375e-05, + "step": 10415, + "training_step_time": 0.12026143074035645 + }, + { + "epoch": 1.58935546875e-05, + "model_forward_time": 0.0243988037109375, + "step": 10416 + }, + { + "epoch": 1.58935546875e-05, + "step": 10416, + "training_step_time": 0.11646246910095215 + }, + { + "epoch": 1.589508056640625e-05, + "model_forward_time": 0.024510860443115234, + "step": 10417 + }, + { + "epoch": 1.589508056640625e-05, + "step": 10417, + "training_step_time": 0.11707425117492676 + }, + { + "epoch": 1.58966064453125e-05, + "model_forward_time": 0.024254322052001953, + "step": 10418 + }, + { + "epoch": 1.58966064453125e-05, + "step": 10418, + "training_step_time": 0.11407184600830078 + }, + { + "epoch": 1.589813232421875e-05, + "model_forward_time": 0.02410125732421875, + "step": 10419 + }, + { + "epoch": 1.589813232421875e-05, + "step": 10419, + "training_step_time": 0.11098575592041016 + }, + { + "epoch": 1.5899658203125e-05, + "grad_norm": 0.26254281401634216, + "learning_rate": 7.771546764669807e-05, + "loss": 0.0283, + "step": 10420 + }, + { + "epoch": 1.5899658203125e-05, + "model_forward_time": 0.02383875846862793, + "step": 10420 + }, + { + "epoch": 1.5899658203125e-05, + "step": 10420, + "training_step_time": 0.11048102378845215 + }, + { + "epoch": 1.590118408203125e-05, + "model_forward_time": 0.024370193481445312, + "step": 10421 + }, + { + "epoch": 1.590118408203125e-05, + "step": 10421, + "training_step_time": 0.11007237434387207 + }, + { + "epoch": 1.59027099609375e-05, + "model_forward_time": 0.024503231048583984, + "step": 10422 + }, + { + "epoch": 1.59027099609375e-05, + "step": 10422, + "training_step_time": 0.10834693908691406 + }, + { + "epoch": 1.590423583984375e-05, + "model_forward_time": 0.024507522583007812, + "step": 10423 + }, + { + "epoch": 1.590423583984375e-05, + "step": 10423, + "training_step_time": 0.10723018646240234 + }, + { + "epoch": 1.590576171875e-05, + "model_forward_time": 0.024882793426513672, + "step": 10424 + }, + { + "epoch": 1.590576171875e-05, + "step": 10424, + "training_step_time": 0.10694742202758789 + }, + { + "epoch": 1.590728759765625e-05, + "model_forward_time": 0.024363040924072266, + "step": 10425 + }, + { + "epoch": 1.590728759765625e-05, + "step": 10425, + "training_step_time": 0.16831207275390625 + }, + { + "epoch": 1.59088134765625e-05, + "model_forward_time": 0.02494525909423828, + "step": 10426 + }, + { + "epoch": 1.59088134765625e-05, + "step": 10426, + "training_step_time": 0.16390180587768555 + }, + { + "epoch": 1.591033935546875e-05, + "model_forward_time": 0.02399158477783203, + "step": 10427 + }, + { + "epoch": 1.591033935546875e-05, + "step": 10427, + "training_step_time": 0.11094951629638672 + }, + { + "epoch": 1.5911865234375e-05, + "model_forward_time": 0.0238187313079834, + "step": 10428 + }, + { + "epoch": 1.5911865234375e-05, + "step": 10428, + "training_step_time": 0.10598421096801758 + }, + { + "epoch": 1.591339111328125e-05, + "model_forward_time": 0.024426937103271484, + "step": 10429 + }, + { + "epoch": 1.591339111328125e-05, + "step": 10429, + "training_step_time": 0.12009811401367188 + }, + { + "epoch": 1.59149169921875e-05, + "grad_norm": 0.4070347249507904, + "learning_rate": 7.766957746216721e-05, + "loss": 0.0222, + "step": 10430 + }, + { + "epoch": 1.59149169921875e-05, + "model_forward_time": 0.024413585662841797, + "step": 10430 + }, + { + "epoch": 1.59149169921875e-05, + "step": 10430, + "training_step_time": 0.11384940147399902 + }, + { + "epoch": 1.591644287109375e-05, + "model_forward_time": 0.024778127670288086, + "step": 10431 + }, + { + "epoch": 1.591644287109375e-05, + "step": 10431, + "training_step_time": 0.10653281211853027 + }, + { + "epoch": 1.591796875e-05, + "model_forward_time": 0.024657487869262695, + "step": 10432 + }, + { + "epoch": 1.591796875e-05, + "step": 10432, + "training_step_time": 0.10950922966003418 + }, + { + "epoch": 1.591949462890625e-05, + "model_forward_time": 0.024526119232177734, + "step": 10433 + }, + { + "epoch": 1.591949462890625e-05, + "step": 10433, + "training_step_time": 0.14588642120361328 + }, + { + "epoch": 1.59210205078125e-05, + "model_forward_time": 0.02535700798034668, + "step": 10434 + }, + { + "epoch": 1.59210205078125e-05, + "step": 10434, + "training_step_time": 0.1367475986480713 + }, + { + "epoch": 1.592254638671875e-05, + "model_forward_time": 0.02455925941467285, + "step": 10435 + }, + { + "epoch": 1.592254638671875e-05, + "step": 10435, + "training_step_time": 0.11002755165100098 + }, + { + "epoch": 1.5924072265625e-05, + "model_forward_time": 0.025478124618530273, + "step": 10436 + }, + { + "epoch": 1.5924072265625e-05, + "step": 10436, + "training_step_time": 0.11432528495788574 + }, + { + "epoch": 1.592559814453125e-05, + "model_forward_time": 0.026172637939453125, + "step": 10437 + }, + { + "epoch": 1.592559814453125e-05, + "step": 10437, + "training_step_time": 0.11094403266906738 + }, + { + "epoch": 1.59271240234375e-05, + "model_forward_time": 0.025421619415283203, + "step": 10438 + }, + { + "epoch": 1.59271240234375e-05, + "step": 10438, + "training_step_time": 0.11231613159179688 + }, + { + "epoch": 1.592864990234375e-05, + "model_forward_time": 0.0251615047454834, + "step": 10439 + }, + { + "epoch": 1.592864990234375e-05, + "step": 10439, + "training_step_time": 0.13277983665466309 + }, + { + "epoch": 1.593017578125e-05, + "grad_norm": 0.6251704692840576, + "learning_rate": 7.762365365649067e-05, + "loss": 0.0223, + "step": 10440 + }, + { + "epoch": 1.593017578125e-05, + "model_forward_time": 0.02540731430053711, + "step": 10440 + }, + { + "epoch": 1.593017578125e-05, + "step": 10440, + "training_step_time": 0.17345547676086426 + }, + { + "epoch": 1.593170166015625e-05, + "model_forward_time": 0.024407386779785156, + "step": 10441 + }, + { + "epoch": 1.593170166015625e-05, + "step": 10441, + "training_step_time": 0.1159369945526123 + }, + { + "epoch": 1.59332275390625e-05, + "model_forward_time": 0.024710416793823242, + "step": 10442 + }, + { + "epoch": 1.59332275390625e-05, + "step": 10442, + "training_step_time": 0.18603181838989258 + }, + { + "epoch": 1.593475341796875e-05, + "model_forward_time": 0.02434563636779785, + "step": 10443 + }, + { + "epoch": 1.593475341796875e-05, + "step": 10443, + "training_step_time": 0.16681909561157227 + }, + { + "epoch": 1.5936279296875e-05, + "model_forward_time": 0.024476051330566406, + "step": 10444 + }, + { + "epoch": 1.5936279296875e-05, + "step": 10444, + "training_step_time": 0.18172955513000488 + }, + { + "epoch": 1.593780517578125e-05, + "model_forward_time": 0.024565458297729492, + "step": 10445 + }, + { + "epoch": 1.593780517578125e-05, + "step": 10445, + "training_step_time": 0.1333320140838623 + }, + { + "epoch": 1.59393310546875e-05, + "model_forward_time": 0.024302005767822266, + "step": 10446 + }, + { + "epoch": 1.59393310546875e-05, + "step": 10446, + "training_step_time": 0.12752246856689453 + }, + { + "epoch": 1.594085693359375e-05, + "model_forward_time": 0.024477720260620117, + "step": 10447 + }, + { + "epoch": 1.594085693359375e-05, + "step": 10447, + "training_step_time": 0.1943376064300537 + }, + { + "epoch": 1.59423828125e-05, + "model_forward_time": 0.02451014518737793, + "step": 10448 + }, + { + "epoch": 1.59423828125e-05, + "step": 10448, + "training_step_time": 0.1097555160522461 + }, + { + "epoch": 1.594390869140625e-05, + "model_forward_time": 0.024710416793823242, + "step": 10449 + }, + { + "epoch": 1.594390869140625e-05, + "step": 10449, + "training_step_time": 0.11312270164489746 + }, + { + "epoch": 1.59454345703125e-05, + "grad_norm": 0.31595566868782043, + "learning_rate": 7.757769628547018e-05, + "loss": 0.0309, + "step": 10450 + }, + { + "epoch": 1.59454345703125e-05, + "model_forward_time": 0.025224685668945312, + "step": 10450 + }, + { + "epoch": 1.59454345703125e-05, + "step": 10450, + "training_step_time": 0.10930180549621582 + }, + { + "epoch": 1.594696044921875e-05, + "model_forward_time": 0.025315523147583008, + "step": 10451 + }, + { + "epoch": 1.594696044921875e-05, + "step": 10451, + "training_step_time": 0.10760951042175293 + }, + { + "epoch": 1.5948486328125e-05, + "model_forward_time": 0.0253446102142334, + "step": 10452 + }, + { + "epoch": 1.5948486328125e-05, + "step": 10452, + "training_step_time": 0.10981440544128418 + }, + { + "epoch": 1.595001220703125e-05, + "model_forward_time": 0.02613091468811035, + "step": 10453 + }, + { + "epoch": 1.595001220703125e-05, + "step": 10453, + "training_step_time": 0.11004495620727539 + }, + { + "epoch": 1.59515380859375e-05, + "model_forward_time": 0.026526927947998047, + "step": 10454 + }, + { + "epoch": 1.59515380859375e-05, + "step": 10454, + "training_step_time": 0.11154627799987793 + }, + { + "epoch": 1.595306396484375e-05, + "model_forward_time": 0.02550506591796875, + "step": 10455 + }, + { + "epoch": 1.595306396484375e-05, + "step": 10455, + "training_step_time": 0.10796117782592773 + }, + { + "epoch": 1.595458984375e-05, + "model_forward_time": 0.025444984436035156, + "step": 10456 + }, + { + "epoch": 1.595458984375e-05, + "step": 10456, + "training_step_time": 0.10827040672302246 + }, + { + "epoch": 1.595611572265625e-05, + "model_forward_time": 0.025686979293823242, + "step": 10457 + }, + { + "epoch": 1.595611572265625e-05, + "step": 10457, + "training_step_time": 0.1076347827911377 + }, + { + "epoch": 1.59576416015625e-05, + "model_forward_time": 0.025017976760864258, + "step": 10458 + }, + { + "epoch": 1.59576416015625e-05, + "step": 10458, + "training_step_time": 0.10752582550048828 + }, + { + "epoch": 1.595916748046875e-05, + "model_forward_time": 0.025257587432861328, + "step": 10459 + }, + { + "epoch": 1.595916748046875e-05, + "step": 10459, + "training_step_time": 0.10657358169555664 + }, + { + "epoch": 1.5960693359375e-05, + "grad_norm": 0.42718175053596497, + "learning_rate": 7.753170540494832e-05, + "loss": 0.0252, + "step": 10460 + }, + { + "epoch": 1.5960693359375e-05, + "model_forward_time": 0.025238752365112305, + "step": 10460 + }, + { + "epoch": 1.5960693359375e-05, + "step": 10460, + "training_step_time": 0.1077113151550293 + }, + { + "epoch": 1.596221923828125e-05, + "model_forward_time": 0.025685787200927734, + "step": 10461 + }, + { + "epoch": 1.596221923828125e-05, + "step": 10461, + "training_step_time": 0.10884928703308105 + }, + { + "epoch": 1.59637451171875e-05, + "model_forward_time": 0.024918556213378906, + "step": 10462 + }, + { + "epoch": 1.59637451171875e-05, + "step": 10462, + "training_step_time": 0.1101377010345459 + }, + { + "epoch": 1.596527099609375e-05, + "model_forward_time": 0.024950027465820312, + "step": 10463 + }, + { + "epoch": 1.596527099609375e-05, + "step": 10463, + "training_step_time": 0.10728859901428223 + }, + { + "epoch": 1.5966796875e-05, + "model_forward_time": 0.026139259338378906, + "step": 10464 + }, + { + "epoch": 1.5966796875e-05, + "step": 10464, + "training_step_time": 0.10781478881835938 + }, + { + "epoch": 1.596832275390625e-05, + "model_forward_time": 0.024890661239624023, + "step": 10465 + }, + { + "epoch": 1.596832275390625e-05, + "step": 10465, + "training_step_time": 0.10637140274047852 + }, + { + "epoch": 1.59698486328125e-05, + "model_forward_time": 0.02523517608642578, + "step": 10466 + }, + { + "epoch": 1.59698486328125e-05, + "step": 10466, + "training_step_time": 0.11573934555053711 + }, + { + "epoch": 1.597137451171875e-05, + "model_forward_time": 0.025285720825195312, + "step": 10467 + }, + { + "epoch": 1.597137451171875e-05, + "step": 10467, + "training_step_time": 0.10697102546691895 + }, + { + "epoch": 1.5972900390625e-05, + "model_forward_time": 0.025109291076660156, + "step": 10468 + }, + { + "epoch": 1.5972900390625e-05, + "step": 10468, + "training_step_time": 0.11490702629089355 + }, + { + "epoch": 1.597442626953125e-05, + "model_forward_time": 0.025126218795776367, + "step": 10469 + }, + { + "epoch": 1.597442626953125e-05, + "step": 10469, + "training_step_time": 0.11527299880981445 + }, + { + "epoch": 1.59759521484375e-05, + "grad_norm": 0.4019438922405243, + "learning_rate": 7.748568107080832e-05, + "loss": 0.026, + "step": 10470 + }, + { + "epoch": 1.59759521484375e-05, + "model_forward_time": 0.025460481643676758, + "step": 10470 + }, + { + "epoch": 1.59759521484375e-05, + "step": 10470, + "training_step_time": 0.10825705528259277 + }, + { + "epoch": 1.597747802734375e-05, + "model_forward_time": 0.02575516700744629, + "step": 10471 + }, + { + "epoch": 1.597747802734375e-05, + "step": 10471, + "training_step_time": 0.16867518424987793 + }, + { + "epoch": 1.597900390625e-05, + "model_forward_time": 0.02455282211303711, + "step": 10472 + }, + { + "epoch": 1.597900390625e-05, + "step": 10472, + "training_step_time": 0.15870380401611328 + }, + { + "epoch": 1.598052978515625e-05, + "model_forward_time": 0.026572227478027344, + "step": 10473 + }, + { + "epoch": 1.598052978515625e-05, + "step": 10473, + "training_step_time": 0.11470770835876465 + }, + { + "epoch": 1.59820556640625e-05, + "model_forward_time": 0.024698257446289062, + "step": 10474 + }, + { + "epoch": 1.59820556640625e-05, + "step": 10474, + "training_step_time": 0.10711908340454102 + }, + { + "epoch": 1.598358154296875e-05, + "model_forward_time": 0.025254249572753906, + "step": 10475 + }, + { + "epoch": 1.598358154296875e-05, + "step": 10475, + "training_step_time": 0.17104721069335938 + }, + { + "epoch": 1.5985107421875e-05, + "model_forward_time": 0.025230884552001953, + "step": 10476 + }, + { + "epoch": 1.5985107421875e-05, + "step": 10476, + "training_step_time": 0.10722947120666504 + }, + { + "epoch": 1.598663330078125e-05, + "model_forward_time": 0.024804353713989258, + "step": 10477 + }, + { + "epoch": 1.598663330078125e-05, + "step": 10477, + "training_step_time": 0.10540151596069336 + }, + { + "epoch": 1.59881591796875e-05, + "model_forward_time": 0.0254819393157959, + "step": 10478 + }, + { + "epoch": 1.59881591796875e-05, + "step": 10478, + "training_step_time": 0.11165833473205566 + }, + { + "epoch": 1.598968505859375e-05, + "model_forward_time": 0.02542710304260254, + "step": 10479 + }, + { + "epoch": 1.598968505859375e-05, + "step": 10479, + "training_step_time": 0.14678025245666504 + }, + { + "epoch": 1.59912109375e-05, + "grad_norm": 0.19216464459896088, + "learning_rate": 7.743962333897405e-05, + "loss": 0.0218, + "step": 10480 + }, + { + "epoch": 1.59912109375e-05, + "model_forward_time": 0.02490544319152832, + "step": 10480 + }, + { + "epoch": 1.59912109375e-05, + "step": 10480, + "training_step_time": 0.10986471176147461 + }, + { + "epoch": 1.599273681640625e-05, + "model_forward_time": 0.024964570999145508, + "step": 10481 + }, + { + "epoch": 1.599273681640625e-05, + "step": 10481, + "training_step_time": 0.11040115356445312 + }, + { + "epoch": 1.59942626953125e-05, + "model_forward_time": 0.025069713592529297, + "step": 10482 + }, + { + "epoch": 1.59942626953125e-05, + "step": 10482, + "training_step_time": 0.11390829086303711 + }, + { + "epoch": 1.599578857421875e-05, + "model_forward_time": 0.025154829025268555, + "step": 10483 + }, + { + "epoch": 1.599578857421875e-05, + "step": 10483, + "training_step_time": 0.11696314811706543 + }, + { + "epoch": 1.5997314453125e-05, + "model_forward_time": 0.024606943130493164, + "step": 10484 + }, + { + "epoch": 1.5997314453125e-05, + "step": 10484, + "training_step_time": 0.14050507545471191 + }, + { + "epoch": 1.599884033203125e-05, + "model_forward_time": 0.02508378028869629, + "step": 10485 + }, + { + "epoch": 1.599884033203125e-05, + "step": 10485, + "training_step_time": 0.19504046440124512 + }, + { + "epoch": 1.60003662109375e-05, + "model_forward_time": 0.02514791488647461, + "step": 10486 + }, + { + "epoch": 1.60003662109375e-05, + "step": 10486, + "training_step_time": 0.13492178916931152 + }, + { + "epoch": 1.600189208984375e-05, + "model_forward_time": 0.0247800350189209, + "step": 10487 + }, + { + "epoch": 1.600189208984375e-05, + "step": 10487, + "training_step_time": 0.10737776756286621 + }, + { + "epoch": 1.600341796875e-05, + "model_forward_time": 0.025420188903808594, + "step": 10488 + }, + { + "epoch": 1.600341796875e-05, + "step": 10488, + "training_step_time": 0.16438078880310059 + }, + { + "epoch": 1.600494384765625e-05, + "model_forward_time": 0.02596139907836914, + "step": 10489 + }, + { + "epoch": 1.600494384765625e-05, + "step": 10489, + "training_step_time": 0.15338516235351562 + }, + { + "epoch": 1.60064697265625e-05, + "grad_norm": 0.24947883188724518, + "learning_rate": 7.739353226541009e-05, + "loss": 0.0213, + "step": 10490 + }, + { + "epoch": 1.60064697265625e-05, + "model_forward_time": 0.024484634399414062, + "step": 10490 + }, + { + "epoch": 1.60064697265625e-05, + "step": 10490, + "training_step_time": 0.18201184272766113 + }, + { + "epoch": 1.600799560546875e-05, + "model_forward_time": 0.024672508239746094, + "step": 10491 + }, + { + "epoch": 1.600799560546875e-05, + "step": 10491, + "training_step_time": 0.13194608688354492 + }, + { + "epoch": 1.6009521484375e-05, + "model_forward_time": 0.024747371673583984, + "step": 10492 + }, + { + "epoch": 1.6009521484375e-05, + "step": 10492, + "training_step_time": 0.11019277572631836 + }, + { + "epoch": 1.601104736328125e-05, + "model_forward_time": 0.0252835750579834, + "step": 10493 + }, + { + "epoch": 1.601104736328125e-05, + "step": 10493, + "training_step_time": 0.11962246894836426 + }, + { + "epoch": 1.60125732421875e-05, + "model_forward_time": 0.025742053985595703, + "step": 10494 + }, + { + "epoch": 1.60125732421875e-05, + "step": 10494, + "training_step_time": 0.10905981063842773 + }, + { + "epoch": 1.601409912109375e-05, + "model_forward_time": 0.025655746459960938, + "step": 10495 + }, + { + "epoch": 1.601409912109375e-05, + "step": 10495, + "training_step_time": 0.10987520217895508 + }, + { + "epoch": 1.6015625e-05, + "model_forward_time": 0.025223970413208008, + "step": 10496 + }, + { + "epoch": 1.6015625e-05, + "step": 10496, + "training_step_time": 0.1106257438659668 + }, + { + "epoch": 1.601715087890625e-05, + "model_forward_time": 0.025254487991333008, + "step": 10497 + }, + { + "epoch": 1.601715087890625e-05, + "step": 10497, + "training_step_time": 0.10752558708190918 + }, + { + "epoch": 1.60186767578125e-05, + "model_forward_time": 0.02494978904724121, + "step": 10498 + }, + { + "epoch": 1.60186767578125e-05, + "step": 10498, + "training_step_time": 0.11049318313598633 + }, + { + "epoch": 1.602020263671875e-05, + "model_forward_time": 0.02514028549194336, + "step": 10499 + }, + { + "epoch": 1.602020263671875e-05, + "step": 10499, + "training_step_time": 0.11073827743530273 + }, + { + "epoch": 1.6021728515625e-05, + "grad_norm": 0.30641114711761475, + "learning_rate": 7.734740790612136e-05, + "loss": 0.0176, + "step": 10500 + }, + { + "epoch": 1.6021728515625e-05, + "model_forward_time": 0.025107145309448242, + "step": 10500 + }, + { + "epoch": 1.6021728515625e-05, + "step": 10500, + "training_step_time": 0.11032819747924805 + }, + { + "epoch": 1.602325439453125e-05, + "model_forward_time": 0.025366544723510742, + "step": 10501 + }, + { + "epoch": 1.602325439453125e-05, + "step": 10501, + "training_step_time": 0.10793328285217285 + }, + { + "epoch": 1.60247802734375e-05, + "model_forward_time": 0.02537679672241211, + "step": 10502 + }, + { + "epoch": 1.60247802734375e-05, + "step": 10502, + "training_step_time": 0.10836482048034668 + }, + { + "epoch": 1.602630615234375e-05, + "model_forward_time": 0.025829076766967773, + "step": 10503 + }, + { + "epoch": 1.602630615234375e-05, + "step": 10503, + "training_step_time": 0.10854649543762207 + }, + { + "epoch": 1.602783203125e-05, + "model_forward_time": 0.025397062301635742, + "step": 10504 + }, + { + "epoch": 1.602783203125e-05, + "step": 10504, + "training_step_time": 0.10765290260314941 + }, + { + "epoch": 1.602935791015625e-05, + "model_forward_time": 0.02538275718688965, + "step": 10505 + }, + { + "epoch": 1.602935791015625e-05, + "step": 10505, + "training_step_time": 0.11289167404174805 + }, + { + "epoch": 1.60308837890625e-05, + "model_forward_time": 0.025197267532348633, + "step": 10506 + }, + { + "epoch": 1.60308837890625e-05, + "step": 10506, + "training_step_time": 0.10851097106933594 + }, + { + "epoch": 1.603240966796875e-05, + "model_forward_time": 0.025312185287475586, + "step": 10507 + }, + { + "epoch": 1.603240966796875e-05, + "step": 10507, + "training_step_time": 0.10791516304016113 + }, + { + "epoch": 1.6033935546875e-05, + "model_forward_time": 0.02554035186767578, + "step": 10508 + }, + { + "epoch": 1.6033935546875e-05, + "step": 10508, + "training_step_time": 0.10912132263183594 + }, + { + "epoch": 1.603546142578125e-05, + "model_forward_time": 0.025310039520263672, + "step": 10509 + }, + { + "epoch": 1.603546142578125e-05, + "step": 10509, + "training_step_time": 0.11201691627502441 + }, + { + "epoch": 1.60369873046875e-05, + "grad_norm": 0.4064136743545532, + "learning_rate": 7.730125031715331e-05, + "loss": 0.0358, + "step": 10510 + }, + { + "epoch": 1.60369873046875e-05, + "model_forward_time": 0.02521061897277832, + "step": 10510 + }, + { + "epoch": 1.60369873046875e-05, + "step": 10510, + "training_step_time": 0.10727596282958984 + }, + { + "epoch": 1.603851318359375e-05, + "model_forward_time": 0.025233983993530273, + "step": 10511 + }, + { + "epoch": 1.603851318359375e-05, + "step": 10511, + "training_step_time": 0.11281418800354004 + }, + { + "epoch": 1.60400390625e-05, + "model_forward_time": 0.025403738021850586, + "step": 10512 + }, + { + "epoch": 1.60400390625e-05, + "step": 10512, + "training_step_time": 0.11193084716796875 + }, + { + "epoch": 1.604156494140625e-05, + "model_forward_time": 0.02489924430847168, + "step": 10513 + }, + { + "epoch": 1.604156494140625e-05, + "step": 10513, + "training_step_time": 0.11498546600341797 + }, + { + "epoch": 1.60430908203125e-05, + "model_forward_time": 0.02527165412902832, + "step": 10514 + }, + { + "epoch": 1.60430908203125e-05, + "step": 10514, + "training_step_time": 0.17951035499572754 + }, + { + "epoch": 1.604461669921875e-05, + "model_forward_time": 0.025029659271240234, + "step": 10515 + }, + { + "epoch": 1.604461669921875e-05, + "step": 10515, + "training_step_time": 0.15661096572875977 + }, + { + "epoch": 1.6046142578125e-05, + "model_forward_time": 0.024759292602539062, + "step": 10516 + }, + { + "epoch": 1.6046142578125e-05, + "step": 10516, + "training_step_time": 0.13867449760437012 + }, + { + "epoch": 1.604766845703125e-05, + "model_forward_time": 0.02432394027709961, + "step": 10517 + }, + { + "epoch": 1.604766845703125e-05, + "step": 10517, + "training_step_time": 0.13477873802185059 + }, + { + "epoch": 1.60491943359375e-05, + "model_forward_time": 0.024957656860351562, + "step": 10518 + }, + { + "epoch": 1.60491943359375e-05, + "step": 10518, + "training_step_time": 0.129655122756958 + }, + { + "epoch": 1.605072021484375e-05, + "model_forward_time": 0.0246121883392334, + "step": 10519 + }, + { + "epoch": 1.605072021484375e-05, + "step": 10519, + "training_step_time": 0.20784544944763184 + }, + { + "epoch": 1.605224609375e-05, + "grad_norm": 0.24456650018692017, + "learning_rate": 7.725505955459183e-05, + "loss": 0.0165, + "step": 10520 + }, + { + "epoch": 1.605224609375e-05, + "model_forward_time": 0.023955106735229492, + "step": 10520 + }, + { + "epoch": 1.605224609375e-05, + "step": 10520, + "training_step_time": 0.14185571670532227 + }, + { + "epoch": 1.605377197265625e-05, + "model_forward_time": 0.02412557601928711, + "step": 10521 + }, + { + "epoch": 1.605377197265625e-05, + "step": 10521, + "training_step_time": 0.20664763450622559 + }, + { + "epoch": 1.60552978515625e-05, + "model_forward_time": 0.024388551712036133, + "step": 10522 + }, + { + "epoch": 1.60552978515625e-05, + "step": 10522, + "training_step_time": 0.11159443855285645 + }, + { + "epoch": 1.605682373046875e-05, + "model_forward_time": 0.024804115295410156, + "step": 10523 + }, + { + "epoch": 1.605682373046875e-05, + "step": 10523, + "training_step_time": 0.18281078338623047 + }, + { + "epoch": 1.6058349609375e-05, + "model_forward_time": 0.024466276168823242, + "step": 10524 + }, + { + "epoch": 1.6058349609375e-05, + "step": 10524, + "training_step_time": 0.16666436195373535 + }, + { + "epoch": 1.605987548828125e-05, + "model_forward_time": 0.02457880973815918, + "step": 10525 + }, + { + "epoch": 1.605987548828125e-05, + "step": 10525, + "training_step_time": 0.19982552528381348 + }, + { + "epoch": 1.60614013671875e-05, + "model_forward_time": 0.023839712142944336, + "step": 10526 + }, + { + "epoch": 1.60614013671875e-05, + "step": 10526, + "training_step_time": 0.12900233268737793 + }, + { + "epoch": 1.606292724609375e-05, + "model_forward_time": 0.025035858154296875, + "step": 10527 + }, + { + "epoch": 1.606292724609375e-05, + "step": 10527, + "training_step_time": 0.18851661682128906 + }, + { + "epoch": 1.6064453125e-05, + "model_forward_time": 0.02438044548034668, + "step": 10528 + }, + { + "epoch": 1.6064453125e-05, + "step": 10528, + "training_step_time": 0.1660594940185547 + }, + { + "epoch": 1.606597900390625e-05, + "model_forward_time": 0.0245358943939209, + "step": 10529 + }, + { + "epoch": 1.606597900390625e-05, + "step": 10529, + "training_step_time": 0.14285039901733398 + }, + { + "epoch": 1.60675048828125e-05, + "grad_norm": 0.33786284923553467, + "learning_rate": 7.720883567456298e-05, + "loss": 0.0252, + "step": 10530 + }, + { + "epoch": 1.60675048828125e-05, + "model_forward_time": 0.02480792999267578, + "step": 10530 + }, + { + "epoch": 1.60675048828125e-05, + "step": 10530, + "training_step_time": 0.12838077545166016 + }, + { + "epoch": 1.606903076171875e-05, + "model_forward_time": 0.02434825897216797, + "step": 10531 + }, + { + "epoch": 1.606903076171875e-05, + "step": 10531, + "training_step_time": 0.1361076831817627 + }, + { + "epoch": 1.6070556640625e-05, + "model_forward_time": 0.02456355094909668, + "step": 10532 + }, + { + "epoch": 1.6070556640625e-05, + "step": 10532, + "training_step_time": 0.11316776275634766 + }, + { + "epoch": 1.607208251953125e-05, + "model_forward_time": 0.025348424911499023, + "step": 10533 + }, + { + "epoch": 1.607208251953125e-05, + "step": 10533, + "training_step_time": 0.15860390663146973 + }, + { + "epoch": 1.60736083984375e-05, + "model_forward_time": 0.024854421615600586, + "step": 10534 + }, + { + "epoch": 1.60736083984375e-05, + "step": 10534, + "training_step_time": 0.19716119766235352 + }, + { + "epoch": 1.607513427734375e-05, + "model_forward_time": 0.023891448974609375, + "step": 10535 + }, + { + "epoch": 1.607513427734375e-05, + "step": 10535, + "training_step_time": 0.12010931968688965 + }, + { + "epoch": 1.607666015625e-05, + "model_forward_time": 0.023906230926513672, + "step": 10536 + }, + { + "epoch": 1.607666015625e-05, + "step": 10536, + "training_step_time": 0.1123809814453125 + }, + { + "epoch": 1.607818603515625e-05, + "model_forward_time": 0.024956226348876953, + "step": 10537 + }, + { + "epoch": 1.607818603515625e-05, + "step": 10537, + "training_step_time": 0.11367988586425781 + }, + { + "epoch": 1.60797119140625e-05, + "model_forward_time": 0.025176525115966797, + "step": 10538 + }, + { + "epoch": 1.60797119140625e-05, + "step": 10538, + "training_step_time": 0.1213839054107666 + }, + { + "epoch": 1.608123779296875e-05, + "model_forward_time": 0.025254487991333008, + "step": 10539 + }, + { + "epoch": 1.608123779296875e-05, + "step": 10539, + "training_step_time": 0.10712575912475586 + }, + { + "epoch": 1.6082763671875e-05, + "grad_norm": 0.2922857999801636, + "learning_rate": 7.716257873323316e-05, + "loss": 0.0218, + "step": 10540 + }, + { + "epoch": 1.6082763671875e-05, + "model_forward_time": 0.0251467227935791, + "step": 10540 + }, + { + "epoch": 1.6082763671875e-05, + "step": 10540, + "training_step_time": 0.10972046852111816 + }, + { + "epoch": 1.608428955078125e-05, + "model_forward_time": 0.028188228607177734, + "step": 10541 + }, + { + "epoch": 1.608428955078125e-05, + "step": 10541, + "training_step_time": 0.11009359359741211 + }, + { + "epoch": 1.60858154296875e-05, + "model_forward_time": 0.025024890899658203, + "step": 10542 + }, + { + "epoch": 1.60858154296875e-05, + "step": 10542, + "training_step_time": 0.11162519454956055 + }, + { + "epoch": 1.608734130859375e-05, + "model_forward_time": 0.025208473205566406, + "step": 10543 + }, + { + "epoch": 1.608734130859375e-05, + "step": 10543, + "training_step_time": 0.10760021209716797 + }, + { + "epoch": 1.60888671875e-05, + "model_forward_time": 0.02494668960571289, + "step": 10544 + }, + { + "epoch": 1.60888671875e-05, + "step": 10544, + "training_step_time": 0.11358499526977539 + }, + { + "epoch": 1.609039306640625e-05, + "model_forward_time": 0.025033950805664062, + "step": 10545 + }, + { + "epoch": 1.609039306640625e-05, + "step": 10545, + "training_step_time": 0.10782980918884277 + }, + { + "epoch": 1.60919189453125e-05, + "model_forward_time": 0.025456666946411133, + "step": 10546 + }, + { + "epoch": 1.60919189453125e-05, + "step": 10546, + "training_step_time": 0.10735702514648438 + }, + { + "epoch": 1.609344482421875e-05, + "model_forward_time": 0.026746511459350586, + "step": 10547 + }, + { + "epoch": 1.609344482421875e-05, + "step": 10547, + "training_step_time": 0.10863375663757324 + }, + { + "epoch": 1.6094970703125e-05, + "model_forward_time": 0.025525808334350586, + "step": 10548 + }, + { + "epoch": 1.6094970703125e-05, + "step": 10548, + "training_step_time": 0.10991716384887695 + }, + { + "epoch": 1.609649658203125e-05, + "model_forward_time": 0.025144577026367188, + "step": 10549 + }, + { + "epoch": 1.609649658203125e-05, + "step": 10549, + "training_step_time": 0.10965776443481445 + }, + { + "epoch": 1.60980224609375e-05, + "grad_norm": 0.2572328746318817, + "learning_rate": 7.711628878680892e-05, + "loss": 0.0169, + "step": 10550 + }, + { + "epoch": 1.60980224609375e-05, + "model_forward_time": 0.025022029876708984, + "step": 10550 + }, + { + "epoch": 1.60980224609375e-05, + "step": 10550, + "training_step_time": 0.10916709899902344 + }, + { + "epoch": 1.609954833984375e-05, + "model_forward_time": 0.025048255920410156, + "step": 10551 + }, + { + "epoch": 1.609954833984375e-05, + "step": 10551, + "training_step_time": 0.10606551170349121 + }, + { + "epoch": 1.610107421875e-05, + "model_forward_time": 0.02530956268310547, + "step": 10552 + }, + { + "epoch": 1.610107421875e-05, + "step": 10552, + "training_step_time": 0.11098814010620117 + }, + { + "epoch": 1.610260009765625e-05, + "model_forward_time": 0.024949312210083008, + "step": 10553 + }, + { + "epoch": 1.610260009765625e-05, + "step": 10553, + "training_step_time": 0.10766291618347168 + }, + { + "epoch": 1.61041259765625e-05, + "model_forward_time": 0.024953126907348633, + "step": 10554 + }, + { + "epoch": 1.61041259765625e-05, + "step": 10554, + "training_step_time": 0.1059420108795166 + }, + { + "epoch": 1.610565185546875e-05, + "model_forward_time": 0.025127649307250977, + "step": 10555 + }, + { + "epoch": 1.610565185546875e-05, + "step": 10555, + "training_step_time": 0.1130363941192627 + }, + { + "epoch": 1.6107177734375e-05, + "model_forward_time": 0.025095462799072266, + "step": 10556 + }, + { + "epoch": 1.6107177734375e-05, + "step": 10556, + "training_step_time": 0.10740447044372559 + }, + { + "epoch": 1.610870361328125e-05, + "model_forward_time": 0.025441646575927734, + "step": 10557 + }, + { + "epoch": 1.610870361328125e-05, + "step": 10557, + "training_step_time": 0.10951924324035645 + }, + { + "epoch": 1.61102294921875e-05, + "model_forward_time": 0.024512529373168945, + "step": 10558 + }, + { + "epoch": 1.61102294921875e-05, + "step": 10558, + "training_step_time": 0.11040949821472168 + }, + { + "epoch": 1.611175537109375e-05, + "model_forward_time": 0.02487325668334961, + "step": 10559 + }, + { + "epoch": 1.611175537109375e-05, + "step": 10559, + "training_step_time": 0.10732626914978027 + }, + { + "epoch": 1.611328125e-05, + "grad_norm": 0.31317847967147827, + "learning_rate": 7.70699658915369e-05, + "loss": 0.0187, + "step": 10560 + }, + { + "epoch": 1.611328125e-05, + "model_forward_time": 0.02568340301513672, + "step": 10560 + }, + { + "epoch": 1.611328125e-05, + "step": 10560, + "training_step_time": 0.10809040069580078 + }, + { + "epoch": 1.611480712890625e-05, + "model_forward_time": 0.025400638580322266, + "step": 10561 + }, + { + "epoch": 1.611480712890625e-05, + "step": 10561, + "training_step_time": 0.1192770004272461 + }, + { + "epoch": 1.61163330078125e-05, + "model_forward_time": 0.024823904037475586, + "step": 10562 + }, + { + "epoch": 1.61163330078125e-05, + "step": 10562, + "training_step_time": 0.1626591682434082 + }, + { + "epoch": 1.611785888671875e-05, + "model_forward_time": 0.024608850479125977, + "step": 10563 + }, + { + "epoch": 1.611785888671875e-05, + "step": 10563, + "training_step_time": 0.2054145336151123 + }, + { + "epoch": 1.6119384765625e-05, + "model_forward_time": 0.024033546447753906, + "step": 10564 + }, + { + "epoch": 1.6119384765625e-05, + "step": 10564, + "training_step_time": 0.1954360008239746 + }, + { + "epoch": 1.612091064453125e-05, + "model_forward_time": 0.024494171142578125, + "step": 10565 + }, + { + "epoch": 1.612091064453125e-05, + "step": 10565, + "training_step_time": 0.10426878929138184 + }, + { + "epoch": 1.61224365234375e-05, + "model_forward_time": 0.02487325668334961, + "step": 10566 + }, + { + "epoch": 1.61224365234375e-05, + "step": 10566, + "training_step_time": 0.10887527465820312 + }, + { + "epoch": 1.612396240234375e-05, + "model_forward_time": 0.02513265609741211, + "step": 10567 + }, + { + "epoch": 1.612396240234375e-05, + "step": 10567, + "training_step_time": 0.10985541343688965 + }, + { + "epoch": 1.612548828125e-05, + "model_forward_time": 0.025142192840576172, + "step": 10568 + }, + { + "epoch": 1.612548828125e-05, + "step": 10568, + "training_step_time": 0.1395702362060547 + }, + { + "epoch": 1.612701416015625e-05, + "model_forward_time": 0.025128841400146484, + "step": 10569 + }, + { + "epoch": 1.612701416015625e-05, + "step": 10569, + "training_step_time": 0.10928678512573242 + }, + { + "epoch": 1.61285400390625e-05, + "grad_norm": 0.337839812040329, + "learning_rate": 7.70236101037038e-05, + "loss": 0.0204, + "step": 10570 + }, + { + "epoch": 1.61285400390625e-05, + "model_forward_time": 0.0249483585357666, + "step": 10570 + }, + { + "epoch": 1.61285400390625e-05, + "step": 10570, + "training_step_time": 0.11575818061828613 + }, + { + "epoch": 1.613006591796875e-05, + "model_forward_time": 0.025151491165161133, + "step": 10571 + }, + { + "epoch": 1.613006591796875e-05, + "step": 10571, + "training_step_time": 0.11050915718078613 + }, + { + "epoch": 1.6131591796875e-05, + "model_forward_time": 0.025507450103759766, + "step": 10572 + }, + { + "epoch": 1.6131591796875e-05, + "step": 10572, + "training_step_time": 0.13017654418945312 + }, + { + "epoch": 1.613311767578125e-05, + "model_forward_time": 0.025402545928955078, + "step": 10573 + }, + { + "epoch": 1.613311767578125e-05, + "step": 10573, + "training_step_time": 0.1840343475341797 + }, + { + "epoch": 1.61346435546875e-05, + "model_forward_time": 0.024535179138183594, + "step": 10574 + }, + { + "epoch": 1.61346435546875e-05, + "step": 10574, + "training_step_time": 0.1960587501525879 + }, + { + "epoch": 1.613616943359375e-05, + "model_forward_time": 0.024472713470458984, + "step": 10575 + }, + { + "epoch": 1.613616943359375e-05, + "step": 10575, + "training_step_time": 0.18572068214416504 + }, + { + "epoch": 1.61376953125e-05, + "model_forward_time": 0.024379968643188477, + "step": 10576 + }, + { + "epoch": 1.61376953125e-05, + "step": 10576, + "training_step_time": 0.15896248817443848 + }, + { + "epoch": 1.613922119140625e-05, + "model_forward_time": 0.024429798126220703, + "step": 10577 + }, + { + "epoch": 1.613922119140625e-05, + "step": 10577, + "training_step_time": 0.18648028373718262 + }, + { + "epoch": 1.61407470703125e-05, + "model_forward_time": 0.024054288864135742, + "step": 10578 + }, + { + "epoch": 1.61407470703125e-05, + "step": 10578, + "training_step_time": 0.15274739265441895 + }, + { + "epoch": 1.614227294921875e-05, + "model_forward_time": 0.024066925048828125, + "step": 10579 + }, + { + "epoch": 1.614227294921875e-05, + "step": 10579, + "training_step_time": 0.13208365440368652 + }, + { + "epoch": 1.6143798828125e-05, + "grad_norm": 0.31975287199020386, + "learning_rate": 7.697722147963626e-05, + "loss": 0.0282, + "step": 10580 + }, + { + "epoch": 1.6143798828125e-05, + "model_forward_time": 0.024564504623413086, + "step": 10580 + }, + { + "epoch": 1.6143798828125e-05, + "step": 10580, + "training_step_time": 0.12956881523132324 + }, + { + "epoch": 1.614532470703125e-05, + "model_forward_time": 0.024527788162231445, + "step": 10581 + }, + { + "epoch": 1.614532470703125e-05, + "step": 10581, + "training_step_time": 0.23170185089111328 + }, + { + "epoch": 1.61468505859375e-05, + "model_forward_time": 0.024355173110961914, + "step": 10582 + }, + { + "epoch": 1.61468505859375e-05, + "step": 10582, + "training_step_time": 0.1052548885345459 + }, + { + "epoch": 1.614837646484375e-05, + "model_forward_time": 0.024548053741455078, + "step": 10583 + }, + { + "epoch": 1.614837646484375e-05, + "step": 10583, + "training_step_time": 0.10744786262512207 + }, + { + "epoch": 1.614990234375e-05, + "model_forward_time": 0.025532007217407227, + "step": 10584 + }, + { + "epoch": 1.614990234375e-05, + "step": 10584, + "training_step_time": 0.11202406883239746 + }, + { + "epoch": 1.615142822265625e-05, + "model_forward_time": 0.025130748748779297, + "step": 10585 + }, + { + "epoch": 1.615142822265625e-05, + "step": 10585, + "training_step_time": 0.11208915710449219 + }, + { + "epoch": 1.61529541015625e-05, + "model_forward_time": 0.025387048721313477, + "step": 10586 + }, + { + "epoch": 1.61529541015625e-05, + "step": 10586, + "training_step_time": 0.10964155197143555 + }, + { + "epoch": 1.615447998046875e-05, + "model_forward_time": 0.025470256805419922, + "step": 10587 + }, + { + "epoch": 1.615447998046875e-05, + "step": 10587, + "training_step_time": 0.11338639259338379 + }, + { + "epoch": 1.6156005859375e-05, + "model_forward_time": 0.02461528778076172, + "step": 10588 + }, + { + "epoch": 1.6156005859375e-05, + "step": 10588, + "training_step_time": 0.10921812057495117 + }, + { + "epoch": 1.615753173828125e-05, + "model_forward_time": 0.02517414093017578, + "step": 10589 + }, + { + "epoch": 1.615753173828125e-05, + "step": 10589, + "training_step_time": 0.11305379867553711 + }, + { + "epoch": 1.61590576171875e-05, + "grad_norm": 0.4433531165122986, + "learning_rate": 7.693080007570084e-05, + "loss": 0.0281, + "step": 10590 + }, + { + "epoch": 1.61590576171875e-05, + "model_forward_time": 0.025302410125732422, + "step": 10590 + }, + { + "epoch": 1.61590576171875e-05, + "step": 10590, + "training_step_time": 0.11017560958862305 + }, + { + "epoch": 1.616058349609375e-05, + "model_forward_time": 0.02539825439453125, + "step": 10591 + }, + { + "epoch": 1.616058349609375e-05, + "step": 10591, + "training_step_time": 0.10996890068054199 + }, + { + "epoch": 1.6162109375e-05, + "model_forward_time": 0.026195526123046875, + "step": 10592 + }, + { + "epoch": 1.6162109375e-05, + "step": 10592, + "training_step_time": 0.11437010765075684 + }, + { + "epoch": 1.616363525390625e-05, + "model_forward_time": 0.02526998519897461, + "step": 10593 + }, + { + "epoch": 1.616363525390625e-05, + "step": 10593, + "training_step_time": 0.1098635196685791 + }, + { + "epoch": 1.61651611328125e-05, + "model_forward_time": 0.02582263946533203, + "step": 10594 + }, + { + "epoch": 1.61651611328125e-05, + "step": 10594, + "training_step_time": 0.10863113403320312 + }, + { + "epoch": 1.616668701171875e-05, + "model_forward_time": 0.02534770965576172, + "step": 10595 + }, + { + "epoch": 1.616668701171875e-05, + "step": 10595, + "training_step_time": 0.10840177536010742 + }, + { + "epoch": 1.6168212890625e-05, + "model_forward_time": 0.02443671226501465, + "step": 10596 + }, + { + "epoch": 1.6168212890625e-05, + "step": 10596, + "training_step_time": 0.10681509971618652 + }, + { + "epoch": 1.616973876953125e-05, + "model_forward_time": 0.02490830421447754, + "step": 10597 + }, + { + "epoch": 1.616973876953125e-05, + "step": 10597, + "training_step_time": 0.10789942741394043 + }, + { + "epoch": 1.61712646484375e-05, + "model_forward_time": 0.025417089462280273, + "step": 10598 + }, + { + "epoch": 1.61712646484375e-05, + "step": 10598, + "training_step_time": 0.1086282730102539 + }, + { + "epoch": 1.617279052734375e-05, + "model_forward_time": 0.025480031967163086, + "step": 10599 + }, + { + "epoch": 1.617279052734375e-05, + "step": 10599, + "training_step_time": 0.10722112655639648 + }, + { + "epoch": 1.617431640625e-05, + "grad_norm": 0.3286585807800293, + "learning_rate": 7.688434594830392e-05, + "loss": 0.0389, + "step": 10600 + }, + { + "epoch": 1.617431640625e-05, + "model_forward_time": 0.025630712509155273, + "step": 10600 + }, + { + "epoch": 1.617431640625e-05, + "step": 10600, + "training_step_time": 0.11150717735290527 + }, + { + "epoch": 1.617584228515625e-05, + "model_forward_time": 0.025196552276611328, + "step": 10601 + }, + { + "epoch": 1.617584228515625e-05, + "step": 10601, + "training_step_time": 0.10967135429382324 + }, + { + "epoch": 1.61773681640625e-05, + "model_forward_time": 0.0252687931060791, + "step": 10602 + }, + { + "epoch": 1.61773681640625e-05, + "step": 10602, + "training_step_time": 0.10914802551269531 + }, + { + "epoch": 1.617889404296875e-05, + "model_forward_time": 0.024845361709594727, + "step": 10603 + }, + { + "epoch": 1.617889404296875e-05, + "step": 10603, + "training_step_time": 0.10592317581176758 + }, + { + "epoch": 1.6180419921875e-05, + "model_forward_time": 0.024120807647705078, + "step": 10604 + }, + { + "epoch": 1.6180419921875e-05, + "step": 10604, + "training_step_time": 0.15098023414611816 + }, + { + "epoch": 1.618194580078125e-05, + "model_forward_time": 0.024387836456298828, + "step": 10605 + }, + { + "epoch": 1.618194580078125e-05, + "step": 10605, + "training_step_time": 0.10559844970703125 + }, + { + "epoch": 1.61834716796875e-05, + "model_forward_time": 0.024695873260498047, + "step": 10606 + }, + { + "epoch": 1.61834716796875e-05, + "step": 10606, + "training_step_time": 0.11150693893432617 + }, + { + "epoch": 1.618499755859375e-05, + "model_forward_time": 0.025368690490722656, + "step": 10607 + }, + { + "epoch": 1.618499755859375e-05, + "step": 10607, + "training_step_time": 0.10921120643615723 + }, + { + "epoch": 1.61865234375e-05, + "model_forward_time": 0.02649998664855957, + "step": 10608 + }, + { + "epoch": 1.61865234375e-05, + "step": 10608, + "training_step_time": 0.11756086349487305 + }, + { + "epoch": 1.618804931640625e-05, + "model_forward_time": 0.025644540786743164, + "step": 10609 + }, + { + "epoch": 1.618804931640625e-05, + "step": 10609, + "training_step_time": 0.1240396499633789 + }, + { + "epoch": 1.61895751953125e-05, + "grad_norm": 0.39339444041252136, + "learning_rate": 7.683785915389164e-05, + "loss": 0.0516, + "step": 10610 + }, + { + "epoch": 1.61895751953125e-05, + "model_forward_time": 0.02530384063720703, + "step": 10610 + }, + { + "epoch": 1.61895751953125e-05, + "step": 10610, + "training_step_time": 0.11513924598693848 + }, + { + "epoch": 1.619110107421875e-05, + "model_forward_time": 0.025251388549804688, + "step": 10611 + }, + { + "epoch": 1.619110107421875e-05, + "step": 10611, + "training_step_time": 0.10688281059265137 + }, + { + "epoch": 1.6192626953125e-05, + "model_forward_time": 0.0250394344329834, + "step": 10612 + }, + { + "epoch": 1.6192626953125e-05, + "step": 10612, + "training_step_time": 0.12037467956542969 + }, + { + "epoch": 1.619415283203125e-05, + "model_forward_time": 0.02498483657836914, + "step": 10613 + }, + { + "epoch": 1.619415283203125e-05, + "step": 10613, + "training_step_time": 0.11236143112182617 + }, + { + "epoch": 1.61956787109375e-05, + "model_forward_time": 0.024936914443969727, + "step": 10614 + }, + { + "epoch": 1.61956787109375e-05, + "step": 10614, + "training_step_time": 0.18805456161499023 + }, + { + "epoch": 1.619720458984375e-05, + "model_forward_time": 0.024165630340576172, + "step": 10615 + }, + { + "epoch": 1.619720458984375e-05, + "step": 10615, + "training_step_time": 0.17286896705627441 + }, + { + "epoch": 1.619873046875e-05, + "model_forward_time": 0.02425551414489746, + "step": 10616 + }, + { + "epoch": 1.619873046875e-05, + "step": 10616, + "training_step_time": 0.17725586891174316 + }, + { + "epoch": 1.620025634765625e-05, + "model_forward_time": 0.02496051788330078, + "step": 10617 + }, + { + "epoch": 1.620025634765625e-05, + "step": 10617, + "training_step_time": 0.11138200759887695 + }, + { + "epoch": 1.62017822265625e-05, + "model_forward_time": 0.02391338348388672, + "step": 10618 + }, + { + "epoch": 1.62017822265625e-05, + "step": 10618, + "training_step_time": 0.13617658615112305 + }, + { + "epoch": 1.620330810546875e-05, + "model_forward_time": 0.024419307708740234, + "step": 10619 + }, + { + "epoch": 1.620330810546875e-05, + "step": 10619, + "training_step_time": 0.14745259284973145 + }, + { + "epoch": 1.6204833984375e-05, + "grad_norm": 0.48044633865356445, + "learning_rate": 7.679133974894983e-05, + "loss": 0.037, + "step": 10620 + }, + { + "epoch": 1.6204833984375e-05, + "model_forward_time": 0.02390575408935547, + "step": 10620 + }, + { + "epoch": 1.6204833984375e-05, + "step": 10620, + "training_step_time": 0.1304788589477539 + }, + { + "epoch": 1.620635986328125e-05, + "model_forward_time": 0.024514436721801758, + "step": 10621 + }, + { + "epoch": 1.620635986328125e-05, + "step": 10621, + "training_step_time": 0.13311409950256348 + }, + { + "epoch": 1.62078857421875e-05, + "model_forward_time": 0.024771928787231445, + "step": 10622 + }, + { + "epoch": 1.62078857421875e-05, + "step": 10622, + "training_step_time": 0.12976574897766113 + }, + { + "epoch": 1.620941162109375e-05, + "model_forward_time": 0.02439713478088379, + "step": 10623 + }, + { + "epoch": 1.620941162109375e-05, + "step": 10623, + "training_step_time": 0.12507915496826172 + }, + { + "epoch": 1.62109375e-05, + "model_forward_time": 0.024590730667114258, + "step": 10624 + }, + { + "epoch": 1.62109375e-05, + "step": 10624, + "training_step_time": 0.21639227867126465 + }, + { + "epoch": 1.621246337890625e-05, + "model_forward_time": 0.02448439598083496, + "step": 10625 + }, + { + "epoch": 1.621246337890625e-05, + "step": 10625, + "training_step_time": 0.13332295417785645 + }, + { + "epoch": 1.62139892578125e-05, + "model_forward_time": 0.02486562728881836, + "step": 10626 + }, + { + "epoch": 1.62139892578125e-05, + "step": 10626, + "training_step_time": 0.12364006042480469 + }, + { + "epoch": 1.621551513671875e-05, + "model_forward_time": 0.02440357208251953, + "step": 10627 + }, + { + "epoch": 1.621551513671875e-05, + "step": 10627, + "training_step_time": 0.12582111358642578 + }, + { + "epoch": 1.6217041015625e-05, + "model_forward_time": 0.0254213809967041, + "step": 10628 + }, + { + "epoch": 1.6217041015625e-05, + "step": 10628, + "training_step_time": 0.11520004272460938 + }, + { + "epoch": 1.621856689453125e-05, + "model_forward_time": 0.025415897369384766, + "step": 10629 + }, + { + "epoch": 1.621856689453125e-05, + "step": 10629, + "training_step_time": 0.11497116088867188 + }, + { + "epoch": 1.62200927734375e-05, + "grad_norm": 0.328964501619339, + "learning_rate": 7.674478779000398e-05, + "loss": 0.0205, + "step": 10630 + }, + { + "epoch": 1.62200927734375e-05, + "model_forward_time": 0.0247952938079834, + "step": 10630 + }, + { + "epoch": 1.62200927734375e-05, + "step": 10630, + "training_step_time": 0.11280703544616699 + }, + { + "epoch": 1.622161865234375e-05, + "model_forward_time": 0.02570486068725586, + "step": 10631 + }, + { + "epoch": 1.622161865234375e-05, + "step": 10631, + "training_step_time": 0.11025571823120117 + }, + { + "epoch": 1.622314453125e-05, + "model_forward_time": 0.025430679321289062, + "step": 10632 + }, + { + "epoch": 1.622314453125e-05, + "step": 10632, + "training_step_time": 0.10847854614257812 + }, + { + "epoch": 1.622467041015625e-05, + "model_forward_time": 0.024906635284423828, + "step": 10633 + }, + { + "epoch": 1.622467041015625e-05, + "step": 10633, + "training_step_time": 0.1090085506439209 + }, + { + "epoch": 1.62261962890625e-05, + "model_forward_time": 0.024785280227661133, + "step": 10634 + }, + { + "epoch": 1.62261962890625e-05, + "step": 10634, + "training_step_time": 0.10831594467163086 + }, + { + "epoch": 1.622772216796875e-05, + "model_forward_time": 0.026300668716430664, + "step": 10635 + }, + { + "epoch": 1.622772216796875e-05, + "step": 10635, + "training_step_time": 0.10818266868591309 + }, + { + "epoch": 1.6229248046875e-05, + "model_forward_time": 0.025136232376098633, + "step": 10636 + }, + { + "epoch": 1.6229248046875e-05, + "step": 10636, + "training_step_time": 0.10817170143127441 + }, + { + "epoch": 1.623077392578125e-05, + "model_forward_time": 0.02570033073425293, + "step": 10637 + }, + { + "epoch": 1.623077392578125e-05, + "step": 10637, + "training_step_time": 0.11177325248718262 + }, + { + "epoch": 1.62322998046875e-05, + "model_forward_time": 0.025026798248291016, + "step": 10638 + }, + { + "epoch": 1.62322998046875e-05, + "step": 10638, + "training_step_time": 0.1074068546295166 + }, + { + "epoch": 1.623382568359375e-05, + "model_forward_time": 0.025051593780517578, + "step": 10639 + }, + { + "epoch": 1.623382568359375e-05, + "step": 10639, + "training_step_time": 0.10706186294555664 + }, + { + "epoch": 1.62353515625e-05, + "grad_norm": 0.3106250762939453, + "learning_rate": 7.66982033336191e-05, + "loss": 0.0306, + "step": 10640 + }, + { + "epoch": 1.62353515625e-05, + "model_forward_time": 0.02465534210205078, + "step": 10640 + }, + { + "epoch": 1.62353515625e-05, + "step": 10640, + "training_step_time": 0.10710573196411133 + }, + { + "epoch": 1.623687744140625e-05, + "model_forward_time": 0.02523350715637207, + "step": 10641 + }, + { + "epoch": 1.623687744140625e-05, + "step": 10641, + "training_step_time": 0.10778594017028809 + }, + { + "epoch": 1.62384033203125e-05, + "model_forward_time": 0.02721261978149414, + "step": 10642 + }, + { + "epoch": 1.62384033203125e-05, + "step": 10642, + "training_step_time": 0.11066079139709473 + }, + { + "epoch": 1.623992919921875e-05, + "model_forward_time": 0.02598118782043457, + "step": 10643 + }, + { + "epoch": 1.623992919921875e-05, + "step": 10643, + "training_step_time": 0.10713410377502441 + }, + { + "epoch": 1.6241455078125e-05, + "model_forward_time": 0.02494072914123535, + "step": 10644 + }, + { + "epoch": 1.6241455078125e-05, + "step": 10644, + "training_step_time": 0.10544276237487793 + }, + { + "epoch": 1.624298095703125e-05, + "model_forward_time": 0.025106191635131836, + "step": 10645 + }, + { + "epoch": 1.624298095703125e-05, + "step": 10645, + "training_step_time": 0.10795307159423828 + }, + { + "epoch": 1.62445068359375e-05, + "model_forward_time": 0.02528834342956543, + "step": 10646 + }, + { + "epoch": 1.62445068359375e-05, + "step": 10646, + "training_step_time": 0.1079854965209961 + }, + { + "epoch": 1.624603271484375e-05, + "model_forward_time": 0.02504444122314453, + "step": 10647 + }, + { + "epoch": 1.624603271484375e-05, + "step": 10647, + "training_step_time": 0.11431479454040527 + }, + { + "epoch": 1.624755859375e-05, + "model_forward_time": 0.024985551834106445, + "step": 10648 + }, + { + "epoch": 1.624755859375e-05, + "step": 10648, + "training_step_time": 0.10938596725463867 + }, + { + "epoch": 1.624908447265625e-05, + "model_forward_time": 0.024998903274536133, + "step": 10649 + }, + { + "epoch": 1.624908447265625e-05, + "step": 10649, + "training_step_time": 0.11159086227416992 + }, + { + "epoch": 1.62506103515625e-05, + "grad_norm": 0.47151684761047363, + "learning_rate": 7.66515864363997e-05, + "loss": 0.0332, + "step": 10650 + }, + { + "epoch": 1.62506103515625e-05, + "model_forward_time": 0.026178359985351562, + "step": 10650 + }, + { + "epoch": 1.62506103515625e-05, + "step": 10650, + "training_step_time": 0.10872888565063477 + }, + { + "epoch": 1.625213623046875e-05, + "model_forward_time": 0.025221586227416992, + "step": 10651 + }, + { + "epoch": 1.625213623046875e-05, + "step": 10651, + "training_step_time": 0.10885477066040039 + }, + { + "epoch": 1.6253662109375e-05, + "model_forward_time": 0.025015592575073242, + "step": 10652 + }, + { + "epoch": 1.6253662109375e-05, + "step": 10652, + "training_step_time": 0.1336359977722168 + }, + { + "epoch": 1.625518798828125e-05, + "model_forward_time": 0.026723146438598633, + "step": 10653 + }, + { + "epoch": 1.625518798828125e-05, + "step": 10653, + "training_step_time": 0.10872745513916016 + }, + { + "epoch": 1.62567138671875e-05, + "model_forward_time": 0.025566577911376953, + "step": 10654 + }, + { + "epoch": 1.62567138671875e-05, + "step": 10654, + "training_step_time": 0.10971260070800781 + }, + { + "epoch": 1.625823974609375e-05, + "model_forward_time": 0.024880409240722656, + "step": 10655 + }, + { + "epoch": 1.625823974609375e-05, + "step": 10655, + "training_step_time": 0.16914129257202148 + }, + { + "epoch": 1.6259765625e-05, + "model_forward_time": 0.024487733840942383, + "step": 10656 + }, + { + "epoch": 1.6259765625e-05, + "step": 10656, + "training_step_time": 0.16909527778625488 + }, + { + "epoch": 1.626129150390625e-05, + "model_forward_time": 0.024396181106567383, + "step": 10657 + }, + { + "epoch": 1.626129150390625e-05, + "step": 10657, + "training_step_time": 0.2048330307006836 + }, + { + "epoch": 1.62628173828125e-05, + "model_forward_time": 0.02409982681274414, + "step": 10658 + }, + { + "epoch": 1.62628173828125e-05, + "step": 10658, + "training_step_time": 0.13302850723266602 + }, + { + "epoch": 1.626434326171875e-05, + "model_forward_time": 0.024069547653198242, + "step": 10659 + }, + { + "epoch": 1.626434326171875e-05, + "step": 10659, + "training_step_time": 0.10647320747375488 + }, + { + "epoch": 1.6265869140625e-05, + "grad_norm": 0.30007100105285645, + "learning_rate": 7.660493715498969e-05, + "loss": 0.0314, + "step": 10660 + }, + { + "epoch": 1.6265869140625e-05, + "model_forward_time": 0.025798797607421875, + "step": 10660 + }, + { + "epoch": 1.6265869140625e-05, + "step": 10660, + "training_step_time": 0.11492276191711426 + }, + { + "epoch": 1.626739501953125e-05, + "model_forward_time": 0.02526259422302246, + "step": 10661 + }, + { + "epoch": 1.626739501953125e-05, + "step": 10661, + "training_step_time": 0.1086728572845459 + }, + { + "epoch": 1.62689208984375e-05, + "model_forward_time": 0.025377511978149414, + "step": 10662 + }, + { + "epoch": 1.62689208984375e-05, + "step": 10662, + "training_step_time": 0.15277576446533203 + }, + { + "epoch": 1.627044677734375e-05, + "model_forward_time": 0.0243985652923584, + "step": 10663 + }, + { + "epoch": 1.627044677734375e-05, + "step": 10663, + "training_step_time": 0.1591494083404541 + }, + { + "epoch": 1.627197265625e-05, + "model_forward_time": 0.024025440216064453, + "step": 10664 + }, + { + "epoch": 1.627197265625e-05, + "step": 10664, + "training_step_time": 0.11126995086669922 + }, + { + "epoch": 1.627349853515625e-05, + "model_forward_time": 0.02484726905822754, + "step": 10665 + }, + { + "epoch": 1.627349853515625e-05, + "step": 10665, + "training_step_time": 0.1302940845489502 + }, + { + "epoch": 1.62750244140625e-05, + "model_forward_time": 0.02515864372253418, + "step": 10666 + }, + { + "epoch": 1.62750244140625e-05, + "step": 10666, + "training_step_time": 0.17456936836242676 + }, + { + "epoch": 1.627655029296875e-05, + "model_forward_time": 0.02462172508239746, + "step": 10667 + }, + { + "epoch": 1.627655029296875e-05, + "step": 10667, + "training_step_time": 0.1685624122619629 + }, + { + "epoch": 1.6278076171875e-05, + "model_forward_time": 0.023932695388793945, + "step": 10668 + }, + { + "epoch": 1.6278076171875e-05, + "step": 10668, + "training_step_time": 0.1955556869506836 + }, + { + "epoch": 1.627960205078125e-05, + "model_forward_time": 0.025030851364135742, + "step": 10669 + }, + { + "epoch": 1.627960205078125e-05, + "step": 10669, + "training_step_time": 0.19170522689819336 + }, + { + "epoch": 1.62811279296875e-05, + "grad_norm": 0.7429282665252686, + "learning_rate": 7.655825554607235e-05, + "loss": 0.0322, + "step": 10670 + }, + { + "epoch": 1.62811279296875e-05, + "model_forward_time": 0.024699687957763672, + "step": 10670 + }, + { + "epoch": 1.62811279296875e-05, + "step": 10670, + "training_step_time": 0.16203022003173828 + }, + { + "epoch": 1.628265380859375e-05, + "model_forward_time": 0.024254560470581055, + "step": 10671 + }, + { + "epoch": 1.628265380859375e-05, + "step": 10671, + "training_step_time": 0.20164752006530762 + }, + { + "epoch": 1.62841796875e-05, + "model_forward_time": 0.024671554565429688, + "step": 10672 + }, + { + "epoch": 1.62841796875e-05, + "step": 10672, + "training_step_time": 0.11528158187866211 + }, + { + "epoch": 1.628570556640625e-05, + "model_forward_time": 0.02441120147705078, + "step": 10673 + }, + { + "epoch": 1.628570556640625e-05, + "step": 10673, + "training_step_time": 0.1053001880645752 + }, + { + "epoch": 1.62872314453125e-05, + "model_forward_time": 0.024768590927124023, + "step": 10674 + }, + { + "epoch": 1.62872314453125e-05, + "step": 10674, + "training_step_time": 0.10788679122924805 + }, + { + "epoch": 1.628875732421875e-05, + "model_forward_time": 0.025109529495239258, + "step": 10675 + }, + { + "epoch": 1.628875732421875e-05, + "step": 10675, + "training_step_time": 0.10856246948242188 + }, + { + "epoch": 1.6290283203125e-05, + "model_forward_time": 0.025765419006347656, + "step": 10676 + }, + { + "epoch": 1.6290283203125e-05, + "step": 10676, + "training_step_time": 0.10743021965026855 + }, + { + "epoch": 1.629180908203125e-05, + "model_forward_time": 0.02504420280456543, + "step": 10677 + }, + { + "epoch": 1.629180908203125e-05, + "step": 10677, + "training_step_time": 0.11191773414611816 + }, + { + "epoch": 1.62933349609375e-05, + "model_forward_time": 0.02527332305908203, + "step": 10678 + }, + { + "epoch": 1.62933349609375e-05, + "step": 10678, + "training_step_time": 0.10640740394592285 + }, + { + "epoch": 1.629486083984375e-05, + "model_forward_time": 0.024402379989624023, + "step": 10679 + }, + { + "epoch": 1.629486083984375e-05, + "step": 10679, + "training_step_time": 0.106353759765625 + }, + { + "epoch": 1.629638671875e-05, + "grad_norm": 0.36857274174690247, + "learning_rate": 7.651154166637025e-05, + "loss": 0.016, + "step": 10680 + }, + { + "epoch": 1.629638671875e-05, + "model_forward_time": 0.02486562728881836, + "step": 10680 + }, + { + "epoch": 1.629638671875e-05, + "step": 10680, + "training_step_time": 0.11055374145507812 + }, + { + "epoch": 1.629791259765625e-05, + "model_forward_time": 0.02511906623840332, + "step": 10681 + }, + { + "epoch": 1.629791259765625e-05, + "step": 10681, + "training_step_time": 0.1139230728149414 + }, + { + "epoch": 1.62994384765625e-05, + "model_forward_time": 0.025153398513793945, + "step": 10682 + }, + { + "epoch": 1.62994384765625e-05, + "step": 10682, + "training_step_time": 0.11853647232055664 + }, + { + "epoch": 1.630096435546875e-05, + "model_forward_time": 0.024991512298583984, + "step": 10683 + }, + { + "epoch": 1.630096435546875e-05, + "step": 10683, + "training_step_time": 0.11930465698242188 + }, + { + "epoch": 1.6302490234375e-05, + "model_forward_time": 0.025042295455932617, + "step": 10684 + }, + { + "epoch": 1.6302490234375e-05, + "step": 10684, + "training_step_time": 0.11550092697143555 + }, + { + "epoch": 1.630401611328125e-05, + "model_forward_time": 0.025124073028564453, + "step": 10685 + }, + { + "epoch": 1.630401611328125e-05, + "step": 10685, + "training_step_time": 0.11941242218017578 + }, + { + "epoch": 1.63055419921875e-05, + "model_forward_time": 0.025148630142211914, + "step": 10686 + }, + { + "epoch": 1.63055419921875e-05, + "step": 10686, + "training_step_time": 0.11960244178771973 + }, + { + "epoch": 1.630706787109375e-05, + "model_forward_time": 0.0247650146484375, + "step": 10687 + }, + { + "epoch": 1.630706787109375e-05, + "step": 10687, + "training_step_time": 0.11365675926208496 + }, + { + "epoch": 1.630859375e-05, + "model_forward_time": 0.025362730026245117, + "step": 10688 + }, + { + "epoch": 1.630859375e-05, + "step": 10688, + "training_step_time": 0.11823296546936035 + }, + { + "epoch": 1.631011962890625e-05, + "model_forward_time": 0.025182247161865234, + "step": 10689 + }, + { + "epoch": 1.631011962890625e-05, + "step": 10689, + "training_step_time": 0.10963582992553711 + }, + { + "epoch": 1.63116455078125e-05, + "grad_norm": 0.28537416458129883, + "learning_rate": 7.646479557264513e-05, + "loss": 0.0208, + "step": 10690 + }, + { + "epoch": 1.63116455078125e-05, + "model_forward_time": 0.025135278701782227, + "step": 10690 + }, + { + "epoch": 1.63116455078125e-05, + "step": 10690, + "training_step_time": 0.10965394973754883 + }, + { + "epoch": 1.631317138671875e-05, + "model_forward_time": 0.0250093936920166, + "step": 10691 + }, + { + "epoch": 1.631317138671875e-05, + "step": 10691, + "training_step_time": 0.10721588134765625 + }, + { + "epoch": 1.6314697265625e-05, + "model_forward_time": 0.02524566650390625, + "step": 10692 + }, + { + "epoch": 1.6314697265625e-05, + "step": 10692, + "training_step_time": 0.1097269058227539 + }, + { + "epoch": 1.631622314453125e-05, + "model_forward_time": 0.025547266006469727, + "step": 10693 + }, + { + "epoch": 1.631622314453125e-05, + "step": 10693, + "training_step_time": 0.1092386245727539 + }, + { + "epoch": 1.63177490234375e-05, + "model_forward_time": 0.02541208267211914, + "step": 10694 + }, + { + "epoch": 1.63177490234375e-05, + "step": 10694, + "training_step_time": 0.10916924476623535 + }, + { + "epoch": 1.631927490234375e-05, + "model_forward_time": 0.02486562728881836, + "step": 10695 + }, + { + "epoch": 1.631927490234375e-05, + "step": 10695, + "training_step_time": 0.17072010040283203 + }, + { + "epoch": 1.632080078125e-05, + "model_forward_time": 0.024959802627563477, + "step": 10696 + }, + { + "epoch": 1.632080078125e-05, + "step": 10696, + "training_step_time": 0.16221308708190918 + }, + { + "epoch": 1.632232666015625e-05, + "model_forward_time": 0.02468395233154297, + "step": 10697 + }, + { + "epoch": 1.632232666015625e-05, + "step": 10697, + "training_step_time": 0.11621689796447754 + }, + { + "epoch": 1.63238525390625e-05, + "model_forward_time": 0.025160789489746094, + "step": 10698 + }, + { + "epoch": 1.63238525390625e-05, + "step": 10698, + "training_step_time": 0.21582889556884766 + }, + { + "epoch": 1.632537841796875e-05, + "model_forward_time": 0.025089263916015625, + "step": 10699 + }, + { + "epoch": 1.632537841796875e-05, + "step": 10699, + "training_step_time": 0.1169281005859375 + }, + { + "epoch": 1.6326904296875e-05, + "grad_norm": 0.44070756435394287, + "learning_rate": 7.641801732169795e-05, + "loss": 0.0454, + "step": 10700 + }, + { + "epoch": 1.6326904296875e-05, + "model_forward_time": 0.02423858642578125, + "step": 10700 + }, + { + "epoch": 1.6326904296875e-05, + "step": 10700, + "training_step_time": 0.10693693161010742 + }, + { + "epoch": 1.632843017578125e-05, + "model_forward_time": 0.02545166015625, + "step": 10701 + }, + { + "epoch": 1.632843017578125e-05, + "step": 10701, + "training_step_time": 0.11317038536071777 + }, + { + "epoch": 1.63299560546875e-05, + "model_forward_time": 0.02544856071472168, + "step": 10702 + }, + { + "epoch": 1.63299560546875e-05, + "step": 10702, + "training_step_time": 0.13507747650146484 + }, + { + "epoch": 1.633148193359375e-05, + "model_forward_time": 0.024816274642944336, + "step": 10703 + }, + { + "epoch": 1.633148193359375e-05, + "step": 10703, + "training_step_time": 0.11315536499023438 + }, + { + "epoch": 1.63330078125e-05, + "model_forward_time": 0.025484800338745117, + "step": 10704 + }, + { + "epoch": 1.63330078125e-05, + "step": 10704, + "training_step_time": 0.11356902122497559 + }, + { + "epoch": 1.633453369140625e-05, + "model_forward_time": 0.025608539581298828, + "step": 10705 + }, + { + "epoch": 1.633453369140625e-05, + "step": 10705, + "training_step_time": 0.11051535606384277 + }, + { + "epoch": 1.63360595703125e-05, + "model_forward_time": 0.02506732940673828, + "step": 10706 + }, + { + "epoch": 1.63360595703125e-05, + "step": 10706, + "training_step_time": 0.16840004920959473 + }, + { + "epoch": 1.633758544921875e-05, + "model_forward_time": 0.02463531494140625, + "step": 10707 + }, + { + "epoch": 1.633758544921875e-05, + "step": 10707, + "training_step_time": 0.1437091827392578 + }, + { + "epoch": 1.6339111328125e-05, + "model_forward_time": 0.024309158325195312, + "step": 10708 + }, + { + "epoch": 1.6339111328125e-05, + "step": 10708, + "training_step_time": 0.10707712173461914 + }, + { + "epoch": 1.634063720703125e-05, + "model_forward_time": 0.026467323303222656, + "step": 10709 + }, + { + "epoch": 1.634063720703125e-05, + "step": 10709, + "training_step_time": 0.1662890911102295 + }, + { + "epoch": 1.63421630859375e-05, + "grad_norm": 0.3846602737903595, + "learning_rate": 7.637120697036866e-05, + "loss": 0.0293, + "step": 10710 + }, + { + "epoch": 1.63421630859375e-05, + "model_forward_time": 0.02404642105102539, + "step": 10710 + }, + { + "epoch": 1.63421630859375e-05, + "step": 10710, + "training_step_time": 0.21455121040344238 + }, + { + "epoch": 1.634368896484375e-05, + "model_forward_time": 0.02450418472290039, + "step": 10711 + }, + { + "epoch": 1.634368896484375e-05, + "step": 10711, + "training_step_time": 0.11643743515014648 + }, + { + "epoch": 1.634521484375e-05, + "model_forward_time": 0.024669408798217773, + "step": 10712 + }, + { + "epoch": 1.634521484375e-05, + "step": 10712, + "training_step_time": 0.20532822608947754 + }, + { + "epoch": 1.634674072265625e-05, + "model_forward_time": 0.024477720260620117, + "step": 10713 + }, + { + "epoch": 1.634674072265625e-05, + "step": 10713, + "training_step_time": 0.11440753936767578 + }, + { + "epoch": 1.63482666015625e-05, + "model_forward_time": 0.024648189544677734, + "step": 10714 + }, + { + "epoch": 1.63482666015625e-05, + "step": 10714, + "training_step_time": 0.1765429973602295 + }, + { + "epoch": 1.634979248046875e-05, + "model_forward_time": 0.024798154830932617, + "step": 10715 + }, + { + "epoch": 1.634979248046875e-05, + "step": 10715, + "training_step_time": 0.1804804801940918 + }, + { + "epoch": 1.6351318359375e-05, + "model_forward_time": 0.024644851684570312, + "step": 10716 + }, + { + "epoch": 1.6351318359375e-05, + "step": 10716, + "training_step_time": 0.10927486419677734 + }, + { + "epoch": 1.635284423828125e-05, + "model_forward_time": 0.024281740188598633, + "step": 10717 + }, + { + "epoch": 1.635284423828125e-05, + "step": 10717, + "training_step_time": 0.11864185333251953 + }, + { + "epoch": 1.63543701171875e-05, + "model_forward_time": 0.02569866180419922, + "step": 10718 + }, + { + "epoch": 1.63543701171875e-05, + "step": 10718, + "training_step_time": 0.11031961441040039 + }, + { + "epoch": 1.635589599609375e-05, + "model_forward_time": 0.024944305419921875, + "step": 10719 + }, + { + "epoch": 1.635589599609375e-05, + "step": 10719, + "training_step_time": 0.10712981224060059 + }, + { + "epoch": 1.6357421875e-05, + "grad_norm": 0.3834470510482788, + "learning_rate": 7.632436457553625e-05, + "loss": 0.0274, + "step": 10720 + }, + { + "epoch": 1.6357421875e-05, + "model_forward_time": 0.025395870208740234, + "step": 10720 + }, + { + "epoch": 1.6357421875e-05, + "step": 10720, + "training_step_time": 0.11194062232971191 + }, + { + "epoch": 1.635894775390625e-05, + "model_forward_time": 0.025382518768310547, + "step": 10721 + }, + { + "epoch": 1.635894775390625e-05, + "step": 10721, + "training_step_time": 0.10833311080932617 + }, + { + "epoch": 1.63604736328125e-05, + "model_forward_time": 0.02544879913330078, + "step": 10722 + }, + { + "epoch": 1.63604736328125e-05, + "step": 10722, + "training_step_time": 0.10913491249084473 + }, + { + "epoch": 1.636199951171875e-05, + "model_forward_time": 0.024900436401367188, + "step": 10723 + }, + { + "epoch": 1.636199951171875e-05, + "step": 10723, + "training_step_time": 0.11480069160461426 + }, + { + "epoch": 1.6363525390625e-05, + "model_forward_time": 0.02527451515197754, + "step": 10724 + }, + { + "epoch": 1.6363525390625e-05, + "step": 10724, + "training_step_time": 0.10774803161621094 + }, + { + "epoch": 1.636505126953125e-05, + "model_forward_time": 0.025036096572875977, + "step": 10725 + }, + { + "epoch": 1.636505126953125e-05, + "step": 10725, + "training_step_time": 0.10722041130065918 + }, + { + "epoch": 1.63665771484375e-05, + "model_forward_time": 0.024894237518310547, + "step": 10726 + }, + { + "epoch": 1.63665771484375e-05, + "step": 10726, + "training_step_time": 0.10756230354309082 + }, + { + "epoch": 1.636810302734375e-05, + "model_forward_time": 0.025210142135620117, + "step": 10727 + }, + { + "epoch": 1.636810302734375e-05, + "step": 10727, + "training_step_time": 0.10796713829040527 + }, + { + "epoch": 1.636962890625e-05, + "model_forward_time": 0.025009870529174805, + "step": 10728 + }, + { + "epoch": 1.636962890625e-05, + "step": 10728, + "training_step_time": 0.1083059310913086 + }, + { + "epoch": 1.637115478515625e-05, + "model_forward_time": 0.025752544403076172, + "step": 10729 + }, + { + "epoch": 1.637115478515625e-05, + "step": 10729, + "training_step_time": 0.10871720314025879 + }, + { + "epoch": 1.63726806640625e-05, + "grad_norm": 0.2945505678653717, + "learning_rate": 7.627749019411866e-05, + "loss": 0.0208, + "step": 10730 + }, + { + "epoch": 1.63726806640625e-05, + "model_forward_time": 0.02495408058166504, + "step": 10730 + }, + { + "epoch": 1.63726806640625e-05, + "step": 10730, + "training_step_time": 0.10800051689147949 + }, + { + "epoch": 1.637420654296875e-05, + "model_forward_time": 0.024964094161987305, + "step": 10731 + }, + { + "epoch": 1.637420654296875e-05, + "step": 10731, + "training_step_time": 0.11511516571044922 + }, + { + "epoch": 1.6375732421875e-05, + "model_forward_time": 0.025507211685180664, + "step": 10732 + }, + { + "epoch": 1.6375732421875e-05, + "step": 10732, + "training_step_time": 0.1115114688873291 + }, + { + "epoch": 1.637725830078125e-05, + "model_forward_time": 0.025075912475585938, + "step": 10733 + }, + { + "epoch": 1.637725830078125e-05, + "step": 10733, + "training_step_time": 0.10691142082214355 + }, + { + "epoch": 1.63787841796875e-05, + "model_forward_time": 0.025284290313720703, + "step": 10734 + }, + { + "epoch": 1.63787841796875e-05, + "step": 10734, + "training_step_time": 0.1107625961303711 + }, + { + "epoch": 1.638031005859375e-05, + "model_forward_time": 0.02559661865234375, + "step": 10735 + }, + { + "epoch": 1.638031005859375e-05, + "step": 10735, + "training_step_time": 0.10758066177368164 + }, + { + "epoch": 1.63818359375e-05, + "model_forward_time": 0.025068283081054688, + "step": 10736 + }, + { + "epoch": 1.63818359375e-05, + "step": 10736, + "training_step_time": 0.10826849937438965 + }, + { + "epoch": 1.638336181640625e-05, + "model_forward_time": 0.02512335777282715, + "step": 10737 + }, + { + "epoch": 1.638336181640625e-05, + "step": 10737, + "training_step_time": 0.11080789566040039 + }, + { + "epoch": 1.63848876953125e-05, + "model_forward_time": 0.023865222930908203, + "step": 10738 + }, + { + "epoch": 1.63848876953125e-05, + "step": 10738, + "training_step_time": 0.10817384719848633 + }, + { + "epoch": 1.638641357421875e-05, + "model_forward_time": 0.02419757843017578, + "step": 10739 + }, + { + "epoch": 1.638641357421875e-05, + "step": 10739, + "training_step_time": 0.10743832588195801 + }, + { + "epoch": 1.6387939453125e-05, + "grad_norm": 0.3899030089378357, + "learning_rate": 7.623058388307269e-05, + "loss": 0.0208, + "step": 10740 + }, + { + "epoch": 1.6387939453125e-05, + "model_forward_time": 0.02422332763671875, + "step": 10740 + }, + { + "epoch": 1.6387939453125e-05, + "step": 10740, + "training_step_time": 0.11203265190124512 + }, + { + "epoch": 1.638946533203125e-05, + "model_forward_time": 0.02547001838684082, + "step": 10741 + }, + { + "epoch": 1.638946533203125e-05, + "step": 10741, + "training_step_time": 0.11022305488586426 + }, + { + "epoch": 1.63909912109375e-05, + "model_forward_time": 0.025500774383544922, + "step": 10742 + }, + { + "epoch": 1.63909912109375e-05, + "step": 10742, + "training_step_time": 0.11261343955993652 + }, + { + "epoch": 1.639251708984375e-05, + "model_forward_time": 0.025368213653564453, + "step": 10743 + }, + { + "epoch": 1.639251708984375e-05, + "step": 10743, + "training_step_time": 0.11301708221435547 + }, + { + "epoch": 1.639404296875e-05, + "model_forward_time": 0.025048255920410156, + "step": 10744 + }, + { + "epoch": 1.639404296875e-05, + "step": 10744, + "training_step_time": 0.11323952674865723 + }, + { + "epoch": 1.639556884765625e-05, + "model_forward_time": 0.02573370933532715, + "step": 10745 + }, + { + "epoch": 1.639556884765625e-05, + "step": 10745, + "training_step_time": 0.20105409622192383 + }, + { + "epoch": 1.63970947265625e-05, + "model_forward_time": 0.024323463439941406, + "step": 10746 + }, + { + "epoch": 1.63970947265625e-05, + "step": 10746, + "training_step_time": 0.2028648853302002 + }, + { + "epoch": 1.639862060546875e-05, + "model_forward_time": 0.025032758712768555, + "step": 10747 + }, + { + "epoch": 1.639862060546875e-05, + "step": 10747, + "training_step_time": 0.13688373565673828 + }, + { + "epoch": 1.6400146484375e-05, + "model_forward_time": 0.02435779571533203, + "step": 10748 + }, + { + "epoch": 1.6400146484375e-05, + "step": 10748, + "training_step_time": 0.10837864875793457 + }, + { + "epoch": 1.640167236328125e-05, + "model_forward_time": 0.02544093132019043, + "step": 10749 + }, + { + "epoch": 1.640167236328125e-05, + "step": 10749, + "training_step_time": 0.11611580848693848 + }, + { + "epoch": 1.64031982421875e-05, + "grad_norm": 0.2776572108268738, + "learning_rate": 7.618364569939391e-05, + "loss": 0.0298, + "step": 10750 + }, + { + "epoch": 1.64031982421875e-05, + "model_forward_time": 0.025522470474243164, + "step": 10750 + }, + { + "epoch": 1.64031982421875e-05, + "step": 10750, + "training_step_time": 0.10948634147644043 + }, + { + "epoch": 1.640472412109375e-05, + "model_forward_time": 0.02557969093322754, + "step": 10751 + }, + { + "epoch": 1.640472412109375e-05, + "step": 10751, + "training_step_time": 0.1764969825744629 + }, + { + "epoch": 1.640625e-05, + "model_forward_time": 0.02479696273803711, + "step": 10752 + }, + { + "epoch": 1.640625e-05, + "step": 10752, + "training_step_time": 0.14751124382019043 + }, + { + "epoch": 1.640777587890625e-05, + "model_forward_time": 0.02422475814819336, + "step": 10753 + }, + { + "epoch": 1.640777587890625e-05, + "step": 10753, + "training_step_time": 0.10578036308288574 + }, + { + "epoch": 1.64093017578125e-05, + "model_forward_time": 0.024740934371948242, + "step": 10754 + }, + { + "epoch": 1.64093017578125e-05, + "step": 10754, + "training_step_time": 0.15947341918945312 + }, + { + "epoch": 1.641082763671875e-05, + "model_forward_time": 0.024953126907348633, + "step": 10755 + }, + { + "epoch": 1.641082763671875e-05, + "step": 10755, + "training_step_time": 0.21271038055419922 + }, + { + "epoch": 1.6412353515625e-05, + "model_forward_time": 0.024477005004882812, + "step": 10756 + }, + { + "epoch": 1.6412353515625e-05, + "step": 10756, + "training_step_time": 0.12402510643005371 + }, + { + "epoch": 1.641387939453125e-05, + "model_forward_time": 0.02431488037109375, + "step": 10757 + }, + { + "epoch": 1.641387939453125e-05, + "step": 10757, + "training_step_time": 0.10337495803833008 + }, + { + "epoch": 1.64154052734375e-05, + "model_forward_time": 0.025301456451416016, + "step": 10758 + }, + { + "epoch": 1.64154052734375e-05, + "step": 10758, + "training_step_time": 0.13145756721496582 + }, + { + "epoch": 1.641693115234375e-05, + "model_forward_time": 0.02530956268310547, + "step": 10759 + }, + { + "epoch": 1.641693115234375e-05, + "step": 10759, + "training_step_time": 0.11580753326416016 + }, + { + "epoch": 1.641845703125e-05, + "grad_norm": 0.3233664333820343, + "learning_rate": 7.613667570011663e-05, + "loss": 0.0295, + "step": 10760 + }, + { + "epoch": 1.641845703125e-05, + "model_forward_time": 0.025384902954101562, + "step": 10760 + }, + { + "epoch": 1.641845703125e-05, + "step": 10760, + "training_step_time": 0.21688437461853027 + }, + { + "epoch": 1.641998291015625e-05, + "model_forward_time": 0.024252653121948242, + "step": 10761 + }, + { + "epoch": 1.641998291015625e-05, + "step": 10761, + "training_step_time": 0.14340710639953613 + }, + { + "epoch": 1.64215087890625e-05, + "model_forward_time": 0.02436542510986328, + "step": 10762 + }, + { + "epoch": 1.64215087890625e-05, + "step": 10762, + "training_step_time": 0.11798572540283203 + }, + { + "epoch": 1.642303466796875e-05, + "model_forward_time": 0.024516820907592773, + "step": 10763 + }, + { + "epoch": 1.642303466796875e-05, + "step": 10763, + "training_step_time": 0.13036394119262695 + }, + { + "epoch": 1.6424560546875e-05, + "model_forward_time": 0.025243520736694336, + "step": 10764 + }, + { + "epoch": 1.6424560546875e-05, + "step": 10764, + "training_step_time": 0.12154674530029297 + }, + { + "epoch": 1.642608642578125e-05, + "model_forward_time": 0.02477884292602539, + "step": 10765 + }, + { + "epoch": 1.642608642578125e-05, + "step": 10765, + "training_step_time": 0.1132802963256836 + }, + { + "epoch": 1.64276123046875e-05, + "model_forward_time": 0.0254819393157959, + "step": 10766 + }, + { + "epoch": 1.64276123046875e-05, + "step": 10766, + "training_step_time": 0.11395001411437988 + }, + { + "epoch": 1.642913818359375e-05, + "model_forward_time": 0.025023221969604492, + "step": 10767 + }, + { + "epoch": 1.642913818359375e-05, + "step": 10767, + "training_step_time": 0.11502909660339355 + }, + { + "epoch": 1.64306640625e-05, + "model_forward_time": 0.025210142135620117, + "step": 10768 + }, + { + "epoch": 1.64306640625e-05, + "step": 10768, + "training_step_time": 0.12074041366577148 + }, + { + "epoch": 1.643218994140625e-05, + "model_forward_time": 0.02584528923034668, + "step": 10769 + }, + { + "epoch": 1.643218994140625e-05, + "step": 10769, + "training_step_time": 0.11638975143432617 + }, + { + "epoch": 1.64337158203125e-05, + "grad_norm": 0.3310029208660126, + "learning_rate": 7.608967394231387e-05, + "loss": 0.0302, + "step": 10770 + }, + { + "epoch": 1.64337158203125e-05, + "model_forward_time": 0.025342464447021484, + "step": 10770 + }, + { + "epoch": 1.64337158203125e-05, + "step": 10770, + "training_step_time": 0.11458420753479004 + }, + { + "epoch": 1.643524169921875e-05, + "model_forward_time": 0.025568723678588867, + "step": 10771 + }, + { + "epoch": 1.643524169921875e-05, + "step": 10771, + "training_step_time": 0.11399269104003906 + }, + { + "epoch": 1.6436767578125e-05, + "model_forward_time": 0.02423858642578125, + "step": 10772 + }, + { + "epoch": 1.6436767578125e-05, + "step": 10772, + "training_step_time": 0.11171507835388184 + }, + { + "epoch": 1.643829345703125e-05, + "model_forward_time": 0.024680614471435547, + "step": 10773 + }, + { + "epoch": 1.643829345703125e-05, + "step": 10773, + "training_step_time": 0.11388969421386719 + }, + { + "epoch": 1.64398193359375e-05, + "model_forward_time": 0.025072574615478516, + "step": 10774 + }, + { + "epoch": 1.64398193359375e-05, + "step": 10774, + "training_step_time": 0.11184072494506836 + }, + { + "epoch": 1.644134521484375e-05, + "model_forward_time": 0.02514791488647461, + "step": 10775 + }, + { + "epoch": 1.644134521484375e-05, + "step": 10775, + "training_step_time": 0.1119232177734375 + }, + { + "epoch": 1.644287109375e-05, + "model_forward_time": 0.02554917335510254, + "step": 10776 + }, + { + "epoch": 1.644287109375e-05, + "step": 10776, + "training_step_time": 0.1116645336151123 + }, + { + "epoch": 1.644439697265625e-05, + "model_forward_time": 0.025325298309326172, + "step": 10777 + }, + { + "epoch": 1.644439697265625e-05, + "step": 10777, + "training_step_time": 0.10777425765991211 + }, + { + "epoch": 1.64459228515625e-05, + "model_forward_time": 0.025165319442749023, + "step": 10778 + }, + { + "epoch": 1.64459228515625e-05, + "step": 10778, + "training_step_time": 0.10851454734802246 + }, + { + "epoch": 1.644744873046875e-05, + "model_forward_time": 0.024199962615966797, + "step": 10779 + }, + { + "epoch": 1.644744873046875e-05, + "step": 10779, + "training_step_time": 0.10663700103759766 + }, + { + "epoch": 1.6448974609375e-05, + "grad_norm": 0.2997332513332367, + "learning_rate": 7.604264048309717e-05, + "loss": 0.0313, + "step": 10780 + }, + { + "epoch": 1.6448974609375e-05, + "model_forward_time": 0.024306058883666992, + "step": 10780 + }, + { + "epoch": 1.6448974609375e-05, + "step": 10780, + "training_step_time": 0.10718274116516113 + }, + { + "epoch": 1.645050048828125e-05, + "model_forward_time": 0.025063514709472656, + "step": 10781 + }, + { + "epoch": 1.645050048828125e-05, + "step": 10781, + "training_step_time": 0.11020898818969727 + }, + { + "epoch": 1.64520263671875e-05, + "model_forward_time": 0.0253903865814209, + "step": 10782 + }, + { + "epoch": 1.64520263671875e-05, + "step": 10782, + "training_step_time": 0.10812878608703613 + }, + { + "epoch": 1.645355224609375e-05, + "model_forward_time": 0.024985551834106445, + "step": 10783 + }, + { + "epoch": 1.645355224609375e-05, + "step": 10783, + "training_step_time": 0.10933136940002441 + }, + { + "epoch": 1.6455078125e-05, + "model_forward_time": 0.025437593460083008, + "step": 10784 + }, + { + "epoch": 1.6455078125e-05, + "step": 10784, + "training_step_time": 0.11381244659423828 + }, + { + "epoch": 1.645660400390625e-05, + "model_forward_time": 0.025662899017333984, + "step": 10785 + }, + { + "epoch": 1.645660400390625e-05, + "step": 10785, + "training_step_time": 0.19509506225585938 + }, + { + "epoch": 1.64581298828125e-05, + "model_forward_time": 0.02461409568786621, + "step": 10786 + }, + { + "epoch": 1.64581298828125e-05, + "step": 10786, + "training_step_time": 0.10313916206359863 + }, + { + "epoch": 1.645965576171875e-05, + "model_forward_time": 0.024393081665039062, + "step": 10787 + }, + { + "epoch": 1.645965576171875e-05, + "step": 10787, + "training_step_time": 0.10772061347961426 + }, + { + "epoch": 1.6461181640625e-05, + "model_forward_time": 0.025321245193481445, + "step": 10788 + }, + { + "epoch": 1.6461181640625e-05, + "step": 10788, + "training_step_time": 0.13216710090637207 + }, + { + "epoch": 1.646270751953125e-05, + "model_forward_time": 0.025519371032714844, + "step": 10789 + }, + { + "epoch": 1.646270751953125e-05, + "step": 10789, + "training_step_time": 0.16803264617919922 + }, + { + "epoch": 1.64642333984375e-05, + "grad_norm": 0.4027278423309326, + "learning_rate": 7.599557537961663e-05, + "loss": 0.0252, + "step": 10790 + }, + { + "epoch": 1.64642333984375e-05, + "model_forward_time": 0.024799823760986328, + "step": 10790 + }, + { + "epoch": 1.64642333984375e-05, + "step": 10790, + "training_step_time": 0.10465049743652344 + }, + { + "epoch": 1.646575927734375e-05, + "model_forward_time": 0.024894237518310547, + "step": 10791 + }, + { + "epoch": 1.646575927734375e-05, + "step": 10791, + "training_step_time": 0.2023172378540039 + }, + { + "epoch": 1.646728515625e-05, + "model_forward_time": 0.024867534637451172, + "step": 10792 + }, + { + "epoch": 1.646728515625e-05, + "step": 10792, + "training_step_time": 0.13690948486328125 + }, + { + "epoch": 1.646881103515625e-05, + "model_forward_time": 0.024422883987426758, + "step": 10793 + }, + { + "epoch": 1.646881103515625e-05, + "step": 10793, + "training_step_time": 0.11196303367614746 + }, + { + "epoch": 1.64703369140625e-05, + "model_forward_time": 0.02568674087524414, + "step": 10794 + }, + { + "epoch": 1.64703369140625e-05, + "step": 10794, + "training_step_time": 0.11054515838623047 + }, + { + "epoch": 1.647186279296875e-05, + "model_forward_time": 0.025094032287597656, + "step": 10795 + }, + { + "epoch": 1.647186279296875e-05, + "step": 10795, + "training_step_time": 0.1194157600402832 + }, + { + "epoch": 1.6473388671875e-05, + "model_forward_time": 0.02504563331604004, + "step": 10796 + }, + { + "epoch": 1.6473388671875e-05, + "step": 10796, + "training_step_time": 0.16044259071350098 + }, + { + "epoch": 1.647491455078125e-05, + "model_forward_time": 0.025817394256591797, + "step": 10797 + }, + { + "epoch": 1.647491455078125e-05, + "step": 10797, + "training_step_time": 0.14841985702514648 + }, + { + "epoch": 1.64764404296875e-05, + "model_forward_time": 0.02500176429748535, + "step": 10798 + }, + { + "epoch": 1.64764404296875e-05, + "step": 10798, + "training_step_time": 0.11089730262756348 + }, + { + "epoch": 1.647796630859375e-05, + "model_forward_time": 0.024941682815551758, + "step": 10799 + }, + { + "epoch": 1.647796630859375e-05, + "step": 10799, + "training_step_time": 0.14629697799682617 + }, + { + "epoch": 1.64794921875e-05, + "grad_norm": 0.45459648966789246, + "learning_rate": 7.594847868906076e-05, + "loss": 0.0331, + "step": 10800 + }, + { + "epoch": 1.64794921875e-05, + "model_forward_time": 0.024891138076782227, + "step": 10800 + }, + { + "epoch": 1.64794921875e-05, + "step": 10800, + "training_step_time": 0.21927833557128906 + }, + { + "epoch": 1.648101806640625e-05, + "model_forward_time": 0.0246732234954834, + "step": 10801 + }, + { + "epoch": 1.648101806640625e-05, + "step": 10801, + "training_step_time": 0.10877418518066406 + }, + { + "epoch": 1.64825439453125e-05, + "model_forward_time": 0.024944067001342773, + "step": 10802 + }, + { + "epoch": 1.64825439453125e-05, + "step": 10802, + "training_step_time": 0.10377264022827148 + }, + { + "epoch": 1.648406982421875e-05, + "model_forward_time": 0.025401592254638672, + "step": 10803 + }, + { + "epoch": 1.648406982421875e-05, + "step": 10803, + "training_step_time": 0.15845608711242676 + }, + { + "epoch": 1.6485595703125e-05, + "model_forward_time": 0.02449345588684082, + "step": 10804 + }, + { + "epoch": 1.6485595703125e-05, + "step": 10804, + "training_step_time": 0.11086606979370117 + }, + { + "epoch": 1.648712158203125e-05, + "model_forward_time": 0.025146484375, + "step": 10805 + }, + { + "epoch": 1.648712158203125e-05, + "step": 10805, + "training_step_time": 0.11435437202453613 + }, + { + "epoch": 1.64886474609375e-05, + "model_forward_time": 0.0251920223236084, + "step": 10806 + }, + { + "epoch": 1.64886474609375e-05, + "step": 10806, + "training_step_time": 0.12303376197814941 + }, + { + "epoch": 1.649017333984375e-05, + "model_forward_time": 0.025351285934448242, + "step": 10807 + }, + { + "epoch": 1.649017333984375e-05, + "step": 10807, + "training_step_time": 0.12134909629821777 + }, + { + "epoch": 1.649169921875e-05, + "model_forward_time": 0.025442123413085938, + "step": 10808 + }, + { + "epoch": 1.649169921875e-05, + "step": 10808, + "training_step_time": 0.11273550987243652 + }, + { + "epoch": 1.649322509765625e-05, + "model_forward_time": 0.02541947364807129, + "step": 10809 + }, + { + "epoch": 1.649322509765625e-05, + "step": 10809, + "training_step_time": 0.11706209182739258 + }, + { + "epoch": 1.64947509765625e-05, + "grad_norm": 0.363253653049469, + "learning_rate": 7.590135046865651e-05, + "loss": 0.0226, + "step": 10810 + }, + { + "epoch": 1.64947509765625e-05, + "model_forward_time": 0.025206804275512695, + "step": 10810 + }, + { + "epoch": 1.64947509765625e-05, + "step": 10810, + "training_step_time": 0.1110086441040039 + }, + { + "epoch": 1.649627685546875e-05, + "model_forward_time": 0.02468729019165039, + "step": 10811 + }, + { + "epoch": 1.649627685546875e-05, + "step": 10811, + "training_step_time": 0.10796141624450684 + }, + { + "epoch": 1.6497802734375e-05, + "model_forward_time": 0.02520155906677246, + "step": 10812 + }, + { + "epoch": 1.6497802734375e-05, + "step": 10812, + "training_step_time": 0.11082959175109863 + }, + { + "epoch": 1.649932861328125e-05, + "model_forward_time": 0.026639223098754883, + "step": 10813 + }, + { + "epoch": 1.649932861328125e-05, + "step": 10813, + "training_step_time": 0.1131441593170166 + }, + { + "epoch": 1.65008544921875e-05, + "model_forward_time": 0.025516033172607422, + "step": 10814 + }, + { + "epoch": 1.65008544921875e-05, + "step": 10814, + "training_step_time": 0.10660457611083984 + }, + { + "epoch": 1.650238037109375e-05, + "model_forward_time": 0.02548050880432129, + "step": 10815 + }, + { + "epoch": 1.650238037109375e-05, + "step": 10815, + "training_step_time": 0.14336204528808594 + }, + { + "epoch": 1.650390625e-05, + "model_forward_time": 0.024364709854125977, + "step": 10816 + }, + { + "epoch": 1.650390625e-05, + "step": 10816, + "training_step_time": 0.15810155868530273 + }, + { + "epoch": 1.650543212890625e-05, + "model_forward_time": 0.0235898494720459, + "step": 10817 + }, + { + "epoch": 1.650543212890625e-05, + "step": 10817, + "training_step_time": 0.14775395393371582 + }, + { + "epoch": 1.65069580078125e-05, + "model_forward_time": 0.023796796798706055, + "step": 10818 + }, + { + "epoch": 1.65069580078125e-05, + "step": 10818, + "training_step_time": 0.15061497688293457 + }, + { + "epoch": 1.650848388671875e-05, + "model_forward_time": 0.025929689407348633, + "step": 10819 + }, + { + "epoch": 1.650848388671875e-05, + "step": 10819, + "training_step_time": 0.13357257843017578 + }, + { + "epoch": 1.6510009765625e-05, + "grad_norm": 0.4513895511627197, + "learning_rate": 7.585419077566912e-05, + "loss": 0.0287, + "step": 10820 + }, + { + "epoch": 1.6510009765625e-05, + "model_forward_time": 0.02393484115600586, + "step": 10820 + }, + { + "epoch": 1.6510009765625e-05, + "step": 10820, + "training_step_time": 0.12855839729309082 + }, + { + "epoch": 1.651153564453125e-05, + "model_forward_time": 0.02543330192565918, + "step": 10821 + }, + { + "epoch": 1.651153564453125e-05, + "step": 10821, + "training_step_time": 0.1230313777923584 + }, + { + "epoch": 1.65130615234375e-05, + "model_forward_time": 0.0242764949798584, + "step": 10822 + }, + { + "epoch": 1.65130615234375e-05, + "step": 10822, + "training_step_time": 0.1207890510559082 + }, + { + "epoch": 1.651458740234375e-05, + "model_forward_time": 0.024845600128173828, + "step": 10823 + }, + { + "epoch": 1.651458740234375e-05, + "step": 10823, + "training_step_time": 0.1168060302734375 + }, + { + "epoch": 1.651611328125e-05, + "model_forward_time": 0.024271249771118164, + "step": 10824 + }, + { + "epoch": 1.651611328125e-05, + "step": 10824, + "training_step_time": 0.11795210838317871 + }, + { + "epoch": 1.651763916015625e-05, + "model_forward_time": 0.024193525314331055, + "step": 10825 + }, + { + "epoch": 1.651763916015625e-05, + "step": 10825, + "training_step_time": 0.11490988731384277 + }, + { + "epoch": 1.65191650390625e-05, + "model_forward_time": 0.025233745574951172, + "step": 10826 + }, + { + "epoch": 1.65191650390625e-05, + "step": 10826, + "training_step_time": 0.10995221138000488 + }, + { + "epoch": 1.652069091796875e-05, + "model_forward_time": 0.025481462478637695, + "step": 10827 + }, + { + "epoch": 1.652069091796875e-05, + "step": 10827, + "training_step_time": 0.11040210723876953 + }, + { + "epoch": 1.6522216796875e-05, + "model_forward_time": 0.02597355842590332, + "step": 10828 + }, + { + "epoch": 1.6522216796875e-05, + "step": 10828, + "training_step_time": 0.10800671577453613 + }, + { + "epoch": 1.652374267578125e-05, + "model_forward_time": 0.02449488639831543, + "step": 10829 + }, + { + "epoch": 1.652374267578125e-05, + "step": 10829, + "training_step_time": 0.14438891410827637 + }, + { + "epoch": 1.65252685546875e-05, + "grad_norm": 0.2281569391489029, + "learning_rate": 7.580699966740201e-05, + "loss": 0.027, + "step": 10830 + }, + { + "epoch": 1.65252685546875e-05, + "model_forward_time": 0.025112628936767578, + "step": 10830 + }, + { + "epoch": 1.65252685546875e-05, + "step": 10830, + "training_step_time": 0.16704463958740234 + }, + { + "epoch": 1.652679443359375e-05, + "model_forward_time": 0.02467489242553711, + "step": 10831 + }, + { + "epoch": 1.652679443359375e-05, + "step": 10831, + "training_step_time": 0.11936545372009277 + }, + { + "epoch": 1.65283203125e-05, + "model_forward_time": 0.024743080139160156, + "step": 10832 + }, + { + "epoch": 1.65283203125e-05, + "step": 10832, + "training_step_time": 0.13126468658447266 + }, + { + "epoch": 1.652984619140625e-05, + "model_forward_time": 0.025455236434936523, + "step": 10833 + }, + { + "epoch": 1.652984619140625e-05, + "step": 10833, + "training_step_time": 0.20081734657287598 + }, + { + "epoch": 1.65313720703125e-05, + "model_forward_time": 0.02518320083618164, + "step": 10834 + }, + { + "epoch": 1.65313720703125e-05, + "step": 10834, + "training_step_time": 0.18924999237060547 + }, + { + "epoch": 1.653289794921875e-05, + "model_forward_time": 0.02413153648376465, + "step": 10835 + }, + { + "epoch": 1.653289794921875e-05, + "step": 10835, + "training_step_time": 0.1416623592376709 + }, + { + "epoch": 1.6534423828125e-05, + "model_forward_time": 0.02464127540588379, + "step": 10836 + }, + { + "epoch": 1.6534423828125e-05, + "step": 10836, + "training_step_time": 0.10721182823181152 + }, + { + "epoch": 1.653594970703125e-05, + "model_forward_time": 0.025005817413330078, + "step": 10837 + }, + { + "epoch": 1.653594970703125e-05, + "step": 10837, + "training_step_time": 0.11602282524108887 + }, + { + "epoch": 1.65374755859375e-05, + "model_forward_time": 0.024360179901123047, + "step": 10838 + }, + { + "epoch": 1.65374755859375e-05, + "step": 10838, + "training_step_time": 0.11002659797668457 + }, + { + "epoch": 1.653900146484375e-05, + "model_forward_time": 0.025038480758666992, + "step": 10839 + }, + { + "epoch": 1.653900146484375e-05, + "step": 10839, + "training_step_time": 0.16441631317138672 + }, + { + "epoch": 1.654052734375e-05, + "grad_norm": 0.31513896584510803, + "learning_rate": 7.57597772011969e-05, + "loss": 0.0347, + "step": 10840 + }, + { + "epoch": 1.654052734375e-05, + "model_forward_time": 0.02461838722229004, + "step": 10840 + }, + { + "epoch": 1.654052734375e-05, + "step": 10840, + "training_step_time": 0.1494278907775879 + }, + { + "epoch": 1.654205322265625e-05, + "model_forward_time": 0.024490833282470703, + "step": 10841 + }, + { + "epoch": 1.654205322265625e-05, + "step": 10841, + "training_step_time": 0.10837817192077637 + }, + { + "epoch": 1.65435791015625e-05, + "model_forward_time": 0.024872779846191406, + "step": 10842 + }, + { + "epoch": 1.65435791015625e-05, + "step": 10842, + "training_step_time": 0.1570901870727539 + }, + { + "epoch": 1.654510498046875e-05, + "model_forward_time": 0.024687528610229492, + "step": 10843 + }, + { + "epoch": 1.654510498046875e-05, + "step": 10843, + "training_step_time": 0.22075152397155762 + }, + { + "epoch": 1.6546630859375e-05, + "model_forward_time": 0.02440047264099121, + "step": 10844 + }, + { + "epoch": 1.6546630859375e-05, + "step": 10844, + "training_step_time": 0.10788726806640625 + }, + { + "epoch": 1.654815673828125e-05, + "model_forward_time": 0.024966955184936523, + "step": 10845 + }, + { + "epoch": 1.654815673828125e-05, + "step": 10845, + "training_step_time": 0.10461187362670898 + }, + { + "epoch": 1.65496826171875e-05, + "model_forward_time": 0.025684595108032227, + "step": 10846 + }, + { + "epoch": 1.65496826171875e-05, + "step": 10846, + "training_step_time": 0.20926189422607422 + }, + { + "epoch": 1.655120849609375e-05, + "model_forward_time": 0.024487733840942383, + "step": 10847 + }, + { + "epoch": 1.655120849609375e-05, + "step": 10847, + "training_step_time": 0.10999631881713867 + }, + { + "epoch": 1.6552734375e-05, + "model_forward_time": 0.02434682846069336, + "step": 10848 + }, + { + "epoch": 1.6552734375e-05, + "step": 10848, + "training_step_time": 0.11000919342041016 + }, + { + "epoch": 1.655426025390625e-05, + "model_forward_time": 0.025496482849121094, + "step": 10849 + }, + { + "epoch": 1.655426025390625e-05, + "step": 10849, + "training_step_time": 0.12700653076171875 + }, + { + "epoch": 1.65557861328125e-05, + "grad_norm": 0.3954463303089142, + "learning_rate": 7.571252343443349e-05, + "loss": 0.0257, + "step": 10850 + }, + { + "epoch": 1.65557861328125e-05, + "model_forward_time": 0.02556324005126953, + "step": 10850 + }, + { + "epoch": 1.65557861328125e-05, + "step": 10850, + "training_step_time": 0.12648272514343262 + }, + { + "epoch": 1.655731201171875e-05, + "model_forward_time": 0.025029897689819336, + "step": 10851 + }, + { + "epoch": 1.655731201171875e-05, + "step": 10851, + "training_step_time": 0.11715555191040039 + }, + { + "epoch": 1.6558837890625e-05, + "model_forward_time": 0.025183439254760742, + "step": 10852 + }, + { + "epoch": 1.6558837890625e-05, + "step": 10852, + "training_step_time": 0.11367082595825195 + }, + { + "epoch": 1.656036376953125e-05, + "model_forward_time": 0.02546381950378418, + "step": 10853 + }, + { + "epoch": 1.656036376953125e-05, + "step": 10853, + "training_step_time": 0.10840892791748047 + }, + { + "epoch": 1.65618896484375e-05, + "model_forward_time": 0.025184154510498047, + "step": 10854 + }, + { + "epoch": 1.65618896484375e-05, + "step": 10854, + "training_step_time": 0.10849332809448242 + }, + { + "epoch": 1.656341552734375e-05, + "model_forward_time": 0.025542497634887695, + "step": 10855 + }, + { + "epoch": 1.656341552734375e-05, + "step": 10855, + "training_step_time": 0.10872411727905273 + }, + { + "epoch": 1.656494140625e-05, + "model_forward_time": 0.025752782821655273, + "step": 10856 + }, + { + "epoch": 1.656494140625e-05, + "step": 10856, + "training_step_time": 0.10850095748901367 + }, + { + "epoch": 1.656646728515625e-05, + "model_forward_time": 0.02546858787536621, + "step": 10857 + }, + { + "epoch": 1.656646728515625e-05, + "step": 10857, + "training_step_time": 0.1070394515991211 + }, + { + "epoch": 1.65679931640625e-05, + "model_forward_time": 0.025093555450439453, + "step": 10858 + }, + { + "epoch": 1.65679931640625e-05, + "step": 10858, + "training_step_time": 0.10788989067077637 + }, + { + "epoch": 1.656951904296875e-05, + "model_forward_time": 0.025656461715698242, + "step": 10859 + }, + { + "epoch": 1.656951904296875e-05, + "step": 10859, + "training_step_time": 0.10597538948059082 + }, + { + "epoch": 1.6571044921875e-05, + "grad_norm": 0.3234105110168457, + "learning_rate": 7.566523842452958e-05, + "loss": 0.02, + "step": 10860 + }, + { + "epoch": 1.6571044921875e-05, + "model_forward_time": 0.025614500045776367, + "step": 10860 + }, + { + "epoch": 1.6571044921875e-05, + "step": 10860, + "training_step_time": 0.10815238952636719 + }, + { + "epoch": 1.657257080078125e-05, + "model_forward_time": 0.02545762062072754, + "step": 10861 + }, + { + "epoch": 1.657257080078125e-05, + "step": 10861, + "training_step_time": 0.10886907577514648 + }, + { + "epoch": 1.65740966796875e-05, + "model_forward_time": 0.025498628616333008, + "step": 10862 + }, + { + "epoch": 1.65740966796875e-05, + "step": 10862, + "training_step_time": 0.11038327217102051 + }, + { + "epoch": 1.657562255859375e-05, + "model_forward_time": 0.026415586471557617, + "step": 10863 + }, + { + "epoch": 1.657562255859375e-05, + "step": 10863, + "training_step_time": 0.11261582374572754 + }, + { + "epoch": 1.65771484375e-05, + "model_forward_time": 0.025345802307128906, + "step": 10864 + }, + { + "epoch": 1.65771484375e-05, + "step": 10864, + "training_step_time": 0.11078405380249023 + }, + { + "epoch": 1.657867431640625e-05, + "model_forward_time": 0.025328636169433594, + "step": 10865 + }, + { + "epoch": 1.657867431640625e-05, + "step": 10865, + "training_step_time": 0.10863113403320312 + }, + { + "epoch": 1.65802001953125e-05, + "model_forward_time": 0.02506232261657715, + "step": 10866 + }, + { + "epoch": 1.65802001953125e-05, + "step": 10866, + "training_step_time": 0.10624146461486816 + }, + { + "epoch": 1.658172607421875e-05, + "model_forward_time": 0.02526068687438965, + "step": 10867 + }, + { + "epoch": 1.658172607421875e-05, + "step": 10867, + "training_step_time": 0.10612916946411133 + }, + { + "epoch": 1.6583251953125e-05, + "model_forward_time": 0.025470972061157227, + "step": 10868 + }, + { + "epoch": 1.6583251953125e-05, + "step": 10868, + "training_step_time": 0.10814714431762695 + }, + { + "epoch": 1.658477783203125e-05, + "model_forward_time": 0.025341033935546875, + "step": 10869 + }, + { + "epoch": 1.658477783203125e-05, + "step": 10869, + "training_step_time": 0.10621404647827148 + }, + { + "epoch": 1.65863037109375e-05, + "grad_norm": 0.34885501861572266, + "learning_rate": 7.561792222894091e-05, + "loss": 0.0254, + "step": 10870 + }, + { + "epoch": 1.65863037109375e-05, + "model_forward_time": 0.02532505989074707, + "step": 10870 + }, + { + "epoch": 1.65863037109375e-05, + "step": 10870, + "training_step_time": 0.10751581192016602 + }, + { + "epoch": 1.658782958984375e-05, + "model_forward_time": 0.02515435218811035, + "step": 10871 + }, + { + "epoch": 1.658782958984375e-05, + "step": 10871, + "training_step_time": 0.1110072135925293 + }, + { + "epoch": 1.658935546875e-05, + "model_forward_time": 0.02512812614440918, + "step": 10872 + }, + { + "epoch": 1.658935546875e-05, + "step": 10872, + "training_step_time": 0.10905790328979492 + }, + { + "epoch": 1.659088134765625e-05, + "model_forward_time": 0.025911331176757812, + "step": 10873 + }, + { + "epoch": 1.659088134765625e-05, + "step": 10873, + "training_step_time": 0.10709404945373535 + }, + { + "epoch": 1.65924072265625e-05, + "model_forward_time": 0.027601957321166992, + "step": 10874 + }, + { + "epoch": 1.65924072265625e-05, + "step": 10874, + "training_step_time": 0.14226984977722168 + }, + { + "epoch": 1.659393310546875e-05, + "model_forward_time": 0.024666309356689453, + "step": 10875 + }, + { + "epoch": 1.659393310546875e-05, + "step": 10875, + "training_step_time": 0.14530253410339355 + }, + { + "epoch": 1.6595458984375e-05, + "model_forward_time": 0.0247344970703125, + "step": 10876 + }, + { + "epoch": 1.6595458984375e-05, + "step": 10876, + "training_step_time": 0.1425631046295166 + }, + { + "epoch": 1.659698486328125e-05, + "model_forward_time": 0.02555108070373535, + "step": 10877 + }, + { + "epoch": 1.659698486328125e-05, + "step": 10877, + "training_step_time": 0.1271219253540039 + }, + { + "epoch": 1.65985107421875e-05, + "model_forward_time": 0.02698659896850586, + "step": 10878 + }, + { + "epoch": 1.65985107421875e-05, + "step": 10878, + "training_step_time": 0.19634532928466797 + }, + { + "epoch": 1.660003662109375e-05, + "model_forward_time": 0.024924039840698242, + "step": 10879 + }, + { + "epoch": 1.660003662109375e-05, + "step": 10879, + "training_step_time": 0.19521760940551758 + }, + { + "epoch": 1.66015625e-05, + "grad_norm": 0.2563628852367401, + "learning_rate": 7.557057490516111e-05, + "loss": 0.0265, + "step": 10880 + }, + { + "epoch": 1.66015625e-05, + "model_forward_time": 0.02491474151611328, + "step": 10880 + }, + { + "epoch": 1.66015625e-05, + "step": 10880, + "training_step_time": 0.13405990600585938 + }, + { + "epoch": 1.660308837890625e-05, + "model_forward_time": 0.02359795570373535, + "step": 10881 + }, + { + "epoch": 1.660308837890625e-05, + "step": 10881, + "training_step_time": 0.1072690486907959 + }, + { + "epoch": 1.66046142578125e-05, + "model_forward_time": 0.025513887405395508, + "step": 10882 + }, + { + "epoch": 1.66046142578125e-05, + "step": 10882, + "training_step_time": 0.12897253036499023 + }, + { + "epoch": 1.660614013671875e-05, + "model_forward_time": 0.025318384170532227, + "step": 10883 + }, + { + "epoch": 1.660614013671875e-05, + "step": 10883, + "training_step_time": 0.10889315605163574 + }, + { + "epoch": 1.6607666015625e-05, + "model_forward_time": 0.025252103805541992, + "step": 10884 + }, + { + "epoch": 1.6607666015625e-05, + "step": 10884, + "training_step_time": 0.1417233943939209 + }, + { + "epoch": 1.660919189453125e-05, + "model_forward_time": 0.025228500366210938, + "step": 10885 + }, + { + "epoch": 1.660919189453125e-05, + "step": 10885, + "training_step_time": 0.19792795181274414 + }, + { + "epoch": 1.66107177734375e-05, + "model_forward_time": 0.024687528610229492, + "step": 10886 + }, + { + "epoch": 1.66107177734375e-05, + "step": 10886, + "training_step_time": 0.14832305908203125 + }, + { + "epoch": 1.661224365234375e-05, + "model_forward_time": 0.024615049362182617, + "step": 10887 + }, + { + "epoch": 1.661224365234375e-05, + "step": 10887, + "training_step_time": 0.18960833549499512 + }, + { + "epoch": 1.661376953125e-05, + "model_forward_time": 0.024505138397216797, + "step": 10888 + }, + { + "epoch": 1.661376953125e-05, + "step": 10888, + "training_step_time": 0.10966348648071289 + }, + { + "epoch": 1.661529541015625e-05, + "model_forward_time": 0.02445054054260254, + "step": 10889 + }, + { + "epoch": 1.661529541015625e-05, + "step": 10889, + "training_step_time": 0.10359764099121094 + }, + { + "epoch": 1.66168212890625e-05, + "grad_norm": 0.44846391677856445, + "learning_rate": 7.552319651072164e-05, + "loss": 0.0276, + "step": 10890 + }, + { + "epoch": 1.66168212890625e-05, + "model_forward_time": 0.02598285675048828, + "step": 10890 + }, + { + "epoch": 1.66168212890625e-05, + "step": 10890, + "training_step_time": 0.10748100280761719 + }, + { + "epoch": 1.661834716796875e-05, + "model_forward_time": 0.02523064613342285, + "step": 10891 + }, + { + "epoch": 1.661834716796875e-05, + "step": 10891, + "training_step_time": 0.1123502254486084 + }, + { + "epoch": 1.6619873046875e-05, + "model_forward_time": 0.02543020248413086, + "step": 10892 + }, + { + "epoch": 1.6619873046875e-05, + "step": 10892, + "training_step_time": 0.1179811954498291 + }, + { + "epoch": 1.662139892578125e-05, + "model_forward_time": 0.025351285934448242, + "step": 10893 + }, + { + "epoch": 1.662139892578125e-05, + "step": 10893, + "training_step_time": 0.11029338836669922 + }, + { + "epoch": 1.66229248046875e-05, + "model_forward_time": 0.025508880615234375, + "step": 10894 + }, + { + "epoch": 1.66229248046875e-05, + "step": 10894, + "training_step_time": 0.11455106735229492 + }, + { + "epoch": 1.662445068359375e-05, + "model_forward_time": 0.025147676467895508, + "step": 10895 + }, + { + "epoch": 1.662445068359375e-05, + "step": 10895, + "training_step_time": 0.11470556259155273 + }, + { + "epoch": 1.66259765625e-05, + "model_forward_time": 0.025081872940063477, + "step": 10896 + }, + { + "epoch": 1.66259765625e-05, + "step": 10896, + "training_step_time": 0.13151311874389648 + }, + { + "epoch": 1.662750244140625e-05, + "model_forward_time": 0.024922847747802734, + "step": 10897 + }, + { + "epoch": 1.662750244140625e-05, + "step": 10897, + "training_step_time": 0.12404012680053711 + }, + { + "epoch": 1.66290283203125e-05, + "model_forward_time": 0.025073528289794922, + "step": 10898 + }, + { + "epoch": 1.66290283203125e-05, + "step": 10898, + "training_step_time": 0.11920404434204102 + }, + { + "epoch": 1.663055419921875e-05, + "model_forward_time": 0.027373790740966797, + "step": 10899 + }, + { + "epoch": 1.663055419921875e-05, + "step": 10899, + "training_step_time": 0.14691615104675293 + }, + { + "epoch": 1.6632080078125e-05, + "grad_norm": 0.30816227197647095, + "learning_rate": 7.547578710319174e-05, + "loss": 0.0272, + "step": 10900 + }, + { + "epoch": 1.6632080078125e-05, + "model_forward_time": 0.02491593360900879, + "step": 10900 + }, + { + "epoch": 1.6632080078125e-05, + "step": 10900, + "training_step_time": 0.10656142234802246 + }, + { + "epoch": 1.663360595703125e-05, + "model_forward_time": 0.0250089168548584, + "step": 10901 + }, + { + "epoch": 1.663360595703125e-05, + "step": 10901, + "training_step_time": 0.10392212867736816 + }, + { + "epoch": 1.66351318359375e-05, + "model_forward_time": 0.024938344955444336, + "step": 10902 + }, + { + "epoch": 1.66351318359375e-05, + "step": 10902, + "training_step_time": 0.10459542274475098 + }, + { + "epoch": 1.663665771484375e-05, + "model_forward_time": 0.02556610107421875, + "step": 10903 + }, + { + "epoch": 1.663665771484375e-05, + "step": 10903, + "training_step_time": 0.10699081420898438 + }, + { + "epoch": 1.663818359375e-05, + "model_forward_time": 0.02520155906677246, + "step": 10904 + }, + { + "epoch": 1.663818359375e-05, + "step": 10904, + "training_step_time": 0.10598516464233398 + }, + { + "epoch": 1.663970947265625e-05, + "model_forward_time": 0.025541305541992188, + "step": 10905 + }, + { + "epoch": 1.663970947265625e-05, + "step": 10905, + "training_step_time": 0.10612368583679199 + }, + { + "epoch": 1.66412353515625e-05, + "model_forward_time": 0.025435447692871094, + "step": 10906 + }, + { + "epoch": 1.66412353515625e-05, + "step": 10906, + "training_step_time": 0.10801315307617188 + }, + { + "epoch": 1.664276123046875e-05, + "model_forward_time": 0.025251388549804688, + "step": 10907 + }, + { + "epoch": 1.664276123046875e-05, + "step": 10907, + "training_step_time": 0.11206316947937012 + }, + { + "epoch": 1.6644287109375e-05, + "model_forward_time": 0.025639057159423828, + "step": 10908 + }, + { + "epoch": 1.6644287109375e-05, + "step": 10908, + "training_step_time": 0.1545724868774414 + }, + { + "epoch": 1.664581298828125e-05, + "model_forward_time": 0.024622440338134766, + "step": 10909 + }, + { + "epoch": 1.664581298828125e-05, + "step": 10909, + "training_step_time": 0.1860671043395996 + }, + { + "epoch": 1.66473388671875e-05, + "grad_norm": 0.2433345466852188, + "learning_rate": 7.542834674017831e-05, + "loss": 0.0216, + "step": 10910 + }, + { + "epoch": 1.66473388671875e-05, + "model_forward_time": 0.024281978607177734, + "step": 10910 + }, + { + "epoch": 1.66473388671875e-05, + "step": 10910, + "training_step_time": 0.18406915664672852 + }, + { + "epoch": 1.664886474609375e-05, + "model_forward_time": 0.024146080017089844, + "step": 10911 + }, + { + "epoch": 1.664886474609375e-05, + "step": 10911, + "training_step_time": 0.17139720916748047 + }, + { + "epoch": 1.6650390625e-05, + "model_forward_time": 0.024176597595214844, + "step": 10912 + }, + { + "epoch": 1.6650390625e-05, + "step": 10912, + "training_step_time": 0.17169570922851562 + }, + { + "epoch": 1.665191650390625e-05, + "model_forward_time": 0.027772188186645508, + "step": 10913 + }, + { + "epoch": 1.665191650390625e-05, + "step": 10913, + "training_step_time": 0.15646743774414062 + }, + { + "epoch": 1.66534423828125e-05, + "model_forward_time": 0.024075746536254883, + "step": 10914 + }, + { + "epoch": 1.66534423828125e-05, + "step": 10914, + "training_step_time": 0.13920235633850098 + }, + { + "epoch": 1.665496826171875e-05, + "model_forward_time": 0.025835752487182617, + "step": 10915 + }, + { + "epoch": 1.665496826171875e-05, + "step": 10915, + "training_step_time": 0.11023092269897461 + }, + { + "epoch": 1.6656494140625e-05, + "model_forward_time": 0.024771928787231445, + "step": 10916 + }, + { + "epoch": 1.6656494140625e-05, + "step": 10916, + "training_step_time": 0.1279587745666504 + }, + { + "epoch": 1.665802001953125e-05, + "model_forward_time": 0.024875164031982422, + "step": 10917 + }, + { + "epoch": 1.665802001953125e-05, + "step": 10917, + "training_step_time": 0.12012791633605957 + }, + { + "epoch": 1.66595458984375e-05, + "model_forward_time": 0.02517223358154297, + "step": 10918 + }, + { + "epoch": 1.66595458984375e-05, + "step": 10918, + "training_step_time": 0.11974024772644043 + }, + { + "epoch": 1.666107177734375e-05, + "model_forward_time": 0.02620220184326172, + "step": 10919 + }, + { + "epoch": 1.666107177734375e-05, + "step": 10919, + "training_step_time": 0.11363601684570312 + }, + { + "epoch": 1.666259765625e-05, + "grad_norm": 0.3204241693019867, + "learning_rate": 7.538087547932585e-05, + "loss": 0.0212, + "step": 10920 + }, + { + "epoch": 1.666259765625e-05, + "model_forward_time": 0.025401592254638672, + "step": 10920 + }, + { + "epoch": 1.666259765625e-05, + "step": 10920, + "training_step_time": 0.21927952766418457 + }, + { + "epoch": 1.666412353515625e-05, + "model_forward_time": 0.024571657180786133, + "step": 10921 + }, + { + "epoch": 1.666412353515625e-05, + "step": 10921, + "training_step_time": 0.12032485008239746 + }, + { + "epoch": 1.66656494140625e-05, + "model_forward_time": 0.02520442008972168, + "step": 10922 + }, + { + "epoch": 1.66656494140625e-05, + "step": 10922, + "training_step_time": 0.12459945678710938 + }, + { + "epoch": 1.666717529296875e-05, + "model_forward_time": 0.025325536727905273, + "step": 10923 + }, + { + "epoch": 1.666717529296875e-05, + "step": 10923, + "training_step_time": 0.14423108100891113 + }, + { + "epoch": 1.6668701171875e-05, + "model_forward_time": 0.024826765060424805, + "step": 10924 + }, + { + "epoch": 1.6668701171875e-05, + "step": 10924, + "training_step_time": 0.1121985912322998 + }, + { + "epoch": 1.667022705078125e-05, + "model_forward_time": 0.024720191955566406, + "step": 10925 + }, + { + "epoch": 1.667022705078125e-05, + "step": 10925, + "training_step_time": 0.10940265655517578 + }, + { + "epoch": 1.66717529296875e-05, + "model_forward_time": 0.02522587776184082, + "step": 10926 + }, + { + "epoch": 1.66717529296875e-05, + "step": 10926, + "training_step_time": 0.13471460342407227 + }, + { + "epoch": 1.667327880859375e-05, + "model_forward_time": 0.025567293167114258, + "step": 10927 + }, + { + "epoch": 1.667327880859375e-05, + "step": 10927, + "training_step_time": 0.17352747917175293 + }, + { + "epoch": 1.66748046875e-05, + "model_forward_time": 0.02487659454345703, + "step": 10928 + }, + { + "epoch": 1.66748046875e-05, + "step": 10928, + "training_step_time": 0.16414666175842285 + }, + { + "epoch": 1.667633056640625e-05, + "model_forward_time": 0.025097131729125977, + "step": 10929 + }, + { + "epoch": 1.667633056640625e-05, + "step": 10929, + "training_step_time": 0.1678166389465332 + }, + { + "epoch": 1.66778564453125e-05, + "grad_norm": 0.34175485372543335, + "learning_rate": 7.533337337831642e-05, + "loss": 0.0334, + "step": 10930 + }, + { + "epoch": 1.66778564453125e-05, + "model_forward_time": 0.02432417869567871, + "step": 10930 + }, + { + "epoch": 1.66778564453125e-05, + "step": 10930, + "training_step_time": 0.1229853630065918 + }, + { + "epoch": 1.667938232421875e-05, + "model_forward_time": 0.02440500259399414, + "step": 10931 + }, + { + "epoch": 1.667938232421875e-05, + "step": 10931, + "training_step_time": 0.11261940002441406 + }, + { + "epoch": 1.6680908203125e-05, + "model_forward_time": 0.025114059448242188, + "step": 10932 + }, + { + "epoch": 1.6680908203125e-05, + "step": 10932, + "training_step_time": 0.1064760684967041 + }, + { + "epoch": 1.668243408203125e-05, + "model_forward_time": 0.02503371238708496, + "step": 10933 + }, + { + "epoch": 1.668243408203125e-05, + "step": 10933, + "training_step_time": 0.11040306091308594 + }, + { + "epoch": 1.66839599609375e-05, + "model_forward_time": 0.02521824836730957, + "step": 10934 + }, + { + "epoch": 1.66839599609375e-05, + "step": 10934, + "training_step_time": 0.10927605628967285 + }, + { + "epoch": 1.668548583984375e-05, + "model_forward_time": 0.025493860244750977, + "step": 10935 + }, + { + "epoch": 1.668548583984375e-05, + "step": 10935, + "training_step_time": 0.10708022117614746 + }, + { + "epoch": 1.668701171875e-05, + "model_forward_time": 0.025241374969482422, + "step": 10936 + }, + { + "epoch": 1.668701171875e-05, + "step": 10936, + "training_step_time": 0.14281105995178223 + }, + { + "epoch": 1.668853759765625e-05, + "model_forward_time": 0.025728225708007812, + "step": 10937 + }, + { + "epoch": 1.668853759765625e-05, + "step": 10937, + "training_step_time": 0.1094818115234375 + }, + { + "epoch": 1.66900634765625e-05, + "model_forward_time": 0.025628089904785156, + "step": 10938 + }, + { + "epoch": 1.66900634765625e-05, + "step": 10938, + "training_step_time": 0.11281967163085938 + }, + { + "epoch": 1.669158935546875e-05, + "model_forward_time": 0.025096654891967773, + "step": 10939 + }, + { + "epoch": 1.669158935546875e-05, + "step": 10939, + "training_step_time": 0.12493252754211426 + }, + { + "epoch": 1.6693115234375e-05, + "grad_norm": 0.34396734833717346, + "learning_rate": 7.528584049486955e-05, + "loss": 0.0283, + "step": 10940 + }, + { + "epoch": 1.6693115234375e-05, + "model_forward_time": 0.025281906127929688, + "step": 10940 + }, + { + "epoch": 1.6693115234375e-05, + "step": 10940, + "training_step_time": 0.12320661544799805 + }, + { + "epoch": 1.669464111328125e-05, + "model_forward_time": 0.02537226676940918, + "step": 10941 + }, + { + "epoch": 1.669464111328125e-05, + "step": 10941, + "training_step_time": 0.1170663833618164 + }, + { + "epoch": 1.66961669921875e-05, + "model_forward_time": 0.025002717971801758, + "step": 10942 + }, + { + "epoch": 1.66961669921875e-05, + "step": 10942, + "training_step_time": 0.12281608581542969 + }, + { + "epoch": 1.669769287109375e-05, + "model_forward_time": 0.024997234344482422, + "step": 10943 + }, + { + "epoch": 1.669769287109375e-05, + "step": 10943, + "training_step_time": 0.17627549171447754 + }, + { + "epoch": 1.669921875e-05, + "model_forward_time": 0.024360179901123047, + "step": 10944 + }, + { + "epoch": 1.669921875e-05, + "step": 10944, + "training_step_time": 0.10507011413574219 + }, + { + "epoch": 1.670074462890625e-05, + "model_forward_time": 0.024075031280517578, + "step": 10945 + }, + { + "epoch": 1.670074462890625e-05, + "step": 10945, + "training_step_time": 0.10422396659851074 + }, + { + "epoch": 1.67022705078125e-05, + "model_forward_time": 0.02556753158569336, + "step": 10946 + }, + { + "epoch": 1.67022705078125e-05, + "step": 10946, + "training_step_time": 0.10721778869628906 + }, + { + "epoch": 1.670379638671875e-05, + "model_forward_time": 0.025260448455810547, + "step": 10947 + }, + { + "epoch": 1.670379638671875e-05, + "step": 10947, + "training_step_time": 0.10821127891540527 + }, + { + "epoch": 1.6705322265625e-05, + "model_forward_time": 0.025302648544311523, + "step": 10948 + }, + { + "epoch": 1.6705322265625e-05, + "step": 10948, + "training_step_time": 0.10439538955688477 + }, + { + "epoch": 1.670684814453125e-05, + "model_forward_time": 0.02876114845275879, + "step": 10949 + }, + { + "epoch": 1.670684814453125e-05, + "step": 10949, + "training_step_time": 0.10774660110473633 + }, + { + "epoch": 1.67083740234375e-05, + "grad_norm": 0.38681089878082275, + "learning_rate": 7.52382768867422e-05, + "loss": 0.0211, + "step": 10950 + }, + { + "epoch": 1.67083740234375e-05, + "model_forward_time": 0.02508234977722168, + "step": 10950 + }, + { + "epoch": 1.67083740234375e-05, + "step": 10950, + "training_step_time": 0.11007428169250488 + }, + { + "epoch": 1.670989990234375e-05, + "model_forward_time": 0.024943113327026367, + "step": 10951 + }, + { + "epoch": 1.670989990234375e-05, + "step": 10951, + "training_step_time": 0.10929155349731445 + }, + { + "epoch": 1.671142578125e-05, + "model_forward_time": 0.024158716201782227, + "step": 10952 + }, + { + "epoch": 1.671142578125e-05, + "step": 10952, + "training_step_time": 0.11240029335021973 + }, + { + "epoch": 1.671295166015625e-05, + "model_forward_time": 0.025289535522460938, + "step": 10953 + }, + { + "epoch": 1.671295166015625e-05, + "step": 10953, + "training_step_time": 0.15502047538757324 + }, + { + "epoch": 1.67144775390625e-05, + "model_forward_time": 0.024763107299804688, + "step": 10954 + }, + { + "epoch": 1.67144775390625e-05, + "step": 10954, + "training_step_time": 0.1723630428314209 + }, + { + "epoch": 1.671600341796875e-05, + "model_forward_time": 0.02430272102355957, + "step": 10955 + }, + { + "epoch": 1.671600341796875e-05, + "step": 10955, + "training_step_time": 0.1819014549255371 + }, + { + "epoch": 1.6717529296875e-05, + "model_forward_time": 0.027202129364013672, + "step": 10956 + }, + { + "epoch": 1.6717529296875e-05, + "step": 10956, + "training_step_time": 0.17137551307678223 + }, + { + "epoch": 1.671905517578125e-05, + "model_forward_time": 0.024152755737304688, + "step": 10957 + }, + { + "epoch": 1.671905517578125e-05, + "step": 10957, + "training_step_time": 0.15536761283874512 + }, + { + "epoch": 1.67205810546875e-05, + "model_forward_time": 0.024048566818237305, + "step": 10958 + }, + { + "epoch": 1.67205810546875e-05, + "step": 10958, + "training_step_time": 0.13645076751708984 + }, + { + "epoch": 1.672210693359375e-05, + "model_forward_time": 0.024808406829833984, + "step": 10959 + }, + { + "epoch": 1.672210693359375e-05, + "step": 10959, + "training_step_time": 0.11322951316833496 + }, + { + "epoch": 1.67236328125e-05, + "grad_norm": 0.2570970952510834, + "learning_rate": 7.519068261172859e-05, + "loss": 0.0166, + "step": 10960 + }, + { + "epoch": 1.67236328125e-05, + "model_forward_time": 0.025191545486450195, + "step": 10960 + }, + { + "epoch": 1.67236328125e-05, + "step": 10960, + "training_step_time": 0.21571731567382812 + }, + { + "epoch": 1.672515869140625e-05, + "model_forward_time": 0.02423834800720215, + "step": 10961 + }, + { + "epoch": 1.672515869140625e-05, + "step": 10961, + "training_step_time": 0.1261765956878662 + }, + { + "epoch": 1.67266845703125e-05, + "model_forward_time": 0.024764537811279297, + "step": 10962 + }, + { + "epoch": 1.67266845703125e-05, + "step": 10962, + "training_step_time": 0.11742568016052246 + }, + { + "epoch": 1.672821044921875e-05, + "model_forward_time": 0.025266647338867188, + "step": 10963 + }, + { + "epoch": 1.672821044921875e-05, + "step": 10963, + "training_step_time": 0.210890531539917 + }, + { + "epoch": 1.6729736328125e-05, + "model_forward_time": 0.02455902099609375, + "step": 10964 + }, + { + "epoch": 1.6729736328125e-05, + "step": 10964, + "training_step_time": 0.12028670310974121 + }, + { + "epoch": 1.673126220703125e-05, + "model_forward_time": 0.024129867553710938, + "step": 10965 + }, + { + "epoch": 1.673126220703125e-05, + "step": 10965, + "training_step_time": 0.10618734359741211 + }, + { + "epoch": 1.67327880859375e-05, + "model_forward_time": 0.025358915328979492, + "step": 10966 + }, + { + "epoch": 1.67327880859375e-05, + "step": 10966, + "training_step_time": 0.13991093635559082 + }, + { + "epoch": 1.673431396484375e-05, + "model_forward_time": 0.02529311180114746, + "step": 10967 + }, + { + "epoch": 1.673431396484375e-05, + "step": 10967, + "training_step_time": 0.10933279991149902 + }, + { + "epoch": 1.673583984375e-05, + "model_forward_time": 0.02552342414855957, + "step": 10968 + }, + { + "epoch": 1.673583984375e-05, + "step": 10968, + "training_step_time": 0.10730528831481934 + }, + { + "epoch": 1.673736572265625e-05, + "model_forward_time": 0.02536463737487793, + "step": 10969 + }, + { + "epoch": 1.673736572265625e-05, + "step": 10969, + "training_step_time": 0.11017680168151855 + }, + { + "epoch": 1.67388916015625e-05, + "grad_norm": 0.24090756475925446, + "learning_rate": 7.514305772766031e-05, + "loss": 0.0182, + "step": 10970 + }, + { + "epoch": 1.67388916015625e-05, + "model_forward_time": 0.025098323822021484, + "step": 10970 + }, + { + "epoch": 1.67388916015625e-05, + "step": 10970, + "training_step_time": 0.120697021484375 + }, + { + "epoch": 1.674041748046875e-05, + "model_forward_time": 0.02523517608642578, + "step": 10971 + }, + { + "epoch": 1.674041748046875e-05, + "step": 10971, + "training_step_time": 0.18422794342041016 + }, + { + "epoch": 1.6741943359375e-05, + "model_forward_time": 0.02452993392944336, + "step": 10972 + }, + { + "epoch": 1.6741943359375e-05, + "step": 10972, + "training_step_time": 0.15419483184814453 + }, + { + "epoch": 1.674346923828125e-05, + "model_forward_time": 0.024731159210205078, + "step": 10973 + }, + { + "epoch": 1.674346923828125e-05, + "step": 10973, + "training_step_time": 0.1690690517425537 + }, + { + "epoch": 1.67449951171875e-05, + "model_forward_time": 0.024987459182739258, + "step": 10974 + }, + { + "epoch": 1.67449951171875e-05, + "step": 10974, + "training_step_time": 0.12488436698913574 + }, + { + "epoch": 1.674652099609375e-05, + "model_forward_time": 0.02503037452697754, + "step": 10975 + }, + { + "epoch": 1.674652099609375e-05, + "step": 10975, + "training_step_time": 0.10767769813537598 + }, + { + "epoch": 1.6748046875e-05, + "model_forward_time": 0.025534391403198242, + "step": 10976 + }, + { + "epoch": 1.6748046875e-05, + "step": 10976, + "training_step_time": 0.1154639720916748 + }, + { + "epoch": 1.674957275390625e-05, + "model_forward_time": 0.02441692352294922, + "step": 10977 + }, + { + "epoch": 1.674957275390625e-05, + "step": 10977, + "training_step_time": 0.1138458251953125 + }, + { + "epoch": 1.67510986328125e-05, + "model_forward_time": 0.025197982788085938, + "step": 10978 + }, + { + "epoch": 1.67510986328125e-05, + "step": 10978, + "training_step_time": 0.17601680755615234 + }, + { + "epoch": 1.675262451171875e-05, + "model_forward_time": 0.024521350860595703, + "step": 10979 + }, + { + "epoch": 1.675262451171875e-05, + "step": 10979, + "training_step_time": 0.19137048721313477 + }, + { + "epoch": 1.6754150390625e-05, + "grad_norm": 0.2127423733472824, + "learning_rate": 7.509540229240601e-05, + "loss": 0.0227, + "step": 10980 + }, + { + "epoch": 1.6754150390625e-05, + "model_forward_time": 0.024681806564331055, + "step": 10980 + }, + { + "epoch": 1.6754150390625e-05, + "step": 10980, + "training_step_time": 0.17478275299072266 + }, + { + "epoch": 1.675567626953125e-05, + "model_forward_time": 0.0247194766998291, + "step": 10981 + }, + { + "epoch": 1.675567626953125e-05, + "step": 10981, + "training_step_time": 0.17474007606506348 + }, + { + "epoch": 1.67572021484375e-05, + "model_forward_time": 0.023975372314453125, + "step": 10982 + }, + { + "epoch": 1.67572021484375e-05, + "step": 10982, + "training_step_time": 0.16277790069580078 + }, + { + "epoch": 1.675872802734375e-05, + "model_forward_time": 0.023458480834960938, + "step": 10983 + }, + { + "epoch": 1.675872802734375e-05, + "step": 10983, + "training_step_time": 0.1749570369720459 + }, + { + "epoch": 1.676025390625e-05, + "model_forward_time": 0.024254798889160156, + "step": 10984 + }, + { + "epoch": 1.676025390625e-05, + "step": 10984, + "training_step_time": 0.11450552940368652 + }, + { + "epoch": 1.676177978515625e-05, + "model_forward_time": 0.023582935333251953, + "step": 10985 + }, + { + "epoch": 1.676177978515625e-05, + "step": 10985, + "training_step_time": 0.10877871513366699 + }, + { + "epoch": 1.67633056640625e-05, + "model_forward_time": 0.025499820709228516, + "step": 10986 + }, + { + "epoch": 1.67633056640625e-05, + "step": 10986, + "training_step_time": 0.10741758346557617 + }, + { + "epoch": 1.676483154296875e-05, + "model_forward_time": 0.02541637420654297, + "step": 10987 + }, + { + "epoch": 1.676483154296875e-05, + "step": 10987, + "training_step_time": 0.10689830780029297 + }, + { + "epoch": 1.6766357421875e-05, + "model_forward_time": 0.025330543518066406, + "step": 10988 + }, + { + "epoch": 1.6766357421875e-05, + "step": 10988, + "training_step_time": 0.10633111000061035 + }, + { + "epoch": 1.676788330078125e-05, + "model_forward_time": 0.025246381759643555, + "step": 10989 + }, + { + "epoch": 1.676788330078125e-05, + "step": 10989, + "training_step_time": 0.10977458953857422 + }, + { + "epoch": 1.67694091796875e-05, + "grad_norm": 0.29465189576148987, + "learning_rate": 7.504771636387163e-05, + "loss": 0.0194, + "step": 10990 + }, + { + "epoch": 1.67694091796875e-05, + "model_forward_time": 0.025088071823120117, + "step": 10990 + }, + { + "epoch": 1.67694091796875e-05, + "step": 10990, + "training_step_time": 0.11047506332397461 + }, + { + "epoch": 1.677093505859375e-05, + "model_forward_time": 0.025476932525634766, + "step": 10991 + }, + { + "epoch": 1.677093505859375e-05, + "step": 10991, + "training_step_time": 0.10808444023132324 + }, + { + "epoch": 1.67724609375e-05, + "model_forward_time": 0.025394916534423828, + "step": 10992 + }, + { + "epoch": 1.67724609375e-05, + "step": 10992, + "training_step_time": 0.1133263111114502 + }, + { + "epoch": 1.677398681640625e-05, + "model_forward_time": 0.02521800994873047, + "step": 10993 + }, + { + "epoch": 1.677398681640625e-05, + "step": 10993, + "training_step_time": 0.11493611335754395 + }, + { + "epoch": 1.67755126953125e-05, + "model_forward_time": 0.02523660659790039, + "step": 10994 + }, + { + "epoch": 1.67755126953125e-05, + "step": 10994, + "training_step_time": 0.11822724342346191 + }, + { + "epoch": 1.677703857421875e-05, + "model_forward_time": 0.025361299514770508, + "step": 10995 + }, + { + "epoch": 1.677703857421875e-05, + "step": 10995, + "training_step_time": 0.11452269554138184 + }, + { + "epoch": 1.6778564453125e-05, + "model_forward_time": 0.025393962860107422, + "step": 10996 + }, + { + "epoch": 1.6778564453125e-05, + "step": 10996, + "training_step_time": 0.11825871467590332 + }, + { + "epoch": 1.678009033203125e-05, + "model_forward_time": 0.0251767635345459, + "step": 10997 + }, + { + "epoch": 1.678009033203125e-05, + "step": 10997, + "training_step_time": 0.11380624771118164 + }, + { + "epoch": 1.67816162109375e-05, + "model_forward_time": 0.02502155303955078, + "step": 10998 + }, + { + "epoch": 1.67816162109375e-05, + "step": 10998, + "training_step_time": 0.11469602584838867 + }, + { + "epoch": 1.678314208984375e-05, + "model_forward_time": 0.02527451515197754, + "step": 10999 + }, + { + "epoch": 1.678314208984375e-05, + "step": 10999, + "training_step_time": 0.1076805591583252 + }, + { + "epoch": 1.678466796875e-05, + "grad_norm": 0.2610098421573639, + "learning_rate": 7.500000000000001e-05, + "loss": 0.0186, + "step": 11000 + }, + { + "epoch": 1.678466796875e-05, + "model_forward_time": 0.02453327178955078, + "step": 11000 + }, + { + "epoch": 1.678466796875e-05, + "step": 11000, + "training_step_time": 0.10767960548400879 + }, + { + "epoch": 1.678619384765625e-05, + "model_forward_time": 0.023268938064575195, + "step": 11001 + }, + { + "epoch": 1.678619384765625e-05, + "step": 11001, + "training_step_time": 0.0983436107635498 + }, + { + "epoch": 1.67877197265625e-05, + "model_forward_time": 0.025027751922607422, + "step": 11002 + }, + { + "epoch": 1.67877197265625e-05, + "step": 11002, + "training_step_time": 0.10344243049621582 + }, + { + "epoch": 1.678924560546875e-05, + "model_forward_time": 0.025501728057861328, + "step": 11003 + }, + { + "epoch": 1.678924560546875e-05, + "step": 11003, + "training_step_time": 0.10609126091003418 + }, + { + "epoch": 1.6790771484375e-05, + "model_forward_time": 0.025641441345214844, + "step": 11004 + }, + { + "epoch": 1.6790771484375e-05, + "step": 11004, + "training_step_time": 0.10907506942749023 + }, + { + "epoch": 1.679229736328125e-05, + "model_forward_time": 0.025785207748413086, + "step": 11005 + }, + { + "epoch": 1.679229736328125e-05, + "step": 11005, + "training_step_time": 0.11139726638793945 + }, + { + "epoch": 1.67938232421875e-05, + "model_forward_time": 0.02514958381652832, + "step": 11006 + }, + { + "epoch": 1.67938232421875e-05, + "step": 11006, + "training_step_time": 0.10842037200927734 + }, + { + "epoch": 1.679534912109375e-05, + "model_forward_time": 0.02629256248474121, + "step": 11007 + }, + { + "epoch": 1.679534912109375e-05, + "step": 11007, + "training_step_time": 0.11193656921386719 + }, + { + "epoch": 1.6796875e-05, + "model_forward_time": 0.025657176971435547, + "step": 11008 + }, + { + "epoch": 1.6796875e-05, + "step": 11008, + "training_step_time": 0.10732531547546387 + }, + { + "epoch": 1.679840087890625e-05, + "model_forward_time": 0.02537059783935547, + "step": 11009 + }, + { + "epoch": 1.679840087890625e-05, + "step": 11009, + "training_step_time": 0.11384963989257812 + }, + { + "epoch": 1.67999267578125e-05, + "grad_norm": 0.25376853346824646, + "learning_rate": 7.495225325877103e-05, + "loss": 0.024, + "step": 11010 + }, + { + "epoch": 1.67999267578125e-05, + "model_forward_time": 0.025641679763793945, + "step": 11010 + }, + { + "epoch": 1.67999267578125e-05, + "step": 11010, + "training_step_time": 0.10683774948120117 + }, + { + "epoch": 1.680145263671875e-05, + "model_forward_time": 0.02544713020324707, + "step": 11011 + }, + { + "epoch": 1.680145263671875e-05, + "step": 11011, + "training_step_time": 0.1070106029510498 + }, + { + "epoch": 1.6802978515625e-05, + "model_forward_time": 0.025514841079711914, + "step": 11012 + }, + { + "epoch": 1.6802978515625e-05, + "step": 11012, + "training_step_time": 0.10878849029541016 + }, + { + "epoch": 1.680450439453125e-05, + "model_forward_time": 0.02551126480102539, + "step": 11013 + }, + { + "epoch": 1.680450439453125e-05, + "step": 11013, + "training_step_time": 0.10933685302734375 + }, + { + "epoch": 1.68060302734375e-05, + "model_forward_time": 0.025161266326904297, + "step": 11014 + }, + { + "epoch": 1.68060302734375e-05, + "step": 11014, + "training_step_time": 0.11080050468444824 + }, + { + "epoch": 1.680755615234375e-05, + "model_forward_time": 0.025237560272216797, + "step": 11015 + }, + { + "epoch": 1.680755615234375e-05, + "step": 11015, + "training_step_time": 0.17717862129211426 + }, + { + "epoch": 1.680908203125e-05, + "model_forward_time": 0.024961233139038086, + "step": 11016 + }, + { + "epoch": 1.680908203125e-05, + "step": 11016, + "training_step_time": 0.1287388801574707 + }, + { + "epoch": 1.681060791015625e-05, + "model_forward_time": 0.024422883987426758, + "step": 11017 + }, + { + "epoch": 1.681060791015625e-05, + "step": 11017, + "training_step_time": 0.1295459270477295 + }, + { + "epoch": 1.68121337890625e-05, + "model_forward_time": 0.02513885498046875, + "step": 11018 + }, + { + "epoch": 1.68121337890625e-05, + "step": 11018, + "training_step_time": 0.11012840270996094 + }, + { + "epoch": 1.681365966796875e-05, + "model_forward_time": 0.02582693099975586, + "step": 11019 + }, + { + "epoch": 1.681365966796875e-05, + "step": 11019, + "training_step_time": 0.17493295669555664 + }, + { + "epoch": 1.6815185546875e-05, + "grad_norm": 0.29249677062034607, + "learning_rate": 7.490447619820152e-05, + "loss": 0.0227, + "step": 11020 + }, + { + "epoch": 1.6815185546875e-05, + "model_forward_time": 0.024664640426635742, + "step": 11020 + }, + { + "epoch": 1.6815185546875e-05, + "step": 11020, + "training_step_time": 0.15517139434814453 + }, + { + "epoch": 1.681671142578125e-05, + "model_forward_time": 0.024332761764526367, + "step": 11021 + }, + { + "epoch": 1.681671142578125e-05, + "step": 11021, + "training_step_time": 0.20817780494689941 + }, + { + "epoch": 1.68182373046875e-05, + "model_forward_time": 0.025025606155395508, + "step": 11022 + }, + { + "epoch": 1.68182373046875e-05, + "step": 11022, + "training_step_time": 0.13396811485290527 + }, + { + "epoch": 1.681976318359375e-05, + "model_forward_time": 0.02447056770324707, + "step": 11023 + }, + { + "epoch": 1.681976318359375e-05, + "step": 11023, + "training_step_time": 0.18937277793884277 + }, + { + "epoch": 1.68212890625e-05, + "model_forward_time": 0.02430891990661621, + "step": 11024 + }, + { + "epoch": 1.68212890625e-05, + "step": 11024, + "training_step_time": 0.17600154876708984 + }, + { + "epoch": 1.682281494140625e-05, + "model_forward_time": 0.02432560920715332, + "step": 11025 + }, + { + "epoch": 1.682281494140625e-05, + "step": 11025, + "training_step_time": 0.1586132049560547 + }, + { + "epoch": 1.68243408203125e-05, + "model_forward_time": 0.024531126022338867, + "step": 11026 + }, + { + "epoch": 1.68243408203125e-05, + "step": 11026, + "training_step_time": 0.21649765968322754 + }, + { + "epoch": 1.682586669921875e-05, + "model_forward_time": 0.02470254898071289, + "step": 11027 + }, + { + "epoch": 1.682586669921875e-05, + "step": 11027, + "training_step_time": 0.12111091613769531 + }, + { + "epoch": 1.6827392578125e-05, + "model_forward_time": 0.024679899215698242, + "step": 11028 + }, + { + "epoch": 1.6827392578125e-05, + "step": 11028, + "training_step_time": 0.11159276962280273 + }, + { + "epoch": 1.682891845703125e-05, + "model_forward_time": 0.025467395782470703, + "step": 11029 + }, + { + "epoch": 1.682891845703125e-05, + "step": 11029, + "training_step_time": 0.10515832901000977 + }, + { + "epoch": 1.68304443359375e-05, + "grad_norm": 0.7210782766342163, + "learning_rate": 7.485666887634506e-05, + "loss": 0.0266, + "step": 11030 + }, + { + "epoch": 1.68304443359375e-05, + "model_forward_time": 0.025027990341186523, + "step": 11030 + }, + { + "epoch": 1.68304443359375e-05, + "step": 11030, + "training_step_time": 0.10636568069458008 + }, + { + "epoch": 1.683197021484375e-05, + "model_forward_time": 0.02503228187561035, + "step": 11031 + }, + { + "epoch": 1.683197021484375e-05, + "step": 11031, + "training_step_time": 0.10616874694824219 + }, + { + "epoch": 1.683349609375e-05, + "model_forward_time": 0.025362491607666016, + "step": 11032 + }, + { + "epoch": 1.683349609375e-05, + "step": 11032, + "training_step_time": 0.11004972457885742 + }, + { + "epoch": 1.683502197265625e-05, + "model_forward_time": 0.025506973266601562, + "step": 11033 + }, + { + "epoch": 1.683502197265625e-05, + "step": 11033, + "training_step_time": 0.10942745208740234 + }, + { + "epoch": 1.68365478515625e-05, + "model_forward_time": 0.025604724884033203, + "step": 11034 + }, + { + "epoch": 1.68365478515625e-05, + "step": 11034, + "training_step_time": 0.2038862705230713 + }, + { + "epoch": 1.683807373046875e-05, + "model_forward_time": 0.024894237518310547, + "step": 11035 + }, + { + "epoch": 1.683807373046875e-05, + "step": 11035, + "training_step_time": 0.11488127708435059 + }, + { + "epoch": 1.6839599609375e-05, + "model_forward_time": 0.02425074577331543, + "step": 11036 + }, + { + "epoch": 1.6839599609375e-05, + "step": 11036, + "training_step_time": 0.11260843276977539 + }, + { + "epoch": 1.684112548828125e-05, + "model_forward_time": 0.02528858184814453, + "step": 11037 + }, + { + "epoch": 1.684112548828125e-05, + "step": 11037, + "training_step_time": 0.11344647407531738 + }, + { + "epoch": 1.68426513671875e-05, + "model_forward_time": 0.025884628295898438, + "step": 11038 + }, + { + "epoch": 1.68426513671875e-05, + "step": 11038, + "training_step_time": 0.13131332397460938 + }, + { + "epoch": 1.684417724609375e-05, + "model_forward_time": 0.025574445724487305, + "step": 11039 + }, + { + "epoch": 1.684417724609375e-05, + "step": 11039, + "training_step_time": 0.1150054931640625 + }, + { + "epoch": 1.6845703125e-05, + "grad_norm": 0.4200476408004761, + "learning_rate": 7.480883135129211e-05, + "loss": 0.0215, + "step": 11040 + }, + { + "epoch": 1.6845703125e-05, + "model_forward_time": 0.024545669555664062, + "step": 11040 + }, + { + "epoch": 1.6845703125e-05, + "step": 11040, + "training_step_time": 0.11476659774780273 + }, + { + "epoch": 1.684722900390625e-05, + "model_forward_time": 0.025471925735473633, + "step": 11041 + }, + { + "epoch": 1.684722900390625e-05, + "step": 11041, + "training_step_time": 0.11226963996887207 + }, + { + "epoch": 1.68487548828125e-05, + "model_forward_time": 0.02468132972717285, + "step": 11042 + }, + { + "epoch": 1.68487548828125e-05, + "step": 11042, + "training_step_time": 0.1121985912322998 + }, + { + "epoch": 1.685028076171875e-05, + "model_forward_time": 0.024639129638671875, + "step": 11043 + }, + { + "epoch": 1.685028076171875e-05, + "step": 11043, + "training_step_time": 0.11081218719482422 + }, + { + "epoch": 1.6851806640625e-05, + "model_forward_time": 0.024403810501098633, + "step": 11044 + }, + { + "epoch": 1.6851806640625e-05, + "step": 11044, + "training_step_time": 0.10872173309326172 + }, + { + "epoch": 1.685333251953125e-05, + "model_forward_time": 0.02540898323059082, + "step": 11045 + }, + { + "epoch": 1.685333251953125e-05, + "step": 11045, + "training_step_time": 0.10756850242614746 + }, + { + "epoch": 1.68548583984375e-05, + "model_forward_time": 0.025561809539794922, + "step": 11046 + }, + { + "epoch": 1.68548583984375e-05, + "step": 11046, + "training_step_time": 0.10976433753967285 + }, + { + "epoch": 1.685638427734375e-05, + "model_forward_time": 0.02570319175720215, + "step": 11047 + }, + { + "epoch": 1.685638427734375e-05, + "step": 11047, + "training_step_time": 0.1087808609008789 + }, + { + "epoch": 1.685791015625e-05, + "model_forward_time": 0.025281906127929688, + "step": 11048 + }, + { + "epoch": 1.685791015625e-05, + "step": 11048, + "training_step_time": 0.10898041725158691 + }, + { + "epoch": 1.685943603515625e-05, + "model_forward_time": 0.02580857276916504, + "step": 11049 + }, + { + "epoch": 1.685943603515625e-05, + "step": 11049, + "training_step_time": 0.10880279541015625 + }, + { + "epoch": 1.68609619140625e-05, + "grad_norm": 0.3526730239391327, + "learning_rate": 7.476096368116974e-05, + "loss": 0.0186, + "step": 11050 + }, + { + "epoch": 1.68609619140625e-05, + "model_forward_time": 0.025604724884033203, + "step": 11050 + }, + { + "epoch": 1.68609619140625e-05, + "step": 11050, + "training_step_time": 0.10728597640991211 + }, + { + "epoch": 1.686248779296875e-05, + "model_forward_time": 0.027692794799804688, + "step": 11051 + }, + { + "epoch": 1.686248779296875e-05, + "step": 11051, + "training_step_time": 0.11530160903930664 + }, + { + "epoch": 1.6864013671875e-05, + "model_forward_time": 0.025698184967041016, + "step": 11052 + }, + { + "epoch": 1.6864013671875e-05, + "step": 11052, + "training_step_time": 0.10894060134887695 + }, + { + "epoch": 1.686553955078125e-05, + "model_forward_time": 0.02559185028076172, + "step": 11053 + }, + { + "epoch": 1.686553955078125e-05, + "step": 11053, + "training_step_time": 0.10940861701965332 + }, + { + "epoch": 1.68670654296875e-05, + "model_forward_time": 0.025597333908081055, + "step": 11054 + }, + { + "epoch": 1.68670654296875e-05, + "step": 11054, + "training_step_time": 0.10698223114013672 + }, + { + "epoch": 1.686859130859375e-05, + "model_forward_time": 0.02545475959777832, + "step": 11055 + }, + { + "epoch": 1.686859130859375e-05, + "step": 11055, + "training_step_time": 0.10831570625305176 + }, + { + "epoch": 1.68701171875e-05, + "model_forward_time": 0.025638818740844727, + "step": 11056 + }, + { + "epoch": 1.68701171875e-05, + "step": 11056, + "training_step_time": 0.10932183265686035 + }, + { + "epoch": 1.687164306640625e-05, + "model_forward_time": 0.0257720947265625, + "step": 11057 + }, + { + "epoch": 1.687164306640625e-05, + "step": 11057, + "training_step_time": 0.11046123504638672 + }, + { + "epoch": 1.68731689453125e-05, + "model_forward_time": 0.02495121955871582, + "step": 11058 + }, + { + "epoch": 1.68731689453125e-05, + "step": 11058, + "training_step_time": 0.10667800903320312 + }, + { + "epoch": 1.687469482421875e-05, + "model_forward_time": 0.02555251121520996, + "step": 11059 + }, + { + "epoch": 1.687469482421875e-05, + "step": 11059, + "training_step_time": 0.11281013488769531 + }, + { + "epoch": 1.6876220703125e-05, + "grad_norm": 0.5046151280403137, + "learning_rate": 7.471306592414168e-05, + "loss": 0.0312, + "step": 11060 + }, + { + "epoch": 1.6876220703125e-05, + "model_forward_time": 0.025336742401123047, + "step": 11060 + }, + { + "epoch": 1.6876220703125e-05, + "step": 11060, + "training_step_time": 0.11218976974487305 + }, + { + "epoch": 1.687774658203125e-05, + "model_forward_time": 0.02591681480407715, + "step": 11061 + }, + { + "epoch": 1.687774658203125e-05, + "step": 11061, + "training_step_time": 0.10997295379638672 + }, + { + "epoch": 1.68792724609375e-05, + "model_forward_time": 0.025686025619506836, + "step": 11062 + }, + { + "epoch": 1.68792724609375e-05, + "step": 11062, + "training_step_time": 0.1153879165649414 + }, + { + "epoch": 1.688079833984375e-05, + "model_forward_time": 0.025724411010742188, + "step": 11063 + }, + { + "epoch": 1.688079833984375e-05, + "step": 11063, + "training_step_time": 0.13169550895690918 + }, + { + "epoch": 1.688232421875e-05, + "model_forward_time": 0.025657176971435547, + "step": 11064 + }, + { + "epoch": 1.688232421875e-05, + "step": 11064, + "training_step_time": 0.14220118522644043 + }, + { + "epoch": 1.688385009765625e-05, + "model_forward_time": 0.024648427963256836, + "step": 11065 + }, + { + "epoch": 1.688385009765625e-05, + "step": 11065, + "training_step_time": 0.128448486328125 + }, + { + "epoch": 1.68853759765625e-05, + "model_forward_time": 0.025058507919311523, + "step": 11066 + }, + { + "epoch": 1.68853759765625e-05, + "step": 11066, + "training_step_time": 0.12008404731750488 + }, + { + "epoch": 1.688690185546875e-05, + "model_forward_time": 0.025208711624145508, + "step": 11067 + }, + { + "epoch": 1.688690185546875e-05, + "step": 11067, + "training_step_time": 0.11882996559143066 + }, + { + "epoch": 1.6888427734375e-05, + "model_forward_time": 0.024825334548950195, + "step": 11068 + }, + { + "epoch": 1.6888427734375e-05, + "step": 11068, + "training_step_time": 0.195112943649292 + }, + { + "epoch": 1.688995361328125e-05, + "model_forward_time": 0.025026798248291016, + "step": 11069 + }, + { + "epoch": 1.688995361328125e-05, + "step": 11069, + "training_step_time": 0.1636645793914795 + }, + { + "epoch": 1.68914794921875e-05, + "grad_norm": 0.4272726774215698, + "learning_rate": 7.466513813840825e-05, + "loss": 0.0176, + "step": 11070 + }, + { + "epoch": 1.68914794921875e-05, + "model_forward_time": 0.024471759796142578, + "step": 11070 + }, + { + "epoch": 1.68914794921875e-05, + "step": 11070, + "training_step_time": 0.13658976554870605 + }, + { + "epoch": 1.689300537109375e-05, + "model_forward_time": 0.024288177490234375, + "step": 11071 + }, + { + "epoch": 1.689300537109375e-05, + "step": 11071, + "training_step_time": 0.15856575965881348 + }, + { + "epoch": 1.689453125e-05, + "model_forward_time": 0.024667978286743164, + "step": 11072 + }, + { + "epoch": 1.689453125e-05, + "step": 11072, + "training_step_time": 0.17634892463684082 + }, + { + "epoch": 1.689605712890625e-05, + "model_forward_time": 0.024953365325927734, + "step": 11073 + }, + { + "epoch": 1.689605712890625e-05, + "step": 11073, + "training_step_time": 0.16644763946533203 + }, + { + "epoch": 1.68975830078125e-05, + "model_forward_time": 0.02419114112854004, + "step": 11074 + }, + { + "epoch": 1.68975830078125e-05, + "step": 11074, + "training_step_time": 0.11136651039123535 + }, + { + "epoch": 1.689910888671875e-05, + "model_forward_time": 0.024768352508544922, + "step": 11075 + }, + { + "epoch": 1.689910888671875e-05, + "step": 11075, + "training_step_time": 0.10836935043334961 + }, + { + "epoch": 1.6900634765625e-05, + "model_forward_time": 0.02548384666442871, + "step": 11076 + }, + { + "epoch": 1.6900634765625e-05, + "step": 11076, + "training_step_time": 0.10998392105102539 + }, + { + "epoch": 1.690216064453125e-05, + "model_forward_time": 0.025430679321289062, + "step": 11077 + }, + { + "epoch": 1.690216064453125e-05, + "step": 11077, + "training_step_time": 0.10568499565124512 + }, + { + "epoch": 1.69036865234375e-05, + "model_forward_time": 0.025182247161865234, + "step": 11078 + }, + { + "epoch": 1.69036865234375e-05, + "step": 11078, + "training_step_time": 0.10708928108215332 + }, + { + "epoch": 1.690521240234375e-05, + "model_forward_time": 0.025151729583740234, + "step": 11079 + }, + { + "epoch": 1.690521240234375e-05, + "step": 11079, + "training_step_time": 0.10581612586975098 + }, + { + "epoch": 1.690673828125e-05, + "grad_norm": 0.2934919595718384, + "learning_rate": 7.461718038220621e-05, + "loss": 0.0255, + "step": 11080 + }, + { + "epoch": 1.690673828125e-05, + "model_forward_time": 0.025043725967407227, + "step": 11080 + }, + { + "epoch": 1.690673828125e-05, + "step": 11080, + "training_step_time": 0.10827851295471191 + }, + { + "epoch": 1.690826416015625e-05, + "model_forward_time": 0.0254669189453125, + "step": 11081 + }, + { + "epoch": 1.690826416015625e-05, + "step": 11081, + "training_step_time": 0.13683199882507324 + }, + { + "epoch": 1.69097900390625e-05, + "model_forward_time": 0.025970935821533203, + "step": 11082 + }, + { + "epoch": 1.69097900390625e-05, + "step": 11082, + "training_step_time": 0.1141054630279541 + }, + { + "epoch": 1.691131591796875e-05, + "model_forward_time": 0.02505660057067871, + "step": 11083 + }, + { + "epoch": 1.691131591796875e-05, + "step": 11083, + "training_step_time": 0.11047744750976562 + }, + { + "epoch": 1.6912841796875e-05, + "model_forward_time": 0.024837017059326172, + "step": 11084 + }, + { + "epoch": 1.6912841796875e-05, + "step": 11084, + "training_step_time": 0.1345205307006836 + }, + { + "epoch": 1.691436767578125e-05, + "model_forward_time": 0.025327682495117188, + "step": 11085 + }, + { + "epoch": 1.691436767578125e-05, + "step": 11085, + "training_step_time": 0.13312244415283203 + }, + { + "epoch": 1.69158935546875e-05, + "model_forward_time": 0.023540735244750977, + "step": 11086 + }, + { + "epoch": 1.69158935546875e-05, + "step": 11086, + "training_step_time": 0.12646961212158203 + }, + { + "epoch": 1.691741943359375e-05, + "model_forward_time": 0.023765087127685547, + "step": 11087 + }, + { + "epoch": 1.691741943359375e-05, + "step": 11087, + "training_step_time": 0.19351792335510254 + }, + { + "epoch": 1.69189453125e-05, + "model_forward_time": 0.02450847625732422, + "step": 11088 + }, + { + "epoch": 1.69189453125e-05, + "step": 11088, + "training_step_time": 0.11889886856079102 + }, + { + "epoch": 1.692047119140625e-05, + "model_forward_time": 0.02297234535217285, + "step": 11089 + }, + { + "epoch": 1.692047119140625e-05, + "step": 11089, + "training_step_time": 0.11371111869812012 + }, + { + "epoch": 1.69219970703125e-05, + "grad_norm": 0.403972327709198, + "learning_rate": 7.456919271380875e-05, + "loss": 0.0204, + "step": 11090 + }, + { + "epoch": 1.69219970703125e-05, + "model_forward_time": 0.024688720703125, + "step": 11090 + }, + { + "epoch": 1.69219970703125e-05, + "step": 11090, + "training_step_time": 0.11239242553710938 + }, + { + "epoch": 1.692352294921875e-05, + "model_forward_time": 0.024477243423461914, + "step": 11091 + }, + { + "epoch": 1.692352294921875e-05, + "step": 11091, + "training_step_time": 0.1142737865447998 + }, + { + "epoch": 1.6925048828125e-05, + "model_forward_time": 0.0252685546875, + "step": 11092 + }, + { + "epoch": 1.6925048828125e-05, + "step": 11092, + "training_step_time": 0.11030888557434082 + }, + { + "epoch": 1.692657470703125e-05, + "model_forward_time": 0.02535104751586914, + "step": 11093 + }, + { + "epoch": 1.692657470703125e-05, + "step": 11093, + "training_step_time": 0.10907292366027832 + }, + { + "epoch": 1.69281005859375e-05, + "model_forward_time": 0.025489091873168945, + "step": 11094 + }, + { + "epoch": 1.69281005859375e-05, + "step": 11094, + "training_step_time": 0.11093974113464355 + }, + { + "epoch": 1.692962646484375e-05, + "model_forward_time": 0.025453567504882812, + "step": 11095 + }, + { + "epoch": 1.692962646484375e-05, + "step": 11095, + "training_step_time": 0.10885071754455566 + }, + { + "epoch": 1.693115234375e-05, + "model_forward_time": 0.025442838668823242, + "step": 11096 + }, + { + "epoch": 1.693115234375e-05, + "step": 11096, + "training_step_time": 0.1108086109161377 + }, + { + "epoch": 1.693267822265625e-05, + "model_forward_time": 0.025211572647094727, + "step": 11097 + }, + { + "epoch": 1.693267822265625e-05, + "step": 11097, + "training_step_time": 0.11159706115722656 + }, + { + "epoch": 1.69342041015625e-05, + "model_forward_time": 0.025383710861206055, + "step": 11098 + }, + { + "epoch": 1.69342041015625e-05, + "step": 11098, + "training_step_time": 0.11027765274047852 + }, + { + "epoch": 1.693572998046875e-05, + "model_forward_time": 0.025734424591064453, + "step": 11099 + }, + { + "epoch": 1.693572998046875e-05, + "step": 11099, + "training_step_time": 0.10881423950195312 + }, + { + "epoch": 1.6937255859375e-05, + "grad_norm": 0.5232256650924683, + "learning_rate": 7.452117519152542e-05, + "loss": 0.025, + "step": 11100 + }, + { + "epoch": 1.6937255859375e-05, + "model_forward_time": 0.025683879852294922, + "step": 11100 + }, + { + "epoch": 1.6937255859375e-05, + "step": 11100, + "training_step_time": 0.1129448413848877 + }, + { + "epoch": 1.693878173828125e-05, + "model_forward_time": 0.025221586227416992, + "step": 11101 + }, + { + "epoch": 1.693878173828125e-05, + "step": 11101, + "training_step_time": 0.11133694648742676 + }, + { + "epoch": 1.69403076171875e-05, + "model_forward_time": 0.025323152542114258, + "step": 11102 + }, + { + "epoch": 1.69403076171875e-05, + "step": 11102, + "training_step_time": 0.10870361328125 + }, + { + "epoch": 1.694183349609375e-05, + "model_forward_time": 0.026496171951293945, + "step": 11103 + }, + { + "epoch": 1.694183349609375e-05, + "step": 11103, + "training_step_time": 0.1148216724395752 + }, + { + "epoch": 1.6943359375e-05, + "model_forward_time": 0.025636672973632812, + "step": 11104 + }, + { + "epoch": 1.6943359375e-05, + "step": 11104, + "training_step_time": 0.21802735328674316 + }, + { + "epoch": 1.694488525390625e-05, + "model_forward_time": 0.024829387664794922, + "step": 11105 + }, + { + "epoch": 1.694488525390625e-05, + "step": 11105, + "training_step_time": 0.11014008522033691 + }, + { + "epoch": 1.69464111328125e-05, + "model_forward_time": 0.02523350715637207, + "step": 11106 + }, + { + "epoch": 1.69464111328125e-05, + "step": 11106, + "training_step_time": 0.11446523666381836 + }, + { + "epoch": 1.694793701171875e-05, + "model_forward_time": 0.025832653045654297, + "step": 11107 + }, + { + "epoch": 1.694793701171875e-05, + "step": 11107, + "training_step_time": 0.2147998809814453 + }, + { + "epoch": 1.6949462890625e-05, + "model_forward_time": 0.024612903594970703, + "step": 11108 + }, + { + "epoch": 1.6949462890625e-05, + "step": 11108, + "training_step_time": 0.1840822696685791 + }, + { + "epoch": 1.695098876953125e-05, + "model_forward_time": 0.02490687370300293, + "step": 11109 + }, + { + "epoch": 1.695098876953125e-05, + "step": 11109, + "training_step_time": 0.14657330513000488 + }, + { + "epoch": 1.69525146484375e-05, + "grad_norm": 0.3509308397769928, + "learning_rate": 7.447312787370203e-05, + "loss": 0.0303, + "step": 11110 + }, + { + "epoch": 1.69525146484375e-05, + "model_forward_time": 0.024929285049438477, + "step": 11110 + }, + { + "epoch": 1.69525146484375e-05, + "step": 11110, + "training_step_time": 0.10591721534729004 + }, + { + "epoch": 1.695404052734375e-05, + "model_forward_time": 0.025742292404174805, + "step": 11111 + }, + { + "epoch": 1.695404052734375e-05, + "step": 11111, + "training_step_time": 0.11249232292175293 + }, + { + "epoch": 1.695556640625e-05, + "model_forward_time": 0.025204181671142578, + "step": 11112 + }, + { + "epoch": 1.695556640625e-05, + "step": 11112, + "training_step_time": 0.1134941577911377 + }, + { + "epoch": 1.695709228515625e-05, + "model_forward_time": 0.025155067443847656, + "step": 11113 + }, + { + "epoch": 1.695709228515625e-05, + "step": 11113, + "training_step_time": 0.12346720695495605 + }, + { + "epoch": 1.69586181640625e-05, + "model_forward_time": 0.025364160537719727, + "step": 11114 + }, + { + "epoch": 1.69586181640625e-05, + "step": 11114, + "training_step_time": 0.17878293991088867 + }, + { + "epoch": 1.696014404296875e-05, + "model_forward_time": 0.02509307861328125, + "step": 11115 + }, + { + "epoch": 1.696014404296875e-05, + "step": 11115, + "training_step_time": 0.16571378707885742 + }, + { + "epoch": 1.6961669921875e-05, + "model_forward_time": 0.02479863166809082, + "step": 11116 + }, + { + "epoch": 1.6961669921875e-05, + "step": 11116, + "training_step_time": 0.16714882850646973 + }, + { + "epoch": 1.696319580078125e-05, + "model_forward_time": 0.024680137634277344, + "step": 11117 + }, + { + "epoch": 1.696319580078125e-05, + "step": 11117, + "training_step_time": 0.1324310302734375 + }, + { + "epoch": 1.69647216796875e-05, + "model_forward_time": 0.024886369705200195, + "step": 11118 + }, + { + "epoch": 1.69647216796875e-05, + "step": 11118, + "training_step_time": 0.11041498184204102 + }, + { + "epoch": 1.696624755859375e-05, + "model_forward_time": 0.025521516799926758, + "step": 11119 + }, + { + "epoch": 1.696624755859375e-05, + "step": 11119, + "training_step_time": 0.10813593864440918 + }, + { + "epoch": 1.69677734375e-05, + "grad_norm": 0.36768996715545654, + "learning_rate": 7.44250508187206e-05, + "loss": 0.0221, + "step": 11120 + }, + { + "epoch": 1.69677734375e-05, + "model_forward_time": 0.025114774703979492, + "step": 11120 + }, + { + "epoch": 1.69677734375e-05, + "step": 11120, + "training_step_time": 0.10756945610046387 + }, + { + "epoch": 1.696929931640625e-05, + "model_forward_time": 0.025240421295166016, + "step": 11121 + }, + { + "epoch": 1.696929931640625e-05, + "step": 11121, + "training_step_time": 0.10737466812133789 + }, + { + "epoch": 1.69708251953125e-05, + "model_forward_time": 0.025578737258911133, + "step": 11122 + }, + { + "epoch": 1.69708251953125e-05, + "step": 11122, + "training_step_time": 0.10948061943054199 + }, + { + "epoch": 1.697235107421875e-05, + "model_forward_time": 0.02613973617553711, + "step": 11123 + }, + { + "epoch": 1.697235107421875e-05, + "step": 11123, + "training_step_time": 0.10831975936889648 + }, + { + "epoch": 1.6973876953125e-05, + "model_forward_time": 0.025525569915771484, + "step": 11124 + }, + { + "epoch": 1.6973876953125e-05, + "step": 11124, + "training_step_time": 0.10606050491333008 + }, + { + "epoch": 1.697540283203125e-05, + "model_forward_time": 0.025348424911499023, + "step": 11125 + }, + { + "epoch": 1.697540283203125e-05, + "step": 11125, + "training_step_time": 0.1058495044708252 + }, + { + "epoch": 1.69769287109375e-05, + "model_forward_time": 0.02520442008972168, + "step": 11126 + }, + { + "epoch": 1.69769287109375e-05, + "step": 11126, + "training_step_time": 0.10817551612854004 + }, + { + "epoch": 1.697845458984375e-05, + "model_forward_time": 0.025578022003173828, + "step": 11127 + }, + { + "epoch": 1.697845458984375e-05, + "step": 11127, + "training_step_time": 0.12018966674804688 + }, + { + "epoch": 1.697998046875e-05, + "model_forward_time": 0.02556777000427246, + "step": 11128 + }, + { + "epoch": 1.697998046875e-05, + "step": 11128, + "training_step_time": 0.11305952072143555 + }, + { + "epoch": 1.698150634765625e-05, + "model_forward_time": 0.0253446102142334, + "step": 11129 + }, + { + "epoch": 1.698150634765625e-05, + "step": 11129, + "training_step_time": 0.1105642318725586 + }, + { + "epoch": 1.69830322265625e-05, + "grad_norm": 0.18100249767303467, + "learning_rate": 7.437694408499933e-05, + "loss": 0.0305, + "step": 11130 + }, + { + "epoch": 1.69830322265625e-05, + "model_forward_time": 0.024835586547851562, + "step": 11130 + }, + { + "epoch": 1.69830322265625e-05, + "step": 11130, + "training_step_time": 0.10748457908630371 + }, + { + "epoch": 1.698455810546875e-05, + "model_forward_time": 0.025644540786743164, + "step": 11131 + }, + { + "epoch": 1.698455810546875e-05, + "step": 11131, + "training_step_time": 0.1407938003540039 + }, + { + "epoch": 1.6986083984375e-05, + "model_forward_time": 0.02554917335510254, + "step": 11132 + }, + { + "epoch": 1.6986083984375e-05, + "step": 11132, + "training_step_time": 0.10951733589172363 + }, + { + "epoch": 1.698760986328125e-05, + "model_forward_time": 0.025508403778076172, + "step": 11133 + }, + { + "epoch": 1.698760986328125e-05, + "step": 11133, + "training_step_time": 0.1130530834197998 + }, + { + "epoch": 1.69891357421875e-05, + "model_forward_time": 0.025455474853515625, + "step": 11134 + }, + { + "epoch": 1.69891357421875e-05, + "step": 11134, + "training_step_time": 0.1064448356628418 + }, + { + "epoch": 1.699066162109375e-05, + "model_forward_time": 0.02570033073425293, + "step": 11135 + }, + { + "epoch": 1.699066162109375e-05, + "step": 11135, + "training_step_time": 0.11916136741638184 + }, + { + "epoch": 1.69921875e-05, + "model_forward_time": 0.02585315704345703, + "step": 11136 + }, + { + "epoch": 1.69921875e-05, + "step": 11136, + "training_step_time": 0.12999725341796875 + }, + { + "epoch": 1.699371337890625e-05, + "model_forward_time": 0.02430105209350586, + "step": 11137 + }, + { + "epoch": 1.699371337890625e-05, + "step": 11137, + "training_step_time": 0.12331938743591309 + }, + { + "epoch": 1.69952392578125e-05, + "model_forward_time": 0.024066686630249023, + "step": 11138 + }, + { + "epoch": 1.69952392578125e-05, + "step": 11138, + "training_step_time": 0.1278059482574463 + }, + { + "epoch": 1.699676513671875e-05, + "model_forward_time": 0.024010658264160156, + "step": 11139 + }, + { + "epoch": 1.699676513671875e-05, + "step": 11139, + "training_step_time": 0.1270914077758789 + }, + { + "epoch": 1.6998291015625e-05, + "grad_norm": 0.28290072083473206, + "learning_rate": 7.432880773099237e-05, + "loss": 0.0227, + "step": 11140 + }, + { + "epoch": 1.6998291015625e-05, + "model_forward_time": 0.023932695388793945, + "step": 11140 + }, + { + "epoch": 1.6998291015625e-05, + "step": 11140, + "training_step_time": 0.12007665634155273 + }, + { + "epoch": 1.699981689453125e-05, + "model_forward_time": 0.025844335556030273, + "step": 11141 + }, + { + "epoch": 1.699981689453125e-05, + "step": 11141, + "training_step_time": 0.11661458015441895 + }, + { + "epoch": 1.70013427734375e-05, + "model_forward_time": 0.025217294692993164, + "step": 11142 + }, + { + "epoch": 1.70013427734375e-05, + "step": 11142, + "training_step_time": 0.11357355117797852 + }, + { + "epoch": 1.700286865234375e-05, + "model_forward_time": 0.0254364013671875, + "step": 11143 + }, + { + "epoch": 1.700286865234375e-05, + "step": 11143, + "training_step_time": 0.11260271072387695 + }, + { + "epoch": 1.700439453125e-05, + "model_forward_time": 0.025177717208862305, + "step": 11144 + }, + { + "epoch": 1.700439453125e-05, + "step": 11144, + "training_step_time": 0.11158370971679688 + }, + { + "epoch": 1.700592041015625e-05, + "model_forward_time": 0.026096105575561523, + "step": 11145 + }, + { + "epoch": 1.700592041015625e-05, + "step": 11145, + "training_step_time": 0.11098909378051758 + }, + { + "epoch": 1.70074462890625e-05, + "model_forward_time": 0.02504706382751465, + "step": 11146 + }, + { + "epoch": 1.70074462890625e-05, + "step": 11146, + "training_step_time": 0.10995030403137207 + }, + { + "epoch": 1.700897216796875e-05, + "model_forward_time": 0.025400638580322266, + "step": 11147 + }, + { + "epoch": 1.700897216796875e-05, + "step": 11147, + "training_step_time": 0.11023283004760742 + }, + { + "epoch": 1.7010498046875e-05, + "model_forward_time": 0.026092052459716797, + "step": 11148 + }, + { + "epoch": 1.7010498046875e-05, + "step": 11148, + "training_step_time": 0.11136269569396973 + }, + { + "epoch": 1.701202392578125e-05, + "model_forward_time": 0.025768756866455078, + "step": 11149 + }, + { + "epoch": 1.701202392578125e-05, + "step": 11149, + "training_step_time": 0.17134761810302734 + }, + { + "epoch": 1.70135498046875e-05, + "grad_norm": 0.25398629903793335, + "learning_rate": 7.428064181518997e-05, + "loss": 0.0139, + "step": 11150 + }, + { + "epoch": 1.70135498046875e-05, + "model_forward_time": 0.024932146072387695, + "step": 11150 + }, + { + "epoch": 1.70135498046875e-05, + "step": 11150, + "training_step_time": 0.1583249568939209 + }, + { + "epoch": 1.701507568359375e-05, + "model_forward_time": 0.025380373001098633, + "step": 11151 + }, + { + "epoch": 1.701507568359375e-05, + "step": 11151, + "training_step_time": 0.11038780212402344 + }, + { + "epoch": 1.70166015625e-05, + "model_forward_time": 0.0251007080078125, + "step": 11152 + }, + { + "epoch": 1.70166015625e-05, + "step": 11152, + "training_step_time": 0.10470890998840332 + }, + { + "epoch": 1.701812744140625e-05, + "model_forward_time": 0.0255734920501709, + "step": 11153 + }, + { + "epoch": 1.701812744140625e-05, + "step": 11153, + "training_step_time": 0.11091828346252441 + }, + { + "epoch": 1.70196533203125e-05, + "model_forward_time": 0.025669336318969727, + "step": 11154 + }, + { + "epoch": 1.70196533203125e-05, + "step": 11154, + "training_step_time": 0.11453056335449219 + }, + { + "epoch": 1.702117919921875e-05, + "model_forward_time": 0.02567124366760254, + "step": 11155 + }, + { + "epoch": 1.702117919921875e-05, + "step": 11155, + "training_step_time": 0.1067659854888916 + }, + { + "epoch": 1.7022705078125e-05, + "model_forward_time": 0.02595663070678711, + "step": 11156 + }, + { + "epoch": 1.7022705078125e-05, + "step": 11156, + "training_step_time": 0.17258095741271973 + }, + { + "epoch": 1.702423095703125e-05, + "model_forward_time": 0.02476191520690918, + "step": 11157 + }, + { + "epoch": 1.702423095703125e-05, + "step": 11157, + "training_step_time": 0.11350822448730469 + }, + { + "epoch": 1.70257568359375e-05, + "model_forward_time": 0.024661779403686523, + "step": 11158 + }, + { + "epoch": 1.70257568359375e-05, + "step": 11158, + "training_step_time": 0.19016647338867188 + }, + { + "epoch": 1.702728271484375e-05, + "model_forward_time": 0.024661779403686523, + "step": 11159 + }, + { + "epoch": 1.702728271484375e-05, + "step": 11159, + "training_step_time": 0.17380928993225098 + }, + { + "epoch": 1.702880859375e-05, + "grad_norm": 0.3613259792327881, + "learning_rate": 7.423244639611826e-05, + "loss": 0.0232, + "step": 11160 + }, + { + "epoch": 1.702880859375e-05, + "model_forward_time": 0.024825096130371094, + "step": 11160 + }, + { + "epoch": 1.702880859375e-05, + "step": 11160, + "training_step_time": 0.2088146209716797 + }, + { + "epoch": 1.703033447265625e-05, + "model_forward_time": 0.02473282814025879, + "step": 11161 + }, + { + "epoch": 1.703033447265625e-05, + "step": 11161, + "training_step_time": 0.13893818855285645 + }, + { + "epoch": 1.70318603515625e-05, + "model_forward_time": 0.025026321411132812, + "step": 11162 + }, + { + "epoch": 1.70318603515625e-05, + "step": 11162, + "training_step_time": 0.11822867393493652 + }, + { + "epoch": 1.703338623046875e-05, + "model_forward_time": 0.025375843048095703, + "step": 11163 + }, + { + "epoch": 1.703338623046875e-05, + "step": 11163, + "training_step_time": 0.12199616432189941 + }, + { + "epoch": 1.7034912109375e-05, + "model_forward_time": 0.025544166564941406, + "step": 11164 + }, + { + "epoch": 1.7034912109375e-05, + "step": 11164, + "training_step_time": 0.11992430686950684 + }, + { + "epoch": 1.703643798828125e-05, + "model_forward_time": 0.025645732879638672, + "step": 11165 + }, + { + "epoch": 1.703643798828125e-05, + "step": 11165, + "training_step_time": 0.10805416107177734 + }, + { + "epoch": 1.70379638671875e-05, + "model_forward_time": 0.025648832321166992, + "step": 11166 + }, + { + "epoch": 1.70379638671875e-05, + "step": 11166, + "training_step_time": 0.1073751449584961 + }, + { + "epoch": 1.703948974609375e-05, + "model_forward_time": 0.026532411575317383, + "step": 11167 + }, + { + "epoch": 1.703948974609375e-05, + "step": 11167, + "training_step_time": 0.12313008308410645 + }, + { + "epoch": 1.7041015625e-05, + "model_forward_time": 0.025435209274291992, + "step": 11168 + }, + { + "epoch": 1.7041015625e-05, + "step": 11168, + "training_step_time": 0.1097421646118164 + }, + { + "epoch": 1.704254150390625e-05, + "model_forward_time": 0.025670289993286133, + "step": 11169 + }, + { + "epoch": 1.704254150390625e-05, + "step": 11169, + "training_step_time": 0.1113283634185791 + }, + { + "epoch": 1.70440673828125e-05, + "grad_norm": 0.26340359449386597, + "learning_rate": 7.418422153233919e-05, + "loss": 0.0187, + "step": 11170 + }, + { + "epoch": 1.70440673828125e-05, + "model_forward_time": 0.025643348693847656, + "step": 11170 + }, + { + "epoch": 1.70440673828125e-05, + "step": 11170, + "training_step_time": 0.1141502857208252 + }, + { + "epoch": 1.704559326171875e-05, + "model_forward_time": 0.02537226676940918, + "step": 11171 + }, + { + "epoch": 1.704559326171875e-05, + "step": 11171, + "training_step_time": 0.10982465744018555 + }, + { + "epoch": 1.7047119140625e-05, + "model_forward_time": 0.025803804397583008, + "step": 11172 + }, + { + "epoch": 1.7047119140625e-05, + "step": 11172, + "training_step_time": 0.11042284965515137 + }, + { + "epoch": 1.704864501953125e-05, + "model_forward_time": 0.025126934051513672, + "step": 11173 + }, + { + "epoch": 1.704864501953125e-05, + "step": 11173, + "training_step_time": 0.15484094619750977 + }, + { + "epoch": 1.70501708984375e-05, + "model_forward_time": 0.025116682052612305, + "step": 11174 + }, + { + "epoch": 1.70501708984375e-05, + "step": 11174, + "training_step_time": 0.1080167293548584 + }, + { + "epoch": 1.705169677734375e-05, + "model_forward_time": 0.024929285049438477, + "step": 11175 + }, + { + "epoch": 1.705169677734375e-05, + "step": 11175, + "training_step_time": 0.11369872093200684 + }, + { + "epoch": 1.705322265625e-05, + "model_forward_time": 0.026050567626953125, + "step": 11176 + }, + { + "epoch": 1.705322265625e-05, + "step": 11176, + "training_step_time": 0.12411093711853027 + }, + { + "epoch": 1.705474853515625e-05, + "model_forward_time": 0.025557279586791992, + "step": 11177 + }, + { + "epoch": 1.705474853515625e-05, + "step": 11177, + "training_step_time": 0.12524175643920898 + }, + { + "epoch": 1.70562744140625e-05, + "model_forward_time": 0.025540828704833984, + "step": 11178 + }, + { + "epoch": 1.70562744140625e-05, + "step": 11178, + "training_step_time": 0.11878705024719238 + }, + { + "epoch": 1.705780029296875e-05, + "model_forward_time": 0.025159358978271484, + "step": 11179 + }, + { + "epoch": 1.705780029296875e-05, + "step": 11179, + "training_step_time": 0.11896824836730957 + }, + { + "epoch": 1.7059326171875e-05, + "grad_norm": 0.25540876388549805, + "learning_rate": 7.413596728245054e-05, + "loss": 0.022, + "step": 11180 + }, + { + "epoch": 1.7059326171875e-05, + "model_forward_time": 0.025393962860107422, + "step": 11180 + }, + { + "epoch": 1.7059326171875e-05, + "step": 11180, + "training_step_time": 0.1093132495880127 + }, + { + "epoch": 1.706085205078125e-05, + "model_forward_time": 0.025466203689575195, + "step": 11181 + }, + { + "epoch": 1.706085205078125e-05, + "step": 11181, + "training_step_time": 0.10653090476989746 + }, + { + "epoch": 1.70623779296875e-05, + "model_forward_time": 0.02513289451599121, + "step": 11182 + }, + { + "epoch": 1.70623779296875e-05, + "step": 11182, + "training_step_time": 0.1080162525177002 + }, + { + "epoch": 1.706390380859375e-05, + "model_forward_time": 0.0254671573638916, + "step": 11183 + }, + { + "epoch": 1.706390380859375e-05, + "step": 11183, + "training_step_time": 0.10893774032592773 + }, + { + "epoch": 1.70654296875e-05, + "model_forward_time": 0.025591373443603516, + "step": 11184 + }, + { + "epoch": 1.70654296875e-05, + "step": 11184, + "training_step_time": 0.10963010787963867 + }, + { + "epoch": 1.706695556640625e-05, + "model_forward_time": 0.024818897247314453, + "step": 11185 + }, + { + "epoch": 1.706695556640625e-05, + "step": 11185, + "training_step_time": 0.11135458946228027 + }, + { + "epoch": 1.70684814453125e-05, + "model_forward_time": 0.025203943252563477, + "step": 11186 + }, + { + "epoch": 1.70684814453125e-05, + "step": 11186, + "training_step_time": 0.11042666435241699 + }, + { + "epoch": 1.707000732421875e-05, + "model_forward_time": 0.025552034378051758, + "step": 11187 + }, + { + "epoch": 1.707000732421875e-05, + "step": 11187, + "training_step_time": 0.10788965225219727 + }, + { + "epoch": 1.7071533203125e-05, + "model_forward_time": 0.02545905113220215, + "step": 11188 + }, + { + "epoch": 1.7071533203125e-05, + "step": 11188, + "training_step_time": 0.10719847679138184 + }, + { + "epoch": 1.707305908203125e-05, + "model_forward_time": 0.024295568466186523, + "step": 11189 + }, + { + "epoch": 1.707305908203125e-05, + "step": 11189, + "training_step_time": 0.10810565948486328 + }, + { + "epoch": 1.70745849609375e-05, + "grad_norm": 0.3643791973590851, + "learning_rate": 7.408768370508576e-05, + "loss": 0.0291, + "step": 11190 + }, + { + "epoch": 1.70745849609375e-05, + "model_forward_time": 0.024782896041870117, + "step": 11190 + }, + { + "epoch": 1.70745849609375e-05, + "step": 11190, + "training_step_time": 0.11055302619934082 + }, + { + "epoch": 1.707611083984375e-05, + "model_forward_time": 0.02525925636291504, + "step": 11191 + }, + { + "epoch": 1.707611083984375e-05, + "step": 11191, + "training_step_time": 0.10683584213256836 + }, + { + "epoch": 1.707763671875e-05, + "model_forward_time": 0.02523493766784668, + "step": 11192 + }, + { + "epoch": 1.707763671875e-05, + "step": 11192, + "training_step_time": 0.11076211929321289 + }, + { + "epoch": 1.707916259765625e-05, + "model_forward_time": 0.025388240814208984, + "step": 11193 + }, + { + "epoch": 1.707916259765625e-05, + "step": 11193, + "training_step_time": 0.10844039916992188 + }, + { + "epoch": 1.70806884765625e-05, + "model_forward_time": 0.02474498748779297, + "step": 11194 + }, + { + "epoch": 1.70806884765625e-05, + "step": 11194, + "training_step_time": 0.11204838752746582 + }, + { + "epoch": 1.708221435546875e-05, + "model_forward_time": 0.025757312774658203, + "step": 11195 + }, + { + "epoch": 1.708221435546875e-05, + "step": 11195, + "training_step_time": 0.11095595359802246 + }, + { + "epoch": 1.7083740234375e-05, + "model_forward_time": 0.024172067642211914, + "step": 11196 + }, + { + "epoch": 1.7083740234375e-05, + "step": 11196, + "training_step_time": 0.10817503929138184 + }, + { + "epoch": 1.708526611328125e-05, + "model_forward_time": 0.024150848388671875, + "step": 11197 + }, + { + "epoch": 1.708526611328125e-05, + "step": 11197, + "training_step_time": 0.11745238304138184 + }, + { + "epoch": 1.70867919921875e-05, + "model_forward_time": 0.02460336685180664, + "step": 11198 + }, + { + "epoch": 1.70867919921875e-05, + "step": 11198, + "training_step_time": 0.10994124412536621 + }, + { + "epoch": 1.708831787109375e-05, + "model_forward_time": 0.025330066680908203, + "step": 11199 + }, + { + "epoch": 1.708831787109375e-05, + "step": 11199, + "training_step_time": 0.1705784797668457 + }, + { + "epoch": 1.708984375e-05, + "grad_norm": 0.272895872592926, + "learning_rate": 7.403937085891397e-05, + "loss": 0.0305, + "step": 11200 + }, + { + "epoch": 1.708984375e-05, + "model_forward_time": 0.023299217224121094, + "step": 11200 + }, + { + "epoch": 1.708984375e-05, + "step": 11200, + "training_step_time": 0.1596074104309082 + }, + { + "epoch": 1.709136962890625e-05, + "model_forward_time": 0.02404618263244629, + "step": 11201 + }, + { + "epoch": 1.709136962890625e-05, + "step": 11201, + "training_step_time": 0.21463704109191895 + }, + { + "epoch": 1.70928955078125e-05, + "model_forward_time": 0.02338433265686035, + "step": 11202 + }, + { + "epoch": 1.70928955078125e-05, + "step": 11202, + "training_step_time": 0.10472607612609863 + }, + { + "epoch": 1.709442138671875e-05, + "model_forward_time": 0.022625446319580078, + "step": 11203 + }, + { + "epoch": 1.709442138671875e-05, + "step": 11203, + "training_step_time": 0.13037323951721191 + }, + { + "epoch": 1.7095947265625e-05, + "model_forward_time": 0.02410435676574707, + "step": 11204 + }, + { + "epoch": 1.7095947265625e-05, + "step": 11204, + "training_step_time": 0.11285948753356934 + }, + { + "epoch": 1.709747314453125e-05, + "model_forward_time": 0.02443218231201172, + "step": 11205 + }, + { + "epoch": 1.709747314453125e-05, + "step": 11205, + "training_step_time": 0.17781734466552734 + }, + { + "epoch": 1.70989990234375e-05, + "model_forward_time": 0.023778200149536133, + "step": 11206 + }, + { + "epoch": 1.70989990234375e-05, + "step": 11206, + "training_step_time": 0.15695667266845703 + }, + { + "epoch": 1.710052490234375e-05, + "model_forward_time": 0.027554035186767578, + "step": 11207 + }, + { + "epoch": 1.710052490234375e-05, + "step": 11207, + "training_step_time": 0.14752459526062012 + }, + { + "epoch": 1.710205078125e-05, + "model_forward_time": 0.025366544723510742, + "step": 11208 + }, + { + "epoch": 1.710205078125e-05, + "step": 11208, + "training_step_time": 0.15101313591003418 + }, + { + "epoch": 1.710357666015625e-05, + "model_forward_time": 0.02483820915222168, + "step": 11209 + }, + { + "epoch": 1.710357666015625e-05, + "step": 11209, + "training_step_time": 0.21700596809387207 + }, + { + "epoch": 1.71051025390625e-05, + "grad_norm": 0.47021782398223877, + "learning_rate": 7.399102880263983e-05, + "loss": 0.0266, + "step": 11210 + }, + { + "epoch": 1.71051025390625e-05, + "model_forward_time": 0.02443408966064453, + "step": 11210 + }, + { + "epoch": 1.71051025390625e-05, + "step": 11210, + "training_step_time": 0.1195991039276123 + }, + { + "epoch": 1.710662841796875e-05, + "model_forward_time": 0.024375200271606445, + "step": 11211 + }, + { + "epoch": 1.710662841796875e-05, + "step": 11211, + "training_step_time": 0.10855698585510254 + }, + { + "epoch": 1.7108154296875e-05, + "model_forward_time": 0.02531886100769043, + "step": 11212 + }, + { + "epoch": 1.7108154296875e-05, + "step": 11212, + "training_step_time": 0.10663461685180664 + }, + { + "epoch": 1.710968017578125e-05, + "model_forward_time": 0.025848865509033203, + "step": 11213 + }, + { + "epoch": 1.710968017578125e-05, + "step": 11213, + "training_step_time": 0.10857605934143066 + }, + { + "epoch": 1.71112060546875e-05, + "model_forward_time": 0.024697303771972656, + "step": 11214 + }, + { + "epoch": 1.71112060546875e-05, + "step": 11214, + "training_step_time": 0.1137082576751709 + }, + { + "epoch": 1.711273193359375e-05, + "model_forward_time": 0.024188518524169922, + "step": 11215 + }, + { + "epoch": 1.711273193359375e-05, + "step": 11215, + "training_step_time": 0.12606310844421387 + }, + { + "epoch": 1.71142578125e-05, + "model_forward_time": 0.023963451385498047, + "step": 11216 + }, + { + "epoch": 1.71142578125e-05, + "step": 11216, + "training_step_time": 0.12078261375427246 + }, + { + "epoch": 1.711578369140625e-05, + "model_forward_time": 0.025397062301635742, + "step": 11217 + }, + { + "epoch": 1.711578369140625e-05, + "step": 11217, + "training_step_time": 0.12273383140563965 + }, + { + "epoch": 1.71173095703125e-05, + "model_forward_time": 0.02507638931274414, + "step": 11218 + }, + { + "epoch": 1.71173095703125e-05, + "step": 11218, + "training_step_time": 0.1221461296081543 + }, + { + "epoch": 1.711883544921875e-05, + "model_forward_time": 0.02522730827331543, + "step": 11219 + }, + { + "epoch": 1.711883544921875e-05, + "step": 11219, + "training_step_time": 0.1271045207977295 + }, + { + "epoch": 1.7120361328125e-05, + "grad_norm": 0.3241020739078522, + "learning_rate": 7.394265759500348e-05, + "loss": 0.0207, + "step": 11220 + }, + { + "epoch": 1.7120361328125e-05, + "model_forward_time": 0.025205373764038086, + "step": 11220 + }, + { + "epoch": 1.7120361328125e-05, + "step": 11220, + "training_step_time": 0.11617779731750488 + }, + { + "epoch": 1.712188720703125e-05, + "model_forward_time": 0.025170087814331055, + "step": 11221 + }, + { + "epoch": 1.712188720703125e-05, + "step": 11221, + "training_step_time": 0.21965932846069336 + }, + { + "epoch": 1.71234130859375e-05, + "model_forward_time": 0.024204254150390625, + "step": 11222 + }, + { + "epoch": 1.71234130859375e-05, + "step": 11222, + "training_step_time": 0.13701152801513672 + }, + { + "epoch": 1.712493896484375e-05, + "model_forward_time": 0.024699926376342773, + "step": 11223 + }, + { + "epoch": 1.712493896484375e-05, + "step": 11223, + "training_step_time": 0.10868310928344727 + }, + { + "epoch": 1.712646484375e-05, + "model_forward_time": 0.024086713790893555, + "step": 11224 + }, + { + "epoch": 1.712646484375e-05, + "step": 11224, + "training_step_time": 0.11427521705627441 + }, + { + "epoch": 1.712799072265625e-05, + "model_forward_time": 0.026080608367919922, + "step": 11225 + }, + { + "epoch": 1.712799072265625e-05, + "step": 11225, + "training_step_time": 0.11011981964111328 + }, + { + "epoch": 1.71295166015625e-05, + "model_forward_time": 0.023947954177856445, + "step": 11226 + }, + { + "epoch": 1.71295166015625e-05, + "step": 11226, + "training_step_time": 0.11208295822143555 + }, + { + "epoch": 1.713104248046875e-05, + "model_forward_time": 0.024003267288208008, + "step": 11227 + }, + { + "epoch": 1.713104248046875e-05, + "step": 11227, + "training_step_time": 0.1077268123626709 + }, + { + "epoch": 1.7132568359375e-05, + "model_forward_time": 0.024229049682617188, + "step": 11228 + }, + { + "epoch": 1.7132568359375e-05, + "step": 11228, + "training_step_time": 0.10883426666259766 + }, + { + "epoch": 1.713409423828125e-05, + "model_forward_time": 0.02457118034362793, + "step": 11229 + }, + { + "epoch": 1.713409423828125e-05, + "step": 11229, + "training_step_time": 0.11219382286071777 + }, + { + "epoch": 1.71356201171875e-05, + "grad_norm": 0.36847159266471863, + "learning_rate": 7.389425729478051e-05, + "loss": 0.033, + "step": 11230 + }, + { + "epoch": 1.71356201171875e-05, + "model_forward_time": 0.024662256240844727, + "step": 11230 + }, + { + "epoch": 1.71356201171875e-05, + "step": 11230, + "training_step_time": 0.11227750778198242 + }, + { + "epoch": 1.713714599609375e-05, + "model_forward_time": 0.02401566505432129, + "step": 11231 + }, + { + "epoch": 1.713714599609375e-05, + "step": 11231, + "training_step_time": 0.10807204246520996 + }, + { + "epoch": 1.7138671875e-05, + "model_forward_time": 0.026584148406982422, + "step": 11232 + }, + { + "epoch": 1.7138671875e-05, + "step": 11232, + "training_step_time": 0.10909628868103027 + }, + { + "epoch": 1.714019775390625e-05, + "model_forward_time": 0.02404475212097168, + "step": 11233 + }, + { + "epoch": 1.714019775390625e-05, + "step": 11233, + "training_step_time": 0.10792970657348633 + }, + { + "epoch": 1.71417236328125e-05, + "model_forward_time": 0.02445387840270996, + "step": 11234 + }, + { + "epoch": 1.71417236328125e-05, + "step": 11234, + "training_step_time": 0.10784673690795898 + }, + { + "epoch": 1.714324951171875e-05, + "model_forward_time": 0.02426433563232422, + "step": 11235 + }, + { + "epoch": 1.714324951171875e-05, + "step": 11235, + "training_step_time": 0.10753321647644043 + }, + { + "epoch": 1.7144775390625e-05, + "model_forward_time": 0.02454662322998047, + "step": 11236 + }, + { + "epoch": 1.7144775390625e-05, + "step": 11236, + "training_step_time": 0.10737967491149902 + }, + { + "epoch": 1.714630126953125e-05, + "model_forward_time": 0.02426934242248535, + "step": 11237 + }, + { + "epoch": 1.714630126953125e-05, + "step": 11237, + "training_step_time": 0.11509585380554199 + }, + { + "epoch": 1.71478271484375e-05, + "model_forward_time": 0.024796009063720703, + "step": 11238 + }, + { + "epoch": 1.71478271484375e-05, + "step": 11238, + "training_step_time": 0.11751389503479004 + }, + { + "epoch": 1.714935302734375e-05, + "model_forward_time": 0.026983976364135742, + "step": 11239 + }, + { + "epoch": 1.714935302734375e-05, + "step": 11239, + "training_step_time": 0.11664533615112305 + }, + { + "epoch": 1.715087890625e-05, + "grad_norm": 0.24231822788715363, + "learning_rate": 7.384582796078184e-05, + "loss": 0.0213, + "step": 11240 + }, + { + "epoch": 1.715087890625e-05, + "model_forward_time": 0.030149459838867188, + "step": 11240 + }, + { + "epoch": 1.715087890625e-05, + "step": 11240, + "training_step_time": 0.15903639793395996 + }, + { + "epoch": 1.715240478515625e-05, + "model_forward_time": 0.026667356491088867, + "step": 11241 + }, + { + "epoch": 1.715240478515625e-05, + "step": 11241, + "training_step_time": 0.24108576774597168 + }, + { + "epoch": 1.71539306640625e-05, + "model_forward_time": 0.028002262115478516, + "step": 11242 + }, + { + "epoch": 1.71539306640625e-05, + "step": 11242, + "training_step_time": 0.2801830768585205 + }, + { + "epoch": 1.715545654296875e-05, + "model_forward_time": 0.04683423042297363, + "step": 11243 + }, + { + "epoch": 1.715545654296875e-05, + "step": 11243, + "training_step_time": 0.3141360282897949 + }, + { + "epoch": 1.7156982421875e-05, + "model_forward_time": 0.03125810623168945, + "step": 11244 + }, + { + "epoch": 1.7156982421875e-05, + "step": 11244, + "training_step_time": 0.3416590690612793 + }, + { + "epoch": 1.715850830078125e-05, + "model_forward_time": 0.0289766788482666, + "step": 11245 + }, + { + "epoch": 1.715850830078125e-05, + "step": 11245, + "training_step_time": 0.40026307106018066 + }, + { + "epoch": 1.71600341796875e-05, + "model_forward_time": 0.030750513076782227, + "step": 11246 + }, + { + "epoch": 1.71600341796875e-05, + "step": 11246, + "training_step_time": 0.3159065246582031 + }, + { + "epoch": 1.716156005859375e-05, + "model_forward_time": 0.030999422073364258, + "step": 11247 + }, + { + "epoch": 1.716156005859375e-05, + "step": 11247, + "training_step_time": 0.3515892028808594 + }, + { + "epoch": 1.71630859375e-05, + "model_forward_time": 0.0354619026184082, + "step": 11248 + }, + { + "epoch": 1.71630859375e-05, + "step": 11248, + "training_step_time": 0.2969551086425781 + }, + { + "epoch": 1.716461181640625e-05, + "model_forward_time": 0.03065633773803711, + "step": 11249 + }, + { + "epoch": 1.716461181640625e-05, + "step": 11249, + "training_step_time": 0.2698521614074707 + }, + { + "epoch": 1.71661376953125e-05, + "grad_norm": 0.4318647086620331, + "learning_rate": 7.379736965185368e-05, + "loss": 0.0178, + "step": 11250 + }, + { + "epoch": 1.71661376953125e-05, + "model_forward_time": 0.032936811447143555, + "step": 11250 + }, + { + "epoch": 1.71661376953125e-05, + "step": 11250, + "training_step_time": 0.23099589347839355 + }, + { + "epoch": 1.716766357421875e-05, + "model_forward_time": 0.02979445457458496, + "step": 11251 + }, + { + "epoch": 1.716766357421875e-05, + "step": 11251, + "training_step_time": 0.22020721435546875 + }, + { + "epoch": 1.7169189453125e-05, + "model_forward_time": 0.0302276611328125, + "step": 11252 + }, + { + "epoch": 1.7169189453125e-05, + "step": 11252, + "training_step_time": 0.32690000534057617 + }, + { + "epoch": 1.717071533203125e-05, + "model_forward_time": 0.030982255935668945, + "step": 11253 + }, + { + "epoch": 1.717071533203125e-05, + "step": 11253, + "training_step_time": 0.2746727466583252 + }, + { + "epoch": 1.71722412109375e-05, + "model_forward_time": 0.029308319091796875, + "step": 11254 + }, + { + "epoch": 1.71722412109375e-05, + "step": 11254, + "training_step_time": 0.19051218032836914 + }, + { + "epoch": 1.717376708984375e-05, + "model_forward_time": 0.03016066551208496, + "step": 11255 + }, + { + "epoch": 1.717376708984375e-05, + "step": 11255, + "training_step_time": 0.15238165855407715 + }, + { + "epoch": 1.717529296875e-05, + "model_forward_time": 0.029612064361572266, + "step": 11256 + }, + { + "epoch": 1.717529296875e-05, + "step": 11256, + "training_step_time": 0.13900303840637207 + }, + { + "epoch": 1.717681884765625e-05, + "model_forward_time": 0.030167579650878906, + "step": 11257 + }, + { + "epoch": 1.717681884765625e-05, + "step": 11257, + "training_step_time": 0.1388835906982422 + }, + { + "epoch": 1.71783447265625e-05, + "model_forward_time": 0.02804422378540039, + "step": 11258 + }, + { + "epoch": 1.71783447265625e-05, + "step": 11258, + "training_step_time": 0.12262725830078125 + }, + { + "epoch": 1.717987060546875e-05, + "model_forward_time": 0.02742290496826172, + "step": 11259 + }, + { + "epoch": 1.717987060546875e-05, + "step": 11259, + "training_step_time": 0.12679076194763184 + }, + { + "epoch": 1.7181396484375e-05, + "grad_norm": 0.4221991300582886, + "learning_rate": 7.374888242687746e-05, + "loss": 0.0152, + "step": 11260 + }, + { + "epoch": 1.7181396484375e-05, + "model_forward_time": 0.026491165161132812, + "step": 11260 + }, + { + "epoch": 1.7181396484375e-05, + "step": 11260, + "training_step_time": 0.11997270584106445 + }, + { + "epoch": 1.718292236328125e-05, + "model_forward_time": 0.027240991592407227, + "step": 11261 + }, + { + "epoch": 1.718292236328125e-05, + "step": 11261, + "training_step_time": 0.11971426010131836 + }, + { + "epoch": 1.71844482421875e-05, + "model_forward_time": 0.02800726890563965, + "step": 11262 + }, + { + "epoch": 1.71844482421875e-05, + "step": 11262, + "training_step_time": 0.11366128921508789 + }, + { + "epoch": 1.718597412109375e-05, + "model_forward_time": 0.02586054801940918, + "step": 11263 + }, + { + "epoch": 1.718597412109375e-05, + "step": 11263, + "training_step_time": 0.10940194129943848 + }, + { + "epoch": 1.71875e-05, + "model_forward_time": 0.029146671295166016, + "step": 11264 + }, + { + "epoch": 1.71875e-05, + "step": 11264, + "training_step_time": 0.10982251167297363 + }, + { + "epoch": 1.718902587890625e-05, + "model_forward_time": 0.024967193603515625, + "step": 11265 + }, + { + "epoch": 1.718902587890625e-05, + "step": 11265, + "training_step_time": 0.109954833984375 + }, + { + "epoch": 1.71905517578125e-05, + "model_forward_time": 0.02532672882080078, + "step": 11266 + }, + { + "epoch": 1.71905517578125e-05, + "step": 11266, + "training_step_time": 0.10913920402526855 + }, + { + "epoch": 1.719207763671875e-05, + "model_forward_time": 0.0249788761138916, + "step": 11267 + }, + { + "epoch": 1.719207763671875e-05, + "step": 11267, + "training_step_time": 0.10576081275939941 + }, + { + "epoch": 1.7193603515625e-05, + "model_forward_time": 0.02524280548095703, + "step": 11268 + }, + { + "epoch": 1.7193603515625e-05, + "step": 11268, + "training_step_time": 0.1308290958404541 + }, + { + "epoch": 1.719512939453125e-05, + "model_forward_time": 0.024198532104492188, + "step": 11269 + }, + { + "epoch": 1.719512939453125e-05, + "step": 11269, + "training_step_time": 0.19747185707092285 + }, + { + "epoch": 1.71966552734375e-05, + "grad_norm": 0.28328460454940796, + "learning_rate": 7.37003663447697e-05, + "loss": 0.0203, + "step": 11270 + }, + { + "epoch": 1.71966552734375e-05, + "model_forward_time": 0.025131702423095703, + "step": 11270 + }, + { + "epoch": 1.71966552734375e-05, + "step": 11270, + "training_step_time": 0.19914460182189941 + }, + { + "epoch": 1.719818115234375e-05, + "model_forward_time": 0.02462601661682129, + "step": 11271 + }, + { + "epoch": 1.719818115234375e-05, + "step": 11271, + "training_step_time": 0.14717435836791992 + }, + { + "epoch": 1.719970703125e-05, + "model_forward_time": 0.024669647216796875, + "step": 11272 + }, + { + "epoch": 1.719970703125e-05, + "step": 11272, + "training_step_time": 0.19244861602783203 + }, + { + "epoch": 1.720123291015625e-05, + "model_forward_time": 0.024356603622436523, + "step": 11273 + }, + { + "epoch": 1.720123291015625e-05, + "step": 11273, + "training_step_time": 0.17815256118774414 + }, + { + "epoch": 1.72027587890625e-05, + "model_forward_time": 0.024710655212402344, + "step": 11274 + }, + { + "epoch": 1.72027587890625e-05, + "step": 11274, + "training_step_time": 0.1367356777191162 + }, + { + "epoch": 1.720428466796875e-05, + "model_forward_time": 0.025089502334594727, + "step": 11275 + }, + { + "epoch": 1.720428466796875e-05, + "step": 11275, + "training_step_time": 0.10929131507873535 + }, + { + "epoch": 1.7205810546875e-05, + "model_forward_time": 0.025282859802246094, + "step": 11276 + }, + { + "epoch": 1.7205810546875e-05, + "step": 11276, + "training_step_time": 0.1066431999206543 + }, + { + "epoch": 1.720733642578125e-05, + "model_forward_time": 0.025750398635864258, + "step": 11277 + }, + { + "epoch": 1.720733642578125e-05, + "step": 11277, + "training_step_time": 0.11887383460998535 + }, + { + "epoch": 1.72088623046875e-05, + "model_forward_time": 0.02523946762084961, + "step": 11278 + }, + { + "epoch": 1.72088623046875e-05, + "step": 11278, + "training_step_time": 0.20529890060424805 + }, + { + "epoch": 1.721038818359375e-05, + "model_forward_time": 0.024829387664794922, + "step": 11279 + }, + { + "epoch": 1.721038818359375e-05, + "step": 11279, + "training_step_time": 0.12160778045654297 + }, + { + "epoch": 1.72119140625e-05, + "grad_norm": 0.5873472690582275, + "learning_rate": 7.365182146448205e-05, + "loss": 0.0365, + "step": 11280 + }, + { + "epoch": 1.72119140625e-05, + "model_forward_time": 0.024168968200683594, + "step": 11280 + }, + { + "epoch": 1.72119140625e-05, + "step": 11280, + "training_step_time": 0.11086606979370117 + }, + { + "epoch": 1.721343994140625e-05, + "model_forward_time": 0.02543330192565918, + "step": 11281 + }, + { + "epoch": 1.721343994140625e-05, + "step": 11281, + "training_step_time": 0.16066336631774902 + }, + { + "epoch": 1.72149658203125e-05, + "model_forward_time": 0.024605989456176758, + "step": 11282 + }, + { + "epoch": 1.72149658203125e-05, + "step": 11282, + "training_step_time": 0.17148900032043457 + }, + { + "epoch": 1.721649169921875e-05, + "model_forward_time": 0.024548768997192383, + "step": 11283 + }, + { + "epoch": 1.721649169921875e-05, + "step": 11283, + "training_step_time": 0.15686869621276855 + }, + { + "epoch": 1.7218017578125e-05, + "model_forward_time": 0.024563074111938477, + "step": 11284 + }, + { + "epoch": 1.7218017578125e-05, + "step": 11284, + "training_step_time": 0.11179065704345703 + }, + { + "epoch": 1.721954345703125e-05, + "model_forward_time": 0.024017333984375, + "step": 11285 + }, + { + "epoch": 1.721954345703125e-05, + "step": 11285, + "training_step_time": 0.1075735092163086 + }, + { + "epoch": 1.72210693359375e-05, + "model_forward_time": 0.02544546127319336, + "step": 11286 + }, + { + "epoch": 1.72210693359375e-05, + "step": 11286, + "training_step_time": 0.11088204383850098 + }, + { + "epoch": 1.722259521484375e-05, + "model_forward_time": 0.02534198760986328, + "step": 11287 + }, + { + "epoch": 1.722259521484375e-05, + "step": 11287, + "training_step_time": 0.14317846298217773 + }, + { + "epoch": 1.722412109375e-05, + "model_forward_time": 0.025422096252441406, + "step": 11288 + }, + { + "epoch": 1.722412109375e-05, + "step": 11288, + "training_step_time": 0.1554572582244873 + }, + { + "epoch": 1.722564697265625e-05, + "model_forward_time": 0.025255441665649414, + "step": 11289 + }, + { + "epoch": 1.722564697265625e-05, + "step": 11289, + "training_step_time": 0.1456451416015625 + }, + { + "epoch": 1.72271728515625e-05, + "grad_norm": 0.5029672384262085, + "learning_rate": 7.36032478450011e-05, + "loss": 0.0234, + "step": 11290 + }, + { + "epoch": 1.72271728515625e-05, + "model_forward_time": 0.024483919143676758, + "step": 11290 + }, + { + "epoch": 1.72271728515625e-05, + "step": 11290, + "training_step_time": 0.14616131782531738 + }, + { + "epoch": 1.722869873046875e-05, + "model_forward_time": 0.025075674057006836, + "step": 11291 + }, + { + "epoch": 1.722869873046875e-05, + "step": 11291, + "training_step_time": 0.1374378204345703 + }, + { + "epoch": 1.7230224609375e-05, + "model_forward_time": 0.02429986000061035, + "step": 11292 + }, + { + "epoch": 1.7230224609375e-05, + "step": 11292, + "training_step_time": 0.127288818359375 + }, + { + "epoch": 1.723175048828125e-05, + "model_forward_time": 0.025243520736694336, + "step": 11293 + }, + { + "epoch": 1.723175048828125e-05, + "step": 11293, + "training_step_time": 0.14596343040466309 + }, + { + "epoch": 1.72332763671875e-05, + "model_forward_time": 0.025833606719970703, + "step": 11294 + }, + { + "epoch": 1.72332763671875e-05, + "step": 11294, + "training_step_time": 0.12303948402404785 + }, + { + "epoch": 1.723480224609375e-05, + "model_forward_time": 0.026549339294433594, + "step": 11295 + }, + { + "epoch": 1.723480224609375e-05, + "step": 11295, + "training_step_time": 0.2084650993347168 + }, + { + "epoch": 1.7236328125e-05, + "model_forward_time": 0.024619102478027344, + "step": 11296 + }, + { + "epoch": 1.7236328125e-05, + "step": 11296, + "training_step_time": 0.1325082778930664 + }, + { + "epoch": 1.723785400390625e-05, + "model_forward_time": 0.02473139762878418, + "step": 11297 + }, + { + "epoch": 1.723785400390625e-05, + "step": 11297, + "training_step_time": 0.11129879951477051 + }, + { + "epoch": 1.72393798828125e-05, + "model_forward_time": 0.025101184844970703, + "step": 11298 + }, + { + "epoch": 1.72393798828125e-05, + "step": 11298, + "training_step_time": 0.11524081230163574 + }, + { + "epoch": 1.724090576171875e-05, + "model_forward_time": 0.02512335777282715, + "step": 11299 + }, + { + "epoch": 1.724090576171875e-05, + "step": 11299, + "training_step_time": 0.10977411270141602 + }, + { + "epoch": 1.7242431640625e-05, + "grad_norm": 0.423922061920166, + "learning_rate": 7.355464554534837e-05, + "loss": 0.0212, + "step": 11300 + }, + { + "epoch": 1.7242431640625e-05, + "model_forward_time": 0.024432897567749023, + "step": 11300 + }, + { + "epoch": 1.7242431640625e-05, + "step": 11300, + "training_step_time": 0.1138761043548584 + }, + { + "epoch": 1.724395751953125e-05, + "model_forward_time": 0.02531123161315918, + "step": 11301 + }, + { + "epoch": 1.724395751953125e-05, + "step": 11301, + "training_step_time": 0.1101381778717041 + }, + { + "epoch": 1.72454833984375e-05, + "model_forward_time": 0.02599191665649414, + "step": 11302 + }, + { + "epoch": 1.72454833984375e-05, + "step": 11302, + "training_step_time": 0.1109776496887207 + }, + { + "epoch": 1.724700927734375e-05, + "model_forward_time": 0.025195837020874023, + "step": 11303 + }, + { + "epoch": 1.724700927734375e-05, + "step": 11303, + "training_step_time": 0.11104774475097656 + }, + { + "epoch": 1.724853515625e-05, + "model_forward_time": 0.02540278434753418, + "step": 11304 + }, + { + "epoch": 1.724853515625e-05, + "step": 11304, + "training_step_time": 0.1133122444152832 + }, + { + "epoch": 1.725006103515625e-05, + "model_forward_time": 0.025713682174682617, + "step": 11305 + }, + { + "epoch": 1.725006103515625e-05, + "step": 11305, + "training_step_time": 0.10701727867126465 + }, + { + "epoch": 1.72515869140625e-05, + "model_forward_time": 0.02561044692993164, + "step": 11306 + }, + { + "epoch": 1.72515869140625e-05, + "step": 11306, + "training_step_time": 0.11105775833129883 + }, + { + "epoch": 1.725311279296875e-05, + "model_forward_time": 0.02550792694091797, + "step": 11307 + }, + { + "epoch": 1.725311279296875e-05, + "step": 11307, + "training_step_time": 0.10890984535217285 + }, + { + "epoch": 1.7254638671875e-05, + "model_forward_time": 0.025569915771484375, + "step": 11308 + }, + { + "epoch": 1.7254638671875e-05, + "step": 11308, + "training_step_time": 0.1134490966796875 + }, + { + "epoch": 1.725616455078125e-05, + "model_forward_time": 0.024857759475708008, + "step": 11309 + }, + { + "epoch": 1.725616455078125e-05, + "step": 11309, + "training_step_time": 0.10868215560913086 + }, + { + "epoch": 1.72576904296875e-05, + "grad_norm": 0.802464485168457, + "learning_rate": 7.350601462458024e-05, + "loss": 0.0242, + "step": 11310 + }, + { + "epoch": 1.72576904296875e-05, + "model_forward_time": 0.026113510131835938, + "step": 11310 + }, + { + "epoch": 1.72576904296875e-05, + "step": 11310, + "training_step_time": 0.10826349258422852 + }, + { + "epoch": 1.725921630859375e-05, + "model_forward_time": 0.025789499282836914, + "step": 11311 + }, + { + "epoch": 1.725921630859375e-05, + "step": 11311, + "training_step_time": 0.1684434413909912 + }, + { + "epoch": 1.72607421875e-05, + "model_forward_time": 0.024709224700927734, + "step": 11312 + }, + { + "epoch": 1.72607421875e-05, + "step": 11312, + "training_step_time": 0.15661311149597168 + }, + { + "epoch": 1.726226806640625e-05, + "model_forward_time": 0.025159597396850586, + "step": 11313 + }, + { + "epoch": 1.726226806640625e-05, + "step": 11313, + "training_step_time": 0.11499834060668945 + }, + { + "epoch": 1.72637939453125e-05, + "model_forward_time": 0.025951862335205078, + "step": 11314 + }, + { + "epoch": 1.72637939453125e-05, + "step": 11314, + "training_step_time": 0.1122126579284668 + }, + { + "epoch": 1.726531982421875e-05, + "model_forward_time": 0.025228023529052734, + "step": 11315 + }, + { + "epoch": 1.726531982421875e-05, + "step": 11315, + "training_step_time": 0.16967177391052246 + }, + { + "epoch": 1.7266845703125e-05, + "model_forward_time": 0.025134801864624023, + "step": 11316 + }, + { + "epoch": 1.7266845703125e-05, + "step": 11316, + "training_step_time": 0.20160579681396484 + }, + { + "epoch": 1.726837158203125e-05, + "model_forward_time": 0.02446770668029785, + "step": 11317 + }, + { + "epoch": 1.726837158203125e-05, + "step": 11317, + "training_step_time": 0.14013338088989258 + }, + { + "epoch": 1.72698974609375e-05, + "model_forward_time": 0.026013851165771484, + "step": 11318 + }, + { + "epoch": 1.72698974609375e-05, + "step": 11318, + "training_step_time": 0.10466122627258301 + }, + { + "epoch": 1.727142333984375e-05, + "model_forward_time": 0.02592945098876953, + "step": 11319 + }, + { + "epoch": 1.727142333984375e-05, + "step": 11319, + "training_step_time": 0.11877894401550293 + }, + { + "epoch": 1.727294921875e-05, + "grad_norm": 0.7488951683044434, + "learning_rate": 7.345735514178787e-05, + "loss": 0.0267, + "step": 11320 + }, + { + "epoch": 1.727294921875e-05, + "model_forward_time": 0.027347803115844727, + "step": 11320 + }, + { + "epoch": 1.727294921875e-05, + "step": 11320, + "training_step_time": 0.11244988441467285 + }, + { + "epoch": 1.727447509765625e-05, + "model_forward_time": 0.026203393936157227, + "step": 11321 + }, + { + "epoch": 1.727447509765625e-05, + "step": 11321, + "training_step_time": 0.1283702850341797 + }, + { + "epoch": 1.72760009765625e-05, + "model_forward_time": 0.026607990264892578, + "step": 11322 + }, + { + "epoch": 1.72760009765625e-05, + "step": 11322, + "training_step_time": 0.1818840503692627 + }, + { + "epoch": 1.727752685546875e-05, + "model_forward_time": 0.025214433670043945, + "step": 11323 + }, + { + "epoch": 1.727752685546875e-05, + "step": 11323, + "training_step_time": 0.1645801067352295 + }, + { + "epoch": 1.7279052734375e-05, + "model_forward_time": 0.02479386329650879, + "step": 11324 + }, + { + "epoch": 1.7279052734375e-05, + "step": 11324, + "training_step_time": 0.17442893981933594 + }, + { + "epoch": 1.728057861328125e-05, + "model_forward_time": 0.02448749542236328, + "step": 11325 + }, + { + "epoch": 1.728057861328125e-05, + "step": 11325, + "training_step_time": 0.12282729148864746 + }, + { + "epoch": 1.72821044921875e-05, + "model_forward_time": 0.025202274322509766, + "step": 11326 + }, + { + "epoch": 1.72821044921875e-05, + "step": 11326, + "training_step_time": 0.12098073959350586 + }, + { + "epoch": 1.728363037109375e-05, + "model_forward_time": 0.025063514709472656, + "step": 11327 + }, + { + "epoch": 1.728363037109375e-05, + "step": 11327, + "training_step_time": 0.11560511589050293 + }, + { + "epoch": 1.728515625e-05, + "model_forward_time": 0.02518439292907715, + "step": 11328 + }, + { + "epoch": 1.728515625e-05, + "step": 11328, + "training_step_time": 0.11234426498413086 + }, + { + "epoch": 1.728668212890625e-05, + "model_forward_time": 0.02424335479736328, + "step": 11329 + }, + { + "epoch": 1.728668212890625e-05, + "step": 11329, + "training_step_time": 0.11182308197021484 + }, + { + "epoch": 1.72882080078125e-05, + "grad_norm": 0.5162204504013062, + "learning_rate": 7.340866715609712e-05, + "loss": 0.0237, + "step": 11330 + }, + { + "epoch": 1.72882080078125e-05, + "model_forward_time": 0.02581024169921875, + "step": 11330 + }, + { + "epoch": 1.72882080078125e-05, + "step": 11330, + "training_step_time": 0.11057281494140625 + }, + { + "epoch": 1.728973388671875e-05, + "model_forward_time": 0.025432586669921875, + "step": 11331 + }, + { + "epoch": 1.728973388671875e-05, + "step": 11331, + "training_step_time": 0.11118388175964355 + }, + { + "epoch": 1.7291259765625e-05, + "model_forward_time": 0.025179147720336914, + "step": 11332 + }, + { + "epoch": 1.7291259765625e-05, + "step": 11332, + "training_step_time": 0.10761761665344238 + }, + { + "epoch": 1.729278564453125e-05, + "model_forward_time": 0.025475740432739258, + "step": 11333 + }, + { + "epoch": 1.729278564453125e-05, + "step": 11333, + "training_step_time": 0.1110997200012207 + }, + { + "epoch": 1.72943115234375e-05, + "model_forward_time": 0.025519132614135742, + "step": 11334 + }, + { + "epoch": 1.72943115234375e-05, + "step": 11334, + "training_step_time": 0.10777139663696289 + }, + { + "epoch": 1.729583740234375e-05, + "model_forward_time": 0.025228500366210938, + "step": 11335 + }, + { + "epoch": 1.729583740234375e-05, + "step": 11335, + "training_step_time": 0.11074209213256836 + }, + { + "epoch": 1.729736328125e-05, + "model_forward_time": 0.0243685245513916, + "step": 11336 + }, + { + "epoch": 1.729736328125e-05, + "step": 11336, + "training_step_time": 0.10677742958068848 + }, + { + "epoch": 1.729888916015625e-05, + "model_forward_time": 0.02434539794921875, + "step": 11337 + }, + { + "epoch": 1.729888916015625e-05, + "step": 11337, + "training_step_time": 0.10769414901733398 + }, + { + "epoch": 1.73004150390625e-05, + "model_forward_time": 0.02644038200378418, + "step": 11338 + }, + { + "epoch": 1.73004150390625e-05, + "step": 11338, + "training_step_time": 0.14895009994506836 + }, + { + "epoch": 1.730194091796875e-05, + "model_forward_time": 0.027883291244506836, + "step": 11339 + }, + { + "epoch": 1.730194091796875e-05, + "step": 11339, + "training_step_time": 0.11562299728393555 + }, + { + "epoch": 1.7303466796875e-05, + "grad_norm": 0.6040328145027161, + "learning_rate": 7.335995072666848e-05, + "loss": 0.0211, + "step": 11340 + }, + { + "epoch": 1.7303466796875e-05, + "model_forward_time": 0.026317119598388672, + "step": 11340 + }, + { + "epoch": 1.7303466796875e-05, + "step": 11340, + "training_step_time": 0.22396183013916016 + }, + { + "epoch": 1.730499267578125e-05, + "model_forward_time": 0.024992942810058594, + "step": 11341 + }, + { + "epoch": 1.730499267578125e-05, + "step": 11341, + "training_step_time": 0.12385010719299316 + }, + { + "epoch": 1.73065185546875e-05, + "model_forward_time": 0.02417612075805664, + "step": 11342 + }, + { + "epoch": 1.73065185546875e-05, + "step": 11342, + "training_step_time": 0.11368632316589355 + }, + { + "epoch": 1.730804443359375e-05, + "model_forward_time": 0.025532960891723633, + "step": 11343 + }, + { + "epoch": 1.730804443359375e-05, + "step": 11343, + "training_step_time": 0.12131762504577637 + }, + { + "epoch": 1.73095703125e-05, + "model_forward_time": 0.025728464126586914, + "step": 11344 + }, + { + "epoch": 1.73095703125e-05, + "step": 11344, + "training_step_time": 0.11219191551208496 + }, + { + "epoch": 1.731109619140625e-05, + "model_forward_time": 0.025579452514648438, + "step": 11345 + }, + { + "epoch": 1.731109619140625e-05, + "step": 11345, + "training_step_time": 0.10891866683959961 + }, + { + "epoch": 1.73126220703125e-05, + "model_forward_time": 0.025442123413085938, + "step": 11346 + }, + { + "epoch": 1.73126220703125e-05, + "step": 11346, + "training_step_time": 0.1104280948638916 + }, + { + "epoch": 1.731414794921875e-05, + "model_forward_time": 0.02526712417602539, + "step": 11347 + }, + { + "epoch": 1.731414794921875e-05, + "step": 11347, + "training_step_time": 0.10797810554504395 + }, + { + "epoch": 1.7315673828125e-05, + "model_forward_time": 0.025713443756103516, + "step": 11348 + }, + { + "epoch": 1.7315673828125e-05, + "step": 11348, + "training_step_time": 0.11075592041015625 + }, + { + "epoch": 1.731719970703125e-05, + "model_forward_time": 0.02565741539001465, + "step": 11349 + }, + { + "epoch": 1.731719970703125e-05, + "step": 11349, + "training_step_time": 0.10775184631347656 + }, + { + "epoch": 1.73187255859375e-05, + "grad_norm": 0.3311750292778015, + "learning_rate": 7.331120591269701e-05, + "loss": 0.0452, + "step": 11350 + }, + { + "epoch": 1.73187255859375e-05, + "model_forward_time": 0.025605440139770508, + "step": 11350 + }, + { + "epoch": 1.73187255859375e-05, + "step": 11350, + "training_step_time": 0.11237812042236328 + }, + { + "epoch": 1.732025146484375e-05, + "model_forward_time": 0.025423049926757812, + "step": 11351 + }, + { + "epoch": 1.732025146484375e-05, + "step": 11351, + "training_step_time": 0.10855555534362793 + }, + { + "epoch": 1.732177734375e-05, + "model_forward_time": 0.025609493255615234, + "step": 11352 + }, + { + "epoch": 1.732177734375e-05, + "step": 11352, + "training_step_time": 0.1083838939666748 + }, + { + "epoch": 1.732330322265625e-05, + "model_forward_time": 0.025478601455688477, + "step": 11353 + }, + { + "epoch": 1.732330322265625e-05, + "step": 11353, + "training_step_time": 0.10992193222045898 + }, + { + "epoch": 1.73248291015625e-05, + "model_forward_time": 0.025615453720092773, + "step": 11354 + }, + { + "epoch": 1.73248291015625e-05, + "step": 11354, + "training_step_time": 0.112457275390625 + }, + { + "epoch": 1.732635498046875e-05, + "model_forward_time": 0.02492380142211914, + "step": 11355 + }, + { + "epoch": 1.732635498046875e-05, + "step": 11355, + "training_step_time": 0.11375808715820312 + }, + { + "epoch": 1.7327880859375e-05, + "model_forward_time": 0.024895668029785156, + "step": 11356 + }, + { + "epoch": 1.7327880859375e-05, + "step": 11356, + "training_step_time": 0.12656879425048828 + }, + { + "epoch": 1.732940673828125e-05, + "model_forward_time": 0.026276350021362305, + "step": 11357 + }, + { + "epoch": 1.732940673828125e-05, + "step": 11357, + "training_step_time": 0.1300828456878662 + }, + { + "epoch": 1.73309326171875e-05, + "model_forward_time": 0.02550983428955078, + "step": 11358 + }, + { + "epoch": 1.73309326171875e-05, + "step": 11358, + "training_step_time": 0.18550372123718262 + }, + { + "epoch": 1.733245849609375e-05, + "model_forward_time": 0.025446653366088867, + "step": 11359 + }, + { + "epoch": 1.733245849609375e-05, + "step": 11359, + "training_step_time": 0.11735057830810547 + }, + { + "epoch": 1.7333984375e-05, + "grad_norm": 0.3064781725406647, + "learning_rate": 7.326243277341227e-05, + "loss": 0.0209, + "step": 11360 + }, + { + "epoch": 1.7333984375e-05, + "model_forward_time": 0.024016857147216797, + "step": 11360 + }, + { + "epoch": 1.7333984375e-05, + "step": 11360, + "training_step_time": 0.1140143871307373 + }, + { + "epoch": 1.733551025390625e-05, + "model_forward_time": 0.02623891830444336, + "step": 11361 + }, + { + "epoch": 1.733551025390625e-05, + "step": 11361, + "training_step_time": 0.16826081275939941 + }, + { + "epoch": 1.73370361328125e-05, + "model_forward_time": 0.025244951248168945, + "step": 11362 + }, + { + "epoch": 1.73370361328125e-05, + "step": 11362, + "training_step_time": 0.11737847328186035 + }, + { + "epoch": 1.733856201171875e-05, + "model_forward_time": 0.02947831153869629, + "step": 11363 + }, + { + "epoch": 1.733856201171875e-05, + "step": 11363, + "training_step_time": 0.11568212509155273 + }, + { + "epoch": 1.7340087890625e-05, + "model_forward_time": 0.02548670768737793, + "step": 11364 + }, + { + "epoch": 1.7340087890625e-05, + "step": 11364, + "training_step_time": 0.11246085166931152 + }, + { + "epoch": 1.734161376953125e-05, + "model_forward_time": 0.024087905883789062, + "step": 11365 + }, + { + "epoch": 1.734161376953125e-05, + "step": 11365, + "training_step_time": 0.11239814758300781 + }, + { + "epoch": 1.73431396484375e-05, + "model_forward_time": 0.0251009464263916, + "step": 11366 + }, + { + "epoch": 1.73431396484375e-05, + "step": 11366, + "training_step_time": 0.18930912017822266 + }, + { + "epoch": 1.734466552734375e-05, + "model_forward_time": 0.02454066276550293, + "step": 11367 + }, + { + "epoch": 1.734466552734375e-05, + "step": 11367, + "training_step_time": 0.1839885711669922 + }, + { + "epoch": 1.734619140625e-05, + "model_forward_time": 0.02461862564086914, + "step": 11368 + }, + { + "epoch": 1.734619140625e-05, + "step": 11368, + "training_step_time": 0.11968660354614258 + }, + { + "epoch": 1.734771728515625e-05, + "model_forward_time": 0.02444624900817871, + "step": 11369 + }, + { + "epoch": 1.734771728515625e-05, + "step": 11369, + "training_step_time": 0.13325166702270508 + }, + { + "epoch": 1.73492431640625e-05, + "grad_norm": 0.3132316470146179, + "learning_rate": 7.32136313680782e-05, + "loss": 0.0148, + "step": 11370 + }, + { + "epoch": 1.73492431640625e-05, + "model_forward_time": 0.025119543075561523, + "step": 11370 + }, + { + "epoch": 1.73492431640625e-05, + "step": 11370, + "training_step_time": 0.15434646606445312 + }, + { + "epoch": 1.735076904296875e-05, + "model_forward_time": 0.024571657180786133, + "step": 11371 + }, + { + "epoch": 1.735076904296875e-05, + "step": 11371, + "training_step_time": 0.2157139778137207 + }, + { + "epoch": 1.7352294921875e-05, + "model_forward_time": 0.024752140045166016, + "step": 11372 + }, + { + "epoch": 1.7352294921875e-05, + "step": 11372, + "training_step_time": 0.11632943153381348 + }, + { + "epoch": 1.735382080078125e-05, + "model_forward_time": 0.024642467498779297, + "step": 11373 + }, + { + "epoch": 1.735382080078125e-05, + "step": 11373, + "training_step_time": 0.10507845878601074 + }, + { + "epoch": 1.73553466796875e-05, + "model_forward_time": 0.02550339698791504, + "step": 11374 + }, + { + "epoch": 1.73553466796875e-05, + "step": 11374, + "training_step_time": 0.10614943504333496 + }, + { + "epoch": 1.735687255859375e-05, + "model_forward_time": 0.025216102600097656, + "step": 11375 + }, + { + "epoch": 1.735687255859375e-05, + "step": 11375, + "training_step_time": 0.10657405853271484 + }, + { + "epoch": 1.73583984375e-05, + "model_forward_time": 0.02566051483154297, + "step": 11376 + }, + { + "epoch": 1.73583984375e-05, + "step": 11376, + "training_step_time": 0.10704159736633301 + }, + { + "epoch": 1.735992431640625e-05, + "model_forward_time": 0.02764749526977539, + "step": 11377 + }, + { + "epoch": 1.735992431640625e-05, + "step": 11377, + "training_step_time": 0.10915279388427734 + }, + { + "epoch": 1.73614501953125e-05, + "model_forward_time": 0.025922536849975586, + "step": 11378 + }, + { + "epoch": 1.73614501953125e-05, + "step": 11378, + "training_step_time": 0.11047077178955078 + }, + { + "epoch": 1.736297607421875e-05, + "model_forward_time": 0.025161266326904297, + "step": 11379 + }, + { + "epoch": 1.736297607421875e-05, + "step": 11379, + "training_step_time": 0.10535717010498047 + }, + { + "epoch": 1.7364501953125e-05, + "grad_norm": 0.2911783754825592, + "learning_rate": 7.316480175599309e-05, + "loss": 0.0229, + "step": 11380 + }, + { + "epoch": 1.7364501953125e-05, + "model_forward_time": 0.024929523468017578, + "step": 11380 + }, + { + "epoch": 1.7364501953125e-05, + "step": 11380, + "training_step_time": 0.11052322387695312 + }, + { + "epoch": 1.736602783203125e-05, + "model_forward_time": 0.024608135223388672, + "step": 11381 + }, + { + "epoch": 1.736602783203125e-05, + "step": 11381, + "training_step_time": 0.10532093048095703 + }, + { + "epoch": 1.73675537109375e-05, + "model_forward_time": 0.02503228187561035, + "step": 11382 + }, + { + "epoch": 1.73675537109375e-05, + "step": 11382, + "training_step_time": 0.10649442672729492 + }, + { + "epoch": 1.736907958984375e-05, + "model_forward_time": 0.024957895278930664, + "step": 11383 + }, + { + "epoch": 1.736907958984375e-05, + "step": 11383, + "training_step_time": 0.14383554458618164 + }, + { + "epoch": 1.737060546875e-05, + "model_forward_time": 0.025818347930908203, + "step": 11384 + }, + { + "epoch": 1.737060546875e-05, + "step": 11384, + "training_step_time": 0.10758614540100098 + }, + { + "epoch": 1.737213134765625e-05, + "model_forward_time": 0.02512049674987793, + "step": 11385 + }, + { + "epoch": 1.737213134765625e-05, + "step": 11385, + "training_step_time": 0.11025166511535645 + }, + { + "epoch": 1.73736572265625e-05, + "model_forward_time": 0.025229930877685547, + "step": 11386 + }, + { + "epoch": 1.73736572265625e-05, + "step": 11386, + "training_step_time": 0.13555216789245605 + }, + { + "epoch": 1.737518310546875e-05, + "model_forward_time": 0.025229930877685547, + "step": 11387 + }, + { + "epoch": 1.737518310546875e-05, + "step": 11387, + "training_step_time": 0.1233208179473877 + }, + { + "epoch": 1.7376708984375e-05, + "model_forward_time": 0.024739503860473633, + "step": 11388 + }, + { + "epoch": 1.7376708984375e-05, + "step": 11388, + "training_step_time": 0.1121053695678711 + }, + { + "epoch": 1.737823486328125e-05, + "model_forward_time": 0.025371074676513672, + "step": 11389 + }, + { + "epoch": 1.737823486328125e-05, + "step": 11389, + "training_step_time": 0.12697649002075195 + }, + { + "epoch": 1.73797607421875e-05, + "grad_norm": 0.2643647789955139, + "learning_rate": 7.311594399648957e-05, + "loss": 0.0271, + "step": 11390 + }, + { + "epoch": 1.73797607421875e-05, + "model_forward_time": 0.025416851043701172, + "step": 11390 + }, + { + "epoch": 1.73797607421875e-05, + "step": 11390, + "training_step_time": 0.13001132011413574 + }, + { + "epoch": 1.738128662109375e-05, + "model_forward_time": 0.024435997009277344, + "step": 11391 + }, + { + "epoch": 1.738128662109375e-05, + "step": 11391, + "training_step_time": 0.13266682624816895 + }, + { + "epoch": 1.73828125e-05, + "model_forward_time": 0.024445295333862305, + "step": 11392 + }, + { + "epoch": 1.73828125e-05, + "step": 11392, + "training_step_time": 0.14111018180847168 + }, + { + "epoch": 1.738433837890625e-05, + "model_forward_time": 0.025170564651489258, + "step": 11393 + }, + { + "epoch": 1.738433837890625e-05, + "step": 11393, + "training_step_time": 0.1332406997680664 + }, + { + "epoch": 1.73858642578125e-05, + "model_forward_time": 0.02453136444091797, + "step": 11394 + }, + { + "epoch": 1.73858642578125e-05, + "step": 11394, + "training_step_time": 0.13200998306274414 + }, + { + "epoch": 1.738739013671875e-05, + "model_forward_time": 0.024686574935913086, + "step": 11395 + }, + { + "epoch": 1.738739013671875e-05, + "step": 11395, + "training_step_time": 0.13438081741333008 + }, + { + "epoch": 1.7388916015625e-05, + "model_forward_time": 0.02545952796936035, + "step": 11396 + }, + { + "epoch": 1.7388916015625e-05, + "step": 11396, + "training_step_time": 0.12328314781188965 + }, + { + "epoch": 1.739044189453125e-05, + "model_forward_time": 0.02444171905517578, + "step": 11397 + }, + { + "epoch": 1.739044189453125e-05, + "step": 11397, + "training_step_time": 0.1197969913482666 + }, + { + "epoch": 1.73919677734375e-05, + "model_forward_time": 0.024901866912841797, + "step": 11398 + }, + { + "epoch": 1.73919677734375e-05, + "step": 11398, + "training_step_time": 0.12039685249328613 + }, + { + "epoch": 1.739349365234375e-05, + "model_forward_time": 0.02532029151916504, + "step": 11399 + }, + { + "epoch": 1.739349365234375e-05, + "step": 11399, + "training_step_time": 0.11009931564331055 + }, + { + "epoch": 1.739501953125e-05, + "grad_norm": 0.2940637767314911, + "learning_rate": 7.30670581489344e-05, + "loss": 0.0312, + "step": 11400 + }, + { + "epoch": 1.739501953125e-05, + "model_forward_time": 0.025704145431518555, + "step": 11400 + }, + { + "epoch": 1.739501953125e-05, + "step": 11400, + "training_step_time": 0.17423462867736816 + }, + { + "epoch": 1.739654541015625e-05, + "model_forward_time": 0.024489164352416992, + "step": 11401 + }, + { + "epoch": 1.739654541015625e-05, + "step": 11401, + "training_step_time": 0.14111638069152832 + }, + { + "epoch": 1.73980712890625e-05, + "model_forward_time": 0.02502298355102539, + "step": 11402 + }, + { + "epoch": 1.73980712890625e-05, + "step": 11402, + "training_step_time": 0.10875272750854492 + }, + { + "epoch": 1.739959716796875e-05, + "model_forward_time": 0.02414989471435547, + "step": 11403 + }, + { + "epoch": 1.739959716796875e-05, + "step": 11403, + "training_step_time": 0.1631312370300293 + }, + { + "epoch": 1.7401123046875e-05, + "model_forward_time": 0.02464008331298828, + "step": 11404 + }, + { + "epoch": 1.7401123046875e-05, + "step": 11404, + "training_step_time": 0.15886354446411133 + }, + { + "epoch": 1.740264892578125e-05, + "model_forward_time": 0.025199413299560547, + "step": 11405 + }, + { + "epoch": 1.740264892578125e-05, + "step": 11405, + "training_step_time": 0.2125260829925537 + }, + { + "epoch": 1.74041748046875e-05, + "model_forward_time": 0.024363279342651367, + "step": 11406 + }, + { + "epoch": 1.74041748046875e-05, + "step": 11406, + "training_step_time": 0.1079411506652832 + }, + { + "epoch": 1.740570068359375e-05, + "model_forward_time": 0.024436473846435547, + "step": 11407 + }, + { + "epoch": 1.740570068359375e-05, + "step": 11407, + "training_step_time": 0.11234784126281738 + }, + { + "epoch": 1.74072265625e-05, + "model_forward_time": 0.02502608299255371, + "step": 11408 + }, + { + "epoch": 1.74072265625e-05, + "step": 11408, + "training_step_time": 0.11837577819824219 + }, + { + "epoch": 1.740875244140625e-05, + "model_forward_time": 0.025286197662353516, + "step": 11409 + }, + { + "epoch": 1.740875244140625e-05, + "step": 11409, + "training_step_time": 0.11015486717224121 + }, + { + "epoch": 1.74102783203125e-05, + "grad_norm": 1.000503659248352, + "learning_rate": 7.301814427272849e-05, + "loss": 0.0217, + "step": 11410 + }, + { + "epoch": 1.74102783203125e-05, + "model_forward_time": 0.025148630142211914, + "step": 11410 + }, + { + "epoch": 1.74102783203125e-05, + "step": 11410, + "training_step_time": 0.18817424774169922 + }, + { + "epoch": 1.741180419921875e-05, + "model_forward_time": 0.024494647979736328, + "step": 11411 + }, + { + "epoch": 1.741180419921875e-05, + "step": 11411, + "training_step_time": 0.10843729972839355 + }, + { + "epoch": 1.7413330078125e-05, + "model_forward_time": 0.024289369583129883, + "step": 11412 + }, + { + "epoch": 1.7413330078125e-05, + "step": 11412, + "training_step_time": 0.11371278762817383 + }, + { + "epoch": 1.741485595703125e-05, + "model_forward_time": 0.026688575744628906, + "step": 11413 + }, + { + "epoch": 1.741485595703125e-05, + "step": 11413, + "training_step_time": 0.13641595840454102 + }, + { + "epoch": 1.74163818359375e-05, + "model_forward_time": 0.024964094161987305, + "step": 11414 + }, + { + "epoch": 1.74163818359375e-05, + "step": 11414, + "training_step_time": 0.15340495109558105 + }, + { + "epoch": 1.741790771484375e-05, + "model_forward_time": 0.026422739028930664, + "step": 11415 + }, + { + "epoch": 1.741790771484375e-05, + "step": 11415, + "training_step_time": 0.10624217987060547 + }, + { + "epoch": 1.741943359375e-05, + "model_forward_time": 0.024811506271362305, + "step": 11416 + }, + { + "epoch": 1.741943359375e-05, + "step": 11416, + "training_step_time": 0.12379908561706543 + }, + { + "epoch": 1.742095947265625e-05, + "model_forward_time": 0.025359153747558594, + "step": 11417 + }, + { + "epoch": 1.742095947265625e-05, + "step": 11417, + "training_step_time": 0.1122884750366211 + }, + { + "epoch": 1.74224853515625e-05, + "model_forward_time": 0.025385618209838867, + "step": 11418 + }, + { + "epoch": 1.74224853515625e-05, + "step": 11418, + "training_step_time": 0.1075444221496582 + }, + { + "epoch": 1.742401123046875e-05, + "model_forward_time": 0.025077104568481445, + "step": 11419 + }, + { + "epoch": 1.742401123046875e-05, + "step": 11419, + "training_step_time": 0.1084439754486084 + }, + { + "epoch": 1.7425537109375e-05, + "grad_norm": 0.2606498599052429, + "learning_rate": 7.296920242730682e-05, + "loss": 0.0315, + "step": 11420 + }, + { + "epoch": 1.7425537109375e-05, + "model_forward_time": 0.02472066879272461, + "step": 11420 + }, + { + "epoch": 1.7425537109375e-05, + "step": 11420, + "training_step_time": 0.10718297958374023 + }, + { + "epoch": 1.742706298828125e-05, + "model_forward_time": 0.025311946868896484, + "step": 11421 + }, + { + "epoch": 1.742706298828125e-05, + "step": 11421, + "training_step_time": 0.10581040382385254 + }, + { + "epoch": 1.74285888671875e-05, + "model_forward_time": 0.025194168090820312, + "step": 11422 + }, + { + "epoch": 1.74285888671875e-05, + "step": 11422, + "training_step_time": 0.10973286628723145 + }, + { + "epoch": 1.743011474609375e-05, + "model_forward_time": 0.025191545486450195, + "step": 11423 + }, + { + "epoch": 1.743011474609375e-05, + "step": 11423, + "training_step_time": 0.11003780364990234 + }, + { + "epoch": 1.7431640625e-05, + "model_forward_time": 0.02537083625793457, + "step": 11424 + }, + { + "epoch": 1.7431640625e-05, + "step": 11424, + "training_step_time": 0.10600018501281738 + }, + { + "epoch": 1.743316650390625e-05, + "model_forward_time": 0.025473594665527344, + "step": 11425 + }, + { + "epoch": 1.743316650390625e-05, + "step": 11425, + "training_step_time": 0.10616827011108398 + }, + { + "epoch": 1.74346923828125e-05, + "model_forward_time": 0.024954557418823242, + "step": 11426 + }, + { + "epoch": 1.74346923828125e-05, + "step": 11426, + "training_step_time": 0.11044883728027344 + }, + { + "epoch": 1.743621826171875e-05, + "model_forward_time": 0.025084257125854492, + "step": 11427 + }, + { + "epoch": 1.743621826171875e-05, + "step": 11427, + "training_step_time": 0.11030840873718262 + }, + { + "epoch": 1.7437744140625e-05, + "model_forward_time": 0.02801227569580078, + "step": 11428 + }, + { + "epoch": 1.7437744140625e-05, + "step": 11428, + "training_step_time": 0.15077877044677734 + }, + { + "epoch": 1.743927001953125e-05, + "model_forward_time": 0.025511980056762695, + "step": 11429 + }, + { + "epoch": 1.743927001953125e-05, + "step": 11429, + "training_step_time": 0.10869455337524414 + }, + { + "epoch": 1.74407958984375e-05, + "grad_norm": 0.2633598744869232, + "learning_rate": 7.292023267213835e-05, + "loss": 0.0202, + "step": 11430 + }, + { + "epoch": 1.74407958984375e-05, + "model_forward_time": 0.024935245513916016, + "step": 11430 + }, + { + "epoch": 1.74407958984375e-05, + "step": 11430, + "training_step_time": 0.11026573181152344 + }, + { + "epoch": 1.744232177734375e-05, + "model_forward_time": 0.025504112243652344, + "step": 11431 + }, + { + "epoch": 1.744232177734375e-05, + "step": 11431, + "training_step_time": 0.12179231643676758 + }, + { + "epoch": 1.744384765625e-05, + "model_forward_time": 0.025516271591186523, + "step": 11432 + }, + { + "epoch": 1.744384765625e-05, + "step": 11432, + "training_step_time": 0.1312730312347412 + }, + { + "epoch": 1.744537353515625e-05, + "model_forward_time": 0.024819135665893555, + "step": 11433 + }, + { + "epoch": 1.744537353515625e-05, + "step": 11433, + "training_step_time": 0.11442136764526367 + }, + { + "epoch": 1.74468994140625e-05, + "model_forward_time": 0.02542257308959961, + "step": 11434 + }, + { + "epoch": 1.74468994140625e-05, + "step": 11434, + "training_step_time": 0.10956192016601562 + }, + { + "epoch": 1.744842529296875e-05, + "model_forward_time": 0.025786876678466797, + "step": 11435 + }, + { + "epoch": 1.744842529296875e-05, + "step": 11435, + "training_step_time": 0.10843706130981445 + }, + { + "epoch": 1.7449951171875e-05, + "model_forward_time": 0.025321483612060547, + "step": 11436 + }, + { + "epoch": 1.7449951171875e-05, + "step": 11436, + "training_step_time": 0.1074676513671875 + }, + { + "epoch": 1.745147705078125e-05, + "model_forward_time": 0.02520895004272461, + "step": 11437 + }, + { + "epoch": 1.745147705078125e-05, + "step": 11437, + "training_step_time": 0.10763692855834961 + }, + { + "epoch": 1.74530029296875e-05, + "model_forward_time": 0.02523660659790039, + "step": 11438 + }, + { + "epoch": 1.74530029296875e-05, + "step": 11438, + "training_step_time": 0.11479043960571289 + }, + { + "epoch": 1.745452880859375e-05, + "model_forward_time": 0.02530074119567871, + "step": 11439 + }, + { + "epoch": 1.745452880859375e-05, + "step": 11439, + "training_step_time": 0.10747933387756348 + }, + { + "epoch": 1.74560546875e-05, + "grad_norm": 0.32708433270454407, + "learning_rate": 7.287123506672595e-05, + "loss": 0.0186, + "step": 11440 + }, + { + "epoch": 1.74560546875e-05, + "model_forward_time": 0.024248838424682617, + "step": 11440 + }, + { + "epoch": 1.74560546875e-05, + "step": 11440, + "training_step_time": 0.10738229751586914 + }, + { + "epoch": 1.745758056640625e-05, + "model_forward_time": 0.024587154388427734, + "step": 11441 + }, + { + "epoch": 1.745758056640625e-05, + "step": 11441, + "training_step_time": 0.10714006423950195 + }, + { + "epoch": 1.74591064453125e-05, + "model_forward_time": 0.025261878967285156, + "step": 11442 + }, + { + "epoch": 1.74591064453125e-05, + "step": 11442, + "training_step_time": 0.10979032516479492 + }, + { + "epoch": 1.746063232421875e-05, + "model_forward_time": 0.025285005569458008, + "step": 11443 + }, + { + "epoch": 1.746063232421875e-05, + "step": 11443, + "training_step_time": 0.10970783233642578 + }, + { + "epoch": 1.7462158203125e-05, + "model_forward_time": 0.025391340255737305, + "step": 11444 + }, + { + "epoch": 1.7462158203125e-05, + "step": 11444, + "training_step_time": 0.10991191864013672 + }, + { + "epoch": 1.746368408203125e-05, + "model_forward_time": 0.025222301483154297, + "step": 11445 + }, + { + "epoch": 1.746368408203125e-05, + "step": 11445, + "training_step_time": 0.10866189002990723 + }, + { + "epoch": 1.74652099609375e-05, + "model_forward_time": 0.024389028549194336, + "step": 11446 + }, + { + "epoch": 1.74652099609375e-05, + "step": 11446, + "training_step_time": 0.15033507347106934 + }, + { + "epoch": 1.746673583984375e-05, + "model_forward_time": 0.024841785430908203, + "step": 11447 + }, + { + "epoch": 1.746673583984375e-05, + "step": 11447, + "training_step_time": 0.17988109588623047 + }, + { + "epoch": 1.746826171875e-05, + "model_forward_time": 0.02453923225402832, + "step": 11448 + }, + { + "epoch": 1.746826171875e-05, + "step": 11448, + "training_step_time": 0.10831880569458008 + }, + { + "epoch": 1.746978759765625e-05, + "model_forward_time": 0.024659395217895508, + "step": 11449 + }, + { + "epoch": 1.746978759765625e-05, + "step": 11449, + "training_step_time": 0.13698863983154297 + }, + { + "epoch": 1.74713134765625e-05, + "grad_norm": 0.42815864086151123, + "learning_rate": 7.282220967060633e-05, + "loss": 0.0197, + "step": 11450 + }, + { + "epoch": 1.74713134765625e-05, + "model_forward_time": 0.025053739547729492, + "step": 11450 + }, + { + "epoch": 1.74713134765625e-05, + "step": 11450, + "training_step_time": 0.10758781433105469 + }, + { + "epoch": 1.747283935546875e-05, + "model_forward_time": 0.02604508399963379, + "step": 11451 + }, + { + "epoch": 1.747283935546875e-05, + "step": 11451, + "training_step_time": 0.144456148147583 + }, + { + "epoch": 1.7474365234375e-05, + "model_forward_time": 0.02534937858581543, + "step": 11452 + }, + { + "epoch": 1.7474365234375e-05, + "step": 11452, + "training_step_time": 0.1423196792602539 + }, + { + "epoch": 1.747589111328125e-05, + "model_forward_time": 0.02475881576538086, + "step": 11453 + }, + { + "epoch": 1.747589111328125e-05, + "step": 11453, + "training_step_time": 0.1102907657623291 + }, + { + "epoch": 1.74774169921875e-05, + "model_forward_time": 0.024916648864746094, + "step": 11454 + }, + { + "epoch": 1.74774169921875e-05, + "step": 11454, + "training_step_time": 0.11167359352111816 + }, + { + "epoch": 1.747894287109375e-05, + "model_forward_time": 0.024913311004638672, + "step": 11455 + }, + { + "epoch": 1.747894287109375e-05, + "step": 11455, + "training_step_time": 0.11174917221069336 + }, + { + "epoch": 1.748046875e-05, + "model_forward_time": 0.025392532348632812, + "step": 11456 + }, + { + "epoch": 1.748046875e-05, + "step": 11456, + "training_step_time": 0.1109919548034668 + }, + { + "epoch": 1.748199462890625e-05, + "model_forward_time": 0.025796175003051758, + "step": 11457 + }, + { + "epoch": 1.748199462890625e-05, + "step": 11457, + "training_step_time": 0.19932317733764648 + }, + { + "epoch": 1.74835205078125e-05, + "model_forward_time": 0.024463891983032227, + "step": 11458 + }, + { + "epoch": 1.74835205078125e-05, + "step": 11458, + "training_step_time": 0.10976171493530273 + }, + { + "epoch": 1.748504638671875e-05, + "model_forward_time": 0.024796485900878906, + "step": 11459 + }, + { + "epoch": 1.748504638671875e-05, + "step": 11459, + "training_step_time": 0.10998058319091797 + }, + { + "epoch": 1.7486572265625e-05, + "grad_norm": 0.35386383533477783, + "learning_rate": 7.277315654334997e-05, + "loss": 0.0264, + "step": 11460 + }, + { + "epoch": 1.7486572265625e-05, + "model_forward_time": 0.025449037551879883, + "step": 11460 + }, + { + "epoch": 1.7486572265625e-05, + "step": 11460, + "training_step_time": 0.11427974700927734 + }, + { + "epoch": 1.748809814453125e-05, + "model_forward_time": 0.025151729583740234, + "step": 11461 + }, + { + "epoch": 1.748809814453125e-05, + "step": 11461, + "training_step_time": 0.15288972854614258 + }, + { + "epoch": 1.74896240234375e-05, + "model_forward_time": 0.0245361328125, + "step": 11462 + }, + { + "epoch": 1.74896240234375e-05, + "step": 11462, + "training_step_time": 0.22175884246826172 + }, + { + "epoch": 1.749114990234375e-05, + "model_forward_time": 0.02476048469543457, + "step": 11463 + }, + { + "epoch": 1.749114990234375e-05, + "step": 11463, + "training_step_time": 0.12286376953125 + }, + { + "epoch": 1.749267578125e-05, + "model_forward_time": 0.02408146858215332, + "step": 11464 + }, + { + "epoch": 1.749267578125e-05, + "step": 11464, + "training_step_time": 0.10543489456176758 + }, + { + "epoch": 1.749420166015625e-05, + "model_forward_time": 0.025523900985717773, + "step": 11465 + }, + { + "epoch": 1.749420166015625e-05, + "step": 11465, + "training_step_time": 0.10750365257263184 + }, + { + "epoch": 1.74957275390625e-05, + "model_forward_time": 0.02597784996032715, + "step": 11466 + }, + { + "epoch": 1.74957275390625e-05, + "step": 11466, + "training_step_time": 0.11063313484191895 + }, + { + "epoch": 1.749725341796875e-05, + "model_forward_time": 0.026036739349365234, + "step": 11467 + }, + { + "epoch": 1.749725341796875e-05, + "step": 11467, + "training_step_time": 0.1090087890625 + }, + { + "epoch": 1.7498779296875e-05, + "model_forward_time": 0.0258944034576416, + "step": 11468 + }, + { + "epoch": 1.7498779296875e-05, + "step": 11468, + "training_step_time": 0.10801196098327637 + }, + { + "epoch": 1.750030517578125e-05, + "model_forward_time": 0.025266170501708984, + "step": 11469 + }, + { + "epoch": 1.750030517578125e-05, + "step": 11469, + "training_step_time": 0.10560250282287598 + }, + { + "epoch": 1.75018310546875e-05, + "grad_norm": 0.4047854244709015, + "learning_rate": 7.272407574456103e-05, + "loss": 0.0161, + "step": 11470 + }, + { + "epoch": 1.75018310546875e-05, + "model_forward_time": 0.02540898323059082, + "step": 11470 + }, + { + "epoch": 1.75018310546875e-05, + "step": 11470, + "training_step_time": 0.11034297943115234 + }, + { + "epoch": 1.750335693359375e-05, + "model_forward_time": 0.025775671005249023, + "step": 11471 + }, + { + "epoch": 1.750335693359375e-05, + "step": 11471, + "training_step_time": 0.11063671112060547 + }, + { + "epoch": 1.75048828125e-05, + "model_forward_time": 0.025475025177001953, + "step": 11472 + }, + { + "epoch": 1.75048828125e-05, + "step": 11472, + "training_step_time": 0.1062626838684082 + }, + { + "epoch": 1.750640869140625e-05, + "model_forward_time": 0.025372982025146484, + "step": 11473 + }, + { + "epoch": 1.750640869140625e-05, + "step": 11473, + "training_step_time": 0.10675668716430664 + }, + { + "epoch": 1.75079345703125e-05, + "model_forward_time": 0.02780771255493164, + "step": 11474 + }, + { + "epoch": 1.75079345703125e-05, + "step": 11474, + "training_step_time": 0.1979062557220459 + }, + { + "epoch": 1.750946044921875e-05, + "model_forward_time": 0.024963855743408203, + "step": 11475 + }, + { + "epoch": 1.750946044921875e-05, + "step": 11475, + "training_step_time": 0.10824942588806152 + }, + { + "epoch": 1.7510986328125e-05, + "model_forward_time": 0.02449655532836914, + "step": 11476 + }, + { + "epoch": 1.7510986328125e-05, + "step": 11476, + "training_step_time": 0.11250662803649902 + }, + { + "epoch": 1.751251220703125e-05, + "model_forward_time": 0.024783849716186523, + "step": 11477 + }, + { + "epoch": 1.751251220703125e-05, + "step": 11477, + "training_step_time": 0.12256050109863281 + }, + { + "epoch": 1.75140380859375e-05, + "model_forward_time": 0.025546550750732422, + "step": 11478 + }, + { + "epoch": 1.75140380859375e-05, + "step": 11478, + "training_step_time": 0.12778162956237793 + }, + { + "epoch": 1.751556396484375e-05, + "model_forward_time": 0.024765968322753906, + "step": 11479 + }, + { + "epoch": 1.751556396484375e-05, + "step": 11479, + "training_step_time": 0.1285717487335205 + }, + { + "epoch": 1.751708984375e-05, + "grad_norm": 0.2882809340953827, + "learning_rate": 7.267496733387731e-05, + "loss": 0.0173, + "step": 11480 + }, + { + "epoch": 1.751708984375e-05, + "model_forward_time": 0.02497696876525879, + "step": 11480 + }, + { + "epoch": 1.751708984375e-05, + "step": 11480, + "training_step_time": 0.11112093925476074 + }, + { + "epoch": 1.751861572265625e-05, + "model_forward_time": 0.025364160537719727, + "step": 11481 + }, + { + "epoch": 1.751861572265625e-05, + "step": 11481, + "training_step_time": 0.1420884132385254 + }, + { + "epoch": 1.75201416015625e-05, + "model_forward_time": 0.025162696838378906, + "step": 11482 + }, + { + "epoch": 1.75201416015625e-05, + "step": 11482, + "training_step_time": 0.11285138130187988 + }, + { + "epoch": 1.752166748046875e-05, + "model_forward_time": 0.024341106414794922, + "step": 11483 + }, + { + "epoch": 1.752166748046875e-05, + "step": 11483, + "training_step_time": 0.10926198959350586 + }, + { + "epoch": 1.7523193359375e-05, + "model_forward_time": 0.024616241455078125, + "step": 11484 + }, + { + "epoch": 1.7523193359375e-05, + "step": 11484, + "training_step_time": 0.10896539688110352 + }, + { + "epoch": 1.752471923828125e-05, + "model_forward_time": 0.02519083023071289, + "step": 11485 + }, + { + "epoch": 1.752471923828125e-05, + "step": 11485, + "training_step_time": 0.10934829711914062 + }, + { + "epoch": 1.75262451171875e-05, + "model_forward_time": 0.025469064712524414, + "step": 11486 + }, + { + "epoch": 1.75262451171875e-05, + "step": 11486, + "training_step_time": 0.10724997520446777 + }, + { + "epoch": 1.752777099609375e-05, + "model_forward_time": 0.025366544723510742, + "step": 11487 + }, + { + "epoch": 1.752777099609375e-05, + "step": 11487, + "training_step_time": 0.10768651962280273 + }, + { + "epoch": 1.7529296875e-05, + "model_forward_time": 0.025552749633789062, + "step": 11488 + }, + { + "epoch": 1.7529296875e-05, + "step": 11488, + "training_step_time": 0.10792922973632812 + }, + { + "epoch": 1.753082275390625e-05, + "model_forward_time": 0.025469541549682617, + "step": 11489 + }, + { + "epoch": 1.753082275390625e-05, + "step": 11489, + "training_step_time": 0.11253786087036133 + }, + { + "epoch": 1.75323486328125e-05, + "grad_norm": 0.42318224906921387, + "learning_rate": 7.262583137097018e-05, + "loss": 0.0236, + "step": 11490 + }, + { + "epoch": 1.75323486328125e-05, + "model_forward_time": 0.027422666549682617, + "step": 11490 + }, + { + "epoch": 1.75323486328125e-05, + "step": 11490, + "training_step_time": 0.1113739013671875 + }, + { + "epoch": 1.753387451171875e-05, + "model_forward_time": 0.02533245086669922, + "step": 11491 + }, + { + "epoch": 1.753387451171875e-05, + "step": 11491, + "training_step_time": 0.10569000244140625 + }, + { + "epoch": 1.7535400390625e-05, + "model_forward_time": 0.02447223663330078, + "step": 11492 + }, + { + "epoch": 1.7535400390625e-05, + "step": 11492, + "training_step_time": 0.1419835090637207 + }, + { + "epoch": 1.753692626953125e-05, + "model_forward_time": 0.02485060691833496, + "step": 11493 + }, + { + "epoch": 1.753692626953125e-05, + "step": 11493, + "training_step_time": 0.12059259414672852 + }, + { + "epoch": 1.75384521484375e-05, + "model_forward_time": 0.02569127082824707, + "step": 11494 + }, + { + "epoch": 1.75384521484375e-05, + "step": 11494, + "training_step_time": 0.11272215843200684 + }, + { + "epoch": 1.753997802734375e-05, + "model_forward_time": 0.02535700798034668, + "step": 11495 + }, + { + "epoch": 1.753997802734375e-05, + "step": 11495, + "training_step_time": 0.1081850528717041 + }, + { + "epoch": 1.754150390625e-05, + "model_forward_time": 0.025861263275146484, + "step": 11496 + }, + { + "epoch": 1.754150390625e-05, + "step": 11496, + "training_step_time": 0.11785888671875 + }, + { + "epoch": 1.754302978515625e-05, + "model_forward_time": 0.026509523391723633, + "step": 11497 + }, + { + "epoch": 1.754302978515625e-05, + "step": 11497, + "training_step_time": 0.1850283145904541 + }, + { + "epoch": 1.75445556640625e-05, + "model_forward_time": 0.024777889251708984, + "step": 11498 + }, + { + "epoch": 1.75445556640625e-05, + "step": 11498, + "training_step_time": 0.13240861892700195 + }, + { + "epoch": 1.754608154296875e-05, + "model_forward_time": 0.02462911605834961, + "step": 11499 + }, + { + "epoch": 1.754608154296875e-05, + "step": 11499, + "training_step_time": 0.10773706436157227 + }, + { + "epoch": 1.7547607421875e-05, + "grad_norm": 0.4000401496887207, + "learning_rate": 7.257666791554448e-05, + "loss": 0.0203, + "step": 11500 + }, + { + "epoch": 1.7547607421875e-05, + "model_forward_time": 0.02429986000061035, + "step": 11500 + }, + { + "epoch": 1.7547607421875e-05, + "step": 11500, + "training_step_time": 0.11380577087402344 + }, + { + "epoch": 1.754913330078125e-05, + "model_forward_time": 0.02381587028503418, + "step": 11501 + }, + { + "epoch": 1.754913330078125e-05, + "step": 11501, + "training_step_time": 0.11920881271362305 + }, + { + "epoch": 1.75506591796875e-05, + "model_forward_time": 0.02417445182800293, + "step": 11502 + }, + { + "epoch": 1.75506591796875e-05, + "step": 11502, + "training_step_time": 0.10823202133178711 + }, + { + "epoch": 1.755218505859375e-05, + "model_forward_time": 0.025033235549926758, + "step": 11503 + }, + { + "epoch": 1.755218505859375e-05, + "step": 11503, + "training_step_time": 0.19985365867614746 + }, + { + "epoch": 1.75537109375e-05, + "model_forward_time": 0.02471780776977539, + "step": 11504 + }, + { + "epoch": 1.75537109375e-05, + "step": 11504, + "training_step_time": 0.1525270938873291 + }, + { + "epoch": 1.755523681640625e-05, + "model_forward_time": 0.02434706687927246, + "step": 11505 + }, + { + "epoch": 1.755523681640625e-05, + "step": 11505, + "training_step_time": 0.18303298950195312 + }, + { + "epoch": 1.75567626953125e-05, + "model_forward_time": 0.02452993392944336, + "step": 11506 + }, + { + "epoch": 1.75567626953125e-05, + "step": 11506, + "training_step_time": 0.19329404830932617 + }, + { + "epoch": 1.755828857421875e-05, + "model_forward_time": 0.0242459774017334, + "step": 11507 + }, + { + "epoch": 1.755828857421875e-05, + "step": 11507, + "training_step_time": 0.17239832878112793 + }, + { + "epoch": 1.7559814453125e-05, + "model_forward_time": 0.024364471435546875, + "step": 11508 + }, + { + "epoch": 1.7559814453125e-05, + "step": 11508, + "training_step_time": 0.17524003982543945 + }, + { + "epoch": 1.756134033203125e-05, + "model_forward_time": 0.024054288864135742, + "step": 11509 + }, + { + "epoch": 1.756134033203125e-05, + "step": 11509, + "training_step_time": 0.10601544380187988 + }, + { + "epoch": 1.75628662109375e-05, + "grad_norm": 0.4016773998737335, + "learning_rate": 7.25274770273384e-05, + "loss": 0.016, + "step": 11510 + }, + { + "epoch": 1.75628662109375e-05, + "model_forward_time": 0.02465224266052246, + "step": 11510 + }, + { + "epoch": 1.75628662109375e-05, + "step": 11510, + "training_step_time": 0.10484647750854492 + }, + { + "epoch": 1.756439208984375e-05, + "model_forward_time": 0.025226354598999023, + "step": 11511 + }, + { + "epoch": 1.756439208984375e-05, + "step": 11511, + "training_step_time": 0.10895276069641113 + }, + { + "epoch": 1.756591796875e-05, + "model_forward_time": 0.025446653366088867, + "step": 11512 + }, + { + "epoch": 1.756591796875e-05, + "step": 11512, + "training_step_time": 0.10906291007995605 + }, + { + "epoch": 1.756744384765625e-05, + "model_forward_time": 0.025188207626342773, + "step": 11513 + }, + { + "epoch": 1.756744384765625e-05, + "step": 11513, + "training_step_time": 0.11518192291259766 + }, + { + "epoch": 1.75689697265625e-05, + "model_forward_time": 0.02516651153564453, + "step": 11514 + }, + { + "epoch": 1.75689697265625e-05, + "step": 11514, + "training_step_time": 0.11129117012023926 + }, + { + "epoch": 1.757049560546875e-05, + "model_forward_time": 0.025735855102539062, + "step": 11515 + }, + { + "epoch": 1.757049560546875e-05, + "step": 11515, + "training_step_time": 0.17116594314575195 + }, + { + "epoch": 1.7572021484375e-05, + "model_forward_time": 0.02439427375793457, + "step": 11516 + }, + { + "epoch": 1.7572021484375e-05, + "step": 11516, + "training_step_time": 0.18446779251098633 + }, + { + "epoch": 1.757354736328125e-05, + "model_forward_time": 0.025471210479736328, + "step": 11517 + }, + { + "epoch": 1.757354736328125e-05, + "step": 11517, + "training_step_time": 0.16740012168884277 + }, + { + "epoch": 1.75750732421875e-05, + "model_forward_time": 0.024227380752563477, + "step": 11518 + }, + { + "epoch": 1.75750732421875e-05, + "step": 11518, + "training_step_time": 0.20984816551208496 + }, + { + "epoch": 1.757659912109375e-05, + "model_forward_time": 0.02423405647277832, + "step": 11519 + }, + { + "epoch": 1.757659912109375e-05, + "step": 11519, + "training_step_time": 0.1461503505706787 + }, + { + "epoch": 1.7578125e-05, + "grad_norm": 0.3682970702648163, + "learning_rate": 7.247825876612353e-05, + "loss": 0.0173, + "step": 11520 + }, + { + "epoch": 1.7578125e-05, + "model_forward_time": 0.024031877517700195, + "step": 11520 + }, + { + "epoch": 1.7578125e-05, + "step": 11520, + "training_step_time": 0.20183467864990234 + }, + { + "epoch": 1.757965087890625e-05, + "model_forward_time": 0.024365901947021484, + "step": 11521 + }, + { + "epoch": 1.757965087890625e-05, + "step": 11521, + "training_step_time": 0.14229655265808105 + }, + { + "epoch": 1.75811767578125e-05, + "model_forward_time": 0.024404525756835938, + "step": 11522 + }, + { + "epoch": 1.75811767578125e-05, + "step": 11522, + "training_step_time": 0.13160324096679688 + }, + { + "epoch": 1.758270263671875e-05, + "model_forward_time": 0.024760007858276367, + "step": 11523 + }, + { + "epoch": 1.758270263671875e-05, + "step": 11523, + "training_step_time": 0.11772799491882324 + }, + { + "epoch": 1.7584228515625e-05, + "model_forward_time": 0.025128841400146484, + "step": 11524 + }, + { + "epoch": 1.7584228515625e-05, + "step": 11524, + "training_step_time": 0.11426830291748047 + }, + { + "epoch": 1.758575439453125e-05, + "model_forward_time": 0.025171518325805664, + "step": 11525 + }, + { + "epoch": 1.758575439453125e-05, + "step": 11525, + "training_step_time": 0.11496949195861816 + }, + { + "epoch": 1.75872802734375e-05, + "model_forward_time": 0.0255281925201416, + "step": 11526 + }, + { + "epoch": 1.75872802734375e-05, + "step": 11526, + "training_step_time": 0.11439156532287598 + }, + { + "epoch": 1.758880615234375e-05, + "model_forward_time": 0.025102853775024414, + "step": 11527 + }, + { + "epoch": 1.758880615234375e-05, + "step": 11527, + "training_step_time": 0.10881400108337402 + }, + { + "epoch": 1.759033203125e-05, + "model_forward_time": 0.02544236183166504, + "step": 11528 + }, + { + "epoch": 1.759033203125e-05, + "step": 11528, + "training_step_time": 0.10804486274719238 + }, + { + "epoch": 1.759185791015625e-05, + "model_forward_time": 0.02587151527404785, + "step": 11529 + }, + { + "epoch": 1.759185791015625e-05, + "step": 11529, + "training_step_time": 0.10777497291564941 + }, + { + "epoch": 1.75933837890625e-05, + "grad_norm": 0.2134154587984085, + "learning_rate": 7.242901319170471e-05, + "loss": 0.0148, + "step": 11530 + }, + { + "epoch": 1.75933837890625e-05, + "model_forward_time": 0.027271270751953125, + "step": 11530 + }, + { + "epoch": 1.75933837890625e-05, + "step": 11530, + "training_step_time": 0.11302709579467773 + }, + { + "epoch": 1.759490966796875e-05, + "model_forward_time": 0.024477481842041016, + "step": 11531 + }, + { + "epoch": 1.759490966796875e-05, + "step": 11531, + "training_step_time": 0.1063687801361084 + }, + { + "epoch": 1.7596435546875e-05, + "model_forward_time": 0.025391101837158203, + "step": 11532 + }, + { + "epoch": 1.7596435546875e-05, + "step": 11532, + "training_step_time": 0.10557341575622559 + }, + { + "epoch": 1.759796142578125e-05, + "model_forward_time": 0.02544116973876953, + "step": 11533 + }, + { + "epoch": 1.759796142578125e-05, + "step": 11533, + "training_step_time": 0.10719895362854004 + }, + { + "epoch": 1.75994873046875e-05, + "model_forward_time": 0.025246858596801758, + "step": 11534 + }, + { + "epoch": 1.75994873046875e-05, + "step": 11534, + "training_step_time": 0.10907173156738281 + }, + { + "epoch": 1.760101318359375e-05, + "model_forward_time": 0.025249958038330078, + "step": 11535 + }, + { + "epoch": 1.760101318359375e-05, + "step": 11535, + "training_step_time": 0.1111907958984375 + }, + { + "epoch": 1.76025390625e-05, + "model_forward_time": 0.02581787109375, + "step": 11536 + }, + { + "epoch": 1.76025390625e-05, + "step": 11536, + "training_step_time": 0.1111152172088623 + }, + { + "epoch": 1.760406494140625e-05, + "model_forward_time": 0.02557063102722168, + "step": 11537 + }, + { + "epoch": 1.760406494140625e-05, + "step": 11537, + "training_step_time": 0.20666003227233887 + }, + { + "epoch": 1.76055908203125e-05, + "model_forward_time": 0.025022268295288086, + "step": 11538 + }, + { + "epoch": 1.76055908203125e-05, + "step": 11538, + "training_step_time": 0.13742303848266602 + }, + { + "epoch": 1.760711669921875e-05, + "model_forward_time": 0.024531841278076172, + "step": 11539 + }, + { + "epoch": 1.760711669921875e-05, + "step": 11539, + "training_step_time": 0.17015647888183594 + }, + { + "epoch": 1.7608642578125e-05, + "grad_norm": 0.21647049486637115, + "learning_rate": 7.237974036391992e-05, + "loss": 0.0211, + "step": 11540 + }, + { + "epoch": 1.7608642578125e-05, + "model_forward_time": 0.024743318557739258, + "step": 11540 + }, + { + "epoch": 1.7608642578125e-05, + "step": 11540, + "training_step_time": 0.1389153003692627 + }, + { + "epoch": 1.761016845703125e-05, + "model_forward_time": 0.02474522590637207, + "step": 11541 + }, + { + "epoch": 1.761016845703125e-05, + "step": 11541, + "training_step_time": 0.10873770713806152 + }, + { + "epoch": 1.76116943359375e-05, + "model_forward_time": 0.025516510009765625, + "step": 11542 + }, + { + "epoch": 1.76116943359375e-05, + "step": 11542, + "training_step_time": 0.12326478958129883 + }, + { + "epoch": 1.761322021484375e-05, + "model_forward_time": 0.025625228881835938, + "step": 11543 + }, + { + "epoch": 1.761322021484375e-05, + "step": 11543, + "training_step_time": 0.1107032299041748 + }, + { + "epoch": 1.761474609375e-05, + "model_forward_time": 0.0255892276763916, + "step": 11544 + }, + { + "epoch": 1.761474609375e-05, + "step": 11544, + "training_step_time": 0.10715460777282715 + }, + { + "epoch": 1.761627197265625e-05, + "model_forward_time": 0.028168439865112305, + "step": 11545 + }, + { + "epoch": 1.761627197265625e-05, + "step": 11545, + "training_step_time": 0.20194244384765625 + }, + { + "epoch": 1.76177978515625e-05, + "model_forward_time": 0.02452707290649414, + "step": 11546 + }, + { + "epoch": 1.76177978515625e-05, + "step": 11546, + "training_step_time": 0.10869026184082031 + }, + { + "epoch": 1.761932373046875e-05, + "model_forward_time": 0.024935483932495117, + "step": 11547 + }, + { + "epoch": 1.761932373046875e-05, + "step": 11547, + "training_step_time": 0.10661029815673828 + }, + { + "epoch": 1.7620849609375e-05, + "model_forward_time": 0.025375843048095703, + "step": 11548 + }, + { + "epoch": 1.7620849609375e-05, + "step": 11548, + "training_step_time": 0.13761067390441895 + }, + { + "epoch": 1.762237548828125e-05, + "model_forward_time": 0.02534198760986328, + "step": 11549 + }, + { + "epoch": 1.762237548828125e-05, + "step": 11549, + "training_step_time": 0.16053152084350586 + }, + { + "epoch": 1.76239013671875e-05, + "grad_norm": 0.6431434750556946, + "learning_rate": 7.233044034264034e-05, + "loss": 0.0234, + "step": 11550 + }, + { + "epoch": 1.76239013671875e-05, + "model_forward_time": 0.02430868148803711, + "step": 11550 + }, + { + "epoch": 1.76239013671875e-05, + "step": 11550, + "training_step_time": 0.22101736068725586 + }, + { + "epoch": 1.762542724609375e-05, + "model_forward_time": 0.025033235549926758, + "step": 11551 + }, + { + "epoch": 1.762542724609375e-05, + "step": 11551, + "training_step_time": 0.11492657661437988 + }, + { + "epoch": 1.7626953125e-05, + "model_forward_time": 0.02476954460144043, + "step": 11552 + }, + { + "epoch": 1.7626953125e-05, + "step": 11552, + "training_step_time": 0.10618138313293457 + }, + { + "epoch": 1.762847900390625e-05, + "model_forward_time": 0.02547764778137207, + "step": 11553 + }, + { + "epoch": 1.762847900390625e-05, + "step": 11553, + "training_step_time": 0.10851502418518066 + }, + { + "epoch": 1.76300048828125e-05, + "model_forward_time": 0.025367259979248047, + "step": 11554 + }, + { + "epoch": 1.76300048828125e-05, + "step": 11554, + "training_step_time": 0.10888099670410156 + }, + { + "epoch": 1.763153076171875e-05, + "model_forward_time": 0.025510549545288086, + "step": 11555 + }, + { + "epoch": 1.763153076171875e-05, + "step": 11555, + "training_step_time": 0.11215877532958984 + }, + { + "epoch": 1.7633056640625e-05, + "model_forward_time": 0.02527785301208496, + "step": 11556 + }, + { + "epoch": 1.7633056640625e-05, + "step": 11556, + "training_step_time": 0.10798525810241699 + }, + { + "epoch": 1.763458251953125e-05, + "model_forward_time": 0.02516770362854004, + "step": 11557 + }, + { + "epoch": 1.763458251953125e-05, + "step": 11557, + "training_step_time": 0.11548089981079102 + }, + { + "epoch": 1.76361083984375e-05, + "model_forward_time": 0.024836301803588867, + "step": 11558 + }, + { + "epoch": 1.76361083984375e-05, + "step": 11558, + "training_step_time": 0.10821533203125 + }, + { + "epoch": 1.763763427734375e-05, + "model_forward_time": 0.025320053100585938, + "step": 11559 + }, + { + "epoch": 1.763763427734375e-05, + "step": 11559, + "training_step_time": 0.10892486572265625 + }, + { + "epoch": 1.763916015625e-05, + "grad_norm": 0.35514453053474426, + "learning_rate": 7.22811131877701e-05, + "loss": 0.0196, + "step": 11560 + }, + { + "epoch": 1.763916015625e-05, + "model_forward_time": 0.024505138397216797, + "step": 11560 + }, + { + "epoch": 1.763916015625e-05, + "step": 11560, + "training_step_time": 0.10990452766418457 + }, + { + "epoch": 1.764068603515625e-05, + "model_forward_time": 0.025338172912597656, + "step": 11561 + }, + { + "epoch": 1.764068603515625e-05, + "step": 11561, + "training_step_time": 0.10818624496459961 + }, + { + "epoch": 1.76422119140625e-05, + "model_forward_time": 0.025676727294921875, + "step": 11562 + }, + { + "epoch": 1.76422119140625e-05, + "step": 11562, + "training_step_time": 0.10762429237365723 + }, + { + "epoch": 1.764373779296875e-05, + "model_forward_time": 0.024802684783935547, + "step": 11563 + }, + { + "epoch": 1.764373779296875e-05, + "step": 11563, + "training_step_time": 0.1070241928100586 + }, + { + "epoch": 1.7645263671875e-05, + "model_forward_time": 0.025272607803344727, + "step": 11564 + }, + { + "epoch": 1.7645263671875e-05, + "step": 11564, + "training_step_time": 0.10660171508789062 + }, + { + "epoch": 1.764678955078125e-05, + "model_forward_time": 0.025356531143188477, + "step": 11565 + }, + { + "epoch": 1.764678955078125e-05, + "step": 11565, + "training_step_time": 0.17947673797607422 + }, + { + "epoch": 1.76483154296875e-05, + "model_forward_time": 0.02485060691833496, + "step": 11566 + }, + { + "epoch": 1.76483154296875e-05, + "step": 11566, + "training_step_time": 0.11304736137390137 + }, + { + "epoch": 1.764984130859375e-05, + "model_forward_time": 0.024890899658203125, + "step": 11567 + }, + { + "epoch": 1.764984130859375e-05, + "step": 11567, + "training_step_time": 0.11322975158691406 + }, + { + "epoch": 1.76513671875e-05, + "model_forward_time": 0.02559828758239746, + "step": 11568 + }, + { + "epoch": 1.76513671875e-05, + "step": 11568, + "training_step_time": 0.11980247497558594 + }, + { + "epoch": 1.765289306640625e-05, + "model_forward_time": 0.02533102035522461, + "step": 11569 + }, + { + "epoch": 1.765289306640625e-05, + "step": 11569, + "training_step_time": 0.12816262245178223 + }, + { + "epoch": 1.76544189453125e-05, + "grad_norm": 0.23926854133605957, + "learning_rate": 7.223175895924638e-05, + "loss": 0.0162, + "step": 11570 + }, + { + "epoch": 1.76544189453125e-05, + "model_forward_time": 0.024827003479003906, + "step": 11570 + }, + { + "epoch": 1.76544189453125e-05, + "step": 11570, + "training_step_time": 0.11135435104370117 + }, + { + "epoch": 1.765594482421875e-05, + "model_forward_time": 0.0253448486328125, + "step": 11571 + }, + { + "epoch": 1.765594482421875e-05, + "step": 11571, + "training_step_time": 0.11720108985900879 + }, + { + "epoch": 1.7657470703125e-05, + "model_forward_time": 0.025081157684326172, + "step": 11572 + }, + { + "epoch": 1.7657470703125e-05, + "step": 11572, + "training_step_time": 0.11251521110534668 + }, + { + "epoch": 1.765899658203125e-05, + "model_forward_time": 0.025596141815185547, + "step": 11573 + }, + { + "epoch": 1.765899658203125e-05, + "step": 11573, + "training_step_time": 0.1108241081237793 + }, + { + "epoch": 1.76605224609375e-05, + "model_forward_time": 0.025294780731201172, + "step": 11574 + }, + { + "epoch": 1.76605224609375e-05, + "step": 11574, + "training_step_time": 0.10776066780090332 + }, + { + "epoch": 1.766204833984375e-05, + "model_forward_time": 0.02524852752685547, + "step": 11575 + }, + { + "epoch": 1.766204833984375e-05, + "step": 11575, + "training_step_time": 0.11140084266662598 + }, + { + "epoch": 1.766357421875e-05, + "model_forward_time": 0.025841951370239258, + "step": 11576 + }, + { + "epoch": 1.766357421875e-05, + "step": 11576, + "training_step_time": 0.109710693359375 + }, + { + "epoch": 1.766510009765625e-05, + "model_forward_time": 0.025137662887573242, + "step": 11577 + }, + { + "epoch": 1.766510009765625e-05, + "step": 11577, + "training_step_time": 0.11013913154602051 + }, + { + "epoch": 1.76666259765625e-05, + "model_forward_time": 0.025402545928955078, + "step": 11578 + }, + { + "epoch": 1.76666259765625e-05, + "step": 11578, + "training_step_time": 0.10993123054504395 + }, + { + "epoch": 1.766815185546875e-05, + "model_forward_time": 0.0252535343170166, + "step": 11579 + }, + { + "epoch": 1.766815185546875e-05, + "step": 11579, + "training_step_time": 0.10754990577697754 + }, + { + "epoch": 1.7669677734375e-05, + "grad_norm": 0.44454053044319153, + "learning_rate": 7.218237771703921e-05, + "loss": 0.026, + "step": 11580 + }, + { + "epoch": 1.7669677734375e-05, + "model_forward_time": 0.025673627853393555, + "step": 11580 + }, + { + "epoch": 1.7669677734375e-05, + "step": 11580, + "training_step_time": 0.10871028900146484 + }, + { + "epoch": 1.767120361328125e-05, + "model_forward_time": 0.025425434112548828, + "step": 11581 + }, + { + "epoch": 1.767120361328125e-05, + "step": 11581, + "training_step_time": 0.13261079788208008 + }, + { + "epoch": 1.76727294921875e-05, + "model_forward_time": 0.024957656860351562, + "step": 11582 + }, + { + "epoch": 1.76727294921875e-05, + "step": 11582, + "training_step_time": 0.12287592887878418 + }, + { + "epoch": 1.767425537109375e-05, + "model_forward_time": 0.025315046310424805, + "step": 11583 + }, + { + "epoch": 1.767425537109375e-05, + "step": 11583, + "training_step_time": 0.11443495750427246 + }, + { + "epoch": 1.767578125e-05, + "model_forward_time": 0.025872468948364258, + "step": 11584 + }, + { + "epoch": 1.767578125e-05, + "step": 11584, + "training_step_time": 0.11126446723937988 + }, + { + "epoch": 1.767730712890625e-05, + "model_forward_time": 0.0257112979888916, + "step": 11585 + }, + { + "epoch": 1.767730712890625e-05, + "step": 11585, + "training_step_time": 0.12569880485534668 + }, + { + "epoch": 1.76788330078125e-05, + "model_forward_time": 0.025865793228149414, + "step": 11586 + }, + { + "epoch": 1.76788330078125e-05, + "step": 11586, + "training_step_time": 0.20575428009033203 + }, + { + "epoch": 1.768035888671875e-05, + "model_forward_time": 0.0246734619140625, + "step": 11587 + }, + { + "epoch": 1.768035888671875e-05, + "step": 11587, + "training_step_time": 0.15195798873901367 + }, + { + "epoch": 1.7681884765625e-05, + "model_forward_time": 0.025154590606689453, + "step": 11588 + }, + { + "epoch": 1.7681884765625e-05, + "step": 11588, + "training_step_time": 0.13219165802001953 + }, + { + "epoch": 1.768341064453125e-05, + "model_forward_time": 0.024851083755493164, + "step": 11589 + }, + { + "epoch": 1.768341064453125e-05, + "step": 11589, + "training_step_time": 0.1083836555480957 + }, + { + "epoch": 1.76849365234375e-05, + "grad_norm": 0.3830711543560028, + "learning_rate": 7.213296952115144e-05, + "loss": 0.023, + "step": 11590 + }, + { + "epoch": 1.76849365234375e-05, + "model_forward_time": 0.025335311889648438, + "step": 11590 + }, + { + "epoch": 1.76849365234375e-05, + "step": 11590, + "training_step_time": 0.1896038055419922 + }, + { + "epoch": 1.768646240234375e-05, + "model_forward_time": 0.025109052658081055, + "step": 11591 + }, + { + "epoch": 1.768646240234375e-05, + "step": 11591, + "training_step_time": 0.20410871505737305 + }, + { + "epoch": 1.768798828125e-05, + "model_forward_time": 0.0247805118560791, + "step": 11592 + }, + { + "epoch": 1.768798828125e-05, + "step": 11592, + "training_step_time": 0.1139223575592041 + }, + { + "epoch": 1.768951416015625e-05, + "model_forward_time": 0.02457571029663086, + "step": 11593 + }, + { + "epoch": 1.768951416015625e-05, + "step": 11593, + "training_step_time": 0.14266324043273926 + }, + { + "epoch": 1.76910400390625e-05, + "model_forward_time": 0.0252077579498291, + "step": 11594 + }, + { + "epoch": 1.76910400390625e-05, + "step": 11594, + "training_step_time": 0.16104626655578613 + }, + { + "epoch": 1.769256591796875e-05, + "model_forward_time": 0.024526357650756836, + "step": 11595 + }, + { + "epoch": 1.769256591796875e-05, + "step": 11595, + "training_step_time": 0.21456432342529297 + }, + { + "epoch": 1.7694091796875e-05, + "model_forward_time": 0.025170087814331055, + "step": 11596 + }, + { + "epoch": 1.7694091796875e-05, + "step": 11596, + "training_step_time": 0.10934233665466309 + }, + { + "epoch": 1.769561767578125e-05, + "model_forward_time": 0.024762630462646484, + "step": 11597 + }, + { + "epoch": 1.769561767578125e-05, + "step": 11597, + "training_step_time": 0.10715699195861816 + }, + { + "epoch": 1.76971435546875e-05, + "model_forward_time": 0.024912595748901367, + "step": 11598 + }, + { + "epoch": 1.76971435546875e-05, + "step": 11598, + "training_step_time": 0.10789823532104492 + }, + { + "epoch": 1.769866943359375e-05, + "model_forward_time": 0.026125669479370117, + "step": 11599 + }, + { + "epoch": 1.769866943359375e-05, + "step": 11599, + "training_step_time": 0.11000180244445801 + }, + { + "epoch": 1.77001953125e-05, + "grad_norm": 0.24346691370010376, + "learning_rate": 7.20835344316187e-05, + "loss": 0.016, + "step": 11600 + }, + { + "epoch": 1.77001953125e-05, + "model_forward_time": 0.02569890022277832, + "step": 11600 + }, + { + "epoch": 1.77001953125e-05, + "step": 11600, + "training_step_time": 0.10920929908752441 + }, + { + "epoch": 1.770172119140625e-05, + "model_forward_time": 0.025798320770263672, + "step": 11601 + }, + { + "epoch": 1.770172119140625e-05, + "step": 11601, + "training_step_time": 0.10577130317687988 + }, + { + "epoch": 1.77032470703125e-05, + "model_forward_time": 0.02657341957092285, + "step": 11602 + }, + { + "epoch": 1.77032470703125e-05, + "step": 11602, + "training_step_time": 0.10783815383911133 + }, + { + "epoch": 1.770477294921875e-05, + "model_forward_time": 0.025250673294067383, + "step": 11603 + }, + { + "epoch": 1.770477294921875e-05, + "step": 11603, + "training_step_time": 0.10725259780883789 + }, + { + "epoch": 1.7706298828125e-05, + "model_forward_time": 0.029310226440429688, + "step": 11604 + }, + { + "epoch": 1.7706298828125e-05, + "step": 11604, + "training_step_time": 0.1129915714263916 + }, + { + "epoch": 1.770782470703125e-05, + "model_forward_time": 0.025476455688476562, + "step": 11605 + }, + { + "epoch": 1.770782470703125e-05, + "step": 11605, + "training_step_time": 0.10977768898010254 + }, + { + "epoch": 1.77093505859375e-05, + "model_forward_time": 0.0253756046295166, + "step": 11606 + }, + { + "epoch": 1.77093505859375e-05, + "step": 11606, + "training_step_time": 0.10576105117797852 + }, + { + "epoch": 1.771087646484375e-05, + "model_forward_time": 0.02524423599243164, + "step": 11607 + }, + { + "epoch": 1.771087646484375e-05, + "step": 11607, + "training_step_time": 0.10758042335510254 + }, + { + "epoch": 1.771240234375e-05, + "model_forward_time": 0.02527141571044922, + "step": 11608 + }, + { + "epoch": 1.771240234375e-05, + "step": 11608, + "training_step_time": 0.1098628044128418 + }, + { + "epoch": 1.771392822265625e-05, + "model_forward_time": 0.025182485580444336, + "step": 11609 + }, + { + "epoch": 1.771392822265625e-05, + "step": 11609, + "training_step_time": 0.10816168785095215 + }, + { + "epoch": 1.77154541015625e-05, + "grad_norm": 0.2136213779449463, + "learning_rate": 7.203407250850928e-05, + "loss": 0.0335, + "step": 11610 + }, + { + "epoch": 1.77154541015625e-05, + "model_forward_time": 0.024566173553466797, + "step": 11610 + }, + { + "epoch": 1.77154541015625e-05, + "step": 11610, + "training_step_time": 0.11071109771728516 + }, + { + "epoch": 1.771697998046875e-05, + "model_forward_time": 0.024147748947143555, + "step": 11611 + }, + { + "epoch": 1.771697998046875e-05, + "step": 11611, + "training_step_time": 0.12538456916809082 + }, + { + "epoch": 1.7718505859375e-05, + "model_forward_time": 0.024240493774414062, + "step": 11612 + }, + { + "epoch": 1.7718505859375e-05, + "step": 11612, + "training_step_time": 0.1155402660369873 + }, + { + "epoch": 1.772003173828125e-05, + "model_forward_time": 0.02580738067626953, + "step": 11613 + }, + { + "epoch": 1.772003173828125e-05, + "step": 11613, + "training_step_time": 0.1743919849395752 + }, + { + "epoch": 1.77215576171875e-05, + "model_forward_time": 0.025368452072143555, + "step": 11614 + }, + { + "epoch": 1.77215576171875e-05, + "step": 11614, + "training_step_time": 0.17767572402954102 + }, + { + "epoch": 1.772308349609375e-05, + "model_forward_time": 0.02798604965209961, + "step": 11615 + }, + { + "epoch": 1.772308349609375e-05, + "step": 11615, + "training_step_time": 0.12120676040649414 + }, + { + "epoch": 1.7724609375e-05, + "model_forward_time": 0.02402329444885254, + "step": 11616 + }, + { + "epoch": 1.7724609375e-05, + "step": 11616, + "training_step_time": 0.1259171962738037 + }, + { + "epoch": 1.772613525390625e-05, + "model_forward_time": 0.02382349967956543, + "step": 11617 + }, + { + "epoch": 1.772613525390625e-05, + "step": 11617, + "training_step_time": 0.1247856616973877 + }, + { + "epoch": 1.77276611328125e-05, + "model_forward_time": 0.02445697784423828, + "step": 11618 + }, + { + "epoch": 1.77276611328125e-05, + "step": 11618, + "training_step_time": 0.1168069839477539 + }, + { + "epoch": 1.772918701171875e-05, + "model_forward_time": 0.02736973762512207, + "step": 11619 + }, + { + "epoch": 1.772918701171875e-05, + "step": 11619, + "training_step_time": 0.11309957504272461 + }, + { + "epoch": 1.7730712890625e-05, + "grad_norm": 0.35636040568351746, + "learning_rate": 7.19845838119241e-05, + "loss": 0.0323, + "step": 11620 + }, + { + "epoch": 1.7730712890625e-05, + "model_forward_time": 0.02549433708190918, + "step": 11620 + }, + { + "epoch": 1.7730712890625e-05, + "step": 11620, + "training_step_time": 0.12039041519165039 + }, + { + "epoch": 1.773223876953125e-05, + "model_forward_time": 0.02460765838623047, + "step": 11621 + }, + { + "epoch": 1.773223876953125e-05, + "step": 11621, + "training_step_time": 0.11919426918029785 + }, + { + "epoch": 1.77337646484375e-05, + "model_forward_time": 0.024448871612548828, + "step": 11622 + }, + { + "epoch": 1.77337646484375e-05, + "step": 11622, + "training_step_time": 0.11164331436157227 + }, + { + "epoch": 1.773529052734375e-05, + "model_forward_time": 0.024251222610473633, + "step": 11623 + }, + { + "epoch": 1.773529052734375e-05, + "step": 11623, + "training_step_time": 0.10883402824401855 + }, + { + "epoch": 1.773681640625e-05, + "model_forward_time": 0.024695634841918945, + "step": 11624 + }, + { + "epoch": 1.773681640625e-05, + "step": 11624, + "training_step_time": 0.10844802856445312 + }, + { + "epoch": 1.773834228515625e-05, + "model_forward_time": 0.025716543197631836, + "step": 11625 + }, + { + "epoch": 1.773834228515625e-05, + "step": 11625, + "training_step_time": 0.10801029205322266 + }, + { + "epoch": 1.77398681640625e-05, + "model_forward_time": 0.025342941284179688, + "step": 11626 + }, + { + "epoch": 1.77398681640625e-05, + "step": 11626, + "training_step_time": 0.152634859085083 + }, + { + "epoch": 1.774139404296875e-05, + "model_forward_time": 0.02515554428100586, + "step": 11627 + }, + { + "epoch": 1.774139404296875e-05, + "step": 11627, + "training_step_time": 0.10698914527893066 + }, + { + "epoch": 1.7742919921875e-05, + "model_forward_time": 0.0254518985748291, + "step": 11628 + }, + { + "epoch": 1.7742919921875e-05, + "step": 11628, + "training_step_time": 0.19255375862121582 + }, + { + "epoch": 1.774444580078125e-05, + "model_forward_time": 0.024674415588378906, + "step": 11629 + }, + { + "epoch": 1.774444580078125e-05, + "step": 11629, + "training_step_time": 0.14569759368896484 + }, + { + "epoch": 1.77459716796875e-05, + "grad_norm": 0.33946698904037476, + "learning_rate": 7.193506840199657e-05, + "loss": 0.0382, + "step": 11630 + }, + { + "epoch": 1.77459716796875e-05, + "model_forward_time": 0.025098323822021484, + "step": 11630 + }, + { + "epoch": 1.77459716796875e-05, + "step": 11630, + "training_step_time": 0.18776917457580566 + }, + { + "epoch": 1.774749755859375e-05, + "model_forward_time": 0.024368762969970703, + "step": 11631 + }, + { + "epoch": 1.774749755859375e-05, + "step": 11631, + "training_step_time": 0.17843222618103027 + }, + { + "epoch": 1.77490234375e-05, + "model_forward_time": 0.02482771873474121, + "step": 11632 + }, + { + "epoch": 1.77490234375e-05, + "step": 11632, + "training_step_time": 0.11714029312133789 + }, + { + "epoch": 1.775054931640625e-05, + "model_forward_time": 0.024992942810058594, + "step": 11633 + }, + { + "epoch": 1.775054931640625e-05, + "step": 11633, + "training_step_time": 0.10330963134765625 + }, + { + "epoch": 1.77520751953125e-05, + "model_forward_time": 0.025561809539794922, + "step": 11634 + }, + { + "epoch": 1.77520751953125e-05, + "step": 11634, + "training_step_time": 0.19768619537353516 + }, + { + "epoch": 1.775360107421875e-05, + "model_forward_time": 0.02456045150756836, + "step": 11635 + }, + { + "epoch": 1.775360107421875e-05, + "step": 11635, + "training_step_time": 0.127349853515625 + }, + { + "epoch": 1.7755126953125e-05, + "model_forward_time": 0.02493429183959961, + "step": 11636 + }, + { + "epoch": 1.7755126953125e-05, + "step": 11636, + "training_step_time": 0.1079869270324707 + }, + { + "epoch": 1.775665283203125e-05, + "model_forward_time": 0.024838924407958984, + "step": 11637 + }, + { + "epoch": 1.775665283203125e-05, + "step": 11637, + "training_step_time": 0.143815279006958 + }, + { + "epoch": 1.77581787109375e-05, + "model_forward_time": 0.025330066680908203, + "step": 11638 + }, + { + "epoch": 1.77581787109375e-05, + "step": 11638, + "training_step_time": 0.1566317081451416 + }, + { + "epoch": 1.775970458984375e-05, + "model_forward_time": 0.024809837341308594, + "step": 11639 + }, + { + "epoch": 1.775970458984375e-05, + "step": 11639, + "training_step_time": 0.23610234260559082 + }, + { + "epoch": 1.776123046875e-05, + "grad_norm": 0.26525023579597473, + "learning_rate": 7.188552633889259e-05, + "loss": 0.0178, + "step": 11640 + }, + { + "epoch": 1.776123046875e-05, + "model_forward_time": 0.023571491241455078, + "step": 11640 + }, + { + "epoch": 1.776123046875e-05, + "step": 11640, + "training_step_time": 0.12482643127441406 + }, + { + "epoch": 1.776275634765625e-05, + "model_forward_time": 0.024907350540161133, + "step": 11641 + }, + { + "epoch": 1.776275634765625e-05, + "step": 11641, + "training_step_time": 0.1228783130645752 + }, + { + "epoch": 1.77642822265625e-05, + "model_forward_time": 0.025429487228393555, + "step": 11642 + }, + { + "epoch": 1.77642822265625e-05, + "step": 11642, + "training_step_time": 0.11895179748535156 + }, + { + "epoch": 1.776580810546875e-05, + "model_forward_time": 0.025605201721191406, + "step": 11643 + }, + { + "epoch": 1.776580810546875e-05, + "step": 11643, + "training_step_time": 0.11651968955993652 + }, + { + "epoch": 1.7767333984375e-05, + "model_forward_time": 0.02673172950744629, + "step": 11644 + }, + { + "epoch": 1.7767333984375e-05, + "step": 11644, + "training_step_time": 0.11143970489501953 + }, + { + "epoch": 1.776885986328125e-05, + "model_forward_time": 0.02569580078125, + "step": 11645 + }, + { + "epoch": 1.776885986328125e-05, + "step": 11645, + "training_step_time": 0.10963630676269531 + }, + { + "epoch": 1.77703857421875e-05, + "model_forward_time": 0.02579808235168457, + "step": 11646 + }, + { + "epoch": 1.77703857421875e-05, + "step": 11646, + "training_step_time": 0.11155867576599121 + }, + { + "epoch": 1.777191162109375e-05, + "model_forward_time": 0.026164531707763672, + "step": 11647 + }, + { + "epoch": 1.777191162109375e-05, + "step": 11647, + "training_step_time": 0.10988450050354004 + }, + { + "epoch": 1.77734375e-05, + "model_forward_time": 0.025140762329101562, + "step": 11648 + }, + { + "epoch": 1.77734375e-05, + "step": 11648, + "training_step_time": 0.10914969444274902 + }, + { + "epoch": 1.777496337890625e-05, + "model_forward_time": 0.025704145431518555, + "step": 11649 + }, + { + "epoch": 1.777496337890625e-05, + "step": 11649, + "training_step_time": 0.1092679500579834 + }, + { + "epoch": 1.77764892578125e-05, + "grad_norm": 0.5020220279693604, + "learning_rate": 7.183595768281043e-05, + "loss": 0.0212, + "step": 11650 + }, + { + "epoch": 1.77764892578125e-05, + "model_forward_time": 0.024474382400512695, + "step": 11650 + }, + { + "epoch": 1.77764892578125e-05, + "step": 11650, + "training_step_time": 0.10893607139587402 + }, + { + "epoch": 1.777801513671875e-05, + "model_forward_time": 0.02434539794921875, + "step": 11651 + }, + { + "epoch": 1.777801513671875e-05, + "step": 11651, + "training_step_time": 0.1083683967590332 + }, + { + "epoch": 1.7779541015625e-05, + "model_forward_time": 0.0255429744720459, + "step": 11652 + }, + { + "epoch": 1.7779541015625e-05, + "step": 11652, + "training_step_time": 0.10792088508605957 + }, + { + "epoch": 1.778106689453125e-05, + "model_forward_time": 0.025328636169433594, + "step": 11653 + }, + { + "epoch": 1.778106689453125e-05, + "step": 11653, + "training_step_time": 0.10857510566711426 + }, + { + "epoch": 1.77825927734375e-05, + "model_forward_time": 0.025704622268676758, + "step": 11654 + }, + { + "epoch": 1.77825927734375e-05, + "step": 11654, + "training_step_time": 0.10866785049438477 + }, + { + "epoch": 1.778411865234375e-05, + "model_forward_time": 0.029287099838256836, + "step": 11655 + }, + { + "epoch": 1.778411865234375e-05, + "step": 11655, + "training_step_time": 0.11653470993041992 + }, + { + "epoch": 1.778564453125e-05, + "model_forward_time": 0.025564908981323242, + "step": 11656 + }, + { + "epoch": 1.778564453125e-05, + "step": 11656, + "training_step_time": 0.11843705177307129 + }, + { + "epoch": 1.778717041015625e-05, + "model_forward_time": 0.025386810302734375, + "step": 11657 + }, + { + "epoch": 1.778717041015625e-05, + "step": 11657, + "training_step_time": 0.17536282539367676 + }, + { + "epoch": 1.77886962890625e-05, + "model_forward_time": 0.024785995483398438, + "step": 11658 + }, + { + "epoch": 1.77886962890625e-05, + "step": 11658, + "training_step_time": 0.1766061782836914 + }, + { + "epoch": 1.779022216796875e-05, + "model_forward_time": 0.024843931198120117, + "step": 11659 + }, + { + "epoch": 1.779022216796875e-05, + "step": 11659, + "training_step_time": 0.11484909057617188 + }, + { + "epoch": 1.7791748046875e-05, + "grad_norm": 0.2558012008666992, + "learning_rate": 7.178636249398072e-05, + "loss": 0.0205, + "step": 11660 + }, + { + "epoch": 1.7791748046875e-05, + "model_forward_time": 0.02511906623840332, + "step": 11660 + }, + { + "epoch": 1.7791748046875e-05, + "step": 11660, + "training_step_time": 0.11955547332763672 + }, + { + "epoch": 1.779327392578125e-05, + "model_forward_time": 0.025829553604125977, + "step": 11661 + }, + { + "epoch": 1.779327392578125e-05, + "step": 11661, + "training_step_time": 0.10851550102233887 + }, + { + "epoch": 1.77947998046875e-05, + "model_forward_time": 0.026876449584960938, + "step": 11662 + }, + { + "epoch": 1.77947998046875e-05, + "step": 11662, + "training_step_time": 0.10976052284240723 + }, + { + "epoch": 1.779632568359375e-05, + "model_forward_time": 0.025194406509399414, + "step": 11663 + }, + { + "epoch": 1.779632568359375e-05, + "step": 11663, + "training_step_time": 0.11402177810668945 + }, + { + "epoch": 1.77978515625e-05, + "model_forward_time": 0.025280237197875977, + "step": 11664 + }, + { + "epoch": 1.77978515625e-05, + "step": 11664, + "training_step_time": 0.10649633407592773 + }, + { + "epoch": 1.779937744140625e-05, + "model_forward_time": 0.02542853355407715, + "step": 11665 + }, + { + "epoch": 1.779937744140625e-05, + "step": 11665, + "training_step_time": 0.10787367820739746 + }, + { + "epoch": 1.78009033203125e-05, + "model_forward_time": 0.025123119354248047, + "step": 11666 + }, + { + "epoch": 1.78009033203125e-05, + "step": 11666, + "training_step_time": 0.10682225227355957 + }, + { + "epoch": 1.780242919921875e-05, + "model_forward_time": 0.025790691375732422, + "step": 11667 + }, + { + "epoch": 1.780242919921875e-05, + "step": 11667, + "training_step_time": 0.10874795913696289 + }, + { + "epoch": 1.7803955078125e-05, + "model_forward_time": 0.025238037109375, + "step": 11668 + }, + { + "epoch": 1.7803955078125e-05, + "step": 11668, + "training_step_time": 0.10617542266845703 + }, + { + "epoch": 1.780548095703125e-05, + "model_forward_time": 0.024748563766479492, + "step": 11669 + }, + { + "epoch": 1.780548095703125e-05, + "step": 11669, + "training_step_time": 0.10572290420532227 + }, + { + "epoch": 1.78070068359375e-05, + "grad_norm": 0.24784539639949799, + "learning_rate": 7.173674083266624e-05, + "loss": 0.0134, + "step": 11670 + }, + { + "epoch": 1.78070068359375e-05, + "model_forward_time": 0.026944637298583984, + "step": 11670 + }, + { + "epoch": 1.78070068359375e-05, + "step": 11670, + "training_step_time": 0.11552238464355469 + }, + { + "epoch": 1.780853271484375e-05, + "model_forward_time": 0.025324106216430664, + "step": 11671 + }, + { + "epoch": 1.780853271484375e-05, + "step": 11671, + "training_step_time": 0.14961695671081543 + }, + { + "epoch": 1.781005859375e-05, + "model_forward_time": 0.025301694869995117, + "step": 11672 + }, + { + "epoch": 1.781005859375e-05, + "step": 11672, + "training_step_time": 0.20115900039672852 + }, + { + "epoch": 1.781158447265625e-05, + "model_forward_time": 0.024788618087768555, + "step": 11673 + }, + { + "epoch": 1.781158447265625e-05, + "step": 11673, + "training_step_time": 0.19629263877868652 + }, + { + "epoch": 1.78131103515625e-05, + "model_forward_time": 0.02545166015625, + "step": 11674 + }, + { + "epoch": 1.78131103515625e-05, + "step": 11674, + "training_step_time": 0.1674516201019287 + }, + { + "epoch": 1.781463623046875e-05, + "model_forward_time": 0.025110960006713867, + "step": 11675 + }, + { + "epoch": 1.781463623046875e-05, + "step": 11675, + "training_step_time": 0.15121912956237793 + }, + { + "epoch": 1.7816162109375e-05, + "model_forward_time": 0.026407241821289062, + "step": 11676 + }, + { + "epoch": 1.7816162109375e-05, + "step": 11676, + "training_step_time": 0.12232398986816406 + }, + { + "epoch": 1.781768798828125e-05, + "model_forward_time": 0.024682044982910156, + "step": 11677 + }, + { + "epoch": 1.781768798828125e-05, + "step": 11677, + "training_step_time": 0.106842041015625 + }, + { + "epoch": 1.78192138671875e-05, + "model_forward_time": 0.025202035903930664, + "step": 11678 + }, + { + "epoch": 1.78192138671875e-05, + "step": 11678, + "training_step_time": 0.19699525833129883 + }, + { + "epoch": 1.782073974609375e-05, + "model_forward_time": 0.024320125579833984, + "step": 11679 + }, + { + "epoch": 1.782073974609375e-05, + "step": 11679, + "training_step_time": 0.12567377090454102 + }, + { + "epoch": 1.7822265625e-05, + "grad_norm": 0.2874007821083069, + "learning_rate": 7.1687092759162e-05, + "loss": 0.0249, + "step": 11680 + }, + { + "epoch": 1.7822265625e-05, + "model_forward_time": 0.02434396743774414, + "step": 11680 + }, + { + "epoch": 1.7822265625e-05, + "step": 11680, + "training_step_time": 0.12557053565979004 + }, + { + "epoch": 1.782379150390625e-05, + "model_forward_time": 0.0240786075592041, + "step": 11681 + }, + { + "epoch": 1.782379150390625e-05, + "step": 11681, + "training_step_time": 0.11521697044372559 + }, + { + "epoch": 1.78253173828125e-05, + "model_forward_time": 0.025308609008789062, + "step": 11682 + }, + { + "epoch": 1.78253173828125e-05, + "step": 11682, + "training_step_time": 0.14262676239013672 + }, + { + "epoch": 1.782684326171875e-05, + "model_forward_time": 0.025249481201171875, + "step": 11683 + }, + { + "epoch": 1.782684326171875e-05, + "step": 11683, + "training_step_time": 0.11459207534790039 + }, + { + "epoch": 1.7828369140625e-05, + "model_forward_time": 0.02537393569946289, + "step": 11684 + }, + { + "epoch": 1.7828369140625e-05, + "step": 11684, + "training_step_time": 0.12342381477355957 + }, + { + "epoch": 1.782989501953125e-05, + "model_forward_time": 0.02558135986328125, + "step": 11685 + }, + { + "epoch": 1.782989501953125e-05, + "step": 11685, + "training_step_time": 0.11874103546142578 + }, + { + "epoch": 1.78314208984375e-05, + "model_forward_time": 0.02527594566345215, + "step": 11686 + }, + { + "epoch": 1.78314208984375e-05, + "step": 11686, + "training_step_time": 0.1051032543182373 + }, + { + "epoch": 1.783294677734375e-05, + "model_forward_time": 0.02523040771484375, + "step": 11687 + }, + { + "epoch": 1.783294677734375e-05, + "step": 11687, + "training_step_time": 0.10513138771057129 + }, + { + "epoch": 1.783447265625e-05, + "model_forward_time": 0.025600910186767578, + "step": 11688 + }, + { + "epoch": 1.783447265625e-05, + "step": 11688, + "training_step_time": 0.11179089546203613 + }, + { + "epoch": 1.783599853515625e-05, + "model_forward_time": 0.025002002716064453, + "step": 11689 + }, + { + "epoch": 1.783599853515625e-05, + "step": 11689, + "training_step_time": 0.10659551620483398 + }, + { + "epoch": 1.78375244140625e-05, + "grad_norm": 0.30975258350372314, + "learning_rate": 7.16374183337951e-05, + "loss": 0.0213, + "step": 11690 + }, + { + "epoch": 1.78375244140625e-05, + "model_forward_time": 0.025231361389160156, + "step": 11690 + }, + { + "epoch": 1.78375244140625e-05, + "step": 11690, + "training_step_time": 0.1071159839630127 + }, + { + "epoch": 1.783905029296875e-05, + "model_forward_time": 0.026062726974487305, + "step": 11691 + }, + { + "epoch": 1.783905029296875e-05, + "step": 11691, + "training_step_time": 0.12330913543701172 + }, + { + "epoch": 1.7840576171875e-05, + "model_forward_time": 0.024029254913330078, + "step": 11692 + }, + { + "epoch": 1.7840576171875e-05, + "step": 11692, + "training_step_time": 0.17774629592895508 + }, + { + "epoch": 1.784210205078125e-05, + "model_forward_time": 0.024976015090942383, + "step": 11693 + }, + { + "epoch": 1.784210205078125e-05, + "step": 11693, + "training_step_time": 0.18052935600280762 + }, + { + "epoch": 1.78436279296875e-05, + "model_forward_time": 0.02461981773376465, + "step": 11694 + }, + { + "epoch": 1.78436279296875e-05, + "step": 11694, + "training_step_time": 0.15902400016784668 + }, + { + "epoch": 1.784515380859375e-05, + "model_forward_time": 0.02421116828918457, + "step": 11695 + }, + { + "epoch": 1.784515380859375e-05, + "step": 11695, + "training_step_time": 0.15053915977478027 + }, + { + "epoch": 1.78466796875e-05, + "model_forward_time": 0.0243990421295166, + "step": 11696 + }, + { + "epoch": 1.78466796875e-05, + "step": 11696, + "training_step_time": 0.1726226806640625 + }, + { + "epoch": 1.784820556640625e-05, + "model_forward_time": 0.02503514289855957, + "step": 11697 + }, + { + "epoch": 1.784820556640625e-05, + "step": 11697, + "training_step_time": 0.15319347381591797 + }, + { + "epoch": 1.78497314453125e-05, + "model_forward_time": 0.024226665496826172, + "step": 11698 + }, + { + "epoch": 1.78497314453125e-05, + "step": 11698, + "training_step_time": 0.17513108253479004 + }, + { + "epoch": 1.785125732421875e-05, + "model_forward_time": 0.02492499351501465, + "step": 11699 + }, + { + "epoch": 1.785125732421875e-05, + "step": 11699, + "training_step_time": 0.12773919105529785 + }, + { + "epoch": 1.7852783203125e-05, + "grad_norm": 0.3183434009552002, + "learning_rate": 7.158771761692464e-05, + "loss": 0.0194, + "step": 11700 + }, + { + "epoch": 1.7852783203125e-05, + "model_forward_time": 0.02441549301147461, + "step": 11700 + }, + { + "epoch": 1.7852783203125e-05, + "step": 11700, + "training_step_time": 0.12762832641601562 + }, + { + "epoch": 1.785430908203125e-05, + "model_forward_time": 0.024658679962158203, + "step": 11701 + }, + { + "epoch": 1.785430908203125e-05, + "step": 11701, + "training_step_time": 0.12233352661132812 + }, + { + "epoch": 1.78558349609375e-05, + "model_forward_time": 0.024938344955444336, + "step": 11702 + }, + { + "epoch": 1.78558349609375e-05, + "step": 11702, + "training_step_time": 0.19814038276672363 + }, + { + "epoch": 1.785736083984375e-05, + "model_forward_time": 0.02831292152404785, + "step": 11703 + }, + { + "epoch": 1.785736083984375e-05, + "step": 11703, + "training_step_time": 0.11129140853881836 + }, + { + "epoch": 1.785888671875e-05, + "model_forward_time": 0.02491450309753418, + "step": 11704 + }, + { + "epoch": 1.785888671875e-05, + "step": 11704, + "training_step_time": 0.11177992820739746 + }, + { + "epoch": 1.786041259765625e-05, + "model_forward_time": 0.02515721321105957, + "step": 11705 + }, + { + "epoch": 1.786041259765625e-05, + "step": 11705, + "training_step_time": 0.11127138137817383 + }, + { + "epoch": 1.78619384765625e-05, + "model_forward_time": 0.02515864372253418, + "step": 11706 + }, + { + "epoch": 1.78619384765625e-05, + "step": 11706, + "training_step_time": 0.10737419128417969 + }, + { + "epoch": 1.786346435546875e-05, + "model_forward_time": 0.025577545166015625, + "step": 11707 + }, + { + "epoch": 1.786346435546875e-05, + "step": 11707, + "training_step_time": 0.10860109329223633 + }, + { + "epoch": 1.7864990234375e-05, + "model_forward_time": 0.025356769561767578, + "step": 11708 + }, + { + "epoch": 1.7864990234375e-05, + "step": 11708, + "training_step_time": 0.11063647270202637 + }, + { + "epoch": 1.786651611328125e-05, + "model_forward_time": 0.0246732234954834, + "step": 11709 + }, + { + "epoch": 1.786651611328125e-05, + "step": 11709, + "training_step_time": 0.10815548896789551 + }, + { + "epoch": 1.78680419921875e-05, + "grad_norm": 0.4140734374523163, + "learning_rate": 7.153799066894171e-05, + "loss": 0.0188, + "step": 11710 + }, + { + "epoch": 1.78680419921875e-05, + "model_forward_time": 0.025480985641479492, + "step": 11710 + }, + { + "epoch": 1.78680419921875e-05, + "step": 11710, + "training_step_time": 0.10764598846435547 + }, + { + "epoch": 1.786956787109375e-05, + "model_forward_time": 0.028128623962402344, + "step": 11711 + }, + { + "epoch": 1.786956787109375e-05, + "step": 11711, + "training_step_time": 0.15357565879821777 + }, + { + "epoch": 1.787109375e-05, + "model_forward_time": 0.024831533432006836, + "step": 11712 + }, + { + "epoch": 1.787109375e-05, + "step": 11712, + "training_step_time": 0.15476202964782715 + }, + { + "epoch": 1.787261962890625e-05, + "model_forward_time": 0.024794578552246094, + "step": 11713 + }, + { + "epoch": 1.787261962890625e-05, + "step": 11713, + "training_step_time": 0.11474776268005371 + }, + { + "epoch": 1.78741455078125e-05, + "model_forward_time": 0.025244474411010742, + "step": 11714 + }, + { + "epoch": 1.78741455078125e-05, + "step": 11714, + "training_step_time": 0.12900328636169434 + }, + { + "epoch": 1.787567138671875e-05, + "model_forward_time": 0.025837182998657227, + "step": 11715 + }, + { + "epoch": 1.787567138671875e-05, + "step": 11715, + "training_step_time": 0.2018749713897705 + }, + { + "epoch": 1.7877197265625e-05, + "model_forward_time": 0.024688005447387695, + "step": 11716 + }, + { + "epoch": 1.7877197265625e-05, + "step": 11716, + "training_step_time": 0.11519742012023926 + }, + { + "epoch": 1.787872314453125e-05, + "model_forward_time": 0.02411198616027832, + "step": 11717 + }, + { + "epoch": 1.787872314453125e-05, + "step": 11717, + "training_step_time": 0.10938572883605957 + }, + { + "epoch": 1.78802490234375e-05, + "model_forward_time": 0.026111364364624023, + "step": 11718 + }, + { + "epoch": 1.78802490234375e-05, + "step": 11718, + "training_step_time": 0.1160573959350586 + }, + { + "epoch": 1.788177490234375e-05, + "model_forward_time": 0.02622056007385254, + "step": 11719 + }, + { + "epoch": 1.788177490234375e-05, + "step": 11719, + "training_step_time": 0.11627006530761719 + }, + { + "epoch": 1.788330078125e-05, + "grad_norm": 0.4929561913013458, + "learning_rate": 7.148823755026921e-05, + "loss": 0.0196, + "step": 11720 + }, + { + "epoch": 1.788330078125e-05, + "model_forward_time": 0.025667190551757812, + "step": 11720 + }, + { + "epoch": 1.788330078125e-05, + "step": 11720, + "training_step_time": 0.10977578163146973 + }, + { + "epoch": 1.788482666015625e-05, + "model_forward_time": 0.02518296241760254, + "step": 11721 + }, + { + "epoch": 1.788482666015625e-05, + "step": 11721, + "training_step_time": 0.1930091381072998 + }, + { + "epoch": 1.78863525390625e-05, + "model_forward_time": 0.024659156799316406, + "step": 11722 + }, + { + "epoch": 1.78863525390625e-05, + "step": 11722, + "training_step_time": 0.1693124771118164 + }, + { + "epoch": 1.788787841796875e-05, + "model_forward_time": 0.024460792541503906, + "step": 11723 + }, + { + "epoch": 1.788787841796875e-05, + "step": 11723, + "training_step_time": 0.1668837070465088 + }, + { + "epoch": 1.7889404296875e-05, + "model_forward_time": 0.024379730224609375, + "step": 11724 + }, + { + "epoch": 1.7889404296875e-05, + "step": 11724, + "training_step_time": 0.13912343978881836 + }, + { + "epoch": 1.789093017578125e-05, + "model_forward_time": 0.024727582931518555, + "step": 11725 + }, + { + "epoch": 1.789093017578125e-05, + "step": 11725, + "training_step_time": 0.11472392082214355 + }, + { + "epoch": 1.78924560546875e-05, + "model_forward_time": 0.025653839111328125, + "step": 11726 + }, + { + "epoch": 1.78924560546875e-05, + "step": 11726, + "training_step_time": 0.12459659576416016 + }, + { + "epoch": 1.789398193359375e-05, + "model_forward_time": 0.0254518985748291, + "step": 11727 + }, + { + "epoch": 1.789398193359375e-05, + "step": 11727, + "training_step_time": 0.12235260009765625 + }, + { + "epoch": 1.78955078125e-05, + "model_forward_time": 0.025080204010009766, + "step": 11728 + }, + { + "epoch": 1.78955078125e-05, + "step": 11728, + "training_step_time": 0.1136469841003418 + }, + { + "epoch": 1.789703369140625e-05, + "model_forward_time": 0.02559971809387207, + "step": 11729 + }, + { + "epoch": 1.789703369140625e-05, + "step": 11729, + "training_step_time": 0.11441326141357422 + }, + { + "epoch": 1.78985595703125e-05, + "grad_norm": 0.2370370477437973, + "learning_rate": 7.143845832136188e-05, + "loss": 0.0137, + "step": 11730 + }, + { + "epoch": 1.78985595703125e-05, + "model_forward_time": 0.025336503982543945, + "step": 11730 + }, + { + "epoch": 1.78985595703125e-05, + "step": 11730, + "training_step_time": 0.11599612236022949 + }, + { + "epoch": 1.790008544921875e-05, + "model_forward_time": 0.0253753662109375, + "step": 11731 + }, + { + "epoch": 1.790008544921875e-05, + "step": 11731, + "training_step_time": 0.11584115028381348 + }, + { + "epoch": 1.7901611328125e-05, + "model_forward_time": 0.0266873836517334, + "step": 11732 + }, + { + "epoch": 1.7901611328125e-05, + "step": 11732, + "training_step_time": 0.11408209800720215 + }, + { + "epoch": 1.790313720703125e-05, + "model_forward_time": 0.025597572326660156, + "step": 11733 + }, + { + "epoch": 1.790313720703125e-05, + "step": 11733, + "training_step_time": 0.11321783065795898 + }, + { + "epoch": 1.79046630859375e-05, + "model_forward_time": 0.025423765182495117, + "step": 11734 + }, + { + "epoch": 1.79046630859375e-05, + "step": 11734, + "training_step_time": 0.11077570915222168 + }, + { + "epoch": 1.790618896484375e-05, + "model_forward_time": 0.027895212173461914, + "step": 11735 + }, + { + "epoch": 1.790618896484375e-05, + "step": 11735, + "training_step_time": 0.11109590530395508 + }, + { + "epoch": 1.790771484375e-05, + "model_forward_time": 0.025504350662231445, + "step": 11736 + }, + { + "epoch": 1.790771484375e-05, + "step": 11736, + "training_step_time": 0.11061596870422363 + }, + { + "epoch": 1.790924072265625e-05, + "model_forward_time": 0.025641918182373047, + "step": 11737 + }, + { + "epoch": 1.790924072265625e-05, + "step": 11737, + "training_step_time": 0.11015653610229492 + }, + { + "epoch": 1.79107666015625e-05, + "model_forward_time": 0.02518153190612793, + "step": 11738 + }, + { + "epoch": 1.79107666015625e-05, + "step": 11738, + "training_step_time": 0.10755777359008789 + }, + { + "epoch": 1.791229248046875e-05, + "model_forward_time": 0.025189638137817383, + "step": 11739 + }, + { + "epoch": 1.791229248046875e-05, + "step": 11739, + "training_step_time": 0.10906028747558594 + }, + { + "epoch": 1.7913818359375e-05, + "grad_norm": 0.3798167407512665, + "learning_rate": 7.138865304270616e-05, + "loss": 0.0193, + "step": 11740 + }, + { + "epoch": 1.7913818359375e-05, + "model_forward_time": 0.025629520416259766, + "step": 11740 + }, + { + "epoch": 1.7913818359375e-05, + "step": 11740, + "training_step_time": 0.11115503311157227 + }, + { + "epoch": 1.791534423828125e-05, + "model_forward_time": 0.025362730026245117, + "step": 11741 + }, + { + "epoch": 1.791534423828125e-05, + "step": 11741, + "training_step_time": 0.11635255813598633 + }, + { + "epoch": 1.79168701171875e-05, + "model_forward_time": 0.027070283889770508, + "step": 11742 + }, + { + "epoch": 1.79168701171875e-05, + "step": 11742, + "training_step_time": 0.20132780075073242 + }, + { + "epoch": 1.791839599609375e-05, + "model_forward_time": 0.024280309677124023, + "step": 11743 + }, + { + "epoch": 1.791839599609375e-05, + "step": 11743, + "training_step_time": 0.10520076751708984 + }, + { + "epoch": 1.7919921875e-05, + "model_forward_time": 0.0245358943939209, + "step": 11744 + }, + { + "epoch": 1.7919921875e-05, + "step": 11744, + "training_step_time": 0.11055636405944824 + }, + { + "epoch": 1.792144775390625e-05, + "model_forward_time": 0.025367259979248047, + "step": 11745 + }, + { + "epoch": 1.792144775390625e-05, + "step": 11745, + "training_step_time": 0.12089133262634277 + }, + { + "epoch": 1.79229736328125e-05, + "model_forward_time": 0.025229930877685547, + "step": 11746 + }, + { + "epoch": 1.79229736328125e-05, + "step": 11746, + "training_step_time": 0.13059186935424805 + }, + { + "epoch": 1.792449951171875e-05, + "model_forward_time": 0.024973154067993164, + "step": 11747 + }, + { + "epoch": 1.792449951171875e-05, + "step": 11747, + "training_step_time": 0.10904932022094727 + }, + { + "epoch": 1.7926025390625e-05, + "model_forward_time": 0.02531147003173828, + "step": 11748 + }, + { + "epoch": 1.7926025390625e-05, + "step": 11748, + "training_step_time": 0.10708236694335938 + }, + { + "epoch": 1.792755126953125e-05, + "model_forward_time": 0.025348186492919922, + "step": 11749 + }, + { + "epoch": 1.792755126953125e-05, + "step": 11749, + "training_step_time": 0.11211419105529785 + }, + { + "epoch": 1.79290771484375e-05, + "grad_norm": 0.20920109748840332, + "learning_rate": 7.133882177482019e-05, + "loss": 0.0259, + "step": 11750 + }, + { + "epoch": 1.79290771484375e-05, + "model_forward_time": 0.02493119239807129, + "step": 11750 + }, + { + "epoch": 1.79290771484375e-05, + "step": 11750, + "training_step_time": 0.10653901100158691 + }, + { + "epoch": 1.793060302734375e-05, + "model_forward_time": 0.026560544967651367, + "step": 11751 + }, + { + "epoch": 1.793060302734375e-05, + "step": 11751, + "training_step_time": 0.10820913314819336 + }, + { + "epoch": 1.793212890625e-05, + "model_forward_time": 0.025300025939941406, + "step": 11752 + }, + { + "epoch": 1.793212890625e-05, + "step": 11752, + "training_step_time": 0.10830354690551758 + }, + { + "epoch": 1.793365478515625e-05, + "model_forward_time": 0.025789976119995117, + "step": 11753 + }, + { + "epoch": 1.793365478515625e-05, + "step": 11753, + "training_step_time": 0.11249709129333496 + }, + { + "epoch": 1.79351806640625e-05, + "model_forward_time": 0.02571702003479004, + "step": 11754 + }, + { + "epoch": 1.79351806640625e-05, + "step": 11754, + "training_step_time": 0.10808610916137695 + }, + { + "epoch": 1.793670654296875e-05, + "model_forward_time": 0.025809526443481445, + "step": 11755 + }, + { + "epoch": 1.793670654296875e-05, + "step": 11755, + "training_step_time": 0.10536909103393555 + }, + { + "epoch": 1.7938232421875e-05, + "model_forward_time": 0.024606704711914062, + "step": 11756 + }, + { + "epoch": 1.7938232421875e-05, + "step": 11756, + "training_step_time": 0.14014959335327148 + }, + { + "epoch": 1.793975830078125e-05, + "model_forward_time": 0.025243520736694336, + "step": 11757 + }, + { + "epoch": 1.793975830078125e-05, + "step": 11757, + "training_step_time": 0.11704897880554199 + }, + { + "epoch": 1.79412841796875e-05, + "model_forward_time": 0.025142431259155273, + "step": 11758 + }, + { + "epoch": 1.79412841796875e-05, + "step": 11758, + "training_step_time": 0.19900965690612793 + }, + { + "epoch": 1.794281005859375e-05, + "model_forward_time": 0.02420520782470703, + "step": 11759 + }, + { + "epoch": 1.794281005859375e-05, + "step": 11759, + "training_step_time": 0.13672661781311035 + }, + { + "epoch": 1.79443359375e-05, + "grad_norm": 0.30930909514427185, + "learning_rate": 7.128896457825364e-05, + "loss": 0.0168, + "step": 11760 + }, + { + "epoch": 1.79443359375e-05, + "model_forward_time": 0.02392745018005371, + "step": 11760 + }, + { + "epoch": 1.79443359375e-05, + "step": 11760, + "training_step_time": 0.19931745529174805 + }, + { + "epoch": 1.794586181640625e-05, + "model_forward_time": 0.024522066116333008, + "step": 11761 + }, + { + "epoch": 1.794586181640625e-05, + "step": 11761, + "training_step_time": 0.18280506134033203 + }, + { + "epoch": 1.79473876953125e-05, + "model_forward_time": 0.023884057998657227, + "step": 11762 + }, + { + "epoch": 1.79473876953125e-05, + "step": 11762, + "training_step_time": 0.11298418045043945 + }, + { + "epoch": 1.794891357421875e-05, + "model_forward_time": 0.024363994598388672, + "step": 11763 + }, + { + "epoch": 1.794891357421875e-05, + "step": 11763, + "training_step_time": 0.11811637878417969 + }, + { + "epoch": 1.7950439453125e-05, + "model_forward_time": 0.025239944458007812, + "step": 11764 + }, + { + "epoch": 1.7950439453125e-05, + "step": 11764, + "training_step_time": 0.11014318466186523 + }, + { + "epoch": 1.795196533203125e-05, + "model_forward_time": 0.025240182876586914, + "step": 11765 + }, + { + "epoch": 1.795196533203125e-05, + "step": 11765, + "training_step_time": 0.19793152809143066 + }, + { + "epoch": 1.79534912109375e-05, + "model_forward_time": 0.024227619171142578, + "step": 11766 + }, + { + "epoch": 1.79534912109375e-05, + "step": 11766, + "training_step_time": 0.1875934600830078 + }, + { + "epoch": 1.795501708984375e-05, + "model_forward_time": 0.024090290069580078, + "step": 11767 + }, + { + "epoch": 1.795501708984375e-05, + "step": 11767, + "training_step_time": 0.14180922508239746 + }, + { + "epoch": 1.795654296875e-05, + "model_forward_time": 0.024306297302246094, + "step": 11768 + }, + { + "epoch": 1.795654296875e-05, + "step": 11768, + "training_step_time": 0.1201925277709961 + }, + { + "epoch": 1.795806884765625e-05, + "model_forward_time": 0.024745941162109375, + "step": 11769 + }, + { + "epoch": 1.795806884765625e-05, + "step": 11769, + "training_step_time": 0.10758209228515625 + }, + { + "epoch": 1.79595947265625e-05, + "grad_norm": 0.3645468056201935, + "learning_rate": 7.12390815135877e-05, + "loss": 0.0218, + "step": 11770 + }, + { + "epoch": 1.79595947265625e-05, + "model_forward_time": 0.025122404098510742, + "step": 11770 + }, + { + "epoch": 1.79595947265625e-05, + "step": 11770, + "training_step_time": 0.11304521560668945 + }, + { + "epoch": 1.796112060546875e-05, + "model_forward_time": 0.025554656982421875, + "step": 11771 + }, + { + "epoch": 1.796112060546875e-05, + "step": 11771, + "training_step_time": 0.11222124099731445 + }, + { + "epoch": 1.7962646484375e-05, + "model_forward_time": 0.02506566047668457, + "step": 11772 + }, + { + "epoch": 1.7962646484375e-05, + "step": 11772, + "training_step_time": 0.10831093788146973 + }, + { + "epoch": 1.796417236328125e-05, + "model_forward_time": 0.024943828582763672, + "step": 11773 + }, + { + "epoch": 1.796417236328125e-05, + "step": 11773, + "training_step_time": 0.10556483268737793 + }, + { + "epoch": 1.79656982421875e-05, + "model_forward_time": 0.024845361709594727, + "step": 11774 + }, + { + "epoch": 1.79656982421875e-05, + "step": 11774, + "training_step_time": 0.10753655433654785 + }, + { + "epoch": 1.796722412109375e-05, + "model_forward_time": 0.025279760360717773, + "step": 11775 + }, + { + "epoch": 1.796722412109375e-05, + "step": 11775, + "training_step_time": 0.11156797409057617 + }, + { + "epoch": 1.796875e-05, + "model_forward_time": 0.02526545524597168, + "step": 11776 + }, + { + "epoch": 1.796875e-05, + "step": 11776, + "training_step_time": 0.18631601333618164 + }, + { + "epoch": 1.797027587890625e-05, + "model_forward_time": 0.02446126937866211, + "step": 11777 + }, + { + "epoch": 1.797027587890625e-05, + "step": 11777, + "training_step_time": 0.20564889907836914 + }, + { + "epoch": 1.79718017578125e-05, + "model_forward_time": 0.024088621139526367, + "step": 11778 + }, + { + "epoch": 1.79718017578125e-05, + "step": 11778, + "training_step_time": 0.1980876922607422 + }, + { + "epoch": 1.797332763671875e-05, + "model_forward_time": 0.024466514587402344, + "step": 11779 + }, + { + "epoch": 1.797332763671875e-05, + "step": 11779, + "training_step_time": 0.19675660133361816 + }, + { + "epoch": 1.7974853515625e-05, + "grad_norm": 0.5852621793746948, + "learning_rate": 7.118917264143501e-05, + "loss": 0.0177, + "step": 11780 + }, + { + "epoch": 1.7974853515625e-05, + "model_forward_time": 0.0243985652923584, + "step": 11780 + }, + { + "epoch": 1.7974853515625e-05, + "step": 11780, + "training_step_time": 0.18271350860595703 + }, + { + "epoch": 1.797637939453125e-05, + "model_forward_time": 0.024523496627807617, + "step": 11781 + }, + { + "epoch": 1.797637939453125e-05, + "step": 11781, + "training_step_time": 0.10518002510070801 + }, + { + "epoch": 1.79779052734375e-05, + "model_forward_time": 0.025251150131225586, + "step": 11782 + }, + { + "epoch": 1.79779052734375e-05, + "step": 11782, + "training_step_time": 0.1033945083618164 + }, + { + "epoch": 1.797943115234375e-05, + "model_forward_time": 0.025688648223876953, + "step": 11783 + }, + { + "epoch": 1.797943115234375e-05, + "step": 11783, + "training_step_time": 0.10590195655822754 + }, + { + "epoch": 1.798095703125e-05, + "model_forward_time": 0.025027036666870117, + "step": 11784 + }, + { + "epoch": 1.798095703125e-05, + "step": 11784, + "training_step_time": 0.20040297508239746 + }, + { + "epoch": 1.798248291015625e-05, + "model_forward_time": 0.024587154388427734, + "step": 11785 + }, + { + "epoch": 1.798248291015625e-05, + "step": 11785, + "training_step_time": 0.11007285118103027 + }, + { + "epoch": 1.79840087890625e-05, + "model_forward_time": 0.024486064910888672, + "step": 11786 + }, + { + "epoch": 1.79840087890625e-05, + "step": 11786, + "training_step_time": 0.10959553718566895 + }, + { + "epoch": 1.798553466796875e-05, + "model_forward_time": 0.024246692657470703, + "step": 11787 + }, + { + "epoch": 1.798553466796875e-05, + "step": 11787, + "training_step_time": 0.18020939826965332 + }, + { + "epoch": 1.7987060546875e-05, + "model_forward_time": 0.02375340461730957, + "step": 11788 + }, + { + "epoch": 1.7987060546875e-05, + "step": 11788, + "training_step_time": 0.23661470413208008 + }, + { + "epoch": 1.798858642578125e-05, + "model_forward_time": 0.024754047393798828, + "step": 11789 + }, + { + "epoch": 1.798858642578125e-05, + "step": 11789, + "training_step_time": 0.2100536823272705 + }, + { + "epoch": 1.79901123046875e-05, + "grad_norm": 0.321106880903244, + "learning_rate": 7.113923802243957e-05, + "loss": 0.0186, + "step": 11790 + }, + { + "epoch": 1.79901123046875e-05, + "model_forward_time": 0.0238034725189209, + "step": 11790 + }, + { + "epoch": 1.79901123046875e-05, + "step": 11790, + "training_step_time": 0.17432641983032227 + }, + { + "epoch": 1.799163818359375e-05, + "model_forward_time": 0.02335953712463379, + "step": 11791 + }, + { + "epoch": 1.799163818359375e-05, + "step": 11791, + "training_step_time": 0.16030454635620117 + }, + { + "epoch": 1.79931640625e-05, + "model_forward_time": 0.02325129508972168, + "step": 11792 + }, + { + "epoch": 1.79931640625e-05, + "step": 11792, + "training_step_time": 0.14293956756591797 + }, + { + "epoch": 1.799468994140625e-05, + "model_forward_time": 0.02725958824157715, + "step": 11793 + }, + { + "epoch": 1.799468994140625e-05, + "step": 11793, + "training_step_time": 0.11002135276794434 + }, + { + "epoch": 1.79962158203125e-05, + "model_forward_time": 0.025141239166259766, + "step": 11794 + }, + { + "epoch": 1.79962158203125e-05, + "step": 11794, + "training_step_time": 0.10535001754760742 + }, + { + "epoch": 1.799774169921875e-05, + "model_forward_time": 0.024230480194091797, + "step": 11795 + }, + { + "epoch": 1.799774169921875e-05, + "step": 11795, + "training_step_time": 0.14729523658752441 + }, + { + "epoch": 1.7999267578125e-05, + "model_forward_time": 0.02480792999267578, + "step": 11796 + }, + { + "epoch": 1.7999267578125e-05, + "step": 11796, + "training_step_time": 0.1072535514831543 + }, + { + "epoch": 1.800079345703125e-05, + "model_forward_time": 0.024956941604614258, + "step": 11797 + }, + { + "epoch": 1.800079345703125e-05, + "step": 11797, + "training_step_time": 0.10697817802429199 + }, + { + "epoch": 1.80023193359375e-05, + "model_forward_time": 0.0248563289642334, + "step": 11798 + }, + { + "epoch": 1.80023193359375e-05, + "step": 11798, + "training_step_time": 0.10707211494445801 + }, + { + "epoch": 1.800384521484375e-05, + "model_forward_time": 0.026293277740478516, + "step": 11799 + }, + { + "epoch": 1.800384521484375e-05, + "step": 11799, + "training_step_time": 0.12851214408874512 + }, + { + "epoch": 1.800537109375e-05, + "grad_norm": 0.3286162316799164, + "learning_rate": 7.108927771727661e-05, + "loss": 0.0198, + "step": 11800 + }, + { + "epoch": 1.800537109375e-05, + "model_forward_time": 0.02498602867126465, + "step": 11800 + }, + { + "epoch": 1.800537109375e-05, + "step": 11800, + "training_step_time": 0.21477150917053223 + }, + { + "epoch": 1.800689697265625e-05, + "model_forward_time": 0.024430274963378906, + "step": 11801 + }, + { + "epoch": 1.800689697265625e-05, + "step": 11801, + "training_step_time": 0.10818862915039062 + }, + { + "epoch": 1.80084228515625e-05, + "model_forward_time": 0.024498462677001953, + "step": 11802 + }, + { + "epoch": 1.80084228515625e-05, + "step": 11802, + "training_step_time": 0.11890983581542969 + }, + { + "epoch": 1.800994873046875e-05, + "model_forward_time": 0.025299072265625, + "step": 11803 + }, + { + "epoch": 1.800994873046875e-05, + "step": 11803, + "training_step_time": 0.11484479904174805 + }, + { + "epoch": 1.8011474609375e-05, + "model_forward_time": 0.02626514434814453, + "step": 11804 + }, + { + "epoch": 1.8011474609375e-05, + "step": 11804, + "training_step_time": 0.1143801212310791 + }, + { + "epoch": 1.801300048828125e-05, + "model_forward_time": 0.02494049072265625, + "step": 11805 + }, + { + "epoch": 1.801300048828125e-05, + "step": 11805, + "training_step_time": 0.19628286361694336 + }, + { + "epoch": 1.80145263671875e-05, + "model_forward_time": 0.02511119842529297, + "step": 11806 + }, + { + "epoch": 1.80145263671875e-05, + "step": 11806, + "training_step_time": 0.1489872932434082 + }, + { + "epoch": 1.801605224609375e-05, + "model_forward_time": 0.025616168975830078, + "step": 11807 + }, + { + "epoch": 1.801605224609375e-05, + "step": 11807, + "training_step_time": 0.14116692543029785 + }, + { + "epoch": 1.8017578125e-05, + "model_forward_time": 0.025133132934570312, + "step": 11808 + }, + { + "epoch": 1.8017578125e-05, + "step": 11808, + "training_step_time": 0.1482248306274414 + }, + { + "epoch": 1.801910400390625e-05, + "model_forward_time": 0.026373624801635742, + "step": 11809 + }, + { + "epoch": 1.801910400390625e-05, + "step": 11809, + "training_step_time": 0.15588116645812988 + }, + { + "epoch": 1.80206298828125e-05, + "grad_norm": 0.43684709072113037, + "learning_rate": 7.103929178665266e-05, + "loss": 0.0305, + "step": 11810 + }, + { + "epoch": 1.80206298828125e-05, + "model_forward_time": 0.024808883666992188, + "step": 11810 + }, + { + "epoch": 1.80206298828125e-05, + "step": 11810, + "training_step_time": 0.22199082374572754 + }, + { + "epoch": 1.802215576171875e-05, + "model_forward_time": 0.02487468719482422, + "step": 11811 + }, + { + "epoch": 1.802215576171875e-05, + "step": 11811, + "training_step_time": 0.12003731727600098 + }, + { + "epoch": 1.8023681640625e-05, + "model_forward_time": 0.02356243133544922, + "step": 11812 + }, + { + "epoch": 1.8023681640625e-05, + "step": 11812, + "training_step_time": 0.11510682106018066 + }, + { + "epoch": 1.802520751953125e-05, + "model_forward_time": 0.02469921112060547, + "step": 11813 + }, + { + "epoch": 1.802520751953125e-05, + "step": 11813, + "training_step_time": 0.11623358726501465 + }, + { + "epoch": 1.80267333984375e-05, + "model_forward_time": 0.025042295455932617, + "step": 11814 + }, + { + "epoch": 1.80267333984375e-05, + "step": 11814, + "training_step_time": 0.11191964149475098 + }, + { + "epoch": 1.802825927734375e-05, + "model_forward_time": 0.0251467227935791, + "step": 11815 + }, + { + "epoch": 1.802825927734375e-05, + "step": 11815, + "training_step_time": 0.10897064208984375 + }, + { + "epoch": 1.802978515625e-05, + "model_forward_time": 0.025455236434936523, + "step": 11816 + }, + { + "epoch": 1.802978515625e-05, + "step": 11816, + "training_step_time": 0.11017823219299316 + }, + { + "epoch": 1.803131103515625e-05, + "model_forward_time": 0.02565288543701172, + "step": 11817 + }, + { + "epoch": 1.803131103515625e-05, + "step": 11817, + "training_step_time": 0.10978913307189941 + }, + { + "epoch": 1.80328369140625e-05, + "model_forward_time": 0.025326251983642578, + "step": 11818 + }, + { + "epoch": 1.80328369140625e-05, + "step": 11818, + "training_step_time": 0.10839009284973145 + }, + { + "epoch": 1.803436279296875e-05, + "model_forward_time": 0.02567315101623535, + "step": 11819 + }, + { + "epoch": 1.803436279296875e-05, + "step": 11819, + "training_step_time": 0.1093289852142334 + }, + { + "epoch": 1.8035888671875e-05, + "grad_norm": 0.3068019449710846, + "learning_rate": 7.09892802913053e-05, + "loss": 0.0223, + "step": 11820 + }, + { + "epoch": 1.8035888671875e-05, + "model_forward_time": 0.025632143020629883, + "step": 11820 + }, + { + "epoch": 1.8035888671875e-05, + "step": 11820, + "training_step_time": 0.10836362838745117 + }, + { + "epoch": 1.803741455078125e-05, + "model_forward_time": 0.025618791580200195, + "step": 11821 + }, + { + "epoch": 1.803741455078125e-05, + "step": 11821, + "training_step_time": 0.11076545715332031 + }, + { + "epoch": 1.80389404296875e-05, + "model_forward_time": 0.025796890258789062, + "step": 11822 + }, + { + "epoch": 1.80389404296875e-05, + "step": 11822, + "training_step_time": 0.10805392265319824 + }, + { + "epoch": 1.804046630859375e-05, + "model_forward_time": 0.025356531143188477, + "step": 11823 + }, + { + "epoch": 1.804046630859375e-05, + "step": 11823, + "training_step_time": 0.10704660415649414 + }, + { + "epoch": 1.80419921875e-05, + "model_forward_time": 0.025439977645874023, + "step": 11824 + }, + { + "epoch": 1.80419921875e-05, + "step": 11824, + "training_step_time": 0.10770082473754883 + }, + { + "epoch": 1.804351806640625e-05, + "model_forward_time": 0.025523900985717773, + "step": 11825 + }, + { + "epoch": 1.804351806640625e-05, + "step": 11825, + "training_step_time": 0.10774683952331543 + }, + { + "epoch": 1.80450439453125e-05, + "model_forward_time": 0.025355100631713867, + "step": 11826 + }, + { + "epoch": 1.80450439453125e-05, + "step": 11826, + "training_step_time": 0.10748481750488281 + }, + { + "epoch": 1.804656982421875e-05, + "model_forward_time": 0.025360822677612305, + "step": 11827 + }, + { + "epoch": 1.804656982421875e-05, + "step": 11827, + "training_step_time": 0.16775155067443848 + }, + { + "epoch": 1.8048095703125e-05, + "model_forward_time": 0.02608013153076172, + "step": 11828 + }, + { + "epoch": 1.8048095703125e-05, + "step": 11828, + "training_step_time": 0.11002421379089355 + }, + { + "epoch": 1.804962158203125e-05, + "model_forward_time": 0.024909257888793945, + "step": 11829 + }, + { + "epoch": 1.804962158203125e-05, + "step": 11829, + "training_step_time": 0.11364579200744629 + }, + { + "epoch": 1.80511474609375e-05, + "grad_norm": 0.5004920959472656, + "learning_rate": 7.093924329200321e-05, + "loss": 0.0354, + "step": 11830 + }, + { + "epoch": 1.80511474609375e-05, + "model_forward_time": 0.02544713020324707, + "step": 11830 + }, + { + "epoch": 1.80511474609375e-05, + "step": 11830, + "training_step_time": 0.12013840675354004 + }, + { + "epoch": 1.805267333984375e-05, + "model_forward_time": 0.025325775146484375, + "step": 11831 + }, + { + "epoch": 1.805267333984375e-05, + "step": 11831, + "training_step_time": 0.12206459045410156 + }, + { + "epoch": 1.805419921875e-05, + "model_forward_time": 0.025445222854614258, + "step": 11832 + }, + { + "epoch": 1.805419921875e-05, + "step": 11832, + "training_step_time": 0.12179994583129883 + }, + { + "epoch": 1.805572509765625e-05, + "model_forward_time": 0.025418519973754883, + "step": 11833 + }, + { + "epoch": 1.805572509765625e-05, + "step": 11833, + "training_step_time": 0.13859272003173828 + }, + { + "epoch": 1.80572509765625e-05, + "model_forward_time": 0.025030136108398438, + "step": 11834 + }, + { + "epoch": 1.80572509765625e-05, + "step": 11834, + "training_step_time": 0.11196327209472656 + }, + { + "epoch": 1.805877685546875e-05, + "model_forward_time": 0.025281667709350586, + "step": 11835 + }, + { + "epoch": 1.805877685546875e-05, + "step": 11835, + "training_step_time": 0.10619878768920898 + }, + { + "epoch": 1.8060302734375e-05, + "model_forward_time": 0.0253908634185791, + "step": 11836 + }, + { + "epoch": 1.8060302734375e-05, + "step": 11836, + "training_step_time": 0.10717105865478516 + }, + { + "epoch": 1.806182861328125e-05, + "model_forward_time": 0.025280475616455078, + "step": 11837 + }, + { + "epoch": 1.806182861328125e-05, + "step": 11837, + "training_step_time": 0.11041641235351562 + }, + { + "epoch": 1.80633544921875e-05, + "model_forward_time": 0.025144338607788086, + "step": 11838 + }, + { + "epoch": 1.80633544921875e-05, + "step": 11838, + "training_step_time": 0.10631942749023438 + }, + { + "epoch": 1.806488037109375e-05, + "model_forward_time": 0.025485992431640625, + "step": 11839 + }, + { + "epoch": 1.806488037109375e-05, + "step": 11839, + "training_step_time": 0.10799312591552734 + }, + { + "epoch": 1.806640625e-05, + "grad_norm": 0.4685319662094116, + "learning_rate": 7.08891808495461e-05, + "loss": 0.0223, + "step": 11840 + }, + { + "epoch": 1.806640625e-05, + "model_forward_time": 0.026576757431030273, + "step": 11840 + }, + { + "epoch": 1.806640625e-05, + "step": 11840, + "training_step_time": 0.10597634315490723 + }, + { + "epoch": 1.806793212890625e-05, + "model_forward_time": 0.02459096908569336, + "step": 11841 + }, + { + "epoch": 1.806793212890625e-05, + "step": 11841, + "training_step_time": 0.15087127685546875 + }, + { + "epoch": 1.80694580078125e-05, + "model_forward_time": 0.024808883666992188, + "step": 11842 + }, + { + "epoch": 1.80694580078125e-05, + "step": 11842, + "training_step_time": 0.16709494590759277 + }, + { + "epoch": 1.807098388671875e-05, + "model_forward_time": 0.02557682991027832, + "step": 11843 + }, + { + "epoch": 1.807098388671875e-05, + "step": 11843, + "training_step_time": 0.10812878608703613 + }, + { + "epoch": 1.8072509765625e-05, + "model_forward_time": 0.028900623321533203, + "step": 11844 + }, + { + "epoch": 1.8072509765625e-05, + "step": 11844, + "training_step_time": 0.1749575138092041 + }, + { + "epoch": 1.807403564453125e-05, + "model_forward_time": 0.02477431297302246, + "step": 11845 + }, + { + "epoch": 1.807403564453125e-05, + "step": 11845, + "training_step_time": 0.15839695930480957 + }, + { + "epoch": 1.80755615234375e-05, + "model_forward_time": 0.024523496627807617, + "step": 11846 + }, + { + "epoch": 1.80755615234375e-05, + "step": 11846, + "training_step_time": 0.20357394218444824 + }, + { + "epoch": 1.807708740234375e-05, + "model_forward_time": 0.02464580535888672, + "step": 11847 + }, + { + "epoch": 1.807708740234375e-05, + "step": 11847, + "training_step_time": 0.11320328712463379 + }, + { + "epoch": 1.807861328125e-05, + "model_forward_time": 0.024836063385009766, + "step": 11848 + }, + { + "epoch": 1.807861328125e-05, + "step": 11848, + "training_step_time": 0.10975027084350586 + }, + { + "epoch": 1.808013916015625e-05, + "model_forward_time": 0.025541067123413086, + "step": 11849 + }, + { + "epoch": 1.808013916015625e-05, + "step": 11849, + "training_step_time": 0.1591174602508545 + }, + { + "epoch": 1.80816650390625e-05, + "grad_norm": 0.20132340490818024, + "learning_rate": 7.083909302476453e-05, + "loss": 0.0198, + "step": 11850 + }, + { + "epoch": 1.80816650390625e-05, + "model_forward_time": 0.024764537811279297, + "step": 11850 + }, + { + "epoch": 1.80816650390625e-05, + "step": 11850, + "training_step_time": 0.1755080223083496 + }, + { + "epoch": 1.808319091796875e-05, + "model_forward_time": 0.025552749633789062, + "step": 11851 + }, + { + "epoch": 1.808319091796875e-05, + "step": 11851, + "training_step_time": 0.12903404235839844 + }, + { + "epoch": 1.8084716796875e-05, + "model_forward_time": 0.027469873428344727, + "step": 11852 + }, + { + "epoch": 1.8084716796875e-05, + "step": 11852, + "training_step_time": 0.10930442810058594 + }, + { + "epoch": 1.808624267578125e-05, + "model_forward_time": 0.025430679321289062, + "step": 11853 + }, + { + "epoch": 1.808624267578125e-05, + "step": 11853, + "training_step_time": 0.11730742454528809 + }, + { + "epoch": 1.80877685546875e-05, + "model_forward_time": 0.02575516700744629, + "step": 11854 + }, + { + "epoch": 1.80877685546875e-05, + "step": 11854, + "training_step_time": 0.10805583000183105 + }, + { + "epoch": 1.808929443359375e-05, + "model_forward_time": 0.02515864372253418, + "step": 11855 + }, + { + "epoch": 1.808929443359375e-05, + "step": 11855, + "training_step_time": 0.11034750938415527 + }, + { + "epoch": 1.80908203125e-05, + "model_forward_time": 0.0248870849609375, + "step": 11856 + }, + { + "epoch": 1.80908203125e-05, + "step": 11856, + "training_step_time": 0.10940027236938477 + }, + { + "epoch": 1.809234619140625e-05, + "model_forward_time": 0.02563166618347168, + "step": 11857 + }, + { + "epoch": 1.809234619140625e-05, + "step": 11857, + "training_step_time": 0.11179566383361816 + }, + { + "epoch": 1.80938720703125e-05, + "model_forward_time": 0.027478456497192383, + "step": 11858 + }, + { + "epoch": 1.80938720703125e-05, + "step": 11858, + "training_step_time": 0.10853338241577148 + }, + { + "epoch": 1.809539794921875e-05, + "model_forward_time": 0.02540898323059082, + "step": 11859 + }, + { + "epoch": 1.809539794921875e-05, + "step": 11859, + "training_step_time": 0.11035943031311035 + }, + { + "epoch": 1.8096923828125e-05, + "grad_norm": 0.2759787142276764, + "learning_rate": 7.078897987851993e-05, + "loss": 0.0268, + "step": 11860 + }, + { + "epoch": 1.8096923828125e-05, + "model_forward_time": 0.025503158569335938, + "step": 11860 + }, + { + "epoch": 1.8096923828125e-05, + "step": 11860, + "training_step_time": 0.11227083206176758 + }, + { + "epoch": 1.809844970703125e-05, + "model_forward_time": 0.02586817741394043, + "step": 11861 + }, + { + "epoch": 1.809844970703125e-05, + "step": 11861, + "training_step_time": 0.12222790718078613 + }, + { + "epoch": 1.80999755859375e-05, + "model_forward_time": 0.025417089462280273, + "step": 11862 + }, + { + "epoch": 1.80999755859375e-05, + "step": 11862, + "training_step_time": 0.12229728698730469 + }, + { + "epoch": 1.810150146484375e-05, + "model_forward_time": 0.025314807891845703, + "step": 11863 + }, + { + "epoch": 1.810150146484375e-05, + "step": 11863, + "training_step_time": 0.11486244201660156 + }, + { + "epoch": 1.810302734375e-05, + "model_forward_time": 0.02563762664794922, + "step": 11864 + }, + { + "epoch": 1.810302734375e-05, + "step": 11864, + "training_step_time": 0.11915469169616699 + }, + { + "epoch": 1.810455322265625e-05, + "model_forward_time": 0.02538013458251953, + "step": 11865 + }, + { + "epoch": 1.810455322265625e-05, + "step": 11865, + "training_step_time": 0.11410689353942871 + }, + { + "epoch": 1.81060791015625e-05, + "model_forward_time": 0.025915861129760742, + "step": 11866 + }, + { + "epoch": 1.81060791015625e-05, + "step": 11866, + "training_step_time": 0.10910224914550781 + }, + { + "epoch": 1.810760498046875e-05, + "model_forward_time": 0.02537369728088379, + "step": 11867 + }, + { + "epoch": 1.810760498046875e-05, + "step": 11867, + "training_step_time": 0.1100761890411377 + }, + { + "epoch": 1.8109130859375e-05, + "model_forward_time": 0.02442002296447754, + "step": 11868 + }, + { + "epoch": 1.8109130859375e-05, + "step": 11868, + "training_step_time": 0.10900139808654785 + }, + { + "epoch": 1.811065673828125e-05, + "model_forward_time": 0.024627685546875, + "step": 11869 + }, + { + "epoch": 1.811065673828125e-05, + "step": 11869, + "training_step_time": 0.10837507247924805 + }, + { + "epoch": 1.81121826171875e-05, + "grad_norm": 0.40154775977134705, + "learning_rate": 7.073884147170452e-05, + "loss": 0.02, + "step": 11870 + }, + { + "epoch": 1.81121826171875e-05, + "model_forward_time": 0.025444507598876953, + "step": 11870 + }, + { + "epoch": 1.81121826171875e-05, + "step": 11870, + "training_step_time": 0.10881567001342773 + }, + { + "epoch": 1.811370849609375e-05, + "model_forward_time": 0.025404691696166992, + "step": 11871 + }, + { + "epoch": 1.811370849609375e-05, + "step": 11871, + "training_step_time": 0.10889887809753418 + }, + { + "epoch": 1.8115234375e-05, + "model_forward_time": 0.025213003158569336, + "step": 11872 + }, + { + "epoch": 1.8115234375e-05, + "step": 11872, + "training_step_time": 0.1129615306854248 + }, + { + "epoch": 1.811676025390625e-05, + "model_forward_time": 0.025122404098510742, + "step": 11873 + }, + { + "epoch": 1.811676025390625e-05, + "step": 11873, + "training_step_time": 0.163055419921875 + }, + { + "epoch": 1.81182861328125e-05, + "model_forward_time": 0.025009632110595703, + "step": 11874 + }, + { + "epoch": 1.81182861328125e-05, + "step": 11874, + "training_step_time": 0.11141037940979004 + }, + { + "epoch": 1.811981201171875e-05, + "model_forward_time": 0.02526235580444336, + "step": 11875 + }, + { + "epoch": 1.811981201171875e-05, + "step": 11875, + "training_step_time": 0.11639595031738281 + }, + { + "epoch": 1.8121337890625e-05, + "model_forward_time": 0.025880098342895508, + "step": 11876 + }, + { + "epoch": 1.8121337890625e-05, + "step": 11876, + "training_step_time": 0.11485481262207031 + }, + { + "epoch": 1.812286376953125e-05, + "model_forward_time": 0.02543354034423828, + "step": 11877 + }, + { + "epoch": 1.812286376953125e-05, + "step": 11877, + "training_step_time": 0.1216130256652832 + }, + { + "epoch": 1.81243896484375e-05, + "model_forward_time": 0.025363683700561523, + "step": 11878 + }, + { + "epoch": 1.81243896484375e-05, + "step": 11878, + "training_step_time": 0.11167597770690918 + }, + { + "epoch": 1.812591552734375e-05, + "model_forward_time": 0.02561020851135254, + "step": 11879 + }, + { + "epoch": 1.812591552734375e-05, + "step": 11879, + "training_step_time": 0.12296438217163086 + }, + { + "epoch": 1.812744140625e-05, + "grad_norm": 0.18449322879314423, + "learning_rate": 7.068867786524116e-05, + "loss": 0.032, + "step": 11880 + }, + { + "epoch": 1.812744140625e-05, + "model_forward_time": 0.025257587432861328, + "step": 11880 + }, + { + "epoch": 1.812744140625e-05, + "step": 11880, + "training_step_time": 0.11114859580993652 + }, + { + "epoch": 1.812896728515625e-05, + "model_forward_time": 0.02570056915283203, + "step": 11881 + }, + { + "epoch": 1.812896728515625e-05, + "step": 11881, + "training_step_time": 0.10738897323608398 + }, + { + "epoch": 1.81304931640625e-05, + "model_forward_time": 0.025901317596435547, + "step": 11882 + }, + { + "epoch": 1.81304931640625e-05, + "step": 11882, + "training_step_time": 0.10774636268615723 + }, + { + "epoch": 1.813201904296875e-05, + "model_forward_time": 0.025601625442504883, + "step": 11883 + }, + { + "epoch": 1.813201904296875e-05, + "step": 11883, + "training_step_time": 0.11485099792480469 + }, + { + "epoch": 1.8133544921875e-05, + "model_forward_time": 0.025133371353149414, + "step": 11884 + }, + { + "epoch": 1.8133544921875e-05, + "step": 11884, + "training_step_time": 0.10707569122314453 + }, + { + "epoch": 1.813507080078125e-05, + "model_forward_time": 0.025577068328857422, + "step": 11885 + }, + { + "epoch": 1.813507080078125e-05, + "step": 11885, + "training_step_time": 0.1050560474395752 + }, + { + "epoch": 1.81365966796875e-05, + "model_forward_time": 0.025252103805541992, + "step": 11886 + }, + { + "epoch": 1.81365966796875e-05, + "step": 11886, + "training_step_time": 0.12417411804199219 + }, + { + "epoch": 1.813812255859375e-05, + "model_forward_time": 0.02517557144165039, + "step": 11887 + }, + { + "epoch": 1.813812255859375e-05, + "step": 11887, + "training_step_time": 0.1231238842010498 + }, + { + "epoch": 1.81396484375e-05, + "model_forward_time": 0.025306224822998047, + "step": 11888 + }, + { + "epoch": 1.81396484375e-05, + "step": 11888, + "training_step_time": 0.10798764228820801 + }, + { + "epoch": 1.814117431640625e-05, + "model_forward_time": 0.02523326873779297, + "step": 11889 + }, + { + "epoch": 1.814117431640625e-05, + "step": 11889, + "training_step_time": 0.11379218101501465 + }, + { + "epoch": 1.81427001953125e-05, + "grad_norm": 0.43687736988067627, + "learning_rate": 7.06384891200834e-05, + "loss": 0.0194, + "step": 11890 + }, + { + "epoch": 1.81427001953125e-05, + "model_forward_time": 0.02571249008178711, + "step": 11890 + }, + { + "epoch": 1.81427001953125e-05, + "step": 11890, + "training_step_time": 0.12089157104492188 + }, + { + "epoch": 1.814422607421875e-05, + "model_forward_time": 0.025255918502807617, + "step": 11891 + }, + { + "epoch": 1.814422607421875e-05, + "step": 11891, + "training_step_time": 0.12742257118225098 + }, + { + "epoch": 1.8145751953125e-05, + "model_forward_time": 0.025601625442504883, + "step": 11892 + }, + { + "epoch": 1.8145751953125e-05, + "step": 11892, + "training_step_time": 0.11220884323120117 + }, + { + "epoch": 1.814727783203125e-05, + "model_forward_time": 0.02551126480102539, + "step": 11893 + }, + { + "epoch": 1.814727783203125e-05, + "step": 11893, + "training_step_time": 0.11354947090148926 + }, + { + "epoch": 1.81488037109375e-05, + "model_forward_time": 0.02521657943725586, + "step": 11894 + }, + { + "epoch": 1.81488037109375e-05, + "step": 11894, + "training_step_time": 0.11655473709106445 + }, + { + "epoch": 1.815032958984375e-05, + "model_forward_time": 0.025279521942138672, + "step": 11895 + }, + { + "epoch": 1.815032958984375e-05, + "step": 11895, + "training_step_time": 0.10974621772766113 + }, + { + "epoch": 1.815185546875e-05, + "model_forward_time": 0.025346994400024414, + "step": 11896 + }, + { + "epoch": 1.815185546875e-05, + "step": 11896, + "training_step_time": 0.19293427467346191 + }, + { + "epoch": 1.815338134765625e-05, + "model_forward_time": 0.024799108505249023, + "step": 11897 + }, + { + "epoch": 1.815338134765625e-05, + "step": 11897, + "training_step_time": 0.19765734672546387 + }, + { + "epoch": 1.81549072265625e-05, + "model_forward_time": 0.02512335777282715, + "step": 11898 + }, + { + "epoch": 1.81549072265625e-05, + "step": 11898, + "training_step_time": 0.12115073204040527 + }, + { + "epoch": 1.815643310546875e-05, + "model_forward_time": 0.02482891082763672, + "step": 11899 + }, + { + "epoch": 1.815643310546875e-05, + "step": 11899, + "training_step_time": 0.13337302207946777 + }, + { + "epoch": 1.8157958984375e-05, + "grad_norm": 0.2076655775308609, + "learning_rate": 7.058827529721525e-05, + "loss": 0.0209, + "step": 11900 + }, + { + "epoch": 1.8157958984375e-05, + "model_forward_time": 0.024956941604614258, + "step": 11900 + }, + { + "epoch": 1.8157958984375e-05, + "step": 11900, + "training_step_time": 0.10858893394470215 + }, + { + "epoch": 1.815948486328125e-05, + "model_forward_time": 0.02528238296508789, + "step": 11901 + }, + { + "epoch": 1.815948486328125e-05, + "step": 11901, + "training_step_time": 0.17809653282165527 + }, + { + "epoch": 1.81610107421875e-05, + "model_forward_time": 0.024624109268188477, + "step": 11902 + }, + { + "epoch": 1.81610107421875e-05, + "step": 11902, + "training_step_time": 0.1360483169555664 + }, + { + "epoch": 1.816253662109375e-05, + "model_forward_time": 0.025101184844970703, + "step": 11903 + }, + { + "epoch": 1.816253662109375e-05, + "step": 11903, + "training_step_time": 0.11453604698181152 + }, + { + "epoch": 1.81640625e-05, + "model_forward_time": 0.02535557746887207, + "step": 11904 + }, + { + "epoch": 1.81640625e-05, + "step": 11904, + "training_step_time": 0.11059975624084473 + }, + { + "epoch": 1.816558837890625e-05, + "model_forward_time": 0.02505970001220703, + "step": 11905 + }, + { + "epoch": 1.816558837890625e-05, + "step": 11905, + "training_step_time": 0.10806918144226074 + }, + { + "epoch": 1.81671142578125e-05, + "model_forward_time": 0.02533578872680664, + "step": 11906 + }, + { + "epoch": 1.81671142578125e-05, + "step": 11906, + "training_step_time": 0.10772705078125 + }, + { + "epoch": 1.816864013671875e-05, + "model_forward_time": 0.025324583053588867, + "step": 11907 + }, + { + "epoch": 1.816864013671875e-05, + "step": 11907, + "training_step_time": 0.1111443042755127 + }, + { + "epoch": 1.8170166015625e-05, + "model_forward_time": 0.024606943130493164, + "step": 11908 + }, + { + "epoch": 1.8170166015625e-05, + "step": 11908, + "training_step_time": 0.11343693733215332 + }, + { + "epoch": 1.817169189453125e-05, + "model_forward_time": 0.024335861206054688, + "step": 11909 + }, + { + "epoch": 1.817169189453125e-05, + "step": 11909, + "training_step_time": 0.1142129898071289 + }, + { + "epoch": 1.81732177734375e-05, + "grad_norm": 0.48608723282814026, + "learning_rate": 7.053803645765128e-05, + "loss": 0.0218, + "step": 11910 + }, + { + "epoch": 1.81732177734375e-05, + "model_forward_time": 0.0243072509765625, + "step": 11910 + }, + { + "epoch": 1.81732177734375e-05, + "step": 11910, + "training_step_time": 0.11305618286132812 + }, + { + "epoch": 1.817474365234375e-05, + "model_forward_time": 0.026094913482666016, + "step": 11911 + }, + { + "epoch": 1.817474365234375e-05, + "step": 11911, + "training_step_time": 0.11337161064147949 + }, + { + "epoch": 1.817626953125e-05, + "model_forward_time": 0.02553081512451172, + "step": 11912 + }, + { + "epoch": 1.817626953125e-05, + "step": 11912, + "training_step_time": 0.11325335502624512 + }, + { + "epoch": 1.817779541015625e-05, + "model_forward_time": 0.025168895721435547, + "step": 11913 + }, + { + "epoch": 1.817779541015625e-05, + "step": 11913, + "training_step_time": 0.10983967781066895 + }, + { + "epoch": 1.81793212890625e-05, + "model_forward_time": 0.02547287940979004, + "step": 11914 + }, + { + "epoch": 1.81793212890625e-05, + "step": 11914, + "training_step_time": 0.11479926109313965 + }, + { + "epoch": 1.818084716796875e-05, + "model_forward_time": 0.025351285934448242, + "step": 11915 + }, + { + "epoch": 1.818084716796875e-05, + "step": 11915, + "training_step_time": 0.11152219772338867 + }, + { + "epoch": 1.8182373046875e-05, + "model_forward_time": 0.025853633880615234, + "step": 11916 + }, + { + "epoch": 1.8182373046875e-05, + "step": 11916, + "training_step_time": 0.11192202568054199 + }, + { + "epoch": 1.818389892578125e-05, + "model_forward_time": 0.025203227996826172, + "step": 11917 + }, + { + "epoch": 1.818389892578125e-05, + "step": 11917, + "training_step_time": 0.10853385925292969 + }, + { + "epoch": 1.81854248046875e-05, + "model_forward_time": 0.025252580642700195, + "step": 11918 + }, + { + "epoch": 1.81854248046875e-05, + "step": 11918, + "training_step_time": 0.11115026473999023 + }, + { + "epoch": 1.818695068359375e-05, + "model_forward_time": 0.025269508361816406, + "step": 11919 + }, + { + "epoch": 1.818695068359375e-05, + "step": 11919, + "training_step_time": 0.18666505813598633 + }, + { + "epoch": 1.81884765625e-05, + "grad_norm": 0.2840731143951416, + "learning_rate": 7.04877726624364e-05, + "loss": 0.0183, + "step": 11920 + }, + { + "epoch": 1.81884765625e-05, + "model_forward_time": 0.024741649627685547, + "step": 11920 + }, + { + "epoch": 1.81884765625e-05, + "step": 11920, + "training_step_time": 0.1402902603149414 + }, + { + "epoch": 1.819000244140625e-05, + "model_forward_time": 0.024918317794799805, + "step": 11921 + }, + { + "epoch": 1.819000244140625e-05, + "step": 11921, + "training_step_time": 0.11569976806640625 + }, + { + "epoch": 1.81915283203125e-05, + "model_forward_time": 0.024883508682250977, + "step": 11922 + }, + { + "epoch": 1.81915283203125e-05, + "step": 11922, + "training_step_time": 0.1259171962738037 + }, + { + "epoch": 1.819305419921875e-05, + "model_forward_time": 0.025394439697265625, + "step": 11923 + }, + { + "epoch": 1.819305419921875e-05, + "step": 11923, + "training_step_time": 0.11852478981018066 + }, + { + "epoch": 1.8194580078125e-05, + "model_forward_time": 0.025624990463256836, + "step": 11924 + }, + { + "epoch": 1.8194580078125e-05, + "step": 11924, + "training_step_time": 0.13027596473693848 + }, + { + "epoch": 1.819610595703125e-05, + "model_forward_time": 0.025112390518188477, + "step": 11925 + }, + { + "epoch": 1.819610595703125e-05, + "step": 11925, + "training_step_time": 0.11115908622741699 + }, + { + "epoch": 1.81976318359375e-05, + "model_forward_time": 0.025536537170410156, + "step": 11926 + }, + { + "epoch": 1.81976318359375e-05, + "step": 11926, + "training_step_time": 0.11501097679138184 + }, + { + "epoch": 1.819915771484375e-05, + "model_forward_time": 0.025500774383544922, + "step": 11927 + }, + { + "epoch": 1.819915771484375e-05, + "step": 11927, + "training_step_time": 0.11031556129455566 + }, + { + "epoch": 1.820068359375e-05, + "model_forward_time": 0.025410175323486328, + "step": 11928 + }, + { + "epoch": 1.820068359375e-05, + "step": 11928, + "training_step_time": 0.10895919799804688 + }, + { + "epoch": 1.820220947265625e-05, + "model_forward_time": 0.024275541305541992, + "step": 11929 + }, + { + "epoch": 1.820220947265625e-05, + "step": 11929, + "training_step_time": 0.1081991195678711 + }, + { + "epoch": 1.82037353515625e-05, + "grad_norm": 0.3231754004955292, + "learning_rate": 7.043748397264587e-05, + "loss": 0.0208, + "step": 11930 + }, + { + "epoch": 1.82037353515625e-05, + "model_forward_time": 0.02547287940979004, + "step": 11930 + }, + { + "epoch": 1.82037353515625e-05, + "step": 11930, + "training_step_time": 0.10900688171386719 + }, + { + "epoch": 1.820526123046875e-05, + "model_forward_time": 0.02678704261779785, + "step": 11931 + }, + { + "epoch": 1.820526123046875e-05, + "step": 11931, + "training_step_time": 0.11053705215454102 + }, + { + "epoch": 1.8206787109375e-05, + "model_forward_time": 0.026015520095825195, + "step": 11932 + }, + { + "epoch": 1.8206787109375e-05, + "step": 11932, + "training_step_time": 0.10813021659851074 + }, + { + "epoch": 1.820831298828125e-05, + "model_forward_time": 0.025629758834838867, + "step": 11933 + }, + { + "epoch": 1.820831298828125e-05, + "step": 11933, + "training_step_time": 0.14456939697265625 + }, + { + "epoch": 1.82098388671875e-05, + "model_forward_time": 0.02581048011779785, + "step": 11934 + }, + { + "epoch": 1.82098388671875e-05, + "step": 11934, + "training_step_time": 0.1088559627532959 + }, + { + "epoch": 1.821136474609375e-05, + "model_forward_time": 0.026561737060546875, + "step": 11935 + }, + { + "epoch": 1.821136474609375e-05, + "step": 11935, + "training_step_time": 0.198958158493042 + }, + { + "epoch": 1.8212890625e-05, + "model_forward_time": 0.024384498596191406, + "step": 11936 + }, + { + "epoch": 1.8212890625e-05, + "step": 11936, + "training_step_time": 0.18587398529052734 + }, + { + "epoch": 1.821441650390625e-05, + "model_forward_time": 0.025376081466674805, + "step": 11937 + }, + { + "epoch": 1.821441650390625e-05, + "step": 11937, + "training_step_time": 0.15593957901000977 + }, + { + "epoch": 1.82159423828125e-05, + "model_forward_time": 0.0261688232421875, + "step": 11938 + }, + { + "epoch": 1.82159423828125e-05, + "step": 11938, + "training_step_time": 0.18075871467590332 + }, + { + "epoch": 1.821746826171875e-05, + "model_forward_time": 0.024759531021118164, + "step": 11939 + }, + { + "epoch": 1.821746826171875e-05, + "step": 11939, + "training_step_time": 0.10346627235412598 + }, + { + "epoch": 1.8218994140625e-05, + "grad_norm": 0.21150803565979004, + "learning_rate": 7.038717044938519e-05, + "loss": 0.0185, + "step": 11940 + }, + { + "epoch": 1.8218994140625e-05, + "model_forward_time": 0.02482318878173828, + "step": 11940 + }, + { + "epoch": 1.8218994140625e-05, + "step": 11940, + "training_step_time": 0.10776424407958984 + }, + { + "epoch": 1.822052001953125e-05, + "model_forward_time": 0.025606155395507812, + "step": 11941 + }, + { + "epoch": 1.822052001953125e-05, + "step": 11941, + "training_step_time": 0.10492753982543945 + }, + { + "epoch": 1.82220458984375e-05, + "model_forward_time": 0.02588629722595215, + "step": 11942 + }, + { + "epoch": 1.82220458984375e-05, + "step": 11942, + "training_step_time": 0.1085667610168457 + }, + { + "epoch": 1.822357177734375e-05, + "model_forward_time": 0.02530646324157715, + "step": 11943 + }, + { + "epoch": 1.822357177734375e-05, + "step": 11943, + "training_step_time": 0.15528392791748047 + }, + { + "epoch": 1.822509765625e-05, + "model_forward_time": 0.024695873260498047, + "step": 11944 + }, + { + "epoch": 1.822509765625e-05, + "step": 11944, + "training_step_time": 0.11482357978820801 + }, + { + "epoch": 1.822662353515625e-05, + "model_forward_time": 0.024913549423217773, + "step": 11945 + }, + { + "epoch": 1.822662353515625e-05, + "step": 11945, + "training_step_time": 0.1322178840637207 + }, + { + "epoch": 1.82281494140625e-05, + "model_forward_time": 0.025041580200195312, + "step": 11946 + }, + { + "epoch": 1.82281494140625e-05, + "step": 11946, + "training_step_time": 0.13593602180480957 + }, + { + "epoch": 1.822967529296875e-05, + "model_forward_time": 0.02477741241455078, + "step": 11947 + }, + { + "epoch": 1.822967529296875e-05, + "step": 11947, + "training_step_time": 0.1152200698852539 + }, + { + "epoch": 1.8231201171875e-05, + "model_forward_time": 0.025507688522338867, + "step": 11948 + }, + { + "epoch": 1.8231201171875e-05, + "step": 11948, + "training_step_time": 0.12780261039733887 + }, + { + "epoch": 1.823272705078125e-05, + "model_forward_time": 0.025537967681884766, + "step": 11949 + }, + { + "epoch": 1.823272705078125e-05, + "step": 11949, + "training_step_time": 0.11397361755371094 + }, + { + "epoch": 1.82342529296875e-05, + "grad_norm": 0.4072251319885254, + "learning_rate": 7.033683215379002e-05, + "loss": 0.014, + "step": 11950 + }, + { + "epoch": 1.82342529296875e-05, + "model_forward_time": 0.025521278381347656, + "step": 11950 + }, + { + "epoch": 1.82342529296875e-05, + "step": 11950, + "training_step_time": 0.10644054412841797 + }, + { + "epoch": 1.823577880859375e-05, + "model_forward_time": 0.02577829360961914, + "step": 11951 + }, + { + "epoch": 1.823577880859375e-05, + "step": 11951, + "training_step_time": 0.12633109092712402 + }, + { + "epoch": 1.82373046875e-05, + "model_forward_time": 0.0243985652923584, + "step": 11952 + }, + { + "epoch": 1.82373046875e-05, + "step": 11952, + "training_step_time": 0.16186285018920898 + }, + { + "epoch": 1.823883056640625e-05, + "model_forward_time": 0.02521061897277832, + "step": 11953 + }, + { + "epoch": 1.823883056640625e-05, + "step": 11953, + "training_step_time": 0.1615769863128662 + }, + { + "epoch": 1.82403564453125e-05, + "model_forward_time": 0.024550914764404297, + "step": 11954 + }, + { + "epoch": 1.82403564453125e-05, + "step": 11954, + "training_step_time": 0.16106295585632324 + }, + { + "epoch": 1.824188232421875e-05, + "model_forward_time": 0.02491593360900879, + "step": 11955 + }, + { + "epoch": 1.824188232421875e-05, + "step": 11955, + "training_step_time": 0.13564062118530273 + }, + { + "epoch": 1.8243408203125e-05, + "model_forward_time": 0.024775028228759766, + "step": 11956 + }, + { + "epoch": 1.8243408203125e-05, + "step": 11956, + "training_step_time": 0.1397690773010254 + }, + { + "epoch": 1.824493408203125e-05, + "model_forward_time": 0.024669885635375977, + "step": 11957 + }, + { + "epoch": 1.824493408203125e-05, + "step": 11957, + "training_step_time": 0.12720513343811035 + }, + { + "epoch": 1.82464599609375e-05, + "model_forward_time": 0.025741100311279297, + "step": 11958 + }, + { + "epoch": 1.82464599609375e-05, + "step": 11958, + "training_step_time": 0.12394833564758301 + }, + { + "epoch": 1.824798583984375e-05, + "model_forward_time": 0.027563810348510742, + "step": 11959 + }, + { + "epoch": 1.824798583984375e-05, + "step": 11959, + "training_step_time": 0.12021398544311523 + }, + { + "epoch": 1.824951171875e-05, + "grad_norm": 0.36168545484542847, + "learning_rate": 7.028646914702614e-05, + "loss": 0.017, + "step": 11960 + }, + { + "epoch": 1.824951171875e-05, + "model_forward_time": 0.024377107620239258, + "step": 11960 + }, + { + "epoch": 1.824951171875e-05, + "step": 11960, + "training_step_time": 0.11315059661865234 + }, + { + "epoch": 1.825103759765625e-05, + "model_forward_time": 0.02544093132019043, + "step": 11961 + }, + { + "epoch": 1.825103759765625e-05, + "step": 11961, + "training_step_time": 0.11243128776550293 + }, + { + "epoch": 1.82525634765625e-05, + "model_forward_time": 0.026935577392578125, + "step": 11962 + }, + { + "epoch": 1.82525634765625e-05, + "step": 11962, + "training_step_time": 0.11411857604980469 + }, + { + "epoch": 1.825408935546875e-05, + "model_forward_time": 0.02548694610595703, + "step": 11963 + }, + { + "epoch": 1.825408935546875e-05, + "step": 11963, + "training_step_time": 0.11361551284790039 + }, + { + "epoch": 1.8255615234375e-05, + "model_forward_time": 0.026876449584960938, + "step": 11964 + }, + { + "epoch": 1.8255615234375e-05, + "step": 11964, + "training_step_time": 0.1275477409362793 + }, + { + "epoch": 1.825714111328125e-05, + "model_forward_time": 0.025558948516845703, + "step": 11965 + }, + { + "epoch": 1.825714111328125e-05, + "step": 11965, + "training_step_time": 0.1113595962524414 + }, + { + "epoch": 1.82586669921875e-05, + "model_forward_time": 0.025162220001220703, + "step": 11966 + }, + { + "epoch": 1.82586669921875e-05, + "step": 11966, + "training_step_time": 0.10969996452331543 + }, + { + "epoch": 1.826019287109375e-05, + "model_forward_time": 0.02564406394958496, + "step": 11967 + }, + { + "epoch": 1.826019287109375e-05, + "step": 11967, + "training_step_time": 0.1226496696472168 + }, + { + "epoch": 1.826171875e-05, + "model_forward_time": 0.025851011276245117, + "step": 11968 + }, + { + "epoch": 1.826171875e-05, + "step": 11968, + "training_step_time": 0.12860870361328125 + }, + { + "epoch": 1.826324462890625e-05, + "model_forward_time": 0.02527642250061035, + "step": 11969 + }, + { + "epoch": 1.826324462890625e-05, + "step": 11969, + "training_step_time": 0.10725736618041992 + }, + { + "epoch": 1.82647705078125e-05, + "grad_norm": 0.3516203761100769, + "learning_rate": 7.023608149028937e-05, + "loss": 0.0215, + "step": 11970 + }, + { + "epoch": 1.82647705078125e-05, + "model_forward_time": 0.027187585830688477, + "step": 11970 + }, + { + "epoch": 1.82647705078125e-05, + "step": 11970, + "training_step_time": 0.1186378002166748 + }, + { + "epoch": 1.826629638671875e-05, + "model_forward_time": 0.025493860244750977, + "step": 11971 + }, + { + "epoch": 1.826629638671875e-05, + "step": 11971, + "training_step_time": 0.10960865020751953 + }, + { + "epoch": 1.8267822265625e-05, + "model_forward_time": 0.025491952896118164, + "step": 11972 + }, + { + "epoch": 1.8267822265625e-05, + "step": 11972, + "training_step_time": 0.10711383819580078 + }, + { + "epoch": 1.826934814453125e-05, + "model_forward_time": 0.025420188903808594, + "step": 11973 + }, + { + "epoch": 1.826934814453125e-05, + "step": 11973, + "training_step_time": 0.10788583755493164 + }, + { + "epoch": 1.82708740234375e-05, + "model_forward_time": 0.02543807029724121, + "step": 11974 + }, + { + "epoch": 1.82708740234375e-05, + "step": 11974, + "training_step_time": 0.1073615550994873 + }, + { + "epoch": 1.827239990234375e-05, + "model_forward_time": 0.02555704116821289, + "step": 11975 + }, + { + "epoch": 1.827239990234375e-05, + "step": 11975, + "training_step_time": 0.10794305801391602 + }, + { + "epoch": 1.827392578125e-05, + "model_forward_time": 0.025316476821899414, + "step": 11976 + }, + { + "epoch": 1.827392578125e-05, + "step": 11976, + "training_step_time": 0.10689735412597656 + }, + { + "epoch": 1.827545166015625e-05, + "model_forward_time": 0.025530338287353516, + "step": 11977 + }, + { + "epoch": 1.827545166015625e-05, + "step": 11977, + "training_step_time": 0.10715746879577637 + }, + { + "epoch": 1.82769775390625e-05, + "model_forward_time": 0.02518749237060547, + "step": 11978 + }, + { + "epoch": 1.82769775390625e-05, + "step": 11978, + "training_step_time": 0.14312481880187988 + }, + { + "epoch": 1.827850341796875e-05, + "model_forward_time": 0.025446176528930664, + "step": 11979 + }, + { + "epoch": 1.827850341796875e-05, + "step": 11979, + "training_step_time": 0.1727156639099121 + }, + { + "epoch": 1.8280029296875e-05, + "grad_norm": 0.47945356369018555, + "learning_rate": 7.018566924480543e-05, + "loss": 0.0236, + "step": 11980 + }, + { + "epoch": 1.8280029296875e-05, + "model_forward_time": 0.025285005569458008, + "step": 11980 + }, + { + "epoch": 1.8280029296875e-05, + "step": 11980, + "training_step_time": 0.1599712371826172 + }, + { + "epoch": 1.828155517578125e-05, + "model_forward_time": 0.02505350112915039, + "step": 11981 + }, + { + "epoch": 1.828155517578125e-05, + "step": 11981, + "training_step_time": 0.17855405807495117 + }, + { + "epoch": 1.82830810546875e-05, + "model_forward_time": 0.024480819702148438, + "step": 11982 + }, + { + "epoch": 1.82830810546875e-05, + "step": 11982, + "training_step_time": 0.18897771835327148 + }, + { + "epoch": 1.828460693359375e-05, + "model_forward_time": 0.024651765823364258, + "step": 11983 + }, + { + "epoch": 1.828460693359375e-05, + "step": 11983, + "training_step_time": 0.10640764236450195 + }, + { + "epoch": 1.82861328125e-05, + "model_forward_time": 0.02447819709777832, + "step": 11984 + }, + { + "epoch": 1.82861328125e-05, + "step": 11984, + "training_step_time": 0.11172819137573242 + }, + { + "epoch": 1.828765869140625e-05, + "model_forward_time": 0.0251615047454834, + "step": 11985 + }, + { + "epoch": 1.828765869140625e-05, + "step": 11985, + "training_step_time": 0.19770359992980957 + }, + { + "epoch": 1.82891845703125e-05, + "model_forward_time": 0.024591922760009766, + "step": 11986 + }, + { + "epoch": 1.82891845703125e-05, + "step": 11986, + "training_step_time": 0.10596251487731934 + }, + { + "epoch": 1.829071044921875e-05, + "model_forward_time": 0.02465057373046875, + "step": 11987 + }, + { + "epoch": 1.829071044921875e-05, + "step": 11987, + "training_step_time": 0.1787106990814209 + }, + { + "epoch": 1.8292236328125e-05, + "model_forward_time": 0.02493739128112793, + "step": 11988 + }, + { + "epoch": 1.8292236328125e-05, + "step": 11988, + "training_step_time": 0.13008356094360352 + }, + { + "epoch": 1.829376220703125e-05, + "model_forward_time": 0.02479410171508789, + "step": 11989 + }, + { + "epoch": 1.829376220703125e-05, + "step": 11989, + "training_step_time": 0.1316540241241455 + }, + { + "epoch": 1.82952880859375e-05, + "grad_norm": 0.2896246910095215, + "learning_rate": 7.013523247183e-05, + "loss": 0.0228, + "step": 11990 + }, + { + "epoch": 1.82952880859375e-05, + "model_forward_time": 0.02452993392944336, + "step": 11990 + }, + { + "epoch": 1.82952880859375e-05, + "step": 11990, + "training_step_time": 0.14707183837890625 + }, + { + "epoch": 1.829681396484375e-05, + "model_forward_time": 0.024994611740112305, + "step": 11991 + }, + { + "epoch": 1.829681396484375e-05, + "step": 11991, + "training_step_time": 0.23108363151550293 + }, + { + "epoch": 1.829833984375e-05, + "model_forward_time": 0.024966955184936523, + "step": 11992 + }, + { + "epoch": 1.829833984375e-05, + "step": 11992, + "training_step_time": 0.11815857887268066 + }, + { + "epoch": 1.829986572265625e-05, + "model_forward_time": 0.0237424373626709, + "step": 11993 + }, + { + "epoch": 1.829986572265625e-05, + "step": 11993, + "training_step_time": 0.11592936515808105 + }, + { + "epoch": 1.83013916015625e-05, + "model_forward_time": 0.024388790130615234, + "step": 11994 + }, + { + "epoch": 1.83013916015625e-05, + "step": 11994, + "training_step_time": 0.11292243003845215 + }, + { + "epoch": 1.830291748046875e-05, + "model_forward_time": 0.024291515350341797, + "step": 11995 + }, + { + "epoch": 1.830291748046875e-05, + "step": 11995, + "training_step_time": 0.11221694946289062 + }, + { + "epoch": 1.8304443359375e-05, + "model_forward_time": 0.025256872177124023, + "step": 11996 + }, + { + "epoch": 1.8304443359375e-05, + "step": 11996, + "training_step_time": 0.10759568214416504 + }, + { + "epoch": 1.830596923828125e-05, + "model_forward_time": 0.025072574615478516, + "step": 11997 + }, + { + "epoch": 1.830596923828125e-05, + "step": 11997, + "training_step_time": 0.1103677749633789 + }, + { + "epoch": 1.83074951171875e-05, + "model_forward_time": 0.025444746017456055, + "step": 11998 + }, + { + "epoch": 1.83074951171875e-05, + "step": 11998, + "training_step_time": 0.11180949211120605 + }, + { + "epoch": 1.830902099609375e-05, + "model_forward_time": 0.02547001838684082, + "step": 11999 + }, + { + "epoch": 1.830902099609375e-05, + "step": 11999, + "training_step_time": 0.11112141609191895 + }, + { + "epoch": 1.8310546875e-05, + "grad_norm": 0.4744170308113098, + "learning_rate": 7.008477123264848e-05, + "loss": 0.0324, + "step": 12000 + }, + { + "epoch": 1.8310546875e-05, + "model_forward_time": 0.024791479110717773, + "step": 12000 + }, + { + "epoch": 1.8310546875e-05, + "step": 12000, + "training_step_time": 0.10333871841430664 + }, + { + "epoch": 1.831207275390625e-05, + "model_forward_time": 0.023221254348754883, + "step": 12001 + }, + { + "epoch": 1.831207275390625e-05, + "step": 12001, + "training_step_time": 0.10272741317749023 + }, + { + "epoch": 1.83135986328125e-05, + "model_forward_time": 0.024647951126098633, + "step": 12002 + }, + { + "epoch": 1.83135986328125e-05, + "step": 12002, + "training_step_time": 0.13118982315063477 + }, + { + "epoch": 1.831512451171875e-05, + "model_forward_time": 0.025197982788085938, + "step": 12003 + }, + { + "epoch": 1.831512451171875e-05, + "step": 12003, + "training_step_time": 0.11956167221069336 + }, + { + "epoch": 1.8316650390625e-05, + "model_forward_time": 0.024960041046142578, + "step": 12004 + }, + { + "epoch": 1.8316650390625e-05, + "step": 12004, + "training_step_time": 0.10664987564086914 + }, + { + "epoch": 1.831817626953125e-05, + "model_forward_time": 0.02516341209411621, + "step": 12005 + }, + { + "epoch": 1.831817626953125e-05, + "step": 12005, + "training_step_time": 0.106658935546875 + }, + { + "epoch": 1.83197021484375e-05, + "model_forward_time": 0.025056838989257812, + "step": 12006 + }, + { + "epoch": 1.83197021484375e-05, + "step": 12006, + "training_step_time": 0.10868263244628906 + }, + { + "epoch": 1.832122802734375e-05, + "model_forward_time": 0.025405406951904297, + "step": 12007 + }, + { + "epoch": 1.832122802734375e-05, + "step": 12007, + "training_step_time": 0.10533976554870605 + }, + { + "epoch": 1.832275390625e-05, + "model_forward_time": 0.02851414680480957, + "step": 12008 + }, + { + "epoch": 1.832275390625e-05, + "step": 12008, + "training_step_time": 0.11095070838928223 + }, + { + "epoch": 1.832427978515625e-05, + "model_forward_time": 0.025354385375976562, + "step": 12009 + }, + { + "epoch": 1.832427978515625e-05, + "step": 12009, + "training_step_time": 0.11118721961975098 + }, + { + "epoch": 1.83258056640625e-05, + "grad_norm": 0.5417850613594055, + "learning_rate": 7.003428558857604e-05, + "loss": 0.0235, + "step": 12010 + }, + { + "epoch": 1.83258056640625e-05, + "model_forward_time": 0.024917125701904297, + "step": 12010 + }, + { + "epoch": 1.83258056640625e-05, + "step": 12010, + "training_step_time": 0.10479569435119629 + }, + { + "epoch": 1.832733154296875e-05, + "model_forward_time": 0.02581787109375, + "step": 12011 + }, + { + "epoch": 1.832733154296875e-05, + "step": 12011, + "training_step_time": 0.10652303695678711 + }, + { + "epoch": 1.8328857421875e-05, + "model_forward_time": 0.0252072811126709, + "step": 12012 + }, + { + "epoch": 1.8328857421875e-05, + "step": 12012, + "training_step_time": 0.10690808296203613 + }, + { + "epoch": 1.833038330078125e-05, + "model_forward_time": 0.02561473846435547, + "step": 12013 + }, + { + "epoch": 1.833038330078125e-05, + "step": 12013, + "training_step_time": 0.13345026969909668 + }, + { + "epoch": 1.83319091796875e-05, + "model_forward_time": 0.025255918502807617, + "step": 12014 + }, + { + "epoch": 1.83319091796875e-05, + "step": 12014, + "training_step_time": 0.11126542091369629 + }, + { + "epoch": 1.833343505859375e-05, + "model_forward_time": 0.025852441787719727, + "step": 12015 + }, + { + "epoch": 1.833343505859375e-05, + "step": 12015, + "training_step_time": 0.1119697093963623 + }, + { + "epoch": 1.83349609375e-05, + "model_forward_time": 0.027318954467773438, + "step": 12016 + }, + { + "epoch": 1.83349609375e-05, + "step": 12016, + "training_step_time": 0.11923694610595703 + }, + { + "epoch": 1.833648681640625e-05, + "model_forward_time": 0.025828123092651367, + "step": 12017 + }, + { + "epoch": 1.833648681640625e-05, + "step": 12017, + "training_step_time": 0.11551046371459961 + }, + { + "epoch": 1.83380126953125e-05, + "model_forward_time": 0.027040719985961914, + "step": 12018 + }, + { + "epoch": 1.83380126953125e-05, + "step": 12018, + "training_step_time": 0.11421847343444824 + }, + { + "epoch": 1.833953857421875e-05, + "model_forward_time": 0.025554656982421875, + "step": 12019 + }, + { + "epoch": 1.833953857421875e-05, + "step": 12019, + "training_step_time": 0.16489005088806152 + }, + { + "epoch": 1.8341064453125e-05, + "grad_norm": 0.4069768190383911, + "learning_rate": 6.99837756009575e-05, + "loss": 0.0192, + "step": 12020 + }, + { + "epoch": 1.8341064453125e-05, + "model_forward_time": 0.02474355697631836, + "step": 12020 + }, + { + "epoch": 1.8341064453125e-05, + "step": 12020, + "training_step_time": 0.11671948432922363 + }, + { + "epoch": 1.834259033203125e-05, + "model_forward_time": 0.02435612678527832, + "step": 12021 + }, + { + "epoch": 1.834259033203125e-05, + "step": 12021, + "training_step_time": 0.21508049964904785 + }, + { + "epoch": 1.83441162109375e-05, + "model_forward_time": 0.02411198616027832, + "step": 12022 + }, + { + "epoch": 1.83441162109375e-05, + "step": 12022, + "training_step_time": 0.13992547988891602 + }, + { + "epoch": 1.834564208984375e-05, + "model_forward_time": 0.024952411651611328, + "step": 12023 + }, + { + "epoch": 1.834564208984375e-05, + "step": 12023, + "training_step_time": 0.11423468589782715 + }, + { + "epoch": 1.834716796875e-05, + "model_forward_time": 0.02573871612548828, + "step": 12024 + }, + { + "epoch": 1.834716796875e-05, + "step": 12024, + "training_step_time": 0.12189459800720215 + }, + { + "epoch": 1.834869384765625e-05, + "model_forward_time": 0.024405956268310547, + "step": 12025 + }, + { + "epoch": 1.834869384765625e-05, + "step": 12025, + "training_step_time": 0.11147475242614746 + }, + { + "epoch": 1.83502197265625e-05, + "model_forward_time": 0.025376081466674805, + "step": 12026 + }, + { + "epoch": 1.83502197265625e-05, + "step": 12026, + "training_step_time": 0.11270976066589355 + }, + { + "epoch": 1.835174560546875e-05, + "model_forward_time": 0.02550220489501953, + "step": 12027 + }, + { + "epoch": 1.835174560546875e-05, + "step": 12027, + "training_step_time": 0.10857820510864258 + }, + { + "epoch": 1.8353271484375e-05, + "model_forward_time": 0.02524089813232422, + "step": 12028 + }, + { + "epoch": 1.8353271484375e-05, + "step": 12028, + "training_step_time": 0.10954976081848145 + }, + { + "epoch": 1.835479736328125e-05, + "model_forward_time": 0.024864673614501953, + "step": 12029 + }, + { + "epoch": 1.835479736328125e-05, + "step": 12029, + "training_step_time": 0.11189651489257812 + }, + { + "epoch": 1.83563232421875e-05, + "grad_norm": 0.46286314725875854, + "learning_rate": 6.993324133116726e-05, + "loss": 0.0186, + "step": 12030 + }, + { + "epoch": 1.83563232421875e-05, + "model_forward_time": 0.02532505989074707, + "step": 12030 + }, + { + "epoch": 1.83563232421875e-05, + "step": 12030, + "training_step_time": 0.10837435722351074 + }, + { + "epoch": 1.835784912109375e-05, + "model_forward_time": 0.024984359741210938, + "step": 12031 + }, + { + "epoch": 1.835784912109375e-05, + "step": 12031, + "training_step_time": 0.10553956031799316 + }, + { + "epoch": 1.8359375e-05, + "model_forward_time": 0.02545166015625, + "step": 12032 + }, + { + "epoch": 1.8359375e-05, + "step": 12032, + "training_step_time": 0.1087334156036377 + }, + { + "epoch": 1.836090087890625e-05, + "model_forward_time": 0.025705814361572266, + "step": 12033 + }, + { + "epoch": 1.836090087890625e-05, + "step": 12033, + "training_step_time": 0.1954329013824463 + }, + { + "epoch": 1.83624267578125e-05, + "model_forward_time": 0.024984121322631836, + "step": 12034 + }, + { + "epoch": 1.83624267578125e-05, + "step": 12034, + "training_step_time": 0.13936972618103027 + }, + { + "epoch": 1.836395263671875e-05, + "model_forward_time": 0.024842023849487305, + "step": 12035 + }, + { + "epoch": 1.836395263671875e-05, + "step": 12035, + "training_step_time": 0.162977933883667 + }, + { + "epoch": 1.8365478515625e-05, + "model_forward_time": 0.02440619468688965, + "step": 12036 + }, + { + "epoch": 1.8365478515625e-05, + "step": 12036, + "training_step_time": 0.11282873153686523 + }, + { + "epoch": 1.836700439453125e-05, + "model_forward_time": 0.025048255920410156, + "step": 12037 + }, + { + "epoch": 1.836700439453125e-05, + "step": 12037, + "training_step_time": 0.10887646675109863 + }, + { + "epoch": 1.83685302734375e-05, + "model_forward_time": 0.02525186538696289, + "step": 12038 + }, + { + "epoch": 1.83685302734375e-05, + "step": 12038, + "training_step_time": 0.10628604888916016 + }, + { + "epoch": 1.837005615234375e-05, + "model_forward_time": 0.025298595428466797, + "step": 12039 + }, + { + "epoch": 1.837005615234375e-05, + "step": 12039, + "training_step_time": 0.1114356517791748 + }, + { + "epoch": 1.837158203125e-05, + "grad_norm": 0.31339505314826965, + "learning_rate": 6.988268284060922e-05, + "loss": 0.0231, + "step": 12040 + }, + { + "epoch": 1.837158203125e-05, + "model_forward_time": 0.025748729705810547, + "step": 12040 + }, + { + "epoch": 1.837158203125e-05, + "step": 12040, + "training_step_time": 0.22601866722106934 + }, + { + "epoch": 1.837310791015625e-05, + "model_forward_time": 0.02509307861328125, + "step": 12041 + }, + { + "epoch": 1.837310791015625e-05, + "step": 12041, + "training_step_time": 0.1063692569732666 + }, + { + "epoch": 1.83746337890625e-05, + "model_forward_time": 0.024160385131835938, + "step": 12042 + }, + { + "epoch": 1.83746337890625e-05, + "step": 12042, + "training_step_time": 0.17446517944335938 + }, + { + "epoch": 1.837615966796875e-05, + "model_forward_time": 0.024640560150146484, + "step": 12043 + }, + { + "epoch": 1.837615966796875e-05, + "step": 12043, + "training_step_time": 0.11757159233093262 + }, + { + "epoch": 1.8377685546875e-05, + "model_forward_time": 0.024425029754638672, + "step": 12044 + }, + { + "epoch": 1.8377685546875e-05, + "step": 12044, + "training_step_time": 0.10908699035644531 + }, + { + "epoch": 1.837921142578125e-05, + "model_forward_time": 0.02488088607788086, + "step": 12045 + }, + { + "epoch": 1.837921142578125e-05, + "step": 12045, + "training_step_time": 0.11651110649108887 + }, + { + "epoch": 1.83807373046875e-05, + "model_forward_time": 0.025441646575927734, + "step": 12046 + }, + { + "epoch": 1.83807373046875e-05, + "step": 12046, + "training_step_time": 0.1179807186126709 + }, + { + "epoch": 1.838226318359375e-05, + "model_forward_time": 0.024173736572265625, + "step": 12047 + }, + { + "epoch": 1.838226318359375e-05, + "step": 12047, + "training_step_time": 0.11464452743530273 + }, + { + "epoch": 1.83837890625e-05, + "model_forward_time": 0.025205373764038086, + "step": 12048 + }, + { + "epoch": 1.83837890625e-05, + "step": 12048, + "training_step_time": 0.12032604217529297 + }, + { + "epoch": 1.838531494140625e-05, + "model_forward_time": 0.025057077407836914, + "step": 12049 + }, + { + "epoch": 1.838531494140625e-05, + "step": 12049, + "training_step_time": 0.11566925048828125 + }, + { + "epoch": 1.83868408203125e-05, + "grad_norm": 0.1898471564054489, + "learning_rate": 6.98321001907167e-05, + "loss": 0.0227, + "step": 12050 + }, + { + "epoch": 1.83868408203125e-05, + "model_forward_time": 0.02514791488647461, + "step": 12050 + }, + { + "epoch": 1.83868408203125e-05, + "step": 12050, + "training_step_time": 0.11374044418334961 + }, + { + "epoch": 1.838836669921875e-05, + "model_forward_time": 0.02511906623840332, + "step": 12051 + }, + { + "epoch": 1.838836669921875e-05, + "step": 12051, + "training_step_time": 0.11185145378112793 + }, + { + "epoch": 1.8389892578125e-05, + "model_forward_time": 0.0259554386138916, + "step": 12052 + }, + { + "epoch": 1.8389892578125e-05, + "step": 12052, + "training_step_time": 0.11596989631652832 + }, + { + "epoch": 1.839141845703125e-05, + "model_forward_time": 0.024854183197021484, + "step": 12053 + }, + { + "epoch": 1.839141845703125e-05, + "step": 12053, + "training_step_time": 0.11325240135192871 + }, + { + "epoch": 1.83929443359375e-05, + "model_forward_time": 0.026573896408081055, + "step": 12054 + }, + { + "epoch": 1.83929443359375e-05, + "step": 12054, + "training_step_time": 0.10871315002441406 + }, + { + "epoch": 1.839447021484375e-05, + "model_forward_time": 0.024114608764648438, + "step": 12055 + }, + { + "epoch": 1.839447021484375e-05, + "step": 12055, + "training_step_time": 0.11040806770324707 + }, + { + "epoch": 1.839599609375e-05, + "model_forward_time": 0.02606654167175293, + "step": 12056 + }, + { + "epoch": 1.839599609375e-05, + "step": 12056, + "training_step_time": 0.1094658374786377 + }, + { + "epoch": 1.839752197265625e-05, + "model_forward_time": 0.025458335876464844, + "step": 12057 + }, + { + "epoch": 1.839752197265625e-05, + "step": 12057, + "training_step_time": 0.1075904369354248 + }, + { + "epoch": 1.83990478515625e-05, + "model_forward_time": 0.025005340576171875, + "step": 12058 + }, + { + "epoch": 1.83990478515625e-05, + "step": 12058, + "training_step_time": 0.1155710220336914 + }, + { + "epoch": 1.840057373046875e-05, + "model_forward_time": 0.02527594566345215, + "step": 12059 + }, + { + "epoch": 1.840057373046875e-05, + "step": 12059, + "training_step_time": 0.11220669746398926 + }, + { + "epoch": 1.8402099609375e-05, + "grad_norm": 0.28267902135849, + "learning_rate": 6.978149344295242e-05, + "loss": 0.0201, + "step": 12060 + }, + { + "epoch": 1.8402099609375e-05, + "model_forward_time": 0.025480270385742188, + "step": 12060 + }, + { + "epoch": 1.8402099609375e-05, + "step": 12060, + "training_step_time": 0.11409258842468262 + }, + { + "epoch": 1.840362548828125e-05, + "model_forward_time": 0.025754690170288086, + "step": 12061 + }, + { + "epoch": 1.840362548828125e-05, + "step": 12061, + "training_step_time": 0.10948038101196289 + }, + { + "epoch": 1.84051513671875e-05, + "model_forward_time": 0.025450468063354492, + "step": 12062 + }, + { + "epoch": 1.84051513671875e-05, + "step": 12062, + "training_step_time": 0.11128354072570801 + }, + { + "epoch": 1.840667724609375e-05, + "model_forward_time": 0.025153160095214844, + "step": 12063 + }, + { + "epoch": 1.840667724609375e-05, + "step": 12063, + "training_step_time": 0.10832738876342773 + }, + { + "epoch": 1.8408203125e-05, + "model_forward_time": 0.025362014770507812, + "step": 12064 + }, + { + "epoch": 1.8408203125e-05, + "step": 12064, + "training_step_time": 0.10732388496398926 + }, + { + "epoch": 1.840972900390625e-05, + "model_forward_time": 0.02524590492248535, + "step": 12065 + }, + { + "epoch": 1.840972900390625e-05, + "step": 12065, + "training_step_time": 0.10705780982971191 + }, + { + "epoch": 1.84112548828125e-05, + "model_forward_time": 0.0252230167388916, + "step": 12066 + }, + { + "epoch": 1.84112548828125e-05, + "step": 12066, + "training_step_time": 0.10941743850708008 + }, + { + "epoch": 1.841278076171875e-05, + "model_forward_time": 0.02524733543395996, + "step": 12067 + }, + { + "epoch": 1.841278076171875e-05, + "step": 12067, + "training_step_time": 0.1461181640625 + }, + { + "epoch": 1.8414306640625e-05, + "model_forward_time": 0.02521347999572754, + "step": 12068 + }, + { + "epoch": 1.8414306640625e-05, + "step": 12068, + "training_step_time": 0.11349010467529297 + }, + { + "epoch": 1.841583251953125e-05, + "model_forward_time": 0.025375843048095703, + "step": 12069 + }, + { + "epoch": 1.841583251953125e-05, + "step": 12069, + "training_step_time": 0.1832740306854248 + }, + { + "epoch": 1.84173583984375e-05, + "grad_norm": 0.1628815084695816, + "learning_rate": 6.973086265880833e-05, + "loss": 0.0341, + "step": 12070 + }, + { + "epoch": 1.84173583984375e-05, + "model_forward_time": 0.026209115982055664, + "step": 12070 + }, + { + "epoch": 1.84173583984375e-05, + "step": 12070, + "training_step_time": 0.17536640167236328 + }, + { + "epoch": 1.841888427734375e-05, + "model_forward_time": 0.02425694465637207, + "step": 12071 + }, + { + "epoch": 1.841888427734375e-05, + "step": 12071, + "training_step_time": 0.11565947532653809 + }, + { + "epoch": 1.842041015625e-05, + "model_forward_time": 0.024743080139160156, + "step": 12072 + }, + { + "epoch": 1.842041015625e-05, + "step": 12072, + "training_step_time": 0.12029480934143066 + }, + { + "epoch": 1.842193603515625e-05, + "model_forward_time": 0.025445222854614258, + "step": 12073 + }, + { + "epoch": 1.842193603515625e-05, + "step": 12073, + "training_step_time": 0.10524106025695801 + }, + { + "epoch": 1.84234619140625e-05, + "model_forward_time": 0.02561807632446289, + "step": 12074 + }, + { + "epoch": 1.84234619140625e-05, + "step": 12074, + "training_step_time": 0.10607767105102539 + }, + { + "epoch": 1.842498779296875e-05, + "model_forward_time": 0.02587413787841797, + "step": 12075 + }, + { + "epoch": 1.842498779296875e-05, + "step": 12075, + "training_step_time": 0.10817146301269531 + }, + { + "epoch": 1.8426513671875e-05, + "model_forward_time": 0.02539801597595215, + "step": 12076 + }, + { + "epoch": 1.8426513671875e-05, + "step": 12076, + "training_step_time": 0.10470080375671387 + }, + { + "epoch": 1.842803955078125e-05, + "model_forward_time": 0.024849891662597656, + "step": 12077 + }, + { + "epoch": 1.842803955078125e-05, + "step": 12077, + "training_step_time": 0.10503196716308594 + }, + { + "epoch": 1.84295654296875e-05, + "model_forward_time": 0.02556300163269043, + "step": 12078 + }, + { + "epoch": 1.84295654296875e-05, + "step": 12078, + "training_step_time": 0.11263847351074219 + }, + { + "epoch": 1.843109130859375e-05, + "model_forward_time": 0.025606393814086914, + "step": 12079 + }, + { + "epoch": 1.843109130859375e-05, + "step": 12079, + "training_step_time": 0.11065435409545898 + }, + { + "epoch": 1.84326171875e-05, + "grad_norm": 0.4551478326320648, + "learning_rate": 6.968020789980562e-05, + "loss": 0.0202, + "step": 12080 + }, + { + "epoch": 1.84326171875e-05, + "model_forward_time": 0.025592327117919922, + "step": 12080 + }, + { + "epoch": 1.84326171875e-05, + "step": 12080, + "training_step_time": 0.12091970443725586 + }, + { + "epoch": 1.843414306640625e-05, + "model_forward_time": 0.025911808013916016, + "step": 12081 + }, + { + "epoch": 1.843414306640625e-05, + "step": 12081, + "training_step_time": 0.1156303882598877 + }, + { + "epoch": 1.84356689453125e-05, + "model_forward_time": 0.025475740432739258, + "step": 12082 + }, + { + "epoch": 1.84356689453125e-05, + "step": 12082, + "training_step_time": 0.12085914611816406 + }, + { + "epoch": 1.843719482421875e-05, + "model_forward_time": 0.02576160430908203, + "step": 12083 + }, + { + "epoch": 1.843719482421875e-05, + "step": 12083, + "training_step_time": 0.13818883895874023 + }, + { + "epoch": 1.8438720703125e-05, + "model_forward_time": 0.02576160430908203, + "step": 12084 + }, + { + "epoch": 1.8438720703125e-05, + "step": 12084, + "training_step_time": 0.11002945899963379 + }, + { + "epoch": 1.844024658203125e-05, + "model_forward_time": 0.024962663650512695, + "step": 12085 + }, + { + "epoch": 1.844024658203125e-05, + "step": 12085, + "training_step_time": 0.128340482711792 + }, + { + "epoch": 1.84417724609375e-05, + "model_forward_time": 0.025377988815307617, + "step": 12086 + }, + { + "epoch": 1.84417724609375e-05, + "step": 12086, + "training_step_time": 0.12563443183898926 + }, + { + "epoch": 1.844329833984375e-05, + "model_forward_time": 0.025313615798950195, + "step": 12087 + }, + { + "epoch": 1.844329833984375e-05, + "step": 12087, + "training_step_time": 0.10725092887878418 + }, + { + "epoch": 1.844482421875e-05, + "model_forward_time": 0.02515578269958496, + "step": 12088 + }, + { + "epoch": 1.844482421875e-05, + "step": 12088, + "training_step_time": 0.11072826385498047 + }, + { + "epoch": 1.844635009765625e-05, + "model_forward_time": 0.02522730827331543, + "step": 12089 + }, + { + "epoch": 1.844635009765625e-05, + "step": 12089, + "training_step_time": 0.11376070976257324 + }, + { + "epoch": 1.84478759765625e-05, + "grad_norm": 0.43498626351356506, + "learning_rate": 6.962952922749457e-05, + "loss": 0.0161, + "step": 12090 + }, + { + "epoch": 1.84478759765625e-05, + "model_forward_time": 0.025298357009887695, + "step": 12090 + }, + { + "epoch": 1.84478759765625e-05, + "step": 12090, + "training_step_time": 0.11504173278808594 + }, + { + "epoch": 1.844940185546875e-05, + "model_forward_time": 0.025652408599853516, + "step": 12091 + }, + { + "epoch": 1.844940185546875e-05, + "step": 12091, + "training_step_time": 0.11237835884094238 + }, + { + "epoch": 1.8450927734375e-05, + "model_forward_time": 0.025173425674438477, + "step": 12092 + }, + { + "epoch": 1.8450927734375e-05, + "step": 12092, + "training_step_time": 0.1069028377532959 + }, + { + "epoch": 1.845245361328125e-05, + "model_forward_time": 0.02529764175415039, + "step": 12093 + }, + { + "epoch": 1.845245361328125e-05, + "step": 12093, + "training_step_time": 0.11280584335327148 + }, + { + "epoch": 1.84539794921875e-05, + "model_forward_time": 0.025171518325805664, + "step": 12094 + }, + { + "epoch": 1.84539794921875e-05, + "step": 12094, + "training_step_time": 0.17817163467407227 + }, + { + "epoch": 1.845550537109375e-05, + "model_forward_time": 0.02435445785522461, + "step": 12095 + }, + { + "epoch": 1.845550537109375e-05, + "step": 12095, + "training_step_time": 0.12177467346191406 + }, + { + "epoch": 1.845703125e-05, + "model_forward_time": 0.024264097213745117, + "step": 12096 + }, + { + "epoch": 1.845703125e-05, + "step": 12096, + "training_step_time": 0.18004798889160156 + }, + { + "epoch": 1.845855712890625e-05, + "model_forward_time": 0.02441883087158203, + "step": 12097 + }, + { + "epoch": 1.845855712890625e-05, + "step": 12097, + "training_step_time": 0.19928359985351562 + }, + { + "epoch": 1.84600830078125e-05, + "model_forward_time": 0.024761438369750977, + "step": 12098 + }, + { + "epoch": 1.84600830078125e-05, + "step": 12098, + "training_step_time": 0.1907656192779541 + }, + { + "epoch": 1.846160888671875e-05, + "model_forward_time": 0.027601242065429688, + "step": 12099 + }, + { + "epoch": 1.846160888671875e-05, + "step": 12099, + "training_step_time": 0.18149161338806152 + }, + { + "epoch": 1.8463134765625e-05, + "grad_norm": 0.3368780016899109, + "learning_rate": 6.957882670345458e-05, + "loss": 0.0264, + "step": 12100 + }, + { + "epoch": 1.8463134765625e-05, + "model_forward_time": 0.024541139602661133, + "step": 12100 + }, + { + "epoch": 1.8463134765625e-05, + "step": 12100, + "training_step_time": 0.15851855278015137 + }, + { + "epoch": 1.846466064453125e-05, + "model_forward_time": 0.024677753448486328, + "step": 12101 + }, + { + "epoch": 1.846466064453125e-05, + "step": 12101, + "training_step_time": 0.16266083717346191 + }, + { + "epoch": 1.84661865234375e-05, + "model_forward_time": 0.02415299415588379, + "step": 12102 + }, + { + "epoch": 1.84661865234375e-05, + "step": 12102, + "training_step_time": 0.14628338813781738 + }, + { + "epoch": 1.846771240234375e-05, + "model_forward_time": 0.024596691131591797, + "step": 12103 + }, + { + "epoch": 1.846771240234375e-05, + "step": 12103, + "training_step_time": 0.13750195503234863 + }, + { + "epoch": 1.846923828125e-05, + "model_forward_time": 0.02510666847229004, + "step": 12104 + }, + { + "epoch": 1.846923828125e-05, + "step": 12104, + "training_step_time": 0.10332322120666504 + }, + { + "epoch": 1.847076416015625e-05, + "model_forward_time": 0.025179147720336914, + "step": 12105 + }, + { + "epoch": 1.847076416015625e-05, + "step": 12105, + "training_step_time": 0.12081432342529297 + }, + { + "epoch": 1.84722900390625e-05, + "model_forward_time": 0.025976896286010742, + "step": 12106 + }, + { + "epoch": 1.84722900390625e-05, + "step": 12106, + "training_step_time": 0.1088707447052002 + }, + { + "epoch": 1.847381591796875e-05, + "model_forward_time": 0.025481224060058594, + "step": 12107 + }, + { + "epoch": 1.847381591796875e-05, + "step": 12107, + "training_step_time": 0.11642813682556152 + }, + { + "epoch": 1.8475341796875e-05, + "model_forward_time": 0.02563929557800293, + "step": 12108 + }, + { + "epoch": 1.8475341796875e-05, + "step": 12108, + "training_step_time": 0.10981416702270508 + }, + { + "epoch": 1.847686767578125e-05, + "model_forward_time": 0.02545952796936035, + "step": 12109 + }, + { + "epoch": 1.847686767578125e-05, + "step": 12109, + "training_step_time": 0.10700488090515137 + }, + { + "epoch": 1.84783935546875e-05, + "grad_norm": 0.2847748100757599, + "learning_rate": 6.952810038929397e-05, + "loss": 0.02, + "step": 12110 + }, + { + "epoch": 1.84783935546875e-05, + "model_forward_time": 0.026195764541625977, + "step": 12110 + }, + { + "epoch": 1.84783935546875e-05, + "step": 12110, + "training_step_time": 0.10912251472473145 + }, + { + "epoch": 1.847991943359375e-05, + "model_forward_time": 0.025635957717895508, + "step": 12111 + }, + { + "epoch": 1.847991943359375e-05, + "step": 12111, + "training_step_time": 0.15905380249023438 + }, + { + "epoch": 1.84814453125e-05, + "model_forward_time": 0.02497577667236328, + "step": 12112 + }, + { + "epoch": 1.84814453125e-05, + "step": 12112, + "training_step_time": 0.11531567573547363 + }, + { + "epoch": 1.848297119140625e-05, + "model_forward_time": 0.024684906005859375, + "step": 12113 + }, + { + "epoch": 1.848297119140625e-05, + "step": 12113, + "training_step_time": 0.17767906188964844 + }, + { + "epoch": 1.84844970703125e-05, + "model_forward_time": 0.024710655212402344, + "step": 12114 + }, + { + "epoch": 1.84844970703125e-05, + "step": 12114, + "training_step_time": 0.1831378936767578 + }, + { + "epoch": 1.848602294921875e-05, + "model_forward_time": 0.024237871170043945, + "step": 12115 + }, + { + "epoch": 1.848602294921875e-05, + "step": 12115, + "training_step_time": 0.11511063575744629 + }, + { + "epoch": 1.8487548828125e-05, + "model_forward_time": 0.024487733840942383, + "step": 12116 + }, + { + "epoch": 1.8487548828125e-05, + "step": 12116, + "training_step_time": 0.11063528060913086 + }, + { + "epoch": 1.848907470703125e-05, + "model_forward_time": 0.025523900985717773, + "step": 12117 + }, + { + "epoch": 1.848907470703125e-05, + "step": 12117, + "training_step_time": 0.1070411205291748 + }, + { + "epoch": 1.84906005859375e-05, + "model_forward_time": 0.02556014060974121, + "step": 12118 + }, + { + "epoch": 1.84906005859375e-05, + "step": 12118, + "training_step_time": 0.11070895195007324 + }, + { + "epoch": 1.849212646484375e-05, + "model_forward_time": 0.025222301483154297, + "step": 12119 + }, + { + "epoch": 1.849212646484375e-05, + "step": 12119, + "training_step_time": 0.10846853256225586 + }, + { + "epoch": 1.849365234375e-05, + "grad_norm": 0.28749608993530273, + "learning_rate": 6.947735034665002e-05, + "loss": 0.014, + "step": 12120 + }, + { + "epoch": 1.849365234375e-05, + "model_forward_time": 0.025959253311157227, + "step": 12120 + }, + { + "epoch": 1.849365234375e-05, + "step": 12120, + "training_step_time": 0.10850644111633301 + }, + { + "epoch": 1.849517822265625e-05, + "model_forward_time": 0.024909019470214844, + "step": 12121 + }, + { + "epoch": 1.849517822265625e-05, + "step": 12121, + "training_step_time": 0.10658764839172363 + }, + { + "epoch": 1.84967041015625e-05, + "model_forward_time": 0.02747201919555664, + "step": 12122 + }, + { + "epoch": 1.84967041015625e-05, + "step": 12122, + "training_step_time": 0.11704349517822266 + }, + { + "epoch": 1.849822998046875e-05, + "model_forward_time": 0.024912595748901367, + "step": 12123 + }, + { + "epoch": 1.849822998046875e-05, + "step": 12123, + "training_step_time": 0.20823168754577637 + }, + { + "epoch": 1.8499755859375e-05, + "model_forward_time": 0.024495363235473633, + "step": 12124 + }, + { + "epoch": 1.8499755859375e-05, + "step": 12124, + "training_step_time": 0.1333141326904297 + }, + { + "epoch": 1.850128173828125e-05, + "model_forward_time": 0.02456808090209961, + "step": 12125 + }, + { + "epoch": 1.850128173828125e-05, + "step": 12125, + "training_step_time": 0.15640497207641602 + }, + { + "epoch": 1.85028076171875e-05, + "model_forward_time": 0.024996519088745117, + "step": 12126 + }, + { + "epoch": 1.85028076171875e-05, + "step": 12126, + "training_step_time": 0.14251399040222168 + }, + { + "epoch": 1.850433349609375e-05, + "model_forward_time": 0.02455925941467285, + "step": 12127 + }, + { + "epoch": 1.850433349609375e-05, + "step": 12127, + "training_step_time": 0.11280488967895508 + }, + { + "epoch": 1.8505859375e-05, + "model_forward_time": 0.024810314178466797, + "step": 12128 + }, + { + "epoch": 1.8505859375e-05, + "step": 12128, + "training_step_time": 0.10538744926452637 + }, + { + "epoch": 1.850738525390625e-05, + "model_forward_time": 0.025376558303833008, + "step": 12129 + }, + { + "epoch": 1.850738525390625e-05, + "step": 12129, + "training_step_time": 0.12279915809631348 + }, + { + "epoch": 1.85089111328125e-05, + "grad_norm": 0.37205004692077637, + "learning_rate": 6.942657663718879e-05, + "loss": 0.0213, + "step": 12130 + }, + { + "epoch": 1.85089111328125e-05, + "model_forward_time": 0.025405168533325195, + "step": 12130 + }, + { + "epoch": 1.85089111328125e-05, + "step": 12130, + "training_step_time": 0.15722894668579102 + }, + { + "epoch": 1.851043701171875e-05, + "model_forward_time": 0.024766206741333008, + "step": 12131 + }, + { + "epoch": 1.851043701171875e-05, + "step": 12131, + "training_step_time": 0.15096163749694824 + }, + { + "epoch": 1.8511962890625e-05, + "model_forward_time": 0.025306224822998047, + "step": 12132 + }, + { + "epoch": 1.8511962890625e-05, + "step": 12132, + "training_step_time": 0.15341806411743164 + }, + { + "epoch": 1.851348876953125e-05, + "model_forward_time": 0.02415633201599121, + "step": 12133 + }, + { + "epoch": 1.851348876953125e-05, + "step": 12133, + "training_step_time": 0.13424420356750488 + }, + { + "epoch": 1.85150146484375e-05, + "model_forward_time": 0.02494049072265625, + "step": 12134 + }, + { + "epoch": 1.85150146484375e-05, + "step": 12134, + "training_step_time": 0.13079428672790527 + }, + { + "epoch": 1.851654052734375e-05, + "model_forward_time": 0.025194168090820312, + "step": 12135 + }, + { + "epoch": 1.851654052734375e-05, + "step": 12135, + "training_step_time": 0.1319594383239746 + }, + { + "epoch": 1.851806640625e-05, + "model_forward_time": 0.024608373641967773, + "step": 12136 + }, + { + "epoch": 1.851806640625e-05, + "step": 12136, + "training_step_time": 0.21648597717285156 + }, + { + "epoch": 1.851959228515625e-05, + "model_forward_time": 0.02446579933166504, + "step": 12137 + }, + { + "epoch": 1.851959228515625e-05, + "step": 12137, + "training_step_time": 0.1127159595489502 + }, + { + "epoch": 1.85211181640625e-05, + "model_forward_time": 0.02412724494934082, + "step": 12138 + }, + { + "epoch": 1.85211181640625e-05, + "step": 12138, + "training_step_time": 0.10964059829711914 + }, + { + "epoch": 1.852264404296875e-05, + "model_forward_time": 0.02886343002319336, + "step": 12139 + }, + { + "epoch": 1.852264404296875e-05, + "step": 12139, + "training_step_time": 0.12058329582214355 + }, + { + "epoch": 1.8524169921875e-05, + "grad_norm": 0.3380381762981415, + "learning_rate": 6.937577932260515e-05, + "loss": 0.0153, + "step": 12140 + }, + { + "epoch": 1.8524169921875e-05, + "model_forward_time": 0.025558948516845703, + "step": 12140 + }, + { + "epoch": 1.8524169921875e-05, + "step": 12140, + "training_step_time": 0.11328268051147461 + }, + { + "epoch": 1.852569580078125e-05, + "model_forward_time": 0.025569677352905273, + "step": 12141 + }, + { + "epoch": 1.852569580078125e-05, + "step": 12141, + "training_step_time": 0.12047195434570312 + }, + { + "epoch": 1.85272216796875e-05, + "model_forward_time": 0.02518606185913086, + "step": 12142 + }, + { + "epoch": 1.85272216796875e-05, + "step": 12142, + "training_step_time": 0.10771536827087402 + }, + { + "epoch": 1.852874755859375e-05, + "model_forward_time": 0.02576589584350586, + "step": 12143 + }, + { + "epoch": 1.852874755859375e-05, + "step": 12143, + "training_step_time": 0.11299729347229004 + }, + { + "epoch": 1.85302734375e-05, + "model_forward_time": 0.02525925636291504, + "step": 12144 + }, + { + "epoch": 1.85302734375e-05, + "step": 12144, + "training_step_time": 0.10834002494812012 + }, + { + "epoch": 1.853179931640625e-05, + "model_forward_time": 0.025398731231689453, + "step": 12145 + }, + { + "epoch": 1.853179931640625e-05, + "step": 12145, + "training_step_time": 0.10854387283325195 + }, + { + "epoch": 1.85333251953125e-05, + "model_forward_time": 0.025328636169433594, + "step": 12146 + }, + { + "epoch": 1.85333251953125e-05, + "step": 12146, + "training_step_time": 0.1081233024597168 + }, + { + "epoch": 1.853485107421875e-05, + "model_forward_time": 0.024988889694213867, + "step": 12147 + }, + { + "epoch": 1.853485107421875e-05, + "step": 12147, + "training_step_time": 0.10960221290588379 + }, + { + "epoch": 1.8536376953125e-05, + "model_forward_time": 0.02519822120666504, + "step": 12148 + }, + { + "epoch": 1.8536376953125e-05, + "step": 12148, + "training_step_time": 0.8354175090789795 + }, + { + "epoch": 1.853790283203125e-05, + "model_forward_time": 0.022663354873657227, + "step": 12149 + }, + { + "epoch": 1.853790283203125e-05, + "step": 12149, + "training_step_time": 0.10273098945617676 + }, + { + "epoch": 1.85394287109375e-05, + "grad_norm": 0.39317330718040466, + "learning_rate": 6.932495846462261e-05, + "loss": 0.0173, + "step": 12150 + }, + { + "epoch": 1.85394287109375e-05, + "model_forward_time": 0.0239410400390625, + "step": 12150 + }, + { + "epoch": 1.85394287109375e-05, + "step": 12150, + "training_step_time": 0.17058658599853516 + }, + { + "epoch": 1.854095458984375e-05, + "model_forward_time": 0.024542808532714844, + "step": 12151 + }, + { + "epoch": 1.854095458984375e-05, + "step": 12151, + "training_step_time": 0.10926365852355957 + }, + { + "epoch": 1.854248046875e-05, + "model_forward_time": 0.02427816390991211, + "step": 12152 + }, + { + "epoch": 1.854248046875e-05, + "step": 12152, + "training_step_time": 0.11043620109558105 + }, + { + "epoch": 1.854400634765625e-05, + "model_forward_time": 0.025025129318237305, + "step": 12153 + }, + { + "epoch": 1.854400634765625e-05, + "step": 12153, + "training_step_time": 0.12153434753417969 + }, + { + "epoch": 1.85455322265625e-05, + "model_forward_time": 0.025335073471069336, + "step": 12154 + }, + { + "epoch": 1.85455322265625e-05, + "step": 12154, + "training_step_time": 0.1292552947998047 + }, + { + "epoch": 1.854705810546875e-05, + "model_forward_time": 0.02577519416809082, + "step": 12155 + }, + { + "epoch": 1.854705810546875e-05, + "step": 12155, + "training_step_time": 0.11459016799926758 + }, + { + "epoch": 1.8548583984375e-05, + "model_forward_time": 0.0253298282623291, + "step": 12156 + }, + { + "epoch": 1.8548583984375e-05, + "step": 12156, + "training_step_time": 0.11096453666687012 + }, + { + "epoch": 1.855010986328125e-05, + "model_forward_time": 0.025742292404174805, + "step": 12157 + }, + { + "epoch": 1.855010986328125e-05, + "step": 12157, + "training_step_time": 0.10995149612426758 + }, + { + "epoch": 1.85516357421875e-05, + "model_forward_time": 0.025273799896240234, + "step": 12158 + }, + { + "epoch": 1.85516357421875e-05, + "step": 12158, + "training_step_time": 0.10606193542480469 + }, + { + "epoch": 1.855316162109375e-05, + "model_forward_time": 0.02552056312561035, + "step": 12159 + }, + { + "epoch": 1.855316162109375e-05, + "step": 12159, + "training_step_time": 0.10692667961120605 + }, + { + "epoch": 1.85546875e-05, + "grad_norm": 0.28868332505226135, + "learning_rate": 6.927411412499332e-05, + "loss": 0.0146, + "step": 12160 + }, + { + "epoch": 1.85546875e-05, + "model_forward_time": 0.02510809898376465, + "step": 12160 + }, + { + "epoch": 1.85546875e-05, + "step": 12160, + "training_step_time": 0.10293769836425781 + }, + { + "epoch": 1.855621337890625e-05, + "model_forward_time": 0.02461838722229004, + "step": 12161 + }, + { + "epoch": 1.855621337890625e-05, + "step": 12161, + "training_step_time": 0.14577412605285645 + }, + { + "epoch": 1.85577392578125e-05, + "model_forward_time": 0.024579286575317383, + "step": 12162 + }, + { + "epoch": 1.85577392578125e-05, + "step": 12162, + "training_step_time": 0.10670280456542969 + }, + { + "epoch": 1.855926513671875e-05, + "model_forward_time": 0.02532172203063965, + "step": 12163 + }, + { + "epoch": 1.855926513671875e-05, + "step": 12163, + "training_step_time": 0.1863081455230713 + }, + { + "epoch": 1.8560791015625e-05, + "model_forward_time": 0.024344921112060547, + "step": 12164 + }, + { + "epoch": 1.8560791015625e-05, + "step": 12164, + "training_step_time": 0.13602018356323242 + }, + { + "epoch": 1.856231689453125e-05, + "model_forward_time": 0.024878978729248047, + "step": 12165 + }, + { + "epoch": 1.856231689453125e-05, + "step": 12165, + "training_step_time": 0.1930091381072998 + }, + { + "epoch": 1.85638427734375e-05, + "model_forward_time": 0.024596214294433594, + "step": 12166 + }, + { + "epoch": 1.85638427734375e-05, + "step": 12166, + "training_step_time": 0.14045000076293945 + }, + { + "epoch": 1.856536865234375e-05, + "model_forward_time": 0.024628400802612305, + "step": 12167 + }, + { + "epoch": 1.856536865234375e-05, + "step": 12167, + "training_step_time": 0.11679720878601074 + }, + { + "epoch": 1.856689453125e-05, + "model_forward_time": 0.02660655975341797, + "step": 12168 + }, + { + "epoch": 1.856689453125e-05, + "step": 12168, + "training_step_time": 0.11935281753540039 + }, + { + "epoch": 1.856842041015625e-05, + "model_forward_time": 0.02539682388305664, + "step": 12169 + }, + { + "epoch": 1.856842041015625e-05, + "step": 12169, + "training_step_time": 0.10825419425964355 + }, + { + "epoch": 1.85699462890625e-05, + "grad_norm": 0.24523653090000153, + "learning_rate": 6.922324636549795e-05, + "loss": 0.0156, + "step": 12170 + }, + { + "epoch": 1.85699462890625e-05, + "model_forward_time": 0.025346994400024414, + "step": 12170 + }, + { + "epoch": 1.85699462890625e-05, + "step": 12170, + "training_step_time": 0.1883068084716797 + }, + { + "epoch": 1.857147216796875e-05, + "model_forward_time": 0.027129411697387695, + "step": 12171 + }, + { + "epoch": 1.857147216796875e-05, + "step": 12171, + "training_step_time": 0.1811690330505371 + }, + { + "epoch": 1.8572998046875e-05, + "model_forward_time": 0.023925065994262695, + "step": 12172 + }, + { + "epoch": 1.8572998046875e-05, + "step": 12172, + "training_step_time": 0.12838196754455566 + }, + { + "epoch": 1.857452392578125e-05, + "model_forward_time": 0.024210691452026367, + "step": 12173 + }, + { + "epoch": 1.857452392578125e-05, + "step": 12173, + "training_step_time": 0.10532450675964355 + }, + { + "epoch": 1.85760498046875e-05, + "model_forward_time": 0.025958776473999023, + "step": 12174 + }, + { + "epoch": 1.85760498046875e-05, + "step": 12174, + "training_step_time": 0.12211942672729492 + }, + { + "epoch": 1.857757568359375e-05, + "model_forward_time": 0.024934053421020508, + "step": 12175 + }, + { + "epoch": 1.857757568359375e-05, + "step": 12175, + "training_step_time": 0.10917043685913086 + }, + { + "epoch": 1.85791015625e-05, + "model_forward_time": 0.025083303451538086, + "step": 12176 + }, + { + "epoch": 1.85791015625e-05, + "step": 12176, + "training_step_time": 0.12683629989624023 + }, + { + "epoch": 1.858062744140625e-05, + "model_forward_time": 0.02460622787475586, + "step": 12177 + }, + { + "epoch": 1.858062744140625e-05, + "step": 12177, + "training_step_time": 0.1219024658203125 + }, + { + "epoch": 1.85821533203125e-05, + "model_forward_time": 0.025339365005493164, + "step": 12178 + }, + { + "epoch": 1.85821533203125e-05, + "step": 12178, + "training_step_time": 0.11470603942871094 + }, + { + "epoch": 1.858367919921875e-05, + "model_forward_time": 0.025956153869628906, + "step": 12179 + }, + { + "epoch": 1.858367919921875e-05, + "step": 12179, + "training_step_time": 0.1099996566772461 + }, + { + "epoch": 1.8585205078125e-05, + "grad_norm": 0.4693124294281006, + "learning_rate": 6.917235524794558e-05, + "loss": 0.0299, + "step": 12180 + }, + { + "epoch": 1.8585205078125e-05, + "model_forward_time": 0.025193214416503906, + "step": 12180 + }, + { + "epoch": 1.8585205078125e-05, + "step": 12180, + "training_step_time": 0.10866832733154297 + }, + { + "epoch": 1.858673095703125e-05, + "model_forward_time": 0.025312423706054688, + "step": 12181 + }, + { + "epoch": 1.858673095703125e-05, + "step": 12181, + "training_step_time": 0.10819029808044434 + }, + { + "epoch": 1.85882568359375e-05, + "model_forward_time": 0.025712251663208008, + "step": 12182 + }, + { + "epoch": 1.85882568359375e-05, + "step": 12182, + "training_step_time": 0.10832548141479492 + }, + { + "epoch": 1.858978271484375e-05, + "model_forward_time": 0.025365591049194336, + "step": 12183 + }, + { + "epoch": 1.858978271484375e-05, + "step": 12183, + "training_step_time": 0.10812616348266602 + }, + { + "epoch": 1.859130859375e-05, + "model_forward_time": 0.025095701217651367, + "step": 12184 + }, + { + "epoch": 1.859130859375e-05, + "step": 12184, + "training_step_time": 0.10722160339355469 + }, + { + "epoch": 1.859283447265625e-05, + "model_forward_time": 0.02508258819580078, + "step": 12185 + }, + { + "epoch": 1.859283447265625e-05, + "step": 12185, + "training_step_time": 0.10984325408935547 + }, + { + "epoch": 1.85943603515625e-05, + "model_forward_time": 0.0254666805267334, + "step": 12186 + }, + { + "epoch": 1.85943603515625e-05, + "step": 12186, + "training_step_time": 0.10843181610107422 + }, + { + "epoch": 1.859588623046875e-05, + "model_forward_time": 0.02558612823486328, + "step": 12187 + }, + { + "epoch": 1.859588623046875e-05, + "step": 12187, + "training_step_time": 0.10842323303222656 + }, + { + "epoch": 1.8597412109375e-05, + "model_forward_time": 0.02507305145263672, + "step": 12188 + }, + { + "epoch": 1.8597412109375e-05, + "step": 12188, + "training_step_time": 0.11646866798400879 + }, + { + "epoch": 1.859893798828125e-05, + "model_forward_time": 0.025184154510498047, + "step": 12189 + }, + { + "epoch": 1.859893798828125e-05, + "step": 12189, + "training_step_time": 0.1075582504272461 + }, + { + "epoch": 1.86004638671875e-05, + "grad_norm": 0.3445988893508911, + "learning_rate": 6.912144083417376e-05, + "loss": 0.0154, + "step": 12190 + }, + { + "epoch": 1.86004638671875e-05, + "model_forward_time": 0.025242328643798828, + "step": 12190 + }, + { + "epoch": 1.86004638671875e-05, + "step": 12190, + "training_step_time": 0.10817742347717285 + }, + { + "epoch": 1.860198974609375e-05, + "model_forward_time": 0.024753332138061523, + "step": 12191 + }, + { + "epoch": 1.860198974609375e-05, + "step": 12191, + "training_step_time": 0.10774350166320801 + }, + { + "epoch": 1.8603515625e-05, + "model_forward_time": 0.02552509307861328, + "step": 12192 + }, + { + "epoch": 1.8603515625e-05, + "step": 12192, + "training_step_time": 0.11028528213500977 + }, + { + "epoch": 1.860504150390625e-05, + "model_forward_time": 0.02488851547241211, + "step": 12193 + }, + { + "epoch": 1.860504150390625e-05, + "step": 12193, + "training_step_time": 0.10948014259338379 + }, + { + "epoch": 1.86065673828125e-05, + "model_forward_time": 0.02522730827331543, + "step": 12194 + }, + { + "epoch": 1.86065673828125e-05, + "step": 12194, + "training_step_time": 0.11433005332946777 + }, + { + "epoch": 1.860809326171875e-05, + "model_forward_time": 0.028829336166381836, + "step": 12195 + }, + { + "epoch": 1.860809326171875e-05, + "step": 12195, + "training_step_time": 0.11514568328857422 + }, + { + "epoch": 1.8609619140625e-05, + "model_forward_time": 0.025232553482055664, + "step": 12196 + }, + { + "epoch": 1.8609619140625e-05, + "step": 12196, + "training_step_time": 0.11287474632263184 + }, + { + "epoch": 1.861114501953125e-05, + "model_forward_time": 0.025613069534301758, + "step": 12197 + }, + { + "epoch": 1.861114501953125e-05, + "step": 12197, + "training_step_time": 0.12331390380859375 + }, + { + "epoch": 1.86126708984375e-05, + "model_forward_time": 0.025481462478637695, + "step": 12198 + }, + { + "epoch": 1.86126708984375e-05, + "step": 12198, + "training_step_time": 0.11653327941894531 + }, + { + "epoch": 1.861419677734375e-05, + "model_forward_time": 0.025342941284179688, + "step": 12199 + }, + { + "epoch": 1.861419677734375e-05, + "step": 12199, + "training_step_time": 0.22729110717773438 + }, + { + "epoch": 1.861572265625e-05, + "grad_norm": 0.2358154058456421, + "learning_rate": 6.90705031860483e-05, + "loss": 0.0249, + "step": 12200 + }, + { + "epoch": 1.861572265625e-05, + "model_forward_time": 0.02457141876220703, + "step": 12200 + }, + { + "epoch": 1.861572265625e-05, + "step": 12200, + "training_step_time": 0.1245582103729248 + }, + { + "epoch": 1.861724853515625e-05, + "model_forward_time": 0.024930953979492188, + "step": 12201 + }, + { + "epoch": 1.861724853515625e-05, + "step": 12201, + "training_step_time": 0.11001896858215332 + }, + { + "epoch": 1.86187744140625e-05, + "model_forward_time": 0.028508424758911133, + "step": 12202 + }, + { + "epoch": 1.86187744140625e-05, + "step": 12202, + "training_step_time": 0.11352682113647461 + }, + { + "epoch": 1.862030029296875e-05, + "model_forward_time": 0.025401830673217773, + "step": 12203 + }, + { + "epoch": 1.862030029296875e-05, + "step": 12203, + "training_step_time": 0.11102414131164551 + }, + { + "epoch": 1.8621826171875e-05, + "model_forward_time": 0.0267031192779541, + "step": 12204 + }, + { + "epoch": 1.8621826171875e-05, + "step": 12204, + "training_step_time": 0.10894393920898438 + }, + { + "epoch": 1.862335205078125e-05, + "model_forward_time": 0.025269269943237305, + "step": 12205 + }, + { + "epoch": 1.862335205078125e-05, + "step": 12205, + "training_step_time": 0.1085808277130127 + }, + { + "epoch": 1.86248779296875e-05, + "model_forward_time": 0.02559947967529297, + "step": 12206 + }, + { + "epoch": 1.86248779296875e-05, + "step": 12206, + "training_step_time": 0.10754179954528809 + }, + { + "epoch": 1.862640380859375e-05, + "model_forward_time": 0.024886369705200195, + "step": 12207 + }, + { + "epoch": 1.862640380859375e-05, + "step": 12207, + "training_step_time": 0.14352679252624512 + }, + { + "epoch": 1.86279296875e-05, + "model_forward_time": 0.024336576461791992, + "step": 12208 + }, + { + "epoch": 1.86279296875e-05, + "step": 12208, + "training_step_time": 0.1642289161682129 + }, + { + "epoch": 1.862945556640625e-05, + "model_forward_time": 0.028010845184326172, + "step": 12209 + }, + { + "epoch": 1.862945556640625e-05, + "step": 12209, + "training_step_time": 0.11157894134521484 + }, + { + "epoch": 1.86309814453125e-05, + "grad_norm": 0.27985885739326477, + "learning_rate": 6.901954236546323e-05, + "loss": 0.0307, + "step": 12210 + }, + { + "epoch": 1.86309814453125e-05, + "model_forward_time": 0.024744033813476562, + "step": 12210 + }, + { + "epoch": 1.86309814453125e-05, + "step": 12210, + "training_step_time": 0.1626591682434082 + }, + { + "epoch": 1.863250732421875e-05, + "model_forward_time": 0.024492979049682617, + "step": 12211 + }, + { + "epoch": 1.863250732421875e-05, + "step": 12211, + "training_step_time": 0.1714036464691162 + }, + { + "epoch": 1.8634033203125e-05, + "model_forward_time": 0.0251007080078125, + "step": 12212 + }, + { + "epoch": 1.8634033203125e-05, + "step": 12212, + "training_step_time": 0.19283533096313477 + }, + { + "epoch": 1.863555908203125e-05, + "model_forward_time": 0.024848461151123047, + "step": 12213 + }, + { + "epoch": 1.863555908203125e-05, + "step": 12213, + "training_step_time": 0.11724138259887695 + }, + { + "epoch": 1.86370849609375e-05, + "model_forward_time": 0.025547266006469727, + "step": 12214 + }, + { + "epoch": 1.86370849609375e-05, + "step": 12214, + "training_step_time": 0.10605788230895996 + }, + { + "epoch": 1.863861083984375e-05, + "model_forward_time": 0.024709463119506836, + "step": 12215 + }, + { + "epoch": 1.863861083984375e-05, + "step": 12215, + "training_step_time": 0.19892477989196777 + }, + { + "epoch": 1.864013671875e-05, + "model_forward_time": 0.024607181549072266, + "step": 12216 + }, + { + "epoch": 1.864013671875e-05, + "step": 12216, + "training_step_time": 0.10834670066833496 + }, + { + "epoch": 1.864166259765625e-05, + "model_forward_time": 0.024436235427856445, + "step": 12217 + }, + { + "epoch": 1.864166259765625e-05, + "step": 12217, + "training_step_time": 0.1727888584136963 + }, + { + "epoch": 1.86431884765625e-05, + "model_forward_time": 0.024678468704223633, + "step": 12218 + }, + { + "epoch": 1.86431884765625e-05, + "step": 12218, + "training_step_time": 0.11142873764038086 + }, + { + "epoch": 1.864471435546875e-05, + "model_forward_time": 0.024569988250732422, + "step": 12219 + }, + { + "epoch": 1.864471435546875e-05, + "step": 12219, + "training_step_time": 0.1053457260131836 + }, + { + "epoch": 1.8646240234375e-05, + "grad_norm": 0.5920787453651428, + "learning_rate": 6.896855843434078e-05, + "loss": 0.0179, + "step": 12220 + }, + { + "epoch": 1.8646240234375e-05, + "model_forward_time": 0.025516748428344727, + "step": 12220 + }, + { + "epoch": 1.8646240234375e-05, + "step": 12220, + "training_step_time": 0.12272787094116211 + }, + { + "epoch": 1.864776611328125e-05, + "model_forward_time": 0.024941682815551758, + "step": 12221 + }, + { + "epoch": 1.864776611328125e-05, + "step": 12221, + "training_step_time": 0.10534191131591797 + }, + { + "epoch": 1.86492919921875e-05, + "model_forward_time": 0.02530694007873535, + "step": 12222 + }, + { + "epoch": 1.86492919921875e-05, + "step": 12222, + "training_step_time": 0.12171316146850586 + }, + { + "epoch": 1.865081787109375e-05, + "model_forward_time": 0.02616715431213379, + "step": 12223 + }, + { + "epoch": 1.865081787109375e-05, + "step": 12223, + "training_step_time": 0.10560345649719238 + }, + { + "epoch": 1.865234375e-05, + "model_forward_time": 0.025304317474365234, + "step": 12224 + }, + { + "epoch": 1.865234375e-05, + "step": 12224, + "training_step_time": 0.11466836929321289 + }, + { + "epoch": 1.865386962890625e-05, + "model_forward_time": 0.02510809898376465, + "step": 12225 + }, + { + "epoch": 1.865386962890625e-05, + "step": 12225, + "training_step_time": 0.10394167900085449 + }, + { + "epoch": 1.86553955078125e-05, + "model_forward_time": 0.025491714477539062, + "step": 12226 + }, + { + "epoch": 1.86553955078125e-05, + "step": 12226, + "training_step_time": 0.10468292236328125 + }, + { + "epoch": 1.865692138671875e-05, + "model_forward_time": 0.02528214454650879, + "step": 12227 + }, + { + "epoch": 1.865692138671875e-05, + "step": 12227, + "training_step_time": 0.10531830787658691 + }, + { + "epoch": 1.8658447265625e-05, + "model_forward_time": 0.02516794204711914, + "step": 12228 + }, + { + "epoch": 1.8658447265625e-05, + "step": 12228, + "training_step_time": 0.10705089569091797 + }, + { + "epoch": 1.865997314453125e-05, + "model_forward_time": 0.02544116973876953, + "step": 12229 + }, + { + "epoch": 1.865997314453125e-05, + "step": 12229, + "training_step_time": 0.10489082336425781 + }, + { + "epoch": 1.86614990234375e-05, + "grad_norm": 0.38967519998550415, + "learning_rate": 6.89175514546312e-05, + "loss": 0.0238, + "step": 12230 + }, + { + "epoch": 1.86614990234375e-05, + "model_forward_time": 0.02520465850830078, + "step": 12230 + }, + { + "epoch": 1.86614990234375e-05, + "step": 12230, + "training_step_time": 0.10487127304077148 + }, + { + "epoch": 1.866302490234375e-05, + "model_forward_time": 0.02511119842529297, + "step": 12231 + }, + { + "epoch": 1.866302490234375e-05, + "step": 12231, + "training_step_time": 0.10860204696655273 + }, + { + "epoch": 1.866455078125e-05, + "model_forward_time": 0.0253448486328125, + "step": 12232 + }, + { + "epoch": 1.866455078125e-05, + "step": 12232, + "training_step_time": 0.10773730278015137 + }, + { + "epoch": 1.866607666015625e-05, + "model_forward_time": 0.025053024291992188, + "step": 12233 + }, + { + "epoch": 1.866607666015625e-05, + "step": 12233, + "training_step_time": 0.11473274230957031 + }, + { + "epoch": 1.86676025390625e-05, + "model_forward_time": 0.02533435821533203, + "step": 12234 + }, + { + "epoch": 1.86676025390625e-05, + "step": 12234, + "training_step_time": 0.10961723327636719 + }, + { + "epoch": 1.866912841796875e-05, + "model_forward_time": 0.025982141494750977, + "step": 12235 + }, + { + "epoch": 1.866912841796875e-05, + "step": 12235, + "training_step_time": 0.10794568061828613 + }, + { + "epoch": 1.8670654296875e-05, + "model_forward_time": 0.024712324142456055, + "step": 12236 + }, + { + "epoch": 1.8670654296875e-05, + "step": 12236, + "training_step_time": 0.10839700698852539 + }, + { + "epoch": 1.867218017578125e-05, + "model_forward_time": 0.024310827255249023, + "step": 12237 + }, + { + "epoch": 1.867218017578125e-05, + "step": 12237, + "training_step_time": 0.10738158226013184 + }, + { + "epoch": 1.86737060546875e-05, + "model_forward_time": 0.024081945419311523, + "step": 12238 + }, + { + "epoch": 1.86737060546875e-05, + "step": 12238, + "training_step_time": 0.11095452308654785 + }, + { + "epoch": 1.867523193359375e-05, + "model_forward_time": 0.025173187255859375, + "step": 12239 + }, + { + "epoch": 1.867523193359375e-05, + "step": 12239, + "training_step_time": 0.1131296157836914 + }, + { + "epoch": 1.86767578125e-05, + "grad_norm": 0.4197467565536499, + "learning_rate": 6.886652148831279e-05, + "loss": 0.017, + "step": 12240 + }, + { + "epoch": 1.86767578125e-05, + "model_forward_time": 0.025347232818603516, + "step": 12240 + }, + { + "epoch": 1.86767578125e-05, + "step": 12240, + "training_step_time": 0.10682916641235352 + }, + { + "epoch": 1.867828369140625e-05, + "model_forward_time": 0.025358915328979492, + "step": 12241 + }, + { + "epoch": 1.867828369140625e-05, + "step": 12241, + "training_step_time": 0.10932016372680664 + }, + { + "epoch": 1.86798095703125e-05, + "model_forward_time": 0.025012493133544922, + "step": 12242 + }, + { + "epoch": 1.86798095703125e-05, + "step": 12242, + "training_step_time": 0.10710310935974121 + }, + { + "epoch": 1.868133544921875e-05, + "model_forward_time": 0.025623321533203125, + "step": 12243 + }, + { + "epoch": 1.868133544921875e-05, + "step": 12243, + "training_step_time": 0.16004395484924316 + }, + { + "epoch": 1.8682861328125e-05, + "model_forward_time": 0.024783849716186523, + "step": 12244 + }, + { + "epoch": 1.8682861328125e-05, + "step": 12244, + "training_step_time": 0.10759615898132324 + }, + { + "epoch": 1.868438720703125e-05, + "model_forward_time": 0.0251162052154541, + "step": 12245 + }, + { + "epoch": 1.868438720703125e-05, + "step": 12245, + "training_step_time": 0.11182188987731934 + }, + { + "epoch": 1.86859130859375e-05, + "model_forward_time": 0.025276899337768555, + "step": 12246 + }, + { + "epoch": 1.86859130859375e-05, + "step": 12246, + "training_step_time": 0.12452292442321777 + }, + { + "epoch": 1.868743896484375e-05, + "model_forward_time": 0.02532339096069336, + "step": 12247 + }, + { + "epoch": 1.868743896484375e-05, + "step": 12247, + "training_step_time": 0.13311219215393066 + }, + { + "epoch": 1.868896484375e-05, + "model_forward_time": 0.025262832641601562, + "step": 12248 + }, + { + "epoch": 1.868896484375e-05, + "step": 12248, + "training_step_time": 0.11025476455688477 + }, + { + "epoch": 1.869049072265625e-05, + "model_forward_time": 0.025403499603271484, + "step": 12249 + }, + { + "epoch": 1.869049072265625e-05, + "step": 12249, + "training_step_time": 0.11726641654968262 + }, + { + "epoch": 1.86920166015625e-05, + "grad_norm": 0.3409532904624939, + "learning_rate": 6.881546859739179e-05, + "loss": 0.0286, + "step": 12250 + }, + { + "epoch": 1.86920166015625e-05, + "model_forward_time": 0.02521657943725586, + "step": 12250 + }, + { + "epoch": 1.86920166015625e-05, + "step": 12250, + "training_step_time": 0.10749268531799316 + }, + { + "epoch": 1.869354248046875e-05, + "model_forward_time": 0.02523040771484375, + "step": 12251 + }, + { + "epoch": 1.869354248046875e-05, + "step": 12251, + "training_step_time": 0.10775041580200195 + }, + { + "epoch": 1.8695068359375e-05, + "model_forward_time": 0.025638341903686523, + "step": 12252 + }, + { + "epoch": 1.8695068359375e-05, + "step": 12252, + "training_step_time": 0.1044301986694336 + }, + { + "epoch": 1.869659423828125e-05, + "model_forward_time": 0.02445220947265625, + "step": 12253 + }, + { + "epoch": 1.869659423828125e-05, + "step": 12253, + "training_step_time": 0.1233832836151123 + }, + { + "epoch": 1.86981201171875e-05, + "model_forward_time": 0.02466130256652832, + "step": 12254 + }, + { + "epoch": 1.86981201171875e-05, + "step": 12254, + "training_step_time": 0.12880825996398926 + }, + { + "epoch": 1.869964599609375e-05, + "model_forward_time": 0.025395631790161133, + "step": 12255 + }, + { + "epoch": 1.869964599609375e-05, + "step": 12255, + "training_step_time": 0.10640430450439453 + }, + { + "epoch": 1.8701171875e-05, + "model_forward_time": 0.025504350662231445, + "step": 12256 + }, + { + "epoch": 1.8701171875e-05, + "step": 12256, + "training_step_time": 0.12272858619689941 + }, + { + "epoch": 1.870269775390625e-05, + "model_forward_time": 0.02539205551147461, + "step": 12257 + }, + { + "epoch": 1.870269775390625e-05, + "step": 12257, + "training_step_time": 0.1430068016052246 + }, + { + "epoch": 1.87042236328125e-05, + "model_forward_time": 0.025111913681030273, + "step": 12258 + }, + { + "epoch": 1.87042236328125e-05, + "step": 12258, + "training_step_time": 0.2016453742980957 + }, + { + "epoch": 1.870574951171875e-05, + "model_forward_time": 0.02498006820678711, + "step": 12259 + }, + { + "epoch": 1.870574951171875e-05, + "step": 12259, + "training_step_time": 0.1595308780670166 + }, + { + "epoch": 1.8707275390625e-05, + "grad_norm": 0.43211686611175537, + "learning_rate": 6.876439284390223e-05, + "loss": 0.0281, + "step": 12260 + }, + { + "epoch": 1.8707275390625e-05, + "model_forward_time": 0.024408340454101562, + "step": 12260 + }, + { + "epoch": 1.8707275390625e-05, + "step": 12260, + "training_step_time": 0.11379694938659668 + }, + { + "epoch": 1.870880126953125e-05, + "model_forward_time": 0.024810314178466797, + "step": 12261 + }, + { + "epoch": 1.870880126953125e-05, + "step": 12261, + "training_step_time": 0.1100618839263916 + }, + { + "epoch": 1.87103271484375e-05, + "model_forward_time": 0.02516651153564453, + "step": 12262 + }, + { + "epoch": 1.87103271484375e-05, + "step": 12262, + "training_step_time": 0.19472813606262207 + }, + { + "epoch": 1.871185302734375e-05, + "model_forward_time": 0.02423572540283203, + "step": 12263 + }, + { + "epoch": 1.871185302734375e-05, + "step": 12263, + "training_step_time": 0.10750007629394531 + }, + { + "epoch": 1.871337890625e-05, + "model_forward_time": 0.023868560791015625, + "step": 12264 + }, + { + "epoch": 1.871337890625e-05, + "step": 12264, + "training_step_time": 0.1659398078918457 + }, + { + "epoch": 1.871490478515625e-05, + "model_forward_time": 0.024478435516357422, + "step": 12265 + }, + { + "epoch": 1.871490478515625e-05, + "step": 12265, + "training_step_time": 0.12684178352355957 + }, + { + "epoch": 1.87164306640625e-05, + "model_forward_time": 0.02469158172607422, + "step": 12266 + }, + { + "epoch": 1.87164306640625e-05, + "step": 12266, + "training_step_time": 0.16400361061096191 + }, + { + "epoch": 1.871795654296875e-05, + "model_forward_time": 0.02426433563232422, + "step": 12267 + }, + { + "epoch": 1.871795654296875e-05, + "step": 12267, + "training_step_time": 0.11981987953186035 + }, + { + "epoch": 1.8719482421875e-05, + "model_forward_time": 0.027681350708007812, + "step": 12268 + }, + { + "epoch": 1.8719482421875e-05, + "step": 12268, + "training_step_time": 0.10977530479431152 + }, + { + "epoch": 1.872100830078125e-05, + "model_forward_time": 0.025437355041503906, + "step": 12269 + }, + { + "epoch": 1.872100830078125e-05, + "step": 12269, + "training_step_time": 0.13452506065368652 + }, + { + "epoch": 1.87225341796875e-05, + "grad_norm": 0.24758252501487732, + "learning_rate": 6.871329428990602e-05, + "loss": 0.0216, + "step": 12270 + }, + { + "epoch": 1.87225341796875e-05, + "model_forward_time": 0.02554917335510254, + "step": 12270 + }, + { + "epoch": 1.87225341796875e-05, + "step": 12270, + "training_step_time": 0.10578608512878418 + }, + { + "epoch": 1.872406005859375e-05, + "model_forward_time": 0.02552628517150879, + "step": 12271 + }, + { + "epoch": 1.872406005859375e-05, + "step": 12271, + "training_step_time": 0.10780692100524902 + }, + { + "epoch": 1.87255859375e-05, + "model_forward_time": 0.02537679672241211, + "step": 12272 + }, + { + "epoch": 1.87255859375e-05, + "step": 12272, + "training_step_time": 0.11155557632446289 + }, + { + "epoch": 1.872711181640625e-05, + "model_forward_time": 0.025572776794433594, + "step": 12273 + }, + { + "epoch": 1.872711181640625e-05, + "step": 12273, + "training_step_time": 0.1747570037841797 + }, + { + "epoch": 1.87286376953125e-05, + "model_forward_time": 0.024780988693237305, + "step": 12274 + }, + { + "epoch": 1.87286376953125e-05, + "step": 12274, + "training_step_time": 0.20663881301879883 + }, + { + "epoch": 1.873016357421875e-05, + "model_forward_time": 0.02562093734741211, + "step": 12275 + }, + { + "epoch": 1.873016357421875e-05, + "step": 12275, + "training_step_time": 0.1924285888671875 + }, + { + "epoch": 1.8731689453125e-05, + "model_forward_time": 0.02427196502685547, + "step": 12276 + }, + { + "epoch": 1.8731689453125e-05, + "step": 12276, + "training_step_time": 0.1919260025024414 + }, + { + "epoch": 1.873321533203125e-05, + "model_forward_time": 0.024675846099853516, + "step": 12277 + }, + { + "epoch": 1.873321533203125e-05, + "step": 12277, + "training_step_time": 0.17362666130065918 + }, + { + "epoch": 1.87347412109375e-05, + "model_forward_time": 0.023099660873413086, + "step": 12278 + }, + { + "epoch": 1.87347412109375e-05, + "step": 12278, + "training_step_time": 0.1619856357574463 + }, + { + "epoch": 1.873626708984375e-05, + "model_forward_time": 0.0247800350189209, + "step": 12279 + }, + { + "epoch": 1.873626708984375e-05, + "step": 12279, + "training_step_time": 0.1556997299194336 + }, + { + "epoch": 1.873779296875e-05, + "grad_norm": 0.20096886157989502, + "learning_rate": 6.86621729974927e-05, + "loss": 0.0179, + "step": 12280 + }, + { + "epoch": 1.873779296875e-05, + "model_forward_time": 0.024496793746948242, + "step": 12280 + }, + { + "epoch": 1.873779296875e-05, + "step": 12280, + "training_step_time": 0.1393299102783203 + }, + { + "epoch": 1.873931884765625e-05, + "model_forward_time": 0.024519681930541992, + "step": 12281 + }, + { + "epoch": 1.873931884765625e-05, + "step": 12281, + "training_step_time": 0.12804031372070312 + }, + { + "epoch": 1.87408447265625e-05, + "model_forward_time": 0.024983644485473633, + "step": 12282 + }, + { + "epoch": 1.87408447265625e-05, + "step": 12282, + "training_step_time": 0.12546658515930176 + }, + { + "epoch": 1.874237060546875e-05, + "model_forward_time": 0.024938106536865234, + "step": 12283 + }, + { + "epoch": 1.874237060546875e-05, + "step": 12283, + "training_step_time": 0.12483000755310059 + }, + { + "epoch": 1.8743896484375e-05, + "model_forward_time": 0.024898290634155273, + "step": 12284 + }, + { + "epoch": 1.8743896484375e-05, + "step": 12284, + "training_step_time": 0.12295699119567871 + }, + { + "epoch": 1.874542236328125e-05, + "model_forward_time": 0.025463581085205078, + "step": 12285 + }, + { + "epoch": 1.874542236328125e-05, + "step": 12285, + "training_step_time": 0.11552762985229492 + }, + { + "epoch": 1.87469482421875e-05, + "model_forward_time": 0.025156497955322266, + "step": 12286 + }, + { + "epoch": 1.87469482421875e-05, + "step": 12286, + "training_step_time": 0.15613532066345215 + }, + { + "epoch": 1.874847412109375e-05, + "model_forward_time": 0.02501535415649414, + "step": 12287 + }, + { + "epoch": 1.874847412109375e-05, + "step": 12287, + "training_step_time": 0.16799545288085938 + }, + { + "epoch": 1.875e-05, + "model_forward_time": 0.02463984489440918, + "step": 12288 + }, + { + "epoch": 1.875e-05, + "step": 12288, + "training_step_time": 0.1384732723236084 + }, + { + "epoch": 1.875152587890625e-05, + "model_forward_time": 0.02461719512939453, + "step": 12289 + }, + { + "epoch": 1.875152587890625e-05, + "step": 12289, + "training_step_time": 0.1095285415649414 + }, + { + "epoch": 1.87530517578125e-05, + "grad_norm": 0.3073076903820038, + "learning_rate": 6.861102902877946e-05, + "loss": 0.0256, + "step": 12290 + }, + { + "epoch": 1.87530517578125e-05, + "model_forward_time": 0.024844884872436523, + "step": 12290 + }, + { + "epoch": 1.87530517578125e-05, + "step": 12290, + "training_step_time": 0.1140146255493164 + }, + { + "epoch": 1.875457763671875e-05, + "model_forward_time": 0.02497720718383789, + "step": 12291 + }, + { + "epoch": 1.875457763671875e-05, + "step": 12291, + "training_step_time": 0.13397526741027832 + }, + { + "epoch": 1.8756103515625e-05, + "model_forward_time": 0.024916648864746094, + "step": 12292 + }, + { + "epoch": 1.8756103515625e-05, + "step": 12292, + "training_step_time": 0.10604643821716309 + }, + { + "epoch": 1.875762939453125e-05, + "model_forward_time": 0.02440166473388672, + "step": 12293 + }, + { + "epoch": 1.875762939453125e-05, + "step": 12293, + "training_step_time": 0.10359025001525879 + }, + { + "epoch": 1.87591552734375e-05, + "model_forward_time": 0.026600360870361328, + "step": 12294 + }, + { + "epoch": 1.87591552734375e-05, + "step": 12294, + "training_step_time": 0.1537926197052002 + }, + { + "epoch": 1.876068115234375e-05, + "model_forward_time": 0.0252988338470459, + "step": 12295 + }, + { + "epoch": 1.876068115234375e-05, + "step": 12295, + "training_step_time": 0.11036539077758789 + }, + { + "epoch": 1.876220703125e-05, + "model_forward_time": 0.025516986846923828, + "step": 12296 + }, + { + "epoch": 1.876220703125e-05, + "step": 12296, + "training_step_time": 0.19042229652404785 + }, + { + "epoch": 1.876373291015625e-05, + "model_forward_time": 0.02401447296142578, + "step": 12297 + }, + { + "epoch": 1.876373291015625e-05, + "step": 12297, + "training_step_time": 0.14038825035095215 + }, + { + "epoch": 1.87652587890625e-05, + "model_forward_time": 0.024471044540405273, + "step": 12298 + }, + { + "epoch": 1.87652587890625e-05, + "step": 12298, + "training_step_time": 0.19980883598327637 + }, + { + "epoch": 1.876678466796875e-05, + "model_forward_time": 0.02475738525390625, + "step": 12299 + }, + { + "epoch": 1.876678466796875e-05, + "step": 12299, + "training_step_time": 0.13838839530944824 + }, + { + "epoch": 1.8768310546875e-05, + "grad_norm": 0.3325486481189728, + "learning_rate": 6.855986244591104e-05, + "loss": 0.014, + "step": 12300 + }, + { + "epoch": 1.8768310546875e-05, + "model_forward_time": 0.024495363235473633, + "step": 12300 + }, + { + "epoch": 1.8768310546875e-05, + "step": 12300, + "training_step_time": 0.1112680435180664 + }, + { + "epoch": 1.876983642578125e-05, + "model_forward_time": 0.025615215301513672, + "step": 12301 + }, + { + "epoch": 1.876983642578125e-05, + "step": 12301, + "training_step_time": 0.10787606239318848 + }, + { + "epoch": 1.87713623046875e-05, + "model_forward_time": 0.025152921676635742, + "step": 12302 + }, + { + "epoch": 1.87713623046875e-05, + "step": 12302, + "training_step_time": 0.19803667068481445 + }, + { + "epoch": 1.877288818359375e-05, + "model_forward_time": 0.024358510971069336, + "step": 12303 + }, + { + "epoch": 1.877288818359375e-05, + "step": 12303, + "training_step_time": 0.10419797897338867 + }, + { + "epoch": 1.87744140625e-05, + "model_forward_time": 0.02458047866821289, + "step": 12304 + }, + { + "epoch": 1.87744140625e-05, + "step": 12304, + "training_step_time": 0.1511688232421875 + }, + { + "epoch": 1.877593994140625e-05, + "model_forward_time": 0.02506422996520996, + "step": 12305 + }, + { + "epoch": 1.877593994140625e-05, + "step": 12305, + "training_step_time": 0.11256766319274902 + }, + { + "epoch": 1.87774658203125e-05, + "model_forward_time": 0.024837017059326172, + "step": 12306 + }, + { + "epoch": 1.87774658203125e-05, + "step": 12306, + "training_step_time": 0.21063733100891113 + }, + { + "epoch": 1.877899169921875e-05, + "model_forward_time": 0.029025793075561523, + "step": 12307 + }, + { + "epoch": 1.877899169921875e-05, + "step": 12307, + "training_step_time": 0.10807490348815918 + }, + { + "epoch": 1.8780517578125e-05, + "model_forward_time": 0.02538013458251953, + "step": 12308 + }, + { + "epoch": 1.8780517578125e-05, + "step": 12308, + "training_step_time": 0.10535120964050293 + }, + { + "epoch": 1.878204345703125e-05, + "model_forward_time": 0.025409698486328125, + "step": 12309 + }, + { + "epoch": 1.878204345703125e-05, + "step": 12309, + "training_step_time": 0.12454891204833984 + }, + { + "epoch": 1.87835693359375e-05, + "grad_norm": 0.23103386163711548, + "learning_rate": 6.850867331105967e-05, + "loss": 0.0171, + "step": 12310 + }, + { + "epoch": 1.87835693359375e-05, + "model_forward_time": 0.02554154396057129, + "step": 12310 + }, + { + "epoch": 1.87835693359375e-05, + "step": 12310, + "training_step_time": 0.10951566696166992 + }, + { + "epoch": 1.878509521484375e-05, + "model_forward_time": 0.025292634963989258, + "step": 12311 + }, + { + "epoch": 1.878509521484375e-05, + "step": 12311, + "training_step_time": 0.11287808418273926 + }, + { + "epoch": 1.878662109375e-05, + "model_forward_time": 0.02538466453552246, + "step": 12312 + }, + { + "epoch": 1.878662109375e-05, + "step": 12312, + "training_step_time": 0.10770463943481445 + }, + { + "epoch": 1.878814697265625e-05, + "model_forward_time": 0.02527475357055664, + "step": 12313 + }, + { + "epoch": 1.878814697265625e-05, + "step": 12313, + "training_step_time": 0.10830116271972656 + }, + { + "epoch": 1.87896728515625e-05, + "model_forward_time": 0.025318145751953125, + "step": 12314 + }, + { + "epoch": 1.87896728515625e-05, + "step": 12314, + "training_step_time": 0.10912108421325684 + }, + { + "epoch": 1.879119873046875e-05, + "model_forward_time": 0.024888277053833008, + "step": 12315 + }, + { + "epoch": 1.879119873046875e-05, + "step": 12315, + "training_step_time": 0.1120748519897461 + }, + { + "epoch": 1.8792724609375e-05, + "model_forward_time": 0.025374174118041992, + "step": 12316 + }, + { + "epoch": 1.8792724609375e-05, + "step": 12316, + "training_step_time": 0.11240935325622559 + }, + { + "epoch": 1.879425048828125e-05, + "model_forward_time": 0.025285720825195312, + "step": 12317 + }, + { + "epoch": 1.879425048828125e-05, + "step": 12317, + "training_step_time": 0.10642027854919434 + }, + { + "epoch": 1.87957763671875e-05, + "model_forward_time": 0.025480985641479492, + "step": 12318 + }, + { + "epoch": 1.87957763671875e-05, + "step": 12318, + "training_step_time": 0.10683417320251465 + }, + { + "epoch": 1.879730224609375e-05, + "model_forward_time": 0.02534770965576172, + "step": 12319 + }, + { + "epoch": 1.879730224609375e-05, + "step": 12319, + "training_step_time": 0.10824346542358398 + }, + { + "epoch": 1.8798828125e-05, + "grad_norm": 0.3637978136539459, + "learning_rate": 6.845746168642497e-05, + "loss": 0.0225, + "step": 12320 + }, + { + "epoch": 1.8798828125e-05, + "model_forward_time": 0.02551412582397461, + "step": 12320 + }, + { + "epoch": 1.8798828125e-05, + "step": 12320, + "training_step_time": 0.1079092025756836 + }, + { + "epoch": 1.880035400390625e-05, + "model_forward_time": 0.025127172470092773, + "step": 12321 + }, + { + "epoch": 1.880035400390625e-05, + "step": 12321, + "training_step_time": 0.10747361183166504 + }, + { + "epoch": 1.88018798828125e-05, + "model_forward_time": 0.025167465209960938, + "step": 12322 + }, + { + "epoch": 1.88018798828125e-05, + "step": 12322, + "training_step_time": 0.10764551162719727 + }, + { + "epoch": 1.880340576171875e-05, + "model_forward_time": 0.025203943252563477, + "step": 12323 + }, + { + "epoch": 1.880340576171875e-05, + "step": 12323, + "training_step_time": 0.11088705062866211 + }, + { + "epoch": 1.8804931640625e-05, + "model_forward_time": 0.025739431381225586, + "step": 12324 + }, + { + "epoch": 1.8804931640625e-05, + "step": 12324, + "training_step_time": 0.10899090766906738 + }, + { + "epoch": 1.880645751953125e-05, + "model_forward_time": 0.025204896926879883, + "step": 12325 + }, + { + "epoch": 1.880645751953125e-05, + "step": 12325, + "training_step_time": 0.1083824634552002 + }, + { + "epoch": 1.88079833984375e-05, + "model_forward_time": 0.02506232261657715, + "step": 12326 + }, + { + "epoch": 1.88079833984375e-05, + "step": 12326, + "training_step_time": 0.10599541664123535 + }, + { + "epoch": 1.880950927734375e-05, + "model_forward_time": 0.025223731994628906, + "step": 12327 + }, + { + "epoch": 1.880950927734375e-05, + "step": 12327, + "training_step_time": 0.10683751106262207 + }, + { + "epoch": 1.881103515625e-05, + "model_forward_time": 0.026131391525268555, + "step": 12328 + }, + { + "epoch": 1.881103515625e-05, + "step": 12328, + "training_step_time": 0.10608911514282227 + }, + { + "epoch": 1.881256103515625e-05, + "model_forward_time": 0.02544426918029785, + "step": 12329 + }, + { + "epoch": 1.881256103515625e-05, + "step": 12329, + "training_step_time": 0.10672307014465332 + }, + { + "epoch": 1.88140869140625e-05, + "grad_norm": 0.1840587705373764, + "learning_rate": 6.840622763423391e-05, + "loss": 0.0171, + "step": 12330 + }, + { + "epoch": 1.88140869140625e-05, + "model_forward_time": 0.025277137756347656, + "step": 12330 + }, + { + "epoch": 1.88140869140625e-05, + "step": 12330, + "training_step_time": 0.18488264083862305 + }, + { + "epoch": 1.881561279296875e-05, + "model_forward_time": 0.02463221549987793, + "step": 12331 + }, + { + "epoch": 1.881561279296875e-05, + "step": 12331, + "training_step_time": 0.10830545425415039 + }, + { + "epoch": 1.8817138671875e-05, + "model_forward_time": 0.025836467742919922, + "step": 12332 + }, + { + "epoch": 1.8817138671875e-05, + "step": 12332, + "training_step_time": 0.1074824333190918 + }, + { + "epoch": 1.881866455078125e-05, + "model_forward_time": 0.025452852249145508, + "step": 12333 + }, + { + "epoch": 1.881866455078125e-05, + "step": 12333, + "training_step_time": 0.12705016136169434 + }, + { + "epoch": 1.88201904296875e-05, + "model_forward_time": 0.02521491050720215, + "step": 12334 + }, + { + "epoch": 1.88201904296875e-05, + "step": 12334, + "training_step_time": 0.12420487403869629 + }, + { + "epoch": 1.882171630859375e-05, + "model_forward_time": 0.02533435821533203, + "step": 12335 + }, + { + "epoch": 1.882171630859375e-05, + "step": 12335, + "training_step_time": 0.11873888969421387 + }, + { + "epoch": 1.88232421875e-05, + "model_forward_time": 0.02522873878479004, + "step": 12336 + }, + { + "epoch": 1.88232421875e-05, + "step": 12336, + "training_step_time": 0.11315345764160156 + }, + { + "epoch": 1.882476806640625e-05, + "model_forward_time": 0.02526116371154785, + "step": 12337 + }, + { + "epoch": 1.882476806640625e-05, + "step": 12337, + "training_step_time": 0.11102008819580078 + }, + { + "epoch": 1.88262939453125e-05, + "model_forward_time": 0.025408029556274414, + "step": 12338 + }, + { + "epoch": 1.88262939453125e-05, + "step": 12338, + "training_step_time": 0.10679817199707031 + }, + { + "epoch": 1.882781982421875e-05, + "model_forward_time": 0.0257565975189209, + "step": 12339 + }, + { + "epoch": 1.882781982421875e-05, + "step": 12339, + "training_step_time": 0.10509538650512695 + }, + { + "epoch": 1.8829345703125e-05, + "grad_norm": 0.34008651971817017, + "learning_rate": 6.835497121674066e-05, + "loss": 0.0246, + "step": 12340 + }, + { + "epoch": 1.8829345703125e-05, + "model_forward_time": 0.024199962615966797, + "step": 12340 + }, + { + "epoch": 1.8829345703125e-05, + "step": 12340, + "training_step_time": 0.1422128677368164 + }, + { + "epoch": 1.883087158203125e-05, + "model_forward_time": 0.023710250854492188, + "step": 12341 + }, + { + "epoch": 1.883087158203125e-05, + "step": 12341, + "training_step_time": 0.16194796562194824 + }, + { + "epoch": 1.88323974609375e-05, + "model_forward_time": 0.02458047866821289, + "step": 12342 + }, + { + "epoch": 1.88323974609375e-05, + "step": 12342, + "training_step_time": 0.11478424072265625 + }, + { + "epoch": 1.883392333984375e-05, + "model_forward_time": 0.024599313735961914, + "step": 12343 + }, + { + "epoch": 1.883392333984375e-05, + "step": 12343, + "training_step_time": 0.1451280117034912 + }, + { + "epoch": 1.883544921875e-05, + "model_forward_time": 0.024982690811157227, + "step": 12344 + }, + { + "epoch": 1.883544921875e-05, + "step": 12344, + "training_step_time": 0.21150898933410645 + }, + { + "epoch": 1.883697509765625e-05, + "model_forward_time": 0.025720596313476562, + "step": 12345 + }, + { + "epoch": 1.883697509765625e-05, + "step": 12345, + "training_step_time": 0.13385963439941406 + }, + { + "epoch": 1.88385009765625e-05, + "model_forward_time": 0.02490544319152832, + "step": 12346 + }, + { + "epoch": 1.88385009765625e-05, + "step": 12346, + "training_step_time": 0.1907804012298584 + }, + { + "epoch": 1.884002685546875e-05, + "model_forward_time": 0.023786544799804688, + "step": 12347 + }, + { + "epoch": 1.884002685546875e-05, + "step": 12347, + "training_step_time": 0.12320876121520996 + }, + { + "epoch": 1.8841552734375e-05, + "model_forward_time": 0.02321457862854004, + "step": 12348 + }, + { + "epoch": 1.8841552734375e-05, + "step": 12348, + "training_step_time": 0.11755156517028809 + }, + { + "epoch": 1.884307861328125e-05, + "model_forward_time": 0.02467942237854004, + "step": 12349 + }, + { + "epoch": 1.884307861328125e-05, + "step": 12349, + "training_step_time": 0.20057892799377441 + }, + { + "epoch": 1.88446044921875e-05, + "grad_norm": 0.37574952840805054, + "learning_rate": 6.830369249622662e-05, + "loss": 0.0208, + "step": 12350 + }, + { + "epoch": 1.88446044921875e-05, + "model_forward_time": 0.02397608757019043, + "step": 12350 + }, + { + "epoch": 1.88446044921875e-05, + "step": 12350, + "training_step_time": 0.12459111213684082 + }, + { + "epoch": 1.884613037109375e-05, + "model_forward_time": 0.024800777435302734, + "step": 12351 + }, + { + "epoch": 1.884613037109375e-05, + "step": 12351, + "training_step_time": 0.13967084884643555 + }, + { + "epoch": 1.884765625e-05, + "model_forward_time": 0.026838064193725586, + "step": 12352 + }, + { + "epoch": 1.884765625e-05, + "step": 12352, + "training_step_time": 0.10840272903442383 + }, + { + "epoch": 1.884918212890625e-05, + "model_forward_time": 0.02546525001525879, + "step": 12353 + }, + { + "epoch": 1.884918212890625e-05, + "step": 12353, + "training_step_time": 0.15772199630737305 + }, + { + "epoch": 1.88507080078125e-05, + "model_forward_time": 0.025690317153930664, + "step": 12354 + }, + { + "epoch": 1.88507080078125e-05, + "step": 12354, + "training_step_time": 0.1327216625213623 + }, + { + "epoch": 1.885223388671875e-05, + "model_forward_time": 0.024763107299804688, + "step": 12355 + }, + { + "epoch": 1.885223388671875e-05, + "step": 12355, + "training_step_time": 0.11109471321105957 + }, + { + "epoch": 1.8853759765625e-05, + "model_forward_time": 0.025065183639526367, + "step": 12356 + }, + { + "epoch": 1.8853759765625e-05, + "step": 12356, + "training_step_time": 0.10661697387695312 + }, + { + "epoch": 1.885528564453125e-05, + "model_forward_time": 0.025434494018554688, + "step": 12357 + }, + { + "epoch": 1.885528564453125e-05, + "step": 12357, + "training_step_time": 0.10590958595275879 + }, + { + "epoch": 1.88568115234375e-05, + "model_forward_time": 0.025129079818725586, + "step": 12358 + }, + { + "epoch": 1.88568115234375e-05, + "step": 12358, + "training_step_time": 0.10525918006896973 + }, + { + "epoch": 1.885833740234375e-05, + "model_forward_time": 0.025310039520263672, + "step": 12359 + }, + { + "epoch": 1.885833740234375e-05, + "step": 12359, + "training_step_time": 0.10958719253540039 + }, + { + "epoch": 1.885986328125e-05, + "grad_norm": 0.19731856882572174, + "learning_rate": 6.825239153500029e-05, + "loss": 0.0218, + "step": 12360 + }, + { + "epoch": 1.885986328125e-05, + "model_forward_time": 0.025300264358520508, + "step": 12360 + }, + { + "epoch": 1.885986328125e-05, + "step": 12360, + "training_step_time": 0.10790109634399414 + }, + { + "epoch": 1.886138916015625e-05, + "model_forward_time": 0.025275468826293945, + "step": 12361 + }, + { + "epoch": 1.886138916015625e-05, + "step": 12361, + "training_step_time": 0.10804510116577148 + }, + { + "epoch": 1.88629150390625e-05, + "model_forward_time": 0.02518486976623535, + "step": 12362 + }, + { + "epoch": 1.88629150390625e-05, + "step": 12362, + "training_step_time": 0.10567140579223633 + }, + { + "epoch": 1.886444091796875e-05, + "model_forward_time": 0.025133132934570312, + "step": 12363 + }, + { + "epoch": 1.886444091796875e-05, + "step": 12363, + "training_step_time": 0.10894584655761719 + }, + { + "epoch": 1.8865966796875e-05, + "model_forward_time": 0.02524566650390625, + "step": 12364 + }, + { + "epoch": 1.8865966796875e-05, + "step": 12364, + "training_step_time": 0.10762310028076172 + }, + { + "epoch": 1.886749267578125e-05, + "model_forward_time": 0.02541327476501465, + "step": 12365 + }, + { + "epoch": 1.886749267578125e-05, + "step": 12365, + "training_step_time": 0.1047513484954834 + }, + { + "epoch": 1.88690185546875e-05, + "model_forward_time": 0.025325536727905273, + "step": 12366 + }, + { + "epoch": 1.88690185546875e-05, + "step": 12366, + "training_step_time": 0.10537528991699219 + }, + { + "epoch": 1.887054443359375e-05, + "model_forward_time": 0.025299787521362305, + "step": 12367 + }, + { + "epoch": 1.887054443359375e-05, + "step": 12367, + "training_step_time": 0.10898709297180176 + }, + { + "epoch": 1.88720703125e-05, + "model_forward_time": 0.02564525604248047, + "step": 12368 + }, + { + "epoch": 1.88720703125e-05, + "step": 12368, + "training_step_time": 0.10590147972106934 + }, + { + "epoch": 1.887359619140625e-05, + "model_forward_time": 0.02532482147216797, + "step": 12369 + }, + { + "epoch": 1.887359619140625e-05, + "step": 12369, + "training_step_time": 0.10904717445373535 + }, + { + "epoch": 1.88751220703125e-05, + "grad_norm": 0.29167595505714417, + "learning_rate": 6.820106839539715e-05, + "loss": 0.0181, + "step": 12370 + }, + { + "epoch": 1.88751220703125e-05, + "model_forward_time": 0.025223731994628906, + "step": 12370 + }, + { + "epoch": 1.88751220703125e-05, + "step": 12370, + "training_step_time": 0.10453557968139648 + }, + { + "epoch": 1.887664794921875e-05, + "model_forward_time": 0.02516651153564453, + "step": 12371 + }, + { + "epoch": 1.887664794921875e-05, + "step": 12371, + "training_step_time": 0.10536074638366699 + }, + { + "epoch": 1.8878173828125e-05, + "model_forward_time": 0.02500629425048828, + "step": 12372 + }, + { + "epoch": 1.8878173828125e-05, + "step": 12372, + "training_step_time": 0.10740852355957031 + }, + { + "epoch": 1.887969970703125e-05, + "model_forward_time": 0.0249783992767334, + "step": 12373 + }, + { + "epoch": 1.887969970703125e-05, + "step": 12373, + "training_step_time": 0.10741233825683594 + }, + { + "epoch": 1.88812255859375e-05, + "model_forward_time": 0.025302648544311523, + "step": 12374 + }, + { + "epoch": 1.88812255859375e-05, + "step": 12374, + "training_step_time": 0.10703158378601074 + }, + { + "epoch": 1.888275146484375e-05, + "model_forward_time": 0.02522134780883789, + "step": 12375 + }, + { + "epoch": 1.888275146484375e-05, + "step": 12375, + "training_step_time": 0.10625791549682617 + }, + { + "epoch": 1.888427734375e-05, + "model_forward_time": 0.02533245086669922, + "step": 12376 + }, + { + "epoch": 1.888427734375e-05, + "step": 12376, + "training_step_time": 0.1524808406829834 + }, + { + "epoch": 1.888580322265625e-05, + "model_forward_time": 0.024506807327270508, + "step": 12377 + }, + { + "epoch": 1.888580322265625e-05, + "step": 12377, + "training_step_time": 0.11447715759277344 + }, + { + "epoch": 1.88873291015625e-05, + "model_forward_time": 0.02473306655883789, + "step": 12378 + }, + { + "epoch": 1.88873291015625e-05, + "step": 12378, + "training_step_time": 0.14330244064331055 + }, + { + "epoch": 1.888885498046875e-05, + "model_forward_time": 0.024526357650756836, + "step": 12379 + }, + { + "epoch": 1.888885498046875e-05, + "step": 12379, + "training_step_time": 0.12606072425842285 + }, + { + "epoch": 1.8890380859375e-05, + "grad_norm": 0.4402039647102356, + "learning_rate": 6.814972313977967e-05, + "loss": 0.018, + "step": 12380 + }, + { + "epoch": 1.8890380859375e-05, + "model_forward_time": 0.0242919921875, + "step": 12380 + }, + { + "epoch": 1.8890380859375e-05, + "step": 12380, + "training_step_time": 0.12673258781433105 + }, + { + "epoch": 1.889190673828125e-05, + "model_forward_time": 0.024732589721679688, + "step": 12381 + }, + { + "epoch": 1.889190673828125e-05, + "step": 12381, + "training_step_time": 0.10816407203674316 + }, + { + "epoch": 1.88934326171875e-05, + "model_forward_time": 0.025115251541137695, + "step": 12382 + }, + { + "epoch": 1.88934326171875e-05, + "step": 12382, + "training_step_time": 0.12321949005126953 + }, + { + "epoch": 1.889495849609375e-05, + "model_forward_time": 0.02582526206970215, + "step": 12383 + }, + { + "epoch": 1.889495849609375e-05, + "step": 12383, + "training_step_time": 0.10942721366882324 + }, + { + "epoch": 1.8896484375e-05, + "model_forward_time": 0.026068687438964844, + "step": 12384 + }, + { + "epoch": 1.8896484375e-05, + "step": 12384, + "training_step_time": 0.10623717308044434 + }, + { + "epoch": 1.889801025390625e-05, + "model_forward_time": 0.02541971206665039, + "step": 12385 + }, + { + "epoch": 1.889801025390625e-05, + "step": 12385, + "training_step_time": 0.10899567604064941 + }, + { + "epoch": 1.88995361328125e-05, + "model_forward_time": 0.02564263343811035, + "step": 12386 + }, + { + "epoch": 1.88995361328125e-05, + "step": 12386, + "training_step_time": 0.11678028106689453 + }, + { + "epoch": 1.890106201171875e-05, + "model_forward_time": 0.02530193328857422, + "step": 12387 + }, + { + "epoch": 1.890106201171875e-05, + "step": 12387, + "training_step_time": 0.16473865509033203 + }, + { + "epoch": 1.8902587890625e-05, + "model_forward_time": 0.0247039794921875, + "step": 12388 + }, + { + "epoch": 1.8902587890625e-05, + "step": 12388, + "training_step_time": 0.19936466217041016 + }, + { + "epoch": 1.890411376953125e-05, + "model_forward_time": 0.02464914321899414, + "step": 12389 + }, + { + "epoch": 1.890411376953125e-05, + "step": 12389, + "training_step_time": 0.1688237190246582 + }, + { + "epoch": 1.89056396484375e-05, + "grad_norm": 0.3412000834941864, + "learning_rate": 6.809835583053715e-05, + "loss": 0.0229, + "step": 12390 + }, + { + "epoch": 1.89056396484375e-05, + "model_forward_time": 0.024265527725219727, + "step": 12390 + }, + { + "epoch": 1.89056396484375e-05, + "step": 12390, + "training_step_time": 0.19477415084838867 + }, + { + "epoch": 1.890716552734375e-05, + "model_forward_time": 0.024587392807006836, + "step": 12391 + }, + { + "epoch": 1.890716552734375e-05, + "step": 12391, + "training_step_time": 0.14510011672973633 + }, + { + "epoch": 1.890869140625e-05, + "model_forward_time": 0.02468085289001465, + "step": 12392 + }, + { + "epoch": 1.890869140625e-05, + "step": 12392, + "training_step_time": 0.13998651504516602 + }, + { + "epoch": 1.891021728515625e-05, + "model_forward_time": 0.0245053768157959, + "step": 12393 + }, + { + "epoch": 1.891021728515625e-05, + "step": 12393, + "training_step_time": 0.16028904914855957 + }, + { + "epoch": 1.89117431640625e-05, + "model_forward_time": 0.024384737014770508, + "step": 12394 + }, + { + "epoch": 1.89117431640625e-05, + "step": 12394, + "training_step_time": 0.12204265594482422 + }, + { + "epoch": 1.891326904296875e-05, + "model_forward_time": 0.024790287017822266, + "step": 12395 + }, + { + "epoch": 1.891326904296875e-05, + "step": 12395, + "training_step_time": 0.15529417991638184 + }, + { + "epoch": 1.8914794921875e-05, + "model_forward_time": 0.024407148361206055, + "step": 12396 + }, + { + "epoch": 1.8914794921875e-05, + "step": 12396, + "training_step_time": 0.10825872421264648 + }, + { + "epoch": 1.891632080078125e-05, + "model_forward_time": 0.02492070198059082, + "step": 12397 + }, + { + "epoch": 1.891632080078125e-05, + "step": 12397, + "training_step_time": 0.12683939933776855 + }, + { + "epoch": 1.89178466796875e-05, + "model_forward_time": 0.025208711624145508, + "step": 12398 + }, + { + "epoch": 1.89178466796875e-05, + "step": 12398, + "training_step_time": 0.10716462135314941 + }, + { + "epoch": 1.891937255859375e-05, + "model_forward_time": 0.025267362594604492, + "step": 12399 + }, + { + "epoch": 1.891937255859375e-05, + "step": 12399, + "training_step_time": 0.14610719680786133 + }, + { + "epoch": 1.89208984375e-05, + "grad_norm": 0.4185691773891449, + "learning_rate": 6.804696653008575e-05, + "loss": 0.026, + "step": 12400 + }, + { + "epoch": 1.89208984375e-05, + "model_forward_time": 0.02498483657836914, + "step": 12400 + }, + { + "epoch": 1.89208984375e-05, + "step": 12400, + "training_step_time": 0.12882542610168457 + }, + { + "epoch": 1.892242431640625e-05, + "model_forward_time": 0.02451777458190918, + "step": 12401 + }, + { + "epoch": 1.892242431640625e-05, + "step": 12401, + "training_step_time": 0.11398959159851074 + }, + { + "epoch": 1.89239501953125e-05, + "model_forward_time": 0.025174617767333984, + "step": 12402 + }, + { + "epoch": 1.89239501953125e-05, + "step": 12402, + "training_step_time": 0.10223889350891113 + }, + { + "epoch": 1.892547607421875e-05, + "model_forward_time": 0.025710105895996094, + "step": 12403 + }, + { + "epoch": 1.892547607421875e-05, + "step": 12403, + "training_step_time": 0.11376714706420898 + }, + { + "epoch": 1.8927001953125e-05, + "model_forward_time": 0.02510380744934082, + "step": 12404 + }, + { + "epoch": 1.8927001953125e-05, + "step": 12404, + "training_step_time": 0.11000561714172363 + }, + { + "epoch": 1.892852783203125e-05, + "model_forward_time": 0.025359392166137695, + "step": 12405 + }, + { + "epoch": 1.892852783203125e-05, + "step": 12405, + "training_step_time": 0.1058349609375 + }, + { + "epoch": 1.89300537109375e-05, + "model_forward_time": 0.025488853454589844, + "step": 12406 + }, + { + "epoch": 1.89300537109375e-05, + "step": 12406, + "training_step_time": 0.10675549507141113 + }, + { + "epoch": 1.893157958984375e-05, + "model_forward_time": 0.025101184844970703, + "step": 12407 + }, + { + "epoch": 1.893157958984375e-05, + "step": 12407, + "training_step_time": 0.10515546798706055 + }, + { + "epoch": 1.893310546875e-05, + "model_forward_time": 0.02524423599243164, + "step": 12408 + }, + { + "epoch": 1.893310546875e-05, + "step": 12408, + "training_step_time": 0.10808348655700684 + }, + { + "epoch": 1.893463134765625e-05, + "model_forward_time": 0.025549650192260742, + "step": 12409 + }, + { + "epoch": 1.893463134765625e-05, + "step": 12409, + "training_step_time": 1.0445003509521484 + }, + { + "epoch": 1.89361572265625e-05, + "grad_norm": 0.48495861887931824, + "learning_rate": 6.799555530086828e-05, + "loss": 0.0246, + "step": 12410 + }, + { + "epoch": 1.89361572265625e-05, + "model_forward_time": 0.02288508415222168, + "step": 12410 + }, + { + "epoch": 1.89361572265625e-05, + "step": 12410, + "training_step_time": 0.09758305549621582 + }, + { + "epoch": 1.893768310546875e-05, + "model_forward_time": 0.024596691131591797, + "step": 12411 + }, + { + "epoch": 1.893768310546875e-05, + "step": 12411, + "training_step_time": 0.10153460502624512 + }, + { + "epoch": 1.8939208984375e-05, + "model_forward_time": 0.025188922882080078, + "step": 12412 + }, + { + "epoch": 1.8939208984375e-05, + "step": 12412, + "training_step_time": 0.104034423828125 + }, + { + "epoch": 1.894073486328125e-05, + "model_forward_time": 0.025454998016357422, + "step": 12413 + }, + { + "epoch": 1.894073486328125e-05, + "step": 12413, + "training_step_time": 0.1116485595703125 + }, + { + "epoch": 1.89422607421875e-05, + "model_forward_time": 0.02714228630065918, + "step": 12414 + }, + { + "epoch": 1.89422607421875e-05, + "step": 12414, + "training_step_time": 0.10633015632629395 + }, + { + "epoch": 1.894378662109375e-05, + "model_forward_time": 0.02537226676940918, + "step": 12415 + }, + { + "epoch": 1.894378662109375e-05, + "step": 12415, + "training_step_time": 0.17282557487487793 + }, + { + "epoch": 1.89453125e-05, + "model_forward_time": 0.025146961212158203, + "step": 12416 + }, + { + "epoch": 1.89453125e-05, + "step": 12416, + "training_step_time": 0.12714505195617676 + }, + { + "epoch": 1.894683837890625e-05, + "model_forward_time": 0.024581432342529297, + "step": 12417 + }, + { + "epoch": 1.894683837890625e-05, + "step": 12417, + "training_step_time": 0.12929415702819824 + }, + { + "epoch": 1.89483642578125e-05, + "model_forward_time": 0.024431943893432617, + "step": 12418 + }, + { + "epoch": 1.89483642578125e-05, + "step": 12418, + "training_step_time": 0.12660908699035645 + }, + { + "epoch": 1.894989013671875e-05, + "model_forward_time": 0.02477717399597168, + "step": 12419 + }, + { + "epoch": 1.894989013671875e-05, + "step": 12419, + "training_step_time": 0.12832999229431152 + }, + { + "epoch": 1.8951416015625e-05, + "grad_norm": 0.317827433347702, + "learning_rate": 6.794412220535426e-05, + "loss": 0.0161, + "step": 12420 + }, + { + "epoch": 1.8951416015625e-05, + "model_forward_time": 0.024704456329345703, + "step": 12420 + }, + { + "epoch": 1.8951416015625e-05, + "step": 12420, + "training_step_time": 0.2005174160003662 + }, + { + "epoch": 1.895294189453125e-05, + "model_forward_time": 0.024624347686767578, + "step": 12421 + }, + { + "epoch": 1.895294189453125e-05, + "step": 12421, + "training_step_time": 0.12208342552185059 + }, + { + "epoch": 1.89544677734375e-05, + "model_forward_time": 0.024376630783081055, + "step": 12422 + }, + { + "epoch": 1.89544677734375e-05, + "step": 12422, + "training_step_time": 0.11449456214904785 + }, + { + "epoch": 1.895599365234375e-05, + "model_forward_time": 0.025345325469970703, + "step": 12423 + }, + { + "epoch": 1.895599365234375e-05, + "step": 12423, + "training_step_time": 0.19217419624328613 + }, + { + "epoch": 1.895751953125e-05, + "model_forward_time": 0.0243682861328125, + "step": 12424 + }, + { + "epoch": 1.895751953125e-05, + "step": 12424, + "training_step_time": 0.12922215461730957 + }, + { + "epoch": 1.895904541015625e-05, + "model_forward_time": 0.024464130401611328, + "step": 12425 + }, + { + "epoch": 1.895904541015625e-05, + "step": 12425, + "training_step_time": 0.10964369773864746 + }, + { + "epoch": 1.89605712890625e-05, + "model_forward_time": 0.02536153793334961, + "step": 12426 + }, + { + "epoch": 1.89605712890625e-05, + "step": 12426, + "training_step_time": 0.1135101318359375 + }, + { + "epoch": 1.896209716796875e-05, + "model_forward_time": 0.0253145694732666, + "step": 12427 + }, + { + "epoch": 1.896209716796875e-05, + "step": 12427, + "training_step_time": 0.13592147827148438 + }, + { + "epoch": 1.8963623046875e-05, + "model_forward_time": 0.02542877197265625, + "step": 12428 + }, + { + "epoch": 1.8963623046875e-05, + "step": 12428, + "training_step_time": 0.11644196510314941 + }, + { + "epoch": 1.896514892578125e-05, + "model_forward_time": 0.025406837463378906, + "step": 12429 + }, + { + "epoch": 1.896514892578125e-05, + "step": 12429, + "training_step_time": 0.20539259910583496 + }, + { + "epoch": 1.89666748046875e-05, + "grad_norm": 0.4247405529022217, + "learning_rate": 6.789266730603974e-05, + "loss": 0.0322, + "step": 12430 + }, + { + "epoch": 1.89666748046875e-05, + "model_forward_time": 0.024506330490112305, + "step": 12430 + }, + { + "epoch": 1.89666748046875e-05, + "step": 12430, + "training_step_time": 0.10954165458679199 + }, + { + "epoch": 1.896820068359375e-05, + "model_forward_time": 0.02474379539489746, + "step": 12431 + }, + { + "epoch": 1.896820068359375e-05, + "step": 12431, + "training_step_time": 0.10731339454650879 + }, + { + "epoch": 1.89697265625e-05, + "model_forward_time": 0.026944875717163086, + "step": 12432 + }, + { + "epoch": 1.89697265625e-05, + "step": 12432, + "training_step_time": 0.16124701499938965 + }, + { + "epoch": 1.897125244140625e-05, + "model_forward_time": 0.024966001510620117, + "step": 12433 + }, + { + "epoch": 1.897125244140625e-05, + "step": 12433, + "training_step_time": 0.2124950885772705 + }, + { + "epoch": 1.89727783203125e-05, + "model_forward_time": 0.024837255477905273, + "step": 12434 + }, + { + "epoch": 1.89727783203125e-05, + "step": 12434, + "training_step_time": 0.12279462814331055 + }, + { + "epoch": 1.897430419921875e-05, + "model_forward_time": 0.023987293243408203, + "step": 12435 + }, + { + "epoch": 1.897430419921875e-05, + "step": 12435, + "training_step_time": 0.10796785354614258 + }, + { + "epoch": 1.8975830078125e-05, + "model_forward_time": 0.02544426918029785, + "step": 12436 + }, + { + "epoch": 1.8975830078125e-05, + "step": 12436, + "training_step_time": 0.10940217971801758 + }, + { + "epoch": 1.897735595703125e-05, + "model_forward_time": 0.025070667266845703, + "step": 12437 + }, + { + "epoch": 1.897735595703125e-05, + "step": 12437, + "training_step_time": 0.16007423400878906 + }, + { + "epoch": 1.89788818359375e-05, + "model_forward_time": 0.02470707893371582, + "step": 12438 + }, + { + "epoch": 1.89788818359375e-05, + "step": 12438, + "training_step_time": 0.1268634796142578 + }, + { + "epoch": 1.898040771484375e-05, + "model_forward_time": 0.02463555335998535, + "step": 12439 + }, + { + "epoch": 1.898040771484375e-05, + "step": 12439, + "training_step_time": 0.11903238296508789 + }, + { + "epoch": 1.898193359375e-05, + "grad_norm": 0.5766150951385498, + "learning_rate": 6.784119066544727e-05, + "loss": 0.0265, + "step": 12440 + }, + { + "epoch": 1.898193359375e-05, + "model_forward_time": 0.024762868881225586, + "step": 12440 + }, + { + "epoch": 1.898193359375e-05, + "step": 12440, + "training_step_time": 0.10688614845275879 + }, + { + "epoch": 1.898345947265625e-05, + "model_forward_time": 0.02472209930419922, + "step": 12441 + }, + { + "epoch": 1.898345947265625e-05, + "step": 12441, + "training_step_time": 0.10756731033325195 + }, + { + "epoch": 1.89849853515625e-05, + "model_forward_time": 0.02513885498046875, + "step": 12442 + }, + { + "epoch": 1.89849853515625e-05, + "step": 12442, + "training_step_time": 0.10503816604614258 + }, + { + "epoch": 1.898651123046875e-05, + "model_forward_time": 0.025223255157470703, + "step": 12443 + }, + { + "epoch": 1.898651123046875e-05, + "step": 12443, + "training_step_time": 0.10659146308898926 + }, + { + "epoch": 1.8988037109375e-05, + "model_forward_time": 0.02519702911376953, + "step": 12444 + }, + { + "epoch": 1.8988037109375e-05, + "step": 12444, + "training_step_time": 0.1053168773651123 + }, + { + "epoch": 1.898956298828125e-05, + "model_forward_time": 0.02677631378173828, + "step": 12445 + }, + { + "epoch": 1.898956298828125e-05, + "step": 12445, + "training_step_time": 0.12026596069335938 + }, + { + "epoch": 1.89910888671875e-05, + "model_forward_time": 0.026805877685546875, + "step": 12446 + }, + { + "epoch": 1.89910888671875e-05, + "step": 12446, + "training_step_time": 0.11909794807434082 + }, + { + "epoch": 1.899261474609375e-05, + "model_forward_time": 0.025559425354003906, + "step": 12447 + }, + { + "epoch": 1.899261474609375e-05, + "step": 12447, + "training_step_time": 0.1145484447479248 + }, + { + "epoch": 1.8994140625e-05, + "model_forward_time": 0.02417159080505371, + "step": 12448 + }, + { + "epoch": 1.8994140625e-05, + "step": 12448, + "training_step_time": 0.1136021614074707 + }, + { + "epoch": 1.899566650390625e-05, + "model_forward_time": 0.0243222713470459, + "step": 12449 + }, + { + "epoch": 1.899566650390625e-05, + "step": 12449, + "training_step_time": 0.11375093460083008 + }, + { + "epoch": 1.89971923828125e-05, + "grad_norm": 0.4734836518764496, + "learning_rate": 6.778969234612584e-05, + "loss": 0.0205, + "step": 12450 + }, + { + "epoch": 1.89971923828125e-05, + "model_forward_time": 0.025812864303588867, + "step": 12450 + }, + { + "epoch": 1.89971923828125e-05, + "step": 12450, + "training_step_time": 0.10823345184326172 + }, + { + "epoch": 1.899871826171875e-05, + "model_forward_time": 0.025609970092773438, + "step": 12451 + }, + { + "epoch": 1.899871826171875e-05, + "step": 12451, + "training_step_time": 0.11190342903137207 + }, + { + "epoch": 1.9000244140625e-05, + "model_forward_time": 0.025358915328979492, + "step": 12452 + }, + { + "epoch": 1.9000244140625e-05, + "step": 12452, + "training_step_time": 0.10784769058227539 + }, + { + "epoch": 1.900177001953125e-05, + "model_forward_time": 0.025796890258789062, + "step": 12453 + }, + { + "epoch": 1.900177001953125e-05, + "step": 12453, + "training_step_time": 0.10739588737487793 + }, + { + "epoch": 1.90032958984375e-05, + "model_forward_time": 0.025723695755004883, + "step": 12454 + }, + { + "epoch": 1.90032958984375e-05, + "step": 12454, + "training_step_time": 0.10760998725891113 + }, + { + "epoch": 1.900482177734375e-05, + "model_forward_time": 0.0256805419921875, + "step": 12455 + }, + { + "epoch": 1.900482177734375e-05, + "step": 12455, + "training_step_time": 0.10752534866333008 + }, + { + "epoch": 1.900634765625e-05, + "model_forward_time": 0.025437593460083008, + "step": 12456 + }, + { + "epoch": 1.900634765625e-05, + "step": 12456, + "training_step_time": 0.1102454662322998 + }, + { + "epoch": 1.900787353515625e-05, + "model_forward_time": 0.025408029556274414, + "step": 12457 + }, + { + "epoch": 1.900787353515625e-05, + "step": 12457, + "training_step_time": 0.10683226585388184 + }, + { + "epoch": 1.90093994140625e-05, + "model_forward_time": 0.028138399124145508, + "step": 12458 + }, + { + "epoch": 1.90093994140625e-05, + "step": 12458, + "training_step_time": 0.10922384262084961 + }, + { + "epoch": 1.901092529296875e-05, + "model_forward_time": 0.02608513832092285, + "step": 12459 + }, + { + "epoch": 1.901092529296875e-05, + "step": 12459, + "training_step_time": 0.11196565628051758 + }, + { + "epoch": 1.9012451171875e-05, + "grad_norm": 0.39418792724609375, + "learning_rate": 6.773817241065072e-05, + "loss": 0.024, + "step": 12460 + }, + { + "epoch": 1.9012451171875e-05, + "model_forward_time": 0.024403095245361328, + "step": 12460 + }, + { + "epoch": 1.9012451171875e-05, + "step": 12460, + "training_step_time": 0.10802555084228516 + }, + { + "epoch": 1.901397705078125e-05, + "model_forward_time": 0.024373531341552734, + "step": 12461 + }, + { + "epoch": 1.901397705078125e-05, + "step": 12461, + "training_step_time": 0.1495351791381836 + }, + { + "epoch": 1.90155029296875e-05, + "model_forward_time": 0.025109291076660156, + "step": 12462 + }, + { + "epoch": 1.90155029296875e-05, + "step": 12462, + "training_step_time": 0.11609768867492676 + }, + { + "epoch": 1.901702880859375e-05, + "model_forward_time": 0.02511310577392578, + "step": 12463 + }, + { + "epoch": 1.901702880859375e-05, + "step": 12463, + "training_step_time": 0.10685181617736816 + }, + { + "epoch": 1.90185546875e-05, + "model_forward_time": 0.025152921676635742, + "step": 12464 + }, + { + "epoch": 1.90185546875e-05, + "step": 12464, + "training_step_time": 0.12397575378417969 + }, + { + "epoch": 1.902008056640625e-05, + "model_forward_time": 0.024967670440673828, + "step": 12465 + }, + { + "epoch": 1.902008056640625e-05, + "step": 12465, + "training_step_time": 0.13022232055664062 + }, + { + "epoch": 1.90216064453125e-05, + "model_forward_time": 0.025221586227416992, + "step": 12466 + }, + { + "epoch": 1.90216064453125e-05, + "step": 12466, + "training_step_time": 0.11591005325317383 + }, + { + "epoch": 1.902313232421875e-05, + "model_forward_time": 0.025347232818603516, + "step": 12467 + }, + { + "epoch": 1.902313232421875e-05, + "step": 12467, + "training_step_time": 0.11409664154052734 + }, + { + "epoch": 1.9024658203125e-05, + "model_forward_time": 0.025150537490844727, + "step": 12468 + }, + { + "epoch": 1.9024658203125e-05, + "step": 12468, + "training_step_time": 0.10904955863952637 + }, + { + "epoch": 1.902618408203125e-05, + "model_forward_time": 0.025264263153076172, + "step": 12469 + }, + { + "epoch": 1.902618408203125e-05, + "step": 12469, + "training_step_time": 0.10419416427612305 + }, + { + "epoch": 1.90277099609375e-05, + "grad_norm": 0.2917688488960266, + "learning_rate": 6.768663092162356e-05, + "loss": 0.0172, + "step": 12470 + }, + { + "epoch": 1.90277099609375e-05, + "model_forward_time": 0.025104999542236328, + "step": 12470 + }, + { + "epoch": 1.90277099609375e-05, + "step": 12470, + "training_step_time": 0.11041045188903809 + }, + { + "epoch": 1.902923583984375e-05, + "model_forward_time": 0.02505183219909668, + "step": 12471 + }, + { + "epoch": 1.902923583984375e-05, + "step": 12471, + "training_step_time": 0.1162574291229248 + }, + { + "epoch": 1.903076171875e-05, + "model_forward_time": 0.025197505950927734, + "step": 12472 + }, + { + "epoch": 1.903076171875e-05, + "step": 12472, + "training_step_time": 0.20747780799865723 + }, + { + "epoch": 1.903228759765625e-05, + "model_forward_time": 0.025223493576049805, + "step": 12473 + }, + { + "epoch": 1.903228759765625e-05, + "step": 12473, + "training_step_time": 0.14615249633789062 + }, + { + "epoch": 1.90338134765625e-05, + "model_forward_time": 0.02490234375, + "step": 12474 + }, + { + "epoch": 1.90338134765625e-05, + "step": 12474, + "training_step_time": 0.19183659553527832 + }, + { + "epoch": 1.903533935546875e-05, + "model_forward_time": 0.024422407150268555, + "step": 12475 + }, + { + "epoch": 1.903533935546875e-05, + "step": 12475, + "training_step_time": 0.1274125576019287 + }, + { + "epoch": 1.9036865234375e-05, + "model_forward_time": 0.024765729904174805, + "step": 12476 + }, + { + "epoch": 1.9036865234375e-05, + "step": 12476, + "training_step_time": 0.10905838012695312 + }, + { + "epoch": 1.903839111328125e-05, + "model_forward_time": 0.025257587432861328, + "step": 12477 + }, + { + "epoch": 1.903839111328125e-05, + "step": 12477, + "training_step_time": 0.11559414863586426 + }, + { + "epoch": 1.90399169921875e-05, + "model_forward_time": 0.02515101432800293, + "step": 12478 + }, + { + "epoch": 1.90399169921875e-05, + "step": 12478, + "training_step_time": 0.873516321182251 + }, + { + "epoch": 1.904144287109375e-05, + "model_forward_time": 0.023021459579467773, + "step": 12479 + }, + { + "epoch": 1.904144287109375e-05, + "step": 12479, + "training_step_time": 0.13878536224365234 + }, + { + "epoch": 1.904296875e-05, + "grad_norm": 0.19898393750190735, + "learning_rate": 6.763506794167208e-05, + "loss": 0.013, + "step": 12480 + }, + { + "epoch": 1.904296875e-05, + "model_forward_time": 0.02435135841369629, + "step": 12480 + }, + { + "epoch": 1.904296875e-05, + "step": 12480, + "training_step_time": 0.10429859161376953 + }, + { + "epoch": 1.904449462890625e-05, + "model_forward_time": 0.024643898010253906, + "step": 12481 + }, + { + "epoch": 1.904449462890625e-05, + "step": 12481, + "training_step_time": 0.10291409492492676 + }, + { + "epoch": 1.90460205078125e-05, + "model_forward_time": 0.028352022171020508, + "step": 12482 + }, + { + "epoch": 1.90460205078125e-05, + "step": 12482, + "training_step_time": 0.10936307907104492 + }, + { + "epoch": 1.904754638671875e-05, + "model_forward_time": 0.02559375762939453, + "step": 12483 + }, + { + "epoch": 1.904754638671875e-05, + "step": 12483, + "training_step_time": 0.10857033729553223 + }, + { + "epoch": 1.9049072265625e-05, + "model_forward_time": 0.02539348602294922, + "step": 12484 + }, + { + "epoch": 1.9049072265625e-05, + "step": 12484, + "training_step_time": 0.10625982284545898 + }, + { + "epoch": 1.905059814453125e-05, + "model_forward_time": 0.025647401809692383, + "step": 12485 + }, + { + "epoch": 1.905059814453125e-05, + "step": 12485, + "training_step_time": 0.10619020462036133 + }, + { + "epoch": 1.90521240234375e-05, + "model_forward_time": 0.025593996047973633, + "step": 12486 + }, + { + "epoch": 1.90521240234375e-05, + "step": 12486, + "training_step_time": 0.10822057723999023 + }, + { + "epoch": 1.905364990234375e-05, + "model_forward_time": 0.0254824161529541, + "step": 12487 + }, + { + "epoch": 1.905364990234375e-05, + "step": 12487, + "training_step_time": 0.10751771926879883 + }, + { + "epoch": 1.905517578125e-05, + "model_forward_time": 0.025308609008789062, + "step": 12488 + }, + { + "epoch": 1.905517578125e-05, + "step": 12488, + "training_step_time": 0.1058659553527832 + }, + { + "epoch": 1.905670166015625e-05, + "model_forward_time": 0.02500176429748535, + "step": 12489 + }, + { + "epoch": 1.905670166015625e-05, + "step": 12489, + "training_step_time": 0.10617542266845703 + }, + { + "epoch": 1.90582275390625e-05, + "grad_norm": 0.37793731689453125, + "learning_rate": 6.758348353345014e-05, + "loss": 0.0155, + "step": 12490 + }, + { + "epoch": 1.90582275390625e-05, + "model_forward_time": 0.025348186492919922, + "step": 12490 + }, + { + "epoch": 1.90582275390625e-05, + "step": 12490, + "training_step_time": 0.1160585880279541 + }, + { + "epoch": 1.905975341796875e-05, + "model_forward_time": 0.025263071060180664, + "step": 12491 + }, + { + "epoch": 1.905975341796875e-05, + "step": 12491, + "training_step_time": 0.10581755638122559 + }, + { + "epoch": 1.9061279296875e-05, + "model_forward_time": 0.02572178840637207, + "step": 12492 + }, + { + "epoch": 1.9061279296875e-05, + "step": 12492, + "training_step_time": 0.10743045806884766 + }, + { + "epoch": 1.906280517578125e-05, + "model_forward_time": 0.025246143341064453, + "step": 12493 + }, + { + "epoch": 1.906280517578125e-05, + "step": 12493, + "training_step_time": 0.10716652870178223 + }, + { + "epoch": 1.90643310546875e-05, + "model_forward_time": 0.0251767635345459, + "step": 12494 + }, + { + "epoch": 1.90643310546875e-05, + "step": 12494, + "training_step_time": 0.10807228088378906 + }, + { + "epoch": 1.906585693359375e-05, + "model_forward_time": 0.02521347999572754, + "step": 12495 + }, + { + "epoch": 1.906585693359375e-05, + "step": 12495, + "training_step_time": 0.10840344429016113 + }, + { + "epoch": 1.90673828125e-05, + "model_forward_time": 0.025959491729736328, + "step": 12496 + }, + { + "epoch": 1.90673828125e-05, + "step": 12496, + "training_step_time": 0.11014533042907715 + }, + { + "epoch": 1.906890869140625e-05, + "model_forward_time": 0.02523016929626465, + "step": 12497 + }, + { + "epoch": 1.906890869140625e-05, + "step": 12497, + "training_step_time": 0.10666704177856445 + }, + { + "epoch": 1.90704345703125e-05, + "model_forward_time": 0.025559663772583008, + "step": 12498 + }, + { + "epoch": 1.90704345703125e-05, + "step": 12498, + "training_step_time": 0.10705804824829102 + }, + { + "epoch": 1.907196044921875e-05, + "model_forward_time": 0.028780221939086914, + "step": 12499 + }, + { + "epoch": 1.907196044921875e-05, + "step": 12499, + "training_step_time": 0.11050248146057129 + }, + { + "epoch": 1.9073486328125e-05, + "grad_norm": 0.24092067778110504, + "learning_rate": 6.753187775963773e-05, + "loss": 0.0207, + "step": 12500 + }, + { + "epoch": 1.9073486328125e-05, + "model_forward_time": 0.025239944458007812, + "step": 12500 + }, + { + "epoch": 1.9073486328125e-05, + "step": 12500, + "training_step_time": 0.10977792739868164 + }, + { + "epoch": 1.907501220703125e-05, + "model_forward_time": 0.025148630142211914, + "step": 12501 + }, + { + "epoch": 1.907501220703125e-05, + "step": 12501, + "training_step_time": 0.10672736167907715 + }, + { + "epoch": 1.90765380859375e-05, + "model_forward_time": 0.02518010139465332, + "step": 12502 + }, + { + "epoch": 1.90765380859375e-05, + "step": 12502, + "training_step_time": 0.10593819618225098 + }, + { + "epoch": 1.907806396484375e-05, + "model_forward_time": 0.025443077087402344, + "step": 12503 + }, + { + "epoch": 1.907806396484375e-05, + "step": 12503, + "training_step_time": 0.19675660133361816 + }, + { + "epoch": 1.907958984375e-05, + "model_forward_time": 0.02471303939819336, + "step": 12504 + }, + { + "epoch": 1.907958984375e-05, + "step": 12504, + "training_step_time": 0.11966705322265625 + }, + { + "epoch": 1.908111572265625e-05, + "model_forward_time": 0.026642799377441406, + "step": 12505 + }, + { + "epoch": 1.908111572265625e-05, + "step": 12505, + "training_step_time": 0.10705161094665527 + }, + { + "epoch": 1.90826416015625e-05, + "model_forward_time": 0.025485992431640625, + "step": 12506 + }, + { + "epoch": 1.90826416015625e-05, + "step": 12506, + "training_step_time": 0.11581897735595703 + }, + { + "epoch": 1.908416748046875e-05, + "model_forward_time": 0.026882171630859375, + "step": 12507 + }, + { + "epoch": 1.908416748046875e-05, + "step": 12507, + "training_step_time": 0.13390302658081055 + }, + { + "epoch": 1.9085693359375e-05, + "model_forward_time": 0.025402545928955078, + "step": 12508 + }, + { + "epoch": 1.9085693359375e-05, + "step": 12508, + "training_step_time": 0.1069033145904541 + }, + { + "epoch": 1.908721923828125e-05, + "model_forward_time": 0.025069713592529297, + "step": 12509 + }, + { + "epoch": 1.908721923828125e-05, + "step": 12509, + "training_step_time": 0.12042713165283203 + }, + { + "epoch": 1.90887451171875e-05, + "grad_norm": 0.3779175877571106, + "learning_rate": 6.748025068294067e-05, + "loss": 0.0332, + "step": 12510 + }, + { + "epoch": 1.90887451171875e-05, + "model_forward_time": 0.02490520477294922, + "step": 12510 + }, + { + "epoch": 1.90887451171875e-05, + "step": 12510, + "training_step_time": 0.11036229133605957 + }, + { + "epoch": 1.909027099609375e-05, + "model_forward_time": 0.025593280792236328, + "step": 12511 + }, + { + "epoch": 1.909027099609375e-05, + "step": 12511, + "training_step_time": 0.10973262786865234 + }, + { + "epoch": 1.9091796875e-05, + "model_forward_time": 0.024927377700805664, + "step": 12512 + }, + { + "epoch": 1.9091796875e-05, + "step": 12512, + "training_step_time": 0.1047067642211914 + }, + { + "epoch": 1.909332275390625e-05, + "model_forward_time": 0.025954484939575195, + "step": 12513 + }, + { + "epoch": 1.909332275390625e-05, + "step": 12513, + "training_step_time": 0.11730122566223145 + }, + { + "epoch": 1.90948486328125e-05, + "model_forward_time": 0.02887725830078125, + "step": 12514 + }, + { + "epoch": 1.90948486328125e-05, + "step": 12514, + "training_step_time": 0.10994791984558105 + }, + { + "epoch": 1.909637451171875e-05, + "model_forward_time": 0.025658607482910156, + "step": 12515 + }, + { + "epoch": 1.909637451171875e-05, + "step": 12515, + "training_step_time": 0.12349677085876465 + }, + { + "epoch": 1.9097900390625e-05, + "model_forward_time": 0.025382518768310547, + "step": 12516 + }, + { + "epoch": 1.9097900390625e-05, + "step": 12516, + "training_step_time": 0.12978315353393555 + }, + { + "epoch": 1.909942626953125e-05, + "model_forward_time": 0.025622844696044922, + "step": 12517 + }, + { + "epoch": 1.909942626953125e-05, + "step": 12517, + "training_step_time": 0.11504077911376953 + }, + { + "epoch": 1.91009521484375e-05, + "model_forward_time": 0.025829315185546875, + "step": 12518 + }, + { + "epoch": 1.91009521484375e-05, + "step": 12518, + "training_step_time": 0.11781692504882812 + }, + { + "epoch": 1.910247802734375e-05, + "model_forward_time": 0.025525331497192383, + "step": 12519 + }, + { + "epoch": 1.910247802734375e-05, + "step": 12519, + "training_step_time": 0.11563420295715332 + }, + { + "epoch": 1.910400390625e-05, + "grad_norm": 0.3728472590446472, + "learning_rate": 6.742860236609077e-05, + "loss": 0.0192, + "step": 12520 + }, + { + "epoch": 1.910400390625e-05, + "model_forward_time": 0.025527000427246094, + "step": 12520 + }, + { + "epoch": 1.910400390625e-05, + "step": 12520, + "training_step_time": 0.15437936782836914 + }, + { + "epoch": 1.910552978515625e-05, + "model_forward_time": 0.024718046188354492, + "step": 12521 + }, + { + "epoch": 1.910552978515625e-05, + "step": 12521, + "training_step_time": 0.14941644668579102 + }, + { + "epoch": 1.91070556640625e-05, + "model_forward_time": 0.024363279342651367, + "step": 12522 + }, + { + "epoch": 1.91070556640625e-05, + "step": 12522, + "training_step_time": 0.11107635498046875 + }, + { + "epoch": 1.910858154296875e-05, + "model_forward_time": 0.025347471237182617, + "step": 12523 + }, + { + "epoch": 1.910858154296875e-05, + "step": 12523, + "training_step_time": 0.10961794853210449 + }, + { + "epoch": 1.9110107421875e-05, + "model_forward_time": 0.025721311569213867, + "step": 12524 + }, + { + "epoch": 1.9110107421875e-05, + "step": 12524, + "training_step_time": 0.10968255996704102 + }, + { + "epoch": 1.911163330078125e-05, + "model_forward_time": 0.0254209041595459, + "step": 12525 + }, + { + "epoch": 1.911163330078125e-05, + "step": 12525, + "training_step_time": 0.12099289894104004 + }, + { + "epoch": 1.91131591796875e-05, + "model_forward_time": 0.02539682388305664, + "step": 12526 + }, + { + "epoch": 1.91131591796875e-05, + "step": 12526, + "training_step_time": 0.10647821426391602 + }, + { + "epoch": 1.911468505859375e-05, + "model_forward_time": 0.0251924991607666, + "step": 12527 + }, + { + "epoch": 1.911468505859375e-05, + "step": 12527, + "training_step_time": 0.21776556968688965 + }, + { + "epoch": 1.91162109375e-05, + "model_forward_time": 0.024297475814819336, + "step": 12528 + }, + { + "epoch": 1.91162109375e-05, + "step": 12528, + "training_step_time": 0.12176632881164551 + }, + { + "epoch": 1.911773681640625e-05, + "model_forward_time": 0.02496480941772461, + "step": 12529 + }, + { + "epoch": 1.911773681640625e-05, + "step": 12529, + "training_step_time": 0.10559654235839844 + }, + { + "epoch": 1.91192626953125e-05, + "grad_norm": 0.2121896892786026, + "learning_rate": 6.737693287184557e-05, + "loss": 0.0179, + "step": 12530 + }, + { + "epoch": 1.91192626953125e-05, + "model_forward_time": 0.02516317367553711, + "step": 12530 + }, + { + "epoch": 1.91192626953125e-05, + "step": 12530, + "training_step_time": 0.1066582202911377 + }, + { + "epoch": 1.912078857421875e-05, + "model_forward_time": 0.025281667709350586, + "step": 12531 + }, + { + "epoch": 1.912078857421875e-05, + "step": 12531, + "training_step_time": 0.10891985893249512 + }, + { + "epoch": 1.9122314453125e-05, + "model_forward_time": 0.025794029235839844, + "step": 12532 + }, + { + "epoch": 1.9122314453125e-05, + "step": 12532, + "training_step_time": 0.13487768173217773 + }, + { + "epoch": 1.912384033203125e-05, + "model_forward_time": 0.025496482849121094, + "step": 12533 + }, + { + "epoch": 1.912384033203125e-05, + "step": 12533, + "training_step_time": 0.18004727363586426 + }, + { + "epoch": 1.91253662109375e-05, + "model_forward_time": 0.02506566047668457, + "step": 12534 + }, + { + "epoch": 1.91253662109375e-05, + "step": 12534, + "training_step_time": 0.186295747756958 + }, + { + "epoch": 1.912689208984375e-05, + "model_forward_time": 0.02442026138305664, + "step": 12535 + }, + { + "epoch": 1.912689208984375e-05, + "step": 12535, + "training_step_time": 0.17704057693481445 + }, + { + "epoch": 1.912841796875e-05, + "model_forward_time": 0.024864912033081055, + "step": 12536 + }, + { + "epoch": 1.912841796875e-05, + "step": 12536, + "training_step_time": 0.1671915054321289 + }, + { + "epoch": 1.912994384765625e-05, + "model_forward_time": 0.024955272674560547, + "step": 12537 + }, + { + "epoch": 1.912994384765625e-05, + "step": 12537, + "training_step_time": 0.15191650390625 + }, + { + "epoch": 1.91314697265625e-05, + "model_forward_time": 0.024456501007080078, + "step": 12538 + }, + { + "epoch": 1.91314697265625e-05, + "step": 12538, + "training_step_time": 0.14144349098205566 + }, + { + "epoch": 1.913299560546875e-05, + "model_forward_time": 0.024447202682495117, + "step": 12539 + }, + { + "epoch": 1.913299560546875e-05, + "step": 12539, + "training_step_time": 0.13255047798156738 + }, + { + "epoch": 1.9134521484375e-05, + "grad_norm": 0.29751724004745483, + "learning_rate": 6.732524226298841e-05, + "loss": 0.0273, + "step": 12540 + }, + { + "epoch": 1.9134521484375e-05, + "model_forward_time": 0.02472543716430664, + "step": 12540 + }, + { + "epoch": 1.9134521484375e-05, + "step": 12540, + "training_step_time": 0.124664306640625 + }, + { + "epoch": 1.913604736328125e-05, + "model_forward_time": 0.024416208267211914, + "step": 12541 + }, + { + "epoch": 1.913604736328125e-05, + "step": 12541, + "training_step_time": 0.1258559226989746 + }, + { + "epoch": 1.91375732421875e-05, + "model_forward_time": 0.025309324264526367, + "step": 12542 + }, + { + "epoch": 1.91375732421875e-05, + "step": 12542, + "training_step_time": 0.12025666236877441 + }, + { + "epoch": 1.913909912109375e-05, + "model_forward_time": 0.0252993106842041, + "step": 12543 + }, + { + "epoch": 1.913909912109375e-05, + "step": 12543, + "training_step_time": 0.11723017692565918 + }, + { + "epoch": 1.9140625e-05, + "model_forward_time": 0.02551436424255371, + "step": 12544 + }, + { + "epoch": 1.9140625e-05, + "step": 12544, + "training_step_time": 0.11181402206420898 + }, + { + "epoch": 1.914215087890625e-05, + "model_forward_time": 0.02577495574951172, + "step": 12545 + }, + { + "epoch": 1.914215087890625e-05, + "step": 12545, + "training_step_time": 0.10963749885559082 + }, + { + "epoch": 1.91436767578125e-05, + "model_forward_time": 0.024930953979492188, + "step": 12546 + }, + { + "epoch": 1.91436767578125e-05, + "step": 12546, + "training_step_time": 0.1128089427947998 + }, + { + "epoch": 1.914520263671875e-05, + "model_forward_time": 0.0251920223236084, + "step": 12547 + }, + { + "epoch": 1.914520263671875e-05, + "step": 12547, + "training_step_time": 0.17506790161132812 + }, + { + "epoch": 1.9146728515625e-05, + "model_forward_time": 0.024571657180786133, + "step": 12548 + }, + { + "epoch": 1.9146728515625e-05, + "step": 12548, + "training_step_time": 0.1097719669342041 + }, + { + "epoch": 1.914825439453125e-05, + "model_forward_time": 0.02416396141052246, + "step": 12549 + }, + { + "epoch": 1.914825439453125e-05, + "step": 12549, + "training_step_time": 0.18019437789916992 + }, + { + "epoch": 1.91497802734375e-05, + "grad_norm": 0.4422711431980133, + "learning_rate": 6.727353060232822e-05, + "loss": 0.023, + "step": 12550 + }, + { + "epoch": 1.91497802734375e-05, + "model_forward_time": 0.02497076988220215, + "step": 12550 + }, + { + "epoch": 1.91497802734375e-05, + "step": 12550, + "training_step_time": 0.793179988861084 + }, + { + "epoch": 1.915130615234375e-05, + "model_forward_time": 0.022542715072631836, + "step": 12551 + }, + { + "epoch": 1.915130615234375e-05, + "step": 12551, + "training_step_time": 0.1301567554473877 + }, + { + "epoch": 1.915283203125e-05, + "model_forward_time": 0.0247805118560791, + "step": 12552 + }, + { + "epoch": 1.915283203125e-05, + "step": 12552, + "training_step_time": 0.11929750442504883 + }, + { + "epoch": 1.915435791015625e-05, + "model_forward_time": 0.025209426879882812, + "step": 12553 + }, + { + "epoch": 1.915435791015625e-05, + "step": 12553, + "training_step_time": 0.18669486045837402 + }, + { + "epoch": 1.91558837890625e-05, + "model_forward_time": 0.024862051010131836, + "step": 12554 + }, + { + "epoch": 1.91558837890625e-05, + "step": 12554, + "training_step_time": 0.14656853675842285 + }, + { + "epoch": 1.915740966796875e-05, + "model_forward_time": 0.024719715118408203, + "step": 12555 + }, + { + "epoch": 1.915740966796875e-05, + "step": 12555, + "training_step_time": 0.11053109169006348 + }, + { + "epoch": 1.9158935546875e-05, + "model_forward_time": 0.025652647018432617, + "step": 12556 + }, + { + "epoch": 1.9158935546875e-05, + "step": 12556, + "training_step_time": 0.12047815322875977 + }, + { + "epoch": 1.916046142578125e-05, + "model_forward_time": 0.025519847869873047, + "step": 12557 + }, + { + "epoch": 1.916046142578125e-05, + "step": 12557, + "training_step_time": 0.10713863372802734 + }, + { + "epoch": 1.91619873046875e-05, + "model_forward_time": 0.02430891990661621, + "step": 12558 + }, + { + "epoch": 1.91619873046875e-05, + "step": 12558, + "training_step_time": 0.1086878776550293 + }, + { + "epoch": 1.916351318359375e-05, + "model_forward_time": 0.0254976749420166, + "step": 12559 + }, + { + "epoch": 1.916351318359375e-05, + "step": 12559, + "training_step_time": 0.19531679153442383 + }, + { + "epoch": 1.91650390625e-05, + "grad_norm": 0.43292972445487976, + "learning_rate": 6.722179795269956e-05, + "loss": 0.0179, + "step": 12560 + }, + { + "epoch": 1.91650390625e-05, + "model_forward_time": 0.02424025535583496, + "step": 12560 + }, + { + "epoch": 1.91650390625e-05, + "step": 12560, + "training_step_time": 0.13731884956359863 + }, + { + "epoch": 1.916656494140625e-05, + "model_forward_time": 0.024560928344726562, + "step": 12561 + }, + { + "epoch": 1.916656494140625e-05, + "step": 12561, + "training_step_time": 0.12201690673828125 + }, + { + "epoch": 1.91680908203125e-05, + "model_forward_time": 0.0245816707611084, + "step": 12562 + }, + { + "epoch": 1.91680908203125e-05, + "step": 12562, + "training_step_time": 0.14358949661254883 + }, + { + "epoch": 1.916961669921875e-05, + "model_forward_time": 0.024641990661621094, + "step": 12563 + }, + { + "epoch": 1.916961669921875e-05, + "step": 12563, + "training_step_time": 0.11072993278503418 + }, + { + "epoch": 1.9171142578125e-05, + "model_forward_time": 0.02481532096862793, + "step": 12564 + }, + { + "epoch": 1.9171142578125e-05, + "step": 12564, + "training_step_time": 0.16074919700622559 + }, + { + "epoch": 1.917266845703125e-05, + "model_forward_time": 0.024419546127319336, + "step": 12565 + }, + { + "epoch": 1.917266845703125e-05, + "step": 12565, + "training_step_time": 0.1379694938659668 + }, + { + "epoch": 1.91741943359375e-05, + "model_forward_time": 0.02414560317993164, + "step": 12566 + }, + { + "epoch": 1.91741943359375e-05, + "step": 12566, + "training_step_time": 0.11108088493347168 + }, + { + "epoch": 1.917572021484375e-05, + "model_forward_time": 0.025241374969482422, + "step": 12567 + }, + { + "epoch": 1.917572021484375e-05, + "step": 12567, + "training_step_time": 0.10595464706420898 + }, + { + "epoch": 1.917724609375e-05, + "model_forward_time": 0.02493119239807129, + "step": 12568 + }, + { + "epoch": 1.917724609375e-05, + "step": 12568, + "training_step_time": 0.10948395729064941 + }, + { + "epoch": 1.917877197265625e-05, + "model_forward_time": 0.02502298355102539, + "step": 12569 + }, + { + "epoch": 1.917877197265625e-05, + "step": 12569, + "training_step_time": 0.10489702224731445 + }, + { + "epoch": 1.91802978515625e-05, + "grad_norm": 0.3948003351688385, + "learning_rate": 6.71700443769625e-05, + "loss": 0.0184, + "step": 12570 + }, + { + "epoch": 1.91802978515625e-05, + "model_forward_time": 0.025157690048217773, + "step": 12570 + }, + { + "epoch": 1.91802978515625e-05, + "step": 12570, + "training_step_time": 0.10632872581481934 + }, + { + "epoch": 1.918182373046875e-05, + "model_forward_time": 0.025172710418701172, + "step": 12571 + }, + { + "epoch": 1.918182373046875e-05, + "step": 12571, + "training_step_time": 0.10798835754394531 + }, + { + "epoch": 1.9183349609375e-05, + "model_forward_time": 0.02557659149169922, + "step": 12572 + }, + { + "epoch": 1.9183349609375e-05, + "step": 12572, + "training_step_time": 0.10526680946350098 + }, + { + "epoch": 1.918487548828125e-05, + "model_forward_time": 0.02683544158935547, + "step": 12573 + }, + { + "epoch": 1.918487548828125e-05, + "step": 12573, + "training_step_time": 0.10949969291687012 + }, + { + "epoch": 1.91864013671875e-05, + "model_forward_time": 0.025493621826171875, + "step": 12574 + }, + { + "epoch": 1.91864013671875e-05, + "step": 12574, + "training_step_time": 0.10431742668151855 + }, + { + "epoch": 1.918792724609375e-05, + "model_forward_time": 0.025062084197998047, + "step": 12575 + }, + { + "epoch": 1.918792724609375e-05, + "step": 12575, + "training_step_time": 0.1097726821899414 + }, + { + "epoch": 1.9189453125e-05, + "model_forward_time": 0.025292158126831055, + "step": 12576 + }, + { + "epoch": 1.9189453125e-05, + "step": 12576, + "training_step_time": 0.10596513748168945 + }, + { + "epoch": 1.919097900390625e-05, + "model_forward_time": 0.02477264404296875, + "step": 12577 + }, + { + "epoch": 1.919097900390625e-05, + "step": 12577, + "training_step_time": 0.10839176177978516 + }, + { + "epoch": 1.91925048828125e-05, + "model_forward_time": 0.025288105010986328, + "step": 12578 + }, + { + "epoch": 1.91925048828125e-05, + "step": 12578, + "training_step_time": 0.10384416580200195 + }, + { + "epoch": 1.919403076171875e-05, + "model_forward_time": 0.02502918243408203, + "step": 12579 + }, + { + "epoch": 1.919403076171875e-05, + "step": 12579, + "training_step_time": 0.10681438446044922 + }, + { + "epoch": 1.9195556640625e-05, + "grad_norm": 0.2294338196516037, + "learning_rate": 6.711826993800248e-05, + "loss": 0.0181, + "step": 12580 + }, + { + "epoch": 1.9195556640625e-05, + "model_forward_time": 0.023891687393188477, + "step": 12580 + }, + { + "epoch": 1.9195556640625e-05, + "step": 12580, + "training_step_time": 0.10378861427307129 + }, + { + "epoch": 1.919708251953125e-05, + "model_forward_time": 0.023502111434936523, + "step": 12581 + }, + { + "epoch": 1.919708251953125e-05, + "step": 12581, + "training_step_time": 0.10587358474731445 + }, + { + "epoch": 1.91986083984375e-05, + "model_forward_time": 0.025188684463500977, + "step": 12582 + }, + { + "epoch": 1.91986083984375e-05, + "step": 12582, + "training_step_time": 0.10768985748291016 + }, + { + "epoch": 1.920013427734375e-05, + "model_forward_time": 0.025043487548828125, + "step": 12583 + }, + { + "epoch": 1.920013427734375e-05, + "step": 12583, + "training_step_time": 0.10391616821289062 + }, + { + "epoch": 1.920166015625e-05, + "model_forward_time": 0.025211811065673828, + "step": 12584 + }, + { + "epoch": 1.920166015625e-05, + "step": 12584, + "training_step_time": 0.1078484058380127 + }, + { + "epoch": 1.920318603515625e-05, + "model_forward_time": 0.025295257568359375, + "step": 12585 + }, + { + "epoch": 1.920318603515625e-05, + "step": 12585, + "training_step_time": 0.10500144958496094 + }, + { + "epoch": 1.92047119140625e-05, + "model_forward_time": 0.025437116622924805, + "step": 12586 + }, + { + "epoch": 1.92047119140625e-05, + "step": 12586, + "training_step_time": 0.11049842834472656 + }, + { + "epoch": 1.920623779296875e-05, + "model_forward_time": 0.025125503540039062, + "step": 12587 + }, + { + "epoch": 1.920623779296875e-05, + "step": 12587, + "training_step_time": 0.10646724700927734 + }, + { + "epoch": 1.9207763671875e-05, + "model_forward_time": 0.025173187255859375, + "step": 12588 + }, + { + "epoch": 1.9207763671875e-05, + "step": 12588, + "training_step_time": 0.10889649391174316 + }, + { + "epoch": 1.920928955078125e-05, + "model_forward_time": 0.025471210479736328, + "step": 12589 + }, + { + "epoch": 1.920928955078125e-05, + "step": 12589, + "training_step_time": 0.18343424797058105 + }, + { + "epoch": 1.92108154296875e-05, + "grad_norm": 0.21814358234405518, + "learning_rate": 6.706647469873031e-05, + "loss": 0.0257, + "step": 12590 + }, + { + "epoch": 1.92108154296875e-05, + "model_forward_time": 0.024784088134765625, + "step": 12590 + }, + { + "epoch": 1.92108154296875e-05, + "step": 12590, + "training_step_time": 0.10801053047180176 + }, + { + "epoch": 1.921234130859375e-05, + "model_forward_time": 0.024697542190551758, + "step": 12591 + }, + { + "epoch": 1.921234130859375e-05, + "step": 12591, + "training_step_time": 0.11011719703674316 + }, + { + "epoch": 1.92138671875e-05, + "model_forward_time": 0.025035619735717773, + "step": 12592 + }, + { + "epoch": 1.92138671875e-05, + "step": 12592, + "training_step_time": 0.1288747787475586 + }, + { + "epoch": 1.921539306640625e-05, + "model_forward_time": 0.0251615047454834, + "step": 12593 + }, + { + "epoch": 1.921539306640625e-05, + "step": 12593, + "training_step_time": 0.131819486618042 + }, + { + "epoch": 1.92169189453125e-05, + "model_forward_time": 0.024675369262695312, + "step": 12594 + }, + { + "epoch": 1.92169189453125e-05, + "step": 12594, + "training_step_time": 0.10813689231872559 + }, + { + "epoch": 1.921844482421875e-05, + "model_forward_time": 0.02522587776184082, + "step": 12595 + }, + { + "epoch": 1.921844482421875e-05, + "step": 12595, + "training_step_time": 0.11674618721008301 + }, + { + "epoch": 1.9219970703125e-05, + "model_forward_time": 0.02489495277404785, + "step": 12596 + }, + { + "epoch": 1.9219970703125e-05, + "step": 12596, + "training_step_time": 0.10559916496276855 + }, + { + "epoch": 1.922149658203125e-05, + "model_forward_time": 0.024866104125976562, + "step": 12597 + }, + { + "epoch": 1.922149658203125e-05, + "step": 12597, + "training_step_time": 0.10707879066467285 + }, + { + "epoch": 1.92230224609375e-05, + "model_forward_time": 0.024941205978393555, + "step": 12598 + }, + { + "epoch": 1.92230224609375e-05, + "step": 12598, + "training_step_time": 0.10922574996948242 + }, + { + "epoch": 1.922454833984375e-05, + "model_forward_time": 0.02507162094116211, + "step": 12599 + }, + { + "epoch": 1.922454833984375e-05, + "step": 12599, + "training_step_time": 0.11738848686218262 + }, + { + "epoch": 1.922607421875e-05, + "grad_norm": 0.46139249205589294, + "learning_rate": 6.701465872208216e-05, + "loss": 0.0186, + "step": 12600 + }, + { + "epoch": 1.922607421875e-05, + "model_forward_time": 0.025429725646972656, + "step": 12600 + }, + { + "epoch": 1.922607421875e-05, + "step": 12600, + "training_step_time": 0.10998749732971191 + }, + { + "epoch": 1.922760009765625e-05, + "model_forward_time": 0.02521228790283203, + "step": 12601 + }, + { + "epoch": 1.922760009765625e-05, + "step": 12601, + "training_step_time": 0.13446402549743652 + }, + { + "epoch": 1.92291259765625e-05, + "model_forward_time": 0.025613069534301758, + "step": 12602 + }, + { + "epoch": 1.92291259765625e-05, + "step": 12602, + "training_step_time": 0.1969621181488037 + }, + { + "epoch": 1.923065185546875e-05, + "model_forward_time": 0.0245358943939209, + "step": 12603 + }, + { + "epoch": 1.923065185546875e-05, + "step": 12603, + "training_step_time": 0.1727464199066162 + }, + { + "epoch": 1.9232177734375e-05, + "model_forward_time": 0.024669647216796875, + "step": 12604 + }, + { + "epoch": 1.9232177734375e-05, + "step": 12604, + "training_step_time": 0.1159052848815918 + }, + { + "epoch": 1.923370361328125e-05, + "model_forward_time": 0.025000333786010742, + "step": 12605 + }, + { + "epoch": 1.923370361328125e-05, + "step": 12605, + "training_step_time": 0.10856866836547852 + }, + { + "epoch": 1.92352294921875e-05, + "model_forward_time": 0.02567005157470703, + "step": 12606 + }, + { + "epoch": 1.92352294921875e-05, + "step": 12606, + "training_step_time": 0.19438481330871582 + }, + { + "epoch": 1.923675537109375e-05, + "model_forward_time": 0.024843692779541016, + "step": 12607 + }, + { + "epoch": 1.923675537109375e-05, + "step": 12607, + "training_step_time": 0.13912272453308105 + }, + { + "epoch": 1.923828125e-05, + "model_forward_time": 0.02520895004272461, + "step": 12608 + }, + { + "epoch": 1.923828125e-05, + "step": 12608, + "training_step_time": 0.11437821388244629 + }, + { + "epoch": 1.923980712890625e-05, + "model_forward_time": 0.025266408920288086, + "step": 12609 + }, + { + "epoch": 1.923980712890625e-05, + "step": 12609, + "training_step_time": 0.1401810646057129 + }, + { + "epoch": 1.92413330078125e-05, + "grad_norm": 0.2110971063375473, + "learning_rate": 6.696282207101928e-05, + "loss": 0.0126, + "step": 12610 + }, + { + "epoch": 1.92413330078125e-05, + "model_forward_time": 0.0250704288482666, + "step": 12610 + }, + { + "epoch": 1.92413330078125e-05, + "step": 12610, + "training_step_time": 0.17672300338745117 + }, + { + "epoch": 1.924285888671875e-05, + "model_forward_time": 0.024567842483520508, + "step": 12611 + }, + { + "epoch": 1.924285888671875e-05, + "step": 12611, + "training_step_time": 0.12393832206726074 + }, + { + "epoch": 1.9244384765625e-05, + "model_forward_time": 0.02484750747680664, + "step": 12612 + }, + { + "epoch": 1.9244384765625e-05, + "step": 12612, + "training_step_time": 0.11827278137207031 + }, + { + "epoch": 1.924591064453125e-05, + "model_forward_time": 0.02541041374206543, + "step": 12613 + }, + { + "epoch": 1.924591064453125e-05, + "step": 12613, + "training_step_time": 0.10392117500305176 + }, + { + "epoch": 1.92474365234375e-05, + "model_forward_time": 0.025450468063354492, + "step": 12614 + }, + { + "epoch": 1.92474365234375e-05, + "step": 12614, + "training_step_time": 0.1038973331451416 + }, + { + "epoch": 1.924896240234375e-05, + "model_forward_time": 0.025098562240600586, + "step": 12615 + }, + { + "epoch": 1.924896240234375e-05, + "step": 12615, + "training_step_time": 0.10679221153259277 + }, + { + "epoch": 1.925048828125e-05, + "model_forward_time": 0.025987625122070312, + "step": 12616 + }, + { + "epoch": 1.925048828125e-05, + "step": 12616, + "training_step_time": 0.10654354095458984 + }, + { + "epoch": 1.925201416015625e-05, + "model_forward_time": 0.026123523712158203, + "step": 12617 + }, + { + "epoch": 1.925201416015625e-05, + "step": 12617, + "training_step_time": 0.10748744010925293 + }, + { + "epoch": 1.92535400390625e-05, + "model_forward_time": 0.02560734748840332, + "step": 12618 + }, + { + "epoch": 1.92535400390625e-05, + "step": 12618, + "training_step_time": 0.10573768615722656 + }, + { + "epoch": 1.925506591796875e-05, + "model_forward_time": 0.025284767150878906, + "step": 12619 + }, + { + "epoch": 1.925506591796875e-05, + "step": 12619, + "training_step_time": 0.11049270629882812 + }, + { + "epoch": 1.9256591796875e-05, + "grad_norm": 0.341896116733551, + "learning_rate": 6.691096480852808e-05, + "loss": 0.0133, + "step": 12620 + }, + { + "epoch": 1.9256591796875e-05, + "model_forward_time": 0.025957822799682617, + "step": 12620 + }, + { + "epoch": 1.9256591796875e-05, + "step": 12620, + "training_step_time": 0.11049723625183105 + }, + { + "epoch": 1.925811767578125e-05, + "model_forward_time": 0.025624990463256836, + "step": 12621 + }, + { + "epoch": 1.925811767578125e-05, + "step": 12621, + "training_step_time": 0.10804581642150879 + }, + { + "epoch": 1.92596435546875e-05, + "model_forward_time": 0.024890422821044922, + "step": 12622 + }, + { + "epoch": 1.92596435546875e-05, + "step": 12622, + "training_step_time": 0.10812187194824219 + }, + { + "epoch": 1.926116943359375e-05, + "model_forward_time": 0.02484297752380371, + "step": 12623 + }, + { + "epoch": 1.926116943359375e-05, + "step": 12623, + "training_step_time": 0.10878157615661621 + }, + { + "epoch": 1.92626953125e-05, + "model_forward_time": 0.025377511978149414, + "step": 12624 + }, + { + "epoch": 1.92626953125e-05, + "step": 12624, + "training_step_time": 0.10845375061035156 + }, + { + "epoch": 1.926422119140625e-05, + "model_forward_time": 0.02545309066772461, + "step": 12625 + }, + { + "epoch": 1.926422119140625e-05, + "step": 12625, + "training_step_time": 0.10585904121398926 + }, + { + "epoch": 1.92657470703125e-05, + "model_forward_time": 0.025503158569335938, + "step": 12626 + }, + { + "epoch": 1.92657470703125e-05, + "step": 12626, + "training_step_time": 0.10647988319396973 + }, + { + "epoch": 1.926727294921875e-05, + "model_forward_time": 0.025379419326782227, + "step": 12627 + }, + { + "epoch": 1.926727294921875e-05, + "step": 12627, + "training_step_time": 0.10555768013000488 + }, + { + "epoch": 1.9268798828125e-05, + "model_forward_time": 0.025585651397705078, + "step": 12628 + }, + { + "epoch": 1.9268798828125e-05, + "step": 12628, + "training_step_time": 0.10747885704040527 + }, + { + "epoch": 1.927032470703125e-05, + "model_forward_time": 0.025168418884277344, + "step": 12629 + }, + { + "epoch": 1.927032470703125e-05, + "step": 12629, + "training_step_time": 0.1109018325805664 + }, + { + "epoch": 1.92718505859375e-05, + "grad_norm": 0.2618494927883148, + "learning_rate": 6.685908699762002e-05, + "loss": 0.0129, + "step": 12630 + }, + { + "epoch": 1.92718505859375e-05, + "model_forward_time": 0.02467060089111328, + "step": 12630 + }, + { + "epoch": 1.92718505859375e-05, + "step": 12630, + "training_step_time": 0.10836100578308105 + }, + { + "epoch": 1.927337646484375e-05, + "model_forward_time": 0.025249242782592773, + "step": 12631 + }, + { + "epoch": 1.927337646484375e-05, + "step": 12631, + "training_step_time": 0.10479235649108887 + }, + { + "epoch": 1.927490234375e-05, + "model_forward_time": 0.02525615692138672, + "step": 12632 + }, + { + "epoch": 1.927490234375e-05, + "step": 12632, + "training_step_time": 0.10565781593322754 + }, + { + "epoch": 1.927642822265625e-05, + "model_forward_time": 0.025009870529174805, + "step": 12633 + }, + { + "epoch": 1.927642822265625e-05, + "step": 12633, + "training_step_time": 0.10599398612976074 + }, + { + "epoch": 1.92779541015625e-05, + "model_forward_time": 0.0249631404876709, + "step": 12634 + }, + { + "epoch": 1.92779541015625e-05, + "step": 12634, + "training_step_time": 0.10808539390563965 + }, + { + "epoch": 1.927947998046875e-05, + "model_forward_time": 0.025622844696044922, + "step": 12635 + }, + { + "epoch": 1.927947998046875e-05, + "step": 12635, + "training_step_time": 0.10614228248596191 + }, + { + "epoch": 1.9281005859375e-05, + "model_forward_time": 0.025485515594482422, + "step": 12636 + }, + { + "epoch": 1.9281005859375e-05, + "step": 12636, + "training_step_time": 0.1859593391418457 + }, + { + "epoch": 1.928253173828125e-05, + "model_forward_time": 0.024666786193847656, + "step": 12637 + }, + { + "epoch": 1.928253173828125e-05, + "step": 12637, + "training_step_time": 0.11871767044067383 + }, + { + "epoch": 1.92840576171875e-05, + "model_forward_time": 0.024733304977416992, + "step": 12638 + }, + { + "epoch": 1.92840576171875e-05, + "step": 12638, + "training_step_time": 0.10988140106201172 + }, + { + "epoch": 1.928558349609375e-05, + "model_forward_time": 0.024190187454223633, + "step": 12639 + }, + { + "epoch": 1.928558349609375e-05, + "step": 12639, + "training_step_time": 0.11555123329162598 + }, + { + "epoch": 1.9287109375e-05, + "grad_norm": 0.6126453280448914, + "learning_rate": 6.680718870133156e-05, + "loss": 0.0267, + "step": 12640 + }, + { + "epoch": 1.9287109375e-05, + "model_forward_time": 0.024391889572143555, + "step": 12640 + }, + { + "epoch": 1.9287109375e-05, + "step": 12640, + "training_step_time": 0.1274721622467041 + }, + { + "epoch": 1.928863525390625e-05, + "model_forward_time": 0.02519989013671875, + "step": 12641 + }, + { + "epoch": 1.928863525390625e-05, + "step": 12641, + "training_step_time": 0.1184241771697998 + }, + { + "epoch": 1.92901611328125e-05, + "model_forward_time": 0.024925947189331055, + "step": 12642 + }, + { + "epoch": 1.92901611328125e-05, + "step": 12642, + "training_step_time": 0.11610698699951172 + }, + { + "epoch": 1.929168701171875e-05, + "model_forward_time": 0.025300025939941406, + "step": 12643 + }, + { + "epoch": 1.929168701171875e-05, + "step": 12643, + "training_step_time": 0.10991692543029785 + }, + { + "epoch": 1.9293212890625e-05, + "model_forward_time": 0.025727272033691406, + "step": 12644 + }, + { + "epoch": 1.9293212890625e-05, + "step": 12644, + "training_step_time": 0.10796976089477539 + }, + { + "epoch": 1.929473876953125e-05, + "model_forward_time": 0.025136470794677734, + "step": 12645 + }, + { + "epoch": 1.929473876953125e-05, + "step": 12645, + "training_step_time": 0.15371084213256836 + }, + { + "epoch": 1.92962646484375e-05, + "model_forward_time": 0.02484273910522461, + "step": 12646 + }, + { + "epoch": 1.92962646484375e-05, + "step": 12646, + "training_step_time": 0.11310195922851562 + }, + { + "epoch": 1.929779052734375e-05, + "model_forward_time": 0.025235652923583984, + "step": 12647 + }, + { + "epoch": 1.929779052734375e-05, + "step": 12647, + "training_step_time": 0.2005300521850586 + }, + { + "epoch": 1.929931640625e-05, + "model_forward_time": 0.024328947067260742, + "step": 12648 + }, + { + "epoch": 1.929931640625e-05, + "step": 12648, + "training_step_time": 0.18968605995178223 + }, + { + "epoch": 1.930084228515625e-05, + "model_forward_time": 0.02456045150756836, + "step": 12649 + }, + { + "epoch": 1.930084228515625e-05, + "step": 12649, + "training_step_time": 0.15802597999572754 + }, + { + "epoch": 1.93023681640625e-05, + "grad_norm": 0.24479682743549347, + "learning_rate": 6.675526998272405e-05, + "loss": 0.0157, + "step": 12650 + }, + { + "epoch": 1.93023681640625e-05, + "model_forward_time": 0.0250399112701416, + "step": 12650 + }, + { + "epoch": 1.93023681640625e-05, + "step": 12650, + "training_step_time": 0.17844200134277344 + }, + { + "epoch": 1.930389404296875e-05, + "model_forward_time": 0.024821996688842773, + "step": 12651 + }, + { + "epoch": 1.930389404296875e-05, + "step": 12651, + "training_step_time": 0.10304903984069824 + }, + { + "epoch": 1.9305419921875e-05, + "model_forward_time": 0.025290727615356445, + "step": 12652 + }, + { + "epoch": 1.9305419921875e-05, + "step": 12652, + "training_step_time": 0.10643911361694336 + }, + { + "epoch": 1.930694580078125e-05, + "model_forward_time": 0.02562713623046875, + "step": 12653 + }, + { + "epoch": 1.930694580078125e-05, + "step": 12653, + "training_step_time": 0.18957948684692383 + }, + { + "epoch": 1.93084716796875e-05, + "model_forward_time": 0.024721622467041016, + "step": 12654 + }, + { + "epoch": 1.93084716796875e-05, + "step": 12654, + "training_step_time": 0.11517858505249023 + }, + { + "epoch": 1.930999755859375e-05, + "model_forward_time": 0.024723291397094727, + "step": 12655 + }, + { + "epoch": 1.930999755859375e-05, + "step": 12655, + "training_step_time": 0.11389827728271484 + }, + { + "epoch": 1.93115234375e-05, + "model_forward_time": 0.0255429744720459, + "step": 12656 + }, + { + "epoch": 1.93115234375e-05, + "step": 12656, + "training_step_time": 0.12598848342895508 + }, + { + "epoch": 1.931304931640625e-05, + "model_forward_time": 0.025797605514526367, + "step": 12657 + }, + { + "epoch": 1.931304931640625e-05, + "step": 12657, + "training_step_time": 0.1098930835723877 + }, + { + "epoch": 1.93145751953125e-05, + "model_forward_time": 0.025637388229370117, + "step": 12658 + }, + { + "epoch": 1.93145751953125e-05, + "step": 12658, + "training_step_time": 0.11049389839172363 + }, + { + "epoch": 1.931610107421875e-05, + "model_forward_time": 0.025377273559570312, + "step": 12659 + }, + { + "epoch": 1.931610107421875e-05, + "step": 12659, + "training_step_time": 0.1252450942993164 + }, + { + "epoch": 1.9317626953125e-05, + "grad_norm": 0.3344530165195465, + "learning_rate": 6.670333090488356e-05, + "loss": 0.0267, + "step": 12660 + }, + { + "epoch": 1.9317626953125e-05, + "model_forward_time": 0.025220155715942383, + "step": 12660 + }, + { + "epoch": 1.9317626953125e-05, + "step": 12660, + "training_step_time": 0.11451268196105957 + }, + { + "epoch": 1.931915283203125e-05, + "model_forward_time": 0.025136232376098633, + "step": 12661 + }, + { + "epoch": 1.931915283203125e-05, + "step": 12661, + "training_step_time": 0.10765194892883301 + }, + { + "epoch": 1.93206787109375e-05, + "model_forward_time": 0.025594472885131836, + "step": 12662 + }, + { + "epoch": 1.93206787109375e-05, + "step": 12662, + "training_step_time": 0.10930633544921875 + }, + { + "epoch": 1.932220458984375e-05, + "model_forward_time": 0.025304317474365234, + "step": 12663 + }, + { + "epoch": 1.932220458984375e-05, + "step": 12663, + "training_step_time": 0.10629510879516602 + }, + { + "epoch": 1.932373046875e-05, + "model_forward_time": 0.02521800994873047, + "step": 12664 + }, + { + "epoch": 1.932373046875e-05, + "step": 12664, + "training_step_time": 0.1077733039855957 + }, + { + "epoch": 1.932525634765625e-05, + "model_forward_time": 0.027035951614379883, + "step": 12665 + }, + { + "epoch": 1.932525634765625e-05, + "step": 12665, + "training_step_time": 0.10764312744140625 + }, + { + "epoch": 1.93267822265625e-05, + "model_forward_time": 0.025460004806518555, + "step": 12666 + }, + { + "epoch": 1.93267822265625e-05, + "step": 12666, + "training_step_time": 0.11117243766784668 + }, + { + "epoch": 1.932830810546875e-05, + "model_forward_time": 0.02533864974975586, + "step": 12667 + }, + { + "epoch": 1.932830810546875e-05, + "step": 12667, + "training_step_time": 0.10635733604431152 + }, + { + "epoch": 1.9329833984375e-05, + "model_forward_time": 0.025207042694091797, + "step": 12668 + }, + { + "epoch": 1.9329833984375e-05, + "step": 12668, + "training_step_time": 0.11119651794433594 + }, + { + "epoch": 1.933135986328125e-05, + "model_forward_time": 0.024214506149291992, + "step": 12669 + }, + { + "epoch": 1.933135986328125e-05, + "step": 12669, + "training_step_time": 0.10557246208190918 + }, + { + "epoch": 1.93328857421875e-05, + "grad_norm": 0.387548565864563, + "learning_rate": 6.6651371530921e-05, + "loss": 0.0176, + "step": 12670 + }, + { + "epoch": 1.93328857421875e-05, + "model_forward_time": 0.02405834197998047, + "step": 12670 + }, + { + "epoch": 1.93328857421875e-05, + "step": 12670, + "training_step_time": 0.10777544975280762 + }, + { + "epoch": 1.933441162109375e-05, + "model_forward_time": 0.024535417556762695, + "step": 12671 + }, + { + "epoch": 1.933441162109375e-05, + "step": 12671, + "training_step_time": 0.10795474052429199 + }, + { + "epoch": 1.93359375e-05, + "model_forward_time": 0.025798559188842773, + "step": 12672 + }, + { + "epoch": 1.93359375e-05, + "step": 12672, + "training_step_time": 0.10553407669067383 + }, + { + "epoch": 1.933746337890625e-05, + "model_forward_time": 0.02546238899230957, + "step": 12673 + }, + { + "epoch": 1.933746337890625e-05, + "step": 12673, + "training_step_time": 0.10803055763244629 + }, + { + "epoch": 1.93389892578125e-05, + "model_forward_time": 0.025255441665649414, + "step": 12674 + }, + { + "epoch": 1.93389892578125e-05, + "step": 12674, + "training_step_time": 0.10548663139343262 + }, + { + "epoch": 1.934051513671875e-05, + "model_forward_time": 0.025194644927978516, + "step": 12675 + }, + { + "epoch": 1.934051513671875e-05, + "step": 12675, + "training_step_time": 0.10522794723510742 + }, + { + "epoch": 1.9342041015625e-05, + "model_forward_time": 0.02551436424255371, + "step": 12676 + }, + { + "epoch": 1.9342041015625e-05, + "step": 12676, + "training_step_time": 0.10499906539916992 + }, + { + "epoch": 1.934356689453125e-05, + "model_forward_time": 0.025441408157348633, + "step": 12677 + }, + { + "epoch": 1.934356689453125e-05, + "step": 12677, + "training_step_time": 0.10617899894714355 + }, + { + "epoch": 1.93450927734375e-05, + "model_forward_time": 0.025286436080932617, + "step": 12678 + }, + { + "epoch": 1.93450927734375e-05, + "step": 12678, + "training_step_time": 0.10955572128295898 + }, + { + "epoch": 1.934661865234375e-05, + "model_forward_time": 0.02510809898376465, + "step": 12679 + }, + { + "epoch": 1.934661865234375e-05, + "step": 12679, + "training_step_time": 0.10473370552062988 + }, + { + "epoch": 1.934814453125e-05, + "grad_norm": 0.23371650278568268, + "learning_rate": 6.659939192397192e-05, + "loss": 0.0208, + "step": 12680 + }, + { + "epoch": 1.934814453125e-05, + "model_forward_time": 0.024955272674560547, + "step": 12680 + }, + { + "epoch": 1.934814453125e-05, + "step": 12680, + "training_step_time": 0.10444879531860352 + }, + { + "epoch": 1.934967041015625e-05, + "model_forward_time": 0.02525162696838379, + "step": 12681 + }, + { + "epoch": 1.934967041015625e-05, + "step": 12681, + "training_step_time": 0.10809993743896484 + }, + { + "epoch": 1.93511962890625e-05, + "model_forward_time": 0.027800321578979492, + "step": 12682 + }, + { + "epoch": 1.93511962890625e-05, + "step": 12682, + "training_step_time": 0.10878992080688477 + }, + { + "epoch": 1.935272216796875e-05, + "model_forward_time": 0.02567887306213379, + "step": 12683 + }, + { + "epoch": 1.935272216796875e-05, + "step": 12683, + "training_step_time": 0.13672995567321777 + }, + { + "epoch": 1.9354248046875e-05, + "model_forward_time": 0.025494813919067383, + "step": 12684 + }, + { + "epoch": 1.9354248046875e-05, + "step": 12684, + "training_step_time": 0.1049489974975586 + }, + { + "epoch": 1.935577392578125e-05, + "model_forward_time": 0.025285959243774414, + "step": 12685 + }, + { + "epoch": 1.935577392578125e-05, + "step": 12685, + "training_step_time": 0.10925078392028809 + }, + { + "epoch": 1.93572998046875e-05, + "model_forward_time": 0.025732994079589844, + "step": 12686 + }, + { + "epoch": 1.93572998046875e-05, + "step": 12686, + "training_step_time": 0.12573981285095215 + }, + { + "epoch": 1.935882568359375e-05, + "model_forward_time": 0.025373458862304688, + "step": 12687 + }, + { + "epoch": 1.935882568359375e-05, + "step": 12687, + "training_step_time": 0.12750506401062012 + }, + { + "epoch": 1.93603515625e-05, + "model_forward_time": 0.02517080307006836, + "step": 12688 + }, + { + "epoch": 1.93603515625e-05, + "step": 12688, + "training_step_time": 0.11224007606506348 + }, + { + "epoch": 1.936187744140625e-05, + "model_forward_time": 0.02629232406616211, + "step": 12689 + }, + { + "epoch": 1.936187744140625e-05, + "step": 12689, + "training_step_time": 0.12698793411254883 + }, + { + "epoch": 1.93634033203125e-05, + "grad_norm": 0.3474064767360687, + "learning_rate": 6.654739214719641e-05, + "loss": 0.0207, + "step": 12690 + }, + { + "epoch": 1.93634033203125e-05, + "model_forward_time": 0.02561163902282715, + "step": 12690 + }, + { + "epoch": 1.93634033203125e-05, + "step": 12690, + "training_step_time": 0.10907292366027832 + }, + { + "epoch": 1.936492919921875e-05, + "model_forward_time": 0.025131702423095703, + "step": 12691 + }, + { + "epoch": 1.936492919921875e-05, + "step": 12691, + "training_step_time": 0.10292792320251465 + }, + { + "epoch": 1.9366455078125e-05, + "model_forward_time": 0.024382591247558594, + "step": 12692 + }, + { + "epoch": 1.9366455078125e-05, + "step": 12692, + "training_step_time": 0.15087056159973145 + }, + { + "epoch": 1.936798095703125e-05, + "model_forward_time": 0.024495363235473633, + "step": 12693 + }, + { + "epoch": 1.936798095703125e-05, + "step": 12693, + "training_step_time": 0.19289565086364746 + }, + { + "epoch": 1.93695068359375e-05, + "model_forward_time": 0.02473306655883789, + "step": 12694 + }, + { + "epoch": 1.93695068359375e-05, + "step": 12694, + "training_step_time": 0.21766972541809082 + }, + { + "epoch": 1.937103271484375e-05, + "model_forward_time": 0.024669408798217773, + "step": 12695 + }, + { + "epoch": 1.937103271484375e-05, + "step": 12695, + "training_step_time": 0.15679335594177246 + }, + { + "epoch": 1.937255859375e-05, + "model_forward_time": 0.02606940269470215, + "step": 12696 + }, + { + "epoch": 1.937255859375e-05, + "step": 12696, + "training_step_time": 0.11690235137939453 + }, + { + "epoch": 1.937408447265625e-05, + "model_forward_time": 0.024926185607910156, + "step": 12697 + }, + { + "epoch": 1.937408447265625e-05, + "step": 12697, + "training_step_time": 0.11148786544799805 + }, + { + "epoch": 1.93756103515625e-05, + "model_forward_time": 0.025763750076293945, + "step": 12698 + }, + { + "epoch": 1.93756103515625e-05, + "step": 12698, + "training_step_time": 0.19720196723937988 + }, + { + "epoch": 1.937713623046875e-05, + "model_forward_time": 0.025014638900756836, + "step": 12699 + }, + { + "epoch": 1.937713623046875e-05, + "step": 12699, + "training_step_time": 0.10392260551452637 + }, + { + "epoch": 1.9378662109375e-05, + "grad_norm": 0.44094225764274597, + "learning_rate": 6.649537226377915e-05, + "loss": 0.0189, + "step": 12700 + }, + { + "epoch": 1.9378662109375e-05, + "model_forward_time": 0.024032115936279297, + "step": 12700 + }, + { + "epoch": 1.9378662109375e-05, + "step": 12700, + "training_step_time": 0.1922895908355713 + }, + { + "epoch": 1.938018798828125e-05, + "model_forward_time": 0.024451494216918945, + "step": 12701 + }, + { + "epoch": 1.938018798828125e-05, + "step": 12701, + "training_step_time": 0.12868714332580566 + }, + { + "epoch": 1.93817138671875e-05, + "model_forward_time": 0.025012969970703125, + "step": 12702 + }, + { + "epoch": 1.93817138671875e-05, + "step": 12702, + "training_step_time": 0.1278972625732422 + }, + { + "epoch": 1.938323974609375e-05, + "model_forward_time": 0.024950504302978516, + "step": 12703 + }, + { + "epoch": 1.938323974609375e-05, + "step": 12703, + "training_step_time": 0.11070775985717773 + }, + { + "epoch": 1.9384765625e-05, + "model_forward_time": 0.025105953216552734, + "step": 12704 + }, + { + "epoch": 1.9384765625e-05, + "step": 12704, + "training_step_time": 0.1747570037841797 + }, + { + "epoch": 1.938629150390625e-05, + "model_forward_time": 0.02465200424194336, + "step": 12705 + }, + { + "epoch": 1.938629150390625e-05, + "step": 12705, + "training_step_time": 0.13484907150268555 + }, + { + "epoch": 1.93878173828125e-05, + "model_forward_time": 0.02415299415588379, + "step": 12706 + }, + { + "epoch": 1.93878173828125e-05, + "step": 12706, + "training_step_time": 0.11393952369689941 + }, + { + "epoch": 1.938934326171875e-05, + "model_forward_time": 0.025377750396728516, + "step": 12707 + }, + { + "epoch": 1.938934326171875e-05, + "step": 12707, + "training_step_time": 0.10611414909362793 + }, + { + "epoch": 1.9390869140625e-05, + "model_forward_time": 0.02612018585205078, + "step": 12708 + }, + { + "epoch": 1.9390869140625e-05, + "step": 12708, + "training_step_time": 0.17444610595703125 + }, + { + "epoch": 1.939239501953125e-05, + "model_forward_time": 0.026726245880126953, + "step": 12709 + }, + { + "epoch": 1.939239501953125e-05, + "step": 12709, + "training_step_time": 0.19596290588378906 + }, + { + "epoch": 1.93939208984375e-05, + "grad_norm": 0.510518491268158, + "learning_rate": 6.644333233692916e-05, + "loss": 0.0166, + "step": 12710 + }, + { + "epoch": 1.93939208984375e-05, + "model_forward_time": 0.02432847023010254, + "step": 12710 + }, + { + "epoch": 1.93939208984375e-05, + "step": 12710, + "training_step_time": 0.1941695213317871 + }, + { + "epoch": 1.939544677734375e-05, + "model_forward_time": 0.02467179298400879, + "step": 12711 + }, + { + "epoch": 1.939544677734375e-05, + "step": 12711, + "training_step_time": 0.2008965015411377 + }, + { + "epoch": 1.939697265625e-05, + "model_forward_time": 0.024685382843017578, + "step": 12712 + }, + { + "epoch": 1.939697265625e-05, + "step": 12712, + "training_step_time": 0.18499112129211426 + }, + { + "epoch": 1.939849853515625e-05, + "model_forward_time": 0.024480581283569336, + "step": 12713 + }, + { + "epoch": 1.939849853515625e-05, + "step": 12713, + "training_step_time": 0.16906523704528809 + }, + { + "epoch": 1.94000244140625e-05, + "model_forward_time": 0.028041601181030273, + "step": 12714 + }, + { + "epoch": 1.94000244140625e-05, + "step": 12714, + "training_step_time": 0.16216421127319336 + }, + { + "epoch": 1.940155029296875e-05, + "model_forward_time": 0.024445295333862305, + "step": 12715 + }, + { + "epoch": 1.940155029296875e-05, + "step": 12715, + "training_step_time": 0.15805435180664062 + }, + { + "epoch": 1.9403076171875e-05, + "model_forward_time": 0.024680614471435547, + "step": 12716 + }, + { + "epoch": 1.9403076171875e-05, + "step": 12716, + "training_step_time": 0.13758087158203125 + }, + { + "epoch": 1.940460205078125e-05, + "model_forward_time": 0.024912595748901367, + "step": 12717 + }, + { + "epoch": 1.940460205078125e-05, + "step": 12717, + "training_step_time": 0.13077497482299805 + }, + { + "epoch": 1.94061279296875e-05, + "model_forward_time": 0.02513718605041504, + "step": 12718 + }, + { + "epoch": 1.94061279296875e-05, + "step": 12718, + "training_step_time": 0.1228799819946289 + }, + { + "epoch": 1.940765380859375e-05, + "model_forward_time": 0.025346994400024414, + "step": 12719 + }, + { + "epoch": 1.940765380859375e-05, + "step": 12719, + "training_step_time": 0.11973333358764648 + }, + { + "epoch": 1.94091796875e-05, + "grad_norm": 0.3286479115486145, + "learning_rate": 6.639127242987988e-05, + "loss": 0.0189, + "step": 12720 + }, + { + "epoch": 1.94091796875e-05, + "model_forward_time": 0.025264501571655273, + "step": 12720 + }, + { + "epoch": 1.94091796875e-05, + "step": 12720, + "training_step_time": 0.11438894271850586 + }, + { + "epoch": 1.941070556640625e-05, + "model_forward_time": 0.028886795043945312, + "step": 12721 + }, + { + "epoch": 1.941070556640625e-05, + "step": 12721, + "training_step_time": 0.11502599716186523 + }, + { + "epoch": 1.94122314453125e-05, + "model_forward_time": 0.02595210075378418, + "step": 12722 + }, + { + "epoch": 1.94122314453125e-05, + "step": 12722, + "training_step_time": 0.12808537483215332 + }, + { + "epoch": 1.941375732421875e-05, + "model_forward_time": 0.025502920150756836, + "step": 12723 + }, + { + "epoch": 1.941375732421875e-05, + "step": 12723, + "training_step_time": 0.16045022010803223 + }, + { + "epoch": 1.9415283203125e-05, + "model_forward_time": 0.025359153747558594, + "step": 12724 + }, + { + "epoch": 1.9415283203125e-05, + "step": 12724, + "training_step_time": 0.11385941505432129 + }, + { + "epoch": 1.941680908203125e-05, + "model_forward_time": 0.025402545928955078, + "step": 12725 + }, + { + "epoch": 1.941680908203125e-05, + "step": 12725, + "training_step_time": 0.10817670822143555 + }, + { + "epoch": 1.94183349609375e-05, + "model_forward_time": 0.02525019645690918, + "step": 12726 + }, + { + "epoch": 1.94183349609375e-05, + "step": 12726, + "training_step_time": 0.12291574478149414 + }, + { + "epoch": 1.941986083984375e-05, + "model_forward_time": 0.025476694107055664, + "step": 12727 + }, + { + "epoch": 1.941986083984375e-05, + "step": 12727, + "training_step_time": 0.12531304359436035 + }, + { + "epoch": 1.942138671875e-05, + "model_forward_time": 0.025182247161865234, + "step": 12728 + }, + { + "epoch": 1.942138671875e-05, + "step": 12728, + "training_step_time": 0.11971092224121094 + }, + { + "epoch": 1.942291259765625e-05, + "model_forward_time": 0.025836944580078125, + "step": 12729 + }, + { + "epoch": 1.942291259765625e-05, + "step": 12729, + "training_step_time": 0.11362171173095703 + }, + { + "epoch": 1.94244384765625e-05, + "grad_norm": 0.4129391610622406, + "learning_rate": 6.6339192605889e-05, + "loss": 0.017, + "step": 12730 + }, + { + "epoch": 1.94244384765625e-05, + "model_forward_time": 0.02591848373413086, + "step": 12730 + }, + { + "epoch": 1.94244384765625e-05, + "step": 12730, + "training_step_time": 0.10706138610839844 + }, + { + "epoch": 1.942596435546875e-05, + "model_forward_time": 0.02567744255065918, + "step": 12731 + }, + { + "epoch": 1.942596435546875e-05, + "step": 12731, + "training_step_time": 0.10634374618530273 + }, + { + "epoch": 1.9427490234375e-05, + "model_forward_time": 0.025088071823120117, + "step": 12732 + }, + { + "epoch": 1.9427490234375e-05, + "step": 12732, + "training_step_time": 0.1469581127166748 + }, + { + "epoch": 1.942901611328125e-05, + "model_forward_time": 0.024851560592651367, + "step": 12733 + }, + { + "epoch": 1.942901611328125e-05, + "step": 12733, + "training_step_time": 0.17246031761169434 + }, + { + "epoch": 1.94305419921875e-05, + "model_forward_time": 0.025064468383789062, + "step": 12734 + }, + { + "epoch": 1.94305419921875e-05, + "step": 12734, + "training_step_time": 0.13810062408447266 + }, + { + "epoch": 1.943206787109375e-05, + "model_forward_time": 0.025342464447021484, + "step": 12735 + }, + { + "epoch": 1.943206787109375e-05, + "step": 12735, + "training_step_time": 0.10831689834594727 + }, + { + "epoch": 1.943359375e-05, + "model_forward_time": 0.026210784912109375, + "step": 12736 + }, + { + "epoch": 1.943359375e-05, + "step": 12736, + "training_step_time": 0.19332408905029297 + }, + { + "epoch": 1.943511962890625e-05, + "model_forward_time": 0.024960756301879883, + "step": 12737 + }, + { + "epoch": 1.943511962890625e-05, + "step": 12737, + "training_step_time": 0.14487051963806152 + }, + { + "epoch": 1.94366455078125e-05, + "model_forward_time": 0.02463984489440918, + "step": 12738 + }, + { + "epoch": 1.94366455078125e-05, + "step": 12738, + "training_step_time": 0.10111284255981445 + }, + { + "epoch": 1.943817138671875e-05, + "model_forward_time": 0.025391340255737305, + "step": 12739 + }, + { + "epoch": 1.943817138671875e-05, + "step": 12739, + "training_step_time": 0.10775971412658691 + }, + { + "epoch": 1.9439697265625e-05, + "grad_norm": 0.3393997251987457, + "learning_rate": 6.628709292823844e-05, + "loss": 0.0206, + "step": 12740 + }, + { + "epoch": 1.9439697265625e-05, + "model_forward_time": 0.0257875919342041, + "step": 12740 + }, + { + "epoch": 1.9439697265625e-05, + "step": 12740, + "training_step_time": 0.10635638236999512 + }, + { + "epoch": 1.944122314453125e-05, + "model_forward_time": 0.02565622329711914, + "step": 12741 + }, + { + "epoch": 1.944122314453125e-05, + "step": 12741, + "training_step_time": 0.20505642890930176 + }, + { + "epoch": 1.94427490234375e-05, + "model_forward_time": 0.025096893310546875, + "step": 12742 + }, + { + "epoch": 1.94427490234375e-05, + "step": 12742, + "training_step_time": 0.12585091590881348 + }, + { + "epoch": 1.944427490234375e-05, + "model_forward_time": 0.024597883224487305, + "step": 12743 + }, + { + "epoch": 1.944427490234375e-05, + "step": 12743, + "training_step_time": 0.12032365798950195 + }, + { + "epoch": 1.944580078125e-05, + "model_forward_time": 0.025203943252563477, + "step": 12744 + }, + { + "epoch": 1.944580078125e-05, + "step": 12744, + "training_step_time": 0.13687372207641602 + }, + { + "epoch": 1.944732666015625e-05, + "model_forward_time": 0.024367332458496094, + "step": 12745 + }, + { + "epoch": 1.944732666015625e-05, + "step": 12745, + "training_step_time": 0.12261390686035156 + }, + { + "epoch": 1.94488525390625e-05, + "model_forward_time": 0.025084733963012695, + "step": 12746 + }, + { + "epoch": 1.94488525390625e-05, + "step": 12746, + "training_step_time": 0.12470579147338867 + }, + { + "epoch": 1.945037841796875e-05, + "model_forward_time": 0.0253145694732666, + "step": 12747 + }, + { + "epoch": 1.945037841796875e-05, + "step": 12747, + "training_step_time": 0.10892963409423828 + }, + { + "epoch": 1.9451904296875e-05, + "model_forward_time": 0.02574634552001953, + "step": 12748 + }, + { + "epoch": 1.9451904296875e-05, + "step": 12748, + "training_step_time": 0.10658383369445801 + }, + { + "epoch": 1.945343017578125e-05, + "model_forward_time": 0.025261402130126953, + "step": 12749 + }, + { + "epoch": 1.945343017578125e-05, + "step": 12749, + "training_step_time": 0.1090092658996582 + }, + { + "epoch": 1.94549560546875e-05, + "grad_norm": 0.28004905581474304, + "learning_rate": 6.623497346023418e-05, + "loss": 0.0123, + "step": 12750 + }, + { + "epoch": 1.94549560546875e-05, + "model_forward_time": 0.02552509307861328, + "step": 12750 + }, + { + "epoch": 1.94549560546875e-05, + "step": 12750, + "training_step_time": 0.10732436180114746 + }, + { + "epoch": 1.945648193359375e-05, + "model_forward_time": 0.025625228881835938, + "step": 12751 + }, + { + "epoch": 1.945648193359375e-05, + "step": 12751, + "training_step_time": 0.11023306846618652 + }, + { + "epoch": 1.94580078125e-05, + "model_forward_time": 0.025531291961669922, + "step": 12752 + }, + { + "epoch": 1.94580078125e-05, + "step": 12752, + "training_step_time": 0.10707283020019531 + }, + { + "epoch": 1.945953369140625e-05, + "model_forward_time": 0.025151491165161133, + "step": 12753 + }, + { + "epoch": 1.945953369140625e-05, + "step": 12753, + "training_step_time": 0.11092758178710938 + }, + { + "epoch": 1.94610595703125e-05, + "model_forward_time": 0.025544166564941406, + "step": 12754 + }, + { + "epoch": 1.94610595703125e-05, + "step": 12754, + "training_step_time": 0.10715937614440918 + }, + { + "epoch": 1.946258544921875e-05, + "model_forward_time": 0.025353431701660156, + "step": 12755 + }, + { + "epoch": 1.946258544921875e-05, + "step": 12755, + "training_step_time": 0.10713982582092285 + }, + { + "epoch": 1.9464111328125e-05, + "model_forward_time": 0.024909019470214844, + "step": 12756 + }, + { + "epoch": 1.9464111328125e-05, + "step": 12756, + "training_step_time": 0.1053171157836914 + }, + { + "epoch": 1.946563720703125e-05, + "model_forward_time": 0.02499699592590332, + "step": 12757 + }, + { + "epoch": 1.946563720703125e-05, + "step": 12757, + "training_step_time": 0.10581326484680176 + }, + { + "epoch": 1.94671630859375e-05, + "model_forward_time": 0.027768850326538086, + "step": 12758 + }, + { + "epoch": 1.94671630859375e-05, + "step": 12758, + "training_step_time": 0.10840296745300293 + }, + { + "epoch": 1.946868896484375e-05, + "model_forward_time": 0.025197267532348633, + "step": 12759 + }, + { + "epoch": 1.946868896484375e-05, + "step": 12759, + "training_step_time": 0.1060795783996582 + }, + { + "epoch": 1.947021484375e-05, + "grad_norm": 0.44285303354263306, + "learning_rate": 6.61828342652063e-05, + "loss": 0.0274, + "step": 12760 + }, + { + "epoch": 1.947021484375e-05, + "model_forward_time": 0.025188922882080078, + "step": 12760 + }, + { + "epoch": 1.947021484375e-05, + "step": 12760, + "training_step_time": 0.10631084442138672 + }, + { + "epoch": 1.947174072265625e-05, + "model_forward_time": 0.02573227882385254, + "step": 12761 + }, + { + "epoch": 1.947174072265625e-05, + "step": 12761, + "training_step_time": 0.10662961006164551 + }, + { + "epoch": 1.94732666015625e-05, + "model_forward_time": 0.025211095809936523, + "step": 12762 + }, + { + "epoch": 1.94732666015625e-05, + "step": 12762, + "training_step_time": 0.10442638397216797 + }, + { + "epoch": 1.947479248046875e-05, + "model_forward_time": 0.025521278381347656, + "step": 12763 + }, + { + "epoch": 1.947479248046875e-05, + "step": 12763, + "training_step_time": 0.10545754432678223 + }, + { + "epoch": 1.9476318359375e-05, + "model_forward_time": 0.02527928352355957, + "step": 12764 + }, + { + "epoch": 1.9476318359375e-05, + "step": 12764, + "training_step_time": 0.10514378547668457 + }, + { + "epoch": 1.947784423828125e-05, + "model_forward_time": 0.025167465209960938, + "step": 12765 + }, + { + "epoch": 1.947784423828125e-05, + "step": 12765, + "training_step_time": 0.10724425315856934 + }, + { + "epoch": 1.94793701171875e-05, + "model_forward_time": 0.025214195251464844, + "step": 12766 + }, + { + "epoch": 1.94793701171875e-05, + "step": 12766, + "training_step_time": 0.1123056411743164 + }, + { + "epoch": 1.948089599609375e-05, + "model_forward_time": 0.025623559951782227, + "step": 12767 + }, + { + "epoch": 1.948089599609375e-05, + "step": 12767, + "training_step_time": 0.11095952987670898 + }, + { + "epoch": 1.9482421875e-05, + "model_forward_time": 0.025394201278686523, + "step": 12768 + }, + { + "epoch": 1.9482421875e-05, + "step": 12768, + "training_step_time": 0.11238384246826172 + }, + { + "epoch": 1.948394775390625e-05, + "model_forward_time": 0.025046110153198242, + "step": 12769 + }, + { + "epoch": 1.948394775390625e-05, + "step": 12769, + "training_step_time": 0.11199808120727539 + }, + { + "epoch": 1.94854736328125e-05, + "grad_norm": 0.2459578514099121, + "learning_rate": 6.613067540650886e-05, + "loss": 0.0272, + "step": 12770 + }, + { + "epoch": 1.94854736328125e-05, + "model_forward_time": 0.02509021759033203, + "step": 12770 + }, + { + "epoch": 1.94854736328125e-05, + "step": 12770, + "training_step_time": 0.17148971557617188 + }, + { + "epoch": 1.948699951171875e-05, + "model_forward_time": 0.02474212646484375, + "step": 12771 + }, + { + "epoch": 1.948699951171875e-05, + "step": 12771, + "training_step_time": 0.1065976619720459 + }, + { + "epoch": 1.9488525390625e-05, + "model_forward_time": 0.024770021438598633, + "step": 12772 + }, + { + "epoch": 1.9488525390625e-05, + "step": 12772, + "training_step_time": 0.11105108261108398 + }, + { + "epoch": 1.949005126953125e-05, + "model_forward_time": 0.02567124366760254, + "step": 12773 + }, + { + "epoch": 1.949005126953125e-05, + "step": 12773, + "training_step_time": 0.13022065162658691 + }, + { + "epoch": 1.94915771484375e-05, + "model_forward_time": 0.0250551700592041, + "step": 12774 + }, + { + "epoch": 1.94915771484375e-05, + "step": 12774, + "training_step_time": 0.12306451797485352 + }, + { + "epoch": 1.949310302734375e-05, + "model_forward_time": 0.025131702423095703, + "step": 12775 + }, + { + "epoch": 1.949310302734375e-05, + "step": 12775, + "training_step_time": 0.13038158416748047 + }, + { + "epoch": 1.949462890625e-05, + "model_forward_time": 0.025133132934570312, + "step": 12776 + }, + { + "epoch": 1.949462890625e-05, + "step": 12776, + "training_step_time": 0.858898401260376 + }, + { + "epoch": 1.949615478515625e-05, + "model_forward_time": 0.023734569549560547, + "step": 12777 + }, + { + "epoch": 1.949615478515625e-05, + "step": 12777, + "training_step_time": 0.20022892951965332 + }, + { + "epoch": 1.94976806640625e-05, + "model_forward_time": 0.025213241577148438, + "step": 12778 + }, + { + "epoch": 1.94976806640625e-05, + "step": 12778, + "training_step_time": 0.1919097900390625 + }, + { + "epoch": 1.949920654296875e-05, + "model_forward_time": 0.02719855308532715, + "step": 12779 + }, + { + "epoch": 1.949920654296875e-05, + "step": 12779, + "training_step_time": 0.11533904075622559 + }, + { + "epoch": 1.9500732421875e-05, + "grad_norm": 0.4277324974536896, + "learning_rate": 6.607849694751977e-05, + "loss": 0.0221, + "step": 12780 + }, + { + "epoch": 1.9500732421875e-05, + "model_forward_time": 0.02475571632385254, + "step": 12780 + }, + { + "epoch": 1.9500732421875e-05, + "step": 12780, + "training_step_time": 0.10977649688720703 + }, + { + "epoch": 1.950225830078125e-05, + "model_forward_time": 0.02614569664001465, + "step": 12781 + }, + { + "epoch": 1.950225830078125e-05, + "step": 12781, + "training_step_time": 0.10756063461303711 + }, + { + "epoch": 1.95037841796875e-05, + "model_forward_time": 0.0253143310546875, + "step": 12782 + }, + { + "epoch": 1.95037841796875e-05, + "step": 12782, + "training_step_time": 0.10864901542663574 + }, + { + "epoch": 1.950531005859375e-05, + "model_forward_time": 0.025280475616455078, + "step": 12783 + }, + { + "epoch": 1.950531005859375e-05, + "step": 12783, + "training_step_time": 0.17014741897583008 + }, + { + "epoch": 1.95068359375e-05, + "model_forward_time": 0.024578094482421875, + "step": 12784 + }, + { + "epoch": 1.95068359375e-05, + "step": 12784, + "training_step_time": 0.12132477760314941 + }, + { + "epoch": 1.950836181640625e-05, + "model_forward_time": 0.024729013442993164, + "step": 12785 + }, + { + "epoch": 1.950836181640625e-05, + "step": 12785, + "training_step_time": 0.12656760215759277 + }, + { + "epoch": 1.95098876953125e-05, + "model_forward_time": 0.025323867797851562, + "step": 12786 + }, + { + "epoch": 1.95098876953125e-05, + "step": 12786, + "training_step_time": 0.1218869686126709 + }, + { + "epoch": 1.951141357421875e-05, + "model_forward_time": 0.02502918243408203, + "step": 12787 + }, + { + "epoch": 1.951141357421875e-05, + "step": 12787, + "training_step_time": 0.13781094551086426 + }, + { + "epoch": 1.9512939453125e-05, + "model_forward_time": 0.025066614151000977, + "step": 12788 + }, + { + "epoch": 1.9512939453125e-05, + "step": 12788, + "training_step_time": 0.11885905265808105 + }, + { + "epoch": 1.951446533203125e-05, + "model_forward_time": 0.02532362937927246, + "step": 12789 + }, + { + "epoch": 1.951446533203125e-05, + "step": 12789, + "training_step_time": 0.11675190925598145 + }, + { + "epoch": 1.95159912109375e-05, + "grad_norm": 0.517057478427887, + "learning_rate": 6.602629895164081e-05, + "loss": 0.026, + "step": 12790 + }, + { + "epoch": 1.95159912109375e-05, + "model_forward_time": 0.02506852149963379, + "step": 12790 + }, + { + "epoch": 1.95159912109375e-05, + "step": 12790, + "training_step_time": 0.10750007629394531 + }, + { + "epoch": 1.951751708984375e-05, + "model_forward_time": 0.025145769119262695, + "step": 12791 + }, + { + "epoch": 1.951751708984375e-05, + "step": 12791, + "training_step_time": 0.10510873794555664 + }, + { + "epoch": 1.951904296875e-05, + "model_forward_time": 0.025026798248291016, + "step": 12792 + }, + { + "epoch": 1.951904296875e-05, + "step": 12792, + "training_step_time": 0.10835647583007812 + }, + { + "epoch": 1.952056884765625e-05, + "model_forward_time": 0.02512836456298828, + "step": 12793 + }, + { + "epoch": 1.952056884765625e-05, + "step": 12793, + "training_step_time": 0.10707235336303711 + }, + { + "epoch": 1.95220947265625e-05, + "model_forward_time": 0.025365829467773438, + "step": 12794 + }, + { + "epoch": 1.95220947265625e-05, + "step": 12794, + "training_step_time": 0.11254453659057617 + }, + { + "epoch": 1.952362060546875e-05, + "model_forward_time": 0.025501012802124023, + "step": 12795 + }, + { + "epoch": 1.952362060546875e-05, + "step": 12795, + "training_step_time": 0.10904836654663086 + }, + { + "epoch": 1.9525146484375e-05, + "model_forward_time": 0.025204181671142578, + "step": 12796 + }, + { + "epoch": 1.9525146484375e-05, + "step": 12796, + "training_step_time": 0.1091923713684082 + }, + { + "epoch": 1.952667236328125e-05, + "model_forward_time": 0.025196313858032227, + "step": 12797 + }, + { + "epoch": 1.952667236328125e-05, + "step": 12797, + "training_step_time": 0.10581684112548828 + }, + { + "epoch": 1.95281982421875e-05, + "model_forward_time": 0.02559971809387207, + "step": 12798 + }, + { + "epoch": 1.95281982421875e-05, + "step": 12798, + "training_step_time": 0.10824298858642578 + }, + { + "epoch": 1.952972412109375e-05, + "model_forward_time": 0.025490283966064453, + "step": 12799 + }, + { + "epoch": 1.952972412109375e-05, + "step": 12799, + "training_step_time": 0.10611939430236816 + }, + { + "epoch": 1.953125e-05, + "grad_norm": 0.2915656566619873, + "learning_rate": 6.59740814822974e-05, + "loss": 0.0198, + "step": 12800 + }, + { + "epoch": 1.953125e-05, + "model_forward_time": 0.02503204345703125, + "step": 12800 + }, + { + "epoch": 1.953125e-05, + "step": 12800, + "training_step_time": 0.11137032508850098 + }, + { + "epoch": 1.953277587890625e-05, + "model_forward_time": 0.025264501571655273, + "step": 12801 + }, + { + "epoch": 1.953277587890625e-05, + "step": 12801, + "training_step_time": 0.10472679138183594 + }, + { + "epoch": 1.95343017578125e-05, + "model_forward_time": 0.024953603744506836, + "step": 12802 + }, + { + "epoch": 1.95343017578125e-05, + "step": 12802, + "training_step_time": 0.11614847183227539 + }, + { + "epoch": 1.953582763671875e-05, + "model_forward_time": 0.02570629119873047, + "step": 12803 + }, + { + "epoch": 1.953582763671875e-05, + "step": 12803, + "training_step_time": 0.10995006561279297 + }, + { + "epoch": 1.9537353515625e-05, + "model_forward_time": 0.025226593017578125, + "step": 12804 + }, + { + "epoch": 1.9537353515625e-05, + "step": 12804, + "training_step_time": 0.10994553565979004 + }, + { + "epoch": 1.953887939453125e-05, + "model_forward_time": 0.02480602264404297, + "step": 12805 + }, + { + "epoch": 1.953887939453125e-05, + "step": 12805, + "training_step_time": 0.10703444480895996 + }, + { + "epoch": 1.95404052734375e-05, + "model_forward_time": 0.025394439697265625, + "step": 12806 + }, + { + "epoch": 1.95404052734375e-05, + "step": 12806, + "training_step_time": 0.10557842254638672 + }, + { + "epoch": 1.954193115234375e-05, + "model_forward_time": 0.025169849395751953, + "step": 12807 + }, + { + "epoch": 1.954193115234375e-05, + "step": 12807, + "training_step_time": 0.10796713829040527 + }, + { + "epoch": 1.954345703125e-05, + "model_forward_time": 0.025191545486450195, + "step": 12808 + }, + { + "epoch": 1.954345703125e-05, + "step": 12808, + "training_step_time": 0.10591793060302734 + }, + { + "epoch": 1.954498291015625e-05, + "model_forward_time": 0.025400161743164062, + "step": 12809 + }, + { + "epoch": 1.954498291015625e-05, + "step": 12809, + "training_step_time": 0.10566401481628418 + }, + { + "epoch": 1.95465087890625e-05, + "grad_norm": 0.4159059524536133, + "learning_rate": 6.592184460293877e-05, + "loss": 0.0226, + "step": 12810 + }, + { + "epoch": 1.95465087890625e-05, + "model_forward_time": 0.024941444396972656, + "step": 12810 + }, + { + "epoch": 1.95465087890625e-05, + "step": 12810, + "training_step_time": 0.10492515563964844 + }, + { + "epoch": 1.954803466796875e-05, + "model_forward_time": 0.02572798728942871, + "step": 12811 + }, + { + "epoch": 1.954803466796875e-05, + "step": 12811, + "training_step_time": 0.10578727722167969 + }, + { + "epoch": 1.9549560546875e-05, + "model_forward_time": 0.025561094284057617, + "step": 12812 + }, + { + "epoch": 1.9549560546875e-05, + "step": 12812, + "training_step_time": 0.13389849662780762 + }, + { + "epoch": 1.955108642578125e-05, + "model_forward_time": 0.0256807804107666, + "step": 12813 + }, + { + "epoch": 1.955108642578125e-05, + "step": 12813, + "training_step_time": 0.11229777336120605 + }, + { + "epoch": 1.95526123046875e-05, + "model_forward_time": 0.025319576263427734, + "step": 12814 + }, + { + "epoch": 1.95526123046875e-05, + "step": 12814, + "training_step_time": 0.13170266151428223 + }, + { + "epoch": 1.955413818359375e-05, + "model_forward_time": 0.024202823638916016, + "step": 12815 + }, + { + "epoch": 1.955413818359375e-05, + "step": 12815, + "training_step_time": 0.1668996810913086 + }, + { + "epoch": 1.95556640625e-05, + "model_forward_time": 0.024346590042114258, + "step": 12816 + }, + { + "epoch": 1.95556640625e-05, + "step": 12816, + "training_step_time": 0.18709206581115723 + }, + { + "epoch": 1.955718994140625e-05, + "model_forward_time": 0.024864912033081055, + "step": 12817 + }, + { + "epoch": 1.955718994140625e-05, + "step": 12817, + "training_step_time": 0.14777493476867676 + }, + { + "epoch": 1.95587158203125e-05, + "model_forward_time": 0.02342700958251953, + "step": 12818 + }, + { + "epoch": 1.95587158203125e-05, + "step": 12818, + "training_step_time": 0.13246893882751465 + }, + { + "epoch": 1.956024169921875e-05, + "model_forward_time": 0.025235652923583984, + "step": 12819 + }, + { + "epoch": 1.956024169921875e-05, + "step": 12819, + "training_step_time": 0.10498809814453125 + }, + { + "epoch": 1.9561767578125e-05, + "grad_norm": 0.42813757061958313, + "learning_rate": 6.586958837703759e-05, + "loss": 0.0253, + "step": 12820 + }, + { + "epoch": 1.9561767578125e-05, + "model_forward_time": 0.02482295036315918, + "step": 12820 + }, + { + "epoch": 1.9561767578125e-05, + "step": 12820, + "training_step_time": 0.13930201530456543 + }, + { + "epoch": 1.956329345703125e-05, + "model_forward_time": 0.025035381317138672, + "step": 12821 + }, + { + "epoch": 1.956329345703125e-05, + "step": 12821, + "training_step_time": 0.1259157657623291 + }, + { + "epoch": 1.95648193359375e-05, + "model_forward_time": 0.025003910064697266, + "step": 12822 + }, + { + "epoch": 1.95648193359375e-05, + "step": 12822, + "training_step_time": 0.11384224891662598 + }, + { + "epoch": 1.956634521484375e-05, + "model_forward_time": 0.025055646896362305, + "step": 12823 + }, + { + "epoch": 1.956634521484375e-05, + "step": 12823, + "training_step_time": 0.16033697128295898 + }, + { + "epoch": 1.956787109375e-05, + "model_forward_time": 0.024910926818847656, + "step": 12824 + }, + { + "epoch": 1.956787109375e-05, + "step": 12824, + "training_step_time": 0.17279911041259766 + }, + { + "epoch": 1.956939697265625e-05, + "model_forward_time": 0.024599552154541016, + "step": 12825 + }, + { + "epoch": 1.956939697265625e-05, + "step": 12825, + "training_step_time": 0.11660957336425781 + }, + { + "epoch": 1.95709228515625e-05, + "model_forward_time": 0.024692535400390625, + "step": 12826 + }, + { + "epoch": 1.95709228515625e-05, + "step": 12826, + "training_step_time": 0.19796490669250488 + }, + { + "epoch": 1.957244873046875e-05, + "model_forward_time": 0.024659395217895508, + "step": 12827 + }, + { + "epoch": 1.957244873046875e-05, + "step": 12827, + "training_step_time": 0.10709738731384277 + }, + { + "epoch": 1.9573974609375e-05, + "model_forward_time": 0.025322914123535156, + "step": 12828 + }, + { + "epoch": 1.9573974609375e-05, + "step": 12828, + "training_step_time": 0.10874319076538086 + }, + { + "epoch": 1.957550048828125e-05, + "model_forward_time": 0.024451017379760742, + "step": 12829 + }, + { + "epoch": 1.957550048828125e-05, + "step": 12829, + "training_step_time": 0.15697884559631348 + }, + { + "epoch": 1.95770263671875e-05, + "grad_norm": 0.4591871500015259, + "learning_rate": 6.581731286809014e-05, + "loss": 0.0215, + "step": 12830 + }, + { + "epoch": 1.95770263671875e-05, + "model_forward_time": 0.024854183197021484, + "step": 12830 + }, + { + "epoch": 1.95770263671875e-05, + "step": 12830, + "training_step_time": 0.1105952262878418 + }, + { + "epoch": 1.957855224609375e-05, + "model_forward_time": 0.025530099868774414, + "step": 12831 + }, + { + "epoch": 1.957855224609375e-05, + "step": 12831, + "training_step_time": 0.13998842239379883 + }, + { + "epoch": 1.9580078125e-05, + "model_forward_time": 0.026187896728515625, + "step": 12832 + }, + { + "epoch": 1.9580078125e-05, + "step": 12832, + "training_step_time": 0.15935873985290527 + }, + { + "epoch": 1.958160400390625e-05, + "model_forward_time": 0.024337053298950195, + "step": 12833 + }, + { + "epoch": 1.958160400390625e-05, + "step": 12833, + "training_step_time": 0.2170407772064209 + }, + { + "epoch": 1.95831298828125e-05, + "model_forward_time": 0.024619102478027344, + "step": 12834 + }, + { + "epoch": 1.95831298828125e-05, + "step": 12834, + "training_step_time": 0.1117548942565918 + }, + { + "epoch": 1.958465576171875e-05, + "model_forward_time": 0.02537822723388672, + "step": 12835 + }, + { + "epoch": 1.958465576171875e-05, + "step": 12835, + "training_step_time": 0.10610413551330566 + }, + { + "epoch": 1.9586181640625e-05, + "model_forward_time": 0.025581836700439453, + "step": 12836 + }, + { + "epoch": 1.9586181640625e-05, + "step": 12836, + "training_step_time": 0.10433006286621094 + }, + { + "epoch": 1.958770751953125e-05, + "model_forward_time": 0.025313854217529297, + "step": 12837 + }, + { + "epoch": 1.958770751953125e-05, + "step": 12837, + "training_step_time": 0.10630679130554199 + }, + { + "epoch": 1.95892333984375e-05, + "model_forward_time": 0.025336503982543945, + "step": 12838 + }, + { + "epoch": 1.95892333984375e-05, + "step": 12838, + "training_step_time": 0.10774564743041992 + }, + { + "epoch": 1.959075927734375e-05, + "model_forward_time": 0.025085926055908203, + "step": 12839 + }, + { + "epoch": 1.959075927734375e-05, + "step": 12839, + "training_step_time": 0.10956311225891113 + }, + { + "epoch": 1.959228515625e-05, + "grad_norm": 0.20965513586997986, + "learning_rate": 6.576501813961609e-05, + "loss": 0.0146, + "step": 12840 + }, + { + "epoch": 1.959228515625e-05, + "model_forward_time": 0.025368213653564453, + "step": 12840 + }, + { + "epoch": 1.959228515625e-05, + "step": 12840, + "training_step_time": 0.10538387298583984 + }, + { + "epoch": 1.959381103515625e-05, + "model_forward_time": 0.025298118591308594, + "step": 12841 + }, + { + "epoch": 1.959381103515625e-05, + "step": 12841, + "training_step_time": 0.10614824295043945 + }, + { + "epoch": 1.95953369140625e-05, + "model_forward_time": 0.025177955627441406, + "step": 12842 + }, + { + "epoch": 1.95953369140625e-05, + "step": 12842, + "training_step_time": 0.10755681991577148 + }, + { + "epoch": 1.959686279296875e-05, + "model_forward_time": 0.025674819946289062, + "step": 12843 + }, + { + "epoch": 1.959686279296875e-05, + "step": 12843, + "training_step_time": 0.10666608810424805 + }, + { + "epoch": 1.9598388671875e-05, + "model_forward_time": 0.02536177635192871, + "step": 12844 + }, + { + "epoch": 1.9598388671875e-05, + "step": 12844, + "training_step_time": 0.1089329719543457 + }, + { + "epoch": 1.959991455078125e-05, + "model_forward_time": 0.02554941177368164, + "step": 12845 + }, + { + "epoch": 1.959991455078125e-05, + "step": 12845, + "training_step_time": 0.1053915023803711 + }, + { + "epoch": 1.96014404296875e-05, + "model_forward_time": 0.02528834342956543, + "step": 12846 + }, + { + "epoch": 1.96014404296875e-05, + "step": 12846, + "training_step_time": 0.10483837127685547 + }, + { + "epoch": 1.960296630859375e-05, + "model_forward_time": 0.02777695655822754, + "step": 12847 + }, + { + "epoch": 1.960296630859375e-05, + "step": 12847, + "training_step_time": 0.1075289249420166 + }, + { + "epoch": 1.96044921875e-05, + "model_forward_time": 0.02497386932373047, + "step": 12848 + }, + { + "epoch": 1.96044921875e-05, + "step": 12848, + "training_step_time": 0.10898089408874512 + }, + { + "epoch": 1.960601806640625e-05, + "model_forward_time": 0.025057077407836914, + "step": 12849 + }, + { + "epoch": 1.960601806640625e-05, + "step": 12849, + "training_step_time": 0.1041414737701416 + }, + { + "epoch": 1.96075439453125e-05, + "grad_norm": 0.2478886991739273, + "learning_rate": 6.571270425515843e-05, + "loss": 0.0159, + "step": 12850 + }, + { + "epoch": 1.96075439453125e-05, + "model_forward_time": 0.02523636817932129, + "step": 12850 + }, + { + "epoch": 1.96075439453125e-05, + "step": 12850, + "training_step_time": 0.10432600975036621 + }, + { + "epoch": 1.960906982421875e-05, + "model_forward_time": 0.024936437606811523, + "step": 12851 + }, + { + "epoch": 1.960906982421875e-05, + "step": 12851, + "training_step_time": 0.10858535766601562 + }, + { + "epoch": 1.9610595703125e-05, + "model_forward_time": 0.025166988372802734, + "step": 12852 + }, + { + "epoch": 1.9610595703125e-05, + "step": 12852, + "training_step_time": 0.10373783111572266 + }, + { + "epoch": 1.961212158203125e-05, + "model_forward_time": 0.027532100677490234, + "step": 12853 + }, + { + "epoch": 1.961212158203125e-05, + "step": 12853, + "training_step_time": 0.1087651252746582 + }, + { + "epoch": 1.96136474609375e-05, + "model_forward_time": 0.025132417678833008, + "step": 12854 + }, + { + "epoch": 1.96136474609375e-05, + "step": 12854, + "training_step_time": 0.1066291332244873 + }, + { + "epoch": 1.961517333984375e-05, + "model_forward_time": 0.026541948318481445, + "step": 12855 + }, + { + "epoch": 1.961517333984375e-05, + "step": 12855, + "training_step_time": 0.10902881622314453 + }, + { + "epoch": 1.961669921875e-05, + "model_forward_time": 0.02532339096069336, + "step": 12856 + }, + { + "epoch": 1.961669921875e-05, + "step": 12856, + "training_step_time": 0.10817360877990723 + }, + { + "epoch": 1.961822509765625e-05, + "model_forward_time": 0.02491903305053711, + "step": 12857 + }, + { + "epoch": 1.961822509765625e-05, + "step": 12857, + "training_step_time": 0.2093358039855957 + }, + { + "epoch": 1.96197509765625e-05, + "model_forward_time": 0.025052309036254883, + "step": 12858 + }, + { + "epoch": 1.96197509765625e-05, + "step": 12858, + "training_step_time": 0.11860346794128418 + }, + { + "epoch": 1.962127685546875e-05, + "model_forward_time": 0.02716660499572754, + "step": 12859 + }, + { + "epoch": 1.962127685546875e-05, + "step": 12859, + "training_step_time": 0.10867762565612793 + }, + { + "epoch": 1.9622802734375e-05, + "grad_norm": 0.467568039894104, + "learning_rate": 6.56603712782835e-05, + "loss": 0.0229, + "step": 12860 + }, + { + "epoch": 1.9622802734375e-05, + "model_forward_time": 0.025312185287475586, + "step": 12860 + }, + { + "epoch": 1.9622802734375e-05, + "step": 12860, + "training_step_time": 0.11243152618408203 + }, + { + "epoch": 1.962432861328125e-05, + "model_forward_time": 0.025675535202026367, + "step": 12861 + }, + { + "epoch": 1.962432861328125e-05, + "step": 12861, + "training_step_time": 0.13793659210205078 + }, + { + "epoch": 1.96258544921875e-05, + "model_forward_time": 0.02503514289855957, + "step": 12862 + }, + { + "epoch": 1.96258544921875e-05, + "step": 12862, + "training_step_time": 0.11337804794311523 + }, + { + "epoch": 1.962738037109375e-05, + "model_forward_time": 0.025648832321166992, + "step": 12863 + }, + { + "epoch": 1.962738037109375e-05, + "step": 12863, + "training_step_time": 0.11484026908874512 + }, + { + "epoch": 1.962890625e-05, + "model_forward_time": 0.02514338493347168, + "step": 12864 + }, + { + "epoch": 1.962890625e-05, + "step": 12864, + "training_step_time": 0.10955953598022461 + }, + { + "epoch": 1.963043212890625e-05, + "model_forward_time": 0.024783849716186523, + "step": 12865 + }, + { + "epoch": 1.963043212890625e-05, + "step": 12865, + "training_step_time": 0.10452628135681152 + }, + { + "epoch": 1.96319580078125e-05, + "model_forward_time": 0.024631738662719727, + "step": 12866 + }, + { + "epoch": 1.96319580078125e-05, + "step": 12866, + "training_step_time": 0.1206357479095459 + }, + { + "epoch": 1.963348388671875e-05, + "model_forward_time": 0.024579286575317383, + "step": 12867 + }, + { + "epoch": 1.963348388671875e-05, + "step": 12867, + "training_step_time": 0.22048115730285645 + }, + { + "epoch": 1.9635009765625e-05, + "model_forward_time": 0.025398731231689453, + "step": 12868 + }, + { + "epoch": 1.9635009765625e-05, + "step": 12868, + "training_step_time": 0.12003278732299805 + }, + { + "epoch": 1.963653564453125e-05, + "model_forward_time": 0.025040626525878906, + "step": 12869 + }, + { + "epoch": 1.963653564453125e-05, + "step": 12869, + "training_step_time": 0.11315274238586426 + }, + { + "epoch": 1.96380615234375e-05, + "grad_norm": 0.36412978172302246, + "learning_rate": 6.56080192725808e-05, + "loss": 0.0156, + "step": 12870 + }, + { + "epoch": 1.96380615234375e-05, + "model_forward_time": 0.025540590286254883, + "step": 12870 + }, + { + "epoch": 1.96380615234375e-05, + "step": 12870, + "training_step_time": 0.10921645164489746 + }, + { + "epoch": 1.963958740234375e-05, + "model_forward_time": 0.02552938461303711, + "step": 12871 + }, + { + "epoch": 1.963958740234375e-05, + "step": 12871, + "training_step_time": 0.10552024841308594 + }, + { + "epoch": 1.964111328125e-05, + "model_forward_time": 0.025791645050048828, + "step": 12872 + }, + { + "epoch": 1.964111328125e-05, + "step": 12872, + "training_step_time": 0.1988506317138672 + }, + { + "epoch": 1.964263916015625e-05, + "model_forward_time": 0.024656295776367188, + "step": 12873 + }, + { + "epoch": 1.964263916015625e-05, + "step": 12873, + "training_step_time": 0.10237288475036621 + }, + { + "epoch": 1.96441650390625e-05, + "model_forward_time": 0.024573564529418945, + "step": 12874 + }, + { + "epoch": 1.96441650390625e-05, + "step": 12874, + "training_step_time": 0.1059579849243164 + }, + { + "epoch": 1.964569091796875e-05, + "model_forward_time": 0.025198698043823242, + "step": 12875 + }, + { + "epoch": 1.964569091796875e-05, + "step": 12875, + "training_step_time": 0.16658949851989746 + }, + { + "epoch": 1.9647216796875e-05, + "model_forward_time": 0.024290084838867188, + "step": 12876 + }, + { + "epoch": 1.9647216796875e-05, + "step": 12876, + "training_step_time": 0.1685466766357422 + }, + { + "epoch": 1.964874267578125e-05, + "model_forward_time": 0.024353504180908203, + "step": 12877 + }, + { + "epoch": 1.964874267578125e-05, + "step": 12877, + "training_step_time": 0.1091301441192627 + }, + { + "epoch": 1.96502685546875e-05, + "model_forward_time": 0.024603605270385742, + "step": 12878 + }, + { + "epoch": 1.96502685546875e-05, + "step": 12878, + "training_step_time": 0.12238740921020508 + }, + { + "epoch": 1.965179443359375e-05, + "model_forward_time": 0.025193214416503906, + "step": 12879 + }, + { + "epoch": 1.965179443359375e-05, + "step": 12879, + "training_step_time": 0.10371565818786621 + }, + { + "epoch": 1.96533203125e-05, + "grad_norm": 0.31761202216148376, + "learning_rate": 6.555564830166293e-05, + "loss": 0.0142, + "step": 12880 + }, + { + "epoch": 1.96533203125e-05, + "model_forward_time": 0.025135278701782227, + "step": 12880 + }, + { + "epoch": 1.96533203125e-05, + "step": 12880, + "training_step_time": 0.11572647094726562 + }, + { + "epoch": 1.965484619140625e-05, + "model_forward_time": 0.025266408920288086, + "step": 12881 + }, + { + "epoch": 1.965484619140625e-05, + "step": 12881, + "training_step_time": 0.1117708683013916 + }, + { + "epoch": 1.96563720703125e-05, + "model_forward_time": 0.02540874481201172, + "step": 12882 + }, + { + "epoch": 1.96563720703125e-05, + "step": 12882, + "training_step_time": 0.18756508827209473 + }, + { + "epoch": 1.965789794921875e-05, + "model_forward_time": 0.024507761001586914, + "step": 12883 + }, + { + "epoch": 1.965789794921875e-05, + "step": 12883, + "training_step_time": 0.2094886302947998 + }, + { + "epoch": 1.9659423828125e-05, + "model_forward_time": 0.024311065673828125, + "step": 12884 + }, + { + "epoch": 1.9659423828125e-05, + "step": 12884, + "training_step_time": 0.20440292358398438 + }, + { + "epoch": 1.966094970703125e-05, + "model_forward_time": 0.02422308921813965, + "step": 12885 + }, + { + "epoch": 1.966094970703125e-05, + "step": 12885, + "training_step_time": 0.19777631759643555 + }, + { + "epoch": 1.96624755859375e-05, + "model_forward_time": 0.02491903305053711, + "step": 12886 + }, + { + "epoch": 1.96624755859375e-05, + "step": 12886, + "training_step_time": 0.18086981773376465 + }, + { + "epoch": 1.966400146484375e-05, + "model_forward_time": 0.02459096908569336, + "step": 12887 + }, + { + "epoch": 1.966400146484375e-05, + "step": 12887, + "training_step_time": 0.1700887680053711 + }, + { + "epoch": 1.966552734375e-05, + "model_forward_time": 0.024539709091186523, + "step": 12888 + }, + { + "epoch": 1.966552734375e-05, + "step": 12888, + "training_step_time": 0.16908884048461914 + }, + { + "epoch": 1.966705322265625e-05, + "model_forward_time": 0.024845123291015625, + "step": 12889 + }, + { + "epoch": 1.966705322265625e-05, + "step": 12889, + "training_step_time": 0.10823464393615723 + }, + { + "epoch": 1.96685791015625e-05, + "grad_norm": 0.3218054473400116, + "learning_rate": 6.550325842916559e-05, + "loss": 0.0189, + "step": 12890 + }, + { + "epoch": 1.96685791015625e-05, + "model_forward_time": 0.024412155151367188, + "step": 12890 + }, + { + "epoch": 1.96685791015625e-05, + "step": 12890, + "training_step_time": 0.10080862045288086 + }, + { + "epoch": 1.967010498046875e-05, + "model_forward_time": 0.025099992752075195, + "step": 12891 + }, + { + "epoch": 1.967010498046875e-05, + "step": 12891, + "training_step_time": 0.10512280464172363 + }, + { + "epoch": 1.9671630859375e-05, + "model_forward_time": 0.025232553482055664, + "step": 12892 + }, + { + "epoch": 1.9671630859375e-05, + "step": 12892, + "training_step_time": 0.10602259635925293 + }, + { + "epoch": 1.967315673828125e-05, + "model_forward_time": 0.02545332908630371, + "step": 12893 + }, + { + "epoch": 1.967315673828125e-05, + "step": 12893, + "training_step_time": 0.10567760467529297 + }, + { + "epoch": 1.96746826171875e-05, + "model_forward_time": 0.02527165412902832, + "step": 12894 + }, + { + "epoch": 1.96746826171875e-05, + "step": 12894, + "training_step_time": 0.10685157775878906 + }, + { + "epoch": 1.967620849609375e-05, + "model_forward_time": 0.025189638137817383, + "step": 12895 + }, + { + "epoch": 1.967620849609375e-05, + "step": 12895, + "training_step_time": 0.10503411293029785 + }, + { + "epoch": 1.9677734375e-05, + "model_forward_time": 0.02530384063720703, + "step": 12896 + }, + { + "epoch": 1.9677734375e-05, + "step": 12896, + "training_step_time": 0.1059272289276123 + }, + { + "epoch": 1.967926025390625e-05, + "model_forward_time": 0.024991989135742188, + "step": 12897 + }, + { + "epoch": 1.967926025390625e-05, + "step": 12897, + "training_step_time": 0.10413217544555664 + }, + { + "epoch": 1.96807861328125e-05, + "model_forward_time": 0.02576732635498047, + "step": 12898 + }, + { + "epoch": 1.96807861328125e-05, + "step": 12898, + "training_step_time": 0.10917425155639648 + }, + { + "epoch": 1.968231201171875e-05, + "model_forward_time": 0.025289535522460938, + "step": 12899 + }, + { + "epoch": 1.968231201171875e-05, + "step": 12899, + "training_step_time": 0.10906529426574707 + }, + { + "epoch": 1.9683837890625e-05, + "grad_norm": 0.30722227692604065, + "learning_rate": 6.545084971874738e-05, + "loss": 0.0278, + "step": 12900 + }, + { + "epoch": 1.9683837890625e-05, + "model_forward_time": 0.025529146194458008, + "step": 12900 + }, + { + "epoch": 1.9683837890625e-05, + "step": 12900, + "training_step_time": 0.10883545875549316 + }, + { + "epoch": 1.968536376953125e-05, + "model_forward_time": 0.026594161987304688, + "step": 12901 + }, + { + "epoch": 1.968536376953125e-05, + "step": 12901, + "training_step_time": 0.11978411674499512 + }, + { + "epoch": 1.96868896484375e-05, + "model_forward_time": 0.025186538696289062, + "step": 12902 + }, + { + "epoch": 1.96868896484375e-05, + "step": 12902, + "training_step_time": 0.10829877853393555 + }, + { + "epoch": 1.968841552734375e-05, + "model_forward_time": 0.02528524398803711, + "step": 12903 + }, + { + "epoch": 1.968841552734375e-05, + "step": 12903, + "training_step_time": 0.1163630485534668 + }, + { + "epoch": 1.968994140625e-05, + "model_forward_time": 0.025546789169311523, + "step": 12904 + }, + { + "epoch": 1.968994140625e-05, + "step": 12904, + "training_step_time": 0.1234285831451416 + }, + { + "epoch": 1.969146728515625e-05, + "model_forward_time": 0.024251461029052734, + "step": 12905 + }, + { + "epoch": 1.969146728515625e-05, + "step": 12905, + "training_step_time": 0.11604690551757812 + }, + { + "epoch": 1.96929931640625e-05, + "model_forward_time": 0.02507758140563965, + "step": 12906 + }, + { + "epoch": 1.96929931640625e-05, + "step": 12906, + "training_step_time": 0.11973786354064941 + }, + { + "epoch": 1.969451904296875e-05, + "model_forward_time": 0.025205135345458984, + "step": 12907 + }, + { + "epoch": 1.969451904296875e-05, + "step": 12907, + "training_step_time": 0.11592221260070801 + }, + { + "epoch": 1.9696044921875e-05, + "model_forward_time": 0.02539229393005371, + "step": 12908 + }, + { + "epoch": 1.9696044921875e-05, + "step": 12908, + "training_step_time": 0.10541582107543945 + }, + { + "epoch": 1.969757080078125e-05, + "model_forward_time": 0.028443098068237305, + "step": 12909 + }, + { + "epoch": 1.969757080078125e-05, + "step": 12909, + "training_step_time": 0.1079854965209961 + }, + { + "epoch": 1.96990966796875e-05, + "grad_norm": 0.2507993280887604, + "learning_rate": 6.539842223408984e-05, + "loss": 0.0146, + "step": 12910 + }, + { + "epoch": 1.96990966796875e-05, + "model_forward_time": 0.025532960891723633, + "step": 12910 + }, + { + "epoch": 1.96990966796875e-05, + "step": 12910, + "training_step_time": 0.1952366828918457 + }, + { + "epoch": 1.970062255859375e-05, + "model_forward_time": 0.024492263793945312, + "step": 12911 + }, + { + "epoch": 1.970062255859375e-05, + "step": 12911, + "training_step_time": 0.1786959171295166 + }, + { + "epoch": 1.97021484375e-05, + "model_forward_time": 0.02436089515686035, + "step": 12912 + }, + { + "epoch": 1.97021484375e-05, + "step": 12912, + "training_step_time": 0.17003345489501953 + }, + { + "epoch": 1.970367431640625e-05, + "model_forward_time": 0.02440667152404785, + "step": 12913 + }, + { + "epoch": 1.970367431640625e-05, + "step": 12913, + "training_step_time": 0.1667921543121338 + }, + { + "epoch": 1.97052001953125e-05, + "model_forward_time": 0.02492809295654297, + "step": 12914 + }, + { + "epoch": 1.97052001953125e-05, + "step": 12914, + "training_step_time": 0.17603302001953125 + }, + { + "epoch": 1.970672607421875e-05, + "model_forward_time": 0.024754047393798828, + "step": 12915 + }, + { + "epoch": 1.970672607421875e-05, + "step": 12915, + "training_step_time": 0.10221266746520996 + }, + { + "epoch": 1.9708251953125e-05, + "model_forward_time": 0.024989843368530273, + "step": 12916 + }, + { + "epoch": 1.9708251953125e-05, + "step": 12916, + "training_step_time": 0.10599684715270996 + }, + { + "epoch": 1.970977783203125e-05, + "model_forward_time": 0.0256960391998291, + "step": 12917 + }, + { + "epoch": 1.970977783203125e-05, + "step": 12917, + "training_step_time": 0.10503268241882324 + }, + { + "epoch": 1.97113037109375e-05, + "model_forward_time": 0.025318622589111328, + "step": 12918 + }, + { + "epoch": 1.97113037109375e-05, + "step": 12918, + "training_step_time": 0.10567975044250488 + }, + { + "epoch": 1.971282958984375e-05, + "model_forward_time": 0.02509903907775879, + "step": 12919 + }, + { + "epoch": 1.971282958984375e-05, + "step": 12919, + "training_step_time": 0.1442263126373291 + }, + { + "epoch": 1.971435546875e-05, + "grad_norm": 0.22766125202178955, + "learning_rate": 6.534597603889732e-05, + "loss": 0.01, + "step": 12920 + }, + { + "epoch": 1.971435546875e-05, + "model_forward_time": 0.024898290634155273, + "step": 12920 + }, + { + "epoch": 1.971435546875e-05, + "step": 12920, + "training_step_time": 0.12909150123596191 + }, + { + "epoch": 1.971588134765625e-05, + "model_forward_time": 0.024796247482299805, + "step": 12921 + }, + { + "epoch": 1.971588134765625e-05, + "step": 12921, + "training_step_time": 0.12498784065246582 + }, + { + "epoch": 1.97174072265625e-05, + "model_forward_time": 0.02506399154663086, + "step": 12922 + }, + { + "epoch": 1.97174072265625e-05, + "step": 12922, + "training_step_time": 0.11887478828430176 + }, + { + "epoch": 1.971893310546875e-05, + "model_forward_time": 0.025819063186645508, + "step": 12923 + }, + { + "epoch": 1.971893310546875e-05, + "step": 12923, + "training_step_time": 0.1759481430053711 + }, + { + "epoch": 1.9720458984375e-05, + "model_forward_time": 0.024941682815551758, + "step": 12924 + }, + { + "epoch": 1.9720458984375e-05, + "step": 12924, + "training_step_time": 0.13159751892089844 + }, + { + "epoch": 1.972198486328125e-05, + "model_forward_time": 0.024305105209350586, + "step": 12925 + }, + { + "epoch": 1.972198486328125e-05, + "step": 12925, + "training_step_time": 0.1297473907470703 + }, + { + "epoch": 1.97235107421875e-05, + "model_forward_time": 0.02503490447998047, + "step": 12926 + }, + { + "epoch": 1.97235107421875e-05, + "step": 12926, + "training_step_time": 0.12967920303344727 + }, + { + "epoch": 1.972503662109375e-05, + "model_forward_time": 0.023729324340820312, + "step": 12927 + }, + { + "epoch": 1.972503662109375e-05, + "step": 12927, + "training_step_time": 0.10900282859802246 + }, + { + "epoch": 1.97265625e-05, + "model_forward_time": 0.02577948570251465, + "step": 12928 + }, + { + "epoch": 1.97265625e-05, + "step": 12928, + "training_step_time": 0.10805416107177734 + }, + { + "epoch": 1.972808837890625e-05, + "model_forward_time": 0.025420427322387695, + "step": 12929 + }, + { + "epoch": 1.972808837890625e-05, + "step": 12929, + "training_step_time": 0.10832905769348145 + }, + { + "epoch": 1.97296142578125e-05, + "grad_norm": 0.24631604552268982, + "learning_rate": 6.529351119689688e-05, + "loss": 0.0164, + "step": 12930 + }, + { + "epoch": 1.97296142578125e-05, + "model_forward_time": 0.025557279586791992, + "step": 12930 + }, + { + "epoch": 1.97296142578125e-05, + "step": 12930, + "training_step_time": 0.10869216918945312 + }, + { + "epoch": 1.973114013671875e-05, + "model_forward_time": 0.025218963623046875, + "step": 12931 + }, + { + "epoch": 1.973114013671875e-05, + "step": 12931, + "training_step_time": 0.1116631031036377 + }, + { + "epoch": 1.9732666015625e-05, + "model_forward_time": 0.026793718338012695, + "step": 12932 + }, + { + "epoch": 1.9732666015625e-05, + "step": 12932, + "training_step_time": 0.10741233825683594 + }, + { + "epoch": 1.973419189453125e-05, + "model_forward_time": 0.02524876594543457, + "step": 12933 + }, + { + "epoch": 1.973419189453125e-05, + "step": 12933, + "training_step_time": 0.10444450378417969 + }, + { + "epoch": 1.97357177734375e-05, + "model_forward_time": 0.02561497688293457, + "step": 12934 + }, + { + "epoch": 1.97357177734375e-05, + "step": 12934, + "training_step_time": 0.10614323616027832 + }, + { + "epoch": 1.973724365234375e-05, + "model_forward_time": 0.025424957275390625, + "step": 12935 + }, + { + "epoch": 1.973724365234375e-05, + "step": 12935, + "training_step_time": 0.1080465316772461 + }, + { + "epoch": 1.973876953125e-05, + "model_forward_time": 0.02511453628540039, + "step": 12936 + }, + { + "epoch": 1.973876953125e-05, + "step": 12936, + "training_step_time": 0.10456657409667969 + }, + { + "epoch": 1.974029541015625e-05, + "model_forward_time": 0.025657176971435547, + "step": 12937 + }, + { + "epoch": 1.974029541015625e-05, + "step": 12937, + "training_step_time": 0.10506176948547363 + }, + { + "epoch": 1.97418212890625e-05, + "model_forward_time": 0.02539372444152832, + "step": 12938 + }, + { + "epoch": 1.97418212890625e-05, + "step": 12938, + "training_step_time": 0.10872387886047363 + }, + { + "epoch": 1.974334716796875e-05, + "model_forward_time": 0.026163578033447266, + "step": 12939 + }, + { + "epoch": 1.974334716796875e-05, + "step": 12939, + "training_step_time": 0.10485124588012695 + }, + { + "epoch": 1.9744873046875e-05, + "grad_norm": 0.33000293374061584, + "learning_rate": 6.524102777183825e-05, + "loss": 0.0172, + "step": 12940 + }, + { + "epoch": 1.9744873046875e-05, + "model_forward_time": 0.025562286376953125, + "step": 12940 + }, + { + "epoch": 1.9744873046875e-05, + "step": 12940, + "training_step_time": 0.10541343688964844 + }, + { + "epoch": 1.974639892578125e-05, + "model_forward_time": 0.02530384063720703, + "step": 12941 + }, + { + "epoch": 1.974639892578125e-05, + "step": 12941, + "training_step_time": 0.10480093955993652 + }, + { + "epoch": 1.97479248046875e-05, + "model_forward_time": 0.027099609375, + "step": 12942 + }, + { + "epoch": 1.97479248046875e-05, + "step": 12942, + "training_step_time": 0.17743587493896484 + }, + { + "epoch": 1.974945068359375e-05, + "model_forward_time": 0.025362491607666016, + "step": 12943 + }, + { + "epoch": 1.974945068359375e-05, + "step": 12943, + "training_step_time": 0.20627999305725098 + }, + { + "epoch": 1.97509765625e-05, + "model_forward_time": 0.025545120239257812, + "step": 12944 + }, + { + "epoch": 1.97509765625e-05, + "step": 12944, + "training_step_time": 0.20525312423706055 + }, + { + "epoch": 1.975250244140625e-05, + "model_forward_time": 0.024981021881103516, + "step": 12945 + }, + { + "epoch": 1.975250244140625e-05, + "step": 12945, + "training_step_time": 0.1963975429534912 + }, + { + "epoch": 1.97540283203125e-05, + "model_forward_time": 0.024600982666015625, + "step": 12946 + }, + { + "epoch": 1.97540283203125e-05, + "step": 12946, + "training_step_time": 0.1828014850616455 + }, + { + "epoch": 1.975555419921875e-05, + "model_forward_time": 0.024325132369995117, + "step": 12947 + }, + { + "epoch": 1.975555419921875e-05, + "step": 12947, + "training_step_time": 0.20746731758117676 + }, + { + "epoch": 1.9757080078125e-05, + "model_forward_time": 0.0243527889251709, + "step": 12948 + }, + { + "epoch": 1.9757080078125e-05, + "step": 12948, + "training_step_time": 0.16481661796569824 + }, + { + "epoch": 1.975860595703125e-05, + "model_forward_time": 0.024631261825561523, + "step": 12949 + }, + { + "epoch": 1.975860595703125e-05, + "step": 12949, + "training_step_time": 0.18009614944458008 + }, + { + "epoch": 1.97601318359375e-05, + "grad_norm": 0.2345770299434662, + "learning_rate": 6.518852582749373e-05, + "loss": 0.0097, + "step": 12950 + }, + { + "epoch": 1.97601318359375e-05, + "model_forward_time": 0.0279083251953125, + "step": 12950 + }, + { + "epoch": 1.97601318359375e-05, + "step": 12950, + "training_step_time": 0.14490294456481934 + }, + { + "epoch": 1.976165771484375e-05, + "model_forward_time": 0.02529597282409668, + "step": 12951 + }, + { + "epoch": 1.976165771484375e-05, + "step": 12951, + "training_step_time": 0.10426115989685059 + }, + { + "epoch": 1.976318359375e-05, + "model_forward_time": 0.025103092193603516, + "step": 12952 + }, + { + "epoch": 1.976318359375e-05, + "step": 12952, + "training_step_time": 0.12754106521606445 + }, + { + "epoch": 1.976470947265625e-05, + "model_forward_time": 0.025542736053466797, + "step": 12953 + }, + { + "epoch": 1.976470947265625e-05, + "step": 12953, + "training_step_time": 0.12406206130981445 + }, + { + "epoch": 1.97662353515625e-05, + "model_forward_time": 0.024995088577270508, + "step": 12954 + }, + { + "epoch": 1.97662353515625e-05, + "step": 12954, + "training_step_time": 0.1104421615600586 + }, + { + "epoch": 1.976776123046875e-05, + "model_forward_time": 0.025180578231811523, + "step": 12955 + }, + { + "epoch": 1.976776123046875e-05, + "step": 12955, + "training_step_time": 0.13296890258789062 + }, + { + "epoch": 1.9769287109375e-05, + "model_forward_time": 0.025307416915893555, + "step": 12956 + }, + { + "epoch": 1.9769287109375e-05, + "step": 12956, + "training_step_time": 0.12666916847229004 + }, + { + "epoch": 1.977081298828125e-05, + "model_forward_time": 0.02503514289855957, + "step": 12957 + }, + { + "epoch": 1.977081298828125e-05, + "step": 12957, + "training_step_time": 0.19740962982177734 + }, + { + "epoch": 1.97723388671875e-05, + "model_forward_time": 0.02440166473388672, + "step": 12958 + }, + { + "epoch": 1.97723388671875e-05, + "step": 12958, + "training_step_time": 0.11558794975280762 + }, + { + "epoch": 1.977386474609375e-05, + "model_forward_time": 0.024742603302001953, + "step": 12959 + }, + { + "epoch": 1.977386474609375e-05, + "step": 12959, + "training_step_time": 0.11440348625183105 + }, + { + "epoch": 1.9775390625e-05, + "grad_norm": 0.49542027711868286, + "learning_rate": 6.513600542765817e-05, + "loss": 0.0135, + "step": 12960 + }, + { + "epoch": 1.9775390625e-05, + "model_forward_time": 0.025341510772705078, + "step": 12960 + }, + { + "epoch": 1.9775390625e-05, + "step": 12960, + "training_step_time": 0.11275434494018555 + }, + { + "epoch": 1.977691650390625e-05, + "model_forward_time": 0.02537083625793457, + "step": 12961 + }, + { + "epoch": 1.977691650390625e-05, + "step": 12961, + "training_step_time": 0.11256909370422363 + }, + { + "epoch": 1.97784423828125e-05, + "model_forward_time": 0.025336027145385742, + "step": 12962 + }, + { + "epoch": 1.97784423828125e-05, + "step": 12962, + "training_step_time": 0.15708160400390625 + }, + { + "epoch": 1.977996826171875e-05, + "model_forward_time": 0.024913787841796875, + "step": 12963 + }, + { + "epoch": 1.977996826171875e-05, + "step": 12963, + "training_step_time": 0.12293410301208496 + }, + { + "epoch": 1.9781494140625e-05, + "model_forward_time": 0.024710893630981445, + "step": 12964 + }, + { + "epoch": 1.9781494140625e-05, + "step": 12964, + "training_step_time": 0.13669514656066895 + }, + { + "epoch": 1.978302001953125e-05, + "model_forward_time": 0.026203393936157227, + "step": 12965 + }, + { + "epoch": 1.978302001953125e-05, + "step": 12965, + "training_step_time": 0.15183568000793457 + }, + { + "epoch": 1.97845458984375e-05, + "model_forward_time": 0.024552106857299805, + "step": 12966 + }, + { + "epoch": 1.97845458984375e-05, + "step": 12966, + "training_step_time": 0.17336392402648926 + }, + { + "epoch": 1.978607177734375e-05, + "model_forward_time": 0.025058269500732422, + "step": 12967 + }, + { + "epoch": 1.978607177734375e-05, + "step": 12967, + "training_step_time": 0.16536736488342285 + }, + { + "epoch": 1.978759765625e-05, + "model_forward_time": 0.026524782180786133, + "step": 12968 + }, + { + "epoch": 1.978759765625e-05, + "step": 12968, + "training_step_time": 0.11164498329162598 + }, + { + "epoch": 1.978912353515625e-05, + "model_forward_time": 0.024999141693115234, + "step": 12969 + }, + { + "epoch": 1.978912353515625e-05, + "step": 12969, + "training_step_time": 0.1055443286895752 + }, + { + "epoch": 1.97906494140625e-05, + "grad_norm": 0.3491387665271759, + "learning_rate": 6.508346663614878e-05, + "loss": 0.0139, + "step": 12970 + }, + { + "epoch": 1.97906494140625e-05, + "model_forward_time": 0.02531147003173828, + "step": 12970 + }, + { + "epoch": 1.97906494140625e-05, + "step": 12970, + "training_step_time": 0.10434269905090332 + }, + { + "epoch": 1.979217529296875e-05, + "model_forward_time": 0.025009870529174805, + "step": 12971 + }, + { + "epoch": 1.979217529296875e-05, + "step": 12971, + "training_step_time": 0.10943722724914551 + }, + { + "epoch": 1.9793701171875e-05, + "model_forward_time": 0.025560379028320312, + "step": 12972 + }, + { + "epoch": 1.9793701171875e-05, + "step": 12972, + "training_step_time": 0.10622382164001465 + }, + { + "epoch": 1.979522705078125e-05, + "model_forward_time": 0.025025606155395508, + "step": 12973 + }, + { + "epoch": 1.979522705078125e-05, + "step": 12973, + "training_step_time": 0.10638260841369629 + }, + { + "epoch": 1.97967529296875e-05, + "model_forward_time": 0.025577545166015625, + "step": 12974 + }, + { + "epoch": 1.97967529296875e-05, + "step": 12974, + "training_step_time": 0.10578012466430664 + }, + { + "epoch": 1.979827880859375e-05, + "model_forward_time": 0.02516031265258789, + "step": 12975 + }, + { + "epoch": 1.979827880859375e-05, + "step": 12975, + "training_step_time": 0.1048879623413086 + }, + { + "epoch": 1.97998046875e-05, + "model_forward_time": 0.025217771530151367, + "step": 12976 + }, + { + "epoch": 1.97998046875e-05, + "step": 12976, + "training_step_time": 0.10461187362670898 + }, + { + "epoch": 1.980133056640625e-05, + "model_forward_time": 0.02544569969177246, + "step": 12977 + }, + { + "epoch": 1.980133056640625e-05, + "step": 12977, + "training_step_time": 0.10429859161376953 + }, + { + "epoch": 1.98028564453125e-05, + "model_forward_time": 0.025442123413085938, + "step": 12978 + }, + { + "epoch": 1.98028564453125e-05, + "step": 12978, + "training_step_time": 0.1060633659362793 + }, + { + "epoch": 1.980438232421875e-05, + "model_forward_time": 0.025482654571533203, + "step": 12979 + }, + { + "epoch": 1.980438232421875e-05, + "step": 12979, + "training_step_time": 0.10837769508361816 + }, + { + "epoch": 1.9805908203125e-05, + "grad_norm": 0.2443506419658661, + "learning_rate": 6.503090951680512e-05, + "loss": 0.0108, + "step": 12980 + }, + { + "epoch": 1.9805908203125e-05, + "model_forward_time": 0.025778770446777344, + "step": 12980 + }, + { + "epoch": 1.9805908203125e-05, + "step": 12980, + "training_step_time": 0.10582351684570312 + }, + { + "epoch": 1.980743408203125e-05, + "model_forward_time": 0.02543473243713379, + "step": 12981 + }, + { + "epoch": 1.980743408203125e-05, + "step": 12981, + "training_step_time": 0.10823655128479004 + }, + { + "epoch": 1.98089599609375e-05, + "model_forward_time": 0.025121212005615234, + "step": 12982 + }, + { + "epoch": 1.98089599609375e-05, + "step": 12982, + "training_step_time": 0.10347366333007812 + }, + { + "epoch": 1.981048583984375e-05, + "model_forward_time": 0.025440692901611328, + "step": 12983 + }, + { + "epoch": 1.981048583984375e-05, + "step": 12983, + "training_step_time": 0.11075520515441895 + }, + { + "epoch": 1.981201171875e-05, + "model_forward_time": 0.025128841400146484, + "step": 12984 + }, + { + "epoch": 1.981201171875e-05, + "step": 12984, + "training_step_time": 0.11441922187805176 + }, + { + "epoch": 1.981353759765625e-05, + "model_forward_time": 0.025208473205566406, + "step": 12985 + }, + { + "epoch": 1.981353759765625e-05, + "step": 12985, + "training_step_time": 0.10868549346923828 + }, + { + "epoch": 1.98150634765625e-05, + "model_forward_time": 0.02619147300720215, + "step": 12986 + }, + { + "epoch": 1.98150634765625e-05, + "step": 12986, + "training_step_time": 0.11054706573486328 + }, + { + "epoch": 1.981658935546875e-05, + "model_forward_time": 0.0265505313873291, + "step": 12987 + }, + { + "epoch": 1.981658935546875e-05, + "step": 12987, + "training_step_time": 0.10784482955932617 + }, + { + "epoch": 1.9818115234375e-05, + "model_forward_time": 0.02614736557006836, + "step": 12988 + }, + { + "epoch": 1.9818115234375e-05, + "step": 12988, + "training_step_time": 0.11139512062072754 + }, + { + "epoch": 1.981964111328125e-05, + "model_forward_time": 0.025509119033813477, + "step": 12989 + }, + { + "epoch": 1.981964111328125e-05, + "step": 12989, + "training_step_time": 0.10703849792480469 + }, + { + "epoch": 1.98211669921875e-05, + "grad_norm": 0.2788366377353668, + "learning_rate": 6.497833413348909e-05, + "loss": 0.0148, + "step": 12990 + }, + { + "epoch": 1.98211669921875e-05, + "model_forward_time": 0.025382041931152344, + "step": 12990 + }, + { + "epoch": 1.98211669921875e-05, + "step": 12990, + "training_step_time": 0.19214177131652832 + }, + { + "epoch": 1.982269287109375e-05, + "model_forward_time": 0.02535080909729004, + "step": 12991 + }, + { + "epoch": 1.982269287109375e-05, + "step": 12991, + "training_step_time": 0.13379693031311035 + }, + { + "epoch": 1.982421875e-05, + "model_forward_time": 0.02426600456237793, + "step": 12992 + }, + { + "epoch": 1.982421875e-05, + "step": 12992, + "training_step_time": 0.20628976821899414 + }, + { + "epoch": 1.982574462890625e-05, + "model_forward_time": 0.025187253952026367, + "step": 12993 + }, + { + "epoch": 1.982574462890625e-05, + "step": 12993, + "training_step_time": 0.13337993621826172 + }, + { + "epoch": 1.98272705078125e-05, + "model_forward_time": 0.024390220642089844, + "step": 12994 + }, + { + "epoch": 1.98272705078125e-05, + "step": 12994, + "training_step_time": 0.1152191162109375 + }, + { + "epoch": 1.982879638671875e-05, + "model_forward_time": 0.026357412338256836, + "step": 12995 + }, + { + "epoch": 1.982879638671875e-05, + "step": 12995, + "training_step_time": 0.11371779441833496 + }, + { + "epoch": 1.9830322265625e-05, + "model_forward_time": 0.025954484939575195, + "step": 12996 + }, + { + "epoch": 1.9830322265625e-05, + "step": 12996, + "training_step_time": 0.11198306083679199 + }, + { + "epoch": 1.983184814453125e-05, + "model_forward_time": 0.025496721267700195, + "step": 12997 + }, + { + "epoch": 1.983184814453125e-05, + "step": 12997, + "training_step_time": 0.10410785675048828 + }, + { + "epoch": 1.98333740234375e-05, + "model_forward_time": 0.024810791015625, + "step": 12998 + }, + { + "epoch": 1.98333740234375e-05, + "step": 12998, + "training_step_time": 0.17913269996643066 + }, + { + "epoch": 1.983489990234375e-05, + "model_forward_time": 0.025715351104736328, + "step": 12999 + }, + { + "epoch": 1.983489990234375e-05, + "step": 12999, + "training_step_time": 0.16986942291259766 + }, + { + "epoch": 1.983642578125e-05, + "grad_norm": 0.5717180967330933, + "learning_rate": 6.492574055008473e-05, + "loss": 0.018, + "step": 13000 + }, + { + "epoch": 1.983642578125e-05, + "model_forward_time": 0.025163650512695312, + "step": 13000 + }, + { + "epoch": 1.983642578125e-05, + "step": 13000, + "training_step_time": 0.09932637214660645 + }, + { + "epoch": 1.983795166015625e-05, + "model_forward_time": 0.023018360137939453, + "step": 13001 + }, + { + "epoch": 1.983795166015625e-05, + "step": 13001, + "training_step_time": 0.11437487602233887 + }, + { + "epoch": 1.98394775390625e-05, + "model_forward_time": 0.024817943572998047, + "step": 13002 + }, + { + "epoch": 1.98394775390625e-05, + "step": 13002, + "training_step_time": 0.12758779525756836 + }, + { + "epoch": 1.984100341796875e-05, + "model_forward_time": 0.025259971618652344, + "step": 13003 + }, + { + "epoch": 1.984100341796875e-05, + "step": 13003, + "training_step_time": 0.10886621475219727 + }, + { + "epoch": 1.9842529296875e-05, + "model_forward_time": 0.0252840518951416, + "step": 13004 + }, + { + "epoch": 1.9842529296875e-05, + "step": 13004, + "training_step_time": 0.12966537475585938 + }, + { + "epoch": 1.984405517578125e-05, + "model_forward_time": 0.025537967681884766, + "step": 13005 + }, + { + "epoch": 1.984405517578125e-05, + "step": 13005, + "training_step_time": 0.10333633422851562 + }, + { + "epoch": 1.98455810546875e-05, + "model_forward_time": 0.026198863983154297, + "step": 13006 + }, + { + "epoch": 1.98455810546875e-05, + "step": 13006, + "training_step_time": 0.10606074333190918 + }, + { + "epoch": 1.984710693359375e-05, + "model_forward_time": 0.025611400604248047, + "step": 13007 + }, + { + "epoch": 1.984710693359375e-05, + "step": 13007, + "training_step_time": 0.14496111869812012 + }, + { + "epoch": 1.98486328125e-05, + "model_forward_time": 0.024966955184936523, + "step": 13008 + }, + { + "epoch": 1.98486328125e-05, + "step": 13008, + "training_step_time": 0.15911602973937988 + }, + { + "epoch": 1.985015869140625e-05, + "model_forward_time": 0.024559974670410156, + "step": 13009 + }, + { + "epoch": 1.985015869140625e-05, + "step": 13009, + "training_step_time": 0.1760084629058838 + }, + { + "epoch": 1.98516845703125e-05, + "grad_norm": 0.2336445450782776, + "learning_rate": 6.487312883049819e-05, + "loss": 0.0289, + "step": 13010 + }, + { + "epoch": 1.98516845703125e-05, + "model_forward_time": 0.024847030639648438, + "step": 13010 + }, + { + "epoch": 1.98516845703125e-05, + "step": 13010, + "training_step_time": 0.15555167198181152 + }, + { + "epoch": 1.985321044921875e-05, + "model_forward_time": 0.02464604377746582, + "step": 13011 + }, + { + "epoch": 1.985321044921875e-05, + "step": 13011, + "training_step_time": 0.14980840682983398 + }, + { + "epoch": 1.9854736328125e-05, + "model_forward_time": 0.025061845779418945, + "step": 13012 + }, + { + "epoch": 1.9854736328125e-05, + "step": 13012, + "training_step_time": 0.10904884338378906 + }, + { + "epoch": 1.985626220703125e-05, + "model_forward_time": 0.024730205535888672, + "step": 13013 + }, + { + "epoch": 1.985626220703125e-05, + "step": 13013, + "training_step_time": 0.10241103172302246 + }, + { + "epoch": 1.98577880859375e-05, + "model_forward_time": 0.02540111541748047, + "step": 13014 + }, + { + "epoch": 1.98577880859375e-05, + "step": 13014, + "training_step_time": 0.10588288307189941 + }, + { + "epoch": 1.985931396484375e-05, + "model_forward_time": 0.025182485580444336, + "step": 13015 + }, + { + "epoch": 1.985931396484375e-05, + "step": 13015, + "training_step_time": 0.13691067695617676 + }, + { + "epoch": 1.986083984375e-05, + "model_forward_time": 0.025496959686279297, + "step": 13016 + }, + { + "epoch": 1.986083984375e-05, + "step": 13016, + "training_step_time": 0.1389782428741455 + }, + { + "epoch": 1.986236572265625e-05, + "model_forward_time": 0.02418828010559082, + "step": 13017 + }, + { + "epoch": 1.986236572265625e-05, + "step": 13017, + "training_step_time": 0.10822629928588867 + }, + { + "epoch": 1.98638916015625e-05, + "model_forward_time": 0.025012493133544922, + "step": 13018 + }, + { + "epoch": 1.98638916015625e-05, + "step": 13018, + "training_step_time": 0.12387681007385254 + }, + { + "epoch": 1.986541748046875e-05, + "model_forward_time": 0.02521038055419922, + "step": 13019 + }, + { + "epoch": 1.986541748046875e-05, + "step": 13019, + "training_step_time": 0.1122133731842041 + }, + { + "epoch": 1.9866943359375e-05, + "grad_norm": 0.22945524752140045, + "learning_rate": 6.48204990386577e-05, + "loss": 0.0229, + "step": 13020 + }, + { + "epoch": 1.9866943359375e-05, + "model_forward_time": 0.025598526000976562, + "step": 13020 + }, + { + "epoch": 1.9866943359375e-05, + "step": 13020, + "training_step_time": 0.11237621307373047 + }, + { + "epoch": 1.986846923828125e-05, + "model_forward_time": 0.02527761459350586, + "step": 13021 + }, + { + "epoch": 1.986846923828125e-05, + "step": 13021, + "training_step_time": 0.11441326141357422 + }, + { + "epoch": 1.98699951171875e-05, + "model_forward_time": 0.025340795516967773, + "step": 13022 + }, + { + "epoch": 1.98699951171875e-05, + "step": 13022, + "training_step_time": 0.11266541481018066 + }, + { + "epoch": 1.987152099609375e-05, + "model_forward_time": 0.0253298282623291, + "step": 13023 + }, + { + "epoch": 1.987152099609375e-05, + "step": 13023, + "training_step_time": 0.10731291770935059 + }, + { + "epoch": 1.9873046875e-05, + "model_forward_time": 0.02550363540649414, + "step": 13024 + }, + { + "epoch": 1.9873046875e-05, + "step": 13024, + "training_step_time": 0.10975885391235352 + }, + { + "epoch": 1.987457275390625e-05, + "model_forward_time": 0.027916669845581055, + "step": 13025 + }, + { + "epoch": 1.987457275390625e-05, + "step": 13025, + "training_step_time": 0.11066150665283203 + }, + { + "epoch": 1.98760986328125e-05, + "model_forward_time": 0.02533555030822754, + "step": 13026 + }, + { + "epoch": 1.98760986328125e-05, + "step": 13026, + "training_step_time": 0.10743546485900879 + }, + { + "epoch": 1.987762451171875e-05, + "model_forward_time": 0.025142669677734375, + "step": 13027 + }, + { + "epoch": 1.987762451171875e-05, + "step": 13027, + "training_step_time": 0.10720491409301758 + }, + { + "epoch": 1.9879150390625e-05, + "model_forward_time": 0.025131702423095703, + "step": 13028 + }, + { + "epoch": 1.9879150390625e-05, + "step": 13028, + "training_step_time": 0.10606861114501953 + }, + { + "epoch": 1.988067626953125e-05, + "model_forward_time": 0.02549433708190918, + "step": 13029 + }, + { + "epoch": 1.988067626953125e-05, + "step": 13029, + "training_step_time": 0.10604166984558105 + }, + { + "epoch": 1.98822021484375e-05, + "grad_norm": 0.41425690054893494, + "learning_rate": 6.476785123851336e-05, + "loss": 0.0299, + "step": 13030 + }, + { + "epoch": 1.98822021484375e-05, + "model_forward_time": 0.02429938316345215, + "step": 13030 + }, + { + "epoch": 1.98822021484375e-05, + "step": 13030, + "training_step_time": 0.1160440444946289 + }, + { + "epoch": 1.988372802734375e-05, + "model_forward_time": 0.02533555030822754, + "step": 13031 + }, + { + "epoch": 1.988372802734375e-05, + "step": 13031, + "training_step_time": 0.1092674732208252 + }, + { + "epoch": 1.988525390625e-05, + "model_forward_time": 0.024751663208007812, + "step": 13032 + }, + { + "epoch": 1.988525390625e-05, + "step": 13032, + "training_step_time": 0.10417914390563965 + }, + { + "epoch": 1.988677978515625e-05, + "model_forward_time": 0.027908802032470703, + "step": 13033 + }, + { + "epoch": 1.988677978515625e-05, + "step": 13033, + "training_step_time": 0.10971546173095703 + }, + { + "epoch": 1.98883056640625e-05, + "model_forward_time": 0.025412321090698242, + "step": 13034 + }, + { + "epoch": 1.98883056640625e-05, + "step": 13034, + "training_step_time": 0.10615777969360352 + }, + { + "epoch": 1.988983154296875e-05, + "model_forward_time": 0.025093555450439453, + "step": 13035 + }, + { + "epoch": 1.988983154296875e-05, + "step": 13035, + "training_step_time": 0.10447144508361816 + }, + { + "epoch": 1.9891357421875e-05, + "model_forward_time": 0.025583505630493164, + "step": 13036 + }, + { + "epoch": 1.9891357421875e-05, + "step": 13036, + "training_step_time": 0.10522580146789551 + }, + { + "epoch": 1.989288330078125e-05, + "model_forward_time": 0.02599620819091797, + "step": 13037 + }, + { + "epoch": 1.989288330078125e-05, + "step": 13037, + "training_step_time": 0.10562658309936523 + }, + { + "epoch": 1.98944091796875e-05, + "model_forward_time": 0.025603532791137695, + "step": 13038 + }, + { + "epoch": 1.98944091796875e-05, + "step": 13038, + "training_step_time": 0.10754227638244629 + }, + { + "epoch": 1.989593505859375e-05, + "model_forward_time": 0.025469541549682617, + "step": 13039 + }, + { + "epoch": 1.989593505859375e-05, + "step": 13039, + "training_step_time": 0.10546612739562988 + }, + { + "epoch": 1.98974609375e-05, + "grad_norm": 0.42252713441848755, + "learning_rate": 6.471518549403726e-05, + "loss": 0.0168, + "step": 13040 + }, + { + "epoch": 1.98974609375e-05, + "model_forward_time": 0.025261878967285156, + "step": 13040 + }, + { + "epoch": 1.98974609375e-05, + "step": 13040, + "training_step_time": 0.10693478584289551 + }, + { + "epoch": 1.989898681640625e-05, + "model_forward_time": 0.025131702423095703, + "step": 13041 + }, + { + "epoch": 1.989898681640625e-05, + "step": 13041, + "training_step_time": 0.10682201385498047 + }, + { + "epoch": 1.99005126953125e-05, + "model_forward_time": 0.025505542755126953, + "step": 13042 + }, + { + "epoch": 1.99005126953125e-05, + "step": 13042, + "training_step_time": 0.10527634620666504 + }, + { + "epoch": 1.990203857421875e-05, + "model_forward_time": 0.0250852108001709, + "step": 13043 + }, + { + "epoch": 1.990203857421875e-05, + "step": 13043, + "training_step_time": 0.10788869857788086 + }, + { + "epoch": 1.9903564453125e-05, + "model_forward_time": 0.025376081466674805, + "step": 13044 + }, + { + "epoch": 1.9903564453125e-05, + "step": 13044, + "training_step_time": 0.10537195205688477 + }, + { + "epoch": 1.990509033203125e-05, + "model_forward_time": 0.025628328323364258, + "step": 13045 + }, + { + "epoch": 1.990509033203125e-05, + "step": 13045, + "training_step_time": 0.10646820068359375 + }, + { + "epoch": 1.99066162109375e-05, + "model_forward_time": 0.025632858276367188, + "step": 13046 + }, + { + "epoch": 1.99066162109375e-05, + "step": 13046, + "training_step_time": 0.10693049430847168 + }, + { + "epoch": 1.990814208984375e-05, + "model_forward_time": 0.025464296340942383, + "step": 13047 + }, + { + "epoch": 1.990814208984375e-05, + "step": 13047, + "training_step_time": 0.1395723819732666 + }, + { + "epoch": 1.990966796875e-05, + "model_forward_time": 0.027755022048950195, + "step": 13048 + }, + { + "epoch": 1.990966796875e-05, + "step": 13048, + "training_step_time": 0.12746572494506836 + }, + { + "epoch": 1.991119384765625e-05, + "model_forward_time": 0.024701356887817383, + "step": 13049 + }, + { + "epoch": 1.991119384765625e-05, + "step": 13049, + "training_step_time": 0.23617172241210938 + }, + { + "epoch": 1.99127197265625e-05, + "grad_norm": 0.21935215592384338, + "learning_rate": 6.466250186922325e-05, + "loss": 0.0129, + "step": 13050 + }, + { + "epoch": 1.99127197265625e-05, + "model_forward_time": 0.024423837661743164, + "step": 13050 + }, + { + "epoch": 1.99127197265625e-05, + "step": 13050, + "training_step_time": 0.16428875923156738 + }, + { + "epoch": 1.991424560546875e-05, + "model_forward_time": 0.024848461151123047, + "step": 13051 + }, + { + "epoch": 1.991424560546875e-05, + "step": 13051, + "training_step_time": 0.17467784881591797 + }, + { + "epoch": 1.9915771484375e-05, + "model_forward_time": 0.0247344970703125, + "step": 13052 + }, + { + "epoch": 1.9915771484375e-05, + "step": 13052, + "training_step_time": 0.10133123397827148 + }, + { + "epoch": 1.991729736328125e-05, + "model_forward_time": 0.02526235580444336, + "step": 13053 + }, + { + "epoch": 1.991729736328125e-05, + "step": 13053, + "training_step_time": 0.17099428176879883 + }, + { + "epoch": 1.99188232421875e-05, + "model_forward_time": 0.024710416793823242, + "step": 13054 + }, + { + "epoch": 1.99188232421875e-05, + "step": 13054, + "training_step_time": 0.10448861122131348 + }, + { + "epoch": 1.992034912109375e-05, + "model_forward_time": 0.026925325393676758, + "step": 13055 + }, + { + "epoch": 1.992034912109375e-05, + "step": 13055, + "training_step_time": 0.14220309257507324 + }, + { + "epoch": 1.9921875e-05, + "model_forward_time": 0.025788545608520508, + "step": 13056 + }, + { + "epoch": 1.9921875e-05, + "step": 13056, + "training_step_time": 0.1923828125 + }, + { + "epoch": 1.992340087890625e-05, + "model_forward_time": 0.024652719497680664, + "step": 13057 + }, + { + "epoch": 1.992340087890625e-05, + "step": 13057, + "training_step_time": 0.1302051544189453 + }, + { + "epoch": 1.99249267578125e-05, + "model_forward_time": 0.024805307388305664, + "step": 13058 + }, + { + "epoch": 1.99249267578125e-05, + "step": 13058, + "training_step_time": 0.1180565357208252 + }, + { + "epoch": 1.992645263671875e-05, + "model_forward_time": 0.025587797164916992, + "step": 13059 + }, + { + "epoch": 1.992645263671875e-05, + "step": 13059, + "training_step_time": 0.10673022270202637 + }, + { + "epoch": 1.9927978515625e-05, + "grad_norm": 0.3332976996898651, + "learning_rate": 6.460980042808687e-05, + "loss": 0.016, + "step": 13060 + }, + { + "epoch": 1.9927978515625e-05, + "model_forward_time": 0.025459766387939453, + "step": 13060 + }, + { + "epoch": 1.9927978515625e-05, + "step": 13060, + "training_step_time": 0.10619640350341797 + }, + { + "epoch": 1.992950439453125e-05, + "model_forward_time": 0.0250241756439209, + "step": 13061 + }, + { + "epoch": 1.992950439453125e-05, + "step": 13061, + "training_step_time": 0.10913586616516113 + }, + { + "epoch": 1.99310302734375e-05, + "model_forward_time": 0.024940013885498047, + "step": 13062 + }, + { + "epoch": 1.99310302734375e-05, + "step": 13062, + "training_step_time": 0.1120452880859375 + }, + { + "epoch": 1.993255615234375e-05, + "model_forward_time": 0.025316715240478516, + "step": 13063 + }, + { + "epoch": 1.993255615234375e-05, + "step": 13063, + "training_step_time": 0.11233282089233398 + }, + { + "epoch": 1.993408203125e-05, + "model_forward_time": 0.024810314178466797, + "step": 13064 + }, + { + "epoch": 1.993408203125e-05, + "step": 13064, + "training_step_time": 0.12602853775024414 + }, + { + "epoch": 1.993560791015625e-05, + "model_forward_time": 0.02645587921142578, + "step": 13065 + }, + { + "epoch": 1.993560791015625e-05, + "step": 13065, + "training_step_time": 0.11041903495788574 + }, + { + "epoch": 1.99371337890625e-05, + "model_forward_time": 0.02548384666442871, + "step": 13066 + }, + { + "epoch": 1.99371337890625e-05, + "step": 13066, + "training_step_time": 0.17828083038330078 + }, + { + "epoch": 1.993865966796875e-05, + "model_forward_time": 0.025374650955200195, + "step": 13067 + }, + { + "epoch": 1.993865966796875e-05, + "step": 13067, + "training_step_time": 0.1350555419921875 + }, + { + "epoch": 1.9940185546875e-05, + "model_forward_time": 0.02435016632080078, + "step": 13068 + }, + { + "epoch": 1.9940185546875e-05, + "step": 13068, + "training_step_time": 0.11890387535095215 + }, + { + "epoch": 1.994171142578125e-05, + "model_forward_time": 0.024969100952148438, + "step": 13069 + }, + { + "epoch": 1.994171142578125e-05, + "step": 13069, + "training_step_time": 0.10307550430297852 + }, + { + "epoch": 1.99432373046875e-05, + "grad_norm": 0.2634739279747009, + "learning_rate": 6.455708123466536e-05, + "loss": 0.0262, + "step": 13070 + }, + { + "epoch": 1.99432373046875e-05, + "model_forward_time": 0.025755882263183594, + "step": 13070 + }, + { + "epoch": 1.99432373046875e-05, + "step": 13070, + "training_step_time": 0.10401606559753418 + }, + { + "epoch": 1.994476318359375e-05, + "model_forward_time": 0.024999141693115234, + "step": 13071 + }, + { + "epoch": 1.994476318359375e-05, + "step": 13071, + "training_step_time": 0.10678505897521973 + }, + { + "epoch": 1.99462890625e-05, + "model_forward_time": 0.025098085403442383, + "step": 13072 + }, + { + "epoch": 1.99462890625e-05, + "step": 13072, + "training_step_time": 0.1051645278930664 + }, + { + "epoch": 1.994781494140625e-05, + "model_forward_time": 0.0254514217376709, + "step": 13073 + }, + { + "epoch": 1.994781494140625e-05, + "step": 13073, + "training_step_time": 0.10649538040161133 + }, + { + "epoch": 1.99493408203125e-05, + "model_forward_time": 0.02523517608642578, + "step": 13074 + }, + { + "epoch": 1.99493408203125e-05, + "step": 13074, + "training_step_time": 0.10987496376037598 + }, + { + "epoch": 1.995086669921875e-05, + "model_forward_time": 0.02665543556213379, + "step": 13075 + }, + { + "epoch": 1.995086669921875e-05, + "step": 13075, + "training_step_time": 0.11060547828674316 + }, + { + "epoch": 1.9952392578125e-05, + "model_forward_time": 0.025132417678833008, + "step": 13076 + }, + { + "epoch": 1.9952392578125e-05, + "step": 13076, + "training_step_time": 0.10463261604309082 + }, + { + "epoch": 1.995391845703125e-05, + "model_forward_time": 0.024984359741210938, + "step": 13077 + }, + { + "epoch": 1.995391845703125e-05, + "step": 13077, + "training_step_time": 0.10304689407348633 + }, + { + "epoch": 1.99554443359375e-05, + "model_forward_time": 0.025263071060180664, + "step": 13078 + }, + { + "epoch": 1.99554443359375e-05, + "step": 13078, + "training_step_time": 0.10876107215881348 + }, + { + "epoch": 1.995697021484375e-05, + "model_forward_time": 0.025107622146606445, + "step": 13079 + }, + { + "epoch": 1.995697021484375e-05, + "step": 13079, + "training_step_time": 0.10625338554382324 + }, + { + "epoch": 1.995849609375e-05, + "grad_norm": 0.2288821041584015, + "learning_rate": 6.450434435301751e-05, + "loss": 0.0152, + "step": 13080 + }, + { + "epoch": 1.995849609375e-05, + "model_forward_time": 0.025149106979370117, + "step": 13080 + }, + { + "epoch": 1.995849609375e-05, + "step": 13080, + "training_step_time": 0.1066596508026123 + }, + { + "epoch": 1.996002197265625e-05, + "model_forward_time": 0.025597572326660156, + "step": 13081 + }, + { + "epoch": 1.996002197265625e-05, + "step": 13081, + "training_step_time": 0.10601949691772461 + }, + { + "epoch": 1.99615478515625e-05, + "model_forward_time": 0.025180578231811523, + "step": 13082 + }, + { + "epoch": 1.99615478515625e-05, + "step": 13082, + "training_step_time": 0.1060030460357666 + }, + { + "epoch": 1.996307373046875e-05, + "model_forward_time": 0.02520132064819336, + "step": 13083 + }, + { + "epoch": 1.996307373046875e-05, + "step": 13083, + "training_step_time": 0.10643792152404785 + }, + { + "epoch": 1.9964599609375e-05, + "model_forward_time": 0.025391101837158203, + "step": 13084 + }, + { + "epoch": 1.9964599609375e-05, + "step": 13084, + "training_step_time": 0.10547113418579102 + }, + { + "epoch": 1.996612548828125e-05, + "model_forward_time": 0.024657249450683594, + "step": 13085 + }, + { + "epoch": 1.996612548828125e-05, + "step": 13085, + "training_step_time": 0.10745382308959961 + }, + { + "epoch": 1.99676513671875e-05, + "model_forward_time": 0.025775671005249023, + "step": 13086 + }, + { + "epoch": 1.99676513671875e-05, + "step": 13086, + "training_step_time": 0.10675930976867676 + }, + { + "epoch": 1.996917724609375e-05, + "model_forward_time": 0.027628660202026367, + "step": 13087 + }, + { + "epoch": 1.996917724609375e-05, + "step": 13087, + "training_step_time": 0.1089470386505127 + }, + { + "epoch": 1.9970703125e-05, + "model_forward_time": 0.02516961097717285, + "step": 13088 + }, + { + "epoch": 1.9970703125e-05, + "step": 13088, + "training_step_time": 0.11188769340515137 + }, + { + "epoch": 1.997222900390625e-05, + "model_forward_time": 0.025101661682128906, + "step": 13089 + }, + { + "epoch": 1.997222900390625e-05, + "step": 13089, + "training_step_time": 0.10859203338623047 + }, + { + "epoch": 1.99737548828125e-05, + "grad_norm": 0.27786651253700256, + "learning_rate": 6.445158984722358e-05, + "loss": 0.0162, + "step": 13090 + }, + { + "epoch": 1.99737548828125e-05, + "model_forward_time": 0.025038480758666992, + "step": 13090 + }, + { + "epoch": 1.99737548828125e-05, + "step": 13090, + "training_step_time": 0.11224102973937988 + }, + { + "epoch": 1.997528076171875e-05, + "model_forward_time": 0.02498149871826172, + "step": 13091 + }, + { + "epoch": 1.997528076171875e-05, + "step": 13091, + "training_step_time": 0.1072235107421875 + }, + { + "epoch": 1.9976806640625e-05, + "model_forward_time": 0.0250091552734375, + "step": 13092 + }, + { + "epoch": 1.9976806640625e-05, + "step": 13092, + "training_step_time": 0.11084389686584473 + }, + { + "epoch": 1.997833251953125e-05, + "model_forward_time": 0.024289846420288086, + "step": 13093 + }, + { + "epoch": 1.997833251953125e-05, + "step": 13093, + "training_step_time": 0.1074521541595459 + }, + { + "epoch": 1.99798583984375e-05, + "model_forward_time": 0.024430036544799805, + "step": 13094 + }, + { + "epoch": 1.99798583984375e-05, + "step": 13094, + "training_step_time": 0.18610835075378418 + }, + { + "epoch": 1.998138427734375e-05, + "model_forward_time": 0.025235891342163086, + "step": 13095 + }, + { + "epoch": 1.998138427734375e-05, + "step": 13095, + "training_step_time": 0.11112117767333984 + }, + { + "epoch": 1.998291015625e-05, + "model_forward_time": 0.024611949920654297, + "step": 13096 + }, + { + "epoch": 1.998291015625e-05, + "step": 13096, + "training_step_time": 0.11307859420776367 + }, + { + "epoch": 1.998443603515625e-05, + "model_forward_time": 0.02549433708190918, + "step": 13097 + }, + { + "epoch": 1.998443603515625e-05, + "step": 13097, + "training_step_time": 0.11691522598266602 + }, + { + "epoch": 1.99859619140625e-05, + "model_forward_time": 0.025487661361694336, + "step": 13098 + }, + { + "epoch": 1.99859619140625e-05, + "step": 13098, + "training_step_time": 0.12632012367248535 + }, + { + "epoch": 1.998748779296875e-05, + "model_forward_time": 0.025876998901367188, + "step": 13099 + }, + { + "epoch": 1.998748779296875e-05, + "step": 13099, + "training_step_time": 0.11048626899719238 + }, + { + "epoch": 1.9989013671875e-05, + "grad_norm": 0.24237234890460968, + "learning_rate": 6.439881778138531e-05, + "loss": 0.0184, + "step": 13100 + }, + { + "epoch": 1.9989013671875e-05, + "model_forward_time": 0.025295257568359375, + "step": 13100 + }, + { + "epoch": 1.9989013671875e-05, + "step": 13100, + "training_step_time": 0.16676878929138184 + }, + { + "epoch": 1.999053955078125e-05, + "model_forward_time": 0.024645566940307617, + "step": 13101 + }, + { + "epoch": 1.999053955078125e-05, + "step": 13101, + "training_step_time": 0.17843270301818848 + }, + { + "epoch": 1.99920654296875e-05, + "model_forward_time": 0.025000810623168945, + "step": 13102 + }, + { + "epoch": 1.99920654296875e-05, + "step": 13102, + "training_step_time": 0.19494295120239258 + }, + { + "epoch": 1.999359130859375e-05, + "model_forward_time": 0.024909257888793945, + "step": 13103 + }, + { + "epoch": 1.999359130859375e-05, + "step": 13103, + "training_step_time": 0.15199995040893555 + }, + { + "epoch": 1.99951171875e-05, + "model_forward_time": 0.02503228187561035, + "step": 13104 + }, + { + "epoch": 1.99951171875e-05, + "step": 13104, + "training_step_time": 0.16281890869140625 + }, + { + "epoch": 1.999664306640625e-05, + "model_forward_time": 0.024399518966674805, + "step": 13105 + }, + { + "epoch": 1.999664306640625e-05, + "step": 13105, + "training_step_time": 0.1760883331298828 + }, + { + "epoch": 1.99981689453125e-05, + "model_forward_time": 0.024824142456054688, + "step": 13106 + }, + { + "epoch": 1.99981689453125e-05, + "step": 13106, + "training_step_time": 0.10220789909362793 + }, + { + "epoch": 1.999969482421875e-05, + "model_forward_time": 0.024693727493286133, + "step": 13107 + }, + { + "epoch": 1.999969482421875e-05, + "step": 13107, + "training_step_time": 0.10187840461730957 + }, + { + "epoch": 2.0001220703125e-05, + "model_forward_time": 0.025020599365234375, + "step": 13108 + }, + { + "epoch": 2.0001220703125e-05, + "step": 13108, + "training_step_time": 0.10901093482971191 + }, + { + "epoch": 2.000274658203125e-05, + "model_forward_time": 0.025329113006591797, + "step": 13109 + }, + { + "epoch": 2.000274658203125e-05, + "step": 13109, + "training_step_time": 0.10388827323913574 + }, + { + "epoch": 2.00042724609375e-05, + "grad_norm": 0.37235501408576965, + "learning_rate": 6.43460282196257e-05, + "loss": 0.0128, + "step": 13110 + }, + { + "epoch": 2.00042724609375e-05, + "model_forward_time": 0.025379657745361328, + "step": 13110 + }, + { + "epoch": 2.00042724609375e-05, + "step": 13110, + "training_step_time": 0.19805121421813965 + }, + { + "epoch": 2.000579833984375e-05, + "model_forward_time": 0.024512767791748047, + "step": 13111 + }, + { + "epoch": 2.000579833984375e-05, + "step": 13111, + "training_step_time": 0.1289660930633545 + }, + { + "epoch": 2.000732421875e-05, + "model_forward_time": 0.024247169494628906, + "step": 13112 + }, + { + "epoch": 2.000732421875e-05, + "step": 13112, + "training_step_time": 0.1319580078125 + }, + { + "epoch": 2.000885009765625e-05, + "model_forward_time": 0.024625539779663086, + "step": 13113 + }, + { + "epoch": 2.000885009765625e-05, + "step": 13113, + "training_step_time": 0.13284516334533691 + }, + { + "epoch": 2.00103759765625e-05, + "model_forward_time": 0.024773120880126953, + "step": 13114 + }, + { + "epoch": 2.00103759765625e-05, + "step": 13114, + "training_step_time": 0.11490368843078613 + }, + { + "epoch": 2.001190185546875e-05, + "model_forward_time": 0.025012969970703125, + "step": 13115 + }, + { + "epoch": 2.001190185546875e-05, + "step": 13115, + "training_step_time": 0.13335585594177246 + }, + { + "epoch": 2.0013427734375e-05, + "model_forward_time": 0.025088787078857422, + "step": 13116 + }, + { + "epoch": 2.0013427734375e-05, + "step": 13116, + "training_step_time": 0.10246515274047852 + }, + { + "epoch": 2.001495361328125e-05, + "model_forward_time": 0.02521228790283203, + "step": 13117 + }, + { + "epoch": 2.001495361328125e-05, + "step": 13117, + "training_step_time": 0.10532665252685547 + }, + { + "epoch": 2.00164794921875e-05, + "model_forward_time": 0.025324583053588867, + "step": 13118 + }, + { + "epoch": 2.00164794921875e-05, + "step": 13118, + "training_step_time": 0.10660457611083984 + }, + { + "epoch": 2.001800537109375e-05, + "model_forward_time": 0.0253603458404541, + "step": 13119 + }, + { + "epoch": 2.001800537109375e-05, + "step": 13119, + "training_step_time": 0.1057138442993164 + }, + { + "epoch": 2.001953125e-05, + "grad_norm": 0.3438621759414673, + "learning_rate": 6.42932212260891e-05, + "loss": 0.0112, + "step": 13120 + }, + { + "epoch": 2.001953125e-05, + "model_forward_time": 0.025230884552001953, + "step": 13120 + }, + { + "epoch": 2.001953125e-05, + "step": 13120, + "training_step_time": 0.10791587829589844 + }, + { + "epoch": 2.002105712890625e-05, + "model_forward_time": 0.02501988410949707, + "step": 13121 + }, + { + "epoch": 2.002105712890625e-05, + "step": 13121, + "training_step_time": 0.10616445541381836 + }, + { + "epoch": 2.00225830078125e-05, + "model_forward_time": 0.02545952796936035, + "step": 13122 + }, + { + "epoch": 2.00225830078125e-05, + "step": 13122, + "training_step_time": 0.1059412956237793 + }, + { + "epoch": 2.002410888671875e-05, + "model_forward_time": 0.02530694007873535, + "step": 13123 + }, + { + "epoch": 2.002410888671875e-05, + "step": 13123, + "training_step_time": 0.1060936450958252 + }, + { + "epoch": 2.0025634765625e-05, + "model_forward_time": 0.025653362274169922, + "step": 13124 + }, + { + "epoch": 2.0025634765625e-05, + "step": 13124, + "training_step_time": 0.10892033576965332 + }, + { + "epoch": 2.002716064453125e-05, + "model_forward_time": 0.02498483657836914, + "step": 13125 + }, + { + "epoch": 2.002716064453125e-05, + "step": 13125, + "training_step_time": 0.10973548889160156 + }, + { + "epoch": 2.00286865234375e-05, + "model_forward_time": 0.025124073028564453, + "step": 13126 + }, + { + "epoch": 2.00286865234375e-05, + "step": 13126, + "training_step_time": 0.10728907585144043 + }, + { + "epoch": 2.003021240234375e-05, + "model_forward_time": 0.0252988338470459, + "step": 13127 + }, + { + "epoch": 2.003021240234375e-05, + "step": 13127, + "training_step_time": 0.10434389114379883 + }, + { + "epoch": 2.003173828125e-05, + "model_forward_time": 0.025461912155151367, + "step": 13128 + }, + { + "epoch": 2.003173828125e-05, + "step": 13128, + "training_step_time": 0.10489439964294434 + }, + { + "epoch": 2.003326416015625e-05, + "model_forward_time": 0.025616884231567383, + "step": 13129 + }, + { + "epoch": 2.003326416015625e-05, + "step": 13129, + "training_step_time": 0.10916566848754883 + }, + { + "epoch": 2.00347900390625e-05, + "grad_norm": 0.5092164278030396, + "learning_rate": 6.42403968649409e-05, + "loss": 0.0202, + "step": 13130 + }, + { + "epoch": 2.00347900390625e-05, + "model_forward_time": 0.025549650192260742, + "step": 13130 + }, + { + "epoch": 2.00347900390625e-05, + "step": 13130, + "training_step_time": 0.1053159236907959 + }, + { + "epoch": 2.003631591796875e-05, + "model_forward_time": 0.025358200073242188, + "step": 13131 + }, + { + "epoch": 2.003631591796875e-05, + "step": 13131, + "training_step_time": 0.1114804744720459 + }, + { + "epoch": 2.0037841796875e-05, + "model_forward_time": 0.02514958381652832, + "step": 13132 + }, + { + "epoch": 2.0037841796875e-05, + "step": 13132, + "training_step_time": 0.10991573333740234 + }, + { + "epoch": 2.003936767578125e-05, + "model_forward_time": 0.02521491050720215, + "step": 13133 + }, + { + "epoch": 2.003936767578125e-05, + "step": 13133, + "training_step_time": 0.10701608657836914 + }, + { + "epoch": 2.00408935546875e-05, + "model_forward_time": 0.026064157485961914, + "step": 13134 + }, + { + "epoch": 2.00408935546875e-05, + "step": 13134, + "training_step_time": 0.10944151878356934 + }, + { + "epoch": 2.004241943359375e-05, + "model_forward_time": 0.02555084228515625, + "step": 13135 + }, + { + "epoch": 2.004241943359375e-05, + "step": 13135, + "training_step_time": 0.1080636978149414 + }, + { + "epoch": 2.00439453125e-05, + "model_forward_time": 0.02543044090270996, + "step": 13136 + }, + { + "epoch": 2.00439453125e-05, + "step": 13136, + "training_step_time": 0.10766053199768066 + }, + { + "epoch": 2.004547119140625e-05, + "model_forward_time": 0.02535414695739746, + "step": 13137 + }, + { + "epoch": 2.004547119140625e-05, + "step": 13137, + "training_step_time": 0.10640931129455566 + }, + { + "epoch": 2.00469970703125e-05, + "model_forward_time": 0.02507495880126953, + "step": 13138 + }, + { + "epoch": 2.00469970703125e-05, + "step": 13138, + "training_step_time": 0.10819196701049805 + }, + { + "epoch": 2.004852294921875e-05, + "model_forward_time": 0.025016069412231445, + "step": 13139 + }, + { + "epoch": 2.004852294921875e-05, + "step": 13139, + "training_step_time": 0.135850191116333 + }, + { + "epoch": 2.0050048828125e-05, + "grad_norm": 0.23394645750522614, + "learning_rate": 6.418755520036775e-05, + "loss": 0.0196, + "step": 13140 + }, + { + "epoch": 2.0050048828125e-05, + "model_forward_time": 0.025213956832885742, + "step": 13140 + }, + { + "epoch": 2.0050048828125e-05, + "step": 13140, + "training_step_time": 0.21664166450500488 + }, + { + "epoch": 2.005157470703125e-05, + "model_forward_time": 0.024566173553466797, + "step": 13141 + }, + { + "epoch": 2.005157470703125e-05, + "step": 13141, + "training_step_time": 0.15531229972839355 + }, + { + "epoch": 2.00531005859375e-05, + "model_forward_time": 0.023367881774902344, + "step": 13142 + }, + { + "epoch": 2.00531005859375e-05, + "step": 13142, + "training_step_time": 0.1949901580810547 + }, + { + "epoch": 2.005462646484375e-05, + "model_forward_time": 0.024398088455200195, + "step": 13143 + }, + { + "epoch": 2.005462646484375e-05, + "step": 13143, + "training_step_time": 0.14475178718566895 + }, + { + "epoch": 2.005615234375e-05, + "model_forward_time": 0.024286985397338867, + "step": 13144 + }, + { + "epoch": 2.005615234375e-05, + "step": 13144, + "training_step_time": 0.1433866024017334 + }, + { + "epoch": 2.005767822265625e-05, + "model_forward_time": 0.025110244750976562, + "step": 13145 + }, + { + "epoch": 2.005767822265625e-05, + "step": 13145, + "training_step_time": 0.1800978183746338 + }, + { + "epoch": 2.00592041015625e-05, + "model_forward_time": 0.025079011917114258, + "step": 13146 + }, + { + "epoch": 2.00592041015625e-05, + "step": 13146, + "training_step_time": 0.1626269817352295 + }, + { + "epoch": 2.006072998046875e-05, + "model_forward_time": 0.023705005645751953, + "step": 13147 + }, + { + "epoch": 2.006072998046875e-05, + "step": 13147, + "training_step_time": 0.17360520362854004 + }, + { + "epoch": 2.0062255859375e-05, + "model_forward_time": 0.02312779426574707, + "step": 13148 + }, + { + "epoch": 2.0062255859375e-05, + "step": 13148, + "training_step_time": 0.17927098274230957 + }, + { + "epoch": 2.006378173828125e-05, + "model_forward_time": 0.02369093894958496, + "step": 13149 + }, + { + "epoch": 2.006378173828125e-05, + "step": 13149, + "training_step_time": 0.14154863357543945 + }, + { + "epoch": 2.00653076171875e-05, + "grad_norm": 0.42746293544769287, + "learning_rate": 6.413469629657723e-05, + "loss": 0.02, + "step": 13150 + }, + { + "epoch": 2.00653076171875e-05, + "model_forward_time": 0.023860692977905273, + "step": 13150 + }, + { + "epoch": 2.00653076171875e-05, + "step": 13150, + "training_step_time": 0.1055762767791748 + }, + { + "epoch": 2.006683349609375e-05, + "model_forward_time": 0.02430438995361328, + "step": 13151 + }, + { + "epoch": 2.006683349609375e-05, + "step": 13151, + "training_step_time": 0.10465550422668457 + }, + { + "epoch": 2.0068359375e-05, + "model_forward_time": 0.027730226516723633, + "step": 13152 + }, + { + "epoch": 2.0068359375e-05, + "step": 13152, + "training_step_time": 0.10910868644714355 + }, + { + "epoch": 2.006988525390625e-05, + "model_forward_time": 0.024867534637451172, + "step": 13153 + }, + { + "epoch": 2.006988525390625e-05, + "step": 13153, + "training_step_time": 0.10611414909362793 + }, + { + "epoch": 2.00714111328125e-05, + "model_forward_time": 0.02453923225402832, + "step": 13154 + }, + { + "epoch": 2.00714111328125e-05, + "step": 13154, + "training_step_time": 0.10519623756408691 + }, + { + "epoch": 2.007293701171875e-05, + "model_forward_time": 0.024723052978515625, + "step": 13155 + }, + { + "epoch": 2.007293701171875e-05, + "step": 13155, + "training_step_time": 0.20570826530456543 + }, + { + "epoch": 2.0074462890625e-05, + "model_forward_time": 0.023334741592407227, + "step": 13156 + }, + { + "epoch": 2.0074462890625e-05, + "step": 13156, + "training_step_time": 0.12289237976074219 + }, + { + "epoch": 2.007598876953125e-05, + "model_forward_time": 0.023724794387817383, + "step": 13157 + }, + { + "epoch": 2.007598876953125e-05, + "step": 13157, + "training_step_time": 0.12497711181640625 + }, + { + "epoch": 2.00775146484375e-05, + "model_forward_time": 0.024933338165283203, + "step": 13158 + }, + { + "epoch": 2.00775146484375e-05, + "step": 13158, + "training_step_time": 0.10424065589904785 + }, + { + "epoch": 2.007904052734375e-05, + "model_forward_time": 0.02471470832824707, + "step": 13159 + }, + { + "epoch": 2.007904052734375e-05, + "step": 13159, + "training_step_time": 0.15917348861694336 + }, + { + "epoch": 2.008056640625e-05, + "grad_norm": 0.20946824550628662, + "learning_rate": 6.408182021779791e-05, + "loss": 0.0104, + "step": 13160 + }, + { + "epoch": 2.008056640625e-05, + "model_forward_time": 0.024190425872802734, + "step": 13160 + }, + { + "epoch": 2.008056640625e-05, + "step": 13160, + "training_step_time": 0.12352418899536133 + }, + { + "epoch": 2.008209228515625e-05, + "model_forward_time": 0.0235750675201416, + "step": 13161 + }, + { + "epoch": 2.008209228515625e-05, + "step": 13161, + "training_step_time": 0.10820388793945312 + }, + { + "epoch": 2.00836181640625e-05, + "model_forward_time": 0.02458810806274414, + "step": 13162 + }, + { + "epoch": 2.00836181640625e-05, + "step": 13162, + "training_step_time": 0.10415434837341309 + }, + { + "epoch": 2.008514404296875e-05, + "model_forward_time": 0.02469635009765625, + "step": 13163 + }, + { + "epoch": 2.008514404296875e-05, + "step": 13163, + "training_step_time": 0.10478973388671875 + }, + { + "epoch": 2.0086669921875e-05, + "model_forward_time": 0.024752378463745117, + "step": 13164 + }, + { + "epoch": 2.0086669921875e-05, + "step": 13164, + "training_step_time": 0.10426998138427734 + }, + { + "epoch": 2.008819580078125e-05, + "model_forward_time": 0.02406787872314453, + "step": 13165 + }, + { + "epoch": 2.008819580078125e-05, + "step": 13165, + "training_step_time": 0.10357475280761719 + }, + { + "epoch": 2.00897216796875e-05, + "model_forward_time": 0.02452874183654785, + "step": 13166 + }, + { + "epoch": 2.00897216796875e-05, + "step": 13166, + "training_step_time": 0.10468626022338867 + }, + { + "epoch": 2.009124755859375e-05, + "model_forward_time": 0.0242156982421875, + "step": 13167 + }, + { + "epoch": 2.009124755859375e-05, + "step": 13167, + "training_step_time": 0.10726714134216309 + }, + { + "epoch": 2.00927734375e-05, + "model_forward_time": 0.024643659591674805, + "step": 13168 + }, + { + "epoch": 2.00927734375e-05, + "step": 13168, + "training_step_time": 0.10453391075134277 + }, + { + "epoch": 2.009429931640625e-05, + "model_forward_time": 0.02452850341796875, + "step": 13169 + }, + { + "epoch": 2.009429931640625e-05, + "step": 13169, + "training_step_time": 0.10692954063415527 + }, + { + "epoch": 2.00958251953125e-05, + "grad_norm": 0.13432158529758453, + "learning_rate": 6.402892702827916e-05, + "loss": 0.0161, + "step": 13170 + }, + { + "epoch": 2.00958251953125e-05, + "model_forward_time": 0.024339914321899414, + "step": 13170 + }, + { + "epoch": 2.00958251953125e-05, + "step": 13170, + "training_step_time": 0.10939669609069824 + }, + { + "epoch": 2.009735107421875e-05, + "model_forward_time": 0.024614334106445312, + "step": 13171 + }, + { + "epoch": 2.009735107421875e-05, + "step": 13171, + "training_step_time": 0.10586047172546387 + }, + { + "epoch": 2.0098876953125e-05, + "model_forward_time": 0.02441263198852539, + "step": 13172 + }, + { + "epoch": 2.0098876953125e-05, + "step": 13172, + "training_step_time": 0.10708117485046387 + }, + { + "epoch": 2.010040283203125e-05, + "model_forward_time": 0.024289369583129883, + "step": 13173 + }, + { + "epoch": 2.010040283203125e-05, + "step": 13173, + "training_step_time": 0.1089174747467041 + }, + { + "epoch": 2.01019287109375e-05, + "model_forward_time": 0.02441692352294922, + "step": 13174 + }, + { + "epoch": 2.01019287109375e-05, + "step": 13174, + "training_step_time": 0.11598014831542969 + }, + { + "epoch": 2.010345458984375e-05, + "model_forward_time": 0.02412581443786621, + "step": 13175 + }, + { + "epoch": 2.010345458984375e-05, + "step": 13175, + "training_step_time": 0.112091064453125 + }, + { + "epoch": 2.010498046875e-05, + "model_forward_time": 0.028087615966796875, + "step": 13176 + }, + { + "epoch": 2.010498046875e-05, + "step": 13176, + "training_step_time": 0.11748075485229492 + }, + { + "epoch": 2.010650634765625e-05, + "model_forward_time": 0.02450084686279297, + "step": 13177 + }, + { + "epoch": 2.010650634765625e-05, + "step": 13177, + "training_step_time": 0.10859346389770508 + }, + { + "epoch": 2.01080322265625e-05, + "model_forward_time": 0.024389266967773438, + "step": 13178 + }, + { + "epoch": 2.01080322265625e-05, + "step": 13178, + "training_step_time": 0.1066434383392334 + }, + { + "epoch": 2.010955810546875e-05, + "model_forward_time": 0.024564743041992188, + "step": 13179 + }, + { + "epoch": 2.010955810546875e-05, + "step": 13179, + "training_step_time": 0.10689163208007812 + }, + { + "epoch": 2.0111083984375e-05, + "grad_norm": 0.23071473836898804, + "learning_rate": 6.397601679229126e-05, + "loss": 0.0166, + "step": 13180 + }, + { + "epoch": 2.0111083984375e-05, + "model_forward_time": 0.024986982345581055, + "step": 13180 + }, + { + "epoch": 2.0111083984375e-05, + "step": 13180, + "training_step_time": 0.1074364185333252 + }, + { + "epoch": 2.011260986328125e-05, + "model_forward_time": 0.024399995803833008, + "step": 13181 + }, + { + "epoch": 2.011260986328125e-05, + "step": 13181, + "training_step_time": 0.10699701309204102 + }, + { + "epoch": 2.01141357421875e-05, + "model_forward_time": 0.024799108505249023, + "step": 13182 + }, + { + "epoch": 2.01141357421875e-05, + "step": 13182, + "training_step_time": 0.1033930778503418 + }, + { + "epoch": 2.011566162109375e-05, + "model_forward_time": 0.02440667152404785, + "step": 13183 + }, + { + "epoch": 2.011566162109375e-05, + "step": 13183, + "training_step_time": 0.10511016845703125 + }, + { + "epoch": 2.01171875e-05, + "model_forward_time": 0.02459883689880371, + "step": 13184 + }, + { + "epoch": 2.01171875e-05, + "step": 13184, + "training_step_time": 0.10967278480529785 + }, + { + "epoch": 2.011871337890625e-05, + "model_forward_time": 0.0257260799407959, + "step": 13185 + }, + { + "epoch": 2.011871337890625e-05, + "step": 13185, + "training_step_time": 0.1060800552368164 + }, + { + "epoch": 2.01202392578125e-05, + "model_forward_time": 0.025269746780395508, + "step": 13186 + }, + { + "epoch": 2.01202392578125e-05, + "step": 13186, + "training_step_time": 0.12652015686035156 + }, + { + "epoch": 2.012176513671875e-05, + "model_forward_time": 0.025461673736572266, + "step": 13187 + }, + { + "epoch": 2.012176513671875e-05, + "step": 13187, + "training_step_time": 0.10809564590454102 + }, + { + "epoch": 2.0123291015625e-05, + "model_forward_time": 0.025513887405395508, + "step": 13188 + }, + { + "epoch": 2.0123291015625e-05, + "step": 13188, + "training_step_time": 0.11003375053405762 + }, + { + "epoch": 2.012481689453125e-05, + "model_forward_time": 0.02524089813232422, + "step": 13189 + }, + { + "epoch": 2.012481689453125e-05, + "step": 13189, + "training_step_time": 0.12632155418395996 + }, + { + "epoch": 2.01263427734375e-05, + "grad_norm": 0.1875467747449875, + "learning_rate": 6.39230895741251e-05, + "loss": 0.011, + "step": 13190 + }, + { + "epoch": 2.01263427734375e-05, + "model_forward_time": 0.025649070739746094, + "step": 13190 + }, + { + "epoch": 2.01263427734375e-05, + "step": 13190, + "training_step_time": 0.11919975280761719 + }, + { + "epoch": 2.012786865234375e-05, + "model_forward_time": 0.02534008026123047, + "step": 13191 + }, + { + "epoch": 2.012786865234375e-05, + "step": 13191, + "training_step_time": 0.11327195167541504 + }, + { + "epoch": 2.012939453125e-05, + "model_forward_time": 0.025333642959594727, + "step": 13192 + }, + { + "epoch": 2.012939453125e-05, + "step": 13192, + "training_step_time": 0.1025545597076416 + }, + { + "epoch": 2.013092041015625e-05, + "model_forward_time": 0.023714542388916016, + "step": 13193 + }, + { + "epoch": 2.013092041015625e-05, + "step": 13193, + "training_step_time": 0.1510303020477295 + }, + { + "epoch": 2.01324462890625e-05, + "model_forward_time": 0.024546384811401367, + "step": 13194 + }, + { + "epoch": 2.01324462890625e-05, + "step": 13194, + "training_step_time": 0.15325355529785156 + }, + { + "epoch": 2.013397216796875e-05, + "model_forward_time": 0.025468826293945312, + "step": 13195 + }, + { + "epoch": 2.013397216796875e-05, + "step": 13195, + "training_step_time": 0.14933514595031738 + }, + { + "epoch": 2.0135498046875e-05, + "model_forward_time": 0.02483201026916504, + "step": 13196 + }, + { + "epoch": 2.0135498046875e-05, + "step": 13196, + "training_step_time": 0.11266231536865234 + }, + { + "epoch": 2.013702392578125e-05, + "model_forward_time": 0.025164127349853516, + "step": 13197 + }, + { + "epoch": 2.013702392578125e-05, + "step": 13197, + "training_step_time": 0.13440418243408203 + }, + { + "epoch": 2.01385498046875e-05, + "model_forward_time": 0.024837732315063477, + "step": 13198 + }, + { + "epoch": 2.01385498046875e-05, + "step": 13198, + "training_step_time": 0.12651562690734863 + }, + { + "epoch": 2.014007568359375e-05, + "model_forward_time": 0.02491021156311035, + "step": 13199 + }, + { + "epoch": 2.014007568359375e-05, + "step": 13199, + "training_step_time": 0.11913371086120605 + }, + { + "epoch": 2.01416015625e-05, + "grad_norm": 0.4467898905277252, + "learning_rate": 6.387014543809223e-05, + "loss": 0.0182, + "step": 13200 + }, + { + "epoch": 2.01416015625e-05, + "model_forward_time": 0.025460004806518555, + "step": 13200 + }, + { + "epoch": 2.01416015625e-05, + "step": 13200, + "training_step_time": 0.12027096748352051 + }, + { + "epoch": 2.014312744140625e-05, + "model_forward_time": 0.02543497085571289, + "step": 13201 + }, + { + "epoch": 2.014312744140625e-05, + "step": 13201, + "training_step_time": 0.11974430084228516 + }, + { + "epoch": 2.01446533203125e-05, + "model_forward_time": 0.025207996368408203, + "step": 13202 + }, + { + "epoch": 2.01446533203125e-05, + "step": 13202, + "training_step_time": 0.1846599578857422 + }, + { + "epoch": 2.014617919921875e-05, + "model_forward_time": 0.024797439575195312, + "step": 13203 + }, + { + "epoch": 2.014617919921875e-05, + "step": 13203, + "training_step_time": 0.11417365074157715 + }, + { + "epoch": 2.0147705078125e-05, + "model_forward_time": 0.024991750717163086, + "step": 13204 + }, + { + "epoch": 2.0147705078125e-05, + "step": 13204, + "training_step_time": 0.10914206504821777 + }, + { + "epoch": 2.014923095703125e-05, + "model_forward_time": 0.02550816535949707, + "step": 13205 + }, + { + "epoch": 2.014923095703125e-05, + "step": 13205, + "training_step_time": 0.12216520309448242 + }, + { + "epoch": 2.01507568359375e-05, + "model_forward_time": 0.02749490737915039, + "step": 13206 + }, + { + "epoch": 2.01507568359375e-05, + "step": 13206, + "training_step_time": 0.10899519920349121 + }, + { + "epoch": 2.015228271484375e-05, + "model_forward_time": 0.02510833740234375, + "step": 13207 + }, + { + "epoch": 2.015228271484375e-05, + "step": 13207, + "training_step_time": 0.1255331039428711 + }, + { + "epoch": 2.015380859375e-05, + "model_forward_time": 0.025784015655517578, + "step": 13208 + }, + { + "epoch": 2.015380859375e-05, + "step": 13208, + "training_step_time": 0.11221504211425781 + }, + { + "epoch": 2.015533447265625e-05, + "model_forward_time": 0.025271892547607422, + "step": 13209 + }, + { + "epoch": 2.015533447265625e-05, + "step": 13209, + "training_step_time": 0.1057744026184082 + }, + { + "epoch": 2.01568603515625e-05, + "grad_norm": 0.2989524006843567, + "learning_rate": 6.38171844485248e-05, + "loss": 0.0084, + "step": 13210 + }, + { + "epoch": 2.01568603515625e-05, + "model_forward_time": 0.025249242782592773, + "step": 13210 + }, + { + "epoch": 2.01568603515625e-05, + "step": 13210, + "training_step_time": 0.1072232723236084 + }, + { + "epoch": 2.015838623046875e-05, + "model_forward_time": 0.02526116371154785, + "step": 13211 + }, + { + "epoch": 2.015838623046875e-05, + "step": 13211, + "training_step_time": 0.10965657234191895 + }, + { + "epoch": 2.0159912109375e-05, + "model_forward_time": 0.02570962905883789, + "step": 13212 + }, + { + "epoch": 2.0159912109375e-05, + "step": 13212, + "training_step_time": 0.10574460029602051 + }, + { + "epoch": 2.016143798828125e-05, + "model_forward_time": 0.024075031280517578, + "step": 13213 + }, + { + "epoch": 2.016143798828125e-05, + "step": 13213, + "training_step_time": 0.10756444931030273 + }, + { + "epoch": 2.01629638671875e-05, + "model_forward_time": 0.024388551712036133, + "step": 13214 + }, + { + "epoch": 2.01629638671875e-05, + "step": 13214, + "training_step_time": 0.10751819610595703 + }, + { + "epoch": 2.016448974609375e-05, + "model_forward_time": 0.0251007080078125, + "step": 13215 + }, + { + "epoch": 2.016448974609375e-05, + "step": 13215, + "training_step_time": 0.1071474552154541 + }, + { + "epoch": 2.0166015625e-05, + "model_forward_time": 0.02499699592590332, + "step": 13216 + }, + { + "epoch": 2.0166015625e-05, + "step": 13216, + "training_step_time": 0.10649704933166504 + }, + { + "epoch": 2.016754150390625e-05, + "model_forward_time": 0.02580571174621582, + "step": 13217 + }, + { + "epoch": 2.016754150390625e-05, + "step": 13217, + "training_step_time": 0.10537600517272949 + }, + { + "epoch": 2.01690673828125e-05, + "model_forward_time": 0.025492429733276367, + "step": 13218 + }, + { + "epoch": 2.01690673828125e-05, + "step": 13218, + "training_step_time": 0.10483741760253906 + }, + { + "epoch": 2.017059326171875e-05, + "model_forward_time": 0.025397062301635742, + "step": 13219 + }, + { + "epoch": 2.017059326171875e-05, + "step": 13219, + "training_step_time": 0.1060481071472168 + }, + { + "epoch": 2.0172119140625e-05, + "grad_norm": 0.2838421165943146, + "learning_rate": 6.376420666977538e-05, + "loss": 0.0117, + "step": 13220 + }, + { + "epoch": 2.0172119140625e-05, + "model_forward_time": 0.02544403076171875, + "step": 13220 + }, + { + "epoch": 2.0172119140625e-05, + "step": 13220, + "training_step_time": 0.10497164726257324 + }, + { + "epoch": 2.017364501953125e-05, + "model_forward_time": 0.02568340301513672, + "step": 13221 + }, + { + "epoch": 2.017364501953125e-05, + "step": 13221, + "training_step_time": 0.10411953926086426 + }, + { + "epoch": 2.01751708984375e-05, + "model_forward_time": 0.025105714797973633, + "step": 13222 + }, + { + "epoch": 2.01751708984375e-05, + "step": 13222, + "training_step_time": 0.10828781127929688 + }, + { + "epoch": 2.017669677734375e-05, + "model_forward_time": 0.027075529098510742, + "step": 13223 + }, + { + "epoch": 2.017669677734375e-05, + "step": 13223, + "training_step_time": 0.7103557586669922 + }, + { + "epoch": 2.017822265625e-05, + "model_forward_time": 0.023116111755371094, + "step": 13224 + }, + { + "epoch": 2.017822265625e-05, + "step": 13224, + "training_step_time": 0.09629511833190918 + }, + { + "epoch": 2.017974853515625e-05, + "model_forward_time": 0.026126384735107422, + "step": 13225 + }, + { + "epoch": 2.017974853515625e-05, + "step": 13225, + "training_step_time": 0.10521626472473145 + }, + { + "epoch": 2.01812744140625e-05, + "model_forward_time": 0.02551412582397461, + "step": 13226 + }, + { + "epoch": 2.01812744140625e-05, + "step": 13226, + "training_step_time": 0.10938262939453125 + }, + { + "epoch": 2.018280029296875e-05, + "model_forward_time": 0.026064634323120117, + "step": 13227 + }, + { + "epoch": 2.018280029296875e-05, + "step": 13227, + "training_step_time": 0.10818791389465332 + }, + { + "epoch": 2.0184326171875e-05, + "model_forward_time": 0.024653911590576172, + "step": 13228 + }, + { + "epoch": 2.0184326171875e-05, + "step": 13228, + "training_step_time": 0.10693049430847168 + }, + { + "epoch": 2.018585205078125e-05, + "model_forward_time": 0.024796247482299805, + "step": 13229 + }, + { + "epoch": 2.018585205078125e-05, + "step": 13229, + "training_step_time": 0.18159723281860352 + }, + { + "epoch": 2.01873779296875e-05, + "grad_norm": 0.4310348927974701, + "learning_rate": 6.371121216621698e-05, + "loss": 0.0168, + "step": 13230 + }, + { + "epoch": 2.01873779296875e-05, + "model_forward_time": 0.02542877197265625, + "step": 13230 + }, + { + "epoch": 2.01873779296875e-05, + "step": 13230, + "training_step_time": 0.10765886306762695 + }, + { + "epoch": 2.018890380859375e-05, + "model_forward_time": 0.02446746826171875, + "step": 13231 + }, + { + "epoch": 2.018890380859375e-05, + "step": 13231, + "training_step_time": 0.10975313186645508 + }, + { + "epoch": 2.01904296875e-05, + "model_forward_time": 0.024953126907348633, + "step": 13232 + }, + { + "epoch": 2.01904296875e-05, + "step": 13232, + "training_step_time": 0.12446308135986328 + }, + { + "epoch": 2.019195556640625e-05, + "model_forward_time": 0.025537967681884766, + "step": 13233 + }, + { + "epoch": 2.019195556640625e-05, + "step": 13233, + "training_step_time": 0.1285398006439209 + }, + { + "epoch": 2.01934814453125e-05, + "model_forward_time": 0.027690649032592773, + "step": 13234 + }, + { + "epoch": 2.01934814453125e-05, + "step": 13234, + "training_step_time": 0.1398179531097412 + }, + { + "epoch": 2.019500732421875e-05, + "model_forward_time": 0.026032447814941406, + "step": 13235 + }, + { + "epoch": 2.019500732421875e-05, + "step": 13235, + "training_step_time": 0.11769485473632812 + }, + { + "epoch": 2.0196533203125e-05, + "model_forward_time": 0.02501058578491211, + "step": 13236 + }, + { + "epoch": 2.0196533203125e-05, + "step": 13236, + "training_step_time": 0.17729401588439941 + }, + { + "epoch": 2.019805908203125e-05, + "model_forward_time": 0.0249631404876709, + "step": 13237 + }, + { + "epoch": 2.019805908203125e-05, + "step": 13237, + "training_step_time": 0.17268109321594238 + }, + { + "epoch": 2.01995849609375e-05, + "model_forward_time": 0.02484726905822754, + "step": 13238 + }, + { + "epoch": 2.01995849609375e-05, + "step": 13238, + "training_step_time": 0.16028761863708496 + }, + { + "epoch": 2.020111083984375e-05, + "model_forward_time": 0.025167226791381836, + "step": 13239 + }, + { + "epoch": 2.020111083984375e-05, + "step": 13239, + "training_step_time": 0.17432355880737305 + }, + { + "epoch": 2.020263671875e-05, + "grad_norm": 0.3545497953891754, + "learning_rate": 6.365820100224292e-05, + "loss": 0.0225, + "step": 13240 + }, + { + "epoch": 2.020263671875e-05, + "model_forward_time": 0.02816939353942871, + "step": 13240 + }, + { + "epoch": 2.020263671875e-05, + "step": 13240, + "training_step_time": 0.12832283973693848 + }, + { + "epoch": 2.020416259765625e-05, + "model_forward_time": 0.024519681930541992, + "step": 13241 + }, + { + "epoch": 2.020416259765625e-05, + "step": 13241, + "training_step_time": 0.12159442901611328 + }, + { + "epoch": 2.02056884765625e-05, + "model_forward_time": 0.025325775146484375, + "step": 13242 + }, + { + "epoch": 2.02056884765625e-05, + "step": 13242, + "training_step_time": 0.1211247444152832 + }, + { + "epoch": 2.020721435546875e-05, + "model_forward_time": 0.025580644607543945, + "step": 13243 + }, + { + "epoch": 2.020721435546875e-05, + "step": 13243, + "training_step_time": 0.1179358959197998 + }, + { + "epoch": 2.0208740234375e-05, + "model_forward_time": 0.024941444396972656, + "step": 13244 + }, + { + "epoch": 2.0208740234375e-05, + "step": 13244, + "training_step_time": 0.1675574779510498 + }, + { + "epoch": 2.021026611328125e-05, + "model_forward_time": 0.02480483055114746, + "step": 13245 + }, + { + "epoch": 2.021026611328125e-05, + "step": 13245, + "training_step_time": 0.11046195030212402 + }, + { + "epoch": 2.02117919921875e-05, + "model_forward_time": 0.024949312210083008, + "step": 13246 + }, + { + "epoch": 2.02117919921875e-05, + "step": 13246, + "training_step_time": 0.2097158432006836 + }, + { + "epoch": 2.021331787109375e-05, + "model_forward_time": 0.024770259857177734, + "step": 13247 + }, + { + "epoch": 2.021331787109375e-05, + "step": 13247, + "training_step_time": 0.1073751449584961 + }, + { + "epoch": 2.021484375e-05, + "model_forward_time": 0.0247802734375, + "step": 13248 + }, + { + "epoch": 2.021484375e-05, + "step": 13248, + "training_step_time": 0.11640620231628418 + }, + { + "epoch": 2.021636962890625e-05, + "model_forward_time": 0.02523040771484375, + "step": 13249 + }, + { + "epoch": 2.021636962890625e-05, + "step": 13249, + "training_step_time": 0.13392162322998047 + }, + { + "epoch": 2.02178955078125e-05, + "grad_norm": 0.4664561152458191, + "learning_rate": 6.360517324226676e-05, + "loss": 0.0128, + "step": 13250 + }, + { + "epoch": 2.02178955078125e-05, + "model_forward_time": 0.025210857391357422, + "step": 13250 + }, + { + "epoch": 2.02178955078125e-05, + "step": 13250, + "training_step_time": 0.11370420455932617 + }, + { + "epoch": 2.021942138671875e-05, + "model_forward_time": 0.025385141372680664, + "step": 13251 + }, + { + "epoch": 2.021942138671875e-05, + "step": 13251, + "training_step_time": 0.1073293685913086 + }, + { + "epoch": 2.0220947265625e-05, + "model_forward_time": 0.025185108184814453, + "step": 13252 + }, + { + "epoch": 2.0220947265625e-05, + "step": 13252, + "training_step_time": 0.10932445526123047 + }, + { + "epoch": 2.022247314453125e-05, + "model_forward_time": 0.02559041976928711, + "step": 13253 + }, + { + "epoch": 2.022247314453125e-05, + "step": 13253, + "training_step_time": 0.10640096664428711 + }, + { + "epoch": 2.02239990234375e-05, + "model_forward_time": 0.024692535400390625, + "step": 13254 + }, + { + "epoch": 2.02239990234375e-05, + "step": 13254, + "training_step_time": 0.10883784294128418 + }, + { + "epoch": 2.022552490234375e-05, + "model_forward_time": 0.026071548461914062, + "step": 13255 + }, + { + "epoch": 2.022552490234375e-05, + "step": 13255, + "training_step_time": 0.10596847534179688 + }, + { + "epoch": 2.022705078125e-05, + "model_forward_time": 0.025192737579345703, + "step": 13256 + }, + { + "epoch": 2.022705078125e-05, + "step": 13256, + "training_step_time": 0.10589456558227539 + }, + { + "epoch": 2.022857666015625e-05, + "model_forward_time": 0.025281190872192383, + "step": 13257 + }, + { + "epoch": 2.022857666015625e-05, + "step": 13257, + "training_step_time": 0.10729265213012695 + }, + { + "epoch": 2.02301025390625e-05, + "model_forward_time": 0.028850317001342773, + "step": 13258 + }, + { + "epoch": 2.02301025390625e-05, + "step": 13258, + "training_step_time": 0.10833501815795898 + }, + { + "epoch": 2.023162841796875e-05, + "model_forward_time": 0.025326967239379883, + "step": 13259 + }, + { + "epoch": 2.023162841796875e-05, + "step": 13259, + "training_step_time": 0.10670685768127441 + }, + { + "epoch": 2.0233154296875e-05, + "grad_norm": 0.48605209589004517, + "learning_rate": 6.355212895072223e-05, + "loss": 0.0154, + "step": 13260 + }, + { + "epoch": 2.0233154296875e-05, + "model_forward_time": 0.026866436004638672, + "step": 13260 + }, + { + "epoch": 2.0233154296875e-05, + "step": 13260, + "training_step_time": 0.10517621040344238 + }, + { + "epoch": 2.023468017578125e-05, + "model_forward_time": 0.025850534439086914, + "step": 13261 + }, + { + "epoch": 2.023468017578125e-05, + "step": 13261, + "training_step_time": 0.10683059692382812 + }, + { + "epoch": 2.02362060546875e-05, + "model_forward_time": 0.02562117576599121, + "step": 13262 + }, + { + "epoch": 2.02362060546875e-05, + "step": 13262, + "training_step_time": 0.10760855674743652 + }, + { + "epoch": 2.023773193359375e-05, + "model_forward_time": 0.02621173858642578, + "step": 13263 + }, + { + "epoch": 2.023773193359375e-05, + "step": 13263, + "training_step_time": 0.12128186225891113 + }, + { + "epoch": 2.02392578125e-05, + "model_forward_time": 0.025330305099487305, + "step": 13264 + }, + { + "epoch": 2.02392578125e-05, + "step": 13264, + "training_step_time": 0.1379563808441162 + }, + { + "epoch": 2.024078369140625e-05, + "model_forward_time": 0.025113582611083984, + "step": 13265 + }, + { + "epoch": 2.024078369140625e-05, + "step": 13265, + "training_step_time": 0.1284654140472412 + }, + { + "epoch": 2.02423095703125e-05, + "model_forward_time": 0.02457118034362793, + "step": 13266 + }, + { + "epoch": 2.02423095703125e-05, + "step": 13266, + "training_step_time": 0.11966109275817871 + }, + { + "epoch": 2.024383544921875e-05, + "model_forward_time": 0.02575230598449707, + "step": 13267 + }, + { + "epoch": 2.024383544921875e-05, + "step": 13267, + "training_step_time": 0.12421107292175293 + }, + { + "epoch": 2.0245361328125e-05, + "model_forward_time": 0.025345563888549805, + "step": 13268 + }, + { + "epoch": 2.0245361328125e-05, + "step": 13268, + "training_step_time": 0.11509895324707031 + }, + { + "epoch": 2.024688720703125e-05, + "model_forward_time": 0.02538013458251953, + "step": 13269 + }, + { + "epoch": 2.024688720703125e-05, + "step": 13269, + "training_step_time": 0.1108243465423584 + }, + { + "epoch": 2.02484130859375e-05, + "grad_norm": 0.20968252420425415, + "learning_rate": 6.349906819206313e-05, + "loss": 0.0154, + "step": 13270 + }, + { + "epoch": 2.02484130859375e-05, + "model_forward_time": 0.024813175201416016, + "step": 13270 + }, + { + "epoch": 2.02484130859375e-05, + "step": 13270, + "training_step_time": 0.1151738166809082 + }, + { + "epoch": 2.024993896484375e-05, + "model_forward_time": 0.02462291717529297, + "step": 13271 + }, + { + "epoch": 2.024993896484375e-05, + "step": 13271, + "training_step_time": 0.11022472381591797 + }, + { + "epoch": 2.025146484375e-05, + "model_forward_time": 0.025240182876586914, + "step": 13272 + }, + { + "epoch": 2.025146484375e-05, + "step": 13272, + "training_step_time": 0.10874605178833008 + }, + { + "epoch": 2.025299072265625e-05, + "model_forward_time": 0.025382041931152344, + "step": 13273 + }, + { + "epoch": 2.025299072265625e-05, + "step": 13273, + "training_step_time": 0.10761761665344238 + }, + { + "epoch": 2.02545166015625e-05, + "model_forward_time": 0.02546215057373047, + "step": 13274 + }, + { + "epoch": 2.02545166015625e-05, + "step": 13274, + "training_step_time": 0.2008051872253418 + }, + { + "epoch": 2.025604248046875e-05, + "model_forward_time": 0.024811267852783203, + "step": 13275 + }, + { + "epoch": 2.025604248046875e-05, + "step": 13275, + "training_step_time": 0.11292719841003418 + }, + { + "epoch": 2.0257568359375e-05, + "model_forward_time": 0.025092601776123047, + "step": 13276 + }, + { + "epoch": 2.0257568359375e-05, + "step": 13276, + "training_step_time": 0.112457275390625 + }, + { + "epoch": 2.025909423828125e-05, + "model_forward_time": 0.025597333908081055, + "step": 13277 + }, + { + "epoch": 2.025909423828125e-05, + "step": 13277, + "training_step_time": 0.11565947532653809 + }, + { + "epoch": 2.02606201171875e-05, + "model_forward_time": 0.028142929077148438, + "step": 13278 + }, + { + "epoch": 2.02606201171875e-05, + "step": 13278, + "training_step_time": 0.13158774375915527 + }, + { + "epoch": 2.026214599609375e-05, + "model_forward_time": 0.025293827056884766, + "step": 13279 + }, + { + "epoch": 2.026214599609375e-05, + "step": 13279, + "training_step_time": 0.22027158737182617 + }, + { + "epoch": 2.0263671875e-05, + "grad_norm": 0.47196853160858154, + "learning_rate": 6.344599103076329e-05, + "loss": 0.0145, + "step": 13280 + }, + { + "epoch": 2.0263671875e-05, + "model_forward_time": 0.025439739227294922, + "step": 13280 + }, + { + "epoch": 2.0263671875e-05, + "step": 13280, + "training_step_time": 0.1041116714477539 + }, + { + "epoch": 2.026519775390625e-05, + "model_forward_time": 0.02497720718383789, + "step": 13281 + }, + { + "epoch": 2.026519775390625e-05, + "step": 13281, + "training_step_time": 0.10393500328063965 + }, + { + "epoch": 2.02667236328125e-05, + "model_forward_time": 0.025792837142944336, + "step": 13282 + }, + { + "epoch": 2.02667236328125e-05, + "step": 13282, + "training_step_time": 0.1177985668182373 + }, + { + "epoch": 2.026824951171875e-05, + "model_forward_time": 0.025362253189086914, + "step": 13283 + }, + { + "epoch": 2.026824951171875e-05, + "step": 13283, + "training_step_time": 0.1530294418334961 + }, + { + "epoch": 2.0269775390625e-05, + "model_forward_time": 0.025170087814331055, + "step": 13284 + }, + { + "epoch": 2.0269775390625e-05, + "step": 13284, + "training_step_time": 0.17938518524169922 + }, + { + "epoch": 2.027130126953125e-05, + "model_forward_time": 0.02484917640686035, + "step": 13285 + }, + { + "epoch": 2.027130126953125e-05, + "step": 13285, + "training_step_time": 0.11914205551147461 + }, + { + "epoch": 2.02728271484375e-05, + "model_forward_time": 0.024804115295410156, + "step": 13286 + }, + { + "epoch": 2.02728271484375e-05, + "step": 13286, + "training_step_time": 0.11117434501647949 + }, + { + "epoch": 2.027435302734375e-05, + "model_forward_time": 0.025751829147338867, + "step": 13287 + }, + { + "epoch": 2.027435302734375e-05, + "step": 13287, + "training_step_time": 0.1049811840057373 + }, + { + "epoch": 2.027587890625e-05, + "model_forward_time": 0.025020122528076172, + "step": 13288 + }, + { + "epoch": 2.027587890625e-05, + "step": 13288, + "training_step_time": 0.10649561882019043 + }, + { + "epoch": 2.027740478515625e-05, + "model_forward_time": 0.02551436424255371, + "step": 13289 + }, + { + "epoch": 2.027740478515625e-05, + "step": 13289, + "training_step_time": 0.10687065124511719 + }, + { + "epoch": 2.02789306640625e-05, + "grad_norm": 0.24185921251773834, + "learning_rate": 6.339289753131649e-05, + "loss": 0.0146, + "step": 13290 + }, + { + "epoch": 2.02789306640625e-05, + "model_forward_time": 0.025755643844604492, + "step": 13290 + }, + { + "epoch": 2.02789306640625e-05, + "step": 13290, + "training_step_time": 0.10640978813171387 + }, + { + "epoch": 2.028045654296875e-05, + "model_forward_time": 0.025796890258789062, + "step": 13291 + }, + { + "epoch": 2.028045654296875e-05, + "step": 13291, + "training_step_time": 0.14030838012695312 + }, + { + "epoch": 2.0281982421875e-05, + "model_forward_time": 0.026367664337158203, + "step": 13292 + }, + { + "epoch": 2.0281982421875e-05, + "step": 13292, + "training_step_time": 0.1271061897277832 + }, + { + "epoch": 2.028350830078125e-05, + "model_forward_time": 0.025219202041625977, + "step": 13293 + }, + { + "epoch": 2.028350830078125e-05, + "step": 13293, + "training_step_time": 0.1328415870666504 + }, + { + "epoch": 2.02850341796875e-05, + "model_forward_time": 0.024710655212402344, + "step": 13294 + }, + { + "epoch": 2.02850341796875e-05, + "step": 13294, + "training_step_time": 0.10700201988220215 + }, + { + "epoch": 2.028656005859375e-05, + "model_forward_time": 0.025164365768432617, + "step": 13295 + }, + { + "epoch": 2.028656005859375e-05, + "step": 13295, + "training_step_time": 0.17856693267822266 + }, + { + "epoch": 2.02880859375e-05, + "model_forward_time": 0.02461695671081543, + "step": 13296 + }, + { + "epoch": 2.02880859375e-05, + "step": 13296, + "training_step_time": 0.13098669052124023 + }, + { + "epoch": 2.028961181640625e-05, + "model_forward_time": 0.024326324462890625, + "step": 13297 + }, + { + "epoch": 2.028961181640625e-05, + "step": 13297, + "training_step_time": 0.11051321029663086 + }, + { + "epoch": 2.02911376953125e-05, + "model_forward_time": 0.025365829467773438, + "step": 13298 + }, + { + "epoch": 2.02911376953125e-05, + "step": 13298, + "training_step_time": 0.10321927070617676 + }, + { + "epoch": 2.029266357421875e-05, + "model_forward_time": 0.02542400360107422, + "step": 13299 + }, + { + "epoch": 2.029266357421875e-05, + "step": 13299, + "training_step_time": 0.10504651069641113 + }, + { + "epoch": 2.0294189453125e-05, + "grad_norm": 0.2074599266052246, + "learning_rate": 6.333978775823631e-05, + "loss": 0.0129, + "step": 13300 + }, + { + "epoch": 2.0294189453125e-05, + "model_forward_time": 0.025163650512695312, + "step": 13300 + }, + { + "epoch": 2.0294189453125e-05, + "step": 13300, + "training_step_time": 0.10434365272521973 + }, + { + "epoch": 2.029571533203125e-05, + "model_forward_time": 0.02515554428100586, + "step": 13301 + }, + { + "epoch": 2.029571533203125e-05, + "step": 13301, + "training_step_time": 0.10779452323913574 + }, + { + "epoch": 2.02972412109375e-05, + "model_forward_time": 0.02542591094970703, + "step": 13302 + }, + { + "epoch": 2.02972412109375e-05, + "step": 13302, + "training_step_time": 0.10377287864685059 + }, + { + "epoch": 2.029876708984375e-05, + "model_forward_time": 0.025277137756347656, + "step": 13303 + }, + { + "epoch": 2.029876708984375e-05, + "step": 13303, + "training_step_time": 0.10571765899658203 + }, + { + "epoch": 2.030029296875e-05, + "model_forward_time": 0.02493882179260254, + "step": 13304 + }, + { + "epoch": 2.030029296875e-05, + "step": 13304, + "training_step_time": 0.10590672492980957 + }, + { + "epoch": 2.030181884765625e-05, + "model_forward_time": 0.02521491050720215, + "step": 13305 + }, + { + "epoch": 2.030181884765625e-05, + "step": 13305, + "training_step_time": 0.10861349105834961 + }, + { + "epoch": 2.03033447265625e-05, + "model_forward_time": 0.025342702865600586, + "step": 13306 + }, + { + "epoch": 2.03033447265625e-05, + "step": 13306, + "training_step_time": 0.10641336441040039 + }, + { + "epoch": 2.030487060546875e-05, + "model_forward_time": 0.025427579879760742, + "step": 13307 + }, + { + "epoch": 2.030487060546875e-05, + "step": 13307, + "training_step_time": 0.10676693916320801 + }, + { + "epoch": 2.0306396484375e-05, + "model_forward_time": 0.02509760856628418, + "step": 13308 + }, + { + "epoch": 2.0306396484375e-05, + "step": 13308, + "training_step_time": 0.10972356796264648 + }, + { + "epoch": 2.030792236328125e-05, + "model_forward_time": 0.024326562881469727, + "step": 13309 + }, + { + "epoch": 2.030792236328125e-05, + "step": 13309, + "training_step_time": 0.1069328784942627 + }, + { + "epoch": 2.03094482421875e-05, + "grad_norm": 0.23471605777740479, + "learning_rate": 6.328666177605616e-05, + "loss": 0.0193, + "step": 13310 + }, + { + "epoch": 2.03094482421875e-05, + "model_forward_time": 0.024066925048828125, + "step": 13310 + }, + { + "epoch": 2.03094482421875e-05, + "step": 13310, + "training_step_time": 0.11478137969970703 + }, + { + "epoch": 2.031097412109375e-05, + "model_forward_time": 0.0251619815826416, + "step": 13311 + }, + { + "epoch": 2.031097412109375e-05, + "step": 13311, + "training_step_time": 0.10899877548217773 + }, + { + "epoch": 2.03125e-05, + "model_forward_time": 0.02590155601501465, + "step": 13312 + }, + { + "epoch": 2.03125e-05, + "step": 13312, + "training_step_time": 0.10541939735412598 + }, + { + "epoch": 2.031402587890625e-05, + "model_forward_time": 0.02649545669555664, + "step": 13313 + }, + { + "epoch": 2.031402587890625e-05, + "step": 13313, + "training_step_time": 0.10709834098815918 + }, + { + "epoch": 2.03155517578125e-05, + "model_forward_time": 0.025274991989135742, + "step": 13314 + }, + { + "epoch": 2.03155517578125e-05, + "step": 13314, + "training_step_time": 0.10619544982910156 + }, + { + "epoch": 2.031707763671875e-05, + "model_forward_time": 0.025475740432739258, + "step": 13315 + }, + { + "epoch": 2.031707763671875e-05, + "step": 13315, + "training_step_time": 0.11225461959838867 + }, + { + "epoch": 2.0318603515625e-05, + "model_forward_time": 0.02570366859436035, + "step": 13316 + }, + { + "epoch": 2.0318603515625e-05, + "step": 13316, + "training_step_time": 0.11017584800720215 + }, + { + "epoch": 2.032012939453125e-05, + "model_forward_time": 0.02529311180114746, + "step": 13317 + }, + { + "epoch": 2.032012939453125e-05, + "step": 13317, + "training_step_time": 0.10726737976074219 + }, + { + "epoch": 2.03216552734375e-05, + "model_forward_time": 0.025243282318115234, + "step": 13318 + }, + { + "epoch": 2.03216552734375e-05, + "step": 13318, + "training_step_time": 0.10825228691101074 + }, + { + "epoch": 2.032318115234375e-05, + "model_forward_time": 0.024976491928100586, + "step": 13319 + }, + { + "epoch": 2.032318115234375e-05, + "step": 13319, + "training_step_time": 0.1051335334777832 + }, + { + "epoch": 2.032470703125e-05, + "grad_norm": 0.6534555554389954, + "learning_rate": 6.323351964932908e-05, + "loss": 0.0234, + "step": 13320 + }, + { + "epoch": 2.032470703125e-05, + "model_forward_time": 0.025284767150878906, + "step": 13320 + }, + { + "epoch": 2.032470703125e-05, + "step": 13320, + "training_step_time": 0.10705852508544922 + }, + { + "epoch": 2.032623291015625e-05, + "model_forward_time": 0.02513909339904785, + "step": 13321 + }, + { + "epoch": 2.032623291015625e-05, + "step": 13321, + "training_step_time": 0.1048727035522461 + }, + { + "epoch": 2.03277587890625e-05, + "model_forward_time": 0.02554774284362793, + "step": 13322 + }, + { + "epoch": 2.03277587890625e-05, + "step": 13322, + "training_step_time": 0.15282225608825684 + }, + { + "epoch": 2.032928466796875e-05, + "model_forward_time": 0.02503061294555664, + "step": 13323 + }, + { + "epoch": 2.032928466796875e-05, + "step": 13323, + "training_step_time": 0.1163182258605957 + }, + { + "epoch": 2.0330810546875e-05, + "model_forward_time": 0.024680376052856445, + "step": 13324 + }, + { + "epoch": 2.0330810546875e-05, + "step": 13324, + "training_step_time": 0.13112902641296387 + }, + { + "epoch": 2.033233642578125e-05, + "model_forward_time": 0.02414560317993164, + "step": 13325 + }, + { + "epoch": 2.033233642578125e-05, + "step": 13325, + "training_step_time": 0.12574434280395508 + }, + { + "epoch": 2.03338623046875e-05, + "model_forward_time": 0.0239865779876709, + "step": 13326 + }, + { + "epoch": 2.03338623046875e-05, + "step": 13326, + "training_step_time": 0.15393328666687012 + }, + { + "epoch": 2.033538818359375e-05, + "model_forward_time": 0.024796009063720703, + "step": 13327 + }, + { + "epoch": 2.033538818359375e-05, + "step": 13327, + "training_step_time": 0.1716611385345459 + }, + { + "epoch": 2.03369140625e-05, + "model_forward_time": 0.025322914123535156, + "step": 13328 + }, + { + "epoch": 2.03369140625e-05, + "step": 13328, + "training_step_time": 0.18828606605529785 + }, + { + "epoch": 2.033843994140625e-05, + "model_forward_time": 0.023903846740722656, + "step": 13329 + }, + { + "epoch": 2.033843994140625e-05, + "step": 13329, + "training_step_time": 0.14388418197631836 + }, + { + "epoch": 2.03399658203125e-05, + "grad_norm": 0.27252867817878723, + "learning_rate": 6.31803614426278e-05, + "loss": 0.0168, + "step": 13330 + }, + { + "epoch": 2.03399658203125e-05, + "model_forward_time": 0.024565458297729492, + "step": 13330 + }, + { + "epoch": 2.03399658203125e-05, + "step": 13330, + "training_step_time": 0.17617368698120117 + }, + { + "epoch": 2.034149169921875e-05, + "model_forward_time": 0.024648666381835938, + "step": 13331 + }, + { + "epoch": 2.034149169921875e-05, + "step": 13331, + "training_step_time": 0.17773771286010742 + }, + { + "epoch": 2.0343017578125e-05, + "model_forward_time": 0.024277687072753906, + "step": 13332 + }, + { + "epoch": 2.0343017578125e-05, + "step": 13332, + "training_step_time": 0.1134943962097168 + }, + { + "epoch": 2.034454345703125e-05, + "model_forward_time": 0.024907588958740234, + "step": 13333 + }, + { + "epoch": 2.034454345703125e-05, + "step": 13333, + "training_step_time": 0.1138298511505127 + }, + { + "epoch": 2.03460693359375e-05, + "model_forward_time": 0.0251007080078125, + "step": 13334 + }, + { + "epoch": 2.03460693359375e-05, + "step": 13334, + "training_step_time": 0.1078031063079834 + }, + { + "epoch": 2.034759521484375e-05, + "model_forward_time": 0.025640010833740234, + "step": 13335 + }, + { + "epoch": 2.034759521484375e-05, + "step": 13335, + "training_step_time": 0.10801815986633301 + }, + { + "epoch": 2.034912109375e-05, + "model_forward_time": 0.025121212005615234, + "step": 13336 + }, + { + "epoch": 2.034912109375e-05, + "step": 13336, + "training_step_time": 0.10560989379882812 + }, + { + "epoch": 2.035064697265625e-05, + "model_forward_time": 0.025162220001220703, + "step": 13337 + }, + { + "epoch": 2.035064697265625e-05, + "step": 13337, + "training_step_time": 0.10491085052490234 + }, + { + "epoch": 2.03521728515625e-05, + "model_forward_time": 0.025075197219848633, + "step": 13338 + }, + { + "epoch": 2.03521728515625e-05, + "step": 13338, + "training_step_time": 0.16425228118896484 + }, + { + "epoch": 2.035369873046875e-05, + "model_forward_time": 0.02476048469543457, + "step": 13339 + }, + { + "epoch": 2.035369873046875e-05, + "step": 13339, + "training_step_time": 0.1192617416381836 + }, + { + "epoch": 2.0355224609375e-05, + "grad_norm": 0.26604318618774414, + "learning_rate": 6.312718722054454e-05, + "loss": 0.0185, + "step": 13340 + }, + { + "epoch": 2.0355224609375e-05, + "model_forward_time": 0.02500319480895996, + "step": 13340 + }, + { + "epoch": 2.0355224609375e-05, + "step": 13340, + "training_step_time": 0.1104893684387207 + }, + { + "epoch": 2.035675048828125e-05, + "model_forward_time": 0.02562737464904785, + "step": 13341 + }, + { + "epoch": 2.035675048828125e-05, + "step": 13341, + "training_step_time": 0.1252579689025879 + }, + { + "epoch": 2.03582763671875e-05, + "model_forward_time": 0.025723695755004883, + "step": 13342 + }, + { + "epoch": 2.03582763671875e-05, + "step": 13342, + "training_step_time": 0.1060798168182373 + }, + { + "epoch": 2.035980224609375e-05, + "model_forward_time": 0.025710344314575195, + "step": 13343 + }, + { + "epoch": 2.035980224609375e-05, + "step": 13343, + "training_step_time": 0.11118292808532715 + }, + { + "epoch": 2.0361328125e-05, + "model_forward_time": 0.02558588981628418, + "step": 13344 + }, + { + "epoch": 2.0361328125e-05, + "step": 13344, + "training_step_time": 0.12491750717163086 + }, + { + "epoch": 2.036285400390625e-05, + "model_forward_time": 0.02542591094970703, + "step": 13345 + }, + { + "epoch": 2.036285400390625e-05, + "step": 13345, + "training_step_time": 0.1108391284942627 + }, + { + "epoch": 2.03643798828125e-05, + "model_forward_time": 0.0256960391998291, + "step": 13346 + }, + { + "epoch": 2.03643798828125e-05, + "step": 13346, + "training_step_time": 0.11232662200927734 + }, + { + "epoch": 2.036590576171875e-05, + "model_forward_time": 0.025386571884155273, + "step": 13347 + }, + { + "epoch": 2.036590576171875e-05, + "step": 13347, + "training_step_time": 0.11004471778869629 + }, + { + "epoch": 2.0367431640625e-05, + "model_forward_time": 0.025976181030273438, + "step": 13348 + }, + { + "epoch": 2.0367431640625e-05, + "step": 13348, + "training_step_time": 0.10616803169250488 + }, + { + "epoch": 2.036895751953125e-05, + "model_forward_time": 0.024954557418823242, + "step": 13349 + }, + { + "epoch": 2.036895751953125e-05, + "step": 13349, + "training_step_time": 0.10417342185974121 + }, + { + "epoch": 2.03704833984375e-05, + "grad_norm": 0.21818700432777405, + "learning_rate": 6.307399704769099e-05, + "loss": 0.0127, + "step": 13350 + }, + { + "epoch": 2.03704833984375e-05, + "model_forward_time": 0.02532339096069336, + "step": 13350 + }, + { + "epoch": 2.03704833984375e-05, + "step": 13350, + "training_step_time": 0.10731673240661621 + }, + { + "epoch": 2.037200927734375e-05, + "model_forward_time": 0.025515079498291016, + "step": 13351 + }, + { + "epoch": 2.037200927734375e-05, + "step": 13351, + "training_step_time": 0.10441398620605469 + }, + { + "epoch": 2.037353515625e-05, + "model_forward_time": 0.025191307067871094, + "step": 13352 + }, + { + "epoch": 2.037353515625e-05, + "step": 13352, + "training_step_time": 0.10577249526977539 + }, + { + "epoch": 2.037506103515625e-05, + "model_forward_time": 0.024889469146728516, + "step": 13353 + }, + { + "epoch": 2.037506103515625e-05, + "step": 13353, + "training_step_time": 0.10596275329589844 + }, + { + "epoch": 2.03765869140625e-05, + "model_forward_time": 0.025519371032714844, + "step": 13354 + }, + { + "epoch": 2.03765869140625e-05, + "step": 13354, + "training_step_time": 0.10979866981506348 + }, + { + "epoch": 2.037811279296875e-05, + "model_forward_time": 0.026854753494262695, + "step": 13355 + }, + { + "epoch": 2.037811279296875e-05, + "step": 13355, + "training_step_time": 0.10616421699523926 + }, + { + "epoch": 2.0379638671875e-05, + "model_forward_time": 0.025388240814208984, + "step": 13356 + }, + { + "epoch": 2.0379638671875e-05, + "step": 13356, + "training_step_time": 0.10576081275939941 + }, + { + "epoch": 2.038116455078125e-05, + "model_forward_time": 0.025641202926635742, + "step": 13357 + }, + { + "epoch": 2.038116455078125e-05, + "step": 13357, + "training_step_time": 0.10996222496032715 + }, + { + "epoch": 2.03826904296875e-05, + "model_forward_time": 0.02522730827331543, + "step": 13358 + }, + { + "epoch": 2.03826904296875e-05, + "step": 13358, + "training_step_time": 0.1120295524597168 + }, + { + "epoch": 2.038421630859375e-05, + "model_forward_time": 0.025324106216430664, + "step": 13359 + }, + { + "epoch": 2.038421630859375e-05, + "step": 13359, + "training_step_time": 0.10677099227905273 + }, + { + "epoch": 2.03857421875e-05, + "grad_norm": 0.2759242057800293, + "learning_rate": 6.302079098869824e-05, + "loss": 0.0168, + "step": 13360 + }, + { + "epoch": 2.03857421875e-05, + "model_forward_time": 0.02557086944580078, + "step": 13360 + }, + { + "epoch": 2.03857421875e-05, + "step": 13360, + "training_step_time": 0.1097874641418457 + }, + { + "epoch": 2.038726806640625e-05, + "model_forward_time": 0.02562856674194336, + "step": 13361 + }, + { + "epoch": 2.038726806640625e-05, + "step": 13361, + "training_step_time": 0.10847139358520508 + }, + { + "epoch": 2.03887939453125e-05, + "model_forward_time": 0.026401519775390625, + "step": 13362 + }, + { + "epoch": 2.03887939453125e-05, + "step": 13362, + "training_step_time": 0.10815906524658203 + }, + { + "epoch": 2.039031982421875e-05, + "model_forward_time": 0.025693178176879883, + "step": 13363 + }, + { + "epoch": 2.039031982421875e-05, + "step": 13363, + "training_step_time": 0.11211395263671875 + }, + { + "epoch": 2.0391845703125e-05, + "model_forward_time": 0.02521228790283203, + "step": 13364 + }, + { + "epoch": 2.0391845703125e-05, + "step": 13364, + "training_step_time": 0.10996460914611816 + }, + { + "epoch": 2.039337158203125e-05, + "model_forward_time": 0.025126934051513672, + "step": 13365 + }, + { + "epoch": 2.039337158203125e-05, + "step": 13365, + "training_step_time": 0.10844683647155762 + }, + { + "epoch": 2.03948974609375e-05, + "model_forward_time": 0.025361061096191406, + "step": 13366 + }, + { + "epoch": 2.03948974609375e-05, + "step": 13366, + "training_step_time": 0.10727047920227051 + }, + { + "epoch": 2.039642333984375e-05, + "model_forward_time": 0.02548980712890625, + "step": 13367 + }, + { + "epoch": 2.039642333984375e-05, + "step": 13367, + "training_step_time": 0.1099245548248291 + }, + { + "epoch": 2.039794921875e-05, + "model_forward_time": 0.02556443214416504, + "step": 13368 + }, + { + "epoch": 2.039794921875e-05, + "step": 13368, + "training_step_time": 0.10476875305175781 + }, + { + "epoch": 2.039947509765625e-05, + "model_forward_time": 0.025258541107177734, + "step": 13369 + }, + { + "epoch": 2.039947509765625e-05, + "step": 13369, + "training_step_time": 0.16568517684936523 + }, + { + "epoch": 2.04010009765625e-05, + "grad_norm": 0.3927362561225891, + "learning_rate": 6.296756910821666e-05, + "loss": 0.0258, + "step": 13370 + }, + { + "epoch": 2.04010009765625e-05, + "model_forward_time": 0.02494049072265625, + "step": 13370 + }, + { + "epoch": 2.04010009765625e-05, + "step": 13370, + "training_step_time": 0.1055293083190918 + }, + { + "epoch": 2.040252685546875e-05, + "model_forward_time": 0.025125980377197266, + "step": 13371 + }, + { + "epoch": 2.040252685546875e-05, + "step": 13371, + "training_step_time": 0.1176290512084961 + }, + { + "epoch": 2.0404052734375e-05, + "model_forward_time": 0.025312423706054688, + "step": 13372 + }, + { + "epoch": 2.0404052734375e-05, + "step": 13372, + "training_step_time": 0.1184237003326416 + }, + { + "epoch": 2.040557861328125e-05, + "model_forward_time": 0.025186538696289062, + "step": 13373 + }, + { + "epoch": 2.040557861328125e-05, + "step": 13373, + "training_step_time": 0.17196273803710938 + }, + { + "epoch": 2.04071044921875e-05, + "model_forward_time": 0.02446603775024414, + "step": 13374 + }, + { + "epoch": 2.04071044921875e-05, + "step": 13374, + "training_step_time": 0.1827099323272705 + }, + { + "epoch": 2.040863037109375e-05, + "model_forward_time": 0.024699926376342773, + "step": 13375 + }, + { + "epoch": 2.040863037109375e-05, + "step": 13375, + "training_step_time": 0.1266465187072754 + }, + { + "epoch": 2.041015625e-05, + "model_forward_time": 0.023993730545043945, + "step": 13376 + }, + { + "epoch": 2.041015625e-05, + "step": 13376, + "training_step_time": 0.1243433952331543 + }, + { + "epoch": 2.041168212890625e-05, + "model_forward_time": 0.024440288543701172, + "step": 13377 + }, + { + "epoch": 2.041168212890625e-05, + "step": 13377, + "training_step_time": 0.11133742332458496 + }, + { + "epoch": 2.04132080078125e-05, + "model_forward_time": 0.02582073211669922, + "step": 13378 + }, + { + "epoch": 2.04132080078125e-05, + "step": 13378, + "training_step_time": 0.20570898056030273 + }, + { + "epoch": 2.041473388671875e-05, + "model_forward_time": 0.024333953857421875, + "step": 13379 + }, + { + "epoch": 2.041473388671875e-05, + "step": 13379, + "training_step_time": 0.13486790657043457 + }, + { + "epoch": 2.0416259765625e-05, + "grad_norm": 0.4889189302921295, + "learning_rate": 6.291433147091583e-05, + "loss": 0.0184, + "step": 13380 + }, + { + "epoch": 2.0416259765625e-05, + "model_forward_time": 0.024821758270263672, + "step": 13380 + }, + { + "epoch": 2.0416259765625e-05, + "step": 13380, + "training_step_time": 0.19266343116760254 + }, + { + "epoch": 2.041778564453125e-05, + "model_forward_time": 0.02434992790222168, + "step": 13381 + }, + { + "epoch": 2.041778564453125e-05, + "step": 13381, + "training_step_time": 0.10489630699157715 + }, + { + "epoch": 2.04193115234375e-05, + "model_forward_time": 0.024602174758911133, + "step": 13382 + }, + { + "epoch": 2.04193115234375e-05, + "step": 13382, + "training_step_time": 0.10331845283508301 + }, + { + "epoch": 2.042083740234375e-05, + "model_forward_time": 0.025224924087524414, + "step": 13383 + }, + { + "epoch": 2.042083740234375e-05, + "step": 13383, + "training_step_time": 0.10495710372924805 + }, + { + "epoch": 2.042236328125e-05, + "model_forward_time": 0.02542734146118164, + "step": 13384 + }, + { + "epoch": 2.042236328125e-05, + "step": 13384, + "training_step_time": 0.1168813705444336 + }, + { + "epoch": 2.042388916015625e-05, + "model_forward_time": 0.025130748748779297, + "step": 13385 + }, + { + "epoch": 2.042388916015625e-05, + "step": 13385, + "training_step_time": 0.16591334342956543 + }, + { + "epoch": 2.04254150390625e-05, + "model_forward_time": 0.024766206741333008, + "step": 13386 + }, + { + "epoch": 2.04254150390625e-05, + "step": 13386, + "training_step_time": 0.11996960639953613 + }, + { + "epoch": 2.042694091796875e-05, + "model_forward_time": 0.024512052536010742, + "step": 13387 + }, + { + "epoch": 2.042694091796875e-05, + "step": 13387, + "training_step_time": 0.10339736938476562 + }, + { + "epoch": 2.0428466796875e-05, + "model_forward_time": 0.025208473205566406, + "step": 13388 + }, + { + "epoch": 2.0428466796875e-05, + "step": 13388, + "training_step_time": 0.12165307998657227 + }, + { + "epoch": 2.042999267578125e-05, + "model_forward_time": 0.025351762771606445, + "step": 13389 + }, + { + "epoch": 2.042999267578125e-05, + "step": 13389, + "training_step_time": 0.18850278854370117 + }, + { + "epoch": 2.04315185546875e-05, + "grad_norm": 0.5438939332962036, + "learning_rate": 6.286107814148454e-05, + "loss": 0.0228, + "step": 13390 + }, + { + "epoch": 2.04315185546875e-05, + "model_forward_time": 0.02413654327392578, + "step": 13390 + }, + { + "epoch": 2.04315185546875e-05, + "step": 13390, + "training_step_time": 0.12462306022644043 + }, + { + "epoch": 2.043304443359375e-05, + "model_forward_time": 0.0245668888092041, + "step": 13391 + }, + { + "epoch": 2.043304443359375e-05, + "step": 13391, + "training_step_time": 0.10827827453613281 + }, + { + "epoch": 2.04345703125e-05, + "model_forward_time": 0.025310516357421875, + "step": 13392 + }, + { + "epoch": 2.04345703125e-05, + "step": 13392, + "training_step_time": 0.10808825492858887 + }, + { + "epoch": 2.043609619140625e-05, + "model_forward_time": 0.025266408920288086, + "step": 13393 + }, + { + "epoch": 2.043609619140625e-05, + "step": 13393, + "training_step_time": 0.1845111846923828 + }, + { + "epoch": 2.04376220703125e-05, + "model_forward_time": 0.02456808090209961, + "step": 13394 + }, + { + "epoch": 2.04376220703125e-05, + "step": 13394, + "training_step_time": 0.20853614807128906 + }, + { + "epoch": 2.043914794921875e-05, + "model_forward_time": 0.02411651611328125, + "step": 13395 + }, + { + "epoch": 2.043914794921875e-05, + "step": 13395, + "training_step_time": 0.20552682876586914 + }, + { + "epoch": 2.0440673828125e-05, + "model_forward_time": 0.02406454086303711, + "step": 13396 + }, + { + "epoch": 2.0440673828125e-05, + "step": 13396, + "training_step_time": 0.2001035213470459 + }, + { + "epoch": 2.044219970703125e-05, + "model_forward_time": 0.02544856071472168, + "step": 13397 + }, + { + "epoch": 2.044219970703125e-05, + "step": 13397, + "training_step_time": 0.18812942504882812 + }, + { + "epoch": 2.04437255859375e-05, + "model_forward_time": 0.024179458618164062, + "step": 13398 + }, + { + "epoch": 2.04437255859375e-05, + "step": 13398, + "training_step_time": 0.17641615867614746 + }, + { + "epoch": 2.044525146484375e-05, + "model_forward_time": 0.024378299713134766, + "step": 13399 + }, + { + "epoch": 2.044525146484375e-05, + "step": 13399, + "training_step_time": 0.17526006698608398 + }, + { + "epoch": 2.044677734375e-05, + "grad_norm": 0.537973165512085, + "learning_rate": 6.280780918463057e-05, + "loss": 0.027, + "step": 13400 + }, + { + "epoch": 2.044677734375e-05, + "model_forward_time": 0.024411439895629883, + "step": 13400 + }, + { + "epoch": 2.044677734375e-05, + "step": 13400, + "training_step_time": 0.15742993354797363 + }, + { + "epoch": 2.044830322265625e-05, + "model_forward_time": 0.024432659149169922, + "step": 13401 + }, + { + "epoch": 2.044830322265625e-05, + "step": 13401, + "training_step_time": 0.11360335350036621 + }, + { + "epoch": 2.04498291015625e-05, + "model_forward_time": 0.02487039566040039, + "step": 13402 + }, + { + "epoch": 2.04498291015625e-05, + "step": 13402, + "training_step_time": 0.10068964958190918 + }, + { + "epoch": 2.045135498046875e-05, + "model_forward_time": 0.025463104248046875, + "step": 13403 + }, + { + "epoch": 2.045135498046875e-05, + "step": 13403, + "training_step_time": 0.10314798355102539 + }, + { + "epoch": 2.0452880859375e-05, + "model_forward_time": 0.02539849281311035, + "step": 13404 + }, + { + "epoch": 2.0452880859375e-05, + "step": 13404, + "training_step_time": 0.10997819900512695 + }, + { + "epoch": 2.045440673828125e-05, + "model_forward_time": 0.025297880172729492, + "step": 13405 + }, + { + "epoch": 2.045440673828125e-05, + "step": 13405, + "training_step_time": 0.10525131225585938 + }, + { + "epoch": 2.04559326171875e-05, + "model_forward_time": 0.025723934173583984, + "step": 13406 + }, + { + "epoch": 2.04559326171875e-05, + "step": 13406, + "training_step_time": 0.10515999794006348 + }, + { + "epoch": 2.045745849609375e-05, + "model_forward_time": 0.025177717208862305, + "step": 13407 + }, + { + "epoch": 2.045745849609375e-05, + "step": 13407, + "training_step_time": 0.10677170753479004 + }, + { + "epoch": 2.0458984375e-05, + "model_forward_time": 0.02572798728942871, + "step": 13408 + }, + { + "epoch": 2.0458984375e-05, + "step": 13408, + "training_step_time": 0.11076951026916504 + }, + { + "epoch": 2.046051025390625e-05, + "model_forward_time": 0.025222063064575195, + "step": 13409 + }, + { + "epoch": 2.046051025390625e-05, + "step": 13409, + "training_step_time": 0.10886454582214355 + }, + { + "epoch": 2.04620361328125e-05, + "grad_norm": 0.4679735600948334, + "learning_rate": 6.275452466508077e-05, + "loss": 0.0187, + "step": 13410 + }, + { + "epoch": 2.04620361328125e-05, + "model_forward_time": 0.02583146095275879, + "step": 13410 + }, + { + "epoch": 2.04620361328125e-05, + "step": 13410, + "training_step_time": 0.1889970302581787 + }, + { + "epoch": 2.046356201171875e-05, + "model_forward_time": 0.026597023010253906, + "step": 13411 + }, + { + "epoch": 2.046356201171875e-05, + "step": 13411, + "training_step_time": 0.15633463859558105 + }, + { + "epoch": 2.0465087890625e-05, + "model_forward_time": 0.023148775100708008, + "step": 13412 + }, + { + "epoch": 2.0465087890625e-05, + "step": 13412, + "training_step_time": 0.1860370635986328 + }, + { + "epoch": 2.046661376953125e-05, + "model_forward_time": 0.024436473846435547, + "step": 13413 + }, + { + "epoch": 2.046661376953125e-05, + "step": 13413, + "training_step_time": 0.20177459716796875 + }, + { + "epoch": 2.04681396484375e-05, + "model_forward_time": 0.02530074119567871, + "step": 13414 + }, + { + "epoch": 2.04681396484375e-05, + "step": 13414, + "training_step_time": 0.1618971824645996 + }, + { + "epoch": 2.046966552734375e-05, + "model_forward_time": 0.025056838989257812, + "step": 13415 + }, + { + "epoch": 2.046966552734375e-05, + "step": 13415, + "training_step_time": 0.10505366325378418 + }, + { + "epoch": 2.047119140625e-05, + "model_forward_time": 0.024538755416870117, + "step": 13416 + }, + { + "epoch": 2.047119140625e-05, + "step": 13416, + "training_step_time": 0.14645838737487793 + }, + { + "epoch": 2.047271728515625e-05, + "model_forward_time": 0.024964094161987305, + "step": 13417 + }, + { + "epoch": 2.047271728515625e-05, + "step": 13417, + "training_step_time": 0.16351628303527832 + }, + { + "epoch": 2.04742431640625e-05, + "model_forward_time": 0.025046348571777344, + "step": 13418 + }, + { + "epoch": 2.04742431640625e-05, + "step": 13418, + "training_step_time": 0.12252449989318848 + }, + { + "epoch": 2.047576904296875e-05, + "model_forward_time": 0.02460479736328125, + "step": 13419 + }, + { + "epoch": 2.047576904296875e-05, + "step": 13419, + "training_step_time": 0.11268949508666992 + }, + { + "epoch": 2.0477294921875e-05, + "grad_norm": 0.2953059673309326, + "learning_rate": 6.27012246475808e-05, + "loss": 0.0222, + "step": 13420 + }, + { + "epoch": 2.0477294921875e-05, + "model_forward_time": 0.025562047958374023, + "step": 13420 + }, + { + "epoch": 2.0477294921875e-05, + "step": 13420, + "training_step_time": 0.1158447265625 + }, + { + "epoch": 2.047882080078125e-05, + "model_forward_time": 0.0252077579498291, + "step": 13421 + }, + { + "epoch": 2.047882080078125e-05, + "step": 13421, + "training_step_time": 0.12416815757751465 + }, + { + "epoch": 2.04803466796875e-05, + "model_forward_time": 0.025690555572509766, + "step": 13422 + }, + { + "epoch": 2.04803466796875e-05, + "step": 13422, + "training_step_time": 0.10564088821411133 + }, + { + "epoch": 2.048187255859375e-05, + "model_forward_time": 0.02516031265258789, + "step": 13423 + }, + { + "epoch": 2.048187255859375e-05, + "step": 13423, + "training_step_time": 0.10915946960449219 + }, + { + "epoch": 2.04833984375e-05, + "model_forward_time": 0.02544689178466797, + "step": 13424 + }, + { + "epoch": 2.04833984375e-05, + "step": 13424, + "training_step_time": 0.10480928421020508 + }, + { + "epoch": 2.048492431640625e-05, + "model_forward_time": 0.025229692459106445, + "step": 13425 + }, + { + "epoch": 2.048492431640625e-05, + "step": 13425, + "training_step_time": 0.10796427726745605 + }, + { + "epoch": 2.04864501953125e-05, + "model_forward_time": 0.02448129653930664, + "step": 13426 + }, + { + "epoch": 2.04864501953125e-05, + "step": 13426, + "training_step_time": 0.1351613998413086 + }, + { + "epoch": 2.048797607421875e-05, + "model_forward_time": 0.0281524658203125, + "step": 13427 + }, + { + "epoch": 2.048797607421875e-05, + "step": 13427, + "training_step_time": 0.12517619132995605 + }, + { + "epoch": 2.0489501953125e-05, + "model_forward_time": 0.024603605270385742, + "step": 13428 + }, + { + "epoch": 2.0489501953125e-05, + "step": 13428, + "training_step_time": 0.11872172355651855 + }, + { + "epoch": 2.049102783203125e-05, + "model_forward_time": 0.02561640739440918, + "step": 13429 + }, + { + "epoch": 2.049102783203125e-05, + "step": 13429, + "training_step_time": 0.1268782615661621 + }, + { + "epoch": 2.04925537109375e-05, + "grad_norm": 0.5403966307640076, + "learning_rate": 6.264790919689525e-05, + "loss": 0.0161, + "step": 13430 + }, + { + "epoch": 2.04925537109375e-05, + "model_forward_time": 0.025127410888671875, + "step": 13430 + }, + { + "epoch": 2.04925537109375e-05, + "step": 13430, + "training_step_time": 0.10532832145690918 + }, + { + "epoch": 2.049407958984375e-05, + "model_forward_time": 0.025257587432861328, + "step": 13431 + }, + { + "epoch": 2.049407958984375e-05, + "step": 13431, + "training_step_time": 0.11516499519348145 + }, + { + "epoch": 2.049560546875e-05, + "model_forward_time": 0.025308609008789062, + "step": 13432 + }, + { + "epoch": 2.049560546875e-05, + "step": 13432, + "training_step_time": 0.11084794998168945 + }, + { + "epoch": 2.049713134765625e-05, + "model_forward_time": 0.025204896926879883, + "step": 13433 + }, + { + "epoch": 2.049713134765625e-05, + "step": 13433, + "training_step_time": 0.10706782341003418 + }, + { + "epoch": 2.04986572265625e-05, + "model_forward_time": 0.025284290313720703, + "step": 13434 + }, + { + "epoch": 2.04986572265625e-05, + "step": 13434, + "training_step_time": 0.10545110702514648 + }, + { + "epoch": 2.050018310546875e-05, + "model_forward_time": 0.025746583938598633, + "step": 13435 + }, + { + "epoch": 2.050018310546875e-05, + "step": 13435, + "training_step_time": 0.10692477226257324 + }, + { + "epoch": 2.0501708984375e-05, + "model_forward_time": 0.02491021156311035, + "step": 13436 + }, + { + "epoch": 2.0501708984375e-05, + "step": 13436, + "training_step_time": 0.10530471801757812 + }, + { + "epoch": 2.050323486328125e-05, + "model_forward_time": 0.025292158126831055, + "step": 13437 + }, + { + "epoch": 2.050323486328125e-05, + "step": 13437, + "training_step_time": 0.10413026809692383 + }, + { + "epoch": 2.05047607421875e-05, + "model_forward_time": 0.02547907829284668, + "step": 13438 + }, + { + "epoch": 2.05047607421875e-05, + "step": 13438, + "training_step_time": 0.10844707489013672 + }, + { + "epoch": 2.050628662109375e-05, + "model_forward_time": 0.025534391403198242, + "step": 13439 + }, + { + "epoch": 2.050628662109375e-05, + "step": 13439, + "training_step_time": 0.10549044609069824 + }, + { + "epoch": 2.05078125e-05, + "grad_norm": 0.246059387922287, + "learning_rate": 6.259457837780742e-05, + "loss": 0.0175, + "step": 13440 + }, + { + "epoch": 2.05078125e-05, + "model_forward_time": 0.02570176124572754, + "step": 13440 + }, + { + "epoch": 2.05078125e-05, + "step": 13440, + "training_step_time": 0.11027359962463379 + }, + { + "epoch": 2.050933837890625e-05, + "model_forward_time": 0.02532196044921875, + "step": 13441 + }, + { + "epoch": 2.050933837890625e-05, + "step": 13441, + "training_step_time": 0.10902976989746094 + }, + { + "epoch": 2.05108642578125e-05, + "model_forward_time": 0.025692224502563477, + "step": 13442 + }, + { + "epoch": 2.05108642578125e-05, + "step": 13442, + "training_step_time": 0.10401558876037598 + }, + { + "epoch": 2.051239013671875e-05, + "model_forward_time": 0.025447845458984375, + "step": 13443 + }, + { + "epoch": 2.051239013671875e-05, + "step": 13443, + "training_step_time": 0.10426831245422363 + }, + { + "epoch": 2.0513916015625e-05, + "model_forward_time": 0.025374650955200195, + "step": 13444 + }, + { + "epoch": 2.0513916015625e-05, + "step": 13444, + "training_step_time": 0.10567498207092285 + }, + { + "epoch": 2.051544189453125e-05, + "model_forward_time": 0.024979829788208008, + "step": 13445 + }, + { + "epoch": 2.051544189453125e-05, + "step": 13445, + "training_step_time": 0.10604453086853027 + }, + { + "epoch": 2.05169677734375e-05, + "model_forward_time": 0.025234699249267578, + "step": 13446 + }, + { + "epoch": 2.05169677734375e-05, + "step": 13446, + "training_step_time": 0.10599446296691895 + }, + { + "epoch": 2.051849365234375e-05, + "model_forward_time": 0.025767087936401367, + "step": 13447 + }, + { + "epoch": 2.051849365234375e-05, + "step": 13447, + "training_step_time": 0.17940235137939453 + }, + { + "epoch": 2.052001953125e-05, + "model_forward_time": 0.024871110916137695, + "step": 13448 + }, + { + "epoch": 2.052001953125e-05, + "step": 13448, + "training_step_time": 0.20208024978637695 + }, + { + "epoch": 2.052154541015625e-05, + "model_forward_time": 0.024614810943603516, + "step": 13449 + }, + { + "epoch": 2.052154541015625e-05, + "step": 13449, + "training_step_time": 0.21276640892028809 + }, + { + "epoch": 2.05230712890625e-05, + "grad_norm": 0.3965492248535156, + "learning_rate": 6.254123225511923e-05, + "loss": 0.0301, + "step": 13450 + }, + { + "epoch": 2.05230712890625e-05, + "model_forward_time": 0.02409219741821289, + "step": 13450 + }, + { + "epoch": 2.05230712890625e-05, + "step": 13450, + "training_step_time": 0.20916199684143066 + }, + { + "epoch": 2.052459716796875e-05, + "model_forward_time": 0.024336576461791992, + "step": 13451 + }, + { + "epoch": 2.052459716796875e-05, + "step": 13451, + "training_step_time": 0.2063922882080078 + }, + { + "epoch": 2.0526123046875e-05, + "model_forward_time": 0.024483442306518555, + "step": 13452 + }, + { + "epoch": 2.0526123046875e-05, + "step": 13452, + "training_step_time": 0.19893336296081543 + }, + { + "epoch": 2.052764892578125e-05, + "model_forward_time": 0.024085283279418945, + "step": 13453 + }, + { + "epoch": 2.052764892578125e-05, + "step": 13453, + "training_step_time": 0.19656848907470703 + }, + { + "epoch": 2.05291748046875e-05, + "model_forward_time": 0.02414083480834961, + "step": 13454 + }, + { + "epoch": 2.05291748046875e-05, + "step": 13454, + "training_step_time": 0.1954667568206787 + }, + { + "epoch": 2.053070068359375e-05, + "model_forward_time": 0.02589702606201172, + "step": 13455 + }, + { + "epoch": 2.053070068359375e-05, + "step": 13455, + "training_step_time": 0.1307220458984375 + }, + { + "epoch": 2.05322265625e-05, + "model_forward_time": 0.025348424911499023, + "step": 13456 + }, + { + "epoch": 2.05322265625e-05, + "step": 13456, + "training_step_time": 0.12230920791625977 + }, + { + "epoch": 2.053375244140625e-05, + "model_forward_time": 0.026223421096801758, + "step": 13457 + }, + { + "epoch": 2.053375244140625e-05, + "step": 13457, + "training_step_time": 0.11296939849853516 + }, + { + "epoch": 2.05352783203125e-05, + "model_forward_time": 0.02593398094177246, + "step": 13458 + }, + { + "epoch": 2.05352783203125e-05, + "step": 13458, + "training_step_time": 0.13977622985839844 + }, + { + "epoch": 2.053680419921875e-05, + "model_forward_time": 0.02613973617553711, + "step": 13459 + }, + { + "epoch": 2.053680419921875e-05, + "step": 13459, + "training_step_time": 0.1622178554534912 + }, + { + "epoch": 2.0538330078125e-05, + "grad_norm": 0.2964446544647217, + "learning_rate": 6.248787089365133e-05, + "loss": 0.026, + "step": 13460 + }, + { + "epoch": 2.0538330078125e-05, + "model_forward_time": 0.025403976440429688, + "step": 13460 + }, + { + "epoch": 2.0538330078125e-05, + "step": 13460, + "training_step_time": 0.1813497543334961 + }, + { + "epoch": 2.053985595703125e-05, + "model_forward_time": 0.024903297424316406, + "step": 13461 + }, + { + "epoch": 2.053985595703125e-05, + "step": 13461, + "training_step_time": 0.12934541702270508 + }, + { + "epoch": 2.05413818359375e-05, + "model_forward_time": 0.024985074996948242, + "step": 13462 + }, + { + "epoch": 2.05413818359375e-05, + "step": 13462, + "training_step_time": 0.10903286933898926 + }, + { + "epoch": 2.054290771484375e-05, + "model_forward_time": 0.025337696075439453, + "step": 13463 + }, + { + "epoch": 2.054290771484375e-05, + "step": 13463, + "training_step_time": 0.13035178184509277 + }, + { + "epoch": 2.054443359375e-05, + "model_forward_time": 0.025640487670898438, + "step": 13464 + }, + { + "epoch": 2.054443359375e-05, + "step": 13464, + "training_step_time": 0.1145782470703125 + }, + { + "epoch": 2.054595947265625e-05, + "model_forward_time": 0.025607824325561523, + "step": 13465 + }, + { + "epoch": 2.054595947265625e-05, + "step": 13465, + "training_step_time": 0.11302518844604492 + }, + { + "epoch": 2.05474853515625e-05, + "model_forward_time": 0.025816917419433594, + "step": 13466 + }, + { + "epoch": 2.05474853515625e-05, + "step": 13466, + "training_step_time": 0.11213016510009766 + }, + { + "epoch": 2.054901123046875e-05, + "model_forward_time": 0.025067806243896484, + "step": 13467 + }, + { + "epoch": 2.054901123046875e-05, + "step": 13467, + "training_step_time": 0.10860538482666016 + }, + { + "epoch": 2.0550537109375e-05, + "model_forward_time": 0.025256633758544922, + "step": 13468 + }, + { + "epoch": 2.0550537109375e-05, + "step": 13468, + "training_step_time": 0.10731077194213867 + }, + { + "epoch": 2.055206298828125e-05, + "model_forward_time": 0.025688886642456055, + "step": 13469 + }, + { + "epoch": 2.055206298828125e-05, + "step": 13469, + "training_step_time": 0.20747661590576172 + }, + { + "epoch": 2.05535888671875e-05, + "grad_norm": 0.23747020959854126, + "learning_rate": 6.243449435824276e-05, + "loss": 0.0334, + "step": 13470 + }, + { + "epoch": 2.05535888671875e-05, + "model_forward_time": 0.02470111846923828, + "step": 13470 + }, + { + "epoch": 2.05535888671875e-05, + "step": 13470, + "training_step_time": 0.11760783195495605 + }, + { + "epoch": 2.055511474609375e-05, + "model_forward_time": 0.024959087371826172, + "step": 13471 + }, + { + "epoch": 2.055511474609375e-05, + "step": 13471, + "training_step_time": 0.13237762451171875 + }, + { + "epoch": 2.0556640625e-05, + "model_forward_time": 0.025312185287475586, + "step": 13472 + }, + { + "epoch": 2.0556640625e-05, + "step": 13472, + "training_step_time": 0.11254715919494629 + }, + { + "epoch": 2.055816650390625e-05, + "model_forward_time": 0.025689363479614258, + "step": 13473 + }, + { + "epoch": 2.055816650390625e-05, + "step": 13473, + "training_step_time": 0.1810760498046875 + }, + { + "epoch": 2.05596923828125e-05, + "model_forward_time": 0.025043725967407227, + "step": 13474 + }, + { + "epoch": 2.05596923828125e-05, + "step": 13474, + "training_step_time": 0.12662744522094727 + }, + { + "epoch": 2.056121826171875e-05, + "model_forward_time": 0.025513887405395508, + "step": 13475 + }, + { + "epoch": 2.056121826171875e-05, + "step": 13475, + "training_step_time": 0.11168694496154785 + }, + { + "epoch": 2.0562744140625e-05, + "model_forward_time": 0.025313377380371094, + "step": 13476 + }, + { + "epoch": 2.0562744140625e-05, + "step": 13476, + "training_step_time": 0.10495781898498535 + }, + { + "epoch": 2.056427001953125e-05, + "model_forward_time": 0.02562880516052246, + "step": 13477 + }, + { + "epoch": 2.056427001953125e-05, + "step": 13477, + "training_step_time": 0.10590624809265137 + }, + { + "epoch": 2.05657958984375e-05, + "model_forward_time": 0.02529597282409668, + "step": 13478 + }, + { + "epoch": 2.05657958984375e-05, + "step": 13478, + "training_step_time": 0.10621333122253418 + }, + { + "epoch": 2.056732177734375e-05, + "model_forward_time": 0.027664899826049805, + "step": 13479 + }, + { + "epoch": 2.056732177734375e-05, + "step": 13479, + "training_step_time": 0.10705780982971191 + }, + { + "epoch": 2.056884765625e-05, + "grad_norm": 0.390501469373703, + "learning_rate": 6.238110271375102e-05, + "loss": 0.0233, + "step": 13480 + }, + { + "epoch": 2.056884765625e-05, + "model_forward_time": 0.02529764175415039, + "step": 13480 + }, + { + "epoch": 2.056884765625e-05, + "step": 13480, + "training_step_time": 0.11092662811279297 + }, + { + "epoch": 2.057037353515625e-05, + "model_forward_time": 0.02570819854736328, + "step": 13481 + }, + { + "epoch": 2.057037353515625e-05, + "step": 13481, + "training_step_time": 0.10501623153686523 + }, + { + "epoch": 2.05718994140625e-05, + "model_forward_time": 0.025580167770385742, + "step": 13482 + }, + { + "epoch": 2.05718994140625e-05, + "step": 13482, + "training_step_time": 0.10663294792175293 + }, + { + "epoch": 2.057342529296875e-05, + "model_forward_time": 0.025522947311401367, + "step": 13483 + }, + { + "epoch": 2.057342529296875e-05, + "step": 13483, + "training_step_time": 0.10570430755615234 + }, + { + "epoch": 2.0574951171875e-05, + "model_forward_time": 0.025350332260131836, + "step": 13484 + }, + { + "epoch": 2.0574951171875e-05, + "step": 13484, + "training_step_time": 0.10508418083190918 + }, + { + "epoch": 2.057647705078125e-05, + "model_forward_time": 0.025224685668945312, + "step": 13485 + }, + { + "epoch": 2.057647705078125e-05, + "step": 13485, + "training_step_time": 0.10484957695007324 + }, + { + "epoch": 2.05780029296875e-05, + "model_forward_time": 0.025508403778076172, + "step": 13486 + }, + { + "epoch": 2.05780029296875e-05, + "step": 13486, + "training_step_time": 0.10888338088989258 + }, + { + "epoch": 2.057952880859375e-05, + "model_forward_time": 0.025072813034057617, + "step": 13487 + }, + { + "epoch": 2.057952880859375e-05, + "step": 13487, + "training_step_time": 0.10498213768005371 + }, + { + "epoch": 2.05810546875e-05, + "model_forward_time": 0.025693178176879883, + "step": 13488 + }, + { + "epoch": 2.05810546875e-05, + "step": 13488, + "training_step_time": 0.10777568817138672 + }, + { + "epoch": 2.058258056640625e-05, + "model_forward_time": 0.026051759719848633, + "step": 13489 + }, + { + "epoch": 2.058258056640625e-05, + "step": 13489, + "training_step_time": 0.10966873168945312 + }, + { + "epoch": 2.05841064453125e-05, + "grad_norm": 0.31566035747528076, + "learning_rate": 6.232769602505203e-05, + "loss": 0.0241, + "step": 13490 + }, + { + "epoch": 2.05841064453125e-05, + "model_forward_time": 0.025745868682861328, + "step": 13490 + }, + { + "epoch": 2.05841064453125e-05, + "step": 13490, + "training_step_time": 0.10670232772827148 + }, + { + "epoch": 2.058563232421875e-05, + "model_forward_time": 0.02549147605895996, + "step": 13491 + }, + { + "epoch": 2.058563232421875e-05, + "step": 13491, + "training_step_time": 0.10652661323547363 + }, + { + "epoch": 2.0587158203125e-05, + "model_forward_time": 0.02582526206970215, + "step": 13492 + }, + { + "epoch": 2.0587158203125e-05, + "step": 13492, + "training_step_time": 0.10699605941772461 + }, + { + "epoch": 2.058868408203125e-05, + "model_forward_time": 0.02557229995727539, + "step": 13493 + }, + { + "epoch": 2.058868408203125e-05, + "step": 13493, + "training_step_time": 0.10550284385681152 + }, + { + "epoch": 2.05902099609375e-05, + "model_forward_time": 0.02545166015625, + "step": 13494 + }, + { + "epoch": 2.05902099609375e-05, + "step": 13494, + "training_step_time": 0.10523653030395508 + }, + { + "epoch": 2.059173583984375e-05, + "model_forward_time": 0.025597810745239258, + "step": 13495 + }, + { + "epoch": 2.059173583984375e-05, + "step": 13495, + "training_step_time": 0.1056661605834961 + }, + { + "epoch": 2.059326171875e-05, + "model_forward_time": 0.02552962303161621, + "step": 13496 + }, + { + "epoch": 2.059326171875e-05, + "step": 13496, + "training_step_time": 0.1071176528930664 + }, + { + "epoch": 2.059478759765625e-05, + "model_forward_time": 0.025418996810913086, + "step": 13497 + }, + { + "epoch": 2.059478759765625e-05, + "step": 13497, + "training_step_time": 0.1056063175201416 + }, + { + "epoch": 2.05963134765625e-05, + "model_forward_time": 0.02577662467956543, + "step": 13498 + }, + { + "epoch": 2.05963134765625e-05, + "step": 13498, + "training_step_time": 0.10579228401184082 + }, + { + "epoch": 2.059783935546875e-05, + "model_forward_time": 0.025702476501464844, + "step": 13499 + }, + { + "epoch": 2.059783935546875e-05, + "step": 13499, + "training_step_time": 0.17978358268737793 + }, + { + "epoch": 2.0599365234375e-05, + "grad_norm": 0.2811414897441864, + "learning_rate": 6.227427435703997e-05, + "loss": 0.0124, + "step": 13500 + }, + { + "epoch": 2.0599365234375e-05, + "model_forward_time": 0.02506732940673828, + "step": 13500 + }, + { + "epoch": 2.0599365234375e-05, + "step": 13500, + "training_step_time": 0.10538029670715332 + }, + { + "epoch": 2.060089111328125e-05, + "model_forward_time": 0.024884462356567383, + "step": 13501 + }, + { + "epoch": 2.060089111328125e-05, + "step": 13501, + "training_step_time": 0.1276853084564209 + }, + { + "epoch": 2.06024169921875e-05, + "model_forward_time": 0.025621652603149414, + "step": 13502 + }, + { + "epoch": 2.06024169921875e-05, + "step": 13502, + "training_step_time": 0.13167500495910645 + }, + { + "epoch": 2.060394287109375e-05, + "model_forward_time": 0.025303125381469727, + "step": 13503 + }, + { + "epoch": 2.060394287109375e-05, + "step": 13503, + "training_step_time": 0.13707923889160156 + }, + { + "epoch": 2.060546875e-05, + "model_forward_time": 0.02512383460998535, + "step": 13504 + }, + { + "epoch": 2.060546875e-05, + "step": 13504, + "training_step_time": 0.11810469627380371 + }, + { + "epoch": 2.060699462890625e-05, + "model_forward_time": 0.028889894485473633, + "step": 13505 + }, + { + "epoch": 2.060699462890625e-05, + "step": 13505, + "training_step_time": 0.11882901191711426 + }, + { + "epoch": 2.06085205078125e-05, + "model_forward_time": 0.024982452392578125, + "step": 13506 + }, + { + "epoch": 2.06085205078125e-05, + "step": 13506, + "training_step_time": 0.10388326644897461 + }, + { + "epoch": 2.061004638671875e-05, + "model_forward_time": 0.02473306655883789, + "step": 13507 + }, + { + "epoch": 2.061004638671875e-05, + "step": 13507, + "training_step_time": 0.1320035457611084 + }, + { + "epoch": 2.0611572265625e-05, + "model_forward_time": 0.025761127471923828, + "step": 13508 + }, + { + "epoch": 2.0611572265625e-05, + "step": 13508, + "training_step_time": 0.2026369571685791 + }, + { + "epoch": 2.061309814453125e-05, + "model_forward_time": 0.026338815689086914, + "step": 13509 + }, + { + "epoch": 2.061309814453125e-05, + "step": 13509, + "training_step_time": 0.1712629795074463 + }, + { + "epoch": 2.06146240234375e-05, + "grad_norm": 0.3576022684574127, + "learning_rate": 6.222083777462715e-05, + "loss": 0.0249, + "step": 13510 + }, + { + "epoch": 2.06146240234375e-05, + "model_forward_time": 0.024466991424560547, + "step": 13510 + }, + { + "epoch": 2.06146240234375e-05, + "step": 13510, + "training_step_time": 0.1889963150024414 + }, + { + "epoch": 2.061614990234375e-05, + "model_forward_time": 0.025861740112304688, + "step": 13511 + }, + { + "epoch": 2.061614990234375e-05, + "step": 13511, + "training_step_time": 0.14404678344726562 + }, + { + "epoch": 2.061767578125e-05, + "model_forward_time": 0.02688741683959961, + "step": 13512 + }, + { + "epoch": 2.061767578125e-05, + "step": 13512, + "training_step_time": 0.14098334312438965 + }, + { + "epoch": 2.061920166015625e-05, + "model_forward_time": 0.024428367614746094, + "step": 13513 + }, + { + "epoch": 2.061920166015625e-05, + "step": 13513, + "training_step_time": 0.12908458709716797 + }, + { + "epoch": 2.06207275390625e-05, + "model_forward_time": 0.024909019470214844, + "step": 13514 + }, + { + "epoch": 2.06207275390625e-05, + "step": 13514, + "training_step_time": 0.12499356269836426 + }, + { + "epoch": 2.062225341796875e-05, + "model_forward_time": 0.025639057159423828, + "step": 13515 + }, + { + "epoch": 2.062225341796875e-05, + "step": 13515, + "training_step_time": 0.11852765083312988 + }, + { + "epoch": 2.0623779296875e-05, + "model_forward_time": 0.025999069213867188, + "step": 13516 + }, + { + "epoch": 2.0623779296875e-05, + "step": 13516, + "training_step_time": 0.11444544792175293 + }, + { + "epoch": 2.062530517578125e-05, + "model_forward_time": 0.025890350341796875, + "step": 13517 + }, + { + "epoch": 2.062530517578125e-05, + "step": 13517, + "training_step_time": 0.12332773208618164 + }, + { + "epoch": 2.06268310546875e-05, + "model_forward_time": 0.02600264549255371, + "step": 13518 + }, + { + "epoch": 2.06268310546875e-05, + "step": 13518, + "training_step_time": 0.16089296340942383 + }, + { + "epoch": 2.062835693359375e-05, + "model_forward_time": 0.02433919906616211, + "step": 13519 + }, + { + "epoch": 2.062835693359375e-05, + "step": 13519, + "training_step_time": 0.21832513809204102 + }, + { + "epoch": 2.06298828125e-05, + "grad_norm": 0.2942567765712738, + "learning_rate": 6.216738634274411e-05, + "loss": 0.0251, + "step": 13520 + }, + { + "epoch": 2.06298828125e-05, + "model_forward_time": 0.025035381317138672, + "step": 13520 + }, + { + "epoch": 2.06298828125e-05, + "step": 13520, + "training_step_time": 0.1167595386505127 + }, + { + "epoch": 2.063140869140625e-05, + "model_forward_time": 0.02455306053161621, + "step": 13521 + }, + { + "epoch": 2.063140869140625e-05, + "step": 13521, + "training_step_time": 0.10366249084472656 + }, + { + "epoch": 2.06329345703125e-05, + "model_forward_time": 0.02541375160217285, + "step": 13522 + }, + { + "epoch": 2.06329345703125e-05, + "step": 13522, + "training_step_time": 0.10523533821105957 + }, + { + "epoch": 2.063446044921875e-05, + "model_forward_time": 0.025339365005493164, + "step": 13523 + }, + { + "epoch": 2.063446044921875e-05, + "step": 13523, + "training_step_time": 0.10650134086608887 + }, + { + "epoch": 2.0635986328125e-05, + "model_forward_time": 0.02537083625793457, + "step": 13524 + }, + { + "epoch": 2.0635986328125e-05, + "step": 13524, + "training_step_time": 0.1060328483581543 + }, + { + "epoch": 2.063751220703125e-05, + "model_forward_time": 0.02584385871887207, + "step": 13525 + }, + { + "epoch": 2.063751220703125e-05, + "step": 13525, + "training_step_time": 0.10665106773376465 + }, + { + "epoch": 2.06390380859375e-05, + "model_forward_time": 0.025703907012939453, + "step": 13526 + }, + { + "epoch": 2.06390380859375e-05, + "step": 13526, + "training_step_time": 0.1108710765838623 + }, + { + "epoch": 2.064056396484375e-05, + "model_forward_time": 0.02501535415649414, + "step": 13527 + }, + { + "epoch": 2.064056396484375e-05, + "step": 13527, + "training_step_time": 0.10784006118774414 + }, + { + "epoch": 2.064208984375e-05, + "model_forward_time": 0.025657176971435547, + "step": 13528 + }, + { + "epoch": 2.064208984375e-05, + "step": 13528, + "training_step_time": 0.10536408424377441 + }, + { + "epoch": 2.064361572265625e-05, + "model_forward_time": 0.025498390197753906, + "step": 13529 + }, + { + "epoch": 2.064361572265625e-05, + "step": 13529, + "training_step_time": 0.10569286346435547 + }, + { + "epoch": 2.06451416015625e-05, + "grad_norm": 0.36455395817756653, + "learning_rate": 6.211392012633932e-05, + "loss": 0.0254, + "step": 13530 + }, + { + "epoch": 2.06451416015625e-05, + "model_forward_time": 0.02626323699951172, + "step": 13530 + }, + { + "epoch": 2.06451416015625e-05, + "step": 13530, + "training_step_time": 0.11243510246276855 + }, + { + "epoch": 2.064666748046875e-05, + "model_forward_time": 0.026011228561401367, + "step": 13531 + }, + { + "epoch": 2.064666748046875e-05, + "step": 13531, + "training_step_time": 0.10503387451171875 + }, + { + "epoch": 2.0648193359375e-05, + "model_forward_time": 0.025607585906982422, + "step": 13532 + }, + { + "epoch": 2.0648193359375e-05, + "step": 13532, + "training_step_time": 0.10560750961303711 + }, + { + "epoch": 2.064971923828125e-05, + "model_forward_time": 0.026081562042236328, + "step": 13533 + }, + { + "epoch": 2.064971923828125e-05, + "step": 13533, + "training_step_time": 0.11016416549682617 + }, + { + "epoch": 2.06512451171875e-05, + "model_forward_time": 0.02525639533996582, + "step": 13534 + }, + { + "epoch": 2.06512451171875e-05, + "step": 13534, + "training_step_time": 0.10515308380126953 + }, + { + "epoch": 2.065277099609375e-05, + "model_forward_time": 0.025351524353027344, + "step": 13535 + }, + { + "epoch": 2.065277099609375e-05, + "step": 13535, + "training_step_time": 0.1093449592590332 + }, + { + "epoch": 2.0654296875e-05, + "model_forward_time": 0.02552938461303711, + "step": 13536 + }, + { + "epoch": 2.0654296875e-05, + "step": 13536, + "training_step_time": 0.10496139526367188 + }, + { + "epoch": 2.065582275390625e-05, + "model_forward_time": 0.025290966033935547, + "step": 13537 + }, + { + "epoch": 2.065582275390625e-05, + "step": 13537, + "training_step_time": 0.1048898696899414 + }, + { + "epoch": 2.06573486328125e-05, + "model_forward_time": 0.0255739688873291, + "step": 13538 + }, + { + "epoch": 2.06573486328125e-05, + "step": 13538, + "training_step_time": 0.10610198974609375 + }, + { + "epoch": 2.065887451171875e-05, + "model_forward_time": 0.025422334671020508, + "step": 13539 + }, + { + "epoch": 2.065887451171875e-05, + "step": 13539, + "training_step_time": 0.10854792594909668 + }, + { + "epoch": 2.0660400390625e-05, + "grad_norm": 0.29043322801589966, + "learning_rate": 6.206043919037933e-05, + "loss": 0.0215, + "step": 13540 + }, + { + "epoch": 2.0660400390625e-05, + "model_forward_time": 0.025322675704956055, + "step": 13540 + }, + { + "epoch": 2.0660400390625e-05, + "step": 13540, + "training_step_time": 0.10613369941711426 + }, + { + "epoch": 2.066192626953125e-05, + "model_forward_time": 0.025369644165039062, + "step": 13541 + }, + { + "epoch": 2.066192626953125e-05, + "step": 13541, + "training_step_time": 0.10640740394592285 + }, + { + "epoch": 2.06634521484375e-05, + "model_forward_time": 0.02557682991027832, + "step": 13542 + }, + { + "epoch": 2.06634521484375e-05, + "step": 13542, + "training_step_time": 0.10752701759338379 + }, + { + "epoch": 2.066497802734375e-05, + "model_forward_time": 0.026835203170776367, + "step": 13543 + }, + { + "epoch": 2.066497802734375e-05, + "step": 13543, + "training_step_time": 0.1137089729309082 + }, + { + "epoch": 2.066650390625e-05, + "model_forward_time": 0.026080608367919922, + "step": 13544 + }, + { + "epoch": 2.066650390625e-05, + "step": 13544, + "training_step_time": 0.10647463798522949 + }, + { + "epoch": 2.066802978515625e-05, + "model_forward_time": 0.02614140510559082, + "step": 13545 + }, + { + "epoch": 2.066802978515625e-05, + "step": 13545, + "training_step_time": 0.15695667266845703 + }, + { + "epoch": 2.06695556640625e-05, + "model_forward_time": 0.025475740432739258, + "step": 13546 + }, + { + "epoch": 2.06695556640625e-05, + "step": 13546, + "training_step_time": 0.22822093963623047 + }, + { + "epoch": 2.067108154296875e-05, + "model_forward_time": 0.025451183319091797, + "step": 13547 + }, + { + "epoch": 2.067108154296875e-05, + "step": 13547, + "training_step_time": 0.16279911994934082 + }, + { + "epoch": 2.0672607421875e-05, + "model_forward_time": 0.0246584415435791, + "step": 13548 + }, + { + "epoch": 2.0672607421875e-05, + "step": 13548, + "training_step_time": 0.19060587882995605 + }, + { + "epoch": 2.067413330078125e-05, + "model_forward_time": 0.02529287338256836, + "step": 13549 + }, + { + "epoch": 2.067413330078125e-05, + "step": 13549, + "training_step_time": 0.18202757835388184 + }, + { + "epoch": 2.06756591796875e-05, + "grad_norm": 0.27025356888771057, + "learning_rate": 6.200694359984849e-05, + "loss": 0.0145, + "step": 13550 + }, + { + "epoch": 2.06756591796875e-05, + "model_forward_time": 0.02523517608642578, + "step": 13550 + }, + { + "epoch": 2.06756591796875e-05, + "step": 13550, + "training_step_time": 0.1037900447845459 + }, + { + "epoch": 2.067718505859375e-05, + "model_forward_time": 0.024685382843017578, + "step": 13551 + }, + { + "epoch": 2.067718505859375e-05, + "step": 13551, + "training_step_time": 0.17618966102600098 + }, + { + "epoch": 2.06787109375e-05, + "model_forward_time": 0.02469611167907715, + "step": 13552 + }, + { + "epoch": 2.06787109375e-05, + "step": 13552, + "training_step_time": 0.16537880897521973 + }, + { + "epoch": 2.068023681640625e-05, + "model_forward_time": 0.024758338928222656, + "step": 13553 + }, + { + "epoch": 2.068023681640625e-05, + "step": 13553, + "training_step_time": 0.10965704917907715 + }, + { + "epoch": 2.06817626953125e-05, + "model_forward_time": 0.02538466453552246, + "step": 13554 + }, + { + "epoch": 2.06817626953125e-05, + "step": 13554, + "training_step_time": 0.1163020133972168 + }, + { + "epoch": 2.068328857421875e-05, + "model_forward_time": 0.02577829360961914, + "step": 13555 + }, + { + "epoch": 2.068328857421875e-05, + "step": 13555, + "training_step_time": 0.1302950382232666 + }, + { + "epoch": 2.0684814453125e-05, + "model_forward_time": 0.0257413387298584, + "step": 13556 + }, + { + "epoch": 2.0684814453125e-05, + "step": 13556, + "training_step_time": 0.1236116886138916 + }, + { + "epoch": 2.068634033203125e-05, + "model_forward_time": 0.02552628517150879, + "step": 13557 + }, + { + "epoch": 2.068634033203125e-05, + "step": 13557, + "training_step_time": 0.12032341957092285 + }, + { + "epoch": 2.06878662109375e-05, + "model_forward_time": 0.025516748428344727, + "step": 13558 + }, + { + "epoch": 2.06878662109375e-05, + "step": 13558, + "training_step_time": 0.12189531326293945 + }, + { + "epoch": 2.068939208984375e-05, + "model_forward_time": 0.025724411010742188, + "step": 13559 + }, + { + "epoch": 2.068939208984375e-05, + "step": 13559, + "training_step_time": 0.12296533584594727 + }, + { + "epoch": 2.069091796875e-05, + "grad_norm": 0.16759471595287323, + "learning_rate": 6.195343341974899e-05, + "loss": 0.0232, + "step": 13560 + }, + { + "epoch": 2.069091796875e-05, + "model_forward_time": 0.024808645248413086, + "step": 13560 + }, + { + "epoch": 2.069091796875e-05, + "step": 13560, + "training_step_time": 0.12245917320251465 + }, + { + "epoch": 2.069244384765625e-05, + "model_forward_time": 0.025620460510253906, + "step": 13561 + }, + { + "epoch": 2.069244384765625e-05, + "step": 13561, + "training_step_time": 0.12115120887756348 + }, + { + "epoch": 2.06939697265625e-05, + "model_forward_time": 0.026069164276123047, + "step": 13562 + }, + { + "epoch": 2.06939697265625e-05, + "step": 13562, + "training_step_time": 0.1276533603668213 + }, + { + "epoch": 2.069549560546875e-05, + "model_forward_time": 0.025541067123413086, + "step": 13563 + }, + { + "epoch": 2.069549560546875e-05, + "step": 13563, + "training_step_time": 0.15735912322998047 + }, + { + "epoch": 2.0697021484375e-05, + "model_forward_time": 0.024887800216674805, + "step": 13564 + }, + { + "epoch": 2.0697021484375e-05, + "step": 13564, + "training_step_time": 0.21988320350646973 + }, + { + "epoch": 2.069854736328125e-05, + "model_forward_time": 0.024647951126098633, + "step": 13565 + }, + { + "epoch": 2.069854736328125e-05, + "step": 13565, + "training_step_time": 0.11358976364135742 + }, + { + "epoch": 2.07000732421875e-05, + "model_forward_time": 0.025397300720214844, + "step": 13566 + }, + { + "epoch": 2.07000732421875e-05, + "step": 13566, + "training_step_time": 0.10919880867004395 + }, + { + "epoch": 2.070159912109375e-05, + "model_forward_time": 0.025625228881835938, + "step": 13567 + }, + { + "epoch": 2.070159912109375e-05, + "step": 13567, + "training_step_time": 0.11376523971557617 + }, + { + "epoch": 2.0703125e-05, + "model_forward_time": 0.02531886100769043, + "step": 13568 + }, + { + "epoch": 2.0703125e-05, + "step": 13568, + "training_step_time": 0.10929059982299805 + }, + { + "epoch": 2.070465087890625e-05, + "model_forward_time": 0.02492046356201172, + "step": 13569 + }, + { + "epoch": 2.070465087890625e-05, + "step": 13569, + "training_step_time": 0.10867166519165039 + }, + { + "epoch": 2.07061767578125e-05, + "grad_norm": 0.2568618357181549, + "learning_rate": 6.189990871510078e-05, + "loss": 0.0159, + "step": 13570 + }, + { + "epoch": 2.07061767578125e-05, + "model_forward_time": 0.024616003036499023, + "step": 13570 + }, + { + "epoch": 2.07061767578125e-05, + "step": 13570, + "training_step_time": 0.10872244834899902 + }, + { + "epoch": 2.070770263671875e-05, + "model_forward_time": 0.02526998519897461, + "step": 13571 + }, + { + "epoch": 2.070770263671875e-05, + "step": 13571, + "training_step_time": 0.10507607460021973 + }, + { + "epoch": 2.0709228515625e-05, + "model_forward_time": 0.025464773178100586, + "step": 13572 + }, + { + "epoch": 2.0709228515625e-05, + "step": 13572, + "training_step_time": 0.1047205924987793 + }, + { + "epoch": 2.071075439453125e-05, + "model_forward_time": 0.02590203285217285, + "step": 13573 + }, + { + "epoch": 2.071075439453125e-05, + "step": 13573, + "training_step_time": 0.10735416412353516 + }, + { + "epoch": 2.07122802734375e-05, + "model_forward_time": 0.025557994842529297, + "step": 13574 + }, + { + "epoch": 2.07122802734375e-05, + "step": 13574, + "training_step_time": 0.10766267776489258 + }, + { + "epoch": 2.071380615234375e-05, + "model_forward_time": 0.025234460830688477, + "step": 13575 + }, + { + "epoch": 2.071380615234375e-05, + "step": 13575, + "training_step_time": 0.10898280143737793 + }, + { + "epoch": 2.071533203125e-05, + "model_forward_time": 0.025351762771606445, + "step": 13576 + }, + { + "epoch": 2.071533203125e-05, + "step": 13576, + "training_step_time": 0.10595297813415527 + }, + { + "epoch": 2.071685791015625e-05, + "model_forward_time": 0.025415897369384766, + "step": 13577 + }, + { + "epoch": 2.071685791015625e-05, + "step": 13577, + "training_step_time": 0.11336827278137207 + }, + { + "epoch": 2.07183837890625e-05, + "model_forward_time": 0.026666879653930664, + "step": 13578 + }, + { + "epoch": 2.07183837890625e-05, + "step": 13578, + "training_step_time": 0.10653972625732422 + }, + { + "epoch": 2.071990966796875e-05, + "model_forward_time": 0.025657176971435547, + "step": 13579 + }, + { + "epoch": 2.071990966796875e-05, + "step": 13579, + "training_step_time": 0.10499072074890137 + }, + { + "epoch": 2.0721435546875e-05, + "grad_norm": 0.3949906826019287, + "learning_rate": 6.184636955094138e-05, + "loss": 0.0161, + "step": 13580 + }, + { + "epoch": 2.0721435546875e-05, + "model_forward_time": 0.025122880935668945, + "step": 13580 + }, + { + "epoch": 2.0721435546875e-05, + "step": 13580, + "training_step_time": 0.10523748397827148 + }, + { + "epoch": 2.072296142578125e-05, + "model_forward_time": 0.02554178237915039, + "step": 13581 + }, + { + "epoch": 2.072296142578125e-05, + "step": 13581, + "training_step_time": 0.10465693473815918 + }, + { + "epoch": 2.07244873046875e-05, + "model_forward_time": 0.025726795196533203, + "step": 13582 + }, + { + "epoch": 2.07244873046875e-05, + "step": 13582, + "training_step_time": 0.10459494590759277 + }, + { + "epoch": 2.072601318359375e-05, + "model_forward_time": 0.025447368621826172, + "step": 13583 + }, + { + "epoch": 2.072601318359375e-05, + "step": 13583, + "training_step_time": 0.10417723655700684 + }, + { + "epoch": 2.07275390625e-05, + "model_forward_time": 0.025584936141967773, + "step": 13584 + }, + { + "epoch": 2.07275390625e-05, + "step": 13584, + "training_step_time": 0.10549402236938477 + }, + { + "epoch": 2.072906494140625e-05, + "model_forward_time": 0.025400161743164062, + "step": 13585 + }, + { + "epoch": 2.072906494140625e-05, + "step": 13585, + "training_step_time": 0.10564112663269043 + }, + { + "epoch": 2.07305908203125e-05, + "model_forward_time": 0.02568960189819336, + "step": 13586 + }, + { + "epoch": 2.07305908203125e-05, + "step": 13586, + "training_step_time": 0.1101067066192627 + }, + { + "epoch": 2.073211669921875e-05, + "model_forward_time": 0.025120258331298828, + "step": 13587 + }, + { + "epoch": 2.073211669921875e-05, + "step": 13587, + "training_step_time": 0.10855317115783691 + }, + { + "epoch": 2.0733642578125e-05, + "model_forward_time": 0.025241613388061523, + "step": 13588 + }, + { + "epoch": 2.0733642578125e-05, + "step": 13588, + "training_step_time": 0.10760617256164551 + }, + { + "epoch": 2.073516845703125e-05, + "model_forward_time": 0.02600574493408203, + "step": 13589 + }, + { + "epoch": 2.073516845703125e-05, + "step": 13589, + "training_step_time": 0.10991024971008301 + }, + { + "epoch": 2.07366943359375e-05, + "grad_norm": 0.23415841162204742, + "learning_rate": 6.179281599232591e-05, + "loss": 0.0242, + "step": 13590 + }, + { + "epoch": 2.07366943359375e-05, + "model_forward_time": 0.02530384063720703, + "step": 13590 + }, + { + "epoch": 2.07366943359375e-05, + "step": 13590, + "training_step_time": 0.19319605827331543 + }, + { + "epoch": 2.073822021484375e-05, + "model_forward_time": 0.02474832534790039, + "step": 13591 + }, + { + "epoch": 2.073822021484375e-05, + "step": 13591, + "training_step_time": 0.11527585983276367 + }, + { + "epoch": 2.073974609375e-05, + "model_forward_time": 0.024869918823242188, + "step": 13592 + }, + { + "epoch": 2.073974609375e-05, + "step": 13592, + "training_step_time": 0.11902046203613281 + }, + { + "epoch": 2.074127197265625e-05, + "model_forward_time": 0.025594234466552734, + "step": 13593 + }, + { + "epoch": 2.074127197265625e-05, + "step": 13593, + "training_step_time": 0.14376306533813477 + }, + { + "epoch": 2.07427978515625e-05, + "model_forward_time": 0.025374650955200195, + "step": 13594 + }, + { + "epoch": 2.07427978515625e-05, + "step": 13594, + "training_step_time": 0.2284080982208252 + }, + { + "epoch": 2.074432373046875e-05, + "model_forward_time": 0.025103092193603516, + "step": 13595 + }, + { + "epoch": 2.074432373046875e-05, + "step": 13595, + "training_step_time": 0.21947169303894043 + }, + { + "epoch": 2.0745849609375e-05, + "model_forward_time": 0.024671554565429688, + "step": 13596 + }, + { + "epoch": 2.0745849609375e-05, + "step": 13596, + "training_step_time": 0.2086644172668457 + }, + { + "epoch": 2.074737548828125e-05, + "model_forward_time": 0.024730920791625977, + "step": 13597 + }, + { + "epoch": 2.074737548828125e-05, + "step": 13597, + "training_step_time": 0.15371179580688477 + }, + { + "epoch": 2.07489013671875e-05, + "model_forward_time": 0.02458333969116211, + "step": 13598 + }, + { + "epoch": 2.07489013671875e-05, + "step": 13598, + "training_step_time": 0.1919417381286621 + }, + { + "epoch": 2.075042724609375e-05, + "model_forward_time": 0.025199174880981445, + "step": 13599 + }, + { + "epoch": 2.075042724609375e-05, + "step": 13599, + "training_step_time": 0.11182117462158203 + }, + { + "epoch": 2.0751953125e-05, + "grad_norm": 0.3446387052536011, + "learning_rate": 6.173924810432705e-05, + "loss": 0.0328, + "step": 13600 + }, + { + "epoch": 2.0751953125e-05, + "model_forward_time": 0.024994850158691406, + "step": 13600 + }, + { + "epoch": 2.0751953125e-05, + "step": 13600, + "training_step_time": 0.11134648323059082 + }, + { + "epoch": 2.075347900390625e-05, + "model_forward_time": 0.02562117576599121, + "step": 13601 + }, + { + "epoch": 2.075347900390625e-05, + "step": 13601, + "training_step_time": 0.11544203758239746 + }, + { + "epoch": 2.07550048828125e-05, + "model_forward_time": 0.02560567855834961, + "step": 13602 + }, + { + "epoch": 2.07550048828125e-05, + "step": 13602, + "training_step_time": 0.10756945610046387 + }, + { + "epoch": 2.075653076171875e-05, + "model_forward_time": 0.025394439697265625, + "step": 13603 + }, + { + "epoch": 2.075653076171875e-05, + "step": 13603, + "training_step_time": 0.10807442665100098 + }, + { + "epoch": 2.0758056640625e-05, + "model_forward_time": 0.025594472885131836, + "step": 13604 + }, + { + "epoch": 2.0758056640625e-05, + "step": 13604, + "training_step_time": 0.10811233520507812 + }, + { + "epoch": 2.075958251953125e-05, + "model_forward_time": 0.02522730827331543, + "step": 13605 + }, + { + "epoch": 2.075958251953125e-05, + "step": 13605, + "training_step_time": 0.1431865692138672 + }, + { + "epoch": 2.07611083984375e-05, + "model_forward_time": 0.025676250457763672, + "step": 13606 + }, + { + "epoch": 2.07611083984375e-05, + "step": 13606, + "training_step_time": 0.13843846321105957 + }, + { + "epoch": 2.076263427734375e-05, + "model_forward_time": 0.02462148666381836, + "step": 13607 + }, + { + "epoch": 2.076263427734375e-05, + "step": 13607, + "training_step_time": 0.11219406127929688 + }, + { + "epoch": 2.076416015625e-05, + "model_forward_time": 0.025599002838134766, + "step": 13608 + }, + { + "epoch": 2.076416015625e-05, + "step": 13608, + "training_step_time": 0.1130824089050293 + }, + { + "epoch": 2.076568603515625e-05, + "model_forward_time": 0.02474355697631836, + "step": 13609 + }, + { + "epoch": 2.076568603515625e-05, + "step": 13609, + "training_step_time": 0.10604166984558105 + }, + { + "epoch": 2.07672119140625e-05, + "grad_norm": 0.22904595732688904, + "learning_rate": 6.168566595203479e-05, + "loss": 0.0141, + "step": 13610 + }, + { + "epoch": 2.07672119140625e-05, + "model_forward_time": 0.024632692337036133, + "step": 13610 + }, + { + "epoch": 2.07672119140625e-05, + "step": 13610, + "training_step_time": 0.17171549797058105 + }, + { + "epoch": 2.076873779296875e-05, + "model_forward_time": 0.02483057975769043, + "step": 13611 + }, + { + "epoch": 2.076873779296875e-05, + "step": 13611, + "training_step_time": 0.16538381576538086 + }, + { + "epoch": 2.0770263671875e-05, + "model_forward_time": 0.024667739868164062, + "step": 13612 + }, + { + "epoch": 2.0770263671875e-05, + "step": 13612, + "training_step_time": 0.10505175590515137 + }, + { + "epoch": 2.077178955078125e-05, + "model_forward_time": 0.024743318557739258, + "step": 13613 + }, + { + "epoch": 2.077178955078125e-05, + "step": 13613, + "training_step_time": 0.10822892189025879 + }, + { + "epoch": 2.07733154296875e-05, + "model_forward_time": 0.024968862533569336, + "step": 13614 + }, + { + "epoch": 2.07733154296875e-05, + "step": 13614, + "training_step_time": 0.11079597473144531 + }, + { + "epoch": 2.077484130859375e-05, + "model_forward_time": 0.025288105010986328, + "step": 13615 + }, + { + "epoch": 2.077484130859375e-05, + "step": 13615, + "training_step_time": 0.14905571937561035 + }, + { + "epoch": 2.07763671875e-05, + "model_forward_time": 0.02492380142211914, + "step": 13616 + }, + { + "epoch": 2.07763671875e-05, + "step": 13616, + "training_step_time": 0.1673579216003418 + }, + { + "epoch": 2.077789306640625e-05, + "model_forward_time": 0.024719953536987305, + "step": 13617 + }, + { + "epoch": 2.077789306640625e-05, + "step": 13617, + "training_step_time": 0.15595412254333496 + }, + { + "epoch": 2.07794189453125e-05, + "model_forward_time": 0.024540424346923828, + "step": 13618 + }, + { + "epoch": 2.07794189453125e-05, + "step": 13618, + "training_step_time": 0.14380502700805664 + }, + { + "epoch": 2.078094482421875e-05, + "model_forward_time": 0.02460503578186035, + "step": 13619 + }, + { + "epoch": 2.078094482421875e-05, + "step": 13619, + "training_step_time": 0.1566150188446045 + }, + { + "epoch": 2.0782470703125e-05, + "grad_norm": 0.37864014506340027, + "learning_rate": 6.163206960055651e-05, + "loss": 0.0279, + "step": 13620 + }, + { + "epoch": 2.0782470703125e-05, + "model_forward_time": 0.02496027946472168, + "step": 13620 + }, + { + "epoch": 2.0782470703125e-05, + "step": 13620, + "training_step_time": 0.1282200813293457 + }, + { + "epoch": 2.078399658203125e-05, + "model_forward_time": 0.024662494659423828, + "step": 13621 + }, + { + "epoch": 2.078399658203125e-05, + "step": 13621, + "training_step_time": 0.12955975532531738 + }, + { + "epoch": 2.07855224609375e-05, + "model_forward_time": 0.024855613708496094, + "step": 13622 + }, + { + "epoch": 2.07855224609375e-05, + "step": 13622, + "training_step_time": 0.12385082244873047 + }, + { + "epoch": 2.078704833984375e-05, + "model_forward_time": 0.02506089210510254, + "step": 13623 + }, + { + "epoch": 2.078704833984375e-05, + "step": 13623, + "training_step_time": 0.11514592170715332 + }, + { + "epoch": 2.078857421875e-05, + "model_forward_time": 0.02560567855834961, + "step": 13624 + }, + { + "epoch": 2.078857421875e-05, + "step": 13624, + "training_step_time": 0.11656022071838379 + }, + { + "epoch": 2.079010009765625e-05, + "model_forward_time": 0.0254366397857666, + "step": 13625 + }, + { + "epoch": 2.079010009765625e-05, + "step": 13625, + "training_step_time": 0.11447405815124512 + }, + { + "epoch": 2.07916259765625e-05, + "model_forward_time": 0.0255281925201416, + "step": 13626 + }, + { + "epoch": 2.07916259765625e-05, + "step": 13626, + "training_step_time": 0.11028385162353516 + }, + { + "epoch": 2.079315185546875e-05, + "model_forward_time": 0.025513887405395508, + "step": 13627 + }, + { + "epoch": 2.079315185546875e-05, + "step": 13627, + "training_step_time": 0.1080942153930664 + }, + { + "epoch": 2.0794677734375e-05, + "model_forward_time": 0.025769472122192383, + "step": 13628 + }, + { + "epoch": 2.0794677734375e-05, + "step": 13628, + "training_step_time": 0.10908222198486328 + }, + { + "epoch": 2.079620361328125e-05, + "model_forward_time": 0.025214672088623047, + "step": 13629 + }, + { + "epoch": 2.079620361328125e-05, + "step": 13629, + "training_step_time": 0.10624241828918457 + }, + { + "epoch": 2.07977294921875e-05, + "grad_norm": 0.4058496952056885, + "learning_rate": 6.157845911501684e-05, + "loss": 0.0164, + "step": 13630 + }, + { + "epoch": 2.07977294921875e-05, + "model_forward_time": 0.02564406394958496, + "step": 13630 + }, + { + "epoch": 2.07977294921875e-05, + "step": 13630, + "training_step_time": 0.1081094741821289 + }, + { + "epoch": 2.079925537109375e-05, + "model_forward_time": 0.025511980056762695, + "step": 13631 + }, + { + "epoch": 2.079925537109375e-05, + "step": 13631, + "training_step_time": 0.10627388954162598 + }, + { + "epoch": 2.080078125e-05, + "model_forward_time": 0.02545785903930664, + "step": 13632 + }, + { + "epoch": 2.080078125e-05, + "step": 13632, + "training_step_time": 0.10564494132995605 + }, + { + "epoch": 2.080230712890625e-05, + "model_forward_time": 0.025623083114624023, + "step": 13633 + }, + { + "epoch": 2.080230712890625e-05, + "step": 13633, + "training_step_time": 0.10777878761291504 + }, + { + "epoch": 2.08038330078125e-05, + "model_forward_time": 0.025361061096191406, + "step": 13634 + }, + { + "epoch": 2.08038330078125e-05, + "step": 13634, + "training_step_time": 0.10615873336791992 + }, + { + "epoch": 2.080535888671875e-05, + "model_forward_time": 0.0254056453704834, + "step": 13635 + }, + { + "epoch": 2.080535888671875e-05, + "step": 13635, + "training_step_time": 0.1260547637939453 + }, + { + "epoch": 2.0806884765625e-05, + "model_forward_time": 0.02587604522705078, + "step": 13636 + }, + { + "epoch": 2.0806884765625e-05, + "step": 13636, + "training_step_time": 0.12718892097473145 + }, + { + "epoch": 2.080841064453125e-05, + "model_forward_time": 0.025219202041625977, + "step": 13637 + }, + { + "epoch": 2.080841064453125e-05, + "step": 13637, + "training_step_time": 0.21882390975952148 + }, + { + "epoch": 2.08099365234375e-05, + "model_forward_time": 0.025017738342285156, + "step": 13638 + }, + { + "epoch": 2.08099365234375e-05, + "step": 13638, + "training_step_time": 0.14789438247680664 + }, + { + "epoch": 2.081146240234375e-05, + "model_forward_time": 0.024890422821044922, + "step": 13639 + }, + { + "epoch": 2.081146240234375e-05, + "step": 13639, + "training_step_time": 0.1112062931060791 + }, + { + "epoch": 2.081298828125e-05, + "grad_norm": 0.31141793727874756, + "learning_rate": 6.152483456055756e-05, + "loss": 0.0179, + "step": 13640 + }, + { + "epoch": 2.081298828125e-05, + "model_forward_time": 0.02823805809020996, + "step": 13640 + }, + { + "epoch": 2.081298828125e-05, + "step": 13640, + "training_step_time": 0.11250591278076172 + }, + { + "epoch": 2.081451416015625e-05, + "model_forward_time": 0.025721073150634766, + "step": 13641 + }, + { + "epoch": 2.081451416015625e-05, + "step": 13641, + "training_step_time": 0.1622319221496582 + }, + { + "epoch": 2.08160400390625e-05, + "model_forward_time": 0.024960994720458984, + "step": 13642 + }, + { + "epoch": 2.08160400390625e-05, + "step": 13642, + "training_step_time": 0.17319297790527344 + }, + { + "epoch": 2.081756591796875e-05, + "model_forward_time": 0.025726318359375, + "step": 13643 + }, + { + "epoch": 2.081756591796875e-05, + "step": 13643, + "training_step_time": 0.11777639389038086 + }, + { + "epoch": 2.0819091796875e-05, + "model_forward_time": 0.024801969528198242, + "step": 13644 + }, + { + "epoch": 2.0819091796875e-05, + "step": 13644, + "training_step_time": 0.12955713272094727 + }, + { + "epoch": 2.082061767578125e-05, + "model_forward_time": 0.02549290657043457, + "step": 13645 + }, + { + "epoch": 2.082061767578125e-05, + "step": 13645, + "training_step_time": 0.11130213737487793 + }, + { + "epoch": 2.08221435546875e-05, + "model_forward_time": 0.025822162628173828, + "step": 13646 + }, + { + "epoch": 2.08221435546875e-05, + "step": 13646, + "training_step_time": 0.11555647850036621 + }, + { + "epoch": 2.082366943359375e-05, + "model_forward_time": 0.026665687561035156, + "step": 13647 + }, + { + "epoch": 2.082366943359375e-05, + "step": 13647, + "training_step_time": 0.14503169059753418 + }, + { + "epoch": 2.08251953125e-05, + "model_forward_time": 0.025708675384521484, + "step": 13648 + }, + { + "epoch": 2.08251953125e-05, + "step": 13648, + "training_step_time": 0.13213109970092773 + }, + { + "epoch": 2.082672119140625e-05, + "model_forward_time": 0.024899721145629883, + "step": 13649 + }, + { + "epoch": 2.082672119140625e-05, + "step": 13649, + "training_step_time": 0.2188856601715088 + }, + { + "epoch": 2.08282470703125e-05, + "grad_norm": 0.41740694642066956, + "learning_rate": 6.147119600233758e-05, + "loss": 0.0214, + "step": 13650 + }, + { + "epoch": 2.08282470703125e-05, + "model_forward_time": 0.02570819854736328, + "step": 13650 + }, + { + "epoch": 2.08282470703125e-05, + "step": 13650, + "training_step_time": 0.12549614906311035 + }, + { + "epoch": 2.082977294921875e-05, + "model_forward_time": 0.02491450309753418, + "step": 13651 + }, + { + "epoch": 2.082977294921875e-05, + "step": 13651, + "training_step_time": 0.13227272033691406 + }, + { + "epoch": 2.0831298828125e-05, + "model_forward_time": 0.02501392364501953, + "step": 13652 + }, + { + "epoch": 2.0831298828125e-05, + "step": 13652, + "training_step_time": 0.1143195629119873 + }, + { + "epoch": 2.083282470703125e-05, + "model_forward_time": 0.02572321891784668, + "step": 13653 + }, + { + "epoch": 2.083282470703125e-05, + "step": 13653, + "training_step_time": 0.16243863105773926 + }, + { + "epoch": 2.08343505859375e-05, + "model_forward_time": 0.024828433990478516, + "step": 13654 + }, + { + "epoch": 2.08343505859375e-05, + "step": 13654, + "training_step_time": 0.13164567947387695 + }, + { + "epoch": 2.083587646484375e-05, + "model_forward_time": 0.0242922306060791, + "step": 13655 + }, + { + "epoch": 2.083587646484375e-05, + "step": 13655, + "training_step_time": 0.11646199226379395 + }, + { + "epoch": 2.083740234375e-05, + "model_forward_time": 0.026276350021362305, + "step": 13656 + }, + { + "epoch": 2.083740234375e-05, + "step": 13656, + "training_step_time": 0.11182427406311035 + }, + { + "epoch": 2.083892822265625e-05, + "model_forward_time": 0.026369810104370117, + "step": 13657 + }, + { + "epoch": 2.083892822265625e-05, + "step": 13657, + "training_step_time": 0.11131548881530762 + }, + { + "epoch": 2.08404541015625e-05, + "model_forward_time": 0.025813579559326172, + "step": 13658 + }, + { + "epoch": 2.08404541015625e-05, + "step": 13658, + "training_step_time": 0.1101229190826416 + }, + { + "epoch": 2.084197998046875e-05, + "model_forward_time": 0.025196075439453125, + "step": 13659 + }, + { + "epoch": 2.084197998046875e-05, + "step": 13659, + "training_step_time": 0.10927867889404297 + }, + { + "epoch": 2.0843505859375e-05, + "grad_norm": 0.2596238851547241, + "learning_rate": 6.141754350553279e-05, + "loss": 0.0222, + "step": 13660 + }, + { + "epoch": 2.0843505859375e-05, + "model_forward_time": 0.026456594467163086, + "step": 13660 + }, + { + "epoch": 2.0843505859375e-05, + "step": 13660, + "training_step_time": 0.11250495910644531 + }, + { + "epoch": 2.084503173828125e-05, + "model_forward_time": 0.025280475616455078, + "step": 13661 + }, + { + "epoch": 2.084503173828125e-05, + "step": 13661, + "training_step_time": 0.10811591148376465 + }, + { + "epoch": 2.08465576171875e-05, + "model_forward_time": 0.02529764175415039, + "step": 13662 + }, + { + "epoch": 2.08465576171875e-05, + "step": 13662, + "training_step_time": 0.10808014869689941 + }, + { + "epoch": 2.084808349609375e-05, + "model_forward_time": 0.025424718856811523, + "step": 13663 + }, + { + "epoch": 2.084808349609375e-05, + "step": 13663, + "training_step_time": 0.11020278930664062 + }, + { + "epoch": 2.0849609375e-05, + "model_forward_time": 0.025475740432739258, + "step": 13664 + }, + { + "epoch": 2.0849609375e-05, + "step": 13664, + "training_step_time": 0.11224818229675293 + }, + { + "epoch": 2.085113525390625e-05, + "model_forward_time": 0.025644779205322266, + "step": 13665 + }, + { + "epoch": 2.085113525390625e-05, + "step": 13665, + "training_step_time": 0.11086845397949219 + }, + { + "epoch": 2.08526611328125e-05, + "model_forward_time": 0.02604508399963379, + "step": 13666 + }, + { + "epoch": 2.08526611328125e-05, + "step": 13666, + "training_step_time": 0.10829734802246094 + }, + { + "epoch": 2.085418701171875e-05, + "model_forward_time": 0.025397539138793945, + "step": 13667 + }, + { + "epoch": 2.085418701171875e-05, + "step": 13667, + "training_step_time": 0.10617184638977051 + }, + { + "epoch": 2.0855712890625e-05, + "model_forward_time": 0.025548219680786133, + "step": 13668 + }, + { + "epoch": 2.0855712890625e-05, + "step": 13668, + "training_step_time": 0.10532283782958984 + }, + { + "epoch": 2.085723876953125e-05, + "model_forward_time": 0.025619983673095703, + "step": 13669 + }, + { + "epoch": 2.085723876953125e-05, + "step": 13669, + "training_step_time": 0.10579252243041992 + }, + { + "epoch": 2.08587646484375e-05, + "grad_norm": 0.2168739140033722, + "learning_rate": 6.136387713533603e-05, + "loss": 0.0162, + "step": 13670 + }, + { + "epoch": 2.08587646484375e-05, + "model_forward_time": 0.025711774826049805, + "step": 13670 + }, + { + "epoch": 2.08587646484375e-05, + "step": 13670, + "training_step_time": 0.10611248016357422 + }, + { + "epoch": 2.086029052734375e-05, + "model_forward_time": 0.02528524398803711, + "step": 13671 + }, + { + "epoch": 2.086029052734375e-05, + "step": 13671, + "training_step_time": 0.10628008842468262 + }, + { + "epoch": 2.086181640625e-05, + "model_forward_time": 0.025571107864379883, + "step": 13672 + }, + { + "epoch": 2.086181640625e-05, + "step": 13672, + "training_step_time": 0.10644412040710449 + }, + { + "epoch": 2.086334228515625e-05, + "model_forward_time": 0.025787830352783203, + "step": 13673 + }, + { + "epoch": 2.086334228515625e-05, + "step": 13673, + "training_step_time": 0.10737013816833496 + }, + { + "epoch": 2.08648681640625e-05, + "model_forward_time": 0.02672886848449707, + "step": 13674 + }, + { + "epoch": 2.08648681640625e-05, + "step": 13674, + "training_step_time": 0.10660004615783691 + }, + { + "epoch": 2.086639404296875e-05, + "model_forward_time": 0.025548219680786133, + "step": 13675 + }, + { + "epoch": 2.086639404296875e-05, + "step": 13675, + "training_step_time": 0.10455894470214844 + }, + { + "epoch": 2.0867919921875e-05, + "model_forward_time": 0.0257112979888916, + "step": 13676 + }, + { + "epoch": 2.0867919921875e-05, + "step": 13676, + "training_step_time": 0.10475730895996094 + }, + { + "epoch": 2.086944580078125e-05, + "model_forward_time": 0.02555084228515625, + "step": 13677 + }, + { + "epoch": 2.086944580078125e-05, + "step": 13677, + "training_step_time": 0.10443806648254395 + }, + { + "epoch": 2.08709716796875e-05, + "model_forward_time": 0.025644540786743164, + "step": 13678 + }, + { + "epoch": 2.08709716796875e-05, + "step": 13678, + "training_step_time": 0.10723686218261719 + }, + { + "epoch": 2.087249755859375e-05, + "model_forward_time": 0.025046110153198242, + "step": 13679 + }, + { + "epoch": 2.087249755859375e-05, + "step": 13679, + "training_step_time": 0.10556697845458984 + }, + { + "epoch": 2.08740234375e-05, + "grad_norm": 0.18559281527996063, + "learning_rate": 6.131019695695702e-05, + "loss": 0.0142, + "step": 13680 + }, + { + "epoch": 2.08740234375e-05, + "model_forward_time": 0.025385141372680664, + "step": 13680 + }, + { + "epoch": 2.08740234375e-05, + "step": 13680, + "training_step_time": 0.10452127456665039 + }, + { + "epoch": 2.087554931640625e-05, + "model_forward_time": 0.025847911834716797, + "step": 13681 + }, + { + "epoch": 2.087554931640625e-05, + "step": 13681, + "training_step_time": 0.18056845664978027 + }, + { + "epoch": 2.08770751953125e-05, + "model_forward_time": 0.025025367736816406, + "step": 13682 + }, + { + "epoch": 2.08770751953125e-05, + "step": 13682, + "training_step_time": 0.2406308650970459 + }, + { + "epoch": 2.087860107421875e-05, + "model_forward_time": 0.025998592376708984, + "step": 13683 + }, + { + "epoch": 2.087860107421875e-05, + "step": 13683, + "training_step_time": 0.19423246383666992 + }, + { + "epoch": 2.0880126953125e-05, + "model_forward_time": 0.023974180221557617, + "step": 13684 + }, + { + "epoch": 2.0880126953125e-05, + "step": 13684, + "training_step_time": 0.1882915496826172 + }, + { + "epoch": 2.088165283203125e-05, + "model_forward_time": 0.025397062301635742, + "step": 13685 + }, + { + "epoch": 2.088165283203125e-05, + "step": 13685, + "training_step_time": 0.1261589527130127 + }, + { + "epoch": 2.08831787109375e-05, + "model_forward_time": 0.02797985076904297, + "step": 13686 + }, + { + "epoch": 2.08831787109375e-05, + "step": 13686, + "training_step_time": 0.14324569702148438 + }, + { + "epoch": 2.088470458984375e-05, + "model_forward_time": 0.02514481544494629, + "step": 13687 + }, + { + "epoch": 2.088470458984375e-05, + "step": 13687, + "training_step_time": 0.1538372039794922 + }, + { + "epoch": 2.088623046875e-05, + "model_forward_time": 0.024793624877929688, + "step": 13688 + }, + { + "epoch": 2.088623046875e-05, + "step": 13688, + "training_step_time": 0.13050603866577148 + }, + { + "epoch": 2.088775634765625e-05, + "model_forward_time": 0.025187253952026367, + "step": 13689 + }, + { + "epoch": 2.088775634765625e-05, + "step": 13689, + "training_step_time": 0.12175703048706055 + }, + { + "epoch": 2.08892822265625e-05, + "grad_norm": 0.41579878330230713, + "learning_rate": 6.125650303562221e-05, + "loss": 0.0169, + "step": 13690 + }, + { + "epoch": 2.08892822265625e-05, + "model_forward_time": 0.02540445327758789, + "step": 13690 + }, + { + "epoch": 2.08892822265625e-05, + "step": 13690, + "training_step_time": 0.19053196907043457 + }, + { + "epoch": 2.089080810546875e-05, + "model_forward_time": 0.02640819549560547, + "step": 13691 + }, + { + "epoch": 2.089080810546875e-05, + "step": 13691, + "training_step_time": 0.11151742935180664 + }, + { + "epoch": 2.0892333984375e-05, + "model_forward_time": 0.02582573890686035, + "step": 13692 + }, + { + "epoch": 2.0892333984375e-05, + "step": 13692, + "training_step_time": 0.1090705394744873 + }, + { + "epoch": 2.089385986328125e-05, + "model_forward_time": 0.0254971981048584, + "step": 13693 + }, + { + "epoch": 2.089385986328125e-05, + "step": 13693, + "training_step_time": 0.10692596435546875 + }, + { + "epoch": 2.08953857421875e-05, + "model_forward_time": 0.025211095809936523, + "step": 13694 + }, + { + "epoch": 2.08953857421875e-05, + "step": 13694, + "training_step_time": 0.15807819366455078 + }, + { + "epoch": 2.089691162109375e-05, + "model_forward_time": 0.025496482849121094, + "step": 13695 + }, + { + "epoch": 2.089691162109375e-05, + "step": 13695, + "training_step_time": 0.11967277526855469 + }, + { + "epoch": 2.08984375e-05, + "model_forward_time": 0.02579331398010254, + "step": 13696 + }, + { + "epoch": 2.08984375e-05, + "step": 13696, + "training_step_time": 0.11074185371398926 + }, + { + "epoch": 2.089996337890625e-05, + "model_forward_time": 0.026192665100097656, + "step": 13697 + }, + { + "epoch": 2.089996337890625e-05, + "step": 13697, + "training_step_time": 0.12152814865112305 + }, + { + "epoch": 2.09014892578125e-05, + "model_forward_time": 0.025872468948364258, + "step": 13698 + }, + { + "epoch": 2.09014892578125e-05, + "step": 13698, + "training_step_time": 0.10606932640075684 + }, + { + "epoch": 2.090301513671875e-05, + "model_forward_time": 0.025550365447998047, + "step": 13699 + }, + { + "epoch": 2.090301513671875e-05, + "step": 13699, + "training_step_time": 0.11208963394165039 + }, + { + "epoch": 2.0904541015625e-05, + "grad_norm": 0.3767525553703308, + "learning_rate": 6.12027954365748e-05, + "loss": 0.0168, + "step": 13700 + }, + { + "epoch": 2.0904541015625e-05, + "model_forward_time": 0.025256872177124023, + "step": 13700 + }, + { + "epoch": 2.0904541015625e-05, + "step": 13700, + "training_step_time": 0.14169812202453613 + }, + { + "epoch": 2.090606689453125e-05, + "model_forward_time": 0.02752971649169922, + "step": 13701 + }, + { + "epoch": 2.090606689453125e-05, + "step": 13701, + "training_step_time": 0.10912275314331055 + }, + { + "epoch": 2.09075927734375e-05, + "model_forward_time": 0.026776790618896484, + "step": 13702 + }, + { + "epoch": 2.09075927734375e-05, + "step": 13702, + "training_step_time": 0.10444235801696777 + }, + { + "epoch": 2.090911865234375e-05, + "model_forward_time": 0.026980161666870117, + "step": 13703 + }, + { + "epoch": 2.090911865234375e-05, + "step": 13703, + "training_step_time": 0.10830116271972656 + }, + { + "epoch": 2.091064453125e-05, + "model_forward_time": 0.026279211044311523, + "step": 13704 + }, + { + "epoch": 2.091064453125e-05, + "step": 13704, + "training_step_time": 0.10456180572509766 + }, + { + "epoch": 2.091217041015625e-05, + "model_forward_time": 0.026111125946044922, + "step": 13705 + }, + { + "epoch": 2.091217041015625e-05, + "step": 13705, + "training_step_time": 0.1063385009765625 + }, + { + "epoch": 2.09136962890625e-05, + "model_forward_time": 0.025219202041625977, + "step": 13706 + }, + { + "epoch": 2.09136962890625e-05, + "step": 13706, + "training_step_time": 0.10744643211364746 + }, + { + "epoch": 2.091522216796875e-05, + "model_forward_time": 0.02541041374206543, + "step": 13707 + }, + { + "epoch": 2.091522216796875e-05, + "step": 13707, + "training_step_time": 0.10593771934509277 + }, + { + "epoch": 2.0916748046875e-05, + "model_forward_time": 0.025578022003173828, + "step": 13708 + }, + { + "epoch": 2.0916748046875e-05, + "step": 13708, + "training_step_time": 0.10746955871582031 + }, + { + "epoch": 2.091827392578125e-05, + "model_forward_time": 0.027563095092773438, + "step": 13709 + }, + { + "epoch": 2.091827392578125e-05, + "step": 13709, + "training_step_time": 0.10862302780151367 + }, + { + "epoch": 2.09197998046875e-05, + "grad_norm": 0.2269592583179474, + "learning_rate": 6.11490742250746e-05, + "loss": 0.0147, + "step": 13710 + }, + { + "epoch": 2.09197998046875e-05, + "model_forward_time": 0.025574445724487305, + "step": 13710 + }, + { + "epoch": 2.09197998046875e-05, + "step": 13710, + "training_step_time": 0.11015939712524414 + }, + { + "epoch": 2.092132568359375e-05, + "model_forward_time": 0.02546525001525879, + "step": 13711 + }, + { + "epoch": 2.092132568359375e-05, + "step": 13711, + "training_step_time": 0.10714888572692871 + }, + { + "epoch": 2.09228515625e-05, + "model_forward_time": 0.02526402473449707, + "step": 13712 + }, + { + "epoch": 2.09228515625e-05, + "step": 13712, + "training_step_time": 0.10870909690856934 + }, + { + "epoch": 2.092437744140625e-05, + "model_forward_time": 0.025445938110351562, + "step": 13713 + }, + { + "epoch": 2.092437744140625e-05, + "step": 13713, + "training_step_time": 0.10788774490356445 + }, + { + "epoch": 2.09259033203125e-05, + "model_forward_time": 0.0260162353515625, + "step": 13714 + }, + { + "epoch": 2.09259033203125e-05, + "step": 13714, + "training_step_time": 0.10829949378967285 + }, + { + "epoch": 2.092742919921875e-05, + "model_forward_time": 0.026346683502197266, + "step": 13715 + }, + { + "epoch": 2.092742919921875e-05, + "step": 13715, + "training_step_time": 0.10771703720092773 + }, + { + "epoch": 2.0928955078125e-05, + "model_forward_time": 0.02622199058532715, + "step": 13716 + }, + { + "epoch": 2.0928955078125e-05, + "step": 13716, + "training_step_time": 0.10752248764038086 + }, + { + "epoch": 2.093048095703125e-05, + "model_forward_time": 0.02557206153869629, + "step": 13717 + }, + { + "epoch": 2.093048095703125e-05, + "step": 13717, + "training_step_time": 0.11059308052062988 + }, + { + "epoch": 2.09320068359375e-05, + "model_forward_time": 0.025725364685058594, + "step": 13718 + }, + { + "epoch": 2.09320068359375e-05, + "step": 13718, + "training_step_time": 0.10825324058532715 + }, + { + "epoch": 2.093353271484375e-05, + "model_forward_time": 0.025615692138671875, + "step": 13719 + }, + { + "epoch": 2.093353271484375e-05, + "step": 13719, + "training_step_time": 0.10699295997619629 + }, + { + "epoch": 2.093505859375e-05, + "grad_norm": 0.21005350351333618, + "learning_rate": 6.10953394663979e-05, + "loss": 0.0135, + "step": 13720 + }, + { + "epoch": 2.093505859375e-05, + "model_forward_time": 0.025527238845825195, + "step": 13720 + }, + { + "epoch": 2.093505859375e-05, + "step": 13720, + "training_step_time": 0.10567855834960938 + }, + { + "epoch": 2.093658447265625e-05, + "model_forward_time": 0.025064945220947266, + "step": 13721 + }, + { + "epoch": 2.093658447265625e-05, + "step": 13721, + "training_step_time": 0.10699319839477539 + }, + { + "epoch": 2.09381103515625e-05, + "model_forward_time": 0.02465534210205078, + "step": 13722 + }, + { + "epoch": 2.09381103515625e-05, + "step": 13722, + "training_step_time": 0.10451865196228027 + }, + { + "epoch": 2.093963623046875e-05, + "model_forward_time": 0.025048494338989258, + "step": 13723 + }, + { + "epoch": 2.093963623046875e-05, + "step": 13723, + "training_step_time": 0.10701298713684082 + }, + { + "epoch": 2.0941162109375e-05, + "model_forward_time": 0.02509784698486328, + "step": 13724 + }, + { + "epoch": 2.0941162109375e-05, + "step": 13724, + "training_step_time": 0.10663914680480957 + }, + { + "epoch": 2.094268798828125e-05, + "model_forward_time": 0.025310993194580078, + "step": 13725 + }, + { + "epoch": 2.094268798828125e-05, + "step": 13725, + "training_step_time": 0.10582304000854492 + }, + { + "epoch": 2.09442138671875e-05, + "model_forward_time": 0.028603076934814453, + "step": 13726 + }, + { + "epoch": 2.09442138671875e-05, + "step": 13726, + "training_step_time": 0.10929107666015625 + }, + { + "epoch": 2.094573974609375e-05, + "model_forward_time": 0.025411367416381836, + "step": 13727 + }, + { + "epoch": 2.094573974609375e-05, + "step": 13727, + "training_step_time": 0.15403223037719727 + }, + { + "epoch": 2.0947265625e-05, + "model_forward_time": 0.02510523796081543, + "step": 13728 + }, + { + "epoch": 2.0947265625e-05, + "step": 13728, + "training_step_time": 0.20499300956726074 + }, + { + "epoch": 2.094879150390625e-05, + "model_forward_time": 0.02453923225402832, + "step": 13729 + }, + { + "epoch": 2.094879150390625e-05, + "step": 13729, + "training_step_time": 0.14041447639465332 + }, + { + "epoch": 2.09503173828125e-05, + "grad_norm": 0.3171594440937042, + "learning_rate": 6.104159122583752e-05, + "loss": 0.0143, + "step": 13730 + }, + { + "epoch": 2.09503173828125e-05, + "model_forward_time": 0.024187326431274414, + "step": 13730 + }, + { + "epoch": 2.09503173828125e-05, + "step": 13730, + "training_step_time": 0.1953895092010498 + }, + { + "epoch": 2.095184326171875e-05, + "model_forward_time": 0.027753591537475586, + "step": 13731 + }, + { + "epoch": 2.095184326171875e-05, + "step": 13731, + "training_step_time": 0.10712957382202148 + }, + { + "epoch": 2.0953369140625e-05, + "model_forward_time": 0.02504134178161621, + "step": 13732 + }, + { + "epoch": 2.0953369140625e-05, + "step": 13732, + "training_step_time": 0.14459466934204102 + }, + { + "epoch": 2.095489501953125e-05, + "model_forward_time": 0.025074243545532227, + "step": 13733 + }, + { + "epoch": 2.095489501953125e-05, + "step": 13733, + "training_step_time": 0.16294550895690918 + }, + { + "epoch": 2.09564208984375e-05, + "model_forward_time": 0.02475261688232422, + "step": 13734 + }, + { + "epoch": 2.09564208984375e-05, + "step": 13734, + "training_step_time": 0.11420631408691406 + }, + { + "epoch": 2.095794677734375e-05, + "model_forward_time": 0.02444291114807129, + "step": 13735 + }, + { + "epoch": 2.095794677734375e-05, + "step": 13735, + "training_step_time": 0.12803339958190918 + }, + { + "epoch": 2.095947265625e-05, + "model_forward_time": 0.025377988815307617, + "step": 13736 + }, + { + "epoch": 2.095947265625e-05, + "step": 13736, + "training_step_time": 0.19315838813781738 + }, + { + "epoch": 2.096099853515625e-05, + "model_forward_time": 0.024515628814697266, + "step": 13737 + }, + { + "epoch": 2.096099853515625e-05, + "step": 13737, + "training_step_time": 0.10225319862365723 + }, + { + "epoch": 2.09625244140625e-05, + "model_forward_time": 0.02680492401123047, + "step": 13738 + }, + { + "epoch": 2.09625244140625e-05, + "step": 13738, + "training_step_time": 0.1062312126159668 + }, + { + "epoch": 2.096405029296875e-05, + "model_forward_time": 0.02536487579345703, + "step": 13739 + }, + { + "epoch": 2.096405029296875e-05, + "step": 13739, + "training_step_time": 0.10562801361083984 + }, + { + "epoch": 2.0965576171875e-05, + "grad_norm": 0.29130005836486816, + "learning_rate": 6.0987829568702656e-05, + "loss": 0.013, + "step": 13740 + }, + { + "epoch": 2.0965576171875e-05, + "model_forward_time": 0.025903701782226562, + "step": 13740 + }, + { + "epoch": 2.0965576171875e-05, + "step": 13740, + "training_step_time": 0.10328483581542969 + }, + { + "epoch": 2.096710205078125e-05, + "model_forward_time": 0.02615499496459961, + "step": 13741 + }, + { + "epoch": 2.096710205078125e-05, + "step": 13741, + "training_step_time": 0.1491847038269043 + }, + { + "epoch": 2.09686279296875e-05, + "model_forward_time": 0.02608656883239746, + "step": 13742 + }, + { + "epoch": 2.09686279296875e-05, + "step": 13742, + "training_step_time": 0.1291196346282959 + }, + { + "epoch": 2.097015380859375e-05, + "model_forward_time": 0.024675607681274414, + "step": 13743 + }, + { + "epoch": 2.097015380859375e-05, + "step": 13743, + "training_step_time": 0.133544921875 + }, + { + "epoch": 2.09716796875e-05, + "model_forward_time": 0.025962352752685547, + "step": 13744 + }, + { + "epoch": 2.09716796875e-05, + "step": 13744, + "training_step_time": 0.16542816162109375 + }, + { + "epoch": 2.097320556640625e-05, + "model_forward_time": 0.02377915382385254, + "step": 13745 + }, + { + "epoch": 2.097320556640625e-05, + "step": 13745, + "training_step_time": 0.191239595413208 + }, + { + "epoch": 2.09747314453125e-05, + "model_forward_time": 0.024747371673583984, + "step": 13746 + }, + { + "epoch": 2.09747314453125e-05, + "step": 13746, + "training_step_time": 0.1715688705444336 + }, + { + "epoch": 2.097625732421875e-05, + "model_forward_time": 0.024481773376464844, + "step": 13747 + }, + { + "epoch": 2.097625732421875e-05, + "step": 13747, + "training_step_time": 0.17028427124023438 + }, + { + "epoch": 2.0977783203125e-05, + "model_forward_time": 0.025011062622070312, + "step": 13748 + }, + { + "epoch": 2.0977783203125e-05, + "step": 13748, + "training_step_time": 0.16060233116149902 + }, + { + "epoch": 2.097930908203125e-05, + "model_forward_time": 0.024953603744506836, + "step": 13749 + }, + { + "epoch": 2.097930908203125e-05, + "step": 13749, + "training_step_time": 0.14506292343139648 + }, + { + "epoch": 2.09808349609375e-05, + "grad_norm": 0.22500164806842804, + "learning_rate": 6.09340545603188e-05, + "loss": 0.0147, + "step": 13750 + }, + { + "epoch": 2.09808349609375e-05, + "model_forward_time": 0.024704933166503906, + "step": 13750 + }, + { + "epoch": 2.09808349609375e-05, + "step": 13750, + "training_step_time": 0.1338956356048584 + }, + { + "epoch": 2.098236083984375e-05, + "model_forward_time": 0.026500701904296875, + "step": 13751 + }, + { + "epoch": 2.098236083984375e-05, + "step": 13751, + "training_step_time": 0.12761211395263672 + }, + { + "epoch": 2.098388671875e-05, + "model_forward_time": 0.024725675582885742, + "step": 13752 + }, + { + "epoch": 2.098388671875e-05, + "step": 13752, + "training_step_time": 0.12161564826965332 + }, + { + "epoch": 2.098541259765625e-05, + "model_forward_time": 0.024921417236328125, + "step": 13753 + }, + { + "epoch": 2.098541259765625e-05, + "step": 13753, + "training_step_time": 0.1141660213470459 + }, + { + "epoch": 2.09869384765625e-05, + "model_forward_time": 0.025361061096191406, + "step": 13754 + }, + { + "epoch": 2.09869384765625e-05, + "step": 13754, + "training_step_time": 0.11153364181518555 + }, + { + "epoch": 2.098846435546875e-05, + "model_forward_time": 0.025524139404296875, + "step": 13755 + }, + { + "epoch": 2.098846435546875e-05, + "step": 13755, + "training_step_time": 0.11288666725158691 + }, + { + "epoch": 2.0989990234375e-05, + "model_forward_time": 0.025410175323486328, + "step": 13756 + }, + { + "epoch": 2.0989990234375e-05, + "step": 13756, + "training_step_time": 0.11191940307617188 + }, + { + "epoch": 2.099151611328125e-05, + "model_forward_time": 0.024551868438720703, + "step": 13757 + }, + { + "epoch": 2.099151611328125e-05, + "step": 13757, + "training_step_time": 0.10490679740905762 + }, + { + "epoch": 2.09930419921875e-05, + "model_forward_time": 0.024232864379882812, + "step": 13758 + }, + { + "epoch": 2.09930419921875e-05, + "step": 13758, + "training_step_time": 0.1020815372467041 + }, + { + "epoch": 2.099456787109375e-05, + "model_forward_time": 0.02547001838684082, + "step": 13759 + }, + { + "epoch": 2.099456787109375e-05, + "step": 13759, + "training_step_time": 0.10875606536865234 + }, + { + "epoch": 2.099609375e-05, + "grad_norm": 0.3524976968765259, + "learning_rate": 6.088026626602763e-05, + "loss": 0.0333, + "step": 13760 + }, + { + "epoch": 2.099609375e-05, + "model_forward_time": 0.02614450454711914, + "step": 13760 + }, + { + "epoch": 2.099609375e-05, + "step": 13760, + "training_step_time": 0.10822606086730957 + }, + { + "epoch": 2.099761962890625e-05, + "model_forward_time": 0.02534794807434082, + "step": 13761 + }, + { + "epoch": 2.099761962890625e-05, + "step": 13761, + "training_step_time": 0.10287213325500488 + }, + { + "epoch": 2.09991455078125e-05, + "model_forward_time": 0.025422334671020508, + "step": 13762 + }, + { + "epoch": 2.09991455078125e-05, + "step": 13762, + "training_step_time": 0.10394906997680664 + }, + { + "epoch": 2.100067138671875e-05, + "model_forward_time": 0.024719953536987305, + "step": 13763 + }, + { + "epoch": 2.100067138671875e-05, + "step": 13763, + "training_step_time": 0.1069633960723877 + }, + { + "epoch": 2.1002197265625e-05, + "model_forward_time": 0.025336265563964844, + "step": 13764 + }, + { + "epoch": 2.1002197265625e-05, + "step": 13764, + "training_step_time": 0.10631752014160156 + }, + { + "epoch": 2.100372314453125e-05, + "model_forward_time": 0.025374889373779297, + "step": 13765 + }, + { + "epoch": 2.100372314453125e-05, + "step": 13765, + "training_step_time": 0.10664129257202148 + }, + { + "epoch": 2.10052490234375e-05, + "model_forward_time": 0.02541351318359375, + "step": 13766 + }, + { + "epoch": 2.10052490234375e-05, + "step": 13766, + "training_step_time": 0.1121671199798584 + }, + { + "epoch": 2.100677490234375e-05, + "model_forward_time": 0.024404287338256836, + "step": 13767 + }, + { + "epoch": 2.100677490234375e-05, + "step": 13767, + "training_step_time": 0.11769914627075195 + }, + { + "epoch": 2.100830078125e-05, + "model_forward_time": 0.024979591369628906, + "step": 13768 + }, + { + "epoch": 2.100830078125e-05, + "step": 13768, + "training_step_time": 0.11680340766906738 + }, + { + "epoch": 2.100982666015625e-05, + "model_forward_time": 0.025100231170654297, + "step": 13769 + }, + { + "epoch": 2.100982666015625e-05, + "step": 13769, + "training_step_time": 0.11489129066467285 + }, + { + "epoch": 2.10113525390625e-05, + "grad_norm": 0.2127663493156433, + "learning_rate": 6.0826464751186994e-05, + "loss": 0.0137, + "step": 13770 + }, + { + "epoch": 2.10113525390625e-05, + "model_forward_time": 0.024636268615722656, + "step": 13770 + }, + { + "epoch": 2.10113525390625e-05, + "step": 13770, + "training_step_time": 0.15667033195495605 + }, + { + "epoch": 2.101287841796875e-05, + "model_forward_time": 0.025126934051513672, + "step": 13771 + }, + { + "epoch": 2.101287841796875e-05, + "step": 13771, + "training_step_time": 0.13269853591918945 + }, + { + "epoch": 2.1014404296875e-05, + "model_forward_time": 0.02482748031616211, + "step": 13772 + }, + { + "epoch": 2.1014404296875e-05, + "step": 13772, + "training_step_time": 0.21873831748962402 + }, + { + "epoch": 2.101593017578125e-05, + "model_forward_time": 0.02473759651184082, + "step": 13773 + }, + { + "epoch": 2.101593017578125e-05, + "step": 13773, + "training_step_time": 0.12624096870422363 + }, + { + "epoch": 2.10174560546875e-05, + "model_forward_time": 0.024590492248535156, + "step": 13774 + }, + { + "epoch": 2.10174560546875e-05, + "step": 13774, + "training_step_time": 0.11187887191772461 + }, + { + "epoch": 2.101898193359375e-05, + "model_forward_time": 0.025938749313354492, + "step": 13775 + }, + { + "epoch": 2.101898193359375e-05, + "step": 13775, + "training_step_time": 0.13290882110595703 + }, + { + "epoch": 2.10205078125e-05, + "model_forward_time": 0.02457451820373535, + "step": 13776 + }, + { + "epoch": 2.10205078125e-05, + "step": 13776, + "training_step_time": 0.21213483810424805 + }, + { + "epoch": 2.102203369140625e-05, + "model_forward_time": 0.024930953979492188, + "step": 13777 + }, + { + "epoch": 2.102203369140625e-05, + "step": 13777, + "training_step_time": 0.11803960800170898 + }, + { + "epoch": 2.10235595703125e-05, + "model_forward_time": 0.024651765823364258, + "step": 13778 + }, + { + "epoch": 2.10235595703125e-05, + "step": 13778, + "training_step_time": 0.12125587463378906 + }, + { + "epoch": 2.102508544921875e-05, + "model_forward_time": 0.025071144104003906, + "step": 13779 + }, + { + "epoch": 2.102508544921875e-05, + "step": 13779, + "training_step_time": 0.11096739768981934 + }, + { + "epoch": 2.1026611328125e-05, + "grad_norm": 0.3180241584777832, + "learning_rate": 6.077265008117081e-05, + "loss": 0.0139, + "step": 13780 + }, + { + "epoch": 2.1026611328125e-05, + "model_forward_time": 0.025513887405395508, + "step": 13780 + }, + { + "epoch": 2.1026611328125e-05, + "step": 13780, + "training_step_time": 0.11728549003601074 + }, + { + "epoch": 2.102813720703125e-05, + "model_forward_time": 0.02926468849182129, + "step": 13781 + }, + { + "epoch": 2.102813720703125e-05, + "step": 13781, + "training_step_time": 0.11231470108032227 + }, + { + "epoch": 2.10296630859375e-05, + "model_forward_time": 0.02541947364807129, + "step": 13782 + }, + { + "epoch": 2.10296630859375e-05, + "step": 13782, + "training_step_time": 0.10933542251586914 + }, + { + "epoch": 2.103118896484375e-05, + "model_forward_time": 0.0251615047454834, + "step": 13783 + }, + { + "epoch": 2.103118896484375e-05, + "step": 13783, + "training_step_time": 0.10403084754943848 + }, + { + "epoch": 2.103271484375e-05, + "model_forward_time": 0.024271726608276367, + "step": 13784 + }, + { + "epoch": 2.103271484375e-05, + "step": 13784, + "training_step_time": 0.1035923957824707 + }, + { + "epoch": 2.103424072265625e-05, + "model_forward_time": 0.024541378021240234, + "step": 13785 + }, + { + "epoch": 2.103424072265625e-05, + "step": 13785, + "training_step_time": 0.21591973304748535 + }, + { + "epoch": 2.10357666015625e-05, + "model_forward_time": 0.02500748634338379, + "step": 13786 + }, + { + "epoch": 2.10357666015625e-05, + "step": 13786, + "training_step_time": 0.12184858322143555 + }, + { + "epoch": 2.103729248046875e-05, + "model_forward_time": 0.024967193603515625, + "step": 13787 + }, + { + "epoch": 2.103729248046875e-05, + "step": 13787, + "training_step_time": 0.1257326602935791 + }, + { + "epoch": 2.1038818359375e-05, + "model_forward_time": 0.024944782257080078, + "step": 13788 + }, + { + "epoch": 2.1038818359375e-05, + "step": 13788, + "training_step_time": 0.1119387149810791 + }, + { + "epoch": 2.104034423828125e-05, + "model_forward_time": 0.02567291259765625, + "step": 13789 + }, + { + "epoch": 2.104034423828125e-05, + "step": 13789, + "training_step_time": 0.15781950950622559 + }, + { + "epoch": 2.10418701171875e-05, + "grad_norm": 0.17182987928390503, + "learning_rate": 6.071882232136901e-05, + "loss": 0.0163, + "step": 13790 + }, + { + "epoch": 2.10418701171875e-05, + "model_forward_time": 0.024692535400390625, + "step": 13790 + }, + { + "epoch": 2.10418701171875e-05, + "step": 13790, + "training_step_time": 0.13045501708984375 + }, + { + "epoch": 2.104339599609375e-05, + "model_forward_time": 0.024561405181884766, + "step": 13791 + }, + { + "epoch": 2.104339599609375e-05, + "step": 13791, + "training_step_time": 0.11142945289611816 + }, + { + "epoch": 2.1044921875e-05, + "model_forward_time": 0.025524139404296875, + "step": 13792 + }, + { + "epoch": 2.1044921875e-05, + "step": 13792, + "training_step_time": 0.10689616203308105 + }, + { + "epoch": 2.104644775390625e-05, + "model_forward_time": 0.02545785903930664, + "step": 13793 + }, + { + "epoch": 2.104644775390625e-05, + "step": 13793, + "training_step_time": 0.10878443717956543 + }, + { + "epoch": 2.10479736328125e-05, + "model_forward_time": 0.025337934494018555, + "step": 13794 + }, + { + "epoch": 2.10479736328125e-05, + "step": 13794, + "training_step_time": 0.10604190826416016 + }, + { + "epoch": 2.104949951171875e-05, + "model_forward_time": 0.025556087493896484, + "step": 13795 + }, + { + "epoch": 2.104949951171875e-05, + "step": 13795, + "training_step_time": 0.10880732536315918 + }, + { + "epoch": 2.1051025390625e-05, + "model_forward_time": 0.025599241256713867, + "step": 13796 + }, + { + "epoch": 2.1051025390625e-05, + "step": 13796, + "training_step_time": 0.11069202423095703 + }, + { + "epoch": 2.105255126953125e-05, + "model_forward_time": 0.02544093132019043, + "step": 13797 + }, + { + "epoch": 2.105255126953125e-05, + "step": 13797, + "training_step_time": 0.10885739326477051 + }, + { + "epoch": 2.10540771484375e-05, + "model_forward_time": 0.02591562271118164, + "step": 13798 + }, + { + "epoch": 2.10540771484375e-05, + "step": 13798, + "training_step_time": 0.10829520225524902 + }, + { + "epoch": 2.105560302734375e-05, + "model_forward_time": 0.026581764221191406, + "step": 13799 + }, + { + "epoch": 2.105560302734375e-05, + "step": 13799, + "training_step_time": 0.1089475154876709 + }, + { + "epoch": 2.105712890625e-05, + "grad_norm": 0.3595592677593231, + "learning_rate": 6.066498153718735e-05, + "loss": 0.0162, + "step": 13800 + }, + { + "epoch": 2.105712890625e-05, + "model_forward_time": 0.026041030883789062, + "step": 13800 + }, + { + "epoch": 2.105712890625e-05, + "step": 13800, + "training_step_time": 0.18308782577514648 + }, + { + "epoch": 2.105865478515625e-05, + "model_forward_time": 0.023525714874267578, + "step": 13801 + }, + { + "epoch": 2.105865478515625e-05, + "step": 13801, + "training_step_time": 0.1820087432861328 + }, + { + "epoch": 2.10601806640625e-05, + "model_forward_time": 0.023185253143310547, + "step": 13802 + }, + { + "epoch": 2.10601806640625e-05, + "step": 13802, + "training_step_time": 0.16408705711364746 + }, + { + "epoch": 2.106170654296875e-05, + "model_forward_time": 0.024877309799194336, + "step": 13803 + }, + { + "epoch": 2.106170654296875e-05, + "step": 13803, + "training_step_time": 0.15259003639221191 + }, + { + "epoch": 2.1063232421875e-05, + "model_forward_time": 0.023276090621948242, + "step": 13804 + }, + { + "epoch": 2.1063232421875e-05, + "step": 13804, + "training_step_time": 0.14666056632995605 + }, + { + "epoch": 2.106475830078125e-05, + "model_forward_time": 0.02346944808959961, + "step": 13805 + }, + { + "epoch": 2.106475830078125e-05, + "step": 13805, + "training_step_time": 0.12651538848876953 + }, + { + "epoch": 2.10662841796875e-05, + "model_forward_time": 0.02337026596069336, + "step": 13806 + }, + { + "epoch": 2.10662841796875e-05, + "step": 13806, + "training_step_time": 0.12755775451660156 + }, + { + "epoch": 2.106781005859375e-05, + "model_forward_time": 0.02415299415588379, + "step": 13807 + }, + { + "epoch": 2.106781005859375e-05, + "step": 13807, + "training_step_time": 0.12300801277160645 + }, + { + "epoch": 2.10693359375e-05, + "model_forward_time": 0.02429819107055664, + "step": 13808 + }, + { + "epoch": 2.10693359375e-05, + "step": 13808, + "training_step_time": 0.11923742294311523 + }, + { + "epoch": 2.107086181640625e-05, + "model_forward_time": 0.02471780776977539, + "step": 13809 + }, + { + "epoch": 2.107086181640625e-05, + "step": 13809, + "training_step_time": 0.11581587791442871 + }, + { + "epoch": 2.10723876953125e-05, + "grad_norm": 0.43297770619392395, + "learning_rate": 6.0611127794047486e-05, + "loss": 0.0241, + "step": 13810 + }, + { + "epoch": 2.10723876953125e-05, + "model_forward_time": 0.02421736717224121, + "step": 13810 + }, + { + "epoch": 2.10723876953125e-05, + "step": 13810, + "training_step_time": 0.11188077926635742 + }, + { + "epoch": 2.107391357421875e-05, + "model_forward_time": 0.02520442008972168, + "step": 13811 + }, + { + "epoch": 2.107391357421875e-05, + "step": 13811, + "training_step_time": 0.10904240608215332 + }, + { + "epoch": 2.1075439453125e-05, + "model_forward_time": 0.025173187255859375, + "step": 13812 + }, + { + "epoch": 2.1075439453125e-05, + "step": 13812, + "training_step_time": 0.10860180854797363 + }, + { + "epoch": 2.107696533203125e-05, + "model_forward_time": 0.025487184524536133, + "step": 13813 + }, + { + "epoch": 2.107696533203125e-05, + "step": 13813, + "training_step_time": 0.10796499252319336 + }, + { + "epoch": 2.10784912109375e-05, + "model_forward_time": 0.02460026741027832, + "step": 13814 + }, + { + "epoch": 2.10784912109375e-05, + "step": 13814, + "training_step_time": 0.12011170387268066 + }, + { + "epoch": 2.108001708984375e-05, + "model_forward_time": 0.024837493896484375, + "step": 13815 + }, + { + "epoch": 2.108001708984375e-05, + "step": 13815, + "training_step_time": 0.12703585624694824 + }, + { + "epoch": 2.108154296875e-05, + "model_forward_time": 0.02537250518798828, + "step": 13816 + }, + { + "epoch": 2.108154296875e-05, + "step": 13816, + "training_step_time": 0.2201244831085205 + }, + { + "epoch": 2.108306884765625e-05, + "model_forward_time": 0.02442479133605957, + "step": 13817 + }, + { + "epoch": 2.108306884765625e-05, + "step": 13817, + "training_step_time": 0.13438653945922852 + }, + { + "epoch": 2.10845947265625e-05, + "model_forward_time": 0.024334192276000977, + "step": 13818 + }, + { + "epoch": 2.10845947265625e-05, + "step": 13818, + "training_step_time": 0.17963743209838867 + }, + { + "epoch": 2.108612060546875e-05, + "model_forward_time": 0.02477407455444336, + "step": 13819 + }, + { + "epoch": 2.108612060546875e-05, + "step": 13819, + "training_step_time": 0.10099434852600098 + }, + { + "epoch": 2.1087646484375e-05, + "grad_norm": 0.3133634626865387, + "learning_rate": 6.055726115738678e-05, + "loss": 0.0202, + "step": 13820 + }, + { + "epoch": 2.1087646484375e-05, + "model_forward_time": 0.024273157119750977, + "step": 13820 + }, + { + "epoch": 2.1087646484375e-05, + "step": 13820, + "training_step_time": 0.14082694053649902 + }, + { + "epoch": 2.108917236328125e-05, + "model_forward_time": 0.024604082107543945, + "step": 13821 + }, + { + "epoch": 2.108917236328125e-05, + "step": 13821, + "training_step_time": 0.13423585891723633 + }, + { + "epoch": 2.10906982421875e-05, + "model_forward_time": 0.024552583694458008, + "step": 13822 + }, + { + "epoch": 2.10906982421875e-05, + "step": 13822, + "training_step_time": 0.10879659652709961 + }, + { + "epoch": 2.109222412109375e-05, + "model_forward_time": 0.025456905364990234, + "step": 13823 + }, + { + "epoch": 2.109222412109375e-05, + "step": 13823, + "training_step_time": 0.11156773567199707 + }, + { + "epoch": 2.109375e-05, + "model_forward_time": 0.025557518005371094, + "step": 13824 + }, + { + "epoch": 2.109375e-05, + "step": 13824, + "training_step_time": 0.10536003112792969 + }, + { + "epoch": 2.109527587890625e-05, + "model_forward_time": 0.025357484817504883, + "step": 13825 + }, + { + "epoch": 2.109527587890625e-05, + "step": 13825, + "training_step_time": 0.12174582481384277 + }, + { + "epoch": 2.10968017578125e-05, + "model_forward_time": 0.02515411376953125, + "step": 13826 + }, + { + "epoch": 2.10968017578125e-05, + "step": 13826, + "training_step_time": 0.1058952808380127 + }, + { + "epoch": 2.109832763671875e-05, + "model_forward_time": 0.02508687973022461, + "step": 13827 + }, + { + "epoch": 2.109832763671875e-05, + "step": 13827, + "training_step_time": 0.10395336151123047 + }, + { + "epoch": 2.1099853515625e-05, + "model_forward_time": 0.02506542205810547, + "step": 13828 + }, + { + "epoch": 2.1099853515625e-05, + "step": 13828, + "training_step_time": 0.10573840141296387 + }, + { + "epoch": 2.110137939453125e-05, + "model_forward_time": 0.025094032287597656, + "step": 13829 + }, + { + "epoch": 2.110137939453125e-05, + "step": 13829, + "training_step_time": 0.20791149139404297 + }, + { + "epoch": 2.11029052734375e-05, + "grad_norm": 0.31008756160736084, + "learning_rate": 6.05033816926583e-05, + "loss": 0.0185, + "step": 13830 + }, + { + "epoch": 2.11029052734375e-05, + "model_forward_time": 0.02429509162902832, + "step": 13830 + }, + { + "epoch": 2.11029052734375e-05, + "step": 13830, + "training_step_time": 0.11154365539550781 + }, + { + "epoch": 2.110443115234375e-05, + "model_forward_time": 0.02499675750732422, + "step": 13831 + }, + { + "epoch": 2.110443115234375e-05, + "step": 13831, + "training_step_time": 0.13228678703308105 + }, + { + "epoch": 2.110595703125e-05, + "model_forward_time": 0.02540135383605957, + "step": 13832 + }, + { + "epoch": 2.110595703125e-05, + "step": 13832, + "training_step_time": 0.13718414306640625 + }, + { + "epoch": 2.110748291015625e-05, + "model_forward_time": 0.024593114852905273, + "step": 13833 + }, + { + "epoch": 2.110748291015625e-05, + "step": 13833, + "training_step_time": 0.11216187477111816 + }, + { + "epoch": 2.11090087890625e-05, + "model_forward_time": 0.02505326271057129, + "step": 13834 + }, + { + "epoch": 2.11090087890625e-05, + "step": 13834, + "training_step_time": 0.12896966934204102 + }, + { + "epoch": 2.111053466796875e-05, + "model_forward_time": 0.025183916091918945, + "step": 13835 + }, + { + "epoch": 2.111053466796875e-05, + "step": 13835, + "training_step_time": 0.11802148818969727 + }, + { + "epoch": 2.1112060546875e-05, + "model_forward_time": 0.02514934539794922, + "step": 13836 + }, + { + "epoch": 2.1112060546875e-05, + "step": 13836, + "training_step_time": 0.10564041137695312 + }, + { + "epoch": 2.111358642578125e-05, + "model_forward_time": 0.0253903865814209, + "step": 13837 + }, + { + "epoch": 2.111358642578125e-05, + "step": 13837, + "training_step_time": 0.10464072227478027 + }, + { + "epoch": 2.11151123046875e-05, + "model_forward_time": 0.024884462356567383, + "step": 13838 + }, + { + "epoch": 2.11151123046875e-05, + "step": 13838, + "training_step_time": 0.10327935218811035 + }, + { + "epoch": 2.111663818359375e-05, + "model_forward_time": 0.025180339813232422, + "step": 13839 + }, + { + "epoch": 2.111663818359375e-05, + "step": 13839, + "training_step_time": 0.10840821266174316 + }, + { + "epoch": 2.11181640625e-05, + "grad_norm": 0.43487513065338135, + "learning_rate": 6.044948946533064e-05, + "loss": 0.0197, + "step": 13840 + }, + { + "epoch": 2.11181640625e-05, + "model_forward_time": 0.0250091552734375, + "step": 13840 + }, + { + "epoch": 2.11181640625e-05, + "step": 13840, + "training_step_time": 0.10513687133789062 + }, + { + "epoch": 2.111968994140625e-05, + "model_forward_time": 0.02486252784729004, + "step": 13841 + }, + { + "epoch": 2.111968994140625e-05, + "step": 13841, + "training_step_time": 0.18270015716552734 + }, + { + "epoch": 2.11212158203125e-05, + "model_forward_time": 0.02440786361694336, + "step": 13842 + }, + { + "epoch": 2.11212158203125e-05, + "step": 13842, + "training_step_time": 0.19562506675720215 + }, + { + "epoch": 2.112274169921875e-05, + "model_forward_time": 0.02408432960510254, + "step": 13843 + }, + { + "epoch": 2.112274169921875e-05, + "step": 13843, + "training_step_time": 0.18784570693969727 + }, + { + "epoch": 2.1124267578125e-05, + "model_forward_time": 0.024085283279418945, + "step": 13844 + }, + { + "epoch": 2.1124267578125e-05, + "step": 13844, + "training_step_time": 0.18026185035705566 + }, + { + "epoch": 2.112579345703125e-05, + "model_forward_time": 0.023852109909057617, + "step": 13845 + }, + { + "epoch": 2.112579345703125e-05, + "step": 13845, + "training_step_time": 0.16629433631896973 + }, + { + "epoch": 2.11273193359375e-05, + "model_forward_time": 0.02449321746826172, + "step": 13846 + }, + { + "epoch": 2.11273193359375e-05, + "step": 13846, + "training_step_time": 0.11739206314086914 + }, + { + "epoch": 2.112884521484375e-05, + "model_forward_time": 0.024660825729370117, + "step": 13847 + }, + { + "epoch": 2.112884521484375e-05, + "step": 13847, + "training_step_time": 0.10135364532470703 + }, + { + "epoch": 2.113037109375e-05, + "model_forward_time": 0.025350570678710938, + "step": 13848 + }, + { + "epoch": 2.113037109375e-05, + "step": 13848, + "training_step_time": 0.10333847999572754 + }, + { + "epoch": 2.113189697265625e-05, + "model_forward_time": 0.0251157283782959, + "step": 13849 + }, + { + "epoch": 2.113189697265625e-05, + "step": 13849, + "training_step_time": 0.1031041145324707 + }, + { + "epoch": 2.11334228515625e-05, + "grad_norm": 0.45656758546829224, + "learning_rate": 6.0395584540887963e-05, + "loss": 0.0139, + "step": 13850 + }, + { + "epoch": 2.11334228515625e-05, + "model_forward_time": 0.025149106979370117, + "step": 13850 + }, + { + "epoch": 2.11334228515625e-05, + "step": 13850, + "training_step_time": 0.10684800148010254 + }, + { + "epoch": 2.113494873046875e-05, + "model_forward_time": 0.025339603424072266, + "step": 13851 + }, + { + "epoch": 2.113494873046875e-05, + "step": 13851, + "training_step_time": 0.10315346717834473 + }, + { + "epoch": 2.1136474609375e-05, + "model_forward_time": 0.025275230407714844, + "step": 13852 + }, + { + "epoch": 2.1136474609375e-05, + "step": 13852, + "training_step_time": 0.10297107696533203 + }, + { + "epoch": 2.113800048828125e-05, + "model_forward_time": 0.02508831024169922, + "step": 13853 + }, + { + "epoch": 2.113800048828125e-05, + "step": 13853, + "training_step_time": 0.10972261428833008 + }, + { + "epoch": 2.11395263671875e-05, + "model_forward_time": 0.025352001190185547, + "step": 13854 + }, + { + "epoch": 2.11395263671875e-05, + "step": 13854, + "training_step_time": 0.1049959659576416 + }, + { + "epoch": 2.114105224609375e-05, + "model_forward_time": 0.02535867691040039, + "step": 13855 + }, + { + "epoch": 2.114105224609375e-05, + "step": 13855, + "training_step_time": 0.1058509349822998 + }, + { + "epoch": 2.1142578125e-05, + "model_forward_time": 0.025278091430664062, + "step": 13856 + }, + { + "epoch": 2.1142578125e-05, + "step": 13856, + "training_step_time": 0.10474109649658203 + }, + { + "epoch": 2.114410400390625e-05, + "model_forward_time": 0.024910449981689453, + "step": 13857 + }, + { + "epoch": 2.114410400390625e-05, + "step": 13857, + "training_step_time": 0.10487532615661621 + }, + { + "epoch": 2.11456298828125e-05, + "model_forward_time": 0.02590179443359375, + "step": 13858 + }, + { + "epoch": 2.11456298828125e-05, + "step": 13858, + "training_step_time": 0.175065279006958 + }, + { + "epoch": 2.114715576171875e-05, + "model_forward_time": 0.024261474609375, + "step": 13859 + }, + { + "epoch": 2.114715576171875e-05, + "step": 13859, + "training_step_time": 0.14134836196899414 + }, + { + "epoch": 2.1148681640625e-05, + "grad_norm": 0.21963365375995636, + "learning_rate": 6.034166698482984e-05, + "loss": 0.0157, + "step": 13860 + }, + { + "epoch": 2.1148681640625e-05, + "model_forward_time": 0.024405241012573242, + "step": 13860 + }, + { + "epoch": 2.1148681640625e-05, + "step": 13860, + "training_step_time": 0.19643735885620117 + }, + { + "epoch": 2.115020751953125e-05, + "model_forward_time": 0.025187253952026367, + "step": 13861 + }, + { + "epoch": 2.115020751953125e-05, + "step": 13861, + "training_step_time": 0.1666247844696045 + }, + { + "epoch": 2.11517333984375e-05, + "model_forward_time": 0.024095773696899414, + "step": 13862 + }, + { + "epoch": 2.11517333984375e-05, + "step": 13862, + "training_step_time": 0.14900612831115723 + }, + { + "epoch": 2.115325927734375e-05, + "model_forward_time": 0.02439713478088379, + "step": 13863 + }, + { + "epoch": 2.115325927734375e-05, + "step": 13863, + "training_step_time": 0.17798948287963867 + }, + { + "epoch": 2.115478515625e-05, + "model_forward_time": 0.025011539459228516, + "step": 13864 + }, + { + "epoch": 2.115478515625e-05, + "step": 13864, + "training_step_time": 0.18427515029907227 + }, + { + "epoch": 2.115631103515625e-05, + "model_forward_time": 0.02464437484741211, + "step": 13865 + }, + { + "epoch": 2.115631103515625e-05, + "step": 13865, + "training_step_time": 0.13962984085083008 + }, + { + "epoch": 2.11578369140625e-05, + "model_forward_time": 0.02464747428894043, + "step": 13866 + }, + { + "epoch": 2.11578369140625e-05, + "step": 13866, + "training_step_time": 0.20704889297485352 + }, + { + "epoch": 2.115936279296875e-05, + "model_forward_time": 0.02448725700378418, + "step": 13867 + }, + { + "epoch": 2.115936279296875e-05, + "step": 13867, + "training_step_time": 0.13471627235412598 + }, + { + "epoch": 2.1160888671875e-05, + "model_forward_time": 0.02469182014465332, + "step": 13868 + }, + { + "epoch": 2.1160888671875e-05, + "step": 13868, + "training_step_time": 0.1241450309753418 + }, + { + "epoch": 2.116241455078125e-05, + "model_forward_time": 0.025192737579345703, + "step": 13869 + }, + { + "epoch": 2.116241455078125e-05, + "step": 13869, + "training_step_time": 0.11481738090515137 + }, + { + "epoch": 2.11639404296875e-05, + "grad_norm": 0.2901814579963684, + "learning_rate": 6.0287736862671175e-05, + "loss": 0.0208, + "step": 13870 + }, + { + "epoch": 2.11639404296875e-05, + "model_forward_time": 0.025123119354248047, + "step": 13870 + }, + { + "epoch": 2.11639404296875e-05, + "step": 13870, + "training_step_time": 0.11545515060424805 + }, + { + "epoch": 2.116546630859375e-05, + "model_forward_time": 0.025197505950927734, + "step": 13871 + }, + { + "epoch": 2.116546630859375e-05, + "step": 13871, + "training_step_time": 0.11384391784667969 + }, + { + "epoch": 2.11669921875e-05, + "model_forward_time": 0.025483131408691406, + "step": 13872 + }, + { + "epoch": 2.11669921875e-05, + "step": 13872, + "training_step_time": 0.11619091033935547 + }, + { + "epoch": 2.116851806640625e-05, + "model_forward_time": 0.024842023849487305, + "step": 13873 + }, + { + "epoch": 2.116851806640625e-05, + "step": 13873, + "training_step_time": 0.12018084526062012 + }, + { + "epoch": 2.11700439453125e-05, + "model_forward_time": 0.025530099868774414, + "step": 13874 + }, + { + "epoch": 2.11700439453125e-05, + "step": 13874, + "training_step_time": 0.13125896453857422 + }, + { + "epoch": 2.117156982421875e-05, + "model_forward_time": 0.025022506713867188, + "step": 13875 + }, + { + "epoch": 2.117156982421875e-05, + "step": 13875, + "training_step_time": 0.1059274673461914 + }, + { + "epoch": 2.1173095703125e-05, + "model_forward_time": 0.02528691291809082, + "step": 13876 + }, + { + "epoch": 2.1173095703125e-05, + "step": 13876, + "training_step_time": 0.2113492488861084 + }, + { + "epoch": 2.117462158203125e-05, + "model_forward_time": 0.024336814880371094, + "step": 13877 + }, + { + "epoch": 2.117462158203125e-05, + "step": 13877, + "training_step_time": 0.11668992042541504 + }, + { + "epoch": 2.11761474609375e-05, + "model_forward_time": 0.024745702743530273, + "step": 13878 + }, + { + "epoch": 2.11761474609375e-05, + "step": 13878, + "training_step_time": 0.11249494552612305 + }, + { + "epoch": 2.117767333984375e-05, + "model_forward_time": 0.024939775466918945, + "step": 13879 + }, + { + "epoch": 2.117767333984375e-05, + "step": 13879, + "training_step_time": 0.10766863822937012 + }, + { + "epoch": 2.117919921875e-05, + "grad_norm": 0.2624422013759613, + "learning_rate": 6.023379423994214e-05, + "loss": 0.0106, + "step": 13880 + }, + { + "epoch": 2.117919921875e-05, + "model_forward_time": 0.02528214454650879, + "step": 13880 + }, + { + "epoch": 2.117919921875e-05, + "step": 13880, + "training_step_time": 0.11126470565795898 + }, + { + "epoch": 2.118072509765625e-05, + "model_forward_time": 0.025384902954101562, + "step": 13881 + }, + { + "epoch": 2.118072509765625e-05, + "step": 13881, + "training_step_time": 0.10678744316101074 + }, + { + "epoch": 2.11822509765625e-05, + "model_forward_time": 0.02501654624938965, + "step": 13882 + }, + { + "epoch": 2.11822509765625e-05, + "step": 13882, + "training_step_time": 0.10811400413513184 + }, + { + "epoch": 2.118377685546875e-05, + "model_forward_time": 0.02526569366455078, + "step": 13883 + }, + { + "epoch": 2.118377685546875e-05, + "step": 13883, + "training_step_time": 0.1064748764038086 + }, + { + "epoch": 2.1185302734375e-05, + "model_forward_time": 0.025110960006713867, + "step": 13884 + }, + { + "epoch": 2.1185302734375e-05, + "step": 13884, + "training_step_time": 0.10522222518920898 + }, + { + "epoch": 2.118682861328125e-05, + "model_forward_time": 0.02486133575439453, + "step": 13885 + }, + { + "epoch": 2.118682861328125e-05, + "step": 13885, + "training_step_time": 0.10434484481811523 + }, + { + "epoch": 2.11883544921875e-05, + "model_forward_time": 0.025339365005493164, + "step": 13886 + }, + { + "epoch": 2.11883544921875e-05, + "step": 13886, + "training_step_time": 0.10593748092651367 + }, + { + "epoch": 2.118988037109375e-05, + "model_forward_time": 0.025481700897216797, + "step": 13887 + }, + { + "epoch": 2.118988037109375e-05, + "step": 13887, + "training_step_time": 0.10702872276306152 + }, + { + "epoch": 2.119140625e-05, + "model_forward_time": 0.025051355361938477, + "step": 13888 + }, + { + "epoch": 2.119140625e-05, + "step": 13888, + "training_step_time": 0.10433650016784668 + }, + { + "epoch": 2.119293212890625e-05, + "model_forward_time": 0.025042057037353516, + "step": 13889 + }, + { + "epoch": 2.119293212890625e-05, + "step": 13889, + "training_step_time": 0.1077113151550293 + }, + { + "epoch": 2.11944580078125e-05, + "grad_norm": 0.4025578200817108, + "learning_rate": 6.017983918218812e-05, + "loss": 0.024, + "step": 13890 + }, + { + "epoch": 2.11944580078125e-05, + "model_forward_time": 0.02539992332458496, + "step": 13890 + }, + { + "epoch": 2.11944580078125e-05, + "step": 13890, + "training_step_time": 0.10498523712158203 + }, + { + "epoch": 2.119598388671875e-05, + "model_forward_time": 0.02527165412902832, + "step": 13891 + }, + { + "epoch": 2.119598388671875e-05, + "step": 13891, + "training_step_time": 0.10509920120239258 + }, + { + "epoch": 2.1197509765625e-05, + "model_forward_time": 0.025394201278686523, + "step": 13892 + }, + { + "epoch": 2.1197509765625e-05, + "step": 13892, + "training_step_time": 0.1056370735168457 + }, + { + "epoch": 2.119903564453125e-05, + "model_forward_time": 0.025632143020629883, + "step": 13893 + }, + { + "epoch": 2.119903564453125e-05, + "step": 13893, + "training_step_time": 0.10530781745910645 + }, + { + "epoch": 2.12005615234375e-05, + "model_forward_time": 0.025727033615112305, + "step": 13894 + }, + { + "epoch": 2.12005615234375e-05, + "step": 13894, + "training_step_time": 0.1074361801147461 + }, + { + "epoch": 2.120208740234375e-05, + "model_forward_time": 0.02525806427001953, + "step": 13895 + }, + { + "epoch": 2.120208740234375e-05, + "step": 13895, + "training_step_time": 0.1079564094543457 + }, + { + "epoch": 2.120361328125e-05, + "model_forward_time": 0.02511286735534668, + "step": 13896 + }, + { + "epoch": 2.120361328125e-05, + "step": 13896, + "training_step_time": 0.10541105270385742 + }, + { + "epoch": 2.120513916015625e-05, + "model_forward_time": 0.02458810806274414, + "step": 13897 + }, + { + "epoch": 2.120513916015625e-05, + "step": 13897, + "training_step_time": 0.10821962356567383 + }, + { + "epoch": 2.12066650390625e-05, + "model_forward_time": 0.025307893753051758, + "step": 13898 + }, + { + "epoch": 2.12066650390625e-05, + "step": 13898, + "training_step_time": 0.11216425895690918 + }, + { + "epoch": 2.120819091796875e-05, + "model_forward_time": 0.02530193328857422, + "step": 13899 + }, + { + "epoch": 2.120819091796875e-05, + "step": 13899, + "training_step_time": 0.10505509376525879 + }, + { + "epoch": 2.1209716796875e-05, + "grad_norm": 0.2524588704109192, + "learning_rate": 6.012587175496961e-05, + "loss": 0.0098, + "step": 13900 + }, + { + "epoch": 2.1209716796875e-05, + "model_forward_time": 0.025150060653686523, + "step": 13900 + }, + { + "epoch": 2.1209716796875e-05, + "step": 13900, + "training_step_time": 0.10435652732849121 + }, + { + "epoch": 2.121124267578125e-05, + "model_forward_time": 0.025235414505004883, + "step": 13901 + }, + { + "epoch": 2.121124267578125e-05, + "step": 13901, + "training_step_time": 0.10574722290039062 + }, + { + "epoch": 2.12127685546875e-05, + "model_forward_time": 0.025246143341064453, + "step": 13902 + }, + { + "epoch": 2.12127685546875e-05, + "step": 13902, + "training_step_time": 0.1064596176147461 + }, + { + "epoch": 2.121429443359375e-05, + "model_forward_time": 0.025489091873168945, + "step": 13903 + }, + { + "epoch": 2.121429443359375e-05, + "step": 13903, + "training_step_time": 0.18894600868225098 + }, + { + "epoch": 2.12158203125e-05, + "model_forward_time": 0.024791955947875977, + "step": 13904 + }, + { + "epoch": 2.12158203125e-05, + "step": 13904, + "training_step_time": 0.13489246368408203 + }, + { + "epoch": 2.121734619140625e-05, + "model_forward_time": 0.02440190315246582, + "step": 13905 + }, + { + "epoch": 2.121734619140625e-05, + "step": 13905, + "training_step_time": 0.18694472312927246 + }, + { + "epoch": 2.12188720703125e-05, + "model_forward_time": 0.0245974063873291, + "step": 13906 + }, + { + "epoch": 2.12188720703125e-05, + "step": 13906, + "training_step_time": 0.15798044204711914 + }, + { + "epoch": 2.122039794921875e-05, + "model_forward_time": 0.024889469146728516, + "step": 13907 + }, + { + "epoch": 2.122039794921875e-05, + "step": 13907, + "training_step_time": 0.10735630989074707 + }, + { + "epoch": 2.1221923828125e-05, + "model_forward_time": 0.024691343307495117, + "step": 13908 + }, + { + "epoch": 2.1221923828125e-05, + "step": 13908, + "training_step_time": 0.19231963157653809 + }, + { + "epoch": 2.122344970703125e-05, + "model_forward_time": 0.024800539016723633, + "step": 13909 + }, + { + "epoch": 2.122344970703125e-05, + "step": 13909, + "training_step_time": 0.10147786140441895 + }, + { + "epoch": 2.12249755859375e-05, + "grad_norm": 0.21475963294506073, + "learning_rate": 6.0071892023862105e-05, + "loss": 0.0143, + "step": 13910 + }, + { + "epoch": 2.12249755859375e-05, + "model_forward_time": 0.02467823028564453, + "step": 13910 + }, + { + "epoch": 2.12249755859375e-05, + "step": 13910, + "training_step_time": 0.14706754684448242 + }, + { + "epoch": 2.122650146484375e-05, + "model_forward_time": 0.02446460723876953, + "step": 13911 + }, + { + "epoch": 2.122650146484375e-05, + "step": 13911, + "training_step_time": 0.10947513580322266 + }, + { + "epoch": 2.122802734375e-05, + "model_forward_time": 0.02607870101928711, + "step": 13912 + }, + { + "epoch": 2.122802734375e-05, + "step": 13912, + "training_step_time": 0.18893694877624512 + }, + { + "epoch": 2.122955322265625e-05, + "model_forward_time": 0.024167776107788086, + "step": 13913 + }, + { + "epoch": 2.122955322265625e-05, + "step": 13913, + "training_step_time": 0.12911105155944824 + }, + { + "epoch": 2.12310791015625e-05, + "model_forward_time": 0.024782657623291016, + "step": 13914 + }, + { + "epoch": 2.12310791015625e-05, + "step": 13914, + "training_step_time": 0.10303544998168945 + }, + { + "epoch": 2.123260498046875e-05, + "model_forward_time": 0.02649068832397461, + "step": 13915 + }, + { + "epoch": 2.123260498046875e-05, + "step": 13915, + "training_step_time": 0.11663508415222168 + }, + { + "epoch": 2.1234130859375e-05, + "model_forward_time": 0.02526712417602539, + "step": 13916 + }, + { + "epoch": 2.1234130859375e-05, + "step": 13916, + "training_step_time": 0.10574054718017578 + }, + { + "epoch": 2.123565673828125e-05, + "model_forward_time": 0.02545475959777832, + "step": 13917 + }, + { + "epoch": 2.123565673828125e-05, + "step": 13917, + "training_step_time": 0.11175751686096191 + }, + { + "epoch": 2.12371826171875e-05, + "model_forward_time": 0.02419257164001465, + "step": 13918 + }, + { + "epoch": 2.12371826171875e-05, + "step": 13918, + "training_step_time": 0.18088483810424805 + }, + { + "epoch": 2.123870849609375e-05, + "model_forward_time": 0.02466106414794922, + "step": 13919 + }, + { + "epoch": 2.123870849609375e-05, + "step": 13919, + "training_step_time": 0.12383532524108887 + }, + { + "epoch": 2.1240234375e-05, + "grad_norm": 0.4200749397277832, + "learning_rate": 6.001790005445607e-05, + "loss": 0.0192, + "step": 13920 + }, + { + "epoch": 2.1240234375e-05, + "model_forward_time": 0.024456024169921875, + "step": 13920 + }, + { + "epoch": 2.1240234375e-05, + "step": 13920, + "training_step_time": 0.11646580696105957 + }, + { + "epoch": 2.124176025390625e-05, + "model_forward_time": 0.02443838119506836, + "step": 13921 + }, + { + "epoch": 2.124176025390625e-05, + "step": 13921, + "training_step_time": 0.11777877807617188 + }, + { + "epoch": 2.12432861328125e-05, + "model_forward_time": 0.025475263595581055, + "step": 13922 + }, + { + "epoch": 2.12432861328125e-05, + "step": 13922, + "training_step_time": 0.2063736915588379 + }, + { + "epoch": 2.124481201171875e-05, + "model_forward_time": 0.024596452713012695, + "step": 13923 + }, + { + "epoch": 2.124481201171875e-05, + "step": 13923, + "training_step_time": 0.12872982025146484 + }, + { + "epoch": 2.1246337890625e-05, + "model_forward_time": 0.02398991584777832, + "step": 13924 + }, + { + "epoch": 2.1246337890625e-05, + "step": 13924, + "training_step_time": 0.10710930824279785 + }, + { + "epoch": 2.124786376953125e-05, + "model_forward_time": 0.025101900100708008, + "step": 13925 + }, + { + "epoch": 2.124786376953125e-05, + "step": 13925, + "training_step_time": 0.11490678787231445 + }, + { + "epoch": 2.12493896484375e-05, + "model_forward_time": 0.025785207748413086, + "step": 13926 + }, + { + "epoch": 2.12493896484375e-05, + "step": 13926, + "training_step_time": 0.10704445838928223 + }, + { + "epoch": 2.125091552734375e-05, + "model_forward_time": 0.025490999221801758, + "step": 13927 + }, + { + "epoch": 2.125091552734375e-05, + "step": 13927, + "training_step_time": 0.10575079917907715 + }, + { + "epoch": 2.125244140625e-05, + "model_forward_time": 0.025529861450195312, + "step": 13928 + }, + { + "epoch": 2.125244140625e-05, + "step": 13928, + "training_step_time": 0.10776472091674805 + }, + { + "epoch": 2.125396728515625e-05, + "model_forward_time": 0.02514052391052246, + "step": 13929 + }, + { + "epoch": 2.125396728515625e-05, + "step": 13929, + "training_step_time": 0.10558795928955078 + }, + { + "epoch": 2.12554931640625e-05, + "grad_norm": 0.4564298391342163, + "learning_rate": 5.9963895912356836e-05, + "loss": 0.0172, + "step": 13930 + }, + { + "epoch": 2.12554931640625e-05, + "model_forward_time": 0.025671005249023438, + "step": 13930 + }, + { + "epoch": 2.12554931640625e-05, + "step": 13930, + "training_step_time": 0.10561132431030273 + }, + { + "epoch": 2.125701904296875e-05, + "model_forward_time": 0.025171995162963867, + "step": 13931 + }, + { + "epoch": 2.125701904296875e-05, + "step": 13931, + "training_step_time": 0.10582613945007324 + }, + { + "epoch": 2.1258544921875e-05, + "model_forward_time": 0.025439023971557617, + "step": 13932 + }, + { + "epoch": 2.1258544921875e-05, + "step": 13932, + "training_step_time": 0.10581374168395996 + }, + { + "epoch": 2.126007080078125e-05, + "model_forward_time": 0.025280237197875977, + "step": 13933 + }, + { + "epoch": 2.126007080078125e-05, + "step": 13933, + "training_step_time": 0.10661935806274414 + }, + { + "epoch": 2.12615966796875e-05, + "model_forward_time": 0.025499343872070312, + "step": 13934 + }, + { + "epoch": 2.12615966796875e-05, + "step": 13934, + "training_step_time": 0.10638904571533203 + }, + { + "epoch": 2.126312255859375e-05, + "model_forward_time": 0.025398731231689453, + "step": 13935 + }, + { + "epoch": 2.126312255859375e-05, + "step": 13935, + "training_step_time": 0.10975217819213867 + }, + { + "epoch": 2.12646484375e-05, + "model_forward_time": 0.025362014770507812, + "step": 13936 + }, + { + "epoch": 2.12646484375e-05, + "step": 13936, + "training_step_time": 0.10542774200439453 + }, + { + "epoch": 2.126617431640625e-05, + "model_forward_time": 0.025346994400024414, + "step": 13937 + }, + { + "epoch": 2.126617431640625e-05, + "step": 13937, + "training_step_time": 0.10587024688720703 + }, + { + "epoch": 2.12677001953125e-05, + "model_forward_time": 0.025196075439453125, + "step": 13938 + }, + { + "epoch": 2.12677001953125e-05, + "step": 13938, + "training_step_time": 0.10580897331237793 + }, + { + "epoch": 2.126922607421875e-05, + "model_forward_time": 0.0251615047454834, + "step": 13939 + }, + { + "epoch": 2.126922607421875e-05, + "step": 13939, + "training_step_time": 0.10612249374389648 + }, + { + "epoch": 2.1270751953125e-05, + "grad_norm": 0.35134264826774597, + "learning_rate": 5.9909879663184544e-05, + "loss": 0.016, + "step": 13940 + }, + { + "epoch": 2.1270751953125e-05, + "model_forward_time": 0.025101900100708008, + "step": 13940 + }, + { + "epoch": 2.1270751953125e-05, + "step": 13940, + "training_step_time": 0.10547161102294922 + }, + { + "epoch": 2.127227783203125e-05, + "model_forward_time": 0.025228261947631836, + "step": 13941 + }, + { + "epoch": 2.127227783203125e-05, + "step": 13941, + "training_step_time": 0.10650801658630371 + }, + { + "epoch": 2.12738037109375e-05, + "model_forward_time": 0.025037050247192383, + "step": 13942 + }, + { + "epoch": 2.12738037109375e-05, + "step": 13942, + "training_step_time": 0.10799837112426758 + }, + { + "epoch": 2.127532958984375e-05, + "model_forward_time": 0.02582526206970215, + "step": 13943 + }, + { + "epoch": 2.127532958984375e-05, + "step": 13943, + "training_step_time": 0.10747790336608887 + }, + { + "epoch": 2.127685546875e-05, + "model_forward_time": 0.02507638931274414, + "step": 13944 + }, + { + "epoch": 2.127685546875e-05, + "step": 13944, + "training_step_time": 0.10650944709777832 + }, + { + "epoch": 2.127838134765625e-05, + "model_forward_time": 0.02550339698791504, + "step": 13945 + }, + { + "epoch": 2.127838134765625e-05, + "step": 13945, + "training_step_time": 0.10601329803466797 + }, + { + "epoch": 2.12799072265625e-05, + "model_forward_time": 0.024989843368530273, + "step": 13946 + }, + { + "epoch": 2.12799072265625e-05, + "step": 13946, + "training_step_time": 0.10802578926086426 + }, + { + "epoch": 2.128143310546875e-05, + "model_forward_time": 0.02522563934326172, + "step": 13947 + }, + { + "epoch": 2.128143310546875e-05, + "step": 13947, + "training_step_time": 0.10716748237609863 + }, + { + "epoch": 2.1282958984375e-05, + "model_forward_time": 0.025110721588134766, + "step": 13948 + }, + { + "epoch": 2.1282958984375e-05, + "step": 13948, + "training_step_time": 0.19045114517211914 + }, + { + "epoch": 2.128448486328125e-05, + "model_forward_time": 0.024541616439819336, + "step": 13949 + }, + { + "epoch": 2.128448486328125e-05, + "step": 13949, + "training_step_time": 0.155198335647583 + }, + { + "epoch": 2.12860107421875e-05, + "grad_norm": 0.2921213209629059, + "learning_rate": 5.985585137257401e-05, + "loss": 0.0146, + "step": 13950 + }, + { + "epoch": 2.12860107421875e-05, + "model_forward_time": 0.02502727508544922, + "step": 13950 + }, + { + "epoch": 2.12860107421875e-05, + "step": 13950, + "training_step_time": 0.15549445152282715 + }, + { + "epoch": 2.128753662109375e-05, + "model_forward_time": 0.024546384811401367, + "step": 13951 + }, + { + "epoch": 2.128753662109375e-05, + "step": 13951, + "training_step_time": 0.18978333473205566 + }, + { + "epoch": 2.12890625e-05, + "model_forward_time": 0.024152517318725586, + "step": 13952 + }, + { + "epoch": 2.12890625e-05, + "step": 13952, + "training_step_time": 0.18538928031921387 + }, + { + "epoch": 2.129058837890625e-05, + "model_forward_time": 0.02405261993408203, + "step": 13953 + }, + { + "epoch": 2.129058837890625e-05, + "step": 13953, + "training_step_time": 0.19746994972229004 + }, + { + "epoch": 2.12921142578125e-05, + "model_forward_time": 0.02359628677368164, + "step": 13954 + }, + { + "epoch": 2.12921142578125e-05, + "step": 13954, + "training_step_time": 0.19241929054260254 + }, + { + "epoch": 2.129364013671875e-05, + "model_forward_time": 0.025365114212036133, + "step": 13955 + }, + { + "epoch": 2.129364013671875e-05, + "step": 13955, + "training_step_time": 0.22680449485778809 + }, + { + "epoch": 2.1295166015625e-05, + "model_forward_time": 0.02462911605834961, + "step": 13956 + }, + { + "epoch": 2.1295166015625e-05, + "step": 13956, + "training_step_time": 0.1784207820892334 + }, + { + "epoch": 2.129669189453125e-05, + "model_forward_time": 0.024585723876953125, + "step": 13957 + }, + { + "epoch": 2.129669189453125e-05, + "step": 13957, + "training_step_time": 0.22498822212219238 + }, + { + "epoch": 2.12982177734375e-05, + "model_forward_time": 0.02415633201599121, + "step": 13958 + }, + { + "epoch": 2.12982177734375e-05, + "step": 13958, + "training_step_time": 0.20644831657409668 + }, + { + "epoch": 2.129974365234375e-05, + "model_forward_time": 0.024311542510986328, + "step": 13959 + }, + { + "epoch": 2.129974365234375e-05, + "step": 13959, + "training_step_time": 0.12637972831726074 + }, + { + "epoch": 2.130126953125e-05, + "grad_norm": 0.35321858525276184, + "learning_rate": 5.980181110617473e-05, + "loss": 0.0182, + "step": 13960 + }, + { + "epoch": 2.130126953125e-05, + "model_forward_time": 0.02434086799621582, + "step": 13960 + }, + { + "epoch": 2.130126953125e-05, + "step": 13960, + "training_step_time": 0.10785126686096191 + }, + { + "epoch": 2.130279541015625e-05, + "model_forward_time": 0.02537703514099121, + "step": 13961 + }, + { + "epoch": 2.130279541015625e-05, + "step": 13961, + "training_step_time": 0.10762739181518555 + }, + { + "epoch": 2.13043212890625e-05, + "model_forward_time": 0.02480483055114746, + "step": 13962 + }, + { + "epoch": 2.13043212890625e-05, + "step": 13962, + "training_step_time": 0.10862135887145996 + }, + { + "epoch": 2.130584716796875e-05, + "model_forward_time": 0.024865150451660156, + "step": 13963 + }, + { + "epoch": 2.130584716796875e-05, + "step": 13963, + "training_step_time": 0.1378934383392334 + }, + { + "epoch": 2.1307373046875e-05, + "model_forward_time": 0.025484561920166016, + "step": 13964 + }, + { + "epoch": 2.1307373046875e-05, + "step": 13964, + "training_step_time": 0.11118578910827637 + }, + { + "epoch": 2.130889892578125e-05, + "model_forward_time": 0.025599002838134766, + "step": 13965 + }, + { + "epoch": 2.130889892578125e-05, + "step": 13965, + "training_step_time": 0.18898534774780273 + }, + { + "epoch": 2.13104248046875e-05, + "model_forward_time": 0.024372100830078125, + "step": 13966 + }, + { + "epoch": 2.13104248046875e-05, + "step": 13966, + "training_step_time": 0.15226292610168457 + }, + { + "epoch": 2.131195068359375e-05, + "model_forward_time": 0.02411961555480957, + "step": 13967 + }, + { + "epoch": 2.131195068359375e-05, + "step": 13967, + "training_step_time": 0.1278977394104004 + }, + { + "epoch": 2.13134765625e-05, + "model_forward_time": 0.024448871612548828, + "step": 13968 + }, + { + "epoch": 2.13134765625e-05, + "step": 13968, + "training_step_time": 0.1273479461669922 + }, + { + "epoch": 2.131500244140625e-05, + "model_forward_time": 0.025207996368408203, + "step": 13969 + }, + { + "epoch": 2.131500244140625e-05, + "step": 13969, + "training_step_time": 0.12486648559570312 + }, + { + "epoch": 2.13165283203125e-05, + "grad_norm": 0.3368975520133972, + "learning_rate": 5.974775892965071e-05, + "loss": 0.0408, + "step": 13970 + }, + { + "epoch": 2.13165283203125e-05, + "model_forward_time": 0.024851560592651367, + "step": 13970 + }, + { + "epoch": 2.13165283203125e-05, + "step": 13970, + "training_step_time": 0.11732292175292969 + }, + { + "epoch": 2.131805419921875e-05, + "model_forward_time": 0.025589466094970703, + "step": 13971 + }, + { + "epoch": 2.131805419921875e-05, + "step": 13971, + "training_step_time": 0.11449480056762695 + }, + { + "epoch": 2.1319580078125e-05, + "model_forward_time": 0.025035381317138672, + "step": 13972 + }, + { + "epoch": 2.1319580078125e-05, + "step": 13972, + "training_step_time": 0.11477947235107422 + }, + { + "epoch": 2.132110595703125e-05, + "model_forward_time": 0.025274276733398438, + "step": 13973 + }, + { + "epoch": 2.132110595703125e-05, + "step": 13973, + "training_step_time": 0.11434817314147949 + }, + { + "epoch": 2.13226318359375e-05, + "model_forward_time": 0.025328874588012695, + "step": 13974 + }, + { + "epoch": 2.13226318359375e-05, + "step": 13974, + "training_step_time": 0.10937976837158203 + }, + { + "epoch": 2.132415771484375e-05, + "model_forward_time": 0.025514602661132812, + "step": 13975 + }, + { + "epoch": 2.132415771484375e-05, + "step": 13975, + "training_step_time": 0.11252498626708984 + }, + { + "epoch": 2.132568359375e-05, + "model_forward_time": 0.025487661361694336, + "step": 13976 + }, + { + "epoch": 2.132568359375e-05, + "step": 13976, + "training_step_time": 0.10770106315612793 + }, + { + "epoch": 2.132720947265625e-05, + "model_forward_time": 0.025414705276489258, + "step": 13977 + }, + { + "epoch": 2.132720947265625e-05, + "step": 13977, + "training_step_time": 0.10860419273376465 + }, + { + "epoch": 2.13287353515625e-05, + "model_forward_time": 0.025402545928955078, + "step": 13978 + }, + { + "epoch": 2.13287353515625e-05, + "step": 13978, + "training_step_time": 0.10608220100402832 + }, + { + "epoch": 2.133026123046875e-05, + "model_forward_time": 0.025238752365112305, + "step": 13979 + }, + { + "epoch": 2.133026123046875e-05, + "step": 13979, + "training_step_time": 0.10479283332824707 + }, + { + "epoch": 2.1331787109375e-05, + "grad_norm": 0.3981991112232208, + "learning_rate": 5.969369490868042e-05, + "loss": 0.0221, + "step": 13980 + }, + { + "epoch": 2.1331787109375e-05, + "model_forward_time": 0.025357961654663086, + "step": 13980 + }, + { + "epoch": 2.1331787109375e-05, + "step": 13980, + "training_step_time": 0.10549592971801758 + }, + { + "epoch": 2.133331298828125e-05, + "model_forward_time": 0.025704622268676758, + "step": 13981 + }, + { + "epoch": 2.133331298828125e-05, + "step": 13981, + "training_step_time": 0.10637784004211426 + }, + { + "epoch": 2.13348388671875e-05, + "model_forward_time": 0.025435924530029297, + "step": 13982 + }, + { + "epoch": 2.13348388671875e-05, + "step": 13982, + "training_step_time": 0.10661768913269043 + }, + { + "epoch": 2.133636474609375e-05, + "model_forward_time": 0.025319814682006836, + "step": 13983 + }, + { + "epoch": 2.133636474609375e-05, + "step": 13983, + "training_step_time": 0.10835480690002441 + }, + { + "epoch": 2.1337890625e-05, + "model_forward_time": 0.025866985321044922, + "step": 13984 + }, + { + "epoch": 2.1337890625e-05, + "step": 13984, + "training_step_time": 0.10635924339294434 + }, + { + "epoch": 2.133941650390625e-05, + "model_forward_time": 0.025388479232788086, + "step": 13985 + }, + { + "epoch": 2.133941650390625e-05, + "step": 13985, + "training_step_time": 0.10951805114746094 + }, + { + "epoch": 2.13409423828125e-05, + "model_forward_time": 0.024925708770751953, + "step": 13986 + }, + { + "epoch": 2.13409423828125e-05, + "step": 13986, + "training_step_time": 0.10550117492675781 + }, + { + "epoch": 2.134246826171875e-05, + "model_forward_time": 0.025908231735229492, + "step": 13987 + }, + { + "epoch": 2.134246826171875e-05, + "step": 13987, + "training_step_time": 0.10879755020141602 + }, + { + "epoch": 2.1343994140625e-05, + "model_forward_time": 0.025860309600830078, + "step": 13988 + }, + { + "epoch": 2.1343994140625e-05, + "step": 13988, + "training_step_time": 0.10840415954589844 + }, + { + "epoch": 2.134552001953125e-05, + "model_forward_time": 0.02550220489501953, + "step": 13989 + }, + { + "epoch": 2.134552001953125e-05, + "step": 13989, + "training_step_time": 0.11141848564147949 + }, + { + "epoch": 2.13470458984375e-05, + "grad_norm": 0.1812504678964615, + "learning_rate": 5.963961910895676e-05, + "loss": 0.0205, + "step": 13990 + }, + { + "epoch": 2.13470458984375e-05, + "model_forward_time": 0.025580406188964844, + "step": 13990 + }, + { + "epoch": 2.13470458984375e-05, + "step": 13990, + "training_step_time": 0.10611224174499512 + }, + { + "epoch": 2.134857177734375e-05, + "model_forward_time": 0.025664806365966797, + "step": 13991 + }, + { + "epoch": 2.134857177734375e-05, + "step": 13991, + "training_step_time": 0.11786174774169922 + }, + { + "epoch": 2.135009765625e-05, + "model_forward_time": 0.025136232376098633, + "step": 13992 + }, + { + "epoch": 2.135009765625e-05, + "step": 13992, + "training_step_time": 0.15346431732177734 + }, + { + "epoch": 2.135162353515625e-05, + "model_forward_time": 0.024534225463867188, + "step": 13993 + }, + { + "epoch": 2.135162353515625e-05, + "step": 13993, + "training_step_time": 0.11351656913757324 + }, + { + "epoch": 2.13531494140625e-05, + "model_forward_time": 0.02462315559387207, + "step": 13994 + }, + { + "epoch": 2.13531494140625e-05, + "step": 13994, + "training_step_time": 0.1067650318145752 + }, + { + "epoch": 2.135467529296875e-05, + "model_forward_time": 0.025611400604248047, + "step": 13995 + }, + { + "epoch": 2.135467529296875e-05, + "step": 13995, + "training_step_time": 0.1327519416809082 + }, + { + "epoch": 2.1356201171875e-05, + "model_forward_time": 0.02557206153869629, + "step": 13996 + }, + { + "epoch": 2.1356201171875e-05, + "step": 13996, + "training_step_time": 0.16394448280334473 + }, + { + "epoch": 2.135772705078125e-05, + "model_forward_time": 0.0248870849609375, + "step": 13997 + }, + { + "epoch": 2.135772705078125e-05, + "step": 13997, + "training_step_time": 0.11015200614929199 + }, + { + "epoch": 2.13592529296875e-05, + "model_forward_time": 0.025127887725830078, + "step": 13998 + }, + { + "epoch": 2.13592529296875e-05, + "step": 13998, + "training_step_time": 0.1094968318939209 + }, + { + "epoch": 2.136077880859375e-05, + "model_forward_time": 0.026792526245117188, + "step": 13999 + }, + { + "epoch": 2.136077880859375e-05, + "step": 13999, + "training_step_time": 0.10842680931091309 + }, + { + "epoch": 2.13623046875e-05, + "grad_norm": 0.32337090373039246, + "learning_rate": 5.958553159618693e-05, + "loss": 0.0171, + "step": 14000 + }, + { + "epoch": 2.13623046875e-05, + "model_forward_time": 0.026173830032348633, + "step": 14000 + }, + { + "epoch": 2.13623046875e-05, + "step": 14000, + "training_step_time": 0.09994864463806152 + }, + { + "epoch": 2.136383056640625e-05, + "model_forward_time": 0.023351430892944336, + "step": 14001 + }, + { + "epoch": 2.136383056640625e-05, + "step": 14001, + "training_step_time": 0.16586518287658691 + }, + { + "epoch": 2.13653564453125e-05, + "model_forward_time": 0.024451732635498047, + "step": 14002 + }, + { + "epoch": 2.13653564453125e-05, + "step": 14002, + "training_step_time": 0.12363719940185547 + }, + { + "epoch": 2.136688232421875e-05, + "model_forward_time": 0.024913311004638672, + "step": 14003 + }, + { + "epoch": 2.136688232421875e-05, + "step": 14003, + "training_step_time": 0.16837596893310547 + }, + { + "epoch": 2.1368408203125e-05, + "model_forward_time": 0.024346113204956055, + "step": 14004 + }, + { + "epoch": 2.1368408203125e-05, + "step": 14004, + "training_step_time": 0.1948544979095459 + }, + { + "epoch": 2.136993408203125e-05, + "model_forward_time": 0.024114608764648438, + "step": 14005 + }, + { + "epoch": 2.136993408203125e-05, + "step": 14005, + "training_step_time": 0.10498929023742676 + }, + { + "epoch": 2.13714599609375e-05, + "model_forward_time": 0.02505660057067871, + "step": 14006 + }, + { + "epoch": 2.13714599609375e-05, + "step": 14006, + "training_step_time": 0.11477088928222656 + }, + { + "epoch": 2.137298583984375e-05, + "model_forward_time": 0.02535557746887207, + "step": 14007 + }, + { + "epoch": 2.137298583984375e-05, + "step": 14007, + "training_step_time": 0.10522031784057617 + }, + { + "epoch": 2.137451171875e-05, + "model_forward_time": 0.024880170822143555, + "step": 14008 + }, + { + "epoch": 2.137451171875e-05, + "step": 14008, + "training_step_time": 0.13418245315551758 + }, + { + "epoch": 2.137603759765625e-05, + "model_forward_time": 0.02491307258605957, + "step": 14009 + }, + { + "epoch": 2.137603759765625e-05, + "step": 14009, + "training_step_time": 0.11905550956726074 + }, + { + "epoch": 2.13775634765625e-05, + "grad_norm": 0.3173942565917969, + "learning_rate": 5.953143243609235e-05, + "loss": 0.0143, + "step": 14010 + }, + { + "epoch": 2.13775634765625e-05, + "model_forward_time": 0.024995803833007812, + "step": 14010 + }, + { + "epoch": 2.13775634765625e-05, + "step": 14010, + "training_step_time": 0.11174392700195312 + }, + { + "epoch": 2.137908935546875e-05, + "model_forward_time": 0.025606155395507812, + "step": 14011 + }, + { + "epoch": 2.137908935546875e-05, + "step": 14011, + "training_step_time": 0.10557889938354492 + }, + { + "epoch": 2.1380615234375e-05, + "model_forward_time": 0.02539968490600586, + "step": 14012 + }, + { + "epoch": 2.1380615234375e-05, + "step": 14012, + "training_step_time": 0.10534000396728516 + }, + { + "epoch": 2.138214111328125e-05, + "model_forward_time": 0.025237321853637695, + "step": 14013 + }, + { + "epoch": 2.138214111328125e-05, + "step": 14013, + "training_step_time": 0.11093735694885254 + }, + { + "epoch": 2.13836669921875e-05, + "model_forward_time": 0.02521204948425293, + "step": 14014 + }, + { + "epoch": 2.13836669921875e-05, + "step": 14014, + "training_step_time": 0.11744427680969238 + }, + { + "epoch": 2.138519287109375e-05, + "model_forward_time": 0.02526998519897461, + "step": 14015 + }, + { + "epoch": 2.138519287109375e-05, + "step": 14015, + "training_step_time": 0.10761380195617676 + }, + { + "epoch": 2.138671875e-05, + "model_forward_time": 0.025046110153198242, + "step": 14016 + }, + { + "epoch": 2.138671875e-05, + "step": 14016, + "training_step_time": 0.128798246383667 + }, + { + "epoch": 2.138824462890625e-05, + "model_forward_time": 0.024949312210083008, + "step": 14017 + }, + { + "epoch": 2.138824462890625e-05, + "step": 14017, + "training_step_time": 0.12284350395202637 + }, + { + "epoch": 2.13897705078125e-05, + "model_forward_time": 0.025310993194580078, + "step": 14018 + }, + { + "epoch": 2.13897705078125e-05, + "step": 14018, + "training_step_time": 0.13483190536499023 + }, + { + "epoch": 2.139129638671875e-05, + "model_forward_time": 0.02504277229309082, + "step": 14019 + }, + { + "epoch": 2.139129638671875e-05, + "step": 14019, + "training_step_time": 0.10747265815734863 + }, + { + "epoch": 2.1392822265625e-05, + "grad_norm": 0.566034197807312, + "learning_rate": 5.9477321694408606e-05, + "loss": 0.0253, + "step": 14020 + }, + { + "epoch": 2.1392822265625e-05, + "model_forward_time": 0.025091171264648438, + "step": 14020 + }, + { + "epoch": 2.1392822265625e-05, + "step": 14020, + "training_step_time": 0.1680295467376709 + }, + { + "epoch": 2.139434814453125e-05, + "model_forward_time": 0.024509191513061523, + "step": 14021 + }, + { + "epoch": 2.139434814453125e-05, + "step": 14021, + "training_step_time": 0.13176274299621582 + }, + { + "epoch": 2.13958740234375e-05, + "model_forward_time": 0.024382829666137695, + "step": 14022 + }, + { + "epoch": 2.13958740234375e-05, + "step": 14022, + "training_step_time": 0.11061596870422363 + }, + { + "epoch": 2.139739990234375e-05, + "model_forward_time": 0.0254058837890625, + "step": 14023 + }, + { + "epoch": 2.139739990234375e-05, + "step": 14023, + "training_step_time": 0.10433769226074219 + }, + { + "epoch": 2.139892578125e-05, + "model_forward_time": 0.025636911392211914, + "step": 14024 + }, + { + "epoch": 2.139892578125e-05, + "step": 14024, + "training_step_time": 0.10586047172546387 + }, + { + "epoch": 2.140045166015625e-05, + "model_forward_time": 0.025478363037109375, + "step": 14025 + }, + { + "epoch": 2.140045166015625e-05, + "step": 14025, + "training_step_time": 0.10463809967041016 + }, + { + "epoch": 2.14019775390625e-05, + "model_forward_time": 0.025357961654663086, + "step": 14026 + }, + { + "epoch": 2.14019775390625e-05, + "step": 14026, + "training_step_time": 0.10375833511352539 + }, + { + "epoch": 2.140350341796875e-05, + "model_forward_time": 0.02506399154663086, + "step": 14027 + }, + { + "epoch": 2.140350341796875e-05, + "step": 14027, + "training_step_time": 0.10563278198242188 + }, + { + "epoch": 2.1405029296875e-05, + "model_forward_time": 0.025147676467895508, + "step": 14028 + }, + { + "epoch": 2.1405029296875e-05, + "step": 14028, + "training_step_time": 0.1040036678314209 + }, + { + "epoch": 2.140655517578125e-05, + "model_forward_time": 0.0249788761138916, + "step": 14029 + }, + { + "epoch": 2.140655517578125e-05, + "step": 14029, + "training_step_time": 0.10353374481201172 + }, + { + "epoch": 2.14080810546875e-05, + "grad_norm": 0.4070761501789093, + "learning_rate": 5.9423199436885345e-05, + "loss": 0.0216, + "step": 14030 + }, + { + "epoch": 2.14080810546875e-05, + "model_forward_time": 0.024532556533813477, + "step": 14030 + }, + { + "epoch": 2.14080810546875e-05, + "step": 14030, + "training_step_time": 0.10291314125061035 + }, + { + "epoch": 2.140960693359375e-05, + "model_forward_time": 0.02493429183959961, + "step": 14031 + }, + { + "epoch": 2.140960693359375e-05, + "step": 14031, + "training_step_time": 0.10916590690612793 + }, + { + "epoch": 2.14111328125e-05, + "model_forward_time": 0.025258302688598633, + "step": 14032 + }, + { + "epoch": 2.14111328125e-05, + "step": 14032, + "training_step_time": 0.10734677314758301 + }, + { + "epoch": 2.141265869140625e-05, + "model_forward_time": 0.024966001510620117, + "step": 14033 + }, + { + "epoch": 2.141265869140625e-05, + "step": 14033, + "training_step_time": 0.10363364219665527 + }, + { + "epoch": 2.14141845703125e-05, + "model_forward_time": 0.02516961097717285, + "step": 14034 + }, + { + "epoch": 2.14141845703125e-05, + "step": 14034, + "training_step_time": 0.10454416275024414 + }, + { + "epoch": 2.141571044921875e-05, + "model_forward_time": 0.025236129760742188, + "step": 14035 + }, + { + "epoch": 2.141571044921875e-05, + "step": 14035, + "training_step_time": 0.10728049278259277 + }, + { + "epoch": 2.1417236328125e-05, + "model_forward_time": 0.024898290634155273, + "step": 14036 + }, + { + "epoch": 2.1417236328125e-05, + "step": 14036, + "training_step_time": 0.10718703269958496 + }, + { + "epoch": 2.141876220703125e-05, + "model_forward_time": 0.02476644515991211, + "step": 14037 + }, + { + "epoch": 2.141876220703125e-05, + "step": 14037, + "training_step_time": 0.10628485679626465 + }, + { + "epoch": 2.14202880859375e-05, + "model_forward_time": 0.025035619735717773, + "step": 14038 + }, + { + "epoch": 2.14202880859375e-05, + "step": 14038, + "training_step_time": 0.10842156410217285 + }, + { + "epoch": 2.142181396484375e-05, + "model_forward_time": 0.025457143783569336, + "step": 14039 + }, + { + "epoch": 2.142181396484375e-05, + "step": 14039, + "training_step_time": 0.10665583610534668 + }, + { + "epoch": 2.142333984375e-05, + "grad_norm": 0.5166757702827454, + "learning_rate": 5.9369065729286245e-05, + "loss": 0.0149, + "step": 14040 + }, + { + "epoch": 2.142333984375e-05, + "model_forward_time": 0.025465726852416992, + "step": 14040 + }, + { + "epoch": 2.142333984375e-05, + "step": 14040, + "training_step_time": 0.10839462280273438 + }, + { + "epoch": 2.142486572265625e-05, + "model_forward_time": 0.02562737464904785, + "step": 14041 + }, + { + "epoch": 2.142486572265625e-05, + "step": 14041, + "training_step_time": 0.10825490951538086 + }, + { + "epoch": 2.14263916015625e-05, + "model_forward_time": 0.0253446102142334, + "step": 14042 + }, + { + "epoch": 2.14263916015625e-05, + "step": 14042, + "training_step_time": 0.10557222366333008 + }, + { + "epoch": 2.142791748046875e-05, + "model_forward_time": 0.025227069854736328, + "step": 14043 + }, + { + "epoch": 2.142791748046875e-05, + "step": 14043, + "training_step_time": 0.10369324684143066 + }, + { + "epoch": 2.1429443359375e-05, + "model_forward_time": 0.02515697479248047, + "step": 14044 + }, + { + "epoch": 2.1429443359375e-05, + "step": 14044, + "training_step_time": 0.10808372497558594 + }, + { + "epoch": 2.143096923828125e-05, + "model_forward_time": 0.025079011917114258, + "step": 14045 + }, + { + "epoch": 2.143096923828125e-05, + "step": 14045, + "training_step_time": 0.10954904556274414 + }, + { + "epoch": 2.14324951171875e-05, + "model_forward_time": 0.025165796279907227, + "step": 14046 + }, + { + "epoch": 2.14324951171875e-05, + "step": 14046, + "training_step_time": 0.11055994033813477 + }, + { + "epoch": 2.143402099609375e-05, + "model_forward_time": 0.026336193084716797, + "step": 14047 + }, + { + "epoch": 2.143402099609375e-05, + "step": 14047, + "training_step_time": 0.15317058563232422 + }, + { + "epoch": 2.1435546875e-05, + "model_forward_time": 0.024962425231933594, + "step": 14048 + }, + { + "epoch": 2.1435546875e-05, + "step": 14048, + "training_step_time": 0.14794301986694336 + }, + { + "epoch": 2.143707275390625e-05, + "model_forward_time": 0.024369239807128906, + "step": 14049 + }, + { + "epoch": 2.143707275390625e-05, + "step": 14049, + "training_step_time": 0.1387336254119873 + }, + { + "epoch": 2.14385986328125e-05, + "grad_norm": 0.39149346947669983, + "learning_rate": 5.9314920637388815e-05, + "loss": 0.0145, + "step": 14050 + }, + { + "epoch": 2.14385986328125e-05, + "model_forward_time": 0.024322986602783203, + "step": 14050 + }, + { + "epoch": 2.14385986328125e-05, + "step": 14050, + "training_step_time": 0.19942164421081543 + }, + { + "epoch": 2.144012451171875e-05, + "model_forward_time": 0.024318456649780273, + "step": 14051 + }, + { + "epoch": 2.144012451171875e-05, + "step": 14051, + "training_step_time": 0.15562963485717773 + }, + { + "epoch": 2.1441650390625e-05, + "model_forward_time": 0.0246737003326416, + "step": 14052 + }, + { + "epoch": 2.1441650390625e-05, + "step": 14052, + "training_step_time": 0.1476426124572754 + }, + { + "epoch": 2.144317626953125e-05, + "model_forward_time": 0.024805307388305664, + "step": 14053 + }, + { + "epoch": 2.144317626953125e-05, + "step": 14053, + "training_step_time": 0.19887042045593262 + }, + { + "epoch": 2.14447021484375e-05, + "model_forward_time": 0.024436235427856445, + "step": 14054 + }, + { + "epoch": 2.14447021484375e-05, + "step": 14054, + "training_step_time": 0.10278058052062988 + }, + { + "epoch": 2.144622802734375e-05, + "model_forward_time": 0.024085283279418945, + "step": 14055 + }, + { + "epoch": 2.144622802734375e-05, + "step": 14055, + "training_step_time": 0.14394831657409668 + }, + { + "epoch": 2.144775390625e-05, + "model_forward_time": 0.024950265884399414, + "step": 14056 + }, + { + "epoch": 2.144775390625e-05, + "step": 14056, + "training_step_time": 0.10691189765930176 + }, + { + "epoch": 2.144927978515625e-05, + "model_forward_time": 0.02534198760986328, + "step": 14057 + }, + { + "epoch": 2.144927978515625e-05, + "step": 14057, + "training_step_time": 0.20055532455444336 + }, + { + "epoch": 2.14508056640625e-05, + "model_forward_time": 0.024526357650756836, + "step": 14058 + }, + { + "epoch": 2.14508056640625e-05, + "step": 14058, + "training_step_time": 0.14471793174743652 + }, + { + "epoch": 2.145233154296875e-05, + "model_forward_time": 0.02510547637939453, + "step": 14059 + }, + { + "epoch": 2.145233154296875e-05, + "step": 14059, + "training_step_time": 0.10533833503723145 + }, + { + "epoch": 2.1453857421875e-05, + "grad_norm": 0.41174158453941345, + "learning_rate": 5.9260764226984476e-05, + "loss": 0.0208, + "step": 14060 + }, + { + "epoch": 2.1453857421875e-05, + "model_forward_time": 0.025342702865600586, + "step": 14060 + }, + { + "epoch": 2.1453857421875e-05, + "step": 14060, + "training_step_time": 0.1174924373626709 + }, + { + "epoch": 2.145538330078125e-05, + "model_forward_time": 0.024910449981689453, + "step": 14061 + }, + { + "epoch": 2.145538330078125e-05, + "step": 14061, + "training_step_time": 0.2035226821899414 + }, + { + "epoch": 2.14569091796875e-05, + "model_forward_time": 0.024322509765625, + "step": 14062 + }, + { + "epoch": 2.14569091796875e-05, + "step": 14062, + "training_step_time": 0.1265122890472412 + }, + { + "epoch": 2.145843505859375e-05, + "model_forward_time": 0.024437904357910156, + "step": 14063 + }, + { + "epoch": 2.145843505859375e-05, + "step": 14063, + "training_step_time": 0.10659003257751465 + }, + { + "epoch": 2.14599609375e-05, + "model_forward_time": 0.02566218376159668, + "step": 14064 + }, + { + "epoch": 2.14599609375e-05, + "step": 14064, + "training_step_time": 0.11966443061828613 + }, + { + "epoch": 2.146148681640625e-05, + "model_forward_time": 0.02475261688232422, + "step": 14065 + }, + { + "epoch": 2.146148681640625e-05, + "step": 14065, + "training_step_time": 0.1100001335144043 + }, + { + "epoch": 2.14630126953125e-05, + "model_forward_time": 0.025791645050048828, + "step": 14066 + }, + { + "epoch": 2.14630126953125e-05, + "step": 14066, + "training_step_time": 0.11473727226257324 + }, + { + "epoch": 2.146453857421875e-05, + "model_forward_time": 0.025249242782592773, + "step": 14067 + }, + { + "epoch": 2.146453857421875e-05, + "step": 14067, + "training_step_time": 0.13289499282836914 + }, + { + "epoch": 2.1466064453125e-05, + "model_forward_time": 0.025318145751953125, + "step": 14068 + }, + { + "epoch": 2.1466064453125e-05, + "step": 14068, + "training_step_time": 0.11708378791809082 + }, + { + "epoch": 2.146759033203125e-05, + "model_forward_time": 0.02529764175415039, + "step": 14069 + }, + { + "epoch": 2.146759033203125e-05, + "step": 14069, + "training_step_time": 0.10348391532897949 + }, + { + "epoch": 2.14691162109375e-05, + "grad_norm": 0.40277501940727234, + "learning_rate": 5.9206596563878357e-05, + "loss": 0.0169, + "step": 14070 + }, + { + "epoch": 2.14691162109375e-05, + "model_forward_time": 0.02511310577392578, + "step": 14070 + }, + { + "epoch": 2.14691162109375e-05, + "step": 14070, + "training_step_time": 0.10499048233032227 + }, + { + "epoch": 2.147064208984375e-05, + "model_forward_time": 0.025663375854492188, + "step": 14071 + }, + { + "epoch": 2.147064208984375e-05, + "step": 14071, + "training_step_time": 0.10557961463928223 + }, + { + "epoch": 2.147216796875e-05, + "model_forward_time": 0.025542259216308594, + "step": 14072 + }, + { + "epoch": 2.147216796875e-05, + "step": 14072, + "training_step_time": 0.10620927810668945 + }, + { + "epoch": 2.147369384765625e-05, + "model_forward_time": 0.025388002395629883, + "step": 14073 + }, + { + "epoch": 2.147369384765625e-05, + "step": 14073, + "training_step_time": 0.10884952545166016 + }, + { + "epoch": 2.14752197265625e-05, + "model_forward_time": 0.025684833526611328, + "step": 14074 + }, + { + "epoch": 2.14752197265625e-05, + "step": 14074, + "training_step_time": 0.11088728904724121 + }, + { + "epoch": 2.147674560546875e-05, + "model_forward_time": 0.025590181350708008, + "step": 14075 + }, + { + "epoch": 2.147674560546875e-05, + "step": 14075, + "training_step_time": 0.11618828773498535 + }, + { + "epoch": 2.1478271484375e-05, + "model_forward_time": 0.025785446166992188, + "step": 14076 + }, + { + "epoch": 2.1478271484375e-05, + "step": 14076, + "training_step_time": 0.11698675155639648 + }, + { + "epoch": 2.147979736328125e-05, + "model_forward_time": 0.025569438934326172, + "step": 14077 + }, + { + "epoch": 2.147979736328125e-05, + "step": 14077, + "training_step_time": 0.11405444145202637 + }, + { + "epoch": 2.14813232421875e-05, + "model_forward_time": 0.025281429290771484, + "step": 14078 + }, + { + "epoch": 2.14813232421875e-05, + "step": 14078, + "training_step_time": 0.11471676826477051 + }, + { + "epoch": 2.148284912109375e-05, + "model_forward_time": 0.025444746017456055, + "step": 14079 + }, + { + "epoch": 2.148284912109375e-05, + "step": 14079, + "training_step_time": 0.11733126640319824 + }, + { + "epoch": 2.1484375e-05, + "grad_norm": 0.17169028520584106, + "learning_rate": 5.915241771388931e-05, + "loss": 0.017, + "step": 14080 + }, + { + "epoch": 2.1484375e-05, + "model_forward_time": 0.025615692138671875, + "step": 14080 + }, + { + "epoch": 2.1484375e-05, + "step": 14080, + "training_step_time": 0.11424565315246582 + }, + { + "epoch": 2.148590087890625e-05, + "model_forward_time": 0.025841236114501953, + "step": 14081 + }, + { + "epoch": 2.148590087890625e-05, + "step": 14081, + "training_step_time": 0.11140823364257812 + }, + { + "epoch": 2.14874267578125e-05, + "model_forward_time": 0.027458667755126953, + "step": 14082 + }, + { + "epoch": 2.14874267578125e-05, + "step": 14082, + "training_step_time": 0.10958194732666016 + }, + { + "epoch": 2.148895263671875e-05, + "model_forward_time": 0.026074886322021484, + "step": 14083 + }, + { + "epoch": 2.148895263671875e-05, + "step": 14083, + "training_step_time": 0.11341619491577148 + }, + { + "epoch": 2.1490478515625e-05, + "model_forward_time": 0.025475502014160156, + "step": 14084 + }, + { + "epoch": 2.1490478515625e-05, + "step": 14084, + "training_step_time": 0.10708928108215332 + }, + { + "epoch": 2.149200439453125e-05, + "model_forward_time": 0.024705171585083008, + "step": 14085 + }, + { + "epoch": 2.149200439453125e-05, + "step": 14085, + "training_step_time": 0.10597777366638184 + }, + { + "epoch": 2.14935302734375e-05, + "model_forward_time": 0.025627851486206055, + "step": 14086 + }, + { + "epoch": 2.14935302734375e-05, + "step": 14086, + "training_step_time": 0.10751771926879883 + }, + { + "epoch": 2.149505615234375e-05, + "model_forward_time": 0.025578975677490234, + "step": 14087 + }, + { + "epoch": 2.149505615234375e-05, + "step": 14087, + "training_step_time": 0.1107940673828125 + }, + { + "epoch": 2.149658203125e-05, + "model_forward_time": 0.025463104248046875, + "step": 14088 + }, + { + "epoch": 2.149658203125e-05, + "step": 14088, + "training_step_time": 0.10952115058898926 + }, + { + "epoch": 2.149810791015625e-05, + "model_forward_time": 0.025266647338867188, + "step": 14089 + }, + { + "epoch": 2.149810791015625e-05, + "step": 14089, + "training_step_time": 0.10568642616271973 + }, + { + "epoch": 2.14996337890625e-05, + "grad_norm": 0.18737411499023438, + "learning_rate": 5.909822774284971e-05, + "loss": 0.0114, + "step": 14090 + }, + { + "epoch": 2.14996337890625e-05, + "model_forward_time": 0.02570343017578125, + "step": 14090 + }, + { + "epoch": 2.14996337890625e-05, + "step": 14090, + "training_step_time": 0.10887432098388672 + }, + { + "epoch": 2.150115966796875e-05, + "model_forward_time": 0.025948286056518555, + "step": 14091 + }, + { + "epoch": 2.150115966796875e-05, + "step": 14091, + "training_step_time": 0.18009281158447266 + }, + { + "epoch": 2.1502685546875e-05, + "model_forward_time": 0.024937868118286133, + "step": 14092 + }, + { + "epoch": 2.1502685546875e-05, + "step": 14092, + "training_step_time": 0.1372060775756836 + }, + { + "epoch": 2.150421142578125e-05, + "model_forward_time": 0.02493143081665039, + "step": 14093 + }, + { + "epoch": 2.150421142578125e-05, + "step": 14093, + "training_step_time": 0.10674643516540527 + }, + { + "epoch": 2.15057373046875e-05, + "model_forward_time": 0.02538323402404785, + "step": 14094 + }, + { + "epoch": 2.15057373046875e-05, + "step": 14094, + "training_step_time": 0.11157369613647461 + }, + { + "epoch": 2.150726318359375e-05, + "model_forward_time": 0.02501702308654785, + "step": 14095 + }, + { + "epoch": 2.150726318359375e-05, + "step": 14095, + "training_step_time": 0.1174626350402832 + }, + { + "epoch": 2.15087890625e-05, + "model_forward_time": 0.025490283966064453, + "step": 14096 + }, + { + "epoch": 2.15087890625e-05, + "step": 14096, + "training_step_time": 0.22123122215270996 + }, + { + "epoch": 2.151031494140625e-05, + "model_forward_time": 0.026398897171020508, + "step": 14097 + }, + { + "epoch": 2.151031494140625e-05, + "step": 14097, + "training_step_time": 0.1315312385559082 + }, + { + "epoch": 2.15118408203125e-05, + "model_forward_time": 0.02446150779724121, + "step": 14098 + }, + { + "epoch": 2.15118408203125e-05, + "step": 14098, + "training_step_time": 0.11582279205322266 + }, + { + "epoch": 2.151336669921875e-05, + "model_forward_time": 0.025528430938720703, + "step": 14099 + }, + { + "epoch": 2.151336669921875e-05, + "step": 14099, + "training_step_time": 0.11322712898254395 + }, + { + "epoch": 2.1514892578125e-05, + "grad_norm": 0.16362541913986206, + "learning_rate": 5.90440267166055e-05, + "loss": 0.0182, + "step": 14100 + }, + { + "epoch": 2.1514892578125e-05, + "model_forward_time": 0.025630950927734375, + "step": 14100 + }, + { + "epoch": 2.1514892578125e-05, + "step": 14100, + "training_step_time": 0.10351681709289551 + }, + { + "epoch": 2.151641845703125e-05, + "model_forward_time": 0.024866104125976562, + "step": 14101 + }, + { + "epoch": 2.151641845703125e-05, + "step": 14101, + "training_step_time": 0.11760997772216797 + }, + { + "epoch": 2.15179443359375e-05, + "model_forward_time": 0.024771928787231445, + "step": 14102 + }, + { + "epoch": 2.15179443359375e-05, + "step": 14102, + "training_step_time": 0.11686849594116211 + }, + { + "epoch": 2.151947021484375e-05, + "model_forward_time": 0.025556564331054688, + "step": 14103 + }, + { + "epoch": 2.151947021484375e-05, + "step": 14103, + "training_step_time": 0.11809396743774414 + }, + { + "epoch": 2.152099609375e-05, + "model_forward_time": 0.02827143669128418, + "step": 14104 + }, + { + "epoch": 2.152099609375e-05, + "step": 14104, + "training_step_time": 0.11651730537414551 + }, + { + "epoch": 2.152252197265625e-05, + "model_forward_time": 0.025686264038085938, + "step": 14105 + }, + { + "epoch": 2.152252197265625e-05, + "step": 14105, + "training_step_time": 0.11438918113708496 + }, + { + "epoch": 2.15240478515625e-05, + "model_forward_time": 0.02637457847595215, + "step": 14106 + }, + { + "epoch": 2.15240478515625e-05, + "step": 14106, + "training_step_time": 0.12194037437438965 + }, + { + "epoch": 2.152557373046875e-05, + "model_forward_time": 0.02580714225769043, + "step": 14107 + }, + { + "epoch": 2.152557373046875e-05, + "step": 14107, + "training_step_time": 0.11693167686462402 + }, + { + "epoch": 2.1527099609375e-05, + "model_forward_time": 0.0257415771484375, + "step": 14108 + }, + { + "epoch": 2.1527099609375e-05, + "step": 14108, + "training_step_time": 0.10375785827636719 + }, + { + "epoch": 2.152862548828125e-05, + "model_forward_time": 0.02541518211364746, + "step": 14109 + }, + { + "epoch": 2.152862548828125e-05, + "step": 14109, + "training_step_time": 0.13335847854614258 + }, + { + "epoch": 2.15301513671875e-05, + "grad_norm": 0.3614904582500458, + "learning_rate": 5.8989814701016035e-05, + "loss": 0.0166, + "step": 14110 + }, + { + "epoch": 2.15301513671875e-05, + "model_forward_time": 0.025261640548706055, + "step": 14110 + }, + { + "epoch": 2.15301513671875e-05, + "step": 14110, + "training_step_time": 0.10974979400634766 + }, + { + "epoch": 2.153167724609375e-05, + "model_forward_time": 0.0260317325592041, + "step": 14111 + }, + { + "epoch": 2.153167724609375e-05, + "step": 14111, + "training_step_time": 0.1247396469116211 + }, + { + "epoch": 2.1533203125e-05, + "model_forward_time": 0.025685787200927734, + "step": 14112 + }, + { + "epoch": 2.1533203125e-05, + "step": 14112, + "training_step_time": 0.1055452823638916 + }, + { + "epoch": 2.153472900390625e-05, + "model_forward_time": 0.025370121002197266, + "step": 14113 + }, + { + "epoch": 2.153472900390625e-05, + "step": 14113, + "training_step_time": 0.1899101734161377 + }, + { + "epoch": 2.15362548828125e-05, + "model_forward_time": 0.025854825973510742, + "step": 14114 + }, + { + "epoch": 2.15362548828125e-05, + "step": 14114, + "training_step_time": 0.15713882446289062 + }, + { + "epoch": 2.153778076171875e-05, + "model_forward_time": 0.024793624877929688, + "step": 14115 + }, + { + "epoch": 2.153778076171875e-05, + "step": 14115, + "training_step_time": 0.12921881675720215 + }, + { + "epoch": 2.1539306640625e-05, + "model_forward_time": 0.024428606033325195, + "step": 14116 + }, + { + "epoch": 2.1539306640625e-05, + "step": 14116, + "training_step_time": 0.14049077033996582 + }, + { + "epoch": 2.154083251953125e-05, + "model_forward_time": 0.029732704162597656, + "step": 14117 + }, + { + "epoch": 2.154083251953125e-05, + "step": 14117, + "training_step_time": 0.13073372840881348 + }, + { + "epoch": 2.15423583984375e-05, + "model_forward_time": 0.024417638778686523, + "step": 14118 + }, + { + "epoch": 2.15423583984375e-05, + "step": 14118, + "training_step_time": 0.12877964973449707 + }, + { + "epoch": 2.154388427734375e-05, + "model_forward_time": 0.025296926498413086, + "step": 14119 + }, + { + "epoch": 2.154388427734375e-05, + "step": 14119, + "training_step_time": 0.13159656524658203 + }, + { + "epoch": 2.154541015625e-05, + "grad_norm": 0.17757172882556915, + "learning_rate": 5.8935591761954025e-05, + "loss": 0.0124, + "step": 14120 + }, + { + "epoch": 2.154541015625e-05, + "model_forward_time": 0.02501225471496582, + "step": 14120 + }, + { + "epoch": 2.154541015625e-05, + "step": 14120, + "training_step_time": 0.12037229537963867 + }, + { + "epoch": 2.154693603515625e-05, + "model_forward_time": 0.024975061416625977, + "step": 14121 + }, + { + "epoch": 2.154693603515625e-05, + "step": 14121, + "training_step_time": 0.12013745307922363 + }, + { + "epoch": 2.15484619140625e-05, + "model_forward_time": 0.02554464340209961, + "step": 14122 + }, + { + "epoch": 2.15484619140625e-05, + "step": 14122, + "training_step_time": 0.11409902572631836 + }, + { + "epoch": 2.154998779296875e-05, + "model_forward_time": 0.025319814682006836, + "step": 14123 + }, + { + "epoch": 2.154998779296875e-05, + "step": 14123, + "training_step_time": 0.11845993995666504 + }, + { + "epoch": 2.1551513671875e-05, + "model_forward_time": 0.024976730346679688, + "step": 14124 + }, + { + "epoch": 2.1551513671875e-05, + "step": 14124, + "training_step_time": 0.11191344261169434 + }, + { + "epoch": 2.155303955078125e-05, + "model_forward_time": 0.024422645568847656, + "step": 14125 + }, + { + "epoch": 2.155303955078125e-05, + "step": 14125, + "training_step_time": 0.10864067077636719 + }, + { + "epoch": 2.15545654296875e-05, + "model_forward_time": 0.02524280548095703, + "step": 14126 + }, + { + "epoch": 2.15545654296875e-05, + "step": 14126, + "training_step_time": 0.11043024063110352 + }, + { + "epoch": 2.155609130859375e-05, + "model_forward_time": 0.025084972381591797, + "step": 14127 + }, + { + "epoch": 2.155609130859375e-05, + "step": 14127, + "training_step_time": 0.10823798179626465 + }, + { + "epoch": 2.15576171875e-05, + "model_forward_time": 0.02534651756286621, + "step": 14128 + }, + { + "epoch": 2.15576171875e-05, + "step": 14128, + "training_step_time": 0.1072242259979248 + }, + { + "epoch": 2.155914306640625e-05, + "model_forward_time": 0.025495290756225586, + "step": 14129 + }, + { + "epoch": 2.155914306640625e-05, + "step": 14129, + "training_step_time": 0.10645627975463867 + }, + { + "epoch": 2.15606689453125e-05, + "grad_norm": 0.4064501225948334, + "learning_rate": 5.888135796530544e-05, + "loss": 0.0228, + "step": 14130 + }, + { + "epoch": 2.15606689453125e-05, + "model_forward_time": 0.024997711181640625, + "step": 14130 + }, + { + "epoch": 2.15606689453125e-05, + "step": 14130, + "training_step_time": 0.10503482818603516 + }, + { + "epoch": 2.156219482421875e-05, + "model_forward_time": 0.025748252868652344, + "step": 14131 + }, + { + "epoch": 2.156219482421875e-05, + "step": 14131, + "training_step_time": 0.10864973068237305 + }, + { + "epoch": 2.1563720703125e-05, + "model_forward_time": 0.025141000747680664, + "step": 14132 + }, + { + "epoch": 2.1563720703125e-05, + "step": 14132, + "training_step_time": 0.10582256317138672 + }, + { + "epoch": 2.156524658203125e-05, + "model_forward_time": 0.025005578994750977, + "step": 14133 + }, + { + "epoch": 2.156524658203125e-05, + "step": 14133, + "training_step_time": 0.10696983337402344 + }, + { + "epoch": 2.15667724609375e-05, + "model_forward_time": 0.025117874145507812, + "step": 14134 + }, + { + "epoch": 2.15667724609375e-05, + "step": 14134, + "training_step_time": 0.10648870468139648 + }, + { + "epoch": 2.156829833984375e-05, + "model_forward_time": 0.025725841522216797, + "step": 14135 + }, + { + "epoch": 2.156829833984375e-05, + "step": 14135, + "training_step_time": 0.10704541206359863 + }, + { + "epoch": 2.156982421875e-05, + "model_forward_time": 0.025290489196777344, + "step": 14136 + }, + { + "epoch": 2.156982421875e-05, + "step": 14136, + "training_step_time": 0.10841941833496094 + }, + { + "epoch": 2.157135009765625e-05, + "model_forward_time": 0.025128602981567383, + "step": 14137 + }, + { + "epoch": 2.157135009765625e-05, + "step": 14137, + "training_step_time": 0.10879731178283691 + }, + { + "epoch": 2.15728759765625e-05, + "model_forward_time": 0.0250704288482666, + "step": 14138 + }, + { + "epoch": 2.15728759765625e-05, + "step": 14138, + "training_step_time": 0.13630914688110352 + }, + { + "epoch": 2.157440185546875e-05, + "model_forward_time": 0.02517867088317871, + "step": 14139 + }, + { + "epoch": 2.157440185546875e-05, + "step": 14139, + "training_step_time": 0.10772919654846191 + }, + { + "epoch": 2.1575927734375e-05, + "grad_norm": 0.36874639987945557, + "learning_rate": 5.8827113376969465e-05, + "loss": 0.0189, + "step": 14140 + }, + { + "epoch": 2.1575927734375e-05, + "model_forward_time": 0.025316238403320312, + "step": 14140 + }, + { + "epoch": 2.1575927734375e-05, + "step": 14140, + "training_step_time": 0.12947535514831543 + }, + { + "epoch": 2.157745361328125e-05, + "model_forward_time": 0.025620698928833008, + "step": 14141 + }, + { + "epoch": 2.157745361328125e-05, + "step": 14141, + "training_step_time": 0.11558699607849121 + }, + { + "epoch": 2.15789794921875e-05, + "model_forward_time": 0.025434255599975586, + "step": 14142 + }, + { + "epoch": 2.15789794921875e-05, + "step": 14142, + "training_step_time": 0.18879175186157227 + }, + { + "epoch": 2.158050537109375e-05, + "model_forward_time": 0.024472951889038086, + "step": 14143 + }, + { + "epoch": 2.158050537109375e-05, + "step": 14143, + "training_step_time": 0.16038203239440918 + }, + { + "epoch": 2.158203125e-05, + "model_forward_time": 0.024168014526367188, + "step": 14144 + }, + { + "epoch": 2.158203125e-05, + "step": 14144, + "training_step_time": 0.12031984329223633 + }, + { + "epoch": 2.158355712890625e-05, + "model_forward_time": 0.02750706672668457, + "step": 14145 + }, + { + "epoch": 2.158355712890625e-05, + "step": 14145, + "training_step_time": 0.11200141906738281 + }, + { + "epoch": 2.15850830078125e-05, + "model_forward_time": 0.025278806686401367, + "step": 14146 + }, + { + "epoch": 2.15850830078125e-05, + "step": 14146, + "training_step_time": 0.14926481246948242 + }, + { + "epoch": 2.158660888671875e-05, + "model_forward_time": 0.02560567855834961, + "step": 14147 + }, + { + "epoch": 2.158660888671875e-05, + "step": 14147, + "training_step_time": 0.10370731353759766 + }, + { + "epoch": 2.1588134765625e-05, + "model_forward_time": 0.024811983108520508, + "step": 14148 + }, + { + "epoch": 2.1588134765625e-05, + "step": 14148, + "training_step_time": 0.14701366424560547 + }, + { + "epoch": 2.158966064453125e-05, + "model_forward_time": 0.025099754333496094, + "step": 14149 + }, + { + "epoch": 2.158966064453125e-05, + "step": 14149, + "training_step_time": 0.12157464027404785 + }, + { + "epoch": 2.15911865234375e-05, + "grad_norm": 0.2832779586315155, + "learning_rate": 5.8772858062858416e-05, + "loss": 0.0127, + "step": 14150 + }, + { + "epoch": 2.15911865234375e-05, + "model_forward_time": 0.025452375411987305, + "step": 14150 + }, + { + "epoch": 2.15911865234375e-05, + "step": 14150, + "training_step_time": 0.10580182075500488 + }, + { + "epoch": 2.159271240234375e-05, + "model_forward_time": 0.025459766387939453, + "step": 14151 + }, + { + "epoch": 2.159271240234375e-05, + "step": 14151, + "training_step_time": 0.11262893676757812 + }, + { + "epoch": 2.159423828125e-05, + "model_forward_time": 0.025344133377075195, + "step": 14152 + }, + { + "epoch": 2.159423828125e-05, + "step": 14152, + "training_step_time": 0.10612893104553223 + }, + { + "epoch": 2.159576416015625e-05, + "model_forward_time": 0.025527238845825195, + "step": 14153 + }, + { + "epoch": 2.159576416015625e-05, + "step": 14153, + "training_step_time": 0.11699652671813965 + }, + { + "epoch": 2.15972900390625e-05, + "model_forward_time": 0.02534961700439453, + "step": 14154 + }, + { + "epoch": 2.15972900390625e-05, + "step": 14154, + "training_step_time": 0.1320171356201172 + }, + { + "epoch": 2.159881591796875e-05, + "model_forward_time": 0.025344371795654297, + "step": 14155 + }, + { + "epoch": 2.159881591796875e-05, + "step": 14155, + "training_step_time": 0.16156935691833496 + }, + { + "epoch": 2.1600341796875e-05, + "model_forward_time": 0.024556636810302734, + "step": 14156 + }, + { + "epoch": 2.1600341796875e-05, + "step": 14156, + "training_step_time": 0.1331944465637207 + }, + { + "epoch": 2.160186767578125e-05, + "model_forward_time": 0.02551412582397461, + "step": 14157 + }, + { + "epoch": 2.160186767578125e-05, + "step": 14157, + "training_step_time": 0.12624788284301758 + }, + { + "epoch": 2.16033935546875e-05, + "model_forward_time": 0.025478839874267578, + "step": 14158 + }, + { + "epoch": 2.16033935546875e-05, + "step": 14158, + "training_step_time": 0.156083345413208 + }, + { + "epoch": 2.160491943359375e-05, + "model_forward_time": 0.024598360061645508, + "step": 14159 + }, + { + "epoch": 2.160491943359375e-05, + "step": 14159, + "training_step_time": 0.17516732215881348 + }, + { + "epoch": 2.16064453125e-05, + "grad_norm": 0.40027958154678345, + "learning_rate": 5.871859208889759e-05, + "loss": 0.0274, + "step": 14160 + }, + { + "epoch": 2.16064453125e-05, + "model_forward_time": 0.025252580642700195, + "step": 14160 + }, + { + "epoch": 2.16064453125e-05, + "step": 14160, + "training_step_time": 0.16366815567016602 + }, + { + "epoch": 2.160797119140625e-05, + "model_forward_time": 0.02539539337158203, + "step": 14161 + }, + { + "epoch": 2.160797119140625e-05, + "step": 14161, + "training_step_time": 0.10684776306152344 + }, + { + "epoch": 2.16094970703125e-05, + "model_forward_time": 0.025539875030517578, + "step": 14162 + }, + { + "epoch": 2.16094970703125e-05, + "step": 14162, + "training_step_time": 0.10494709014892578 + }, + { + "epoch": 2.161102294921875e-05, + "model_forward_time": 0.025307178497314453, + "step": 14163 + }, + { + "epoch": 2.161102294921875e-05, + "step": 14163, + "training_step_time": 0.10545825958251953 + }, + { + "epoch": 2.1612548828125e-05, + "model_forward_time": 0.025830745697021484, + "step": 14164 + }, + { + "epoch": 2.1612548828125e-05, + "step": 14164, + "training_step_time": 0.1083676815032959 + }, + { + "epoch": 2.161407470703125e-05, + "model_forward_time": 0.026053667068481445, + "step": 14165 + }, + { + "epoch": 2.161407470703125e-05, + "step": 14165, + "training_step_time": 0.10612344741821289 + }, + { + "epoch": 2.16156005859375e-05, + "model_forward_time": 0.02515697479248047, + "step": 14166 + }, + { + "epoch": 2.16156005859375e-05, + "step": 14166, + "training_step_time": 0.10560202598571777 + }, + { + "epoch": 2.161712646484375e-05, + "model_forward_time": 0.02541184425354004, + "step": 14167 + }, + { + "epoch": 2.161712646484375e-05, + "step": 14167, + "training_step_time": 0.10646700859069824 + }, + { + "epoch": 2.161865234375e-05, + "model_forward_time": 0.02543163299560547, + "step": 14168 + }, + { + "epoch": 2.161865234375e-05, + "step": 14168, + "training_step_time": 0.10625100135803223 + }, + { + "epoch": 2.162017822265625e-05, + "model_forward_time": 0.02502274513244629, + "step": 14169 + }, + { + "epoch": 2.162017822265625e-05, + "step": 14169, + "training_step_time": 0.10677647590637207 + }, + { + "epoch": 2.16217041015625e-05, + "grad_norm": 0.36665669083595276, + "learning_rate": 5.866431552102526e-05, + "loss": 0.0162, + "step": 14170 + }, + { + "epoch": 2.16217041015625e-05, + "model_forward_time": 0.025372982025146484, + "step": 14170 + }, + { + "epoch": 2.16217041015625e-05, + "step": 14170, + "training_step_time": 0.10375285148620605 + }, + { + "epoch": 2.162322998046875e-05, + "model_forward_time": 0.025723695755004883, + "step": 14171 + }, + { + "epoch": 2.162322998046875e-05, + "step": 14171, + "training_step_time": 0.10509753227233887 + }, + { + "epoch": 2.1624755859375e-05, + "model_forward_time": 0.025551795959472656, + "step": 14172 + }, + { + "epoch": 2.1624755859375e-05, + "step": 14172, + "training_step_time": 0.10844302177429199 + }, + { + "epoch": 2.162628173828125e-05, + "model_forward_time": 0.02575540542602539, + "step": 14173 + }, + { + "epoch": 2.162628173828125e-05, + "step": 14173, + "training_step_time": 0.10551309585571289 + }, + { + "epoch": 2.16278076171875e-05, + "model_forward_time": 0.02541351318359375, + "step": 14174 + }, + { + "epoch": 2.16278076171875e-05, + "step": 14174, + "training_step_time": 0.10884428024291992 + }, + { + "epoch": 2.162933349609375e-05, + "model_forward_time": 0.0254366397857666, + "step": 14175 + }, + { + "epoch": 2.162933349609375e-05, + "step": 14175, + "training_step_time": 0.10845756530761719 + }, + { + "epoch": 2.1630859375e-05, + "model_forward_time": 0.025246381759643555, + "step": 14176 + }, + { + "epoch": 2.1630859375e-05, + "step": 14176, + "training_step_time": 0.10660719871520996 + }, + { + "epoch": 2.163238525390625e-05, + "model_forward_time": 0.025371074676513672, + "step": 14177 + }, + { + "epoch": 2.163238525390625e-05, + "step": 14177, + "training_step_time": 0.1057732105255127 + }, + { + "epoch": 2.16339111328125e-05, + "model_forward_time": 0.02570319175720215, + "step": 14178 + }, + { + "epoch": 2.16339111328125e-05, + "step": 14178, + "training_step_time": 0.10518550872802734 + }, + { + "epoch": 2.163543701171875e-05, + "model_forward_time": 0.025697946548461914, + "step": 14179 + }, + { + "epoch": 2.163543701171875e-05, + "step": 14179, + "training_step_time": 0.11314558982849121 + }, + { + "epoch": 2.1636962890625e-05, + "grad_norm": 0.27468445897102356, + "learning_rate": 5.861002842519259e-05, + "loss": 0.0144, + "step": 14180 + }, + { + "epoch": 2.1636962890625e-05, + "model_forward_time": 0.02560281753540039, + "step": 14180 + }, + { + "epoch": 2.1636962890625e-05, + "step": 14180, + "training_step_time": 0.12124824523925781 + }, + { + "epoch": 2.163848876953125e-05, + "model_forward_time": 0.0256345272064209, + "step": 14181 + }, + { + "epoch": 2.163848876953125e-05, + "step": 14181, + "training_step_time": 0.12052679061889648 + }, + { + "epoch": 2.16400146484375e-05, + "model_forward_time": 0.025391340255737305, + "step": 14182 + }, + { + "epoch": 2.16400146484375e-05, + "step": 14182, + "training_step_time": 0.11876845359802246 + }, + { + "epoch": 2.164154052734375e-05, + "model_forward_time": 0.02524399757385254, + "step": 14183 + }, + { + "epoch": 2.164154052734375e-05, + "step": 14183, + "training_step_time": 0.13223910331726074 + }, + { + "epoch": 2.164306640625e-05, + "model_forward_time": 0.025165319442749023, + "step": 14184 + }, + { + "epoch": 2.164306640625e-05, + "step": 14184, + "training_step_time": 0.13660001754760742 + }, + { + "epoch": 2.164459228515625e-05, + "model_forward_time": 0.0256807804107666, + "step": 14185 + }, + { + "epoch": 2.164459228515625e-05, + "step": 14185, + "training_step_time": 0.11591839790344238 + }, + { + "epoch": 2.16461181640625e-05, + "model_forward_time": 0.026081085205078125, + "step": 14186 + }, + { + "epoch": 2.16461181640625e-05, + "step": 14186, + "training_step_time": 0.11262989044189453 + }, + { + "epoch": 2.164764404296875e-05, + "model_forward_time": 0.026117801666259766, + "step": 14187 + }, + { + "epoch": 2.164764404296875e-05, + "step": 14187, + "training_step_time": 0.1134040355682373 + }, + { + "epoch": 2.1649169921875e-05, + "model_forward_time": 0.025333642959594727, + "step": 14188 + }, + { + "epoch": 2.1649169921875e-05, + "step": 14188, + "training_step_time": 0.10905623435974121 + }, + { + "epoch": 2.165069580078125e-05, + "model_forward_time": 0.0254361629486084, + "step": 14189 + }, + { + "epoch": 2.165069580078125e-05, + "step": 14189, + "training_step_time": 0.22716045379638672 + }, + { + "epoch": 2.16522216796875e-05, + "grad_norm": 0.6543409824371338, + "learning_rate": 5.85557308673635e-05, + "loss": 0.0187, + "step": 14190 + }, + { + "epoch": 2.16522216796875e-05, + "model_forward_time": 0.024791955947875977, + "step": 14190 + }, + { + "epoch": 2.16522216796875e-05, + "step": 14190, + "training_step_time": 0.12995529174804688 + }, + { + "epoch": 2.165374755859375e-05, + "model_forward_time": 0.024709463119506836, + "step": 14191 + }, + { + "epoch": 2.165374755859375e-05, + "step": 14191, + "training_step_time": 0.12517738342285156 + }, + { + "epoch": 2.16552734375e-05, + "model_forward_time": 0.02505636215209961, + "step": 14192 + }, + { + "epoch": 2.16552734375e-05, + "step": 14192, + "training_step_time": 0.1282491683959961 + }, + { + "epoch": 2.165679931640625e-05, + "model_forward_time": 0.025296926498413086, + "step": 14193 + }, + { + "epoch": 2.165679931640625e-05, + "step": 14193, + "training_step_time": 0.11142683029174805 + }, + { + "epoch": 2.16583251953125e-05, + "model_forward_time": 0.026098251342773438, + "step": 14194 + }, + { + "epoch": 2.16583251953125e-05, + "step": 14194, + "training_step_time": 0.1708202362060547 + }, + { + "epoch": 2.165985107421875e-05, + "model_forward_time": 0.027638673782348633, + "step": 14195 + }, + { + "epoch": 2.165985107421875e-05, + "step": 14195, + "training_step_time": 0.17023277282714844 + }, + { + "epoch": 2.1661376953125e-05, + "model_forward_time": 0.024567842483520508, + "step": 14196 + }, + { + "epoch": 2.1661376953125e-05, + "step": 14196, + "training_step_time": 0.10980224609375 + }, + { + "epoch": 2.166290283203125e-05, + "model_forward_time": 0.025326967239379883, + "step": 14197 + }, + { + "epoch": 2.166290283203125e-05, + "step": 14197, + "training_step_time": 0.11282753944396973 + }, + { + "epoch": 2.16644287109375e-05, + "model_forward_time": 0.02573561668395996, + "step": 14198 + }, + { + "epoch": 2.16644287109375e-05, + "step": 14198, + "training_step_time": 0.11056089401245117 + }, + { + "epoch": 2.166595458984375e-05, + "model_forward_time": 0.025129079818725586, + "step": 14199 + }, + { + "epoch": 2.166595458984375e-05, + "step": 14199, + "training_step_time": 0.1680583953857422 + }, + { + "epoch": 2.166748046875e-05, + "grad_norm": 0.3905617892742157, + "learning_rate": 5.850142291351466e-05, + "loss": 0.0181, + "step": 14200 + }, + { + "epoch": 2.166748046875e-05, + "model_forward_time": 0.026114225387573242, + "step": 14200 + }, + { + "epoch": 2.166748046875e-05, + "step": 14200, + "training_step_time": 0.18952727317810059 + }, + { + "epoch": 2.166900634765625e-05, + "model_forward_time": 0.02424931526184082, + "step": 14201 + }, + { + "epoch": 2.166900634765625e-05, + "step": 14201, + "training_step_time": 0.19691848754882812 + }, + { + "epoch": 2.16705322265625e-05, + "model_forward_time": 0.023989200592041016, + "step": 14202 + }, + { + "epoch": 2.16705322265625e-05, + "step": 14202, + "training_step_time": 0.17379045486450195 + }, + { + "epoch": 2.167205810546875e-05, + "model_forward_time": 0.024037837982177734, + "step": 14203 + }, + { + "epoch": 2.167205810546875e-05, + "step": 14203, + "training_step_time": 0.16141510009765625 + }, + { + "epoch": 2.1673583984375e-05, + "model_forward_time": 0.023444175720214844, + "step": 14204 + }, + { + "epoch": 2.1673583984375e-05, + "step": 14204, + "training_step_time": 0.15222668647766113 + }, + { + "epoch": 2.167510986328125e-05, + "model_forward_time": 0.024636030197143555, + "step": 14205 + }, + { + "epoch": 2.167510986328125e-05, + "step": 14205, + "training_step_time": 0.11674141883850098 + }, + { + "epoch": 2.16766357421875e-05, + "model_forward_time": 0.025009632110595703, + "step": 14206 + }, + { + "epoch": 2.16766357421875e-05, + "step": 14206, + "training_step_time": 0.1020820140838623 + }, + { + "epoch": 2.167816162109375e-05, + "model_forward_time": 0.0256040096282959, + "step": 14207 + }, + { + "epoch": 2.167816162109375e-05, + "step": 14207, + "training_step_time": 0.10316848754882812 + }, + { + "epoch": 2.16796875e-05, + "model_forward_time": 0.02793288230895996, + "step": 14208 + }, + { + "epoch": 2.16796875e-05, + "step": 14208, + "training_step_time": 0.10660958290100098 + }, + { + "epoch": 2.168121337890625e-05, + "model_forward_time": 0.02549910545349121, + "step": 14209 + }, + { + "epoch": 2.168121337890625e-05, + "step": 14209, + "training_step_time": 0.1085367202758789 + }, + { + "epoch": 2.16827392578125e-05, + "grad_norm": 0.2370947003364563, + "learning_rate": 5.8447104629635344e-05, + "loss": 0.0162, + "step": 14210 + }, + { + "epoch": 2.16827392578125e-05, + "model_forward_time": 0.024593353271484375, + "step": 14210 + }, + { + "epoch": 2.16827392578125e-05, + "step": 14210, + "training_step_time": 0.10286641120910645 + }, + { + "epoch": 2.168426513671875e-05, + "model_forward_time": 0.025765657424926758, + "step": 14211 + }, + { + "epoch": 2.168426513671875e-05, + "step": 14211, + "training_step_time": 0.10534024238586426 + }, + { + "epoch": 2.1685791015625e-05, + "model_forward_time": 0.024977445602416992, + "step": 14212 + }, + { + "epoch": 2.1685791015625e-05, + "step": 14212, + "training_step_time": 0.1051020622253418 + }, + { + "epoch": 2.168731689453125e-05, + "model_forward_time": 0.02566838264465332, + "step": 14213 + }, + { + "epoch": 2.168731689453125e-05, + "step": 14213, + "training_step_time": 0.11739373207092285 + }, + { + "epoch": 2.16888427734375e-05, + "model_forward_time": 0.024464845657348633, + "step": 14214 + }, + { + "epoch": 2.16888427734375e-05, + "step": 14214, + "training_step_time": 0.1219627857208252 + }, + { + "epoch": 2.169036865234375e-05, + "model_forward_time": 0.024508953094482422, + "step": 14215 + }, + { + "epoch": 2.169036865234375e-05, + "step": 14215, + "training_step_time": 0.1144554615020752 + }, + { + "epoch": 2.169189453125e-05, + "model_forward_time": 0.025665760040283203, + "step": 14216 + }, + { + "epoch": 2.169189453125e-05, + "step": 14216, + "training_step_time": 0.1211385726928711 + }, + { + "epoch": 2.169342041015625e-05, + "model_forward_time": 0.023913145065307617, + "step": 14217 + }, + { + "epoch": 2.169342041015625e-05, + "step": 14217, + "training_step_time": 0.11816740036010742 + }, + { + "epoch": 2.16949462890625e-05, + "model_forward_time": 0.024271488189697266, + "step": 14218 + }, + { + "epoch": 2.16949462890625e-05, + "step": 14218, + "training_step_time": 0.11749529838562012 + }, + { + "epoch": 2.169647216796875e-05, + "model_forward_time": 0.024726390838623047, + "step": 14219 + }, + { + "epoch": 2.169647216796875e-05, + "step": 14219, + "training_step_time": 0.11262989044189453 + }, + { + "epoch": 2.1697998046875e-05, + "grad_norm": 0.3323439061641693, + "learning_rate": 5.8392776081727385e-05, + "loss": 0.0253, + "step": 14220 + }, + { + "epoch": 2.1697998046875e-05, + "model_forward_time": 0.02440810203552246, + "step": 14220 + }, + { + "epoch": 2.1697998046875e-05, + "step": 14220, + "training_step_time": 0.1128697395324707 + }, + { + "epoch": 2.169952392578125e-05, + "model_forward_time": 0.02443099021911621, + "step": 14221 + }, + { + "epoch": 2.169952392578125e-05, + "step": 14221, + "training_step_time": 0.11056923866271973 + }, + { + "epoch": 2.17010498046875e-05, + "model_forward_time": 0.024686813354492188, + "step": 14222 + }, + { + "epoch": 2.17010498046875e-05, + "step": 14222, + "training_step_time": 0.1112356185913086 + }, + { + "epoch": 2.170257568359375e-05, + "model_forward_time": 0.025220632553100586, + "step": 14223 + }, + { + "epoch": 2.170257568359375e-05, + "step": 14223, + "training_step_time": 0.10770916938781738 + }, + { + "epoch": 2.17041015625e-05, + "model_forward_time": 0.025386810302734375, + "step": 14224 + }, + { + "epoch": 2.17041015625e-05, + "step": 14224, + "training_step_time": 0.11013650894165039 + }, + { + "epoch": 2.170562744140625e-05, + "model_forward_time": 0.02533435821533203, + "step": 14225 + }, + { + "epoch": 2.170562744140625e-05, + "step": 14225, + "training_step_time": 0.10602068901062012 + }, + { + "epoch": 2.17071533203125e-05, + "model_forward_time": 0.025584936141967773, + "step": 14226 + }, + { + "epoch": 2.17071533203125e-05, + "step": 14226, + "training_step_time": 0.10624957084655762 + }, + { + "epoch": 2.170867919921875e-05, + "model_forward_time": 0.02587437629699707, + "step": 14227 + }, + { + "epoch": 2.170867919921875e-05, + "step": 14227, + "training_step_time": 0.10594892501831055 + }, + { + "epoch": 2.1710205078125e-05, + "model_forward_time": 0.025670766830444336, + "step": 14228 + }, + { + "epoch": 2.1710205078125e-05, + "step": 14228, + "training_step_time": 0.1812424659729004 + }, + { + "epoch": 2.171173095703125e-05, + "model_forward_time": 0.02523946762084961, + "step": 14229 + }, + { + "epoch": 2.171173095703125e-05, + "step": 14229, + "training_step_time": 0.14331388473510742 + }, + { + "epoch": 2.17132568359375e-05, + "grad_norm": 0.44740182161331177, + "learning_rate": 5.833843733580512e-05, + "loss": 0.0164, + "step": 14230 + }, + { + "epoch": 2.17132568359375e-05, + "model_forward_time": 0.024950742721557617, + "step": 14230 + }, + { + "epoch": 2.17132568359375e-05, + "step": 14230, + "training_step_time": 0.11131453514099121 + }, + { + "epoch": 2.171478271484375e-05, + "model_forward_time": 0.025397300720214844, + "step": 14231 + }, + { + "epoch": 2.171478271484375e-05, + "step": 14231, + "training_step_time": 0.10571742057800293 + }, + { + "epoch": 2.171630859375e-05, + "model_forward_time": 0.027855634689331055, + "step": 14232 + }, + { + "epoch": 2.171630859375e-05, + "step": 14232, + "training_step_time": 0.12242555618286133 + }, + { + "epoch": 2.171783447265625e-05, + "model_forward_time": 0.025373458862304688, + "step": 14233 + }, + { + "epoch": 2.171783447265625e-05, + "step": 14233, + "training_step_time": 0.1830766201019287 + }, + { + "epoch": 2.17193603515625e-05, + "model_forward_time": 0.024764299392700195, + "step": 14234 + }, + { + "epoch": 2.17193603515625e-05, + "step": 14234, + "training_step_time": 0.16950225830078125 + }, + { + "epoch": 2.172088623046875e-05, + "model_forward_time": 0.024750947952270508, + "step": 14235 + }, + { + "epoch": 2.172088623046875e-05, + "step": 14235, + "training_step_time": 0.12900304794311523 + }, + { + "epoch": 2.1722412109375e-05, + "model_forward_time": 0.024866819381713867, + "step": 14236 + }, + { + "epoch": 2.1722412109375e-05, + "step": 14236, + "training_step_time": 0.1181650161743164 + }, + { + "epoch": 2.172393798828125e-05, + "model_forward_time": 0.025434494018554688, + "step": 14237 + }, + { + "epoch": 2.172393798828125e-05, + "step": 14237, + "training_step_time": 0.10656118392944336 + }, + { + "epoch": 2.17254638671875e-05, + "model_forward_time": 0.0257108211517334, + "step": 14238 + }, + { + "epoch": 2.17254638671875e-05, + "step": 14238, + "training_step_time": 0.10633683204650879 + }, + { + "epoch": 2.172698974609375e-05, + "model_forward_time": 0.025119781494140625, + "step": 14239 + }, + { + "epoch": 2.172698974609375e-05, + "step": 14239, + "training_step_time": 0.10781002044677734 + }, + { + "epoch": 2.1728515625e-05, + "grad_norm": 0.30819186568260193, + "learning_rate": 5.828408845789522e-05, + "loss": 0.0165, + "step": 14240 + }, + { + "epoch": 2.1728515625e-05, + "model_forward_time": 0.025023698806762695, + "step": 14240 + }, + { + "epoch": 2.1728515625e-05, + "step": 14240, + "training_step_time": 0.10374188423156738 + }, + { + "epoch": 2.173004150390625e-05, + "model_forward_time": 0.02571702003479004, + "step": 14241 + }, + { + "epoch": 2.173004150390625e-05, + "step": 14241, + "training_step_time": 0.1978161334991455 + }, + { + "epoch": 2.17315673828125e-05, + "model_forward_time": 0.025000810623168945, + "step": 14242 + }, + { + "epoch": 2.17315673828125e-05, + "step": 14242, + "training_step_time": 0.13852572441101074 + }, + { + "epoch": 2.173309326171875e-05, + "model_forward_time": 0.025410890579223633, + "step": 14243 + }, + { + "epoch": 2.173309326171875e-05, + "step": 14243, + "training_step_time": 0.11875605583190918 + }, + { + "epoch": 2.1734619140625e-05, + "model_forward_time": 0.025330305099487305, + "step": 14244 + }, + { + "epoch": 2.1734619140625e-05, + "step": 14244, + "training_step_time": 0.12769365310668945 + }, + { + "epoch": 2.173614501953125e-05, + "model_forward_time": 0.02568650245666504, + "step": 14245 + }, + { + "epoch": 2.173614501953125e-05, + "step": 14245, + "training_step_time": 0.19179177284240723 + }, + { + "epoch": 2.17376708984375e-05, + "model_forward_time": 0.024847030639648438, + "step": 14246 + }, + { + "epoch": 2.17376708984375e-05, + "step": 14246, + "training_step_time": 0.12841367721557617 + }, + { + "epoch": 2.173919677734375e-05, + "model_forward_time": 0.024584531784057617, + "step": 14247 + }, + { + "epoch": 2.173919677734375e-05, + "step": 14247, + "training_step_time": 0.10552549362182617 + }, + { + "epoch": 2.174072265625e-05, + "model_forward_time": 0.026430606842041016, + "step": 14248 + }, + { + "epoch": 2.174072265625e-05, + "step": 14248, + "training_step_time": 0.11323213577270508 + }, + { + "epoch": 2.174224853515625e-05, + "model_forward_time": 0.025250911712646484, + "step": 14249 + }, + { + "epoch": 2.174224853515625e-05, + "step": 14249, + "training_step_time": 0.10681486129760742 + }, + { + "epoch": 2.17437744140625e-05, + "grad_norm": 0.35048770904541016, + "learning_rate": 5.8229729514036705e-05, + "loss": 0.0223, + "step": 14250 + }, + { + "epoch": 2.17437744140625e-05, + "model_forward_time": 0.025784969329833984, + "step": 14250 + }, + { + "epoch": 2.17437744140625e-05, + "step": 14250, + "training_step_time": 0.10779976844787598 + }, + { + "epoch": 2.174530029296875e-05, + "model_forward_time": 0.025392770767211914, + "step": 14251 + }, + { + "epoch": 2.174530029296875e-05, + "step": 14251, + "training_step_time": 0.11716365814208984 + }, + { + "epoch": 2.1746826171875e-05, + "model_forward_time": 0.02602982521057129, + "step": 14252 + }, + { + "epoch": 2.1746826171875e-05, + "step": 14252, + "training_step_time": 0.1142120361328125 + }, + { + "epoch": 2.174835205078125e-05, + "model_forward_time": 0.02585625648498535, + "step": 14253 + }, + { + "epoch": 2.174835205078125e-05, + "step": 14253, + "training_step_time": 0.10896039009094238 + }, + { + "epoch": 2.17498779296875e-05, + "model_forward_time": 0.0254361629486084, + "step": 14254 + }, + { + "epoch": 2.17498779296875e-05, + "step": 14254, + "training_step_time": 0.10609269142150879 + }, + { + "epoch": 2.175140380859375e-05, + "model_forward_time": 0.025432348251342773, + "step": 14255 + }, + { + "epoch": 2.175140380859375e-05, + "step": 14255, + "training_step_time": 0.10748791694641113 + }, + { + "epoch": 2.17529296875e-05, + "model_forward_time": 0.026241302490234375, + "step": 14256 + }, + { + "epoch": 2.17529296875e-05, + "step": 14256, + "training_step_time": 0.10540390014648438 + }, + { + "epoch": 2.175445556640625e-05, + "model_forward_time": 0.0257875919342041, + "step": 14257 + }, + { + "epoch": 2.175445556640625e-05, + "step": 14257, + "training_step_time": 0.10501241683959961 + }, + { + "epoch": 2.17559814453125e-05, + "model_forward_time": 0.025878190994262695, + "step": 14258 + }, + { + "epoch": 2.17559814453125e-05, + "step": 14258, + "training_step_time": 0.10561823844909668 + }, + { + "epoch": 2.175750732421875e-05, + "model_forward_time": 0.02545619010925293, + "step": 14259 + }, + { + "epoch": 2.175750732421875e-05, + "step": 14259, + "training_step_time": 0.10495257377624512 + }, + { + "epoch": 2.1759033203125e-05, + "grad_norm": 0.25206512212753296, + "learning_rate": 5.817536057028081e-05, + "loss": 0.0158, + "step": 14260 + }, + { + "epoch": 2.1759033203125e-05, + "model_forward_time": 0.025283336639404297, + "step": 14260 + }, + { + "epoch": 2.1759033203125e-05, + "step": 14260, + "training_step_time": 0.10444951057434082 + }, + { + "epoch": 2.176055908203125e-05, + "model_forward_time": 0.02515578269958496, + "step": 14261 + }, + { + "epoch": 2.176055908203125e-05, + "step": 14261, + "training_step_time": 0.10490679740905762 + }, + { + "epoch": 2.17620849609375e-05, + "model_forward_time": 0.02617931365966797, + "step": 14262 + }, + { + "epoch": 2.17620849609375e-05, + "step": 14262, + "training_step_time": 0.1102452278137207 + }, + { + "epoch": 2.176361083984375e-05, + "model_forward_time": 0.025544166564941406, + "step": 14263 + }, + { + "epoch": 2.176361083984375e-05, + "step": 14263, + "training_step_time": 0.10574054718017578 + }, + { + "epoch": 2.176513671875e-05, + "model_forward_time": 0.0252535343170166, + "step": 14264 + }, + { + "epoch": 2.176513671875e-05, + "step": 14264, + "training_step_time": 0.10870695114135742 + }, + { + "epoch": 2.176666259765625e-05, + "model_forward_time": 0.025237321853637695, + "step": 14265 + }, + { + "epoch": 2.176666259765625e-05, + "step": 14265, + "training_step_time": 0.10875511169433594 + }, + { + "epoch": 2.17681884765625e-05, + "model_forward_time": 0.0253908634185791, + "step": 14266 + }, + { + "epoch": 2.17681884765625e-05, + "step": 14266, + "training_step_time": 0.10854411125183105 + }, + { + "epoch": 2.176971435546875e-05, + "model_forward_time": 0.025395870208740234, + "step": 14267 + }, + { + "epoch": 2.176971435546875e-05, + "step": 14267, + "training_step_time": 0.10894036293029785 + }, + { + "epoch": 2.1771240234375e-05, + "model_forward_time": 0.025535106658935547, + "step": 14268 + }, + { + "epoch": 2.1771240234375e-05, + "step": 14268, + "training_step_time": 0.11008191108703613 + }, + { + "epoch": 2.177276611328125e-05, + "model_forward_time": 0.025491714477539062, + "step": 14269 + }, + { + "epoch": 2.177276611328125e-05, + "step": 14269, + "training_step_time": 0.10747694969177246 + }, + { + "epoch": 2.17742919921875e-05, + "grad_norm": 0.37802183628082275, + "learning_rate": 5.812098169269094e-05, + "loss": 0.0215, + "step": 14270 + }, + { + "epoch": 2.17742919921875e-05, + "model_forward_time": 0.0255584716796875, + "step": 14270 + }, + { + "epoch": 2.17742919921875e-05, + "step": 14270, + "training_step_time": 0.11397314071655273 + }, + { + "epoch": 2.177581787109375e-05, + "model_forward_time": 0.02548360824584961, + "step": 14271 + }, + { + "epoch": 2.177581787109375e-05, + "step": 14271, + "training_step_time": 0.10861039161682129 + }, + { + "epoch": 2.177734375e-05, + "model_forward_time": 0.02543950080871582, + "step": 14272 + }, + { + "epoch": 2.177734375e-05, + "step": 14272, + "training_step_time": 0.10734724998474121 + }, + { + "epoch": 2.177886962890625e-05, + "model_forward_time": 0.024877548217773438, + "step": 14273 + }, + { + "epoch": 2.177886962890625e-05, + "step": 14273, + "training_step_time": 0.10563921928405762 + }, + { + "epoch": 2.17803955078125e-05, + "model_forward_time": 0.02554464340209961, + "step": 14274 + }, + { + "epoch": 2.17803955078125e-05, + "step": 14274, + "training_step_time": 0.10756421089172363 + }, + { + "epoch": 2.178192138671875e-05, + "model_forward_time": 0.025127887725830078, + "step": 14275 + }, + { + "epoch": 2.178192138671875e-05, + "step": 14275, + "training_step_time": 0.1523747444152832 + }, + { + "epoch": 2.1783447265625e-05, + "model_forward_time": 0.02514362335205078, + "step": 14276 + }, + { + "epoch": 2.1783447265625e-05, + "step": 14276, + "training_step_time": 0.16162395477294922 + }, + { + "epoch": 2.178497314453125e-05, + "model_forward_time": 0.024582862854003906, + "step": 14277 + }, + { + "epoch": 2.178497314453125e-05, + "step": 14277, + "training_step_time": 0.24405694007873535 + }, + { + "epoch": 2.17864990234375e-05, + "model_forward_time": 0.024241209030151367, + "step": 14278 + }, + { + "epoch": 2.17864990234375e-05, + "step": 14278, + "training_step_time": 0.219435453414917 + }, + { + "epoch": 2.178802490234375e-05, + "model_forward_time": 0.024483680725097656, + "step": 14279 + }, + { + "epoch": 2.178802490234375e-05, + "step": 14279, + "training_step_time": 0.20451593399047852 + }, + { + "epoch": 2.178955078125e-05, + "grad_norm": 0.2612272799015045, + "learning_rate": 5.8066592947342555e-05, + "loss": 0.0153, + "step": 14280 + }, + { + "epoch": 2.178955078125e-05, + "model_forward_time": 0.024660348892211914, + "step": 14280 + }, + { + "epoch": 2.178955078125e-05, + "step": 14280, + "training_step_time": 0.2109088897705078 + }, + { + "epoch": 2.179107666015625e-05, + "model_forward_time": 0.024587154388427734, + "step": 14281 + }, + { + "epoch": 2.179107666015625e-05, + "step": 14281, + "training_step_time": 0.15859603881835938 + }, + { + "epoch": 2.17926025390625e-05, + "model_forward_time": 0.025776386260986328, + "step": 14282 + }, + { + "epoch": 2.17926025390625e-05, + "step": 14282, + "training_step_time": 0.11010575294494629 + }, + { + "epoch": 2.179412841796875e-05, + "model_forward_time": 0.024687528610229492, + "step": 14283 + }, + { + "epoch": 2.179412841796875e-05, + "step": 14283, + "training_step_time": 0.14109063148498535 + }, + { + "epoch": 2.1795654296875e-05, + "model_forward_time": 0.02494025230407715, + "step": 14284 + }, + { + "epoch": 2.1795654296875e-05, + "step": 14284, + "training_step_time": 0.1917719841003418 + }, + { + "epoch": 2.179718017578125e-05, + "model_forward_time": 0.02471637725830078, + "step": 14285 + }, + { + "epoch": 2.179718017578125e-05, + "step": 14285, + "training_step_time": 0.12595129013061523 + }, + { + "epoch": 2.17987060546875e-05, + "model_forward_time": 0.024541378021240234, + "step": 14286 + }, + { + "epoch": 2.17987060546875e-05, + "step": 14286, + "training_step_time": 0.10734724998474121 + }, + { + "epoch": 2.180023193359375e-05, + "model_forward_time": 0.025301456451416016, + "step": 14287 + }, + { + "epoch": 2.180023193359375e-05, + "step": 14287, + "training_step_time": 0.13266682624816895 + }, + { + "epoch": 2.18017578125e-05, + "model_forward_time": 0.025513172149658203, + "step": 14288 + }, + { + "epoch": 2.18017578125e-05, + "step": 14288, + "training_step_time": 0.12051582336425781 + }, + { + "epoch": 2.180328369140625e-05, + "model_forward_time": 0.025593280792236328, + "step": 14289 + }, + { + "epoch": 2.180328369140625e-05, + "step": 14289, + "training_step_time": 0.24174928665161133 + }, + { + "epoch": 2.18048095703125e-05, + "grad_norm": 0.1933142989873886, + "learning_rate": 5.8012194400323116e-05, + "loss": 0.0115, + "step": 14290 + }, + { + "epoch": 2.18048095703125e-05, + "model_forward_time": 0.026621103286743164, + "step": 14290 + }, + { + "epoch": 2.18048095703125e-05, + "step": 14290, + "training_step_time": 0.16365623474121094 + }, + { + "epoch": 2.180633544921875e-05, + "model_forward_time": 0.024871110916137695, + "step": 14291 + }, + { + "epoch": 2.180633544921875e-05, + "step": 14291, + "training_step_time": 0.17380857467651367 + }, + { + "epoch": 2.1807861328125e-05, + "model_forward_time": 0.024326801300048828, + "step": 14292 + }, + { + "epoch": 2.1807861328125e-05, + "step": 14292, + "training_step_time": 0.12635183334350586 + }, + { + "epoch": 2.180938720703125e-05, + "model_forward_time": 0.02431511878967285, + "step": 14293 + }, + { + "epoch": 2.180938720703125e-05, + "step": 14293, + "training_step_time": 0.20455551147460938 + }, + { + "epoch": 2.18109130859375e-05, + "model_forward_time": 0.02473306655883789, + "step": 14294 + }, + { + "epoch": 2.18109130859375e-05, + "step": 14294, + "training_step_time": 0.11234903335571289 + }, + { + "epoch": 2.181243896484375e-05, + "model_forward_time": 0.024663448333740234, + "step": 14295 + }, + { + "epoch": 2.181243896484375e-05, + "step": 14295, + "training_step_time": 0.10554838180541992 + }, + { + "epoch": 2.181396484375e-05, + "model_forward_time": 0.02562236785888672, + "step": 14296 + }, + { + "epoch": 2.181396484375e-05, + "step": 14296, + "training_step_time": 0.11009764671325684 + }, + { + "epoch": 2.181549072265625e-05, + "model_forward_time": 0.025576114654541016, + "step": 14297 + }, + { + "epoch": 2.181549072265625e-05, + "step": 14297, + "training_step_time": 0.1046440601348877 + }, + { + "epoch": 2.18170166015625e-05, + "model_forward_time": 0.025992631912231445, + "step": 14298 + }, + { + "epoch": 2.18170166015625e-05, + "step": 14298, + "training_step_time": 0.1050271987915039 + }, + { + "epoch": 2.181854248046875e-05, + "model_forward_time": 0.025273799896240234, + "step": 14299 + }, + { + "epoch": 2.181854248046875e-05, + "step": 14299, + "training_step_time": 0.10572218894958496 + }, + { + "epoch": 2.1820068359375e-05, + "grad_norm": 0.2315218150615692, + "learning_rate": 5.795778611773197e-05, + "loss": 0.0184, + "step": 14300 + }, + { + "epoch": 2.1820068359375e-05, + "model_forward_time": 0.02531576156616211, + "step": 14300 + }, + { + "epoch": 2.1820068359375e-05, + "step": 14300, + "training_step_time": 0.10671496391296387 + }, + { + "epoch": 2.182159423828125e-05, + "model_forward_time": 0.02517557144165039, + "step": 14301 + }, + { + "epoch": 2.182159423828125e-05, + "step": 14301, + "training_step_time": 0.10485076904296875 + }, + { + "epoch": 2.18231201171875e-05, + "model_forward_time": 0.025173425674438477, + "step": 14302 + }, + { + "epoch": 2.18231201171875e-05, + "step": 14302, + "training_step_time": 0.10883092880249023 + }, + { + "epoch": 2.182464599609375e-05, + "model_forward_time": 0.025431394577026367, + "step": 14303 + }, + { + "epoch": 2.182464599609375e-05, + "step": 14303, + "training_step_time": 0.10502123832702637 + }, + { + "epoch": 2.1826171875e-05, + "model_forward_time": 0.025810718536376953, + "step": 14304 + }, + { + "epoch": 2.1826171875e-05, + "step": 14304, + "training_step_time": 0.10597395896911621 + }, + { + "epoch": 2.182769775390625e-05, + "model_forward_time": 0.0251920223236084, + "step": 14305 + }, + { + "epoch": 2.182769775390625e-05, + "step": 14305, + "training_step_time": 0.10492849349975586 + }, + { + "epoch": 2.18292236328125e-05, + "model_forward_time": 0.025177717208862305, + "step": 14306 + }, + { + "epoch": 2.18292236328125e-05, + "step": 14306, + "training_step_time": 0.10402321815490723 + }, + { + "epoch": 2.183074951171875e-05, + "model_forward_time": 0.02519512176513672, + "step": 14307 + }, + { + "epoch": 2.183074951171875e-05, + "step": 14307, + "training_step_time": 0.10391068458557129 + }, + { + "epoch": 2.1832275390625e-05, + "model_forward_time": 0.025062084197998047, + "step": 14308 + }, + { + "epoch": 2.1832275390625e-05, + "step": 14308, + "training_step_time": 0.1046140193939209 + }, + { + "epoch": 2.183380126953125e-05, + "model_forward_time": 0.024808645248413086, + "step": 14309 + }, + { + "epoch": 2.183380126953125e-05, + "step": 14309, + "training_step_time": 0.10811448097229004 + }, + { + "epoch": 2.18353271484375e-05, + "grad_norm": 0.40620726346969604, + "learning_rate": 5.7903368165680327e-05, + "loss": 0.0151, + "step": 14310 + }, + { + "epoch": 2.18353271484375e-05, + "model_forward_time": 0.025340795516967773, + "step": 14310 + }, + { + "epoch": 2.18353271484375e-05, + "step": 14310, + "training_step_time": 0.11503791809082031 + }, + { + "epoch": 2.183685302734375e-05, + "model_forward_time": 0.02547907829284668, + "step": 14311 + }, + { + "epoch": 2.183685302734375e-05, + "step": 14311, + "training_step_time": 0.11793303489685059 + }, + { + "epoch": 2.183837890625e-05, + "model_forward_time": 0.026047229766845703, + "step": 14312 + }, + { + "epoch": 2.183837890625e-05, + "step": 14312, + "training_step_time": 0.11669039726257324 + }, + { + "epoch": 2.183990478515625e-05, + "model_forward_time": 0.02695488929748535, + "step": 14313 + }, + { + "epoch": 2.183990478515625e-05, + "step": 14313, + "training_step_time": 0.11562633514404297 + }, + { + "epoch": 2.18414306640625e-05, + "model_forward_time": 0.025776386260986328, + "step": 14314 + }, + { + "epoch": 2.18414306640625e-05, + "step": 14314, + "training_step_time": 0.11435222625732422 + }, + { + "epoch": 2.184295654296875e-05, + "model_forward_time": 0.025081396102905273, + "step": 14315 + }, + { + "epoch": 2.184295654296875e-05, + "step": 14315, + "training_step_time": 0.11242008209228516 + }, + { + "epoch": 2.1844482421875e-05, + "model_forward_time": 0.025320053100585938, + "step": 14316 + }, + { + "epoch": 2.1844482421875e-05, + "step": 14316, + "training_step_time": 0.16251707077026367 + }, + { + "epoch": 2.184600830078125e-05, + "model_forward_time": 0.02516627311706543, + "step": 14317 + }, + { + "epoch": 2.184600830078125e-05, + "step": 14317, + "training_step_time": 0.14265227317810059 + }, + { + "epoch": 2.18475341796875e-05, + "model_forward_time": 0.02472686767578125, + "step": 14318 + }, + { + "epoch": 2.18475341796875e-05, + "step": 14318, + "training_step_time": 0.11267542839050293 + }, + { + "epoch": 2.184906005859375e-05, + "model_forward_time": 0.0250852108001709, + "step": 14319 + }, + { + "epoch": 2.184906005859375e-05, + "step": 14319, + "training_step_time": 0.11116647720336914 + }, + { + "epoch": 2.18505859375e-05, + "grad_norm": 0.3635031282901764, + "learning_rate": 5.7848940610291125e-05, + "loss": 0.0301, + "step": 14320 + }, + { + "epoch": 2.18505859375e-05, + "model_forward_time": 0.025061368942260742, + "step": 14320 + }, + { + "epoch": 2.18505859375e-05, + "step": 14320, + "training_step_time": 0.10956621170043945 + }, + { + "epoch": 2.185211181640625e-05, + "model_forward_time": 0.02559638023376465, + "step": 14321 + }, + { + "epoch": 2.185211181640625e-05, + "step": 14321, + "training_step_time": 0.18656277656555176 + }, + { + "epoch": 2.18536376953125e-05, + "model_forward_time": 0.024847030639648438, + "step": 14322 + }, + { + "epoch": 2.18536376953125e-05, + "step": 14322, + "training_step_time": 0.11123037338256836 + }, + { + "epoch": 2.185516357421875e-05, + "model_forward_time": 0.02480316162109375, + "step": 14323 + }, + { + "epoch": 2.185516357421875e-05, + "step": 14323, + "training_step_time": 0.11948442459106445 + }, + { + "epoch": 2.1856689453125e-05, + "model_forward_time": 0.02577352523803711, + "step": 14324 + }, + { + "epoch": 2.1856689453125e-05, + "step": 14324, + "training_step_time": 0.12388753890991211 + }, + { + "epoch": 2.185821533203125e-05, + "model_forward_time": 0.025455713272094727, + "step": 14325 + }, + { + "epoch": 2.185821533203125e-05, + "step": 14325, + "training_step_time": 0.11904740333557129 + }, + { + "epoch": 2.18597412109375e-05, + "model_forward_time": 0.025624990463256836, + "step": 14326 + }, + { + "epoch": 2.18597412109375e-05, + "step": 14326, + "training_step_time": 0.12269425392150879 + }, + { + "epoch": 2.186126708984375e-05, + "model_forward_time": 0.025709152221679688, + "step": 14327 + }, + { + "epoch": 2.186126708984375e-05, + "step": 14327, + "training_step_time": 0.10557889938354492 + }, + { + "epoch": 2.186279296875e-05, + "model_forward_time": 0.02441692352294922, + "step": 14328 + }, + { + "epoch": 2.186279296875e-05, + "step": 14328, + "training_step_time": 0.10581493377685547 + }, + { + "epoch": 2.186431884765625e-05, + "model_forward_time": 0.025171756744384766, + "step": 14329 + }, + { + "epoch": 2.186431884765625e-05, + "step": 14329, + "training_step_time": 0.17633891105651855 + }, + { + "epoch": 2.18658447265625e-05, + "grad_norm": 0.28926295042037964, + "learning_rate": 5.7794503517698974e-05, + "loss": 0.0138, + "step": 14330 + }, + { + "epoch": 2.18658447265625e-05, + "model_forward_time": 0.024778127670288086, + "step": 14330 + }, + { + "epoch": 2.18658447265625e-05, + "step": 14330, + "training_step_time": 0.1089165210723877 + }, + { + "epoch": 2.186737060546875e-05, + "model_forward_time": 0.0248720645904541, + "step": 14331 + }, + { + "epoch": 2.186737060546875e-05, + "step": 14331, + "training_step_time": 0.11307716369628906 + }, + { + "epoch": 2.1868896484375e-05, + "model_forward_time": 0.025746822357177734, + "step": 14332 + }, + { + "epoch": 2.1868896484375e-05, + "step": 14332, + "training_step_time": 0.10546302795410156 + }, + { + "epoch": 2.187042236328125e-05, + "model_forward_time": 0.025802135467529297, + "step": 14333 + }, + { + "epoch": 2.187042236328125e-05, + "step": 14333, + "training_step_time": 0.11277556419372559 + }, + { + "epoch": 2.18719482421875e-05, + "model_forward_time": 0.025612831115722656, + "step": 14334 + }, + { + "epoch": 2.18719482421875e-05, + "step": 14334, + "training_step_time": 0.12517523765563965 + }, + { + "epoch": 2.187347412109375e-05, + "model_forward_time": 0.0253753662109375, + "step": 14335 + }, + { + "epoch": 2.187347412109375e-05, + "step": 14335, + "training_step_time": 0.1884758472442627 + }, + { + "epoch": 2.1875e-05, + "model_forward_time": 0.024528980255126953, + "step": 14336 + }, + { + "epoch": 2.1875e-05, + "step": 14336, + "training_step_time": 0.12221574783325195 + }, + { + "epoch": 2.187652587890625e-05, + "model_forward_time": 0.024448394775390625, + "step": 14337 + }, + { + "epoch": 2.187652587890625e-05, + "step": 14337, + "training_step_time": 0.13874363899230957 + }, + { + "epoch": 2.18780517578125e-05, + "model_forward_time": 0.024963855743408203, + "step": 14338 + }, + { + "epoch": 2.18780517578125e-05, + "step": 14338, + "training_step_time": 0.15686941146850586 + }, + { + "epoch": 2.187957763671875e-05, + "model_forward_time": 0.02448725700378418, + "step": 14339 + }, + { + "epoch": 2.187957763671875e-05, + "step": 14339, + "training_step_time": 0.11893534660339355 + }, + { + "epoch": 2.1881103515625e-05, + "grad_norm": 0.23410135507583618, + "learning_rate": 5.7740056954050084e-05, + "loss": 0.0163, + "step": 14340 + }, + { + "epoch": 2.1881103515625e-05, + "model_forward_time": 0.02789139747619629, + "step": 14340 + }, + { + "epoch": 2.1881103515625e-05, + "step": 14340, + "training_step_time": 0.11692929267883301 + }, + { + "epoch": 2.188262939453125e-05, + "model_forward_time": 0.02580571174621582, + "step": 14341 + }, + { + "epoch": 2.188262939453125e-05, + "step": 14341, + "training_step_time": 0.1131293773651123 + }, + { + "epoch": 2.18841552734375e-05, + "model_forward_time": 0.025181055068969727, + "step": 14342 + }, + { + "epoch": 2.18841552734375e-05, + "step": 14342, + "training_step_time": 0.10871624946594238 + }, + { + "epoch": 2.188568115234375e-05, + "model_forward_time": 0.025333404541015625, + "step": 14343 + }, + { + "epoch": 2.188568115234375e-05, + "step": 14343, + "training_step_time": 0.10631489753723145 + }, + { + "epoch": 2.188720703125e-05, + "model_forward_time": 0.02524280548095703, + "step": 14344 + }, + { + "epoch": 2.188720703125e-05, + "step": 14344, + "training_step_time": 0.10632920265197754 + }, + { + "epoch": 2.188873291015625e-05, + "model_forward_time": 0.025287628173828125, + "step": 14345 + }, + { + "epoch": 2.188873291015625e-05, + "step": 14345, + "training_step_time": 0.1079554557800293 + }, + { + "epoch": 2.18902587890625e-05, + "model_forward_time": 0.025325298309326172, + "step": 14346 + }, + { + "epoch": 2.18902587890625e-05, + "step": 14346, + "training_step_time": 0.10594439506530762 + }, + { + "epoch": 2.189178466796875e-05, + "model_forward_time": 0.026143789291381836, + "step": 14347 + }, + { + "epoch": 2.189178466796875e-05, + "step": 14347, + "training_step_time": 0.10685396194458008 + }, + { + "epoch": 2.1893310546875e-05, + "model_forward_time": 0.025084733963012695, + "step": 14348 + }, + { + "epoch": 2.1893310546875e-05, + "step": 14348, + "training_step_time": 0.11030387878417969 + }, + { + "epoch": 2.189483642578125e-05, + "model_forward_time": 0.025169849395751953, + "step": 14349 + }, + { + "epoch": 2.189483642578125e-05, + "step": 14349, + "training_step_time": 0.10748672485351562 + }, + { + "epoch": 2.18963623046875e-05, + "grad_norm": 0.4476127028465271, + "learning_rate": 5.768560098550213e-05, + "loss": 0.0159, + "step": 14350 + }, + { + "epoch": 2.18963623046875e-05, + "model_forward_time": 0.025304555892944336, + "step": 14350 + }, + { + "epoch": 2.18963623046875e-05, + "step": 14350, + "training_step_time": 0.10526037216186523 + }, + { + "epoch": 2.189788818359375e-05, + "model_forward_time": 0.02551102638244629, + "step": 14351 + }, + { + "epoch": 2.189788818359375e-05, + "step": 14351, + "training_step_time": 0.10544323921203613 + }, + { + "epoch": 2.18994140625e-05, + "model_forward_time": 0.025460481643676758, + "step": 14352 + }, + { + "epoch": 2.18994140625e-05, + "step": 14352, + "training_step_time": 0.1069488525390625 + }, + { + "epoch": 2.190093994140625e-05, + "model_forward_time": 0.025208473205566406, + "step": 14353 + }, + { + "epoch": 2.190093994140625e-05, + "step": 14353, + "training_step_time": 0.10629510879516602 + }, + { + "epoch": 2.19024658203125e-05, + "model_forward_time": 0.025322437286376953, + "step": 14354 + }, + { + "epoch": 2.19024658203125e-05, + "step": 14354, + "training_step_time": 0.10442280769348145 + }, + { + "epoch": 2.190399169921875e-05, + "model_forward_time": 0.025219202041625977, + "step": 14355 + }, + { + "epoch": 2.190399169921875e-05, + "step": 14355, + "training_step_time": 0.10483694076538086 + }, + { + "epoch": 2.1905517578125e-05, + "model_forward_time": 0.025483131408691406, + "step": 14356 + }, + { + "epoch": 2.1905517578125e-05, + "step": 14356, + "training_step_time": 0.1047523021697998 + }, + { + "epoch": 2.190704345703125e-05, + "model_forward_time": 0.027733325958251953, + "step": 14357 + }, + { + "epoch": 2.190704345703125e-05, + "step": 14357, + "training_step_time": 0.10677051544189453 + }, + { + "epoch": 2.19085693359375e-05, + "model_forward_time": 0.025628089904785156, + "step": 14358 + }, + { + "epoch": 2.19085693359375e-05, + "step": 14358, + "training_step_time": 0.11213159561157227 + }, + { + "epoch": 2.191009521484375e-05, + "model_forward_time": 0.0253298282623291, + "step": 14359 + }, + { + "epoch": 2.191009521484375e-05, + "step": 14359, + "training_step_time": 0.10497879981994629 + }, + { + "epoch": 2.191162109375e-05, + "grad_norm": 0.2515600621700287, + "learning_rate": 5.763113567822429e-05, + "loss": 0.0113, + "step": 14360 + }, + { + "epoch": 2.191162109375e-05, + "model_forward_time": 0.025327444076538086, + "step": 14360 + }, + { + "epoch": 2.191162109375e-05, + "step": 14360, + "training_step_time": 0.1047203540802002 + }, + { + "epoch": 2.191314697265625e-05, + "model_forward_time": 0.02570939064025879, + "step": 14361 + }, + { + "epoch": 2.191314697265625e-05, + "step": 14361, + "training_step_time": 0.10623335838317871 + }, + { + "epoch": 2.19146728515625e-05, + "model_forward_time": 0.025732755661010742, + "step": 14362 + }, + { + "epoch": 2.19146728515625e-05, + "step": 14362, + "training_step_time": 0.10715031623840332 + }, + { + "epoch": 2.191619873046875e-05, + "model_forward_time": 0.025213003158569336, + "step": 14363 + }, + { + "epoch": 2.191619873046875e-05, + "step": 14363, + "training_step_time": 0.10350799560546875 + }, + { + "epoch": 2.1917724609375e-05, + "model_forward_time": 0.025738239288330078, + "step": 14364 + }, + { + "epoch": 2.1917724609375e-05, + "step": 14364, + "training_step_time": 0.18706274032592773 + }, + { + "epoch": 2.191925048828125e-05, + "model_forward_time": 0.026445388793945312, + "step": 14365 + }, + { + "epoch": 2.191925048828125e-05, + "step": 14365, + "training_step_time": 0.1490159034729004 + }, + { + "epoch": 2.19207763671875e-05, + "model_forward_time": 0.024654388427734375, + "step": 14366 + }, + { + "epoch": 2.19207763671875e-05, + "step": 14366, + "training_step_time": 0.10743904113769531 + }, + { + "epoch": 2.192230224609375e-05, + "model_forward_time": 0.02534770965576172, + "step": 14367 + }, + { + "epoch": 2.192230224609375e-05, + "step": 14367, + "training_step_time": 0.1786022186279297 + }, + { + "epoch": 2.1923828125e-05, + "model_forward_time": 0.024702072143554688, + "step": 14368 + }, + { + "epoch": 2.1923828125e-05, + "step": 14368, + "training_step_time": 0.15403366088867188 + }, + { + "epoch": 2.192535400390625e-05, + "model_forward_time": 0.024469614028930664, + "step": 14369 + }, + { + "epoch": 2.192535400390625e-05, + "step": 14369, + "training_step_time": 0.16647958755493164 + }, + { + "epoch": 2.19268798828125e-05, + "grad_norm": 0.21295957267284393, + "learning_rate": 5.757666109839702e-05, + "loss": 0.0159, + "step": 14370 + }, + { + "epoch": 2.19268798828125e-05, + "model_forward_time": 0.024891138076782227, + "step": 14370 + }, + { + "epoch": 2.19268798828125e-05, + "step": 14370, + "training_step_time": 0.19384121894836426 + }, + { + "epoch": 2.192840576171875e-05, + "model_forward_time": 0.02559804916381836, + "step": 14371 + }, + { + "epoch": 2.192840576171875e-05, + "step": 14371, + "training_step_time": 0.12395977973937988 + }, + { + "epoch": 2.1929931640625e-05, + "model_forward_time": 0.02475285530090332, + "step": 14372 + }, + { + "epoch": 2.1929931640625e-05, + "step": 14372, + "training_step_time": 0.11702227592468262 + }, + { + "epoch": 2.193145751953125e-05, + "model_forward_time": 0.02573680877685547, + "step": 14373 + }, + { + "epoch": 2.193145751953125e-05, + "step": 14373, + "training_step_time": 0.11294937133789062 + }, + { + "epoch": 2.19329833984375e-05, + "model_forward_time": 0.025687694549560547, + "step": 14374 + }, + { + "epoch": 2.19329833984375e-05, + "step": 14374, + "training_step_time": 0.10483384132385254 + }, + { + "epoch": 2.193450927734375e-05, + "model_forward_time": 0.025590896606445312, + "step": 14375 + }, + { + "epoch": 2.193450927734375e-05, + "step": 14375, + "training_step_time": 0.21607661247253418 + }, + { + "epoch": 2.193603515625e-05, + "model_forward_time": 0.026109933853149414, + "step": 14376 + }, + { + "epoch": 2.193603515625e-05, + "step": 14376, + "training_step_time": 0.14435935020446777 + }, + { + "epoch": 2.193756103515625e-05, + "model_forward_time": 0.02444601058959961, + "step": 14377 + }, + { + "epoch": 2.193756103515625e-05, + "step": 14377, + "training_step_time": 0.2158801555633545 + }, + { + "epoch": 2.19390869140625e-05, + "model_forward_time": 0.025578975677490234, + "step": 14378 + }, + { + "epoch": 2.19390869140625e-05, + "step": 14378, + "training_step_time": 0.19213509559631348 + }, + { + "epoch": 2.194061279296875e-05, + "model_forward_time": 0.024686574935913086, + "step": 14379 + }, + { + "epoch": 2.194061279296875e-05, + "step": 14379, + "training_step_time": 0.17601871490478516 + }, + { + "epoch": 2.1942138671875e-05, + "grad_norm": 0.34961214661598206, + "learning_rate": 5.75221773122121e-05, + "loss": 0.0146, + "step": 14380 + }, + { + "epoch": 2.1942138671875e-05, + "model_forward_time": 0.024730920791625977, + "step": 14380 + }, + { + "epoch": 2.1942138671875e-05, + "step": 14380, + "training_step_time": 0.18546605110168457 + }, + { + "epoch": 2.194366455078125e-05, + "model_forward_time": 0.02497553825378418, + "step": 14381 + }, + { + "epoch": 2.194366455078125e-05, + "step": 14381, + "training_step_time": 0.13523101806640625 + }, + { + "epoch": 2.19451904296875e-05, + "model_forward_time": 0.02430272102355957, + "step": 14382 + }, + { + "epoch": 2.19451904296875e-05, + "step": 14382, + "training_step_time": 0.1349170207977295 + }, + { + "epoch": 2.194671630859375e-05, + "model_forward_time": 0.02502274513244629, + "step": 14383 + }, + { + "epoch": 2.194671630859375e-05, + "step": 14383, + "training_step_time": 0.18671965599060059 + }, + { + "epoch": 2.19482421875e-05, + "model_forward_time": 0.02443218231201172, + "step": 14384 + }, + { + "epoch": 2.19482421875e-05, + "step": 14384, + "training_step_time": 0.12279272079467773 + }, + { + "epoch": 2.194976806640625e-05, + "model_forward_time": 0.0236661434173584, + "step": 14385 + }, + { + "epoch": 2.194976806640625e-05, + "step": 14385, + "training_step_time": 0.11693382263183594 + }, + { + "epoch": 2.19512939453125e-05, + "model_forward_time": 0.02473163604736328, + "step": 14386 + }, + { + "epoch": 2.19512939453125e-05, + "step": 14386, + "training_step_time": 0.11201095581054688 + }, + { + "epoch": 2.195281982421875e-05, + "model_forward_time": 0.02422785758972168, + "step": 14387 + }, + { + "epoch": 2.195281982421875e-05, + "step": 14387, + "training_step_time": 0.11353468894958496 + }, + { + "epoch": 2.1954345703125e-05, + "model_forward_time": 0.02459406852722168, + "step": 14388 + }, + { + "epoch": 2.1954345703125e-05, + "step": 14388, + "training_step_time": 0.10876941680908203 + }, + { + "epoch": 2.195587158203125e-05, + "model_forward_time": 0.02595233917236328, + "step": 14389 + }, + { + "epoch": 2.195587158203125e-05, + "step": 14389, + "training_step_time": 0.11036038398742676 + }, + { + "epoch": 2.19573974609375e-05, + "grad_norm": 0.31873825192451477, + "learning_rate": 5.746768438587245e-05, + "loss": 0.0258, + "step": 14390 + }, + { + "epoch": 2.19573974609375e-05, + "model_forward_time": 0.025621414184570312, + "step": 14390 + }, + { + "epoch": 2.19573974609375e-05, + "step": 14390, + "training_step_time": 0.10696029663085938 + }, + { + "epoch": 2.195892333984375e-05, + "model_forward_time": 0.025578022003173828, + "step": 14391 + }, + { + "epoch": 2.195892333984375e-05, + "step": 14391, + "training_step_time": 0.10752630233764648 + }, + { + "epoch": 2.196044921875e-05, + "model_forward_time": 0.025530576705932617, + "step": 14392 + }, + { + "epoch": 2.196044921875e-05, + "step": 14392, + "training_step_time": 0.10671806335449219 + }, + { + "epoch": 2.196197509765625e-05, + "model_forward_time": 0.025248050689697266, + "step": 14393 + }, + { + "epoch": 2.196197509765625e-05, + "step": 14393, + "training_step_time": 0.1077568531036377 + }, + { + "epoch": 2.19635009765625e-05, + "model_forward_time": 0.02564096450805664, + "step": 14394 + }, + { + "epoch": 2.19635009765625e-05, + "step": 14394, + "training_step_time": 0.11052298545837402 + }, + { + "epoch": 2.196502685546875e-05, + "model_forward_time": 0.025376081466674805, + "step": 14395 + }, + { + "epoch": 2.196502685546875e-05, + "step": 14395, + "training_step_time": 0.11121463775634766 + }, + { + "epoch": 2.1966552734375e-05, + "model_forward_time": 0.025179386138916016, + "step": 14396 + }, + { + "epoch": 2.1966552734375e-05, + "step": 14396, + "training_step_time": 0.10913324356079102 + }, + { + "epoch": 2.196807861328125e-05, + "model_forward_time": 0.025421857833862305, + "step": 14397 + }, + { + "epoch": 2.196807861328125e-05, + "step": 14397, + "training_step_time": 0.10636591911315918 + }, + { + "epoch": 2.19696044921875e-05, + "model_forward_time": 0.025507688522338867, + "step": 14398 + }, + { + "epoch": 2.19696044921875e-05, + "step": 14398, + "training_step_time": 0.10549569129943848 + }, + { + "epoch": 2.197113037109375e-05, + "model_forward_time": 0.02544713020324707, + "step": 14399 + }, + { + "epoch": 2.197113037109375e-05, + "step": 14399, + "training_step_time": 0.11061692237854004 + }, + { + "epoch": 2.197265625e-05, + "grad_norm": 0.28007107973098755, + "learning_rate": 5.74131823855921e-05, + "loss": 0.0169, + "step": 14400 + }, + { + "epoch": 2.197265625e-05, + "model_forward_time": 0.024945974349975586, + "step": 14400 + }, + { + "epoch": 2.197265625e-05, + "step": 14400, + "training_step_time": 0.11136388778686523 + }, + { + "epoch": 2.197418212890625e-05, + "model_forward_time": 0.02496790885925293, + "step": 14401 + }, + { + "epoch": 2.197418212890625e-05, + "step": 14401, + "training_step_time": 0.1042335033416748 + }, + { + "epoch": 2.19757080078125e-05, + "model_forward_time": 0.025563716888427734, + "step": 14402 + }, + { + "epoch": 2.19757080078125e-05, + "step": 14402, + "training_step_time": 0.10403132438659668 + }, + { + "epoch": 2.197723388671875e-05, + "model_forward_time": 0.02575230598449707, + "step": 14403 + }, + { + "epoch": 2.197723388671875e-05, + "step": 14403, + "training_step_time": 0.1058199405670166 + }, + { + "epoch": 2.1978759765625e-05, + "model_forward_time": 0.025254011154174805, + "step": 14404 + }, + { + "epoch": 2.1978759765625e-05, + "step": 14404, + "training_step_time": 0.10463547706604004 + }, + { + "epoch": 2.198028564453125e-05, + "model_forward_time": 0.025304555892944336, + "step": 14405 + }, + { + "epoch": 2.198028564453125e-05, + "step": 14405, + "training_step_time": 0.1062014102935791 + }, + { + "epoch": 2.19818115234375e-05, + "model_forward_time": 0.02547478675842285, + "step": 14406 + }, + { + "epoch": 2.19818115234375e-05, + "step": 14406, + "training_step_time": 0.10538983345031738 + }, + { + "epoch": 2.198333740234375e-05, + "model_forward_time": 0.02555251121520996, + "step": 14407 + }, + { + "epoch": 2.198333740234375e-05, + "step": 14407, + "training_step_time": 0.10567712783813477 + }, + { + "epoch": 2.198486328125e-05, + "model_forward_time": 0.025917768478393555, + "step": 14408 + }, + { + "epoch": 2.198486328125e-05, + "step": 14408, + "training_step_time": 0.12190699577331543 + }, + { + "epoch": 2.198638916015625e-05, + "model_forward_time": 0.025279998779296875, + "step": 14409 + }, + { + "epoch": 2.198638916015625e-05, + "step": 14409, + "training_step_time": 0.13457822799682617 + }, + { + "epoch": 2.19879150390625e-05, + "grad_norm": 0.32994768023490906, + "learning_rate": 5.735867137759615e-05, + "loss": 0.0147, + "step": 14410 + }, + { + "epoch": 2.19879150390625e-05, + "model_forward_time": 0.025104045867919922, + "step": 14410 + }, + { + "epoch": 2.19879150390625e-05, + "step": 14410, + "training_step_time": 0.1033792495727539 + }, + { + "epoch": 2.198944091796875e-05, + "model_forward_time": 0.025578022003173828, + "step": 14411 + }, + { + "epoch": 2.198944091796875e-05, + "step": 14411, + "training_step_time": 0.12581372261047363 + }, + { + "epoch": 2.1990966796875e-05, + "model_forward_time": 0.026106595993041992, + "step": 14412 + }, + { + "epoch": 2.1990966796875e-05, + "step": 14412, + "training_step_time": 0.11005711555480957 + }, + { + "epoch": 2.199249267578125e-05, + "model_forward_time": 0.025022268295288086, + "step": 14413 + }, + { + "epoch": 2.199249267578125e-05, + "step": 14413, + "training_step_time": 0.20831060409545898 + }, + { + "epoch": 2.19940185546875e-05, + "model_forward_time": 0.023916006088256836, + "step": 14414 + }, + { + "epoch": 2.19940185546875e-05, + "step": 14414, + "training_step_time": 0.11057806015014648 + }, + { + "epoch": 2.199554443359375e-05, + "model_forward_time": 0.02718067169189453, + "step": 14415 + }, + { + "epoch": 2.199554443359375e-05, + "step": 14415, + "training_step_time": 0.10711979866027832 + }, + { + "epoch": 2.19970703125e-05, + "model_forward_time": 0.025145292282104492, + "step": 14416 + }, + { + "epoch": 2.19970703125e-05, + "step": 14416, + "training_step_time": 0.12464451789855957 + }, + { + "epoch": 2.199859619140625e-05, + "model_forward_time": 0.025227069854736328, + "step": 14417 + }, + { + "epoch": 2.199859619140625e-05, + "step": 14417, + "training_step_time": 0.13682222366333008 + }, + { + "epoch": 2.20001220703125e-05, + "model_forward_time": 0.025318384170532227, + "step": 14418 + }, + { + "epoch": 2.20001220703125e-05, + "step": 14418, + "training_step_time": 0.10161757469177246 + }, + { + "epoch": 2.200164794921875e-05, + "model_forward_time": 0.02474236488342285, + "step": 14419 + }, + { + "epoch": 2.200164794921875e-05, + "step": 14419, + "training_step_time": 0.1598207950592041 + }, + { + "epoch": 2.2003173828125e-05, + "grad_norm": 0.12650427222251892, + "learning_rate": 5.730415142812059e-05, + "loss": 0.0125, + "step": 14420 + }, + { + "epoch": 2.2003173828125e-05, + "model_forward_time": 0.025153636932373047, + "step": 14420 + }, + { + "epoch": 2.2003173828125e-05, + "step": 14420, + "training_step_time": 0.1709437370300293 + }, + { + "epoch": 2.200469970703125e-05, + "model_forward_time": 0.02468729019165039, + "step": 14421 + }, + { + "epoch": 2.200469970703125e-05, + "step": 14421, + "training_step_time": 0.11967992782592773 + }, + { + "epoch": 2.20062255859375e-05, + "model_forward_time": 0.024868249893188477, + "step": 14422 + }, + { + "epoch": 2.20062255859375e-05, + "step": 14422, + "training_step_time": 0.16941547393798828 + }, + { + "epoch": 2.200775146484375e-05, + "model_forward_time": 0.0249025821685791, + "step": 14423 + }, + { + "epoch": 2.200775146484375e-05, + "step": 14423, + "training_step_time": 0.16129016876220703 + }, + { + "epoch": 2.200927734375e-05, + "model_forward_time": 0.024200916290283203, + "step": 14424 + }, + { + "epoch": 2.200927734375e-05, + "step": 14424, + "training_step_time": 0.10393476486206055 + }, + { + "epoch": 2.201080322265625e-05, + "model_forward_time": 0.02512335777282715, + "step": 14425 + }, + { + "epoch": 2.201080322265625e-05, + "step": 14425, + "training_step_time": 0.15414047241210938 + }, + { + "epoch": 2.20123291015625e-05, + "model_forward_time": 0.024946928024291992, + "step": 14426 + }, + { + "epoch": 2.20123291015625e-05, + "step": 14426, + "training_step_time": 0.11195707321166992 + }, + { + "epoch": 2.201385498046875e-05, + "model_forward_time": 0.025065183639526367, + "step": 14427 + }, + { + "epoch": 2.201385498046875e-05, + "step": 14427, + "training_step_time": 0.19071578979492188 + }, + { + "epoch": 2.2015380859375e-05, + "model_forward_time": 0.024942874908447266, + "step": 14428 + }, + { + "epoch": 2.2015380859375e-05, + "step": 14428, + "training_step_time": 0.19861268997192383 + }, + { + "epoch": 2.201690673828125e-05, + "model_forward_time": 0.024544239044189453, + "step": 14429 + }, + { + "epoch": 2.201690673828125e-05, + "step": 14429, + "training_step_time": 0.12487411499023438 + }, + { + "epoch": 2.20184326171875e-05, + "grad_norm": 0.35339513421058655, + "learning_rate": 5.72496226034123e-05, + "loss": 0.0149, + "step": 14430 + }, + { + "epoch": 2.20184326171875e-05, + "model_forward_time": 0.025171518325805664, + "step": 14430 + }, + { + "epoch": 2.20184326171875e-05, + "step": 14430, + "training_step_time": 0.11470937728881836 + }, + { + "epoch": 2.201995849609375e-05, + "model_forward_time": 0.025522470474243164, + "step": 14431 + }, + { + "epoch": 2.201995849609375e-05, + "step": 14431, + "training_step_time": 0.10373687744140625 + }, + { + "epoch": 2.2021484375e-05, + "model_forward_time": 0.025337934494018555, + "step": 14432 + }, + { + "epoch": 2.2021484375e-05, + "step": 14432, + "training_step_time": 0.10462212562561035 + }, + { + "epoch": 2.202301025390625e-05, + "model_forward_time": 0.02532219886779785, + "step": 14433 + }, + { + "epoch": 2.202301025390625e-05, + "step": 14433, + "training_step_time": 0.10544323921203613 + }, + { + "epoch": 2.20245361328125e-05, + "model_forward_time": 0.025471210479736328, + "step": 14434 + }, + { + "epoch": 2.20245361328125e-05, + "step": 14434, + "training_step_time": 0.1040809154510498 + }, + { + "epoch": 2.202606201171875e-05, + "model_forward_time": 0.02524423599243164, + "step": 14435 + }, + { + "epoch": 2.202606201171875e-05, + "step": 14435, + "training_step_time": 0.1081094741821289 + }, + { + "epoch": 2.2027587890625e-05, + "model_forward_time": 0.02560710906982422, + "step": 14436 + }, + { + "epoch": 2.2027587890625e-05, + "step": 14436, + "training_step_time": 0.1131441593170166 + }, + { + "epoch": 2.202911376953125e-05, + "model_forward_time": 0.02537822723388672, + "step": 14437 + }, + { + "epoch": 2.202911376953125e-05, + "step": 14437, + "training_step_time": 0.16233253479003906 + }, + { + "epoch": 2.20306396484375e-05, + "model_forward_time": 0.024821996688842773, + "step": 14438 + }, + { + "epoch": 2.20306396484375e-05, + "step": 14438, + "training_step_time": 0.19222259521484375 + }, + { + "epoch": 2.203216552734375e-05, + "model_forward_time": 0.024534940719604492, + "step": 14439 + }, + { + "epoch": 2.203216552734375e-05, + "step": 14439, + "training_step_time": 0.18992280960083008 + }, + { + "epoch": 2.203369140625e-05, + "grad_norm": 0.3374268710613251, + "learning_rate": 5.719508496972896e-05, + "loss": 0.0126, + "step": 14440 + }, + { + "epoch": 2.203369140625e-05, + "model_forward_time": 0.024710655212402344, + "step": 14440 + }, + { + "epoch": 2.203369140625e-05, + "step": 14440, + "training_step_time": 0.1766221523284912 + }, + { + "epoch": 2.203521728515625e-05, + "model_forward_time": 0.024690628051757812, + "step": 14441 + }, + { + "epoch": 2.203521728515625e-05, + "step": 14441, + "training_step_time": 0.1760387420654297 + }, + { + "epoch": 2.20367431640625e-05, + "model_forward_time": 0.024201631546020508, + "step": 14442 + }, + { + "epoch": 2.20367431640625e-05, + "step": 14442, + "training_step_time": 0.1626882553100586 + }, + { + "epoch": 2.203826904296875e-05, + "model_forward_time": 0.024474620819091797, + "step": 14443 + }, + { + "epoch": 2.203826904296875e-05, + "step": 14443, + "training_step_time": 0.14006662368774414 + }, + { + "epoch": 2.2039794921875e-05, + "model_forward_time": 0.02454209327697754, + "step": 14444 + }, + { + "epoch": 2.2039794921875e-05, + "step": 14444, + "training_step_time": 0.12909150123596191 + }, + { + "epoch": 2.204132080078125e-05, + "model_forward_time": 0.02589726448059082, + "step": 14445 + }, + { + "epoch": 2.204132080078125e-05, + "step": 14445, + "training_step_time": 0.12542128562927246 + }, + { + "epoch": 2.20428466796875e-05, + "model_forward_time": 0.02580404281616211, + "step": 14446 + }, + { + "epoch": 2.20428466796875e-05, + "step": 14446, + "training_step_time": 0.12280154228210449 + }, + { + "epoch": 2.204437255859375e-05, + "model_forward_time": 0.02495098114013672, + "step": 14447 + }, + { + "epoch": 2.204437255859375e-05, + "step": 14447, + "training_step_time": 0.1176149845123291 + }, + { + "epoch": 2.20458984375e-05, + "model_forward_time": 0.026503562927246094, + "step": 14448 + }, + { + "epoch": 2.20458984375e-05, + "step": 14448, + "training_step_time": 0.1156005859375 + }, + { + "epoch": 2.204742431640625e-05, + "model_forward_time": 0.02577829360961914, + "step": 14449 + }, + { + "epoch": 2.204742431640625e-05, + "step": 14449, + "training_step_time": 0.15075325965881348 + }, + { + "epoch": 2.20489501953125e-05, + "grad_norm": 0.3030347228050232, + "learning_rate": 5.714053859333893e-05, + "loss": 0.0149, + "step": 14450 + }, + { + "epoch": 2.20489501953125e-05, + "model_forward_time": 0.02609705924987793, + "step": 14450 + }, + { + "epoch": 2.20489501953125e-05, + "step": 14450, + "training_step_time": 0.11271262168884277 + }, + { + "epoch": 2.205047607421875e-05, + "model_forward_time": 0.024673938751220703, + "step": 14451 + }, + { + "epoch": 2.205047607421875e-05, + "step": 14451, + "training_step_time": 0.1802356243133545 + }, + { + "epoch": 2.2052001953125e-05, + "model_forward_time": 0.024587392807006836, + "step": 14452 + }, + { + "epoch": 2.2052001953125e-05, + "step": 14452, + "training_step_time": 0.15288209915161133 + }, + { + "epoch": 2.205352783203125e-05, + "model_forward_time": 0.024260520935058594, + "step": 14453 + }, + { + "epoch": 2.205352783203125e-05, + "step": 14453, + "training_step_time": 0.11077260971069336 + }, + { + "epoch": 2.20550537109375e-05, + "model_forward_time": 0.025082111358642578, + "step": 14454 + }, + { + "epoch": 2.20550537109375e-05, + "step": 14454, + "training_step_time": 0.1906116008758545 + }, + { + "epoch": 2.205657958984375e-05, + "model_forward_time": 0.02485799789428711, + "step": 14455 + }, + { + "epoch": 2.205657958984375e-05, + "step": 14455, + "training_step_time": 0.10830879211425781 + }, + { + "epoch": 2.205810546875e-05, + "model_forward_time": 0.024625778198242188, + "step": 14456 + }, + { + "epoch": 2.205810546875e-05, + "step": 14456, + "training_step_time": 0.11713552474975586 + }, + { + "epoch": 2.205963134765625e-05, + "model_forward_time": 0.025302886962890625, + "step": 14457 + }, + { + "epoch": 2.205963134765625e-05, + "step": 14457, + "training_step_time": 0.12384819984436035 + }, + { + "epoch": 2.20611572265625e-05, + "model_forward_time": 0.025283098220825195, + "step": 14458 + }, + { + "epoch": 2.20611572265625e-05, + "step": 14458, + "training_step_time": 0.12203145027160645 + }, + { + "epoch": 2.206268310546875e-05, + "model_forward_time": 0.024929523468017578, + "step": 14459 + }, + { + "epoch": 2.206268310546875e-05, + "step": 14459, + "training_step_time": 0.1062629222869873 + }, + { + "epoch": 2.2064208984375e-05, + "grad_norm": 0.23954321444034576, + "learning_rate": 5.7085983540521216e-05, + "loss": 0.0164, + "step": 14460 + }, + { + "epoch": 2.2064208984375e-05, + "model_forward_time": 0.024916648864746094, + "step": 14460 + }, + { + "epoch": 2.2064208984375e-05, + "step": 14460, + "training_step_time": 0.14684629440307617 + }, + { + "epoch": 2.206573486328125e-05, + "model_forward_time": 0.025134801864624023, + "step": 14461 + }, + { + "epoch": 2.206573486328125e-05, + "step": 14461, + "training_step_time": 0.1693110466003418 + }, + { + "epoch": 2.20672607421875e-05, + "model_forward_time": 0.024710893630981445, + "step": 14462 + }, + { + "epoch": 2.20672607421875e-05, + "step": 14462, + "training_step_time": 0.15579795837402344 + }, + { + "epoch": 2.206878662109375e-05, + "model_forward_time": 0.02496933937072754, + "step": 14463 + }, + { + "epoch": 2.206878662109375e-05, + "step": 14463, + "training_step_time": 0.12489008903503418 + }, + { + "epoch": 2.20703125e-05, + "model_forward_time": 0.024796724319458008, + "step": 14464 + }, + { + "epoch": 2.20703125e-05, + "step": 14464, + "training_step_time": 0.11968135833740234 + }, + { + "epoch": 2.207183837890625e-05, + "model_forward_time": 0.02559494972229004, + "step": 14465 + }, + { + "epoch": 2.207183837890625e-05, + "step": 14465, + "training_step_time": 0.11547517776489258 + }, + { + "epoch": 2.20733642578125e-05, + "model_forward_time": 0.025423765182495117, + "step": 14466 + }, + { + "epoch": 2.20733642578125e-05, + "step": 14466, + "training_step_time": 0.1246178150177002 + }, + { + "epoch": 2.207489013671875e-05, + "model_forward_time": 0.025397777557373047, + "step": 14467 + }, + { + "epoch": 2.207489013671875e-05, + "step": 14467, + "training_step_time": 0.1491076946258545 + }, + { + "epoch": 2.2076416015625e-05, + "model_forward_time": 0.02490520477294922, + "step": 14468 + }, + { + "epoch": 2.2076416015625e-05, + "step": 14468, + "training_step_time": 0.1610124111175537 + }, + { + "epoch": 2.207794189453125e-05, + "model_forward_time": 0.024445295333862305, + "step": 14469 + }, + { + "epoch": 2.207794189453125e-05, + "step": 14469, + "training_step_time": 0.143110990524292 + }, + { + "epoch": 2.20794677734375e-05, + "grad_norm": 0.27757343649864197, + "learning_rate": 5.7031419877565317e-05, + "loss": 0.0192, + "step": 14470 + }, + { + "epoch": 2.20794677734375e-05, + "model_forward_time": 0.02394390106201172, + "step": 14470 + }, + { + "epoch": 2.20794677734375e-05, + "step": 14470, + "training_step_time": 0.20659661293029785 + }, + { + "epoch": 2.208099365234375e-05, + "model_forward_time": 0.02458024024963379, + "step": 14471 + }, + { + "epoch": 2.208099365234375e-05, + "step": 14471, + "training_step_time": 0.1291954517364502 + }, + { + "epoch": 2.208251953125e-05, + "model_forward_time": 0.024632930755615234, + "step": 14472 + }, + { + "epoch": 2.208251953125e-05, + "step": 14472, + "training_step_time": 0.12108206748962402 + }, + { + "epoch": 2.208404541015625e-05, + "model_forward_time": 0.02512335777282715, + "step": 14473 + }, + { + "epoch": 2.208404541015625e-05, + "step": 14473, + "training_step_time": 0.10269308090209961 + }, + { + "epoch": 2.20855712890625e-05, + "model_forward_time": 0.02545475959777832, + "step": 14474 + }, + { + "epoch": 2.20855712890625e-05, + "step": 14474, + "training_step_time": 0.10280704498291016 + }, + { + "epoch": 2.208709716796875e-05, + "model_forward_time": 0.02537846565246582, + "step": 14475 + }, + { + "epoch": 2.208709716796875e-05, + "step": 14475, + "training_step_time": 0.10709953308105469 + }, + { + "epoch": 2.2088623046875e-05, + "model_forward_time": 0.02534651756286621, + "step": 14476 + }, + { + "epoch": 2.2088623046875e-05, + "step": 14476, + "training_step_time": 0.10887026786804199 + }, + { + "epoch": 2.209014892578125e-05, + "model_forward_time": 0.025393009185791016, + "step": 14477 + }, + { + "epoch": 2.209014892578125e-05, + "step": 14477, + "training_step_time": 0.10451221466064453 + }, + { + "epoch": 2.20916748046875e-05, + "model_forward_time": 0.025368928909301758, + "step": 14478 + }, + { + "epoch": 2.20916748046875e-05, + "step": 14478, + "training_step_time": 0.10456395149230957 + }, + { + "epoch": 2.209320068359375e-05, + "model_forward_time": 0.024740219116210938, + "step": 14479 + }, + { + "epoch": 2.209320068359375e-05, + "step": 14479, + "training_step_time": 0.10389828681945801 + }, + { + "epoch": 2.20947265625e-05, + "grad_norm": 0.3883318305015564, + "learning_rate": 5.697684767077125e-05, + "loss": 0.0123, + "step": 14480 + }, + { + "epoch": 2.20947265625e-05, + "model_forward_time": 0.024800777435302734, + "step": 14480 + }, + { + "epoch": 2.20947265625e-05, + "step": 14480, + "training_step_time": 0.10541391372680664 + }, + { + "epoch": 2.209625244140625e-05, + "model_forward_time": 0.024829387664794922, + "step": 14481 + }, + { + "epoch": 2.209625244140625e-05, + "step": 14481, + "training_step_time": 0.10607028007507324 + }, + { + "epoch": 2.20977783203125e-05, + "model_forward_time": 0.025574922561645508, + "step": 14482 + }, + { + "epoch": 2.20977783203125e-05, + "step": 14482, + "training_step_time": 0.10839486122131348 + }, + { + "epoch": 2.209930419921875e-05, + "model_forward_time": 0.02539992332458496, + "step": 14483 + }, + { + "epoch": 2.209930419921875e-05, + "step": 14483, + "training_step_time": 0.10468316078186035 + }, + { + "epoch": 2.2100830078125e-05, + "model_forward_time": 0.025543928146362305, + "step": 14484 + }, + { + "epoch": 2.2100830078125e-05, + "step": 14484, + "training_step_time": 0.1052558422088623 + }, + { + "epoch": 2.210235595703125e-05, + "model_forward_time": 0.025189876556396484, + "step": 14485 + }, + { + "epoch": 2.210235595703125e-05, + "step": 14485, + "training_step_time": 0.10957646369934082 + }, + { + "epoch": 2.21038818359375e-05, + "model_forward_time": 0.025109291076660156, + "step": 14486 + }, + { + "epoch": 2.21038818359375e-05, + "step": 14486, + "training_step_time": 0.10662221908569336 + }, + { + "epoch": 2.210540771484375e-05, + "model_forward_time": 0.0253598690032959, + "step": 14487 + }, + { + "epoch": 2.210540771484375e-05, + "step": 14487, + "training_step_time": 0.1046895980834961 + }, + { + "epoch": 2.210693359375e-05, + "model_forward_time": 0.025243043899536133, + "step": 14488 + }, + { + "epoch": 2.210693359375e-05, + "step": 14488, + "training_step_time": 0.10539865493774414 + }, + { + "epoch": 2.210845947265625e-05, + "model_forward_time": 0.025421857833862305, + "step": 14489 + }, + { + "epoch": 2.210845947265625e-05, + "step": 14489, + "training_step_time": 0.10492134094238281 + }, + { + "epoch": 2.21099853515625e-05, + "grad_norm": 0.6013140082359314, + "learning_rate": 5.692226698644938e-05, + "loss": 0.0142, + "step": 14490 + }, + { + "epoch": 2.21099853515625e-05, + "model_forward_time": 0.02513575553894043, + "step": 14490 + }, + { + "epoch": 2.21099853515625e-05, + "step": 14490, + "training_step_time": 0.10460662841796875 + }, + { + "epoch": 2.211151123046875e-05, + "model_forward_time": 0.027991771697998047, + "step": 14491 + }, + { + "epoch": 2.211151123046875e-05, + "step": 14491, + "training_step_time": 0.10808277130126953 + }, + { + "epoch": 2.2113037109375e-05, + "model_forward_time": 0.025477886199951172, + "step": 14492 + }, + { + "epoch": 2.2113037109375e-05, + "step": 14492, + "training_step_time": 0.10950827598571777 + }, + { + "epoch": 2.211456298828125e-05, + "model_forward_time": 0.026910066604614258, + "step": 14493 + }, + { + "epoch": 2.211456298828125e-05, + "step": 14493, + "training_step_time": 0.13556432723999023 + }, + { + "epoch": 2.21160888671875e-05, + "model_forward_time": 0.025238990783691406, + "step": 14494 + }, + { + "epoch": 2.21160888671875e-05, + "step": 14494, + "training_step_time": 0.1399383544921875 + }, + { + "epoch": 2.211761474609375e-05, + "model_forward_time": 0.02437138557434082, + "step": 14495 + }, + { + "epoch": 2.211761474609375e-05, + "step": 14495, + "training_step_time": 0.10627007484436035 + }, + { + "epoch": 2.2119140625e-05, + "model_forward_time": 0.025246381759643555, + "step": 14496 + }, + { + "epoch": 2.2119140625e-05, + "step": 14496, + "training_step_time": 0.11527514457702637 + }, + { + "epoch": 2.212066650390625e-05, + "model_forward_time": 0.02501535415649414, + "step": 14497 + }, + { + "epoch": 2.212066650390625e-05, + "step": 14497, + "training_step_time": 0.11256074905395508 + }, + { + "epoch": 2.21221923828125e-05, + "model_forward_time": 0.025443315505981445, + "step": 14498 + }, + { + "epoch": 2.21221923828125e-05, + "step": 14498, + "training_step_time": 0.1087343692779541 + }, + { + "epoch": 2.212371826171875e-05, + "model_forward_time": 0.025124311447143555, + "step": 14499 + }, + { + "epoch": 2.212371826171875e-05, + "step": 14499, + "training_step_time": 0.1915757656097412 + }, + { + "epoch": 2.2125244140625e-05, + "grad_norm": 0.1677589863538742, + "learning_rate": 5.686767789092041e-05, + "loss": 0.0222, + "step": 14500 + }, + { + "epoch": 2.2125244140625e-05, + "model_forward_time": 0.025127172470092773, + "step": 14500 + }, + { + "epoch": 2.2125244140625e-05, + "step": 14500, + "training_step_time": 0.1542203426361084 + }, + { + "epoch": 2.212677001953125e-05, + "model_forward_time": 0.024472713470458984, + "step": 14501 + }, + { + "epoch": 2.212677001953125e-05, + "step": 14501, + "training_step_time": 0.10672783851623535 + }, + { + "epoch": 2.21282958984375e-05, + "model_forward_time": 0.02491450309753418, + "step": 14502 + }, + { + "epoch": 2.21282958984375e-05, + "step": 14502, + "training_step_time": 0.1116936206817627 + }, + { + "epoch": 2.212982177734375e-05, + "model_forward_time": 0.025066614151000977, + "step": 14503 + }, + { + "epoch": 2.212982177734375e-05, + "step": 14503, + "training_step_time": 0.12648248672485352 + }, + { + "epoch": 2.213134765625e-05, + "model_forward_time": 0.025091886520385742, + "step": 14504 + }, + { + "epoch": 2.213134765625e-05, + "step": 14504, + "training_step_time": 0.12327194213867188 + }, + { + "epoch": 2.213287353515625e-05, + "model_forward_time": 0.02522730827331543, + "step": 14505 + }, + { + "epoch": 2.213287353515625e-05, + "step": 14505, + "training_step_time": 0.12079477310180664 + }, + { + "epoch": 2.21343994140625e-05, + "model_forward_time": 0.025873422622680664, + "step": 14506 + }, + { + "epoch": 2.21343994140625e-05, + "step": 14506, + "training_step_time": 0.10528755187988281 + }, + { + "epoch": 2.213592529296875e-05, + "model_forward_time": 0.025313854217529297, + "step": 14507 + }, + { + "epoch": 2.213592529296875e-05, + "step": 14507, + "training_step_time": 0.10801434516906738 + }, + { + "epoch": 2.2137451171875e-05, + "model_forward_time": 0.02560734748840332, + "step": 14508 + }, + { + "epoch": 2.2137451171875e-05, + "step": 14508, + "training_step_time": 0.11578941345214844 + }, + { + "epoch": 2.213897705078125e-05, + "model_forward_time": 0.025099754333496094, + "step": 14509 + }, + { + "epoch": 2.213897705078125e-05, + "step": 14509, + "training_step_time": 0.11017775535583496 + }, + { + "epoch": 2.21405029296875e-05, + "grad_norm": 0.23868466913700104, + "learning_rate": 5.681308045051522e-05, + "loss": 0.0172, + "step": 14510 + }, + { + "epoch": 2.21405029296875e-05, + "model_forward_time": 0.025927066802978516, + "step": 14510 + }, + { + "epoch": 2.21405029296875e-05, + "step": 14510, + "training_step_time": 0.12167644500732422 + }, + { + "epoch": 2.214202880859375e-05, + "model_forward_time": 0.025357723236083984, + "step": 14511 + }, + { + "epoch": 2.214202880859375e-05, + "step": 14511, + "training_step_time": 0.18501615524291992 + }, + { + "epoch": 2.21435546875e-05, + "model_forward_time": 0.024504899978637695, + "step": 14512 + }, + { + "epoch": 2.21435546875e-05, + "step": 14512, + "training_step_time": 0.16508865356445312 + }, + { + "epoch": 2.214508056640625e-05, + "model_forward_time": 0.025059938430786133, + "step": 14513 + }, + { + "epoch": 2.214508056640625e-05, + "step": 14513, + "training_step_time": 0.20446133613586426 + }, + { + "epoch": 2.21466064453125e-05, + "model_forward_time": 0.02722954750061035, + "step": 14514 + }, + { + "epoch": 2.21466064453125e-05, + "step": 14514, + "training_step_time": 0.14736366271972656 + }, + { + "epoch": 2.214813232421875e-05, + "model_forward_time": 0.0245511531829834, + "step": 14515 + }, + { + "epoch": 2.214813232421875e-05, + "step": 14515, + "training_step_time": 0.14623451232910156 + }, + { + "epoch": 2.2149658203125e-05, + "model_forward_time": 0.024187088012695312, + "step": 14516 + }, + { + "epoch": 2.2149658203125e-05, + "step": 14516, + "training_step_time": 0.2178959846496582 + }, + { + "epoch": 2.215118408203125e-05, + "model_forward_time": 0.024257183074951172, + "step": 14517 + }, + { + "epoch": 2.215118408203125e-05, + "step": 14517, + "training_step_time": 0.12434029579162598 + }, + { + "epoch": 2.21527099609375e-05, + "model_forward_time": 0.024207592010498047, + "step": 14518 + }, + { + "epoch": 2.21527099609375e-05, + "step": 14518, + "training_step_time": 0.11719679832458496 + }, + { + "epoch": 2.215423583984375e-05, + "model_forward_time": 0.025354385375976562, + "step": 14519 + }, + { + "epoch": 2.215423583984375e-05, + "step": 14519, + "training_step_time": 0.11101865768432617 + }, + { + "epoch": 2.215576171875e-05, + "grad_norm": 0.24587038159370422, + "learning_rate": 5.675847473157485e-05, + "loss": 0.0113, + "step": 14520 + }, + { + "epoch": 2.215576171875e-05, + "model_forward_time": 0.02551412582397461, + "step": 14520 + }, + { + "epoch": 2.215576171875e-05, + "step": 14520, + "training_step_time": 0.1083371639251709 + }, + { + "epoch": 2.215728759765625e-05, + "model_forward_time": 0.025506019592285156, + "step": 14521 + }, + { + "epoch": 2.215728759765625e-05, + "step": 14521, + "training_step_time": 0.10920929908752441 + }, + { + "epoch": 2.21588134765625e-05, + "model_forward_time": 0.025127410888671875, + "step": 14522 + }, + { + "epoch": 2.21588134765625e-05, + "step": 14522, + "training_step_time": 0.11072731018066406 + }, + { + "epoch": 2.216033935546875e-05, + "model_forward_time": 0.025318384170532227, + "step": 14523 + }, + { + "epoch": 2.216033935546875e-05, + "step": 14523, + "training_step_time": 0.10721158981323242 + }, + { + "epoch": 2.2161865234375e-05, + "model_forward_time": 0.02513265609741211, + "step": 14524 + }, + { + "epoch": 2.2161865234375e-05, + "step": 14524, + "training_step_time": 0.10646462440490723 + }, + { + "epoch": 2.216339111328125e-05, + "model_forward_time": 0.025191307067871094, + "step": 14525 + }, + { + "epoch": 2.216339111328125e-05, + "step": 14525, + "training_step_time": 0.10880589485168457 + }, + { + "epoch": 2.21649169921875e-05, + "model_forward_time": 0.024858474731445312, + "step": 14526 + }, + { + "epoch": 2.21649169921875e-05, + "step": 14526, + "training_step_time": 0.10600948333740234 + }, + { + "epoch": 2.216644287109375e-05, + "model_forward_time": 0.025404691696166992, + "step": 14527 + }, + { + "epoch": 2.216644287109375e-05, + "step": 14527, + "training_step_time": 0.10690474510192871 + }, + { + "epoch": 2.216796875e-05, + "model_forward_time": 0.025562763214111328, + "step": 14528 + }, + { + "epoch": 2.216796875e-05, + "step": 14528, + "training_step_time": 0.10588240623474121 + }, + { + "epoch": 2.216949462890625e-05, + "model_forward_time": 0.024974584579467773, + "step": 14529 + }, + { + "epoch": 2.216949462890625e-05, + "step": 14529, + "training_step_time": 0.10827827453613281 + }, + { + "epoch": 2.21710205078125e-05, + "grad_norm": 0.4669856131076813, + "learning_rate": 5.670386080045039e-05, + "loss": 0.0338, + "step": 14530 + }, + { + "epoch": 2.21710205078125e-05, + "model_forward_time": 0.024988174438476562, + "step": 14530 + }, + { + "epoch": 2.21710205078125e-05, + "step": 14530, + "training_step_time": 0.10833001136779785 + }, + { + "epoch": 2.217254638671875e-05, + "model_forward_time": 0.025222301483154297, + "step": 14531 + }, + { + "epoch": 2.217254638671875e-05, + "step": 14531, + "training_step_time": 0.10470008850097656 + }, + { + "epoch": 2.2174072265625e-05, + "model_forward_time": 0.025252580642700195, + "step": 14532 + }, + { + "epoch": 2.2174072265625e-05, + "step": 14532, + "training_step_time": 0.10411214828491211 + }, + { + "epoch": 2.217559814453125e-05, + "model_forward_time": 0.025224924087524414, + "step": 14533 + }, + { + "epoch": 2.217559814453125e-05, + "step": 14533, + "training_step_time": 0.10711002349853516 + }, + { + "epoch": 2.21771240234375e-05, + "model_forward_time": 0.025225162506103516, + "step": 14534 + }, + { + "epoch": 2.21771240234375e-05, + "step": 14534, + "training_step_time": 0.10876584053039551 + }, + { + "epoch": 2.217864990234375e-05, + "model_forward_time": 0.025290250778198242, + "step": 14535 + }, + { + "epoch": 2.217864990234375e-05, + "step": 14535, + "training_step_time": 0.10790181159973145 + }, + { + "epoch": 2.218017578125e-05, + "model_forward_time": 0.025249481201171875, + "step": 14536 + }, + { + "epoch": 2.218017578125e-05, + "step": 14536, + "training_step_time": 0.10514235496520996 + }, + { + "epoch": 2.218170166015625e-05, + "model_forward_time": 0.025214672088623047, + "step": 14537 + }, + { + "epoch": 2.218170166015625e-05, + "step": 14537, + "training_step_time": 0.10736584663391113 + }, + { + "epoch": 2.21832275390625e-05, + "model_forward_time": 0.025692462921142578, + "step": 14538 + }, + { + "epoch": 2.21832275390625e-05, + "step": 14538, + "training_step_time": 0.10925865173339844 + }, + { + "epoch": 2.218475341796875e-05, + "model_forward_time": 0.025007963180541992, + "step": 14539 + }, + { + "epoch": 2.218475341796875e-05, + "step": 14539, + "training_step_time": 0.17052817344665527 + }, + { + "epoch": 2.2186279296875e-05, + "grad_norm": 0.31042104959487915, + "learning_rate": 5.664923872350294e-05, + "loss": 0.0157, + "step": 14540 + }, + { + "epoch": 2.2186279296875e-05, + "model_forward_time": 0.024857282638549805, + "step": 14540 + }, + { + "epoch": 2.2186279296875e-05, + "step": 14540, + "training_step_time": 0.13193798065185547 + }, + { + "epoch": 2.218780517578125e-05, + "model_forward_time": 0.024276018142700195, + "step": 14541 + }, + { + "epoch": 2.218780517578125e-05, + "step": 14541, + "training_step_time": 0.10498857498168945 + }, + { + "epoch": 2.21893310546875e-05, + "model_forward_time": 0.02554798126220703, + "step": 14542 + }, + { + "epoch": 2.21893310546875e-05, + "step": 14542, + "training_step_time": 0.11996936798095703 + }, + { + "epoch": 2.219085693359375e-05, + "model_forward_time": 0.025265932083129883, + "step": 14543 + }, + { + "epoch": 2.219085693359375e-05, + "step": 14543, + "training_step_time": 0.11675453186035156 + }, + { + "epoch": 2.21923828125e-05, + "model_forward_time": 0.02521514892578125, + "step": 14544 + }, + { + "epoch": 2.21923828125e-05, + "step": 14544, + "training_step_time": 0.10510468482971191 + }, + { + "epoch": 2.219390869140625e-05, + "model_forward_time": 0.025184154510498047, + "step": 14545 + }, + { + "epoch": 2.219390869140625e-05, + "step": 14545, + "training_step_time": 0.19674015045166016 + }, + { + "epoch": 2.21954345703125e-05, + "model_forward_time": 0.024651288986206055, + "step": 14546 + }, + { + "epoch": 2.21954345703125e-05, + "step": 14546, + "training_step_time": 0.10736656188964844 + }, + { + "epoch": 2.219696044921875e-05, + "model_forward_time": 0.02479720115661621, + "step": 14547 + }, + { + "epoch": 2.219696044921875e-05, + "step": 14547, + "training_step_time": 0.11674380302429199 + }, + { + "epoch": 2.2198486328125e-05, + "model_forward_time": 0.02531743049621582, + "step": 14548 + }, + { + "epoch": 2.2198486328125e-05, + "step": 14548, + "training_step_time": 0.11290359497070312 + }, + { + "epoch": 2.220001220703125e-05, + "model_forward_time": 0.025397300720214844, + "step": 14549 + }, + { + "epoch": 2.220001220703125e-05, + "step": 14549, + "training_step_time": 0.11157083511352539 + }, + { + "epoch": 2.22015380859375e-05, + "grad_norm": 0.5008291006088257, + "learning_rate": 5.6594608567103456e-05, + "loss": 0.0132, + "step": 14550 + }, + { + "epoch": 2.22015380859375e-05, + "model_forward_time": 0.0252227783203125, + "step": 14550 + }, + { + "epoch": 2.22015380859375e-05, + "step": 14550, + "training_step_time": 0.11755633354187012 + }, + { + "epoch": 2.220306396484375e-05, + "model_forward_time": 0.02504420280456543, + "step": 14551 + }, + { + "epoch": 2.220306396484375e-05, + "step": 14551, + "training_step_time": 0.11762428283691406 + }, + { + "epoch": 2.220458984375e-05, + "model_forward_time": 0.0259397029876709, + "step": 14552 + }, + { + "epoch": 2.220458984375e-05, + "step": 14552, + "training_step_time": 0.11055564880371094 + }, + { + "epoch": 2.220611572265625e-05, + "model_forward_time": 0.024779319763183594, + "step": 14553 + }, + { + "epoch": 2.220611572265625e-05, + "step": 14553, + "training_step_time": 0.16904354095458984 + }, + { + "epoch": 2.22076416015625e-05, + "model_forward_time": 0.025450706481933594, + "step": 14554 + }, + { + "epoch": 2.22076416015625e-05, + "step": 14554, + "training_step_time": 0.16411471366882324 + }, + { + "epoch": 2.220916748046875e-05, + "model_forward_time": 0.024248600006103516, + "step": 14555 + }, + { + "epoch": 2.220916748046875e-05, + "step": 14555, + "training_step_time": 0.11116313934326172 + }, + { + "epoch": 2.2210693359375e-05, + "model_forward_time": 0.025157451629638672, + "step": 14556 + }, + { + "epoch": 2.2210693359375e-05, + "step": 14556, + "training_step_time": 0.10460329055786133 + }, + { + "epoch": 2.221221923828125e-05, + "model_forward_time": 0.025165319442749023, + "step": 14557 + }, + { + "epoch": 2.221221923828125e-05, + "step": 14557, + "training_step_time": 0.11836457252502441 + }, + { + "epoch": 2.22137451171875e-05, + "model_forward_time": 0.02537250518798828, + "step": 14558 + }, + { + "epoch": 2.22137451171875e-05, + "step": 14558, + "training_step_time": 0.14841389656066895 + }, + { + "epoch": 2.221527099609375e-05, + "model_forward_time": 0.02514028549194336, + "step": 14559 + }, + { + "epoch": 2.221527099609375e-05, + "step": 14559, + "training_step_time": 0.11809015274047852 + }, + { + "epoch": 2.2216796875e-05, + "grad_norm": 0.3204619884490967, + "learning_rate": 5.653997039763273e-05, + "loss": 0.0164, + "step": 14560 + }, + { + "epoch": 2.2216796875e-05, + "model_forward_time": 0.025328636169433594, + "step": 14560 + }, + { + "epoch": 2.2216796875e-05, + "step": 14560, + "training_step_time": 0.1295909881591797 + }, + { + "epoch": 2.221832275390625e-05, + "model_forward_time": 0.024994850158691406, + "step": 14561 + }, + { + "epoch": 2.221832275390625e-05, + "step": 14561, + "training_step_time": 0.13772082328796387 + }, + { + "epoch": 2.22198486328125e-05, + "model_forward_time": 0.024618864059448242, + "step": 14562 + }, + { + "epoch": 2.22198486328125e-05, + "step": 14562, + "training_step_time": 0.11031842231750488 + }, + { + "epoch": 2.222137451171875e-05, + "model_forward_time": 0.02543163299560547, + "step": 14563 + }, + { + "epoch": 2.222137451171875e-05, + "step": 14563, + "training_step_time": 0.1372203826904297 + }, + { + "epoch": 2.2222900390625e-05, + "model_forward_time": 0.025385618209838867, + "step": 14564 + }, + { + "epoch": 2.2222900390625e-05, + "step": 14564, + "training_step_time": 0.1085052490234375 + }, + { + "epoch": 2.222442626953125e-05, + "model_forward_time": 0.025194883346557617, + "step": 14565 + }, + { + "epoch": 2.222442626953125e-05, + "step": 14565, + "training_step_time": 0.10372090339660645 + }, + { + "epoch": 2.22259521484375e-05, + "model_forward_time": 0.025232553482055664, + "step": 14566 + }, + { + "epoch": 2.22259521484375e-05, + "step": 14566, + "training_step_time": 0.10683012008666992 + }, + { + "epoch": 2.222747802734375e-05, + "model_forward_time": 0.028279542922973633, + "step": 14567 + }, + { + "epoch": 2.222747802734375e-05, + "step": 14567, + "training_step_time": 0.10767650604248047 + }, + { + "epoch": 2.222900390625e-05, + "model_forward_time": 0.025249242782592773, + "step": 14568 + }, + { + "epoch": 2.222900390625e-05, + "step": 14568, + "training_step_time": 0.10807371139526367 + }, + { + "epoch": 2.223052978515625e-05, + "model_forward_time": 0.026694774627685547, + "step": 14569 + }, + { + "epoch": 2.223052978515625e-05, + "step": 14569, + "training_step_time": 0.10769462585449219 + }, + { + "epoch": 2.22320556640625e-05, + "grad_norm": 0.5669165253639221, + "learning_rate": 5.648532428148128e-05, + "loss": 0.0103, + "step": 14570 + }, + { + "epoch": 2.22320556640625e-05, + "model_forward_time": 0.025313615798950195, + "step": 14570 + }, + { + "epoch": 2.22320556640625e-05, + "step": 14570, + "training_step_time": 0.1088409423828125 + }, + { + "epoch": 2.223358154296875e-05, + "model_forward_time": 0.02517247200012207, + "step": 14571 + }, + { + "epoch": 2.223358154296875e-05, + "step": 14571, + "training_step_time": 0.10522890090942383 + }, + { + "epoch": 2.2235107421875e-05, + "model_forward_time": 0.02527761459350586, + "step": 14572 + }, + { + "epoch": 2.2235107421875e-05, + "step": 14572, + "training_step_time": 0.10531353950500488 + }, + { + "epoch": 2.223663330078125e-05, + "model_forward_time": 0.02524876594543457, + "step": 14573 + }, + { + "epoch": 2.223663330078125e-05, + "step": 14573, + "training_step_time": 0.10536575317382812 + }, + { + "epoch": 2.22381591796875e-05, + "model_forward_time": 0.025576353073120117, + "step": 14574 + }, + { + "epoch": 2.22381591796875e-05, + "step": 14574, + "training_step_time": 0.1059722900390625 + }, + { + "epoch": 2.223968505859375e-05, + "model_forward_time": 0.025562524795532227, + "step": 14575 + }, + { + "epoch": 2.223968505859375e-05, + "step": 14575, + "training_step_time": 0.10818719863891602 + }, + { + "epoch": 2.22412109375e-05, + "model_forward_time": 0.025771617889404297, + "step": 14576 + }, + { + "epoch": 2.22412109375e-05, + "step": 14576, + "training_step_time": 0.10642242431640625 + }, + { + "epoch": 2.224273681640625e-05, + "model_forward_time": 0.025417327880859375, + "step": 14577 + }, + { + "epoch": 2.224273681640625e-05, + "step": 14577, + "training_step_time": 0.1053764820098877 + }, + { + "epoch": 2.22442626953125e-05, + "model_forward_time": 0.025953292846679688, + "step": 14578 + }, + { + "epoch": 2.22442626953125e-05, + "step": 14578, + "training_step_time": 0.1078343391418457 + }, + { + "epoch": 2.224578857421875e-05, + "model_forward_time": 0.025502443313598633, + "step": 14579 + }, + { + "epoch": 2.224578857421875e-05, + "step": 14579, + "training_step_time": 0.10656881332397461 + }, + { + "epoch": 2.2247314453125e-05, + "grad_norm": 0.28606441617012024, + "learning_rate": 5.6430670285049314e-05, + "loss": 0.0208, + "step": 14580 + }, + { + "epoch": 2.2247314453125e-05, + "model_forward_time": 0.025617122650146484, + "step": 14580 + }, + { + "epoch": 2.2247314453125e-05, + "step": 14580, + "training_step_time": 0.10574054718017578 + }, + { + "epoch": 2.224884033203125e-05, + "model_forward_time": 0.026617765426635742, + "step": 14581 + }, + { + "epoch": 2.224884033203125e-05, + "step": 14581, + "training_step_time": 0.11324524879455566 + }, + { + "epoch": 2.22503662109375e-05, + "model_forward_time": 0.025490760803222656, + "step": 14582 + }, + { + "epoch": 2.22503662109375e-05, + "step": 14582, + "training_step_time": 0.10758352279663086 + }, + { + "epoch": 2.225189208984375e-05, + "model_forward_time": 0.025390148162841797, + "step": 14583 + }, + { + "epoch": 2.225189208984375e-05, + "step": 14583, + "training_step_time": 0.10762429237365723 + }, + { + "epoch": 2.225341796875e-05, + "model_forward_time": 0.028012514114379883, + "step": 14584 + }, + { + "epoch": 2.225341796875e-05, + "step": 14584, + "training_step_time": 0.10878801345825195 + }, + { + "epoch": 2.225494384765625e-05, + "model_forward_time": 0.02500605583190918, + "step": 14585 + }, + { + "epoch": 2.225494384765625e-05, + "step": 14585, + "training_step_time": 0.10852932929992676 + }, + { + "epoch": 2.22564697265625e-05, + "model_forward_time": 0.025295257568359375, + "step": 14586 + }, + { + "epoch": 2.22564697265625e-05, + "step": 14586, + "training_step_time": 0.19635224342346191 + }, + { + "epoch": 2.225799560546875e-05, + "model_forward_time": 0.024827241897583008, + "step": 14587 + }, + { + "epoch": 2.225799560546875e-05, + "step": 14587, + "training_step_time": 0.13767170906066895 + }, + { + "epoch": 2.2259521484375e-05, + "model_forward_time": 0.024292945861816406, + "step": 14588 + }, + { + "epoch": 2.2259521484375e-05, + "step": 14588, + "training_step_time": 0.10584115982055664 + }, + { + "epoch": 2.226104736328125e-05, + "model_forward_time": 0.025574445724487305, + "step": 14589 + }, + { + "epoch": 2.226104736328125e-05, + "step": 14589, + "training_step_time": 0.12027215957641602 + }, + { + "epoch": 2.22625732421875e-05, + "grad_norm": 0.2916117012500763, + "learning_rate": 5.637600847474656e-05, + "loss": 0.0259, + "step": 14590 + }, + { + "epoch": 2.22625732421875e-05, + "model_forward_time": 0.025522708892822266, + "step": 14590 + }, + { + "epoch": 2.22625732421875e-05, + "step": 14590, + "training_step_time": 0.14209604263305664 + }, + { + "epoch": 2.226409912109375e-05, + "model_forward_time": 0.024690628051757812, + "step": 14591 + }, + { + "epoch": 2.226409912109375e-05, + "step": 14591, + "training_step_time": 0.1726839542388916 + }, + { + "epoch": 2.2265625e-05, + "model_forward_time": 0.024991273880004883, + "step": 14592 + }, + { + "epoch": 2.2265625e-05, + "step": 14592, + "training_step_time": 0.1297597885131836 + }, + { + "epoch": 2.226715087890625e-05, + "model_forward_time": 0.024867534637451172, + "step": 14593 + }, + { + "epoch": 2.226715087890625e-05, + "step": 14593, + "training_step_time": 0.12020111083984375 + }, + { + "epoch": 2.22686767578125e-05, + "model_forward_time": 0.02412581443786621, + "step": 14594 + }, + { + "epoch": 2.22686767578125e-05, + "step": 14594, + "training_step_time": 0.13795804977416992 + }, + { + "epoch": 2.227020263671875e-05, + "model_forward_time": 0.02532672882080078, + "step": 14595 + }, + { + "epoch": 2.227020263671875e-05, + "step": 14595, + "training_step_time": 0.11986804008483887 + }, + { + "epoch": 2.2271728515625e-05, + "model_forward_time": 0.025309324264526367, + "step": 14596 + }, + { + "epoch": 2.2271728515625e-05, + "step": 14596, + "training_step_time": 0.21877098083496094 + }, + { + "epoch": 2.227325439453125e-05, + "model_forward_time": 0.024305343627929688, + "step": 14597 + }, + { + "epoch": 2.227325439453125e-05, + "step": 14597, + "training_step_time": 0.13095593452453613 + }, + { + "epoch": 2.22747802734375e-05, + "model_forward_time": 0.025554656982421875, + "step": 14598 + }, + { + "epoch": 2.22747802734375e-05, + "step": 14598, + "training_step_time": 0.1100459098815918 + }, + { + "epoch": 2.227630615234375e-05, + "model_forward_time": 0.025024890899658203, + "step": 14599 + }, + { + "epoch": 2.227630615234375e-05, + "step": 14599, + "training_step_time": 0.16653084754943848 + }, + { + "epoch": 2.227783203125e-05, + "grad_norm": 0.26374122500419617, + "learning_rate": 5.6321338916992315e-05, + "loss": 0.0183, + "step": 14600 + }, + { + "epoch": 2.227783203125e-05, + "model_forward_time": 0.025025367736816406, + "step": 14600 + }, + { + "epoch": 2.227783203125e-05, + "step": 14600, + "training_step_time": 0.1618502140045166 + }, + { + "epoch": 2.227935791015625e-05, + "model_forward_time": 0.024567127227783203, + "step": 14601 + }, + { + "epoch": 2.227935791015625e-05, + "step": 14601, + "training_step_time": 0.10683107376098633 + }, + { + "epoch": 2.22808837890625e-05, + "model_forward_time": 0.025127172470092773, + "step": 14602 + }, + { + "epoch": 2.22808837890625e-05, + "step": 14602, + "training_step_time": 0.13989615440368652 + }, + { + "epoch": 2.228240966796875e-05, + "model_forward_time": 0.025621652603149414, + "step": 14603 + }, + { + "epoch": 2.228240966796875e-05, + "step": 14603, + "training_step_time": 0.10675883293151855 + }, + { + "epoch": 2.2283935546875e-05, + "model_forward_time": 0.02579474449157715, + "step": 14604 + }, + { + "epoch": 2.2283935546875e-05, + "step": 14604, + "training_step_time": 0.1613454818725586 + }, + { + "epoch": 2.228546142578125e-05, + "model_forward_time": 0.02497124671936035, + "step": 14605 + }, + { + "epoch": 2.228546142578125e-05, + "step": 14605, + "training_step_time": 0.12167668342590332 + }, + { + "epoch": 2.22869873046875e-05, + "model_forward_time": 0.024556636810302734, + "step": 14606 + }, + { + "epoch": 2.22869873046875e-05, + "step": 14606, + "training_step_time": 0.12325596809387207 + }, + { + "epoch": 2.228851318359375e-05, + "model_forward_time": 0.025227069854736328, + "step": 14607 + }, + { + "epoch": 2.228851318359375e-05, + "step": 14607, + "training_step_time": 0.10447835922241211 + }, + { + "epoch": 2.22900390625e-05, + "model_forward_time": 0.025501012802124023, + "step": 14608 + }, + { + "epoch": 2.22900390625e-05, + "step": 14608, + "training_step_time": 0.15616369247436523 + }, + { + "epoch": 2.229156494140625e-05, + "model_forward_time": 0.02691817283630371, + "step": 14609 + }, + { + "epoch": 2.229156494140625e-05, + "step": 14609, + "training_step_time": 0.13019704818725586 + }, + { + "epoch": 2.22930908203125e-05, + "grad_norm": 0.22930274903774261, + "learning_rate": 5.6266661678215216e-05, + "loss": 0.0131, + "step": 14610 + }, + { + "epoch": 2.22930908203125e-05, + "model_forward_time": 0.02466869354248047, + "step": 14610 + }, + { + "epoch": 2.22930908203125e-05, + "step": 14610, + "training_step_time": 0.10407185554504395 + }, + { + "epoch": 2.229461669921875e-05, + "model_forward_time": 0.02530384063720703, + "step": 14611 + }, + { + "epoch": 2.229461669921875e-05, + "step": 14611, + "training_step_time": 0.10439634323120117 + }, + { + "epoch": 2.2296142578125e-05, + "model_forward_time": 0.0257565975189209, + "step": 14612 + }, + { + "epoch": 2.2296142578125e-05, + "step": 14612, + "training_step_time": 0.1071312427520752 + }, + { + "epoch": 2.229766845703125e-05, + "model_forward_time": 0.025597810745239258, + "step": 14613 + }, + { + "epoch": 2.229766845703125e-05, + "step": 14613, + "training_step_time": 0.1091303825378418 + }, + { + "epoch": 2.22991943359375e-05, + "model_forward_time": 0.025457382202148438, + "step": 14614 + }, + { + "epoch": 2.22991943359375e-05, + "step": 14614, + "training_step_time": 0.10671138763427734 + }, + { + "epoch": 2.230072021484375e-05, + "model_forward_time": 0.025119543075561523, + "step": 14615 + }, + { + "epoch": 2.230072021484375e-05, + "step": 14615, + "training_step_time": 0.10379862785339355 + }, + { + "epoch": 2.230224609375e-05, + "model_forward_time": 0.02516794204711914, + "step": 14616 + }, + { + "epoch": 2.230224609375e-05, + "step": 14616, + "training_step_time": 0.10806655883789062 + }, + { + "epoch": 2.230377197265625e-05, + "model_forward_time": 0.025177955627441406, + "step": 14617 + }, + { + "epoch": 2.230377197265625e-05, + "step": 14617, + "training_step_time": 0.10591745376586914 + }, + { + "epoch": 2.23052978515625e-05, + "model_forward_time": 0.025246620178222656, + "step": 14618 + }, + { + "epoch": 2.23052978515625e-05, + "step": 14618, + "training_step_time": 0.10493636131286621 + }, + { + "epoch": 2.230682373046875e-05, + "model_forward_time": 0.025217771530151367, + "step": 14619 + }, + { + "epoch": 2.230682373046875e-05, + "step": 14619, + "training_step_time": 0.1037290096282959 + }, + { + "epoch": 2.2308349609375e-05, + "grad_norm": 0.27034735679626465, + "learning_rate": 5.621197682485327e-05, + "loss": 0.0147, + "step": 14620 + }, + { + "epoch": 2.2308349609375e-05, + "model_forward_time": 0.025237321853637695, + "step": 14620 + }, + { + "epoch": 2.2308349609375e-05, + "step": 14620, + "training_step_time": 0.10933399200439453 + }, + { + "epoch": 2.230987548828125e-05, + "model_forward_time": 0.025169849395751953, + "step": 14621 + }, + { + "epoch": 2.230987548828125e-05, + "step": 14621, + "training_step_time": 0.10695219039916992 + }, + { + "epoch": 2.23114013671875e-05, + "model_forward_time": 0.02432990074157715, + "step": 14622 + }, + { + "epoch": 2.23114013671875e-05, + "step": 14622, + "training_step_time": 0.10440874099731445 + }, + { + "epoch": 2.231292724609375e-05, + "model_forward_time": 0.024834871292114258, + "step": 14623 + }, + { + "epoch": 2.231292724609375e-05, + "step": 14623, + "training_step_time": 0.10554957389831543 + }, + { + "epoch": 2.2314453125e-05, + "model_forward_time": 0.02553868293762207, + "step": 14624 + }, + { + "epoch": 2.2314453125e-05, + "step": 14624, + "training_step_time": 0.1040349006652832 + }, + { + "epoch": 2.231597900390625e-05, + "model_forward_time": 0.0252225399017334, + "step": 14625 + }, + { + "epoch": 2.231597900390625e-05, + "step": 14625, + "training_step_time": 0.10519552230834961 + }, + { + "epoch": 2.23175048828125e-05, + "model_forward_time": 0.02524709701538086, + "step": 14626 + }, + { + "epoch": 2.23175048828125e-05, + "step": 14626, + "training_step_time": 0.10590577125549316 + }, + { + "epoch": 2.231903076171875e-05, + "model_forward_time": 0.028827428817749023, + "step": 14627 + }, + { + "epoch": 2.231903076171875e-05, + "step": 14627, + "training_step_time": 0.17014288902282715 + }, + { + "epoch": 2.2320556640625e-05, + "model_forward_time": 0.02440166473388672, + "step": 14628 + }, + { + "epoch": 2.2320556640625e-05, + "step": 14628, + "training_step_time": 0.19220995903015137 + }, + { + "epoch": 2.232208251953125e-05, + "model_forward_time": 0.02406001091003418, + "step": 14629 + }, + { + "epoch": 2.232208251953125e-05, + "step": 14629, + "training_step_time": 0.18671131134033203 + }, + { + "epoch": 2.23236083984375e-05, + "grad_norm": 0.20472361147403717, + "learning_rate": 5.615728442335373e-05, + "loss": 0.0141, + "step": 14630 + }, + { + "epoch": 2.23236083984375e-05, + "model_forward_time": 0.023957252502441406, + "step": 14630 + }, + { + "epoch": 2.23236083984375e-05, + "step": 14630, + "training_step_time": 0.21802139282226562 + }, + { + "epoch": 2.232513427734375e-05, + "model_forward_time": 0.02455902099609375, + "step": 14631 + }, + { + "epoch": 2.232513427734375e-05, + "step": 14631, + "training_step_time": 0.16136908531188965 + }, + { + "epoch": 2.232666015625e-05, + "model_forward_time": 0.025538206100463867, + "step": 14632 + }, + { + "epoch": 2.232666015625e-05, + "step": 14632, + "training_step_time": 0.17586064338684082 + }, + { + "epoch": 2.232818603515625e-05, + "model_forward_time": 0.02447223663330078, + "step": 14633 + }, + { + "epoch": 2.232818603515625e-05, + "step": 14633, + "training_step_time": 0.1186366081237793 + }, + { + "epoch": 2.23297119140625e-05, + "model_forward_time": 0.030089855194091797, + "step": 14634 + }, + { + "epoch": 2.23297119140625e-05, + "step": 14634, + "training_step_time": 0.1081693172454834 + }, + { + "epoch": 2.233123779296875e-05, + "model_forward_time": 0.025067567825317383, + "step": 14635 + }, + { + "epoch": 2.233123779296875e-05, + "step": 14635, + "training_step_time": 0.19274067878723145 + }, + { + "epoch": 2.2332763671875e-05, + "model_forward_time": 0.02409505844116211, + "step": 14636 + }, + { + "epoch": 2.2332763671875e-05, + "step": 14636, + "training_step_time": 0.10451650619506836 + }, + { + "epoch": 2.233428955078125e-05, + "model_forward_time": 0.02441096305847168, + "step": 14637 + }, + { + "epoch": 2.233428955078125e-05, + "step": 14637, + "training_step_time": 0.10161113739013672 + }, + { + "epoch": 2.23358154296875e-05, + "model_forward_time": 0.025036334991455078, + "step": 14638 + }, + { + "epoch": 2.23358154296875e-05, + "step": 14638, + "training_step_time": 0.11412167549133301 + }, + { + "epoch": 2.233734130859375e-05, + "model_forward_time": 0.025182247161865234, + "step": 14639 + }, + { + "epoch": 2.233734130859375e-05, + "step": 14639, + "training_step_time": 0.10583949089050293 + }, + { + "epoch": 2.23388671875e-05, + "grad_norm": 0.513239324092865, + "learning_rate": 5.6102584540173006e-05, + "loss": 0.0139, + "step": 14640 + }, + { + "epoch": 2.23388671875e-05, + "model_forward_time": 0.025196075439453125, + "step": 14640 + }, + { + "epoch": 2.23388671875e-05, + "step": 14640, + "training_step_time": 0.10657310485839844 + }, + { + "epoch": 2.234039306640625e-05, + "model_forward_time": 0.025290727615356445, + "step": 14641 + }, + { + "epoch": 2.234039306640625e-05, + "step": 14641, + "training_step_time": 0.13177871704101562 + }, + { + "epoch": 2.23419189453125e-05, + "model_forward_time": 0.025245189666748047, + "step": 14642 + }, + { + "epoch": 2.23419189453125e-05, + "step": 14642, + "training_step_time": 0.12120795249938965 + }, + { + "epoch": 2.234344482421875e-05, + "model_forward_time": 0.0250394344329834, + "step": 14643 + }, + { + "epoch": 2.234344482421875e-05, + "step": 14643, + "training_step_time": 0.10208606719970703 + }, + { + "epoch": 2.2344970703125e-05, + "model_forward_time": 0.024508953094482422, + "step": 14644 + }, + { + "epoch": 2.2344970703125e-05, + "step": 14644, + "training_step_time": 0.1603856086730957 + }, + { + "epoch": 2.234649658203125e-05, + "model_forward_time": 0.024504423141479492, + "step": 14645 + }, + { + "epoch": 2.234649658203125e-05, + "step": 14645, + "training_step_time": 0.17503833770751953 + }, + { + "epoch": 2.23480224609375e-05, + "model_forward_time": 0.024655818939208984, + "step": 14646 + }, + { + "epoch": 2.23480224609375e-05, + "step": 14646, + "training_step_time": 0.12986087799072266 + }, + { + "epoch": 2.234954833984375e-05, + "model_forward_time": 0.024106740951538086, + "step": 14647 + }, + { + "epoch": 2.234954833984375e-05, + "step": 14647, + "training_step_time": 0.13033533096313477 + }, + { + "epoch": 2.235107421875e-05, + "model_forward_time": 0.026260852813720703, + "step": 14648 + }, + { + "epoch": 2.235107421875e-05, + "step": 14648, + "training_step_time": 0.20872092247009277 + }, + { + "epoch": 2.235260009765625e-05, + "model_forward_time": 0.024337291717529297, + "step": 14649 + }, + { + "epoch": 2.235260009765625e-05, + "step": 14649, + "training_step_time": 0.11283254623413086 + }, + { + "epoch": 2.23541259765625e-05, + "grad_norm": 0.22841264307498932, + "learning_rate": 5.604787724177666e-05, + "loss": 0.0152, + "step": 14650 + }, + { + "epoch": 2.23541259765625e-05, + "model_forward_time": 0.02446269989013672, + "step": 14650 + }, + { + "epoch": 2.23541259765625e-05, + "step": 14650, + "training_step_time": 0.12491631507873535 + }, + { + "epoch": 2.235565185546875e-05, + "model_forward_time": 0.02512836456298828, + "step": 14651 + }, + { + "epoch": 2.235565185546875e-05, + "step": 14651, + "training_step_time": 0.14040637016296387 + }, + { + "epoch": 2.2357177734375e-05, + "model_forward_time": 0.02445054054260254, + "step": 14652 + }, + { + "epoch": 2.2357177734375e-05, + "step": 14652, + "training_step_time": 0.11809396743774414 + }, + { + "epoch": 2.235870361328125e-05, + "model_forward_time": 0.024946928024291992, + "step": 14653 + }, + { + "epoch": 2.235870361328125e-05, + "step": 14653, + "training_step_time": 0.17241668701171875 + }, + { + "epoch": 2.23602294921875e-05, + "model_forward_time": 0.023530244827270508, + "step": 14654 + }, + { + "epoch": 2.23602294921875e-05, + "step": 14654, + "training_step_time": 0.17937898635864258 + }, + { + "epoch": 2.236175537109375e-05, + "model_forward_time": 0.023753881454467773, + "step": 14655 + }, + { + "epoch": 2.236175537109375e-05, + "step": 14655, + "training_step_time": 0.16491937637329102 + }, + { + "epoch": 2.236328125e-05, + "model_forward_time": 0.023511171340942383, + "step": 14656 + }, + { + "epoch": 2.236328125e-05, + "step": 14656, + "training_step_time": 0.1552143096923828 + }, + { + "epoch": 2.236480712890625e-05, + "model_forward_time": 0.023363351821899414, + "step": 14657 + }, + { + "epoch": 2.236480712890625e-05, + "step": 14657, + "training_step_time": 0.14308810234069824 + }, + { + "epoch": 2.23663330078125e-05, + "model_forward_time": 0.023589611053466797, + "step": 14658 + }, + { + "epoch": 2.23663330078125e-05, + "step": 14658, + "training_step_time": 0.12970399856567383 + }, + { + "epoch": 2.236785888671875e-05, + "model_forward_time": 0.023662328720092773, + "step": 14659 + }, + { + "epoch": 2.236785888671875e-05, + "step": 14659, + "training_step_time": 0.12605071067810059 + }, + { + "epoch": 2.2369384765625e-05, + "grad_norm": 0.15974995493888855, + "learning_rate": 5.599316259463916e-05, + "loss": 0.0136, + "step": 14660 + }, + { + "epoch": 2.2369384765625e-05, + "model_forward_time": 0.024079084396362305, + "step": 14660 + }, + { + "epoch": 2.2369384765625e-05, + "step": 14660, + "training_step_time": 0.12470412254333496 + }, + { + "epoch": 2.237091064453125e-05, + "model_forward_time": 0.024296998977661133, + "step": 14661 + }, + { + "epoch": 2.237091064453125e-05, + "step": 14661, + "training_step_time": 0.11998844146728516 + }, + { + "epoch": 2.23724365234375e-05, + "model_forward_time": 0.023944616317749023, + "step": 14662 + }, + { + "epoch": 2.23724365234375e-05, + "step": 14662, + "training_step_time": 0.11277937889099121 + }, + { + "epoch": 2.237396240234375e-05, + "model_forward_time": 0.02459716796875, + "step": 14663 + }, + { + "epoch": 2.237396240234375e-05, + "step": 14663, + "training_step_time": 0.10959362983703613 + }, + { + "epoch": 2.237548828125e-05, + "model_forward_time": 0.02400064468383789, + "step": 14664 + }, + { + "epoch": 2.237548828125e-05, + "step": 14664, + "training_step_time": 0.1081092357635498 + }, + { + "epoch": 2.237701416015625e-05, + "model_forward_time": 0.02496814727783203, + "step": 14665 + }, + { + "epoch": 2.237701416015625e-05, + "step": 14665, + "training_step_time": 0.11006307601928711 + }, + { + "epoch": 2.23785400390625e-05, + "model_forward_time": 0.024898767471313477, + "step": 14666 + }, + { + "epoch": 2.23785400390625e-05, + "step": 14666, + "training_step_time": 0.10990118980407715 + }, + { + "epoch": 2.238006591796875e-05, + "model_forward_time": 0.025090932846069336, + "step": 14667 + }, + { + "epoch": 2.238006591796875e-05, + "step": 14667, + "training_step_time": 0.10628461837768555 + }, + { + "epoch": 2.2381591796875e-05, + "model_forward_time": 0.025376558303833008, + "step": 14668 + }, + { + "epoch": 2.2381591796875e-05, + "step": 14668, + "training_step_time": 0.10677194595336914 + }, + { + "epoch": 2.238311767578125e-05, + "model_forward_time": 0.027096271514892578, + "step": 14669 + }, + { + "epoch": 2.238311767578125e-05, + "step": 14669, + "training_step_time": 0.11064720153808594 + }, + { + "epoch": 2.23846435546875e-05, + "grad_norm": 0.32507702708244324, + "learning_rate": 5.5938440665244006e-05, + "loss": 0.0198, + "step": 14670 + }, + { + "epoch": 2.23846435546875e-05, + "model_forward_time": 0.025203466415405273, + "step": 14670 + }, + { + "epoch": 2.23846435546875e-05, + "step": 14670, + "training_step_time": 0.11006307601928711 + }, + { + "epoch": 2.238616943359375e-05, + "model_forward_time": 0.025109529495239258, + "step": 14671 + }, + { + "epoch": 2.238616943359375e-05, + "step": 14671, + "training_step_time": 0.10503411293029785 + }, + { + "epoch": 2.23876953125e-05, + "model_forward_time": 0.025461912155151367, + "step": 14672 + }, + { + "epoch": 2.23876953125e-05, + "step": 14672, + "training_step_time": 0.10463738441467285 + }, + { + "epoch": 2.238922119140625e-05, + "model_forward_time": 0.025249719619750977, + "step": 14673 + }, + { + "epoch": 2.238922119140625e-05, + "step": 14673, + "training_step_time": 0.10505008697509766 + }, + { + "epoch": 2.23907470703125e-05, + "model_forward_time": 0.024866819381713867, + "step": 14674 + }, + { + "epoch": 2.23907470703125e-05, + "step": 14674, + "training_step_time": 0.11972832679748535 + }, + { + "epoch": 2.239227294921875e-05, + "model_forward_time": 0.02445387840270996, + "step": 14675 + }, + { + "epoch": 2.239227294921875e-05, + "step": 14675, + "training_step_time": 0.14861822128295898 + }, + { + "epoch": 2.2393798828125e-05, + "model_forward_time": 0.024769067764282227, + "step": 14676 + }, + { + "epoch": 2.2393798828125e-05, + "step": 14676, + "training_step_time": 0.10760045051574707 + }, + { + "epoch": 2.239532470703125e-05, + "model_forward_time": 0.024705171585083008, + "step": 14677 + }, + { + "epoch": 2.239532470703125e-05, + "step": 14677, + "training_step_time": 0.11992812156677246 + }, + { + "epoch": 2.23968505859375e-05, + "model_forward_time": 0.0251309871673584, + "step": 14678 + }, + { + "epoch": 2.23968505859375e-05, + "step": 14678, + "training_step_time": 0.11035561561584473 + }, + { + "epoch": 2.239837646484375e-05, + "model_forward_time": 0.02546548843383789, + "step": 14679 + }, + { + "epoch": 2.239837646484375e-05, + "step": 14679, + "training_step_time": 0.15317606925964355 + }, + { + "epoch": 2.239990234375e-05, + "grad_norm": 0.26223987340927124, + "learning_rate": 5.588371152008349e-05, + "loss": 0.0133, + "step": 14680 + }, + { + "epoch": 2.239990234375e-05, + "model_forward_time": 0.024795055389404297, + "step": 14680 + }, + { + "epoch": 2.239990234375e-05, + "step": 14680, + "training_step_time": 0.1347506046295166 + }, + { + "epoch": 2.240142822265625e-05, + "model_forward_time": 0.02449512481689453, + "step": 14681 + }, + { + "epoch": 2.240142822265625e-05, + "step": 14681, + "training_step_time": 0.10308504104614258 + }, + { + "epoch": 2.24029541015625e-05, + "model_forward_time": 0.02513909339904785, + "step": 14682 + }, + { + "epoch": 2.24029541015625e-05, + "step": 14682, + "training_step_time": 0.11237502098083496 + }, + { + "epoch": 2.240447998046875e-05, + "model_forward_time": 0.02517223358154297, + "step": 14683 + }, + { + "epoch": 2.240447998046875e-05, + "step": 14683, + "training_step_time": 0.11715102195739746 + }, + { + "epoch": 2.2406005859375e-05, + "model_forward_time": 0.025468111038208008, + "step": 14684 + }, + { + "epoch": 2.2406005859375e-05, + "step": 14684, + "training_step_time": 0.10817742347717285 + }, + { + "epoch": 2.240753173828125e-05, + "model_forward_time": 0.026155471801757812, + "step": 14685 + }, + { + "epoch": 2.240753173828125e-05, + "step": 14685, + "training_step_time": 0.10857129096984863 + }, + { + "epoch": 2.24090576171875e-05, + "model_forward_time": 0.02526569366455078, + "step": 14686 + }, + { + "epoch": 2.24090576171875e-05, + "step": 14686, + "training_step_time": 0.17660069465637207 + }, + { + "epoch": 2.241058349609375e-05, + "model_forward_time": 0.024334430694580078, + "step": 14687 + }, + { + "epoch": 2.241058349609375e-05, + "step": 14687, + "training_step_time": 0.10007476806640625 + }, + { + "epoch": 2.2412109375e-05, + "model_forward_time": 0.024341344833374023, + "step": 14688 + }, + { + "epoch": 2.2412109375e-05, + "step": 14688, + "training_step_time": 0.15075278282165527 + }, + { + "epoch": 2.241363525390625e-05, + "model_forward_time": 0.02454066276550293, + "step": 14689 + }, + { + "epoch": 2.241363525390625e-05, + "step": 14689, + "training_step_time": 0.16780376434326172 + }, + { + "epoch": 2.24151611328125e-05, + "grad_norm": 0.2609595060348511, + "learning_rate": 5.5828975225658666e-05, + "loss": 0.0179, + "step": 14690 + }, + { + "epoch": 2.24151611328125e-05, + "model_forward_time": 0.024750947952270508, + "step": 14690 + }, + { + "epoch": 2.24151611328125e-05, + "step": 14690, + "training_step_time": 0.10785245895385742 + }, + { + "epoch": 2.241668701171875e-05, + "model_forward_time": 0.02433323860168457, + "step": 14691 + }, + { + "epoch": 2.241668701171875e-05, + "step": 14691, + "training_step_time": 0.13776803016662598 + }, + { + "epoch": 2.2418212890625e-05, + "model_forward_time": 0.025391101837158203, + "step": 14692 + }, + { + "epoch": 2.2418212890625e-05, + "step": 14692, + "training_step_time": 0.19810724258422852 + }, + { + "epoch": 2.241973876953125e-05, + "model_forward_time": 0.024683713912963867, + "step": 14693 + }, + { + "epoch": 2.241973876953125e-05, + "step": 14693, + "training_step_time": 0.1101081371307373 + }, + { + "epoch": 2.24212646484375e-05, + "model_forward_time": 0.02480912208557129, + "step": 14694 + }, + { + "epoch": 2.24212646484375e-05, + "step": 14694, + "training_step_time": 0.13306260108947754 + }, + { + "epoch": 2.242279052734375e-05, + "model_forward_time": 0.025397777557373047, + "step": 14695 + }, + { + "epoch": 2.242279052734375e-05, + "step": 14695, + "training_step_time": 0.1423022747039795 + }, + { + "epoch": 2.242431640625e-05, + "model_forward_time": 0.024962663650512695, + "step": 14696 + }, + { + "epoch": 2.242431640625e-05, + "step": 14696, + "training_step_time": 0.11060333251953125 + }, + { + "epoch": 2.242584228515625e-05, + "model_forward_time": 0.02542400360107422, + "step": 14697 + }, + { + "epoch": 2.242584228515625e-05, + "step": 14697, + "training_step_time": 0.13126492500305176 + }, + { + "epoch": 2.24273681640625e-05, + "model_forward_time": 0.02873969078063965, + "step": 14698 + }, + { + "epoch": 2.24273681640625e-05, + "step": 14698, + "training_step_time": 0.10872316360473633 + }, + { + "epoch": 2.242889404296875e-05, + "model_forward_time": 0.02545619010925293, + "step": 14699 + }, + { + "epoch": 2.242889404296875e-05, + "step": 14699, + "training_step_time": 0.10780119895935059 + }, + { + "epoch": 2.2430419921875e-05, + "grad_norm": 0.3035556375980377, + "learning_rate": 5.577423184847932e-05, + "loss": 0.017, + "step": 14700 + }, + { + "epoch": 2.2430419921875e-05, + "model_forward_time": 0.02522873878479004, + "step": 14700 + }, + { + "epoch": 2.2430419921875e-05, + "step": 14700, + "training_step_time": 0.10416412353515625 + }, + { + "epoch": 2.243194580078125e-05, + "model_forward_time": 0.024952173233032227, + "step": 14701 + }, + { + "epoch": 2.243194580078125e-05, + "step": 14701, + "training_step_time": 0.12407088279724121 + }, + { + "epoch": 2.24334716796875e-05, + "model_forward_time": 0.024108409881591797, + "step": 14702 + }, + { + "epoch": 2.24334716796875e-05, + "step": 14702, + "training_step_time": 0.13840937614440918 + }, + { + "epoch": 2.243499755859375e-05, + "model_forward_time": 0.02394247055053711, + "step": 14703 + }, + { + "epoch": 2.243499755859375e-05, + "step": 14703, + "training_step_time": 0.13051557540893555 + }, + { + "epoch": 2.24365234375e-05, + "model_forward_time": 0.023429393768310547, + "step": 14704 + }, + { + "epoch": 2.24365234375e-05, + "step": 14704, + "training_step_time": 0.12807250022888184 + }, + { + "epoch": 2.243804931640625e-05, + "model_forward_time": 0.023743152618408203, + "step": 14705 + }, + { + "epoch": 2.243804931640625e-05, + "step": 14705, + "training_step_time": 0.12237143516540527 + }, + { + "epoch": 2.24395751953125e-05, + "model_forward_time": 0.024407386779785156, + "step": 14706 + }, + { + "epoch": 2.24395751953125e-05, + "step": 14706, + "training_step_time": 0.1209573745727539 + }, + { + "epoch": 2.244110107421875e-05, + "model_forward_time": 0.023970842361450195, + "step": 14707 + }, + { + "epoch": 2.244110107421875e-05, + "step": 14707, + "training_step_time": 0.11579775810241699 + }, + { + "epoch": 2.2442626953125e-05, + "model_forward_time": 0.025153636932373047, + "step": 14708 + }, + { + "epoch": 2.2442626953125e-05, + "step": 14708, + "training_step_time": 0.11315512657165527 + }, + { + "epoch": 2.244415283203125e-05, + "model_forward_time": 0.025513648986816406, + "step": 14709 + }, + { + "epoch": 2.244415283203125e-05, + "step": 14709, + "training_step_time": 0.11139297485351562 + }, + { + "epoch": 2.24456787109375e-05, + "grad_norm": 0.29737013578414917, + "learning_rate": 5.5719481455063784e-05, + "loss": 0.0198, + "step": 14710 + }, + { + "epoch": 2.24456787109375e-05, + "model_forward_time": 0.02487945556640625, + "step": 14710 + }, + { + "epoch": 2.24456787109375e-05, + "step": 14710, + "training_step_time": 0.10619688034057617 + }, + { + "epoch": 2.244720458984375e-05, + "model_forward_time": 0.0250244140625, + "step": 14711 + }, + { + "epoch": 2.244720458984375e-05, + "step": 14711, + "training_step_time": 0.10650157928466797 + }, + { + "epoch": 2.244873046875e-05, + "model_forward_time": 0.025164365768432617, + "step": 14712 + }, + { + "epoch": 2.244873046875e-05, + "step": 14712, + "training_step_time": 0.10634756088256836 + }, + { + "epoch": 2.245025634765625e-05, + "model_forward_time": 0.025141000747680664, + "step": 14713 + }, + { + "epoch": 2.245025634765625e-05, + "step": 14713, + "training_step_time": 0.10593199729919434 + }, + { + "epoch": 2.24517822265625e-05, + "model_forward_time": 0.026380538940429688, + "step": 14714 + }, + { + "epoch": 2.24517822265625e-05, + "step": 14714, + "training_step_time": 0.10787773132324219 + }, + { + "epoch": 2.245330810546875e-05, + "model_forward_time": 0.02494955062866211, + "step": 14715 + }, + { + "epoch": 2.245330810546875e-05, + "step": 14715, + "training_step_time": 0.10883569717407227 + }, + { + "epoch": 2.2454833984375e-05, + "model_forward_time": 0.0250546932220459, + "step": 14716 + }, + { + "epoch": 2.2454833984375e-05, + "step": 14716, + "training_step_time": 0.10652041435241699 + }, + { + "epoch": 2.245635986328125e-05, + "model_forward_time": 0.02526068687438965, + "step": 14717 + }, + { + "epoch": 2.245635986328125e-05, + "step": 14717, + "training_step_time": 0.10683727264404297 + }, + { + "epoch": 2.24578857421875e-05, + "model_forward_time": 0.025353193283081055, + "step": 14718 + }, + { + "epoch": 2.24578857421875e-05, + "step": 14718, + "training_step_time": 0.10577178001403809 + }, + { + "epoch": 2.245941162109375e-05, + "model_forward_time": 0.025292634963989258, + "step": 14719 + }, + { + "epoch": 2.245941162109375e-05, + "step": 14719, + "training_step_time": 0.18582653999328613 + }, + { + "epoch": 2.24609375e-05, + "grad_norm": 0.3045312762260437, + "learning_rate": 5.566472411193897e-05, + "loss": 0.0192, + "step": 14720 + }, + { + "epoch": 2.24609375e-05, + "model_forward_time": 0.024792194366455078, + "step": 14720 + }, + { + "epoch": 2.24609375e-05, + "step": 14720, + "training_step_time": 0.13614845275878906 + }, + { + "epoch": 2.246246337890625e-05, + "model_forward_time": 0.024472713470458984, + "step": 14721 + }, + { + "epoch": 2.246246337890625e-05, + "step": 14721, + "training_step_time": 0.10914134979248047 + }, + { + "epoch": 2.24639892578125e-05, + "model_forward_time": 0.02533555030822754, + "step": 14722 + }, + { + "epoch": 2.24639892578125e-05, + "step": 14722, + "training_step_time": 0.10874700546264648 + }, + { + "epoch": 2.246551513671875e-05, + "model_forward_time": 0.0251467227935791, + "step": 14723 + }, + { + "epoch": 2.246551513671875e-05, + "step": 14723, + "training_step_time": 0.16874957084655762 + }, + { + "epoch": 2.2467041015625e-05, + "model_forward_time": 0.02476358413696289, + "step": 14724 + }, + { + "epoch": 2.2467041015625e-05, + "step": 14724, + "training_step_time": 0.10590100288391113 + }, + { + "epoch": 2.246856689453125e-05, + "model_forward_time": 0.02458810806274414, + "step": 14725 + }, + { + "epoch": 2.246856689453125e-05, + "step": 14725, + "training_step_time": 0.1580822467803955 + }, + { + "epoch": 2.24700927734375e-05, + "model_forward_time": 0.02401137351989746, + "step": 14726 + }, + { + "epoch": 2.24700927734375e-05, + "step": 14726, + "training_step_time": 0.10606908798217773 + }, + { + "epoch": 2.247161865234375e-05, + "model_forward_time": 0.02607560157775879, + "step": 14727 + }, + { + "epoch": 2.247161865234375e-05, + "step": 14727, + "training_step_time": 0.10567808151245117 + }, + { + "epoch": 2.247314453125e-05, + "model_forward_time": 0.025455713272094727, + "step": 14728 + }, + { + "epoch": 2.247314453125e-05, + "step": 14728, + "training_step_time": 0.10681891441345215 + }, + { + "epoch": 2.247467041015625e-05, + "model_forward_time": 0.025736093521118164, + "step": 14729 + }, + { + "epoch": 2.247467041015625e-05, + "step": 14729, + "training_step_time": 0.1573047637939453 + }, + { + "epoch": 2.24761962890625e-05, + "grad_norm": 0.2626732587814331, + "learning_rate": 5.560995988564023e-05, + "loss": 0.0196, + "step": 14730 + }, + { + "epoch": 2.24761962890625e-05, + "model_forward_time": 0.024536609649658203, + "step": 14730 + }, + { + "epoch": 2.24761962890625e-05, + "step": 14730, + "training_step_time": 0.10517406463623047 + }, + { + "epoch": 2.247772216796875e-05, + "model_forward_time": 0.025447368621826172, + "step": 14731 + }, + { + "epoch": 2.247772216796875e-05, + "step": 14731, + "training_step_time": 0.10872721672058105 + }, + { + "epoch": 2.2479248046875e-05, + "model_forward_time": 0.02533745765686035, + "step": 14732 + }, + { + "epoch": 2.2479248046875e-05, + "step": 14732, + "training_step_time": 0.12754130363464355 + }, + { + "epoch": 2.248077392578125e-05, + "model_forward_time": 0.025287389755249023, + "step": 14733 + }, + { + "epoch": 2.248077392578125e-05, + "step": 14733, + "training_step_time": 0.1068577766418457 + }, + { + "epoch": 2.24822998046875e-05, + "model_forward_time": 0.02437877655029297, + "step": 14734 + }, + { + "epoch": 2.24822998046875e-05, + "step": 14734, + "training_step_time": 0.17467927932739258 + }, + { + "epoch": 2.248382568359375e-05, + "model_forward_time": 0.02448582649230957, + "step": 14735 + }, + { + "epoch": 2.248382568359375e-05, + "step": 14735, + "training_step_time": 0.1784205436706543 + }, + { + "epoch": 2.24853515625e-05, + "model_forward_time": 0.027005672454833984, + "step": 14736 + }, + { + "epoch": 2.24853515625e-05, + "step": 14736, + "training_step_time": 0.1210944652557373 + }, + { + "epoch": 2.248687744140625e-05, + "model_forward_time": 0.024851322174072266, + "step": 14737 + }, + { + "epoch": 2.248687744140625e-05, + "step": 14737, + "training_step_time": 0.2062077522277832 + }, + { + "epoch": 2.24884033203125e-05, + "model_forward_time": 0.024229764938354492, + "step": 14738 + }, + { + "epoch": 2.24884033203125e-05, + "step": 14738, + "training_step_time": 0.1498866081237793 + }, + { + "epoch": 2.248992919921875e-05, + "model_forward_time": 0.024547100067138672, + "step": 14739 + }, + { + "epoch": 2.248992919921875e-05, + "step": 14739, + "training_step_time": 0.10862874984741211 + }, + { + "epoch": 2.2491455078125e-05, + "grad_norm": 0.22782595455646515, + "learning_rate": 5.555518884271122e-05, + "loss": 0.0222, + "step": 14740 + }, + { + "epoch": 2.2491455078125e-05, + "model_forward_time": 0.024859905242919922, + "step": 14740 + }, + { + "epoch": 2.2491455078125e-05, + "step": 14740, + "training_step_time": 0.15157628059387207 + }, + { + "epoch": 2.249298095703125e-05, + "model_forward_time": 0.02439594268798828, + "step": 14741 + }, + { + "epoch": 2.249298095703125e-05, + "step": 14741, + "training_step_time": 0.2140343189239502 + }, + { + "epoch": 2.24945068359375e-05, + "model_forward_time": 0.024020910263061523, + "step": 14742 + }, + { + "epoch": 2.24945068359375e-05, + "step": 14742, + "training_step_time": 0.12701630592346191 + }, + { + "epoch": 2.249603271484375e-05, + "model_forward_time": 0.024662494659423828, + "step": 14743 + }, + { + "epoch": 2.249603271484375e-05, + "step": 14743, + "training_step_time": 0.10414314270019531 + }, + { + "epoch": 2.249755859375e-05, + "model_forward_time": 0.024993181228637695, + "step": 14744 + }, + { + "epoch": 2.249755859375e-05, + "step": 14744, + "training_step_time": 0.1050117015838623 + }, + { + "epoch": 2.249908447265625e-05, + "model_forward_time": 0.02480626106262207, + "step": 14745 + }, + { + "epoch": 2.249908447265625e-05, + "step": 14745, + "training_step_time": 0.1041867733001709 + }, + { + "epoch": 2.25006103515625e-05, + "model_forward_time": 0.025339841842651367, + "step": 14746 + }, + { + "epoch": 2.25006103515625e-05, + "step": 14746, + "training_step_time": 0.10600638389587402 + }, + { + "epoch": 2.250213623046875e-05, + "model_forward_time": 0.027340412139892578, + "step": 14747 + }, + { + "epoch": 2.250213623046875e-05, + "step": 14747, + "training_step_time": 0.10669994354248047 + }, + { + "epoch": 2.2503662109375e-05, + "model_forward_time": 0.025320768356323242, + "step": 14748 + }, + { + "epoch": 2.2503662109375e-05, + "step": 14748, + "training_step_time": 0.1030266284942627 + }, + { + "epoch": 2.250518798828125e-05, + "model_forward_time": 0.0251922607421875, + "step": 14749 + }, + { + "epoch": 2.250518798828125e-05, + "step": 14749, + "training_step_time": 0.10486316680908203 + }, + { + "epoch": 2.25067138671875e-05, + "grad_norm": 0.1958162486553192, + "learning_rate": 5.550041104970397e-05, + "loss": 0.0158, + "step": 14750 + }, + { + "epoch": 2.25067138671875e-05, + "model_forward_time": 0.025376081466674805, + "step": 14750 + }, + { + "epoch": 2.25067138671875e-05, + "step": 14750, + "training_step_time": 0.10457491874694824 + }, + { + "epoch": 2.250823974609375e-05, + "model_forward_time": 0.02504873275756836, + "step": 14751 + }, + { + "epoch": 2.250823974609375e-05, + "step": 14751, + "training_step_time": 0.10509443283081055 + }, + { + "epoch": 2.2509765625e-05, + "model_forward_time": 0.02506089210510254, + "step": 14752 + }, + { + "epoch": 2.2509765625e-05, + "step": 14752, + "training_step_time": 0.1042473316192627 + }, + { + "epoch": 2.251129150390625e-05, + "model_forward_time": 0.02558755874633789, + "step": 14753 + }, + { + "epoch": 2.251129150390625e-05, + "step": 14753, + "training_step_time": 0.1072998046875 + }, + { + "epoch": 2.25128173828125e-05, + "model_forward_time": 0.025144577026367188, + "step": 14754 + }, + { + "epoch": 2.25128173828125e-05, + "step": 14754, + "training_step_time": 0.10531210899353027 + }, + { + "epoch": 2.251434326171875e-05, + "model_forward_time": 0.025300025939941406, + "step": 14755 + }, + { + "epoch": 2.251434326171875e-05, + "step": 14755, + "training_step_time": 0.10615301132202148 + }, + { + "epoch": 2.2515869140625e-05, + "model_forward_time": 0.02512192726135254, + "step": 14756 + }, + { + "epoch": 2.2515869140625e-05, + "step": 14756, + "training_step_time": 0.10626769065856934 + }, + { + "epoch": 2.251739501953125e-05, + "model_forward_time": 0.02468705177307129, + "step": 14757 + }, + { + "epoch": 2.251739501953125e-05, + "step": 14757, + "training_step_time": 0.10579586029052734 + }, + { + "epoch": 2.25189208984375e-05, + "model_forward_time": 0.025091171264648438, + "step": 14758 + }, + { + "epoch": 2.25189208984375e-05, + "step": 14758, + "training_step_time": 0.18588662147521973 + }, + { + "epoch": 2.252044677734375e-05, + "model_forward_time": 0.024283885955810547, + "step": 14759 + }, + { + "epoch": 2.252044677734375e-05, + "step": 14759, + "training_step_time": 0.2110607624053955 + }, + { + "epoch": 2.252197265625e-05, + "grad_norm": 0.21639585494995117, + "learning_rate": 5.544562657317863e-05, + "loss": 0.0191, + "step": 14760 + }, + { + "epoch": 2.252197265625e-05, + "model_forward_time": 0.024493694305419922, + "step": 14760 + }, + { + "epoch": 2.252197265625e-05, + "step": 14760, + "training_step_time": 0.2008209228515625 + }, + { + "epoch": 2.252349853515625e-05, + "model_forward_time": 0.025089025497436523, + "step": 14761 + }, + { + "epoch": 2.252349853515625e-05, + "step": 14761, + "training_step_time": 0.20092225074768066 + }, + { + "epoch": 2.25250244140625e-05, + "model_forward_time": 0.024183988571166992, + "step": 14762 + }, + { + "epoch": 2.25250244140625e-05, + "step": 14762, + "training_step_time": 0.20627498626708984 + }, + { + "epoch": 2.252655029296875e-05, + "model_forward_time": 0.027688980102539062, + "step": 14763 + }, + { + "epoch": 2.252655029296875e-05, + "step": 14763, + "training_step_time": 0.16882729530334473 + }, + { + "epoch": 2.2528076171875e-05, + "model_forward_time": 0.02386188507080078, + "step": 14764 + }, + { + "epoch": 2.2528076171875e-05, + "step": 14764, + "training_step_time": 0.18313860893249512 + }, + { + "epoch": 2.252960205078125e-05, + "model_forward_time": 0.02484273910522461, + "step": 14765 + }, + { + "epoch": 2.252960205078125e-05, + "step": 14765, + "training_step_time": 0.1572110652923584 + }, + { + "epoch": 2.25311279296875e-05, + "model_forward_time": 0.023606538772583008, + "step": 14766 + }, + { + "epoch": 2.25311279296875e-05, + "step": 14766, + "training_step_time": 0.15790176391601562 + }, + { + "epoch": 2.253265380859375e-05, + "model_forward_time": 0.023847103118896484, + "step": 14767 + }, + { + "epoch": 2.253265380859375e-05, + "step": 14767, + "training_step_time": 0.12670350074768066 + }, + { + "epoch": 2.25341796875e-05, + "model_forward_time": 0.02452850341796875, + "step": 14768 + }, + { + "epoch": 2.25341796875e-05, + "step": 14768, + "training_step_time": 0.12925505638122559 + }, + { + "epoch": 2.253570556640625e-05, + "model_forward_time": 0.024738788604736328, + "step": 14769 + }, + { + "epoch": 2.253570556640625e-05, + "step": 14769, + "training_step_time": 0.10661530494689941 + }, + { + "epoch": 2.25372314453125e-05, + "grad_norm": 0.3235095143318176, + "learning_rate": 5.539083547970352e-05, + "loss": 0.0166, + "step": 14770 + }, + { + "epoch": 2.25372314453125e-05, + "model_forward_time": 0.025157928466796875, + "step": 14770 + }, + { + "epoch": 2.25372314453125e-05, + "step": 14770, + "training_step_time": 0.1070404052734375 + }, + { + "epoch": 2.253875732421875e-05, + "model_forward_time": 0.024747610092163086, + "step": 14771 + }, + { + "epoch": 2.253875732421875e-05, + "step": 14771, + "training_step_time": 0.10478854179382324 + }, + { + "epoch": 2.2540283203125e-05, + "model_forward_time": 0.024283885955810547, + "step": 14772 + }, + { + "epoch": 2.2540283203125e-05, + "step": 14772, + "training_step_time": 0.10873150825500488 + }, + { + "epoch": 2.254180908203125e-05, + "model_forward_time": 0.024384737014770508, + "step": 14773 + }, + { + "epoch": 2.254180908203125e-05, + "step": 14773, + "training_step_time": 0.11200404167175293 + }, + { + "epoch": 2.25433349609375e-05, + "model_forward_time": 0.023879289627075195, + "step": 14774 + }, + { + "epoch": 2.25433349609375e-05, + "step": 14774, + "training_step_time": 0.12871336936950684 + }, + { + "epoch": 2.254486083984375e-05, + "model_forward_time": 0.024820327758789062, + "step": 14775 + }, + { + "epoch": 2.254486083984375e-05, + "step": 14775, + "training_step_time": 0.10478377342224121 + }, + { + "epoch": 2.254638671875e-05, + "model_forward_time": 0.02547454833984375, + "step": 14776 + }, + { + "epoch": 2.254638671875e-05, + "step": 14776, + "training_step_time": 0.15362191200256348 + }, + { + "epoch": 2.254791259765625e-05, + "model_forward_time": 0.025838136672973633, + "step": 14777 + }, + { + "epoch": 2.254791259765625e-05, + "step": 14777, + "training_step_time": 0.18442440032958984 + }, + { + "epoch": 2.25494384765625e-05, + "model_forward_time": 0.024869441986083984, + "step": 14778 + }, + { + "epoch": 2.25494384765625e-05, + "step": 14778, + "training_step_time": 0.10862445831298828 + }, + { + "epoch": 2.255096435546875e-05, + "model_forward_time": 0.02480030059814453, + "step": 14779 + }, + { + "epoch": 2.255096435546875e-05, + "step": 14779, + "training_step_time": 0.17042946815490723 + }, + { + "epoch": 2.2552490234375e-05, + "grad_norm": 0.37016212940216064, + "learning_rate": 5.533603783585496e-05, + "loss": 0.0143, + "step": 14780 + }, + { + "epoch": 2.2552490234375e-05, + "model_forward_time": 0.024486780166625977, + "step": 14780 + }, + { + "epoch": 2.2552490234375e-05, + "step": 14780, + "training_step_time": 0.22100114822387695 + }, + { + "epoch": 2.255401611328125e-05, + "model_forward_time": 0.024599790573120117, + "step": 14781 + }, + { + "epoch": 2.255401611328125e-05, + "step": 14781, + "training_step_time": 0.13791441917419434 + }, + { + "epoch": 2.25555419921875e-05, + "model_forward_time": 0.025160551071166992, + "step": 14782 + }, + { + "epoch": 2.25555419921875e-05, + "step": 14782, + "training_step_time": 0.11895251274108887 + }, + { + "epoch": 2.255706787109375e-05, + "model_forward_time": 0.025170326232910156, + "step": 14783 + }, + { + "epoch": 2.255706787109375e-05, + "step": 14783, + "training_step_time": 0.12685704231262207 + }, + { + "epoch": 2.255859375e-05, + "model_forward_time": 0.025570392608642578, + "step": 14784 + }, + { + "epoch": 2.255859375e-05, + "step": 14784, + "training_step_time": 0.10845470428466797 + }, + { + "epoch": 2.256011962890625e-05, + "model_forward_time": 0.025302410125732422, + "step": 14785 + }, + { + "epoch": 2.256011962890625e-05, + "step": 14785, + "training_step_time": 0.10517096519470215 + }, + { + "epoch": 2.25616455078125e-05, + "model_forward_time": 0.02496933937072754, + "step": 14786 + }, + { + "epoch": 2.25616455078125e-05, + "step": 14786, + "training_step_time": 0.10517144203186035 + }, + { + "epoch": 2.256317138671875e-05, + "model_forward_time": 0.024996519088745117, + "step": 14787 + }, + { + "epoch": 2.256317138671875e-05, + "step": 14787, + "training_step_time": 0.10824942588806152 + }, + { + "epoch": 2.2564697265625e-05, + "model_forward_time": 0.02556014060974121, + "step": 14788 + }, + { + "epoch": 2.2564697265625e-05, + "step": 14788, + "training_step_time": 0.10934758186340332 + }, + { + "epoch": 2.256622314453125e-05, + "model_forward_time": 0.025942564010620117, + "step": 14789 + }, + { + "epoch": 2.256622314453125e-05, + "step": 14789, + "training_step_time": 0.10755753517150879 + }, + { + "epoch": 2.25677490234375e-05, + "grad_norm": 0.2053672969341278, + "learning_rate": 5.52812337082173e-05, + "loss": 0.009, + "step": 14790 + }, + { + "epoch": 2.25677490234375e-05, + "model_forward_time": 0.0259702205657959, + "step": 14790 + }, + { + "epoch": 2.25677490234375e-05, + "step": 14790, + "training_step_time": 0.10682296752929688 + }, + { + "epoch": 2.256927490234375e-05, + "model_forward_time": 0.02599930763244629, + "step": 14791 + }, + { + "epoch": 2.256927490234375e-05, + "step": 14791, + "training_step_time": 0.10543608665466309 + }, + { + "epoch": 2.257080078125e-05, + "model_forward_time": 0.02496790885925293, + "step": 14792 + }, + { + "epoch": 2.257080078125e-05, + "step": 14792, + "training_step_time": 0.10713768005371094 + }, + { + "epoch": 2.257232666015625e-05, + "model_forward_time": 0.025805950164794922, + "step": 14793 + }, + { + "epoch": 2.257232666015625e-05, + "step": 14793, + "training_step_time": 0.10598206520080566 + }, + { + "epoch": 2.25738525390625e-05, + "model_forward_time": 0.02528858184814453, + "step": 14794 + }, + { + "epoch": 2.25738525390625e-05, + "step": 14794, + "training_step_time": 0.11873292922973633 + }, + { + "epoch": 2.257537841796875e-05, + "model_forward_time": 0.024210453033447266, + "step": 14795 + }, + { + "epoch": 2.257537841796875e-05, + "step": 14795, + "training_step_time": 0.10839581489562988 + }, + { + "epoch": 2.2576904296875e-05, + "model_forward_time": 0.025251388549804688, + "step": 14796 + }, + { + "epoch": 2.2576904296875e-05, + "step": 14796, + "training_step_time": 0.10885143280029297 + }, + { + "epoch": 2.257843017578125e-05, + "model_forward_time": 0.026015043258666992, + "step": 14797 + }, + { + "epoch": 2.257843017578125e-05, + "step": 14797, + "training_step_time": 0.10935354232788086 + }, + { + "epoch": 2.25799560546875e-05, + "model_forward_time": 0.025206565856933594, + "step": 14798 + }, + { + "epoch": 2.25799560546875e-05, + "step": 14798, + "training_step_time": 0.10524392127990723 + }, + { + "epoch": 2.258148193359375e-05, + "model_forward_time": 0.02510666847229004, + "step": 14799 + }, + { + "epoch": 2.258148193359375e-05, + "step": 14799, + "training_step_time": 0.11235666275024414 + }, + { + "epoch": 2.25830078125e-05, + "grad_norm": 0.4688062071800232, + "learning_rate": 5.522642316338268e-05, + "loss": 0.0181, + "step": 14800 + }, + { + "epoch": 2.25830078125e-05, + "model_forward_time": 0.02641916275024414, + "step": 14800 + }, + { + "epoch": 2.25830078125e-05, + "step": 14800, + "training_step_time": 0.11036992073059082 + }, + { + "epoch": 2.258453369140625e-05, + "model_forward_time": 0.02584528923034668, + "step": 14801 + }, + { + "epoch": 2.258453369140625e-05, + "step": 14801, + "training_step_time": 0.10625886917114258 + }, + { + "epoch": 2.25860595703125e-05, + "model_forward_time": 0.025241374969482422, + "step": 14802 + }, + { + "epoch": 2.25860595703125e-05, + "step": 14802, + "training_step_time": 0.10579252243041992 + }, + { + "epoch": 2.258758544921875e-05, + "model_forward_time": 0.025835514068603516, + "step": 14803 + }, + { + "epoch": 2.258758544921875e-05, + "step": 14803, + "training_step_time": 0.10532093048095703 + }, + { + "epoch": 2.2589111328125e-05, + "model_forward_time": 0.02572798728942871, + "step": 14804 + }, + { + "epoch": 2.2589111328125e-05, + "step": 14804, + "training_step_time": 0.10525321960449219 + }, + { + "epoch": 2.259063720703125e-05, + "model_forward_time": 0.02539825439453125, + "step": 14805 + }, + { + "epoch": 2.259063720703125e-05, + "step": 14805, + "training_step_time": 0.10481762886047363 + }, + { + "epoch": 2.25921630859375e-05, + "model_forward_time": 0.025761842727661133, + "step": 14806 + }, + { + "epoch": 2.25921630859375e-05, + "step": 14806, + "training_step_time": 0.10483098030090332 + }, + { + "epoch": 2.259368896484375e-05, + "model_forward_time": 0.025554418563842773, + "step": 14807 + }, + { + "epoch": 2.259368896484375e-05, + "step": 14807, + "training_step_time": 0.150787353515625 + }, + { + "epoch": 2.259521484375e-05, + "model_forward_time": 0.02520132064819336, + "step": 14808 + }, + { + "epoch": 2.259521484375e-05, + "step": 14808, + "training_step_time": 0.139662504196167 + }, + { + "epoch": 2.259674072265625e-05, + "model_forward_time": 0.02579951286315918, + "step": 14809 + }, + { + "epoch": 2.259674072265625e-05, + "step": 14809, + "training_step_time": 0.11650347709655762 + }, + { + "epoch": 2.25982666015625e-05, + "grad_norm": 0.2553534507751465, + "learning_rate": 5.51716062679511e-05, + "loss": 0.0103, + "step": 14810 + }, + { + "epoch": 2.25982666015625e-05, + "model_forward_time": 0.024725675582885742, + "step": 14810 + }, + { + "epoch": 2.25982666015625e-05, + "step": 14810, + "training_step_time": 0.11183643341064453 + }, + { + "epoch": 2.259979248046875e-05, + "model_forward_time": 0.02544260025024414, + "step": 14811 + }, + { + "epoch": 2.259979248046875e-05, + "step": 14811, + "training_step_time": 0.1734323501586914 + }, + { + "epoch": 2.2601318359375e-05, + "model_forward_time": 0.025006532669067383, + "step": 14812 + }, + { + "epoch": 2.2601318359375e-05, + "step": 14812, + "training_step_time": 0.13037443161010742 + }, + { + "epoch": 2.260284423828125e-05, + "model_forward_time": 0.024850845336914062, + "step": 14813 + }, + { + "epoch": 2.260284423828125e-05, + "step": 14813, + "training_step_time": 0.11062312126159668 + }, + { + "epoch": 2.26043701171875e-05, + "model_forward_time": 0.024623632431030273, + "step": 14814 + }, + { + "epoch": 2.26043701171875e-05, + "step": 14814, + "training_step_time": 0.10833477973937988 + }, + { + "epoch": 2.260589599609375e-05, + "model_forward_time": 0.025421857833862305, + "step": 14815 + }, + { + "epoch": 2.260589599609375e-05, + "step": 14815, + "training_step_time": 0.10810446739196777 + }, + { + "epoch": 2.2607421875e-05, + "model_forward_time": 0.02528548240661621, + "step": 14816 + }, + { + "epoch": 2.2607421875e-05, + "step": 14816, + "training_step_time": 0.10900306701660156 + }, + { + "epoch": 2.260894775390625e-05, + "model_forward_time": 0.02529764175415039, + "step": 14817 + }, + { + "epoch": 2.260894775390625e-05, + "step": 14817, + "training_step_time": 0.1571967601776123 + }, + { + "epoch": 2.26104736328125e-05, + "model_forward_time": 0.024762868881225586, + "step": 14818 + }, + { + "epoch": 2.26104736328125e-05, + "step": 14818, + "training_step_time": 0.1141808032989502 + }, + { + "epoch": 2.261199951171875e-05, + "model_forward_time": 0.02491140365600586, + "step": 14819 + }, + { + "epoch": 2.261199951171875e-05, + "step": 14819, + "training_step_time": 0.11102509498596191 + }, + { + "epoch": 2.2613525390625e-05, + "grad_norm": 0.3513282239437103, + "learning_rate": 5.511678308853026e-05, + "loss": 0.0176, + "step": 14820 + }, + { + "epoch": 2.2613525390625e-05, + "model_forward_time": 0.025198936462402344, + "step": 14820 + }, + { + "epoch": 2.2613525390625e-05, + "step": 14820, + "training_step_time": 0.11933422088623047 + }, + { + "epoch": 2.261505126953125e-05, + "model_forward_time": 0.025443077087402344, + "step": 14821 + }, + { + "epoch": 2.261505126953125e-05, + "step": 14821, + "training_step_time": 0.10541892051696777 + }, + { + "epoch": 2.26165771484375e-05, + "model_forward_time": 0.028615713119506836, + "step": 14822 + }, + { + "epoch": 2.26165771484375e-05, + "step": 14822, + "training_step_time": 0.17497992515563965 + }, + { + "epoch": 2.261810302734375e-05, + "model_forward_time": 0.0250246524810791, + "step": 14823 + }, + { + "epoch": 2.261810302734375e-05, + "step": 14823, + "training_step_time": 0.1685009002685547 + }, + { + "epoch": 2.261962890625e-05, + "model_forward_time": 0.024708986282348633, + "step": 14824 + }, + { + "epoch": 2.261962890625e-05, + "step": 14824, + "training_step_time": 0.10331392288208008 + }, + { + "epoch": 2.262115478515625e-05, + "model_forward_time": 0.02632904052734375, + "step": 14825 + }, + { + "epoch": 2.262115478515625e-05, + "step": 14825, + "training_step_time": 0.18813610076904297 + }, + { + "epoch": 2.26226806640625e-05, + "model_forward_time": 0.024967193603515625, + "step": 14826 + }, + { + "epoch": 2.26226806640625e-05, + "step": 14826, + "training_step_time": 0.15134549140930176 + }, + { + "epoch": 2.262420654296875e-05, + "model_forward_time": 0.02448892593383789, + "step": 14827 + }, + { + "epoch": 2.262420654296875e-05, + "step": 14827, + "training_step_time": 0.10665702819824219 + }, + { + "epoch": 2.2625732421875e-05, + "model_forward_time": 0.025063753128051758, + "step": 14828 + }, + { + "epoch": 2.2625732421875e-05, + "step": 14828, + "training_step_time": 0.1487720012664795 + }, + { + "epoch": 2.262725830078125e-05, + "model_forward_time": 0.025043010711669922, + "step": 14829 + }, + { + "epoch": 2.262725830078125e-05, + "step": 14829, + "training_step_time": 0.20940709114074707 + }, + { + "epoch": 2.26287841796875e-05, + "grad_norm": 0.3493926525115967, + "learning_rate": 5.506195369173548e-05, + "loss": 0.0246, + "step": 14830 + }, + { + "epoch": 2.26287841796875e-05, + "model_forward_time": 0.024427175521850586, + "step": 14830 + }, + { + "epoch": 2.26287841796875e-05, + "step": 14830, + "training_step_time": 0.11499714851379395 + }, + { + "epoch": 2.263031005859375e-05, + "model_forward_time": 0.025658369064331055, + "step": 14831 + }, + { + "epoch": 2.263031005859375e-05, + "step": 14831, + "training_step_time": 0.1037442684173584 + }, + { + "epoch": 2.26318359375e-05, + "model_forward_time": 0.0267179012298584, + "step": 14832 + }, + { + "epoch": 2.26318359375e-05, + "step": 14832, + "training_step_time": 0.10761761665344238 + }, + { + "epoch": 2.263336181640625e-05, + "model_forward_time": 0.02652144432067871, + "step": 14833 + }, + { + "epoch": 2.263336181640625e-05, + "step": 14833, + "training_step_time": 0.1071329116821289 + }, + { + "epoch": 2.26348876953125e-05, + "model_forward_time": 0.0265045166015625, + "step": 14834 + }, + { + "epoch": 2.26348876953125e-05, + "step": 14834, + "training_step_time": 0.10627508163452148 + }, + { + "epoch": 2.263641357421875e-05, + "model_forward_time": 0.026355743408203125, + "step": 14835 + }, + { + "epoch": 2.263641357421875e-05, + "step": 14835, + "training_step_time": 0.10593533515930176 + }, + { + "epoch": 2.2637939453125e-05, + "model_forward_time": 0.027586936950683594, + "step": 14836 + }, + { + "epoch": 2.2637939453125e-05, + "step": 14836, + "training_step_time": 0.10930085182189941 + }, + { + "epoch": 2.263946533203125e-05, + "model_forward_time": 0.02616262435913086, + "step": 14837 + }, + { + "epoch": 2.263946533203125e-05, + "step": 14837, + "training_step_time": 0.10576725006103516 + }, + { + "epoch": 2.26409912109375e-05, + "model_forward_time": 0.025001049041748047, + "step": 14838 + }, + { + "epoch": 2.26409912109375e-05, + "step": 14838, + "training_step_time": 0.10335540771484375 + }, + { + "epoch": 2.264251708984375e-05, + "model_forward_time": 0.02640986442565918, + "step": 14839 + }, + { + "epoch": 2.264251708984375e-05, + "step": 14839, + "training_step_time": 0.10802435874938965 + }, + { + "epoch": 2.264404296875e-05, + "grad_norm": 0.22913873195648193, + "learning_rate": 5.500711814418966e-05, + "loss": 0.032, + "step": 14840 + }, + { + "epoch": 2.264404296875e-05, + "model_forward_time": 0.025347232818603516, + "step": 14840 + }, + { + "epoch": 2.264404296875e-05, + "step": 14840, + "training_step_time": 0.10446739196777344 + }, + { + "epoch": 2.264556884765625e-05, + "model_forward_time": 0.024825096130371094, + "step": 14841 + }, + { + "epoch": 2.264556884765625e-05, + "step": 14841, + "training_step_time": 0.1092381477355957 + }, + { + "epoch": 2.26470947265625e-05, + "model_forward_time": 0.024314403533935547, + "step": 14842 + }, + { + "epoch": 2.26470947265625e-05, + "step": 14842, + "training_step_time": 0.10326933860778809 + }, + { + "epoch": 2.264862060546875e-05, + "model_forward_time": 0.02485179901123047, + "step": 14843 + }, + { + "epoch": 2.264862060546875e-05, + "step": 14843, + "training_step_time": 0.10414695739746094 + }, + { + "epoch": 2.2650146484375e-05, + "model_forward_time": 0.024906635284423828, + "step": 14844 + }, + { + "epoch": 2.2650146484375e-05, + "step": 14844, + "training_step_time": 0.10831308364868164 + }, + { + "epoch": 2.265167236328125e-05, + "model_forward_time": 0.02557849884033203, + "step": 14845 + }, + { + "epoch": 2.265167236328125e-05, + "step": 14845, + "training_step_time": 0.10463929176330566 + }, + { + "epoch": 2.26531982421875e-05, + "model_forward_time": 0.02520895004272461, + "step": 14846 + }, + { + "epoch": 2.26531982421875e-05, + "step": 14846, + "training_step_time": 0.10445737838745117 + }, + { + "epoch": 2.265472412109375e-05, + "model_forward_time": 0.025127887725830078, + "step": 14847 + }, + { + "epoch": 2.265472412109375e-05, + "step": 14847, + "training_step_time": 0.10575389862060547 + }, + { + "epoch": 2.265625e-05, + "model_forward_time": 0.02509784698486328, + "step": 14848 + }, + { + "epoch": 2.265625e-05, + "step": 14848, + "training_step_time": 0.10387110710144043 + }, + { + "epoch": 2.265777587890625e-05, + "model_forward_time": 0.024988412857055664, + "step": 14849 + }, + { + "epoch": 2.265777587890625e-05, + "step": 14849, + "training_step_time": 0.10509228706359863 + }, + { + "epoch": 2.26593017578125e-05, + "grad_norm": 0.2163519263267517, + "learning_rate": 5.495227651252315e-05, + "loss": 0.026, + "step": 14850 + }, + { + "epoch": 2.26593017578125e-05, + "model_forward_time": 0.026961803436279297, + "step": 14850 + }, + { + "epoch": 2.26593017578125e-05, + "step": 14850, + "training_step_time": 0.10823345184326172 + }, + { + "epoch": 2.266082763671875e-05, + "model_forward_time": 0.02663135528564453, + "step": 14851 + }, + { + "epoch": 2.266082763671875e-05, + "step": 14851, + "training_step_time": 0.10627174377441406 + }, + { + "epoch": 2.2662353515625e-05, + "model_forward_time": 0.025142431259155273, + "step": 14852 + }, + { + "epoch": 2.2662353515625e-05, + "step": 14852, + "training_step_time": 0.10399889945983887 + }, + { + "epoch": 2.266387939453125e-05, + "model_forward_time": 0.026178836822509766, + "step": 14853 + }, + { + "epoch": 2.266387939453125e-05, + "step": 14853, + "training_step_time": 0.15939760208129883 + }, + { + "epoch": 2.26654052734375e-05, + "model_forward_time": 0.02554154396057129, + "step": 14854 + }, + { + "epoch": 2.26654052734375e-05, + "step": 14854, + "training_step_time": 0.1461200714111328 + }, + { + "epoch": 2.266693115234375e-05, + "model_forward_time": 0.024308204650878906, + "step": 14855 + }, + { + "epoch": 2.266693115234375e-05, + "step": 14855, + "training_step_time": 0.10918974876403809 + }, + { + "epoch": 2.266845703125e-05, + "model_forward_time": 0.02513861656188965, + "step": 14856 + }, + { + "epoch": 2.266845703125e-05, + "step": 14856, + "training_step_time": 0.11739492416381836 + }, + { + "epoch": 2.266998291015625e-05, + "model_forward_time": 0.0248568058013916, + "step": 14857 + }, + { + "epoch": 2.266998291015625e-05, + "step": 14857, + "training_step_time": 0.18959736824035645 + }, + { + "epoch": 2.26715087890625e-05, + "model_forward_time": 0.024457693099975586, + "step": 14858 + }, + { + "epoch": 2.26715087890625e-05, + "step": 14858, + "training_step_time": 0.20301127433776855 + }, + { + "epoch": 2.267303466796875e-05, + "model_forward_time": 0.024655580520629883, + "step": 14859 + }, + { + "epoch": 2.267303466796875e-05, + "step": 14859, + "training_step_time": 0.2040097713470459 + }, + { + "epoch": 2.2674560546875e-05, + "grad_norm": 0.2992362082004547, + "learning_rate": 5.48974288633737e-05, + "loss": 0.0121, + "step": 14860 + }, + { + "epoch": 2.2674560546875e-05, + "model_forward_time": 0.024125337600708008, + "step": 14860 + }, + { + "epoch": 2.2674560546875e-05, + "step": 14860, + "training_step_time": 0.18449807167053223 + }, + { + "epoch": 2.267608642578125e-05, + "model_forward_time": 0.024123430252075195, + "step": 14861 + }, + { + "epoch": 2.267608642578125e-05, + "step": 14861, + "training_step_time": 0.2059779167175293 + }, + { + "epoch": 2.26776123046875e-05, + "model_forward_time": 0.024475574493408203, + "step": 14862 + }, + { + "epoch": 2.26776123046875e-05, + "step": 14862, + "training_step_time": 0.21816611289978027 + }, + { + "epoch": 2.267913818359375e-05, + "model_forward_time": 0.024101734161376953, + "step": 14863 + }, + { + "epoch": 2.267913818359375e-05, + "step": 14863, + "training_step_time": 0.15960311889648438 + }, + { + "epoch": 2.26806640625e-05, + "model_forward_time": 0.02553558349609375, + "step": 14864 + }, + { + "epoch": 2.26806640625e-05, + "step": 14864, + "training_step_time": 0.13377737998962402 + }, + { + "epoch": 2.268218994140625e-05, + "model_forward_time": 0.024745702743530273, + "step": 14865 + }, + { + "epoch": 2.268218994140625e-05, + "step": 14865, + "training_step_time": 0.1947021484375 + }, + { + "epoch": 2.26837158203125e-05, + "model_forward_time": 0.02460479736328125, + "step": 14866 + }, + { + "epoch": 2.26837158203125e-05, + "step": 14866, + "training_step_time": 0.10844182968139648 + }, + { + "epoch": 2.268524169921875e-05, + "model_forward_time": 0.024878740310668945, + "step": 14867 + }, + { + "epoch": 2.268524169921875e-05, + "step": 14867, + "training_step_time": 0.1087791919708252 + }, + { + "epoch": 2.2686767578125e-05, + "model_forward_time": 0.025892019271850586, + "step": 14868 + }, + { + "epoch": 2.2686767578125e-05, + "step": 14868, + "training_step_time": 0.1576218605041504 + }, + { + "epoch": 2.268829345703125e-05, + "model_forward_time": 0.025026798248291016, + "step": 14869 + }, + { + "epoch": 2.268829345703125e-05, + "step": 14869, + "training_step_time": 0.18625140190124512 + }, + { + "epoch": 2.26898193359375e-05, + "grad_norm": 0.26298847794532776, + "learning_rate": 5.4842575263386386e-05, + "loss": 0.0111, + "step": 14870 + }, + { + "epoch": 2.26898193359375e-05, + "model_forward_time": 0.02471137046813965, + "step": 14870 + }, + { + "epoch": 2.26898193359375e-05, + "step": 14870, + "training_step_time": 0.134918212890625 + }, + { + "epoch": 2.269134521484375e-05, + "model_forward_time": 0.024823427200317383, + "step": 14871 + }, + { + "epoch": 2.269134521484375e-05, + "step": 14871, + "training_step_time": 0.15197420120239258 + }, + { + "epoch": 2.269287109375e-05, + "model_forward_time": 0.02453160285949707, + "step": 14872 + }, + { + "epoch": 2.269287109375e-05, + "step": 14872, + "training_step_time": 0.1039285659790039 + }, + { + "epoch": 2.269439697265625e-05, + "model_forward_time": 0.024767398834228516, + "step": 14873 + }, + { + "epoch": 2.269439697265625e-05, + "step": 14873, + "training_step_time": 0.1219797134399414 + }, + { + "epoch": 2.26959228515625e-05, + "model_forward_time": 0.025044918060302734, + "step": 14874 + }, + { + "epoch": 2.26959228515625e-05, + "step": 14874, + "training_step_time": 0.11231827735900879 + }, + { + "epoch": 2.269744873046875e-05, + "model_forward_time": 0.02501058578491211, + "step": 14875 + }, + { + "epoch": 2.269744873046875e-05, + "step": 14875, + "training_step_time": 0.10689258575439453 + }, + { + "epoch": 2.2698974609375e-05, + "model_forward_time": 0.024886131286621094, + "step": 14876 + }, + { + "epoch": 2.2698974609375e-05, + "step": 14876, + "training_step_time": 0.15097260475158691 + }, + { + "epoch": 2.270050048828125e-05, + "model_forward_time": 0.02499866485595703, + "step": 14877 + }, + { + "epoch": 2.270050048828125e-05, + "step": 14877, + "training_step_time": 0.17195558547973633 + }, + { + "epoch": 2.27020263671875e-05, + "model_forward_time": 0.02435469627380371, + "step": 14878 + }, + { + "epoch": 2.27020263671875e-05, + "step": 14878, + "training_step_time": 0.17908620834350586 + }, + { + "epoch": 2.270355224609375e-05, + "model_forward_time": 0.024644851684570312, + "step": 14879 + }, + { + "epoch": 2.270355224609375e-05, + "step": 14879, + "training_step_time": 0.1671741008758545 + }, + { + "epoch": 2.2705078125e-05, + "grad_norm": 0.3613302707672119, + "learning_rate": 5.478771577921351e-05, + "loss": 0.0165, + "step": 14880 + }, + { + "epoch": 2.2705078125e-05, + "model_forward_time": 0.02400803565979004, + "step": 14880 + }, + { + "epoch": 2.2705078125e-05, + "step": 14880, + "training_step_time": 0.1529064178466797 + }, + { + "epoch": 2.270660400390625e-05, + "model_forward_time": 0.023691177368164062, + "step": 14881 + }, + { + "epoch": 2.270660400390625e-05, + "step": 14881, + "training_step_time": 0.13833260536193848 + }, + { + "epoch": 2.27081298828125e-05, + "model_forward_time": 0.02431488037109375, + "step": 14882 + }, + { + "epoch": 2.27081298828125e-05, + "step": 14882, + "training_step_time": 0.13045287132263184 + }, + { + "epoch": 2.270965576171875e-05, + "model_forward_time": 0.024327993392944336, + "step": 14883 + }, + { + "epoch": 2.270965576171875e-05, + "step": 14883, + "training_step_time": 0.12671160697937012 + }, + { + "epoch": 2.2711181640625e-05, + "model_forward_time": 0.024814128875732422, + "step": 14884 + }, + { + "epoch": 2.2711181640625e-05, + "step": 14884, + "training_step_time": 0.12410783767700195 + }, + { + "epoch": 2.271270751953125e-05, + "model_forward_time": 0.027817964553833008, + "step": 14885 + }, + { + "epoch": 2.271270751953125e-05, + "step": 14885, + "training_step_time": 0.1152656078338623 + }, + { + "epoch": 2.27142333984375e-05, + "model_forward_time": 0.024953126907348633, + "step": 14886 + }, + { + "epoch": 2.27142333984375e-05, + "step": 14886, + "training_step_time": 0.11551499366760254 + }, + { + "epoch": 2.271575927734375e-05, + "model_forward_time": 0.025027036666870117, + "step": 14887 + }, + { + "epoch": 2.271575927734375e-05, + "step": 14887, + "training_step_time": 0.1104745864868164 + }, + { + "epoch": 2.271728515625e-05, + "model_forward_time": 0.024927616119384766, + "step": 14888 + }, + { + "epoch": 2.271728515625e-05, + "step": 14888, + "training_step_time": 0.10978388786315918 + }, + { + "epoch": 2.271881103515625e-05, + "model_forward_time": 0.026190996170043945, + "step": 14889 + }, + { + "epoch": 2.271881103515625e-05, + "step": 14889, + "training_step_time": 0.10906171798706055 + }, + { + "epoch": 2.27203369140625e-05, + "grad_norm": 0.4646231532096863, + "learning_rate": 5.473285047751451e-05, + "loss": 0.0132, + "step": 14890 + }, + { + "epoch": 2.27203369140625e-05, + "model_forward_time": 0.024974584579467773, + "step": 14890 + }, + { + "epoch": 2.27203369140625e-05, + "step": 14890, + "training_step_time": 0.10864377021789551 + }, + { + "epoch": 2.272186279296875e-05, + "model_forward_time": 0.024869441986083984, + "step": 14891 + }, + { + "epoch": 2.272186279296875e-05, + "step": 14891, + "training_step_time": 0.1066129207611084 + }, + { + "epoch": 2.2723388671875e-05, + "model_forward_time": 0.02564406394958496, + "step": 14892 + }, + { + "epoch": 2.2723388671875e-05, + "step": 14892, + "training_step_time": 0.10619688034057617 + }, + { + "epoch": 2.272491455078125e-05, + "model_forward_time": 0.02511763572692871, + "step": 14893 + }, + { + "epoch": 2.272491455078125e-05, + "step": 14893, + "training_step_time": 0.10897254943847656 + }, + { + "epoch": 2.27264404296875e-05, + "model_forward_time": 0.025453805923461914, + "step": 14894 + }, + { + "epoch": 2.27264404296875e-05, + "step": 14894, + "training_step_time": 0.12895774841308594 + }, + { + "epoch": 2.272796630859375e-05, + "model_forward_time": 0.025157451629638672, + "step": 14895 + }, + { + "epoch": 2.272796630859375e-05, + "step": 14895, + "training_step_time": 0.11252832412719727 + }, + { + "epoch": 2.27294921875e-05, + "model_forward_time": 0.02543330192565918, + "step": 14896 + }, + { + "epoch": 2.27294921875e-05, + "step": 14896, + "training_step_time": 0.1196587085723877 + }, + { + "epoch": 2.273101806640625e-05, + "model_forward_time": 0.025203227996826172, + "step": 14897 + }, + { + "epoch": 2.273101806640625e-05, + "step": 14897, + "training_step_time": 0.11507296562194824 + }, + { + "epoch": 2.27325439453125e-05, + "model_forward_time": 0.024980545043945312, + "step": 14898 + }, + { + "epoch": 2.27325439453125e-05, + "step": 14898, + "training_step_time": 0.18722319602966309 + }, + { + "epoch": 2.273406982421875e-05, + "model_forward_time": 0.02448105812072754, + "step": 14899 + }, + { + "epoch": 2.273406982421875e-05, + "step": 14899, + "training_step_time": 0.11398124694824219 + }, + { + "epoch": 2.2735595703125e-05, + "grad_norm": 0.26571959257125854, + "learning_rate": 5.467797942495589e-05, + "loss": 0.0107, + "step": 14900 + }, + { + "epoch": 2.2735595703125e-05, + "model_forward_time": 0.024373531341552734, + "step": 14900 + }, + { + "epoch": 2.2735595703125e-05, + "step": 14900, + "training_step_time": 0.10998344421386719 + }, + { + "epoch": 2.273712158203125e-05, + "model_forward_time": 0.0252382755279541, + "step": 14901 + }, + { + "epoch": 2.273712158203125e-05, + "step": 14901, + "training_step_time": 0.11007142066955566 + }, + { + "epoch": 2.27386474609375e-05, + "model_forward_time": 0.025246858596801758, + "step": 14902 + }, + { + "epoch": 2.27386474609375e-05, + "step": 14902, + "training_step_time": 0.10644125938415527 + }, + { + "epoch": 2.274017333984375e-05, + "model_forward_time": 0.025379180908203125, + "step": 14903 + }, + { + "epoch": 2.274017333984375e-05, + "step": 14903, + "training_step_time": 0.10727334022521973 + }, + { + "epoch": 2.274169921875e-05, + "model_forward_time": 0.025216341018676758, + "step": 14904 + }, + { + "epoch": 2.274169921875e-05, + "step": 14904, + "training_step_time": 0.15033793449401855 + }, + { + "epoch": 2.274322509765625e-05, + "model_forward_time": 0.02520012855529785, + "step": 14905 + }, + { + "epoch": 2.274322509765625e-05, + "step": 14905, + "training_step_time": 0.10689067840576172 + }, + { + "epoch": 2.27447509765625e-05, + "model_forward_time": 0.02510547637939453, + "step": 14906 + }, + { + "epoch": 2.27447509765625e-05, + "step": 14906, + "training_step_time": 0.11208510398864746 + }, + { + "epoch": 2.274627685546875e-05, + "model_forward_time": 0.024983882904052734, + "step": 14907 + }, + { + "epoch": 2.274627685546875e-05, + "step": 14907, + "training_step_time": 0.12486839294433594 + }, + { + "epoch": 2.2747802734375e-05, + "model_forward_time": 0.024580717086791992, + "step": 14908 + }, + { + "epoch": 2.2747802734375e-05, + "step": 14908, + "training_step_time": 0.10437130928039551 + }, + { + "epoch": 2.274932861328125e-05, + "model_forward_time": 0.024425983428955078, + "step": 14909 + }, + { + "epoch": 2.274932861328125e-05, + "step": 14909, + "training_step_time": 0.16484642028808594 + }, + { + "epoch": 2.27508544921875e-05, + "grad_norm": 0.42848464846611023, + "learning_rate": 5.462310268821118e-05, + "loss": 0.0156, + "step": 14910 + }, + { + "epoch": 2.27508544921875e-05, + "model_forward_time": 0.024337291717529297, + "step": 14910 + }, + { + "epoch": 2.27508544921875e-05, + "step": 14910, + "training_step_time": 0.14425039291381836 + }, + { + "epoch": 2.275238037109375e-05, + "model_forward_time": 0.025024890899658203, + "step": 14911 + }, + { + "epoch": 2.275238037109375e-05, + "step": 14911, + "training_step_time": 0.11900901794433594 + }, + { + "epoch": 2.275390625e-05, + "model_forward_time": 0.02473616600036621, + "step": 14912 + }, + { + "epoch": 2.275390625e-05, + "step": 14912, + "training_step_time": 0.12766051292419434 + }, + { + "epoch": 2.275543212890625e-05, + "model_forward_time": 0.0250396728515625, + "step": 14913 + }, + { + "epoch": 2.275543212890625e-05, + "step": 14913, + "training_step_time": 0.23004651069641113 + }, + { + "epoch": 2.27569580078125e-05, + "model_forward_time": 0.024539470672607422, + "step": 14914 + }, + { + "epoch": 2.27569580078125e-05, + "step": 14914, + "training_step_time": 0.11830687522888184 + }, + { + "epoch": 2.275848388671875e-05, + "model_forward_time": 0.02417278289794922, + "step": 14915 + }, + { + "epoch": 2.275848388671875e-05, + "step": 14915, + "training_step_time": 0.13068675994873047 + }, + { + "epoch": 2.2760009765625e-05, + "model_forward_time": 0.024727582931518555, + "step": 14916 + }, + { + "epoch": 2.2760009765625e-05, + "step": 14916, + "training_step_time": 0.16162729263305664 + }, + { + "epoch": 2.276153564453125e-05, + "model_forward_time": 0.024370908737182617, + "step": 14917 + }, + { + "epoch": 2.276153564453125e-05, + "step": 14917, + "training_step_time": 0.10608196258544922 + }, + { + "epoch": 2.27630615234375e-05, + "model_forward_time": 0.024675607681274414, + "step": 14918 + }, + { + "epoch": 2.27630615234375e-05, + "step": 14918, + "training_step_time": 0.12104129791259766 + }, + { + "epoch": 2.276458740234375e-05, + "model_forward_time": 0.02509474754333496, + "step": 14919 + }, + { + "epoch": 2.276458740234375e-05, + "step": 14919, + "training_step_time": 0.11359143257141113 + }, + { + "epoch": 2.276611328125e-05, + "grad_norm": 0.26866063475608826, + "learning_rate": 5.456822033396076e-05, + "loss": 0.0106, + "step": 14920 + }, + { + "epoch": 2.276611328125e-05, + "model_forward_time": 0.025063753128051758, + "step": 14920 + }, + { + "epoch": 2.276611328125e-05, + "step": 14920, + "training_step_time": 0.11717700958251953 + }, + { + "epoch": 2.276763916015625e-05, + "model_forward_time": 0.025421857833862305, + "step": 14921 + }, + { + "epoch": 2.276763916015625e-05, + "step": 14921, + "training_step_time": 0.11031651496887207 + }, + { + "epoch": 2.27691650390625e-05, + "model_forward_time": 0.02510666847229004, + "step": 14922 + }, + { + "epoch": 2.27691650390625e-05, + "step": 14922, + "training_step_time": 0.11158514022827148 + }, + { + "epoch": 2.277069091796875e-05, + "model_forward_time": 0.02490091323852539, + "step": 14923 + }, + { + "epoch": 2.277069091796875e-05, + "step": 14923, + "training_step_time": 0.10818672180175781 + }, + { + "epoch": 2.2772216796875e-05, + "model_forward_time": 0.025289535522460938, + "step": 14924 + }, + { + "epoch": 2.2772216796875e-05, + "step": 14924, + "training_step_time": 0.10561609268188477 + }, + { + "epoch": 2.277374267578125e-05, + "model_forward_time": 0.024762392044067383, + "step": 14925 + }, + { + "epoch": 2.277374267578125e-05, + "step": 14925, + "training_step_time": 0.10829544067382812 + }, + { + "epoch": 2.27752685546875e-05, + "model_forward_time": 0.025540828704833984, + "step": 14926 + }, + { + "epoch": 2.27752685546875e-05, + "step": 14926, + "training_step_time": 0.11206722259521484 + }, + { + "epoch": 2.277679443359375e-05, + "model_forward_time": 0.025088787078857422, + "step": 14927 + }, + { + "epoch": 2.277679443359375e-05, + "step": 14927, + "training_step_time": 0.1061398983001709 + }, + { + "epoch": 2.27783203125e-05, + "model_forward_time": 0.02525162696838379, + "step": 14928 + }, + { + "epoch": 2.27783203125e-05, + "step": 14928, + "training_step_time": 0.10618829727172852 + }, + { + "epoch": 2.277984619140625e-05, + "model_forward_time": 0.027097463607788086, + "step": 14929 + }, + { + "epoch": 2.277984619140625e-05, + "step": 14929, + "training_step_time": 0.10720419883728027 + }, + { + "epoch": 2.27813720703125e-05, + "grad_norm": 0.5203060507774353, + "learning_rate": 5.4513332428891887e-05, + "loss": 0.0129, + "step": 14930 + }, + { + "epoch": 2.27813720703125e-05, + "model_forward_time": 0.02560257911682129, + "step": 14930 + }, + { + "epoch": 2.27813720703125e-05, + "step": 14930, + "training_step_time": 0.10634350776672363 + }, + { + "epoch": 2.278289794921875e-05, + "model_forward_time": 0.026325702667236328, + "step": 14931 + }, + { + "epoch": 2.278289794921875e-05, + "step": 14931, + "training_step_time": 0.10548520088195801 + }, + { + "epoch": 2.2784423828125e-05, + "model_forward_time": 0.024842262268066406, + "step": 14932 + }, + { + "epoch": 2.2784423828125e-05, + "step": 14932, + "training_step_time": 0.10508370399475098 + }, + { + "epoch": 2.278594970703125e-05, + "model_forward_time": 0.023933887481689453, + "step": 14933 + }, + { + "epoch": 2.278594970703125e-05, + "step": 14933, + "training_step_time": 0.10372781753540039 + }, + { + "epoch": 2.27874755859375e-05, + "model_forward_time": 0.025464534759521484, + "step": 14934 + }, + { + "epoch": 2.27874755859375e-05, + "step": 14934, + "training_step_time": 0.10503840446472168 + }, + { + "epoch": 2.278900146484375e-05, + "model_forward_time": 0.024759292602539062, + "step": 14935 + }, + { + "epoch": 2.278900146484375e-05, + "step": 14935, + "training_step_time": 0.11078548431396484 + }, + { + "epoch": 2.279052734375e-05, + "model_forward_time": 0.02487921714782715, + "step": 14936 + }, + { + "epoch": 2.279052734375e-05, + "step": 14936, + "training_step_time": 0.10695219039916992 + }, + { + "epoch": 2.279205322265625e-05, + "model_forward_time": 0.024962425231933594, + "step": 14937 + }, + { + "epoch": 2.279205322265625e-05, + "step": 14937, + "training_step_time": 0.11014485359191895 + }, + { + "epoch": 2.27935791015625e-05, + "model_forward_time": 0.025368928909301758, + "step": 14938 + }, + { + "epoch": 2.27935791015625e-05, + "step": 14938, + "training_step_time": 0.10602331161499023 + }, + { + "epoch": 2.279510498046875e-05, + "model_forward_time": 0.024961233139038086, + "step": 14939 + }, + { + "epoch": 2.279510498046875e-05, + "step": 14939, + "training_step_time": 0.17903470993041992 + }, + { + "epoch": 2.2796630859375e-05, + "grad_norm": 0.21775636076927185, + "learning_rate": 5.445843903969854e-05, + "loss": 0.0228, + "step": 14940 + }, + { + "epoch": 2.2796630859375e-05, + "model_forward_time": 0.02483344078063965, + "step": 14940 + }, + { + "epoch": 2.2796630859375e-05, + "step": 14940, + "training_step_time": 0.13159823417663574 + }, + { + "epoch": 2.279815673828125e-05, + "model_forward_time": 0.02428603172302246, + "step": 14941 + }, + { + "epoch": 2.279815673828125e-05, + "step": 14941, + "training_step_time": 0.11121535301208496 + }, + { + "epoch": 2.27996826171875e-05, + "model_forward_time": 0.026320457458496094, + "step": 14942 + }, + { + "epoch": 2.27996826171875e-05, + "step": 14942, + "training_step_time": 0.12621140480041504 + }, + { + "epoch": 2.280120849609375e-05, + "model_forward_time": 0.025321006774902344, + "step": 14943 + }, + { + "epoch": 2.280120849609375e-05, + "step": 14943, + "training_step_time": 0.10862612724304199 + }, + { + "epoch": 2.2802734375e-05, + "model_forward_time": 0.02555251121520996, + "step": 14944 + }, + { + "epoch": 2.2802734375e-05, + "step": 14944, + "training_step_time": 0.10481429100036621 + }, + { + "epoch": 2.280426025390625e-05, + "model_forward_time": 0.025687456130981445, + "step": 14945 + }, + { + "epoch": 2.280426025390625e-05, + "step": 14945, + "training_step_time": 0.1972184181213379 + }, + { + "epoch": 2.28057861328125e-05, + "model_forward_time": 0.024505138397216797, + "step": 14946 + }, + { + "epoch": 2.28057861328125e-05, + "step": 14946, + "training_step_time": 0.1042940616607666 + }, + { + "epoch": 2.280731201171875e-05, + "model_forward_time": 0.024341583251953125, + "step": 14947 + }, + { + "epoch": 2.280731201171875e-05, + "step": 14947, + "training_step_time": 0.10468935966491699 + }, + { + "epoch": 2.2808837890625e-05, + "model_forward_time": 0.024756669998168945, + "step": 14948 + }, + { + "epoch": 2.2808837890625e-05, + "step": 14948, + "training_step_time": 0.10578727722167969 + }, + { + "epoch": 2.281036376953125e-05, + "model_forward_time": 0.024662494659423828, + "step": 14949 + }, + { + "epoch": 2.281036376953125e-05, + "step": 14949, + "training_step_time": 0.10571455955505371 + }, + { + "epoch": 2.28118896484375e-05, + "grad_norm": 0.2750996947288513, + "learning_rate": 5.440354023308134e-05, + "loss": 0.03, + "step": 14950 + }, + { + "epoch": 2.28118896484375e-05, + "model_forward_time": 0.024669170379638672, + "step": 14950 + }, + { + "epoch": 2.28118896484375e-05, + "step": 14950, + "training_step_time": 0.10721278190612793 + }, + { + "epoch": 2.281341552734375e-05, + "model_forward_time": 0.02471017837524414, + "step": 14951 + }, + { + "epoch": 2.281341552734375e-05, + "step": 14951, + "training_step_time": 0.11367106437683105 + }, + { + "epoch": 2.281494140625e-05, + "model_forward_time": 0.025657176971435547, + "step": 14952 + }, + { + "epoch": 2.281494140625e-05, + "step": 14952, + "training_step_time": 0.10823988914489746 + }, + { + "epoch": 2.281646728515625e-05, + "model_forward_time": 0.02899646759033203, + "step": 14953 + }, + { + "epoch": 2.281646728515625e-05, + "step": 14953, + "training_step_time": 0.10874676704406738 + }, + { + "epoch": 2.28179931640625e-05, + "model_forward_time": 0.025506973266601562, + "step": 14954 + }, + { + "epoch": 2.28179931640625e-05, + "step": 14954, + "training_step_time": 0.1342613697052002 + }, + { + "epoch": 2.281951904296875e-05, + "model_forward_time": 0.024376392364501953, + "step": 14955 + }, + { + "epoch": 2.281951904296875e-05, + "step": 14955, + "training_step_time": 0.10312962532043457 + }, + { + "epoch": 2.2821044921875e-05, + "model_forward_time": 0.023671627044677734, + "step": 14956 + }, + { + "epoch": 2.2821044921875e-05, + "step": 14956, + "training_step_time": 0.1657695770263672 + }, + { + "epoch": 2.282257080078125e-05, + "model_forward_time": 0.024108171463012695, + "step": 14957 + }, + { + "epoch": 2.282257080078125e-05, + "step": 14957, + "training_step_time": 0.14923524856567383 + }, + { + "epoch": 2.28240966796875e-05, + "model_forward_time": 0.023862361907958984, + "step": 14958 + }, + { + "epoch": 2.28240966796875e-05, + "step": 14958, + "training_step_time": 0.10638093948364258 + }, + { + "epoch": 2.282562255859375e-05, + "model_forward_time": 0.024613380432128906, + "step": 14959 + }, + { + "epoch": 2.282562255859375e-05, + "step": 14959, + "training_step_time": 0.14725017547607422 + }, + { + "epoch": 2.28271484375e-05, + "grad_norm": 0.22984150052070618, + "learning_rate": 5.4348636075747536e-05, + "loss": 0.0185, + "step": 14960 + }, + { + "epoch": 2.28271484375e-05, + "model_forward_time": 0.025384902954101562, + "step": 14960 + }, + { + "epoch": 2.28271484375e-05, + "step": 14960, + "training_step_time": 0.1861410140991211 + }, + { + "epoch": 2.282867431640625e-05, + "model_forward_time": 0.023892641067504883, + "step": 14961 + }, + { + "epoch": 2.282867431640625e-05, + "step": 14961, + "training_step_time": 0.21457839012145996 + }, + { + "epoch": 2.28302001953125e-05, + "model_forward_time": 0.02549147605895996, + "step": 14962 + }, + { + "epoch": 2.28302001953125e-05, + "step": 14962, + "training_step_time": 0.13365602493286133 + }, + { + "epoch": 2.283172607421875e-05, + "model_forward_time": 0.0237424373626709, + "step": 14963 + }, + { + "epoch": 2.283172607421875e-05, + "step": 14963, + "training_step_time": 0.1128232479095459 + }, + { + "epoch": 2.2833251953125e-05, + "model_forward_time": 0.024410486221313477, + "step": 14964 + }, + { + "epoch": 2.2833251953125e-05, + "step": 14964, + "training_step_time": 0.13319945335388184 + }, + { + "epoch": 2.283477783203125e-05, + "model_forward_time": 0.024748563766479492, + "step": 14965 + }, + { + "epoch": 2.283477783203125e-05, + "step": 14965, + "training_step_time": 0.10736966133117676 + }, + { + "epoch": 2.28363037109375e-05, + "model_forward_time": 0.028514862060546875, + "step": 14966 + }, + { + "epoch": 2.28363037109375e-05, + "step": 14966, + "training_step_time": 0.1073768138885498 + }, + { + "epoch": 2.283782958984375e-05, + "model_forward_time": 0.024472951889038086, + "step": 14967 + }, + { + "epoch": 2.283782958984375e-05, + "step": 14967, + "training_step_time": 0.10858154296875 + }, + { + "epoch": 2.283935546875e-05, + "model_forward_time": 0.02510976791381836, + "step": 14968 + }, + { + "epoch": 2.283935546875e-05, + "step": 14968, + "training_step_time": 0.1047811508178711 + }, + { + "epoch": 2.284088134765625e-05, + "model_forward_time": 0.025711774826049805, + "step": 14969 + }, + { + "epoch": 2.284088134765625e-05, + "step": 14969, + "training_step_time": 0.10716390609741211 + }, + { + "epoch": 2.28424072265625e-05, + "grad_norm": 0.33416223526000977, + "learning_rate": 5.4293726634410855e-05, + "loss": 0.0151, + "step": 14970 + }, + { + "epoch": 2.28424072265625e-05, + "model_forward_time": 0.025225162506103516, + "step": 14970 + }, + { + "epoch": 2.28424072265625e-05, + "step": 14970, + "training_step_time": 0.10739588737487793 + }, + { + "epoch": 2.284393310546875e-05, + "model_forward_time": 0.024729013442993164, + "step": 14971 + }, + { + "epoch": 2.284393310546875e-05, + "step": 14971, + "training_step_time": 0.12632513046264648 + }, + { + "epoch": 2.2845458984375e-05, + "model_forward_time": 0.02450394630432129, + "step": 14972 + }, + { + "epoch": 2.2845458984375e-05, + "step": 14972, + "training_step_time": 0.10403299331665039 + }, + { + "epoch": 2.284698486328125e-05, + "model_forward_time": 0.02456045150756836, + "step": 14973 + }, + { + "epoch": 2.284698486328125e-05, + "step": 14973, + "training_step_time": 0.10335516929626465 + }, + { + "epoch": 2.28485107421875e-05, + "model_forward_time": 0.024209022521972656, + "step": 14974 + }, + { + "epoch": 2.28485107421875e-05, + "step": 14974, + "training_step_time": 0.10730338096618652 + }, + { + "epoch": 2.285003662109375e-05, + "model_forward_time": 0.02454400062561035, + "step": 14975 + }, + { + "epoch": 2.285003662109375e-05, + "step": 14975, + "training_step_time": 0.10568857192993164 + }, + { + "epoch": 2.28515625e-05, + "model_forward_time": 0.024779558181762695, + "step": 14976 + }, + { + "epoch": 2.28515625e-05, + "step": 14976, + "training_step_time": 0.10506343841552734 + }, + { + "epoch": 2.285308837890625e-05, + "model_forward_time": 0.02538752555847168, + "step": 14977 + }, + { + "epoch": 2.285308837890625e-05, + "step": 14977, + "training_step_time": 0.10524964332580566 + }, + { + "epoch": 2.28546142578125e-05, + "model_forward_time": 0.024718046188354492, + "step": 14978 + }, + { + "epoch": 2.28546142578125e-05, + "step": 14978, + "training_step_time": 0.10432314872741699 + }, + { + "epoch": 2.285614013671875e-05, + "model_forward_time": 0.02632594108581543, + "step": 14979 + }, + { + "epoch": 2.285614013671875e-05, + "step": 14979, + "training_step_time": 0.10746026039123535 + }, + { + "epoch": 2.2857666015625e-05, + "grad_norm": 0.29110127687454224, + "learning_rate": 5.423881197579144e-05, + "loss": 0.016, + "step": 14980 + }, + { + "epoch": 2.2857666015625e-05, + "model_forward_time": 0.024974346160888672, + "step": 14980 + }, + { + "epoch": 2.2857666015625e-05, + "step": 14980, + "training_step_time": 0.10523700714111328 + }, + { + "epoch": 2.285919189453125e-05, + "model_forward_time": 0.024628400802612305, + "step": 14981 + }, + { + "epoch": 2.285919189453125e-05, + "step": 14981, + "training_step_time": 0.10376644134521484 + }, + { + "epoch": 2.28607177734375e-05, + "model_forward_time": 0.0260012149810791, + "step": 14982 + }, + { + "epoch": 2.28607177734375e-05, + "step": 14982, + "training_step_time": 0.10646677017211914 + }, + { + "epoch": 2.286224365234375e-05, + "model_forward_time": 0.02599644660949707, + "step": 14983 + }, + { + "epoch": 2.286224365234375e-05, + "step": 14983, + "training_step_time": 0.10651421546936035 + }, + { + "epoch": 2.286376953125e-05, + "model_forward_time": 0.024258136749267578, + "step": 14984 + }, + { + "epoch": 2.286376953125e-05, + "step": 14984, + "training_step_time": 0.1071784496307373 + }, + { + "epoch": 2.286529541015625e-05, + "model_forward_time": 0.02463388442993164, + "step": 14985 + }, + { + "epoch": 2.286529541015625e-05, + "step": 14985, + "training_step_time": 0.10445713996887207 + }, + { + "epoch": 2.28668212890625e-05, + "model_forward_time": 0.02465200424194336, + "step": 14986 + }, + { + "epoch": 2.28668212890625e-05, + "step": 14986, + "training_step_time": 0.13952064514160156 + }, + { + "epoch": 2.286834716796875e-05, + "model_forward_time": 0.024690628051757812, + "step": 14987 + }, + { + "epoch": 2.286834716796875e-05, + "step": 14987, + "training_step_time": 0.130631685256958 + }, + { + "epoch": 2.2869873046875e-05, + "model_forward_time": 0.023593664169311523, + "step": 14988 + }, + { + "epoch": 2.2869873046875e-05, + "step": 14988, + "training_step_time": 0.1164402961730957 + }, + { + "epoch": 2.287139892578125e-05, + "model_forward_time": 0.025287866592407227, + "step": 14989 + }, + { + "epoch": 2.287139892578125e-05, + "step": 14989, + "training_step_time": 0.12649106979370117 + }, + { + "epoch": 2.28729248046875e-05, + "grad_norm": 0.222764253616333, + "learning_rate": 5.418389216661579e-05, + "loss": 0.0149, + "step": 14990 + }, + { + "epoch": 2.28729248046875e-05, + "model_forward_time": 0.027182579040527344, + "step": 14990 + }, + { + "epoch": 2.28729248046875e-05, + "step": 14990, + "training_step_time": 0.16152191162109375 + }, + { + "epoch": 2.287445068359375e-05, + "model_forward_time": 0.028750896453857422, + "step": 14991 + }, + { + "epoch": 2.287445068359375e-05, + "step": 14991, + "training_step_time": 0.24138259887695312 + }, + { + "epoch": 2.28759765625e-05, + "model_forward_time": 0.02733898162841797, + "step": 14992 + }, + { + "epoch": 2.28759765625e-05, + "step": 14992, + "training_step_time": 0.21708059310913086 + }, + { + "epoch": 2.287750244140625e-05, + "model_forward_time": 0.027028322219848633, + "step": 14993 + }, + { + "epoch": 2.287750244140625e-05, + "step": 14993, + "training_step_time": 0.3045024871826172 + }, + { + "epoch": 2.28790283203125e-05, + "model_forward_time": 0.029262304306030273, + "step": 14994 + }, + { + "epoch": 2.28790283203125e-05, + "step": 14994, + "training_step_time": 0.36133384704589844 + }, + { + "epoch": 2.288055419921875e-05, + "model_forward_time": 0.030585765838623047, + "step": 14995 + }, + { + "epoch": 2.288055419921875e-05, + "step": 14995, + "training_step_time": 0.30129098892211914 + }, + { + "epoch": 2.2882080078125e-05, + "model_forward_time": 0.03171396255493164, + "step": 14996 + }, + { + "epoch": 2.2882080078125e-05, + "step": 14996, + "training_step_time": 0.3172571659088135 + }, + { + "epoch": 2.288360595703125e-05, + "model_forward_time": 0.02950263023376465, + "step": 14997 + }, + { + "epoch": 2.288360595703125e-05, + "step": 14997, + "training_step_time": 0.33362317085266113 + }, + { + "epoch": 2.28851318359375e-05, + "model_forward_time": 0.028829574584960938, + "step": 14998 + }, + { + "epoch": 2.28851318359375e-05, + "step": 14998, + "training_step_time": 0.4050710201263428 + }, + { + "epoch": 2.288665771484375e-05, + "model_forward_time": 0.03067159652709961, + "step": 14999 + }, + { + "epoch": 2.288665771484375e-05, + "step": 14999, + "training_step_time": 0.36992526054382324 + }, + { + "epoch": 2.288818359375e-05, + "grad_norm": 0.26424986124038696, + "learning_rate": 5.4128967273616625e-05, + "loss": 0.019, + "step": 15000 + }, + { + "epoch": 2.288818359375e-05, + "model_forward_time": 0.029282093048095703, + "step": 15000 + }, + { + "epoch": 2.288818359375e-05, + "step": 15000, + "training_step_time": 0.11265778541564941 + }, + { + "epoch": 2.288970947265625e-05, + "model_forward_time": 0.023313045501708984, + "step": 15001 + }, + { + "epoch": 2.288970947265625e-05, + "step": 15001, + "training_step_time": 0.1719212532043457 + }, + { + "epoch": 2.28912353515625e-05, + "model_forward_time": 0.02394247055053711, + "step": 15002 + }, + { + "epoch": 2.28912353515625e-05, + "step": 15002, + "training_step_time": 0.19634008407592773 + }, + { + "epoch": 2.289276123046875e-05, + "model_forward_time": 0.029191017150878906, + "step": 15003 + }, + { + "epoch": 2.289276123046875e-05, + "step": 15003, + "training_step_time": 0.15695881843566895 + }, + { + "epoch": 2.2894287109375e-05, + "model_forward_time": 0.02435135841369629, + "step": 15004 + }, + { + "epoch": 2.2894287109375e-05, + "step": 15004, + "training_step_time": 0.14747929573059082 + }, + { + "epoch": 2.289581298828125e-05, + "model_forward_time": 0.02428150177001953, + "step": 15005 + }, + { + "epoch": 2.289581298828125e-05, + "step": 15005, + "training_step_time": 0.1345212459564209 + }, + { + "epoch": 2.28973388671875e-05, + "model_forward_time": 0.026450157165527344, + "step": 15006 + }, + { + "epoch": 2.28973388671875e-05, + "step": 15006, + "training_step_time": 0.11518311500549316 + }, + { + "epoch": 2.289886474609375e-05, + "model_forward_time": 0.025815725326538086, + "step": 15007 + }, + { + "epoch": 2.289886474609375e-05, + "step": 15007, + "training_step_time": 0.10572099685668945 + }, + { + "epoch": 2.2900390625e-05, + "model_forward_time": 0.026295900344848633, + "step": 15008 + }, + { + "epoch": 2.2900390625e-05, + "step": 15008, + "training_step_time": 0.10952973365783691 + }, + { + "epoch": 2.290191650390625e-05, + "model_forward_time": 0.025368213653564453, + "step": 15009 + }, + { + "epoch": 2.290191650390625e-05, + "step": 15009, + "training_step_time": 0.10861754417419434 + }, + { + "epoch": 2.29034423828125e-05, + "grad_norm": 0.18423643708229065, + "learning_rate": 5.407403736353288e-05, + "loss": 0.0112, + "step": 15010 + }, + { + "epoch": 2.29034423828125e-05, + "model_forward_time": 0.025160551071166992, + "step": 15010 + }, + { + "epoch": 2.29034423828125e-05, + "step": 15010, + "training_step_time": 0.10837984085083008 + }, + { + "epoch": 2.290496826171875e-05, + "model_forward_time": 0.02501702308654785, + "step": 15011 + }, + { + "epoch": 2.290496826171875e-05, + "step": 15011, + "training_step_time": 0.10631418228149414 + }, + { + "epoch": 2.2906494140625e-05, + "model_forward_time": 0.025348424911499023, + "step": 15012 + }, + { + "epoch": 2.2906494140625e-05, + "step": 15012, + "training_step_time": 0.10901522636413574 + }, + { + "epoch": 2.290802001953125e-05, + "model_forward_time": 0.025160789489746094, + "step": 15013 + }, + { + "epoch": 2.290802001953125e-05, + "step": 15013, + "training_step_time": 0.10619091987609863 + }, + { + "epoch": 2.29095458984375e-05, + "model_forward_time": 0.025130510330200195, + "step": 15014 + }, + { + "epoch": 2.29095458984375e-05, + "step": 15014, + "training_step_time": 0.1072239875793457 + }, + { + "epoch": 2.291107177734375e-05, + "model_forward_time": 0.025676965713500977, + "step": 15015 + }, + { + "epoch": 2.291107177734375e-05, + "step": 15015, + "training_step_time": 0.11076855659484863 + }, + { + "epoch": 2.291259765625e-05, + "model_forward_time": 0.025240659713745117, + "step": 15016 + }, + { + "epoch": 2.291259765625e-05, + "step": 15016, + "training_step_time": 0.10893630981445312 + }, + { + "epoch": 2.291412353515625e-05, + "model_forward_time": 0.02554011344909668, + "step": 15017 + }, + { + "epoch": 2.291412353515625e-05, + "step": 15017, + "training_step_time": 0.11052870750427246 + }, + { + "epoch": 2.29156494140625e-05, + "model_forward_time": 0.0255129337310791, + "step": 15018 + }, + { + "epoch": 2.29156494140625e-05, + "step": 15018, + "training_step_time": 0.10647106170654297 + }, + { + "epoch": 2.291717529296875e-05, + "model_forward_time": 0.02864241600036621, + "step": 15019 + }, + { + "epoch": 2.291717529296875e-05, + "step": 15019, + "training_step_time": 0.11237406730651855 + }, + { + "epoch": 2.2918701171875e-05, + "grad_norm": 0.3490600287914276, + "learning_rate": 5.401910250310961e-05, + "loss": 0.0122, + "step": 15020 + }, + { + "epoch": 2.2918701171875e-05, + "model_forward_time": 0.025377988815307617, + "step": 15020 + }, + { + "epoch": 2.2918701171875e-05, + "step": 15020, + "training_step_time": 0.10810637474060059 + }, + { + "epoch": 2.292022705078125e-05, + "model_forward_time": 0.02509331703186035, + "step": 15021 + }, + { + "epoch": 2.292022705078125e-05, + "step": 15021, + "training_step_time": 0.1086878776550293 + }, + { + "epoch": 2.29217529296875e-05, + "model_forward_time": 0.02539348602294922, + "step": 15022 + }, + { + "epoch": 2.29217529296875e-05, + "step": 15022, + "training_step_time": 0.10701227188110352 + }, + { + "epoch": 2.292327880859375e-05, + "model_forward_time": 0.02545928955078125, + "step": 15023 + }, + { + "epoch": 2.292327880859375e-05, + "step": 15023, + "training_step_time": 0.1645052433013916 + }, + { + "epoch": 2.29248046875e-05, + "model_forward_time": 0.024446725845336914, + "step": 15024 + }, + { + "epoch": 2.29248046875e-05, + "step": 15024, + "training_step_time": 0.14653420448303223 + }, + { + "epoch": 2.292633056640625e-05, + "model_forward_time": 0.024332761764526367, + "step": 15025 + }, + { + "epoch": 2.292633056640625e-05, + "step": 15025, + "training_step_time": 0.11445784568786621 + }, + { + "epoch": 2.29278564453125e-05, + "model_forward_time": 0.025975942611694336, + "step": 15026 + }, + { + "epoch": 2.29278564453125e-05, + "step": 15026, + "training_step_time": 0.11252903938293457 + }, + { + "epoch": 2.292938232421875e-05, + "model_forward_time": 0.02544713020324707, + "step": 15027 + }, + { + "epoch": 2.292938232421875e-05, + "step": 15027, + "training_step_time": 0.12750577926635742 + }, + { + "epoch": 2.2930908203125e-05, + "model_forward_time": 0.025244712829589844, + "step": 15028 + }, + { + "epoch": 2.2930908203125e-05, + "step": 15028, + "training_step_time": 0.10831785202026367 + }, + { + "epoch": 2.293243408203125e-05, + "model_forward_time": 0.025341033935546875, + "step": 15029 + }, + { + "epoch": 2.293243408203125e-05, + "step": 15029, + "training_step_time": 0.19951891899108887 + }, + { + "epoch": 2.29339599609375e-05, + "grad_norm": 0.4564681947231293, + "learning_rate": 5.396416275909779e-05, + "loss": 0.0127, + "step": 15030 + }, + { + "epoch": 2.29339599609375e-05, + "model_forward_time": 0.02498793601989746, + "step": 15030 + }, + { + "epoch": 2.29339599609375e-05, + "step": 15030, + "training_step_time": 0.10516905784606934 + }, + { + "epoch": 2.293548583984375e-05, + "model_forward_time": 0.024854421615600586, + "step": 15031 + }, + { + "epoch": 2.293548583984375e-05, + "step": 15031, + "training_step_time": 0.11056303977966309 + }, + { + "epoch": 2.293701171875e-05, + "model_forward_time": 0.0254819393157959, + "step": 15032 + }, + { + "epoch": 2.293701171875e-05, + "step": 15032, + "training_step_time": 0.11067533493041992 + }, + { + "epoch": 2.293853759765625e-05, + "model_forward_time": 0.026250123977661133, + "step": 15033 + }, + { + "epoch": 2.293853759765625e-05, + "step": 15033, + "training_step_time": 0.12028074264526367 + }, + { + "epoch": 2.29400634765625e-05, + "model_forward_time": 0.028162479400634766, + "step": 15034 + }, + { + "epoch": 2.29400634765625e-05, + "step": 15034, + "training_step_time": 0.11025357246398926 + }, + { + "epoch": 2.294158935546875e-05, + "model_forward_time": 0.025998353958129883, + "step": 15035 + }, + { + "epoch": 2.294158935546875e-05, + "step": 15035, + "training_step_time": 0.11320853233337402 + }, + { + "epoch": 2.2943115234375e-05, + "model_forward_time": 0.02683877944946289, + "step": 15036 + }, + { + "epoch": 2.2943115234375e-05, + "step": 15036, + "training_step_time": 0.11117982864379883 + }, + { + "epoch": 2.294464111328125e-05, + "model_forward_time": 0.025893449783325195, + "step": 15037 + }, + { + "epoch": 2.294464111328125e-05, + "step": 15037, + "training_step_time": 0.11093521118164062 + }, + { + "epoch": 2.29461669921875e-05, + "model_forward_time": 0.025307655334472656, + "step": 15038 + }, + { + "epoch": 2.29461669921875e-05, + "step": 15038, + "training_step_time": 0.1437699794769287 + }, + { + "epoch": 2.294769287109375e-05, + "model_forward_time": 0.024954795837402344, + "step": 15039 + }, + { + "epoch": 2.294769287109375e-05, + "step": 15039, + "training_step_time": 0.11598753929138184 + }, + { + "epoch": 2.294921875e-05, + "grad_norm": 0.25832730531692505, + "learning_rate": 5.390921819825445e-05, + "loss": 0.0242, + "step": 15040 + }, + { + "epoch": 2.294921875e-05, + "model_forward_time": 0.02530980110168457, + "step": 15040 + }, + { + "epoch": 2.294921875e-05, + "step": 15040, + "training_step_time": 0.21268057823181152 + }, + { + "epoch": 2.295074462890625e-05, + "model_forward_time": 0.026793956756591797, + "step": 15041 + }, + { + "epoch": 2.295074462890625e-05, + "step": 15041, + "training_step_time": 0.1333768367767334 + }, + { + "epoch": 2.29522705078125e-05, + "model_forward_time": 0.02501082420349121, + "step": 15042 + }, + { + "epoch": 2.29522705078125e-05, + "step": 15042, + "training_step_time": 0.1112372875213623 + }, + { + "epoch": 2.295379638671875e-05, + "model_forward_time": 0.025006532669067383, + "step": 15043 + }, + { + "epoch": 2.295379638671875e-05, + "step": 15043, + "training_step_time": 0.21079635620117188 + }, + { + "epoch": 2.2955322265625e-05, + "model_forward_time": 0.024823904037475586, + "step": 15044 + }, + { + "epoch": 2.2955322265625e-05, + "step": 15044, + "training_step_time": 0.10738348960876465 + }, + { + "epoch": 2.295684814453125e-05, + "model_forward_time": 0.025113821029663086, + "step": 15045 + }, + { + "epoch": 2.295684814453125e-05, + "step": 15045, + "training_step_time": 0.12389230728149414 + }, + { + "epoch": 2.29583740234375e-05, + "model_forward_time": 0.026228904724121094, + "step": 15046 + }, + { + "epoch": 2.29583740234375e-05, + "step": 15046, + "training_step_time": 0.21314024925231934 + }, + { + "epoch": 2.295989990234375e-05, + "model_forward_time": 0.02530384063720703, + "step": 15047 + }, + { + "epoch": 2.295989990234375e-05, + "step": 15047, + "training_step_time": 0.14976143836975098 + }, + { + "epoch": 2.296142578125e-05, + "model_forward_time": 0.024532079696655273, + "step": 15048 + }, + { + "epoch": 2.296142578125e-05, + "step": 15048, + "training_step_time": 0.1369328498840332 + }, + { + "epoch": 2.296295166015625e-05, + "model_forward_time": 0.024765729904174805, + "step": 15049 + }, + { + "epoch": 2.296295166015625e-05, + "step": 15049, + "training_step_time": 0.12376213073730469 + }, + { + "epoch": 2.29644775390625e-05, + "grad_norm": 0.45770174264907837, + "learning_rate": 5.3854268887342374e-05, + "loss": 0.0272, + "step": 15050 + }, + { + "epoch": 2.29644775390625e-05, + "model_forward_time": 0.025024890899658203, + "step": 15050 + }, + { + "epoch": 2.29644775390625e-05, + "step": 15050, + "training_step_time": 0.11605238914489746 + }, + { + "epoch": 2.296600341796875e-05, + "model_forward_time": 0.024723052978515625, + "step": 15051 + }, + { + "epoch": 2.296600341796875e-05, + "step": 15051, + "training_step_time": 0.11722564697265625 + }, + { + "epoch": 2.2967529296875e-05, + "model_forward_time": 0.02539801597595215, + "step": 15052 + }, + { + "epoch": 2.2967529296875e-05, + "step": 15052, + "training_step_time": 0.10764002799987793 + }, + { + "epoch": 2.296905517578125e-05, + "model_forward_time": 0.025334596633911133, + "step": 15053 + }, + { + "epoch": 2.296905517578125e-05, + "step": 15053, + "training_step_time": 0.1038048267364502 + }, + { + "epoch": 2.29705810546875e-05, + "model_forward_time": 0.025327205657958984, + "step": 15054 + }, + { + "epoch": 2.29705810546875e-05, + "step": 15054, + "training_step_time": 0.10838985443115234 + }, + { + "epoch": 2.297210693359375e-05, + "model_forward_time": 0.0253598690032959, + "step": 15055 + }, + { + "epoch": 2.297210693359375e-05, + "step": 15055, + "training_step_time": 0.10774540901184082 + }, + { + "epoch": 2.29736328125e-05, + "model_forward_time": 0.02521228790283203, + "step": 15056 + }, + { + "epoch": 2.29736328125e-05, + "step": 15056, + "training_step_time": 0.11204814910888672 + }, + { + "epoch": 2.297515869140625e-05, + "model_forward_time": 0.025344133377075195, + "step": 15057 + }, + { + "epoch": 2.297515869140625e-05, + "step": 15057, + "training_step_time": 0.1063082218170166 + }, + { + "epoch": 2.29766845703125e-05, + "model_forward_time": 0.025377750396728516, + "step": 15058 + }, + { + "epoch": 2.29766845703125e-05, + "step": 15058, + "training_step_time": 0.10656857490539551 + }, + { + "epoch": 2.297821044921875e-05, + "model_forward_time": 0.02560138702392578, + "step": 15059 + }, + { + "epoch": 2.297821044921875e-05, + "step": 15059, + "training_step_time": 0.1133720874786377 + }, + { + "epoch": 2.2979736328125e-05, + "grad_norm": 0.3180738687515259, + "learning_rate": 5.379931489313016e-05, + "loss": 0.0244, + "step": 15060 + }, + { + "epoch": 2.2979736328125e-05, + "model_forward_time": 0.025599002838134766, + "step": 15060 + }, + { + "epoch": 2.2979736328125e-05, + "step": 15060, + "training_step_time": 0.10819125175476074 + }, + { + "epoch": 2.298126220703125e-05, + "model_forward_time": 0.027921438217163086, + "step": 15061 + }, + { + "epoch": 2.298126220703125e-05, + "step": 15061, + "training_step_time": 0.11433076858520508 + }, + { + "epoch": 2.29827880859375e-05, + "model_forward_time": 0.024801254272460938, + "step": 15062 + }, + { + "epoch": 2.29827880859375e-05, + "step": 15062, + "training_step_time": 0.10566186904907227 + }, + { + "epoch": 2.298431396484375e-05, + "model_forward_time": 0.025398969650268555, + "step": 15063 + }, + { + "epoch": 2.298431396484375e-05, + "step": 15063, + "training_step_time": 0.11199212074279785 + }, + { + "epoch": 2.298583984375e-05, + "model_forward_time": 0.025867223739624023, + "step": 15064 + }, + { + "epoch": 2.298583984375e-05, + "step": 15064, + "training_step_time": 0.11232805252075195 + }, + { + "epoch": 2.298736572265625e-05, + "model_forward_time": 0.024958133697509766, + "step": 15065 + }, + { + "epoch": 2.298736572265625e-05, + "step": 15065, + "training_step_time": 0.1078646183013916 + }, + { + "epoch": 2.29888916015625e-05, + "model_forward_time": 0.025483131408691406, + "step": 15066 + }, + { + "epoch": 2.29888916015625e-05, + "step": 15066, + "training_step_time": 0.10941791534423828 + }, + { + "epoch": 2.299041748046875e-05, + "model_forward_time": 0.02559971809387207, + "step": 15067 + }, + { + "epoch": 2.299041748046875e-05, + "step": 15067, + "training_step_time": 0.10875630378723145 + }, + { + "epoch": 2.2991943359375e-05, + "model_forward_time": 0.025031089782714844, + "step": 15068 + }, + { + "epoch": 2.2991943359375e-05, + "step": 15068, + "training_step_time": 0.1502697467803955 + }, + { + "epoch": 2.299346923828125e-05, + "model_forward_time": 0.02503347396850586, + "step": 15069 + }, + { + "epoch": 2.299346923828125e-05, + "step": 15069, + "training_step_time": 0.12860774993896484 + }, + { + "epoch": 2.29949951171875e-05, + "grad_norm": 0.4590432941913605, + "learning_rate": 5.3744356282392104e-05, + "loss": 0.0161, + "step": 15070 + }, + { + "epoch": 2.29949951171875e-05, + "model_forward_time": 0.02456808090209961, + "step": 15070 + }, + { + "epoch": 2.29949951171875e-05, + "step": 15070, + "training_step_time": 0.11590027809143066 + }, + { + "epoch": 2.299652099609375e-05, + "model_forward_time": 0.02557682991027832, + "step": 15071 + }, + { + "epoch": 2.299652099609375e-05, + "step": 15071, + "training_step_time": 0.1117405891418457 + }, + { + "epoch": 2.2998046875e-05, + "model_forward_time": 0.025438785552978516, + "step": 15072 + }, + { + "epoch": 2.2998046875e-05, + "step": 15072, + "training_step_time": 0.11348748207092285 + }, + { + "epoch": 2.299957275390625e-05, + "model_forward_time": 0.025197744369506836, + "step": 15073 + }, + { + "epoch": 2.299957275390625e-05, + "step": 15073, + "training_step_time": 0.10701990127563477 + }, + { + "epoch": 2.30010986328125e-05, + "model_forward_time": 0.024688720703125, + "step": 15074 + }, + { + "epoch": 2.30010986328125e-05, + "step": 15074, + "training_step_time": 0.19213581085205078 + }, + { + "epoch": 2.300262451171875e-05, + "model_forward_time": 0.023845672607421875, + "step": 15075 + }, + { + "epoch": 2.300262451171875e-05, + "step": 15075, + "training_step_time": 0.1041252613067627 + }, + { + "epoch": 2.3004150390625e-05, + "model_forward_time": 0.026081323623657227, + "step": 15076 + }, + { + "epoch": 2.3004150390625e-05, + "step": 15076, + "training_step_time": 0.10598015785217285 + }, + { + "epoch": 2.300567626953125e-05, + "model_forward_time": 0.025079965591430664, + "step": 15077 + }, + { + "epoch": 2.300567626953125e-05, + "step": 15077, + "training_step_time": 0.10759639739990234 + }, + { + "epoch": 2.30072021484375e-05, + "model_forward_time": 0.025138378143310547, + "step": 15078 + }, + { + "epoch": 2.30072021484375e-05, + "step": 15078, + "training_step_time": 0.1792159080505371 + }, + { + "epoch": 2.300872802734375e-05, + "model_forward_time": 0.024249553680419922, + "step": 15079 + }, + { + "epoch": 2.300872802734375e-05, + "step": 15079, + "training_step_time": 0.1941516399383545 + }, + { + "epoch": 2.301025390625e-05, + "grad_norm": 0.2518446743488312, + "learning_rate": 5.368939312190808e-05, + "loss": 0.0168, + "step": 15080 + }, + { + "epoch": 2.301025390625e-05, + "model_forward_time": 0.023861408233642578, + "step": 15080 + }, + { + "epoch": 2.301025390625e-05, + "step": 15080, + "training_step_time": 0.18638324737548828 + }, + { + "epoch": 2.301177978515625e-05, + "model_forward_time": 0.024325132369995117, + "step": 15081 + }, + { + "epoch": 2.301177978515625e-05, + "step": 15081, + "training_step_time": 0.19497251510620117 + }, + { + "epoch": 2.30133056640625e-05, + "model_forward_time": 0.02428150177001953, + "step": 15082 + }, + { + "epoch": 2.30133056640625e-05, + "step": 15082, + "training_step_time": 0.17471981048583984 + }, + { + "epoch": 2.301483154296875e-05, + "model_forward_time": 0.024077177047729492, + "step": 15083 + }, + { + "epoch": 2.301483154296875e-05, + "step": 15083, + "training_step_time": 0.17238712310791016 + }, + { + "epoch": 2.3016357421875e-05, + "model_forward_time": 0.024082660675048828, + "step": 15084 + }, + { + "epoch": 2.3016357421875e-05, + "step": 15084, + "training_step_time": 0.11663174629211426 + }, + { + "epoch": 2.301788330078125e-05, + "model_forward_time": 0.024986982345581055, + "step": 15085 + }, + { + "epoch": 2.301788330078125e-05, + "step": 15085, + "training_step_time": 0.1058812141418457 + }, + { + "epoch": 2.30194091796875e-05, + "model_forward_time": 0.02472686767578125, + "step": 15086 + }, + { + "epoch": 2.30194091796875e-05, + "step": 15086, + "training_step_time": 0.15372014045715332 + }, + { + "epoch": 2.302093505859375e-05, + "model_forward_time": 0.0248258113861084, + "step": 15087 + }, + { + "epoch": 2.302093505859375e-05, + "step": 15087, + "training_step_time": 0.1794569492340088 + }, + { + "epoch": 2.30224609375e-05, + "model_forward_time": 0.024673938751220703, + "step": 15088 + }, + { + "epoch": 2.30224609375e-05, + "step": 15088, + "training_step_time": 0.11197328567504883 + }, + { + "epoch": 2.302398681640625e-05, + "model_forward_time": 0.024243831634521484, + "step": 15089 + }, + { + "epoch": 2.302398681640625e-05, + "step": 15089, + "training_step_time": 0.1715836524963379 + }, + { + "epoch": 2.30255126953125e-05, + "grad_norm": 0.372438907623291, + "learning_rate": 5.363442547846356e-05, + "loss": 0.0187, + "step": 15090 + }, + { + "epoch": 2.30255126953125e-05, + "model_forward_time": 0.024494647979736328, + "step": 15090 + }, + { + "epoch": 2.30255126953125e-05, + "step": 15090, + "training_step_time": 0.20508861541748047 + }, + { + "epoch": 2.302703857421875e-05, + "model_forward_time": 0.02452683448791504, + "step": 15091 + }, + { + "epoch": 2.302703857421875e-05, + "step": 15091, + "training_step_time": 0.15358757972717285 + }, + { + "epoch": 2.3028564453125e-05, + "model_forward_time": 0.024537324905395508, + "step": 15092 + }, + { + "epoch": 2.3028564453125e-05, + "step": 15092, + "training_step_time": 0.10625314712524414 + }, + { + "epoch": 2.303009033203125e-05, + "model_forward_time": 0.024866342544555664, + "step": 15093 + }, + { + "epoch": 2.303009033203125e-05, + "step": 15093, + "training_step_time": 0.11246037483215332 + }, + { + "epoch": 2.30316162109375e-05, + "model_forward_time": 0.02501225471496582, + "step": 15094 + }, + { + "epoch": 2.30316162109375e-05, + "step": 15094, + "training_step_time": 0.11722922325134277 + }, + { + "epoch": 2.303314208984375e-05, + "model_forward_time": 0.024810075759887695, + "step": 15095 + }, + { + "epoch": 2.303314208984375e-05, + "step": 15095, + "training_step_time": 0.10576105117797852 + }, + { + "epoch": 2.303466796875e-05, + "model_forward_time": 0.025440692901611328, + "step": 15096 + }, + { + "epoch": 2.303466796875e-05, + "step": 15096, + "training_step_time": 0.10622239112854004 + }, + { + "epoch": 2.303619384765625e-05, + "model_forward_time": 0.025469541549682617, + "step": 15097 + }, + { + "epoch": 2.303619384765625e-05, + "step": 15097, + "training_step_time": 0.10528707504272461 + }, + { + "epoch": 2.30377197265625e-05, + "model_forward_time": 0.02519512176513672, + "step": 15098 + }, + { + "epoch": 2.30377197265625e-05, + "step": 15098, + "training_step_time": 0.1302187442779541 + }, + { + "epoch": 2.303924560546875e-05, + "model_forward_time": 0.025171756744384766, + "step": 15099 + }, + { + "epoch": 2.303924560546875e-05, + "step": 15099, + "training_step_time": 0.16975164413452148 + }, + { + "epoch": 2.3040771484375e-05, + "grad_norm": 0.20305180549621582, + "learning_rate": 5.357945341884936e-05, + "loss": 0.0138, + "step": 15100 + }, + { + "epoch": 2.3040771484375e-05, + "model_forward_time": 0.02460002899169922, + "step": 15100 + }, + { + "epoch": 2.3040771484375e-05, + "step": 15100, + "training_step_time": 0.1485445499420166 + }, + { + "epoch": 2.304229736328125e-05, + "model_forward_time": 0.024235963821411133, + "step": 15101 + }, + { + "epoch": 2.304229736328125e-05, + "step": 15101, + "training_step_time": 0.14203429222106934 + }, + { + "epoch": 2.30438232421875e-05, + "model_forward_time": 0.024251461029052734, + "step": 15102 + }, + { + "epoch": 2.30438232421875e-05, + "step": 15102, + "training_step_time": 0.12809133529663086 + }, + { + "epoch": 2.304534912109375e-05, + "model_forward_time": 0.024077177047729492, + "step": 15103 + }, + { + "epoch": 2.304534912109375e-05, + "step": 15103, + "training_step_time": 0.12756061553955078 + }, + { + "epoch": 2.3046875e-05, + "model_forward_time": 0.024845600128173828, + "step": 15104 + }, + { + "epoch": 2.3046875e-05, + "step": 15104, + "training_step_time": 0.12728428840637207 + }, + { + "epoch": 2.304840087890625e-05, + "model_forward_time": 0.024758577346801758, + "step": 15105 + }, + { + "epoch": 2.304840087890625e-05, + "step": 15105, + "training_step_time": 0.11991763114929199 + }, + { + "epoch": 2.30499267578125e-05, + "model_forward_time": 0.025340795516967773, + "step": 15106 + }, + { + "epoch": 2.30499267578125e-05, + "step": 15106, + "training_step_time": 0.11484503746032715 + }, + { + "epoch": 2.305145263671875e-05, + "model_forward_time": 0.025386810302734375, + "step": 15107 + }, + { + "epoch": 2.305145263671875e-05, + "step": 15107, + "training_step_time": 0.11636590957641602 + }, + { + "epoch": 2.3052978515625e-05, + "model_forward_time": 0.025002479553222656, + "step": 15108 + }, + { + "epoch": 2.3052978515625e-05, + "step": 15108, + "training_step_time": 0.10548639297485352 + }, + { + "epoch": 2.305450439453125e-05, + "model_forward_time": 0.02538013458251953, + "step": 15109 + }, + { + "epoch": 2.305450439453125e-05, + "step": 15109, + "training_step_time": 0.10415124893188477 + }, + { + "epoch": 2.30560302734375e-05, + "grad_norm": 0.2843940556049347, + "learning_rate": 5.352447700986173e-05, + "loss": 0.0135, + "step": 15110 + }, + { + "epoch": 2.30560302734375e-05, + "model_forward_time": 0.02521061897277832, + "step": 15110 + }, + { + "epoch": 2.30560302734375e-05, + "step": 15110, + "training_step_time": 0.10312724113464355 + }, + { + "epoch": 2.305755615234375e-05, + "model_forward_time": 0.025058269500732422, + "step": 15111 + }, + { + "epoch": 2.305755615234375e-05, + "step": 15111, + "training_step_time": 0.12854623794555664 + }, + { + "epoch": 2.305908203125e-05, + "model_forward_time": 0.02548694610595703, + "step": 15112 + }, + { + "epoch": 2.305908203125e-05, + "step": 15112, + "training_step_time": 0.10845613479614258 + }, + { + "epoch": 2.306060791015625e-05, + "model_forward_time": 0.02502608299255371, + "step": 15113 + }, + { + "epoch": 2.306060791015625e-05, + "step": 15113, + "training_step_time": 0.12392210960388184 + }, + { + "epoch": 2.30621337890625e-05, + "model_forward_time": 0.025203466415405273, + "step": 15114 + }, + { + "epoch": 2.30621337890625e-05, + "step": 15114, + "training_step_time": 0.11066484451293945 + }, + { + "epoch": 2.306365966796875e-05, + "model_forward_time": 0.02529740333557129, + "step": 15115 + }, + { + "epoch": 2.306365966796875e-05, + "step": 15115, + "training_step_time": 0.10944747924804688 + }, + { + "epoch": 2.3065185546875e-05, + "model_forward_time": 0.025159358978271484, + "step": 15116 + }, + { + "epoch": 2.3065185546875e-05, + "step": 15116, + "training_step_time": 0.19126367568969727 + }, + { + "epoch": 2.306671142578125e-05, + "model_forward_time": 0.02397012710571289, + "step": 15117 + }, + { + "epoch": 2.306671142578125e-05, + "step": 15117, + "training_step_time": 0.10267925262451172 + }, + { + "epoch": 2.30682373046875e-05, + "model_forward_time": 0.024417400360107422, + "step": 15118 + }, + { + "epoch": 2.30682373046875e-05, + "step": 15118, + "training_step_time": 0.1025233268737793 + }, + { + "epoch": 2.306976318359375e-05, + "model_forward_time": 0.02510976791381836, + "step": 15119 + }, + { + "epoch": 2.306976318359375e-05, + "step": 15119, + "training_step_time": 0.11516308784484863 + }, + { + "epoch": 2.30712890625e-05, + "grad_norm": 0.19091233611106873, + "learning_rate": 5.3469496318302204e-05, + "loss": 0.0098, + "step": 15120 + }, + { + "epoch": 2.30712890625e-05, + "model_forward_time": 0.025023698806762695, + "step": 15120 + }, + { + "epoch": 2.30712890625e-05, + "step": 15120, + "training_step_time": 0.11725044250488281 + }, + { + "epoch": 2.307281494140625e-05, + "model_forward_time": 0.025522947311401367, + "step": 15121 + }, + { + "epoch": 2.307281494140625e-05, + "step": 15121, + "training_step_time": 0.12138772010803223 + }, + { + "epoch": 2.30743408203125e-05, + "model_forward_time": 0.024593353271484375, + "step": 15122 + }, + { + "epoch": 2.30743408203125e-05, + "step": 15122, + "training_step_time": 0.11642074584960938 + }, + { + "epoch": 2.307586669921875e-05, + "model_forward_time": 0.025285005569458008, + "step": 15123 + }, + { + "epoch": 2.307586669921875e-05, + "step": 15123, + "training_step_time": 0.11846733093261719 + }, + { + "epoch": 2.3077392578125e-05, + "model_forward_time": 0.02529120445251465, + "step": 15124 + }, + { + "epoch": 2.3077392578125e-05, + "step": 15124, + "training_step_time": 0.1178286075592041 + }, + { + "epoch": 2.307891845703125e-05, + "model_forward_time": 0.025361299514770508, + "step": 15125 + }, + { + "epoch": 2.307891845703125e-05, + "step": 15125, + "training_step_time": 0.1553044319152832 + }, + { + "epoch": 2.30804443359375e-05, + "model_forward_time": 0.024880647659301758, + "step": 15126 + }, + { + "epoch": 2.30804443359375e-05, + "step": 15126, + "training_step_time": 0.11622977256774902 + }, + { + "epoch": 2.308197021484375e-05, + "model_forward_time": 0.02504277229309082, + "step": 15127 + }, + { + "epoch": 2.308197021484375e-05, + "step": 15127, + "training_step_time": 0.11028909683227539 + }, + { + "epoch": 2.308349609375e-05, + "model_forward_time": 0.02505350112915039, + "step": 15128 + }, + { + "epoch": 2.308349609375e-05, + "step": 15128, + "training_step_time": 0.11510610580444336 + }, + { + "epoch": 2.308502197265625e-05, + "model_forward_time": 0.025426149368286133, + "step": 15129 + }, + { + "epoch": 2.308502197265625e-05, + "step": 15129, + "training_step_time": 0.13404321670532227 + }, + { + "epoch": 2.30865478515625e-05, + "grad_norm": 0.15558725595474243, + "learning_rate": 5.341451141097751e-05, + "loss": 0.0113, + "step": 15130 + }, + { + "epoch": 2.30865478515625e-05, + "model_forward_time": 0.026026248931884766, + "step": 15130 + }, + { + "epoch": 2.30865478515625e-05, + "step": 15130, + "training_step_time": 0.23007774353027344 + }, + { + "epoch": 2.308807373046875e-05, + "model_forward_time": 0.024254798889160156, + "step": 15131 + }, + { + "epoch": 2.308807373046875e-05, + "step": 15131, + "training_step_time": 0.11007142066955566 + }, + { + "epoch": 2.3089599609375e-05, + "model_forward_time": 0.02466607093811035, + "step": 15132 + }, + { + "epoch": 2.3089599609375e-05, + "step": 15132, + "training_step_time": 0.20800542831420898 + }, + { + "epoch": 2.309112548828125e-05, + "model_forward_time": 0.024325847625732422, + "step": 15133 + }, + { + "epoch": 2.309112548828125e-05, + "step": 15133, + "training_step_time": 0.1589200496673584 + }, + { + "epoch": 2.30926513671875e-05, + "model_forward_time": 0.024413585662841797, + "step": 15134 + }, + { + "epoch": 2.30926513671875e-05, + "step": 15134, + "training_step_time": 0.20228028297424316 + }, + { + "epoch": 2.309417724609375e-05, + "model_forward_time": 0.024412155151367188, + "step": 15135 + }, + { + "epoch": 2.309417724609375e-05, + "step": 15135, + "training_step_time": 0.14936065673828125 + }, + { + "epoch": 2.3095703125e-05, + "model_forward_time": 0.024557113647460938, + "step": 15136 + }, + { + "epoch": 2.3095703125e-05, + "step": 15136, + "training_step_time": 0.10716509819030762 + }, + { + "epoch": 2.309722900390625e-05, + "model_forward_time": 0.024345874786376953, + "step": 15137 + }, + { + "epoch": 2.309722900390625e-05, + "step": 15137, + "training_step_time": 0.21418094635009766 + }, + { + "epoch": 2.30987548828125e-05, + "model_forward_time": 0.024988174438476562, + "step": 15138 + }, + { + "epoch": 2.30987548828125e-05, + "step": 15138, + "training_step_time": 0.1153714656829834 + }, + { + "epoch": 2.310028076171875e-05, + "model_forward_time": 0.02432107925415039, + "step": 15139 + }, + { + "epoch": 2.310028076171875e-05, + "step": 15139, + "training_step_time": 0.10955119132995605 + }, + { + "epoch": 2.3101806640625e-05, + "grad_norm": 0.18189500272274017, + "learning_rate": 5.335952235469947e-05, + "loss": 0.0063, + "step": 15140 + }, + { + "epoch": 2.3101806640625e-05, + "model_forward_time": 0.025746583938598633, + "step": 15140 + }, + { + "epoch": 2.3101806640625e-05, + "step": 15140, + "training_step_time": 0.10930323600769043 + }, + { + "epoch": 2.310333251953125e-05, + "model_forward_time": 0.024839162826538086, + "step": 15141 + }, + { + "epoch": 2.310333251953125e-05, + "step": 15141, + "training_step_time": 0.10483169555664062 + }, + { + "epoch": 2.31048583984375e-05, + "model_forward_time": 0.024852752685546875, + "step": 15142 + }, + { + "epoch": 2.31048583984375e-05, + "step": 15142, + "training_step_time": 0.10447955131530762 + }, + { + "epoch": 2.310638427734375e-05, + "model_forward_time": 0.025101184844970703, + "step": 15143 + }, + { + "epoch": 2.310638427734375e-05, + "step": 15143, + "training_step_time": 0.10958504676818848 + }, + { + "epoch": 2.310791015625e-05, + "model_forward_time": 0.025983333587646484, + "step": 15144 + }, + { + "epoch": 2.310791015625e-05, + "step": 15144, + "training_step_time": 0.10749173164367676 + }, + { + "epoch": 2.310943603515625e-05, + "model_forward_time": 0.024897098541259766, + "step": 15145 + }, + { + "epoch": 2.310943603515625e-05, + "step": 15145, + "training_step_time": 0.10457134246826172 + }, + { + "epoch": 2.31109619140625e-05, + "model_forward_time": 0.025930166244506836, + "step": 15146 + }, + { + "epoch": 2.31109619140625e-05, + "step": 15146, + "training_step_time": 0.10493993759155273 + }, + { + "epoch": 2.311248779296875e-05, + "model_forward_time": 0.025051116943359375, + "step": 15147 + }, + { + "epoch": 2.311248779296875e-05, + "step": 15147, + "training_step_time": 0.10572504997253418 + }, + { + "epoch": 2.3114013671875e-05, + "model_forward_time": 0.02550220489501953, + "step": 15148 + }, + { + "epoch": 2.3114013671875e-05, + "step": 15148, + "training_step_time": 0.10783505439758301 + }, + { + "epoch": 2.311553955078125e-05, + "model_forward_time": 0.025368690490722656, + "step": 15149 + }, + { + "epoch": 2.311553955078125e-05, + "step": 15149, + "training_step_time": 0.1063392162322998 + }, + { + "epoch": 2.31170654296875e-05, + "grad_norm": 0.3034445345401764, + "learning_rate": 5.330452921628497e-05, + "loss": 0.0179, + "step": 15150 + }, + { + "epoch": 2.31170654296875e-05, + "model_forward_time": 0.02475762367248535, + "step": 15150 + }, + { + "epoch": 2.31170654296875e-05, + "step": 15150, + "training_step_time": 0.10345005989074707 + }, + { + "epoch": 2.311859130859375e-05, + "model_forward_time": 0.025151968002319336, + "step": 15151 + }, + { + "epoch": 2.311859130859375e-05, + "step": 15151, + "training_step_time": 0.10395312309265137 + }, + { + "epoch": 2.31201171875e-05, + "model_forward_time": 0.02509450912475586, + "step": 15152 + }, + { + "epoch": 2.31201171875e-05, + "step": 15152, + "training_step_time": 0.10491251945495605 + }, + { + "epoch": 2.312164306640625e-05, + "model_forward_time": 0.02513575553894043, + "step": 15153 + }, + { + "epoch": 2.312164306640625e-05, + "step": 15153, + "training_step_time": 0.10374045372009277 + }, + { + "epoch": 2.31231689453125e-05, + "model_forward_time": 0.025278091430664062, + "step": 15154 + }, + { + "epoch": 2.31231689453125e-05, + "step": 15154, + "training_step_time": 0.11058187484741211 + }, + { + "epoch": 2.312469482421875e-05, + "model_forward_time": 0.024873018264770508, + "step": 15155 + }, + { + "epoch": 2.312469482421875e-05, + "step": 15155, + "training_step_time": 0.15862584114074707 + }, + { + "epoch": 2.3126220703125e-05, + "model_forward_time": 0.027033567428588867, + "step": 15156 + }, + { + "epoch": 2.3126220703125e-05, + "step": 15156, + "training_step_time": 0.13894367218017578 + }, + { + "epoch": 2.312774658203125e-05, + "model_forward_time": 0.024155378341674805, + "step": 15157 + }, + { + "epoch": 2.312774658203125e-05, + "step": 15157, + "training_step_time": 0.11385226249694824 + }, + { + "epoch": 2.31292724609375e-05, + "model_forward_time": 0.025581836700439453, + "step": 15158 + }, + { + "epoch": 2.31292724609375e-05, + "step": 15158, + "training_step_time": 0.12105393409729004 + }, + { + "epoch": 2.313079833984375e-05, + "model_forward_time": 0.025092124938964844, + "step": 15159 + }, + { + "epoch": 2.313079833984375e-05, + "step": 15159, + "training_step_time": 0.1070401668548584 + }, + { + "epoch": 2.313232421875e-05, + "grad_norm": 0.2653610408306122, + "learning_rate": 5.32495320625559e-05, + "loss": 0.0148, + "step": 15160 + }, + { + "epoch": 2.313232421875e-05, + "model_forward_time": 0.027029991149902344, + "step": 15160 + }, + { + "epoch": 2.313232421875e-05, + "step": 15160, + "training_step_time": 0.1074836254119873 + }, + { + "epoch": 2.313385009765625e-05, + "model_forward_time": 0.02638387680053711, + "step": 15161 + }, + { + "epoch": 2.313385009765625e-05, + "step": 15161, + "training_step_time": 0.20430850982666016 + }, + { + "epoch": 2.31353759765625e-05, + "model_forward_time": 0.024222850799560547, + "step": 15162 + }, + { + "epoch": 2.31353759765625e-05, + "step": 15162, + "training_step_time": 0.11161994934082031 + }, + { + "epoch": 2.313690185546875e-05, + "model_forward_time": 0.02324390411376953, + "step": 15163 + }, + { + "epoch": 2.313690185546875e-05, + "step": 15163, + "training_step_time": 0.1516859531402588 + }, + { + "epoch": 2.3138427734375e-05, + "model_forward_time": 0.024242639541625977, + "step": 15164 + }, + { + "epoch": 2.3138427734375e-05, + "step": 15164, + "training_step_time": 0.16608548164367676 + }, + { + "epoch": 2.313995361328125e-05, + "model_forward_time": 0.023941516876220703, + "step": 15165 + }, + { + "epoch": 2.313995361328125e-05, + "step": 15165, + "training_step_time": 0.15635204315185547 + }, + { + "epoch": 2.31414794921875e-05, + "model_forward_time": 0.024400949478149414, + "step": 15166 + }, + { + "epoch": 2.31414794921875e-05, + "step": 15166, + "training_step_time": 0.15026259422302246 + }, + { + "epoch": 2.314300537109375e-05, + "model_forward_time": 0.024080753326416016, + "step": 15167 + }, + { + "epoch": 2.314300537109375e-05, + "step": 15167, + "training_step_time": 0.14571547508239746 + }, + { + "epoch": 2.314453125e-05, + "model_forward_time": 0.02380084991455078, + "step": 15168 + }, + { + "epoch": 2.314453125e-05, + "step": 15168, + "training_step_time": 0.13129019737243652 + }, + { + "epoch": 2.314605712890625e-05, + "model_forward_time": 0.024225234985351562, + "step": 15169 + }, + { + "epoch": 2.314605712890625e-05, + "step": 15169, + "training_step_time": 0.12425994873046875 + }, + { + "epoch": 2.31475830078125e-05, + "grad_norm": 0.2752803564071655, + "learning_rate": 5.319453096033896e-05, + "loss": 0.0201, + "step": 15170 + }, + { + "epoch": 2.31475830078125e-05, + "model_forward_time": 0.024196863174438477, + "step": 15170 + }, + { + "epoch": 2.31475830078125e-05, + "step": 15170, + "training_step_time": 0.13992738723754883 + }, + { + "epoch": 2.314910888671875e-05, + "model_forward_time": 0.02509903907775879, + "step": 15171 + }, + { + "epoch": 2.314910888671875e-05, + "step": 15171, + "training_step_time": 0.1178598403930664 + }, + { + "epoch": 2.3150634765625e-05, + "model_forward_time": 0.025107622146606445, + "step": 15172 + }, + { + "epoch": 2.3150634765625e-05, + "step": 15172, + "training_step_time": 0.17634153366088867 + }, + { + "epoch": 2.315216064453125e-05, + "model_forward_time": 0.024816274642944336, + "step": 15173 + }, + { + "epoch": 2.315216064453125e-05, + "step": 15173, + "training_step_time": 0.16254186630249023 + }, + { + "epoch": 2.31536865234375e-05, + "model_forward_time": 0.02430248260498047, + "step": 15174 + }, + { + "epoch": 2.31536865234375e-05, + "step": 15174, + "training_step_time": 0.19335460662841797 + }, + { + "epoch": 2.315521240234375e-05, + "model_forward_time": 0.023804187774658203, + "step": 15175 + }, + { + "epoch": 2.315521240234375e-05, + "step": 15175, + "training_step_time": 0.14205002784729004 + }, + { + "epoch": 2.315673828125e-05, + "model_forward_time": 0.025174617767333984, + "step": 15176 + }, + { + "epoch": 2.315673828125e-05, + "step": 15176, + "training_step_time": 0.21821165084838867 + }, + { + "epoch": 2.315826416015625e-05, + "model_forward_time": 0.02447032928466797, + "step": 15177 + }, + { + "epoch": 2.315826416015625e-05, + "step": 15177, + "training_step_time": 0.11786389350891113 + }, + { + "epoch": 2.31597900390625e-05, + "model_forward_time": 0.024997234344482422, + "step": 15178 + }, + { + "epoch": 2.31597900390625e-05, + "step": 15178, + "training_step_time": 0.13414621353149414 + }, + { + "epoch": 2.316131591796875e-05, + "model_forward_time": 0.024641990661621094, + "step": 15179 + }, + { + "epoch": 2.316131591796875e-05, + "step": 15179, + "training_step_time": 0.11742496490478516 + }, + { + "epoch": 2.3162841796875e-05, + "grad_norm": 0.23647968471050262, + "learning_rate": 5.313952597646568e-05, + "loss": 0.0127, + "step": 15180 + }, + { + "epoch": 2.3162841796875e-05, + "model_forward_time": 0.02506232261657715, + "step": 15180 + }, + { + "epoch": 2.3162841796875e-05, + "step": 15180, + "training_step_time": 0.16370511054992676 + }, + { + "epoch": 2.316436767578125e-05, + "model_forward_time": 0.024260282516479492, + "step": 15181 + }, + { + "epoch": 2.316436767578125e-05, + "step": 15181, + "training_step_time": 0.12792658805847168 + }, + { + "epoch": 2.31658935546875e-05, + "model_forward_time": 0.027574539184570312, + "step": 15182 + }, + { + "epoch": 2.31658935546875e-05, + "step": 15182, + "training_step_time": 0.11096715927124023 + }, + { + "epoch": 2.316741943359375e-05, + "model_forward_time": 0.02543163299560547, + "step": 15183 + }, + { + "epoch": 2.316741943359375e-05, + "step": 15183, + "training_step_time": 0.10979390144348145 + }, + { + "epoch": 2.31689453125e-05, + "model_forward_time": 0.02460479736328125, + "step": 15184 + }, + { + "epoch": 2.31689453125e-05, + "step": 15184, + "training_step_time": 0.11016511917114258 + }, + { + "epoch": 2.317047119140625e-05, + "model_forward_time": 0.025109052658081055, + "step": 15185 + }, + { + "epoch": 2.317047119140625e-05, + "step": 15185, + "training_step_time": 0.10927987098693848 + }, + { + "epoch": 2.31719970703125e-05, + "model_forward_time": 0.024995803833007812, + "step": 15186 + }, + { + "epoch": 2.31719970703125e-05, + "step": 15186, + "training_step_time": 0.10856270790100098 + }, + { + "epoch": 2.317352294921875e-05, + "model_forward_time": 0.025207996368408203, + "step": 15187 + }, + { + "epoch": 2.317352294921875e-05, + "step": 15187, + "training_step_time": 0.10843276977539062 + }, + { + "epoch": 2.3175048828125e-05, + "model_forward_time": 0.02490687370300293, + "step": 15188 + }, + { + "epoch": 2.3175048828125e-05, + "step": 15188, + "training_step_time": 0.1112217903137207 + }, + { + "epoch": 2.317657470703125e-05, + "model_forward_time": 0.0247952938079834, + "step": 15189 + }, + { + "epoch": 2.317657470703125e-05, + "step": 15189, + "training_step_time": 0.10732865333557129 + }, + { + "epoch": 2.31781005859375e-05, + "grad_norm": 0.268989622592926, + "learning_rate": 5.308451717777228e-05, + "loss": 0.0125, + "step": 15190 + }, + { + "epoch": 2.31781005859375e-05, + "model_forward_time": 0.02546858787536621, + "step": 15190 + }, + { + "epoch": 2.31781005859375e-05, + "step": 15190, + "training_step_time": 0.1132197380065918 + }, + { + "epoch": 2.317962646484375e-05, + "model_forward_time": 0.02496647834777832, + "step": 15191 + }, + { + "epoch": 2.317962646484375e-05, + "step": 15191, + "training_step_time": 0.10580825805664062 + }, + { + "epoch": 2.318115234375e-05, + "model_forward_time": 0.02521204948425293, + "step": 15192 + }, + { + "epoch": 2.318115234375e-05, + "step": 15192, + "training_step_time": 0.10617494583129883 + }, + { + "epoch": 2.318267822265625e-05, + "model_forward_time": 0.024981021881103516, + "step": 15193 + }, + { + "epoch": 2.318267822265625e-05, + "step": 15193, + "training_step_time": 0.10784149169921875 + }, + { + "epoch": 2.31842041015625e-05, + "model_forward_time": 0.024806737899780273, + "step": 15194 + }, + { + "epoch": 2.31842041015625e-05, + "step": 15194, + "training_step_time": 0.11903071403503418 + }, + { + "epoch": 2.318572998046875e-05, + "model_forward_time": 0.02520608901977539, + "step": 15195 + }, + { + "epoch": 2.318572998046875e-05, + "step": 15195, + "training_step_time": 0.12225031852722168 + }, + { + "epoch": 2.3187255859375e-05, + "model_forward_time": 0.025177955627441406, + "step": 15196 + }, + { + "epoch": 2.3187255859375e-05, + "step": 15196, + "training_step_time": 0.11891865730285645 + }, + { + "epoch": 2.318878173828125e-05, + "model_forward_time": 0.02466726303100586, + "step": 15197 + }, + { + "epoch": 2.318878173828125e-05, + "step": 15197, + "training_step_time": 0.12197637557983398 + }, + { + "epoch": 2.31903076171875e-05, + "model_forward_time": 0.02492070198059082, + "step": 15198 + }, + { + "epoch": 2.31903076171875e-05, + "step": 15198, + "training_step_time": 0.1685185432434082 + }, + { + "epoch": 2.319183349609375e-05, + "model_forward_time": 0.024220705032348633, + "step": 15199 + }, + { + "epoch": 2.319183349609375e-05, + "step": 15199, + "training_step_time": 0.16828250885009766 + }, + { + "epoch": 2.3193359375e-05, + "grad_norm": 0.23509950935840607, + "learning_rate": 5.3029504631099694e-05, + "loss": 0.019, + "step": 15200 + }, + { + "epoch": 2.3193359375e-05, + "model_forward_time": 0.026134252548217773, + "step": 15200 + }, + { + "epoch": 2.3193359375e-05, + "step": 15200, + "training_step_time": 0.11395716667175293 + }, + { + "epoch": 2.319488525390625e-05, + "model_forward_time": 0.024451017379760742, + "step": 15201 + }, + { + "epoch": 2.319488525390625e-05, + "step": 15201, + "training_step_time": 0.11915349960327148 + }, + { + "epoch": 2.31964111328125e-05, + "model_forward_time": 0.025002002716064453, + "step": 15202 + }, + { + "epoch": 2.31964111328125e-05, + "step": 15202, + "training_step_time": 0.11178994178771973 + }, + { + "epoch": 2.319793701171875e-05, + "model_forward_time": 0.025363683700561523, + "step": 15203 + }, + { + "epoch": 2.319793701171875e-05, + "step": 15203, + "training_step_time": 0.11108875274658203 + }, + { + "epoch": 2.3199462890625e-05, + "model_forward_time": 0.02533745765686035, + "step": 15204 + }, + { + "epoch": 2.3199462890625e-05, + "step": 15204, + "training_step_time": 0.19553828239440918 + }, + { + "epoch": 2.320098876953125e-05, + "model_forward_time": 0.02496957778930664, + "step": 15205 + }, + { + "epoch": 2.320098876953125e-05, + "step": 15205, + "training_step_time": 0.10851550102233887 + }, + { + "epoch": 2.32025146484375e-05, + "model_forward_time": 0.024639368057250977, + "step": 15206 + }, + { + "epoch": 2.32025146484375e-05, + "step": 15206, + "training_step_time": 0.10751748085021973 + }, + { + "epoch": 2.320404052734375e-05, + "model_forward_time": 0.025209426879882812, + "step": 15207 + }, + { + "epoch": 2.320404052734375e-05, + "step": 15207, + "training_step_time": 0.10505175590515137 + }, + { + "epoch": 2.320556640625e-05, + "model_forward_time": 0.02533864974975586, + "step": 15208 + }, + { + "epoch": 2.320556640625e-05, + "step": 15208, + "training_step_time": 0.10550045967102051 + }, + { + "epoch": 2.320709228515625e-05, + "model_forward_time": 0.02524542808532715, + "step": 15209 + }, + { + "epoch": 2.320709228515625e-05, + "step": 15209, + "training_step_time": 0.10498929023742676 + }, + { + "epoch": 2.32086181640625e-05, + "grad_norm": 0.2184288650751114, + "learning_rate": 5.297448840329329e-05, + "loss": 0.0138, + "step": 15210 + }, + { + "epoch": 2.32086181640625e-05, + "model_forward_time": 0.025586366653442383, + "step": 15210 + }, + { + "epoch": 2.32086181640625e-05, + "step": 15210, + "training_step_time": 0.10554718971252441 + }, + { + "epoch": 2.321014404296875e-05, + "model_forward_time": 0.025174379348754883, + "step": 15211 + }, + { + "epoch": 2.321014404296875e-05, + "step": 15211, + "training_step_time": 0.10606169700622559 + }, + { + "epoch": 2.3211669921875e-05, + "model_forward_time": 0.025154829025268555, + "step": 15212 + }, + { + "epoch": 2.3211669921875e-05, + "step": 15212, + "training_step_time": 0.10537433624267578 + }, + { + "epoch": 2.321319580078125e-05, + "model_forward_time": 0.025281429290771484, + "step": 15213 + }, + { + "epoch": 2.321319580078125e-05, + "step": 15213, + "training_step_time": 0.10767698287963867 + }, + { + "epoch": 2.32147216796875e-05, + "model_forward_time": 0.02534317970275879, + "step": 15214 + }, + { + "epoch": 2.32147216796875e-05, + "step": 15214, + "training_step_time": 0.10593199729919434 + }, + { + "epoch": 2.321624755859375e-05, + "model_forward_time": 0.025388240814208984, + "step": 15215 + }, + { + "epoch": 2.321624755859375e-05, + "step": 15215, + "training_step_time": 0.18900823593139648 + }, + { + "epoch": 2.32177734375e-05, + "model_forward_time": 0.02385854721069336, + "step": 15216 + }, + { + "epoch": 2.32177734375e-05, + "step": 15216, + "training_step_time": 0.11395883560180664 + }, + { + "epoch": 2.321929931640625e-05, + "model_forward_time": 0.02419281005859375, + "step": 15217 + }, + { + "epoch": 2.321929931640625e-05, + "step": 15217, + "training_step_time": 0.10848593711853027 + }, + { + "epoch": 2.32208251953125e-05, + "model_forward_time": 0.025037050247192383, + "step": 15218 + }, + { + "epoch": 2.32208251953125e-05, + "step": 15218, + "training_step_time": 0.12380790710449219 + }, + { + "epoch": 2.322235107421875e-05, + "model_forward_time": 0.025401830673217773, + "step": 15219 + }, + { + "epoch": 2.322235107421875e-05, + "step": 15219, + "training_step_time": 0.10733461380004883 + }, + { + "epoch": 2.3223876953125e-05, + "grad_norm": 0.4464765787124634, + "learning_rate": 5.2919468561203e-05, + "loss": 0.0142, + "step": 15220 + }, + { + "epoch": 2.3223876953125e-05, + "model_forward_time": 0.02494072914123535, + "step": 15220 + }, + { + "epoch": 2.3223876953125e-05, + "step": 15220, + "training_step_time": 0.1690807342529297 + }, + { + "epoch": 2.322540283203125e-05, + "model_forward_time": 0.02456521987915039, + "step": 15221 + }, + { + "epoch": 2.322540283203125e-05, + "step": 15221, + "training_step_time": 0.1689004898071289 + }, + { + "epoch": 2.32269287109375e-05, + "model_forward_time": 0.024642229080200195, + "step": 15222 + }, + { + "epoch": 2.32269287109375e-05, + "step": 15222, + "training_step_time": 0.1803112030029297 + }, + { + "epoch": 2.322845458984375e-05, + "model_forward_time": 0.024687767028808594, + "step": 15223 + }, + { + "epoch": 2.322845458984375e-05, + "step": 15223, + "training_step_time": 0.15085625648498535 + }, + { + "epoch": 2.322998046875e-05, + "model_forward_time": 0.024325847625732422, + "step": 15224 + }, + { + "epoch": 2.322998046875e-05, + "step": 15224, + "training_step_time": 0.1209559440612793 + }, + { + "epoch": 2.323150634765625e-05, + "model_forward_time": 0.02503824234008789, + "step": 15225 + }, + { + "epoch": 2.323150634765625e-05, + "step": 15225, + "training_step_time": 0.14713382720947266 + }, + { + "epoch": 2.32330322265625e-05, + "model_forward_time": 0.02477264404296875, + "step": 15226 + }, + { + "epoch": 2.32330322265625e-05, + "step": 15226, + "training_step_time": 0.1715548038482666 + }, + { + "epoch": 2.323455810546875e-05, + "model_forward_time": 0.025087594985961914, + "step": 15227 + }, + { + "epoch": 2.323455810546875e-05, + "step": 15227, + "training_step_time": 0.17395949363708496 + }, + { + "epoch": 2.3236083984375e-05, + "model_forward_time": 0.024259567260742188, + "step": 15228 + }, + { + "epoch": 2.3236083984375e-05, + "step": 15228, + "training_step_time": 0.10468769073486328 + }, + { + "epoch": 2.323760986328125e-05, + "model_forward_time": 0.024086475372314453, + "step": 15229 + }, + { + "epoch": 2.323760986328125e-05, + "step": 15229, + "training_step_time": 0.10098075866699219 + }, + { + "epoch": 2.32391357421875e-05, + "grad_norm": 0.2585608661174774, + "learning_rate": 5.286444517168313e-05, + "loss": 0.0078, + "step": 15230 + }, + { + "epoch": 2.32391357421875e-05, + "model_forward_time": 0.025635480880737305, + "step": 15230 + }, + { + "epoch": 2.32391357421875e-05, + "step": 15230, + "training_step_time": 0.10502052307128906 + }, + { + "epoch": 2.324066162109375e-05, + "model_forward_time": 0.024561405181884766, + "step": 15231 + }, + { + "epoch": 2.324066162109375e-05, + "step": 15231, + "training_step_time": 0.17623448371887207 + }, + { + "epoch": 2.32421875e-05, + "model_forward_time": 0.024820566177368164, + "step": 15232 + }, + { + "epoch": 2.32421875e-05, + "step": 15232, + "training_step_time": 0.20193123817443848 + }, + { + "epoch": 2.324371337890625e-05, + "model_forward_time": 0.024186134338378906, + "step": 15233 + }, + { + "epoch": 2.324371337890625e-05, + "step": 15233, + "training_step_time": 0.19202160835266113 + }, + { + "epoch": 2.32452392578125e-05, + "model_forward_time": 0.024191617965698242, + "step": 15234 + }, + { + "epoch": 2.32452392578125e-05, + "step": 15234, + "training_step_time": 0.18738603591918945 + }, + { + "epoch": 2.324676513671875e-05, + "model_forward_time": 0.023966073989868164, + "step": 15235 + }, + { + "epoch": 2.324676513671875e-05, + "step": 15235, + "training_step_time": 0.17643141746520996 + }, + { + "epoch": 2.3248291015625e-05, + "model_forward_time": 0.024158477783203125, + "step": 15236 + }, + { + "epoch": 2.3248291015625e-05, + "step": 15236, + "training_step_time": 0.1663217544555664 + }, + { + "epoch": 2.324981689453125e-05, + "model_forward_time": 0.025075435638427734, + "step": 15237 + }, + { + "epoch": 2.324981689453125e-05, + "step": 15237, + "training_step_time": 0.14250564575195312 + }, + { + "epoch": 2.32513427734375e-05, + "model_forward_time": 0.024433612823486328, + "step": 15238 + }, + { + "epoch": 2.32513427734375e-05, + "step": 15238, + "training_step_time": 0.13890814781188965 + }, + { + "epoch": 2.325286865234375e-05, + "model_forward_time": 0.024560928344726562, + "step": 15239 + }, + { + "epoch": 2.325286865234375e-05, + "step": 15239, + "training_step_time": 0.13401579856872559 + }, + { + "epoch": 2.325439453125e-05, + "grad_norm": 0.1279037743806839, + "learning_rate": 5.280941830159227e-05, + "loss": 0.0105, + "step": 15240 + }, + { + "epoch": 2.325439453125e-05, + "model_forward_time": 0.024589061737060547, + "step": 15240 + }, + { + "epoch": 2.325439453125e-05, + "step": 15240, + "training_step_time": 0.12030696868896484 + }, + { + "epoch": 2.325592041015625e-05, + "model_forward_time": 0.025700807571411133, + "step": 15241 + }, + { + "epoch": 2.325592041015625e-05, + "step": 15241, + "training_step_time": 0.12104487419128418 + }, + { + "epoch": 2.32574462890625e-05, + "model_forward_time": 0.025126934051513672, + "step": 15242 + }, + { + "epoch": 2.32574462890625e-05, + "step": 15242, + "training_step_time": 0.11581730842590332 + }, + { + "epoch": 2.325897216796875e-05, + "model_forward_time": 0.024884462356567383, + "step": 15243 + }, + { + "epoch": 2.325897216796875e-05, + "step": 15243, + "training_step_time": 0.10743594169616699 + }, + { + "epoch": 2.3260498046875e-05, + "model_forward_time": 0.025055646896362305, + "step": 15244 + }, + { + "epoch": 2.3260498046875e-05, + "step": 15244, + "training_step_time": 0.11369729042053223 + }, + { + "epoch": 2.326202392578125e-05, + "model_forward_time": 0.02525782585144043, + "step": 15245 + }, + { + "epoch": 2.326202392578125e-05, + "step": 15245, + "training_step_time": 0.10636377334594727 + }, + { + "epoch": 2.32635498046875e-05, + "model_forward_time": 0.025176525115966797, + "step": 15246 + }, + { + "epoch": 2.32635498046875e-05, + "step": 15246, + "training_step_time": 0.19860529899597168 + }, + { + "epoch": 2.326507568359375e-05, + "model_forward_time": 0.024476051330566406, + "step": 15247 + }, + { + "epoch": 2.326507568359375e-05, + "step": 15247, + "training_step_time": 0.10784411430358887 + }, + { + "epoch": 2.32666015625e-05, + "model_forward_time": 0.024445533752441406, + "step": 15248 + }, + { + "epoch": 2.32666015625e-05, + "step": 15248, + "training_step_time": 0.10091519355773926 + }, + { + "epoch": 2.326812744140625e-05, + "model_forward_time": 0.02532052993774414, + "step": 15249 + }, + { + "epoch": 2.326812744140625e-05, + "step": 15249, + "training_step_time": 0.10778450965881348 + }, + { + "epoch": 2.32696533203125e-05, + "grad_norm": 0.35009992122650146, + "learning_rate": 5.2754388017793274e-05, + "loss": 0.0257, + "step": 15250 + }, + { + "epoch": 2.32696533203125e-05, + "model_forward_time": 0.024982690811157227, + "step": 15250 + }, + { + "epoch": 2.32696533203125e-05, + "step": 15250, + "training_step_time": 0.10418844223022461 + }, + { + "epoch": 2.327117919921875e-05, + "model_forward_time": 0.0249783992767334, + "step": 15251 + }, + { + "epoch": 2.327117919921875e-05, + "step": 15251, + "training_step_time": 0.103668212890625 + }, + { + "epoch": 2.3272705078125e-05, + "model_forward_time": 0.025068998336791992, + "step": 15252 + }, + { + "epoch": 2.3272705078125e-05, + "step": 15252, + "training_step_time": 0.10806918144226074 + }, + { + "epoch": 2.327423095703125e-05, + "model_forward_time": 0.024802446365356445, + "step": 15253 + }, + { + "epoch": 2.327423095703125e-05, + "step": 15253, + "training_step_time": 0.10371613502502441 + }, + { + "epoch": 2.32757568359375e-05, + "model_forward_time": 0.02511143684387207, + "step": 15254 + }, + { + "epoch": 2.32757568359375e-05, + "step": 15254, + "training_step_time": 0.10765218734741211 + }, + { + "epoch": 2.327728271484375e-05, + "model_forward_time": 0.025191783905029297, + "step": 15255 + }, + { + "epoch": 2.327728271484375e-05, + "step": 15255, + "training_step_time": 0.10999059677124023 + }, + { + "epoch": 2.327880859375e-05, + "model_forward_time": 0.025082826614379883, + "step": 15256 + }, + { + "epoch": 2.327880859375e-05, + "step": 15256, + "training_step_time": 0.10776138305664062 + }, + { + "epoch": 2.328033447265625e-05, + "model_forward_time": 0.024895429611206055, + "step": 15257 + }, + { + "epoch": 2.328033447265625e-05, + "step": 15257, + "training_step_time": 0.10508298873901367 + }, + { + "epoch": 2.32818603515625e-05, + "model_forward_time": 0.024883270263671875, + "step": 15258 + }, + { + "epoch": 2.32818603515625e-05, + "step": 15258, + "training_step_time": 0.12468743324279785 + }, + { + "epoch": 2.328338623046875e-05, + "model_forward_time": 0.025212526321411133, + "step": 15259 + }, + { + "epoch": 2.328338623046875e-05, + "step": 15259, + "training_step_time": 0.10891914367675781 + }, + { + "epoch": 2.3284912109375e-05, + "grad_norm": 0.35786086320877075, + "learning_rate": 5.269935438715312e-05, + "loss": 0.0167, + "step": 15260 + }, + { + "epoch": 2.3284912109375e-05, + "model_forward_time": 0.025102615356445312, + "step": 15260 + }, + { + "epoch": 2.3284912109375e-05, + "step": 15260, + "training_step_time": 0.11223840713500977 + }, + { + "epoch": 2.328643798828125e-05, + "model_forward_time": 0.025061368942260742, + "step": 15261 + }, + { + "epoch": 2.328643798828125e-05, + "step": 15261, + "training_step_time": 0.1064004898071289 + }, + { + "epoch": 2.32879638671875e-05, + "model_forward_time": 0.024347543716430664, + "step": 15262 + }, + { + "epoch": 2.32879638671875e-05, + "step": 15262, + "training_step_time": 0.19633865356445312 + }, + { + "epoch": 2.328948974609375e-05, + "model_forward_time": 0.024576187133789062, + "step": 15263 + }, + { + "epoch": 2.328948974609375e-05, + "step": 15263, + "training_step_time": 0.1814260482788086 + }, + { + "epoch": 2.3291015625e-05, + "model_forward_time": 0.02428889274597168, + "step": 15264 + }, + { + "epoch": 2.3291015625e-05, + "step": 15264, + "training_step_time": 0.21654391288757324 + }, + { + "epoch": 2.329254150390625e-05, + "model_forward_time": 0.024482011795043945, + "step": 15265 + }, + { + "epoch": 2.329254150390625e-05, + "step": 15265, + "training_step_time": 0.11835718154907227 + }, + { + "epoch": 2.32940673828125e-05, + "model_forward_time": 0.024366378784179688, + "step": 15266 + }, + { + "epoch": 2.32940673828125e-05, + "step": 15266, + "training_step_time": 0.13567018508911133 + }, + { + "epoch": 2.329559326171875e-05, + "model_forward_time": 0.0249178409576416, + "step": 15267 + }, + { + "epoch": 2.329559326171875e-05, + "step": 15267, + "training_step_time": 0.16544771194458008 + }, + { + "epoch": 2.3297119140625e-05, + "model_forward_time": 0.02442169189453125, + "step": 15268 + }, + { + "epoch": 2.3297119140625e-05, + "step": 15268, + "training_step_time": 0.22284746170043945 + }, + { + "epoch": 2.329864501953125e-05, + "model_forward_time": 0.024158954620361328, + "step": 15269 + }, + { + "epoch": 2.329864501953125e-05, + "step": 15269, + "training_step_time": 0.11124777793884277 + }, + { + "epoch": 2.33001708984375e-05, + "grad_norm": 0.2568480670452118, + "learning_rate": 5.264431747654284e-05, + "loss": 0.0116, + "step": 15270 + }, + { + "epoch": 2.33001708984375e-05, + "model_forward_time": 0.023680686950683594, + "step": 15270 + }, + { + "epoch": 2.33001708984375e-05, + "step": 15270, + "training_step_time": 0.10302042961120605 + }, + { + "epoch": 2.330169677734375e-05, + "model_forward_time": 0.027590513229370117, + "step": 15271 + }, + { + "epoch": 2.330169677734375e-05, + "step": 15271, + "training_step_time": 0.1077120304107666 + }, + { + "epoch": 2.330322265625e-05, + "model_forward_time": 0.02540135383605957, + "step": 15272 + }, + { + "epoch": 2.330322265625e-05, + "step": 15272, + "training_step_time": 0.10612607002258301 + }, + { + "epoch": 2.330474853515625e-05, + "model_forward_time": 0.02534174919128418, + "step": 15273 + }, + { + "epoch": 2.330474853515625e-05, + "step": 15273, + "training_step_time": 0.10607242584228516 + }, + { + "epoch": 2.33062744140625e-05, + "model_forward_time": 0.02555370330810547, + "step": 15274 + }, + { + "epoch": 2.33062744140625e-05, + "step": 15274, + "training_step_time": 0.10603666305541992 + }, + { + "epoch": 2.330780029296875e-05, + "model_forward_time": 0.025315046310424805, + "step": 15275 + }, + { + "epoch": 2.330780029296875e-05, + "step": 15275, + "training_step_time": 0.10519814491271973 + }, + { + "epoch": 2.3309326171875e-05, + "model_forward_time": 0.025450468063354492, + "step": 15276 + }, + { + "epoch": 2.3309326171875e-05, + "step": 15276, + "training_step_time": 0.10500073432922363 + }, + { + "epoch": 2.331085205078125e-05, + "model_forward_time": 0.025153398513793945, + "step": 15277 + }, + { + "epoch": 2.331085205078125e-05, + "step": 15277, + "training_step_time": 0.10687518119812012 + }, + { + "epoch": 2.33123779296875e-05, + "model_forward_time": 0.02491164207458496, + "step": 15278 + }, + { + "epoch": 2.33123779296875e-05, + "step": 15278, + "training_step_time": 0.10454821586608887 + }, + { + "epoch": 2.331390380859375e-05, + "model_forward_time": 0.02551555633544922, + "step": 15279 + }, + { + "epoch": 2.331390380859375e-05, + "step": 15279, + "training_step_time": 0.10577702522277832 + }, + { + "epoch": 2.33154296875e-05, + "grad_norm": 0.2536419630050659, + "learning_rate": 5.258927735283748e-05, + "loss": 0.0116, + "step": 15280 + }, + { + "epoch": 2.33154296875e-05, + "model_forward_time": 0.025087833404541016, + "step": 15280 + }, + { + "epoch": 2.33154296875e-05, + "step": 15280, + "training_step_time": 0.1040959358215332 + }, + { + "epoch": 2.331695556640625e-05, + "model_forward_time": 0.024971961975097656, + "step": 15281 + }, + { + "epoch": 2.331695556640625e-05, + "step": 15281, + "training_step_time": 0.10511565208435059 + }, + { + "epoch": 2.33184814453125e-05, + "model_forward_time": 0.023833274841308594, + "step": 15282 + }, + { + "epoch": 2.33184814453125e-05, + "step": 15282, + "training_step_time": 0.10863137245178223 + }, + { + "epoch": 2.332000732421875e-05, + "model_forward_time": 0.02470111846923828, + "step": 15283 + }, + { + "epoch": 2.332000732421875e-05, + "step": 15283, + "training_step_time": 0.10718393325805664 + }, + { + "epoch": 2.3321533203125e-05, + "model_forward_time": 0.025243282318115234, + "step": 15284 + }, + { + "epoch": 2.3321533203125e-05, + "step": 15284, + "training_step_time": 0.1086874008178711 + }, + { + "epoch": 2.332305908203125e-05, + "model_forward_time": 0.025231361389160156, + "step": 15285 + }, + { + "epoch": 2.332305908203125e-05, + "step": 15285, + "training_step_time": 0.10646581649780273 + }, + { + "epoch": 2.33245849609375e-05, + "model_forward_time": 0.025090694427490234, + "step": 15286 + }, + { + "epoch": 2.33245849609375e-05, + "step": 15286, + "training_step_time": 0.10531091690063477 + }, + { + "epoch": 2.332611083984375e-05, + "model_forward_time": 0.025076866149902344, + "step": 15287 + }, + { + "epoch": 2.332611083984375e-05, + "step": 15287, + "training_step_time": 0.12819266319274902 + }, + { + "epoch": 2.332763671875e-05, + "model_forward_time": 0.026549577713012695, + "step": 15288 + }, + { + "epoch": 2.332763671875e-05, + "step": 15288, + "training_step_time": 0.1395878791809082 + }, + { + "epoch": 2.332916259765625e-05, + "model_forward_time": 0.02529454231262207, + "step": 15289 + }, + { + "epoch": 2.332916259765625e-05, + "step": 15289, + "training_step_time": 0.10957598686218262 + }, + { + "epoch": 2.33306884765625e-05, + "grad_norm": 0.20586350560188293, + "learning_rate": 5.2534234082915976e-05, + "loss": 0.0218, + "step": 15290 + }, + { + "epoch": 2.33306884765625e-05, + "model_forward_time": 0.024724721908569336, + "step": 15290 + }, + { + "epoch": 2.33306884765625e-05, + "step": 15290, + "training_step_time": 0.1301717758178711 + }, + { + "epoch": 2.333221435546875e-05, + "model_forward_time": 0.02480340003967285, + "step": 15291 + }, + { + "epoch": 2.333221435546875e-05, + "step": 15291, + "training_step_time": 0.19103646278381348 + }, + { + "epoch": 2.3333740234375e-05, + "model_forward_time": 0.024306297302246094, + "step": 15292 + }, + { + "epoch": 2.3333740234375e-05, + "step": 15292, + "training_step_time": 0.18758130073547363 + }, + { + "epoch": 2.333526611328125e-05, + "model_forward_time": 0.023836135864257812, + "step": 15293 + }, + { + "epoch": 2.333526611328125e-05, + "step": 15293, + "training_step_time": 0.1793956756591797 + }, + { + "epoch": 2.33367919921875e-05, + "model_forward_time": 0.024411916732788086, + "step": 15294 + }, + { + "epoch": 2.33367919921875e-05, + "step": 15294, + "training_step_time": 0.16347241401672363 + }, + { + "epoch": 2.333831787109375e-05, + "model_forward_time": 0.02408623695373535, + "step": 15295 + }, + { + "epoch": 2.333831787109375e-05, + "step": 15295, + "training_step_time": 0.14354419708251953 + }, + { + "epoch": 2.333984375e-05, + "model_forward_time": 0.02458810806274414, + "step": 15296 + }, + { + "epoch": 2.333984375e-05, + "step": 15296, + "training_step_time": 0.13205933570861816 + }, + { + "epoch": 2.334136962890625e-05, + "model_forward_time": 0.024129629135131836, + "step": 15297 + }, + { + "epoch": 2.334136962890625e-05, + "step": 15297, + "training_step_time": 0.1281569004058838 + }, + { + "epoch": 2.33428955078125e-05, + "model_forward_time": 0.02498650550842285, + "step": 15298 + }, + { + "epoch": 2.33428955078125e-05, + "step": 15298, + "training_step_time": 0.12688708305358887 + }, + { + "epoch": 2.334442138671875e-05, + "model_forward_time": 0.024173736572265625, + "step": 15299 + }, + { + "epoch": 2.334442138671875e-05, + "step": 15299, + "training_step_time": 0.12340426445007324 + }, + { + "epoch": 2.3345947265625e-05, + "grad_norm": 0.42707836627960205, + "learning_rate": 5.247918773366112e-05, + "loss": 0.0148, + "step": 15300 + }, + { + "epoch": 2.3345947265625e-05, + "model_forward_time": 0.024618864059448242, + "step": 15300 + }, + { + "epoch": 2.3345947265625e-05, + "step": 15300, + "training_step_time": 0.11664533615112305 + }, + { + "epoch": 2.334747314453125e-05, + "model_forward_time": 0.025237083435058594, + "step": 15301 + }, + { + "epoch": 2.334747314453125e-05, + "step": 15301, + "training_step_time": 0.2027144432067871 + }, + { + "epoch": 2.33489990234375e-05, + "model_forward_time": 0.024098634719848633, + "step": 15302 + }, + { + "epoch": 2.33489990234375e-05, + "step": 15302, + "training_step_time": 0.10950112342834473 + }, + { + "epoch": 2.335052490234375e-05, + "model_forward_time": 0.024495363235473633, + "step": 15303 + }, + { + "epoch": 2.335052490234375e-05, + "step": 15303, + "training_step_time": 0.1127464771270752 + }, + { + "epoch": 2.335205078125e-05, + "model_forward_time": 0.025002002716064453, + "step": 15304 + }, + { + "epoch": 2.335205078125e-05, + "step": 15304, + "training_step_time": 0.10552859306335449 + }, + { + "epoch": 2.335357666015625e-05, + "model_forward_time": 0.02428436279296875, + "step": 15305 + }, + { + "epoch": 2.335357666015625e-05, + "step": 15305, + "training_step_time": 0.20306682586669922 + }, + { + "epoch": 2.33551025390625e-05, + "model_forward_time": 0.024345874786376953, + "step": 15306 + }, + { + "epoch": 2.33551025390625e-05, + "step": 15306, + "training_step_time": 0.13819527626037598 + }, + { + "epoch": 2.335662841796875e-05, + "model_forward_time": 0.024289846420288086, + "step": 15307 + }, + { + "epoch": 2.335662841796875e-05, + "step": 15307, + "training_step_time": 0.22170662879943848 + }, + { + "epoch": 2.3358154296875e-05, + "model_forward_time": 0.02424025535583496, + "step": 15308 + }, + { + "epoch": 2.3358154296875e-05, + "step": 15308, + "training_step_time": 0.12711763381958008 + }, + { + "epoch": 2.335968017578125e-05, + "model_forward_time": 0.02436089515686035, + "step": 15309 + }, + { + "epoch": 2.335968017578125e-05, + "step": 15309, + "training_step_time": 0.12703967094421387 + }, + { + "epoch": 2.33612060546875e-05, + "grad_norm": 0.1862405240535736, + "learning_rate": 5.242413837195938e-05, + "loss": 0.0224, + "step": 15310 + }, + { + "epoch": 2.33612060546875e-05, + "model_forward_time": 0.025122642517089844, + "step": 15310 + }, + { + "epoch": 2.33612060546875e-05, + "step": 15310, + "training_step_time": 0.1587541103363037 + }, + { + "epoch": 2.336273193359375e-05, + "model_forward_time": 0.024602651596069336, + "step": 15311 + }, + { + "epoch": 2.336273193359375e-05, + "step": 15311, + "training_step_time": 0.10514330863952637 + }, + { + "epoch": 2.33642578125e-05, + "model_forward_time": 0.025187015533447266, + "step": 15312 + }, + { + "epoch": 2.33642578125e-05, + "step": 15312, + "training_step_time": 0.11492061614990234 + }, + { + "epoch": 2.336578369140625e-05, + "model_forward_time": 0.024924278259277344, + "step": 15313 + }, + { + "epoch": 2.336578369140625e-05, + "step": 15313, + "training_step_time": 0.12243032455444336 + }, + { + "epoch": 2.33673095703125e-05, + "model_forward_time": 0.024816513061523438, + "step": 15314 + }, + { + "epoch": 2.33673095703125e-05, + "step": 15314, + "training_step_time": 0.1029207706451416 + }, + { + "epoch": 2.336883544921875e-05, + "model_forward_time": 0.02472090721130371, + "step": 15315 + }, + { + "epoch": 2.336883544921875e-05, + "step": 15315, + "training_step_time": 0.10407686233520508 + }, + { + "epoch": 2.3370361328125e-05, + "model_forward_time": 0.025075674057006836, + "step": 15316 + }, + { + "epoch": 2.3370361328125e-05, + "step": 15316, + "training_step_time": 0.10959339141845703 + }, + { + "epoch": 2.337188720703125e-05, + "model_forward_time": 0.025055408477783203, + "step": 15317 + }, + { + "epoch": 2.337188720703125e-05, + "step": 15317, + "training_step_time": 0.10413765907287598 + }, + { + "epoch": 2.33734130859375e-05, + "model_forward_time": 0.024980545043945312, + "step": 15318 + }, + { + "epoch": 2.33734130859375e-05, + "step": 15318, + "training_step_time": 0.10468339920043945 + }, + { + "epoch": 2.337493896484375e-05, + "model_forward_time": 0.025492191314697266, + "step": 15319 + }, + { + "epoch": 2.337493896484375e-05, + "step": 15319, + "training_step_time": 0.1098322868347168 + }, + { + "epoch": 2.337646484375e-05, + "grad_norm": 0.18742430210113525, + "learning_rate": 5.2369086064700945e-05, + "loss": 0.0098, + "step": 15320 + }, + { + "epoch": 2.337646484375e-05, + "model_forward_time": 0.025102853775024414, + "step": 15320 + }, + { + "epoch": 2.337646484375e-05, + "step": 15320, + "training_step_time": 0.10607647895812988 + }, + { + "epoch": 2.337799072265625e-05, + "model_forward_time": 0.025090932846069336, + "step": 15321 + }, + { + "epoch": 2.337799072265625e-05, + "step": 15321, + "training_step_time": 0.10474205017089844 + }, + { + "epoch": 2.33795166015625e-05, + "model_forward_time": 0.025668859481811523, + "step": 15322 + }, + { + "epoch": 2.33795166015625e-05, + "step": 15322, + "training_step_time": 0.10908031463623047 + }, + { + "epoch": 2.338104248046875e-05, + "model_forward_time": 0.025102853775024414, + "step": 15323 + }, + { + "epoch": 2.338104248046875e-05, + "step": 15323, + "training_step_time": 0.1050269603729248 + }, + { + "epoch": 2.3382568359375e-05, + "model_forward_time": 0.025225162506103516, + "step": 15324 + }, + { + "epoch": 2.3382568359375e-05, + "step": 15324, + "training_step_time": 0.10441994667053223 + }, + { + "epoch": 2.338409423828125e-05, + "model_forward_time": 0.025038480758666992, + "step": 15325 + }, + { + "epoch": 2.338409423828125e-05, + "step": 15325, + "training_step_time": 0.11023640632629395 + }, + { + "epoch": 2.33856201171875e-05, + "model_forward_time": 0.024993896484375, + "step": 15326 + }, + { + "epoch": 2.33856201171875e-05, + "step": 15326, + "training_step_time": 0.13028454780578613 + }, + { + "epoch": 2.338714599609375e-05, + "model_forward_time": 0.024782896041870117, + "step": 15327 + }, + { + "epoch": 2.338714599609375e-05, + "step": 15327, + "training_step_time": 0.13089704513549805 + }, + { + "epoch": 2.3388671875e-05, + "model_forward_time": 0.02513885498046875, + "step": 15328 + }, + { + "epoch": 2.3388671875e-05, + "step": 15328, + "training_step_time": 0.1268014907836914 + }, + { + "epoch": 2.339019775390625e-05, + "model_forward_time": 0.02509903907775879, + "step": 15329 + }, + { + "epoch": 2.339019775390625e-05, + "step": 15329, + "training_step_time": 0.11759662628173828 + }, + { + "epoch": 2.33917236328125e-05, + "grad_norm": 0.3768186867237091, + "learning_rate": 5.231403087877955e-05, + "loss": 0.0103, + "step": 15330 + }, + { + "epoch": 2.33917236328125e-05, + "model_forward_time": 0.025177955627441406, + "step": 15330 + }, + { + "epoch": 2.33917236328125e-05, + "step": 15330, + "training_step_time": 0.16248250007629395 + }, + { + "epoch": 2.339324951171875e-05, + "model_forward_time": 0.024389982223510742, + "step": 15331 + }, + { + "epoch": 2.339324951171875e-05, + "step": 15331, + "training_step_time": 0.13031840324401855 + }, + { + "epoch": 2.3394775390625e-05, + "model_forward_time": 0.02442455291748047, + "step": 15332 + }, + { + "epoch": 2.3394775390625e-05, + "step": 15332, + "training_step_time": 0.11825394630432129 + }, + { + "epoch": 2.339630126953125e-05, + "model_forward_time": 0.024667739868164062, + "step": 15333 + }, + { + "epoch": 2.339630126953125e-05, + "step": 15333, + "training_step_time": 0.11692261695861816 + }, + { + "epoch": 2.33978271484375e-05, + "model_forward_time": 0.024834871292114258, + "step": 15334 + }, + { + "epoch": 2.33978271484375e-05, + "step": 15334, + "training_step_time": 0.16986608505249023 + }, + { + "epoch": 2.339935302734375e-05, + "model_forward_time": 0.02391648292541504, + "step": 15335 + }, + { + "epoch": 2.339935302734375e-05, + "step": 15335, + "training_step_time": 0.13137102127075195 + }, + { + "epoch": 2.340087890625e-05, + "model_forward_time": 0.024741411209106445, + "step": 15336 + }, + { + "epoch": 2.340087890625e-05, + "step": 15336, + "training_step_time": 0.10733795166015625 + }, + { + "epoch": 2.340240478515625e-05, + "model_forward_time": 0.025008678436279297, + "step": 15337 + }, + { + "epoch": 2.340240478515625e-05, + "step": 15337, + "training_step_time": 0.10665702819824219 + }, + { + "epoch": 2.34039306640625e-05, + "model_forward_time": 0.02646803855895996, + "step": 15338 + }, + { + "epoch": 2.34039306640625e-05, + "step": 15338, + "training_step_time": 0.1087789535522461 + }, + { + "epoch": 2.340545654296875e-05, + "model_forward_time": 0.025359630584716797, + "step": 15339 + }, + { + "epoch": 2.340545654296875e-05, + "step": 15339, + "training_step_time": 0.10595822334289551 + }, + { + "epoch": 2.3406982421875e-05, + "grad_norm": 0.3194389045238495, + "learning_rate": 5.225897288109245e-05, + "loss": 0.0134, + "step": 15340 + }, + { + "epoch": 2.3406982421875e-05, + "model_forward_time": 0.025155067443847656, + "step": 15340 + }, + { + "epoch": 2.3406982421875e-05, + "step": 15340, + "training_step_time": 0.10541176795959473 + }, + { + "epoch": 2.340850830078125e-05, + "model_forward_time": 0.024828433990478516, + "step": 15341 + }, + { + "epoch": 2.340850830078125e-05, + "step": 15341, + "training_step_time": 0.10384440422058105 + }, + { + "epoch": 2.34100341796875e-05, + "model_forward_time": 0.025161266326904297, + "step": 15342 + }, + { + "epoch": 2.34100341796875e-05, + "step": 15342, + "training_step_time": 0.10907888412475586 + }, + { + "epoch": 2.341156005859375e-05, + "model_forward_time": 0.024771690368652344, + "step": 15343 + }, + { + "epoch": 2.341156005859375e-05, + "step": 15343, + "training_step_time": 0.10642671585083008 + }, + { + "epoch": 2.34130859375e-05, + "model_forward_time": 0.025197505950927734, + "step": 15344 + }, + { + "epoch": 2.34130859375e-05, + "step": 15344, + "training_step_time": 0.1052391529083252 + }, + { + "epoch": 2.341461181640625e-05, + "model_forward_time": 0.025090932846069336, + "step": 15345 + }, + { + "epoch": 2.341461181640625e-05, + "step": 15345, + "training_step_time": 0.10569882392883301 + }, + { + "epoch": 2.34161376953125e-05, + "model_forward_time": 0.025136709213256836, + "step": 15346 + }, + { + "epoch": 2.34161376953125e-05, + "step": 15346, + "training_step_time": 0.10583758354187012 + }, + { + "epoch": 2.341766357421875e-05, + "model_forward_time": 0.025170087814331055, + "step": 15347 + }, + { + "epoch": 2.341766357421875e-05, + "step": 15347, + "training_step_time": 0.10538554191589355 + }, + { + "epoch": 2.3419189453125e-05, + "model_forward_time": 0.025414705276489258, + "step": 15348 + }, + { + "epoch": 2.3419189453125e-05, + "step": 15348, + "training_step_time": 0.11744141578674316 + }, + { + "epoch": 2.342071533203125e-05, + "model_forward_time": 0.025246858596801758, + "step": 15349 + }, + { + "epoch": 2.342071533203125e-05, + "step": 15349, + "training_step_time": 0.1069784164428711 + }, + { + "epoch": 2.34222412109375e-05, + "grad_norm": 0.26905763149261475, + "learning_rate": 5.220391213854028e-05, + "loss": 0.0184, + "step": 15350 + }, + { + "epoch": 2.34222412109375e-05, + "model_forward_time": 0.025348424911499023, + "step": 15350 + }, + { + "epoch": 2.34222412109375e-05, + "step": 15350, + "training_step_time": 0.10698223114013672 + }, + { + "epoch": 2.342376708984375e-05, + "model_forward_time": 0.02601456642150879, + "step": 15351 + }, + { + "epoch": 2.342376708984375e-05, + "step": 15351, + "training_step_time": 0.1059103012084961 + }, + { + "epoch": 2.342529296875e-05, + "model_forward_time": 0.02536630630493164, + "step": 15352 + }, + { + "epoch": 2.342529296875e-05, + "step": 15352, + "training_step_time": 0.1172034740447998 + }, + { + "epoch": 2.342681884765625e-05, + "model_forward_time": 0.025052785873413086, + "step": 15353 + }, + { + "epoch": 2.342681884765625e-05, + "step": 15353, + "training_step_time": 0.16534662246704102 + }, + { + "epoch": 2.34283447265625e-05, + "model_forward_time": 0.024302244186401367, + "step": 15354 + }, + { + "epoch": 2.34283447265625e-05, + "step": 15354, + "training_step_time": 0.16155099868774414 + }, + { + "epoch": 2.342987060546875e-05, + "model_forward_time": 0.023742198944091797, + "step": 15355 + }, + { + "epoch": 2.342987060546875e-05, + "step": 15355, + "training_step_time": 0.1527423858642578 + }, + { + "epoch": 2.3431396484375e-05, + "model_forward_time": 0.02551412582397461, + "step": 15356 + }, + { + "epoch": 2.3431396484375e-05, + "step": 15356, + "training_step_time": 0.12360501289367676 + }, + { + "epoch": 2.343292236328125e-05, + "model_forward_time": 0.024333477020263672, + "step": 15357 + }, + { + "epoch": 2.343292236328125e-05, + "step": 15357, + "training_step_time": 0.1407303810119629 + }, + { + "epoch": 2.34344482421875e-05, + "model_forward_time": 0.02489304542541504, + "step": 15358 + }, + { + "epoch": 2.34344482421875e-05, + "step": 15358, + "training_step_time": 0.15712380409240723 + }, + { + "epoch": 2.343597412109375e-05, + "model_forward_time": 0.024536848068237305, + "step": 15359 + }, + { + "epoch": 2.343597412109375e-05, + "step": 15359, + "training_step_time": 0.10138440132141113 + }, + { + "epoch": 2.34375e-05, + "grad_norm": 0.23495899140834808, + "learning_rate": 5.214884871802703e-05, + "loss": 0.011, + "step": 15360 + }, + { + "epoch": 2.34375e-05, + "model_forward_time": 0.02471184730529785, + "step": 15360 + }, + { + "epoch": 2.34375e-05, + "step": 15360, + "training_step_time": 0.11548018455505371 + }, + { + "epoch": 2.343902587890625e-05, + "model_forward_time": 0.02496027946472168, + "step": 15361 + }, + { + "epoch": 2.343902587890625e-05, + "step": 15361, + "training_step_time": 0.12210655212402344 + }, + { + "epoch": 2.34405517578125e-05, + "model_forward_time": 0.025074481964111328, + "step": 15362 + }, + { + "epoch": 2.34405517578125e-05, + "step": 15362, + "training_step_time": 0.11411142349243164 + }, + { + "epoch": 2.344207763671875e-05, + "model_forward_time": 0.024842023849487305, + "step": 15363 + }, + { + "epoch": 2.344207763671875e-05, + "step": 15363, + "training_step_time": 0.11220860481262207 + }, + { + "epoch": 2.3443603515625e-05, + "model_forward_time": 0.02507781982421875, + "step": 15364 + }, + { + "epoch": 2.3443603515625e-05, + "step": 15364, + "training_step_time": 0.10889554023742676 + }, + { + "epoch": 2.344512939453125e-05, + "model_forward_time": 0.025470972061157227, + "step": 15365 + }, + { + "epoch": 2.344512939453125e-05, + "step": 15365, + "training_step_time": 0.10918879508972168 + }, + { + "epoch": 2.34466552734375e-05, + "model_forward_time": 0.02514028549194336, + "step": 15366 + }, + { + "epoch": 2.34466552734375e-05, + "step": 15366, + "training_step_time": 0.11275744438171387 + }, + { + "epoch": 2.344818115234375e-05, + "model_forward_time": 0.025383472442626953, + "step": 15367 + }, + { + "epoch": 2.344818115234375e-05, + "step": 15367, + "training_step_time": 0.11038017272949219 + }, + { + "epoch": 2.344970703125e-05, + "model_forward_time": 0.025271177291870117, + "step": 15368 + }, + { + "epoch": 2.344970703125e-05, + "step": 15368, + "training_step_time": 0.11092162132263184 + }, + { + "epoch": 2.345123291015625e-05, + "model_forward_time": 0.02517414093017578, + "step": 15369 + }, + { + "epoch": 2.345123291015625e-05, + "step": 15369, + "training_step_time": 0.11031627655029297 + }, + { + "epoch": 2.34527587890625e-05, + "grad_norm": 0.16505533456802368, + "learning_rate": 5.209378268645998e-05, + "loss": 0.0132, + "step": 15370 + }, + { + "epoch": 2.34527587890625e-05, + "model_forward_time": 0.025294065475463867, + "step": 15370 + }, + { + "epoch": 2.34527587890625e-05, + "step": 15370, + "training_step_time": 0.10870862007141113 + }, + { + "epoch": 2.345428466796875e-05, + "model_forward_time": 0.024875640869140625, + "step": 15371 + }, + { + "epoch": 2.345428466796875e-05, + "step": 15371, + "training_step_time": 0.10671424865722656 + }, + { + "epoch": 2.3455810546875e-05, + "model_forward_time": 0.025078773498535156, + "step": 15372 + }, + { + "epoch": 2.3455810546875e-05, + "step": 15372, + "training_step_time": 0.1061561107635498 + }, + { + "epoch": 2.345733642578125e-05, + "model_forward_time": 0.025143861770629883, + "step": 15373 + }, + { + "epoch": 2.345733642578125e-05, + "step": 15373, + "training_step_time": 0.10863304138183594 + }, + { + "epoch": 2.34588623046875e-05, + "model_forward_time": 0.024934053421020508, + "step": 15374 + }, + { + "epoch": 2.34588623046875e-05, + "step": 15374, + "training_step_time": 0.10528159141540527 + }, + { + "epoch": 2.346038818359375e-05, + "model_forward_time": 0.02546405792236328, + "step": 15375 + }, + { + "epoch": 2.346038818359375e-05, + "step": 15375, + "training_step_time": 0.10586833953857422 + }, + { + "epoch": 2.34619140625e-05, + "model_forward_time": 0.025496244430541992, + "step": 15376 + }, + { + "epoch": 2.34619140625e-05, + "step": 15376, + "training_step_time": 0.10705113410949707 + }, + { + "epoch": 2.346343994140625e-05, + "model_forward_time": 0.02477574348449707, + "step": 15377 + }, + { + "epoch": 2.346343994140625e-05, + "step": 15377, + "training_step_time": 0.10455822944641113 + }, + { + "epoch": 2.34649658203125e-05, + "model_forward_time": 0.024817943572998047, + "step": 15378 + }, + { + "epoch": 2.34649658203125e-05, + "step": 15378, + "training_step_time": 0.11150622367858887 + }, + { + "epoch": 2.346649169921875e-05, + "model_forward_time": 0.024812698364257812, + "step": 15379 + }, + { + "epoch": 2.346649169921875e-05, + "step": 15379, + "training_step_time": 0.15322542190551758 + }, + { + "epoch": 2.3468017578125e-05, + "grad_norm": 0.2848520576953888, + "learning_rate": 5.203871411074954e-05, + "loss": 0.0138, + "step": 15380 + }, + { + "epoch": 2.3468017578125e-05, + "model_forward_time": 0.024263381958007812, + "step": 15380 + }, + { + "epoch": 2.3468017578125e-05, + "step": 15380, + "training_step_time": 0.10468792915344238 + }, + { + "epoch": 2.346954345703125e-05, + "model_forward_time": 0.02417922019958496, + "step": 15381 + }, + { + "epoch": 2.346954345703125e-05, + "step": 15381, + "training_step_time": 0.11133933067321777 + }, + { + "epoch": 2.34710693359375e-05, + "model_forward_time": 0.025397539138793945, + "step": 15382 + }, + { + "epoch": 2.34710693359375e-05, + "step": 15382, + "training_step_time": 0.10789775848388672 + }, + { + "epoch": 2.347259521484375e-05, + "model_forward_time": 0.024869680404663086, + "step": 15383 + }, + { + "epoch": 2.347259521484375e-05, + "step": 15383, + "training_step_time": 0.10707378387451172 + }, + { + "epoch": 2.347412109375e-05, + "model_forward_time": 0.025076627731323242, + "step": 15384 + }, + { + "epoch": 2.347412109375e-05, + "step": 15384, + "training_step_time": 0.19950199127197266 + }, + { + "epoch": 2.347564697265625e-05, + "model_forward_time": 0.024183273315429688, + "step": 15385 + }, + { + "epoch": 2.347564697265625e-05, + "step": 15385, + "training_step_time": 0.10201859474182129 + }, + { + "epoch": 2.34771728515625e-05, + "model_forward_time": 0.024294137954711914, + "step": 15386 + }, + { + "epoch": 2.34771728515625e-05, + "step": 15386, + "training_step_time": 0.1039879322052002 + }, + { + "epoch": 2.347869873046875e-05, + "model_forward_time": 0.02482318878173828, + "step": 15387 + }, + { + "epoch": 2.347869873046875e-05, + "step": 15387, + "training_step_time": 0.10565757751464844 + }, + { + "epoch": 2.3480224609375e-05, + "model_forward_time": 0.025242090225219727, + "step": 15388 + }, + { + "epoch": 2.3480224609375e-05, + "step": 15388, + "training_step_time": 0.10662460327148438 + }, + { + "epoch": 2.348175048828125e-05, + "model_forward_time": 0.02550506591796875, + "step": 15389 + }, + { + "epoch": 2.348175048828125e-05, + "step": 15389, + "training_step_time": 0.1093130111694336 + }, + { + "epoch": 2.34832763671875e-05, + "grad_norm": 0.298772394657135, + "learning_rate": 5.198364305780922e-05, + "loss": 0.0117, + "step": 15390 + }, + { + "epoch": 2.34832763671875e-05, + "model_forward_time": 0.025131940841674805, + "step": 15390 + }, + { + "epoch": 2.34832763671875e-05, + "step": 15390, + "training_step_time": 0.10651803016662598 + }, + { + "epoch": 2.348480224609375e-05, + "model_forward_time": 0.025292634963989258, + "step": 15391 + }, + { + "epoch": 2.348480224609375e-05, + "step": 15391, + "training_step_time": 0.10826468467712402 + }, + { + "epoch": 2.3486328125e-05, + "model_forward_time": 0.02551889419555664, + "step": 15392 + }, + { + "epoch": 2.3486328125e-05, + "step": 15392, + "training_step_time": 0.10725760459899902 + }, + { + "epoch": 2.348785400390625e-05, + "model_forward_time": 0.02572035789489746, + "step": 15393 + }, + { + "epoch": 2.348785400390625e-05, + "step": 15393, + "training_step_time": 0.11358070373535156 + }, + { + "epoch": 2.34893798828125e-05, + "model_forward_time": 0.025273561477661133, + "step": 15394 + }, + { + "epoch": 2.34893798828125e-05, + "step": 15394, + "training_step_time": 0.1069033145904541 + }, + { + "epoch": 2.349090576171875e-05, + "model_forward_time": 0.025171518325805664, + "step": 15395 + }, + { + "epoch": 2.349090576171875e-05, + "step": 15395, + "training_step_time": 0.10815834999084473 + }, + { + "epoch": 2.3492431640625e-05, + "model_forward_time": 0.025238752365112305, + "step": 15396 + }, + { + "epoch": 2.3492431640625e-05, + "step": 15396, + "training_step_time": 0.16638398170471191 + }, + { + "epoch": 2.349395751953125e-05, + "model_forward_time": 0.025639057159423828, + "step": 15397 + }, + { + "epoch": 2.349395751953125e-05, + "step": 15397, + "training_step_time": 0.10660839080810547 + }, + { + "epoch": 2.34954833984375e-05, + "model_forward_time": 0.024918794631958008, + "step": 15398 + }, + { + "epoch": 2.34954833984375e-05, + "step": 15398, + "training_step_time": 0.14927196502685547 + }, + { + "epoch": 2.349700927734375e-05, + "model_forward_time": 0.024569272994995117, + "step": 15399 + }, + { + "epoch": 2.349700927734375e-05, + "step": 15399, + "training_step_time": 0.12423467636108398 + }, + { + "epoch": 2.349853515625e-05, + "grad_norm": 0.3625675141811371, + "learning_rate": 5.1928569594555524e-05, + "loss": 0.0119, + "step": 15400 + }, + { + "epoch": 2.349853515625e-05, + "model_forward_time": 0.02469778060913086, + "step": 15400 + }, + { + "epoch": 2.349853515625e-05, + "step": 15400, + "training_step_time": 0.1961045265197754 + }, + { + "epoch": 2.350006103515625e-05, + "model_forward_time": 0.024578332901000977, + "step": 15401 + }, + { + "epoch": 2.350006103515625e-05, + "step": 15401, + "training_step_time": 0.14148569107055664 + }, + { + "epoch": 2.35015869140625e-05, + "model_forward_time": 0.024995803833007812, + "step": 15402 + }, + { + "epoch": 2.35015869140625e-05, + "step": 15402, + "training_step_time": 0.10483098030090332 + }, + { + "epoch": 2.350311279296875e-05, + "model_forward_time": 0.025381088256835938, + "step": 15403 + }, + { + "epoch": 2.350311279296875e-05, + "step": 15403, + "training_step_time": 0.21323180198669434 + }, + { + "epoch": 2.3504638671875e-05, + "model_forward_time": 0.02424168586730957, + "step": 15404 + }, + { + "epoch": 2.3504638671875e-05, + "step": 15404, + "training_step_time": 0.11415362358093262 + }, + { + "epoch": 2.350616455078125e-05, + "model_forward_time": 0.024085283279418945, + "step": 15405 + }, + { + "epoch": 2.350616455078125e-05, + "step": 15405, + "training_step_time": 0.13407683372497559 + }, + { + "epoch": 2.35076904296875e-05, + "model_forward_time": 0.025089502334594727, + "step": 15406 + }, + { + "epoch": 2.35076904296875e-05, + "step": 15406, + "training_step_time": 0.13432741165161133 + }, + { + "epoch": 2.350921630859375e-05, + "model_forward_time": 0.02424168586730957, + "step": 15407 + }, + { + "epoch": 2.350921630859375e-05, + "step": 15407, + "training_step_time": 0.11794853210449219 + }, + { + "epoch": 2.35107421875e-05, + "model_forward_time": 0.024641036987304688, + "step": 15408 + }, + { + "epoch": 2.35107421875e-05, + "step": 15408, + "training_step_time": 0.12234663963317871 + }, + { + "epoch": 2.351226806640625e-05, + "model_forward_time": 0.025035619735717773, + "step": 15409 + }, + { + "epoch": 2.351226806640625e-05, + "step": 15409, + "training_step_time": 0.10802412033081055 + }, + { + "epoch": 2.35137939453125e-05, + "grad_norm": 0.5025345683097839, + "learning_rate": 5.187349378790793e-05, + "loss": 0.0096, + "step": 15410 + }, + { + "epoch": 2.35137939453125e-05, + "model_forward_time": 0.02489638328552246, + "step": 15410 + }, + { + "epoch": 2.35137939453125e-05, + "step": 15410, + "training_step_time": 0.10715770721435547 + }, + { + "epoch": 2.351531982421875e-05, + "model_forward_time": 0.025928258895874023, + "step": 15411 + }, + { + "epoch": 2.351531982421875e-05, + "step": 15411, + "training_step_time": 0.11005735397338867 + }, + { + "epoch": 2.3516845703125e-05, + "model_forward_time": 0.024924516677856445, + "step": 15412 + }, + { + "epoch": 2.3516845703125e-05, + "step": 15412, + "training_step_time": 0.1216881275177002 + }, + { + "epoch": 2.351837158203125e-05, + "model_forward_time": 0.025143146514892578, + "step": 15413 + }, + { + "epoch": 2.351837158203125e-05, + "step": 15413, + "training_step_time": 0.12044548988342285 + }, + { + "epoch": 2.35198974609375e-05, + "model_forward_time": 0.024954795837402344, + "step": 15414 + }, + { + "epoch": 2.35198974609375e-05, + "step": 15414, + "training_step_time": 0.11371207237243652 + }, + { + "epoch": 2.352142333984375e-05, + "model_forward_time": 0.024860143661499023, + "step": 15415 + }, + { + "epoch": 2.352142333984375e-05, + "step": 15415, + "training_step_time": 0.12073802947998047 + }, + { + "epoch": 2.352294921875e-05, + "model_forward_time": 0.02498793601989746, + "step": 15416 + }, + { + "epoch": 2.352294921875e-05, + "step": 15416, + "training_step_time": 0.1166830062866211 + }, + { + "epoch": 2.352447509765625e-05, + "model_forward_time": 0.02482151985168457, + "step": 15417 + }, + { + "epoch": 2.352447509765625e-05, + "step": 15417, + "training_step_time": 0.1142582893371582 + }, + { + "epoch": 2.35260009765625e-05, + "model_forward_time": 0.024872303009033203, + "step": 15418 + }, + { + "epoch": 2.35260009765625e-05, + "step": 15418, + "training_step_time": 0.11376190185546875 + }, + { + "epoch": 2.352752685546875e-05, + "model_forward_time": 0.025324106216430664, + "step": 15419 + }, + { + "epoch": 2.352752685546875e-05, + "step": 15419, + "training_step_time": 0.11222314834594727 + }, + { + "epoch": 2.3529052734375e-05, + "grad_norm": 0.18463116884231567, + "learning_rate": 5.1818415704788725e-05, + "loss": 0.0134, + "step": 15420 + }, + { + "epoch": 2.3529052734375e-05, + "model_forward_time": 0.024056196212768555, + "step": 15420 + }, + { + "epoch": 2.3529052734375e-05, + "step": 15420, + "training_step_time": 0.1104884147644043 + }, + { + "epoch": 2.353057861328125e-05, + "model_forward_time": 0.025332927703857422, + "step": 15421 + }, + { + "epoch": 2.353057861328125e-05, + "step": 15421, + "training_step_time": 0.1106576919555664 + }, + { + "epoch": 2.35321044921875e-05, + "model_forward_time": 0.02504897117614746, + "step": 15422 + }, + { + "epoch": 2.35321044921875e-05, + "step": 15422, + "training_step_time": 0.10846090316772461 + }, + { + "epoch": 2.353363037109375e-05, + "model_forward_time": 0.024890899658203125, + "step": 15423 + }, + { + "epoch": 2.353363037109375e-05, + "step": 15423, + "training_step_time": 0.10877013206481934 + }, + { + "epoch": 2.353515625e-05, + "model_forward_time": 0.024968862533569336, + "step": 15424 + }, + { + "epoch": 2.353515625e-05, + "step": 15424, + "training_step_time": 0.13439416885375977 + }, + { + "epoch": 2.353668212890625e-05, + "model_forward_time": 0.025145292282104492, + "step": 15425 + }, + { + "epoch": 2.353668212890625e-05, + "step": 15425, + "training_step_time": 0.12939810752868652 + }, + { + "epoch": 2.35382080078125e-05, + "model_forward_time": 0.02478194236755371, + "step": 15426 + }, + { + "epoch": 2.35382080078125e-05, + "step": 15426, + "training_step_time": 0.11025047302246094 + }, + { + "epoch": 2.353973388671875e-05, + "model_forward_time": 0.02718830108642578, + "step": 15427 + }, + { + "epoch": 2.353973388671875e-05, + "step": 15427, + "training_step_time": 0.10894656181335449 + }, + { + "epoch": 2.3541259765625e-05, + "model_forward_time": 0.025003433227539062, + "step": 15428 + }, + { + "epoch": 2.3541259765625e-05, + "step": 15428, + "training_step_time": 0.1192939281463623 + }, + { + "epoch": 2.354278564453125e-05, + "model_forward_time": 0.024866342544555664, + "step": 15429 + }, + { + "epoch": 2.354278564453125e-05, + "step": 15429, + "training_step_time": 0.11080098152160645 + }, + { + "epoch": 2.35443115234375e-05, + "grad_norm": 0.2784884572029114, + "learning_rate": 5.176333541212296e-05, + "loss": 0.0306, + "step": 15430 + }, + { + "epoch": 2.35443115234375e-05, + "model_forward_time": 0.02485799789428711, + "step": 15430 + }, + { + "epoch": 2.35443115234375e-05, + "step": 15430, + "training_step_time": 0.18803691864013672 + }, + { + "epoch": 2.354583740234375e-05, + "model_forward_time": 0.024337291717529297, + "step": 15431 + }, + { + "epoch": 2.354583740234375e-05, + "step": 15431, + "training_step_time": 0.1038210391998291 + }, + { + "epoch": 2.354736328125e-05, + "model_forward_time": 0.024437904357910156, + "step": 15432 + }, + { + "epoch": 2.354736328125e-05, + "step": 15432, + "training_step_time": 0.10624289512634277 + }, + { + "epoch": 2.354888916015625e-05, + "model_forward_time": 0.02502727508544922, + "step": 15433 + }, + { + "epoch": 2.354888916015625e-05, + "step": 15433, + "training_step_time": 0.10673403739929199 + }, + { + "epoch": 2.35504150390625e-05, + "model_forward_time": 0.02527904510498047, + "step": 15434 + }, + { + "epoch": 2.35504150390625e-05, + "step": 15434, + "training_step_time": 0.10990214347839355 + }, + { + "epoch": 2.355194091796875e-05, + "model_forward_time": 0.025089025497436523, + "step": 15435 + }, + { + "epoch": 2.355194091796875e-05, + "step": 15435, + "training_step_time": 0.10812997817993164 + }, + { + "epoch": 2.3553466796875e-05, + "model_forward_time": 0.024912357330322266, + "step": 15436 + }, + { + "epoch": 2.3553466796875e-05, + "step": 15436, + "training_step_time": 0.11075687408447266 + }, + { + "epoch": 2.355499267578125e-05, + "model_forward_time": 0.024885177612304688, + "step": 15437 + }, + { + "epoch": 2.355499267578125e-05, + "step": 15437, + "training_step_time": 0.11005043983459473 + }, + { + "epoch": 2.35565185546875e-05, + "model_forward_time": 0.02490067481994629, + "step": 15438 + }, + { + "epoch": 2.35565185546875e-05, + "step": 15438, + "training_step_time": 0.10965943336486816 + }, + { + "epoch": 2.355804443359375e-05, + "model_forward_time": 0.023903369903564453, + "step": 15439 + }, + { + "epoch": 2.355804443359375e-05, + "step": 15439, + "training_step_time": 0.10749006271362305 + }, + { + "epoch": 2.35595703125e-05, + "grad_norm": 0.2178107351064682, + "learning_rate": 5.170825297683841e-05, + "loss": 0.0108, + "step": 15440 + }, + { + "epoch": 2.35595703125e-05, + "model_forward_time": 0.024161577224731445, + "step": 15440 + }, + { + "epoch": 2.35595703125e-05, + "step": 15440, + "training_step_time": 0.1110990047454834 + }, + { + "epoch": 2.356109619140625e-05, + "model_forward_time": 0.024830102920532227, + "step": 15441 + }, + { + "epoch": 2.356109619140625e-05, + "step": 15441, + "training_step_time": 0.1069638729095459 + }, + { + "epoch": 2.35626220703125e-05, + "model_forward_time": 0.024884700775146484, + "step": 15442 + }, + { + "epoch": 2.35626220703125e-05, + "step": 15442, + "training_step_time": 0.18027305603027344 + }, + { + "epoch": 2.356414794921875e-05, + "model_forward_time": 0.02498769760131836, + "step": 15443 + }, + { + "epoch": 2.356414794921875e-05, + "step": 15443, + "training_step_time": 0.10463571548461914 + }, + { + "epoch": 2.3565673828125e-05, + "model_forward_time": 0.024384260177612305, + "step": 15444 + }, + { + "epoch": 2.3565673828125e-05, + "step": 15444, + "training_step_time": 0.19267678260803223 + }, + { + "epoch": 2.356719970703125e-05, + "model_forward_time": 0.0242311954498291, + "step": 15445 + }, + { + "epoch": 2.356719970703125e-05, + "step": 15445, + "training_step_time": 0.13937997817993164 + }, + { + "epoch": 2.35687255859375e-05, + "model_forward_time": 0.024295806884765625, + "step": 15446 + }, + { + "epoch": 2.35687255859375e-05, + "step": 15446, + "training_step_time": 0.11492729187011719 + }, + { + "epoch": 2.357025146484375e-05, + "model_forward_time": 0.025031089782714844, + "step": 15447 + }, + { + "epoch": 2.357025146484375e-05, + "step": 15447, + "training_step_time": 0.1697549819946289 + }, + { + "epoch": 2.357177734375e-05, + "model_forward_time": 0.024139881134033203, + "step": 15448 + }, + { + "epoch": 2.357177734375e-05, + "step": 15448, + "training_step_time": 0.1727585792541504 + }, + { + "epoch": 2.357330322265625e-05, + "model_forward_time": 0.024634838104248047, + "step": 15449 + }, + { + "epoch": 2.357330322265625e-05, + "step": 15449, + "training_step_time": 0.14006805419921875 + }, + { + "epoch": 2.35748291015625e-05, + "grad_norm": 0.3177327811717987, + "learning_rate": 5.165316846586541e-05, + "loss": 0.0209, + "step": 15450 + }, + { + "epoch": 2.35748291015625e-05, + "model_forward_time": 0.024422168731689453, + "step": 15450 + }, + { + "epoch": 2.35748291015625e-05, + "step": 15450, + "training_step_time": 0.10347270965576172 + }, + { + "epoch": 2.357635498046875e-05, + "model_forward_time": 0.02494335174560547, + "step": 15451 + }, + { + "epoch": 2.357635498046875e-05, + "step": 15451, + "training_step_time": 0.11974620819091797 + }, + { + "epoch": 2.3577880859375e-05, + "model_forward_time": 0.024838924407958984, + "step": 15452 + }, + { + "epoch": 2.3577880859375e-05, + "step": 15452, + "training_step_time": 0.10344910621643066 + }, + { + "epoch": 2.357940673828125e-05, + "model_forward_time": 0.025091886520385742, + "step": 15453 + }, + { + "epoch": 2.357940673828125e-05, + "step": 15453, + "training_step_time": 0.1819746494293213 + }, + { + "epoch": 2.35809326171875e-05, + "model_forward_time": 0.02439284324645996, + "step": 15454 + }, + { + "epoch": 2.35809326171875e-05, + "step": 15454, + "training_step_time": 0.1127784252166748 + }, + { + "epoch": 2.358245849609375e-05, + "model_forward_time": 0.024183273315429688, + "step": 15455 + }, + { + "epoch": 2.358245849609375e-05, + "step": 15455, + "training_step_time": 0.1116793155670166 + }, + { + "epoch": 2.3583984375e-05, + "model_forward_time": 0.02536940574645996, + "step": 15456 + }, + { + "epoch": 2.3583984375e-05, + "step": 15456, + "training_step_time": 0.10589981079101562 + }, + { + "epoch": 2.358551025390625e-05, + "model_forward_time": 0.02509307861328125, + "step": 15457 + }, + { + "epoch": 2.358551025390625e-05, + "step": 15457, + "training_step_time": 0.1077585220336914 + }, + { + "epoch": 2.35870361328125e-05, + "model_forward_time": 0.02546548843383789, + "step": 15458 + }, + { + "epoch": 2.35870361328125e-05, + "step": 15458, + "training_step_time": 0.10547542572021484 + }, + { + "epoch": 2.358856201171875e-05, + "model_forward_time": 0.025015830993652344, + "step": 15459 + }, + { + "epoch": 2.358856201171875e-05, + "step": 15459, + "training_step_time": 0.10639286041259766 + }, + { + "epoch": 2.3590087890625e-05, + "grad_norm": 0.3070053458213806, + "learning_rate": 5.1598081946136843e-05, + "loss": 0.0119, + "step": 15460 + }, + { + "epoch": 2.3590087890625e-05, + "model_forward_time": 0.025297880172729492, + "step": 15460 + }, + { + "epoch": 2.3590087890625e-05, + "step": 15460, + "training_step_time": 0.1054983139038086 + }, + { + "epoch": 2.359161376953125e-05, + "model_forward_time": 0.02512669563293457, + "step": 15461 + }, + { + "epoch": 2.359161376953125e-05, + "step": 15461, + "training_step_time": 0.10627937316894531 + }, + { + "epoch": 2.35931396484375e-05, + "model_forward_time": 0.025159358978271484, + "step": 15462 + }, + { + "epoch": 2.35931396484375e-05, + "step": 15462, + "training_step_time": 0.10619521141052246 + }, + { + "epoch": 2.359466552734375e-05, + "model_forward_time": 0.024899721145629883, + "step": 15463 + }, + { + "epoch": 2.359466552734375e-05, + "step": 15463, + "training_step_time": 0.10640764236450195 + }, + { + "epoch": 2.359619140625e-05, + "model_forward_time": 0.024675607681274414, + "step": 15464 + }, + { + "epoch": 2.359619140625e-05, + "step": 15464, + "training_step_time": 0.1460740566253662 + }, + { + "epoch": 2.359771728515625e-05, + "model_forward_time": 0.02475285530090332, + "step": 15465 + }, + { + "epoch": 2.359771728515625e-05, + "step": 15465, + "training_step_time": 0.1727917194366455 + }, + { + "epoch": 2.35992431640625e-05, + "model_forward_time": 0.0242769718170166, + "step": 15466 + }, + { + "epoch": 2.35992431640625e-05, + "step": 15466, + "training_step_time": 0.16758012771606445 + }, + { + "epoch": 2.360076904296875e-05, + "model_forward_time": 0.023667335510253906, + "step": 15467 + }, + { + "epoch": 2.360076904296875e-05, + "step": 15467, + "training_step_time": 0.15587496757507324 + }, + { + "epoch": 2.3602294921875e-05, + "model_forward_time": 0.023774385452270508, + "step": 15468 + }, + { + "epoch": 2.3602294921875e-05, + "step": 15468, + "training_step_time": 0.14188408851623535 + }, + { + "epoch": 2.360382080078125e-05, + "model_forward_time": 0.0244598388671875, + "step": 15469 + }, + { + "epoch": 2.360382080078125e-05, + "step": 15469, + "training_step_time": 0.22607922554016113 + }, + { + "epoch": 2.36053466796875e-05, + "grad_norm": 0.5303618907928467, + "learning_rate": 5.154299348458803e-05, + "loss": 0.0125, + "step": 15470 + }, + { + "epoch": 2.36053466796875e-05, + "model_forward_time": 0.024691343307495117, + "step": 15470 + }, + { + "epoch": 2.36053466796875e-05, + "step": 15470, + "training_step_time": 0.12068867683410645 + }, + { + "epoch": 2.360687255859375e-05, + "model_forward_time": 0.02361154556274414, + "step": 15471 + }, + { + "epoch": 2.360687255859375e-05, + "step": 15471, + "training_step_time": 0.1164555549621582 + }, + { + "epoch": 2.36083984375e-05, + "model_forward_time": 0.025020599365234375, + "step": 15472 + }, + { + "epoch": 2.36083984375e-05, + "step": 15472, + "training_step_time": 0.12052369117736816 + }, + { + "epoch": 2.360992431640625e-05, + "model_forward_time": 0.025152921676635742, + "step": 15473 + }, + { + "epoch": 2.360992431640625e-05, + "step": 15473, + "training_step_time": 0.1830449104309082 + }, + { + "epoch": 2.36114501953125e-05, + "model_forward_time": 0.02413034439086914, + "step": 15474 + }, + { + "epoch": 2.36114501953125e-05, + "step": 15474, + "training_step_time": 0.11108589172363281 + }, + { + "epoch": 2.361297607421875e-05, + "model_forward_time": 0.0240480899810791, + "step": 15475 + }, + { + "epoch": 2.361297607421875e-05, + "step": 15475, + "training_step_time": 0.10810565948486328 + }, + { + "epoch": 2.3614501953125e-05, + "model_forward_time": 0.024749040603637695, + "step": 15476 + }, + { + "epoch": 2.3614501953125e-05, + "step": 15476, + "training_step_time": 0.10934638977050781 + }, + { + "epoch": 2.361602783203125e-05, + "model_forward_time": 0.025372743606567383, + "step": 15477 + }, + { + "epoch": 2.361602783203125e-05, + "step": 15477, + "training_step_time": 0.10945510864257812 + }, + { + "epoch": 2.36175537109375e-05, + "model_forward_time": 0.024855375289916992, + "step": 15478 + }, + { + "epoch": 2.36175537109375e-05, + "step": 15478, + "training_step_time": 0.10562300682067871 + }, + { + "epoch": 2.361907958984375e-05, + "model_forward_time": 0.025145769119262695, + "step": 15479 + }, + { + "epoch": 2.361907958984375e-05, + "step": 15479, + "training_step_time": 0.10640716552734375 + }, + { + "epoch": 2.362060546875e-05, + "grad_norm": 0.2705172896385193, + "learning_rate": 5.148790314815663e-05, + "loss": 0.0162, + "step": 15480 + }, + { + "epoch": 2.362060546875e-05, + "model_forward_time": 0.02538013458251953, + "step": 15480 + }, + { + "epoch": 2.362060546875e-05, + "step": 15480, + "training_step_time": 0.10561037063598633 + }, + { + "epoch": 2.362213134765625e-05, + "model_forward_time": 0.02546858787536621, + "step": 15481 + }, + { + "epoch": 2.362213134765625e-05, + "step": 15481, + "training_step_time": 0.10766482353210449 + }, + { + "epoch": 2.36236572265625e-05, + "model_forward_time": 0.025583744049072266, + "step": 15482 + }, + { + "epoch": 2.36236572265625e-05, + "step": 15482, + "training_step_time": 0.10739254951477051 + }, + { + "epoch": 2.362518310546875e-05, + "model_forward_time": 0.025444507598876953, + "step": 15483 + }, + { + "epoch": 2.362518310546875e-05, + "step": 15483, + "training_step_time": 0.10892939567565918 + }, + { + "epoch": 2.3626708984375e-05, + "model_forward_time": 0.02527332305908203, + "step": 15484 + }, + { + "epoch": 2.3626708984375e-05, + "step": 15484, + "training_step_time": 0.10937261581420898 + }, + { + "epoch": 2.362823486328125e-05, + "model_forward_time": 0.02500295639038086, + "step": 15485 + }, + { + "epoch": 2.362823486328125e-05, + "step": 15485, + "training_step_time": 0.10414910316467285 + }, + { + "epoch": 2.36297607421875e-05, + "model_forward_time": 0.025390148162841797, + "step": 15486 + }, + { + "epoch": 2.36297607421875e-05, + "step": 15486, + "training_step_time": 0.21254229545593262 + }, + { + "epoch": 2.363128662109375e-05, + "model_forward_time": 0.025146961212158203, + "step": 15487 + }, + { + "epoch": 2.363128662109375e-05, + "step": 15487, + "training_step_time": 0.10316276550292969 + }, + { + "epoch": 2.36328125e-05, + "model_forward_time": 0.024444580078125, + "step": 15488 + }, + { + "epoch": 2.36328125e-05, + "step": 15488, + "training_step_time": 0.21838974952697754 + }, + { + "epoch": 2.363433837890625e-05, + "model_forward_time": 0.024268388748168945, + "step": 15489 + }, + { + "epoch": 2.363433837890625e-05, + "step": 15489, + "training_step_time": 0.12634849548339844 + }, + { + "epoch": 2.36358642578125e-05, + "grad_norm": 0.456437885761261, + "learning_rate": 5.143281100378261e-05, + "loss": 0.0164, + "step": 15490 + }, + { + "epoch": 2.36358642578125e-05, + "model_forward_time": 0.02475738525390625, + "step": 15490 + }, + { + "epoch": 2.36358642578125e-05, + "step": 15490, + "training_step_time": 0.1128537654876709 + }, + { + "epoch": 2.363739013671875e-05, + "model_forward_time": 0.025415897369384766, + "step": 15491 + }, + { + "epoch": 2.363739013671875e-05, + "step": 15491, + "training_step_time": 0.12503910064697266 + }, + { + "epoch": 2.3638916015625e-05, + "model_forward_time": 0.025249719619750977, + "step": 15492 + }, + { + "epoch": 2.3638916015625e-05, + "step": 15492, + "training_step_time": 0.11187577247619629 + }, + { + "epoch": 2.364044189453125e-05, + "model_forward_time": 0.02574634552001953, + "step": 15493 + }, + { + "epoch": 2.364044189453125e-05, + "step": 15493, + "training_step_time": 0.1807117462158203 + }, + { + "epoch": 2.36419677734375e-05, + "model_forward_time": 0.024758577346801758, + "step": 15494 + }, + { + "epoch": 2.36419677734375e-05, + "step": 15494, + "training_step_time": 0.10839438438415527 + }, + { + "epoch": 2.364349365234375e-05, + "model_forward_time": 0.024535417556762695, + "step": 15495 + }, + { + "epoch": 2.364349365234375e-05, + "step": 15495, + "training_step_time": 0.11020088195800781 + }, + { + "epoch": 2.364501953125e-05, + "model_forward_time": 0.024944305419921875, + "step": 15496 + }, + { + "epoch": 2.364501953125e-05, + "step": 15496, + "training_step_time": 0.12137317657470703 + }, + { + "epoch": 2.364654541015625e-05, + "model_forward_time": 0.025732994079589844, + "step": 15497 + }, + { + "epoch": 2.364654541015625e-05, + "step": 15497, + "training_step_time": 0.11068940162658691 + }, + { + "epoch": 2.36480712890625e-05, + "model_forward_time": 0.025120973587036133, + "step": 15498 + }, + { + "epoch": 2.36480712890625e-05, + "step": 15498, + "training_step_time": 0.11432290077209473 + }, + { + "epoch": 2.364959716796875e-05, + "model_forward_time": 0.024804115295410156, + "step": 15499 + }, + { + "epoch": 2.364959716796875e-05, + "step": 15499, + "training_step_time": 0.16653013229370117 + }, + { + "epoch": 2.3651123046875e-05, + "grad_norm": 0.25273188948631287, + "learning_rate": 5.1377717118408105e-05, + "loss": 0.0126, + "step": 15500 + }, + { + "epoch": 2.3651123046875e-05, + "model_forward_time": 0.02519965171813965, + "step": 15500 + }, + { + "epoch": 2.3651123046875e-05, + "step": 15500, + "training_step_time": 0.10695862770080566 + }, + { + "epoch": 2.365264892578125e-05, + "model_forward_time": 0.024699926376342773, + "step": 15501 + }, + { + "epoch": 2.365264892578125e-05, + "step": 15501, + "training_step_time": 0.10287117958068848 + }, + { + "epoch": 2.36541748046875e-05, + "model_forward_time": 0.025287389755249023, + "step": 15502 + }, + { + "epoch": 2.36541748046875e-05, + "step": 15502, + "training_step_time": 0.10683393478393555 + }, + { + "epoch": 2.365570068359375e-05, + "model_forward_time": 0.025455474853515625, + "step": 15503 + }, + { + "epoch": 2.365570068359375e-05, + "step": 15503, + "training_step_time": 0.10776019096374512 + }, + { + "epoch": 2.36572265625e-05, + "model_forward_time": 0.02508997917175293, + "step": 15504 + }, + { + "epoch": 2.36572265625e-05, + "step": 15504, + "training_step_time": 0.10859990119934082 + }, + { + "epoch": 2.365875244140625e-05, + "model_forward_time": 0.02528238296508789, + "step": 15505 + }, + { + "epoch": 2.365875244140625e-05, + "step": 15505, + "training_step_time": 0.1126713752746582 + }, + { + "epoch": 2.36602783203125e-05, + "model_forward_time": 0.02522754669189453, + "step": 15506 + }, + { + "epoch": 2.36602783203125e-05, + "step": 15506, + "training_step_time": 0.10576677322387695 + }, + { + "epoch": 2.366180419921875e-05, + "model_forward_time": 0.02542281150817871, + "step": 15507 + }, + { + "epoch": 2.366180419921875e-05, + "step": 15507, + "training_step_time": 0.1065669059753418 + }, + { + "epoch": 2.3663330078125e-05, + "model_forward_time": 0.02556920051574707, + "step": 15508 + }, + { + "epoch": 2.3663330078125e-05, + "step": 15508, + "training_step_time": 0.10675168037414551 + }, + { + "epoch": 2.366485595703125e-05, + "model_forward_time": 0.025197267532348633, + "step": 15509 + }, + { + "epoch": 2.366485595703125e-05, + "step": 15509, + "training_step_time": 0.10640859603881836 + }, + { + "epoch": 2.36663818359375e-05, + "grad_norm": 0.23745200037956238, + "learning_rate": 5.132262155897739e-05, + "loss": 0.0165, + "step": 15510 + }, + { + "epoch": 2.36663818359375e-05, + "model_forward_time": 0.025296449661254883, + "step": 15510 + }, + { + "epoch": 2.36663818359375e-05, + "step": 15510, + "training_step_time": 0.10574531555175781 + }, + { + "epoch": 2.366790771484375e-05, + "model_forward_time": 0.025619983673095703, + "step": 15511 + }, + { + "epoch": 2.366790771484375e-05, + "step": 15511, + "training_step_time": 0.10712242126464844 + }, + { + "epoch": 2.366943359375e-05, + "model_forward_time": 0.024913549423217773, + "step": 15512 + }, + { + "epoch": 2.366943359375e-05, + "step": 15512, + "training_step_time": 0.1060788631439209 + }, + { + "epoch": 2.367095947265625e-05, + "model_forward_time": 0.025278568267822266, + "step": 15513 + }, + { + "epoch": 2.367095947265625e-05, + "step": 15513, + "training_step_time": 0.10665512084960938 + }, + { + "epoch": 2.36724853515625e-05, + "model_forward_time": 0.025328874588012695, + "step": 15514 + }, + { + "epoch": 2.36724853515625e-05, + "step": 15514, + "training_step_time": 0.10864830017089844 + }, + { + "epoch": 2.367401123046875e-05, + "model_forward_time": 0.02547430992126465, + "step": 15515 + }, + { + "epoch": 2.367401123046875e-05, + "step": 15515, + "training_step_time": 0.13143396377563477 + }, + { + "epoch": 2.3675537109375e-05, + "model_forward_time": 0.026354074478149414, + "step": 15516 + }, + { + "epoch": 2.3675537109375e-05, + "step": 15516, + "training_step_time": 0.1341395378112793 + }, + { + "epoch": 2.367706298828125e-05, + "model_forward_time": 0.024558067321777344, + "step": 15517 + }, + { + "epoch": 2.367706298828125e-05, + "step": 15517, + "training_step_time": 0.11649513244628906 + }, + { + "epoch": 2.36785888671875e-05, + "model_forward_time": 0.026198863983154297, + "step": 15518 + }, + { + "epoch": 2.36785888671875e-05, + "step": 15518, + "training_step_time": 0.11956572532653809 + }, + { + "epoch": 2.368011474609375e-05, + "model_forward_time": 0.02507328987121582, + "step": 15519 + }, + { + "epoch": 2.368011474609375e-05, + "step": 15519, + "training_step_time": 0.1077735424041748 + }, + { + "epoch": 2.3681640625e-05, + "grad_norm": 0.2525326609611511, + "learning_rate": 5.1267524392436784e-05, + "loss": 0.0076, + "step": 15520 + }, + { + "epoch": 2.3681640625e-05, + "model_forward_time": 0.025011301040649414, + "step": 15520 + }, + { + "epoch": 2.3681640625e-05, + "step": 15520, + "training_step_time": 0.15593647956848145 + }, + { + "epoch": 2.368316650390625e-05, + "model_forward_time": 0.024903059005737305, + "step": 15521 + }, + { + "epoch": 2.368316650390625e-05, + "step": 15521, + "training_step_time": 0.14361357688903809 + }, + { + "epoch": 2.36846923828125e-05, + "model_forward_time": 0.02489614486694336, + "step": 15522 + }, + { + "epoch": 2.36846923828125e-05, + "step": 15522, + "training_step_time": 0.10280609130859375 + }, + { + "epoch": 2.368621826171875e-05, + "model_forward_time": 0.025716304779052734, + "step": 15523 + }, + { + "epoch": 2.368621826171875e-05, + "step": 15523, + "training_step_time": 0.10572409629821777 + }, + { + "epoch": 2.3687744140625e-05, + "model_forward_time": 0.025220394134521484, + "step": 15524 + }, + { + "epoch": 2.3687744140625e-05, + "step": 15524, + "training_step_time": 0.10412740707397461 + }, + { + "epoch": 2.368927001953125e-05, + "model_forward_time": 0.024684906005859375, + "step": 15525 + }, + { + "epoch": 2.368927001953125e-05, + "step": 15525, + "training_step_time": 0.10775518417358398 + }, + { + "epoch": 2.36907958984375e-05, + "model_forward_time": 0.025216102600097656, + "step": 15526 + }, + { + "epoch": 2.36907958984375e-05, + "step": 15526, + "training_step_time": 0.10562849044799805 + }, + { + "epoch": 2.369232177734375e-05, + "model_forward_time": 0.02550339698791504, + "step": 15527 + }, + { + "epoch": 2.369232177734375e-05, + "step": 15527, + "training_step_time": 0.10631871223449707 + }, + { + "epoch": 2.369384765625e-05, + "model_forward_time": 0.02595376968383789, + "step": 15528 + }, + { + "epoch": 2.369384765625e-05, + "step": 15528, + "training_step_time": 0.10671257972717285 + }, + { + "epoch": 2.369537353515625e-05, + "model_forward_time": 0.02534341812133789, + "step": 15529 + }, + { + "epoch": 2.369537353515625e-05, + "step": 15529, + "training_step_time": 0.1061403751373291 + }, + { + "epoch": 2.36968994140625e-05, + "grad_norm": 0.19660969078540802, + "learning_rate": 5.12124256857345e-05, + "loss": 0.0084, + "step": 15530 + }, + { + "epoch": 2.36968994140625e-05, + "model_forward_time": 0.025191307067871094, + "step": 15530 + }, + { + "epoch": 2.36968994140625e-05, + "step": 15530, + "training_step_time": 0.1143960952758789 + }, + { + "epoch": 2.369842529296875e-05, + "model_forward_time": 0.025026321411132812, + "step": 15531 + }, + { + "epoch": 2.369842529296875e-05, + "step": 15531, + "training_step_time": 0.1423177719116211 + }, + { + "epoch": 2.3699951171875e-05, + "model_forward_time": 0.02529621124267578, + "step": 15532 + }, + { + "epoch": 2.3699951171875e-05, + "step": 15532, + "training_step_time": 0.1184380054473877 + }, + { + "epoch": 2.370147705078125e-05, + "model_forward_time": 0.025246381759643555, + "step": 15533 + }, + { + "epoch": 2.370147705078125e-05, + "step": 15533, + "training_step_time": 0.11681675910949707 + }, + { + "epoch": 2.37030029296875e-05, + "model_forward_time": 0.025043487548828125, + "step": 15534 + }, + { + "epoch": 2.37030029296875e-05, + "step": 15534, + "training_step_time": 0.12097930908203125 + }, + { + "epoch": 2.370452880859375e-05, + "model_forward_time": 0.025690555572509766, + "step": 15535 + }, + { + "epoch": 2.370452880859375e-05, + "step": 15535, + "training_step_time": 0.18161416053771973 + }, + { + "epoch": 2.37060546875e-05, + "model_forward_time": 0.024939537048339844, + "step": 15536 + }, + { + "epoch": 2.37060546875e-05, + "step": 15536, + "training_step_time": 0.15705394744873047 + }, + { + "epoch": 2.370758056640625e-05, + "model_forward_time": 0.024589061737060547, + "step": 15537 + }, + { + "epoch": 2.370758056640625e-05, + "step": 15537, + "training_step_time": 0.14492130279541016 + }, + { + "epoch": 2.37091064453125e-05, + "model_forward_time": 0.024574995040893555, + "step": 15538 + }, + { + "epoch": 2.37091064453125e-05, + "step": 15538, + "training_step_time": 0.19661617279052734 + }, + { + "epoch": 2.371063232421875e-05, + "model_forward_time": 0.024836063385009766, + "step": 15539 + }, + { + "epoch": 2.371063232421875e-05, + "step": 15539, + "training_step_time": 0.11748814582824707 + }, + { + "epoch": 2.3712158203125e-05, + "grad_norm": 0.22174404561519623, + "learning_rate": 5.1157325505820694e-05, + "loss": 0.0107, + "step": 15540 + }, + { + "epoch": 2.3712158203125e-05, + "model_forward_time": 0.02497577667236328, + "step": 15540 + }, + { + "epoch": 2.3712158203125e-05, + "step": 15540, + "training_step_time": 0.17559480667114258 + }, + { + "epoch": 2.371368408203125e-05, + "model_forward_time": 0.027292490005493164, + "step": 15541 + }, + { + "epoch": 2.371368408203125e-05, + "step": 15541, + "training_step_time": 0.12818336486816406 + }, + { + "epoch": 2.37152099609375e-05, + "model_forward_time": 0.024483442306518555, + "step": 15542 + }, + { + "epoch": 2.37152099609375e-05, + "step": 15542, + "training_step_time": 0.13470458984375 + }, + { + "epoch": 2.371673583984375e-05, + "model_forward_time": 0.0247952938079834, + "step": 15543 + }, + { + "epoch": 2.371673583984375e-05, + "step": 15543, + "training_step_time": 0.10858464241027832 + }, + { + "epoch": 2.371826171875e-05, + "model_forward_time": 0.025104284286499023, + "step": 15544 + }, + { + "epoch": 2.371826171875e-05, + "step": 15544, + "training_step_time": 0.1763756275177002 + }, + { + "epoch": 2.371978759765625e-05, + "model_forward_time": 0.02468395233154297, + "step": 15545 + }, + { + "epoch": 2.371978759765625e-05, + "step": 15545, + "training_step_time": 0.13909649848937988 + }, + { + "epoch": 2.37213134765625e-05, + "model_forward_time": 0.024155616760253906, + "step": 15546 + }, + { + "epoch": 2.37213134765625e-05, + "step": 15546, + "training_step_time": 0.11394286155700684 + }, + { + "epoch": 2.372283935546875e-05, + "model_forward_time": 0.024597644805908203, + "step": 15547 + }, + { + "epoch": 2.372283935546875e-05, + "step": 15547, + "training_step_time": 0.10602164268493652 + }, + { + "epoch": 2.3724365234375e-05, + "model_forward_time": 0.02496170997619629, + "step": 15548 + }, + { + "epoch": 2.3724365234375e-05, + "step": 15548, + "training_step_time": 0.104339599609375 + }, + { + "epoch": 2.372589111328125e-05, + "model_forward_time": 0.024941444396972656, + "step": 15549 + }, + { + "epoch": 2.372589111328125e-05, + "step": 15549, + "training_step_time": 0.10719060897827148 + }, + { + "epoch": 2.37274169921875e-05, + "grad_norm": 0.3350104093551636, + "learning_rate": 5.110222391964728e-05, + "loss": 0.0111, + "step": 15550 + }, + { + "epoch": 2.37274169921875e-05, + "model_forward_time": 0.02505970001220703, + "step": 15550 + }, + { + "epoch": 2.37274169921875e-05, + "step": 15550, + "training_step_time": 0.1048574447631836 + }, + { + "epoch": 2.372894287109375e-05, + "model_forward_time": 0.025135040283203125, + "step": 15551 + }, + { + "epoch": 2.372894287109375e-05, + "step": 15551, + "training_step_time": 0.10508251190185547 + }, + { + "epoch": 2.373046875e-05, + "model_forward_time": 0.0250399112701416, + "step": 15552 + }, + { + "epoch": 2.373046875e-05, + "step": 15552, + "training_step_time": 0.10562539100646973 + }, + { + "epoch": 2.373199462890625e-05, + "model_forward_time": 0.02492499351501465, + "step": 15553 + }, + { + "epoch": 2.373199462890625e-05, + "step": 15553, + "training_step_time": 0.10793876647949219 + }, + { + "epoch": 2.37335205078125e-05, + "model_forward_time": 0.025185585021972656, + "step": 15554 + }, + { + "epoch": 2.37335205078125e-05, + "step": 15554, + "training_step_time": 0.10675525665283203 + }, + { + "epoch": 2.373504638671875e-05, + "model_forward_time": 0.024840116500854492, + "step": 15555 + }, + { + "epoch": 2.373504638671875e-05, + "step": 15555, + "training_step_time": 0.10751819610595703 + }, + { + "epoch": 2.3736572265625e-05, + "model_forward_time": 0.024757862091064453, + "step": 15556 + }, + { + "epoch": 2.3736572265625e-05, + "step": 15556, + "training_step_time": 0.10419392585754395 + }, + { + "epoch": 2.373809814453125e-05, + "model_forward_time": 0.02526688575744629, + "step": 15557 + }, + { + "epoch": 2.373809814453125e-05, + "step": 15557, + "training_step_time": 0.1046595573425293 + }, + { + "epoch": 2.37396240234375e-05, + "model_forward_time": 0.025385618209838867, + "step": 15558 + }, + { + "epoch": 2.37396240234375e-05, + "step": 15558, + "training_step_time": 0.10466217994689941 + }, + { + "epoch": 2.374114990234375e-05, + "model_forward_time": 0.0250852108001709, + "step": 15559 + }, + { + "epoch": 2.374114990234375e-05, + "step": 15559, + "training_step_time": 0.10519838333129883 + }, + { + "epoch": 2.374267578125e-05, + "grad_norm": 0.3381047546863556, + "learning_rate": 5.104712099416785e-05, + "loss": 0.0144, + "step": 15560 + }, + { + "epoch": 2.374267578125e-05, + "model_forward_time": 0.025135040283203125, + "step": 15560 + }, + { + "epoch": 2.374267578125e-05, + "step": 15560, + "training_step_time": 0.10407280921936035 + }, + { + "epoch": 2.374420166015625e-05, + "model_forward_time": 0.025406837463378906, + "step": 15561 + }, + { + "epoch": 2.374420166015625e-05, + "step": 15561, + "training_step_time": 0.18298101425170898 + }, + { + "epoch": 2.37457275390625e-05, + "model_forward_time": 0.024218082427978516, + "step": 15562 + }, + { + "epoch": 2.37457275390625e-05, + "step": 15562, + "training_step_time": 0.14161419868469238 + }, + { + "epoch": 2.374725341796875e-05, + "model_forward_time": 0.024161577224731445, + "step": 15563 + }, + { + "epoch": 2.374725341796875e-05, + "step": 15563, + "training_step_time": 0.1076512336730957 + }, + { + "epoch": 2.3748779296875e-05, + "model_forward_time": 0.025122880935668945, + "step": 15564 + }, + { + "epoch": 2.3748779296875e-05, + "step": 15564, + "training_step_time": 0.10458135604858398 + }, + { + "epoch": 2.375030517578125e-05, + "model_forward_time": 0.02505016326904297, + "step": 15565 + }, + { + "epoch": 2.375030517578125e-05, + "step": 15565, + "training_step_time": 0.11544489860534668 + }, + { + "epoch": 2.37518310546875e-05, + "model_forward_time": 0.025093793869018555, + "step": 15566 + }, + { + "epoch": 2.37518310546875e-05, + "step": 15566, + "training_step_time": 0.10781002044677734 + }, + { + "epoch": 2.375335693359375e-05, + "model_forward_time": 0.02479076385498047, + "step": 15567 + }, + { + "epoch": 2.375335693359375e-05, + "step": 15567, + "training_step_time": 0.19495177268981934 + }, + { + "epoch": 2.37548828125e-05, + "model_forward_time": 0.024281978607177734, + "step": 15568 + }, + { + "epoch": 2.37548828125e-05, + "step": 15568, + "training_step_time": 0.10193657875061035 + }, + { + "epoch": 2.375640869140625e-05, + "model_forward_time": 0.024460792541503906, + "step": 15569 + }, + { + "epoch": 2.375640869140625e-05, + "step": 15569, + "training_step_time": 0.10332179069519043 + }, + { + "epoch": 2.37579345703125e-05, + "grad_norm": 0.4016200304031372, + "learning_rate": 5.0992016796337686e-05, + "loss": 0.0166, + "step": 15570 + }, + { + "epoch": 2.37579345703125e-05, + "model_forward_time": 0.02515554428100586, + "step": 15570 + }, + { + "epoch": 2.37579345703125e-05, + "step": 15570, + "training_step_time": 0.10767722129821777 + }, + { + "epoch": 2.375946044921875e-05, + "model_forward_time": 0.02510213851928711, + "step": 15571 + }, + { + "epoch": 2.375946044921875e-05, + "step": 15571, + "training_step_time": 0.10695695877075195 + }, + { + "epoch": 2.3760986328125e-05, + "model_forward_time": 0.02525019645690918, + "step": 15572 + }, + { + "epoch": 2.3760986328125e-05, + "step": 15572, + "training_step_time": 0.17679834365844727 + }, + { + "epoch": 2.376251220703125e-05, + "model_forward_time": 0.02431774139404297, + "step": 15573 + }, + { + "epoch": 2.376251220703125e-05, + "step": 15573, + "training_step_time": 0.18374896049499512 + }, + { + "epoch": 2.37640380859375e-05, + "model_forward_time": 0.02478766441345215, + "step": 15574 + }, + { + "epoch": 2.37640380859375e-05, + "step": 15574, + "training_step_time": 0.17661786079406738 + }, + { + "epoch": 2.376556396484375e-05, + "model_forward_time": 0.023332834243774414, + "step": 15575 + }, + { + "epoch": 2.376556396484375e-05, + "step": 15575, + "training_step_time": 0.15988945960998535 + }, + { + "epoch": 2.376708984375e-05, + "model_forward_time": 0.023907899856567383, + "step": 15576 + }, + { + "epoch": 2.376708984375e-05, + "step": 15576, + "training_step_time": 0.15201449394226074 + }, + { + "epoch": 2.376861572265625e-05, + "model_forward_time": 0.024202585220336914, + "step": 15577 + }, + { + "epoch": 2.376861572265625e-05, + "step": 15577, + "training_step_time": 0.1348402500152588 + }, + { + "epoch": 2.37701416015625e-05, + "model_forward_time": 0.02507925033569336, + "step": 15578 + }, + { + "epoch": 2.37701416015625e-05, + "step": 15578, + "training_step_time": 0.12445616722106934 + }, + { + "epoch": 2.377166748046875e-05, + "model_forward_time": 0.024689912796020508, + "step": 15579 + }, + { + "epoch": 2.377166748046875e-05, + "step": 15579, + "training_step_time": 0.17964577674865723 + }, + { + "epoch": 2.3773193359375e-05, + "grad_norm": 0.35026347637176514, + "learning_rate": 5.093691139311356e-05, + "loss": 0.0204, + "step": 15580 + }, + { + "epoch": 2.3773193359375e-05, + "model_forward_time": 0.025986433029174805, + "step": 15580 + }, + { + "epoch": 2.3773193359375e-05, + "step": 15580, + "training_step_time": 0.14970684051513672 + }, + { + "epoch": 2.377471923828125e-05, + "model_forward_time": 0.024339914321899414, + "step": 15581 + }, + { + "epoch": 2.377471923828125e-05, + "step": 15581, + "training_step_time": 0.12225461006164551 + }, + { + "epoch": 2.37762451171875e-05, + "model_forward_time": 0.024596691131591797, + "step": 15582 + }, + { + "epoch": 2.37762451171875e-05, + "step": 15582, + "training_step_time": 0.13561606407165527 + }, + { + "epoch": 2.377777099609375e-05, + "model_forward_time": 0.024923086166381836, + "step": 15583 + }, + { + "epoch": 2.377777099609375e-05, + "step": 15583, + "training_step_time": 0.17176580429077148 + }, + { + "epoch": 2.3779296875e-05, + "model_forward_time": 0.024042367935180664, + "step": 15584 + }, + { + "epoch": 2.3779296875e-05, + "step": 15584, + "training_step_time": 0.2165834903717041 + }, + { + "epoch": 2.378082275390625e-05, + "model_forward_time": 0.024832963943481445, + "step": 15585 + }, + { + "epoch": 2.378082275390625e-05, + "step": 15585, + "training_step_time": 0.11080408096313477 + }, + { + "epoch": 2.37823486328125e-05, + "model_forward_time": 0.02445220947265625, + "step": 15586 + }, + { + "epoch": 2.37823486328125e-05, + "step": 15586, + "training_step_time": 0.14115309715270996 + }, + { + "epoch": 2.378387451171875e-05, + "model_forward_time": 0.025157928466796875, + "step": 15587 + }, + { + "epoch": 2.378387451171875e-05, + "step": 15587, + "training_step_time": 0.16283750534057617 + }, + { + "epoch": 2.3785400390625e-05, + "model_forward_time": 0.025167226791381836, + "step": 15588 + }, + { + "epoch": 2.3785400390625e-05, + "step": 15588, + "training_step_time": 0.2176222801208496 + }, + { + "epoch": 2.378692626953125e-05, + "model_forward_time": 0.024124622344970703, + "step": 15589 + }, + { + "epoch": 2.378692626953125e-05, + "step": 15589, + "training_step_time": 0.10517382621765137 + }, + { + "epoch": 2.37884521484375e-05, + "grad_norm": 0.2896489202976227, + "learning_rate": 5.088180485145378e-05, + "loss": 0.0114, + "step": 15590 + }, + { + "epoch": 2.37884521484375e-05, + "model_forward_time": 0.024439334869384766, + "step": 15590 + }, + { + "epoch": 2.37884521484375e-05, + "step": 15590, + "training_step_time": 0.10289835929870605 + }, + { + "epoch": 2.378997802734375e-05, + "model_forward_time": 0.025264739990234375, + "step": 15591 + }, + { + "epoch": 2.378997802734375e-05, + "step": 15591, + "training_step_time": 0.1031949520111084 + }, + { + "epoch": 2.379150390625e-05, + "model_forward_time": 0.025368213653564453, + "step": 15592 + }, + { + "epoch": 2.379150390625e-05, + "step": 15592, + "training_step_time": 0.10387086868286133 + }, + { + "epoch": 2.379302978515625e-05, + "model_forward_time": 0.02516484260559082, + "step": 15593 + }, + { + "epoch": 2.379302978515625e-05, + "step": 15593, + "training_step_time": 0.10605406761169434 + }, + { + "epoch": 2.37945556640625e-05, + "model_forward_time": 0.025130033493041992, + "step": 15594 + }, + { + "epoch": 2.37945556640625e-05, + "step": 15594, + "training_step_time": 0.10422801971435547 + }, + { + "epoch": 2.379608154296875e-05, + "model_forward_time": 0.02509903907775879, + "step": 15595 + }, + { + "epoch": 2.379608154296875e-05, + "step": 15595, + "training_step_time": 0.11664748191833496 + }, + { + "epoch": 2.3797607421875e-05, + "model_forward_time": 0.025197744369506836, + "step": 15596 + }, + { + "epoch": 2.3797607421875e-05, + "step": 15596, + "training_step_time": 0.13244318962097168 + }, + { + "epoch": 2.379913330078125e-05, + "model_forward_time": 0.02502298355102539, + "step": 15597 + }, + { + "epoch": 2.379913330078125e-05, + "step": 15597, + "training_step_time": 0.1247413158416748 + }, + { + "epoch": 2.38006591796875e-05, + "model_forward_time": 0.024704456329345703, + "step": 15598 + }, + { + "epoch": 2.38006591796875e-05, + "step": 15598, + "training_step_time": 0.1258080005645752 + }, + { + "epoch": 2.380218505859375e-05, + "model_forward_time": 0.02482008934020996, + "step": 15599 + }, + { + "epoch": 2.380218505859375e-05, + "step": 15599, + "training_step_time": 0.12758708000183105 + }, + { + "epoch": 2.38037109375e-05, + "grad_norm": 0.16501736640930176, + "learning_rate": 5.0826697238317935e-05, + "loss": 0.0101, + "step": 15600 + }, + { + "epoch": 2.38037109375e-05, + "model_forward_time": 0.024660587310791016, + "step": 15600 + }, + { + "epoch": 2.38037109375e-05, + "step": 15600, + "training_step_time": 0.1298818588256836 + }, + { + "epoch": 2.380523681640625e-05, + "model_forward_time": 0.02463245391845703, + "step": 15601 + }, + { + "epoch": 2.380523681640625e-05, + "step": 15601, + "training_step_time": 0.1278674602508545 + }, + { + "epoch": 2.38067626953125e-05, + "model_forward_time": 0.025204896926879883, + "step": 15602 + }, + { + "epoch": 2.38067626953125e-05, + "step": 15602, + "training_step_time": 0.12260246276855469 + }, + { + "epoch": 2.380828857421875e-05, + "model_forward_time": 0.02535843849182129, + "step": 15603 + }, + { + "epoch": 2.380828857421875e-05, + "step": 15603, + "training_step_time": 0.1605980396270752 + }, + { + "epoch": 2.3809814453125e-05, + "model_forward_time": 0.024189233779907227, + "step": 15604 + }, + { + "epoch": 2.3809814453125e-05, + "step": 15604, + "training_step_time": 0.13411641120910645 + }, + { + "epoch": 2.381134033203125e-05, + "model_forward_time": 0.024723529815673828, + "step": 15605 + }, + { + "epoch": 2.381134033203125e-05, + "step": 15605, + "training_step_time": 0.11251378059387207 + }, + { + "epoch": 2.38128662109375e-05, + "model_forward_time": 0.025576353073120117, + "step": 15606 + }, + { + "epoch": 2.38128662109375e-05, + "step": 15606, + "training_step_time": 0.11512970924377441 + }, + { + "epoch": 2.381439208984375e-05, + "model_forward_time": 0.025397539138793945, + "step": 15607 + }, + { + "epoch": 2.381439208984375e-05, + "step": 15607, + "training_step_time": 0.11396622657775879 + }, + { + "epoch": 2.381591796875e-05, + "model_forward_time": 0.02524399757385254, + "step": 15608 + }, + { + "epoch": 2.381591796875e-05, + "step": 15608, + "training_step_time": 0.1091468334197998 + }, + { + "epoch": 2.381744384765625e-05, + "model_forward_time": 0.02525162696838379, + "step": 15609 + }, + { + "epoch": 2.381744384765625e-05, + "step": 15609, + "training_step_time": 0.18956470489501953 + }, + { + "epoch": 2.38189697265625e-05, + "grad_norm": 0.2760356068611145, + "learning_rate": 5.077158862066699e-05, + "loss": 0.0159, + "step": 15610 + }, + { + "epoch": 2.38189697265625e-05, + "model_forward_time": 0.024942636489868164, + "step": 15610 + }, + { + "epoch": 2.38189697265625e-05, + "step": 15610, + "training_step_time": 0.10869026184082031 + }, + { + "epoch": 2.382049560546875e-05, + "model_forward_time": 0.024528980255126953, + "step": 15611 + }, + { + "epoch": 2.382049560546875e-05, + "step": 15611, + "training_step_time": 0.10511445999145508 + }, + { + "epoch": 2.3822021484375e-05, + "model_forward_time": 0.025424957275390625, + "step": 15612 + }, + { + "epoch": 2.3822021484375e-05, + "step": 15612, + "training_step_time": 0.10639476776123047 + }, + { + "epoch": 2.382354736328125e-05, + "model_forward_time": 0.02530527114868164, + "step": 15613 + }, + { + "epoch": 2.382354736328125e-05, + "step": 15613, + "training_step_time": 0.1054840087890625 + }, + { + "epoch": 2.38250732421875e-05, + "model_forward_time": 0.025310754776000977, + "step": 15614 + }, + { + "epoch": 2.38250732421875e-05, + "step": 15614, + "training_step_time": 0.1055150032043457 + }, + { + "epoch": 2.382659912109375e-05, + "model_forward_time": 0.025423049926757812, + "step": 15615 + }, + { + "epoch": 2.382659912109375e-05, + "step": 15615, + "training_step_time": 0.10662078857421875 + }, + { + "epoch": 2.3828125e-05, + "model_forward_time": 0.025767087936401367, + "step": 15616 + }, + { + "epoch": 2.3828125e-05, + "step": 15616, + "training_step_time": 0.1063082218170166 + }, + { + "epoch": 2.382965087890625e-05, + "model_forward_time": 0.025623798370361328, + "step": 15617 + }, + { + "epoch": 2.382965087890625e-05, + "step": 15617, + "training_step_time": 0.10524296760559082 + }, + { + "epoch": 2.38311767578125e-05, + "model_forward_time": 0.025317907333374023, + "step": 15618 + }, + { + "epoch": 2.38311767578125e-05, + "step": 15618, + "training_step_time": 0.10434246063232422 + }, + { + "epoch": 2.383270263671875e-05, + "model_forward_time": 0.025449752807617188, + "step": 15619 + }, + { + "epoch": 2.383270263671875e-05, + "step": 15619, + "training_step_time": 0.10587954521179199 + }, + { + "epoch": 2.3834228515625e-05, + "grad_norm": 0.2215505689382553, + "learning_rate": 5.071647906546312e-05, + "loss": 0.0085, + "step": 15620 + }, + { + "epoch": 2.3834228515625e-05, + "model_forward_time": 0.025365352630615234, + "step": 15620 + }, + { + "epoch": 2.3834228515625e-05, + "step": 15620, + "training_step_time": 0.10589313507080078 + }, + { + "epoch": 2.383575439453125e-05, + "model_forward_time": 0.025188922882080078, + "step": 15621 + }, + { + "epoch": 2.383575439453125e-05, + "step": 15621, + "training_step_time": 0.10543441772460938 + }, + { + "epoch": 2.38372802734375e-05, + "model_forward_time": 0.025354385375976562, + "step": 15622 + }, + { + "epoch": 2.38372802734375e-05, + "step": 15622, + "training_step_time": 0.2088792324066162 + }, + { + "epoch": 2.383880615234375e-05, + "model_forward_time": 0.024627685546875, + "step": 15623 + }, + { + "epoch": 2.383880615234375e-05, + "step": 15623, + "training_step_time": 0.20875072479248047 + }, + { + "epoch": 2.384033203125e-05, + "model_forward_time": 0.024672508239746094, + "step": 15624 + }, + { + "epoch": 2.384033203125e-05, + "step": 15624, + "training_step_time": 0.14546966552734375 + }, + { + "epoch": 2.384185791015625e-05, + "model_forward_time": 0.02501964569091797, + "step": 15625 + }, + { + "epoch": 2.384185791015625e-05, + "step": 15625, + "training_step_time": 0.12274909019470215 + }, + { + "epoch": 2.38433837890625e-05, + "model_forward_time": 0.02538156509399414, + "step": 15626 + }, + { + "epoch": 2.38433837890625e-05, + "step": 15626, + "training_step_time": 0.11711645126342773 + }, + { + "epoch": 2.384490966796875e-05, + "model_forward_time": 0.025151491165161133, + "step": 15627 + }, + { + "epoch": 2.384490966796875e-05, + "step": 15627, + "training_step_time": 0.19344210624694824 + }, + { + "epoch": 2.3846435546875e-05, + "model_forward_time": 0.02436089515686035, + "step": 15628 + }, + { + "epoch": 2.3846435546875e-05, + "step": 15628, + "training_step_time": 0.22031569480895996 + }, + { + "epoch": 2.384796142578125e-05, + "model_forward_time": 0.0250244140625, + "step": 15629 + }, + { + "epoch": 2.384796142578125e-05, + "step": 15629, + "training_step_time": 0.1169736385345459 + }, + { + "epoch": 2.38494873046875e-05, + "grad_norm": 0.26977258920669556, + "learning_rate": 5.066136863966963e-05, + "loss": 0.013, + "step": 15630 + }, + { + "epoch": 2.38494873046875e-05, + "model_forward_time": 0.024294614791870117, + "step": 15630 + }, + { + "epoch": 2.38494873046875e-05, + "step": 15630, + "training_step_time": 0.1336665153503418 + }, + { + "epoch": 2.385101318359375e-05, + "model_forward_time": 0.025176525115966797, + "step": 15631 + }, + { + "epoch": 2.385101318359375e-05, + "step": 15631, + "training_step_time": 0.15837621688842773 + }, + { + "epoch": 2.38525390625e-05, + "model_forward_time": 0.024575471878051758, + "step": 15632 + }, + { + "epoch": 2.38525390625e-05, + "step": 15632, + "training_step_time": 0.22232365608215332 + }, + { + "epoch": 2.385406494140625e-05, + "model_forward_time": 0.02417302131652832, + "step": 15633 + }, + { + "epoch": 2.385406494140625e-05, + "step": 15633, + "training_step_time": 0.11557555198669434 + }, + { + "epoch": 2.38555908203125e-05, + "model_forward_time": 0.024214982986450195, + "step": 15634 + }, + { + "epoch": 2.38555908203125e-05, + "step": 15634, + "training_step_time": 0.10381317138671875 + }, + { + "epoch": 2.385711669921875e-05, + "model_forward_time": 0.02511119842529297, + "step": 15635 + }, + { + "epoch": 2.385711669921875e-05, + "step": 15635, + "training_step_time": 0.10708165168762207 + }, + { + "epoch": 2.3858642578125e-05, + "model_forward_time": 0.025550365447998047, + "step": 15636 + }, + { + "epoch": 2.3858642578125e-05, + "step": 15636, + "training_step_time": 0.1111898422241211 + }, + { + "epoch": 2.386016845703125e-05, + "model_forward_time": 0.025110483169555664, + "step": 15637 + }, + { + "epoch": 2.386016845703125e-05, + "step": 15637, + "training_step_time": 0.1081082820892334 + }, + { + "epoch": 2.38616943359375e-05, + "model_forward_time": 0.025223255157470703, + "step": 15638 + }, + { + "epoch": 2.38616943359375e-05, + "step": 15638, + "training_step_time": 0.1057896614074707 + }, + { + "epoch": 2.386322021484375e-05, + "model_forward_time": 0.0266420841217041, + "step": 15639 + }, + { + "epoch": 2.386322021484375e-05, + "step": 15639, + "training_step_time": 0.10665774345397949 + }, + { + "epoch": 2.386474609375e-05, + "grad_norm": 0.23721297085285187, + "learning_rate": 5.0606257410250866e-05, + "loss": 0.0118, + "step": 15640 + }, + { + "epoch": 2.386474609375e-05, + "model_forward_time": 0.0257110595703125, + "step": 15640 + }, + { + "epoch": 2.386474609375e-05, + "step": 15640, + "training_step_time": 0.10663628578186035 + }, + { + "epoch": 2.386627197265625e-05, + "model_forward_time": 0.025228500366210938, + "step": 15641 + }, + { + "epoch": 2.386627197265625e-05, + "step": 15641, + "training_step_time": 0.10524153709411621 + }, + { + "epoch": 2.38677978515625e-05, + "model_forward_time": 0.025109052658081055, + "step": 15642 + }, + { + "epoch": 2.38677978515625e-05, + "step": 15642, + "training_step_time": 0.10617804527282715 + }, + { + "epoch": 2.386932373046875e-05, + "model_forward_time": 0.025444984436035156, + "step": 15643 + }, + { + "epoch": 2.386932373046875e-05, + "step": 15643, + "training_step_time": 0.10571408271789551 + }, + { + "epoch": 2.3870849609375e-05, + "model_forward_time": 0.024799823760986328, + "step": 15644 + }, + { + "epoch": 2.3870849609375e-05, + "step": 15644, + "training_step_time": 0.10444450378417969 + }, + { + "epoch": 2.387237548828125e-05, + "model_forward_time": 0.025690317153930664, + "step": 15645 + }, + { + "epoch": 2.387237548828125e-05, + "step": 15645, + "training_step_time": 0.10672354698181152 + }, + { + "epoch": 2.38739013671875e-05, + "model_forward_time": 0.0254669189453125, + "step": 15646 + }, + { + "epoch": 2.38739013671875e-05, + "step": 15646, + "training_step_time": 0.10465312004089355 + }, + { + "epoch": 2.387542724609375e-05, + "model_forward_time": 0.02547621726989746, + "step": 15647 + }, + { + "epoch": 2.387542724609375e-05, + "step": 15647, + "training_step_time": 0.10809636116027832 + }, + { + "epoch": 2.3876953125e-05, + "model_forward_time": 0.025116920471191406, + "step": 15648 + }, + { + "epoch": 2.3876953125e-05, + "step": 15648, + "training_step_time": 0.10679912567138672 + }, + { + "epoch": 2.387847900390625e-05, + "model_forward_time": 0.0255887508392334, + "step": 15649 + }, + { + "epoch": 2.387847900390625e-05, + "step": 15649, + "training_step_time": 0.10684967041015625 + }, + { + "epoch": 2.38800048828125e-05, + "grad_norm": 0.12469741702079773, + "learning_rate": 5.0551145444172186e-05, + "loss": 0.0124, + "step": 15650 + }, + { + "epoch": 2.38800048828125e-05, + "model_forward_time": 0.025740861892700195, + "step": 15650 + }, + { + "epoch": 2.38800048828125e-05, + "step": 15650, + "training_step_time": 0.12513995170593262 + }, + { + "epoch": 2.388153076171875e-05, + "model_forward_time": 0.025266408920288086, + "step": 15651 + }, + { + "epoch": 2.388153076171875e-05, + "step": 15651, + "training_step_time": 0.10985207557678223 + }, + { + "epoch": 2.3883056640625e-05, + "model_forward_time": 0.02570033073425293, + "step": 15652 + }, + { + "epoch": 2.3883056640625e-05, + "step": 15652, + "training_step_time": 0.11091756820678711 + }, + { + "epoch": 2.388458251953125e-05, + "model_forward_time": 0.025640487670898438, + "step": 15653 + }, + { + "epoch": 2.388458251953125e-05, + "step": 15653, + "training_step_time": 0.1194925308227539 + }, + { + "epoch": 2.38861083984375e-05, + "model_forward_time": 0.02581954002380371, + "step": 15654 + }, + { + "epoch": 2.38861083984375e-05, + "step": 15654, + "training_step_time": 0.10608649253845215 + }, + { + "epoch": 2.388763427734375e-05, + "model_forward_time": 0.025385141372680664, + "step": 15655 + }, + { + "epoch": 2.388763427734375e-05, + "step": 15655, + "training_step_time": 0.18947172164916992 + }, + { + "epoch": 2.388916015625e-05, + "model_forward_time": 0.025116682052612305, + "step": 15656 + }, + { + "epoch": 2.388916015625e-05, + "step": 15656, + "training_step_time": 0.10415387153625488 + }, + { + "epoch": 2.389068603515625e-05, + "model_forward_time": 0.02492046356201172, + "step": 15657 + }, + { + "epoch": 2.389068603515625e-05, + "step": 15657, + "training_step_time": 0.1020052433013916 + }, + { + "epoch": 2.38922119140625e-05, + "model_forward_time": 0.02547311782836914, + "step": 15658 + }, + { + "epoch": 2.38922119140625e-05, + "step": 15658, + "training_step_time": 0.10596990585327148 + }, + { + "epoch": 2.389373779296875e-05, + "model_forward_time": 0.025180578231811523, + "step": 15659 + }, + { + "epoch": 2.389373779296875e-05, + "step": 15659, + "training_step_time": 0.10626721382141113 + }, + { + "epoch": 2.3895263671875e-05, + "grad_norm": 0.3579079210758209, + "learning_rate": 5.0496032808399815e-05, + "loss": 0.0146, + "step": 15660 + }, + { + "epoch": 2.3895263671875e-05, + "model_forward_time": 0.025380373001098633, + "step": 15660 + }, + { + "epoch": 2.3895263671875e-05, + "step": 15660, + "training_step_time": 0.10590934753417969 + }, + { + "epoch": 2.389678955078125e-05, + "model_forward_time": 0.02711939811706543, + "step": 15661 + }, + { + "epoch": 2.389678955078125e-05, + "step": 15661, + "training_step_time": 0.10676169395446777 + }, + { + "epoch": 2.38983154296875e-05, + "model_forward_time": 0.025482892990112305, + "step": 15662 + }, + { + "epoch": 2.38983154296875e-05, + "step": 15662, + "training_step_time": 0.10722780227661133 + }, + { + "epoch": 2.389984130859375e-05, + "model_forward_time": 0.025774717330932617, + "step": 15663 + }, + { + "epoch": 2.389984130859375e-05, + "step": 15663, + "training_step_time": 0.10567951202392578 + }, + { + "epoch": 2.39013671875e-05, + "model_forward_time": 0.025280237197875977, + "step": 15664 + }, + { + "epoch": 2.39013671875e-05, + "step": 15664, + "training_step_time": 0.10564327239990234 + }, + { + "epoch": 2.390289306640625e-05, + "model_forward_time": 0.025325298309326172, + "step": 15665 + }, + { + "epoch": 2.390289306640625e-05, + "step": 15665, + "training_step_time": 0.10602641105651855 + }, + { + "epoch": 2.39044189453125e-05, + "model_forward_time": 0.02639174461364746, + "step": 15666 + }, + { + "epoch": 2.39044189453125e-05, + "step": 15666, + "training_step_time": 0.10949540138244629 + }, + { + "epoch": 2.390594482421875e-05, + "model_forward_time": 0.02569580078125, + "step": 15667 + }, + { + "epoch": 2.390594482421875e-05, + "step": 15667, + "training_step_time": 0.10619878768920898 + }, + { + "epoch": 2.3907470703125e-05, + "model_forward_time": 0.025929927825927734, + "step": 15668 + }, + { + "epoch": 2.3907470703125e-05, + "step": 15668, + "training_step_time": 0.1085813045501709 + }, + { + "epoch": 2.390899658203125e-05, + "model_forward_time": 0.02612757682800293, + "step": 15669 + }, + { + "epoch": 2.390899658203125e-05, + "step": 15669, + "training_step_time": 0.1049036979675293 + }, + { + "epoch": 2.39105224609375e-05, + "grad_norm": 0.3659554421901703, + "learning_rate": 5.0440919569900835e-05, + "loss": 0.0121, + "step": 15670 + }, + { + "epoch": 2.39105224609375e-05, + "model_forward_time": 0.024453163146972656, + "step": 15670 + }, + { + "epoch": 2.39105224609375e-05, + "step": 15670, + "training_step_time": 0.14203572273254395 + }, + { + "epoch": 2.391204833984375e-05, + "model_forward_time": 0.025274276733398438, + "step": 15671 + }, + { + "epoch": 2.391204833984375e-05, + "step": 15671, + "training_step_time": 0.11493611335754395 + }, + { + "epoch": 2.391357421875e-05, + "model_forward_time": 0.025693893432617188, + "step": 15672 + }, + { + "epoch": 2.391357421875e-05, + "step": 15672, + "training_step_time": 0.19053339958190918 + }, + { + "epoch": 2.391510009765625e-05, + "model_forward_time": 0.024596691131591797, + "step": 15673 + }, + { + "epoch": 2.391510009765625e-05, + "step": 15673, + "training_step_time": 0.17298626899719238 + }, + { + "epoch": 2.39166259765625e-05, + "model_forward_time": 0.02466559410095215, + "step": 15674 + }, + { + "epoch": 2.39166259765625e-05, + "step": 15674, + "training_step_time": 0.16990995407104492 + }, + { + "epoch": 2.391815185546875e-05, + "model_forward_time": 0.024994611740112305, + "step": 15675 + }, + { + "epoch": 2.391815185546875e-05, + "step": 15675, + "training_step_time": 0.1241142749786377 + }, + { + "epoch": 2.3919677734375e-05, + "model_forward_time": 0.024948596954345703, + "step": 15676 + }, + { + "epoch": 2.3919677734375e-05, + "step": 15676, + "training_step_time": 0.1110687255859375 + }, + { + "epoch": 2.392120361328125e-05, + "model_forward_time": 0.02531599998474121, + "step": 15677 + }, + { + "epoch": 2.392120361328125e-05, + "step": 15677, + "training_step_time": 0.13848352432250977 + }, + { + "epoch": 2.39227294921875e-05, + "model_forward_time": 0.026212453842163086, + "step": 15678 + }, + { + "epoch": 2.39227294921875e-05, + "step": 15678, + "training_step_time": 0.1585986614227295 + }, + { + "epoch": 2.392425537109375e-05, + "model_forward_time": 0.024724245071411133, + "step": 15679 + }, + { + "epoch": 2.392425537109375e-05, + "step": 15679, + "training_step_time": 0.17809128761291504 + }, + { + "epoch": 2.392578125e-05, + "grad_norm": 0.20290325582027435, + "learning_rate": 5.038580579564298e-05, + "loss": 0.0081, + "step": 15680 + }, + { + "epoch": 2.392578125e-05, + "model_forward_time": 0.024472951889038086, + "step": 15680 + }, + { + "epoch": 2.392578125e-05, + "step": 15680, + "training_step_time": 0.16106295585632324 + }, + { + "epoch": 2.392730712890625e-05, + "model_forward_time": 0.024515867233276367, + "step": 15681 + }, + { + "epoch": 2.392730712890625e-05, + "step": 15681, + "training_step_time": 0.10512518882751465 + }, + { + "epoch": 2.39288330078125e-05, + "model_forward_time": 0.025154829025268555, + "step": 15682 + }, + { + "epoch": 2.39288330078125e-05, + "step": 15682, + "training_step_time": 0.10563302040100098 + }, + { + "epoch": 2.393035888671875e-05, + "model_forward_time": 0.025299787521362305, + "step": 15683 + }, + { + "epoch": 2.393035888671875e-05, + "step": 15683, + "training_step_time": 0.1065983772277832 + }, + { + "epoch": 2.3931884765625e-05, + "model_forward_time": 0.02611517906188965, + "step": 15684 + }, + { + "epoch": 2.3931884765625e-05, + "step": 15684, + "training_step_time": 0.11059141159057617 + }, + { + "epoch": 2.393341064453125e-05, + "model_forward_time": 0.02445840835571289, + "step": 15685 + }, + { + "epoch": 2.393341064453125e-05, + "step": 15685, + "training_step_time": 0.11269688606262207 + }, + { + "epoch": 2.39349365234375e-05, + "model_forward_time": 0.0266571044921875, + "step": 15686 + }, + { + "epoch": 2.39349365234375e-05, + "step": 15686, + "training_step_time": 0.1074678897857666 + }, + { + "epoch": 2.393646240234375e-05, + "model_forward_time": 0.02553415298461914, + "step": 15687 + }, + { + "epoch": 2.393646240234375e-05, + "step": 15687, + "training_step_time": 0.10420751571655273 + }, + { + "epoch": 2.393798828125e-05, + "model_forward_time": 0.025606870651245117, + "step": 15688 + }, + { + "epoch": 2.393798828125e-05, + "step": 15688, + "training_step_time": 0.10428428649902344 + }, + { + "epoch": 2.393951416015625e-05, + "model_forward_time": 0.025835752487182617, + "step": 15689 + }, + { + "epoch": 2.393951416015625e-05, + "step": 15689, + "training_step_time": 0.10545182228088379 + }, + { + "epoch": 2.39410400390625e-05, + "grad_norm": 0.1512109339237213, + "learning_rate": 5.033069155259471e-05, + "loss": 0.0118, + "step": 15690 + }, + { + "epoch": 2.39410400390625e-05, + "model_forward_time": 0.02567434310913086, + "step": 15690 + }, + { + "epoch": 2.39410400390625e-05, + "step": 15690, + "training_step_time": 0.10708451271057129 + }, + { + "epoch": 2.394256591796875e-05, + "model_forward_time": 0.025406837463378906, + "step": 15691 + }, + { + "epoch": 2.394256591796875e-05, + "step": 15691, + "training_step_time": 0.10428786277770996 + }, + { + "epoch": 2.3944091796875e-05, + "model_forward_time": 0.0257265567779541, + "step": 15692 + }, + { + "epoch": 2.3944091796875e-05, + "step": 15692, + "training_step_time": 0.10521364212036133 + }, + { + "epoch": 2.394561767578125e-05, + "model_forward_time": 0.025312423706054688, + "step": 15693 + }, + { + "epoch": 2.394561767578125e-05, + "step": 15693, + "training_step_time": 0.1078634262084961 + }, + { + "epoch": 2.39471435546875e-05, + "model_forward_time": 0.026050567626953125, + "step": 15694 + }, + { + "epoch": 2.39471435546875e-05, + "step": 15694, + "training_step_time": 0.1060945987701416 + }, + { + "epoch": 2.394866943359375e-05, + "model_forward_time": 0.02522110939025879, + "step": 15695 + }, + { + "epoch": 2.394866943359375e-05, + "step": 15695, + "training_step_time": 0.1257786750793457 + }, + { + "epoch": 2.39501953125e-05, + "model_forward_time": 0.025350093841552734, + "step": 15696 + }, + { + "epoch": 2.39501953125e-05, + "step": 15696, + "training_step_time": 0.1403203010559082 + }, + { + "epoch": 2.395172119140625e-05, + "model_forward_time": 0.025290489196777344, + "step": 15697 + }, + { + "epoch": 2.395172119140625e-05, + "step": 15697, + "training_step_time": 0.10766243934631348 + }, + { + "epoch": 2.39532470703125e-05, + "model_forward_time": 0.02566242218017578, + "step": 15698 + }, + { + "epoch": 2.39532470703125e-05, + "step": 15698, + "training_step_time": 0.1178436279296875 + }, + { + "epoch": 2.395477294921875e-05, + "model_forward_time": 0.02545452117919922, + "step": 15699 + }, + { + "epoch": 2.395477294921875e-05, + "step": 15699, + "training_step_time": 0.11051583290100098 + }, + { + "epoch": 2.3956298828125e-05, + "grad_norm": 0.37655261158943176, + "learning_rate": 5.027557690772503e-05, + "loss": 0.0153, + "step": 15700 + }, + { + "epoch": 2.3956298828125e-05, + "model_forward_time": 0.02591085433959961, + "step": 15700 + }, + { + "epoch": 2.3956298828125e-05, + "step": 15700, + "training_step_time": 0.10465764999389648 + }, + { + "epoch": 2.395782470703125e-05, + "model_forward_time": 0.026204347610473633, + "step": 15701 + }, + { + "epoch": 2.395782470703125e-05, + "step": 15701, + "training_step_time": 0.19267773628234863 + }, + { + "epoch": 2.39593505859375e-05, + "model_forward_time": 0.025120973587036133, + "step": 15702 + }, + { + "epoch": 2.39593505859375e-05, + "step": 15702, + "training_step_time": 0.10290408134460449 + }, + { + "epoch": 2.396087646484375e-05, + "model_forward_time": 0.025056123733520508, + "step": 15703 + }, + { + "epoch": 2.396087646484375e-05, + "step": 15703, + "training_step_time": 0.10310602188110352 + }, + { + "epoch": 2.396240234375e-05, + "model_forward_time": 0.02574896812438965, + "step": 15704 + }, + { + "epoch": 2.396240234375e-05, + "step": 15704, + "training_step_time": 0.10790634155273438 + }, + { + "epoch": 2.396392822265625e-05, + "model_forward_time": 0.025341510772705078, + "step": 15705 + }, + { + "epoch": 2.396392822265625e-05, + "step": 15705, + "training_step_time": 0.17306828498840332 + }, + { + "epoch": 2.39654541015625e-05, + "model_forward_time": 0.02524876594543457, + "step": 15706 + }, + { + "epoch": 2.39654541015625e-05, + "step": 15706, + "training_step_time": 0.1821305751800537 + }, + { + "epoch": 2.396697998046875e-05, + "model_forward_time": 0.024592876434326172, + "step": 15707 + }, + { + "epoch": 2.396697998046875e-05, + "step": 15707, + "training_step_time": 0.1700141429901123 + }, + { + "epoch": 2.3968505859375e-05, + "model_forward_time": 0.02700018882751465, + "step": 15708 + }, + { + "epoch": 2.3968505859375e-05, + "step": 15708, + "training_step_time": 0.16891098022460938 + }, + { + "epoch": 2.397003173828125e-05, + "model_forward_time": 0.024342060089111328, + "step": 15709 + }, + { + "epoch": 2.397003173828125e-05, + "step": 15709, + "training_step_time": 0.15590286254882812 + }, + { + "epoch": 2.39715576171875e-05, + "grad_norm": 0.19053003191947937, + "learning_rate": 5.0220461928003406e-05, + "loss": 0.0157, + "step": 15710 + }, + { + "epoch": 2.39715576171875e-05, + "model_forward_time": 0.024369001388549805, + "step": 15710 + }, + { + "epoch": 2.39715576171875e-05, + "step": 15710, + "training_step_time": 0.14293909072875977 + }, + { + "epoch": 2.397308349609375e-05, + "model_forward_time": 0.024281024932861328, + "step": 15711 + }, + { + "epoch": 2.397308349609375e-05, + "step": 15711, + "training_step_time": 0.13981842994689941 + }, + { + "epoch": 2.3974609375e-05, + "model_forward_time": 0.02434992790222168, + "step": 15712 + }, + { + "epoch": 2.3974609375e-05, + "step": 15712, + "training_step_time": 0.1496868133544922 + }, + { + "epoch": 2.397613525390625e-05, + "model_forward_time": 0.024439096450805664, + "step": 15713 + }, + { + "epoch": 2.397613525390625e-05, + "step": 15713, + "training_step_time": 0.10545682907104492 + }, + { + "epoch": 2.39776611328125e-05, + "model_forward_time": 0.025188922882080078, + "step": 15714 + }, + { + "epoch": 2.39776611328125e-05, + "step": 15714, + "training_step_time": 0.1512157917022705 + }, + { + "epoch": 2.397918701171875e-05, + "model_forward_time": 0.025780916213989258, + "step": 15715 + }, + { + "epoch": 2.397918701171875e-05, + "step": 15715, + "training_step_time": 0.19835686683654785 + }, + { + "epoch": 2.3980712890625e-05, + "model_forward_time": 0.024177074432373047, + "step": 15716 + }, + { + "epoch": 2.3980712890625e-05, + "step": 15716, + "training_step_time": 0.12496805191040039 + }, + { + "epoch": 2.398223876953125e-05, + "model_forward_time": 0.025367259979248047, + "step": 15717 + }, + { + "epoch": 2.398223876953125e-05, + "step": 15717, + "training_step_time": 0.10844254493713379 + }, + { + "epoch": 2.39837646484375e-05, + "model_forward_time": 0.02589869499206543, + "step": 15718 + }, + { + "epoch": 2.39837646484375e-05, + "step": 15718, + "training_step_time": 0.12061381340026855 + }, + { + "epoch": 2.398529052734375e-05, + "model_forward_time": 0.02615499496459961, + "step": 15719 + }, + { + "epoch": 2.398529052734375e-05, + "step": 15719, + "training_step_time": 0.18742942810058594 + }, + { + "epoch": 2.398681640625e-05, + "grad_norm": 0.20142294466495514, + "learning_rate": 5.016534668039976e-05, + "loss": 0.0081, + "step": 15720 + }, + { + "epoch": 2.398681640625e-05, + "model_forward_time": 0.024407148361206055, + "step": 15720 + }, + { + "epoch": 2.398681640625e-05, + "step": 15720, + "training_step_time": 0.11683297157287598 + }, + { + "epoch": 2.398834228515625e-05, + "model_forward_time": 0.024501562118530273, + "step": 15721 + }, + { + "epoch": 2.398834228515625e-05, + "step": 15721, + "training_step_time": 0.130723237991333 + }, + { + "epoch": 2.39898681640625e-05, + "model_forward_time": 0.0258944034576416, + "step": 15722 + }, + { + "epoch": 2.39898681640625e-05, + "step": 15722, + "training_step_time": 0.1405935287475586 + }, + { + "epoch": 2.399139404296875e-05, + "model_forward_time": 0.025165319442749023, + "step": 15723 + }, + { + "epoch": 2.399139404296875e-05, + "step": 15723, + "training_step_time": 0.12010598182678223 + }, + { + "epoch": 2.3992919921875e-05, + "model_forward_time": 0.025543212890625, + "step": 15724 + }, + { + "epoch": 2.3992919921875e-05, + "step": 15724, + "training_step_time": 0.12578797340393066 + }, + { + "epoch": 2.399444580078125e-05, + "model_forward_time": 0.025387287139892578, + "step": 15725 + }, + { + "epoch": 2.399444580078125e-05, + "step": 15725, + "training_step_time": 0.1106715202331543 + }, + { + "epoch": 2.39959716796875e-05, + "model_forward_time": 0.02564239501953125, + "step": 15726 + }, + { + "epoch": 2.39959716796875e-05, + "step": 15726, + "training_step_time": 0.10779571533203125 + }, + { + "epoch": 2.399749755859375e-05, + "model_forward_time": 0.02602839469909668, + "step": 15727 + }, + { + "epoch": 2.399749755859375e-05, + "step": 15727, + "training_step_time": 0.10991978645324707 + }, + { + "epoch": 2.39990234375e-05, + "model_forward_time": 0.025864362716674805, + "step": 15728 + }, + { + "epoch": 2.39990234375e-05, + "step": 15728, + "training_step_time": 0.10994291305541992 + }, + { + "epoch": 2.400054931640625e-05, + "model_forward_time": 0.02585434913635254, + "step": 15729 + }, + { + "epoch": 2.400054931640625e-05, + "step": 15729, + "training_step_time": 0.1133272647857666 + }, + { + "epoch": 2.40020751953125e-05, + "grad_norm": 0.406608521938324, + "learning_rate": 5.011023123188431e-05, + "loss": 0.0189, + "step": 15730 + }, + { + "epoch": 2.40020751953125e-05, + "model_forward_time": 0.02548384666442871, + "step": 15730 + }, + { + "epoch": 2.40020751953125e-05, + "step": 15730, + "training_step_time": 0.1119084358215332 + }, + { + "epoch": 2.400360107421875e-05, + "model_forward_time": 0.02577519416809082, + "step": 15731 + }, + { + "epoch": 2.400360107421875e-05, + "step": 15731, + "training_step_time": 0.10664033889770508 + }, + { + "epoch": 2.4005126953125e-05, + "model_forward_time": 0.025902509689331055, + "step": 15732 + }, + { + "epoch": 2.4005126953125e-05, + "step": 15732, + "training_step_time": 0.11297035217285156 + }, + { + "epoch": 2.400665283203125e-05, + "model_forward_time": 0.026175737380981445, + "step": 15733 + }, + { + "epoch": 2.400665283203125e-05, + "step": 15733, + "training_step_time": 0.10593867301940918 + }, + { + "epoch": 2.40081787109375e-05, + "model_forward_time": 0.025608539581298828, + "step": 15734 + }, + { + "epoch": 2.40081787109375e-05, + "step": 15734, + "training_step_time": 0.10799932479858398 + }, + { + "epoch": 2.400970458984375e-05, + "model_forward_time": 0.02581620216369629, + "step": 15735 + }, + { + "epoch": 2.400970458984375e-05, + "step": 15735, + "training_step_time": 0.10684680938720703 + }, + { + "epoch": 2.401123046875e-05, + "model_forward_time": 0.025358915328979492, + "step": 15736 + }, + { + "epoch": 2.401123046875e-05, + "step": 15736, + "training_step_time": 0.10815548896789551 + }, + { + "epoch": 2.401275634765625e-05, + "model_forward_time": 0.02623271942138672, + "step": 15737 + }, + { + "epoch": 2.401275634765625e-05, + "step": 15737, + "training_step_time": 0.10978078842163086 + }, + { + "epoch": 2.40142822265625e-05, + "model_forward_time": 0.02579784393310547, + "step": 15738 + }, + { + "epoch": 2.40142822265625e-05, + "step": 15738, + "training_step_time": 0.1045680046081543 + }, + { + "epoch": 2.401580810546875e-05, + "model_forward_time": 0.02588510513305664, + "step": 15739 + }, + { + "epoch": 2.401580810546875e-05, + "step": 15739, + "training_step_time": 0.1911606788635254 + }, + { + "epoch": 2.4017333984375e-05, + "grad_norm": 0.2088557779788971, + "learning_rate": 5.005511564942751e-05, + "loss": 0.0093, + "step": 15740 + }, + { + "epoch": 2.4017333984375e-05, + "model_forward_time": 0.02411818504333496, + "step": 15740 + }, + { + "epoch": 2.4017333984375e-05, + "step": 15740, + "training_step_time": 0.13492608070373535 + }, + { + "epoch": 2.401885986328125e-05, + "model_forward_time": 0.02290797233581543, + "step": 15741 + }, + { + "epoch": 2.401885986328125e-05, + "step": 15741, + "training_step_time": 0.10648155212402344 + }, + { + "epoch": 2.40203857421875e-05, + "model_forward_time": 0.025433063507080078, + "step": 15742 + }, + { + "epoch": 2.40203857421875e-05, + "step": 15742, + "training_step_time": 0.12204146385192871 + }, + { + "epoch": 2.402191162109375e-05, + "model_forward_time": 0.025731563568115234, + "step": 15743 + }, + { + "epoch": 2.402191162109375e-05, + "step": 15743, + "training_step_time": 0.10908007621765137 + }, + { + "epoch": 2.40234375e-05, + "model_forward_time": 0.029944896697998047, + "step": 15744 + }, + { + "epoch": 2.40234375e-05, + "step": 15744, + "training_step_time": 0.10880923271179199 + }, + { + "epoch": 2.402496337890625e-05, + "model_forward_time": 0.025807619094848633, + "step": 15745 + }, + { + "epoch": 2.402496337890625e-05, + "step": 15745, + "training_step_time": 0.19451689720153809 + }, + { + "epoch": 2.40264892578125e-05, + "model_forward_time": 0.02525162696838379, + "step": 15746 + }, + { + "epoch": 2.40264892578125e-05, + "step": 15746, + "training_step_time": 0.10396194458007812 + }, + { + "epoch": 2.402801513671875e-05, + "model_forward_time": 0.025376319885253906, + "step": 15747 + }, + { + "epoch": 2.402801513671875e-05, + "step": 15747, + "training_step_time": 0.10477805137634277 + }, + { + "epoch": 2.4029541015625e-05, + "model_forward_time": 0.025945663452148438, + "step": 15748 + }, + { + "epoch": 2.4029541015625e-05, + "step": 15748, + "training_step_time": 0.10656166076660156 + }, + { + "epoch": 2.403106689453125e-05, + "model_forward_time": 0.025737285614013672, + "step": 15749 + }, + { + "epoch": 2.403106689453125e-05, + "step": 15749, + "training_step_time": 0.10791826248168945 + }, + { + "epoch": 2.40325927734375e-05, + "grad_norm": 0.17635443806648254, + "learning_rate": 5e-05, + "loss": 0.014, + "step": 15750 + }, + { + "epoch": 2.40325927734375e-05, + "model_forward_time": 0.02617049217224121, + "step": 15750 + }, + { + "epoch": 2.40325927734375e-05, + "step": 15750, + "training_step_time": 0.10543346405029297 + }, + { + "epoch": 2.403411865234375e-05, + "model_forward_time": 0.02562546730041504, + "step": 15751 + }, + { + "epoch": 2.403411865234375e-05, + "step": 15751, + "training_step_time": 0.10540437698364258 + }, + { + "epoch": 2.403564453125e-05, + "model_forward_time": 0.028939247131347656, + "step": 15752 + }, + { + "epoch": 2.403564453125e-05, + "step": 15752, + "training_step_time": 0.11156201362609863 + }, + { + "epoch": 2.403717041015625e-05, + "model_forward_time": 0.025876283645629883, + "step": 15753 + }, + { + "epoch": 2.403717041015625e-05, + "step": 15753, + "training_step_time": 0.1595165729522705 + }, + { + "epoch": 2.40386962890625e-05, + "model_forward_time": 0.025485992431640625, + "step": 15754 + }, + { + "epoch": 2.40386962890625e-05, + "step": 15754, + "training_step_time": 0.12966632843017578 + }, + { + "epoch": 2.404022216796875e-05, + "model_forward_time": 0.0247652530670166, + "step": 15755 + }, + { + "epoch": 2.404022216796875e-05, + "step": 15755, + "training_step_time": 0.10863614082336426 + }, + { + "epoch": 2.4041748046875e-05, + "model_forward_time": 0.02574443817138672, + "step": 15756 + }, + { + "epoch": 2.4041748046875e-05, + "step": 15756, + "training_step_time": 0.10725879669189453 + }, + { + "epoch": 2.404327392578125e-05, + "model_forward_time": 0.025397300720214844, + "step": 15757 + }, + { + "epoch": 2.404327392578125e-05, + "step": 15757, + "training_step_time": 0.10614991188049316 + }, + { + "epoch": 2.40447998046875e-05, + "model_forward_time": 0.025533437728881836, + "step": 15758 + }, + { + "epoch": 2.40447998046875e-05, + "step": 15758, + "training_step_time": 0.10601568222045898 + }, + { + "epoch": 2.404632568359375e-05, + "model_forward_time": 0.02973031997680664, + "step": 15759 + }, + { + "epoch": 2.404632568359375e-05, + "step": 15759, + "training_step_time": 0.1150655746459961 + }, + { + "epoch": 2.40478515625e-05, + "grad_norm": 0.3995972275733948, + "learning_rate": 4.994488435057251e-05, + "loss": 0.0122, + "step": 15760 + }, + { + "epoch": 2.40478515625e-05, + "model_forward_time": 0.025236129760742188, + "step": 15760 + }, + { + "epoch": 2.40478515625e-05, + "step": 15760, + "training_step_time": 0.20906376838684082 + }, + { + "epoch": 2.404937744140625e-05, + "model_forward_time": 0.0255582332611084, + "step": 15761 + }, + { + "epoch": 2.404937744140625e-05, + "step": 15761, + "training_step_time": 0.17047595977783203 + }, + { + "epoch": 2.40509033203125e-05, + "model_forward_time": 0.0250546932220459, + "step": 15762 + }, + { + "epoch": 2.40509033203125e-05, + "step": 15762, + "training_step_time": 0.15946364402770996 + }, + { + "epoch": 2.405242919921875e-05, + "model_forward_time": 0.024927854537963867, + "step": 15763 + }, + { + "epoch": 2.405242919921875e-05, + "step": 15763, + "training_step_time": 0.16434407234191895 + }, + { + "epoch": 2.4053955078125e-05, + "model_forward_time": 0.024791955947875977, + "step": 15764 + }, + { + "epoch": 2.4053955078125e-05, + "step": 15764, + "training_step_time": 0.16209888458251953 + }, + { + "epoch": 2.405548095703125e-05, + "model_forward_time": 0.025361061096191406, + "step": 15765 + }, + { + "epoch": 2.405548095703125e-05, + "step": 15765, + "training_step_time": 0.11711454391479492 + }, + { + "epoch": 2.40570068359375e-05, + "model_forward_time": 0.025193452835083008, + "step": 15766 + }, + { + "epoch": 2.40570068359375e-05, + "step": 15766, + "training_step_time": 0.13407039642333984 + }, + { + "epoch": 2.405853271484375e-05, + "model_forward_time": 0.025889158248901367, + "step": 15767 + }, + { + "epoch": 2.405853271484375e-05, + "step": 15767, + "training_step_time": 0.15701603889465332 + }, + { + "epoch": 2.406005859375e-05, + "model_forward_time": 0.025141239166259766, + "step": 15768 + }, + { + "epoch": 2.406005859375e-05, + "step": 15768, + "training_step_time": 0.10872602462768555 + }, + { + "epoch": 2.406158447265625e-05, + "model_forward_time": 0.02553534507751465, + "step": 15769 + }, + { + "epoch": 2.406158447265625e-05, + "step": 15769, + "training_step_time": 0.12163162231445312 + }, + { + "epoch": 2.40631103515625e-05, + "grad_norm": 0.25937697291374207, + "learning_rate": 4.988976876811571e-05, + "loss": 0.0142, + "step": 15770 + }, + { + "epoch": 2.40631103515625e-05, + "model_forward_time": 0.025218486785888672, + "step": 15770 + }, + { + "epoch": 2.40631103515625e-05, + "step": 15770, + "training_step_time": 0.10687446594238281 + }, + { + "epoch": 2.406463623046875e-05, + "model_forward_time": 0.026124000549316406, + "step": 15771 + }, + { + "epoch": 2.406463623046875e-05, + "step": 15771, + "training_step_time": 0.10693860054016113 + }, + { + "epoch": 2.4066162109375e-05, + "model_forward_time": 0.025829315185546875, + "step": 15772 + }, + { + "epoch": 2.4066162109375e-05, + "step": 15772, + "training_step_time": 0.10928893089294434 + }, + { + "epoch": 2.406768798828125e-05, + "model_forward_time": 0.026218175888061523, + "step": 15773 + }, + { + "epoch": 2.406768798828125e-05, + "step": 15773, + "training_step_time": 0.10986065864562988 + }, + { + "epoch": 2.40692138671875e-05, + "model_forward_time": 0.026322364807128906, + "step": 15774 + }, + { + "epoch": 2.40692138671875e-05, + "step": 15774, + "training_step_time": 0.10667562484741211 + }, + { + "epoch": 2.407073974609375e-05, + "model_forward_time": 0.02584385871887207, + "step": 15775 + }, + { + "epoch": 2.407073974609375e-05, + "step": 15775, + "training_step_time": 0.10597348213195801 + }, + { + "epoch": 2.4072265625e-05, + "model_forward_time": 0.02616286277770996, + "step": 15776 + }, + { + "epoch": 2.4072265625e-05, + "step": 15776, + "training_step_time": 0.10861873626708984 + }, + { + "epoch": 2.407379150390625e-05, + "model_forward_time": 0.025615215301513672, + "step": 15777 + }, + { + "epoch": 2.407379150390625e-05, + "step": 15777, + "training_step_time": 0.10670042037963867 + }, + { + "epoch": 2.40753173828125e-05, + "model_forward_time": 0.025829315185546875, + "step": 15778 + }, + { + "epoch": 2.40753173828125e-05, + "step": 15778, + "training_step_time": 0.11101603507995605 + }, + { + "epoch": 2.407684326171875e-05, + "model_forward_time": 0.02596449851989746, + "step": 15779 + }, + { + "epoch": 2.407684326171875e-05, + "step": 15779, + "training_step_time": 0.1047966480255127 + }, + { + "epoch": 2.4078369140625e-05, + "grad_norm": 0.25760772824287415, + "learning_rate": 4.9834653319600246e-05, + "loss": 0.0136, + "step": 15780 + }, + { + "epoch": 2.4078369140625e-05, + "model_forward_time": 0.025949954986572266, + "step": 15780 + }, + { + "epoch": 2.4078369140625e-05, + "step": 15780, + "training_step_time": 0.1060335636138916 + }, + { + "epoch": 2.407989501953125e-05, + "model_forward_time": 0.02608633041381836, + "step": 15781 + }, + { + "epoch": 2.407989501953125e-05, + "step": 15781, + "training_step_time": 0.10626053810119629 + }, + { + "epoch": 2.40814208984375e-05, + "model_forward_time": 0.025837182998657227, + "step": 15782 + }, + { + "epoch": 2.40814208984375e-05, + "step": 15782, + "training_step_time": 0.10515642166137695 + }, + { + "epoch": 2.408294677734375e-05, + "model_forward_time": 0.027230024337768555, + "step": 15783 + }, + { + "epoch": 2.408294677734375e-05, + "step": 15783, + "training_step_time": 0.10765790939331055 + }, + { + "epoch": 2.408447265625e-05, + "model_forward_time": 0.026217937469482422, + "step": 15784 + }, + { + "epoch": 2.408447265625e-05, + "step": 15784, + "training_step_time": 0.10561633110046387 + }, + { + "epoch": 2.408599853515625e-05, + "model_forward_time": 0.026778697967529297, + "step": 15785 + }, + { + "epoch": 2.408599853515625e-05, + "step": 15785, + "training_step_time": 0.11178970336914062 + }, + { + "epoch": 2.40875244140625e-05, + "model_forward_time": 0.025687694549560547, + "step": 15786 + }, + { + "epoch": 2.40875244140625e-05, + "step": 15786, + "training_step_time": 0.14815092086791992 + }, + { + "epoch": 2.408905029296875e-05, + "model_forward_time": 0.026078224182128906, + "step": 15787 + }, + { + "epoch": 2.408905029296875e-05, + "step": 15787, + "training_step_time": 0.10693025588989258 + }, + { + "epoch": 2.4090576171875e-05, + "model_forward_time": 0.025758981704711914, + "step": 15788 + }, + { + "epoch": 2.4090576171875e-05, + "step": 15788, + "training_step_time": 0.11509013175964355 + }, + { + "epoch": 2.409210205078125e-05, + "model_forward_time": 0.029214859008789062, + "step": 15789 + }, + { + "epoch": 2.409210205078125e-05, + "step": 15789, + "training_step_time": 0.112762451171875 + }, + { + "epoch": 2.40936279296875e-05, + "grad_norm": 0.3325439691543579, + "learning_rate": 4.97795380719966e-05, + "loss": 0.021, + "step": 15790 + }, + { + "epoch": 2.40936279296875e-05, + "model_forward_time": 0.02478194236755371, + "step": 15790 + }, + { + "epoch": 2.40936279296875e-05, + "step": 15790, + "training_step_time": 0.11779904365539551 + }, + { + "epoch": 2.409515380859375e-05, + "model_forward_time": 0.02554774284362793, + "step": 15791 + }, + { + "epoch": 2.409515380859375e-05, + "step": 15791, + "training_step_time": 0.17894721031188965 + }, + { + "epoch": 2.40966796875e-05, + "model_forward_time": 0.024927377700805664, + "step": 15792 + }, + { + "epoch": 2.40966796875e-05, + "step": 15792, + "training_step_time": 0.11855912208557129 + }, + { + "epoch": 2.409820556640625e-05, + "model_forward_time": 0.025980234146118164, + "step": 15793 + }, + { + "epoch": 2.409820556640625e-05, + "step": 15793, + "training_step_time": 0.12667489051818848 + }, + { + "epoch": 2.40997314453125e-05, + "model_forward_time": 0.025071382522583008, + "step": 15794 + }, + { + "epoch": 2.40997314453125e-05, + "step": 15794, + "training_step_time": 0.11824154853820801 + }, + { + "epoch": 2.410125732421875e-05, + "model_forward_time": 0.025911331176757812, + "step": 15795 + }, + { + "epoch": 2.410125732421875e-05, + "step": 15795, + "training_step_time": 0.11380553245544434 + }, + { + "epoch": 2.4102783203125e-05, + "model_forward_time": 0.025363683700561523, + "step": 15796 + }, + { + "epoch": 2.4102783203125e-05, + "step": 15796, + "training_step_time": 0.11666440963745117 + }, + { + "epoch": 2.410430908203125e-05, + "model_forward_time": 0.025876760482788086, + "step": 15797 + }, + { + "epoch": 2.410430908203125e-05, + "step": 15797, + "training_step_time": 0.10966110229492188 + }, + { + "epoch": 2.41058349609375e-05, + "model_forward_time": 0.026041269302368164, + "step": 15798 + }, + { + "epoch": 2.41058349609375e-05, + "step": 15798, + "training_step_time": 0.1086728572845459 + }, + { + "epoch": 2.410736083984375e-05, + "model_forward_time": 0.025957822799682617, + "step": 15799 + }, + { + "epoch": 2.410736083984375e-05, + "step": 15799, + "training_step_time": 0.10919523239135742 + }, + { + "epoch": 2.410888671875e-05, + "grad_norm": 0.2951965034008026, + "learning_rate": 4.972442309227498e-05, + "loss": 0.0148, + "step": 15800 + }, + { + "epoch": 2.410888671875e-05, + "model_forward_time": 0.025502681732177734, + "step": 15800 + }, + { + "epoch": 2.410888671875e-05, + "step": 15800, + "training_step_time": 0.10789346694946289 + }, + { + "epoch": 2.411041259765625e-05, + "model_forward_time": 0.02546858787536621, + "step": 15801 + }, + { + "epoch": 2.411041259765625e-05, + "step": 15801, + "training_step_time": 0.10580611228942871 + }, + { + "epoch": 2.41119384765625e-05, + "model_forward_time": 0.025721311569213867, + "step": 15802 + }, + { + "epoch": 2.41119384765625e-05, + "step": 15802, + "training_step_time": 0.10697054862976074 + }, + { + "epoch": 2.411346435546875e-05, + "model_forward_time": 0.025745153427124023, + "step": 15803 + }, + { + "epoch": 2.411346435546875e-05, + "step": 15803, + "training_step_time": 0.10657548904418945 + }, + { + "epoch": 2.4114990234375e-05, + "model_forward_time": 0.025782346725463867, + "step": 15804 + }, + { + "epoch": 2.4114990234375e-05, + "step": 15804, + "training_step_time": 0.10963892936706543 + }, + { + "epoch": 2.411651611328125e-05, + "model_forward_time": 0.025864601135253906, + "step": 15805 + }, + { + "epoch": 2.411651611328125e-05, + "step": 15805, + "training_step_time": 0.11355066299438477 + }, + { + "epoch": 2.41180419921875e-05, + "model_forward_time": 0.02550530433654785, + "step": 15806 + }, + { + "epoch": 2.41180419921875e-05, + "step": 15806, + "training_step_time": 0.19260001182556152 + }, + { + "epoch": 2.411956787109375e-05, + "model_forward_time": 0.025295495986938477, + "step": 15807 + }, + { + "epoch": 2.411956787109375e-05, + "step": 15807, + "training_step_time": 0.20040321350097656 + }, + { + "epoch": 2.412109375e-05, + "model_forward_time": 0.025110244750976562, + "step": 15808 + }, + { + "epoch": 2.412109375e-05, + "step": 15808, + "training_step_time": 0.19210124015808105 + }, + { + "epoch": 2.412261962890625e-05, + "model_forward_time": 0.024487972259521484, + "step": 15809 + }, + { + "epoch": 2.412261962890625e-05, + "step": 15809, + "training_step_time": 0.16778779029846191 + }, + { + "epoch": 2.41241455078125e-05, + "grad_norm": 0.2948176860809326, + "learning_rate": 4.96693084474053e-05, + "loss": 0.0137, + "step": 15810 + }, + { + "epoch": 2.41241455078125e-05, + "model_forward_time": 0.027551889419555664, + "step": 15810 + }, + { + "epoch": 2.41241455078125e-05, + "step": 15810, + "training_step_time": 0.12750530242919922 + }, + { + "epoch": 2.412567138671875e-05, + "model_forward_time": 0.025095224380493164, + "step": 15811 + }, + { + "epoch": 2.412567138671875e-05, + "step": 15811, + "training_step_time": 0.11366772651672363 + }, + { + "epoch": 2.4127197265625e-05, + "model_forward_time": 0.025704622268676758, + "step": 15812 + }, + { + "epoch": 2.4127197265625e-05, + "step": 15812, + "training_step_time": 0.12529611587524414 + }, + { + "epoch": 2.412872314453125e-05, + "model_forward_time": 0.025648117065429688, + "step": 15813 + }, + { + "epoch": 2.412872314453125e-05, + "step": 15813, + "training_step_time": 0.1047525405883789 + }, + { + "epoch": 2.41302490234375e-05, + "model_forward_time": 0.025472402572631836, + "step": 15814 + }, + { + "epoch": 2.41302490234375e-05, + "step": 15814, + "training_step_time": 0.1850287914276123 + }, + { + "epoch": 2.413177490234375e-05, + "model_forward_time": 0.02468585968017578, + "step": 15815 + }, + { + "epoch": 2.413177490234375e-05, + "step": 15815, + "training_step_time": 0.16116023063659668 + }, + { + "epoch": 2.413330078125e-05, + "model_forward_time": 0.0242311954498291, + "step": 15816 + }, + { + "epoch": 2.413330078125e-05, + "step": 15816, + "training_step_time": 0.188004732131958 + }, + { + "epoch": 2.413482666015625e-05, + "model_forward_time": 0.024767637252807617, + "step": 15817 + }, + { + "epoch": 2.413482666015625e-05, + "step": 15817, + "training_step_time": 0.17100310325622559 + }, + { + "epoch": 2.41363525390625e-05, + "model_forward_time": 0.02544546127319336, + "step": 15818 + }, + { + "epoch": 2.41363525390625e-05, + "step": 15818, + "training_step_time": 0.1612837314605713 + }, + { + "epoch": 2.413787841796875e-05, + "model_forward_time": 0.024760723114013672, + "step": 15819 + }, + { + "epoch": 2.413787841796875e-05, + "step": 15819, + "training_step_time": 0.13944172859191895 + }, + { + "epoch": 2.4139404296875e-05, + "grad_norm": 0.21831856667995453, + "learning_rate": 4.961419420435703e-05, + "loss": 0.0125, + "step": 15820 + }, + { + "epoch": 2.4139404296875e-05, + "model_forward_time": 0.024768829345703125, + "step": 15820 + }, + { + "epoch": 2.4139404296875e-05, + "step": 15820, + "training_step_time": 0.13277482986450195 + }, + { + "epoch": 2.414093017578125e-05, + "model_forward_time": 0.02490711212158203, + "step": 15821 + }, + { + "epoch": 2.414093017578125e-05, + "step": 15821, + "training_step_time": 0.12941956520080566 + }, + { + "epoch": 2.41424560546875e-05, + "model_forward_time": 0.02538466453552246, + "step": 15822 + }, + { + "epoch": 2.41424560546875e-05, + "step": 15822, + "training_step_time": 0.12295937538146973 + }, + { + "epoch": 2.414398193359375e-05, + "model_forward_time": 0.027056455612182617, + "step": 15823 + }, + { + "epoch": 2.414398193359375e-05, + "step": 15823, + "training_step_time": 0.12282776832580566 + }, + { + "epoch": 2.41455078125e-05, + "model_forward_time": 0.025541067123413086, + "step": 15824 + }, + { + "epoch": 2.41455078125e-05, + "step": 15824, + "training_step_time": 0.11903858184814453 + }, + { + "epoch": 2.414703369140625e-05, + "model_forward_time": 0.02582693099975586, + "step": 15825 + }, + { + "epoch": 2.414703369140625e-05, + "step": 15825, + "training_step_time": 0.11393070220947266 + }, + { + "epoch": 2.41485595703125e-05, + "model_forward_time": 0.025495290756225586, + "step": 15826 + }, + { + "epoch": 2.41485595703125e-05, + "step": 15826, + "training_step_time": 0.1136636734008789 + }, + { + "epoch": 2.415008544921875e-05, + "model_forward_time": 0.025578022003173828, + "step": 15827 + }, + { + "epoch": 2.415008544921875e-05, + "step": 15827, + "training_step_time": 0.15861845016479492 + }, + { + "epoch": 2.4151611328125e-05, + "model_forward_time": 0.024882078170776367, + "step": 15828 + }, + { + "epoch": 2.4151611328125e-05, + "step": 15828, + "training_step_time": 0.17461276054382324 + }, + { + "epoch": 2.415313720703125e-05, + "model_forward_time": 0.02426290512084961, + "step": 15829 + }, + { + "epoch": 2.415313720703125e-05, + "step": 15829, + "training_step_time": 0.10796284675598145 + }, + { + "epoch": 2.41546630859375e-05, + "grad_norm": 0.41024482250213623, + "learning_rate": 4.955908043009917e-05, + "loss": 0.0123, + "step": 15830 + }, + { + "epoch": 2.41546630859375e-05, + "model_forward_time": 0.025498390197753906, + "step": 15830 + }, + { + "epoch": 2.41546630859375e-05, + "step": 15830, + "training_step_time": 0.1199045181274414 + }, + { + "epoch": 2.415618896484375e-05, + "model_forward_time": 0.025540590286254883, + "step": 15831 + }, + { + "epoch": 2.415618896484375e-05, + "step": 15831, + "training_step_time": 0.11087441444396973 + }, + { + "epoch": 2.415771484375e-05, + "model_forward_time": 0.025449037551879883, + "step": 15832 + }, + { + "epoch": 2.415771484375e-05, + "step": 15832, + "training_step_time": 0.10480070114135742 + }, + { + "epoch": 2.415924072265625e-05, + "model_forward_time": 0.025753498077392578, + "step": 15833 + }, + { + "epoch": 2.415924072265625e-05, + "step": 15833, + "training_step_time": 0.19542288780212402 + }, + { + "epoch": 2.41607666015625e-05, + "model_forward_time": 0.02477288246154785, + "step": 15834 + }, + { + "epoch": 2.41607666015625e-05, + "step": 15834, + "training_step_time": 0.10563373565673828 + }, + { + "epoch": 2.416229248046875e-05, + "model_forward_time": 0.02510356903076172, + "step": 15835 + }, + { + "epoch": 2.416229248046875e-05, + "step": 15835, + "training_step_time": 0.10892748832702637 + }, + { + "epoch": 2.4163818359375e-05, + "model_forward_time": 0.025130033493041992, + "step": 15836 + }, + { + "epoch": 2.4163818359375e-05, + "step": 15836, + "training_step_time": 0.10665225982666016 + }, + { + "epoch": 2.416534423828125e-05, + "model_forward_time": 0.025502443313598633, + "step": 15837 + }, + { + "epoch": 2.416534423828125e-05, + "step": 15837, + "training_step_time": 0.10480070114135742 + }, + { + "epoch": 2.41668701171875e-05, + "model_forward_time": 0.028617143630981445, + "step": 15838 + }, + { + "epoch": 2.41668701171875e-05, + "step": 15838, + "training_step_time": 0.10938143730163574 + }, + { + "epoch": 2.416839599609375e-05, + "model_forward_time": 0.025852441787719727, + "step": 15839 + }, + { + "epoch": 2.416839599609375e-05, + "step": 15839, + "training_step_time": 0.10679483413696289 + }, + { + "epoch": 2.4169921875e-05, + "grad_norm": 0.17590944468975067, + "learning_rate": 4.950396719160018e-05, + "loss": 0.013, + "step": 15840 + }, + { + "epoch": 2.4169921875e-05, + "model_forward_time": 0.025693178176879883, + "step": 15840 + }, + { + "epoch": 2.4169921875e-05, + "step": 15840, + "training_step_time": 0.10664200782775879 + }, + { + "epoch": 2.417144775390625e-05, + "model_forward_time": 0.025706768035888672, + "step": 15841 + }, + { + "epoch": 2.417144775390625e-05, + "step": 15841, + "training_step_time": 0.10791993141174316 + }, + { + "epoch": 2.41729736328125e-05, + "model_forward_time": 0.02624201774597168, + "step": 15842 + }, + { + "epoch": 2.41729736328125e-05, + "step": 15842, + "training_step_time": 0.10647845268249512 + }, + { + "epoch": 2.417449951171875e-05, + "model_forward_time": 0.02576589584350586, + "step": 15843 + }, + { + "epoch": 2.417449951171875e-05, + "step": 15843, + "training_step_time": 0.1066436767578125 + }, + { + "epoch": 2.4176025390625e-05, + "model_forward_time": 0.025748729705810547, + "step": 15844 + }, + { + "epoch": 2.4176025390625e-05, + "step": 15844, + "training_step_time": 0.10545134544372559 + }, + { + "epoch": 2.417755126953125e-05, + "model_forward_time": 0.025636672973632812, + "step": 15845 + }, + { + "epoch": 2.417755126953125e-05, + "step": 15845, + "training_step_time": 0.10648918151855469 + }, + { + "epoch": 2.41790771484375e-05, + "model_forward_time": 0.026278972625732422, + "step": 15846 + }, + { + "epoch": 2.41790771484375e-05, + "step": 15846, + "training_step_time": 0.10998773574829102 + }, + { + "epoch": 2.418060302734375e-05, + "model_forward_time": 0.026279687881469727, + "step": 15847 + }, + { + "epoch": 2.418060302734375e-05, + "step": 15847, + "training_step_time": 0.10999917984008789 + }, + { + "epoch": 2.418212890625e-05, + "model_forward_time": 0.02591681480407715, + "step": 15848 + }, + { + "epoch": 2.418212890625e-05, + "step": 15848, + "training_step_time": 0.15461254119873047 + }, + { + "epoch": 2.418365478515625e-05, + "model_forward_time": 0.02440500259399414, + "step": 15849 + }, + { + "epoch": 2.418365478515625e-05, + "step": 15849, + "training_step_time": 0.10561490058898926 + }, + { + "epoch": 2.41851806640625e-05, + "grad_norm": 0.26678934693336487, + "learning_rate": 4.9448854555827825e-05, + "loss": 0.0117, + "step": 15850 + }, + { + "epoch": 2.41851806640625e-05, + "model_forward_time": 0.024710416793823242, + "step": 15850 + }, + { + "epoch": 2.41851806640625e-05, + "step": 15850, + "training_step_time": 0.16521763801574707 + }, + { + "epoch": 2.418670654296875e-05, + "model_forward_time": 0.024506330490112305, + "step": 15851 + }, + { + "epoch": 2.418670654296875e-05, + "step": 15851, + "training_step_time": 0.19717860221862793 + }, + { + "epoch": 2.4188232421875e-05, + "model_forward_time": 0.024341821670532227, + "step": 15852 + }, + { + "epoch": 2.4188232421875e-05, + "step": 15852, + "training_step_time": 0.14354300498962402 + }, + { + "epoch": 2.418975830078125e-05, + "model_forward_time": 0.02642035484313965, + "step": 15853 + }, + { + "epoch": 2.418975830078125e-05, + "step": 15853, + "training_step_time": 0.1900327205657959 + }, + { + "epoch": 2.41912841796875e-05, + "model_forward_time": 0.02536320686340332, + "step": 15854 + }, + { + "epoch": 2.41912841796875e-05, + "step": 15854, + "training_step_time": 0.17298412322998047 + }, + { + "epoch": 2.419281005859375e-05, + "model_forward_time": 0.025203704833984375, + "step": 15855 + }, + { + "epoch": 2.419281005859375e-05, + "step": 15855, + "training_step_time": 0.10773253440856934 + }, + { + "epoch": 2.41943359375e-05, + "model_forward_time": 0.02529430389404297, + "step": 15856 + }, + { + "epoch": 2.41943359375e-05, + "step": 15856, + "training_step_time": 0.12476611137390137 + }, + { + "epoch": 2.419586181640625e-05, + "model_forward_time": 0.02646183967590332, + "step": 15857 + }, + { + "epoch": 2.419586181640625e-05, + "step": 15857, + "training_step_time": 0.1109309196472168 + }, + { + "epoch": 2.41973876953125e-05, + "model_forward_time": 0.026561975479125977, + "step": 15858 + }, + { + "epoch": 2.41973876953125e-05, + "step": 15858, + "training_step_time": 0.11260294914245605 + }, + { + "epoch": 2.419891357421875e-05, + "model_forward_time": 0.025049686431884766, + "step": 15859 + }, + { + "epoch": 2.419891357421875e-05, + "step": 15859, + "training_step_time": 0.19774889945983887 + }, + { + "epoch": 2.4200439453125e-05, + "grad_norm": 0.35972481966018677, + "learning_rate": 4.9393742589749145e-05, + "loss": 0.0126, + "step": 15860 + }, + { + "epoch": 2.4200439453125e-05, + "model_forward_time": 0.02472972869873047, + "step": 15860 + }, + { + "epoch": 2.4200439453125e-05, + "step": 15860, + "training_step_time": 0.10699343681335449 + }, + { + "epoch": 2.420196533203125e-05, + "model_forward_time": 0.02517223358154297, + "step": 15861 + }, + { + "epoch": 2.420196533203125e-05, + "step": 15861, + "training_step_time": 0.10393333435058594 + }, + { + "epoch": 2.42034912109375e-05, + "model_forward_time": 0.025421619415283203, + "step": 15862 + }, + { + "epoch": 2.42034912109375e-05, + "step": 15862, + "training_step_time": 0.10455727577209473 + }, + { + "epoch": 2.420501708984375e-05, + "model_forward_time": 0.025794506072998047, + "step": 15863 + }, + { + "epoch": 2.420501708984375e-05, + "step": 15863, + "training_step_time": 0.10572934150695801 + }, + { + "epoch": 2.420654296875e-05, + "model_forward_time": 0.024958372116088867, + "step": 15864 + }, + { + "epoch": 2.420654296875e-05, + "step": 15864, + "training_step_time": 0.10541963577270508 + }, + { + "epoch": 2.420806884765625e-05, + "model_forward_time": 0.025580644607543945, + "step": 15865 + }, + { + "epoch": 2.420806884765625e-05, + "step": 15865, + "training_step_time": 0.10817766189575195 + }, + { + "epoch": 2.42095947265625e-05, + "model_forward_time": 0.025849342346191406, + "step": 15866 + }, + { + "epoch": 2.42095947265625e-05, + "step": 15866, + "training_step_time": 0.10609817504882812 + }, + { + "epoch": 2.421112060546875e-05, + "model_forward_time": 0.025813817977905273, + "step": 15867 + }, + { + "epoch": 2.421112060546875e-05, + "step": 15867, + "training_step_time": 0.10567402839660645 + }, + { + "epoch": 2.4212646484375e-05, + "model_forward_time": 0.02597522735595703, + "step": 15868 + }, + { + "epoch": 2.4212646484375e-05, + "step": 15868, + "training_step_time": 0.10711789131164551 + }, + { + "epoch": 2.421417236328125e-05, + "model_forward_time": 0.025913715362548828, + "step": 15869 + }, + { + "epoch": 2.421417236328125e-05, + "step": 15869, + "training_step_time": 0.10759878158569336 + }, + { + "epoch": 2.42156982421875e-05, + "grad_norm": 0.32081305980682373, + "learning_rate": 4.93386313603304e-05, + "loss": 0.0175, + "step": 15870 + }, + { + "epoch": 2.42156982421875e-05, + "model_forward_time": 0.025621652603149414, + "step": 15870 + }, + { + "epoch": 2.42156982421875e-05, + "step": 15870, + "training_step_time": 0.10730576515197754 + }, + { + "epoch": 2.421722412109375e-05, + "model_forward_time": 0.025763988494873047, + "step": 15871 + }, + { + "epoch": 2.421722412109375e-05, + "step": 15871, + "training_step_time": 0.1067664623260498 + }, + { + "epoch": 2.421875e-05, + "model_forward_time": 0.025990962982177734, + "step": 15872 + }, + { + "epoch": 2.421875e-05, + "step": 15872, + "training_step_time": 0.1060187816619873 + }, + { + "epoch": 2.422027587890625e-05, + "model_forward_time": 0.026315689086914062, + "step": 15873 + }, + { + "epoch": 2.422027587890625e-05, + "step": 15873, + "training_step_time": 0.1778872013092041 + }, + { + "epoch": 2.42218017578125e-05, + "model_forward_time": 0.025370121002197266, + "step": 15874 + }, + { + "epoch": 2.42218017578125e-05, + "step": 15874, + "training_step_time": 0.13401436805725098 + }, + { + "epoch": 2.422332763671875e-05, + "model_forward_time": 0.025191783905029297, + "step": 15875 + }, + { + "epoch": 2.422332763671875e-05, + "step": 15875, + "training_step_time": 0.1104578971862793 + }, + { + "epoch": 2.4224853515625e-05, + "model_forward_time": 0.026271581649780273, + "step": 15876 + }, + { + "epoch": 2.4224853515625e-05, + "step": 15876, + "training_step_time": 0.11933588981628418 + }, + { + "epoch": 2.422637939453125e-05, + "model_forward_time": 0.025708675384521484, + "step": 15877 + }, + { + "epoch": 2.422637939453125e-05, + "step": 15877, + "training_step_time": 0.10750508308410645 + }, + { + "epoch": 2.42279052734375e-05, + "model_forward_time": 0.02553844451904297, + "step": 15878 + }, + { + "epoch": 2.42279052734375e-05, + "step": 15878, + "training_step_time": 0.1074683666229248 + }, + { + "epoch": 2.422943115234375e-05, + "model_forward_time": 0.025675058364868164, + "step": 15879 + }, + { + "epoch": 2.422943115234375e-05, + "step": 15879, + "training_step_time": 0.19632577896118164 + }, + { + "epoch": 2.423095703125e-05, + "grad_norm": 0.384095162153244, + "learning_rate": 4.9283520934536904e-05, + "loss": 0.0164, + "step": 15880 + }, + { + "epoch": 2.423095703125e-05, + "model_forward_time": 0.024239778518676758, + "step": 15880 + }, + { + "epoch": 2.423095703125e-05, + "step": 15880, + "training_step_time": 0.10271501541137695 + }, + { + "epoch": 2.423248291015625e-05, + "model_forward_time": 0.02489304542541504, + "step": 15881 + }, + { + "epoch": 2.423248291015625e-05, + "step": 15881, + "training_step_time": 0.10219669342041016 + }, + { + "epoch": 2.42340087890625e-05, + "model_forward_time": 0.025671005249023438, + "step": 15882 + }, + { + "epoch": 2.42340087890625e-05, + "step": 15882, + "training_step_time": 0.10906648635864258 + }, + { + "epoch": 2.423553466796875e-05, + "model_forward_time": 0.025266170501708984, + "step": 15883 + }, + { + "epoch": 2.423553466796875e-05, + "step": 15883, + "training_step_time": 0.10835576057434082 + }, + { + "epoch": 2.4237060546875e-05, + "model_forward_time": 0.025645971298217773, + "step": 15884 + }, + { + "epoch": 2.4237060546875e-05, + "step": 15884, + "training_step_time": 0.10694122314453125 + }, + { + "epoch": 2.423858642578125e-05, + "model_forward_time": 0.026115894317626953, + "step": 15885 + }, + { + "epoch": 2.423858642578125e-05, + "step": 15885, + "training_step_time": 0.10697221755981445 + }, + { + "epoch": 2.42401123046875e-05, + "model_forward_time": 0.025562286376953125, + "step": 15886 + }, + { + "epoch": 2.42401123046875e-05, + "step": 15886, + "training_step_time": 0.10576319694519043 + }, + { + "epoch": 2.424163818359375e-05, + "model_forward_time": 0.025818586349487305, + "step": 15887 + }, + { + "epoch": 2.424163818359375e-05, + "step": 15887, + "training_step_time": 0.10538721084594727 + }, + { + "epoch": 2.42431640625e-05, + "model_forward_time": 0.025905847549438477, + "step": 15888 + }, + { + "epoch": 2.42431640625e-05, + "step": 15888, + "training_step_time": 0.10671162605285645 + }, + { + "epoch": 2.424468994140625e-05, + "model_forward_time": 0.025387287139892578, + "step": 15889 + }, + { + "epoch": 2.424468994140625e-05, + "step": 15889, + "training_step_time": 0.10831189155578613 + }, + { + "epoch": 2.42462158203125e-05, + "grad_norm": 0.20762024819850922, + "learning_rate": 4.9228411379333014e-05, + "loss": 0.0127, + "step": 15890 + }, + { + "epoch": 2.42462158203125e-05, + "model_forward_time": 0.026102066040039062, + "step": 15890 + }, + { + "epoch": 2.42462158203125e-05, + "step": 15890, + "training_step_time": 0.10633206367492676 + }, + { + "epoch": 2.424774169921875e-05, + "model_forward_time": 0.027546167373657227, + "step": 15891 + }, + { + "epoch": 2.424774169921875e-05, + "step": 15891, + "training_step_time": 0.10753345489501953 + }, + { + "epoch": 2.4249267578125e-05, + "model_forward_time": 0.026748180389404297, + "step": 15892 + }, + { + "epoch": 2.4249267578125e-05, + "step": 15892, + "training_step_time": 0.11024665832519531 + }, + { + "epoch": 2.425079345703125e-05, + "model_forward_time": 0.025822877883911133, + "step": 15893 + }, + { + "epoch": 2.425079345703125e-05, + "step": 15893, + "training_step_time": 0.10959315299987793 + }, + { + "epoch": 2.42523193359375e-05, + "model_forward_time": 0.025696992874145508, + "step": 15894 + }, + { + "epoch": 2.42523193359375e-05, + "step": 15894, + "training_step_time": 0.10599088668823242 + }, + { + "epoch": 2.425384521484375e-05, + "model_forward_time": 0.02540755271911621, + "step": 15895 + }, + { + "epoch": 2.425384521484375e-05, + "step": 15895, + "training_step_time": 0.18365001678466797 + }, + { + "epoch": 2.425537109375e-05, + "model_forward_time": 0.025188684463500977, + "step": 15896 + }, + { + "epoch": 2.425537109375e-05, + "step": 15896, + "training_step_time": 0.135390043258667 + }, + { + "epoch": 2.425689697265625e-05, + "model_forward_time": 0.025185585021972656, + "step": 15897 + }, + { + "epoch": 2.425689697265625e-05, + "step": 15897, + "training_step_time": 0.10508251190185547 + }, + { + "epoch": 2.42584228515625e-05, + "model_forward_time": 0.025966644287109375, + "step": 15898 + }, + { + "epoch": 2.42584228515625e-05, + "step": 15898, + "training_step_time": 0.13190364837646484 + }, + { + "epoch": 2.425994873046875e-05, + "model_forward_time": 0.029868364334106445, + "step": 15899 + }, + { + "epoch": 2.425994873046875e-05, + "step": 15899, + "training_step_time": 0.21393394470214844 + }, + { + "epoch": 2.4261474609375e-05, + "grad_norm": 0.14643548429012299, + "learning_rate": 4.917330276168208e-05, + "loss": 0.007, + "step": 15900 + }, + { + "epoch": 2.4261474609375e-05, + "model_forward_time": 0.025090932846069336, + "step": 15900 + }, + { + "epoch": 2.4261474609375e-05, + "step": 15900, + "training_step_time": 0.10868263244628906 + }, + { + "epoch": 2.426300048828125e-05, + "model_forward_time": 0.02563619613647461, + "step": 15901 + }, + { + "epoch": 2.426300048828125e-05, + "step": 15901, + "training_step_time": 0.11607813835144043 + }, + { + "epoch": 2.42645263671875e-05, + "model_forward_time": 0.02555394172668457, + "step": 15902 + }, + { + "epoch": 2.42645263671875e-05, + "step": 15902, + "training_step_time": 0.17038440704345703 + }, + { + "epoch": 2.426605224609375e-05, + "model_forward_time": 0.025087594985961914, + "step": 15903 + }, + { + "epoch": 2.426605224609375e-05, + "step": 15903, + "training_step_time": 0.1440563201904297 + }, + { + "epoch": 2.4267578125e-05, + "model_forward_time": 0.02509760856628418, + "step": 15904 + }, + { + "epoch": 2.4267578125e-05, + "step": 15904, + "training_step_time": 0.12248110771179199 + }, + { + "epoch": 2.426910400390625e-05, + "model_forward_time": 0.02478957176208496, + "step": 15905 + }, + { + "epoch": 2.426910400390625e-05, + "step": 15905, + "training_step_time": 0.10920500755310059 + }, + { + "epoch": 2.42706298828125e-05, + "model_forward_time": 0.02589106559753418, + "step": 15906 + }, + { + "epoch": 2.42706298828125e-05, + "step": 15906, + "training_step_time": 0.10769796371459961 + }, + { + "epoch": 2.427215576171875e-05, + "model_forward_time": 0.025074005126953125, + "step": 15907 + }, + { + "epoch": 2.427215576171875e-05, + "step": 15907, + "training_step_time": 0.10381889343261719 + }, + { + "epoch": 2.4273681640625e-05, + "model_forward_time": 0.025251150131225586, + "step": 15908 + }, + { + "epoch": 2.4273681640625e-05, + "step": 15908, + "training_step_time": 0.10627913475036621 + }, + { + "epoch": 2.427520751953125e-05, + "model_forward_time": 0.02614879608154297, + "step": 15909 + }, + { + "epoch": 2.427520751953125e-05, + "step": 15909, + "training_step_time": 0.11140656471252441 + }, + { + "epoch": 2.42767333984375e-05, + "grad_norm": 0.30145934224128723, + "learning_rate": 4.911819514854624e-05, + "loss": 0.0098, + "step": 15910 + }, + { + "epoch": 2.42767333984375e-05, + "model_forward_time": 0.02497696876525879, + "step": 15910 + }, + { + "epoch": 2.42767333984375e-05, + "step": 15910, + "training_step_time": 0.10891985893249512 + }, + { + "epoch": 2.427825927734375e-05, + "model_forward_time": 0.025133132934570312, + "step": 15911 + }, + { + "epoch": 2.427825927734375e-05, + "step": 15911, + "training_step_time": 0.10997700691223145 + }, + { + "epoch": 2.427978515625e-05, + "model_forward_time": 0.025237321853637695, + "step": 15912 + }, + { + "epoch": 2.427978515625e-05, + "step": 15912, + "training_step_time": 0.10519576072692871 + }, + { + "epoch": 2.428131103515625e-05, + "model_forward_time": 0.025793075561523438, + "step": 15913 + }, + { + "epoch": 2.428131103515625e-05, + "step": 15913, + "training_step_time": 0.10698103904724121 + }, + { + "epoch": 2.42828369140625e-05, + "model_forward_time": 0.02942371368408203, + "step": 15914 + }, + { + "epoch": 2.42828369140625e-05, + "step": 15914, + "training_step_time": 0.11082577705383301 + }, + { + "epoch": 2.428436279296875e-05, + "model_forward_time": 0.0254364013671875, + "step": 15915 + }, + { + "epoch": 2.428436279296875e-05, + "step": 15915, + "training_step_time": 0.1064605712890625 + }, + { + "epoch": 2.4285888671875e-05, + "model_forward_time": 0.025509119033813477, + "step": 15916 + }, + { + "epoch": 2.4285888671875e-05, + "step": 15916, + "training_step_time": 0.1069495677947998 + }, + { + "epoch": 2.428741455078125e-05, + "model_forward_time": 0.025258779525756836, + "step": 15917 + }, + { + "epoch": 2.428741455078125e-05, + "step": 15917, + "training_step_time": 0.10472822189331055 + }, + { + "epoch": 2.42889404296875e-05, + "model_forward_time": 0.025116443634033203, + "step": 15918 + }, + { + "epoch": 2.42889404296875e-05, + "step": 15918, + "training_step_time": 0.10599899291992188 + }, + { + "epoch": 2.429046630859375e-05, + "model_forward_time": 0.025040388107299805, + "step": 15919 + }, + { + "epoch": 2.429046630859375e-05, + "step": 15919, + "training_step_time": 0.10509538650512695 + }, + { + "epoch": 2.42919921875e-05, + "grad_norm": 0.1788942962884903, + "learning_rate": 4.906308860688645e-05, + "loss": 0.0122, + "step": 15920 + }, + { + "epoch": 2.42919921875e-05, + "model_forward_time": 0.024704694747924805, + "step": 15920 + }, + { + "epoch": 2.42919921875e-05, + "step": 15920, + "training_step_time": 0.10908269882202148 + }, + { + "epoch": 2.429351806640625e-05, + "model_forward_time": 0.024132966995239258, + "step": 15921 + }, + { + "epoch": 2.429351806640625e-05, + "step": 15921, + "training_step_time": 0.10468530654907227 + }, + { + "epoch": 2.42950439453125e-05, + "model_forward_time": 0.025097370147705078, + "step": 15922 + }, + { + "epoch": 2.42950439453125e-05, + "step": 15922, + "training_step_time": 0.13061738014221191 + }, + { + "epoch": 2.429656982421875e-05, + "model_forward_time": 0.024858713150024414, + "step": 15923 + }, + { + "epoch": 2.429656982421875e-05, + "step": 15923, + "training_step_time": 0.11765551567077637 + }, + { + "epoch": 2.4298095703125e-05, + "model_forward_time": 0.025319576263427734, + "step": 15924 + }, + { + "epoch": 2.4298095703125e-05, + "step": 15924, + "training_step_time": 0.10913586616516113 + }, + { + "epoch": 2.429962158203125e-05, + "model_forward_time": 0.025531530380249023, + "step": 15925 + }, + { + "epoch": 2.429962158203125e-05, + "step": 15925, + "training_step_time": 0.11077141761779785 + }, + { + "epoch": 2.43011474609375e-05, + "model_forward_time": 0.02533721923828125, + "step": 15926 + }, + { + "epoch": 2.43011474609375e-05, + "step": 15926, + "training_step_time": 0.10660672187805176 + }, + { + "epoch": 2.430267333984375e-05, + "model_forward_time": 0.02531909942626953, + "step": 15927 + }, + { + "epoch": 2.430267333984375e-05, + "step": 15927, + "training_step_time": 0.19421911239624023 + }, + { + "epoch": 2.430419921875e-05, + "model_forward_time": 0.025098562240600586, + "step": 15928 + }, + { + "epoch": 2.430419921875e-05, + "step": 15928, + "training_step_time": 0.10227751731872559 + }, + { + "epoch": 2.430572509765625e-05, + "model_forward_time": 0.02476191520690918, + "step": 15929 + }, + { + "epoch": 2.430572509765625e-05, + "step": 15929, + "training_step_time": 0.10433697700500488 + }, + { + "epoch": 2.43072509765625e-05, + "grad_norm": 0.32863739132881165, + "learning_rate": 4.9007983203662326e-05, + "loss": 0.0148, + "step": 15930 + }, + { + "epoch": 2.43072509765625e-05, + "model_forward_time": 0.026124238967895508, + "step": 15930 + }, + { + "epoch": 2.43072509765625e-05, + "step": 15930, + "training_step_time": 0.10723996162414551 + }, + { + "epoch": 2.430877685546875e-05, + "model_forward_time": 0.02552962303161621, + "step": 15931 + }, + { + "epoch": 2.430877685546875e-05, + "step": 15931, + "training_step_time": 0.10885930061340332 + }, + { + "epoch": 2.4310302734375e-05, + "model_forward_time": 0.026043415069580078, + "step": 15932 + }, + { + "epoch": 2.4310302734375e-05, + "step": 15932, + "training_step_time": 0.10613417625427246 + }, + { + "epoch": 2.431182861328125e-05, + "model_forward_time": 0.02562427520751953, + "step": 15933 + }, + { + "epoch": 2.431182861328125e-05, + "step": 15933, + "training_step_time": 0.10567808151245117 + }, + { + "epoch": 2.43133544921875e-05, + "model_forward_time": 0.02583456039428711, + "step": 15934 + }, + { + "epoch": 2.43133544921875e-05, + "step": 15934, + "training_step_time": 0.10648989677429199 + }, + { + "epoch": 2.431488037109375e-05, + "model_forward_time": 0.025661706924438477, + "step": 15935 + }, + { + "epoch": 2.431488037109375e-05, + "step": 15935, + "training_step_time": 0.10765194892883301 + }, + { + "epoch": 2.431640625e-05, + "model_forward_time": 0.024990081787109375, + "step": 15936 + }, + { + "epoch": 2.431640625e-05, + "step": 15936, + "training_step_time": 0.10599589347839355 + }, + { + "epoch": 2.431793212890625e-05, + "model_forward_time": 0.025968313217163086, + "step": 15937 + }, + { + "epoch": 2.431793212890625e-05, + "step": 15937, + "training_step_time": 0.10855555534362793 + }, + { + "epoch": 2.43194580078125e-05, + "model_forward_time": 0.02735447883605957, + "step": 15938 + }, + { + "epoch": 2.43194580078125e-05, + "step": 15938, + "training_step_time": 0.10811400413513184 + }, + { + "epoch": 2.432098388671875e-05, + "model_forward_time": 0.026804208755493164, + "step": 15939 + }, + { + "epoch": 2.432098388671875e-05, + "step": 15939, + "training_step_time": 0.10663986206054688 + }, + { + "epoch": 2.4322509765625e-05, + "grad_norm": 0.2623797655105591, + "learning_rate": 4.895287900583216e-05, + "loss": 0.0122, + "step": 15940 + }, + { + "epoch": 2.4322509765625e-05, + "model_forward_time": 0.02431941032409668, + "step": 15940 + }, + { + "epoch": 2.4322509765625e-05, + "step": 15940, + "training_step_time": 0.10401558876037598 + }, + { + "epoch": 2.432403564453125e-05, + "model_forward_time": 0.024582862854003906, + "step": 15941 + }, + { + "epoch": 2.432403564453125e-05, + "step": 15941, + "training_step_time": 0.10406661033630371 + }, + { + "epoch": 2.43255615234375e-05, + "model_forward_time": 0.026092052459716797, + "step": 15942 + }, + { + "epoch": 2.43255615234375e-05, + "step": 15942, + "training_step_time": 0.10527443885803223 + }, + { + "epoch": 2.432708740234375e-05, + "model_forward_time": 0.02536296844482422, + "step": 15943 + }, + { + "epoch": 2.432708740234375e-05, + "step": 15943, + "training_step_time": 0.1746978759765625 + }, + { + "epoch": 2.432861328125e-05, + "model_forward_time": 0.02556014060974121, + "step": 15944 + }, + { + "epoch": 2.432861328125e-05, + "step": 15944, + "training_step_time": 0.17778635025024414 + }, + { + "epoch": 2.433013916015625e-05, + "model_forward_time": 0.02498459815979004, + "step": 15945 + }, + { + "epoch": 2.433013916015625e-05, + "step": 15945, + "training_step_time": 0.19803738594055176 + }, + { + "epoch": 2.43316650390625e-05, + "model_forward_time": 0.025725603103637695, + "step": 15946 + }, + { + "epoch": 2.43316650390625e-05, + "step": 15946, + "training_step_time": 0.16976666450500488 + }, + { + "epoch": 2.433319091796875e-05, + "model_forward_time": 0.024913787841796875, + "step": 15947 + }, + { + "epoch": 2.433319091796875e-05, + "step": 15947, + "training_step_time": 0.1595759391784668 + }, + { + "epoch": 2.4334716796875e-05, + "model_forward_time": 0.024934768676757812, + "step": 15948 + }, + { + "epoch": 2.4334716796875e-05, + "step": 15948, + "training_step_time": 0.11550354957580566 + }, + { + "epoch": 2.433624267578125e-05, + "model_forward_time": 0.025606870651245117, + "step": 15949 + }, + { + "epoch": 2.433624267578125e-05, + "step": 15949, + "training_step_time": 0.13918447494506836 + }, + { + "epoch": 2.43377685546875e-05, + "grad_norm": 0.15143872797489166, + "learning_rate": 4.889777608035273e-05, + "loss": 0.0077, + "step": 15950 + }, + { + "epoch": 2.43377685546875e-05, + "model_forward_time": 0.025627613067626953, + "step": 15950 + }, + { + "epoch": 2.43377685546875e-05, + "step": 15950, + "training_step_time": 0.11227560043334961 + }, + { + "epoch": 2.433929443359375e-05, + "model_forward_time": 0.025737524032592773, + "step": 15951 + }, + { + "epoch": 2.433929443359375e-05, + "step": 15951, + "training_step_time": 0.17915630340576172 + }, + { + "epoch": 2.43408203125e-05, + "model_forward_time": 0.025168418884277344, + "step": 15952 + }, + { + "epoch": 2.43408203125e-05, + "step": 15952, + "training_step_time": 0.1336383819580078 + }, + { + "epoch": 2.434234619140625e-05, + "model_forward_time": 0.02490830421447754, + "step": 15953 + }, + { + "epoch": 2.434234619140625e-05, + "step": 15953, + "training_step_time": 0.11654448509216309 + }, + { + "epoch": 2.43438720703125e-05, + "model_forward_time": 0.02537083625793457, + "step": 15954 + }, + { + "epoch": 2.43438720703125e-05, + "step": 15954, + "training_step_time": 0.10313296318054199 + }, + { + "epoch": 2.434539794921875e-05, + "model_forward_time": 0.025387048721313477, + "step": 15955 + }, + { + "epoch": 2.434539794921875e-05, + "step": 15955, + "training_step_time": 0.10349178314208984 + }, + { + "epoch": 2.4346923828125e-05, + "model_forward_time": 0.02561783790588379, + "step": 15956 + }, + { + "epoch": 2.4346923828125e-05, + "step": 15956, + "training_step_time": 0.10423946380615234 + }, + { + "epoch": 2.434844970703125e-05, + "model_forward_time": 0.025623798370361328, + "step": 15957 + }, + { + "epoch": 2.434844970703125e-05, + "step": 15957, + "training_step_time": 0.10730814933776855 + }, + { + "epoch": 2.43499755859375e-05, + "model_forward_time": 0.02522563934326172, + "step": 15958 + }, + { + "epoch": 2.43499755859375e-05, + "step": 15958, + "training_step_time": 0.11627459526062012 + }, + { + "epoch": 2.435150146484375e-05, + "model_forward_time": 0.02457427978515625, + "step": 15959 + }, + { + "epoch": 2.435150146484375e-05, + "step": 15959, + "training_step_time": 0.1127774715423584 + }, + { + "epoch": 2.435302734375e-05, + "grad_norm": 0.13048915565013885, + "learning_rate": 4.884267449417931e-05, + "loss": 0.0109, + "step": 15960 + }, + { + "epoch": 2.435302734375e-05, + "model_forward_time": 0.026351451873779297, + "step": 15960 + }, + { + "epoch": 2.435302734375e-05, + "step": 15960, + "training_step_time": 0.11735105514526367 + }, + { + "epoch": 2.435455322265625e-05, + "model_forward_time": 0.025661468505859375, + "step": 15961 + }, + { + "epoch": 2.435455322265625e-05, + "step": 15961, + "training_step_time": 0.1125333309173584 + }, + { + "epoch": 2.43560791015625e-05, + "model_forward_time": 0.025895357131958008, + "step": 15962 + }, + { + "epoch": 2.43560791015625e-05, + "step": 15962, + "training_step_time": 0.1161501407623291 + }, + { + "epoch": 2.435760498046875e-05, + "model_forward_time": 0.025594472885131836, + "step": 15963 + }, + { + "epoch": 2.435760498046875e-05, + "step": 15963, + "training_step_time": 0.11189889907836914 + }, + { + "epoch": 2.4359130859375e-05, + "model_forward_time": 0.02572154998779297, + "step": 15964 + }, + { + "epoch": 2.4359130859375e-05, + "step": 15964, + "training_step_time": 0.11070418357849121 + }, + { + "epoch": 2.436065673828125e-05, + "model_forward_time": 0.025930404663085938, + "step": 15965 + }, + { + "epoch": 2.436065673828125e-05, + "step": 15965, + "training_step_time": 0.11066246032714844 + }, + { + "epoch": 2.43621826171875e-05, + "model_forward_time": 0.025638580322265625, + "step": 15966 + }, + { + "epoch": 2.43621826171875e-05, + "step": 15966, + "training_step_time": 0.1066124439239502 + }, + { + "epoch": 2.436370849609375e-05, + "model_forward_time": 0.02590799331665039, + "step": 15967 + }, + { + "epoch": 2.436370849609375e-05, + "step": 15967, + "training_step_time": 0.14440560340881348 + }, + { + "epoch": 2.4365234375e-05, + "model_forward_time": 0.026114702224731445, + "step": 15968 + }, + { + "epoch": 2.4365234375e-05, + "step": 15968, + "training_step_time": 0.14677739143371582 + }, + { + "epoch": 2.436676025390625e-05, + "model_forward_time": 0.025386333465576172, + "step": 15969 + }, + { + "epoch": 2.436676025390625e-05, + "step": 15969, + "training_step_time": 0.10649895668029785 + }, + { + "epoch": 2.43682861328125e-05, + "grad_norm": 0.4667963683605194, + "learning_rate": 4.878757431426551e-05, + "loss": 0.0173, + "step": 15970 + }, + { + "epoch": 2.43682861328125e-05, + "model_forward_time": 0.026465177536010742, + "step": 15970 + }, + { + "epoch": 2.43682861328125e-05, + "step": 15970, + "training_step_time": 0.10843157768249512 + }, + { + "epoch": 2.436981201171875e-05, + "model_forward_time": 0.025693416595458984, + "step": 15971 + }, + { + "epoch": 2.436981201171875e-05, + "step": 15971, + "training_step_time": 0.10870504379272461 + }, + { + "epoch": 2.4371337890625e-05, + "model_forward_time": 0.025734424591064453, + "step": 15972 + }, + { + "epoch": 2.4371337890625e-05, + "step": 15972, + "training_step_time": 0.10793256759643555 + }, + { + "epoch": 2.437286376953125e-05, + "model_forward_time": 0.025585412979125977, + "step": 15973 + }, + { + "epoch": 2.437286376953125e-05, + "step": 15973, + "training_step_time": 0.19218230247497559 + }, + { + "epoch": 2.43743896484375e-05, + "model_forward_time": 0.02491140365600586, + "step": 15974 + }, + { + "epoch": 2.43743896484375e-05, + "step": 15974, + "training_step_time": 0.10451340675354004 + }, + { + "epoch": 2.437591552734375e-05, + "model_forward_time": 0.027354001998901367, + "step": 15975 + }, + { + "epoch": 2.437591552734375e-05, + "step": 15975, + "training_step_time": 0.10729265213012695 + }, + { + "epoch": 2.437744140625e-05, + "model_forward_time": 0.025938987731933594, + "step": 15976 + }, + { + "epoch": 2.437744140625e-05, + "step": 15976, + "training_step_time": 0.10894656181335449 + }, + { + "epoch": 2.437896728515625e-05, + "model_forward_time": 0.02557539939880371, + "step": 15977 + }, + { + "epoch": 2.437896728515625e-05, + "step": 15977, + "training_step_time": 0.10466217994689941 + }, + { + "epoch": 2.43804931640625e-05, + "model_forward_time": 0.02507948875427246, + "step": 15978 + }, + { + "epoch": 2.43804931640625e-05, + "step": 15978, + "training_step_time": 0.10676407814025879 + }, + { + "epoch": 2.438201904296875e-05, + "model_forward_time": 0.025479555130004883, + "step": 15979 + }, + { + "epoch": 2.438201904296875e-05, + "step": 15979, + "training_step_time": 0.10506010055541992 + }, + { + "epoch": 2.4383544921875e-05, + "grad_norm": 0.29334574937820435, + "learning_rate": 4.873247560756324e-05, + "loss": 0.0098, + "step": 15980 + }, + { + "epoch": 2.4383544921875e-05, + "model_forward_time": 0.025799274444580078, + "step": 15980 + }, + { + "epoch": 2.4383544921875e-05, + "step": 15980, + "training_step_time": 0.10599017143249512 + }, + { + "epoch": 2.438507080078125e-05, + "model_forward_time": 0.026062488555908203, + "step": 15981 + }, + { + "epoch": 2.438507080078125e-05, + "step": 15981, + "training_step_time": 0.1067500114440918 + }, + { + "epoch": 2.43865966796875e-05, + "model_forward_time": 0.025292396545410156, + "step": 15982 + }, + { + "epoch": 2.43865966796875e-05, + "step": 15982, + "training_step_time": 0.1045982837677002 + }, + { + "epoch": 2.438812255859375e-05, + "model_forward_time": 0.02561354637145996, + "step": 15983 + }, + { + "epoch": 2.438812255859375e-05, + "step": 15983, + "training_step_time": 0.10487008094787598 + }, + { + "epoch": 2.43896484375e-05, + "model_forward_time": 0.026061296463012695, + "step": 15984 + }, + { + "epoch": 2.43896484375e-05, + "step": 15984, + "training_step_time": 0.10548281669616699 + }, + { + "epoch": 2.439117431640625e-05, + "model_forward_time": 0.025626659393310547, + "step": 15985 + }, + { + "epoch": 2.439117431640625e-05, + "step": 15985, + "training_step_time": 0.10392022132873535 + }, + { + "epoch": 2.43927001953125e-05, + "model_forward_time": 0.02581048011779785, + "step": 15986 + }, + { + "epoch": 2.43927001953125e-05, + "step": 15986, + "training_step_time": 0.10550999641418457 + }, + { + "epoch": 2.439422607421875e-05, + "model_forward_time": 0.025572538375854492, + "step": 15987 + }, + { + "epoch": 2.439422607421875e-05, + "step": 15987, + "training_step_time": 0.10541439056396484 + }, + { + "epoch": 2.4395751953125e-05, + "model_forward_time": 0.025936603546142578, + "step": 15988 + }, + { + "epoch": 2.4395751953125e-05, + "step": 15988, + "training_step_time": 0.10556578636169434 + }, + { + "epoch": 2.439727783203125e-05, + "model_forward_time": 0.02664804458618164, + "step": 15989 + }, + { + "epoch": 2.439727783203125e-05, + "step": 15989, + "training_step_time": 0.10661935806274414 + }, + { + "epoch": 2.43988037109375e-05, + "grad_norm": 0.17938953638076782, + "learning_rate": 4.867737844102261e-05, + "loss": 0.0184, + "step": 15990 + }, + { + "epoch": 2.43988037109375e-05, + "model_forward_time": 0.02633047103881836, + "step": 15990 + }, + { + "epoch": 2.43988037109375e-05, + "step": 15990, + "training_step_time": 0.18438220024108887 + }, + { + "epoch": 2.440032958984375e-05, + "model_forward_time": 0.0251009464263916, + "step": 15991 + }, + { + "epoch": 2.440032958984375e-05, + "step": 15991, + "training_step_time": 0.12546944618225098 + }, + { + "epoch": 2.440185546875e-05, + "model_forward_time": 0.02520918846130371, + "step": 15992 + }, + { + "epoch": 2.440185546875e-05, + "step": 15992, + "training_step_time": 0.11509394645690918 + }, + { + "epoch": 2.440338134765625e-05, + "model_forward_time": 0.026113033294677734, + "step": 15993 + }, + { + "epoch": 2.440338134765625e-05, + "step": 15993, + "training_step_time": 0.1378002166748047 + }, + { + "epoch": 2.44049072265625e-05, + "model_forward_time": 0.025772571563720703, + "step": 15994 + }, + { + "epoch": 2.44049072265625e-05, + "step": 15994, + "training_step_time": 0.10792136192321777 + }, + { + "epoch": 2.440643310546875e-05, + "model_forward_time": 0.02566218376159668, + "step": 15995 + }, + { + "epoch": 2.440643310546875e-05, + "step": 15995, + "training_step_time": 0.20623564720153809 + }, + { + "epoch": 2.4407958984375e-05, + "model_forward_time": 0.025394916534423828, + "step": 15996 + }, + { + "epoch": 2.4407958984375e-05, + "step": 15996, + "training_step_time": 0.11642718315124512 + }, + { + "epoch": 2.440948486328125e-05, + "model_forward_time": 0.025074005126953125, + "step": 15997 + }, + { + "epoch": 2.440948486328125e-05, + "step": 15997, + "training_step_time": 0.1303861141204834 + }, + { + "epoch": 2.44110107421875e-05, + "model_forward_time": 0.025536537170410156, + "step": 15998 + }, + { + "epoch": 2.44110107421875e-05, + "step": 15998, + "training_step_time": 0.1416034698486328 + }, + { + "epoch": 2.441253662109375e-05, + "model_forward_time": 0.02530837059020996, + "step": 15999 + }, + { + "epoch": 2.441253662109375e-05, + "step": 15999, + "training_step_time": 0.11546540260314941 + }, + { + "epoch": 2.44140625e-05, + "grad_norm": 0.31458476185798645, + "learning_rate": 4.8622282881591906e-05, + "loss": 0.0117, + "step": 16000 + }, + { + "epoch": 2.44140625e-05, + "model_forward_time": 0.026625871658325195, + "step": 16000 + }, + { + "epoch": 2.44140625e-05, + "step": 16000, + "training_step_time": 0.11011123657226562 + }, + { + "epoch": 2.441558837890625e-05, + "model_forward_time": 0.023554325103759766, + "step": 16001 + }, + { + "epoch": 2.441558837890625e-05, + "step": 16001, + "training_step_time": 0.15520930290222168 + }, + { + "epoch": 2.44171142578125e-05, + "model_forward_time": 0.024297237396240234, + "step": 16002 + }, + { + "epoch": 2.44171142578125e-05, + "step": 16002, + "training_step_time": 0.13415908813476562 + }, + { + "epoch": 2.441864013671875e-05, + "model_forward_time": 0.02445244789123535, + "step": 16003 + }, + { + "epoch": 2.441864013671875e-05, + "step": 16003, + "training_step_time": 0.16687917709350586 + }, + { + "epoch": 2.4420166015625e-05, + "model_forward_time": 0.024739980697631836, + "step": 16004 + }, + { + "epoch": 2.4420166015625e-05, + "step": 16004, + "training_step_time": 0.13107967376708984 + }, + { + "epoch": 2.442169189453125e-05, + "model_forward_time": 0.024922609329223633, + "step": 16005 + }, + { + "epoch": 2.442169189453125e-05, + "step": 16005, + "training_step_time": 0.11837315559387207 + }, + { + "epoch": 2.44232177734375e-05, + "model_forward_time": 0.025043964385986328, + "step": 16006 + }, + { + "epoch": 2.44232177734375e-05, + "step": 16006, + "training_step_time": 0.14241456985473633 + }, + { + "epoch": 2.442474365234375e-05, + "model_forward_time": 0.02635049819946289, + "step": 16007 + }, + { + "epoch": 2.442474365234375e-05, + "step": 16007, + "training_step_time": 0.1542069911956787 + }, + { + "epoch": 2.442626953125e-05, + "model_forward_time": 0.024143218994140625, + "step": 16008 + }, + { + "epoch": 2.442626953125e-05, + "step": 16008, + "training_step_time": 0.22746992111206055 + }, + { + "epoch": 2.442779541015625e-05, + "model_forward_time": 0.02458500862121582, + "step": 16009 + }, + { + "epoch": 2.442779541015625e-05, + "step": 16009, + "training_step_time": 0.1101388931274414 + }, + { + "epoch": 2.44293212890625e-05, + "grad_norm": 0.28905314207077026, + "learning_rate": 4.85671889962174e-05, + "loss": 0.0126, + "step": 16010 + }, + { + "epoch": 2.44293212890625e-05, + "model_forward_time": 0.024885177612304688, + "step": 16010 + }, + { + "epoch": 2.44293212890625e-05, + "step": 16010, + "training_step_time": 0.1119379997253418 + }, + { + "epoch": 2.443084716796875e-05, + "model_forward_time": 0.025030851364135742, + "step": 16011 + }, + { + "epoch": 2.443084716796875e-05, + "step": 16011, + "training_step_time": 0.10663294792175293 + }, + { + "epoch": 2.4432373046875e-05, + "model_forward_time": 0.02551102638244629, + "step": 16012 + }, + { + "epoch": 2.4432373046875e-05, + "step": 16012, + "training_step_time": 0.10792016983032227 + }, + { + "epoch": 2.443389892578125e-05, + "model_forward_time": 0.02524113655090332, + "step": 16013 + }, + { + "epoch": 2.443389892578125e-05, + "step": 16013, + "training_step_time": 0.11145853996276855 + }, + { + "epoch": 2.44354248046875e-05, + "model_forward_time": 0.02544546127319336, + "step": 16014 + }, + { + "epoch": 2.44354248046875e-05, + "step": 16014, + "training_step_time": 0.10948348045349121 + }, + { + "epoch": 2.443695068359375e-05, + "model_forward_time": 0.025015830993652344, + "step": 16015 + }, + { + "epoch": 2.443695068359375e-05, + "step": 16015, + "training_step_time": 0.10814213752746582 + }, + { + "epoch": 2.44384765625e-05, + "model_forward_time": 0.02673625946044922, + "step": 16016 + }, + { + "epoch": 2.44384765625e-05, + "step": 16016, + "training_step_time": 0.11029171943664551 + }, + { + "epoch": 2.444000244140625e-05, + "model_forward_time": 0.02514338493347168, + "step": 16017 + }, + { + "epoch": 2.444000244140625e-05, + "step": 16017, + "training_step_time": 0.10781049728393555 + }, + { + "epoch": 2.44415283203125e-05, + "model_forward_time": 0.02511143684387207, + "step": 16018 + }, + { + "epoch": 2.44415283203125e-05, + "step": 16018, + "training_step_time": 0.10732817649841309 + }, + { + "epoch": 2.444305419921875e-05, + "model_forward_time": 0.02526378631591797, + "step": 16019 + }, + { + "epoch": 2.444305419921875e-05, + "step": 16019, + "training_step_time": 0.1095438003540039 + }, + { + "epoch": 2.4444580078125e-05, + "grad_norm": 0.22905737161636353, + "learning_rate": 4.851209685184338e-05, + "loss": 0.0104, + "step": 16020 + }, + { + "epoch": 2.4444580078125e-05, + "model_forward_time": 0.024835824966430664, + "step": 16020 + }, + { + "epoch": 2.4444580078125e-05, + "step": 16020, + "training_step_time": 0.10628747940063477 + }, + { + "epoch": 2.444610595703125e-05, + "model_forward_time": 0.024904966354370117, + "step": 16021 + }, + { + "epoch": 2.444610595703125e-05, + "step": 16021, + "training_step_time": 0.10562610626220703 + }, + { + "epoch": 2.44476318359375e-05, + "model_forward_time": 0.02544426918029785, + "step": 16022 + }, + { + "epoch": 2.44476318359375e-05, + "step": 16022, + "training_step_time": 0.10908746719360352 + }, + { + "epoch": 2.444915771484375e-05, + "model_forward_time": 0.025531768798828125, + "step": 16023 + }, + { + "epoch": 2.444915771484375e-05, + "step": 16023, + "training_step_time": 0.18296432495117188 + }, + { + "epoch": 2.445068359375e-05, + "model_forward_time": 0.02452850341796875, + "step": 16024 + }, + { + "epoch": 2.445068359375e-05, + "step": 16024, + "training_step_time": 0.20955276489257812 + }, + { + "epoch": 2.445220947265625e-05, + "model_forward_time": 0.02400803565979004, + "step": 16025 + }, + { + "epoch": 2.445220947265625e-05, + "step": 16025, + "training_step_time": 0.2079789638519287 + }, + { + "epoch": 2.44537353515625e-05, + "model_forward_time": 0.023763179779052734, + "step": 16026 + }, + { + "epoch": 2.44537353515625e-05, + "step": 16026, + "training_step_time": 0.20606422424316406 + }, + { + "epoch": 2.445526123046875e-05, + "model_forward_time": 0.024510860443115234, + "step": 16027 + }, + { + "epoch": 2.445526123046875e-05, + "step": 16027, + "training_step_time": 0.1596519947052002 + }, + { + "epoch": 2.4456787109375e-05, + "model_forward_time": 0.02387404441833496, + "step": 16028 + }, + { + "epoch": 2.4456787109375e-05, + "step": 16028, + "training_step_time": 0.1474475860595703 + }, + { + "epoch": 2.445831298828125e-05, + "model_forward_time": 0.0247194766998291, + "step": 16029 + }, + { + "epoch": 2.445831298828125e-05, + "step": 16029, + "training_step_time": 0.1114950180053711 + }, + { + "epoch": 2.44598388671875e-05, + "grad_norm": 0.12559278309345245, + "learning_rate": 4.845700651541199e-05, + "loss": 0.0129, + "step": 16030 + }, + { + "epoch": 2.44598388671875e-05, + "model_forward_time": 0.024735689163208008, + "step": 16030 + }, + { + "epoch": 2.44598388671875e-05, + "step": 16030, + "training_step_time": 0.10233592987060547 + }, + { + "epoch": 2.446136474609375e-05, + "model_forward_time": 0.024773120880126953, + "step": 16031 + }, + { + "epoch": 2.446136474609375e-05, + "step": 16031, + "training_step_time": 0.10128474235534668 + }, + { + "epoch": 2.4462890625e-05, + "model_forward_time": 0.024929523468017578, + "step": 16032 + }, + { + "epoch": 2.4462890625e-05, + "step": 16032, + "training_step_time": 0.10394048690795898 + }, + { + "epoch": 2.446441650390625e-05, + "model_forward_time": 0.02707695960998535, + "step": 16033 + }, + { + "epoch": 2.446441650390625e-05, + "step": 16033, + "training_step_time": 0.1057279109954834 + }, + { + "epoch": 2.44659423828125e-05, + "model_forward_time": 0.025636911392211914, + "step": 16034 + }, + { + "epoch": 2.44659423828125e-05, + "step": 16034, + "training_step_time": 0.10531949996948242 + }, + { + "epoch": 2.446746826171875e-05, + "model_forward_time": 0.025171518325805664, + "step": 16035 + }, + { + "epoch": 2.446746826171875e-05, + "step": 16035, + "training_step_time": 0.1037743091583252 + }, + { + "epoch": 2.4468994140625e-05, + "model_forward_time": 0.02550506591796875, + "step": 16036 + }, + { + "epoch": 2.4468994140625e-05, + "step": 16036, + "training_step_time": 0.10723590850830078 + }, + { + "epoch": 2.447052001953125e-05, + "model_forward_time": 0.025185585021972656, + "step": 16037 + }, + { + "epoch": 2.447052001953125e-05, + "step": 16037, + "training_step_time": 0.1066126823425293 + }, + { + "epoch": 2.44720458984375e-05, + "model_forward_time": 0.02517557144165039, + "step": 16038 + }, + { + "epoch": 2.44720458984375e-05, + "step": 16038, + "training_step_time": 0.10784101486206055 + }, + { + "epoch": 2.447357177734375e-05, + "model_forward_time": 0.02515125274658203, + "step": 16039 + }, + { + "epoch": 2.447357177734375e-05, + "step": 16039, + "training_step_time": 0.10886144638061523 + }, + { + "epoch": 2.447509765625e-05, + "grad_norm": 0.17869317531585693, + "learning_rate": 4.840191805386315e-05, + "loss": 0.0105, + "step": 16040 + }, + { + "epoch": 2.447509765625e-05, + "model_forward_time": 0.024691104888916016, + "step": 16040 + }, + { + "epoch": 2.447509765625e-05, + "step": 16040, + "training_step_time": 0.10958528518676758 + }, + { + "epoch": 2.447662353515625e-05, + "model_forward_time": 0.025226354598999023, + "step": 16041 + }, + { + "epoch": 2.447662353515625e-05, + "step": 16041, + "training_step_time": 0.10772013664245605 + }, + { + "epoch": 2.44781494140625e-05, + "model_forward_time": 0.025742292404174805, + "step": 16042 + }, + { + "epoch": 2.44781494140625e-05, + "step": 16042, + "training_step_time": 0.10521101951599121 + }, + { + "epoch": 2.447967529296875e-05, + "model_forward_time": 0.024721622467041016, + "step": 16043 + }, + { + "epoch": 2.447967529296875e-05, + "step": 16043, + "training_step_time": 0.11118674278259277 + }, + { + "epoch": 2.4481201171875e-05, + "model_forward_time": 0.024915456771850586, + "step": 16044 + }, + { + "epoch": 2.4481201171875e-05, + "step": 16044, + "training_step_time": 0.14704394340515137 + }, + { + "epoch": 2.448272705078125e-05, + "model_forward_time": 0.024907827377319336, + "step": 16045 + }, + { + "epoch": 2.448272705078125e-05, + "step": 16045, + "training_step_time": 0.142988920211792 + }, + { + "epoch": 2.44842529296875e-05, + "model_forward_time": 0.02510213851928711, + "step": 16046 + }, + { + "epoch": 2.44842529296875e-05, + "step": 16046, + "training_step_time": 0.19709253311157227 + }, + { + "epoch": 2.448577880859375e-05, + "model_forward_time": 0.0263364315032959, + "step": 16047 + }, + { + "epoch": 2.448577880859375e-05, + "step": 16047, + "training_step_time": 0.14141154289245605 + }, + { + "epoch": 2.44873046875e-05, + "model_forward_time": 0.024976491928100586, + "step": 16048 + }, + { + "epoch": 2.44873046875e-05, + "step": 16048, + "training_step_time": 0.13085460662841797 + }, + { + "epoch": 2.448883056640625e-05, + "model_forward_time": 0.02457737922668457, + "step": 16049 + }, + { + "epoch": 2.448883056640625e-05, + "step": 16049, + "training_step_time": 0.11881160736083984 + }, + { + "epoch": 2.44903564453125e-05, + "grad_norm": 0.19629864394664764, + "learning_rate": 4.834683153413459e-05, + "loss": 0.0104, + "step": 16050 + }, + { + "epoch": 2.44903564453125e-05, + "model_forward_time": 0.025118589401245117, + "step": 16050 + }, + { + "epoch": 2.44903564453125e-05, + "step": 16050, + "training_step_time": 0.13252973556518555 + }, + { + "epoch": 2.449188232421875e-05, + "model_forward_time": 0.024988412857055664, + "step": 16051 + }, + { + "epoch": 2.449188232421875e-05, + "step": 16051, + "training_step_time": 0.1635758876800537 + }, + { + "epoch": 2.4493408203125e-05, + "model_forward_time": 0.024254560470581055, + "step": 16052 + }, + { + "epoch": 2.4493408203125e-05, + "step": 16052, + "training_step_time": 0.22408628463745117 + }, + { + "epoch": 2.449493408203125e-05, + "model_forward_time": 0.02434825897216797, + "step": 16053 + }, + { + "epoch": 2.449493408203125e-05, + "step": 16053, + "training_step_time": 0.10737228393554688 + }, + { + "epoch": 2.44964599609375e-05, + "model_forward_time": 0.024283409118652344, + "step": 16054 + }, + { + "epoch": 2.44964599609375e-05, + "step": 16054, + "training_step_time": 0.1102597713470459 + }, + { + "epoch": 2.449798583984375e-05, + "model_forward_time": 0.0248873233795166, + "step": 16055 + }, + { + "epoch": 2.449798583984375e-05, + "step": 16055, + "training_step_time": 0.10745763778686523 + }, + { + "epoch": 2.449951171875e-05, + "model_forward_time": 0.025278568267822266, + "step": 16056 + }, + { + "epoch": 2.449951171875e-05, + "step": 16056, + "training_step_time": 0.10583353042602539 + }, + { + "epoch": 2.450103759765625e-05, + "model_forward_time": 0.025668859481811523, + "step": 16057 + }, + { + "epoch": 2.450103759765625e-05, + "step": 16057, + "training_step_time": 0.1065981388092041 + }, + { + "epoch": 2.45025634765625e-05, + "model_forward_time": 0.025279998779296875, + "step": 16058 + }, + { + "epoch": 2.45025634765625e-05, + "step": 16058, + "training_step_time": 0.1078941822052002 + }, + { + "epoch": 2.450408935546875e-05, + "model_forward_time": 0.02539801597595215, + "step": 16059 + }, + { + "epoch": 2.450408935546875e-05, + "step": 16059, + "training_step_time": 0.10501527786254883 + }, + { + "epoch": 2.4505615234375e-05, + "grad_norm": 0.2447129338979721, + "learning_rate": 4.82917470231616e-05, + "loss": 0.0103, + "step": 16060 + }, + { + "epoch": 2.4505615234375e-05, + "model_forward_time": 0.02509760856628418, + "step": 16060 + }, + { + "epoch": 2.4505615234375e-05, + "step": 16060, + "training_step_time": 0.10478544235229492 + }, + { + "epoch": 2.450714111328125e-05, + "model_forward_time": 0.024875402450561523, + "step": 16061 + }, + { + "epoch": 2.450714111328125e-05, + "step": 16061, + "training_step_time": 0.10678672790527344 + }, + { + "epoch": 2.45086669921875e-05, + "model_forward_time": 0.024808168411254883, + "step": 16062 + }, + { + "epoch": 2.45086669921875e-05, + "step": 16062, + "training_step_time": 0.10939908027648926 + }, + { + "epoch": 2.451019287109375e-05, + "model_forward_time": 0.02469491958618164, + "step": 16063 + }, + { + "epoch": 2.451019287109375e-05, + "step": 16063, + "training_step_time": 0.1053767204284668 + }, + { + "epoch": 2.451171875e-05, + "model_forward_time": 0.025281429290771484, + "step": 16064 + }, + { + "epoch": 2.451171875e-05, + "step": 16064, + "training_step_time": 0.10822534561157227 + }, + { + "epoch": 2.451324462890625e-05, + "model_forward_time": 0.024911165237426758, + "step": 16065 + }, + { + "epoch": 2.451324462890625e-05, + "step": 16065, + "training_step_time": 0.10770344734191895 + }, + { + "epoch": 2.45147705078125e-05, + "model_forward_time": 0.025319576263427734, + "step": 16066 + }, + { + "epoch": 2.45147705078125e-05, + "step": 16066, + "training_step_time": 0.1389482021331787 + }, + { + "epoch": 2.451629638671875e-05, + "model_forward_time": 0.025473594665527344, + "step": 16067 + }, + { + "epoch": 2.451629638671875e-05, + "step": 16067, + "training_step_time": 0.1396324634552002 + }, + { + "epoch": 2.4517822265625e-05, + "model_forward_time": 0.02412867546081543, + "step": 16068 + }, + { + "epoch": 2.4517822265625e-05, + "step": 16068, + "training_step_time": 0.10547018051147461 + }, + { + "epoch": 2.451934814453125e-05, + "model_forward_time": 0.024769306182861328, + "step": 16069 + }, + { + "epoch": 2.451934814453125e-05, + "step": 16069, + "training_step_time": 0.1202085018157959 + }, + { + "epoch": 2.45208740234375e-05, + "grad_norm": 0.34994441270828247, + "learning_rate": 4.823666458787705e-05, + "loss": 0.0098, + "step": 16070 + }, + { + "epoch": 2.45208740234375e-05, + "model_forward_time": 0.025077104568481445, + "step": 16070 + }, + { + "epoch": 2.45208740234375e-05, + "step": 16070, + "training_step_time": 0.11086845397949219 + }, + { + "epoch": 2.452239990234375e-05, + "model_forward_time": 0.025316476821899414, + "step": 16071 + }, + { + "epoch": 2.452239990234375e-05, + "step": 16071, + "training_step_time": 0.10955166816711426 + }, + { + "epoch": 2.452392578125e-05, + "model_forward_time": 0.025339365005493164, + "step": 16072 + }, + { + "epoch": 2.452392578125e-05, + "step": 16072, + "training_step_time": 0.20117926597595215 + }, + { + "epoch": 2.452545166015625e-05, + "model_forward_time": 0.02426004409790039, + "step": 16073 + }, + { + "epoch": 2.452545166015625e-05, + "step": 16073, + "training_step_time": 0.1071014404296875 + }, + { + "epoch": 2.45269775390625e-05, + "model_forward_time": 0.024445772171020508, + "step": 16074 + }, + { + "epoch": 2.45269775390625e-05, + "step": 16074, + "training_step_time": 0.10209822654724121 + }, + { + "epoch": 2.452850341796875e-05, + "model_forward_time": 0.024713754653930664, + "step": 16075 + }, + { + "epoch": 2.452850341796875e-05, + "step": 16075, + "training_step_time": 0.1036672592163086 + }, + { + "epoch": 2.4530029296875e-05, + "model_forward_time": 0.02493453025817871, + "step": 16076 + }, + { + "epoch": 2.4530029296875e-05, + "step": 16076, + "training_step_time": 0.10753417015075684 + }, + { + "epoch": 2.453155517578125e-05, + "model_forward_time": 0.025128841400146484, + "step": 16077 + }, + { + "epoch": 2.453155517578125e-05, + "step": 16077, + "training_step_time": 0.1046915054321289 + }, + { + "epoch": 2.45330810546875e-05, + "model_forward_time": 0.025221586227416992, + "step": 16078 + }, + { + "epoch": 2.45330810546875e-05, + "step": 16078, + "training_step_time": 0.10518550872802734 + }, + { + "epoch": 2.453460693359375e-05, + "model_forward_time": 0.025501728057861328, + "step": 16079 + }, + { + "epoch": 2.453460693359375e-05, + "step": 16079, + "training_step_time": 0.10677289962768555 + }, + { + "epoch": 2.45361328125e-05, + "grad_norm": 0.3603598177433014, + "learning_rate": 4.818158429521129e-05, + "loss": 0.0118, + "step": 16080 + }, + { + "epoch": 2.45361328125e-05, + "model_forward_time": 0.0250699520111084, + "step": 16080 + }, + { + "epoch": 2.45361328125e-05, + "step": 16080, + "training_step_time": 0.10611152648925781 + }, + { + "epoch": 2.453765869140625e-05, + "model_forward_time": 0.0255281925201416, + "step": 16081 + }, + { + "epoch": 2.453765869140625e-05, + "step": 16081, + "training_step_time": 0.1064307689666748 + }, + { + "epoch": 2.45391845703125e-05, + "model_forward_time": 0.024738073348999023, + "step": 16082 + }, + { + "epoch": 2.45391845703125e-05, + "step": 16082, + "training_step_time": 0.10457611083984375 + }, + { + "epoch": 2.454071044921875e-05, + "model_forward_time": 0.025248050689697266, + "step": 16083 + }, + { + "epoch": 2.454071044921875e-05, + "step": 16083, + "training_step_time": 0.10298562049865723 + }, + { + "epoch": 2.4542236328125e-05, + "model_forward_time": 0.025495290756225586, + "step": 16084 + }, + { + "epoch": 2.4542236328125e-05, + "step": 16084, + "training_step_time": 0.10384058952331543 + }, + { + "epoch": 2.454376220703125e-05, + "model_forward_time": 0.025461196899414062, + "step": 16085 + }, + { + "epoch": 2.454376220703125e-05, + "step": 16085, + "training_step_time": 0.10553216934204102 + }, + { + "epoch": 2.45452880859375e-05, + "model_forward_time": 0.025313377380371094, + "step": 16086 + }, + { + "epoch": 2.45452880859375e-05, + "step": 16086, + "training_step_time": 0.10474443435668945 + }, + { + "epoch": 2.454681396484375e-05, + "model_forward_time": 0.025262117385864258, + "step": 16087 + }, + { + "epoch": 2.454681396484375e-05, + "step": 16087, + "training_step_time": 0.11038088798522949 + }, + { + "epoch": 2.454833984375e-05, + "model_forward_time": 0.025281906127929688, + "step": 16088 + }, + { + "epoch": 2.454833984375e-05, + "step": 16088, + "training_step_time": 0.12487149238586426 + }, + { + "epoch": 2.454986572265625e-05, + "model_forward_time": 0.025722265243530273, + "step": 16089 + }, + { + "epoch": 2.454986572265625e-05, + "step": 16089, + "training_step_time": 0.10833883285522461 + }, + { + "epoch": 2.45513916015625e-05, + "grad_norm": 0.3049020767211914, + "learning_rate": 4.812650621209209e-05, + "loss": 0.0128, + "step": 16090 + }, + { + "epoch": 2.45513916015625e-05, + "model_forward_time": 0.028198957443237305, + "step": 16090 + }, + { + "epoch": 2.45513916015625e-05, + "step": 16090, + "training_step_time": 0.14754581451416016 + }, + { + "epoch": 2.455291748046875e-05, + "model_forward_time": 0.024680614471435547, + "step": 16091 + }, + { + "epoch": 2.455291748046875e-05, + "step": 16091, + "training_step_time": 0.18529558181762695 + }, + { + "epoch": 2.4554443359375e-05, + "model_forward_time": 0.02428436279296875, + "step": 16092 + }, + { + "epoch": 2.4554443359375e-05, + "step": 16092, + "training_step_time": 0.224656343460083 + }, + { + "epoch": 2.455596923828125e-05, + "model_forward_time": 0.02434372901916504, + "step": 16093 + }, + { + "epoch": 2.455596923828125e-05, + "step": 16093, + "training_step_time": 0.12918615341186523 + }, + { + "epoch": 2.45574951171875e-05, + "model_forward_time": 0.024224281311035156, + "step": 16094 + }, + { + "epoch": 2.45574951171875e-05, + "step": 16094, + "training_step_time": 0.21254897117614746 + }, + { + "epoch": 2.455902099609375e-05, + "model_forward_time": 0.02459120750427246, + "step": 16095 + }, + { + "epoch": 2.455902099609375e-05, + "step": 16095, + "training_step_time": 0.1494593620300293 + }, + { + "epoch": 2.4560546875e-05, + "model_forward_time": 0.0243682861328125, + "step": 16096 + }, + { + "epoch": 2.4560546875e-05, + "step": 16096, + "training_step_time": 0.1251683235168457 + }, + { + "epoch": 2.456207275390625e-05, + "model_forward_time": 0.02452540397644043, + "step": 16097 + }, + { + "epoch": 2.456207275390625e-05, + "step": 16097, + "training_step_time": 0.14014649391174316 + }, + { + "epoch": 2.45635986328125e-05, + "model_forward_time": 0.024792909622192383, + "step": 16098 + }, + { + "epoch": 2.45635986328125e-05, + "step": 16098, + "training_step_time": 0.2160184383392334 + }, + { + "epoch": 2.456512451171875e-05, + "model_forward_time": 0.024190187454223633, + "step": 16099 + }, + { + "epoch": 2.456512451171875e-05, + "step": 16099, + "training_step_time": 0.10810136795043945 + }, + { + "epoch": 2.4566650390625e-05, + "grad_norm": 0.2999719977378845, + "learning_rate": 4.8071430405444474e-05, + "loss": 0.0142, + "step": 16100 + }, + { + "epoch": 2.4566650390625e-05, + "model_forward_time": 0.024098873138427734, + "step": 16100 + }, + { + "epoch": 2.4566650390625e-05, + "step": 16100, + "training_step_time": 0.10593509674072266 + }, + { + "epoch": 2.456817626953125e-05, + "model_forward_time": 0.025705575942993164, + "step": 16101 + }, + { + "epoch": 2.456817626953125e-05, + "step": 16101, + "training_step_time": 0.11299729347229004 + }, + { + "epoch": 2.45697021484375e-05, + "model_forward_time": 0.02547454833984375, + "step": 16102 + }, + { + "epoch": 2.45697021484375e-05, + "step": 16102, + "training_step_time": 0.1086578369140625 + }, + { + "epoch": 2.457122802734375e-05, + "model_forward_time": 0.024875402450561523, + "step": 16103 + }, + { + "epoch": 2.457122802734375e-05, + "step": 16103, + "training_step_time": 0.10395574569702148 + }, + { + "epoch": 2.457275390625e-05, + "model_forward_time": 0.025222063064575195, + "step": 16104 + }, + { + "epoch": 2.457275390625e-05, + "step": 16104, + "training_step_time": 0.1070709228515625 + }, + { + "epoch": 2.457427978515625e-05, + "model_forward_time": 0.025050640106201172, + "step": 16105 + }, + { + "epoch": 2.457427978515625e-05, + "step": 16105, + "training_step_time": 0.10502123832702637 + }, + { + "epoch": 2.45758056640625e-05, + "model_forward_time": 0.02514481544494629, + "step": 16106 + }, + { + "epoch": 2.45758056640625e-05, + "step": 16106, + "training_step_time": 0.10582542419433594 + }, + { + "epoch": 2.457733154296875e-05, + "model_forward_time": 0.024735689163208008, + "step": 16107 + }, + { + "epoch": 2.457733154296875e-05, + "step": 16107, + "training_step_time": 0.10391736030578613 + }, + { + "epoch": 2.4578857421875e-05, + "model_forward_time": 0.027152538299560547, + "step": 16108 + }, + { + "epoch": 2.4578857421875e-05, + "step": 16108, + "training_step_time": 0.11144232749938965 + }, + { + "epoch": 2.458038330078125e-05, + "model_forward_time": 0.0252993106842041, + "step": 16109 + }, + { + "epoch": 2.458038330078125e-05, + "step": 16109, + "training_step_time": 0.10557746887207031 + }, + { + "epoch": 2.45819091796875e-05, + "grad_norm": 0.2928496301174164, + "learning_rate": 4.801635694219079e-05, + "loss": 0.0138, + "step": 16110 + }, + { + "epoch": 2.45819091796875e-05, + "model_forward_time": 0.028730392456054688, + "step": 16110 + }, + { + "epoch": 2.45819091796875e-05, + "step": 16110, + "training_step_time": 0.10891079902648926 + }, + { + "epoch": 2.458343505859375e-05, + "model_forward_time": 0.0252683162689209, + "step": 16111 + }, + { + "epoch": 2.458343505859375e-05, + "step": 16111, + "training_step_time": 0.1058034896850586 + }, + { + "epoch": 2.45849609375e-05, + "model_forward_time": 0.025069713592529297, + "step": 16112 + }, + { + "epoch": 2.45849609375e-05, + "step": 16112, + "training_step_time": 0.14451146125793457 + }, + { + "epoch": 2.458648681640625e-05, + "model_forward_time": 0.02535080909729004, + "step": 16113 + }, + { + "epoch": 2.458648681640625e-05, + "step": 16113, + "training_step_time": 0.13773822784423828 + }, + { + "epoch": 2.45880126953125e-05, + "model_forward_time": 0.025204181671142578, + "step": 16114 + }, + { + "epoch": 2.45880126953125e-05, + "step": 16114, + "training_step_time": 0.1137995719909668 + }, + { + "epoch": 2.458953857421875e-05, + "model_forward_time": 0.02487349510192871, + "step": 16115 + }, + { + "epoch": 2.458953857421875e-05, + "step": 16115, + "training_step_time": 0.1198887825012207 + }, + { + "epoch": 2.4591064453125e-05, + "model_forward_time": 0.024190902709960938, + "step": 16116 + }, + { + "epoch": 2.4591064453125e-05, + "step": 16116, + "training_step_time": 0.12017822265625 + }, + { + "epoch": 2.459259033203125e-05, + "model_forward_time": 0.024064302444458008, + "step": 16117 + }, + { + "epoch": 2.459259033203125e-05, + "step": 16117, + "training_step_time": 0.18487882614135742 + }, + { + "epoch": 2.45941162109375e-05, + "model_forward_time": 0.02485513687133789, + "step": 16118 + }, + { + "epoch": 2.45941162109375e-05, + "step": 16118, + "training_step_time": 0.12007832527160645 + }, + { + "epoch": 2.459564208984375e-05, + "model_forward_time": 0.024615764617919922, + "step": 16119 + }, + { + "epoch": 2.459564208984375e-05, + "step": 16119, + "training_step_time": 0.10958337783813477 + }, + { + "epoch": 2.459716796875e-05, + "grad_norm": 0.33059120178222656, + "learning_rate": 4.7961285889250475e-05, + "loss": 0.0176, + "step": 16120 + }, + { + "epoch": 2.459716796875e-05, + "model_forward_time": 0.025120973587036133, + "step": 16120 + }, + { + "epoch": 2.459716796875e-05, + "step": 16120, + "training_step_time": 0.10580110549926758 + }, + { + "epoch": 2.459869384765625e-05, + "model_forward_time": 0.02521371841430664, + "step": 16121 + }, + { + "epoch": 2.459869384765625e-05, + "step": 16121, + "training_step_time": 0.10853123664855957 + }, + { + "epoch": 2.46002197265625e-05, + "model_forward_time": 0.02423262596130371, + "step": 16122 + }, + { + "epoch": 2.46002197265625e-05, + "step": 16122, + "training_step_time": 0.10614967346191406 + }, + { + "epoch": 2.460174560546875e-05, + "model_forward_time": 0.025251150131225586, + "step": 16123 + }, + { + "epoch": 2.460174560546875e-05, + "step": 16123, + "training_step_time": 0.11067581176757812 + }, + { + "epoch": 2.4603271484375e-05, + "model_forward_time": 0.024424314498901367, + "step": 16124 + }, + { + "epoch": 2.4603271484375e-05, + "step": 16124, + "training_step_time": 0.10618782043457031 + }, + { + "epoch": 2.460479736328125e-05, + "model_forward_time": 0.023900747299194336, + "step": 16125 + }, + { + "epoch": 2.460479736328125e-05, + "step": 16125, + "training_step_time": 0.10705137252807617 + }, + { + "epoch": 2.46063232421875e-05, + "model_forward_time": 0.024680137634277344, + "step": 16126 + }, + { + "epoch": 2.46063232421875e-05, + "step": 16126, + "training_step_time": 0.10939192771911621 + }, + { + "epoch": 2.460784912109375e-05, + "model_forward_time": 0.02499699592590332, + "step": 16127 + }, + { + "epoch": 2.460784912109375e-05, + "step": 16127, + "training_step_time": 0.1062169075012207 + }, + { + "epoch": 2.4609375e-05, + "model_forward_time": 0.024322509765625, + "step": 16128 + }, + { + "epoch": 2.4609375e-05, + "step": 16128, + "training_step_time": 0.10481142997741699 + }, + { + "epoch": 2.461090087890625e-05, + "model_forward_time": 0.02375054359436035, + "step": 16129 + }, + { + "epoch": 2.461090087890625e-05, + "step": 16129, + "training_step_time": 0.10536932945251465 + }, + { + "epoch": 2.46124267578125e-05, + "grad_norm": 0.544135570526123, + "learning_rate": 4.790621731354003e-05, + "loss": 0.0164, + "step": 16130 + }, + { + "epoch": 2.46124267578125e-05, + "model_forward_time": 0.024738073348999023, + "step": 16130 + }, + { + "epoch": 2.46124267578125e-05, + "step": 16130, + "training_step_time": 0.10423541069030762 + }, + { + "epoch": 2.461395263671875e-05, + "model_forward_time": 0.024841785430908203, + "step": 16131 + }, + { + "epoch": 2.461395263671875e-05, + "step": 16131, + "training_step_time": 0.10787677764892578 + }, + { + "epoch": 2.4615478515625e-05, + "model_forward_time": 0.02568793296813965, + "step": 16132 + }, + { + "epoch": 2.4615478515625e-05, + "step": 16132, + "training_step_time": 0.10695147514343262 + }, + { + "epoch": 2.461700439453125e-05, + "model_forward_time": 0.025285005569458008, + "step": 16133 + }, + { + "epoch": 2.461700439453125e-05, + "step": 16133, + "training_step_time": 0.10512733459472656 + }, + { + "epoch": 2.46185302734375e-05, + "model_forward_time": 0.025441884994506836, + "step": 16134 + }, + { + "epoch": 2.46185302734375e-05, + "step": 16134, + "training_step_time": 0.10851693153381348 + }, + { + "epoch": 2.462005615234375e-05, + "model_forward_time": 0.026524782180786133, + "step": 16135 + }, + { + "epoch": 2.462005615234375e-05, + "step": 16135, + "training_step_time": 0.11173510551452637 + }, + { + "epoch": 2.462158203125e-05, + "model_forward_time": 0.02531719207763672, + "step": 16136 + }, + { + "epoch": 2.462158203125e-05, + "step": 16136, + "training_step_time": 0.14018917083740234 + }, + { + "epoch": 2.462310791015625e-05, + "model_forward_time": 0.026691675186157227, + "step": 16137 + }, + { + "epoch": 2.462310791015625e-05, + "step": 16137, + "training_step_time": 0.18733811378479004 + }, + { + "epoch": 2.46246337890625e-05, + "model_forward_time": 0.02558588981628418, + "step": 16138 + }, + { + "epoch": 2.46246337890625e-05, + "step": 16138, + "training_step_time": 0.11570215225219727 + }, + { + "epoch": 2.462615966796875e-05, + "model_forward_time": 0.025201082229614258, + "step": 16139 + }, + { + "epoch": 2.462615966796875e-05, + "step": 16139, + "training_step_time": 0.12868285179138184 + }, + { + "epoch": 2.4627685546875e-05, + "grad_norm": 0.3250410854816437, + "learning_rate": 4.785115128197298e-05, + "loss": 0.0098, + "step": 16140 + }, + { + "epoch": 2.4627685546875e-05, + "model_forward_time": 0.025620698928833008, + "step": 16140 + }, + { + "epoch": 2.4627685546875e-05, + "step": 16140, + "training_step_time": 0.11954641342163086 + }, + { + "epoch": 2.462921142578125e-05, + "model_forward_time": 0.026640892028808594, + "step": 16141 + }, + { + "epoch": 2.462921142578125e-05, + "step": 16141, + "training_step_time": 0.14858341217041016 + }, + { + "epoch": 2.46307373046875e-05, + "model_forward_time": 0.02458810806274414, + "step": 16142 + }, + { + "epoch": 2.46307373046875e-05, + "step": 16142, + "training_step_time": 0.1560044288635254 + }, + { + "epoch": 2.463226318359375e-05, + "model_forward_time": 0.023381948471069336, + "step": 16143 + }, + { + "epoch": 2.463226318359375e-05, + "step": 16143, + "training_step_time": 0.15671443939208984 + }, + { + "epoch": 2.46337890625e-05, + "model_forward_time": 0.025445938110351562, + "step": 16144 + }, + { + "epoch": 2.46337890625e-05, + "step": 16144, + "training_step_time": 0.1066136360168457 + }, + { + "epoch": 2.463531494140625e-05, + "model_forward_time": 0.025538206100463867, + "step": 16145 + }, + { + "epoch": 2.463531494140625e-05, + "step": 16145, + "training_step_time": 0.21464848518371582 + }, + { + "epoch": 2.46368408203125e-05, + "model_forward_time": 0.025081157684326172, + "step": 16146 + }, + { + "epoch": 2.46368408203125e-05, + "step": 16146, + "training_step_time": 0.11975431442260742 + }, + { + "epoch": 2.463836669921875e-05, + "model_forward_time": 0.026146650314331055, + "step": 16147 + }, + { + "epoch": 2.463836669921875e-05, + "step": 16147, + "training_step_time": 0.12375187873840332 + }, + { + "epoch": 2.4639892578125e-05, + "model_forward_time": 0.025850772857666016, + "step": 16148 + }, + { + "epoch": 2.4639892578125e-05, + "step": 16148, + "training_step_time": 0.11661672592163086 + }, + { + "epoch": 2.464141845703125e-05, + "model_forward_time": 0.02549600601196289, + "step": 16149 + }, + { + "epoch": 2.464141845703125e-05, + "step": 16149, + "training_step_time": 0.11336874961853027 + }, + { + "epoch": 2.46429443359375e-05, + "grad_norm": 0.22780925035476685, + "learning_rate": 4.779608786145974e-05, + "loss": 0.0165, + "step": 16150 + }, + { + "epoch": 2.46429443359375e-05, + "model_forward_time": 0.026188135147094727, + "step": 16150 + }, + { + "epoch": 2.46429443359375e-05, + "step": 16150, + "training_step_time": 0.11464548110961914 + }, + { + "epoch": 2.464447021484375e-05, + "model_forward_time": 0.02583622932434082, + "step": 16151 + }, + { + "epoch": 2.464447021484375e-05, + "step": 16151, + "training_step_time": 0.10999727249145508 + }, + { + "epoch": 2.464599609375e-05, + "model_forward_time": 0.025386810302734375, + "step": 16152 + }, + { + "epoch": 2.464599609375e-05, + "step": 16152, + "training_step_time": 0.10936284065246582 + }, + { + "epoch": 2.464752197265625e-05, + "model_forward_time": 0.026138782501220703, + "step": 16153 + }, + { + "epoch": 2.464752197265625e-05, + "step": 16153, + "training_step_time": 0.11400222778320312 + }, + { + "epoch": 2.46490478515625e-05, + "model_forward_time": 0.02529168128967285, + "step": 16154 + }, + { + "epoch": 2.46490478515625e-05, + "step": 16154, + "training_step_time": 0.11008834838867188 + }, + { + "epoch": 2.465057373046875e-05, + "model_forward_time": 0.02493572235107422, + "step": 16155 + }, + { + "epoch": 2.465057373046875e-05, + "step": 16155, + "training_step_time": 0.11313605308532715 + }, + { + "epoch": 2.4652099609375e-05, + "model_forward_time": 0.025528430938720703, + "step": 16156 + }, + { + "epoch": 2.4652099609375e-05, + "step": 16156, + "training_step_time": 0.11084222793579102 + }, + { + "epoch": 2.465362548828125e-05, + "model_forward_time": 0.025157928466796875, + "step": 16157 + }, + { + "epoch": 2.465362548828125e-05, + "step": 16157, + "training_step_time": 0.10454368591308594 + }, + { + "epoch": 2.46551513671875e-05, + "model_forward_time": 0.025124788284301758, + "step": 16158 + }, + { + "epoch": 2.46551513671875e-05, + "step": 16158, + "training_step_time": 0.15764188766479492 + }, + { + "epoch": 2.465667724609375e-05, + "model_forward_time": 0.024669885635375977, + "step": 16159 + }, + { + "epoch": 2.465667724609375e-05, + "step": 16159, + "training_step_time": 0.13961529731750488 + }, + { + "epoch": 2.4658203125e-05, + "grad_norm": 0.24136672914028168, + "learning_rate": 4.774102711890756e-05, + "loss": 0.0139, + "step": 16160 + }, + { + "epoch": 2.4658203125e-05, + "model_forward_time": 0.024538278579711914, + "step": 16160 + }, + { + "epoch": 2.4658203125e-05, + "step": 16160, + "training_step_time": 0.10979533195495605 + }, + { + "epoch": 2.465972900390625e-05, + "model_forward_time": 0.024740219116210938, + "step": 16161 + }, + { + "epoch": 2.465972900390625e-05, + "step": 16161, + "training_step_time": 0.11514472961425781 + }, + { + "epoch": 2.46612548828125e-05, + "model_forward_time": 0.0251772403717041, + "step": 16162 + }, + { + "epoch": 2.46612548828125e-05, + "step": 16162, + "training_step_time": 0.1228785514831543 + }, + { + "epoch": 2.466278076171875e-05, + "model_forward_time": 0.026154518127441406, + "step": 16163 + }, + { + "epoch": 2.466278076171875e-05, + "step": 16163, + "training_step_time": 0.10581207275390625 + }, + { + "epoch": 2.4664306640625e-05, + "model_forward_time": 0.025814056396484375, + "step": 16164 + }, + { + "epoch": 2.4664306640625e-05, + "step": 16164, + "training_step_time": 0.1941380500793457 + }, + { + "epoch": 2.466583251953125e-05, + "model_forward_time": 0.025127887725830078, + "step": 16165 + }, + { + "epoch": 2.466583251953125e-05, + "step": 16165, + "training_step_time": 0.10600805282592773 + }, + { + "epoch": 2.46673583984375e-05, + "model_forward_time": 0.025033235549926758, + "step": 16166 + }, + { + "epoch": 2.46673583984375e-05, + "step": 16166, + "training_step_time": 0.10438394546508789 + }, + { + "epoch": 2.466888427734375e-05, + "model_forward_time": 0.02610039710998535, + "step": 16167 + }, + { + "epoch": 2.466888427734375e-05, + "step": 16167, + "training_step_time": 0.10724472999572754 + }, + { + "epoch": 2.467041015625e-05, + "model_forward_time": 0.02712869644165039, + "step": 16168 + }, + { + "epoch": 2.467041015625e-05, + "step": 16168, + "training_step_time": 0.10924911499023438 + }, + { + "epoch": 2.467193603515625e-05, + "model_forward_time": 0.026007652282714844, + "step": 16169 + }, + { + "epoch": 2.467193603515625e-05, + "step": 16169, + "training_step_time": 0.10770177841186523 + }, + { + "epoch": 2.46734619140625e-05, + "grad_norm": 0.4083085060119629, + "learning_rate": 4.7685969121220456e-05, + "loss": 0.0118, + "step": 16170 + }, + { + "epoch": 2.46734619140625e-05, + "model_forward_time": 0.025290966033935547, + "step": 16170 + }, + { + "epoch": 2.46734619140625e-05, + "step": 16170, + "training_step_time": 0.10418009757995605 + }, + { + "epoch": 2.467498779296875e-05, + "model_forward_time": 0.02587413787841797, + "step": 16171 + }, + { + "epoch": 2.467498779296875e-05, + "step": 16171, + "training_step_time": 0.10625100135803223 + }, + { + "epoch": 2.4676513671875e-05, + "model_forward_time": 0.025673627853393555, + "step": 16172 + }, + { + "epoch": 2.4676513671875e-05, + "step": 16172, + "training_step_time": 0.1051933765411377 + }, + { + "epoch": 2.467803955078125e-05, + "model_forward_time": 0.025649070739746094, + "step": 16173 + }, + { + "epoch": 2.467803955078125e-05, + "step": 16173, + "training_step_time": 0.10828709602355957 + }, + { + "epoch": 2.46795654296875e-05, + "model_forward_time": 0.025463104248046875, + "step": 16174 + }, + { + "epoch": 2.46795654296875e-05, + "step": 16174, + "training_step_time": 0.1094357967376709 + }, + { + "epoch": 2.468109130859375e-05, + "model_forward_time": 0.025511741638183594, + "step": 16175 + }, + { + "epoch": 2.468109130859375e-05, + "step": 16175, + "training_step_time": 0.11036467552185059 + }, + { + "epoch": 2.46826171875e-05, + "model_forward_time": 0.025599002838134766, + "step": 16176 + }, + { + "epoch": 2.46826171875e-05, + "step": 16176, + "training_step_time": 0.10901355743408203 + }, + { + "epoch": 2.468414306640625e-05, + "model_forward_time": 0.025568485260009766, + "step": 16177 + }, + { + "epoch": 2.468414306640625e-05, + "step": 16177, + "training_step_time": 0.10729050636291504 + }, + { + "epoch": 2.46856689453125e-05, + "model_forward_time": 0.02558445930480957, + "step": 16178 + }, + { + "epoch": 2.46856689453125e-05, + "step": 16178, + "training_step_time": 0.10770964622497559 + }, + { + "epoch": 2.468719482421875e-05, + "model_forward_time": 0.02515268325805664, + "step": 16179 + }, + { + "epoch": 2.468719482421875e-05, + "step": 16179, + "training_step_time": 0.11266899108886719 + }, + { + "epoch": 2.4688720703125e-05, + "grad_norm": 0.2024080604314804, + "learning_rate": 4.7630913935299066e-05, + "loss": 0.0076, + "step": 16180 + }, + { + "epoch": 2.4688720703125e-05, + "model_forward_time": 0.02666330337524414, + "step": 16180 + }, + { + "epoch": 2.4688720703125e-05, + "step": 16180, + "training_step_time": 0.10637784004211426 + }, + { + "epoch": 2.469024658203125e-05, + "model_forward_time": 0.025818824768066406, + "step": 16181 + }, + { + "epoch": 2.469024658203125e-05, + "step": 16181, + "training_step_time": 0.10465383529663086 + }, + { + "epoch": 2.46917724609375e-05, + "model_forward_time": 0.025391340255737305, + "step": 16182 + }, + { + "epoch": 2.46917724609375e-05, + "step": 16182, + "training_step_time": 0.1235666275024414 + }, + { + "epoch": 2.469329833984375e-05, + "model_forward_time": 0.025562524795532227, + "step": 16183 + }, + { + "epoch": 2.469329833984375e-05, + "step": 16183, + "training_step_time": 0.12334942817687988 + }, + { + "epoch": 2.469482421875e-05, + "model_forward_time": 0.02539825439453125, + "step": 16184 + }, + { + "epoch": 2.469482421875e-05, + "step": 16184, + "training_step_time": 0.11265730857849121 + }, + { + "epoch": 2.469635009765625e-05, + "model_forward_time": 0.025794029235839844, + "step": 16185 + }, + { + "epoch": 2.469635009765625e-05, + "step": 16185, + "training_step_time": 0.12052798271179199 + }, + { + "epoch": 2.46978759765625e-05, + "model_forward_time": 0.029140949249267578, + "step": 16186 + }, + { + "epoch": 2.46978759765625e-05, + "step": 16186, + "training_step_time": 0.11626005172729492 + }, + { + "epoch": 2.469940185546875e-05, + "model_forward_time": 0.027049779891967773, + "step": 16187 + }, + { + "epoch": 2.469940185546875e-05, + "step": 16187, + "training_step_time": 0.19759511947631836 + }, + { + "epoch": 2.4700927734375e-05, + "model_forward_time": 0.02585911750793457, + "step": 16188 + }, + { + "epoch": 2.4700927734375e-05, + "step": 16188, + "training_step_time": 0.1720418930053711 + }, + { + "epoch": 2.470245361328125e-05, + "model_forward_time": 0.025360822677612305, + "step": 16189 + }, + { + "epoch": 2.470245361328125e-05, + "step": 16189, + "training_step_time": 0.1560804843902588 + }, + { + "epoch": 2.47039794921875e-05, + "grad_norm": 0.1812436431646347, + "learning_rate": 4.7575861628040635e-05, + "loss": 0.0112, + "step": 16190 + }, + { + "epoch": 2.47039794921875e-05, + "model_forward_time": 0.024546384811401367, + "step": 16190 + }, + { + "epoch": 2.47039794921875e-05, + "step": 16190, + "training_step_time": 0.1588447093963623 + }, + { + "epoch": 2.470550537109375e-05, + "model_forward_time": 0.02393651008605957, + "step": 16191 + }, + { + "epoch": 2.470550537109375e-05, + "step": 16191, + "training_step_time": 0.13080954551696777 + }, + { + "epoch": 2.470703125e-05, + "model_forward_time": 0.024852275848388672, + "step": 16192 + }, + { + "epoch": 2.470703125e-05, + "step": 16192, + "training_step_time": 0.11185646057128906 + }, + { + "epoch": 2.470855712890625e-05, + "model_forward_time": 0.026625633239746094, + "step": 16193 + }, + { + "epoch": 2.470855712890625e-05, + "step": 16193, + "training_step_time": 0.10564064979553223 + }, + { + "epoch": 2.47100830078125e-05, + "model_forward_time": 0.02526068687438965, + "step": 16194 + }, + { + "epoch": 2.47100830078125e-05, + "step": 16194, + "training_step_time": 0.10600495338439941 + }, + { + "epoch": 2.471160888671875e-05, + "model_forward_time": 0.02494978904724121, + "step": 16195 + }, + { + "epoch": 2.471160888671875e-05, + "step": 16195, + "training_step_time": 0.10504436492919922 + }, + { + "epoch": 2.4713134765625e-05, + "model_forward_time": 0.024843931198120117, + "step": 16196 + }, + { + "epoch": 2.4713134765625e-05, + "step": 16196, + "training_step_time": 0.10647225379943848 + }, + { + "epoch": 2.471466064453125e-05, + "model_forward_time": 0.02707505226135254, + "step": 16197 + }, + { + "epoch": 2.471466064453125e-05, + "step": 16197, + "training_step_time": 0.1067650318145752 + }, + { + "epoch": 2.47161865234375e-05, + "model_forward_time": 0.024207592010498047, + "step": 16198 + }, + { + "epoch": 2.47161865234375e-05, + "step": 16198, + "training_step_time": 0.10439109802246094 + }, + { + "epoch": 2.471771240234375e-05, + "model_forward_time": 0.025090456008911133, + "step": 16199 + }, + { + "epoch": 2.471771240234375e-05, + "step": 16199, + "training_step_time": 0.1062307357788086 + }, + { + "epoch": 2.471923828125e-05, + "grad_norm": 0.25928398966789246, + "learning_rate": 4.7520812266338885e-05, + "loss": 0.0125, + "step": 16200 + }, + { + "epoch": 2.471923828125e-05, + "model_forward_time": 0.02549576759338379, + "step": 16200 + }, + { + "epoch": 2.471923828125e-05, + "step": 16200, + "training_step_time": 0.10840487480163574 + }, + { + "epoch": 2.472076416015625e-05, + "model_forward_time": 0.025158166885375977, + "step": 16201 + }, + { + "epoch": 2.472076416015625e-05, + "step": 16201, + "training_step_time": 0.11226058006286621 + }, + { + "epoch": 2.47222900390625e-05, + "model_forward_time": 0.02422642707824707, + "step": 16202 + }, + { + "epoch": 2.47222900390625e-05, + "step": 16202, + "training_step_time": 0.11089587211608887 + }, + { + "epoch": 2.472381591796875e-05, + "model_forward_time": 0.025419235229492188, + "step": 16203 + }, + { + "epoch": 2.472381591796875e-05, + "step": 16203, + "training_step_time": 0.10629701614379883 + }, + { + "epoch": 2.4725341796875e-05, + "model_forward_time": 0.025270700454711914, + "step": 16204 + }, + { + "epoch": 2.4725341796875e-05, + "step": 16204, + "training_step_time": 0.10700774192810059 + }, + { + "epoch": 2.472686767578125e-05, + "model_forward_time": 0.025156021118164062, + "step": 16205 + }, + { + "epoch": 2.472686767578125e-05, + "step": 16205, + "training_step_time": 0.15929818153381348 + }, + { + "epoch": 2.47283935546875e-05, + "model_forward_time": 0.024748563766479492, + "step": 16206 + }, + { + "epoch": 2.47283935546875e-05, + "step": 16206, + "training_step_time": 0.13684296607971191 + }, + { + "epoch": 2.472991943359375e-05, + "model_forward_time": 0.024466753005981445, + "step": 16207 + }, + { + "epoch": 2.472991943359375e-05, + "step": 16207, + "training_step_time": 0.10899686813354492 + }, + { + "epoch": 2.47314453125e-05, + "model_forward_time": 0.025307655334472656, + "step": 16208 + }, + { + "epoch": 2.47314453125e-05, + "step": 16208, + "training_step_time": 0.12186980247497559 + }, + { + "epoch": 2.473297119140625e-05, + "model_forward_time": 0.02525639533996582, + "step": 16209 + }, + { + "epoch": 2.473297119140625e-05, + "step": 16209, + "training_step_time": 0.1183323860168457 + }, + { + "epoch": 2.47344970703125e-05, + "grad_norm": 0.20085115730762482, + "learning_rate": 4.746576591708403e-05, + "loss": 0.0173, + "step": 16210 + }, + { + "epoch": 2.47344970703125e-05, + "model_forward_time": 0.024528026580810547, + "step": 16210 + }, + { + "epoch": 2.47344970703125e-05, + "step": 16210, + "training_step_time": 0.1824934482574463 + }, + { + "epoch": 2.473602294921875e-05, + "model_forward_time": 0.024796724319458008, + "step": 16211 + }, + { + "epoch": 2.473602294921875e-05, + "step": 16211, + "training_step_time": 0.12742853164672852 + }, + { + "epoch": 2.4737548828125e-05, + "model_forward_time": 0.023262739181518555, + "step": 16212 + }, + { + "epoch": 2.4737548828125e-05, + "step": 16212, + "training_step_time": 0.1163945198059082 + }, + { + "epoch": 2.473907470703125e-05, + "model_forward_time": 0.02364373207092285, + "step": 16213 + }, + { + "epoch": 2.473907470703125e-05, + "step": 16213, + "training_step_time": 0.11248302459716797 + }, + { + "epoch": 2.47406005859375e-05, + "model_forward_time": 0.02513265609741211, + "step": 16214 + }, + { + "epoch": 2.47406005859375e-05, + "step": 16214, + "training_step_time": 0.1156315803527832 + }, + { + "epoch": 2.474212646484375e-05, + "model_forward_time": 0.025038957595825195, + "step": 16215 + }, + { + "epoch": 2.474212646484375e-05, + "step": 16215, + "training_step_time": 0.11132192611694336 + }, + { + "epoch": 2.474365234375e-05, + "model_forward_time": 0.024923086166381836, + "step": 16216 + }, + { + "epoch": 2.474365234375e-05, + "step": 16216, + "training_step_time": 0.11100625991821289 + }, + { + "epoch": 2.474517822265625e-05, + "model_forward_time": 0.025243043899536133, + "step": 16217 + }, + { + "epoch": 2.474517822265625e-05, + "step": 16217, + "training_step_time": 0.10742473602294922 + }, + { + "epoch": 2.47467041015625e-05, + "model_forward_time": 0.02485966682434082, + "step": 16218 + }, + { + "epoch": 2.47467041015625e-05, + "step": 16218, + "training_step_time": 0.10725021362304688 + }, + { + "epoch": 2.474822998046875e-05, + "model_forward_time": 0.025301694869995117, + "step": 16219 + }, + { + "epoch": 2.474822998046875e-05, + "step": 16219, + "training_step_time": 0.10614800453186035 + }, + { + "epoch": 2.4749755859375e-05, + "grad_norm": 0.19542065262794495, + "learning_rate": 4.741072264716252e-05, + "loss": 0.0091, + "step": 16220 + }, + { + "epoch": 2.4749755859375e-05, + "model_forward_time": 0.024325132369995117, + "step": 16220 + }, + { + "epoch": 2.4749755859375e-05, + "step": 16220, + "training_step_time": 0.10867118835449219 + }, + { + "epoch": 2.475128173828125e-05, + "model_forward_time": 0.024011611938476562, + "step": 16221 + }, + { + "epoch": 2.475128173828125e-05, + "step": 16221, + "training_step_time": 0.10500812530517578 + }, + { + "epoch": 2.47528076171875e-05, + "model_forward_time": 0.02426433563232422, + "step": 16222 + }, + { + "epoch": 2.47528076171875e-05, + "step": 16222, + "training_step_time": 0.10507440567016602 + }, + { + "epoch": 2.475433349609375e-05, + "model_forward_time": 0.02481698989868164, + "step": 16223 + }, + { + "epoch": 2.475433349609375e-05, + "step": 16223, + "training_step_time": 0.10480046272277832 + }, + { + "epoch": 2.4755859375e-05, + "model_forward_time": 0.024898290634155273, + "step": 16224 + }, + { + "epoch": 2.4755859375e-05, + "step": 16224, + "training_step_time": 0.10490274429321289 + }, + { + "epoch": 2.475738525390625e-05, + "model_forward_time": 0.02657914161682129, + "step": 16225 + }, + { + "epoch": 2.475738525390625e-05, + "step": 16225, + "training_step_time": 0.11086845397949219 + }, + { + "epoch": 2.47589111328125e-05, + "model_forward_time": 0.024889707565307617, + "step": 16226 + }, + { + "epoch": 2.47589111328125e-05, + "step": 16226, + "training_step_time": 0.11124300956726074 + }, + { + "epoch": 2.476043701171875e-05, + "model_forward_time": 0.02503037452697754, + "step": 16227 + }, + { + "epoch": 2.476043701171875e-05, + "step": 16227, + "training_step_time": 0.10475730895996094 + }, + { + "epoch": 2.4761962890625e-05, + "model_forward_time": 0.026055574417114258, + "step": 16228 + }, + { + "epoch": 2.4761962890625e-05, + "step": 16228, + "training_step_time": 0.10592436790466309 + }, + { + "epoch": 2.476348876953125e-05, + "model_forward_time": 0.024195194244384766, + "step": 16229 + }, + { + "epoch": 2.476348876953125e-05, + "step": 16229, + "training_step_time": 0.19240927696228027 + }, + { + "epoch": 2.47650146484375e-05, + "grad_norm": 0.18954356014728546, + "learning_rate": 4.735568252345718e-05, + "loss": 0.0205, + "step": 16230 + }, + { + "epoch": 2.47650146484375e-05, + "model_forward_time": 0.024260282516479492, + "step": 16230 + }, + { + "epoch": 2.47650146484375e-05, + "step": 16230, + "training_step_time": 0.1103520393371582 + }, + { + "epoch": 2.476654052734375e-05, + "model_forward_time": 0.024547815322875977, + "step": 16231 + }, + { + "epoch": 2.476654052734375e-05, + "step": 16231, + "training_step_time": 0.20546698570251465 + }, + { + "epoch": 2.476806640625e-05, + "model_forward_time": 0.024348974227905273, + "step": 16232 + }, + { + "epoch": 2.476806640625e-05, + "step": 16232, + "training_step_time": 0.1531684398651123 + }, + { + "epoch": 2.476959228515625e-05, + "model_forward_time": 0.02470707893371582, + "step": 16233 + }, + { + "epoch": 2.476959228515625e-05, + "step": 16233, + "training_step_time": 0.1925342082977295 + }, + { + "epoch": 2.47711181640625e-05, + "model_forward_time": 0.02418684959411621, + "step": 16234 + }, + { + "epoch": 2.47711181640625e-05, + "step": 16234, + "training_step_time": 0.1765756607055664 + }, + { + "epoch": 2.477264404296875e-05, + "model_forward_time": 0.024838924407958984, + "step": 16235 + }, + { + "epoch": 2.477264404296875e-05, + "step": 16235, + "training_step_time": 0.14877724647521973 + }, + { + "epoch": 2.4774169921875e-05, + "model_forward_time": 0.02419567108154297, + "step": 16236 + }, + { + "epoch": 2.4774169921875e-05, + "step": 16236, + "training_step_time": 0.2134108543395996 + }, + { + "epoch": 2.477569580078125e-05, + "model_forward_time": 0.025240182876586914, + "step": 16237 + }, + { + "epoch": 2.477569580078125e-05, + "step": 16237, + "training_step_time": 0.1162571907043457 + }, + { + "epoch": 2.47772216796875e-05, + "model_forward_time": 0.02402663230895996, + "step": 16238 + }, + { + "epoch": 2.47772216796875e-05, + "step": 16238, + "training_step_time": 0.10306692123413086 + }, + { + "epoch": 2.477874755859375e-05, + "model_forward_time": 0.025136470794677734, + "step": 16239 + }, + { + "epoch": 2.477874755859375e-05, + "step": 16239, + "training_step_time": 0.1045379638671875 + }, + { + "epoch": 2.47802734375e-05, + "grad_norm": 0.46666330099105835, + "learning_rate": 4.7300645612846907e-05, + "loss": 0.0177, + "step": 16240 + }, + { + "epoch": 2.47802734375e-05, + "model_forward_time": 0.02404022216796875, + "step": 16240 + }, + { + "epoch": 2.47802734375e-05, + "step": 16240, + "training_step_time": 0.11048674583435059 + }, + { + "epoch": 2.478179931640625e-05, + "model_forward_time": 0.025072336196899414, + "step": 16241 + }, + { + "epoch": 2.478179931640625e-05, + "step": 16241, + "training_step_time": 0.10549473762512207 + }, + { + "epoch": 2.47833251953125e-05, + "model_forward_time": 0.024855375289916992, + "step": 16242 + }, + { + "epoch": 2.47833251953125e-05, + "step": 16242, + "training_step_time": 0.10568571090698242 + }, + { + "epoch": 2.478485107421875e-05, + "model_forward_time": 0.02508711814880371, + "step": 16243 + }, + { + "epoch": 2.478485107421875e-05, + "step": 16243, + "training_step_time": 0.10364198684692383 + }, + { + "epoch": 2.4786376953125e-05, + "model_forward_time": 0.024747371673583984, + "step": 16244 + }, + { + "epoch": 2.4786376953125e-05, + "step": 16244, + "training_step_time": 0.10334634780883789 + }, + { + "epoch": 2.478790283203125e-05, + "model_forward_time": 0.025084972381591797, + "step": 16245 + }, + { + "epoch": 2.478790283203125e-05, + "step": 16245, + "training_step_time": 0.10556483268737793 + }, + { + "epoch": 2.47894287109375e-05, + "model_forward_time": 0.025446653366088867, + "step": 16246 + }, + { + "epoch": 2.47894287109375e-05, + "step": 16246, + "training_step_time": 0.10393023490905762 + }, + { + "epoch": 2.479095458984375e-05, + "model_forward_time": 0.025079011917114258, + "step": 16247 + }, + { + "epoch": 2.479095458984375e-05, + "step": 16247, + "training_step_time": 0.10346174240112305 + }, + { + "epoch": 2.479248046875e-05, + "model_forward_time": 0.025050640106201172, + "step": 16248 + }, + { + "epoch": 2.479248046875e-05, + "step": 16248, + "training_step_time": 0.10455822944641113 + }, + { + "epoch": 2.479400634765625e-05, + "model_forward_time": 0.02494955062866211, + "step": 16249 + }, + { + "epoch": 2.479400634765625e-05, + "step": 16249, + "training_step_time": 0.10714197158813477 + }, + { + "epoch": 2.47955322265625e-05, + "grad_norm": 0.2493179738521576, + "learning_rate": 4.7245611982206724e-05, + "loss": 0.0144, + "step": 16250 + }, + { + "epoch": 2.47955322265625e-05, + "model_forward_time": 0.025229454040527344, + "step": 16250 + }, + { + "epoch": 2.47955322265625e-05, + "step": 16250, + "training_step_time": 0.18499398231506348 + }, + { + "epoch": 2.479705810546875e-05, + "model_forward_time": 0.02404928207397461, + "step": 16251 + }, + { + "epoch": 2.479705810546875e-05, + "step": 16251, + "training_step_time": 0.14815235137939453 + }, + { + "epoch": 2.4798583984375e-05, + "model_forward_time": 0.02343606948852539, + "step": 16252 + }, + { + "epoch": 2.4798583984375e-05, + "step": 16252, + "training_step_time": 0.10651707649230957 + }, + { + "epoch": 2.480010986328125e-05, + "model_forward_time": 0.02485060691833496, + "step": 16253 + }, + { + "epoch": 2.480010986328125e-05, + "step": 16253, + "training_step_time": 0.13423585891723633 + }, + { + "epoch": 2.48016357421875e-05, + "model_forward_time": 0.024404525756835938, + "step": 16254 + }, + { + "epoch": 2.48016357421875e-05, + "step": 16254, + "training_step_time": 0.16561031341552734 + }, + { + "epoch": 2.480316162109375e-05, + "model_forward_time": 0.024136066436767578, + "step": 16255 + }, + { + "epoch": 2.480316162109375e-05, + "step": 16255, + "training_step_time": 0.14639639854431152 + }, + { + "epoch": 2.48046875e-05, + "model_forward_time": 0.024193525314331055, + "step": 16256 + }, + { + "epoch": 2.48046875e-05, + "step": 16256, + "training_step_time": 0.12460637092590332 + }, + { + "epoch": 2.480621337890625e-05, + "model_forward_time": 0.024394512176513672, + "step": 16257 + }, + { + "epoch": 2.480621337890625e-05, + "step": 16257, + "training_step_time": 0.12795114517211914 + }, + { + "epoch": 2.48077392578125e-05, + "model_forward_time": 0.025166034698486328, + "step": 16258 + }, + { + "epoch": 2.48077392578125e-05, + "step": 16258, + "training_step_time": 0.11902475357055664 + }, + { + "epoch": 2.480926513671875e-05, + "model_forward_time": 0.025125980377197266, + "step": 16259 + }, + { + "epoch": 2.480926513671875e-05, + "step": 16259, + "training_step_time": 0.1196904182434082 + }, + { + "epoch": 2.4810791015625e-05, + "grad_norm": 0.41187161207199097, + "learning_rate": 4.7190581698407725e-05, + "loss": 0.0178, + "step": 16260 + }, + { + "epoch": 2.4810791015625e-05, + "model_forward_time": 0.024883031845092773, + "step": 16260 + }, + { + "epoch": 2.4810791015625e-05, + "step": 16260, + "training_step_time": 0.11699986457824707 + }, + { + "epoch": 2.481231689453125e-05, + "model_forward_time": 0.025297880172729492, + "step": 16261 + }, + { + "epoch": 2.481231689453125e-05, + "step": 16261, + "training_step_time": 0.11632728576660156 + }, + { + "epoch": 2.48138427734375e-05, + "model_forward_time": 0.025328397750854492, + "step": 16262 + }, + { + "epoch": 2.48138427734375e-05, + "step": 16262, + "training_step_time": 0.1151590347290039 + }, + { + "epoch": 2.481536865234375e-05, + "model_forward_time": 0.025298357009887695, + "step": 16263 + }, + { + "epoch": 2.481536865234375e-05, + "step": 16263, + "training_step_time": 0.11055874824523926 + }, + { + "epoch": 2.481689453125e-05, + "model_forward_time": 0.023949623107910156, + "step": 16264 + }, + { + "epoch": 2.481689453125e-05, + "step": 16264, + "training_step_time": 0.10680747032165527 + }, + { + "epoch": 2.481842041015625e-05, + "model_forward_time": 0.024205446243286133, + "step": 16265 + }, + { + "epoch": 2.481842041015625e-05, + "step": 16265, + "training_step_time": 0.10588288307189941 + }, + { + "epoch": 2.48199462890625e-05, + "model_forward_time": 0.025264978408813477, + "step": 16266 + }, + { + "epoch": 2.48199462890625e-05, + "step": 16266, + "training_step_time": 0.1061089038848877 + }, + { + "epoch": 2.482147216796875e-05, + "model_forward_time": 0.025129079818725586, + "step": 16267 + }, + { + "epoch": 2.482147216796875e-05, + "step": 16267, + "training_step_time": 0.10616922378540039 + }, + { + "epoch": 2.4822998046875e-05, + "model_forward_time": 0.025879859924316406, + "step": 16268 + }, + { + "epoch": 2.4822998046875e-05, + "step": 16268, + "training_step_time": 0.10933637619018555 + }, + { + "epoch": 2.482452392578125e-05, + "model_forward_time": 0.02556324005126953, + "step": 16269 + }, + { + "epoch": 2.482452392578125e-05, + "step": 16269, + "training_step_time": 0.10754871368408203 + }, + { + "epoch": 2.48260498046875e-05, + "grad_norm": 0.29778537154197693, + "learning_rate": 4.713555482831688e-05, + "loss": 0.0116, + "step": 16270 + }, + { + "epoch": 2.48260498046875e-05, + "model_forward_time": 0.02476024627685547, + "step": 16270 + }, + { + "epoch": 2.48260498046875e-05, + "step": 16270, + "training_step_time": 0.10511088371276855 + }, + { + "epoch": 2.482757568359375e-05, + "model_forward_time": 0.024984359741210938, + "step": 16271 + }, + { + "epoch": 2.482757568359375e-05, + "step": 16271, + "training_step_time": 0.10584664344787598 + }, + { + "epoch": 2.48291015625e-05, + "model_forward_time": 0.024882793426513672, + "step": 16272 + }, + { + "epoch": 2.48291015625e-05, + "step": 16272, + "training_step_time": 0.1046748161315918 + }, + { + "epoch": 2.483062744140625e-05, + "model_forward_time": 0.025183677673339844, + "step": 16273 + }, + { + "epoch": 2.483062744140625e-05, + "step": 16273, + "training_step_time": 0.10336112976074219 + }, + { + "epoch": 2.48321533203125e-05, + "model_forward_time": 0.024118423461914062, + "step": 16274 + }, + { + "epoch": 2.48321533203125e-05, + "step": 16274, + "training_step_time": 0.13229107856750488 + }, + { + "epoch": 2.483367919921875e-05, + "model_forward_time": 0.025916576385498047, + "step": 16275 + }, + { + "epoch": 2.483367919921875e-05, + "step": 16275, + "training_step_time": 0.12342047691345215 + }, + { + "epoch": 2.4835205078125e-05, + "model_forward_time": 0.02852344512939453, + "step": 16276 + }, + { + "epoch": 2.4835205078125e-05, + "step": 16276, + "training_step_time": 0.1071012020111084 + }, + { + "epoch": 2.483673095703125e-05, + "model_forward_time": 0.026875019073486328, + "step": 16277 + }, + { + "epoch": 2.483673095703125e-05, + "step": 16277, + "training_step_time": 0.13825321197509766 + }, + { + "epoch": 2.48382568359375e-05, + "model_forward_time": 0.026206493377685547, + "step": 16278 + }, + { + "epoch": 2.48382568359375e-05, + "step": 16278, + "training_step_time": 0.19133424758911133 + }, + { + "epoch": 2.483978271484375e-05, + "model_forward_time": 0.02421116828918457, + "step": 16279 + }, + { + "epoch": 2.483978271484375e-05, + "step": 16279, + "training_step_time": 0.15685462951660156 + }, + { + "epoch": 2.484130859375e-05, + "grad_norm": 0.24624891579151154, + "learning_rate": 4.708053143879701e-05, + "loss": 0.016, + "step": 16280 + }, + { + "epoch": 2.484130859375e-05, + "model_forward_time": 0.023868560791015625, + "step": 16280 + }, + { + "epoch": 2.484130859375e-05, + "step": 16280, + "training_step_time": 0.13067984580993652 + }, + { + "epoch": 2.484283447265625e-05, + "model_forward_time": 0.0245361328125, + "step": 16281 + }, + { + "epoch": 2.484283447265625e-05, + "step": 16281, + "training_step_time": 0.12362837791442871 + }, + { + "epoch": 2.48443603515625e-05, + "model_forward_time": 0.02461719512939453, + "step": 16282 + }, + { + "epoch": 2.48443603515625e-05, + "step": 16282, + "training_step_time": 0.20903635025024414 + }, + { + "epoch": 2.484588623046875e-05, + "model_forward_time": 0.02420830726623535, + "step": 16283 + }, + { + "epoch": 2.484588623046875e-05, + "step": 16283, + "training_step_time": 0.11212754249572754 + }, + { + "epoch": 2.4847412109375e-05, + "model_forward_time": 0.02399158477783203, + "step": 16284 + }, + { + "epoch": 2.4847412109375e-05, + "step": 16284, + "training_step_time": 0.10303592681884766 + }, + { + "epoch": 2.484893798828125e-05, + "model_forward_time": 0.024989604949951172, + "step": 16285 + }, + { + "epoch": 2.484893798828125e-05, + "step": 16285, + "training_step_time": 0.10585665702819824 + }, + { + "epoch": 2.48504638671875e-05, + "model_forward_time": 0.025114774703979492, + "step": 16286 + }, + { + "epoch": 2.48504638671875e-05, + "step": 16286, + "training_step_time": 0.10547614097595215 + }, + { + "epoch": 2.485198974609375e-05, + "model_forward_time": 0.024865150451660156, + "step": 16287 + }, + { + "epoch": 2.485198974609375e-05, + "step": 16287, + "training_step_time": 0.10534405708312988 + }, + { + "epoch": 2.4853515625e-05, + "model_forward_time": 0.024904966354370117, + "step": 16288 + }, + { + "epoch": 2.4853515625e-05, + "step": 16288, + "training_step_time": 0.10594511032104492 + }, + { + "epoch": 2.485504150390625e-05, + "model_forward_time": 0.025199174880981445, + "step": 16289 + }, + { + "epoch": 2.485504150390625e-05, + "step": 16289, + "training_step_time": 0.10417437553405762 + }, + { + "epoch": 2.48565673828125e-05, + "grad_norm": 0.1607915312051773, + "learning_rate": 4.702551159670672e-05, + "loss": 0.0117, + "step": 16290 + }, + { + "epoch": 2.48565673828125e-05, + "model_forward_time": 0.024855852127075195, + "step": 16290 + }, + { + "epoch": 2.48565673828125e-05, + "step": 16290, + "training_step_time": 0.10786008834838867 + }, + { + "epoch": 2.485809326171875e-05, + "model_forward_time": 0.024591684341430664, + "step": 16291 + }, + { + "epoch": 2.485809326171875e-05, + "step": 16291, + "training_step_time": 0.10679364204406738 + }, + { + "epoch": 2.4859619140625e-05, + "model_forward_time": 0.023889541625976562, + "step": 16292 + }, + { + "epoch": 2.4859619140625e-05, + "step": 16292, + "training_step_time": 0.10312747955322266 + }, + { + "epoch": 2.486114501953125e-05, + "model_forward_time": 0.024994373321533203, + "step": 16293 + }, + { + "epoch": 2.486114501953125e-05, + "step": 16293, + "training_step_time": 0.10481023788452148 + }, + { + "epoch": 2.48626708984375e-05, + "model_forward_time": 0.025351524353027344, + "step": 16294 + }, + { + "epoch": 2.48626708984375e-05, + "step": 16294, + "training_step_time": 0.10539054870605469 + }, + { + "epoch": 2.486419677734375e-05, + "model_forward_time": 0.025487899780273438, + "step": 16295 + }, + { + "epoch": 2.486419677734375e-05, + "step": 16295, + "training_step_time": 0.10578775405883789 + }, + { + "epoch": 2.486572265625e-05, + "model_forward_time": 0.024945497512817383, + "step": 16296 + }, + { + "epoch": 2.486572265625e-05, + "step": 16296, + "training_step_time": 0.19185590744018555 + }, + { + "epoch": 2.486724853515625e-05, + "model_forward_time": 0.024527788162231445, + "step": 16297 + }, + { + "epoch": 2.486724853515625e-05, + "step": 16297, + "training_step_time": 0.13971376419067383 + }, + { + "epoch": 2.48687744140625e-05, + "model_forward_time": 0.02409672737121582, + "step": 16298 + }, + { + "epoch": 2.48687744140625e-05, + "step": 16298, + "training_step_time": 0.11168146133422852 + }, + { + "epoch": 2.487030029296875e-05, + "model_forward_time": 0.02462482452392578, + "step": 16299 + }, + { + "epoch": 2.487030029296875e-05, + "step": 16299, + "training_step_time": 0.11505866050720215 + }, + { + "epoch": 2.4871826171875e-05, + "grad_norm": 0.15231294929981232, + "learning_rate": 4.697049536890033e-05, + "loss": 0.0067, + "step": 16300 + }, + { + "epoch": 2.4871826171875e-05, + "model_forward_time": 0.025057315826416016, + "step": 16300 + }, + { + "epoch": 2.4871826171875e-05, + "step": 16300, + "training_step_time": 0.11022377014160156 + }, + { + "epoch": 2.487335205078125e-05, + "model_forward_time": 0.02479267120361328, + "step": 16301 + }, + { + "epoch": 2.487335205078125e-05, + "step": 16301, + "training_step_time": 0.10480833053588867 + }, + { + "epoch": 2.48748779296875e-05, + "model_forward_time": 0.024980783462524414, + "step": 16302 + }, + { + "epoch": 2.48748779296875e-05, + "step": 16302, + "training_step_time": 0.1955420970916748 + }, + { + "epoch": 2.487640380859375e-05, + "model_forward_time": 0.02412867546081543, + "step": 16303 + }, + { + "epoch": 2.487640380859375e-05, + "step": 16303, + "training_step_time": 0.10394859313964844 + }, + { + "epoch": 2.48779296875e-05, + "model_forward_time": 0.024408340454101562, + "step": 16304 + }, + { + "epoch": 2.48779296875e-05, + "step": 16304, + "training_step_time": 0.10170817375183105 + }, + { + "epoch": 2.487945556640625e-05, + "model_forward_time": 0.024803876876831055, + "step": 16305 + }, + { + "epoch": 2.487945556640625e-05, + "step": 16305, + "training_step_time": 0.1048121452331543 + }, + { + "epoch": 2.48809814453125e-05, + "model_forward_time": 0.02779698371887207, + "step": 16306 + }, + { + "epoch": 2.48809814453125e-05, + "step": 16306, + "training_step_time": 0.10891389846801758 + }, + { + "epoch": 2.488250732421875e-05, + "model_forward_time": 0.02533578872680664, + "step": 16307 + }, + { + "epoch": 2.488250732421875e-05, + "step": 16307, + "training_step_time": 0.10491514205932617 + }, + { + "epoch": 2.4884033203125e-05, + "model_forward_time": 0.025112628936767578, + "step": 16308 + }, + { + "epoch": 2.4884033203125e-05, + "step": 16308, + "training_step_time": 0.10504293441772461 + }, + { + "epoch": 2.488555908203125e-05, + "model_forward_time": 0.025204896926879883, + "step": 16309 + }, + { + "epoch": 2.488555908203125e-05, + "step": 16309, + "training_step_time": 0.10461282730102539 + }, + { + "epoch": 2.48870849609375e-05, + "grad_norm": 0.3484882712364197, + "learning_rate": 4.691548282222771e-05, + "loss": 0.0161, + "step": 16310 + }, + { + "epoch": 2.48870849609375e-05, + "model_forward_time": 0.024941682815551758, + "step": 16310 + }, + { + "epoch": 2.48870849609375e-05, + "step": 16310, + "training_step_time": 0.10410261154174805 + }, + { + "epoch": 2.488861083984375e-05, + "model_forward_time": 0.025418996810913086, + "step": 16311 + }, + { + "epoch": 2.488861083984375e-05, + "step": 16311, + "training_step_time": 0.10486364364624023 + }, + { + "epoch": 2.489013671875e-05, + "model_forward_time": 0.02506732940673828, + "step": 16312 + }, + { + "epoch": 2.489013671875e-05, + "step": 16312, + "training_step_time": 0.10433101654052734 + }, + { + "epoch": 2.489166259765625e-05, + "model_forward_time": 0.025041580200195312, + "step": 16313 + }, + { + "epoch": 2.489166259765625e-05, + "step": 16313, + "training_step_time": 0.105072021484375 + }, + { + "epoch": 2.48931884765625e-05, + "model_forward_time": 0.02478504180908203, + "step": 16314 + }, + { + "epoch": 2.48931884765625e-05, + "step": 16314, + "training_step_time": 0.10431265830993652 + }, + { + "epoch": 2.489471435546875e-05, + "model_forward_time": 0.025422096252441406, + "step": 16315 + }, + { + "epoch": 2.489471435546875e-05, + "step": 16315, + "training_step_time": 0.10451745986938477 + }, + { + "epoch": 2.4896240234375e-05, + "model_forward_time": 0.024912118911743164, + "step": 16316 + }, + { + "epoch": 2.4896240234375e-05, + "step": 16316, + "training_step_time": 0.11055994033813477 + }, + { + "epoch": 2.489776611328125e-05, + "model_forward_time": 0.02508997917175293, + "step": 16317 + }, + { + "epoch": 2.489776611328125e-05, + "step": 16317, + "training_step_time": 0.1047368049621582 + }, + { + "epoch": 2.48992919921875e-05, + "model_forward_time": 0.02500462532043457, + "step": 16318 + }, + { + "epoch": 2.48992919921875e-05, + "step": 16318, + "training_step_time": 0.1147313117980957 + }, + { + "epoch": 2.490081787109375e-05, + "model_forward_time": 0.025377273559570312, + "step": 16319 + }, + { + "epoch": 2.490081787109375e-05, + "step": 16319, + "training_step_time": 0.1119074821472168 + }, + { + "epoch": 2.490234375e-05, + "grad_norm": 0.3668254017829895, + "learning_rate": 4.6860474023534335e-05, + "loss": 0.0101, + "step": 16320 + }, + { + "epoch": 2.490234375e-05, + "model_forward_time": 0.024182796478271484, + "step": 16320 + }, + { + "epoch": 2.490234375e-05, + "step": 16320, + "training_step_time": 0.10673284530639648 + }, + { + "epoch": 2.490386962890625e-05, + "model_forward_time": 0.025019407272338867, + "step": 16321 + }, + { + "epoch": 2.490386962890625e-05, + "step": 16321, + "training_step_time": 0.1049048900604248 + }, + { + "epoch": 2.49053955078125e-05, + "model_forward_time": 0.024164438247680664, + "step": 16322 + }, + { + "epoch": 2.49053955078125e-05, + "step": 16322, + "training_step_time": 0.15128326416015625 + }, + { + "epoch": 2.490692138671875e-05, + "model_forward_time": 0.02541065216064453, + "step": 16323 + }, + { + "epoch": 2.490692138671875e-05, + "step": 16323, + "training_step_time": 0.10972213745117188 + }, + { + "epoch": 2.4908447265625e-05, + "model_forward_time": 0.02537703514099121, + "step": 16324 + }, + { + "epoch": 2.4908447265625e-05, + "step": 16324, + "training_step_time": 0.16443753242492676 + }, + { + "epoch": 2.490997314453125e-05, + "model_forward_time": 0.02503347396850586, + "step": 16325 + }, + { + "epoch": 2.490997314453125e-05, + "step": 16325, + "training_step_time": 0.17136788368225098 + }, + { + "epoch": 2.49114990234375e-05, + "model_forward_time": 0.024497270584106445, + "step": 16326 + }, + { + "epoch": 2.49114990234375e-05, + "step": 16326, + "training_step_time": 0.17516231536865234 + }, + { + "epoch": 2.491302490234375e-05, + "model_forward_time": 0.024019956588745117, + "step": 16327 + }, + { + "epoch": 2.491302490234375e-05, + "step": 16327, + "training_step_time": 0.1611323356628418 + }, + { + "epoch": 2.491455078125e-05, + "model_forward_time": 0.0241086483001709, + "step": 16328 + }, + { + "epoch": 2.491455078125e-05, + "step": 16328, + "training_step_time": 0.19383645057678223 + }, + { + "epoch": 2.491607666015625e-05, + "model_forward_time": 0.024566650390625, + "step": 16329 + }, + { + "epoch": 2.491607666015625e-05, + "step": 16329, + "training_step_time": 0.10863876342773438 + }, + { + "epoch": 2.49176025390625e-05, + "grad_norm": 0.2264672964811325, + "learning_rate": 4.680546903966106e-05, + "loss": 0.0184, + "step": 16330 + }, + { + "epoch": 2.49176025390625e-05, + "model_forward_time": 0.024892091751098633, + "step": 16330 + }, + { + "epoch": 2.49176025390625e-05, + "step": 16330, + "training_step_time": 0.10461735725402832 + }, + { + "epoch": 2.491912841796875e-05, + "model_forward_time": 0.025025129318237305, + "step": 16331 + }, + { + "epoch": 2.491912841796875e-05, + "step": 16331, + "training_step_time": 0.10393857955932617 + }, + { + "epoch": 2.4920654296875e-05, + "model_forward_time": 0.024837017059326172, + "step": 16332 + }, + { + "epoch": 2.4920654296875e-05, + "step": 16332, + "training_step_time": 0.10575008392333984 + }, + { + "epoch": 2.492218017578125e-05, + "model_forward_time": 0.026929140090942383, + "step": 16333 + }, + { + "epoch": 2.492218017578125e-05, + "step": 16333, + "training_step_time": 0.10875463485717773 + }, + { + "epoch": 2.49237060546875e-05, + "model_forward_time": 0.025187969207763672, + "step": 16334 + }, + { + "epoch": 2.49237060546875e-05, + "step": 16334, + "training_step_time": 0.1756439208984375 + }, + { + "epoch": 2.492523193359375e-05, + "model_forward_time": 0.02461981773376465, + "step": 16335 + }, + { + "epoch": 2.492523193359375e-05, + "step": 16335, + "training_step_time": 0.1862964630126953 + }, + { + "epoch": 2.49267578125e-05, + "model_forward_time": 0.024547576904296875, + "step": 16336 + }, + { + "epoch": 2.49267578125e-05, + "step": 16336, + "training_step_time": 0.18993282318115234 + }, + { + "epoch": 2.492828369140625e-05, + "model_forward_time": 0.024155855178833008, + "step": 16337 + }, + { + "epoch": 2.492828369140625e-05, + "step": 16337, + "training_step_time": 0.17677855491638184 + }, + { + "epoch": 2.49298095703125e-05, + "model_forward_time": 0.024112701416015625, + "step": 16338 + }, + { + "epoch": 2.49298095703125e-05, + "step": 16338, + "training_step_time": 0.17261219024658203 + }, + { + "epoch": 2.493133544921875e-05, + "model_forward_time": 0.024508953094482422, + "step": 16339 + }, + { + "epoch": 2.493133544921875e-05, + "step": 16339, + "training_step_time": 0.15276527404785156 + }, + { + "epoch": 2.4932861328125e-05, + "grad_norm": 0.2060825675725937, + "learning_rate": 4.6750467937444115e-05, + "loss": 0.0092, + "step": 16340 + }, + { + "epoch": 2.4932861328125e-05, + "model_forward_time": 0.023807287216186523, + "step": 16340 + }, + { + "epoch": 2.4932861328125e-05, + "step": 16340, + "training_step_time": 0.13687825202941895 + }, + { + "epoch": 2.493438720703125e-05, + "model_forward_time": 0.024261951446533203, + "step": 16341 + }, + { + "epoch": 2.493438720703125e-05, + "step": 16341, + "training_step_time": 0.14150047302246094 + }, + { + "epoch": 2.49359130859375e-05, + "model_forward_time": 0.02357625961303711, + "step": 16342 + }, + { + "epoch": 2.49359130859375e-05, + "step": 16342, + "training_step_time": 0.1299598217010498 + }, + { + "epoch": 2.493743896484375e-05, + "model_forward_time": 0.023953676223754883, + "step": 16343 + }, + { + "epoch": 2.493743896484375e-05, + "step": 16343, + "training_step_time": 0.12199187278747559 + }, + { + "epoch": 2.493896484375e-05, + "model_forward_time": 0.024519681930541992, + "step": 16344 + }, + { + "epoch": 2.493896484375e-05, + "step": 16344, + "training_step_time": 0.15520262718200684 + }, + { + "epoch": 2.494049072265625e-05, + "model_forward_time": 0.024527549743652344, + "step": 16345 + }, + { + "epoch": 2.494049072265625e-05, + "step": 16345, + "training_step_time": 0.14768505096435547 + }, + { + "epoch": 2.49420166015625e-05, + "model_forward_time": 0.02425408363342285, + "step": 16346 + }, + { + "epoch": 2.49420166015625e-05, + "step": 16346, + "training_step_time": 0.112396240234375 + }, + { + "epoch": 2.494354248046875e-05, + "model_forward_time": 0.023823261260986328, + "step": 16347 + }, + { + "epoch": 2.494354248046875e-05, + "step": 16347, + "training_step_time": 0.11086869239807129 + }, + { + "epoch": 2.4945068359375e-05, + "model_forward_time": 0.025020122528076172, + "step": 16348 + }, + { + "epoch": 2.4945068359375e-05, + "step": 16348, + "training_step_time": 0.10997748374938965 + }, + { + "epoch": 2.494659423828125e-05, + "model_forward_time": 0.025551795959472656, + "step": 16349 + }, + { + "epoch": 2.494659423828125e-05, + "step": 16349, + "training_step_time": 0.10901117324829102 + }, + { + "epoch": 2.49481201171875e-05, + "grad_norm": 0.11935984343290329, + "learning_rate": 4.669547078371504e-05, + "loss": 0.0065, + "step": 16350 + }, + { + "epoch": 2.49481201171875e-05, + "model_forward_time": 0.025210142135620117, + "step": 16350 + }, + { + "epoch": 2.49481201171875e-05, + "step": 16350, + "training_step_time": 0.10570192337036133 + }, + { + "epoch": 2.494964599609375e-05, + "model_forward_time": 0.02564096450805664, + "step": 16351 + }, + { + "epoch": 2.494964599609375e-05, + "step": 16351, + "training_step_time": 0.10630607604980469 + }, + { + "epoch": 2.4951171875e-05, + "model_forward_time": 0.02502918243408203, + "step": 16352 + }, + { + "epoch": 2.4951171875e-05, + "step": 16352, + "training_step_time": 0.10651874542236328 + }, + { + "epoch": 2.495269775390625e-05, + "model_forward_time": 0.02509450912475586, + "step": 16353 + }, + { + "epoch": 2.495269775390625e-05, + "step": 16353, + "training_step_time": 0.10657787322998047 + }, + { + "epoch": 2.49542236328125e-05, + "model_forward_time": 0.025289058685302734, + "step": 16354 + }, + { + "epoch": 2.49542236328125e-05, + "step": 16354, + "training_step_time": 0.10852885246276855 + }, + { + "epoch": 2.495574951171875e-05, + "model_forward_time": 0.026223182678222656, + "step": 16355 + }, + { + "epoch": 2.495574951171875e-05, + "step": 16355, + "training_step_time": 0.10669922828674316 + }, + { + "epoch": 2.4957275390625e-05, + "model_forward_time": 0.025287628173828125, + "step": 16356 + }, + { + "epoch": 2.4957275390625e-05, + "step": 16356, + "training_step_time": 0.1076345443725586 + }, + { + "epoch": 2.495880126953125e-05, + "model_forward_time": 0.025488853454589844, + "step": 16357 + }, + { + "epoch": 2.495880126953125e-05, + "step": 16357, + "training_step_time": 0.10650825500488281 + }, + { + "epoch": 2.49603271484375e-05, + "model_forward_time": 0.023928403854370117, + "step": 16358 + }, + { + "epoch": 2.49603271484375e-05, + "step": 16358, + "training_step_time": 0.10686659812927246 + }, + { + "epoch": 2.496185302734375e-05, + "model_forward_time": 0.02517867088317871, + "step": 16359 + }, + { + "epoch": 2.496185302734375e-05, + "step": 16359, + "training_step_time": 0.10415863990783691 + }, + { + "epoch": 2.496337890625e-05, + "grad_norm": 0.2253582924604416, + "learning_rate": 4.664047764530055e-05, + "loss": 0.0135, + "step": 16360 + }, + { + "epoch": 2.496337890625e-05, + "model_forward_time": 0.025079727172851562, + "step": 16360 + }, + { + "epoch": 2.496337890625e-05, + "step": 16360, + "training_step_time": 0.10331606864929199 + }, + { + "epoch": 2.496490478515625e-05, + "model_forward_time": 0.025379419326782227, + "step": 16361 + }, + { + "epoch": 2.496490478515625e-05, + "step": 16361, + "training_step_time": 0.10523080825805664 + }, + { + "epoch": 2.49664306640625e-05, + "model_forward_time": 0.02500295639038086, + "step": 16362 + }, + { + "epoch": 2.49664306640625e-05, + "step": 16362, + "training_step_time": 0.8561389446258545 + }, + { + "epoch": 2.496795654296875e-05, + "model_forward_time": 0.022899866104125977, + "step": 16363 + }, + { + "epoch": 2.496795654296875e-05, + "step": 16363, + "training_step_time": 0.1220083236694336 + }, + { + "epoch": 2.4969482421875e-05, + "model_forward_time": 0.02422332763671875, + "step": 16364 + }, + { + "epoch": 2.4969482421875e-05, + "step": 16364, + "training_step_time": 0.18590235710144043 + }, + { + "epoch": 2.497100830078125e-05, + "model_forward_time": 0.02471017837524414, + "step": 16365 + }, + { + "epoch": 2.497100830078125e-05, + "step": 16365, + "training_step_time": 0.1924910545349121 + }, + { + "epoch": 2.49725341796875e-05, + "model_forward_time": 0.024799108505249023, + "step": 16366 + }, + { + "epoch": 2.49725341796875e-05, + "step": 16366, + "training_step_time": 0.17697477340698242 + }, + { + "epoch": 2.497406005859375e-05, + "model_forward_time": 0.024113178253173828, + "step": 16367 + }, + { + "epoch": 2.497406005859375e-05, + "step": 16367, + "training_step_time": 0.20777678489685059 + }, + { + "epoch": 2.49755859375e-05, + "model_forward_time": 0.024667739868164062, + "step": 16368 + }, + { + "epoch": 2.49755859375e-05, + "step": 16368, + "training_step_time": 0.10934662818908691 + }, + { + "epoch": 2.497711181640625e-05, + "model_forward_time": 0.024131059646606445, + "step": 16369 + }, + { + "epoch": 2.497711181640625e-05, + "step": 16369, + "training_step_time": 0.1007537841796875 + }, + { + "epoch": 2.49786376953125e-05, + "grad_norm": 0.1973024159669876, + "learning_rate": 4.65854885890225e-05, + "loss": 0.0171, + "step": 16370 + }, + { + "epoch": 2.49786376953125e-05, + "model_forward_time": 0.025710582733154297, + "step": 16370 + }, + { + "epoch": 2.49786376953125e-05, + "step": 16370, + "training_step_time": 0.10565590858459473 + }, + { + "epoch": 2.498016357421875e-05, + "model_forward_time": 0.025020360946655273, + "step": 16371 + }, + { + "epoch": 2.498016357421875e-05, + "step": 16371, + "training_step_time": 0.10427546501159668 + }, + { + "epoch": 2.4981689453125e-05, + "model_forward_time": 0.025077104568481445, + "step": 16372 + }, + { + "epoch": 2.4981689453125e-05, + "step": 16372, + "training_step_time": 0.10362625122070312 + }, + { + "epoch": 2.498321533203125e-05, + "model_forward_time": 0.02566361427307129, + "step": 16373 + }, + { + "epoch": 2.498321533203125e-05, + "step": 16373, + "training_step_time": 0.11781764030456543 + }, + { + "epoch": 2.49847412109375e-05, + "model_forward_time": 0.024481534957885742, + "step": 16374 + }, + { + "epoch": 2.49847412109375e-05, + "step": 16374, + "training_step_time": 0.1341838836669922 + }, + { + "epoch": 2.498626708984375e-05, + "model_forward_time": 0.02390313148498535, + "step": 16375 + }, + { + "epoch": 2.498626708984375e-05, + "step": 16375, + "training_step_time": 0.12933087348937988 + }, + { + "epoch": 2.498779296875e-05, + "model_forward_time": 0.023952007293701172, + "step": 16376 + }, + { + "epoch": 2.498779296875e-05, + "step": 16376, + "training_step_time": 0.11882829666137695 + }, + { + "epoch": 2.498931884765625e-05, + "model_forward_time": 0.024495363235473633, + "step": 16377 + }, + { + "epoch": 2.498931884765625e-05, + "step": 16377, + "training_step_time": 0.11801695823669434 + }, + { + "epoch": 2.49908447265625e-05, + "model_forward_time": 0.024218082427978516, + "step": 16378 + }, + { + "epoch": 2.49908447265625e-05, + "step": 16378, + "training_step_time": 0.11306476593017578 + }, + { + "epoch": 2.499237060546875e-05, + "model_forward_time": 0.024240493774414062, + "step": 16379 + }, + { + "epoch": 2.499237060546875e-05, + "step": 16379, + "training_step_time": 0.11334037780761719 + }, + { + "epoch": 2.4993896484375e-05, + "grad_norm": 0.18750359117984772, + "learning_rate": 4.65305036816978e-05, + "loss": 0.0112, + "step": 16380 + }, + { + "epoch": 2.4993896484375e-05, + "model_forward_time": 0.024095535278320312, + "step": 16380 + }, + { + "epoch": 2.4993896484375e-05, + "step": 16380, + "training_step_time": 0.1091609001159668 + }, + { + "epoch": 2.499542236328125e-05, + "model_forward_time": 0.024363994598388672, + "step": 16381 + }, + { + "epoch": 2.499542236328125e-05, + "step": 16381, + "training_step_time": 0.11114931106567383 + }, + { + "epoch": 2.49969482421875e-05, + "model_forward_time": 0.02513265609741211, + "step": 16382 + }, + { + "epoch": 2.49969482421875e-05, + "step": 16382, + "training_step_time": 0.14131546020507812 + }, + { + "epoch": 2.499847412109375e-05, + "model_forward_time": 0.025200605392456055, + "step": 16383 + }, + { + "epoch": 2.499847412109375e-05, + "step": 16383, + "training_step_time": 0.13706445693969727 + }, + { + "epoch": 2.5e-05, + "model_forward_time": 0.0248410701751709, + "step": 16384 + }, + { + "epoch": 2.5e-05, + "step": 16384, + "training_step_time": 0.10546612739562988 + }, + { + "epoch": 2.500152587890625e-05, + "model_forward_time": 0.02525472640991211, + "step": 16385 + }, + { + "epoch": 2.500152587890625e-05, + "step": 16385, + "training_step_time": 0.11875677108764648 + }, + { + "epoch": 2.50030517578125e-05, + "model_forward_time": 0.025292396545410156, + "step": 16386 + }, + { + "epoch": 2.50030517578125e-05, + "step": 16386, + "training_step_time": 0.11366009712219238 + }, + { + "epoch": 2.500457763671875e-05, + "model_forward_time": 0.025298357009887695, + "step": 16387 + }, + { + "epoch": 2.500457763671875e-05, + "step": 16387, + "training_step_time": 0.10927557945251465 + }, + { + "epoch": 2.5006103515625e-05, + "model_forward_time": 0.024949312210083008, + "step": 16388 + }, + { + "epoch": 2.5006103515625e-05, + "step": 16388, + "training_step_time": 0.19842839241027832 + }, + { + "epoch": 2.500762939453125e-05, + "model_forward_time": 0.02506852149963379, + "step": 16389 + }, + { + "epoch": 2.500762939453125e-05, + "step": 16389, + "training_step_time": 0.1086266040802002 + }, + { + "epoch": 2.50091552734375e-05, + "grad_norm": 0.21019835770130157, + "learning_rate": 4.647552299013828e-05, + "loss": 0.0173, + "step": 16390 + }, + { + "epoch": 2.50091552734375e-05, + "model_forward_time": 0.023973464965820312, + "step": 16390 + }, + { + "epoch": 2.50091552734375e-05, + "step": 16390, + "training_step_time": 0.10193824768066406 + }, + { + "epoch": 2.501068115234375e-05, + "model_forward_time": 0.025185108184814453, + "step": 16391 + }, + { + "epoch": 2.501068115234375e-05, + "step": 16391, + "training_step_time": 0.10439562797546387 + }, + { + "epoch": 2.501220703125e-05, + "model_forward_time": 0.025111675262451172, + "step": 16392 + }, + { + "epoch": 2.501220703125e-05, + "step": 16392, + "training_step_time": 0.10789251327514648 + }, + { + "epoch": 2.501373291015625e-05, + "model_forward_time": 0.02472543716430664, + "step": 16393 + }, + { + "epoch": 2.501373291015625e-05, + "step": 16393, + "training_step_time": 0.10443997383117676 + }, + { + "epoch": 2.50152587890625e-05, + "model_forward_time": 0.02505183219909668, + "step": 16394 + }, + { + "epoch": 2.50152587890625e-05, + "step": 16394, + "training_step_time": 0.10407376289367676 + }, + { + "epoch": 2.501678466796875e-05, + "model_forward_time": 0.025483369827270508, + "step": 16395 + }, + { + "epoch": 2.501678466796875e-05, + "step": 16395, + "training_step_time": 0.10547423362731934 + }, + { + "epoch": 2.5018310546875e-05, + "model_forward_time": 0.0256044864654541, + "step": 16396 + }, + { + "epoch": 2.5018310546875e-05, + "step": 16396, + "training_step_time": 0.1062936782836914 + }, + { + "epoch": 2.501983642578125e-05, + "model_forward_time": 0.025485992431640625, + "step": 16397 + }, + { + "epoch": 2.501983642578125e-05, + "step": 16397, + "training_step_time": 0.10991168022155762 + }, + { + "epoch": 2.50213623046875e-05, + "model_forward_time": 0.025321245193481445, + "step": 16398 + }, + { + "epoch": 2.50213623046875e-05, + "step": 16398, + "training_step_time": 0.10649967193603516 + }, + { + "epoch": 2.502288818359375e-05, + "model_forward_time": 0.025323152542114258, + "step": 16399 + }, + { + "epoch": 2.502288818359375e-05, + "step": 16399, + "training_step_time": 0.10945630073547363 + }, + { + "epoch": 2.50244140625e-05, + "grad_norm": 0.35515621304512024, + "learning_rate": 4.642054658115067e-05, + "loss": 0.0111, + "step": 16400 + }, + { + "epoch": 2.50244140625e-05, + "model_forward_time": 0.02395486831665039, + "step": 16400 + }, + { + "epoch": 2.50244140625e-05, + "step": 16400, + "training_step_time": 0.10367679595947266 + }, + { + "epoch": 2.502593994140625e-05, + "model_forward_time": 0.024101734161376953, + "step": 16401 + }, + { + "epoch": 2.502593994140625e-05, + "step": 16401, + "training_step_time": 0.10614180564880371 + }, + { + "epoch": 2.50274658203125e-05, + "model_forward_time": 0.02769947052001953, + "step": 16402 + }, + { + "epoch": 2.50274658203125e-05, + "step": 16402, + "training_step_time": 0.10841131210327148 + }, + { + "epoch": 2.502899169921875e-05, + "model_forward_time": 0.025760412216186523, + "step": 16403 + }, + { + "epoch": 2.502899169921875e-05, + "step": 16403, + "training_step_time": 0.10672712326049805 + }, + { + "epoch": 2.5030517578125e-05, + "model_forward_time": 0.025777101516723633, + "step": 16404 + }, + { + "epoch": 2.5030517578125e-05, + "step": 16404, + "training_step_time": 0.10654926300048828 + }, + { + "epoch": 2.503204345703125e-05, + "model_forward_time": 0.025480985641479492, + "step": 16405 + }, + { + "epoch": 2.503204345703125e-05, + "step": 16405, + "training_step_time": 0.10590529441833496 + }, + { + "epoch": 2.50335693359375e-05, + "model_forward_time": 0.025279521942138672, + "step": 16406 + }, + { + "epoch": 2.50335693359375e-05, + "step": 16406, + "training_step_time": 0.10498547554016113 + }, + { + "epoch": 2.503509521484375e-05, + "model_forward_time": 0.026096105575561523, + "step": 16407 + }, + { + "epoch": 2.503509521484375e-05, + "step": 16407, + "training_step_time": 0.1059730052947998 + }, + { + "epoch": 2.503662109375e-05, + "model_forward_time": 0.026453018188476562, + "step": 16408 + }, + { + "epoch": 2.503662109375e-05, + "step": 16408, + "training_step_time": 0.14661693572998047 + }, + { + "epoch": 2.503814697265625e-05, + "model_forward_time": 0.024920940399169922, + "step": 16409 + }, + { + "epoch": 2.503814697265625e-05, + "step": 16409, + "training_step_time": 0.20054221153259277 + }, + { + "epoch": 2.50396728515625e-05, + "grad_norm": 0.29080188274383545, + "learning_rate": 4.6365574521536445e-05, + "loss": 0.013, + "step": 16410 + }, + { + "epoch": 2.50396728515625e-05, + "model_forward_time": 0.024312734603881836, + "step": 16410 + }, + { + "epoch": 2.50396728515625e-05, + "step": 16410, + "training_step_time": 0.21537327766418457 + }, + { + "epoch": 2.504119873046875e-05, + "model_forward_time": 0.023350000381469727, + "step": 16411 + }, + { + "epoch": 2.504119873046875e-05, + "step": 16411, + "training_step_time": 0.1923351287841797 + }, + { + "epoch": 2.5042724609375e-05, + "model_forward_time": 0.02477264404296875, + "step": 16412 + }, + { + "epoch": 2.5042724609375e-05, + "step": 16412, + "training_step_time": 0.17514443397521973 + }, + { + "epoch": 2.504425048828125e-05, + "model_forward_time": 0.024207592010498047, + "step": 16413 + }, + { + "epoch": 2.504425048828125e-05, + "step": 16413, + "training_step_time": 0.19513440132141113 + }, + { + "epoch": 2.50457763671875e-05, + "model_forward_time": 0.024522066116333008, + "step": 16414 + }, + { + "epoch": 2.50457763671875e-05, + "step": 16414, + "training_step_time": 0.11477231979370117 + }, + { + "epoch": 2.504730224609375e-05, + "model_forward_time": 0.024348735809326172, + "step": 16415 + }, + { + "epoch": 2.504730224609375e-05, + "step": 16415, + "training_step_time": 0.10291814804077148 + }, + { + "epoch": 2.5048828125e-05, + "model_forward_time": 0.025489091873168945, + "step": 16416 + }, + { + "epoch": 2.5048828125e-05, + "step": 16416, + "training_step_time": 0.11387300491333008 + }, + { + "epoch": 2.505035400390625e-05, + "model_forward_time": 0.025764942169189453, + "step": 16417 + }, + { + "epoch": 2.505035400390625e-05, + "step": 16417, + "training_step_time": 0.11059021949768066 + }, + { + "epoch": 2.50518798828125e-05, + "model_forward_time": 0.025543212890625, + "step": 16418 + }, + { + "epoch": 2.50518798828125e-05, + "step": 16418, + "training_step_time": 0.11300849914550781 + }, + { + "epoch": 2.505340576171875e-05, + "model_forward_time": 0.02530503273010254, + "step": 16419 + }, + { + "epoch": 2.505340576171875e-05, + "step": 16419, + "training_step_time": 0.10860490798950195 + }, + { + "epoch": 2.5054931640625e-05, + "grad_norm": 0.4580700397491455, + "learning_rate": 4.631060687809191e-05, + "loss": 0.0124, + "step": 16420 + }, + { + "epoch": 2.5054931640625e-05, + "model_forward_time": 0.02600264549255371, + "step": 16420 + }, + { + "epoch": 2.5054931640625e-05, + "step": 16420, + "training_step_time": 0.10634422302246094 + }, + { + "epoch": 2.505645751953125e-05, + "model_forward_time": 0.025363683700561523, + "step": 16421 + }, + { + "epoch": 2.505645751953125e-05, + "step": 16421, + "training_step_time": 0.10703110694885254 + }, + { + "epoch": 2.50579833984375e-05, + "model_forward_time": 0.025494098663330078, + "step": 16422 + }, + { + "epoch": 2.50579833984375e-05, + "step": 16422, + "training_step_time": 0.9178316593170166 + }, + { + "epoch": 2.505950927734375e-05, + "model_forward_time": 0.02284836769104004, + "step": 16423 + }, + { + "epoch": 2.505950927734375e-05, + "step": 16423, + "training_step_time": 0.16357779502868652 + }, + { + "epoch": 2.506103515625e-05, + "model_forward_time": 0.02417278289794922, + "step": 16424 + }, + { + "epoch": 2.506103515625e-05, + "step": 16424, + "training_step_time": 0.15090036392211914 + }, + { + "epoch": 2.506256103515625e-05, + "model_forward_time": 0.023791790008544922, + "step": 16425 + }, + { + "epoch": 2.506256103515625e-05, + "step": 16425, + "training_step_time": 0.11537337303161621 + }, + { + "epoch": 2.50640869140625e-05, + "model_forward_time": 0.02497553825378418, + "step": 16426 + }, + { + "epoch": 2.50640869140625e-05, + "step": 16426, + "training_step_time": 0.10822105407714844 + }, + { + "epoch": 2.506561279296875e-05, + "model_forward_time": 0.02534937858581543, + "step": 16427 + }, + { + "epoch": 2.506561279296875e-05, + "step": 16427, + "training_step_time": 0.19776582717895508 + }, + { + "epoch": 2.5067138671875e-05, + "model_forward_time": 0.02429938316345215, + "step": 16428 + }, + { + "epoch": 2.5067138671875e-05, + "step": 16428, + "training_step_time": 0.10302066802978516 + }, + { + "epoch": 2.506866455078125e-05, + "model_forward_time": 0.024543046951293945, + "step": 16429 + }, + { + "epoch": 2.506866455078125e-05, + "step": 16429, + "training_step_time": 0.1020200252532959 + }, + { + "epoch": 2.50701904296875e-05, + "grad_norm": 0.3575209081172943, + "learning_rate": 4.625564371760791e-05, + "loss": 0.0162, + "step": 16430 + }, + { + "epoch": 2.50701904296875e-05, + "model_forward_time": 0.025224685668945312, + "step": 16430 + }, + { + "epoch": 2.50701904296875e-05, + "step": 16430, + "training_step_time": 0.1049802303314209 + }, + { + "epoch": 2.507171630859375e-05, + "model_forward_time": 0.025543689727783203, + "step": 16431 + }, + { + "epoch": 2.507171630859375e-05, + "step": 16431, + "training_step_time": 0.10553622245788574 + }, + { + "epoch": 2.50732421875e-05, + "model_forward_time": 0.025386333465576172, + "step": 16432 + }, + { + "epoch": 2.50732421875e-05, + "step": 16432, + "training_step_time": 0.10555195808410645 + }, + { + "epoch": 2.507476806640625e-05, + "model_forward_time": 0.025124549865722656, + "step": 16433 + }, + { + "epoch": 2.507476806640625e-05, + "step": 16433, + "training_step_time": 0.10695457458496094 + }, + { + "epoch": 2.50762939453125e-05, + "model_forward_time": 0.025166749954223633, + "step": 16434 + }, + { + "epoch": 2.50762939453125e-05, + "step": 16434, + "training_step_time": 0.10552239418029785 + }, + { + "epoch": 2.507781982421875e-05, + "model_forward_time": 0.025157451629638672, + "step": 16435 + }, + { + "epoch": 2.507781982421875e-05, + "step": 16435, + "training_step_time": 0.10619926452636719 + }, + { + "epoch": 2.5079345703125e-05, + "model_forward_time": 0.025560617446899414, + "step": 16436 + }, + { + "epoch": 2.5079345703125e-05, + "step": 16436, + "training_step_time": 0.10657930374145508 + }, + { + "epoch": 2.508087158203125e-05, + "model_forward_time": 0.025399208068847656, + "step": 16437 + }, + { + "epoch": 2.508087158203125e-05, + "step": 16437, + "training_step_time": 0.10880327224731445 + }, + { + "epoch": 2.50823974609375e-05, + "model_forward_time": 0.025549888610839844, + "step": 16438 + }, + { + "epoch": 2.50823974609375e-05, + "step": 16438, + "training_step_time": 0.10569071769714355 + }, + { + "epoch": 2.508392333984375e-05, + "model_forward_time": 0.025377750396728516, + "step": 16439 + }, + { + "epoch": 2.508392333984375e-05, + "step": 16439, + "training_step_time": 0.10698080062866211 + }, + { + "epoch": 2.508544921875e-05, + "grad_norm": 0.2672197222709656, + "learning_rate": 4.620068510686985e-05, + "loss": 0.0136, + "step": 16440 + }, + { + "epoch": 2.508544921875e-05, + "model_forward_time": 0.02522110939025879, + "step": 16440 + }, + { + "epoch": 2.508544921875e-05, + "step": 16440, + "training_step_time": 0.1342785358428955 + }, + { + "epoch": 2.508697509765625e-05, + "model_forward_time": 0.025295495986938477, + "step": 16441 + }, + { + "epoch": 2.508697509765625e-05, + "step": 16441, + "training_step_time": 0.1684868335723877 + }, + { + "epoch": 2.50885009765625e-05, + "model_forward_time": 0.024318218231201172, + "step": 16442 + }, + { + "epoch": 2.50885009765625e-05, + "step": 16442, + "training_step_time": 0.15961003303527832 + }, + { + "epoch": 2.509002685546875e-05, + "model_forward_time": 0.023906230926513672, + "step": 16443 + }, + { + "epoch": 2.509002685546875e-05, + "step": 16443, + "training_step_time": 0.140031099319458 + }, + { + "epoch": 2.5091552734375e-05, + "model_forward_time": 0.024425506591796875, + "step": 16444 + }, + { + "epoch": 2.5091552734375e-05, + "step": 16444, + "training_step_time": 0.13741731643676758 + }, + { + "epoch": 2.509307861328125e-05, + "model_forward_time": 0.0252687931060791, + "step": 16445 + }, + { + "epoch": 2.509307861328125e-05, + "step": 16445, + "training_step_time": 0.12011456489562988 + }, + { + "epoch": 2.50946044921875e-05, + "model_forward_time": 0.024673938751220703, + "step": 16446 + }, + { + "epoch": 2.50946044921875e-05, + "step": 16446, + "training_step_time": 0.1358191967010498 + }, + { + "epoch": 2.509613037109375e-05, + "model_forward_time": 0.024561643600463867, + "step": 16447 + }, + { + "epoch": 2.509613037109375e-05, + "step": 16447, + "training_step_time": 0.15794754028320312 + }, + { + "epoch": 2.509765625e-05, + "model_forward_time": 0.024929285049438477, + "step": 16448 + }, + { + "epoch": 2.509765625e-05, + "step": 16448, + "training_step_time": 0.19464588165283203 + }, + { + "epoch": 2.509918212890625e-05, + "model_forward_time": 0.024489641189575195, + "step": 16449 + }, + { + "epoch": 2.509918212890625e-05, + "step": 16449, + "training_step_time": 0.1565990447998047 + }, + { + "epoch": 2.51007080078125e-05, + "grad_norm": 0.32794129848480225, + "learning_rate": 4.6145731112657644e-05, + "loss": 0.0124, + "step": 16450 + }, + { + "epoch": 2.51007080078125e-05, + "model_forward_time": 0.024987220764160156, + "step": 16450 + }, + { + "epoch": 2.51007080078125e-05, + "step": 16450, + "training_step_time": 0.19224977493286133 + }, + { + "epoch": 2.510223388671875e-05, + "model_forward_time": 0.025913715362548828, + "step": 16451 + }, + { + "epoch": 2.510223388671875e-05, + "step": 16451, + "training_step_time": 0.1502819061279297 + }, + { + "epoch": 2.5103759765625e-05, + "model_forward_time": 0.02418661117553711, + "step": 16452 + }, + { + "epoch": 2.5103759765625e-05, + "step": 16452, + "training_step_time": 0.1773838996887207 + }, + { + "epoch": 2.510528564453125e-05, + "model_forward_time": 0.024291038513183594, + "step": 16453 + }, + { + "epoch": 2.510528564453125e-05, + "step": 16453, + "training_step_time": 0.10554051399230957 + }, + { + "epoch": 2.51068115234375e-05, + "model_forward_time": 0.024457693099975586, + "step": 16454 + }, + { + "epoch": 2.51068115234375e-05, + "step": 16454, + "training_step_time": 0.10473227500915527 + }, + { + "epoch": 2.510833740234375e-05, + "model_forward_time": 0.025315046310424805, + "step": 16455 + }, + { + "epoch": 2.510833740234375e-05, + "step": 16455, + "training_step_time": 0.10741019248962402 + }, + { + "epoch": 2.510986328125e-05, + "model_forward_time": 0.025682926177978516, + "step": 16456 + }, + { + "epoch": 2.510986328125e-05, + "step": 16456, + "training_step_time": 0.11321353912353516 + }, + { + "epoch": 2.511138916015625e-05, + "model_forward_time": 0.025025129318237305, + "step": 16457 + }, + { + "epoch": 2.511138916015625e-05, + "step": 16457, + "training_step_time": 0.10503840446472168 + }, + { + "epoch": 2.51129150390625e-05, + "model_forward_time": 0.025183439254760742, + "step": 16458 + }, + { + "epoch": 2.51129150390625e-05, + "step": 16458, + "training_step_time": 0.10402584075927734 + }, + { + "epoch": 2.511444091796875e-05, + "model_forward_time": 0.025130271911621094, + "step": 16459 + }, + { + "epoch": 2.511444091796875e-05, + "step": 16459, + "training_step_time": 0.10849952697753906 + }, + { + "epoch": 2.5115966796875e-05, + "grad_norm": 0.31576505303382874, + "learning_rate": 4.609078180174555e-05, + "loss": 0.0181, + "step": 16460 + }, + { + "epoch": 2.5115966796875e-05, + "model_forward_time": 0.025750398635864258, + "step": 16460 + }, + { + "epoch": 2.5115966796875e-05, + "step": 16460, + "training_step_time": 0.10608148574829102 + }, + { + "epoch": 2.511749267578125e-05, + "model_forward_time": 0.025476694107055664, + "step": 16461 + }, + { + "epoch": 2.511749267578125e-05, + "step": 16461, + "training_step_time": 0.10701918601989746 + }, + { + "epoch": 2.51190185546875e-05, + "model_forward_time": 0.026217937469482422, + "step": 16462 + }, + { + "epoch": 2.51190185546875e-05, + "step": 16462, + "training_step_time": 0.10647058486938477 + }, + { + "epoch": 2.512054443359375e-05, + "model_forward_time": 0.02526712417602539, + "step": 16463 + }, + { + "epoch": 2.512054443359375e-05, + "step": 16463, + "training_step_time": 0.10477113723754883 + }, + { + "epoch": 2.51220703125e-05, + "model_forward_time": 0.02513861656188965, + "step": 16464 + }, + { + "epoch": 2.51220703125e-05, + "step": 16464, + "training_step_time": 0.10711383819580078 + }, + { + "epoch": 2.512359619140625e-05, + "model_forward_time": 0.025604724884033203, + "step": 16465 + }, + { + "epoch": 2.512359619140625e-05, + "step": 16465, + "training_step_time": 0.11089730262756348 + }, + { + "epoch": 2.51251220703125e-05, + "model_forward_time": 0.02483367919921875, + "step": 16466 + }, + { + "epoch": 2.51251220703125e-05, + "step": 16466, + "training_step_time": 0.10695266723632812 + }, + { + "epoch": 2.512664794921875e-05, + "model_forward_time": 0.02513289451599121, + "step": 16467 + }, + { + "epoch": 2.512664794921875e-05, + "step": 16467, + "training_step_time": 0.1735219955444336 + }, + { + "epoch": 2.5128173828125e-05, + "model_forward_time": 0.025539636611938477, + "step": 16468 + }, + { + "epoch": 2.5128173828125e-05, + "step": 16468, + "training_step_time": 0.1385173797607422 + }, + { + "epoch": 2.512969970703125e-05, + "model_forward_time": 0.02483367919921875, + "step": 16469 + }, + { + "epoch": 2.512969970703125e-05, + "step": 16469, + "training_step_time": 0.10190558433532715 + }, + { + "epoch": 2.51312255859375e-05, + "grad_norm": 0.27324163913726807, + "learning_rate": 4.60358372409022e-05, + "loss": 0.011, + "step": 16470 + }, + { + "epoch": 2.51312255859375e-05, + "model_forward_time": 0.024628877639770508, + "step": 16470 + }, + { + "epoch": 2.51312255859375e-05, + "step": 16470, + "training_step_time": 0.11878824234008789 + }, + { + "epoch": 2.513275146484375e-05, + "model_forward_time": 0.025277376174926758, + "step": 16471 + }, + { + "epoch": 2.513275146484375e-05, + "step": 16471, + "training_step_time": 0.11488461494445801 + }, + { + "epoch": 2.513427734375e-05, + "model_forward_time": 0.02507805824279785, + "step": 16472 + }, + { + "epoch": 2.513427734375e-05, + "step": 16472, + "training_step_time": 0.10796904563903809 + }, + { + "epoch": 2.513580322265625e-05, + "model_forward_time": 0.02526092529296875, + "step": 16473 + }, + { + "epoch": 2.513580322265625e-05, + "step": 16473, + "training_step_time": 0.18858838081359863 + }, + { + "epoch": 2.51373291015625e-05, + "model_forward_time": 0.02453160285949707, + "step": 16474 + }, + { + "epoch": 2.51373291015625e-05, + "step": 16474, + "training_step_time": 0.1021580696105957 + }, + { + "epoch": 2.513885498046875e-05, + "model_forward_time": 0.024401187896728516, + "step": 16475 + }, + { + "epoch": 2.513885498046875e-05, + "step": 16475, + "training_step_time": 0.10171961784362793 + }, + { + "epoch": 2.5140380859375e-05, + "model_forward_time": 0.0251007080078125, + "step": 16476 + }, + { + "epoch": 2.5140380859375e-05, + "step": 16476, + "training_step_time": 0.10509967803955078 + }, + { + "epoch": 2.514190673828125e-05, + "model_forward_time": 0.024897336959838867, + "step": 16477 + }, + { + "epoch": 2.514190673828125e-05, + "step": 16477, + "training_step_time": 0.10453486442565918 + }, + { + "epoch": 2.51434326171875e-05, + "model_forward_time": 0.02522420883178711, + "step": 16478 + }, + { + "epoch": 2.51434326171875e-05, + "step": 16478, + "training_step_time": 0.10608386993408203 + }, + { + "epoch": 2.514495849609375e-05, + "model_forward_time": 0.025086402893066406, + "step": 16479 + }, + { + "epoch": 2.514495849609375e-05, + "step": 16479, + "training_step_time": 0.10670804977416992 + }, + { + "epoch": 2.5146484375e-05, + "grad_norm": 0.36495304107666016, + "learning_rate": 4.598089749689041e-05, + "loss": 0.0188, + "step": 16480 + }, + { + "epoch": 2.5146484375e-05, + "model_forward_time": 0.025043725967407227, + "step": 16480 + }, + { + "epoch": 2.5146484375e-05, + "step": 16480, + "training_step_time": 0.11182355880737305 + }, + { + "epoch": 2.514801025390625e-05, + "model_forward_time": 0.025373220443725586, + "step": 16481 + }, + { + "epoch": 2.514801025390625e-05, + "step": 16481, + "training_step_time": 0.11329817771911621 + }, + { + "epoch": 2.51495361328125e-05, + "model_forward_time": 0.025304079055786133, + "step": 16482 + }, + { + "epoch": 2.51495361328125e-05, + "step": 16482, + "training_step_time": 0.11450672149658203 + }, + { + "epoch": 2.515106201171875e-05, + "model_forward_time": 0.025184154510498047, + "step": 16483 + }, + { + "epoch": 2.515106201171875e-05, + "step": 16483, + "training_step_time": 0.1051628589630127 + }, + { + "epoch": 2.5152587890625e-05, + "model_forward_time": 0.02497553825378418, + "step": 16484 + }, + { + "epoch": 2.5152587890625e-05, + "step": 16484, + "training_step_time": 0.10339093208312988 + }, + { + "epoch": 2.515411376953125e-05, + "model_forward_time": 0.024957895278930664, + "step": 16485 + }, + { + "epoch": 2.515411376953125e-05, + "step": 16485, + "training_step_time": 0.10459566116333008 + }, + { + "epoch": 2.51556396484375e-05, + "model_forward_time": 0.025227785110473633, + "step": 16486 + }, + { + "epoch": 2.51556396484375e-05, + "step": 16486, + "training_step_time": 0.10567498207092285 + }, + { + "epoch": 2.515716552734375e-05, + "model_forward_time": 0.025365591049194336, + "step": 16487 + }, + { + "epoch": 2.515716552734375e-05, + "step": 16487, + "training_step_time": 0.10596203804016113 + }, + { + "epoch": 2.515869140625e-05, + "model_forward_time": 0.025220394134521484, + "step": 16488 + }, + { + "epoch": 2.515869140625e-05, + "step": 16488, + "training_step_time": 0.1044306755065918 + }, + { + "epoch": 2.516021728515625e-05, + "model_forward_time": 0.0256500244140625, + "step": 16489 + }, + { + "epoch": 2.516021728515625e-05, + "step": 16489, + "training_step_time": 0.10474944114685059 + }, + { + "epoch": 2.51617431640625e-05, + "grad_norm": 0.27902287244796753, + "learning_rate": 4.5925962636467126e-05, + "loss": 0.0157, + "step": 16490 + }, + { + "epoch": 2.51617431640625e-05, + "model_forward_time": 0.025027036666870117, + "step": 16490 + }, + { + "epoch": 2.51617431640625e-05, + "step": 16490, + "training_step_time": 0.10479950904846191 + }, + { + "epoch": 2.516326904296875e-05, + "model_forward_time": 0.025645971298217773, + "step": 16491 + }, + { + "epoch": 2.516326904296875e-05, + "step": 16491, + "training_step_time": 0.10541296005249023 + }, + { + "epoch": 2.5164794921875e-05, + "model_forward_time": 0.02505207061767578, + "step": 16492 + }, + { + "epoch": 2.5164794921875e-05, + "step": 16492, + "training_step_time": 0.10613512992858887 + }, + { + "epoch": 2.516632080078125e-05, + "model_forward_time": 0.02463698387145996, + "step": 16493 + }, + { + "epoch": 2.516632080078125e-05, + "step": 16493, + "training_step_time": 0.1469099521636963 + }, + { + "epoch": 2.51678466796875e-05, + "model_forward_time": 0.024808168411254883, + "step": 16494 + }, + { + "epoch": 2.51678466796875e-05, + "step": 16494, + "training_step_time": 0.16462421417236328 + }, + { + "epoch": 2.516937255859375e-05, + "model_forward_time": 0.02490520477294922, + "step": 16495 + }, + { + "epoch": 2.516937255859375e-05, + "step": 16495, + "training_step_time": 0.12042093276977539 + }, + { + "epoch": 2.51708984375e-05, + "model_forward_time": 0.024792194366455078, + "step": 16496 + }, + { + "epoch": 2.51708984375e-05, + "step": 16496, + "training_step_time": 0.1595468521118164 + }, + { + "epoch": 2.517242431640625e-05, + "model_forward_time": 0.024524688720703125, + "step": 16497 + }, + { + "epoch": 2.517242431640625e-05, + "step": 16497, + "training_step_time": 0.16651201248168945 + }, + { + "epoch": 2.51739501953125e-05, + "model_forward_time": 0.024406909942626953, + "step": 16498 + }, + { + "epoch": 2.51739501953125e-05, + "step": 16498, + "training_step_time": 0.1763901710510254 + }, + { + "epoch": 2.517547607421875e-05, + "model_forward_time": 0.02459883689880371, + "step": 16499 + }, + { + "epoch": 2.517547607421875e-05, + "step": 16499, + "training_step_time": 0.1804189682006836 + }, + { + "epoch": 2.5177001953125e-05, + "grad_norm": 0.2936546802520752, + "learning_rate": 4.5871032726383386e-05, + "loss": 0.0093, + "step": 16500 + }, + { + "epoch": 2.5177001953125e-05, + "model_forward_time": 0.024168729782104492, + "step": 16500 + }, + { + "epoch": 2.5177001953125e-05, + "step": 16500, + "training_step_time": 0.10477113723754883 + }, + { + "epoch": 2.517852783203125e-05, + "model_forward_time": 0.02460312843322754, + "step": 16501 + }, + { + "epoch": 2.517852783203125e-05, + "step": 16501, + "training_step_time": 0.10196876525878906 + }, + { + "epoch": 2.51800537109375e-05, + "model_forward_time": 0.025241613388061523, + "step": 16502 + }, + { + "epoch": 2.51800537109375e-05, + "step": 16502, + "training_step_time": 0.10885930061340332 + }, + { + "epoch": 2.518157958984375e-05, + "model_forward_time": 0.025164365768432617, + "step": 16503 + }, + { + "epoch": 2.518157958984375e-05, + "step": 16503, + "training_step_time": 0.10882854461669922 + }, + { + "epoch": 2.518310546875e-05, + "model_forward_time": 0.025022506713867188, + "step": 16504 + }, + { + "epoch": 2.518310546875e-05, + "step": 16504, + "training_step_time": 0.1059410572052002 + }, + { + "epoch": 2.518463134765625e-05, + "model_forward_time": 0.02507948875427246, + "step": 16505 + }, + { + "epoch": 2.518463134765625e-05, + "step": 16505, + "training_step_time": 0.11070466041564941 + }, + { + "epoch": 2.51861572265625e-05, + "model_forward_time": 0.025187969207763672, + "step": 16506 + }, + { + "epoch": 2.51861572265625e-05, + "step": 16506, + "training_step_time": 0.12397480010986328 + }, + { + "epoch": 2.518768310546875e-05, + "model_forward_time": 0.024918556213378906, + "step": 16507 + }, + { + "epoch": 2.518768310546875e-05, + "step": 16507, + "training_step_time": 0.11527156829833984 + }, + { + "epoch": 2.5189208984375e-05, + "model_forward_time": 0.024707794189453125, + "step": 16508 + }, + { + "epoch": 2.5189208984375e-05, + "step": 16508, + "training_step_time": 0.11465907096862793 + }, + { + "epoch": 2.519073486328125e-05, + "model_forward_time": 0.02501821517944336, + "step": 16509 + }, + { + "epoch": 2.519073486328125e-05, + "step": 16509, + "training_step_time": 0.11523771286010742 + }, + { + "epoch": 2.51922607421875e-05, + "grad_norm": 0.30701643228530884, + "learning_rate": 4.5816107833384234e-05, + "loss": 0.0117, + "step": 16510 + }, + { + "epoch": 2.51922607421875e-05, + "model_forward_time": 0.02506399154663086, + "step": 16510 + }, + { + "epoch": 2.51922607421875e-05, + "step": 16510, + "training_step_time": 0.11409473419189453 + }, + { + "epoch": 2.519378662109375e-05, + "model_forward_time": 0.0250399112701416, + "step": 16511 + }, + { + "epoch": 2.519378662109375e-05, + "step": 16511, + "training_step_time": 0.11527800559997559 + }, + { + "epoch": 2.51953125e-05, + "model_forward_time": 0.024981260299682617, + "step": 16512 + }, + { + "epoch": 2.51953125e-05, + "step": 16512, + "training_step_time": 0.10932755470275879 + }, + { + "epoch": 2.519683837890625e-05, + "model_forward_time": 0.025107622146606445, + "step": 16513 + }, + { + "epoch": 2.519683837890625e-05, + "step": 16513, + "training_step_time": 0.10806441307067871 + }, + { + "epoch": 2.51983642578125e-05, + "model_forward_time": 0.02554798126220703, + "step": 16514 + }, + { + "epoch": 2.51983642578125e-05, + "step": 16514, + "training_step_time": 0.1866769790649414 + }, + { + "epoch": 2.519989013671875e-05, + "model_forward_time": 0.0245513916015625, + "step": 16515 + }, + { + "epoch": 2.519989013671875e-05, + "step": 16515, + "training_step_time": 0.1503283977508545 + }, + { + "epoch": 2.5201416015625e-05, + "model_forward_time": 0.026794910430908203, + "step": 16516 + }, + { + "epoch": 2.5201416015625e-05, + "step": 16516, + "training_step_time": 0.10719966888427734 + }, + { + "epoch": 2.520294189453125e-05, + "model_forward_time": 0.02485489845275879, + "step": 16517 + }, + { + "epoch": 2.520294189453125e-05, + "step": 16517, + "training_step_time": 0.10767030715942383 + }, + { + "epoch": 2.52044677734375e-05, + "model_forward_time": 0.02543783187866211, + "step": 16518 + }, + { + "epoch": 2.52044677734375e-05, + "step": 16518, + "training_step_time": 0.11243295669555664 + }, + { + "epoch": 2.520599365234375e-05, + "model_forward_time": 0.025179147720336914, + "step": 16519 + }, + { + "epoch": 2.520599365234375e-05, + "step": 16519, + "training_step_time": 0.10486125946044922 + }, + { + "epoch": 2.520751953125e-05, + "grad_norm": 0.2752593457698822, + "learning_rate": 4.576118802420856e-05, + "loss": 0.0147, + "step": 16520 + }, + { + "epoch": 2.520751953125e-05, + "model_forward_time": 0.025122404098510742, + "step": 16520 + }, + { + "epoch": 2.520751953125e-05, + "step": 16520, + "training_step_time": 0.19008755683898926 + }, + { + "epoch": 2.520904541015625e-05, + "model_forward_time": 0.024200439453125, + "step": 16521 + }, + { + "epoch": 2.520904541015625e-05, + "step": 16521, + "training_step_time": 0.10243892669677734 + }, + { + "epoch": 2.52105712890625e-05, + "model_forward_time": 0.024297237396240234, + "step": 16522 + }, + { + "epoch": 2.52105712890625e-05, + "step": 16522, + "training_step_time": 0.10262656211853027 + }, + { + "epoch": 2.521209716796875e-05, + "model_forward_time": 0.02601146697998047, + "step": 16523 + }, + { + "epoch": 2.521209716796875e-05, + "step": 16523, + "training_step_time": 0.10913658142089844 + }, + { + "epoch": 2.5213623046875e-05, + "model_forward_time": 0.02550530433654785, + "step": 16524 + }, + { + "epoch": 2.5213623046875e-05, + "step": 16524, + "training_step_time": 0.10615849494934082 + }, + { + "epoch": 2.521514892578125e-05, + "model_forward_time": 0.0251462459564209, + "step": 16525 + }, + { + "epoch": 2.521514892578125e-05, + "step": 16525, + "training_step_time": 0.1041104793548584 + }, + { + "epoch": 2.52166748046875e-05, + "model_forward_time": 0.02687358856201172, + "step": 16526 + }, + { + "epoch": 2.52166748046875e-05, + "step": 16526, + "training_step_time": 0.1058659553527832 + }, + { + "epoch": 2.521820068359375e-05, + "model_forward_time": 0.024223804473876953, + "step": 16527 + }, + { + "epoch": 2.521820068359375e-05, + "step": 16527, + "training_step_time": 0.8029687404632568 + }, + { + "epoch": 2.52197265625e-05, + "model_forward_time": 0.022718429565429688, + "step": 16528 + }, + { + "epoch": 2.52197265625e-05, + "step": 16528, + "training_step_time": 0.09745955467224121 + }, + { + "epoch": 2.522125244140625e-05, + "model_forward_time": 0.02447056770324707, + "step": 16529 + }, + { + "epoch": 2.522125244140625e-05, + "step": 16529, + "training_step_time": 0.1030728816986084 + }, + { + "epoch": 2.52227783203125e-05, + "grad_norm": 0.4050087034702301, + "learning_rate": 4.570627336558915e-05, + "loss": 0.0135, + "step": 16530 + }, + { + "epoch": 2.52227783203125e-05, + "model_forward_time": 0.025234699249267578, + "step": 16530 + }, + { + "epoch": 2.52227783203125e-05, + "step": 16530, + "training_step_time": 0.10991477966308594 + }, + { + "epoch": 2.522430419921875e-05, + "model_forward_time": 0.025871753692626953, + "step": 16531 + }, + { + "epoch": 2.522430419921875e-05, + "step": 16531, + "training_step_time": 0.11064910888671875 + }, + { + "epoch": 2.5225830078125e-05, + "model_forward_time": 0.025597572326660156, + "step": 16532 + }, + { + "epoch": 2.5225830078125e-05, + "step": 16532, + "training_step_time": 0.10509490966796875 + }, + { + "epoch": 2.522735595703125e-05, + "model_forward_time": 0.025438308715820312, + "step": 16533 + }, + { + "epoch": 2.522735595703125e-05, + "step": 16533, + "training_step_time": 0.10590982437133789 + }, + { + "epoch": 2.52288818359375e-05, + "model_forward_time": 0.025182485580444336, + "step": 16534 + }, + { + "epoch": 2.52288818359375e-05, + "step": 16534, + "training_step_time": 0.13657116889953613 + }, + { + "epoch": 2.523040771484375e-05, + "model_forward_time": 0.0258944034576416, + "step": 16535 + }, + { + "epoch": 2.523040771484375e-05, + "step": 16535, + "training_step_time": 0.1272275447845459 + }, + { + "epoch": 2.523193359375e-05, + "model_forward_time": 0.025060415267944336, + "step": 16536 + }, + { + "epoch": 2.523193359375e-05, + "step": 16536, + "training_step_time": 0.1841285228729248 + }, + { + "epoch": 2.523345947265625e-05, + "model_forward_time": 0.02487468719482422, + "step": 16537 + }, + { + "epoch": 2.523345947265625e-05, + "step": 16537, + "training_step_time": 0.18292737007141113 + }, + { + "epoch": 2.52349853515625e-05, + "model_forward_time": 0.024478912353515625, + "step": 16538 + }, + { + "epoch": 2.52349853515625e-05, + "step": 16538, + "training_step_time": 0.1664283275604248 + }, + { + "epoch": 2.523651123046875e-05, + "model_forward_time": 0.02479100227355957, + "step": 16539 + }, + { + "epoch": 2.523651123046875e-05, + "step": 16539, + "training_step_time": 0.13039326667785645 + }, + { + "epoch": 2.5238037109375e-05, + "grad_norm": 0.5051366090774536, + "learning_rate": 4.565136392425247e-05, + "loss": 0.0114, + "step": 16540 + }, + { + "epoch": 2.5238037109375e-05, + "model_forward_time": 0.02457594871520996, + "step": 16540 + }, + { + "epoch": 2.5238037109375e-05, + "step": 16540, + "training_step_time": 0.12413620948791504 + }, + { + "epoch": 2.523956298828125e-05, + "model_forward_time": 0.02474689483642578, + "step": 16541 + }, + { + "epoch": 2.523956298828125e-05, + "step": 16541, + "training_step_time": 0.1603560447692871 + }, + { + "epoch": 2.52410888671875e-05, + "model_forward_time": 0.024825334548950195, + "step": 16542 + }, + { + "epoch": 2.52410888671875e-05, + "step": 16542, + "training_step_time": 0.10730910301208496 + }, + { + "epoch": 2.524261474609375e-05, + "model_forward_time": 0.024782419204711914, + "step": 16543 + }, + { + "epoch": 2.524261474609375e-05, + "step": 16543, + "training_step_time": 0.10634422302246094 + }, + { + "epoch": 2.5244140625e-05, + "model_forward_time": 0.024883031845092773, + "step": 16544 + }, + { + "epoch": 2.5244140625e-05, + "step": 16544, + "training_step_time": 0.10681390762329102 + }, + { + "epoch": 2.524566650390625e-05, + "model_forward_time": 0.026651620864868164, + "step": 16545 + }, + { + "epoch": 2.524566650390625e-05, + "step": 16545, + "training_step_time": 0.14073514938354492 + }, + { + "epoch": 2.52471923828125e-05, + "model_forward_time": 0.02414870262145996, + "step": 16546 + }, + { + "epoch": 2.52471923828125e-05, + "step": 16546, + "training_step_time": 0.14812803268432617 + }, + { + "epoch": 2.524871826171875e-05, + "model_forward_time": 0.024050235748291016, + "step": 16547 + }, + { + "epoch": 2.524871826171875e-05, + "step": 16547, + "training_step_time": 0.140455961227417 + }, + { + "epoch": 2.5250244140625e-05, + "model_forward_time": 0.023543834686279297, + "step": 16548 + }, + { + "epoch": 2.5250244140625e-05, + "step": 16548, + "training_step_time": 0.13102316856384277 + }, + { + "epoch": 2.525177001953125e-05, + "model_forward_time": 0.02336430549621582, + "step": 16549 + }, + { + "epoch": 2.525177001953125e-05, + "step": 16549, + "training_step_time": 0.1204683780670166 + }, + { + "epoch": 2.52532958984375e-05, + "grad_norm": 0.3015366494655609, + "learning_rate": 4.559645976691868e-05, + "loss": 0.0118, + "step": 16550 + }, + { + "epoch": 2.52532958984375e-05, + "model_forward_time": 0.024013757705688477, + "step": 16550 + }, + { + "epoch": 2.52532958984375e-05, + "step": 16550, + "training_step_time": 0.12308359146118164 + }, + { + "epoch": 2.525482177734375e-05, + "model_forward_time": 0.02369999885559082, + "step": 16551 + }, + { + "epoch": 2.525482177734375e-05, + "step": 16551, + "training_step_time": 0.12003612518310547 + }, + { + "epoch": 2.525634765625e-05, + "model_forward_time": 0.024049043655395508, + "step": 16552 + }, + { + "epoch": 2.525634765625e-05, + "step": 16552, + "training_step_time": 0.1148366928100586 + }, + { + "epoch": 2.525787353515625e-05, + "model_forward_time": 0.024448871612548828, + "step": 16553 + }, + { + "epoch": 2.525787353515625e-05, + "step": 16553, + "training_step_time": 0.11223649978637695 + }, + { + "epoch": 2.52593994140625e-05, + "model_forward_time": 0.024054765701293945, + "step": 16554 + }, + { + "epoch": 2.52593994140625e-05, + "step": 16554, + "training_step_time": 0.10934281349182129 + }, + { + "epoch": 2.526092529296875e-05, + "model_forward_time": 0.0249936580657959, + "step": 16555 + }, + { + "epoch": 2.526092529296875e-05, + "step": 16555, + "training_step_time": 0.11856794357299805 + }, + { + "epoch": 2.5262451171875e-05, + "model_forward_time": 0.024978160858154297, + "step": 16556 + }, + { + "epoch": 2.5262451171875e-05, + "step": 16556, + "training_step_time": 0.12755894660949707 + }, + { + "epoch": 2.526397705078125e-05, + "model_forward_time": 0.025196552276611328, + "step": 16557 + }, + { + "epoch": 2.526397705078125e-05, + "step": 16557, + "training_step_time": 0.10776114463806152 + }, + { + "epoch": 2.52655029296875e-05, + "model_forward_time": 0.02548074722290039, + "step": 16558 + }, + { + "epoch": 2.52655029296875e-05, + "step": 16558, + "training_step_time": 0.11733388900756836 + }, + { + "epoch": 2.526702880859375e-05, + "model_forward_time": 0.024979352951049805, + "step": 16559 + }, + { + "epoch": 2.526702880859375e-05, + "step": 16559, + "training_step_time": 0.11196684837341309 + }, + { + "epoch": 2.52685546875e-05, + "grad_norm": 0.28567254543304443, + "learning_rate": 4.554156096030149e-05, + "loss": 0.0161, + "step": 16560 + }, + { + "epoch": 2.52685546875e-05, + "model_forward_time": 0.025266647338867188, + "step": 16560 + }, + { + "epoch": 2.52685546875e-05, + "step": 16560, + "training_step_time": 0.10566043853759766 + }, + { + "epoch": 2.527008056640625e-05, + "model_forward_time": 0.025098323822021484, + "step": 16561 + }, + { + "epoch": 2.527008056640625e-05, + "step": 16561, + "training_step_time": 0.19541358947753906 + }, + { + "epoch": 2.52716064453125e-05, + "model_forward_time": 0.024399995803833008, + "step": 16562 + }, + { + "epoch": 2.52716064453125e-05, + "step": 16562, + "training_step_time": 0.10361933708190918 + }, + { + "epoch": 2.527313232421875e-05, + "model_forward_time": 0.024165868759155273, + "step": 16563 + }, + { + "epoch": 2.527313232421875e-05, + "step": 16563, + "training_step_time": 0.10366702079772949 + }, + { + "epoch": 2.5274658203125e-05, + "model_forward_time": 0.02518010139465332, + "step": 16564 + }, + { + "epoch": 2.5274658203125e-05, + "step": 16564, + "training_step_time": 0.10549187660217285 + }, + { + "epoch": 2.527618408203125e-05, + "model_forward_time": 0.026192903518676758, + "step": 16565 + }, + { + "epoch": 2.527618408203125e-05, + "step": 16565, + "training_step_time": 0.10967206954956055 + }, + { + "epoch": 2.52777099609375e-05, + "model_forward_time": 0.02512669563293457, + "step": 16566 + }, + { + "epoch": 2.52777099609375e-05, + "step": 16566, + "training_step_time": 0.10818696022033691 + }, + { + "epoch": 2.527923583984375e-05, + "model_forward_time": 0.028029680252075195, + "step": 16567 + }, + { + "epoch": 2.527923583984375e-05, + "step": 16567, + "training_step_time": 0.10882425308227539 + }, + { + "epoch": 2.528076171875e-05, + "model_forward_time": 0.026189088821411133, + "step": 16568 + }, + { + "epoch": 2.528076171875e-05, + "step": 16568, + "training_step_time": 0.10634517669677734 + }, + { + "epoch": 2.528228759765625e-05, + "model_forward_time": 0.025264501571655273, + "step": 16569 + }, + { + "epoch": 2.528228759765625e-05, + "step": 16569, + "training_step_time": 0.10529017448425293 + }, + { + "epoch": 2.52838134765625e-05, + "grad_norm": 0.1987486630678177, + "learning_rate": 4.548666757110812e-05, + "loss": 0.013, + "step": 16570 + }, + { + "epoch": 2.52838134765625e-05, + "model_forward_time": 0.02504134178161621, + "step": 16570 + }, + { + "epoch": 2.52838134765625e-05, + "step": 16570, + "training_step_time": 0.10690069198608398 + }, + { + "epoch": 2.528533935546875e-05, + "model_forward_time": 0.024866104125976562, + "step": 16571 + }, + { + "epoch": 2.528533935546875e-05, + "step": 16571, + "training_step_time": 0.11589813232421875 + }, + { + "epoch": 2.5286865234375e-05, + "model_forward_time": 0.024870634078979492, + "step": 16572 + }, + { + "epoch": 2.5286865234375e-05, + "step": 16572, + "training_step_time": 0.1053609848022461 + }, + { + "epoch": 2.528839111328125e-05, + "model_forward_time": 0.02532672882080078, + "step": 16573 + }, + { + "epoch": 2.528839111328125e-05, + "step": 16573, + "training_step_time": 0.10715961456298828 + }, + { + "epoch": 2.52899169921875e-05, + "model_forward_time": 0.027630090713500977, + "step": 16574 + }, + { + "epoch": 2.52899169921875e-05, + "step": 16574, + "training_step_time": 0.10832548141479492 + }, + { + "epoch": 2.529144287109375e-05, + "model_forward_time": 0.024944543838500977, + "step": 16575 + }, + { + "epoch": 2.529144287109375e-05, + "step": 16575, + "training_step_time": 0.10603857040405273 + }, + { + "epoch": 2.529296875e-05, + "model_forward_time": 0.025362253189086914, + "step": 16576 + }, + { + "epoch": 2.529296875e-05, + "step": 16576, + "training_step_time": 0.10519742965698242 + }, + { + "epoch": 2.529449462890625e-05, + "model_forward_time": 0.024924755096435547, + "step": 16577 + }, + { + "epoch": 2.529449462890625e-05, + "step": 16577, + "training_step_time": 0.10332179069519043 + }, + { + "epoch": 2.52960205078125e-05, + "model_forward_time": 0.024797677993774414, + "step": 16578 + }, + { + "epoch": 2.52960205078125e-05, + "step": 16578, + "training_step_time": 0.1048271656036377 + }, + { + "epoch": 2.529754638671875e-05, + "model_forward_time": 0.02509617805480957, + "step": 16579 + }, + { + "epoch": 2.529754638671875e-05, + "step": 16579, + "training_step_time": 0.1047818660736084 + }, + { + "epoch": 2.5299072265625e-05, + "grad_norm": 0.1858363002538681, + "learning_rate": 4.543177966603925e-05, + "loss": 0.0188, + "step": 16580 + }, + { + "epoch": 2.5299072265625e-05, + "model_forward_time": 0.02523326873779297, + "step": 16580 + }, + { + "epoch": 2.5299072265625e-05, + "step": 16580, + "training_step_time": 0.10466265678405762 + }, + { + "epoch": 2.530059814453125e-05, + "model_forward_time": 0.02450704574584961, + "step": 16581 + }, + { + "epoch": 2.530059814453125e-05, + "step": 16581, + "training_step_time": 0.11696004867553711 + }, + { + "epoch": 2.53021240234375e-05, + "model_forward_time": 0.02724623680114746, + "step": 16582 + }, + { + "epoch": 2.53021240234375e-05, + "step": 16582, + "training_step_time": 0.12034058570861816 + }, + { + "epoch": 2.530364990234375e-05, + "model_forward_time": 0.025170087814331055, + "step": 16583 + }, + { + "epoch": 2.530364990234375e-05, + "step": 16583, + "training_step_time": 0.14411425590515137 + }, + { + "epoch": 2.530517578125e-05, + "model_forward_time": 0.024528026580810547, + "step": 16584 + }, + { + "epoch": 2.530517578125e-05, + "step": 16584, + "training_step_time": 0.2190229892730713 + }, + { + "epoch": 2.530670166015625e-05, + "model_forward_time": 0.02458977699279785, + "step": 16585 + }, + { + "epoch": 2.530670166015625e-05, + "step": 16585, + "training_step_time": 0.13709235191345215 + }, + { + "epoch": 2.53082275390625e-05, + "model_forward_time": 0.024309873580932617, + "step": 16586 + }, + { + "epoch": 2.53082275390625e-05, + "step": 16586, + "training_step_time": 0.11889171600341797 + }, + { + "epoch": 2.530975341796875e-05, + "model_forward_time": 0.024812698364257812, + "step": 16587 + }, + { + "epoch": 2.530975341796875e-05, + "step": 16587, + "training_step_time": 0.12322020530700684 + }, + { + "epoch": 2.5311279296875e-05, + "model_forward_time": 0.02521991729736328, + "step": 16588 + }, + { + "epoch": 2.5311279296875e-05, + "step": 16588, + "training_step_time": 0.11711835861206055 + }, + { + "epoch": 2.531280517578125e-05, + "model_forward_time": 0.025865554809570312, + "step": 16589 + }, + { + "epoch": 2.531280517578125e-05, + "step": 16589, + "training_step_time": 0.10886645317077637 + }, + { + "epoch": 2.53143310546875e-05, + "grad_norm": 0.2656418979167938, + "learning_rate": 4.537689731178883e-05, + "loss": 0.0079, + "step": 16590 + }, + { + "epoch": 2.53143310546875e-05, + "model_forward_time": 0.024789094924926758, + "step": 16590 + }, + { + "epoch": 2.53143310546875e-05, + "step": 16590, + "training_step_time": 0.10387253761291504 + }, + { + "epoch": 2.531585693359375e-05, + "model_forward_time": 0.02501368522644043, + "step": 16591 + }, + { + "epoch": 2.531585693359375e-05, + "step": 16591, + "training_step_time": 0.10611248016357422 + }, + { + "epoch": 2.53173828125e-05, + "model_forward_time": 0.0245816707611084, + "step": 16592 + }, + { + "epoch": 2.53173828125e-05, + "step": 16592, + "training_step_time": 0.1059575080871582 + }, + { + "epoch": 2.531890869140625e-05, + "model_forward_time": 0.02487659454345703, + "step": 16593 + }, + { + "epoch": 2.531890869140625e-05, + "step": 16593, + "training_step_time": 0.10468363761901855 + }, + { + "epoch": 2.53204345703125e-05, + "model_forward_time": 0.025085926055908203, + "step": 16594 + }, + { + "epoch": 2.53204345703125e-05, + "step": 16594, + "training_step_time": 0.1050114631652832 + }, + { + "epoch": 2.532196044921875e-05, + "model_forward_time": 0.02528071403503418, + "step": 16595 + }, + { + "epoch": 2.532196044921875e-05, + "step": 16595, + "training_step_time": 0.10451173782348633 + }, + { + "epoch": 2.5323486328125e-05, + "model_forward_time": 0.025014400482177734, + "step": 16596 + }, + { + "epoch": 2.5323486328125e-05, + "step": 16596, + "training_step_time": 0.10512495040893555 + }, + { + "epoch": 2.532501220703125e-05, + "model_forward_time": 0.025255203247070312, + "step": 16597 + }, + { + "epoch": 2.532501220703125e-05, + "step": 16597, + "training_step_time": 0.11398696899414062 + }, + { + "epoch": 2.53265380859375e-05, + "model_forward_time": 0.025079965591430664, + "step": 16598 + }, + { + "epoch": 2.53265380859375e-05, + "step": 16598, + "training_step_time": 0.10764050483703613 + }, + { + "epoch": 2.532806396484375e-05, + "model_forward_time": 0.025055646896362305, + "step": 16599 + }, + { + "epoch": 2.532806396484375e-05, + "step": 16599, + "training_step_time": 0.10536742210388184 + }, + { + "epoch": 2.532958984375e-05, + "grad_norm": 0.1985049694776535, + "learning_rate": 4.5322020575044114e-05, + "loss": 0.0076, + "step": 16600 + }, + { + "epoch": 2.532958984375e-05, + "model_forward_time": 0.025246858596801758, + "step": 16600 + }, + { + "epoch": 2.532958984375e-05, + "step": 16600, + "training_step_time": 0.1058351993560791 + }, + { + "epoch": 2.533111572265625e-05, + "model_forward_time": 0.02504873275756836, + "step": 16601 + }, + { + "epoch": 2.533111572265625e-05, + "step": 16601, + "training_step_time": 0.10551166534423828 + }, + { + "epoch": 2.53326416015625e-05, + "model_forward_time": 0.024591922760009766, + "step": 16602 + }, + { + "epoch": 2.53326416015625e-05, + "step": 16602, + "training_step_time": 0.7987291812896729 + }, + { + "epoch": 2.533416748046875e-05, + "model_forward_time": 0.022065401077270508, + "step": 16603 + }, + { + "epoch": 2.533416748046875e-05, + "step": 16603, + "training_step_time": 0.1444566249847412 + }, + { + "epoch": 2.5335693359375e-05, + "model_forward_time": 0.02356696128845215, + "step": 16604 + }, + { + "epoch": 2.5335693359375e-05, + "step": 16604, + "training_step_time": 0.13671088218688965 + }, + { + "epoch": 2.533721923828125e-05, + "model_forward_time": 0.024448633193969727, + "step": 16605 + }, + { + "epoch": 2.533721923828125e-05, + "step": 16605, + "training_step_time": 0.10616540908813477 + }, + { + "epoch": 2.53387451171875e-05, + "model_forward_time": 0.025038719177246094, + "step": 16606 + }, + { + "epoch": 2.53387451171875e-05, + "step": 16606, + "training_step_time": 0.10336637496948242 + }, + { + "epoch": 2.534027099609375e-05, + "model_forward_time": 0.02539539337158203, + "step": 16607 + }, + { + "epoch": 2.534027099609375e-05, + "step": 16607, + "training_step_time": 0.1045985221862793 + }, + { + "epoch": 2.5341796875e-05, + "model_forward_time": 0.024793386459350586, + "step": 16608 + }, + { + "epoch": 2.5341796875e-05, + "step": 16608, + "training_step_time": 0.1066904067993164 + }, + { + "epoch": 2.534332275390625e-05, + "model_forward_time": 0.02488231658935547, + "step": 16609 + }, + { + "epoch": 2.534332275390625e-05, + "step": 16609, + "training_step_time": 0.11014199256896973 + }, + { + "epoch": 2.53448486328125e-05, + "grad_norm": 0.1672280728816986, + "learning_rate": 4.526714952248551e-05, + "loss": 0.007, + "step": 16610 + }, + { + "epoch": 2.53448486328125e-05, + "model_forward_time": 0.025342941284179688, + "step": 16610 + }, + { + "epoch": 2.53448486328125e-05, + "step": 16610, + "training_step_time": 0.10714030265808105 + }, + { + "epoch": 2.534637451171875e-05, + "model_forward_time": 0.02525615692138672, + "step": 16611 + }, + { + "epoch": 2.534637451171875e-05, + "step": 16611, + "training_step_time": 0.10456204414367676 + }, + { + "epoch": 2.5347900390625e-05, + "model_forward_time": 0.025119543075561523, + "step": 16612 + }, + { + "epoch": 2.5347900390625e-05, + "step": 16612, + "training_step_time": 0.10472249984741211 + }, + { + "epoch": 2.534942626953125e-05, + "model_forward_time": 0.025095701217651367, + "step": 16613 + }, + { + "epoch": 2.534942626953125e-05, + "step": 16613, + "training_step_time": 0.10397100448608398 + }, + { + "epoch": 2.53509521484375e-05, + "model_forward_time": 0.025292158126831055, + "step": 16614 + }, + { + "epoch": 2.53509521484375e-05, + "step": 16614, + "training_step_time": 0.1049032211303711 + }, + { + "epoch": 2.535247802734375e-05, + "model_forward_time": 0.025177478790283203, + "step": 16615 + }, + { + "epoch": 2.535247802734375e-05, + "step": 16615, + "training_step_time": 0.10416483879089355 + }, + { + "epoch": 2.535400390625e-05, + "model_forward_time": 0.02516317367553711, + "step": 16616 + }, + { + "epoch": 2.535400390625e-05, + "step": 16616, + "training_step_time": 0.10529756546020508 + }, + { + "epoch": 2.535552978515625e-05, + "model_forward_time": 0.02542901039123535, + "step": 16617 + }, + { + "epoch": 2.535552978515625e-05, + "step": 16617, + "training_step_time": 0.10552763938903809 + }, + { + "epoch": 2.53570556640625e-05, + "model_forward_time": 0.025108814239501953, + "step": 16618 + }, + { + "epoch": 2.53570556640625e-05, + "step": 16618, + "training_step_time": 0.1042478084564209 + }, + { + "epoch": 2.535858154296875e-05, + "model_forward_time": 0.02516007423400879, + "step": 16619 + }, + { + "epoch": 2.535858154296875e-05, + "step": 16619, + "training_step_time": 0.1058199405670166 + }, + { + "epoch": 2.5360107421875e-05, + "grad_norm": 0.18666355311870575, + "learning_rate": 4.5212284220786494e-05, + "loss": 0.0069, + "step": 16620 + }, + { + "epoch": 2.5360107421875e-05, + "model_forward_time": 0.025111913681030273, + "step": 16620 + }, + { + "epoch": 2.5360107421875e-05, + "step": 16620, + "training_step_time": 0.10625576972961426 + }, + { + "epoch": 2.536163330078125e-05, + "model_forward_time": 0.025203466415405273, + "step": 16621 + }, + { + "epoch": 2.536163330078125e-05, + "step": 16621, + "training_step_time": 0.10848021507263184 + }, + { + "epoch": 2.53631591796875e-05, + "model_forward_time": 0.025542020797729492, + "step": 16622 + }, + { + "epoch": 2.53631591796875e-05, + "step": 16622, + "training_step_time": 0.11608672142028809 + }, + { + "epoch": 2.536468505859375e-05, + "model_forward_time": 0.025019168853759766, + "step": 16623 + }, + { + "epoch": 2.536468505859375e-05, + "step": 16623, + "training_step_time": 0.10539507865905762 + }, + { + "epoch": 2.53662109375e-05, + "model_forward_time": 0.02507162094116211, + "step": 16624 + }, + { + "epoch": 2.53662109375e-05, + "step": 16624, + "training_step_time": 0.10774445533752441 + }, + { + "epoch": 2.536773681640625e-05, + "model_forward_time": 0.026443958282470703, + "step": 16625 + }, + { + "epoch": 2.536773681640625e-05, + "step": 16625, + "training_step_time": 0.17640352249145508 + }, + { + "epoch": 2.53692626953125e-05, + "model_forward_time": 0.02417755126953125, + "step": 16626 + }, + { + "epoch": 2.53692626953125e-05, + "step": 16626, + "training_step_time": 0.17200732231140137 + }, + { + "epoch": 2.537078857421875e-05, + "model_forward_time": 0.024953842163085938, + "step": 16627 + }, + { + "epoch": 2.537078857421875e-05, + "step": 16627, + "training_step_time": 0.20723891258239746 + }, + { + "epoch": 2.5372314453125e-05, + "model_forward_time": 0.024498939514160156, + "step": 16628 + }, + { + "epoch": 2.5372314453125e-05, + "step": 16628, + "training_step_time": 0.18797826766967773 + }, + { + "epoch": 2.537384033203125e-05, + "model_forward_time": 0.02456212043762207, + "step": 16629 + }, + { + "epoch": 2.537384033203125e-05, + "step": 16629, + "training_step_time": 0.16893768310546875 + }, + { + "epoch": 2.53753662109375e-05, + "grad_norm": 0.13135157525539398, + "learning_rate": 4.515742473661362e-05, + "loss": 0.0068, + "step": 16630 + }, + { + "epoch": 2.53753662109375e-05, + "model_forward_time": 0.02428412437438965, + "step": 16630 + }, + { + "epoch": 2.53753662109375e-05, + "step": 16630, + "training_step_time": 0.18810272216796875 + }, + { + "epoch": 2.537689208984375e-05, + "model_forward_time": 0.024073123931884766, + "step": 16631 + }, + { + "epoch": 2.537689208984375e-05, + "step": 16631, + "training_step_time": 0.11037850379943848 + }, + { + "epoch": 2.537841796875e-05, + "model_forward_time": 0.024218320846557617, + "step": 16632 + }, + { + "epoch": 2.537841796875e-05, + "step": 16632, + "training_step_time": 0.10922694206237793 + }, + { + "epoch": 2.537994384765625e-05, + "model_forward_time": 0.025294780731201172, + "step": 16633 + }, + { + "epoch": 2.537994384765625e-05, + "step": 16633, + "training_step_time": 0.11012959480285645 + }, + { + "epoch": 2.53814697265625e-05, + "model_forward_time": 0.024784088134765625, + "step": 16634 + }, + { + "epoch": 2.53814697265625e-05, + "step": 16634, + "training_step_time": 0.10961174964904785 + }, + { + "epoch": 2.538299560546875e-05, + "model_forward_time": 0.02500772476196289, + "step": 16635 + }, + { + "epoch": 2.538299560546875e-05, + "step": 16635, + "training_step_time": 0.1098935604095459 + }, + { + "epoch": 2.5384521484375e-05, + "model_forward_time": 0.02524709701538086, + "step": 16636 + }, + { + "epoch": 2.5384521484375e-05, + "step": 16636, + "training_step_time": 0.10840702056884766 + }, + { + "epoch": 2.538604736328125e-05, + "model_forward_time": 0.025710582733154297, + "step": 16637 + }, + { + "epoch": 2.538604736328125e-05, + "step": 16637, + "training_step_time": 0.10905075073242188 + }, + { + "epoch": 2.53875732421875e-05, + "model_forward_time": 0.027691364288330078, + "step": 16638 + }, + { + "epoch": 2.53875732421875e-05, + "step": 16638, + "training_step_time": 0.11060190200805664 + }, + { + "epoch": 2.538909912109375e-05, + "model_forward_time": 0.025199413299560547, + "step": 16639 + }, + { + "epoch": 2.538909912109375e-05, + "step": 16639, + "training_step_time": 0.10738778114318848 + }, + { + "epoch": 2.5390625e-05, + "grad_norm": 0.49643006920814514, + "learning_rate": 4.510257113662632e-05, + "loss": 0.0185, + "step": 16640 + }, + { + "epoch": 2.5390625e-05, + "model_forward_time": 0.02658867835998535, + "step": 16640 + }, + { + "epoch": 2.5390625e-05, + "step": 16640, + "training_step_time": 0.10846638679504395 + }, + { + "epoch": 2.539215087890625e-05, + "model_forward_time": 0.025012969970703125, + "step": 16641 + }, + { + "epoch": 2.539215087890625e-05, + "step": 16641, + "training_step_time": 0.10785222053527832 + }, + { + "epoch": 2.53936767578125e-05, + "model_forward_time": 0.02572345733642578, + "step": 16642 + }, + { + "epoch": 2.53936767578125e-05, + "step": 16642, + "training_step_time": 0.1070244312286377 + }, + { + "epoch": 2.539520263671875e-05, + "model_forward_time": 0.025281190872192383, + "step": 16643 + }, + { + "epoch": 2.539520263671875e-05, + "step": 16643, + "training_step_time": 0.8571782112121582 + }, + { + "epoch": 2.5396728515625e-05, + "model_forward_time": 0.023311138153076172, + "step": 16644 + }, + { + "epoch": 2.5396728515625e-05, + "step": 16644, + "training_step_time": 0.1541895866394043 + }, + { + "epoch": 2.539825439453125e-05, + "model_forward_time": 0.024463176727294922, + "step": 16645 + }, + { + "epoch": 2.539825439453125e-05, + "step": 16645, + "training_step_time": 0.10650444030761719 + }, + { + "epoch": 2.53997802734375e-05, + "model_forward_time": 0.02795863151550293, + "step": 16646 + }, + { + "epoch": 2.53997802734375e-05, + "step": 16646, + "training_step_time": 0.10680603981018066 + }, + { + "epoch": 2.540130615234375e-05, + "model_forward_time": 0.025423765182495117, + "step": 16647 + }, + { + "epoch": 2.540130615234375e-05, + "step": 16647, + "training_step_time": 0.10442256927490234 + }, + { + "epoch": 2.540283203125e-05, + "model_forward_time": 0.025372028350830078, + "step": 16648 + }, + { + "epoch": 2.540283203125e-05, + "step": 16648, + "training_step_time": 0.10989189147949219 + }, + { + "epoch": 2.540435791015625e-05, + "model_forward_time": 0.025456666946411133, + "step": 16649 + }, + { + "epoch": 2.540435791015625e-05, + "step": 16649, + "training_step_time": 0.10805225372314453 + }, + { + "epoch": 2.54058837890625e-05, + "grad_norm": 0.19242271780967712, + "learning_rate": 4.504772348747687e-05, + "loss": 0.0134, + "step": 16650 + }, + { + "epoch": 2.54058837890625e-05, + "model_forward_time": 0.025990724563598633, + "step": 16650 + }, + { + "epoch": 2.54058837890625e-05, + "step": 16650, + "training_step_time": 0.10658097267150879 + }, + { + "epoch": 2.540740966796875e-05, + "model_forward_time": 0.025502443313598633, + "step": 16651 + }, + { + "epoch": 2.540740966796875e-05, + "step": 16651, + "training_step_time": 0.11491274833679199 + }, + { + "epoch": 2.5408935546875e-05, + "model_forward_time": 0.02398061752319336, + "step": 16652 + }, + { + "epoch": 2.5408935546875e-05, + "step": 16652, + "training_step_time": 0.13004589080810547 + }, + { + "epoch": 2.541046142578125e-05, + "model_forward_time": 0.0239565372467041, + "step": 16653 + }, + { + "epoch": 2.541046142578125e-05, + "step": 16653, + "training_step_time": 0.1210334300994873 + }, + { + "epoch": 2.54119873046875e-05, + "model_forward_time": 0.02406620979309082, + "step": 16654 + }, + { + "epoch": 2.54119873046875e-05, + "step": 16654, + "training_step_time": 0.12010574340820312 + }, + { + "epoch": 2.541351318359375e-05, + "model_forward_time": 0.024120330810546875, + "step": 16655 + }, + { + "epoch": 2.541351318359375e-05, + "step": 16655, + "training_step_time": 0.116180419921875 + }, + { + "epoch": 2.54150390625e-05, + "model_forward_time": 0.024177074432373047, + "step": 16656 + }, + { + "epoch": 2.54150390625e-05, + "step": 16656, + "training_step_time": 0.11922383308410645 + }, + { + "epoch": 2.541656494140625e-05, + "model_forward_time": 0.023957252502441406, + "step": 16657 + }, + { + "epoch": 2.541656494140625e-05, + "step": 16657, + "training_step_time": 0.11320328712463379 + }, + { + "epoch": 2.54180908203125e-05, + "model_forward_time": 0.025034427642822266, + "step": 16658 + }, + { + "epoch": 2.54180908203125e-05, + "step": 16658, + "training_step_time": 0.10765457153320312 + }, + { + "epoch": 2.541961669921875e-05, + "model_forward_time": 0.0250089168548584, + "step": 16659 + }, + { + "epoch": 2.541961669921875e-05, + "step": 16659, + "training_step_time": 0.10962605476379395 + }, + { + "epoch": 2.5421142578125e-05, + "grad_norm": 0.22955352067947388, + "learning_rate": 4.4992881855810366e-05, + "loss": 0.0098, + "step": 16660 + }, + { + "epoch": 2.5421142578125e-05, + "model_forward_time": 0.02432107925415039, + "step": 16660 + }, + { + "epoch": 2.5421142578125e-05, + "step": 16660, + "training_step_time": 0.10808014869689941 + }, + { + "epoch": 2.542266845703125e-05, + "model_forward_time": 0.025298357009887695, + "step": 16661 + }, + { + "epoch": 2.542266845703125e-05, + "step": 16661, + "training_step_time": 0.10692834854125977 + }, + { + "epoch": 2.54241943359375e-05, + "model_forward_time": 0.02498173713684082, + "step": 16662 + }, + { + "epoch": 2.54241943359375e-05, + "step": 16662, + "training_step_time": 0.10931515693664551 + }, + { + "epoch": 2.542572021484375e-05, + "model_forward_time": 0.0238497257232666, + "step": 16663 + }, + { + "epoch": 2.542572021484375e-05, + "step": 16663, + "training_step_time": 0.10422039031982422 + }, + { + "epoch": 2.542724609375e-05, + "model_forward_time": 0.028241634368896484, + "step": 16664 + }, + { + "epoch": 2.542724609375e-05, + "step": 16664, + "training_step_time": 0.10813021659851074 + }, + { + "epoch": 2.542877197265625e-05, + "model_forward_time": 0.02412104606628418, + "step": 16665 + }, + { + "epoch": 2.542877197265625e-05, + "step": 16665, + "training_step_time": 0.1409130096435547 + }, + { + "epoch": 2.54302978515625e-05, + "model_forward_time": 0.024274587631225586, + "step": 16666 + }, + { + "epoch": 2.54302978515625e-05, + "step": 16666, + "training_step_time": 0.14400815963745117 + }, + { + "epoch": 2.543182373046875e-05, + "model_forward_time": 0.024796247482299805, + "step": 16667 + }, + { + "epoch": 2.543182373046875e-05, + "step": 16667, + "training_step_time": 0.15766596794128418 + }, + { + "epoch": 2.5433349609375e-05, + "model_forward_time": 0.024019718170166016, + "step": 16668 + }, + { + "epoch": 2.5433349609375e-05, + "step": 16668, + "training_step_time": 0.16984105110168457 + }, + { + "epoch": 2.543487548828125e-05, + "model_forward_time": 0.02421879768371582, + "step": 16669 + }, + { + "epoch": 2.543487548828125e-05, + "step": 16669, + "training_step_time": 0.15294861793518066 + }, + { + "epoch": 2.54364013671875e-05, + "grad_norm": 0.20682382583618164, + "learning_rate": 4.4938046308264544e-05, + "loss": 0.0252, + "step": 16670 + }, + { + "epoch": 2.54364013671875e-05, + "model_forward_time": 0.024389982223510742, + "step": 16670 + }, + { + "epoch": 2.54364013671875e-05, + "step": 16670, + "training_step_time": 0.1876511573791504 + }, + { + "epoch": 2.543792724609375e-05, + "model_forward_time": 0.024072647094726562, + "step": 16671 + }, + { + "epoch": 2.543792724609375e-05, + "step": 16671, + "training_step_time": 0.14053845405578613 + }, + { + "epoch": 2.5439453125e-05, + "model_forward_time": 0.024406909942626953, + "step": 16672 + }, + { + "epoch": 2.5439453125e-05, + "step": 16672, + "training_step_time": 0.11157536506652832 + }, + { + "epoch": 2.544097900390625e-05, + "model_forward_time": 0.02478647232055664, + "step": 16673 + }, + { + "epoch": 2.544097900390625e-05, + "step": 16673, + "training_step_time": 0.1026923656463623 + }, + { + "epoch": 2.54425048828125e-05, + "model_forward_time": 0.025321006774902344, + "step": 16674 + }, + { + "epoch": 2.54425048828125e-05, + "step": 16674, + "training_step_time": 0.10439133644104004 + }, + { + "epoch": 2.544403076171875e-05, + "model_forward_time": 0.02544236183166504, + "step": 16675 + }, + { + "epoch": 2.544403076171875e-05, + "step": 16675, + "training_step_time": 0.10453557968139648 + }, + { + "epoch": 2.5445556640625e-05, + "model_forward_time": 0.025281667709350586, + "step": 16676 + }, + { + "epoch": 2.5445556640625e-05, + "step": 16676, + "training_step_time": 0.10698318481445312 + }, + { + "epoch": 2.544708251953125e-05, + "model_forward_time": 0.025316238403320312, + "step": 16677 + }, + { + "epoch": 2.544708251953125e-05, + "step": 16677, + "training_step_time": 0.10499024391174316 + }, + { + "epoch": 2.54486083984375e-05, + "model_forward_time": 0.02532792091369629, + "step": 16678 + }, + { + "epoch": 2.54486083984375e-05, + "step": 16678, + "training_step_time": 0.1079416275024414 + }, + { + "epoch": 2.545013427734375e-05, + "model_forward_time": 0.025020360946655273, + "step": 16679 + }, + { + "epoch": 2.545013427734375e-05, + "step": 16679, + "training_step_time": 0.11034703254699707 + }, + { + "epoch": 2.545166015625e-05, + "grad_norm": 0.25903502106666565, + "learning_rate": 4.488321691146975e-05, + "loss": 0.011, + "step": 16680 + }, + { + "epoch": 2.545166015625e-05, + "model_forward_time": 0.025185585021972656, + "step": 16680 + }, + { + "epoch": 2.545166015625e-05, + "step": 16680, + "training_step_time": 0.10507750511169434 + }, + { + "epoch": 2.545318603515625e-05, + "model_forward_time": 0.025228023529052734, + "step": 16681 + }, + { + "epoch": 2.545318603515625e-05, + "step": 16681, + "training_step_time": 0.10497832298278809 + }, + { + "epoch": 2.54547119140625e-05, + "model_forward_time": 0.025407075881958008, + "step": 16682 + }, + { + "epoch": 2.54547119140625e-05, + "step": 16682, + "training_step_time": 0.10824084281921387 + }, + { + "epoch": 2.545623779296875e-05, + "model_forward_time": 0.025044918060302734, + "step": 16683 + }, + { + "epoch": 2.545623779296875e-05, + "step": 16683, + "training_step_time": 0.10697674751281738 + }, + { + "epoch": 2.5457763671875e-05, + "model_forward_time": 0.024970293045043945, + "step": 16684 + }, + { + "epoch": 2.5457763671875e-05, + "step": 16684, + "training_step_time": 0.10669207572937012 + }, + { + "epoch": 2.545928955078125e-05, + "model_forward_time": 0.025036096572875977, + "step": 16685 + }, + { + "epoch": 2.545928955078125e-05, + "step": 16685, + "training_step_time": 0.10999512672424316 + }, + { + "epoch": 2.54608154296875e-05, + "model_forward_time": 0.02453756332397461, + "step": 16686 + }, + { + "epoch": 2.54608154296875e-05, + "step": 16686, + "training_step_time": 0.13719463348388672 + }, + { + "epoch": 2.546234130859375e-05, + "model_forward_time": 0.025812864303588867, + "step": 16687 + }, + { + "epoch": 2.546234130859375e-05, + "step": 16687, + "training_step_time": 0.131516695022583 + }, + { + "epoch": 2.54638671875e-05, + "model_forward_time": 0.024624109268188477, + "step": 16688 + }, + { + "epoch": 2.54638671875e-05, + "step": 16688, + "training_step_time": 0.11099481582641602 + }, + { + "epoch": 2.546539306640625e-05, + "model_forward_time": 0.024971961975097656, + "step": 16689 + }, + { + "epoch": 2.546539306640625e-05, + "step": 16689, + "training_step_time": 0.1086118221282959 + }, + { + "epoch": 2.54669189453125e-05, + "grad_norm": 0.5208256244659424, + "learning_rate": 4.482839373204891e-05, + "loss": 0.0121, + "step": 16690 + }, + { + "epoch": 2.54669189453125e-05, + "model_forward_time": 0.02492523193359375, + "step": 16690 + }, + { + "epoch": 2.54669189453125e-05, + "step": 16690, + "training_step_time": 0.11162877082824707 + }, + { + "epoch": 2.546844482421875e-05, + "model_forward_time": 0.02521800994873047, + "step": 16691 + }, + { + "epoch": 2.546844482421875e-05, + "step": 16691, + "training_step_time": 0.10720562934875488 + }, + { + "epoch": 2.5469970703125e-05, + "model_forward_time": 0.024992704391479492, + "step": 16692 + }, + { + "epoch": 2.5469970703125e-05, + "step": 16692, + "training_step_time": 0.19167208671569824 + }, + { + "epoch": 2.547149658203125e-05, + "model_forward_time": 0.024267196655273438, + "step": 16693 + }, + { + "epoch": 2.547149658203125e-05, + "step": 16693, + "training_step_time": 0.10340046882629395 + }, + { + "epoch": 2.54730224609375e-05, + "model_forward_time": 0.025059938430786133, + "step": 16694 + }, + { + "epoch": 2.54730224609375e-05, + "step": 16694, + "training_step_time": 0.10753440856933594 + }, + { + "epoch": 2.547454833984375e-05, + "model_forward_time": 0.025241851806640625, + "step": 16695 + }, + { + "epoch": 2.547454833984375e-05, + "step": 16695, + "training_step_time": 0.1053619384765625 + }, + { + "epoch": 2.547607421875e-05, + "model_forward_time": 0.025176525115966797, + "step": 16696 + }, + { + "epoch": 2.547607421875e-05, + "step": 16696, + "training_step_time": 0.10573458671569824 + }, + { + "epoch": 2.547760009765625e-05, + "model_forward_time": 0.025429725646972656, + "step": 16697 + }, + { + "epoch": 2.547760009765625e-05, + "step": 16697, + "training_step_time": 0.10465621948242188 + }, + { + "epoch": 2.54791259765625e-05, + "model_forward_time": 0.024989604949951172, + "step": 16698 + }, + { + "epoch": 2.54791259765625e-05, + "step": 16698, + "training_step_time": 0.10613012313842773 + }, + { + "epoch": 2.548065185546875e-05, + "model_forward_time": 0.025126934051513672, + "step": 16699 + }, + { + "epoch": 2.548065185546875e-05, + "step": 16699, + "training_step_time": 0.11264276504516602 + }, + { + "epoch": 2.5482177734375e-05, + "grad_norm": 0.2976929545402527, + "learning_rate": 4.477357683661734e-05, + "loss": 0.0096, + "step": 16700 + }, + { + "epoch": 2.5482177734375e-05, + "model_forward_time": 0.024694204330444336, + "step": 16700 + }, + { + "epoch": 2.5482177734375e-05, + "step": 16700, + "training_step_time": 0.11166071891784668 + }, + { + "epoch": 2.548370361328125e-05, + "model_forward_time": 0.024953126907348633, + "step": 16701 + }, + { + "epoch": 2.548370361328125e-05, + "step": 16701, + "training_step_time": 0.11517596244812012 + }, + { + "epoch": 2.54852294921875e-05, + "model_forward_time": 0.023789167404174805, + "step": 16702 + }, + { + "epoch": 2.54852294921875e-05, + "step": 16702, + "training_step_time": 0.11387372016906738 + }, + { + "epoch": 2.548675537109375e-05, + "model_forward_time": 0.024001121520996094, + "step": 16703 + }, + { + "epoch": 2.548675537109375e-05, + "step": 16703, + "training_step_time": 0.1112363338470459 + }, + { + "epoch": 2.548828125e-05, + "model_forward_time": 0.024921417236328125, + "step": 16704 + }, + { + "epoch": 2.548828125e-05, + "step": 16704, + "training_step_time": 0.10770654678344727 + }, + { + "epoch": 2.548980712890625e-05, + "model_forward_time": 0.025012493133544922, + "step": 16705 + }, + { + "epoch": 2.548980712890625e-05, + "step": 16705, + "training_step_time": 0.10656070709228516 + }, + { + "epoch": 2.54913330078125e-05, + "model_forward_time": 0.024748563766479492, + "step": 16706 + }, + { + "epoch": 2.54913330078125e-05, + "step": 16706, + "training_step_time": 0.10578417778015137 + }, + { + "epoch": 2.549285888671875e-05, + "model_forward_time": 0.024987220764160156, + "step": 16707 + }, + { + "epoch": 2.549285888671875e-05, + "step": 16707, + "training_step_time": 0.10602951049804688 + }, + { + "epoch": 2.5494384765625e-05, + "model_forward_time": 0.024960041046142578, + "step": 16708 + }, + { + "epoch": 2.5494384765625e-05, + "step": 16708, + "training_step_time": 0.10867547988891602 + }, + { + "epoch": 2.549591064453125e-05, + "model_forward_time": 0.02485942840576172, + "step": 16709 + }, + { + "epoch": 2.549591064453125e-05, + "step": 16709, + "training_step_time": 0.10514163970947266 + }, + { + "epoch": 2.54974365234375e-05, + "grad_norm": 0.15842688083648682, + "learning_rate": 4.471876629178273e-05, + "loss": 0.0137, + "step": 16710 + }, + { + "epoch": 2.54974365234375e-05, + "model_forward_time": 0.02489638328552246, + "step": 16710 + }, + { + "epoch": 2.54974365234375e-05, + "step": 16710, + "training_step_time": 0.10535836219787598 + }, + { + "epoch": 2.549896240234375e-05, + "model_forward_time": 0.02777552604675293, + "step": 16711 + }, + { + "epoch": 2.549896240234375e-05, + "step": 16711, + "training_step_time": 0.10854816436767578 + }, + { + "epoch": 2.550048828125e-05, + "model_forward_time": 0.0268707275390625, + "step": 16712 + }, + { + "epoch": 2.550048828125e-05, + "step": 16712, + "training_step_time": 0.10781383514404297 + }, + { + "epoch": 2.550201416015625e-05, + "model_forward_time": 0.025024890899658203, + "step": 16713 + }, + { + "epoch": 2.550201416015625e-05, + "step": 16713, + "training_step_time": 0.14537405967712402 + }, + { + "epoch": 2.55035400390625e-05, + "model_forward_time": 0.0251924991607666, + "step": 16714 + }, + { + "epoch": 2.55035400390625e-05, + "step": 16714, + "training_step_time": 0.10884881019592285 + }, + { + "epoch": 2.550506591796875e-05, + "model_forward_time": 0.025264501571655273, + "step": 16715 + }, + { + "epoch": 2.550506591796875e-05, + "step": 16715, + "training_step_time": 0.1516261100769043 + }, + { + "epoch": 2.5506591796875e-05, + "model_forward_time": 0.02481532096862793, + "step": 16716 + }, + { + "epoch": 2.5506591796875e-05, + "step": 16716, + "training_step_time": 0.18784236907958984 + }, + { + "epoch": 2.550811767578125e-05, + "model_forward_time": 0.02432084083557129, + "step": 16717 + }, + { + "epoch": 2.550811767578125e-05, + "step": 16717, + "training_step_time": 0.17497849464416504 + }, + { + "epoch": 2.55096435546875e-05, + "model_forward_time": 0.024656057357788086, + "step": 16718 + }, + { + "epoch": 2.55096435546875e-05, + "step": 16718, + "training_step_time": 0.1910874843597412 + }, + { + "epoch": 2.551116943359375e-05, + "model_forward_time": 0.024064302444458008, + "step": 16719 + }, + { + "epoch": 2.551116943359375e-05, + "step": 16719, + "training_step_time": 0.1134650707244873 + }, + { + "epoch": 2.55126953125e-05, + "grad_norm": 0.4978160262107849, + "learning_rate": 4.4663962164145045e-05, + "loss": 0.0222, + "step": 16720 + }, + { + "epoch": 2.55126953125e-05, + "model_forward_time": 0.02452683448791504, + "step": 16720 + }, + { + "epoch": 2.55126953125e-05, + "step": 16720, + "training_step_time": 0.10638642311096191 + }, + { + "epoch": 2.551422119140625e-05, + "model_forward_time": 0.02485203742980957, + "step": 16721 + }, + { + "epoch": 2.551422119140625e-05, + "step": 16721, + "training_step_time": 0.10462522506713867 + }, + { + "epoch": 2.55157470703125e-05, + "model_forward_time": 0.025048017501831055, + "step": 16722 + }, + { + "epoch": 2.55157470703125e-05, + "step": 16722, + "training_step_time": 0.10448956489562988 + }, + { + "epoch": 2.551727294921875e-05, + "model_forward_time": 0.025154590606689453, + "step": 16723 + }, + { + "epoch": 2.551727294921875e-05, + "step": 16723, + "training_step_time": 0.10414600372314453 + }, + { + "epoch": 2.5518798828125e-05, + "model_forward_time": 0.024892330169677734, + "step": 16724 + }, + { + "epoch": 2.5518798828125e-05, + "step": 16724, + "training_step_time": 0.10428643226623535 + }, + { + "epoch": 2.552032470703125e-05, + "model_forward_time": 0.025784969329833984, + "step": 16725 + }, + { + "epoch": 2.552032470703125e-05, + "step": 16725, + "training_step_time": 0.11121273040771484 + }, + { + "epoch": 2.55218505859375e-05, + "model_forward_time": 0.02483391761779785, + "step": 16726 + }, + { + "epoch": 2.55218505859375e-05, + "step": 16726, + "training_step_time": 0.10900378227233887 + }, + { + "epoch": 2.552337646484375e-05, + "model_forward_time": 0.025098562240600586, + "step": 16727 + }, + { + "epoch": 2.552337646484375e-05, + "step": 16727, + "training_step_time": 0.10476136207580566 + }, + { + "epoch": 2.552490234375e-05, + "model_forward_time": 0.025157928466796875, + "step": 16728 + }, + { + "epoch": 2.552490234375e-05, + "step": 16728, + "training_step_time": 0.1058509349822998 + }, + { + "epoch": 2.552642822265625e-05, + "model_forward_time": 0.024791955947875977, + "step": 16729 + }, + { + "epoch": 2.552642822265625e-05, + "step": 16729, + "training_step_time": 0.10350918769836426 + }, + { + "epoch": 2.55279541015625e-05, + "grad_norm": 0.4895699918270111, + "learning_rate": 4.46091645202965e-05, + "loss": 0.02, + "step": 16730 + }, + { + "epoch": 2.55279541015625e-05, + "model_forward_time": 0.027889013290405273, + "step": 16730 + }, + { + "epoch": 2.55279541015625e-05, + "step": 16730, + "training_step_time": 0.10820364952087402 + }, + { + "epoch": 2.552947998046875e-05, + "model_forward_time": 0.024933338165283203, + "step": 16731 + }, + { + "epoch": 2.552947998046875e-05, + "step": 16731, + "training_step_time": 0.10579538345336914 + }, + { + "epoch": 2.5531005859375e-05, + "model_forward_time": 0.024780750274658203, + "step": 16732 + }, + { + "epoch": 2.5531005859375e-05, + "step": 16732, + "training_step_time": 0.10418391227722168 + }, + { + "epoch": 2.553253173828125e-05, + "model_forward_time": 0.024895668029785156, + "step": 16733 + }, + { + "epoch": 2.553253173828125e-05, + "step": 16733, + "training_step_time": 0.10424613952636719 + }, + { + "epoch": 2.55340576171875e-05, + "model_forward_time": 0.02785658836364746, + "step": 16734 + }, + { + "epoch": 2.55340576171875e-05, + "step": 16734, + "training_step_time": 0.16634130477905273 + }, + { + "epoch": 2.553558349609375e-05, + "model_forward_time": 0.024422168731689453, + "step": 16735 + }, + { + "epoch": 2.553558349609375e-05, + "step": 16735, + "training_step_time": 0.13601946830749512 + }, + { + "epoch": 2.5537109375e-05, + "model_forward_time": 0.024595975875854492, + "step": 16736 + }, + { + "epoch": 2.5537109375e-05, + "step": 16736, + "training_step_time": 0.10818719863891602 + }, + { + "epoch": 2.553863525390625e-05, + "model_forward_time": 0.024896860122680664, + "step": 16737 + }, + { + "epoch": 2.553863525390625e-05, + "step": 16737, + "training_step_time": 0.11409831047058105 + }, + { + "epoch": 2.55401611328125e-05, + "model_forward_time": 0.024658679962158203, + "step": 16738 + }, + { + "epoch": 2.55401611328125e-05, + "step": 16738, + "training_step_time": 0.10797262191772461 + }, + { + "epoch": 2.554168701171875e-05, + "model_forward_time": 0.025061607360839844, + "step": 16739 + }, + { + "epoch": 2.554168701171875e-05, + "step": 16739, + "training_step_time": 0.11366653442382812 + }, + { + "epoch": 2.5543212890625e-05, + "grad_norm": 0.18995165824890137, + "learning_rate": 4.4554373426821374e-05, + "loss": 0.0112, + "step": 16740 + }, + { + "epoch": 2.5543212890625e-05, + "model_forward_time": 0.024971961975097656, + "step": 16740 + }, + { + "epoch": 2.5543212890625e-05, + "step": 16740, + "training_step_time": 0.1929788589477539 + }, + { + "epoch": 2.554473876953125e-05, + "model_forward_time": 0.02646493911743164, + "step": 16741 + }, + { + "epoch": 2.554473876953125e-05, + "step": 16741, + "training_step_time": 0.10651755332946777 + }, + { + "epoch": 2.55462646484375e-05, + "model_forward_time": 0.024684429168701172, + "step": 16742 + }, + { + "epoch": 2.55462646484375e-05, + "step": 16742, + "training_step_time": 0.10277819633483887 + }, + { + "epoch": 2.554779052734375e-05, + "model_forward_time": 0.024974346160888672, + "step": 16743 + }, + { + "epoch": 2.554779052734375e-05, + "step": 16743, + "training_step_time": 0.10351777076721191 + }, + { + "epoch": 2.554931640625e-05, + "model_forward_time": 0.025186538696289062, + "step": 16744 + }, + { + "epoch": 2.554931640625e-05, + "step": 16744, + "training_step_time": 0.10380268096923828 + }, + { + "epoch": 2.555084228515625e-05, + "model_forward_time": 0.025336265563964844, + "step": 16745 + }, + { + "epoch": 2.555084228515625e-05, + "step": 16745, + "training_step_time": 0.10793137550354004 + }, + { + "epoch": 2.55523681640625e-05, + "model_forward_time": 0.02574014663696289, + "step": 16746 + }, + { + "epoch": 2.55523681640625e-05, + "step": 16746, + "training_step_time": 0.10466647148132324 + }, + { + "epoch": 2.555389404296875e-05, + "model_forward_time": 0.025153636932373047, + "step": 16747 + }, + { + "epoch": 2.555389404296875e-05, + "step": 16747, + "training_step_time": 0.1082465648651123 + }, + { + "epoch": 2.5555419921875e-05, + "model_forward_time": 0.0247189998626709, + "step": 16748 + }, + { + "epoch": 2.5555419921875e-05, + "step": 16748, + "training_step_time": 0.10575699806213379 + }, + { + "epoch": 2.555694580078125e-05, + "model_forward_time": 0.025058269500732422, + "step": 16749 + }, + { + "epoch": 2.555694580078125e-05, + "step": 16749, + "training_step_time": 0.1042623519897461 + }, + { + "epoch": 2.55584716796875e-05, + "grad_norm": 0.31712692975997925, + "learning_rate": 4.449958895029604e-05, + "loss": 0.0208, + "step": 16750 + }, + { + "epoch": 2.55584716796875e-05, + "model_forward_time": 0.025702714920043945, + "step": 16750 + }, + { + "epoch": 2.55584716796875e-05, + "step": 16750, + "training_step_time": 0.10800743103027344 + }, + { + "epoch": 2.555999755859375e-05, + "model_forward_time": 0.025292396545410156, + "step": 16751 + }, + { + "epoch": 2.555999755859375e-05, + "step": 16751, + "training_step_time": 0.10869860649108887 + }, + { + "epoch": 2.55615234375e-05, + "model_forward_time": 0.025087594985961914, + "step": 16752 + }, + { + "epoch": 2.55615234375e-05, + "step": 16752, + "training_step_time": 0.10884904861450195 + }, + { + "epoch": 2.556304931640625e-05, + "model_forward_time": 0.025507688522338867, + "step": 16753 + }, + { + "epoch": 2.556304931640625e-05, + "step": 16753, + "training_step_time": 0.10695886611938477 + }, + { + "epoch": 2.55645751953125e-05, + "model_forward_time": 0.025018930435180664, + "step": 16754 + }, + { + "epoch": 2.55645751953125e-05, + "step": 16754, + "training_step_time": 0.10542798042297363 + }, + { + "epoch": 2.556610107421875e-05, + "model_forward_time": 0.025371313095092773, + "step": 16755 + }, + { + "epoch": 2.556610107421875e-05, + "step": 16755, + "training_step_time": 0.10761356353759766 + }, + { + "epoch": 2.5567626953125e-05, + "model_forward_time": 0.025255918502807617, + "step": 16756 + }, + { + "epoch": 2.5567626953125e-05, + "step": 16756, + "training_step_time": 0.10564112663269043 + }, + { + "epoch": 2.556915283203125e-05, + "model_forward_time": 0.02525806427001953, + "step": 16757 + }, + { + "epoch": 2.556915283203125e-05, + "step": 16757, + "training_step_time": 0.10609173774719238 + }, + { + "epoch": 2.55706787109375e-05, + "model_forward_time": 0.025043249130249023, + "step": 16758 + }, + { + "epoch": 2.55706787109375e-05, + "step": 16758, + "training_step_time": 0.10986900329589844 + }, + { + "epoch": 2.557220458984375e-05, + "model_forward_time": 0.025300979614257812, + "step": 16759 + }, + { + "epoch": 2.557220458984375e-05, + "step": 16759, + "training_step_time": 0.10769391059875488 + }, + { + "epoch": 2.557373046875e-05, + "grad_norm": 0.3258390724658966, + "learning_rate": 4.444481115728878e-05, + "loss": 0.0099, + "step": 16760 + }, + { + "epoch": 2.557373046875e-05, + "model_forward_time": 0.025603771209716797, + "step": 16760 + }, + { + "epoch": 2.557373046875e-05, + "step": 16760, + "training_step_time": 0.10313796997070312 + }, + { + "epoch": 2.557525634765625e-05, + "model_forward_time": 0.02408742904663086, + "step": 16761 + }, + { + "epoch": 2.557525634765625e-05, + "step": 16761, + "training_step_time": 0.15250277519226074 + }, + { + "epoch": 2.55767822265625e-05, + "model_forward_time": 0.024526119232177734, + "step": 16762 + }, + { + "epoch": 2.55767822265625e-05, + "step": 16762, + "training_step_time": 0.15346312522888184 + }, + { + "epoch": 2.557830810546875e-05, + "model_forward_time": 0.024487972259521484, + "step": 16763 + }, + { + "epoch": 2.557830810546875e-05, + "step": 16763, + "training_step_time": 0.20626115798950195 + }, + { + "epoch": 2.5579833984375e-05, + "model_forward_time": 0.027460813522338867, + "step": 16764 + }, + { + "epoch": 2.5579833984375e-05, + "step": 16764, + "training_step_time": 0.17421436309814453 + }, + { + "epoch": 2.558135986328125e-05, + "model_forward_time": 0.02458643913269043, + "step": 16765 + }, + { + "epoch": 2.558135986328125e-05, + "step": 16765, + "training_step_time": 0.19320344924926758 + }, + { + "epoch": 2.55828857421875e-05, + "model_forward_time": 0.02387380599975586, + "step": 16766 + }, + { + "epoch": 2.55828857421875e-05, + "step": 16766, + "training_step_time": 0.17415118217468262 + }, + { + "epoch": 2.558441162109375e-05, + "model_forward_time": 0.024008750915527344, + "step": 16767 + }, + { + "epoch": 2.558441162109375e-05, + "step": 16767, + "training_step_time": 0.10105514526367188 + }, + { + "epoch": 2.55859375e-05, + "model_forward_time": 0.02463078498840332, + "step": 16768 + }, + { + "epoch": 2.55859375e-05, + "step": 16768, + "training_step_time": 0.10674691200256348 + }, + { + "epoch": 2.558746337890625e-05, + "model_forward_time": 0.025688886642456055, + "step": 16769 + }, + { + "epoch": 2.558746337890625e-05, + "step": 16769, + "training_step_time": 0.10628247261047363 + }, + { + "epoch": 2.55889892578125e-05, + "grad_norm": 0.17523738741874695, + "learning_rate": 4.439004011435979e-05, + "loss": 0.0096, + "step": 16770 + }, + { + "epoch": 2.55889892578125e-05, + "model_forward_time": 0.025551795959472656, + "step": 16770 + }, + { + "epoch": 2.55889892578125e-05, + "step": 16770, + "training_step_time": 0.10815978050231934 + }, + { + "epoch": 2.559051513671875e-05, + "model_forward_time": 0.025446414947509766, + "step": 16771 + }, + { + "epoch": 2.559051513671875e-05, + "step": 16771, + "training_step_time": 0.10514068603515625 + }, + { + "epoch": 2.5592041015625e-05, + "model_forward_time": 0.025654077529907227, + "step": 16772 + }, + { + "epoch": 2.5592041015625e-05, + "step": 16772, + "training_step_time": 0.10353231430053711 + }, + { + "epoch": 2.559356689453125e-05, + "model_forward_time": 0.024960756301879883, + "step": 16773 + }, + { + "epoch": 2.559356689453125e-05, + "step": 16773, + "training_step_time": 0.10429120063781738 + }, + { + "epoch": 2.55950927734375e-05, + "model_forward_time": 0.025243759155273438, + "step": 16774 + }, + { + "epoch": 2.55950927734375e-05, + "step": 16774, + "training_step_time": 0.1047215461730957 + }, + { + "epoch": 2.559661865234375e-05, + "model_forward_time": 0.025696754455566406, + "step": 16775 + }, + { + "epoch": 2.559661865234375e-05, + "step": 16775, + "training_step_time": 0.10526227951049805 + }, + { + "epoch": 2.559814453125e-05, + "model_forward_time": 0.025246143341064453, + "step": 16776 + }, + { + "epoch": 2.559814453125e-05, + "step": 16776, + "training_step_time": 0.10478663444519043 + }, + { + "epoch": 2.559967041015625e-05, + "model_forward_time": 0.025341272354125977, + "step": 16777 + }, + { + "epoch": 2.559967041015625e-05, + "step": 16777, + "training_step_time": 0.11000680923461914 + }, + { + "epoch": 2.56011962890625e-05, + "model_forward_time": 0.02501654624938965, + "step": 16778 + }, + { + "epoch": 2.56011962890625e-05, + "step": 16778, + "training_step_time": 0.10725045204162598 + }, + { + "epoch": 2.560272216796875e-05, + "model_forward_time": 0.024740219116210938, + "step": 16779 + }, + { + "epoch": 2.560272216796875e-05, + "step": 16779, + "training_step_time": 0.11274027824401855 + }, + { + "epoch": 2.5604248046875e-05, + "grad_norm": 0.2779456377029419, + "learning_rate": 4.433527588806103e-05, + "loss": 0.0132, + "step": 16780 + }, + { + "epoch": 2.5604248046875e-05, + "model_forward_time": 0.025249719619750977, + "step": 16780 + }, + { + "epoch": 2.5604248046875e-05, + "step": 16780, + "training_step_time": 0.10565924644470215 + }, + { + "epoch": 2.560577392578125e-05, + "model_forward_time": 0.025516271591186523, + "step": 16781 + }, + { + "epoch": 2.560577392578125e-05, + "step": 16781, + "training_step_time": 0.12335634231567383 + }, + { + "epoch": 2.56072998046875e-05, + "model_forward_time": 0.025021791458129883, + "step": 16782 + }, + { + "epoch": 2.56072998046875e-05, + "step": 16782, + "training_step_time": 0.14078235626220703 + }, + { + "epoch": 2.560882568359375e-05, + "model_forward_time": 0.025133609771728516, + "step": 16783 + }, + { + "epoch": 2.560882568359375e-05, + "step": 16783, + "training_step_time": 0.10714030265808105 + }, + { + "epoch": 2.56103515625e-05, + "model_forward_time": 0.025081872940063477, + "step": 16784 + }, + { + "epoch": 2.56103515625e-05, + "step": 16784, + "training_step_time": 0.1062471866607666 + }, + { + "epoch": 2.561187744140625e-05, + "model_forward_time": 0.02557229995727539, + "step": 16785 + }, + { + "epoch": 2.561187744140625e-05, + "step": 16785, + "training_step_time": 0.12314176559448242 + }, + { + "epoch": 2.56134033203125e-05, + "model_forward_time": 0.02576589584350586, + "step": 16786 + }, + { + "epoch": 2.56134033203125e-05, + "step": 16786, + "training_step_time": 0.11900877952575684 + }, + { + "epoch": 2.561492919921875e-05, + "model_forward_time": 0.02558732032775879, + "step": 16787 + }, + { + "epoch": 2.561492919921875e-05, + "step": 16787, + "training_step_time": 0.18416404724121094 + }, + { + "epoch": 2.5616455078125e-05, + "model_forward_time": 0.024999380111694336, + "step": 16788 + }, + { + "epoch": 2.5616455078125e-05, + "step": 16788, + "training_step_time": 0.14604640007019043 + }, + { + "epoch": 2.561798095703125e-05, + "model_forward_time": 0.024517536163330078, + "step": 16789 + }, + { + "epoch": 2.561798095703125e-05, + "step": 16789, + "training_step_time": 0.12738370895385742 + }, + { + "epoch": 2.56195068359375e-05, + "grad_norm": 0.21013395488262177, + "learning_rate": 4.428051854493623e-05, + "loss": 0.0092, + "step": 16790 + }, + { + "epoch": 2.56195068359375e-05, + "model_forward_time": 0.025165796279907227, + "step": 16790 + }, + { + "epoch": 2.56195068359375e-05, + "step": 16790, + "training_step_time": 0.11907148361206055 + }, + { + "epoch": 2.562103271484375e-05, + "model_forward_time": 0.025232553482055664, + "step": 16791 + }, + { + "epoch": 2.562103271484375e-05, + "step": 16791, + "training_step_time": 0.11736059188842773 + }, + { + "epoch": 2.562255859375e-05, + "model_forward_time": 0.025210857391357422, + "step": 16792 + }, + { + "epoch": 2.562255859375e-05, + "step": 16792, + "training_step_time": 0.1143956184387207 + }, + { + "epoch": 2.562408447265625e-05, + "model_forward_time": 0.02561187744140625, + "step": 16793 + }, + { + "epoch": 2.562408447265625e-05, + "step": 16793, + "training_step_time": 0.11107659339904785 + }, + { + "epoch": 2.56256103515625e-05, + "model_forward_time": 0.025137901306152344, + "step": 16794 + }, + { + "epoch": 2.56256103515625e-05, + "step": 16794, + "training_step_time": 0.11233687400817871 + }, + { + "epoch": 2.562713623046875e-05, + "model_forward_time": 0.024496078491210938, + "step": 16795 + }, + { + "epoch": 2.562713623046875e-05, + "step": 16795, + "training_step_time": 0.11037945747375488 + }, + { + "epoch": 2.5628662109375e-05, + "model_forward_time": 0.02522110939025879, + "step": 16796 + }, + { + "epoch": 2.5628662109375e-05, + "step": 16796, + "training_step_time": 0.10970425605773926 + }, + { + "epoch": 2.563018798828125e-05, + "model_forward_time": 0.024808168411254883, + "step": 16797 + }, + { + "epoch": 2.563018798828125e-05, + "step": 16797, + "training_step_time": 0.10735297203063965 + }, + { + "epoch": 2.56317138671875e-05, + "model_forward_time": 0.025754928588867188, + "step": 16798 + }, + { + "epoch": 2.56317138671875e-05, + "step": 16798, + "training_step_time": 0.10755300521850586 + }, + { + "epoch": 2.563323974609375e-05, + "model_forward_time": 0.025974273681640625, + "step": 16799 + }, + { + "epoch": 2.563323974609375e-05, + "step": 16799, + "training_step_time": 0.10742354393005371 + }, + { + "epoch": 2.5634765625e-05, + "grad_norm": 0.3088785409927368, + "learning_rate": 4.4225768151520694e-05, + "loss": 0.0108, + "step": 16800 + }, + { + "epoch": 2.5634765625e-05, + "model_forward_time": 0.025625944137573242, + "step": 16800 + }, + { + "epoch": 2.5634765625e-05, + "step": 16800, + "training_step_time": 0.10926580429077148 + }, + { + "epoch": 2.563629150390625e-05, + "model_forward_time": 0.02532219886779785, + "step": 16801 + }, + { + "epoch": 2.563629150390625e-05, + "step": 16801, + "training_step_time": 0.10619139671325684 + }, + { + "epoch": 2.56378173828125e-05, + "model_forward_time": 0.02483844757080078, + "step": 16802 + }, + { + "epoch": 2.56378173828125e-05, + "step": 16802, + "training_step_time": 0.10608315467834473 + }, + { + "epoch": 2.563934326171875e-05, + "model_forward_time": 0.026016950607299805, + "step": 16803 + }, + { + "epoch": 2.563934326171875e-05, + "step": 16803, + "training_step_time": 0.11148309707641602 + }, + { + "epoch": 2.5640869140625e-05, + "model_forward_time": 0.02501225471496582, + "step": 16804 + }, + { + "epoch": 2.5640869140625e-05, + "step": 16804, + "training_step_time": 0.10559415817260742 + }, + { + "epoch": 2.564239501953125e-05, + "model_forward_time": 0.025614261627197266, + "step": 16805 + }, + { + "epoch": 2.564239501953125e-05, + "step": 16805, + "training_step_time": 0.10633063316345215 + }, + { + "epoch": 2.56439208984375e-05, + "model_forward_time": 0.02532792091369629, + "step": 16806 + }, + { + "epoch": 2.56439208984375e-05, + "step": 16806, + "training_step_time": 0.10275888442993164 + }, + { + "epoch": 2.564544677734375e-05, + "model_forward_time": 0.024401426315307617, + "step": 16807 + }, + { + "epoch": 2.564544677734375e-05, + "step": 16807, + "training_step_time": 0.15028643608093262 + }, + { + "epoch": 2.564697265625e-05, + "model_forward_time": 0.024496078491210938, + "step": 16808 + }, + { + "epoch": 2.564697265625e-05, + "step": 16808, + "training_step_time": 0.16051149368286133 + }, + { + "epoch": 2.564849853515625e-05, + "model_forward_time": 0.02483391761779785, + "step": 16809 + }, + { + "epoch": 2.564849853515625e-05, + "step": 16809, + "training_step_time": 0.171830415725708 + }, + { + "epoch": 2.56500244140625e-05, + "grad_norm": 0.32032281160354614, + "learning_rate": 4.4171024774341346e-05, + "loss": 0.009, + "step": 16810 + }, + { + "epoch": 2.56500244140625e-05, + "model_forward_time": 0.024470090866088867, + "step": 16810 + }, + { + "epoch": 2.56500244140625e-05, + "step": 16810, + "training_step_time": 0.20560145378112793 + }, + { + "epoch": 2.565155029296875e-05, + "model_forward_time": 0.024690866470336914, + "step": 16811 + }, + { + "epoch": 2.565155029296875e-05, + "step": 16811, + "training_step_time": 0.1791832447052002 + }, + { + "epoch": 2.5653076171875e-05, + "model_forward_time": 0.0249636173248291, + "step": 16812 + }, + { + "epoch": 2.5653076171875e-05, + "step": 16812, + "training_step_time": 0.19231772422790527 + }, + { + "epoch": 2.565460205078125e-05, + "model_forward_time": 0.025011301040649414, + "step": 16813 + }, + { + "epoch": 2.565460205078125e-05, + "step": 16813, + "training_step_time": 0.10303616523742676 + }, + { + "epoch": 2.56561279296875e-05, + "model_forward_time": 0.0244295597076416, + "step": 16814 + }, + { + "epoch": 2.56561279296875e-05, + "step": 16814, + "training_step_time": 0.10301685333251953 + }, + { + "epoch": 2.565765380859375e-05, + "model_forward_time": 0.02506875991821289, + "step": 16815 + }, + { + "epoch": 2.565765380859375e-05, + "step": 16815, + "training_step_time": 0.10490775108337402 + }, + { + "epoch": 2.56591796875e-05, + "model_forward_time": 0.026404619216918945, + "step": 16816 + }, + { + "epoch": 2.56591796875e-05, + "step": 16816, + "training_step_time": 0.1056966781616211 + }, + { + "epoch": 2.566070556640625e-05, + "model_forward_time": 0.02535080909729004, + "step": 16817 + }, + { + "epoch": 2.566070556640625e-05, + "step": 16817, + "training_step_time": 0.1038215160369873 + }, + { + "epoch": 2.56622314453125e-05, + "model_forward_time": 0.0249941349029541, + "step": 16818 + }, + { + "epoch": 2.56622314453125e-05, + "step": 16818, + "training_step_time": 0.10544657707214355 + }, + { + "epoch": 2.566375732421875e-05, + "model_forward_time": 0.025719165802001953, + "step": 16819 + }, + { + "epoch": 2.566375732421875e-05, + "step": 16819, + "training_step_time": 0.10309267044067383 + }, + { + "epoch": 2.5665283203125e-05, + "grad_norm": 0.26390019059181213, + "learning_rate": 4.411628847991653e-05, + "loss": 0.0087, + "step": 16820 + }, + { + "epoch": 2.5665283203125e-05, + "model_forward_time": 0.0249483585357666, + "step": 16820 + }, + { + "epoch": 2.5665283203125e-05, + "step": 16820, + "training_step_time": 0.10575437545776367 + }, + { + "epoch": 2.566680908203125e-05, + "model_forward_time": 0.025409460067749023, + "step": 16821 + }, + { + "epoch": 2.566680908203125e-05, + "step": 16821, + "training_step_time": 0.10502314567565918 + }, + { + "epoch": 2.56683349609375e-05, + "model_forward_time": 0.025376319885253906, + "step": 16822 + }, + { + "epoch": 2.56683349609375e-05, + "step": 16822, + "training_step_time": 0.10579991340637207 + }, + { + "epoch": 2.566986083984375e-05, + "model_forward_time": 0.02564239501953125, + "step": 16823 + }, + { + "epoch": 2.566986083984375e-05, + "step": 16823, + "training_step_time": 0.10613274574279785 + }, + { + "epoch": 2.567138671875e-05, + "model_forward_time": 0.025617361068725586, + "step": 16824 + }, + { + "epoch": 2.567138671875e-05, + "step": 16824, + "training_step_time": 0.1105353832244873 + }, + { + "epoch": 2.567291259765625e-05, + "model_forward_time": 0.02543783187866211, + "step": 16825 + }, + { + "epoch": 2.567291259765625e-05, + "step": 16825, + "training_step_time": 0.11483478546142578 + }, + { + "epoch": 2.56744384765625e-05, + "model_forward_time": 0.027030467987060547, + "step": 16826 + }, + { + "epoch": 2.56744384765625e-05, + "step": 16826, + "training_step_time": 0.1994800567626953 + }, + { + "epoch": 2.567596435546875e-05, + "model_forward_time": 0.025115966796875, + "step": 16827 + }, + { + "epoch": 2.567596435546875e-05, + "step": 16827, + "training_step_time": 0.1392652988433838 + }, + { + "epoch": 2.5677490234375e-05, + "model_forward_time": 0.025485754013061523, + "step": 16828 + }, + { + "epoch": 2.5677490234375e-05, + "step": 16828, + "training_step_time": 0.12481284141540527 + }, + { + "epoch": 2.567901611328125e-05, + "model_forward_time": 0.024344682693481445, + "step": 16829 + }, + { + "epoch": 2.567901611328125e-05, + "step": 16829, + "training_step_time": 0.2037820816040039 + }, + { + "epoch": 2.56805419921875e-05, + "grad_norm": 0.14889225363731384, + "learning_rate": 4.406155933475599e-05, + "loss": 0.0111, + "step": 16830 + }, + { + "epoch": 2.56805419921875e-05, + "model_forward_time": 0.025420427322387695, + "step": 16830 + }, + { + "epoch": 2.56805419921875e-05, + "step": 16830, + "training_step_time": 0.1215512752532959 + }, + { + "epoch": 2.568206787109375e-05, + "model_forward_time": 0.026274442672729492, + "step": 16831 + }, + { + "epoch": 2.568206787109375e-05, + "step": 16831, + "training_step_time": 0.18198037147521973 + }, + { + "epoch": 2.568359375e-05, + "model_forward_time": 0.024476051330566406, + "step": 16832 + }, + { + "epoch": 2.568359375e-05, + "step": 16832, + "training_step_time": 0.11098074913024902 + }, + { + "epoch": 2.568511962890625e-05, + "model_forward_time": 0.0240631103515625, + "step": 16833 + }, + { + "epoch": 2.568511962890625e-05, + "step": 16833, + "training_step_time": 0.1103518009185791 + }, + { + "epoch": 2.56866455078125e-05, + "model_forward_time": 0.024391651153564453, + "step": 16834 + }, + { + "epoch": 2.56866455078125e-05, + "step": 16834, + "training_step_time": 0.11121153831481934 + }, + { + "epoch": 2.568817138671875e-05, + "model_forward_time": 0.025082826614379883, + "step": 16835 + }, + { + "epoch": 2.568817138671875e-05, + "step": 16835, + "training_step_time": 0.10668015480041504 + }, + { + "epoch": 2.5689697265625e-05, + "model_forward_time": 0.026810169219970703, + "step": 16836 + }, + { + "epoch": 2.5689697265625e-05, + "step": 16836, + "training_step_time": 0.10784220695495605 + }, + { + "epoch": 2.569122314453125e-05, + "model_forward_time": 0.02538895606994629, + "step": 16837 + }, + { + "epoch": 2.569122314453125e-05, + "step": 16837, + "training_step_time": 0.10799598693847656 + }, + { + "epoch": 2.56927490234375e-05, + "model_forward_time": 0.02486896514892578, + "step": 16838 + }, + { + "epoch": 2.56927490234375e-05, + "step": 16838, + "training_step_time": 0.10961604118347168 + }, + { + "epoch": 2.569427490234375e-05, + "model_forward_time": 0.02542877197265625, + "step": 16839 + }, + { + "epoch": 2.569427490234375e-05, + "step": 16839, + "training_step_time": 0.10716891288757324 + }, + { + "epoch": 2.569580078125e-05, + "grad_norm": 0.17882491648197174, + "learning_rate": 4.400683740536083e-05, + "loss": 0.0074, + "step": 16840 + }, + { + "epoch": 2.569580078125e-05, + "model_forward_time": 0.025153636932373047, + "step": 16840 + }, + { + "epoch": 2.569580078125e-05, + "step": 16840, + "training_step_time": 0.10790657997131348 + }, + { + "epoch": 2.569732666015625e-05, + "model_forward_time": 0.025177955627441406, + "step": 16841 + }, + { + "epoch": 2.569732666015625e-05, + "step": 16841, + "training_step_time": 0.1056067943572998 + }, + { + "epoch": 2.56988525390625e-05, + "model_forward_time": 0.0254209041595459, + "step": 16842 + }, + { + "epoch": 2.56988525390625e-05, + "step": 16842, + "training_step_time": 0.10584473609924316 + }, + { + "epoch": 2.570037841796875e-05, + "model_forward_time": 0.0254209041595459, + "step": 16843 + }, + { + "epoch": 2.570037841796875e-05, + "step": 16843, + "training_step_time": 0.10577678680419922 + }, + { + "epoch": 2.5701904296875e-05, + "model_forward_time": 0.025554656982421875, + "step": 16844 + }, + { + "epoch": 2.5701904296875e-05, + "step": 16844, + "training_step_time": 0.10765242576599121 + }, + { + "epoch": 2.570343017578125e-05, + "model_forward_time": 0.02558302879333496, + "step": 16845 + }, + { + "epoch": 2.570343017578125e-05, + "step": 16845, + "training_step_time": 0.10473942756652832 + }, + { + "epoch": 2.57049560546875e-05, + "model_forward_time": 0.025362253189086914, + "step": 16846 + }, + { + "epoch": 2.57049560546875e-05, + "step": 16846, + "training_step_time": 0.10598874092102051 + }, + { + "epoch": 2.570648193359375e-05, + "model_forward_time": 0.025374174118041992, + "step": 16847 + }, + { + "epoch": 2.570648193359375e-05, + "step": 16847, + "training_step_time": 0.11011266708374023 + }, + { + "epoch": 2.57080078125e-05, + "model_forward_time": 0.025328636169433594, + "step": 16848 + }, + { + "epoch": 2.57080078125e-05, + "step": 16848, + "training_step_time": 0.10754799842834473 + }, + { + "epoch": 2.570953369140625e-05, + "model_forward_time": 0.02509307861328125, + "step": 16849 + }, + { + "epoch": 2.570953369140625e-05, + "step": 16849, + "training_step_time": 0.10747814178466797 + }, + { + "epoch": 2.57110595703125e-05, + "grad_norm": 0.22404731810092926, + "learning_rate": 4.3952122758223354e-05, + "loss": 0.0092, + "step": 16850 + }, + { + "epoch": 2.57110595703125e-05, + "model_forward_time": 0.025411605834960938, + "step": 16850 + }, + { + "epoch": 2.57110595703125e-05, + "step": 16850, + "training_step_time": 0.10464000701904297 + }, + { + "epoch": 2.571258544921875e-05, + "model_forward_time": 0.025107145309448242, + "step": 16851 + }, + { + "epoch": 2.571258544921875e-05, + "step": 16851, + "training_step_time": 0.10290241241455078 + }, + { + "epoch": 2.5714111328125e-05, + "model_forward_time": 0.024219036102294922, + "step": 16852 + }, + { + "epoch": 2.5714111328125e-05, + "step": 16852, + "training_step_time": 0.19141316413879395 + }, + { + "epoch": 2.571563720703125e-05, + "model_forward_time": 0.02443861961364746, + "step": 16853 + }, + { + "epoch": 2.571563720703125e-05, + "step": 16853, + "training_step_time": 0.17130398750305176 + }, + { + "epoch": 2.57171630859375e-05, + "model_forward_time": 0.02448868751525879, + "step": 16854 + }, + { + "epoch": 2.57171630859375e-05, + "step": 16854, + "training_step_time": 0.20472288131713867 + }, + { + "epoch": 2.571868896484375e-05, + "model_forward_time": 0.02407526969909668, + "step": 16855 + }, + { + "epoch": 2.571868896484375e-05, + "step": 16855, + "training_step_time": 0.2225816249847412 + }, + { + "epoch": 2.572021484375e-05, + "model_forward_time": 0.024912357330322266, + "step": 16856 + }, + { + "epoch": 2.572021484375e-05, + "step": 16856, + "training_step_time": 0.1585521697998047 + }, + { + "epoch": 2.572174072265625e-05, + "model_forward_time": 0.024732351303100586, + "step": 16857 + }, + { + "epoch": 2.572174072265625e-05, + "step": 16857, + "training_step_time": 0.1115865707397461 + }, + { + "epoch": 2.57232666015625e-05, + "model_forward_time": 0.024787425994873047, + "step": 16858 + }, + { + "epoch": 2.57232666015625e-05, + "step": 16858, + "training_step_time": 0.10498762130737305 + }, + { + "epoch": 2.572479248046875e-05, + "model_forward_time": 0.025476932525634766, + "step": 16859 + }, + { + "epoch": 2.572479248046875e-05, + "step": 16859, + "training_step_time": 0.10504508018493652 + }, + { + "epoch": 2.5726318359375e-05, + "grad_norm": 0.17102010548114777, + "learning_rate": 4.3897415459827e-05, + "loss": 0.015, + "step": 16860 + }, + { + "epoch": 2.5726318359375e-05, + "model_forward_time": 0.029054880142211914, + "step": 16860 + }, + { + "epoch": 2.5726318359375e-05, + "step": 16860, + "training_step_time": 0.10898709297180176 + }, + { + "epoch": 2.572784423828125e-05, + "model_forward_time": 0.025323867797851562, + "step": 16861 + }, + { + "epoch": 2.572784423828125e-05, + "step": 16861, + "training_step_time": 0.10597634315490723 + }, + { + "epoch": 2.57293701171875e-05, + "model_forward_time": 0.025467395782470703, + "step": 16862 + }, + { + "epoch": 2.57293701171875e-05, + "step": 16862, + "training_step_time": 0.10738611221313477 + }, + { + "epoch": 2.573089599609375e-05, + "model_forward_time": 0.025229454040527344, + "step": 16863 + }, + { + "epoch": 2.573089599609375e-05, + "step": 16863, + "training_step_time": 0.10597920417785645 + }, + { + "epoch": 2.5732421875e-05, + "model_forward_time": 0.02506422996520996, + "step": 16864 + }, + { + "epoch": 2.5732421875e-05, + "step": 16864, + "training_step_time": 0.10720109939575195 + }, + { + "epoch": 2.573394775390625e-05, + "model_forward_time": 0.02538013458251953, + "step": 16865 + }, + { + "epoch": 2.573394775390625e-05, + "step": 16865, + "training_step_time": 0.10530662536621094 + }, + { + "epoch": 2.57354736328125e-05, + "model_forward_time": 0.025450468063354492, + "step": 16866 + }, + { + "epoch": 2.57354736328125e-05, + "step": 16866, + "training_step_time": 0.10777139663696289 + }, + { + "epoch": 2.573699951171875e-05, + "model_forward_time": 0.025695323944091797, + "step": 16867 + }, + { + "epoch": 2.573699951171875e-05, + "step": 16867, + "training_step_time": 0.10548758506774902 + }, + { + "epoch": 2.5738525390625e-05, + "model_forward_time": 0.025517940521240234, + "step": 16868 + }, + { + "epoch": 2.5738525390625e-05, + "step": 16868, + "training_step_time": 0.1103827953338623 + }, + { + "epoch": 2.574005126953125e-05, + "model_forward_time": 0.02513861656188965, + "step": 16869 + }, + { + "epoch": 2.574005126953125e-05, + "step": 16869, + "training_step_time": 0.10711956024169922 + }, + { + "epoch": 2.57415771484375e-05, + "grad_norm": 0.34216833114624023, + "learning_rate": 4.384271557664628e-05, + "loss": 0.0076, + "step": 16870 + }, + { + "epoch": 2.57415771484375e-05, + "model_forward_time": 0.02598404884338379, + "step": 16870 + }, + { + "epoch": 2.57415771484375e-05, + "step": 16870, + "training_step_time": 0.10640430450439453 + }, + { + "epoch": 2.574310302734375e-05, + "model_forward_time": 0.025947093963623047, + "step": 16871 + }, + { + "epoch": 2.574310302734375e-05, + "step": 16871, + "training_step_time": 0.10649633407592773 + }, + { + "epoch": 2.574462890625e-05, + "model_forward_time": 0.025693893432617188, + "step": 16872 + }, + { + "epoch": 2.574462890625e-05, + "step": 16872, + "training_step_time": 0.10899686813354492 + }, + { + "epoch": 2.574615478515625e-05, + "model_forward_time": 0.025373458862304688, + "step": 16873 + }, + { + "epoch": 2.574615478515625e-05, + "step": 16873, + "training_step_time": 0.12107419967651367 + }, + { + "epoch": 2.57476806640625e-05, + "model_forward_time": 0.025385618209838867, + "step": 16874 + }, + { + "epoch": 2.57476806640625e-05, + "step": 16874, + "training_step_time": 0.11342954635620117 + }, + { + "epoch": 2.574920654296875e-05, + "model_forward_time": 0.025710105895996094, + "step": 16875 + }, + { + "epoch": 2.574920654296875e-05, + "step": 16875, + "training_step_time": 0.10808491706848145 + }, + { + "epoch": 2.5750732421875e-05, + "model_forward_time": 0.02534198760986328, + "step": 16876 + }, + { + "epoch": 2.5750732421875e-05, + "step": 16876, + "training_step_time": 0.1106865406036377 + }, + { + "epoch": 2.575225830078125e-05, + "model_forward_time": 0.02613544464111328, + "step": 16877 + }, + { + "epoch": 2.575225830078125e-05, + "step": 16877, + "training_step_time": 0.10642719268798828 + }, + { + "epoch": 2.57537841796875e-05, + "model_forward_time": 0.024759769439697266, + "step": 16878 + }, + { + "epoch": 2.57537841796875e-05, + "step": 16878, + "training_step_time": 0.19239282608032227 + }, + { + "epoch": 2.575531005859375e-05, + "model_forward_time": 0.02441883087158203, + "step": 16879 + }, + { + "epoch": 2.575531005859375e-05, + "step": 16879, + "training_step_time": 0.10502099990844727 + }, + { + "epoch": 2.57568359375e-05, + "grad_norm": 0.20437732338905334, + "learning_rate": 4.3788023175146747e-05, + "loss": 0.0069, + "step": 16880 + }, + { + "epoch": 2.57568359375e-05, + "model_forward_time": 0.024904251098632812, + "step": 16880 + }, + { + "epoch": 2.57568359375e-05, + "step": 16880, + "training_step_time": 0.10310006141662598 + }, + { + "epoch": 2.575836181640625e-05, + "model_forward_time": 0.025289297103881836, + "step": 16881 + }, + { + "epoch": 2.575836181640625e-05, + "step": 16881, + "training_step_time": 0.10827350616455078 + }, + { + "epoch": 2.57598876953125e-05, + "model_forward_time": 0.025571823120117188, + "step": 16882 + }, + { + "epoch": 2.57598876953125e-05, + "step": 16882, + "training_step_time": 0.11071014404296875 + }, + { + "epoch": 2.576141357421875e-05, + "model_forward_time": 0.025173664093017578, + "step": 16883 + }, + { + "epoch": 2.576141357421875e-05, + "step": 16883, + "training_step_time": 0.10628128051757812 + }, + { + "epoch": 2.5762939453125e-05, + "model_forward_time": 0.025313854217529297, + "step": 16884 + }, + { + "epoch": 2.5762939453125e-05, + "step": 16884, + "training_step_time": 0.10785484313964844 + }, + { + "epoch": 2.576446533203125e-05, + "model_forward_time": 0.025681495666503906, + "step": 16885 + }, + { + "epoch": 2.576446533203125e-05, + "step": 16885, + "training_step_time": 0.10761904716491699 + }, + { + "epoch": 2.57659912109375e-05, + "model_forward_time": 0.025356054306030273, + "step": 16886 + }, + { + "epoch": 2.57659912109375e-05, + "step": 16886, + "training_step_time": 0.10830402374267578 + }, + { + "epoch": 2.576751708984375e-05, + "model_forward_time": 0.025353431701660156, + "step": 16887 + }, + { + "epoch": 2.576751708984375e-05, + "step": 16887, + "training_step_time": 0.10603022575378418 + }, + { + "epoch": 2.576904296875e-05, + "model_forward_time": 0.025418519973754883, + "step": 16888 + }, + { + "epoch": 2.576904296875e-05, + "step": 16888, + "training_step_time": 0.10626578330993652 + }, + { + "epoch": 2.577056884765625e-05, + "model_forward_time": 0.024901866912841797, + "step": 16889 + }, + { + "epoch": 2.577056884765625e-05, + "step": 16889, + "training_step_time": 0.10535168647766113 + }, + { + "epoch": 2.57720947265625e-05, + "grad_norm": 0.31352540850639343, + "learning_rate": 4.373333832178478e-05, + "loss": 0.0139, + "step": 16890 + }, + { + "epoch": 2.57720947265625e-05, + "model_forward_time": 0.025788068771362305, + "step": 16890 + }, + { + "epoch": 2.57720947265625e-05, + "step": 16890, + "training_step_time": 0.1047823429107666 + }, + { + "epoch": 2.577362060546875e-05, + "model_forward_time": 0.025628089904785156, + "step": 16891 + }, + { + "epoch": 2.577362060546875e-05, + "step": 16891, + "training_step_time": 0.10533618927001953 + }, + { + "epoch": 2.5775146484375e-05, + "model_forward_time": 0.024789094924926758, + "step": 16892 + }, + { + "epoch": 2.5775146484375e-05, + "step": 16892, + "training_step_time": 0.10709953308105469 + }, + { + "epoch": 2.577667236328125e-05, + "model_forward_time": 0.025305747985839844, + "step": 16893 + }, + { + "epoch": 2.577667236328125e-05, + "step": 16893, + "training_step_time": 0.1086430549621582 + }, + { + "epoch": 2.57781982421875e-05, + "model_forward_time": 0.024613380432128906, + "step": 16894 + }, + { + "epoch": 2.57781982421875e-05, + "step": 16894, + "training_step_time": 0.10937094688415527 + }, + { + "epoch": 2.577972412109375e-05, + "model_forward_time": 0.025641679763793945, + "step": 16895 + }, + { + "epoch": 2.577972412109375e-05, + "step": 16895, + "training_step_time": 0.10694503784179688 + }, + { + "epoch": 2.578125e-05, + "model_forward_time": 0.025772571563720703, + "step": 16896 + }, + { + "epoch": 2.578125e-05, + "step": 16896, + "training_step_time": 0.107269287109375 + }, + { + "epoch": 2.578277587890625e-05, + "model_forward_time": 0.027095556259155273, + "step": 16897 + }, + { + "epoch": 2.578277587890625e-05, + "step": 16897, + "training_step_time": 0.10834360122680664 + }, + { + "epoch": 2.57843017578125e-05, + "model_forward_time": 0.025116682052612305, + "step": 16898 + }, + { + "epoch": 2.57843017578125e-05, + "step": 16898, + "training_step_time": 0.10919857025146484 + }, + { + "epoch": 2.578582763671875e-05, + "model_forward_time": 0.023845672607421875, + "step": 16899 + }, + { + "epoch": 2.578582763671875e-05, + "step": 16899, + "training_step_time": 0.19077229499816895 + }, + { + "epoch": 2.5787353515625e-05, + "grad_norm": 0.48642513155937195, + "learning_rate": 4.367866108300769e-05, + "loss": 0.0085, + "step": 16900 + }, + { + "epoch": 2.5787353515625e-05, + "model_forward_time": 0.024561405181884766, + "step": 16900 + }, + { + "epoch": 2.5787353515625e-05, + "step": 16900, + "training_step_time": 0.20948386192321777 + }, + { + "epoch": 2.578887939453125e-05, + "model_forward_time": 0.02608203887939453, + "step": 16901 + }, + { + "epoch": 2.578887939453125e-05, + "step": 16901, + "training_step_time": 0.1550281047821045 + }, + { + "epoch": 2.57904052734375e-05, + "model_forward_time": 0.024625539779663086, + "step": 16902 + }, + { + "epoch": 2.57904052734375e-05, + "step": 16902, + "training_step_time": 0.20520448684692383 + }, + { + "epoch": 2.579193115234375e-05, + "model_forward_time": 0.02440619468688965, + "step": 16903 + }, + { + "epoch": 2.579193115234375e-05, + "step": 16903, + "training_step_time": 0.19631075859069824 + }, + { + "epoch": 2.579345703125e-05, + "model_forward_time": 0.024335145950317383, + "step": 16904 + }, + { + "epoch": 2.579345703125e-05, + "step": 16904, + "training_step_time": 0.16623950004577637 + }, + { + "epoch": 2.579498291015625e-05, + "model_forward_time": 0.024112701416015625, + "step": 16905 + }, + { + "epoch": 2.579498291015625e-05, + "step": 16905, + "training_step_time": 0.10695457458496094 + }, + { + "epoch": 2.57965087890625e-05, + "model_forward_time": 0.02521514892578125, + "step": 16906 + }, + { + "epoch": 2.57965087890625e-05, + "step": 16906, + "training_step_time": 0.11034321784973145 + }, + { + "epoch": 2.579803466796875e-05, + "model_forward_time": 0.025048494338989258, + "step": 16907 + }, + { + "epoch": 2.579803466796875e-05, + "step": 16907, + "training_step_time": 0.11042356491088867 + }, + { + "epoch": 2.5799560546875e-05, + "model_forward_time": 0.025214433670043945, + "step": 16908 + }, + { + "epoch": 2.5799560546875e-05, + "step": 16908, + "training_step_time": 0.10883235931396484 + }, + { + "epoch": 2.580108642578125e-05, + "model_forward_time": 0.024890422821044922, + "step": 16909 + }, + { + "epoch": 2.580108642578125e-05, + "step": 16909, + "training_step_time": 0.10615134239196777 + }, + { + "epoch": 2.58026123046875e-05, + "grad_norm": 0.2852439880371094, + "learning_rate": 4.362399152525344e-05, + "loss": 0.0081, + "step": 16910 + }, + { + "epoch": 2.58026123046875e-05, + "model_forward_time": 0.024887561798095703, + "step": 16910 + }, + { + "epoch": 2.58026123046875e-05, + "step": 16910, + "training_step_time": 0.10703063011169434 + }, + { + "epoch": 2.580413818359375e-05, + "model_forward_time": 0.02524590492248535, + "step": 16911 + }, + { + "epoch": 2.580413818359375e-05, + "step": 16911, + "training_step_time": 0.11104702949523926 + }, + { + "epoch": 2.58056640625e-05, + "model_forward_time": 0.025271892547607422, + "step": 16912 + }, + { + "epoch": 2.58056640625e-05, + "step": 16912, + "training_step_time": 0.10751628875732422 + }, + { + "epoch": 2.580718994140625e-05, + "model_forward_time": 0.02498602867126465, + "step": 16913 + }, + { + "epoch": 2.580718994140625e-05, + "step": 16913, + "training_step_time": 0.1089017391204834 + }, + { + "epoch": 2.58087158203125e-05, + "model_forward_time": 0.028953075408935547, + "step": 16914 + }, + { + "epoch": 2.58087158203125e-05, + "step": 16914, + "training_step_time": 0.11086463928222656 + }, + { + "epoch": 2.581024169921875e-05, + "model_forward_time": 0.025261640548706055, + "step": 16915 + }, + { + "epoch": 2.581024169921875e-05, + "step": 16915, + "training_step_time": 0.11073899269104004 + }, + { + "epoch": 2.5811767578125e-05, + "model_forward_time": 0.025379419326782227, + "step": 16916 + }, + { + "epoch": 2.5811767578125e-05, + "step": 16916, + "training_step_time": 0.1063237190246582 + }, + { + "epoch": 2.581329345703125e-05, + "model_forward_time": 0.02490067481994629, + "step": 16917 + }, + { + "epoch": 2.581329345703125e-05, + "step": 16917, + "training_step_time": 0.10724067687988281 + }, + { + "epoch": 2.58148193359375e-05, + "model_forward_time": 0.02509164810180664, + "step": 16918 + }, + { + "epoch": 2.58148193359375e-05, + "step": 16918, + "training_step_time": 0.14635658264160156 + }, + { + "epoch": 2.581634521484375e-05, + "model_forward_time": 0.025643587112426758, + "step": 16919 + }, + { + "epoch": 2.581634521484375e-05, + "step": 16919, + "training_step_time": 0.14476513862609863 + }, + { + "epoch": 2.581787109375e-05, + "grad_norm": 0.2205013483762741, + "learning_rate": 4.3569329714950704e-05, + "loss": 0.0113, + "step": 16920 + }, + { + "epoch": 2.581787109375e-05, + "model_forward_time": 0.02438068389892578, + "step": 16920 + }, + { + "epoch": 2.581787109375e-05, + "step": 16920, + "training_step_time": 0.1072843074798584 + }, + { + "epoch": 2.581939697265625e-05, + "model_forward_time": 0.025136232376098633, + "step": 16921 + }, + { + "epoch": 2.581939697265625e-05, + "step": 16921, + "training_step_time": 0.10743451118469238 + }, + { + "epoch": 2.58209228515625e-05, + "model_forward_time": 0.0253903865814209, + "step": 16922 + }, + { + "epoch": 2.58209228515625e-05, + "step": 16922, + "training_step_time": 0.11026215553283691 + }, + { + "epoch": 2.582244873046875e-05, + "model_forward_time": 0.024866819381713867, + "step": 16923 + }, + { + "epoch": 2.582244873046875e-05, + "step": 16923, + "training_step_time": 0.10651254653930664 + }, + { + "epoch": 2.5823974609375e-05, + "model_forward_time": 0.025058507919311523, + "step": 16924 + }, + { + "epoch": 2.5823974609375e-05, + "step": 16924, + "training_step_time": 0.19185423851013184 + }, + { + "epoch": 2.582550048828125e-05, + "model_forward_time": 0.024497032165527344, + "step": 16925 + }, + { + "epoch": 2.582550048828125e-05, + "step": 16925, + "training_step_time": 0.10343146324157715 + }, + { + "epoch": 2.58270263671875e-05, + "model_forward_time": 0.024136066436767578, + "step": 16926 + }, + { + "epoch": 2.58270263671875e-05, + "step": 16926, + "training_step_time": 0.10203266143798828 + }, + { + "epoch": 2.582855224609375e-05, + "model_forward_time": 0.024825572967529297, + "step": 16927 + }, + { + "epoch": 2.582855224609375e-05, + "step": 16927, + "training_step_time": 0.10566568374633789 + }, + { + "epoch": 2.5830078125e-05, + "model_forward_time": 0.025715112686157227, + "step": 16928 + }, + { + "epoch": 2.5830078125e-05, + "step": 16928, + "training_step_time": 0.10661125183105469 + }, + { + "epoch": 2.583160400390625e-05, + "model_forward_time": 0.02545952796936035, + "step": 16929 + }, + { + "epoch": 2.583160400390625e-05, + "step": 16929, + "training_step_time": 0.10692834854125977 + }, + { + "epoch": 2.58331298828125e-05, + "grad_norm": 0.42118752002716064, + "learning_rate": 4.3514675718518734e-05, + "loss": 0.0126, + "step": 16930 + }, + { + "epoch": 2.58331298828125e-05, + "model_forward_time": 0.02561044692993164, + "step": 16930 + }, + { + "epoch": 2.58331298828125e-05, + "step": 16930, + "training_step_time": 0.10451579093933105 + }, + { + "epoch": 2.583465576171875e-05, + "model_forward_time": 0.024625778198242188, + "step": 16931 + }, + { + "epoch": 2.583465576171875e-05, + "step": 16931, + "training_step_time": 0.10590362548828125 + }, + { + "epoch": 2.5836181640625e-05, + "model_forward_time": 0.02568197250366211, + "step": 16932 + }, + { + "epoch": 2.5836181640625e-05, + "step": 16932, + "training_step_time": 0.11085319519042969 + }, + { + "epoch": 2.583770751953125e-05, + "model_forward_time": 0.025183677673339844, + "step": 16933 + }, + { + "epoch": 2.583770751953125e-05, + "step": 16933, + "training_step_time": 0.1221308708190918 + }, + { + "epoch": 2.58392333984375e-05, + "model_forward_time": 0.02544546127319336, + "step": 16934 + }, + { + "epoch": 2.58392333984375e-05, + "step": 16934, + "training_step_time": 0.12078332901000977 + }, + { + "epoch": 2.584075927734375e-05, + "model_forward_time": 0.025042295455932617, + "step": 16935 + }, + { + "epoch": 2.584075927734375e-05, + "step": 16935, + "training_step_time": 0.11515021324157715 + }, + { + "epoch": 2.584228515625e-05, + "model_forward_time": 0.025109291076660156, + "step": 16936 + }, + { + "epoch": 2.584228515625e-05, + "step": 16936, + "training_step_time": 0.11169004440307617 + }, + { + "epoch": 2.584381103515625e-05, + "model_forward_time": 0.025378704071044922, + "step": 16937 + }, + { + "epoch": 2.584381103515625e-05, + "step": 16937, + "training_step_time": 0.10795879364013672 + }, + { + "epoch": 2.58453369140625e-05, + "model_forward_time": 0.028805017471313477, + "step": 16938 + }, + { + "epoch": 2.58453369140625e-05, + "step": 16938, + "training_step_time": 0.11074328422546387 + }, + { + "epoch": 2.584686279296875e-05, + "model_forward_time": 0.0253753662109375, + "step": 16939 + }, + { + "epoch": 2.584686279296875e-05, + "step": 16939, + "training_step_time": 0.10857772827148438 + }, + { + "epoch": 2.5848388671875e-05, + "grad_norm": 0.2935320734977722, + "learning_rate": 4.3460029602367284e-05, + "loss": 0.0095, + "step": 16940 + }, + { + "epoch": 2.5848388671875e-05, + "model_forward_time": 0.025522232055664062, + "step": 16940 + }, + { + "epoch": 2.5848388671875e-05, + "step": 16940, + "training_step_time": 0.10840439796447754 + }, + { + "epoch": 2.584991455078125e-05, + "model_forward_time": 0.02521991729736328, + "step": 16941 + }, + { + "epoch": 2.584991455078125e-05, + "step": 16941, + "training_step_time": 0.10709714889526367 + }, + { + "epoch": 2.58514404296875e-05, + "model_forward_time": 0.025694847106933594, + "step": 16942 + }, + { + "epoch": 2.58514404296875e-05, + "step": 16942, + "training_step_time": 0.10622668266296387 + }, + { + "epoch": 2.585296630859375e-05, + "model_forward_time": 0.025577545166015625, + "step": 16943 + }, + { + "epoch": 2.585296630859375e-05, + "step": 16943, + "training_step_time": 0.10670995712280273 + }, + { + "epoch": 2.58544921875e-05, + "model_forward_time": 0.02601337432861328, + "step": 16944 + }, + { + "epoch": 2.58544921875e-05, + "step": 16944, + "training_step_time": 0.1082158088684082 + }, + { + "epoch": 2.585601806640625e-05, + "model_forward_time": 0.024785518646240234, + "step": 16945 + }, + { + "epoch": 2.585601806640625e-05, + "step": 16945, + "training_step_time": 0.19185543060302734 + }, + { + "epoch": 2.58575439453125e-05, + "model_forward_time": 0.02455615997314453, + "step": 16946 + }, + { + "epoch": 2.58575439453125e-05, + "step": 16946, + "training_step_time": 0.11673593521118164 + }, + { + "epoch": 2.585906982421875e-05, + "model_forward_time": 0.024769067764282227, + "step": 16947 + }, + { + "epoch": 2.585906982421875e-05, + "step": 16947, + "training_step_time": 0.12357568740844727 + }, + { + "epoch": 2.5860595703125e-05, + "model_forward_time": 0.026067733764648438, + "step": 16948 + }, + { + "epoch": 2.5860595703125e-05, + "step": 16948, + "training_step_time": 0.14186930656433105 + }, + { + "epoch": 2.586212158203125e-05, + "model_forward_time": 0.02537703514099121, + "step": 16949 + }, + { + "epoch": 2.586212158203125e-05, + "step": 16949, + "training_step_time": 0.19973063468933105 + }, + { + "epoch": 2.58636474609375e-05, + "grad_norm": 0.219862699508667, + "learning_rate": 4.3405391432896555e-05, + "loss": 0.0183, + "step": 16950 + }, + { + "epoch": 2.58636474609375e-05, + "model_forward_time": 0.02424764633178711, + "step": 16950 + }, + { + "epoch": 2.58636474609375e-05, + "step": 16950, + "training_step_time": 0.15396332740783691 + }, + { + "epoch": 2.586517333984375e-05, + "model_forward_time": 0.024554967880249023, + "step": 16951 + }, + { + "epoch": 2.586517333984375e-05, + "step": 16951, + "training_step_time": 0.19883465766906738 + }, + { + "epoch": 2.586669921875e-05, + "model_forward_time": 0.02415609359741211, + "step": 16952 + }, + { + "epoch": 2.586669921875e-05, + "step": 16952, + "training_step_time": 0.10562729835510254 + }, + { + "epoch": 2.586822509765625e-05, + "model_forward_time": 0.024715185165405273, + "step": 16953 + }, + { + "epoch": 2.586822509765625e-05, + "step": 16953, + "training_step_time": 0.10382080078125 + }, + { + "epoch": 2.58697509765625e-05, + "model_forward_time": 0.02546858787536621, + "step": 16954 + }, + { + "epoch": 2.58697509765625e-05, + "step": 16954, + "training_step_time": 0.10567259788513184 + }, + { + "epoch": 2.587127685546875e-05, + "model_forward_time": 0.025244712829589844, + "step": 16955 + }, + { + "epoch": 2.587127685546875e-05, + "step": 16955, + "training_step_time": 0.10644268989562988 + }, + { + "epoch": 2.5872802734375e-05, + "model_forward_time": 0.025655746459960938, + "step": 16956 + }, + { + "epoch": 2.5872802734375e-05, + "step": 16956, + "training_step_time": 0.10638284683227539 + }, + { + "epoch": 2.587432861328125e-05, + "model_forward_time": 0.02520132064819336, + "step": 16957 + }, + { + "epoch": 2.587432861328125e-05, + "step": 16957, + "training_step_time": 0.1074066162109375 + }, + { + "epoch": 2.58758544921875e-05, + "model_forward_time": 0.025557279586791992, + "step": 16958 + }, + { + "epoch": 2.58758544921875e-05, + "step": 16958, + "training_step_time": 0.10603880882263184 + }, + { + "epoch": 2.587738037109375e-05, + "model_forward_time": 0.025171756744384766, + "step": 16959 + }, + { + "epoch": 2.587738037109375e-05, + "step": 16959, + "training_step_time": 0.10498642921447754 + }, + { + "epoch": 2.587890625e-05, + "grad_norm": 0.4881453514099121, + "learning_rate": 4.335076127649707e-05, + "loss": 0.0119, + "step": 16960 + }, + { + "epoch": 2.587890625e-05, + "model_forward_time": 0.028659343719482422, + "step": 16960 + }, + { + "epoch": 2.587890625e-05, + "step": 16960, + "training_step_time": 0.10964417457580566 + }, + { + "epoch": 2.588043212890625e-05, + "model_forward_time": 0.025478839874267578, + "step": 16961 + }, + { + "epoch": 2.588043212890625e-05, + "step": 16961, + "training_step_time": 0.10509943962097168 + }, + { + "epoch": 2.58819580078125e-05, + "model_forward_time": 0.025113582611083984, + "step": 16962 + }, + { + "epoch": 2.58819580078125e-05, + "step": 16962, + "training_step_time": 0.103851318359375 + }, + { + "epoch": 2.588348388671875e-05, + "model_forward_time": 0.024210691452026367, + "step": 16963 + }, + { + "epoch": 2.588348388671875e-05, + "step": 16963, + "training_step_time": 0.10747623443603516 + }, + { + "epoch": 2.5885009765625e-05, + "model_forward_time": 0.025072574615478516, + "step": 16964 + }, + { + "epoch": 2.5885009765625e-05, + "step": 16964, + "training_step_time": 0.10664606094360352 + }, + { + "epoch": 2.588653564453125e-05, + "model_forward_time": 0.02512669563293457, + "step": 16965 + }, + { + "epoch": 2.588653564453125e-05, + "step": 16965, + "training_step_time": 0.10632658004760742 + }, + { + "epoch": 2.58880615234375e-05, + "model_forward_time": 0.02564406394958496, + "step": 16966 + }, + { + "epoch": 2.58880615234375e-05, + "step": 16966, + "training_step_time": 0.11219525337219238 + }, + { + "epoch": 2.588958740234375e-05, + "model_forward_time": 0.02495574951171875, + "step": 16967 + }, + { + "epoch": 2.588958740234375e-05, + "step": 16967, + "training_step_time": 0.14197850227355957 + }, + { + "epoch": 2.589111328125e-05, + "model_forward_time": 0.025455713272094727, + "step": 16968 + }, + { + "epoch": 2.589111328125e-05, + "step": 16968, + "training_step_time": 0.11001920700073242 + }, + { + "epoch": 2.589263916015625e-05, + "model_forward_time": 0.025901317596435547, + "step": 16969 + }, + { + "epoch": 2.589263916015625e-05, + "step": 16969, + "training_step_time": 0.1117708683013916 + }, + { + "epoch": 2.58941650390625e-05, + "grad_norm": 0.4694342315196991, + "learning_rate": 4.329613919954962e-05, + "loss": 0.0118, + "step": 16970 + }, + { + "epoch": 2.58941650390625e-05, + "model_forward_time": 0.025183677673339844, + "step": 16970 + }, + { + "epoch": 2.58941650390625e-05, + "step": 16970, + "training_step_time": 0.10701608657836914 + }, + { + "epoch": 2.589569091796875e-05, + "model_forward_time": 0.02750706672668457, + "step": 16971 + }, + { + "epoch": 2.589569091796875e-05, + "step": 16971, + "training_step_time": 0.11022233963012695 + }, + { + "epoch": 2.5897216796875e-05, + "model_forward_time": 0.025002479553222656, + "step": 16972 + }, + { + "epoch": 2.5897216796875e-05, + "step": 16972, + "training_step_time": 0.19635295867919922 + }, + { + "epoch": 2.589874267578125e-05, + "model_forward_time": 0.024892807006835938, + "step": 16973 + }, + { + "epoch": 2.589874267578125e-05, + "step": 16973, + "training_step_time": 0.10893607139587402 + }, + { + "epoch": 2.59002685546875e-05, + "model_forward_time": 0.025014162063598633, + "step": 16974 + }, + { + "epoch": 2.59002685546875e-05, + "step": 16974, + "training_step_time": 0.10539746284484863 + }, + { + "epoch": 2.590179443359375e-05, + "model_forward_time": 0.025304317474365234, + "step": 16975 + }, + { + "epoch": 2.590179443359375e-05, + "step": 16975, + "training_step_time": 0.10684466361999512 + }, + { + "epoch": 2.59033203125e-05, + "model_forward_time": 0.028641700744628906, + "step": 16976 + }, + { + "epoch": 2.59033203125e-05, + "step": 16976, + "training_step_time": 0.1104745864868164 + }, + { + "epoch": 2.590484619140625e-05, + "model_forward_time": 0.024919509887695312, + "step": 16977 + }, + { + "epoch": 2.590484619140625e-05, + "step": 16977, + "training_step_time": 0.11068964004516602 + }, + { + "epoch": 2.59063720703125e-05, + "model_forward_time": 0.025005578994750977, + "step": 16978 + }, + { + "epoch": 2.59063720703125e-05, + "step": 16978, + "training_step_time": 0.10700702667236328 + }, + { + "epoch": 2.590789794921875e-05, + "model_forward_time": 0.025437116622924805, + "step": 16979 + }, + { + "epoch": 2.590789794921875e-05, + "step": 16979, + "training_step_time": 0.10615348815917969 + }, + { + "epoch": 2.5909423828125e-05, + "grad_norm": 0.17927812039852142, + "learning_rate": 4.324152526842517e-05, + "loss": 0.0117, + "step": 16980 + }, + { + "epoch": 2.5909423828125e-05, + "model_forward_time": 0.024857521057128906, + "step": 16980 + }, + { + "epoch": 2.5909423828125e-05, + "step": 16980, + "training_step_time": 0.10616397857666016 + }, + { + "epoch": 2.591094970703125e-05, + "model_forward_time": 0.025354623794555664, + "step": 16981 + }, + { + "epoch": 2.591094970703125e-05, + "step": 16981, + "training_step_time": 0.10742712020874023 + }, + { + "epoch": 2.59124755859375e-05, + "model_forward_time": 0.024940967559814453, + "step": 16982 + }, + { + "epoch": 2.59124755859375e-05, + "step": 16982, + "training_step_time": 0.10494518280029297 + }, + { + "epoch": 2.591400146484375e-05, + "model_forward_time": 0.025066852569580078, + "step": 16983 + }, + { + "epoch": 2.591400146484375e-05, + "step": 16983, + "training_step_time": 0.10450482368469238 + }, + { + "epoch": 2.591552734375e-05, + "model_forward_time": 0.02487349510192871, + "step": 16984 + }, + { + "epoch": 2.591552734375e-05, + "step": 16984, + "training_step_time": 0.10953736305236816 + }, + { + "epoch": 2.591705322265625e-05, + "model_forward_time": 0.02508854866027832, + "step": 16985 + }, + { + "epoch": 2.591705322265625e-05, + "step": 16985, + "training_step_time": 0.10444498062133789 + }, + { + "epoch": 2.59185791015625e-05, + "model_forward_time": 0.025126934051513672, + "step": 16986 + }, + { + "epoch": 2.59185791015625e-05, + "step": 16986, + "training_step_time": 0.10359525680541992 + }, + { + "epoch": 2.592010498046875e-05, + "model_forward_time": 0.025371074676513672, + "step": 16987 + }, + { + "epoch": 2.592010498046875e-05, + "step": 16987, + "training_step_time": 0.1058967113494873 + }, + { + "epoch": 2.5921630859375e-05, + "model_forward_time": 0.025093793869018555, + "step": 16988 + }, + { + "epoch": 2.5921630859375e-05, + "step": 16988, + "training_step_time": 0.10768795013427734 + }, + { + "epoch": 2.592315673828125e-05, + "model_forward_time": 0.025876522064208984, + "step": 16989 + }, + { + "epoch": 2.592315673828125e-05, + "step": 16989, + "training_step_time": 0.10626888275146484 + }, + { + "epoch": 2.59246826171875e-05, + "grad_norm": 0.2882422208786011, + "learning_rate": 4.3186919549484784e-05, + "loss": 0.0156, + "step": 16990 + }, + { + "epoch": 2.59246826171875e-05, + "model_forward_time": 0.025634765625, + "step": 16990 + }, + { + "epoch": 2.59246826171875e-05, + "step": 16990, + "training_step_time": 0.11052346229553223 + }, + { + "epoch": 2.592620849609375e-05, + "model_forward_time": 0.024799108505249023, + "step": 16991 + }, + { + "epoch": 2.592620849609375e-05, + "step": 16991, + "training_step_time": 0.11473727226257324 + }, + { + "epoch": 2.5927734375e-05, + "model_forward_time": 0.02532362937927246, + "step": 16992 + }, + { + "epoch": 2.5927734375e-05, + "step": 16992, + "training_step_time": 0.1107175350189209 + }, + { + "epoch": 2.592926025390625e-05, + "model_forward_time": 0.02476334571838379, + "step": 16993 + }, + { + "epoch": 2.592926025390625e-05, + "step": 16993, + "training_step_time": 0.14792537689208984 + }, + { + "epoch": 2.59307861328125e-05, + "model_forward_time": 0.02482295036315918, + "step": 16994 + }, + { + "epoch": 2.59307861328125e-05, + "step": 16994, + "training_step_time": 0.16503334045410156 + }, + { + "epoch": 2.593231201171875e-05, + "model_forward_time": 0.024509429931640625, + "step": 16995 + }, + { + "epoch": 2.593231201171875e-05, + "step": 16995, + "training_step_time": 0.16500091552734375 + }, + { + "epoch": 2.5933837890625e-05, + "model_forward_time": 0.02477717399597168, + "step": 16996 + }, + { + "epoch": 2.5933837890625e-05, + "step": 16996, + "training_step_time": 0.18506884574890137 + }, + { + "epoch": 2.593536376953125e-05, + "model_forward_time": 0.02429819107055664, + "step": 16997 + }, + { + "epoch": 2.593536376953125e-05, + "step": 16997, + "training_step_time": 0.12148761749267578 + }, + { + "epoch": 2.59368896484375e-05, + "model_forward_time": 0.024514436721801758, + "step": 16998 + }, + { + "epoch": 2.59368896484375e-05, + "step": 16998, + "training_step_time": 0.11691617965698242 + }, + { + "epoch": 2.593841552734375e-05, + "model_forward_time": 0.026449203491210938, + "step": 16999 + }, + { + "epoch": 2.593841552734375e-05, + "step": 16999, + "training_step_time": 0.1191549301147461 + }, + { + "epoch": 2.593994140625e-05, + "grad_norm": 0.7259067893028259, + "learning_rate": 4.3132322109079596e-05, + "loss": 0.0136, + "step": 17000 + }, + { + "epoch": 2.593994140625e-05, + "model_forward_time": 0.02618098258972168, + "step": 17000 + }, + { + "epoch": 2.593994140625e-05, + "step": 17000, + "training_step_time": 0.10682129859924316 + }, + { + "epoch": 2.594146728515625e-05, + "model_forward_time": 0.025656700134277344, + "step": 17001 + }, + { + "epoch": 2.594146728515625e-05, + "step": 17001, + "training_step_time": 0.1811974048614502 + }, + { + "epoch": 2.59429931640625e-05, + "model_forward_time": 0.02442169189453125, + "step": 17002 + }, + { + "epoch": 2.59429931640625e-05, + "step": 17002, + "training_step_time": 0.15425825119018555 + }, + { + "epoch": 2.594451904296875e-05, + "model_forward_time": 0.024133920669555664, + "step": 17003 + }, + { + "epoch": 2.594451904296875e-05, + "step": 17003, + "training_step_time": 0.14968180656433105 + }, + { + "epoch": 2.5946044921875e-05, + "model_forward_time": 0.023937702178955078, + "step": 17004 + }, + { + "epoch": 2.5946044921875e-05, + "step": 17004, + "training_step_time": 0.2172856330871582 + }, + { + "epoch": 2.594757080078125e-05, + "model_forward_time": 0.024451732635498047, + "step": 17005 + }, + { + "epoch": 2.594757080078125e-05, + "step": 17005, + "training_step_time": 0.12508940696716309 + }, + { + "epoch": 2.59490966796875e-05, + "model_forward_time": 0.02489018440246582, + "step": 17006 + }, + { + "epoch": 2.59490966796875e-05, + "step": 17006, + "training_step_time": 0.12331604957580566 + }, + { + "epoch": 2.595062255859375e-05, + "model_forward_time": 0.025366783142089844, + "step": 17007 + }, + { + "epoch": 2.595062255859375e-05, + "step": 17007, + "training_step_time": 0.11742329597473145 + }, + { + "epoch": 2.59521484375e-05, + "model_forward_time": 0.02476358413696289, + "step": 17008 + }, + { + "epoch": 2.59521484375e-05, + "step": 17008, + "training_step_time": 0.11369204521179199 + }, + { + "epoch": 2.595367431640625e-05, + "model_forward_time": 0.025826215744018555, + "step": 17009 + }, + { + "epoch": 2.595367431640625e-05, + "step": 17009, + "training_step_time": 0.11291623115539551 + }, + { + "epoch": 2.59552001953125e-05, + "grad_norm": 0.31492379307746887, + "learning_rate": 4.307773301355062e-05, + "loss": 0.0184, + "step": 17010 + }, + { + "epoch": 2.59552001953125e-05, + "model_forward_time": 0.025204896926879883, + "step": 17010 + }, + { + "epoch": 2.59552001953125e-05, + "step": 17010, + "training_step_time": 0.1060037612915039 + }, + { + "epoch": 2.595672607421875e-05, + "model_forward_time": 0.025438547134399414, + "step": 17011 + }, + { + "epoch": 2.595672607421875e-05, + "step": 17011, + "training_step_time": 0.10681343078613281 + }, + { + "epoch": 2.5958251953125e-05, + "model_forward_time": 0.025011301040649414, + "step": 17012 + }, + { + "epoch": 2.5958251953125e-05, + "step": 17012, + "training_step_time": 0.10706305503845215 + }, + { + "epoch": 2.595977783203125e-05, + "model_forward_time": 0.025531291961669922, + "step": 17013 + }, + { + "epoch": 2.595977783203125e-05, + "step": 17013, + "training_step_time": 0.10780835151672363 + }, + { + "epoch": 2.59613037109375e-05, + "model_forward_time": 0.025274991989135742, + "step": 17014 + }, + { + "epoch": 2.59613037109375e-05, + "step": 17014, + "training_step_time": 0.10739445686340332 + }, + { + "epoch": 2.596282958984375e-05, + "model_forward_time": 0.024921417236328125, + "step": 17015 + }, + { + "epoch": 2.596282958984375e-05, + "step": 17015, + "training_step_time": 0.10861754417419434 + }, + { + "epoch": 2.596435546875e-05, + "model_forward_time": 0.024893522262573242, + "step": 17016 + }, + { + "epoch": 2.596435546875e-05, + "step": 17016, + "training_step_time": 0.16959142684936523 + }, + { + "epoch": 2.596588134765625e-05, + "model_forward_time": 0.02426600456237793, + "step": 17017 + }, + { + "epoch": 2.596588134765625e-05, + "step": 17017, + "training_step_time": 0.1753695011138916 + }, + { + "epoch": 2.59674072265625e-05, + "model_forward_time": 0.024460315704345703, + "step": 17018 + }, + { + "epoch": 2.59674072265625e-05, + "step": 17018, + "training_step_time": 0.16143298149108887 + }, + { + "epoch": 2.596893310546875e-05, + "model_forward_time": 0.023349285125732422, + "step": 17019 + }, + { + "epoch": 2.596893310546875e-05, + "step": 17019, + "training_step_time": 0.1622481346130371 + }, + { + "epoch": 2.5970458984375e-05, + "grad_norm": 0.27651742100715637, + "learning_rate": 4.302315232922876e-05, + "loss": 0.0201, + "step": 17020 + }, + { + "epoch": 2.5970458984375e-05, + "model_forward_time": 0.02457284927368164, + "step": 17020 + }, + { + "epoch": 2.5970458984375e-05, + "step": 17020, + "training_step_time": 0.14145970344543457 + }, + { + "epoch": 2.597198486328125e-05, + "model_forward_time": 0.0241701602935791, + "step": 17021 + }, + { + "epoch": 2.597198486328125e-05, + "step": 17021, + "training_step_time": 0.2047266960144043 + }, + { + "epoch": 2.59735107421875e-05, + "model_forward_time": 0.024259328842163086, + "step": 17022 + }, + { + "epoch": 2.59735107421875e-05, + "step": 17022, + "training_step_time": 0.1355588436126709 + }, + { + "epoch": 2.597503662109375e-05, + "model_forward_time": 0.02413344383239746, + "step": 17023 + }, + { + "epoch": 2.597503662109375e-05, + "step": 17023, + "training_step_time": 0.18563175201416016 + }, + { + "epoch": 2.59765625e-05, + "model_forward_time": 0.023995161056518555, + "step": 17024 + }, + { + "epoch": 2.59765625e-05, + "step": 17024, + "training_step_time": 0.10613703727722168 + }, + { + "epoch": 2.597808837890625e-05, + "model_forward_time": 0.024899959564208984, + "step": 17025 + }, + { + "epoch": 2.597808837890625e-05, + "step": 17025, + "training_step_time": 0.10330462455749512 + }, + { + "epoch": 2.59796142578125e-05, + "model_forward_time": 0.025627851486206055, + "step": 17026 + }, + { + "epoch": 2.59796142578125e-05, + "step": 17026, + "training_step_time": 0.10488390922546387 + }, + { + "epoch": 2.598114013671875e-05, + "model_forward_time": 0.029017210006713867, + "step": 17027 + }, + { + "epoch": 2.598114013671875e-05, + "step": 17027, + "training_step_time": 0.10715436935424805 + }, + { + "epoch": 2.5982666015625e-05, + "model_forward_time": 0.02545762062072754, + "step": 17028 + }, + { + "epoch": 2.5982666015625e-05, + "step": 17028, + "training_step_time": 0.10448384284973145 + }, + { + "epoch": 2.598419189453125e-05, + "model_forward_time": 0.025335311889648438, + "step": 17029 + }, + { + "epoch": 2.598419189453125e-05, + "step": 17029, + "training_step_time": 0.1055140495300293 + }, + { + "epoch": 2.59857177734375e-05, + "grad_norm": 0.14272859692573547, + "learning_rate": 4.29685801224347e-05, + "loss": 0.0099, + "step": 17030 + }, + { + "epoch": 2.59857177734375e-05, + "model_forward_time": 0.02500295639038086, + "step": 17030 + }, + { + "epoch": 2.59857177734375e-05, + "step": 17030, + "training_step_time": 0.10641121864318848 + }, + { + "epoch": 2.598724365234375e-05, + "model_forward_time": 0.02498340606689453, + "step": 17031 + }, + { + "epoch": 2.598724365234375e-05, + "step": 17031, + "training_step_time": 0.1062626838684082 + }, + { + "epoch": 2.598876953125e-05, + "model_forward_time": 0.024964570999145508, + "step": 17032 + }, + { + "epoch": 2.598876953125e-05, + "step": 17032, + "training_step_time": 0.1054835319519043 + }, + { + "epoch": 2.599029541015625e-05, + "model_forward_time": 0.025264263153076172, + "step": 17033 + }, + { + "epoch": 2.599029541015625e-05, + "step": 17033, + "training_step_time": 0.1057441234588623 + }, + { + "epoch": 2.59918212890625e-05, + "model_forward_time": 0.024937152862548828, + "step": 17034 + }, + { + "epoch": 2.59918212890625e-05, + "step": 17034, + "training_step_time": 0.10555672645568848 + }, + { + "epoch": 2.599334716796875e-05, + "model_forward_time": 0.02401423454284668, + "step": 17035 + }, + { + "epoch": 2.599334716796875e-05, + "step": 17035, + "training_step_time": 0.10944104194641113 + }, + { + "epoch": 2.5994873046875e-05, + "model_forward_time": 0.024460792541503906, + "step": 17036 + }, + { + "epoch": 2.5994873046875e-05, + "step": 17036, + "training_step_time": 0.10684871673583984 + }, + { + "epoch": 2.599639892578125e-05, + "model_forward_time": 0.025495529174804688, + "step": 17037 + }, + { + "epoch": 2.599639892578125e-05, + "step": 17037, + "training_step_time": 0.10866308212280273 + }, + { + "epoch": 2.59979248046875e-05, + "model_forward_time": 0.024892091751098633, + "step": 17038 + }, + { + "epoch": 2.59979248046875e-05, + "step": 17038, + "training_step_time": 0.10711336135864258 + }, + { + "epoch": 2.599945068359375e-05, + "model_forward_time": 0.02543044090270996, + "step": 17039 + }, + { + "epoch": 2.599945068359375e-05, + "step": 17039, + "training_step_time": 0.10552167892456055 + }, + { + "epoch": 2.60009765625e-05, + "grad_norm": 0.29485321044921875, + "learning_rate": 4.291401645947879e-05, + "loss": 0.0092, + "step": 17040 + }, + { + "epoch": 2.60009765625e-05, + "model_forward_time": 0.024939537048339844, + "step": 17040 + }, + { + "epoch": 2.60009765625e-05, + "step": 17040, + "training_step_time": 0.1069190502166748 + }, + { + "epoch": 2.600250244140625e-05, + "model_forward_time": 0.023682832717895508, + "step": 17041 + }, + { + "epoch": 2.600250244140625e-05, + "step": 17041, + "training_step_time": 0.10683345794677734 + }, + { + "epoch": 2.60040283203125e-05, + "model_forward_time": 0.025570392608642578, + "step": 17042 + }, + { + "epoch": 2.60040283203125e-05, + "step": 17042, + "training_step_time": 0.10709357261657715 + }, + { + "epoch": 2.600555419921875e-05, + "model_forward_time": 0.025325298309326172, + "step": 17043 + }, + { + "epoch": 2.600555419921875e-05, + "step": 17043, + "training_step_time": 0.10787248611450195 + }, + { + "epoch": 2.6007080078125e-05, + "model_forward_time": 0.0250396728515625, + "step": 17044 + }, + { + "epoch": 2.6007080078125e-05, + "step": 17044, + "training_step_time": 0.10573530197143555 + }, + { + "epoch": 2.600860595703125e-05, + "model_forward_time": 0.024651288986206055, + "step": 17045 + }, + { + "epoch": 2.600860595703125e-05, + "step": 17045, + "training_step_time": 0.14682364463806152 + }, + { + "epoch": 2.60101318359375e-05, + "model_forward_time": 0.024847984313964844, + "step": 17046 + }, + { + "epoch": 2.60101318359375e-05, + "step": 17046, + "training_step_time": 0.18950343132019043 + }, + { + "epoch": 2.601165771484375e-05, + "model_forward_time": 0.024460792541503906, + "step": 17047 + }, + { + "epoch": 2.601165771484375e-05, + "step": 17047, + "training_step_time": 0.15612173080444336 + }, + { + "epoch": 2.601318359375e-05, + "model_forward_time": 0.024820566177368164, + "step": 17048 + }, + { + "epoch": 2.601318359375e-05, + "step": 17048, + "training_step_time": 0.2116234302520752 + }, + { + "epoch": 2.601470947265625e-05, + "model_forward_time": 0.024631261825561523, + "step": 17049 + }, + { + "epoch": 2.601470947265625e-05, + "step": 17049, + "training_step_time": 0.19156265258789062 + }, + { + "epoch": 2.60162353515625e-05, + "grad_norm": 0.29122012853622437, + "learning_rate": 4.2859461406661065e-05, + "loss": 0.0116, + "step": 17050 + }, + { + "epoch": 2.60162353515625e-05, + "model_forward_time": 0.024192333221435547, + "step": 17050 + }, + { + "epoch": 2.60162353515625e-05, + "step": 17050, + "training_step_time": 0.16969633102416992 + }, + { + "epoch": 2.601776123046875e-05, + "model_forward_time": 0.02439403533935547, + "step": 17051 + }, + { + "epoch": 2.601776123046875e-05, + "step": 17051, + "training_step_time": 0.11534762382507324 + }, + { + "epoch": 2.6019287109375e-05, + "model_forward_time": 0.02419900894165039, + "step": 17052 + }, + { + "epoch": 2.6019287109375e-05, + "step": 17052, + "training_step_time": 0.115814208984375 + }, + { + "epoch": 2.602081298828125e-05, + "model_forward_time": 0.02513885498046875, + "step": 17053 + }, + { + "epoch": 2.602081298828125e-05, + "step": 17053, + "training_step_time": 0.11325907707214355 + }, + { + "epoch": 2.60223388671875e-05, + "model_forward_time": 0.025537967681884766, + "step": 17054 + }, + { + "epoch": 2.60223388671875e-05, + "step": 17054, + "training_step_time": 0.11498880386352539 + }, + { + "epoch": 2.602386474609375e-05, + "model_forward_time": 0.026131153106689453, + "step": 17055 + }, + { + "epoch": 2.602386474609375e-05, + "step": 17055, + "training_step_time": 0.11494755744934082 + }, + { + "epoch": 2.6025390625e-05, + "model_forward_time": 0.02545785903930664, + "step": 17056 + }, + { + "epoch": 2.6025390625e-05, + "step": 17056, + "training_step_time": 0.1131281852722168 + }, + { + "epoch": 2.602691650390625e-05, + "model_forward_time": 0.025496244430541992, + "step": 17057 + }, + { + "epoch": 2.602691650390625e-05, + "step": 17057, + "training_step_time": 0.11297392845153809 + }, + { + "epoch": 2.60284423828125e-05, + "model_forward_time": 0.025649070739746094, + "step": 17058 + }, + { + "epoch": 2.60284423828125e-05, + "step": 17058, + "training_step_time": 0.1132206916809082 + }, + { + "epoch": 2.602996826171875e-05, + "model_forward_time": 0.02504134178161621, + "step": 17059 + }, + { + "epoch": 2.602996826171875e-05, + "step": 17059, + "training_step_time": 0.11168670654296875 + }, + { + "epoch": 2.6031494140625e-05, + "grad_norm": 0.5060628652572632, + "learning_rate": 4.280491503027104e-05, + "loss": 0.021, + "step": 17060 + }, + { + "epoch": 2.6031494140625e-05, + "model_forward_time": 0.024856090545654297, + "step": 17060 + }, + { + "epoch": 2.6031494140625e-05, + "step": 17060, + "training_step_time": 0.10833024978637695 + }, + { + "epoch": 2.603302001953125e-05, + "model_forward_time": 0.02521204948425293, + "step": 17061 + }, + { + "epoch": 2.603302001953125e-05, + "step": 17061, + "training_step_time": 0.11145901679992676 + }, + { + "epoch": 2.60345458984375e-05, + "model_forward_time": 0.025153160095214844, + "step": 17062 + }, + { + "epoch": 2.60345458984375e-05, + "step": 17062, + "training_step_time": 0.10854840278625488 + }, + { + "epoch": 2.603607177734375e-05, + "model_forward_time": 0.02508068084716797, + "step": 17063 + }, + { + "epoch": 2.603607177734375e-05, + "step": 17063, + "training_step_time": 0.10778498649597168 + }, + { + "epoch": 2.603759765625e-05, + "model_forward_time": 0.024805784225463867, + "step": 17064 + }, + { + "epoch": 2.603759765625e-05, + "step": 17064, + "training_step_time": 0.18959355354309082 + }, + { + "epoch": 2.603912353515625e-05, + "model_forward_time": 0.02463221549987793, + "step": 17065 + }, + { + "epoch": 2.603912353515625e-05, + "step": 17065, + "training_step_time": 0.1422865390777588 + }, + { + "epoch": 2.60406494140625e-05, + "model_forward_time": 0.02444744110107422, + "step": 17066 + }, + { + "epoch": 2.60406494140625e-05, + "step": 17066, + "training_step_time": 0.10944843292236328 + }, + { + "epoch": 2.604217529296875e-05, + "model_forward_time": 0.0251462459564209, + "step": 17067 + }, + { + "epoch": 2.604217529296875e-05, + "step": 17067, + "training_step_time": 0.11240243911743164 + }, + { + "epoch": 2.6043701171875e-05, + "model_forward_time": 0.02491617202758789, + "step": 17068 + }, + { + "epoch": 2.6043701171875e-05, + "step": 17068, + "training_step_time": 0.11077141761779785 + }, + { + "epoch": 2.604522705078125e-05, + "model_forward_time": 0.025319814682006836, + "step": 17069 + }, + { + "epoch": 2.604522705078125e-05, + "step": 17069, + "training_step_time": 0.10977935791015625 + }, + { + "epoch": 2.60467529296875e-05, + "grad_norm": 0.29499542713165283, + "learning_rate": 4.275037739658771e-05, + "loss": 0.0102, + "step": 17070 + }, + { + "epoch": 2.60467529296875e-05, + "model_forward_time": 0.0249178409576416, + "step": 17070 + }, + { + "epoch": 2.60467529296875e-05, + "step": 17070, + "training_step_time": 0.19310593605041504 + }, + { + "epoch": 2.604827880859375e-05, + "model_forward_time": 0.02432537078857422, + "step": 17071 + }, + { + "epoch": 2.604827880859375e-05, + "step": 17071, + "training_step_time": 0.10997390747070312 + }, + { + "epoch": 2.60498046875e-05, + "model_forward_time": 0.024555683135986328, + "step": 17072 + }, + { + "epoch": 2.60498046875e-05, + "step": 17072, + "training_step_time": 0.1048116683959961 + }, + { + "epoch": 2.605133056640625e-05, + "model_forward_time": 0.025147438049316406, + "step": 17073 + }, + { + "epoch": 2.605133056640625e-05, + "step": 17073, + "training_step_time": 0.10991859436035156 + }, + { + "epoch": 2.60528564453125e-05, + "model_forward_time": 0.02544546127319336, + "step": 17074 + }, + { + "epoch": 2.60528564453125e-05, + "step": 17074, + "training_step_time": 0.1059560775756836 + }, + { + "epoch": 2.605438232421875e-05, + "model_forward_time": 0.025191783905029297, + "step": 17075 + }, + { + "epoch": 2.605438232421875e-05, + "step": 17075, + "training_step_time": 0.10484504699707031 + }, + { + "epoch": 2.6055908203125e-05, + "model_forward_time": 0.025140762329101562, + "step": 17076 + }, + { + "epoch": 2.6055908203125e-05, + "step": 17076, + "training_step_time": 0.10912537574768066 + }, + { + "epoch": 2.605743408203125e-05, + "model_forward_time": 0.02520585060119629, + "step": 17077 + }, + { + "epoch": 2.605743408203125e-05, + "step": 17077, + "training_step_time": 0.10785150527954102 + }, + { + "epoch": 2.60589599609375e-05, + "model_forward_time": 0.02561354637145996, + "step": 17078 + }, + { + "epoch": 2.60589599609375e-05, + "step": 17078, + "training_step_time": 0.10725808143615723 + }, + { + "epoch": 2.606048583984375e-05, + "model_forward_time": 0.02504873275756836, + "step": 17079 + }, + { + "epoch": 2.606048583984375e-05, + "step": 17079, + "training_step_time": 0.10757589340209961 + }, + { + "epoch": 2.606201171875e-05, + "grad_norm": 0.4745246171951294, + "learning_rate": 4.269584857187943e-05, + "loss": 0.0109, + "step": 17080 + }, + { + "epoch": 2.606201171875e-05, + "model_forward_time": 0.028053760528564453, + "step": 17080 + }, + { + "epoch": 2.606201171875e-05, + "step": 17080, + "training_step_time": 0.11024069786071777 + }, + { + "epoch": 2.606353759765625e-05, + "model_forward_time": 0.024960041046142578, + "step": 17081 + }, + { + "epoch": 2.606353759765625e-05, + "step": 17081, + "training_step_time": 0.10504364967346191 + }, + { + "epoch": 2.60650634765625e-05, + "model_forward_time": 0.024954557418823242, + "step": 17082 + }, + { + "epoch": 2.60650634765625e-05, + "step": 17082, + "training_step_time": 0.10534954071044922 + }, + { + "epoch": 2.606658935546875e-05, + "model_forward_time": 0.025187015533447266, + "step": 17083 + }, + { + "epoch": 2.606658935546875e-05, + "step": 17083, + "training_step_time": 0.10299038887023926 + }, + { + "epoch": 2.6068115234375e-05, + "model_forward_time": 0.024805545806884766, + "step": 17084 + }, + { + "epoch": 2.6068115234375e-05, + "step": 17084, + "training_step_time": 0.10322833061218262 + }, + { + "epoch": 2.606964111328125e-05, + "model_forward_time": 0.024718046188354492, + "step": 17085 + }, + { + "epoch": 2.606964111328125e-05, + "step": 17085, + "training_step_time": 0.10423946380615234 + }, + { + "epoch": 2.60711669921875e-05, + "model_forward_time": 0.0252835750579834, + "step": 17086 + }, + { + "epoch": 2.60711669921875e-05, + "step": 17086, + "training_step_time": 0.10544347763061523 + }, + { + "epoch": 2.607269287109375e-05, + "model_forward_time": 0.025025367736816406, + "step": 17087 + }, + { + "epoch": 2.607269287109375e-05, + "step": 17087, + "training_step_time": 0.10693883895874023 + }, + { + "epoch": 2.607421875e-05, + "model_forward_time": 0.02555561065673828, + "step": 17088 + }, + { + "epoch": 2.607421875e-05, + "step": 17088, + "training_step_time": 0.10808420181274414 + }, + { + "epoch": 2.607574462890625e-05, + "model_forward_time": 0.025806903839111328, + "step": 17089 + }, + { + "epoch": 2.607574462890625e-05, + "step": 17089, + "training_step_time": 0.10807204246520996 + }, + { + "epoch": 2.60772705078125e-05, + "grad_norm": 0.1551487147808075, + "learning_rate": 4.264132862240387e-05, + "loss": 0.0121, + "step": 17090 + }, + { + "epoch": 2.60772705078125e-05, + "model_forward_time": 0.025624990463256836, + "step": 17090 + }, + { + "epoch": 2.60772705078125e-05, + "step": 17090, + "training_step_time": 0.10882806777954102 + }, + { + "epoch": 2.607879638671875e-05, + "model_forward_time": 0.024482250213623047, + "step": 17091 + }, + { + "epoch": 2.607879638671875e-05, + "step": 17091, + "training_step_time": 0.14906954765319824 + }, + { + "epoch": 2.6080322265625e-05, + "model_forward_time": 0.023906230926513672, + "step": 17092 + }, + { + "epoch": 2.6080322265625e-05, + "step": 17092, + "training_step_time": 0.18044233322143555 + }, + { + "epoch": 2.608184814453125e-05, + "model_forward_time": 0.024450302124023438, + "step": 17093 + }, + { + "epoch": 2.608184814453125e-05, + "step": 17093, + "training_step_time": 0.1486985683441162 + }, + { + "epoch": 2.60833740234375e-05, + "model_forward_time": 0.024481534957885742, + "step": 17094 + }, + { + "epoch": 2.60833740234375e-05, + "step": 17094, + "training_step_time": 0.15880489349365234 + }, + { + "epoch": 2.608489990234375e-05, + "model_forward_time": 0.024695396423339844, + "step": 17095 + }, + { + "epoch": 2.608489990234375e-05, + "step": 17095, + "training_step_time": 0.17287373542785645 + }, + { + "epoch": 2.608642578125e-05, + "model_forward_time": 0.02428436279296875, + "step": 17096 + }, + { + "epoch": 2.608642578125e-05, + "step": 17096, + "training_step_time": 0.11421704292297363 + }, + { + "epoch": 2.608795166015625e-05, + "model_forward_time": 0.02487945556640625, + "step": 17097 + }, + { + "epoch": 2.608795166015625e-05, + "step": 17097, + "training_step_time": 0.12466311454772949 + }, + { + "epoch": 2.60894775390625e-05, + "model_forward_time": 0.025197505950927734, + "step": 17098 + }, + { + "epoch": 2.60894775390625e-05, + "step": 17098, + "training_step_time": 0.10833597183227539 + }, + { + "epoch": 2.609100341796875e-05, + "model_forward_time": 0.02648019790649414, + "step": 17099 + }, + { + "epoch": 2.609100341796875e-05, + "step": 17099, + "training_step_time": 0.11718201637268066 + }, + { + "epoch": 2.6092529296875e-05, + "grad_norm": 0.32109713554382324, + "learning_rate": 4.2586817614407895e-05, + "loss": 0.0093, + "step": 17100 + }, + { + "epoch": 2.6092529296875e-05, + "model_forward_time": 0.0250091552734375, + "step": 17100 + }, + { + "epoch": 2.6092529296875e-05, + "step": 17100, + "training_step_time": 0.10696840286254883 + }, + { + "epoch": 2.609405517578125e-05, + "model_forward_time": 0.025254249572753906, + "step": 17101 + }, + { + "epoch": 2.609405517578125e-05, + "step": 17101, + "training_step_time": 0.10769987106323242 + }, + { + "epoch": 2.60955810546875e-05, + "model_forward_time": 0.025107383728027344, + "step": 17102 + }, + { + "epoch": 2.60955810546875e-05, + "step": 17102, + "training_step_time": 0.10953664779663086 + }, + { + "epoch": 2.609710693359375e-05, + "model_forward_time": 0.025088787078857422, + "step": 17103 + }, + { + "epoch": 2.609710693359375e-05, + "step": 17103, + "training_step_time": 0.10822534561157227 + }, + { + "epoch": 2.60986328125e-05, + "model_forward_time": 0.025412321090698242, + "step": 17104 + }, + { + "epoch": 2.60986328125e-05, + "step": 17104, + "training_step_time": 0.10643887519836426 + }, + { + "epoch": 2.610015869140625e-05, + "model_forward_time": 0.02579641342163086, + "step": 17105 + }, + { + "epoch": 2.610015869140625e-05, + "step": 17105, + "training_step_time": 0.10853886604309082 + }, + { + "epoch": 2.61016845703125e-05, + "model_forward_time": 0.02515721321105957, + "step": 17106 + }, + { + "epoch": 2.61016845703125e-05, + "step": 17106, + "training_step_time": 0.10728812217712402 + }, + { + "epoch": 2.610321044921875e-05, + "model_forward_time": 0.025631427764892578, + "step": 17107 + }, + { + "epoch": 2.610321044921875e-05, + "step": 17107, + "training_step_time": 0.10686254501342773 + }, + { + "epoch": 2.6104736328125e-05, + "model_forward_time": 0.025186538696289062, + "step": 17108 + }, + { + "epoch": 2.6104736328125e-05, + "step": 17108, + "training_step_time": 0.10636210441589355 + }, + { + "epoch": 2.610626220703125e-05, + "model_forward_time": 0.025334835052490234, + "step": 17109 + }, + { + "epoch": 2.610626220703125e-05, + "step": 17109, + "training_step_time": 0.10912346839904785 + }, + { + "epoch": 2.61077880859375e-05, + "grad_norm": 0.19505758583545685, + "learning_rate": 4.253231561412756e-05, + "loss": 0.0125, + "step": 17110 + }, + { + "epoch": 2.61077880859375e-05, + "model_forward_time": 0.025179147720336914, + "step": 17110 + }, + { + "epoch": 2.61077880859375e-05, + "step": 17110, + "training_step_time": 0.10516214370727539 + }, + { + "epoch": 2.610931396484375e-05, + "model_forward_time": 0.028001785278320312, + "step": 17111 + }, + { + "epoch": 2.610931396484375e-05, + "step": 17111, + "training_step_time": 0.10783267021179199 + }, + { + "epoch": 2.611083984375e-05, + "model_forward_time": 0.02522754669189453, + "step": 17112 + }, + { + "epoch": 2.611083984375e-05, + "step": 17112, + "training_step_time": 0.11644577980041504 + }, + { + "epoch": 2.611236572265625e-05, + "model_forward_time": 0.024864912033081055, + "step": 17113 + }, + { + "epoch": 2.611236572265625e-05, + "step": 17113, + "training_step_time": 0.14072036743164062 + }, + { + "epoch": 2.61138916015625e-05, + "model_forward_time": 0.025297164916992188, + "step": 17114 + }, + { + "epoch": 2.61138916015625e-05, + "step": 17114, + "training_step_time": 0.11302638053894043 + }, + { + "epoch": 2.611541748046875e-05, + "model_forward_time": 0.02515578269958496, + "step": 17115 + }, + { + "epoch": 2.611541748046875e-05, + "step": 17115, + "training_step_time": 0.11042451858520508 + }, + { + "epoch": 2.6116943359375e-05, + "model_forward_time": 0.025621414184570312, + "step": 17116 + }, + { + "epoch": 2.6116943359375e-05, + "step": 17116, + "training_step_time": 0.11751747131347656 + }, + { + "epoch": 2.611846923828125e-05, + "model_forward_time": 0.025569677352905273, + "step": 17117 + }, + { + "epoch": 2.611846923828125e-05, + "step": 17117, + "training_step_time": 0.11192536354064941 + }, + { + "epoch": 2.61199951171875e-05, + "model_forward_time": 0.024882078170776367, + "step": 17118 + }, + { + "epoch": 2.61199951171875e-05, + "step": 17118, + "training_step_time": 0.1949782371520996 + }, + { + "epoch": 2.612152099609375e-05, + "model_forward_time": 0.025637149810791016, + "step": 17119 + }, + { + "epoch": 2.612152099609375e-05, + "step": 17119, + "training_step_time": 0.11349797248840332 + }, + { + "epoch": 2.6123046875e-05, + "grad_norm": 0.18414703011512756, + "learning_rate": 4.247782268778791e-05, + "loss": 0.0094, + "step": 17120 + }, + { + "epoch": 2.6123046875e-05, + "model_forward_time": 0.02391839027404785, + "step": 17120 + }, + { + "epoch": 2.6123046875e-05, + "step": 17120, + "training_step_time": 0.1106119155883789 + }, + { + "epoch": 2.612457275390625e-05, + "model_forward_time": 0.02509927749633789, + "step": 17121 + }, + { + "epoch": 2.612457275390625e-05, + "step": 17121, + "training_step_time": 0.11249518394470215 + }, + { + "epoch": 2.61260986328125e-05, + "model_forward_time": 0.025298118591308594, + "step": 17122 + }, + { + "epoch": 2.61260986328125e-05, + "step": 17122, + "training_step_time": 0.10798478126525879 + }, + { + "epoch": 2.612762451171875e-05, + "model_forward_time": 0.02499985694885254, + "step": 17123 + }, + { + "epoch": 2.612762451171875e-05, + "step": 17123, + "training_step_time": 0.11208581924438477 + }, + { + "epoch": 2.6129150390625e-05, + "model_forward_time": 0.025351762771606445, + "step": 17124 + }, + { + "epoch": 2.6129150390625e-05, + "step": 17124, + "training_step_time": 0.10893487930297852 + }, + { + "epoch": 2.613067626953125e-05, + "model_forward_time": 0.025028467178344727, + "step": 17125 + }, + { + "epoch": 2.613067626953125e-05, + "step": 17125, + "training_step_time": 0.10668301582336426 + }, + { + "epoch": 2.61322021484375e-05, + "model_forward_time": 0.025165319442749023, + "step": 17126 + }, + { + "epoch": 2.61322021484375e-05, + "step": 17126, + "training_step_time": 0.11016440391540527 + }, + { + "epoch": 2.613372802734375e-05, + "model_forward_time": 0.025391101837158203, + "step": 17127 + }, + { + "epoch": 2.613372802734375e-05, + "step": 17127, + "training_step_time": 0.10554647445678711 + }, + { + "epoch": 2.613525390625e-05, + "model_forward_time": 0.025171279907226562, + "step": 17128 + }, + { + "epoch": 2.613525390625e-05, + "step": 17128, + "training_step_time": 0.10634589195251465 + }, + { + "epoch": 2.613677978515625e-05, + "model_forward_time": 0.025005817413330078, + "step": 17129 + }, + { + "epoch": 2.613677978515625e-05, + "step": 17129, + "training_step_time": 0.10880637168884277 + }, + { + "epoch": 2.61383056640625e-05, + "grad_norm": 0.3356666564941406, + "learning_rate": 4.2423338901602985e-05, + "loss": 0.0087, + "step": 17130 + }, + { + "epoch": 2.61383056640625e-05, + "model_forward_time": 0.025043249130249023, + "step": 17130 + }, + { + "epoch": 2.61383056640625e-05, + "step": 17130, + "training_step_time": 0.10920572280883789 + }, + { + "epoch": 2.613983154296875e-05, + "model_forward_time": 0.025278091430664062, + "step": 17131 + }, + { + "epoch": 2.613983154296875e-05, + "step": 17131, + "training_step_time": 0.11083436012268066 + }, + { + "epoch": 2.6141357421875e-05, + "model_forward_time": 0.02502608299255371, + "step": 17132 + }, + { + "epoch": 2.6141357421875e-05, + "step": 17132, + "training_step_time": 0.10721898078918457 + }, + { + "epoch": 2.614288330078125e-05, + "model_forward_time": 0.025105953216552734, + "step": 17133 + }, + { + "epoch": 2.614288330078125e-05, + "step": 17133, + "training_step_time": 0.11062979698181152 + }, + { + "epoch": 2.61444091796875e-05, + "model_forward_time": 0.0243682861328125, + "step": 17134 + }, + { + "epoch": 2.61444091796875e-05, + "step": 17134, + "training_step_time": 0.1041722297668457 + }, + { + "epoch": 2.614593505859375e-05, + "model_forward_time": 0.0250701904296875, + "step": 17135 + }, + { + "epoch": 2.614593505859375e-05, + "step": 17135, + "training_step_time": 0.1082925796508789 + }, + { + "epoch": 2.61474609375e-05, + "model_forward_time": 0.02537679672241211, + "step": 17136 + }, + { + "epoch": 2.61474609375e-05, + "step": 17136, + "training_step_time": 0.10610103607177734 + }, + { + "epoch": 2.614898681640625e-05, + "model_forward_time": 0.02535557746887207, + "step": 17137 + }, + { + "epoch": 2.614898681640625e-05, + "step": 17137, + "training_step_time": 0.10697245597839355 + }, + { + "epoch": 2.61505126953125e-05, + "model_forward_time": 0.025920867919921875, + "step": 17138 + }, + { + "epoch": 2.61505126953125e-05, + "step": 17138, + "training_step_time": 0.1043233871459961 + }, + { + "epoch": 2.615203857421875e-05, + "model_forward_time": 0.024859905242919922, + "step": 17139 + }, + { + "epoch": 2.615203857421875e-05, + "step": 17139, + "training_step_time": 0.18228983879089355 + }, + { + "epoch": 2.6153564453125e-05, + "grad_norm": 0.12060805410146713, + "learning_rate": 4.236886432177572e-05, + "loss": 0.0059, + "step": 17140 + }, + { + "epoch": 2.6153564453125e-05, + "model_forward_time": 0.024861812591552734, + "step": 17140 + }, + { + "epoch": 2.6153564453125e-05, + "step": 17140, + "training_step_time": 0.17465496063232422 + }, + { + "epoch": 2.615509033203125e-05, + "model_forward_time": 0.024479389190673828, + "step": 17141 + }, + { + "epoch": 2.615509033203125e-05, + "step": 17141, + "training_step_time": 0.18379712104797363 + }, + { + "epoch": 2.61566162109375e-05, + "model_forward_time": 0.025355100631713867, + "step": 17142 + }, + { + "epoch": 2.61566162109375e-05, + "step": 17142, + "training_step_time": 0.18802142143249512 + }, + { + "epoch": 2.615814208984375e-05, + "model_forward_time": 0.024550437927246094, + "step": 17143 + }, + { + "epoch": 2.615814208984375e-05, + "step": 17143, + "training_step_time": 0.15912532806396484 + }, + { + "epoch": 2.615966796875e-05, + "model_forward_time": 0.024535179138183594, + "step": 17144 + }, + { + "epoch": 2.615966796875e-05, + "step": 17144, + "training_step_time": 0.1309065818786621 + }, + { + "epoch": 2.616119384765625e-05, + "model_forward_time": 0.02463817596435547, + "step": 17145 + }, + { + "epoch": 2.616119384765625e-05, + "step": 17145, + "training_step_time": 0.10896062850952148 + }, + { + "epoch": 2.61627197265625e-05, + "model_forward_time": 0.025447845458984375, + "step": 17146 + }, + { + "epoch": 2.61627197265625e-05, + "step": 17146, + "training_step_time": 0.1152653694152832 + }, + { + "epoch": 2.616424560546875e-05, + "model_forward_time": 0.025068998336791992, + "step": 17147 + }, + { + "epoch": 2.616424560546875e-05, + "step": 17147, + "training_step_time": 0.10412740707397461 + }, + { + "epoch": 2.6165771484375e-05, + "model_forward_time": 0.02513599395751953, + "step": 17148 + }, + { + "epoch": 2.6165771484375e-05, + "step": 17148, + "training_step_time": 0.10889887809753418 + }, + { + "epoch": 2.616729736328125e-05, + "model_forward_time": 0.025618314743041992, + "step": 17149 + }, + { + "epoch": 2.616729736328125e-05, + "step": 17149, + "training_step_time": 0.10521221160888672 + }, + { + "epoch": 2.61688232421875e-05, + "grad_norm": 0.1229744404554367, + "learning_rate": 4.231439901449788e-05, + "loss": 0.0109, + "step": 17150 + }, + { + "epoch": 2.61688232421875e-05, + "model_forward_time": 0.025228500366210938, + "step": 17150 + }, + { + "epoch": 2.61688232421875e-05, + "step": 17150, + "training_step_time": 0.11307668685913086 + }, + { + "epoch": 2.617034912109375e-05, + "model_forward_time": 0.025208711624145508, + "step": 17151 + }, + { + "epoch": 2.617034912109375e-05, + "step": 17151, + "training_step_time": 0.11640739440917969 + }, + { + "epoch": 2.6171875e-05, + "model_forward_time": 0.025891780853271484, + "step": 17152 + }, + { + "epoch": 2.6171875e-05, + "step": 17152, + "training_step_time": 0.1880357265472412 + }, + { + "epoch": 2.617340087890625e-05, + "model_forward_time": 0.024780988693237305, + "step": 17153 + }, + { + "epoch": 2.617340087890625e-05, + "step": 17153, + "training_step_time": 0.2084496021270752 + }, + { + "epoch": 2.61749267578125e-05, + "model_forward_time": 0.024461030960083008, + "step": 17154 + }, + { + "epoch": 2.61749267578125e-05, + "step": 17154, + "training_step_time": 0.20163917541503906 + }, + { + "epoch": 2.617645263671875e-05, + "model_forward_time": 0.023973703384399414, + "step": 17155 + }, + { + "epoch": 2.617645263671875e-05, + "step": 17155, + "training_step_time": 0.2014768123626709 + }, + { + "epoch": 2.6177978515625e-05, + "model_forward_time": 0.024760961532592773, + "step": 17156 + }, + { + "epoch": 2.6177978515625e-05, + "step": 17156, + "training_step_time": 0.22733163833618164 + }, + { + "epoch": 2.617950439453125e-05, + "model_forward_time": 0.02423095703125, + "step": 17157 + }, + { + "epoch": 2.617950439453125e-05, + "step": 17157, + "training_step_time": 0.2110605239868164 + }, + { + "epoch": 2.61810302734375e-05, + "model_forward_time": 0.024020910263061523, + "step": 17158 + }, + { + "epoch": 2.61810302734375e-05, + "step": 17158, + "training_step_time": 0.17118430137634277 + }, + { + "epoch": 2.618255615234375e-05, + "model_forward_time": 0.024384260177612305, + "step": 17159 + }, + { + "epoch": 2.618255615234375e-05, + "step": 17159, + "training_step_time": 0.12720775604248047 + }, + { + "epoch": 2.618408203125e-05, + "grad_norm": 0.2088848203420639, + "learning_rate": 4.2259943045949934e-05, + "loss": 0.0122, + "step": 17160 + }, + { + "epoch": 2.618408203125e-05, + "model_forward_time": 0.024971723556518555, + "step": 17160 + }, + { + "epoch": 2.618408203125e-05, + "step": 17160, + "training_step_time": 0.10787773132324219 + }, + { + "epoch": 2.618560791015625e-05, + "model_forward_time": 0.026466846466064453, + "step": 17161 + }, + { + "epoch": 2.618560791015625e-05, + "step": 17161, + "training_step_time": 0.11113119125366211 + }, + { + "epoch": 2.61871337890625e-05, + "model_forward_time": 0.024970054626464844, + "step": 17162 + }, + { + "epoch": 2.61871337890625e-05, + "step": 17162, + "training_step_time": 0.10395479202270508 + }, + { + "epoch": 2.618865966796875e-05, + "model_forward_time": 0.025197505950927734, + "step": 17163 + }, + { + "epoch": 2.618865966796875e-05, + "step": 17163, + "training_step_time": 0.10346865653991699 + }, + { + "epoch": 2.6190185546875e-05, + "model_forward_time": 0.025367021560668945, + "step": 17164 + }, + { + "epoch": 2.6190185546875e-05, + "step": 17164, + "training_step_time": 0.10303568840026855 + }, + { + "epoch": 2.619171142578125e-05, + "model_forward_time": 0.025267362594604492, + "step": 17165 + }, + { + "epoch": 2.619171142578125e-05, + "step": 17165, + "training_step_time": 0.10638952255249023 + }, + { + "epoch": 2.61932373046875e-05, + "model_forward_time": 0.02488112449645996, + "step": 17166 + }, + { + "epoch": 2.61932373046875e-05, + "step": 17166, + "training_step_time": 0.10659241676330566 + }, + { + "epoch": 2.619476318359375e-05, + "model_forward_time": 0.02528238296508789, + "step": 17167 + }, + { + "epoch": 2.619476318359375e-05, + "step": 17167, + "training_step_time": 0.18051862716674805 + }, + { + "epoch": 2.61962890625e-05, + "model_forward_time": 0.02492523193359375, + "step": 17168 + }, + { + "epoch": 2.61962890625e-05, + "step": 17168, + "training_step_time": 0.20075368881225586 + }, + { + "epoch": 2.619781494140625e-05, + "model_forward_time": 0.02717447280883789, + "step": 17169 + }, + { + "epoch": 2.619781494140625e-05, + "step": 17169, + "training_step_time": 0.1989452838897705 + }, + { + "epoch": 2.61993408203125e-05, + "grad_norm": 0.15796822309494019, + "learning_rate": 4.220549648230104e-05, + "loss": 0.0132, + "step": 17170 + }, + { + "epoch": 2.61993408203125e-05, + "model_forward_time": 0.024607181549072266, + "step": 17170 + }, + { + "epoch": 2.61993408203125e-05, + "step": 17170, + "training_step_time": 0.17736172676086426 + }, + { + "epoch": 2.620086669921875e-05, + "model_forward_time": 0.025049209594726562, + "step": 17171 + }, + { + "epoch": 2.620086669921875e-05, + "step": 17171, + "training_step_time": 0.17417073249816895 + }, + { + "epoch": 2.6202392578125e-05, + "model_forward_time": 0.024482250213623047, + "step": 17172 + }, + { + "epoch": 2.6202392578125e-05, + "step": 17172, + "training_step_time": 0.1578364372253418 + }, + { + "epoch": 2.620391845703125e-05, + "model_forward_time": 0.02760601043701172, + "step": 17173 + }, + { + "epoch": 2.620391845703125e-05, + "step": 17173, + "training_step_time": 0.15186643600463867 + }, + { + "epoch": 2.62054443359375e-05, + "model_forward_time": 0.024909019470214844, + "step": 17174 + }, + { + "epoch": 2.62054443359375e-05, + "step": 17174, + "training_step_time": 0.13660550117492676 + }, + { + "epoch": 2.620697021484375e-05, + "model_forward_time": 0.024498939514160156, + "step": 17175 + }, + { + "epoch": 2.620697021484375e-05, + "step": 17175, + "training_step_time": 0.10116291046142578 + }, + { + "epoch": 2.620849609375e-05, + "model_forward_time": 0.02588677406311035, + "step": 17176 + }, + { + "epoch": 2.620849609375e-05, + "step": 17176, + "training_step_time": 0.10361266136169434 + }, + { + "epoch": 2.621002197265625e-05, + "model_forward_time": 0.025633811950683594, + "step": 17177 + }, + { + "epoch": 2.621002197265625e-05, + "step": 17177, + "training_step_time": 0.10399675369262695 + }, + { + "epoch": 2.62115478515625e-05, + "model_forward_time": 0.024599790573120117, + "step": 17178 + }, + { + "epoch": 2.62115478515625e-05, + "step": 17178, + "training_step_time": 0.21236872673034668 + }, + { + "epoch": 2.621307373046875e-05, + "model_forward_time": 0.02465343475341797, + "step": 17179 + }, + { + "epoch": 2.621307373046875e-05, + "step": 17179, + "training_step_time": 0.14782953262329102 + }, + { + "epoch": 2.6214599609375e-05, + "grad_norm": 0.2959529459476471, + "learning_rate": 4.215105938970889e-05, + "loss": 0.0092, + "step": 17180 + }, + { + "epoch": 2.6214599609375e-05, + "model_forward_time": 0.025079965591430664, + "step": 17180 + }, + { + "epoch": 2.6214599609375e-05, + "step": 17180, + "training_step_time": 0.18485260009765625 + }, + { + "epoch": 2.621612548828125e-05, + "model_forward_time": 0.02491450309753418, + "step": 17181 + }, + { + "epoch": 2.621612548828125e-05, + "step": 17181, + "training_step_time": 0.15768003463745117 + }, + { + "epoch": 2.62176513671875e-05, + "model_forward_time": 0.02473282814025879, + "step": 17182 + }, + { + "epoch": 2.62176513671875e-05, + "step": 17182, + "training_step_time": 0.18913817405700684 + }, + { + "epoch": 2.621917724609375e-05, + "model_forward_time": 0.024394512176513672, + "step": 17183 + }, + { + "epoch": 2.621917724609375e-05, + "step": 17183, + "training_step_time": 0.1275479793548584 + }, + { + "epoch": 2.6220703125e-05, + "model_forward_time": 0.024399757385253906, + "step": 17184 + }, + { + "epoch": 2.6220703125e-05, + "step": 17184, + "training_step_time": 0.11643266677856445 + }, + { + "epoch": 2.622222900390625e-05, + "model_forward_time": 0.025268077850341797, + "step": 17185 + }, + { + "epoch": 2.622222900390625e-05, + "step": 17185, + "training_step_time": 0.12129044532775879 + }, + { + "epoch": 2.62237548828125e-05, + "model_forward_time": 0.025583267211914062, + "step": 17186 + }, + { + "epoch": 2.62237548828125e-05, + "step": 17186, + "training_step_time": 0.10653162002563477 + }, + { + "epoch": 2.622528076171875e-05, + "model_forward_time": 0.025265932083129883, + "step": 17187 + }, + { + "epoch": 2.622528076171875e-05, + "step": 17187, + "training_step_time": 0.10411548614501953 + }, + { + "epoch": 2.6226806640625e-05, + "model_forward_time": 0.024854421615600586, + "step": 17188 + }, + { + "epoch": 2.6226806640625e-05, + "step": 17188, + "training_step_time": 0.11610984802246094 + }, + { + "epoch": 2.622833251953125e-05, + "model_forward_time": 0.025444984436035156, + "step": 17189 + }, + { + "epoch": 2.622833251953125e-05, + "step": 17189, + "training_step_time": 0.1246798038482666 + }, + { + "epoch": 2.62298583984375e-05, + "grad_norm": 0.2057543843984604, + "learning_rate": 4.209663183431969e-05, + "loss": 0.0075, + "step": 17190 + }, + { + "epoch": 2.62298583984375e-05, + "model_forward_time": 0.02504754066467285, + "step": 17190 + }, + { + "epoch": 2.62298583984375e-05, + "step": 17190, + "training_step_time": 0.12670564651489258 + }, + { + "epoch": 2.623138427734375e-05, + "model_forward_time": 0.0247342586517334, + "step": 17191 + }, + { + "epoch": 2.623138427734375e-05, + "step": 17191, + "training_step_time": 0.12469840049743652 + }, + { + "epoch": 2.623291015625e-05, + "model_forward_time": 0.02462458610534668, + "step": 17192 + }, + { + "epoch": 2.623291015625e-05, + "step": 17192, + "training_step_time": 0.12204670906066895 + }, + { + "epoch": 2.623443603515625e-05, + "model_forward_time": 0.025412797927856445, + "step": 17193 + }, + { + "epoch": 2.623443603515625e-05, + "step": 17193, + "training_step_time": 0.11913418769836426 + }, + { + "epoch": 2.62359619140625e-05, + "model_forward_time": 0.025601863861083984, + "step": 17194 + }, + { + "epoch": 2.62359619140625e-05, + "step": 17194, + "training_step_time": 0.11663269996643066 + }, + { + "epoch": 2.623748779296875e-05, + "model_forward_time": 0.02789759635925293, + "step": 17195 + }, + { + "epoch": 2.623748779296875e-05, + "step": 17195, + "training_step_time": 0.11406993865966797 + }, + { + "epoch": 2.6239013671875e-05, + "model_forward_time": 0.024938344955444336, + "step": 17196 + }, + { + "epoch": 2.6239013671875e-05, + "step": 17196, + "training_step_time": 0.13062238693237305 + }, + { + "epoch": 2.624053955078125e-05, + "model_forward_time": 0.025087833404541016, + "step": 17197 + }, + { + "epoch": 2.624053955078125e-05, + "step": 17197, + "training_step_time": 0.14064550399780273 + }, + { + "epoch": 2.62420654296875e-05, + "model_forward_time": 0.025380373001098633, + "step": 17198 + }, + { + "epoch": 2.62420654296875e-05, + "step": 17198, + "training_step_time": 0.11208939552307129 + }, + { + "epoch": 2.624359130859375e-05, + "model_forward_time": 0.02482748031616211, + "step": 17199 + }, + { + "epoch": 2.624359130859375e-05, + "step": 17199, + "training_step_time": 0.1139683723449707 + }, + { + "epoch": 2.62451171875e-05, + "grad_norm": 0.32496362924575806, + "learning_rate": 4.2042213882268025e-05, + "loss": 0.0164, + "step": 17200 + }, + { + "epoch": 2.62451171875e-05, + "model_forward_time": 0.025487661361694336, + "step": 17200 + }, + { + "epoch": 2.62451171875e-05, + "step": 17200, + "training_step_time": 0.10844635963439941 + }, + { + "epoch": 2.624664306640625e-05, + "model_forward_time": 0.0249481201171875, + "step": 17201 + }, + { + "epoch": 2.624664306640625e-05, + "step": 17201, + "training_step_time": 0.11020445823669434 + }, + { + "epoch": 2.62481689453125e-05, + "model_forward_time": 0.02522110939025879, + "step": 17202 + }, + { + "epoch": 2.62481689453125e-05, + "step": 17202, + "training_step_time": 0.19670391082763672 + }, + { + "epoch": 2.624969482421875e-05, + "model_forward_time": 0.02395153045654297, + "step": 17203 + }, + { + "epoch": 2.624969482421875e-05, + "step": 17203, + "training_step_time": 0.10595870018005371 + }, + { + "epoch": 2.6251220703125e-05, + "model_forward_time": 0.024007558822631836, + "step": 17204 + }, + { + "epoch": 2.6251220703125e-05, + "step": 17204, + "training_step_time": 0.10351872444152832 + }, + { + "epoch": 2.625274658203125e-05, + "model_forward_time": 0.02525615692138672, + "step": 17205 + }, + { + "epoch": 2.625274658203125e-05, + "step": 17205, + "training_step_time": 0.10487174987792969 + }, + { + "epoch": 2.62542724609375e-05, + "model_forward_time": 0.025624513626098633, + "step": 17206 + }, + { + "epoch": 2.62542724609375e-05, + "step": 17206, + "training_step_time": 0.10958075523376465 + }, + { + "epoch": 2.625579833984375e-05, + "model_forward_time": 0.025081157684326172, + "step": 17207 + }, + { + "epoch": 2.625579833984375e-05, + "step": 17207, + "training_step_time": 0.10593056678771973 + }, + { + "epoch": 2.625732421875e-05, + "model_forward_time": 0.02512669563293457, + "step": 17208 + }, + { + "epoch": 2.625732421875e-05, + "step": 17208, + "training_step_time": 0.10435104370117188 + }, + { + "epoch": 2.625885009765625e-05, + "model_forward_time": 0.025334835052490234, + "step": 17209 + }, + { + "epoch": 2.625885009765625e-05, + "step": 17209, + "training_step_time": 0.10810708999633789 + }, + { + "epoch": 2.62603759765625e-05, + "grad_norm": 0.3234359622001648, + "learning_rate": 4.1987805599676896e-05, + "loss": 0.0088, + "step": 17210 + }, + { + "epoch": 2.62603759765625e-05, + "model_forward_time": 0.025055408477783203, + "step": 17210 + }, + { + "epoch": 2.62603759765625e-05, + "step": 17210, + "training_step_time": 0.10582232475280762 + }, + { + "epoch": 2.626190185546875e-05, + "model_forward_time": 0.02558588981628418, + "step": 17211 + }, + { + "epoch": 2.626190185546875e-05, + "step": 17211, + "training_step_time": 0.10567283630371094 + }, + { + "epoch": 2.6263427734375e-05, + "model_forward_time": 0.025130033493041992, + "step": 17212 + }, + { + "epoch": 2.6263427734375e-05, + "step": 17212, + "training_step_time": 0.10471200942993164 + }, + { + "epoch": 2.626495361328125e-05, + "model_forward_time": 0.025229215621948242, + "step": 17213 + }, + { + "epoch": 2.626495361328125e-05, + "step": 17213, + "training_step_time": 0.10382223129272461 + }, + { + "epoch": 2.62664794921875e-05, + "model_forward_time": 0.023931503295898438, + "step": 17214 + }, + { + "epoch": 2.62664794921875e-05, + "step": 17214, + "training_step_time": 0.10650253295898438 + }, + { + "epoch": 2.626800537109375e-05, + "model_forward_time": 0.025163650512695312, + "step": 17215 + }, + { + "epoch": 2.626800537109375e-05, + "step": 17215, + "training_step_time": 0.10566401481628418 + }, + { + "epoch": 2.626953125e-05, + "model_forward_time": 0.025220394134521484, + "step": 17216 + }, + { + "epoch": 2.626953125e-05, + "step": 17216, + "training_step_time": 0.1061241626739502 + }, + { + "epoch": 2.627105712890625e-05, + "model_forward_time": 0.025054931640625, + "step": 17217 + }, + { + "epoch": 2.627105712890625e-05, + "step": 17217, + "training_step_time": 0.1074988842010498 + }, + { + "epoch": 2.62725830078125e-05, + "model_forward_time": 0.02532815933227539, + "step": 17218 + }, + { + "epoch": 2.62725830078125e-05, + "step": 17218, + "training_step_time": 0.10403299331665039 + }, + { + "epoch": 2.627410888671875e-05, + "model_forward_time": 0.025821685791015625, + "step": 17219 + }, + { + "epoch": 2.627410888671875e-05, + "step": 17219, + "training_step_time": 0.1048579216003418 + }, + { + "epoch": 2.6275634765625e-05, + "grad_norm": 0.1794009804725647, + "learning_rate": 4.1933407052657456e-05, + "loss": 0.0118, + "step": 17220 + }, + { + "epoch": 2.6275634765625e-05, + "model_forward_time": 0.024385929107666016, + "step": 17220 + }, + { + "epoch": 2.6275634765625e-05, + "step": 17220, + "training_step_time": 0.1023712158203125 + }, + { + "epoch": 2.627716064453125e-05, + "model_forward_time": 0.0245819091796875, + "step": 17221 + }, + { + "epoch": 2.627716064453125e-05, + "step": 17221, + "training_step_time": 0.10754084587097168 + }, + { + "epoch": 2.62786865234375e-05, + "model_forward_time": 0.025638103485107422, + "step": 17222 + }, + { + "epoch": 2.62786865234375e-05, + "step": 17222, + "training_step_time": 0.10706353187561035 + }, + { + "epoch": 2.628021240234375e-05, + "model_forward_time": 0.025616168975830078, + "step": 17223 + }, + { + "epoch": 2.628021240234375e-05, + "step": 17223, + "training_step_time": 0.1905684471130371 + }, + { + "epoch": 2.628173828125e-05, + "model_forward_time": 0.024653196334838867, + "step": 17224 + }, + { + "epoch": 2.628173828125e-05, + "step": 17224, + "training_step_time": 0.22989368438720703 + }, + { + "epoch": 2.628326416015625e-05, + "model_forward_time": 0.024637699127197266, + "step": 17225 + }, + { + "epoch": 2.628326416015625e-05, + "step": 17225, + "training_step_time": 0.11776041984558105 + }, + { + "epoch": 2.62847900390625e-05, + "model_forward_time": 0.024598121643066406, + "step": 17226 + }, + { + "epoch": 2.62847900390625e-05, + "step": 17226, + "training_step_time": 0.1938154697418213 + }, + { + "epoch": 2.628631591796875e-05, + "model_forward_time": 0.024374008178710938, + "step": 17227 + }, + { + "epoch": 2.628631591796875e-05, + "step": 17227, + "training_step_time": 0.1888718605041504 + }, + { + "epoch": 2.6287841796875e-05, + "model_forward_time": 0.023993492126464844, + "step": 17228 + }, + { + "epoch": 2.6287841796875e-05, + "step": 17228, + "training_step_time": 0.21954083442687988 + }, + { + "epoch": 2.628936767578125e-05, + "model_forward_time": 0.024465084075927734, + "step": 17229 + }, + { + "epoch": 2.628936767578125e-05, + "step": 17229, + "training_step_time": 0.12374091148376465 + }, + { + "epoch": 2.62908935546875e-05, + "grad_norm": 0.3404396176338196, + "learning_rate": 4.187901830730906e-05, + "loss": 0.0082, + "step": 17230 + }, + { + "epoch": 2.62908935546875e-05, + "model_forward_time": 0.02412557601928711, + "step": 17230 + }, + { + "epoch": 2.62908935546875e-05, + "step": 17230, + "training_step_time": 0.11684441566467285 + }, + { + "epoch": 2.629241943359375e-05, + "model_forward_time": 0.02497076988220215, + "step": 17231 + }, + { + "epoch": 2.629241943359375e-05, + "step": 17231, + "training_step_time": 0.10600471496582031 + }, + { + "epoch": 2.62939453125e-05, + "model_forward_time": 0.025120258331298828, + "step": 17232 + }, + { + "epoch": 2.62939453125e-05, + "step": 17232, + "training_step_time": 0.11123085021972656 + }, + { + "epoch": 2.629547119140625e-05, + "model_forward_time": 0.025255203247070312, + "step": 17233 + }, + { + "epoch": 2.629547119140625e-05, + "step": 17233, + "training_step_time": 0.10569357872009277 + }, + { + "epoch": 2.62969970703125e-05, + "model_forward_time": 0.025356531143188477, + "step": 17234 + }, + { + "epoch": 2.62969970703125e-05, + "step": 17234, + "training_step_time": 0.10668516159057617 + }, + { + "epoch": 2.629852294921875e-05, + "model_forward_time": 0.02522897720336914, + "step": 17235 + }, + { + "epoch": 2.629852294921875e-05, + "step": 17235, + "training_step_time": 0.10543513298034668 + }, + { + "epoch": 2.6300048828125e-05, + "model_forward_time": 0.025376319885253906, + "step": 17236 + }, + { + "epoch": 2.6300048828125e-05, + "step": 17236, + "training_step_time": 0.10745954513549805 + }, + { + "epoch": 2.630157470703125e-05, + "model_forward_time": 0.025453567504882812, + "step": 17237 + }, + { + "epoch": 2.630157470703125e-05, + "step": 17237, + "training_step_time": 0.10630512237548828 + }, + { + "epoch": 2.63031005859375e-05, + "model_forward_time": 0.025068998336791992, + "step": 17238 + }, + { + "epoch": 2.63031005859375e-05, + "step": 17238, + "training_step_time": 0.10552573204040527 + }, + { + "epoch": 2.630462646484375e-05, + "model_forward_time": 0.025550365447998047, + "step": 17239 + }, + { + "epoch": 2.630462646484375e-05, + "step": 17239, + "training_step_time": 0.10872364044189453 + }, + { + "epoch": 2.630615234375e-05, + "grad_norm": 0.176010400056839, + "learning_rate": 4.18246394297192e-05, + "loss": 0.0089, + "step": 17240 + }, + { + "epoch": 2.630615234375e-05, + "model_forward_time": 0.025264978408813477, + "step": 17240 + }, + { + "epoch": 2.630615234375e-05, + "step": 17240, + "training_step_time": 0.10586357116699219 + }, + { + "epoch": 2.630767822265625e-05, + "model_forward_time": 0.025884628295898438, + "step": 17241 + }, + { + "epoch": 2.630767822265625e-05, + "step": 17241, + "training_step_time": 0.10614347457885742 + }, + { + "epoch": 2.63092041015625e-05, + "model_forward_time": 0.02504706382751465, + "step": 17242 + }, + { + "epoch": 2.63092041015625e-05, + "step": 17242, + "training_step_time": 0.10645174980163574 + }, + { + "epoch": 2.631072998046875e-05, + "model_forward_time": 0.025099992752075195, + "step": 17243 + }, + { + "epoch": 2.631072998046875e-05, + "step": 17243, + "training_step_time": 0.10491204261779785 + }, + { + "epoch": 2.6312255859375e-05, + "model_forward_time": 0.025243043899536133, + "step": 17244 + }, + { + "epoch": 2.6312255859375e-05, + "step": 17244, + "training_step_time": 0.13492488861083984 + }, + { + "epoch": 2.631378173828125e-05, + "model_forward_time": 0.025098085403442383, + "step": 17245 + }, + { + "epoch": 2.631378173828125e-05, + "step": 17245, + "training_step_time": 0.11101078987121582 + }, + { + "epoch": 2.63153076171875e-05, + "model_forward_time": 0.025069713592529297, + "step": 17246 + }, + { + "epoch": 2.63153076171875e-05, + "step": 17246, + "training_step_time": 0.11706113815307617 + }, + { + "epoch": 2.631683349609375e-05, + "model_forward_time": 0.02504277229309082, + "step": 17247 + }, + { + "epoch": 2.631683349609375e-05, + "step": 17247, + "training_step_time": 0.1148676872253418 + }, + { + "epoch": 2.6318359375e-05, + "model_forward_time": 0.02492976188659668, + "step": 17248 + }, + { + "epoch": 2.6318359375e-05, + "step": 17248, + "training_step_time": 0.10640382766723633 + }, + { + "epoch": 2.631988525390625e-05, + "model_forward_time": 0.026098251342773438, + "step": 17249 + }, + { + "epoch": 2.631988525390625e-05, + "step": 17249, + "training_step_time": 0.19114971160888672 + }, + { + "epoch": 2.63214111328125e-05, + "grad_norm": 0.21381649374961853, + "learning_rate": 4.17702704859633e-05, + "loss": 0.022, + "step": 17250 + }, + { + "epoch": 2.63214111328125e-05, + "model_forward_time": 0.024113178253173828, + "step": 17250 + }, + { + "epoch": 2.63214111328125e-05, + "step": 17250, + "training_step_time": 0.10228466987609863 + }, + { + "epoch": 2.632293701171875e-05, + "model_forward_time": 0.024683713912963867, + "step": 17251 + }, + { + "epoch": 2.632293701171875e-05, + "step": 17251, + "training_step_time": 0.10090756416320801 + }, + { + "epoch": 2.6324462890625e-05, + "model_forward_time": 0.025188684463500977, + "step": 17252 + }, + { + "epoch": 2.6324462890625e-05, + "step": 17252, + "training_step_time": 0.10788750648498535 + }, + { + "epoch": 2.632598876953125e-05, + "model_forward_time": 0.025177478790283203, + "step": 17253 + }, + { + "epoch": 2.632598876953125e-05, + "step": 17253, + "training_step_time": 0.10860800743103027 + }, + { + "epoch": 2.63275146484375e-05, + "model_forward_time": 0.024621248245239258, + "step": 17254 + }, + { + "epoch": 2.63275146484375e-05, + "step": 17254, + "training_step_time": 0.1089925765991211 + }, + { + "epoch": 2.632904052734375e-05, + "model_forward_time": 0.02517533302307129, + "step": 17255 + }, + { + "epoch": 2.632904052734375e-05, + "step": 17255, + "training_step_time": 0.11105942726135254 + }, + { + "epoch": 2.633056640625e-05, + "model_forward_time": 0.025098085403442383, + "step": 17256 + }, + { + "epoch": 2.633056640625e-05, + "step": 17256, + "training_step_time": 0.10741615295410156 + }, + { + "epoch": 2.633209228515625e-05, + "model_forward_time": 0.025020360946655273, + "step": 17257 + }, + { + "epoch": 2.633209228515625e-05, + "step": 17257, + "training_step_time": 0.10759210586547852 + }, + { + "epoch": 2.63336181640625e-05, + "model_forward_time": 0.0252225399017334, + "step": 17258 + }, + { + "epoch": 2.63336181640625e-05, + "step": 17258, + "training_step_time": 0.10917949676513672 + }, + { + "epoch": 2.633514404296875e-05, + "model_forward_time": 0.024944782257080078, + "step": 17259 + }, + { + "epoch": 2.633514404296875e-05, + "step": 17259, + "training_step_time": 0.10735750198364258 + }, + { + "epoch": 2.6336669921875e-05, + "grad_norm": 0.22420473396778107, + "learning_rate": 4.171591154210479e-05, + "loss": 0.01, + "step": 17260 + }, + { + "epoch": 2.6336669921875e-05, + "model_forward_time": 0.025457382202148438, + "step": 17260 + }, + { + "epoch": 2.6336669921875e-05, + "step": 17260, + "training_step_time": 0.10855531692504883 + }, + { + "epoch": 2.633819580078125e-05, + "model_forward_time": 0.025414705276489258, + "step": 17261 + }, + { + "epoch": 2.633819580078125e-05, + "step": 17261, + "training_step_time": 0.10737800598144531 + }, + { + "epoch": 2.63397216796875e-05, + "model_forward_time": 0.02512073516845703, + "step": 17262 + }, + { + "epoch": 2.63397216796875e-05, + "step": 17262, + "training_step_time": 0.10809922218322754 + }, + { + "epoch": 2.634124755859375e-05, + "model_forward_time": 0.02538752555847168, + "step": 17263 + }, + { + "epoch": 2.634124755859375e-05, + "step": 17263, + "training_step_time": 0.11210894584655762 + }, + { + "epoch": 2.63427734375e-05, + "model_forward_time": 0.025348663330078125, + "step": 17264 + }, + { + "epoch": 2.63427734375e-05, + "step": 17264, + "training_step_time": 0.10833573341369629 + }, + { + "epoch": 2.634429931640625e-05, + "model_forward_time": 0.024996519088745117, + "step": 17265 + }, + { + "epoch": 2.634429931640625e-05, + "step": 17265, + "training_step_time": 0.10490894317626953 + }, + { + "epoch": 2.63458251953125e-05, + "model_forward_time": 0.025429248809814453, + "step": 17266 + }, + { + "epoch": 2.63458251953125e-05, + "step": 17266, + "training_step_time": 0.10659646987915039 + }, + { + "epoch": 2.634735107421875e-05, + "model_forward_time": 0.02536773681640625, + "step": 17267 + }, + { + "epoch": 2.634735107421875e-05, + "step": 17267, + "training_step_time": 0.10606241226196289 + }, + { + "epoch": 2.6348876953125e-05, + "model_forward_time": 0.02514171600341797, + "step": 17268 + }, + { + "epoch": 2.6348876953125e-05, + "step": 17268, + "training_step_time": 0.10623812675476074 + }, + { + "epoch": 2.635040283203125e-05, + "model_forward_time": 0.025213241577148438, + "step": 17269 + }, + { + "epoch": 2.635040283203125e-05, + "step": 17269, + "training_step_time": 0.10567092895507812 + }, + { + "epoch": 2.63519287109375e-05, + "grad_norm": 0.19210898876190186, + "learning_rate": 4.166156266419489e-05, + "loss": 0.0091, + "step": 17270 + }, + { + "epoch": 2.63519287109375e-05, + "model_forward_time": 0.02423381805419922, + "step": 17270 + }, + { + "epoch": 2.63519287109375e-05, + "step": 17270, + "training_step_time": 0.14628362655639648 + }, + { + "epoch": 2.635345458984375e-05, + "model_forward_time": 0.025501728057861328, + "step": 17271 + }, + { + "epoch": 2.635345458984375e-05, + "step": 17271, + "training_step_time": 0.11100983619689941 + }, + { + "epoch": 2.635498046875e-05, + "model_forward_time": 0.0246124267578125, + "step": 17272 + }, + { + "epoch": 2.635498046875e-05, + "step": 17272, + "training_step_time": 0.17659521102905273 + }, + { + "epoch": 2.635650634765625e-05, + "model_forward_time": 0.02464580535888672, + "step": 17273 + }, + { + "epoch": 2.635650634765625e-05, + "step": 17273, + "training_step_time": 0.16070008277893066 + }, + { + "epoch": 2.63580322265625e-05, + "model_forward_time": 0.023994922637939453, + "step": 17274 + }, + { + "epoch": 2.63580322265625e-05, + "step": 17274, + "training_step_time": 0.2162027359008789 + }, + { + "epoch": 2.635955810546875e-05, + "model_forward_time": 0.02430438995361328, + "step": 17275 + }, + { + "epoch": 2.635955810546875e-05, + "step": 17275, + "training_step_time": 0.19339776039123535 + }, + { + "epoch": 2.6361083984375e-05, + "model_forward_time": 0.024966955184936523, + "step": 17276 + }, + { + "epoch": 2.6361083984375e-05, + "step": 17276, + "training_step_time": 0.16322994232177734 + }, + { + "epoch": 2.636260986328125e-05, + "model_forward_time": 0.02600860595703125, + "step": 17277 + }, + { + "epoch": 2.636260986328125e-05, + "step": 17277, + "training_step_time": 0.1701974868774414 + }, + { + "epoch": 2.63641357421875e-05, + "model_forward_time": 0.024682998657226562, + "step": 17278 + }, + { + "epoch": 2.63641357421875e-05, + "step": 17278, + "training_step_time": 0.12079191207885742 + }, + { + "epoch": 2.636566162109375e-05, + "model_forward_time": 0.02451467514038086, + "step": 17279 + }, + { + "epoch": 2.636566162109375e-05, + "step": 17279, + "training_step_time": 0.10463428497314453 + }, + { + "epoch": 2.63671875e-05, + "grad_norm": 0.1971217542886734, + "learning_rate": 4.160722391827262e-05, + "loss": 0.0072, + "step": 17280 + }, + { + "epoch": 2.63671875e-05, + "model_forward_time": 0.026851654052734375, + "step": 17280 + }, + { + "epoch": 2.63671875e-05, + "step": 17280, + "training_step_time": 0.10940814018249512 + }, + { + "epoch": 2.636871337890625e-05, + "model_forward_time": 0.025453805923461914, + "step": 17281 + }, + { + "epoch": 2.636871337890625e-05, + "step": 17281, + "training_step_time": 0.1073918342590332 + }, + { + "epoch": 2.63702392578125e-05, + "model_forward_time": 0.025534868240356445, + "step": 17282 + }, + { + "epoch": 2.63702392578125e-05, + "step": 17282, + "training_step_time": 0.10864138603210449 + }, + { + "epoch": 2.637176513671875e-05, + "model_forward_time": 0.024863243103027344, + "step": 17283 + }, + { + "epoch": 2.637176513671875e-05, + "step": 17283, + "training_step_time": 0.10479903221130371 + }, + { + "epoch": 2.6373291015625e-05, + "model_forward_time": 0.02559185028076172, + "step": 17284 + }, + { + "epoch": 2.6373291015625e-05, + "step": 17284, + "training_step_time": 0.10883474349975586 + }, + { + "epoch": 2.637481689453125e-05, + "model_forward_time": 0.02559351921081543, + "step": 17285 + }, + { + "epoch": 2.637481689453125e-05, + "step": 17285, + "training_step_time": 0.10543560981750488 + }, + { + "epoch": 2.63763427734375e-05, + "model_forward_time": 0.025194168090820312, + "step": 17286 + }, + { + "epoch": 2.63763427734375e-05, + "step": 17286, + "training_step_time": 0.10472941398620605 + }, + { + "epoch": 2.637786865234375e-05, + "model_forward_time": 0.02605891227722168, + "step": 17287 + }, + { + "epoch": 2.637786865234375e-05, + "step": 17287, + "training_step_time": 0.10611367225646973 + }, + { + "epoch": 2.637939453125e-05, + "model_forward_time": 0.0252530574798584, + "step": 17288 + }, + { + "epoch": 2.637939453125e-05, + "step": 17288, + "training_step_time": 0.10325503349304199 + }, + { + "epoch": 2.638092041015625e-05, + "model_forward_time": 0.02496504783630371, + "step": 17289 + }, + { + "epoch": 2.638092041015625e-05, + "step": 17289, + "training_step_time": 0.1527245044708252 + }, + { + "epoch": 2.63824462890625e-05, + "grad_norm": 0.11559871584177017, + "learning_rate": 4.155289537036466e-05, + "loss": 0.0118, + "step": 17290 + }, + { + "epoch": 2.63824462890625e-05, + "model_forward_time": 0.024950742721557617, + "step": 17290 + }, + { + "epoch": 2.63824462890625e-05, + "step": 17290, + "training_step_time": 0.14170336723327637 + }, + { + "epoch": 2.638397216796875e-05, + "model_forward_time": 0.024463891983032227, + "step": 17291 + }, + { + "epoch": 2.638397216796875e-05, + "step": 17291, + "training_step_time": 0.10897707939147949 + }, + { + "epoch": 2.6385498046875e-05, + "model_forward_time": 0.025054216384887695, + "step": 17292 + }, + { + "epoch": 2.6385498046875e-05, + "step": 17292, + "training_step_time": 0.12053084373474121 + }, + { + "epoch": 2.638702392578125e-05, + "model_forward_time": 0.025673627853393555, + "step": 17293 + }, + { + "epoch": 2.638702392578125e-05, + "step": 17293, + "training_step_time": 0.10564637184143066 + }, + { + "epoch": 2.63885498046875e-05, + "model_forward_time": 0.02535533905029297, + "step": 17294 + }, + { + "epoch": 2.63885498046875e-05, + "step": 17294, + "training_step_time": 0.10338950157165527 + }, + { + "epoch": 2.639007568359375e-05, + "model_forward_time": 0.025218486785888672, + "step": 17295 + }, + { + "epoch": 2.639007568359375e-05, + "step": 17295, + "training_step_time": 0.16329336166381836 + }, + { + "epoch": 2.63916015625e-05, + "model_forward_time": 0.025038957595825195, + "step": 17296 + }, + { + "epoch": 2.63916015625e-05, + "step": 17296, + "training_step_time": 0.10536670684814453 + }, + { + "epoch": 2.639312744140625e-05, + "model_forward_time": 0.02482295036315918, + "step": 17297 + }, + { + "epoch": 2.639312744140625e-05, + "step": 17297, + "training_step_time": 0.10226035118103027 + }, + { + "epoch": 2.63946533203125e-05, + "model_forward_time": 0.026494741439819336, + "step": 17298 + }, + { + "epoch": 2.63946533203125e-05, + "step": 17298, + "training_step_time": 0.14969491958618164 + }, + { + "epoch": 2.639617919921875e-05, + "model_forward_time": 0.02373647689819336, + "step": 17299 + }, + { + "epoch": 2.639617919921875e-05, + "step": 17299, + "training_step_time": 0.18056702613830566 + }, + { + "epoch": 2.6397705078125e-05, + "grad_norm": 0.22532442212104797, + "learning_rate": 4.149857708648536e-05, + "loss": 0.009, + "step": 17300 + }, + { + "epoch": 2.6397705078125e-05, + "model_forward_time": 0.02420639991760254, + "step": 17300 + }, + { + "epoch": 2.6397705078125e-05, + "step": 17300, + "training_step_time": 0.1705455780029297 + }, + { + "epoch": 2.639923095703125e-05, + "model_forward_time": 0.023001909255981445, + "step": 17301 + }, + { + "epoch": 2.639923095703125e-05, + "step": 17301, + "training_step_time": 0.15152192115783691 + }, + { + "epoch": 2.64007568359375e-05, + "model_forward_time": 0.023016691207885742, + "step": 17302 + }, + { + "epoch": 2.64007568359375e-05, + "step": 17302, + "training_step_time": 0.1493997573852539 + }, + { + "epoch": 2.640228271484375e-05, + "model_forward_time": 0.023234844207763672, + "step": 17303 + }, + { + "epoch": 2.640228271484375e-05, + "step": 17303, + "training_step_time": 0.1351921558380127 + }, + { + "epoch": 2.640380859375e-05, + "model_forward_time": 0.023465871810913086, + "step": 17304 + }, + { + "epoch": 2.640380859375e-05, + "step": 17304, + "training_step_time": 0.13282036781311035 + }, + { + "epoch": 2.640533447265625e-05, + "model_forward_time": 0.022979021072387695, + "step": 17305 + }, + { + "epoch": 2.640533447265625e-05, + "step": 17305, + "training_step_time": 0.1275489330291748 + }, + { + "epoch": 2.64068603515625e-05, + "model_forward_time": 0.023621082305908203, + "step": 17306 + }, + { + "epoch": 2.64068603515625e-05, + "step": 17306, + "training_step_time": 0.12679553031921387 + }, + { + "epoch": 2.640838623046875e-05, + "model_forward_time": 0.024034500122070312, + "step": 17307 + }, + { + "epoch": 2.640838623046875e-05, + "step": 17307, + "training_step_time": 0.12052083015441895 + }, + { + "epoch": 2.6409912109375e-05, + "model_forward_time": 0.024214744567871094, + "step": 17308 + }, + { + "epoch": 2.6409912109375e-05, + "step": 17308, + "training_step_time": 0.1169443130493164 + }, + { + "epoch": 2.641143798828125e-05, + "model_forward_time": 0.024995088577270508, + "step": 17309 + }, + { + "epoch": 2.641143798828125e-05, + "step": 17309, + "training_step_time": 0.11229920387268066 + }, + { + "epoch": 2.64129638671875e-05, + "grad_norm": 0.28290852904319763, + "learning_rate": 4.14442691326365e-05, + "loss": 0.0112, + "step": 17310 + }, + { + "epoch": 2.64129638671875e-05, + "model_forward_time": 0.02481698989868164, + "step": 17310 + }, + { + "epoch": 2.64129638671875e-05, + "step": 17310, + "training_step_time": 0.11238884925842285 + }, + { + "epoch": 2.641448974609375e-05, + "model_forward_time": 0.025311946868896484, + "step": 17311 + }, + { + "epoch": 2.641448974609375e-05, + "step": 17311, + "training_step_time": 0.11097502708435059 + }, + { + "epoch": 2.6416015625e-05, + "model_forward_time": 0.024950027465820312, + "step": 17312 + }, + { + "epoch": 2.6416015625e-05, + "step": 17312, + "training_step_time": 0.10665178298950195 + }, + { + "epoch": 2.641754150390625e-05, + "model_forward_time": 0.025180816650390625, + "step": 17313 + }, + { + "epoch": 2.641754150390625e-05, + "step": 17313, + "training_step_time": 0.15871763229370117 + }, + { + "epoch": 2.64190673828125e-05, + "model_forward_time": 0.024593353271484375, + "step": 17314 + }, + { + "epoch": 2.64190673828125e-05, + "step": 17314, + "training_step_time": 0.10336470603942871 + }, + { + "epoch": 2.642059326171875e-05, + "model_forward_time": 0.024129629135131836, + "step": 17315 + }, + { + "epoch": 2.642059326171875e-05, + "step": 17315, + "training_step_time": 0.14912867546081543 + }, + { + "epoch": 2.6422119140625e-05, + "model_forward_time": 0.02430438995361328, + "step": 17316 + }, + { + "epoch": 2.6422119140625e-05, + "step": 17316, + "training_step_time": 0.1629195213317871 + }, + { + "epoch": 2.642364501953125e-05, + "model_forward_time": 0.024187088012695312, + "step": 17317 + }, + { + "epoch": 2.642364501953125e-05, + "step": 17317, + "training_step_time": 0.19014239311218262 + }, + { + "epoch": 2.64251708984375e-05, + "model_forward_time": 0.024280548095703125, + "step": 17318 + }, + { + "epoch": 2.64251708984375e-05, + "step": 17318, + "training_step_time": 0.16835427284240723 + }, + { + "epoch": 2.642669677734375e-05, + "model_forward_time": 0.0245969295501709, + "step": 17319 + }, + { + "epoch": 2.642669677734375e-05, + "step": 17319, + "training_step_time": 0.13764142990112305 + }, + { + "epoch": 2.642822265625e-05, + "grad_norm": 0.1506081372499466, + "learning_rate": 4.1389971574807416e-05, + "loss": 0.0057, + "step": 17320 + }, + { + "epoch": 2.642822265625e-05, + "model_forward_time": 0.02449178695678711, + "step": 17320 + }, + { + "epoch": 2.642822265625e-05, + "step": 17320, + "training_step_time": 0.12434720993041992 + }, + { + "epoch": 2.642974853515625e-05, + "model_forward_time": 0.02458930015563965, + "step": 17321 + }, + { + "epoch": 2.642974853515625e-05, + "step": 17321, + "training_step_time": 0.1180570125579834 + }, + { + "epoch": 2.64312744140625e-05, + "model_forward_time": 0.025118350982666016, + "step": 17322 + }, + { + "epoch": 2.64312744140625e-05, + "step": 17322, + "training_step_time": 0.11302328109741211 + }, + { + "epoch": 2.643280029296875e-05, + "model_forward_time": 0.02489018440246582, + "step": 17323 + }, + { + "epoch": 2.643280029296875e-05, + "step": 17323, + "training_step_time": 0.10639643669128418 + }, + { + "epoch": 2.6434326171875e-05, + "model_forward_time": 0.025298595428466797, + "step": 17324 + }, + { + "epoch": 2.6434326171875e-05, + "step": 17324, + "training_step_time": 0.10550618171691895 + }, + { + "epoch": 2.643585205078125e-05, + "model_forward_time": 0.02501964569091797, + "step": 17325 + }, + { + "epoch": 2.643585205078125e-05, + "step": 17325, + "training_step_time": 0.1074678897857666 + }, + { + "epoch": 2.64373779296875e-05, + "model_forward_time": 0.025665760040283203, + "step": 17326 + }, + { + "epoch": 2.64373779296875e-05, + "step": 17326, + "training_step_time": 0.10670995712280273 + }, + { + "epoch": 2.643890380859375e-05, + "model_forward_time": 0.02523183822631836, + "step": 17327 + }, + { + "epoch": 2.643890380859375e-05, + "step": 17327, + "training_step_time": 0.11145305633544922 + }, + { + "epoch": 2.64404296875e-05, + "model_forward_time": 0.025435447692871094, + "step": 17328 + }, + { + "epoch": 2.64404296875e-05, + "step": 17328, + "training_step_time": 0.1056814193725586 + }, + { + "epoch": 2.644195556640625e-05, + "model_forward_time": 0.02504420280456543, + "step": 17329 + }, + { + "epoch": 2.644195556640625e-05, + "step": 17329, + "training_step_time": 0.10418152809143066 + }, + { + "epoch": 2.64434814453125e-05, + "grad_norm": 0.15126276016235352, + "learning_rate": 4.1335684478974744e-05, + "loss": 0.0078, + "step": 17330 + }, + { + "epoch": 2.64434814453125e-05, + "model_forward_time": 0.025588274002075195, + "step": 17330 + }, + { + "epoch": 2.64434814453125e-05, + "step": 17330, + "training_step_time": 0.10495567321777344 + }, + { + "epoch": 2.644500732421875e-05, + "model_forward_time": 0.025192737579345703, + "step": 17331 + }, + { + "epoch": 2.644500732421875e-05, + "step": 17331, + "training_step_time": 0.10581445693969727 + }, + { + "epoch": 2.6446533203125e-05, + "model_forward_time": 0.02521204948425293, + "step": 17332 + }, + { + "epoch": 2.6446533203125e-05, + "step": 17332, + "training_step_time": 0.11215448379516602 + }, + { + "epoch": 2.644805908203125e-05, + "model_forward_time": 0.025271892547607422, + "step": 17333 + }, + { + "epoch": 2.644805908203125e-05, + "step": 17333, + "training_step_time": 0.11980390548706055 + }, + { + "epoch": 2.64495849609375e-05, + "model_forward_time": 0.02512073516845703, + "step": 17334 + }, + { + "epoch": 2.64495849609375e-05, + "step": 17334, + "training_step_time": 0.13007402420043945 + }, + { + "epoch": 2.645111083984375e-05, + "model_forward_time": 0.02453303337097168, + "step": 17335 + }, + { + "epoch": 2.645111083984375e-05, + "step": 17335, + "training_step_time": 0.1090550422668457 + }, + { + "epoch": 2.645263671875e-05, + "model_forward_time": 0.025823593139648438, + "step": 17336 + }, + { + "epoch": 2.645263671875e-05, + "step": 17336, + "training_step_time": 0.12313580513000488 + }, + { + "epoch": 2.645416259765625e-05, + "model_forward_time": 0.025315046310424805, + "step": 17337 + }, + { + "epoch": 2.645416259765625e-05, + "step": 17337, + "training_step_time": 0.1257326602935791 + }, + { + "epoch": 2.64556884765625e-05, + "model_forward_time": 0.025159835815429688, + "step": 17338 + }, + { + "epoch": 2.64556884765625e-05, + "step": 17338, + "training_step_time": 0.18605780601501465 + }, + { + "epoch": 2.645721435546875e-05, + "model_forward_time": 0.02439141273498535, + "step": 17339 + }, + { + "epoch": 2.645721435546875e-05, + "step": 17339, + "training_step_time": 0.11363887786865234 + }, + { + "epoch": 2.6458740234375e-05, + "grad_norm": 0.13624484837055206, + "learning_rate": 4.1281407911102425e-05, + "loss": 0.0113, + "step": 17340 + }, + { + "epoch": 2.6458740234375e-05, + "model_forward_time": 0.023279190063476562, + "step": 17340 + }, + { + "epoch": 2.6458740234375e-05, + "step": 17340, + "training_step_time": 0.11270380020141602 + }, + { + "epoch": 2.646026611328125e-05, + "model_forward_time": 0.024838685989379883, + "step": 17341 + }, + { + "epoch": 2.646026611328125e-05, + "step": 17341, + "training_step_time": 0.1092836856842041 + }, + { + "epoch": 2.64617919921875e-05, + "model_forward_time": 0.025051593780517578, + "step": 17342 + }, + { + "epoch": 2.64617919921875e-05, + "step": 17342, + "training_step_time": 0.11126255989074707 + }, + { + "epoch": 2.646331787109375e-05, + "model_forward_time": 0.025032520294189453, + "step": 17343 + }, + { + "epoch": 2.646331787109375e-05, + "step": 17343, + "training_step_time": 0.10983967781066895 + }, + { + "epoch": 2.646484375e-05, + "model_forward_time": 0.024884700775146484, + "step": 17344 + }, + { + "epoch": 2.646484375e-05, + "step": 17344, + "training_step_time": 0.10742068290710449 + }, + { + "epoch": 2.646636962890625e-05, + "model_forward_time": 0.025624990463256836, + "step": 17345 + }, + { + "epoch": 2.646636962890625e-05, + "step": 17345, + "training_step_time": 0.10619139671325684 + }, + { + "epoch": 2.64678955078125e-05, + "model_forward_time": 0.024969816207885742, + "step": 17346 + }, + { + "epoch": 2.64678955078125e-05, + "step": 17346, + "training_step_time": 0.10596013069152832 + }, + { + "epoch": 2.646942138671875e-05, + "model_forward_time": 0.025330543518066406, + "step": 17347 + }, + { + "epoch": 2.646942138671875e-05, + "step": 17347, + "training_step_time": 0.10667634010314941 + }, + { + "epoch": 2.6470947265625e-05, + "model_forward_time": 0.025158405303955078, + "step": 17348 + }, + { + "epoch": 2.6470947265625e-05, + "step": 17348, + "training_step_time": 0.1053316593170166 + }, + { + "epoch": 2.647247314453125e-05, + "model_forward_time": 0.024869918823242188, + "step": 17349 + }, + { + "epoch": 2.647247314453125e-05, + "step": 17349, + "training_step_time": 0.10649418830871582 + }, + { + "epoch": 2.64739990234375e-05, + "grad_norm": 0.09851908683776855, + "learning_rate": 4.12271419371416e-05, + "loss": 0.0072, + "step": 17350 + }, + { + "epoch": 2.64739990234375e-05, + "model_forward_time": 0.02512669563293457, + "step": 17350 + }, + { + "epoch": 2.64739990234375e-05, + "step": 17350, + "training_step_time": 0.10622787475585938 + }, + { + "epoch": 2.647552490234375e-05, + "model_forward_time": 0.025164127349853516, + "step": 17351 + }, + { + "epoch": 2.647552490234375e-05, + "step": 17351, + "training_step_time": 0.1058645248413086 + }, + { + "epoch": 2.647705078125e-05, + "model_forward_time": 0.026790380477905273, + "step": 17352 + }, + { + "epoch": 2.647705078125e-05, + "step": 17352, + "training_step_time": 0.10963869094848633 + }, + { + "epoch": 2.647857666015625e-05, + "model_forward_time": 0.025164365768432617, + "step": 17353 + }, + { + "epoch": 2.647857666015625e-05, + "step": 17353, + "training_step_time": 0.1059424877166748 + }, + { + "epoch": 2.64801025390625e-05, + "model_forward_time": 0.024867534637451172, + "step": 17354 + }, + { + "epoch": 2.64801025390625e-05, + "step": 17354, + "training_step_time": 0.1049659252166748 + }, + { + "epoch": 2.648162841796875e-05, + "model_forward_time": 0.0250091552734375, + "step": 17355 + }, + { + "epoch": 2.648162841796875e-05, + "step": 17355, + "training_step_time": 0.10384726524353027 + }, + { + "epoch": 2.6483154296875e-05, + "model_forward_time": 0.0248873233795166, + "step": 17356 + }, + { + "epoch": 2.6483154296875e-05, + "step": 17356, + "training_step_time": 0.10447549819946289 + }, + { + "epoch": 2.648468017578125e-05, + "model_forward_time": 0.02530360221862793, + "step": 17357 + }, + { + "epoch": 2.648468017578125e-05, + "step": 17357, + "training_step_time": 0.10425162315368652 + }, + { + "epoch": 2.64862060546875e-05, + "model_forward_time": 0.02587294578552246, + "step": 17358 + }, + { + "epoch": 2.64862060546875e-05, + "step": 17358, + "training_step_time": 0.10603785514831543 + }, + { + "epoch": 2.648773193359375e-05, + "model_forward_time": 0.02524566650390625, + "step": 17359 + }, + { + "epoch": 2.648773193359375e-05, + "step": 17359, + "training_step_time": 0.10524725914001465 + }, + { + "epoch": 2.64892578125e-05, + "grad_norm": 0.2906000018119812, + "learning_rate": 4.1172886623030526e-05, + "loss": 0.0146, + "step": 17360 + }, + { + "epoch": 2.64892578125e-05, + "model_forward_time": 0.0249478816986084, + "step": 17360 + }, + { + "epoch": 2.64892578125e-05, + "step": 17360, + "training_step_time": 0.10855603218078613 + }, + { + "epoch": 2.649078369140625e-05, + "model_forward_time": 0.0253143310546875, + "step": 17361 + }, + { + "epoch": 2.649078369140625e-05, + "step": 17361, + "training_step_time": 0.16248512268066406 + }, + { + "epoch": 2.64923095703125e-05, + "model_forward_time": 0.02475714683532715, + "step": 17362 + }, + { + "epoch": 2.64923095703125e-05, + "step": 17362, + "training_step_time": 0.11358499526977539 + }, + { + "epoch": 2.649383544921875e-05, + "model_forward_time": 0.024197816848754883, + "step": 17363 + }, + { + "epoch": 2.649383544921875e-05, + "step": 17363, + "training_step_time": 0.13588547706604004 + }, + { + "epoch": 2.6495361328125e-05, + "model_forward_time": 0.024999141693115234, + "step": 17364 + }, + { + "epoch": 2.6495361328125e-05, + "step": 17364, + "training_step_time": 0.1621096134185791 + }, + { + "epoch": 2.649688720703125e-05, + "model_forward_time": 0.025062084197998047, + "step": 17365 + }, + { + "epoch": 2.649688720703125e-05, + "step": 17365, + "training_step_time": 0.21131491661071777 + }, + { + "epoch": 2.64984130859375e-05, + "model_forward_time": 0.024469375610351562, + "step": 17366 + }, + { + "epoch": 2.64984130859375e-05, + "step": 17366, + "training_step_time": 0.1831972599029541 + }, + { + "epoch": 2.649993896484375e-05, + "model_forward_time": 0.0245363712310791, + "step": 17367 + }, + { + "epoch": 2.649993896484375e-05, + "step": 17367, + "training_step_time": 0.1284778118133545 + }, + { + "epoch": 2.650146484375e-05, + "model_forward_time": 0.024142742156982422, + "step": 17368 + }, + { + "epoch": 2.650146484375e-05, + "step": 17368, + "training_step_time": 0.13205671310424805 + }, + { + "epoch": 2.650299072265625e-05, + "model_forward_time": 0.02471470832824707, + "step": 17369 + }, + { + "epoch": 2.650299072265625e-05, + "step": 17369, + "training_step_time": 0.10786938667297363 + }, + { + "epoch": 2.65045166015625e-05, + "grad_norm": 0.21624208986759186, + "learning_rate": 4.111864203469457e-05, + "loss": 0.0095, + "step": 17370 + }, + { + "epoch": 2.65045166015625e-05, + "model_forward_time": 0.024713993072509766, + "step": 17370 + }, + { + "epoch": 2.65045166015625e-05, + "step": 17370, + "training_step_time": 0.11575579643249512 + }, + { + "epoch": 2.650604248046875e-05, + "model_forward_time": 0.02503657341003418, + "step": 17371 + }, + { + "epoch": 2.650604248046875e-05, + "step": 17371, + "training_step_time": 0.10616540908813477 + }, + { + "epoch": 2.6507568359375e-05, + "model_forward_time": 0.024625062942504883, + "step": 17372 + }, + { + "epoch": 2.6507568359375e-05, + "step": 17372, + "training_step_time": 0.10445523262023926 + }, + { + "epoch": 2.650909423828125e-05, + "model_forward_time": 0.025371074676513672, + "step": 17373 + }, + { + "epoch": 2.650909423828125e-05, + "step": 17373, + "training_step_time": 0.10759639739990234 + }, + { + "epoch": 2.65106201171875e-05, + "model_forward_time": 0.024988174438476562, + "step": 17374 + }, + { + "epoch": 2.65106201171875e-05, + "step": 17374, + "training_step_time": 0.12104511260986328 + }, + { + "epoch": 2.651214599609375e-05, + "model_forward_time": 0.0249483585357666, + "step": 17375 + }, + { + "epoch": 2.651214599609375e-05, + "step": 17375, + "training_step_time": 0.11311626434326172 + }, + { + "epoch": 2.6513671875e-05, + "model_forward_time": 0.02491021156311035, + "step": 17376 + }, + { + "epoch": 2.6513671875e-05, + "step": 17376, + "training_step_time": 0.11319541931152344 + }, + { + "epoch": 2.651519775390625e-05, + "model_forward_time": 0.02414679527282715, + "step": 17377 + }, + { + "epoch": 2.651519775390625e-05, + "step": 17377, + "training_step_time": 0.11836695671081543 + }, + { + "epoch": 2.65167236328125e-05, + "model_forward_time": 0.02394843101501465, + "step": 17378 + }, + { + "epoch": 2.65167236328125e-05, + "step": 17378, + "training_step_time": 0.11460113525390625 + }, + { + "epoch": 2.651824951171875e-05, + "model_forward_time": 0.023104190826416016, + "step": 17379 + }, + { + "epoch": 2.651824951171875e-05, + "step": 17379, + "training_step_time": 0.12064480781555176 + }, + { + "epoch": 2.6519775390625e-05, + "grad_norm": 0.22850698232650757, + "learning_rate": 4.1064408238045994e-05, + "loss": 0.0082, + "step": 17380 + }, + { + "epoch": 2.6519775390625e-05, + "model_forward_time": 0.023877859115600586, + "step": 17380 + }, + { + "epoch": 2.6519775390625e-05, + "step": 17380, + "training_step_time": 0.13541173934936523 + }, + { + "epoch": 2.652130126953125e-05, + "model_forward_time": 0.024523496627807617, + "step": 17381 + }, + { + "epoch": 2.652130126953125e-05, + "step": 17381, + "training_step_time": 0.1353771686553955 + }, + { + "epoch": 2.65228271484375e-05, + "model_forward_time": 0.027658462524414062, + "step": 17382 + }, + { + "epoch": 2.65228271484375e-05, + "step": 17382, + "training_step_time": 0.11253714561462402 + }, + { + "epoch": 2.652435302734375e-05, + "model_forward_time": 0.024641036987304688, + "step": 17383 + }, + { + "epoch": 2.652435302734375e-05, + "step": 17383, + "training_step_time": 0.11119627952575684 + }, + { + "epoch": 2.652587890625e-05, + "model_forward_time": 0.024518728256225586, + "step": 17384 + }, + { + "epoch": 2.652587890625e-05, + "step": 17384, + "training_step_time": 0.11068272590637207 + }, + { + "epoch": 2.652740478515625e-05, + "model_forward_time": 0.024587154388427734, + "step": 17385 + }, + { + "epoch": 2.652740478515625e-05, + "step": 17385, + "training_step_time": 0.10877060890197754 + }, + { + "epoch": 2.65289306640625e-05, + "model_forward_time": 0.024904727935791016, + "step": 17386 + }, + { + "epoch": 2.65289306640625e-05, + "step": 17386, + "training_step_time": 0.19142818450927734 + }, + { + "epoch": 2.653045654296875e-05, + "model_forward_time": 0.02515101432800293, + "step": 17387 + }, + { + "epoch": 2.653045654296875e-05, + "step": 17387, + "training_step_time": 0.1065223217010498 + }, + { + "epoch": 2.6531982421875e-05, + "model_forward_time": 0.024884462356567383, + "step": 17388 + }, + { + "epoch": 2.6531982421875e-05, + "step": 17388, + "training_step_time": 0.1046895980834961 + }, + { + "epoch": 2.653350830078125e-05, + "model_forward_time": 0.024915695190429688, + "step": 17389 + }, + { + "epoch": 2.653350830078125e-05, + "step": 17389, + "training_step_time": 0.10771393775939941 + }, + { + "epoch": 2.65350341796875e-05, + "grad_norm": 0.11718787997961044, + "learning_rate": 4.1010185298983984e-05, + "loss": 0.0083, + "step": 17390 + }, + { + "epoch": 2.65350341796875e-05, + "model_forward_time": 0.02546548843383789, + "step": 17390 + }, + { + "epoch": 2.65350341796875e-05, + "step": 17390, + "training_step_time": 0.10936498641967773 + }, + { + "epoch": 2.653656005859375e-05, + "model_forward_time": 0.024692773818969727, + "step": 17391 + }, + { + "epoch": 2.653656005859375e-05, + "step": 17391, + "training_step_time": 0.10518097877502441 + }, + { + "epoch": 2.65380859375e-05, + "model_forward_time": 0.02512192726135254, + "step": 17392 + }, + { + "epoch": 2.65380859375e-05, + "step": 17392, + "training_step_time": 0.10402536392211914 + }, + { + "epoch": 2.653961181640625e-05, + "model_forward_time": 0.02496051788330078, + "step": 17393 + }, + { + "epoch": 2.653961181640625e-05, + "step": 17393, + "training_step_time": 0.10575103759765625 + }, + { + "epoch": 2.65411376953125e-05, + "model_forward_time": 0.024969816207885742, + "step": 17394 + }, + { + "epoch": 2.65411376953125e-05, + "step": 17394, + "training_step_time": 0.10711193084716797 + }, + { + "epoch": 2.654266357421875e-05, + "model_forward_time": 0.025255918502807617, + "step": 17395 + }, + { + "epoch": 2.654266357421875e-05, + "step": 17395, + "training_step_time": 0.10687065124511719 + }, + { + "epoch": 2.6544189453125e-05, + "model_forward_time": 0.02545475959777832, + "step": 17396 + }, + { + "epoch": 2.6544189453125e-05, + "step": 17396, + "training_step_time": 0.10602188110351562 + }, + { + "epoch": 2.654571533203125e-05, + "model_forward_time": 0.024350404739379883, + "step": 17397 + }, + { + "epoch": 2.654571533203125e-05, + "step": 17397, + "training_step_time": 0.10664248466491699 + }, + { + "epoch": 2.65472412109375e-05, + "model_forward_time": 0.024203062057495117, + "step": 17398 + }, + { + "epoch": 2.65472412109375e-05, + "step": 17398, + "training_step_time": 0.11047029495239258 + }, + { + "epoch": 2.654876708984375e-05, + "model_forward_time": 0.024906635284423828, + "step": 17399 + }, + { + "epoch": 2.654876708984375e-05, + "step": 17399, + "training_step_time": 0.10651612281799316 + }, + { + "epoch": 2.655029296875e-05, + "grad_norm": 0.17764317989349365, + "learning_rate": 4.095597328339452e-05, + "loss": 0.0124, + "step": 17400 + }, + { + "epoch": 2.655029296875e-05, + "model_forward_time": 0.024923324584960938, + "step": 17400 + }, + { + "epoch": 2.655029296875e-05, + "step": 17400, + "training_step_time": 0.10562252998352051 + }, + { + "epoch": 2.655181884765625e-05, + "model_forward_time": 0.024651765823364258, + "step": 17401 + }, + { + "epoch": 2.655181884765625e-05, + "step": 17401, + "training_step_time": 0.10401678085327148 + }, + { + "epoch": 2.65533447265625e-05, + "model_forward_time": 0.02510690689086914, + "step": 17402 + }, + { + "epoch": 2.65533447265625e-05, + "step": 17402, + "training_step_time": 0.10438370704650879 + }, + { + "epoch": 2.655487060546875e-05, + "model_forward_time": 0.02412271499633789, + "step": 17403 + }, + { + "epoch": 2.655487060546875e-05, + "step": 17403, + "training_step_time": 0.10577702522277832 + }, + { + "epoch": 2.6556396484375e-05, + "model_forward_time": 0.02506232261657715, + "step": 17404 + }, + { + "epoch": 2.6556396484375e-05, + "step": 17404, + "training_step_time": 0.10733413696289062 + }, + { + "epoch": 2.655792236328125e-05, + "model_forward_time": 0.024585247039794922, + "step": 17405 + }, + { + "epoch": 2.655792236328125e-05, + "step": 17405, + "training_step_time": 0.1061396598815918 + }, + { + "epoch": 2.65594482421875e-05, + "model_forward_time": 0.02439737319946289, + "step": 17406 + }, + { + "epoch": 2.65594482421875e-05, + "step": 17406, + "training_step_time": 0.10524320602416992 + }, + { + "epoch": 2.656097412109375e-05, + "model_forward_time": 0.025557994842529297, + "step": 17407 + }, + { + "epoch": 2.656097412109375e-05, + "step": 17407, + "training_step_time": 0.10546684265136719 + }, + { + "epoch": 2.65625e-05, + "model_forward_time": 0.02512073516845703, + "step": 17408 + }, + { + "epoch": 2.65625e-05, + "step": 17408, + "training_step_time": 0.10961747169494629 + }, + { + "epoch": 2.656402587890625e-05, + "model_forward_time": 0.025422334671020508, + "step": 17409 + }, + { + "epoch": 2.656402587890625e-05, + "step": 17409, + "training_step_time": 0.11741209030151367 + }, + { + "epoch": 2.65655517578125e-05, + "grad_norm": 0.2733633816242218, + "learning_rate": 4.09017722571503e-05, + "loss": 0.0054, + "step": 17410 + }, + { + "epoch": 2.65655517578125e-05, + "model_forward_time": 0.024267196655273438, + "step": 17410 + }, + { + "epoch": 2.65655517578125e-05, + "step": 17410, + "training_step_time": 0.12797117233276367 + }, + { + "epoch": 2.656707763671875e-05, + "model_forward_time": 0.024196624755859375, + "step": 17411 + }, + { + "epoch": 2.656707763671875e-05, + "step": 17411, + "training_step_time": 0.16608357429504395 + }, + { + "epoch": 2.6568603515625e-05, + "model_forward_time": 0.024645090103149414, + "step": 17412 + }, + { + "epoch": 2.6568603515625e-05, + "step": 17412, + "training_step_time": 0.12074518203735352 + }, + { + "epoch": 2.657012939453125e-05, + "model_forward_time": 0.024386882781982422, + "step": 17413 + }, + { + "epoch": 2.657012939453125e-05, + "step": 17413, + "training_step_time": 0.2061324119567871 + }, + { + "epoch": 2.65716552734375e-05, + "model_forward_time": 0.0244748592376709, + "step": 17414 + }, + { + "epoch": 2.65716552734375e-05, + "step": 17414, + "training_step_time": 0.13633227348327637 + }, + { + "epoch": 2.657318115234375e-05, + "model_forward_time": 0.0245974063873291, + "step": 17415 + }, + { + "epoch": 2.657318115234375e-05, + "step": 17415, + "training_step_time": 0.12576532363891602 + }, + { + "epoch": 2.657470703125e-05, + "model_forward_time": 0.024877548217773438, + "step": 17416 + }, + { + "epoch": 2.657470703125e-05, + "step": 17416, + "training_step_time": 0.12431168556213379 + }, + { + "epoch": 2.657623291015625e-05, + "model_forward_time": 0.025235652923583984, + "step": 17417 + }, + { + "epoch": 2.657623291015625e-05, + "step": 17417, + "training_step_time": 0.11294889450073242 + }, + { + "epoch": 2.65777587890625e-05, + "model_forward_time": 0.025366783142089844, + "step": 17418 + }, + { + "epoch": 2.65777587890625e-05, + "step": 17418, + "training_step_time": 0.1235969066619873 + }, + { + "epoch": 2.657928466796875e-05, + "model_forward_time": 0.025411367416381836, + "step": 17419 + }, + { + "epoch": 2.657928466796875e-05, + "step": 17419, + "training_step_time": 0.1071782112121582 + }, + { + "epoch": 2.6580810546875e-05, + "grad_norm": 0.29529955983161926, + "learning_rate": 4.08475822861107e-05, + "loss": 0.0113, + "step": 17420 + }, + { + "epoch": 2.6580810546875e-05, + "model_forward_time": 0.025249242782592773, + "step": 17420 + }, + { + "epoch": 2.6580810546875e-05, + "step": 17420, + "training_step_time": 0.10512804985046387 + }, + { + "epoch": 2.658233642578125e-05, + "model_forward_time": 0.025555849075317383, + "step": 17421 + }, + { + "epoch": 2.658233642578125e-05, + "step": 17421, + "training_step_time": 0.10470390319824219 + }, + { + "epoch": 2.65838623046875e-05, + "model_forward_time": 0.02538752555847168, + "step": 17422 + }, + { + "epoch": 2.65838623046875e-05, + "step": 17422, + "training_step_time": 0.10553383827209473 + }, + { + "epoch": 2.658538818359375e-05, + "model_forward_time": 0.02519059181213379, + "step": 17423 + }, + { + "epoch": 2.658538818359375e-05, + "step": 17423, + "training_step_time": 0.10537862777709961 + }, + { + "epoch": 2.65869140625e-05, + "model_forward_time": 0.025046110153198242, + "step": 17424 + }, + { + "epoch": 2.65869140625e-05, + "step": 17424, + "training_step_time": 0.10690665245056152 + }, + { + "epoch": 2.658843994140625e-05, + "model_forward_time": 0.025130510330200195, + "step": 17425 + }, + { + "epoch": 2.658843994140625e-05, + "step": 17425, + "training_step_time": 0.11019396781921387 + }, + { + "epoch": 2.65899658203125e-05, + "model_forward_time": 0.02534627914428711, + "step": 17426 + }, + { + "epoch": 2.65899658203125e-05, + "step": 17426, + "training_step_time": 0.10869908332824707 + }, + { + "epoch": 2.659149169921875e-05, + "model_forward_time": 0.025396108627319336, + "step": 17427 + }, + { + "epoch": 2.659149169921875e-05, + "step": 17427, + "training_step_time": 0.1078641414642334 + }, + { + "epoch": 2.6593017578125e-05, + "model_forward_time": 0.02506875991821289, + "step": 17428 + }, + { + "epoch": 2.6593017578125e-05, + "step": 17428, + "training_step_time": 0.1121985912322998 + }, + { + "epoch": 2.659454345703125e-05, + "model_forward_time": 0.025459766387939453, + "step": 17429 + }, + { + "epoch": 2.659454345703125e-05, + "step": 17429, + "training_step_time": 0.13109111785888672 + }, + { + "epoch": 2.65960693359375e-05, + "grad_norm": 0.20403897762298584, + "learning_rate": 4.079340343612165e-05, + "loss": 0.0067, + "step": 17430 + }, + { + "epoch": 2.65960693359375e-05, + "model_forward_time": 0.024564743041992188, + "step": 17430 + }, + { + "epoch": 2.65960693359375e-05, + "step": 17430, + "training_step_time": 0.10887956619262695 + }, + { + "epoch": 2.659759521484375e-05, + "model_forward_time": 0.025285005569458008, + "step": 17431 + }, + { + "epoch": 2.659759521484375e-05, + "step": 17431, + "training_step_time": 0.1164555549621582 + }, + { + "epoch": 2.659912109375e-05, + "model_forward_time": 0.025060415267944336, + "step": 17432 + }, + { + "epoch": 2.659912109375e-05, + "step": 17432, + "training_step_time": 0.11455202102661133 + }, + { + "epoch": 2.660064697265625e-05, + "model_forward_time": 0.025921106338500977, + "step": 17433 + }, + { + "epoch": 2.660064697265625e-05, + "step": 17433, + "training_step_time": 0.11010456085205078 + }, + { + "epoch": 2.66021728515625e-05, + "model_forward_time": 0.025133132934570312, + "step": 17434 + }, + { + "epoch": 2.66021728515625e-05, + "step": 17434, + "training_step_time": 0.15939688682556152 + }, + { + "epoch": 2.660369873046875e-05, + "model_forward_time": 0.02482461929321289, + "step": 17435 + }, + { + "epoch": 2.660369873046875e-05, + "step": 17435, + "training_step_time": 0.10695528984069824 + }, + { + "epoch": 2.6605224609375e-05, + "model_forward_time": 0.024995088577270508, + "step": 17436 + }, + { + "epoch": 2.6605224609375e-05, + "step": 17436, + "training_step_time": 0.10377931594848633 + }, + { + "epoch": 2.660675048828125e-05, + "model_forward_time": 0.025388479232788086, + "step": 17437 + }, + { + "epoch": 2.660675048828125e-05, + "step": 17437, + "training_step_time": 0.10732412338256836 + }, + { + "epoch": 2.66082763671875e-05, + "model_forward_time": 0.025522708892822266, + "step": 17438 + }, + { + "epoch": 2.66082763671875e-05, + "step": 17438, + "training_step_time": 0.11180329322814941 + }, + { + "epoch": 2.660980224609375e-05, + "model_forward_time": 0.02534031867980957, + "step": 17439 + }, + { + "epoch": 2.660980224609375e-05, + "step": 17439, + "training_step_time": 0.10809898376464844 + }, + { + "epoch": 2.6611328125e-05, + "grad_norm": 0.23942086100578308, + "learning_rate": 4.0739235773015536e-05, + "loss": 0.0121, + "step": 17440 + }, + { + "epoch": 2.6611328125e-05, + "model_forward_time": 0.02501082420349121, + "step": 17440 + }, + { + "epoch": 2.6611328125e-05, + "step": 17440, + "training_step_time": 0.10644102096557617 + }, + { + "epoch": 2.661285400390625e-05, + "model_forward_time": 0.02586650848388672, + "step": 17441 + }, + { + "epoch": 2.661285400390625e-05, + "step": 17441, + "training_step_time": 0.10570383071899414 + }, + { + "epoch": 2.66143798828125e-05, + "model_forward_time": 0.025304794311523438, + "step": 17442 + }, + { + "epoch": 2.66143798828125e-05, + "step": 17442, + "training_step_time": 0.10619282722473145 + }, + { + "epoch": 2.661590576171875e-05, + "model_forward_time": 0.02513742446899414, + "step": 17443 + }, + { + "epoch": 2.661590576171875e-05, + "step": 17443, + "training_step_time": 0.1076653003692627 + }, + { + "epoch": 2.6617431640625e-05, + "model_forward_time": 0.025265216827392578, + "step": 17444 + }, + { + "epoch": 2.6617431640625e-05, + "step": 17444, + "training_step_time": 0.1077871322631836 + }, + { + "epoch": 2.661895751953125e-05, + "model_forward_time": 0.025287151336669922, + "step": 17445 + }, + { + "epoch": 2.661895751953125e-05, + "step": 17445, + "training_step_time": 0.1093447208404541 + }, + { + "epoch": 2.66204833984375e-05, + "model_forward_time": 0.025706052780151367, + "step": 17446 + }, + { + "epoch": 2.66204833984375e-05, + "step": 17446, + "training_step_time": 0.10851263999938965 + }, + { + "epoch": 2.662200927734375e-05, + "model_forward_time": 0.02530837059020996, + "step": 17447 + }, + { + "epoch": 2.662200927734375e-05, + "step": 17447, + "training_step_time": 0.10523533821105957 + }, + { + "epoch": 2.662353515625e-05, + "model_forward_time": 0.025644779205322266, + "step": 17448 + }, + { + "epoch": 2.662353515625e-05, + "step": 17448, + "training_step_time": 0.10475492477416992 + }, + { + "epoch": 2.662506103515625e-05, + "model_forward_time": 0.025245189666748047, + "step": 17449 + }, + { + "epoch": 2.662506103515625e-05, + "step": 17449, + "training_step_time": 0.1042928695678711 + }, + { + "epoch": 2.66265869140625e-05, + "grad_norm": 0.2432943880558014, + "learning_rate": 4.0685079362611204e-05, + "loss": 0.0105, + "step": 17450 + }, + { + "epoch": 2.66265869140625e-05, + "model_forward_time": 0.02544236183166504, + "step": 17450 + }, + { + "epoch": 2.66265869140625e-05, + "step": 17450, + "training_step_time": 0.10440278053283691 + }, + { + "epoch": 2.662811279296875e-05, + "model_forward_time": 0.02541828155517578, + "step": 17451 + }, + { + "epoch": 2.662811279296875e-05, + "step": 17451, + "training_step_time": 0.1040029525756836 + }, + { + "epoch": 2.6629638671875e-05, + "model_forward_time": 0.02515244483947754, + "step": 17452 + }, + { + "epoch": 2.6629638671875e-05, + "step": 17452, + "training_step_time": 0.10409808158874512 + }, + { + "epoch": 2.663116455078125e-05, + "model_forward_time": 0.025072574615478516, + "step": 17453 + }, + { + "epoch": 2.663116455078125e-05, + "step": 17453, + "training_step_time": 0.10724830627441406 + }, + { + "epoch": 2.66326904296875e-05, + "model_forward_time": 0.02482318878173828, + "step": 17454 + }, + { + "epoch": 2.66326904296875e-05, + "step": 17454, + "training_step_time": 0.10903453826904297 + }, + { + "epoch": 2.663421630859375e-05, + "model_forward_time": 0.02501225471496582, + "step": 17455 + }, + { + "epoch": 2.663421630859375e-05, + "step": 17455, + "training_step_time": 0.10596799850463867 + }, + { + "epoch": 2.66357421875e-05, + "model_forward_time": 0.025432109832763672, + "step": 17456 + }, + { + "epoch": 2.66357421875e-05, + "step": 17456, + "training_step_time": 0.10452413558959961 + }, + { + "epoch": 2.663726806640625e-05, + "model_forward_time": 0.024520397186279297, + "step": 17457 + }, + { + "epoch": 2.663726806640625e-05, + "step": 17457, + "training_step_time": 0.1013803482055664 + }, + { + "epoch": 2.66387939453125e-05, + "model_forward_time": 0.0249025821685791, + "step": 17458 + }, + { + "epoch": 2.66387939453125e-05, + "step": 17458, + "training_step_time": 0.1785118579864502 + }, + { + "epoch": 2.664031982421875e-05, + "model_forward_time": 0.024979591369628906, + "step": 17459 + }, + { + "epoch": 2.664031982421875e-05, + "step": 17459, + "training_step_time": 0.13813138008117676 + }, + { + "epoch": 2.6641845703125e-05, + "grad_norm": 0.22207190096378326, + "learning_rate": 4.063093427071376e-05, + "loss": 0.008, + "step": 17460 + }, + { + "epoch": 2.6641845703125e-05, + "model_forward_time": 0.02448582649230957, + "step": 17460 + }, + { + "epoch": 2.6641845703125e-05, + "step": 17460, + "training_step_time": 0.2078540325164795 + }, + { + "epoch": 2.664337158203125e-05, + "model_forward_time": 0.024660587310791016, + "step": 17461 + }, + { + "epoch": 2.664337158203125e-05, + "step": 17461, + "training_step_time": 0.18691492080688477 + }, + { + "epoch": 2.66448974609375e-05, + "model_forward_time": 0.02429938316345215, + "step": 17462 + }, + { + "epoch": 2.66448974609375e-05, + "step": 17462, + "training_step_time": 0.11313176155090332 + }, + { + "epoch": 2.664642333984375e-05, + "model_forward_time": 0.024513959884643555, + "step": 17463 + }, + { + "epoch": 2.664642333984375e-05, + "step": 17463, + "training_step_time": 0.1282343864440918 + }, + { + "epoch": 2.664794921875e-05, + "model_forward_time": 0.02527642250061035, + "step": 17464 + }, + { + "epoch": 2.664794921875e-05, + "step": 17464, + "training_step_time": 0.13495659828186035 + }, + { + "epoch": 2.664947509765625e-05, + "model_forward_time": 0.024925947189331055, + "step": 17465 + }, + { + "epoch": 2.664947509765625e-05, + "step": 17465, + "training_step_time": 0.11701369285583496 + }, + { + "epoch": 2.66510009765625e-05, + "model_forward_time": 0.024776458740234375, + "step": 17466 + }, + { + "epoch": 2.66510009765625e-05, + "step": 17466, + "training_step_time": 0.116668701171875 + }, + { + "epoch": 2.665252685546875e-05, + "model_forward_time": 0.02521371841430664, + "step": 17467 + }, + { + "epoch": 2.665252685546875e-05, + "step": 17467, + "training_step_time": 0.11075782775878906 + }, + { + "epoch": 2.6654052734375e-05, + "model_forward_time": 0.02500748634338379, + "step": 17468 + }, + { + "epoch": 2.6654052734375e-05, + "step": 17468, + "training_step_time": 0.10521435737609863 + }, + { + "epoch": 2.665557861328125e-05, + "model_forward_time": 0.024941205978393555, + "step": 17469 + }, + { + "epoch": 2.665557861328125e-05, + "step": 17469, + "training_step_time": 0.1032874584197998 + }, + { + "epoch": 2.66571044921875e-05, + "grad_norm": 0.15203389525413513, + "learning_rate": 4.0576800563114646e-05, + "loss": 0.0078, + "step": 17470 + }, + { + "epoch": 2.66571044921875e-05, + "model_forward_time": 0.025072574615478516, + "step": 17470 + }, + { + "epoch": 2.66571044921875e-05, + "step": 17470, + "training_step_time": 0.10580253601074219 + }, + { + "epoch": 2.665863037109375e-05, + "model_forward_time": 0.025432586669921875, + "step": 17471 + }, + { + "epoch": 2.665863037109375e-05, + "step": 17471, + "training_step_time": 0.10408854484558105 + }, + { + "epoch": 2.666015625e-05, + "model_forward_time": 0.025377273559570312, + "step": 17472 + }, + { + "epoch": 2.666015625e-05, + "step": 17472, + "training_step_time": 0.10570120811462402 + }, + { + "epoch": 2.666168212890625e-05, + "model_forward_time": 0.025342464447021484, + "step": 17473 + }, + { + "epoch": 2.666168212890625e-05, + "step": 17473, + "training_step_time": 0.11157059669494629 + }, + { + "epoch": 2.66632080078125e-05, + "model_forward_time": 0.025072574615478516, + "step": 17474 + }, + { + "epoch": 2.66632080078125e-05, + "step": 17474, + "training_step_time": 0.11541461944580078 + }, + { + "epoch": 2.666473388671875e-05, + "model_forward_time": 0.02510356903076172, + "step": 17475 + }, + { + "epoch": 2.666473388671875e-05, + "step": 17475, + "training_step_time": 0.1733393669128418 + }, + { + "epoch": 2.6666259765625e-05, + "model_forward_time": 0.024592876434326172, + "step": 17476 + }, + { + "epoch": 2.6666259765625e-05, + "step": 17476, + "training_step_time": 0.140547513961792 + }, + { + "epoch": 2.666778564453125e-05, + "model_forward_time": 0.02441239356994629, + "step": 17477 + }, + { + "epoch": 2.666778564453125e-05, + "step": 17477, + "training_step_time": 0.10975837707519531 + }, + { + "epoch": 2.66693115234375e-05, + "model_forward_time": 0.025066852569580078, + "step": 17478 + }, + { + "epoch": 2.66693115234375e-05, + "step": 17478, + "training_step_time": 0.12679553031921387 + }, + { + "epoch": 2.667083740234375e-05, + "model_forward_time": 0.025180339813232422, + "step": 17479 + }, + { + "epoch": 2.667083740234375e-05, + "step": 17479, + "training_step_time": 0.11168718338012695 + }, + { + "epoch": 2.667236328125e-05, + "grad_norm": 0.22342152893543243, + "learning_rate": 4.05226783055914e-05, + "loss": 0.0106, + "step": 17480 + }, + { + "epoch": 2.667236328125e-05, + "model_forward_time": 0.025020837783813477, + "step": 17480 + }, + { + "epoch": 2.667236328125e-05, + "step": 17480, + "training_step_time": 0.10591530799865723 + }, + { + "epoch": 2.667388916015625e-05, + "model_forward_time": 0.024991989135742188, + "step": 17481 + }, + { + "epoch": 2.667388916015625e-05, + "step": 17481, + "training_step_time": 0.13350939750671387 + }, + { + "epoch": 2.66754150390625e-05, + "model_forward_time": 0.02670145034790039, + "step": 17482 + }, + { + "epoch": 2.66754150390625e-05, + "step": 17482, + "training_step_time": 0.11898279190063477 + }, + { + "epoch": 2.667694091796875e-05, + "model_forward_time": 0.027080059051513672, + "step": 17483 + }, + { + "epoch": 2.667694091796875e-05, + "step": 17483, + "training_step_time": 0.10708332061767578 + }, + { + "epoch": 2.6678466796875e-05, + "model_forward_time": 0.024899959564208984, + "step": 17484 + }, + { + "epoch": 2.6678466796875e-05, + "step": 17484, + "training_step_time": 0.10762834548950195 + }, + { + "epoch": 2.667999267578125e-05, + "model_forward_time": 0.02585768699645996, + "step": 17485 + }, + { + "epoch": 2.667999267578125e-05, + "step": 17485, + "training_step_time": 0.10704922676086426 + }, + { + "epoch": 2.66815185546875e-05, + "model_forward_time": 0.027258872985839844, + "step": 17486 + }, + { + "epoch": 2.66815185546875e-05, + "step": 17486, + "training_step_time": 0.10779976844787598 + }, + { + "epoch": 2.668304443359375e-05, + "model_forward_time": 0.02631545066833496, + "step": 17487 + }, + { + "epoch": 2.668304443359375e-05, + "step": 17487, + "training_step_time": 0.10582447052001953 + }, + { + "epoch": 2.66845703125e-05, + "model_forward_time": 0.026203393936157227, + "step": 17488 + }, + { + "epoch": 2.66845703125e-05, + "step": 17488, + "training_step_time": 0.10605001449584961 + }, + { + "epoch": 2.668609619140625e-05, + "model_forward_time": 0.025286436080932617, + "step": 17489 + }, + { + "epoch": 2.668609619140625e-05, + "step": 17489, + "training_step_time": 0.10620832443237305 + }, + { + "epoch": 2.66876220703125e-05, + "grad_norm": 0.24360807240009308, + "learning_rate": 4.046856756390767e-05, + "loss": 0.0087, + "step": 17490 + }, + { + "epoch": 2.66876220703125e-05, + "model_forward_time": 0.025455713272094727, + "step": 17490 + }, + { + "epoch": 2.66876220703125e-05, + "step": 17490, + "training_step_time": 0.10508847236633301 + }, + { + "epoch": 2.668914794921875e-05, + "model_forward_time": 0.02489781379699707, + "step": 17491 + }, + { + "epoch": 2.668914794921875e-05, + "step": 17491, + "training_step_time": 0.10788583755493164 + }, + { + "epoch": 2.6690673828125e-05, + "model_forward_time": 0.025566577911376953, + "step": 17492 + }, + { + "epoch": 2.6690673828125e-05, + "step": 17492, + "training_step_time": 0.10794830322265625 + }, + { + "epoch": 2.669219970703125e-05, + "model_forward_time": 0.025432348251342773, + "step": 17493 + }, + { + "epoch": 2.669219970703125e-05, + "step": 17493, + "training_step_time": 0.10476803779602051 + }, + { + "epoch": 2.66937255859375e-05, + "model_forward_time": 0.025760173797607422, + "step": 17494 + }, + { + "epoch": 2.66937255859375e-05, + "step": 17494, + "training_step_time": 0.10517549514770508 + }, + { + "epoch": 2.669525146484375e-05, + "model_forward_time": 0.025211095809936523, + "step": 17495 + }, + { + "epoch": 2.669525146484375e-05, + "step": 17495, + "training_step_time": 0.1053609848022461 + }, + { + "epoch": 2.669677734375e-05, + "model_forward_time": 0.025086164474487305, + "step": 17496 + }, + { + "epoch": 2.669677734375e-05, + "step": 17496, + "training_step_time": 0.1063849925994873 + }, + { + "epoch": 2.669830322265625e-05, + "model_forward_time": 0.025098800659179688, + "step": 17497 + }, + { + "epoch": 2.669830322265625e-05, + "step": 17497, + "training_step_time": 0.10855412483215332 + }, + { + "epoch": 2.66998291015625e-05, + "model_forward_time": 0.02516770362854004, + "step": 17498 + }, + { + "epoch": 2.66998291015625e-05, + "step": 17498, + "training_step_time": 0.11717343330383301 + }, + { + "epoch": 2.670135498046875e-05, + "model_forward_time": 0.025408267974853516, + "step": 17499 + }, + { + "epoch": 2.670135498046875e-05, + "step": 17499, + "training_step_time": 0.19134831428527832 + }, + { + "epoch": 2.6702880859375e-05, + "grad_norm": 0.20986664295196533, + "learning_rate": 4.0414468403813095e-05, + "loss": 0.0069, + "step": 17500 + }, + { + "epoch": 2.6702880859375e-05, + "model_forward_time": 0.024262428283691406, + "step": 17500 + }, + { + "epoch": 2.6702880859375e-05, + "step": 17500, + "training_step_time": 0.196730375289917 + }, + { + "epoch": 2.670440673828125e-05, + "model_forward_time": 0.023336410522460938, + "step": 17501 + }, + { + "epoch": 2.670440673828125e-05, + "step": 17501, + "training_step_time": 0.19736576080322266 + }, + { + "epoch": 2.67059326171875e-05, + "model_forward_time": 0.02443408966064453, + "step": 17502 + }, + { + "epoch": 2.67059326171875e-05, + "step": 17502, + "training_step_time": 0.1837158203125 + }, + { + "epoch": 2.670745849609375e-05, + "model_forward_time": 0.024596691131591797, + "step": 17503 + }, + { + "epoch": 2.670745849609375e-05, + "step": 17503, + "training_step_time": 0.20628118515014648 + }, + { + "epoch": 2.6708984375e-05, + "model_forward_time": 0.02451777458190918, + "step": 17504 + }, + { + "epoch": 2.6708984375e-05, + "step": 17504, + "training_step_time": 0.1834108829498291 + }, + { + "epoch": 2.671051025390625e-05, + "model_forward_time": 0.0247647762298584, + "step": 17505 + }, + { + "epoch": 2.671051025390625e-05, + "step": 17505, + "training_step_time": 0.22809576988220215 + }, + { + "epoch": 2.67120361328125e-05, + "model_forward_time": 0.024686813354492188, + "step": 17506 + }, + { + "epoch": 2.67120361328125e-05, + "step": 17506, + "training_step_time": 0.2069075107574463 + }, + { + "epoch": 2.671356201171875e-05, + "model_forward_time": 0.024651765823364258, + "step": 17507 + }, + { + "epoch": 2.671356201171875e-05, + "step": 17507, + "training_step_time": 0.17825555801391602 + }, + { + "epoch": 2.6715087890625e-05, + "model_forward_time": 0.02429962158203125, + "step": 17508 + }, + { + "epoch": 2.6715087890625e-05, + "step": 17508, + "training_step_time": 0.12929630279541016 + }, + { + "epoch": 2.671661376953125e-05, + "model_forward_time": 0.026206016540527344, + "step": 17509 + }, + { + "epoch": 2.671661376953125e-05, + "step": 17509, + "training_step_time": 0.11239290237426758 + }, + { + "epoch": 2.67181396484375e-05, + "grad_norm": 0.1403159350156784, + "learning_rate": 4.036038089104326e-05, + "loss": 0.0118, + "step": 17510 + }, + { + "epoch": 2.67181396484375e-05, + "model_forward_time": 0.024848222732543945, + "step": 17510 + }, + { + "epoch": 2.67181396484375e-05, + "step": 17510, + "training_step_time": 0.12391138076782227 + }, + { + "epoch": 2.671966552734375e-05, + "model_forward_time": 0.024845600128173828, + "step": 17511 + }, + { + "epoch": 2.671966552734375e-05, + "step": 17511, + "training_step_time": 0.10621500015258789 + }, + { + "epoch": 2.672119140625e-05, + "model_forward_time": 0.025402069091796875, + "step": 17512 + }, + { + "epoch": 2.672119140625e-05, + "step": 17512, + "training_step_time": 0.10650277137756348 + }, + { + "epoch": 2.672271728515625e-05, + "model_forward_time": 0.02516031265258789, + "step": 17513 + }, + { + "epoch": 2.672271728515625e-05, + "step": 17513, + "training_step_time": 0.10428094863891602 + }, + { + "epoch": 2.67242431640625e-05, + "model_forward_time": 0.025116682052612305, + "step": 17514 + }, + { + "epoch": 2.67242431640625e-05, + "step": 17514, + "training_step_time": 0.10645437240600586 + }, + { + "epoch": 2.672576904296875e-05, + "model_forward_time": 0.0245821475982666, + "step": 17515 + }, + { + "epoch": 2.672576904296875e-05, + "step": 17515, + "training_step_time": 0.10923647880554199 + }, + { + "epoch": 2.6727294921875e-05, + "model_forward_time": 0.025254249572753906, + "step": 17516 + }, + { + "epoch": 2.6727294921875e-05, + "step": 17516, + "training_step_time": 0.11212587356567383 + }, + { + "epoch": 2.672882080078125e-05, + "model_forward_time": 0.027120590209960938, + "step": 17517 + }, + { + "epoch": 2.672882080078125e-05, + "step": 17517, + "training_step_time": 0.11363744735717773 + }, + { + "epoch": 2.67303466796875e-05, + "model_forward_time": 0.024886131286621094, + "step": 17518 + }, + { + "epoch": 2.67303466796875e-05, + "step": 17518, + "training_step_time": 0.11769413948059082 + }, + { + "epoch": 2.673187255859375e-05, + "model_forward_time": 0.025722265243530273, + "step": 17519 + }, + { + "epoch": 2.673187255859375e-05, + "step": 17519, + "training_step_time": 0.11069297790527344 + }, + { + "epoch": 2.67333984375e-05, + "grad_norm": 0.1784925013780594, + "learning_rate": 4.0306305091319595e-05, + "loss": 0.0101, + "step": 17520 + }, + { + "epoch": 2.67333984375e-05, + "model_forward_time": 0.025279998779296875, + "step": 17520 + }, + { + "epoch": 2.67333984375e-05, + "step": 17520, + "training_step_time": 0.11763215065002441 + }, + { + "epoch": 2.673492431640625e-05, + "model_forward_time": 0.02506279945373535, + "step": 17521 + }, + { + "epoch": 2.673492431640625e-05, + "step": 17521, + "training_step_time": 0.11384463310241699 + }, + { + "epoch": 2.67364501953125e-05, + "model_forward_time": 0.02523350715637207, + "step": 17522 + }, + { + "epoch": 2.67364501953125e-05, + "step": 17522, + "training_step_time": 0.10673141479492188 + }, + { + "epoch": 2.673797607421875e-05, + "model_forward_time": 0.02489018440246582, + "step": 17523 + }, + { + "epoch": 2.673797607421875e-05, + "step": 17523, + "training_step_time": 0.10979938507080078 + }, + { + "epoch": 2.6739501953125e-05, + "model_forward_time": 0.025228023529052734, + "step": 17524 + }, + { + "epoch": 2.6739501953125e-05, + "step": 17524, + "training_step_time": 0.1249995231628418 + }, + { + "epoch": 2.674102783203125e-05, + "model_forward_time": 0.025093555450439453, + "step": 17525 + }, + { + "epoch": 2.674102783203125e-05, + "step": 17525, + "training_step_time": 0.13387060165405273 + }, + { + "epoch": 2.67425537109375e-05, + "model_forward_time": 0.024849653244018555, + "step": 17526 + }, + { + "epoch": 2.67425537109375e-05, + "step": 17526, + "training_step_time": 0.11285543441772461 + }, + { + "epoch": 2.674407958984375e-05, + "model_forward_time": 0.024337053298950195, + "step": 17527 + }, + { + "epoch": 2.674407958984375e-05, + "step": 17527, + "training_step_time": 0.11037778854370117 + }, + { + "epoch": 2.674560546875e-05, + "model_forward_time": 0.024193286895751953, + "step": 17528 + }, + { + "epoch": 2.674560546875e-05, + "step": 17528, + "training_step_time": 0.10362815856933594 + }, + { + "epoch": 2.674713134765625e-05, + "model_forward_time": 0.024202823638916016, + "step": 17529 + }, + { + "epoch": 2.674713134765625e-05, + "step": 17529, + "training_step_time": 0.10526299476623535 + }, + { + "epoch": 2.67486572265625e-05, + "grad_norm": 0.17740324139595032, + "learning_rate": 4.0252241070349304e-05, + "loss": 0.0093, + "step": 17530 + }, + { + "epoch": 2.67486572265625e-05, + "model_forward_time": 0.025275230407714844, + "step": 17530 + }, + { + "epoch": 2.67486572265625e-05, + "step": 17530, + "training_step_time": 0.10555815696716309 + }, + { + "epoch": 2.675018310546875e-05, + "model_forward_time": 0.024858474731445312, + "step": 17531 + }, + { + "epoch": 2.675018310546875e-05, + "step": 17531, + "training_step_time": 0.1067655086517334 + }, + { + "epoch": 2.6751708984375e-05, + "model_forward_time": 0.025211572647094727, + "step": 17532 + }, + { + "epoch": 2.6751708984375e-05, + "step": 17532, + "training_step_time": 0.10576438903808594 + }, + { + "epoch": 2.675323486328125e-05, + "model_forward_time": 0.02503800392150879, + "step": 17533 + }, + { + "epoch": 2.675323486328125e-05, + "step": 17533, + "training_step_time": 0.10522794723510742 + }, + { + "epoch": 2.67547607421875e-05, + "model_forward_time": 0.024678945541381836, + "step": 17534 + }, + { + "epoch": 2.67547607421875e-05, + "step": 17534, + "training_step_time": 0.10391926765441895 + }, + { + "epoch": 2.675628662109375e-05, + "model_forward_time": 0.027218103408813477, + "step": 17535 + }, + { + "epoch": 2.675628662109375e-05, + "step": 17535, + "training_step_time": 0.10737776756286621 + }, + { + "epoch": 2.67578125e-05, + "model_forward_time": 0.025104761123657227, + "step": 17536 + }, + { + "epoch": 2.67578125e-05, + "step": 17536, + "training_step_time": 0.10677170753479004 + }, + { + "epoch": 2.675933837890625e-05, + "model_forward_time": 0.02547287940979004, + "step": 17537 + }, + { + "epoch": 2.675933837890625e-05, + "step": 17537, + "training_step_time": 0.10755491256713867 + }, + { + "epoch": 2.67608642578125e-05, + "model_forward_time": 0.025130033493041992, + "step": 17538 + }, + { + "epoch": 2.67608642578125e-05, + "step": 17538, + "training_step_time": 0.10477066040039062 + }, + { + "epoch": 2.676239013671875e-05, + "model_forward_time": 0.025388479232788086, + "step": 17539 + }, + { + "epoch": 2.676239013671875e-05, + "step": 17539, + "training_step_time": 0.10525965690612793 + }, + { + "epoch": 2.6763916015625e-05, + "grad_norm": 0.21201591193675995, + "learning_rate": 4.019818889382528e-05, + "loss": 0.0169, + "step": 17540 + }, + { + "epoch": 2.6763916015625e-05, + "model_forward_time": 0.02529597282409668, + "step": 17540 + }, + { + "epoch": 2.6763916015625e-05, + "step": 17540, + "training_step_time": 0.1049807071685791 + }, + { + "epoch": 2.676544189453125e-05, + "model_forward_time": 0.025216341018676758, + "step": 17541 + }, + { + "epoch": 2.676544189453125e-05, + "step": 17541, + "training_step_time": 0.10723400115966797 + }, + { + "epoch": 2.67669677734375e-05, + "model_forward_time": 0.02614307403564453, + "step": 17542 + }, + { + "epoch": 2.67669677734375e-05, + "step": 17542, + "training_step_time": 0.10492801666259766 + }, + { + "epoch": 2.676849365234375e-05, + "model_forward_time": 0.025014638900756836, + "step": 17543 + }, + { + "epoch": 2.676849365234375e-05, + "step": 17543, + "training_step_time": 0.1089332103729248 + }, + { + "epoch": 2.677001953125e-05, + "model_forward_time": 0.025524616241455078, + "step": 17544 + }, + { + "epoch": 2.677001953125e-05, + "step": 17544, + "training_step_time": 0.10503506660461426 + }, + { + "epoch": 2.677154541015625e-05, + "model_forward_time": 0.025252342224121094, + "step": 17545 + }, + { + "epoch": 2.677154541015625e-05, + "step": 17545, + "training_step_time": 0.10491299629211426 + }, + { + "epoch": 2.67730712890625e-05, + "model_forward_time": 0.025172948837280273, + "step": 17546 + }, + { + "epoch": 2.67730712890625e-05, + "step": 17546, + "training_step_time": 0.10589790344238281 + }, + { + "epoch": 2.677459716796875e-05, + "model_forward_time": 0.02561783790588379, + "step": 17547 + }, + { + "epoch": 2.677459716796875e-05, + "step": 17547, + "training_step_time": 0.10440325736999512 + }, + { + "epoch": 2.6776123046875e-05, + "model_forward_time": 0.02538609504699707, + "step": 17548 + }, + { + "epoch": 2.6776123046875e-05, + "step": 17548, + "training_step_time": 0.16729021072387695 + }, + { + "epoch": 2.677764892578125e-05, + "model_forward_time": 0.024501323699951172, + "step": 17549 + }, + { + "epoch": 2.677764892578125e-05, + "step": 17549, + "training_step_time": 0.10737895965576172 + }, + { + "epoch": 2.67791748046875e-05, + "grad_norm": 0.24493327736854553, + "learning_rate": 4.0144148627425993e-05, + "loss": 0.0097, + "step": 17550 + }, + { + "epoch": 2.67791748046875e-05, + "model_forward_time": 0.025326013565063477, + "step": 17550 + }, + { + "epoch": 2.67791748046875e-05, + "step": 17550, + "training_step_time": 0.10544729232788086 + }, + { + "epoch": 2.678070068359375e-05, + "model_forward_time": 0.024802207946777344, + "step": 17551 + }, + { + "epoch": 2.678070068359375e-05, + "step": 17551, + "training_step_time": 0.17994952201843262 + }, + { + "epoch": 2.67822265625e-05, + "model_forward_time": 0.02500462532043457, + "step": 17552 + }, + { + "epoch": 2.67822265625e-05, + "step": 17552, + "training_step_time": 0.1630702018737793 + }, + { + "epoch": 2.678375244140625e-05, + "model_forward_time": 0.024752140045166016, + "step": 17553 + }, + { + "epoch": 2.678375244140625e-05, + "step": 17553, + "training_step_time": 0.20124530792236328 + }, + { + "epoch": 2.67852783203125e-05, + "model_forward_time": 0.024921178817749023, + "step": 17554 + }, + { + "epoch": 2.67852783203125e-05, + "step": 17554, + "training_step_time": 0.12817096710205078 + }, + { + "epoch": 2.678680419921875e-05, + "model_forward_time": 0.02462625503540039, + "step": 17555 + }, + { + "epoch": 2.678680419921875e-05, + "step": 17555, + "training_step_time": 0.12283825874328613 + }, + { + "epoch": 2.6788330078125e-05, + "model_forward_time": 0.02425384521484375, + "step": 17556 + }, + { + "epoch": 2.6788330078125e-05, + "step": 17556, + "training_step_time": 0.2152097225189209 + }, + { + "epoch": 2.678985595703125e-05, + "model_forward_time": 0.02482128143310547, + "step": 17557 + }, + { + "epoch": 2.678985595703125e-05, + "step": 17557, + "training_step_time": 0.12040305137634277 + }, + { + "epoch": 2.67913818359375e-05, + "model_forward_time": 0.02370452880859375, + "step": 17558 + }, + { + "epoch": 2.67913818359375e-05, + "step": 17558, + "training_step_time": 0.11918973922729492 + }, + { + "epoch": 2.679290771484375e-05, + "model_forward_time": 0.02402472496032715, + "step": 17559 + }, + { + "epoch": 2.679290771484375e-05, + "step": 17559, + "training_step_time": 0.11201024055480957 + }, + { + "epoch": 2.679443359375e-05, + "grad_norm": 0.31486862897872925, + "learning_rate": 4.0090120336815474e-05, + "loss": 0.0214, + "step": 17560 + }, + { + "epoch": 2.679443359375e-05, + "model_forward_time": 0.024424314498901367, + "step": 17560 + }, + { + "epoch": 2.679443359375e-05, + "step": 17560, + "training_step_time": 0.10986781120300293 + }, + { + "epoch": 2.679595947265625e-05, + "model_forward_time": 0.024964332580566406, + "step": 17561 + }, + { + "epoch": 2.679595947265625e-05, + "step": 17561, + "training_step_time": 0.10563397407531738 + }, + { + "epoch": 2.67974853515625e-05, + "model_forward_time": 0.025179386138916016, + "step": 17562 + }, + { + "epoch": 2.67974853515625e-05, + "step": 17562, + "training_step_time": 0.10660910606384277 + }, + { + "epoch": 2.679901123046875e-05, + "model_forward_time": 0.024835586547851562, + "step": 17563 + }, + { + "epoch": 2.679901123046875e-05, + "step": 17563, + "training_step_time": 0.1060636043548584 + }, + { + "epoch": 2.6800537109375e-05, + "model_forward_time": 0.024786949157714844, + "step": 17564 + }, + { + "epoch": 2.6800537109375e-05, + "step": 17564, + "training_step_time": 0.10484719276428223 + }, + { + "epoch": 2.680206298828125e-05, + "model_forward_time": 0.024945735931396484, + "step": 17565 + }, + { + "epoch": 2.680206298828125e-05, + "step": 17565, + "training_step_time": 0.11204814910888672 + }, + { + "epoch": 2.68035888671875e-05, + "model_forward_time": 0.025630712509155273, + "step": 17566 + }, + { + "epoch": 2.68035888671875e-05, + "step": 17566, + "training_step_time": 0.10729765892028809 + }, + { + "epoch": 2.680511474609375e-05, + "model_forward_time": 0.026267290115356445, + "step": 17567 + }, + { + "epoch": 2.680511474609375e-05, + "step": 17567, + "training_step_time": 0.10755467414855957 + }, + { + "epoch": 2.6806640625e-05, + "model_forward_time": 0.02503061294555664, + "step": 17568 + }, + { + "epoch": 2.6806640625e-05, + "step": 17568, + "training_step_time": 0.13031625747680664 + }, + { + "epoch": 2.680816650390625e-05, + "model_forward_time": 0.025171518325805664, + "step": 17569 + }, + { + "epoch": 2.680816650390625e-05, + "step": 17569, + "training_step_time": 0.1303567886352539 + }, + { + "epoch": 2.68096923828125e-05, + "grad_norm": 0.24160060286521912, + "learning_rate": 4.003610408764317e-05, + "loss": 0.0177, + "step": 17570 + }, + { + "epoch": 2.68096923828125e-05, + "model_forward_time": 0.024194955825805664, + "step": 17570 + }, + { + "epoch": 2.68096923828125e-05, + "step": 17570, + "training_step_time": 0.10577630996704102 + }, + { + "epoch": 2.681121826171875e-05, + "model_forward_time": 0.02499222755432129, + "step": 17571 + }, + { + "epoch": 2.681121826171875e-05, + "step": 17571, + "training_step_time": 0.1332862377166748 + }, + { + "epoch": 2.6812744140625e-05, + "model_forward_time": 0.02595996856689453, + "step": 17572 + }, + { + "epoch": 2.6812744140625e-05, + "step": 17572, + "training_step_time": 0.11815023422241211 + }, + { + "epoch": 2.681427001953125e-05, + "model_forward_time": 0.025684118270874023, + "step": 17573 + }, + { + "epoch": 2.681427001953125e-05, + "step": 17573, + "training_step_time": 0.1052243709564209 + }, + { + "epoch": 2.68157958984375e-05, + "model_forward_time": 0.02522444725036621, + "step": 17574 + }, + { + "epoch": 2.68157958984375e-05, + "step": 17574, + "training_step_time": 0.12038278579711914 + }, + { + "epoch": 2.681732177734375e-05, + "model_forward_time": 0.025427579879760742, + "step": 17575 + }, + { + "epoch": 2.681732177734375e-05, + "step": 17575, + "training_step_time": 0.1071779727935791 + }, + { + "epoch": 2.681884765625e-05, + "model_forward_time": 0.024938106536865234, + "step": 17576 + }, + { + "epoch": 2.681884765625e-05, + "step": 17576, + "training_step_time": 0.10592961311340332 + }, + { + "epoch": 2.682037353515625e-05, + "model_forward_time": 0.02560138702392578, + "step": 17577 + }, + { + "epoch": 2.682037353515625e-05, + "step": 17577, + "training_step_time": 0.10559773445129395 + }, + { + "epoch": 2.68218994140625e-05, + "model_forward_time": 0.02500319480895996, + "step": 17578 + }, + { + "epoch": 2.68218994140625e-05, + "step": 17578, + "training_step_time": 0.10771727561950684 + }, + { + "epoch": 2.682342529296875e-05, + "model_forward_time": 0.024972200393676758, + "step": 17579 + }, + { + "epoch": 2.682342529296875e-05, + "step": 17579, + "training_step_time": 0.10482573509216309 + }, + { + "epoch": 2.6824951171875e-05, + "grad_norm": 0.4323936700820923, + "learning_rate": 3.9982099945543945e-05, + "loss": 0.0127, + "step": 17580 + }, + { + "epoch": 2.6824951171875e-05, + "model_forward_time": 0.025233745574951172, + "step": 17580 + }, + { + "epoch": 2.6824951171875e-05, + "step": 17580, + "training_step_time": 0.10559391975402832 + }, + { + "epoch": 2.682647705078125e-05, + "model_forward_time": 0.025292396545410156, + "step": 17581 + }, + { + "epoch": 2.682647705078125e-05, + "step": 17581, + "training_step_time": 0.11092042922973633 + }, + { + "epoch": 2.68280029296875e-05, + "model_forward_time": 0.025029659271240234, + "step": 17582 + }, + { + "epoch": 2.68280029296875e-05, + "step": 17582, + "training_step_time": 0.11712217330932617 + }, + { + "epoch": 2.682952880859375e-05, + "model_forward_time": 0.025607824325561523, + "step": 17583 + }, + { + "epoch": 2.682952880859375e-05, + "step": 17583, + "training_step_time": 0.13639211654663086 + }, + { + "epoch": 2.68310546875e-05, + "model_forward_time": 0.0238802433013916, + "step": 17584 + }, + { + "epoch": 2.68310546875e-05, + "step": 17584, + "training_step_time": 0.13219332695007324 + }, + { + "epoch": 2.683258056640625e-05, + "model_forward_time": 0.0234832763671875, + "step": 17585 + }, + { + "epoch": 2.683258056640625e-05, + "step": 17585, + "training_step_time": 0.12552380561828613 + }, + { + "epoch": 2.68341064453125e-05, + "model_forward_time": 0.023898601531982422, + "step": 17586 + }, + { + "epoch": 2.68341064453125e-05, + "step": 17586, + "training_step_time": 0.11919856071472168 + }, + { + "epoch": 2.683563232421875e-05, + "model_forward_time": 0.025143861770629883, + "step": 17587 + }, + { + "epoch": 2.683563232421875e-05, + "step": 17587, + "training_step_time": 0.11458563804626465 + }, + { + "epoch": 2.6837158203125e-05, + "model_forward_time": 0.025059938430786133, + "step": 17588 + }, + { + "epoch": 2.6837158203125e-05, + "step": 17588, + "training_step_time": 0.11882805824279785 + }, + { + "epoch": 2.683868408203125e-05, + "model_forward_time": 0.028535842895507812, + "step": 17589 + }, + { + "epoch": 2.683868408203125e-05, + "step": 17589, + "training_step_time": 0.11333632469177246 + }, + { + "epoch": 2.68402099609375e-05, + "grad_norm": 0.16022445261478424, + "learning_rate": 3.9928107976137906e-05, + "loss": 0.0159, + "step": 17590 + }, + { + "epoch": 2.68402099609375e-05, + "model_forward_time": 0.02550792694091797, + "step": 17590 + }, + { + "epoch": 2.68402099609375e-05, + "step": 17590, + "training_step_time": 0.11345887184143066 + }, + { + "epoch": 2.684173583984375e-05, + "model_forward_time": 0.025355100631713867, + "step": 17591 + }, + { + "epoch": 2.684173583984375e-05, + "step": 17591, + "training_step_time": 0.10816287994384766 + }, + { + "epoch": 2.684326171875e-05, + "model_forward_time": 0.02538919448852539, + "step": 17592 + }, + { + "epoch": 2.684326171875e-05, + "step": 17592, + "training_step_time": 0.10787701606750488 + }, + { + "epoch": 2.684478759765625e-05, + "model_forward_time": 0.024888038635253906, + "step": 17593 + }, + { + "epoch": 2.684478759765625e-05, + "step": 17593, + "training_step_time": 0.10723018646240234 + }, + { + "epoch": 2.68463134765625e-05, + "model_forward_time": 0.025054216384887695, + "step": 17594 + }, + { + "epoch": 2.68463134765625e-05, + "step": 17594, + "training_step_time": 0.10564684867858887 + }, + { + "epoch": 2.684783935546875e-05, + "model_forward_time": 0.025013208389282227, + "step": 17595 + }, + { + "epoch": 2.684783935546875e-05, + "step": 17595, + "training_step_time": 0.13254308700561523 + }, + { + "epoch": 2.6849365234375e-05, + "model_forward_time": 0.025025129318237305, + "step": 17596 + }, + { + "epoch": 2.6849365234375e-05, + "step": 17596, + "training_step_time": 0.10689663887023926 + }, + { + "epoch": 2.685089111328125e-05, + "model_forward_time": 0.02489495277404785, + "step": 17597 + }, + { + "epoch": 2.685089111328125e-05, + "step": 17597, + "training_step_time": 0.17582941055297852 + }, + { + "epoch": 2.68524169921875e-05, + "model_forward_time": 0.02502274513244629, + "step": 17598 + }, + { + "epoch": 2.68524169921875e-05, + "step": 17598, + "training_step_time": 0.1739051342010498 + }, + { + "epoch": 2.685394287109375e-05, + "model_forward_time": 0.024285078048706055, + "step": 17599 + }, + { + "epoch": 2.685394287109375e-05, + "step": 17599, + "training_step_time": 0.13638877868652344 + }, + { + "epoch": 2.685546875e-05, + "grad_norm": 0.11669151484966278, + "learning_rate": 3.9874128245030404e-05, + "loss": 0.0214, + "step": 17600 + }, + { + "epoch": 2.685546875e-05, + "model_forward_time": 0.024765968322753906, + "step": 17600 + }, + { + "epoch": 2.685546875e-05, + "step": 17600, + "training_step_time": 0.17300677299499512 + }, + { + "epoch": 2.685699462890625e-05, + "model_forward_time": 0.024624347686767578, + "step": 17601 + }, + { + "epoch": 2.685699462890625e-05, + "step": 17601, + "training_step_time": 0.15225958824157715 + }, + { + "epoch": 2.68585205078125e-05, + "model_forward_time": 0.023837566375732422, + "step": 17602 + }, + { + "epoch": 2.68585205078125e-05, + "step": 17602, + "training_step_time": 0.13386917114257812 + }, + { + "epoch": 2.686004638671875e-05, + "model_forward_time": 0.024768829345703125, + "step": 17603 + }, + { + "epoch": 2.686004638671875e-05, + "step": 17603, + "training_step_time": 0.13197803497314453 + }, + { + "epoch": 2.6861572265625e-05, + "model_forward_time": 0.024423599243164062, + "step": 17604 + }, + { + "epoch": 2.6861572265625e-05, + "step": 17604, + "training_step_time": 0.10570359230041504 + }, + { + "epoch": 2.686309814453125e-05, + "model_forward_time": 0.025017976760864258, + "step": 17605 + }, + { + "epoch": 2.686309814453125e-05, + "step": 17605, + "training_step_time": 0.12499380111694336 + }, + { + "epoch": 2.68646240234375e-05, + "model_forward_time": 0.02516913414001465, + "step": 17606 + }, + { + "epoch": 2.68646240234375e-05, + "step": 17606, + "training_step_time": 0.10401558876037598 + }, + { + "epoch": 2.686614990234375e-05, + "model_forward_time": 0.025025129318237305, + "step": 17607 + }, + { + "epoch": 2.686614990234375e-05, + "step": 17607, + "training_step_time": 0.1042790412902832 + }, + { + "epoch": 2.686767578125e-05, + "model_forward_time": 0.025037288665771484, + "step": 17608 + }, + { + "epoch": 2.686767578125e-05, + "step": 17608, + "training_step_time": 0.1080327033996582 + }, + { + "epoch": 2.686920166015625e-05, + "model_forward_time": 0.02513432502746582, + "step": 17609 + }, + { + "epoch": 2.686920166015625e-05, + "step": 17609, + "training_step_time": 0.10373783111572266 + }, + { + "epoch": 2.68707275390625e-05, + "grad_norm": 0.3164677321910858, + "learning_rate": 3.982016081781189e-05, + "loss": 0.016, + "step": 17610 + }, + { + "epoch": 2.68707275390625e-05, + "model_forward_time": 0.02566242218017578, + "step": 17610 + }, + { + "epoch": 2.68707275390625e-05, + "step": 17610, + "training_step_time": 0.10534548759460449 + }, + { + "epoch": 2.687225341796875e-05, + "model_forward_time": 0.026174545288085938, + "step": 17611 + }, + { + "epoch": 2.687225341796875e-05, + "step": 17611, + "training_step_time": 0.1085824966430664 + }, + { + "epoch": 2.6873779296875e-05, + "model_forward_time": 0.0253293514251709, + "step": 17612 + }, + { + "epoch": 2.6873779296875e-05, + "step": 17612, + "training_step_time": 0.10844278335571289 + }, + { + "epoch": 2.687530517578125e-05, + "model_forward_time": 0.025023937225341797, + "step": 17613 + }, + { + "epoch": 2.687530517578125e-05, + "step": 17613, + "training_step_time": 0.1492776870727539 + }, + { + "epoch": 2.68768310546875e-05, + "model_forward_time": 0.024797439575195312, + "step": 17614 + }, + { + "epoch": 2.68768310546875e-05, + "step": 17614, + "training_step_time": 0.1757051944732666 + }, + { + "epoch": 2.687835693359375e-05, + "model_forward_time": 0.02590465545654297, + "step": 17615 + }, + { + "epoch": 2.687835693359375e-05, + "step": 17615, + "training_step_time": 0.20859956741333008 + }, + { + "epoch": 2.68798828125e-05, + "model_forward_time": 0.024591922760009766, + "step": 17616 + }, + { + "epoch": 2.68798828125e-05, + "step": 17616, + "training_step_time": 0.15752792358398438 + }, + { + "epoch": 2.688140869140625e-05, + "model_forward_time": 0.024396419525146484, + "step": 17617 + }, + { + "epoch": 2.688140869140625e-05, + "step": 17617, + "training_step_time": 0.19089508056640625 + }, + { + "epoch": 2.68829345703125e-05, + "model_forward_time": 0.02430891990661621, + "step": 17618 + }, + { + "epoch": 2.68829345703125e-05, + "step": 17618, + "training_step_time": 0.14585423469543457 + }, + { + "epoch": 2.688446044921875e-05, + "model_forward_time": 0.025299787521362305, + "step": 17619 + }, + { + "epoch": 2.688446044921875e-05, + "step": 17619, + "training_step_time": 0.17435526847839355 + }, + { + "epoch": 2.6885986328125e-05, + "grad_norm": 0.3168177604675293, + "learning_rate": 3.976620576005786e-05, + "loss": 0.0117, + "step": 17620 + }, + { + "epoch": 2.6885986328125e-05, + "model_forward_time": 0.024061203002929688, + "step": 17620 + }, + { + "epoch": 2.6885986328125e-05, + "step": 17620, + "training_step_time": 0.12474346160888672 + }, + { + "epoch": 2.688751220703125e-05, + "model_forward_time": 0.02381610870361328, + "step": 17621 + }, + { + "epoch": 2.688751220703125e-05, + "step": 17621, + "training_step_time": 0.12219595909118652 + }, + { + "epoch": 2.68890380859375e-05, + "model_forward_time": 0.024809837341308594, + "step": 17622 + }, + { + "epoch": 2.68890380859375e-05, + "step": 17622, + "training_step_time": 0.11995744705200195 + }, + { + "epoch": 2.689056396484375e-05, + "model_forward_time": 0.02580857276916504, + "step": 17623 + }, + { + "epoch": 2.689056396484375e-05, + "step": 17623, + "training_step_time": 0.11118197441101074 + }, + { + "epoch": 2.689208984375e-05, + "model_forward_time": 0.025321006774902344, + "step": 17624 + }, + { + "epoch": 2.689208984375e-05, + "step": 17624, + "training_step_time": 0.11187386512756348 + }, + { + "epoch": 2.689361572265625e-05, + "model_forward_time": 0.02521681785583496, + "step": 17625 + }, + { + "epoch": 2.689361572265625e-05, + "step": 17625, + "training_step_time": 0.11209273338317871 + }, + { + "epoch": 2.68951416015625e-05, + "model_forward_time": 0.025374650955200195, + "step": 17626 + }, + { + "epoch": 2.68951416015625e-05, + "step": 17626, + "training_step_time": 0.10788846015930176 + }, + { + "epoch": 2.689666748046875e-05, + "model_forward_time": 0.025298118591308594, + "step": 17627 + }, + { + "epoch": 2.689666748046875e-05, + "step": 17627, + "training_step_time": 0.10836362838745117 + }, + { + "epoch": 2.6898193359375e-05, + "model_forward_time": 0.024960041046142578, + "step": 17628 + }, + { + "epoch": 2.6898193359375e-05, + "step": 17628, + "training_step_time": 0.10666871070861816 + }, + { + "epoch": 2.689971923828125e-05, + "model_forward_time": 0.024715185165405273, + "step": 17629 + }, + { + "epoch": 2.689971923828125e-05, + "step": 17629, + "training_step_time": 0.10511326789855957 + }, + { + "epoch": 2.69012451171875e-05, + "grad_norm": 0.24846415221691132, + "learning_rate": 3.9712263137328836e-05, + "loss": 0.0076, + "step": 17630 + }, + { + "epoch": 2.69012451171875e-05, + "model_forward_time": 0.024354219436645508, + "step": 17630 + }, + { + "epoch": 2.69012451171875e-05, + "step": 17630, + "training_step_time": 0.1078329086303711 + }, + { + "epoch": 2.690277099609375e-05, + "model_forward_time": 0.02511739730834961, + "step": 17631 + }, + { + "epoch": 2.690277099609375e-05, + "step": 17631, + "training_step_time": 0.10584855079650879 + }, + { + "epoch": 2.6904296875e-05, + "model_forward_time": 0.02543473243713379, + "step": 17632 + }, + { + "epoch": 2.6904296875e-05, + "step": 17632, + "training_step_time": 0.11030459403991699 + }, + { + "epoch": 2.690582275390625e-05, + "model_forward_time": 0.024874448776245117, + "step": 17633 + }, + { + "epoch": 2.690582275390625e-05, + "step": 17633, + "training_step_time": 0.11121892929077148 + }, + { + "epoch": 2.69073486328125e-05, + "model_forward_time": 0.02493429183959961, + "step": 17634 + }, + { + "epoch": 2.69073486328125e-05, + "step": 17634, + "training_step_time": 0.11022543907165527 + }, + { + "epoch": 2.690887451171875e-05, + "model_forward_time": 0.024924039840698242, + "step": 17635 + }, + { + "epoch": 2.690887451171875e-05, + "step": 17635, + "training_step_time": 0.10805249214172363 + }, + { + "epoch": 2.6910400390625e-05, + "model_forward_time": 0.024998188018798828, + "step": 17636 + }, + { + "epoch": 2.6910400390625e-05, + "step": 17636, + "training_step_time": 0.10616612434387207 + }, + { + "epoch": 2.691192626953125e-05, + "model_forward_time": 0.024995803833007812, + "step": 17637 + }, + { + "epoch": 2.691192626953125e-05, + "step": 17637, + "training_step_time": 0.10666322708129883 + }, + { + "epoch": 2.69134521484375e-05, + "model_forward_time": 0.02477860450744629, + "step": 17638 + }, + { + "epoch": 2.69134521484375e-05, + "step": 17638, + "training_step_time": 0.1084434986114502 + }, + { + "epoch": 2.691497802734375e-05, + "model_forward_time": 0.02492380142211914, + "step": 17639 + }, + { + "epoch": 2.691497802734375e-05, + "step": 17639, + "training_step_time": 0.17279887199401855 + }, + { + "epoch": 2.691650390625e-05, + "grad_norm": 0.42609038949012756, + "learning_rate": 3.965833301517017e-05, + "loss": 0.0119, + "step": 17640 + }, + { + "epoch": 2.691650390625e-05, + "model_forward_time": 0.024018526077270508, + "step": 17640 + }, + { + "epoch": 2.691650390625e-05, + "step": 17640, + "training_step_time": 0.10555362701416016 + }, + { + "epoch": 2.691802978515625e-05, + "model_forward_time": 0.024334430694580078, + "step": 17641 + }, + { + "epoch": 2.691802978515625e-05, + "step": 17641, + "training_step_time": 0.1655597686767578 + }, + { + "epoch": 2.69195556640625e-05, + "model_forward_time": 0.024773836135864258, + "step": 17642 + }, + { + "epoch": 2.69195556640625e-05, + "step": 17642, + "training_step_time": 0.15639066696166992 + }, + { + "epoch": 2.692108154296875e-05, + "model_forward_time": 0.02418041229248047, + "step": 17643 + }, + { + "epoch": 2.692108154296875e-05, + "step": 17643, + "training_step_time": 0.1178121566772461 + }, + { + "epoch": 2.6922607421875e-05, + "model_forward_time": 0.02393507957458496, + "step": 17644 + }, + { + "epoch": 2.6922607421875e-05, + "step": 17644, + "training_step_time": 0.16220307350158691 + }, + { + "epoch": 2.692413330078125e-05, + "model_forward_time": 0.02425241470336914, + "step": 17645 + }, + { + "epoch": 2.692413330078125e-05, + "step": 17645, + "training_step_time": 0.19728708267211914 + }, + { + "epoch": 2.69256591796875e-05, + "model_forward_time": 0.02452826499938965, + "step": 17646 + }, + { + "epoch": 2.69256591796875e-05, + "step": 17646, + "training_step_time": 0.15880775451660156 + }, + { + "epoch": 2.692718505859375e-05, + "model_forward_time": 0.0243685245513916, + "step": 17647 + }, + { + "epoch": 2.692718505859375e-05, + "step": 17647, + "training_step_time": 0.11892008781433105 + }, + { + "epoch": 2.69287109375e-05, + "model_forward_time": 0.024597644805908203, + "step": 17648 + }, + { + "epoch": 2.69287109375e-05, + "step": 17648, + "training_step_time": 0.11197018623352051 + }, + { + "epoch": 2.693023681640625e-05, + "model_forward_time": 0.025578737258911133, + "step": 17649 + }, + { + "epoch": 2.693023681640625e-05, + "step": 17649, + "training_step_time": 0.1174924373626709 + }, + { + "epoch": 2.69317626953125e-05, + "grad_norm": 0.19126154482364655, + "learning_rate": 3.960441545911204e-05, + "loss": 0.01, + "step": 17650 + }, + { + "epoch": 2.69317626953125e-05, + "model_forward_time": 0.024829387664794922, + "step": 17650 + }, + { + "epoch": 2.69317626953125e-05, + "step": 17650, + "training_step_time": 0.10478663444519043 + }, + { + "epoch": 2.693328857421875e-05, + "model_forward_time": 0.024965763092041016, + "step": 17651 + }, + { + "epoch": 2.693328857421875e-05, + "step": 17651, + "training_step_time": 0.10884952545166016 + }, + { + "epoch": 2.6934814453125e-05, + "model_forward_time": 0.024044275283813477, + "step": 17652 + }, + { + "epoch": 2.6934814453125e-05, + "step": 17652, + "training_step_time": 0.10444879531860352 + }, + { + "epoch": 2.693634033203125e-05, + "model_forward_time": 0.024767160415649414, + "step": 17653 + }, + { + "epoch": 2.693634033203125e-05, + "step": 17653, + "training_step_time": 0.10766768455505371 + }, + { + "epoch": 2.69378662109375e-05, + "model_forward_time": 0.025311708450317383, + "step": 17654 + }, + { + "epoch": 2.69378662109375e-05, + "step": 17654, + "training_step_time": 0.10631537437438965 + }, + { + "epoch": 2.693939208984375e-05, + "model_forward_time": 0.024583101272583008, + "step": 17655 + }, + { + "epoch": 2.693939208984375e-05, + "step": 17655, + "training_step_time": 0.10692811012268066 + }, + { + "epoch": 2.694091796875e-05, + "model_forward_time": 0.025423526763916016, + "step": 17656 + }, + { + "epoch": 2.694091796875e-05, + "step": 17656, + "training_step_time": 0.10620355606079102 + }, + { + "epoch": 2.694244384765625e-05, + "model_forward_time": 0.02510690689086914, + "step": 17657 + }, + { + "epoch": 2.694244384765625e-05, + "step": 17657, + "training_step_time": 0.10471224784851074 + }, + { + "epoch": 2.69439697265625e-05, + "model_forward_time": 0.0250704288482666, + "step": 17658 + }, + { + "epoch": 2.69439697265625e-05, + "step": 17658, + "training_step_time": 0.10493111610412598 + }, + { + "epoch": 2.694549560546875e-05, + "model_forward_time": 0.02513742446899414, + "step": 17659 + }, + { + "epoch": 2.694549560546875e-05, + "step": 17659, + "training_step_time": 0.10521602630615234 + }, + { + "epoch": 2.6947021484375e-05, + "grad_norm": 0.34858188033103943, + "learning_rate": 3.955051053466937e-05, + "loss": 0.0183, + "step": 17660 + }, + { + "epoch": 2.6947021484375e-05, + "model_forward_time": 0.024333715438842773, + "step": 17660 + }, + { + "epoch": 2.6947021484375e-05, + "step": 17660, + "training_step_time": 0.15219402313232422 + }, + { + "epoch": 2.694854736328125e-05, + "model_forward_time": 0.02514195442199707, + "step": 17661 + }, + { + "epoch": 2.694854736328125e-05, + "step": 17661, + "training_step_time": 0.11740517616271973 + }, + { + "epoch": 2.69500732421875e-05, + "model_forward_time": 0.025423526763916016, + "step": 17662 + }, + { + "epoch": 2.69500732421875e-05, + "step": 17662, + "training_step_time": 0.11199712753295898 + }, + { + "epoch": 2.695159912109375e-05, + "model_forward_time": 0.02520585060119629, + "step": 17663 + }, + { + "epoch": 2.695159912109375e-05, + "step": 17663, + "training_step_time": 0.11115360260009766 + }, + { + "epoch": 2.6953125e-05, + "model_forward_time": 0.025011301040649414, + "step": 17664 + }, + { + "epoch": 2.6953125e-05, + "step": 17664, + "training_step_time": 0.12707781791687012 + }, + { + "epoch": 2.695465087890625e-05, + "model_forward_time": 0.0248873233795166, + "step": 17665 + }, + { + "epoch": 2.695465087890625e-05, + "step": 17665, + "training_step_time": 0.1087038516998291 + }, + { + "epoch": 2.69561767578125e-05, + "model_forward_time": 0.02496504783630371, + "step": 17666 + }, + { + "epoch": 2.69561767578125e-05, + "step": 17666, + "training_step_time": 0.1276235580444336 + }, + { + "epoch": 2.695770263671875e-05, + "model_forward_time": 0.024932146072387695, + "step": 17667 + }, + { + "epoch": 2.695770263671875e-05, + "step": 17667, + "training_step_time": 0.129652738571167 + }, + { + "epoch": 2.6959228515625e-05, + "model_forward_time": 0.024729013442993164, + "step": 17668 + }, + { + "epoch": 2.6959228515625e-05, + "step": 17668, + "training_step_time": 0.10419940948486328 + }, + { + "epoch": 2.696075439453125e-05, + "model_forward_time": 0.02474212646484375, + "step": 17669 + }, + { + "epoch": 2.696075439453125e-05, + "step": 17669, + "training_step_time": 0.10564637184143066 + }, + { + "epoch": 2.69622802734375e-05, + "grad_norm": 0.2636212408542633, + "learning_rate": 3.949661830734172e-05, + "loss": 0.011, + "step": 17670 + }, + { + "epoch": 2.69622802734375e-05, + "model_forward_time": 0.02762746810913086, + "step": 17670 + }, + { + "epoch": 2.69622802734375e-05, + "step": 17670, + "training_step_time": 0.10918784141540527 + }, + { + "epoch": 2.696380615234375e-05, + "model_forward_time": 0.025259971618652344, + "step": 17671 + }, + { + "epoch": 2.696380615234375e-05, + "step": 17671, + "training_step_time": 0.10972213745117188 + }, + { + "epoch": 2.696533203125e-05, + "model_forward_time": 0.025129079818725586, + "step": 17672 + }, + { + "epoch": 2.696533203125e-05, + "step": 17672, + "training_step_time": 0.11216926574707031 + }, + { + "epoch": 2.696685791015625e-05, + "model_forward_time": 0.025035858154296875, + "step": 17673 + }, + { + "epoch": 2.696685791015625e-05, + "step": 17673, + "training_step_time": 0.10884857177734375 + }, + { + "epoch": 2.69683837890625e-05, + "model_forward_time": 0.02518630027770996, + "step": 17674 + }, + { + "epoch": 2.69683837890625e-05, + "step": 17674, + "training_step_time": 0.10832715034484863 + }, + { + "epoch": 2.696990966796875e-05, + "model_forward_time": 0.02406167984008789, + "step": 17675 + }, + { + "epoch": 2.696990966796875e-05, + "step": 17675, + "training_step_time": 0.1084146499633789 + }, + { + "epoch": 2.6971435546875e-05, + "model_forward_time": 0.02490520477294922, + "step": 17676 + }, + { + "epoch": 2.6971435546875e-05, + "step": 17676, + "training_step_time": 0.11028790473937988 + }, + { + "epoch": 2.697296142578125e-05, + "model_forward_time": 0.02487778663635254, + "step": 17677 + }, + { + "epoch": 2.697296142578125e-05, + "step": 17677, + "training_step_time": 0.10698342323303223 + }, + { + "epoch": 2.69744873046875e-05, + "model_forward_time": 0.024646759033203125, + "step": 17678 + }, + { + "epoch": 2.69744873046875e-05, + "step": 17678, + "training_step_time": 0.1171114444732666 + }, + { + "epoch": 2.697601318359375e-05, + "model_forward_time": 0.02503347396850586, + "step": 17679 + }, + { + "epoch": 2.697601318359375e-05, + "step": 17679, + "training_step_time": 0.10998749732971191 + }, + { + "epoch": 2.69775390625e-05, + "grad_norm": 0.3377482295036316, + "learning_rate": 3.944273884261322e-05, + "loss": 0.0127, + "step": 17680 + }, + { + "epoch": 2.69775390625e-05, + "model_forward_time": 0.02502274513244629, + "step": 17680 + }, + { + "epoch": 2.69775390625e-05, + "step": 17680, + "training_step_time": 0.10909867286682129 + }, + { + "epoch": 2.697906494140625e-05, + "model_forward_time": 0.02521204948425293, + "step": 17681 + }, + { + "epoch": 2.697906494140625e-05, + "step": 17681, + "training_step_time": 0.10574984550476074 + }, + { + "epoch": 2.69805908203125e-05, + "model_forward_time": 0.024303913116455078, + "step": 17682 + }, + { + "epoch": 2.69805908203125e-05, + "step": 17682, + "training_step_time": 0.10669422149658203 + }, + { + "epoch": 2.698211669921875e-05, + "model_forward_time": 0.02496647834777832, + "step": 17683 + }, + { + "epoch": 2.698211669921875e-05, + "step": 17683, + "training_step_time": 0.10692262649536133 + }, + { + "epoch": 2.6983642578125e-05, + "model_forward_time": 0.025257587432861328, + "step": 17684 + }, + { + "epoch": 2.6983642578125e-05, + "step": 17684, + "training_step_time": 0.10901212692260742 + }, + { + "epoch": 2.698516845703125e-05, + "model_forward_time": 0.02487325668334961, + "step": 17685 + }, + { + "epoch": 2.698516845703125e-05, + "step": 17685, + "training_step_time": 0.15722346305847168 + }, + { + "epoch": 2.69866943359375e-05, + "model_forward_time": 0.024527311325073242, + "step": 17686 + }, + { + "epoch": 2.69866943359375e-05, + "step": 17686, + "training_step_time": 0.2013704776763916 + }, + { + "epoch": 2.698822021484375e-05, + "model_forward_time": 0.025080204010009766, + "step": 17687 + }, + { + "epoch": 2.698822021484375e-05, + "step": 17687, + "training_step_time": 0.10350990295410156 + }, + { + "epoch": 2.698974609375e-05, + "model_forward_time": 0.02432560920715332, + "step": 17688 + }, + { + "epoch": 2.698974609375e-05, + "step": 17688, + "training_step_time": 0.13787198066711426 + }, + { + "epoch": 2.699127197265625e-05, + "model_forward_time": 0.02454090118408203, + "step": 17689 + }, + { + "epoch": 2.699127197265625e-05, + "step": 17689, + "training_step_time": 0.1922752857208252 + }, + { + "epoch": 2.69927978515625e-05, + "grad_norm": 0.2020740509033203, + "learning_rate": 3.9388872205952526e-05, + "loss": 0.0094, + "step": 17690 + }, + { + "epoch": 2.69927978515625e-05, + "model_forward_time": 0.024262189865112305, + "step": 17690 + }, + { + "epoch": 2.69927978515625e-05, + "step": 17690, + "training_step_time": 0.13552045822143555 + }, + { + "epoch": 2.699432373046875e-05, + "model_forward_time": 0.024175643920898438, + "step": 17691 + }, + { + "epoch": 2.699432373046875e-05, + "step": 17691, + "training_step_time": 0.14896655082702637 + }, + { + "epoch": 2.6995849609375e-05, + "model_forward_time": 0.024967193603515625, + "step": 17692 + }, + { + "epoch": 2.6995849609375e-05, + "step": 17692, + "training_step_time": 0.19589471817016602 + }, + { + "epoch": 2.699737548828125e-05, + "model_forward_time": 0.024436473846435547, + "step": 17693 + }, + { + "epoch": 2.699737548828125e-05, + "step": 17693, + "training_step_time": 0.16414737701416016 + }, + { + "epoch": 2.69989013671875e-05, + "model_forward_time": 0.02432417869567871, + "step": 17694 + }, + { + "epoch": 2.69989013671875e-05, + "step": 17694, + "training_step_time": 0.1172029972076416 + }, + { + "epoch": 2.700042724609375e-05, + "model_forward_time": 0.024344682693481445, + "step": 17695 + }, + { + "epoch": 2.700042724609375e-05, + "step": 17695, + "training_step_time": 0.11496686935424805 + }, + { + "epoch": 2.7001953125e-05, + "model_forward_time": 0.02486276626586914, + "step": 17696 + }, + { + "epoch": 2.7001953125e-05, + "step": 17696, + "training_step_time": 0.11989808082580566 + }, + { + "epoch": 2.700347900390625e-05, + "model_forward_time": 0.02474045753479004, + "step": 17697 + }, + { + "epoch": 2.700347900390625e-05, + "step": 17697, + "training_step_time": 0.10661792755126953 + }, + { + "epoch": 2.70050048828125e-05, + "model_forward_time": 0.024944305419921875, + "step": 17698 + }, + { + "epoch": 2.70050048828125e-05, + "step": 17698, + "training_step_time": 0.10542559623718262 + }, + { + "epoch": 2.700653076171875e-05, + "model_forward_time": 0.02500319480895996, + "step": 17699 + }, + { + "epoch": 2.700653076171875e-05, + "step": 17699, + "training_step_time": 0.11017203330993652 + }, + { + "epoch": 2.7008056640625e-05, + "grad_norm": 0.11523959785699844, + "learning_rate": 3.933501846281267e-05, + "loss": 0.0118, + "step": 17700 + }, + { + "epoch": 2.7008056640625e-05, + "model_forward_time": 0.02520465850830078, + "step": 17700 + }, + { + "epoch": 2.7008056640625e-05, + "step": 17700, + "training_step_time": 0.10685610771179199 + }, + { + "epoch": 2.700958251953125e-05, + "model_forward_time": 0.025278806686401367, + "step": 17701 + }, + { + "epoch": 2.700958251953125e-05, + "step": 17701, + "training_step_time": 0.10702848434448242 + }, + { + "epoch": 2.70111083984375e-05, + "model_forward_time": 0.024937868118286133, + "step": 17702 + }, + { + "epoch": 2.70111083984375e-05, + "step": 17702, + "training_step_time": 0.10634708404541016 + }, + { + "epoch": 2.701263427734375e-05, + "model_forward_time": 0.024875164031982422, + "step": 17703 + }, + { + "epoch": 2.701263427734375e-05, + "step": 17703, + "training_step_time": 0.10957956314086914 + }, + { + "epoch": 2.701416015625e-05, + "model_forward_time": 0.024683475494384766, + "step": 17704 + }, + { + "epoch": 2.701416015625e-05, + "step": 17704, + "training_step_time": 0.1058650016784668 + }, + { + "epoch": 2.701568603515625e-05, + "model_forward_time": 0.025029420852661133, + "step": 17705 + }, + { + "epoch": 2.701568603515625e-05, + "step": 17705, + "training_step_time": 0.10803079605102539 + }, + { + "epoch": 2.70172119140625e-05, + "model_forward_time": 0.025226354598999023, + "step": 17706 + }, + { + "epoch": 2.70172119140625e-05, + "step": 17706, + "training_step_time": 0.10757231712341309 + }, + { + "epoch": 2.701873779296875e-05, + "model_forward_time": 0.025002241134643555, + "step": 17707 + }, + { + "epoch": 2.701873779296875e-05, + "step": 17707, + "training_step_time": 0.14447689056396484 + }, + { + "epoch": 2.7020263671875e-05, + "model_forward_time": 0.02555251121520996, + "step": 17708 + }, + { + "epoch": 2.7020263671875e-05, + "step": 17708, + "training_step_time": 0.14852571487426758 + }, + { + "epoch": 2.702178955078125e-05, + "model_forward_time": 0.0244905948638916, + "step": 17709 + }, + { + "epoch": 2.702178955078125e-05, + "step": 17709, + "training_step_time": 0.11041784286499023 + }, + { + "epoch": 2.70233154296875e-05, + "grad_norm": 0.47804808616638184, + "learning_rate": 3.928117767863102e-05, + "loss": 0.0148, + "step": 17710 + }, + { + "epoch": 2.70233154296875e-05, + "model_forward_time": 0.02489328384399414, + "step": 17710 + }, + { + "epoch": 2.70233154296875e-05, + "step": 17710, + "training_step_time": 0.1114044189453125 + }, + { + "epoch": 2.702484130859375e-05, + "model_forward_time": 0.025493860244750977, + "step": 17711 + }, + { + "epoch": 2.702484130859375e-05, + "step": 17711, + "training_step_time": 0.15456080436706543 + }, + { + "epoch": 2.70263671875e-05, + "model_forward_time": 0.024722814559936523, + "step": 17712 + }, + { + "epoch": 2.70263671875e-05, + "step": 17712, + "training_step_time": 0.20668625831604004 + }, + { + "epoch": 2.702789306640625e-05, + "model_forward_time": 0.02420210838317871, + "step": 17713 + }, + { + "epoch": 2.702789306640625e-05, + "step": 17713, + "training_step_time": 0.15131807327270508 + }, + { + "epoch": 2.70294189453125e-05, + "model_forward_time": 0.024515151977539062, + "step": 17714 + }, + { + "epoch": 2.70294189453125e-05, + "step": 17714, + "training_step_time": 0.13583779335021973 + }, + { + "epoch": 2.703094482421875e-05, + "model_forward_time": 0.02474689483642578, + "step": 17715 + }, + { + "epoch": 2.703094482421875e-05, + "step": 17715, + "training_step_time": 0.13012433052062988 + }, + { + "epoch": 2.7032470703125e-05, + "model_forward_time": 0.024486541748046875, + "step": 17716 + }, + { + "epoch": 2.7032470703125e-05, + "step": 17716, + "training_step_time": 0.12494301795959473 + }, + { + "epoch": 2.703399658203125e-05, + "model_forward_time": 0.024567604064941406, + "step": 17717 + }, + { + "epoch": 2.703399658203125e-05, + "step": 17717, + "training_step_time": 0.11674308776855469 + }, + { + "epoch": 2.70355224609375e-05, + "model_forward_time": 0.02482295036315918, + "step": 17718 + }, + { + "epoch": 2.70355224609375e-05, + "step": 17718, + "training_step_time": 0.11928224563598633 + }, + { + "epoch": 2.703704833984375e-05, + "model_forward_time": 0.024878263473510742, + "step": 17719 + }, + { + "epoch": 2.703704833984375e-05, + "step": 17719, + "training_step_time": 0.1118168830871582 + }, + { + "epoch": 2.703857421875e-05, + "grad_norm": 0.16659250855445862, + "learning_rate": 3.92273499188292e-05, + "loss": 0.0105, + "step": 17720 + }, + { + "epoch": 2.703857421875e-05, + "model_forward_time": 0.02500009536743164, + "step": 17720 + }, + { + "epoch": 2.703857421875e-05, + "step": 17720, + "training_step_time": 0.11332273483276367 + }, + { + "epoch": 2.704010009765625e-05, + "model_forward_time": 0.02514481544494629, + "step": 17721 + }, + { + "epoch": 2.704010009765625e-05, + "step": 17721, + "training_step_time": 0.10699892044067383 + }, + { + "epoch": 2.70416259765625e-05, + "model_forward_time": 0.025344371795654297, + "step": 17722 + }, + { + "epoch": 2.70416259765625e-05, + "step": 17722, + "training_step_time": 0.1105661392211914 + }, + { + "epoch": 2.704315185546875e-05, + "model_forward_time": 0.025389671325683594, + "step": 17723 + }, + { + "epoch": 2.704315185546875e-05, + "step": 17723, + "training_step_time": 0.1089777946472168 + }, + { + "epoch": 2.7044677734375e-05, + "model_forward_time": 0.025086641311645508, + "step": 17724 + }, + { + "epoch": 2.7044677734375e-05, + "step": 17724, + "training_step_time": 0.10904264450073242 + }, + { + "epoch": 2.704620361328125e-05, + "model_forward_time": 0.025250673294067383, + "step": 17725 + }, + { + "epoch": 2.704620361328125e-05, + "step": 17725, + "training_step_time": 0.10663652420043945 + }, + { + "epoch": 2.70477294921875e-05, + "model_forward_time": 0.02503347396850586, + "step": 17726 + }, + { + "epoch": 2.70477294921875e-05, + "step": 17726, + "training_step_time": 0.10947728157043457 + }, + { + "epoch": 2.704925537109375e-05, + "model_forward_time": 0.025022506713867188, + "step": 17727 + }, + { + "epoch": 2.704925537109375e-05, + "step": 17727, + "training_step_time": 0.1046450138092041 + }, + { + "epoch": 2.705078125e-05, + "model_forward_time": 0.024769067764282227, + "step": 17728 + }, + { + "epoch": 2.705078125e-05, + "step": 17728, + "training_step_time": 0.10725212097167969 + }, + { + "epoch": 2.705230712890625e-05, + "model_forward_time": 0.025760889053344727, + "step": 17729 + }, + { + "epoch": 2.705230712890625e-05, + "step": 17729, + "training_step_time": 0.10475873947143555 + }, + { + "epoch": 2.70538330078125e-05, + "grad_norm": 0.1870919018983841, + "learning_rate": 3.917353524881302e-05, + "loss": 0.0082, + "step": 17730 + }, + { + "epoch": 2.70538330078125e-05, + "model_forward_time": 0.024820566177368164, + "step": 17730 + }, + { + "epoch": 2.70538330078125e-05, + "step": 17730, + "training_step_time": 0.15718626976013184 + }, + { + "epoch": 2.705535888671875e-05, + "model_forward_time": 0.02448749542236328, + "step": 17731 + }, + { + "epoch": 2.705535888671875e-05, + "step": 17731, + "training_step_time": 0.12178659439086914 + }, + { + "epoch": 2.7056884765625e-05, + "model_forward_time": 0.024935007095336914, + "step": 17732 + }, + { + "epoch": 2.7056884765625e-05, + "step": 17732, + "training_step_time": 0.10374808311462402 + }, + { + "epoch": 2.705841064453125e-05, + "model_forward_time": 0.024270057678222656, + "step": 17733 + }, + { + "epoch": 2.705841064453125e-05, + "step": 17733, + "training_step_time": 0.10608386993408203 + }, + { + "epoch": 2.70599365234375e-05, + "model_forward_time": 0.025130271911621094, + "step": 17734 + }, + { + "epoch": 2.70599365234375e-05, + "step": 17734, + "training_step_time": 0.21529817581176758 + }, + { + "epoch": 2.706146240234375e-05, + "model_forward_time": 0.02480459213256836, + "step": 17735 + }, + { + "epoch": 2.706146240234375e-05, + "step": 17735, + "training_step_time": 0.1272883415222168 + }, + { + "epoch": 2.706298828125e-05, + "model_forward_time": 0.02445054054260254, + "step": 17736 + }, + { + "epoch": 2.706298828125e-05, + "step": 17736, + "training_step_time": 0.19802546501159668 + }, + { + "epoch": 2.706451416015625e-05, + "model_forward_time": 0.02454686164855957, + "step": 17737 + }, + { + "epoch": 2.706451416015625e-05, + "step": 17737, + "training_step_time": 0.14284801483154297 + }, + { + "epoch": 2.70660400390625e-05, + "model_forward_time": 0.024601221084594727, + "step": 17738 + }, + { + "epoch": 2.70660400390625e-05, + "step": 17738, + "training_step_time": 0.2062370777130127 + }, + { + "epoch": 2.706756591796875e-05, + "model_forward_time": 0.02434372901916504, + "step": 17739 + }, + { + "epoch": 2.706756591796875e-05, + "step": 17739, + "training_step_time": 0.1223289966583252 + }, + { + "epoch": 2.7069091796875e-05, + "grad_norm": 0.19186779856681824, + "learning_rate": 3.9119733733972387e-05, + "loss": 0.0155, + "step": 17740 + }, + { + "epoch": 2.7069091796875e-05, + "model_forward_time": 0.026275157928466797, + "step": 17740 + }, + { + "epoch": 2.7069091796875e-05, + "step": 17740, + "training_step_time": 0.11357760429382324 + }, + { + "epoch": 2.707061767578125e-05, + "model_forward_time": 0.025209903717041016, + "step": 17741 + }, + { + "epoch": 2.707061767578125e-05, + "step": 17741, + "training_step_time": 0.11390233039855957 + }, + { + "epoch": 2.70721435546875e-05, + "model_forward_time": 0.024872541427612305, + "step": 17742 + }, + { + "epoch": 2.70721435546875e-05, + "step": 17742, + "training_step_time": 0.11147785186767578 + }, + { + "epoch": 2.707366943359375e-05, + "model_forward_time": 0.02546548843383789, + "step": 17743 + }, + { + "epoch": 2.707366943359375e-05, + "step": 17743, + "training_step_time": 0.10614657402038574 + }, + { + "epoch": 2.70751953125e-05, + "model_forward_time": 0.02489018440246582, + "step": 17744 + }, + { + "epoch": 2.70751953125e-05, + "step": 17744, + "training_step_time": 0.10436511039733887 + }, + { + "epoch": 2.707672119140625e-05, + "model_forward_time": 0.024971485137939453, + "step": 17745 + }, + { + "epoch": 2.707672119140625e-05, + "step": 17745, + "training_step_time": 0.10857510566711426 + }, + { + "epoch": 2.70782470703125e-05, + "model_forward_time": 0.026225805282592773, + "step": 17746 + }, + { + "epoch": 2.70782470703125e-05, + "step": 17746, + "training_step_time": 0.1093130111694336 + }, + { + "epoch": 2.707977294921875e-05, + "model_forward_time": 0.024330854415893555, + "step": 17747 + }, + { + "epoch": 2.707977294921875e-05, + "step": 17747, + "training_step_time": 0.10790705680847168 + }, + { + "epoch": 2.7081298828125e-05, + "model_forward_time": 0.02520155906677246, + "step": 17748 + }, + { + "epoch": 2.7081298828125e-05, + "step": 17748, + "training_step_time": 0.10710811614990234 + }, + { + "epoch": 2.708282470703125e-05, + "model_forward_time": 0.02506852149963379, + "step": 17749 + }, + { + "epoch": 2.708282470703125e-05, + "step": 17749, + "training_step_time": 0.10506844520568848 + }, + { + "epoch": 2.70843505859375e-05, + "grad_norm": 0.29710689187049866, + "learning_rate": 3.9065945439681214e-05, + "loss": 0.021, + "step": 17750 + }, + { + "epoch": 2.70843505859375e-05, + "model_forward_time": 0.024570226669311523, + "step": 17750 + }, + { + "epoch": 2.70843505859375e-05, + "step": 17750, + "training_step_time": 0.10391879081726074 + }, + { + "epoch": 2.708587646484375e-05, + "model_forward_time": 0.024657726287841797, + "step": 17751 + }, + { + "epoch": 2.708587646484375e-05, + "step": 17751, + "training_step_time": 0.10632586479187012 + }, + { + "epoch": 2.708740234375e-05, + "model_forward_time": 0.02540111541748047, + "step": 17752 + }, + { + "epoch": 2.708740234375e-05, + "step": 17752, + "training_step_time": 0.14304161071777344 + }, + { + "epoch": 2.708892822265625e-05, + "model_forward_time": 0.025876522064208984, + "step": 17753 + }, + { + "epoch": 2.708892822265625e-05, + "step": 17753, + "training_step_time": 0.11693501472473145 + }, + { + "epoch": 2.70904541015625e-05, + "model_forward_time": 0.0242159366607666, + "step": 17754 + }, + { + "epoch": 2.70904541015625e-05, + "step": 17754, + "training_step_time": 0.10764336585998535 + }, + { + "epoch": 2.709197998046875e-05, + "model_forward_time": 0.025173425674438477, + "step": 17755 + }, + { + "epoch": 2.709197998046875e-05, + "step": 17755, + "training_step_time": 0.1087043285369873 + }, + { + "epoch": 2.7093505859375e-05, + "model_forward_time": 0.025537967681884766, + "step": 17756 + }, + { + "epoch": 2.7093505859375e-05, + "step": 17756, + "training_step_time": 0.12853407859802246 + }, + { + "epoch": 2.709503173828125e-05, + "model_forward_time": 0.025127172470092773, + "step": 17757 + }, + { + "epoch": 2.709503173828125e-05, + "step": 17757, + "training_step_time": 0.10955452919006348 + }, + { + "epoch": 2.70965576171875e-05, + "model_forward_time": 0.025132179260253906, + "step": 17758 + }, + { + "epoch": 2.70965576171875e-05, + "step": 17758, + "training_step_time": 0.13810229301452637 + }, + { + "epoch": 2.709808349609375e-05, + "model_forward_time": 0.025025606155395508, + "step": 17759 + }, + { + "epoch": 2.709808349609375e-05, + "step": 17759, + "training_step_time": 0.11642932891845703 + }, + { + "epoch": 2.7099609375e-05, + "grad_norm": 0.3531988561153412, + "learning_rate": 3.901217043129735e-05, + "loss": 0.0161, + "step": 17760 + }, + { + "epoch": 2.7099609375e-05, + "model_forward_time": 0.02491617202758789, + "step": 17760 + }, + { + "epoch": 2.7099609375e-05, + "step": 17760, + "training_step_time": 0.10485625267028809 + }, + { + "epoch": 2.710113525390625e-05, + "model_forward_time": 0.025086402893066406, + "step": 17761 + }, + { + "epoch": 2.710113525390625e-05, + "step": 17761, + "training_step_time": 0.11249613761901855 + }, + { + "epoch": 2.71026611328125e-05, + "model_forward_time": 0.02499222755432129, + "step": 17762 + }, + { + "epoch": 2.71026611328125e-05, + "step": 17762, + "training_step_time": 0.1263437271118164 + }, + { + "epoch": 2.710418701171875e-05, + "model_forward_time": 0.02515888214111328, + "step": 17763 + }, + { + "epoch": 2.710418701171875e-05, + "step": 17763, + "training_step_time": 0.12273168563842773 + }, + { + "epoch": 2.7105712890625e-05, + "model_forward_time": 0.02496027946472168, + "step": 17764 + }, + { + "epoch": 2.7105712890625e-05, + "step": 17764, + "training_step_time": 0.12393403053283691 + }, + { + "epoch": 2.710723876953125e-05, + "model_forward_time": 0.025132417678833008, + "step": 17765 + }, + { + "epoch": 2.710723876953125e-05, + "step": 17765, + "training_step_time": 0.11769413948059082 + }, + { + "epoch": 2.71087646484375e-05, + "model_forward_time": 0.02494192123413086, + "step": 17766 + }, + { + "epoch": 2.71087646484375e-05, + "step": 17766, + "training_step_time": 0.11533880233764648 + }, + { + "epoch": 2.711029052734375e-05, + "model_forward_time": 0.025026798248291016, + "step": 17767 + }, + { + "epoch": 2.711029052734375e-05, + "step": 17767, + "training_step_time": 0.11738038063049316 + }, + { + "epoch": 2.711181640625e-05, + "model_forward_time": 0.024738073348999023, + "step": 17768 + }, + { + "epoch": 2.711181640625e-05, + "step": 17768, + "training_step_time": 0.11084151268005371 + }, + { + "epoch": 2.711334228515625e-05, + "model_forward_time": 0.02548384666442871, + "step": 17769 + }, + { + "epoch": 2.711334228515625e-05, + "step": 17769, + "training_step_time": 0.11447405815124512 + }, + { + "epoch": 2.71148681640625e-05, + "grad_norm": 0.2782493233680725, + "learning_rate": 3.895840877416249e-05, + "loss": 0.0077, + "step": 17770 + }, + { + "epoch": 2.71148681640625e-05, + "model_forward_time": 0.02552175521850586, + "step": 17770 + }, + { + "epoch": 2.71148681640625e-05, + "step": 17770, + "training_step_time": 0.10943126678466797 + }, + { + "epoch": 2.711639404296875e-05, + "model_forward_time": 0.025345563888549805, + "step": 17771 + }, + { + "epoch": 2.711639404296875e-05, + "step": 17771, + "training_step_time": 0.10887742042541504 + }, + { + "epoch": 2.7117919921875e-05, + "model_forward_time": 0.025590896606445312, + "step": 17772 + }, + { + "epoch": 2.7117919921875e-05, + "step": 17772, + "training_step_time": 0.10801267623901367 + }, + { + "epoch": 2.711944580078125e-05, + "model_forward_time": 0.026002168655395508, + "step": 17773 + }, + { + "epoch": 2.711944580078125e-05, + "step": 17773, + "training_step_time": 0.10735845565795898 + }, + { + "epoch": 2.71209716796875e-05, + "model_forward_time": 0.0259702205657959, + "step": 17774 + }, + { + "epoch": 2.71209716796875e-05, + "step": 17774, + "training_step_time": 0.10898089408874512 + }, + { + "epoch": 2.712249755859375e-05, + "model_forward_time": 0.02728748321533203, + "step": 17775 + }, + { + "epoch": 2.712249755859375e-05, + "step": 17775, + "training_step_time": 0.11421632766723633 + }, + { + "epoch": 2.71240234375e-05, + "model_forward_time": 0.0254514217376709, + "step": 17776 + }, + { + "epoch": 2.71240234375e-05, + "step": 17776, + "training_step_time": 0.214951753616333 + }, + { + "epoch": 2.712554931640625e-05, + "model_forward_time": 0.02418375015258789, + "step": 17777 + }, + { + "epoch": 2.712554931640625e-05, + "step": 17777, + "training_step_time": 0.12428712844848633 + }, + { + "epoch": 2.71270751953125e-05, + "model_forward_time": 0.026113510131835938, + "step": 17778 + }, + { + "epoch": 2.71270751953125e-05, + "step": 17778, + "training_step_time": 0.10728883743286133 + }, + { + "epoch": 2.712860107421875e-05, + "model_forward_time": 0.024712085723876953, + "step": 17779 + }, + { + "epoch": 2.712860107421875e-05, + "step": 17779, + "training_step_time": 0.12960124015808105 + }, + { + "epoch": 2.7130126953125e-05, + "grad_norm": 0.24737589061260223, + "learning_rate": 3.890466053360211e-05, + "loss": 0.0187, + "step": 17780 + }, + { + "epoch": 2.7130126953125e-05, + "model_forward_time": 0.025352954864501953, + "step": 17780 + }, + { + "epoch": 2.7130126953125e-05, + "step": 17780, + "training_step_time": 0.22339248657226562 + }, + { + "epoch": 2.713165283203125e-05, + "model_forward_time": 0.024074077606201172, + "step": 17781 + }, + { + "epoch": 2.713165283203125e-05, + "step": 17781, + "training_step_time": 0.22021794319152832 + }, + { + "epoch": 2.71331787109375e-05, + "model_forward_time": 0.02434086799621582, + "step": 17782 + }, + { + "epoch": 2.71331787109375e-05, + "step": 17782, + "training_step_time": 0.1247413158416748 + }, + { + "epoch": 2.713470458984375e-05, + "model_forward_time": 0.024853944778442383, + "step": 17783 + }, + { + "epoch": 2.713470458984375e-05, + "step": 17783, + "training_step_time": 0.11188817024230957 + }, + { + "epoch": 2.713623046875e-05, + "model_forward_time": 0.024930953979492188, + "step": 17784 + }, + { + "epoch": 2.713623046875e-05, + "step": 17784, + "training_step_time": 0.10723018646240234 + }, + { + "epoch": 2.713775634765625e-05, + "model_forward_time": 0.024888992309570312, + "step": 17785 + }, + { + "epoch": 2.713775634765625e-05, + "step": 17785, + "training_step_time": 0.12799930572509766 + }, + { + "epoch": 2.71392822265625e-05, + "model_forward_time": 0.024920940399169922, + "step": 17786 + }, + { + "epoch": 2.71392822265625e-05, + "step": 17786, + "training_step_time": 0.12634873390197754 + }, + { + "epoch": 2.714080810546875e-05, + "model_forward_time": 0.024768829345703125, + "step": 17787 + }, + { + "epoch": 2.714080810546875e-05, + "step": 17787, + "training_step_time": 0.10676336288452148 + }, + { + "epoch": 2.7142333984375e-05, + "model_forward_time": 0.025170326232910156, + "step": 17788 + }, + { + "epoch": 2.7142333984375e-05, + "step": 17788, + "training_step_time": 0.12009906768798828 + }, + { + "epoch": 2.714385986328125e-05, + "model_forward_time": 0.025107145309448242, + "step": 17789 + }, + { + "epoch": 2.714385986328125e-05, + "step": 17789, + "training_step_time": 0.11100149154663086 + }, + { + "epoch": 2.71453857421875e-05, + "grad_norm": 0.14849424362182617, + "learning_rate": 3.8850925774925425e-05, + "loss": 0.0044, + "step": 17790 + }, + { + "epoch": 2.71453857421875e-05, + "model_forward_time": 0.025336265563964844, + "step": 17790 + }, + { + "epoch": 2.71453857421875e-05, + "step": 17790, + "training_step_time": 0.10687541961669922 + }, + { + "epoch": 2.714691162109375e-05, + "model_forward_time": 0.025002241134643555, + "step": 17791 + }, + { + "epoch": 2.714691162109375e-05, + "step": 17791, + "training_step_time": 0.10997629165649414 + }, + { + "epoch": 2.71484375e-05, + "model_forward_time": 0.02515125274658203, + "step": 17792 + }, + { + "epoch": 2.71484375e-05, + "step": 17792, + "training_step_time": 0.14942479133605957 + }, + { + "epoch": 2.714996337890625e-05, + "model_forward_time": 0.02477264404296875, + "step": 17793 + }, + { + "epoch": 2.714996337890625e-05, + "step": 17793, + "training_step_time": 0.17796111106872559 + }, + { + "epoch": 2.71514892578125e-05, + "model_forward_time": 0.024335622787475586, + "step": 17794 + }, + { + "epoch": 2.71514892578125e-05, + "step": 17794, + "training_step_time": 0.18273639678955078 + }, + { + "epoch": 2.715301513671875e-05, + "model_forward_time": 0.024575233459472656, + "step": 17795 + }, + { + "epoch": 2.715301513671875e-05, + "step": 17795, + "training_step_time": 0.1756458282470703 + }, + { + "epoch": 2.7154541015625e-05, + "model_forward_time": 0.024271011352539062, + "step": 17796 + }, + { + "epoch": 2.7154541015625e-05, + "step": 17796, + "training_step_time": 0.19151997566223145 + }, + { + "epoch": 2.715606689453125e-05, + "model_forward_time": 0.02467179298400879, + "step": 17797 + }, + { + "epoch": 2.715606689453125e-05, + "step": 17797, + "training_step_time": 0.16283822059631348 + }, + { + "epoch": 2.71575927734375e-05, + "model_forward_time": 0.02411174774169922, + "step": 17798 + }, + { + "epoch": 2.71575927734375e-05, + "step": 17798, + "training_step_time": 0.16323304176330566 + }, + { + "epoch": 2.715911865234375e-05, + "model_forward_time": 0.024342775344848633, + "step": 17799 + }, + { + "epoch": 2.715911865234375e-05, + "step": 17799, + "training_step_time": 0.13154125213623047 + }, + { + "epoch": 2.716064453125e-05, + "grad_norm": 0.29433611035346985, + "learning_rate": 3.879720456342521e-05, + "loss": 0.0079, + "step": 17800 + }, + { + "epoch": 2.716064453125e-05, + "model_forward_time": 0.023529767990112305, + "step": 17800 + }, + { + "epoch": 2.716064453125e-05, + "step": 17800, + "training_step_time": 0.16872954368591309 + }, + { + "epoch": 2.716217041015625e-05, + "model_forward_time": 0.024827003479003906, + "step": 17801 + }, + { + "epoch": 2.716217041015625e-05, + "step": 17801, + "training_step_time": 0.160963773727417 + }, + { + "epoch": 2.71636962890625e-05, + "model_forward_time": 0.02395153045654297, + "step": 17802 + }, + { + "epoch": 2.71636962890625e-05, + "step": 17802, + "training_step_time": 0.0999443531036377 + }, + { + "epoch": 2.716522216796875e-05, + "model_forward_time": 0.024326324462890625, + "step": 17803 + }, + { + "epoch": 2.716522216796875e-05, + "step": 17803, + "training_step_time": 0.10318398475646973 + }, + { + "epoch": 2.7166748046875e-05, + "model_forward_time": 0.025301218032836914, + "step": 17804 + }, + { + "epoch": 2.7166748046875e-05, + "step": 17804, + "training_step_time": 0.10341000556945801 + }, + { + "epoch": 2.716827392578125e-05, + "model_forward_time": 0.02529311180114746, + "step": 17805 + }, + { + "epoch": 2.716827392578125e-05, + "step": 17805, + "training_step_time": 0.10476231575012207 + }, + { + "epoch": 2.71697998046875e-05, + "model_forward_time": 0.025449514389038086, + "step": 17806 + }, + { + "epoch": 2.71697998046875e-05, + "step": 17806, + "training_step_time": 0.10488677024841309 + }, + { + "epoch": 2.717132568359375e-05, + "model_forward_time": 0.025334835052490234, + "step": 17807 + }, + { + "epoch": 2.717132568359375e-05, + "step": 17807, + "training_step_time": 0.1038503646850586 + }, + { + "epoch": 2.71728515625e-05, + "model_forward_time": 0.025269269943237305, + "step": 17808 + }, + { + "epoch": 2.71728515625e-05, + "step": 17808, + "training_step_time": 0.10480117797851562 + }, + { + "epoch": 2.717437744140625e-05, + "model_forward_time": 0.025156259536743164, + "step": 17809 + }, + { + "epoch": 2.717437744140625e-05, + "step": 17809, + "training_step_time": 0.10785365104675293 + }, + { + "epoch": 2.71759033203125e-05, + "grad_norm": 0.30822667479515076, + "learning_rate": 3.87434969643778e-05, + "loss": 0.0146, + "step": 17810 + }, + { + "epoch": 2.71759033203125e-05, + "model_forward_time": 0.025263309478759766, + "step": 17810 + }, + { + "epoch": 2.71759033203125e-05, + "step": 17810, + "training_step_time": 0.10577630996704102 + }, + { + "epoch": 2.717742919921875e-05, + "model_forward_time": 0.025304317474365234, + "step": 17811 + }, + { + "epoch": 2.717742919921875e-05, + "step": 17811, + "training_step_time": 0.10428953170776367 + }, + { + "epoch": 2.7178955078125e-05, + "model_forward_time": 0.025110960006713867, + "step": 17812 + }, + { + "epoch": 2.7178955078125e-05, + "step": 17812, + "training_step_time": 0.10439157485961914 + }, + { + "epoch": 2.718048095703125e-05, + "model_forward_time": 0.02563309669494629, + "step": 17813 + }, + { + "epoch": 2.718048095703125e-05, + "step": 17813, + "training_step_time": 0.10702228546142578 + }, + { + "epoch": 2.71820068359375e-05, + "model_forward_time": 0.024785280227661133, + "step": 17814 + }, + { + "epoch": 2.71820068359375e-05, + "step": 17814, + "training_step_time": 0.10537457466125488 + }, + { + "epoch": 2.718353271484375e-05, + "model_forward_time": 0.025554418563842773, + "step": 17815 + }, + { + "epoch": 2.718353271484375e-05, + "step": 17815, + "training_step_time": 0.11116719245910645 + }, + { + "epoch": 2.718505859375e-05, + "model_forward_time": 0.025223970413208008, + "step": 17816 + }, + { + "epoch": 2.718505859375e-05, + "step": 17816, + "training_step_time": 0.10426211357116699 + }, + { + "epoch": 2.718658447265625e-05, + "model_forward_time": 0.024869441986083984, + "step": 17817 + }, + { + "epoch": 2.718658447265625e-05, + "step": 17817, + "training_step_time": 0.10783696174621582 + }, + { + "epoch": 2.71881103515625e-05, + "model_forward_time": 0.025443553924560547, + "step": 17818 + }, + { + "epoch": 2.71881103515625e-05, + "step": 17818, + "training_step_time": 0.11128664016723633 + }, + { + "epoch": 2.718963623046875e-05, + "model_forward_time": 0.023201704025268555, + "step": 17819 + }, + { + "epoch": 2.718963623046875e-05, + "step": 17819, + "training_step_time": 0.20087242126464844 + }, + { + "epoch": 2.7191162109375e-05, + "grad_norm": 0.2797226011753082, + "learning_rate": 3.8689803043043e-05, + "loss": 0.013, + "step": 17820 + }, + { + "epoch": 2.7191162109375e-05, + "model_forward_time": 0.02434086799621582, + "step": 17820 + }, + { + "epoch": 2.7191162109375e-05, + "step": 17820, + "training_step_time": 0.11786365509033203 + }, + { + "epoch": 2.719268798828125e-05, + "model_forward_time": 0.025154829025268555, + "step": 17821 + }, + { + "epoch": 2.719268798828125e-05, + "step": 17821, + "training_step_time": 0.10412883758544922 + }, + { + "epoch": 2.71942138671875e-05, + "model_forward_time": 0.02414560317993164, + "step": 17822 + }, + { + "epoch": 2.71942138671875e-05, + "step": 17822, + "training_step_time": 0.14868927001953125 + }, + { + "epoch": 2.719573974609375e-05, + "model_forward_time": 0.025008201599121094, + "step": 17823 + }, + { + "epoch": 2.719573974609375e-05, + "step": 17823, + "training_step_time": 0.20080280303955078 + }, + { + "epoch": 2.7197265625e-05, + "model_forward_time": 0.024900436401367188, + "step": 17824 + }, + { + "epoch": 2.7197265625e-05, + "step": 17824, + "training_step_time": 0.17074918746948242 + }, + { + "epoch": 2.719879150390625e-05, + "model_forward_time": 0.024929285049438477, + "step": 17825 + }, + { + "epoch": 2.719879150390625e-05, + "step": 17825, + "training_step_time": 0.2047901153564453 + }, + { + "epoch": 2.72003173828125e-05, + "model_forward_time": 0.0244443416595459, + "step": 17826 + }, + { + "epoch": 2.72003173828125e-05, + "step": 17826, + "training_step_time": 0.10326552391052246 + }, + { + "epoch": 2.720184326171875e-05, + "model_forward_time": 0.024632930755615234, + "step": 17827 + }, + { + "epoch": 2.720184326171875e-05, + "step": 17827, + "training_step_time": 0.11418461799621582 + }, + { + "epoch": 2.7203369140625e-05, + "model_forward_time": 0.024976253509521484, + "step": 17828 + }, + { + "epoch": 2.7203369140625e-05, + "step": 17828, + "training_step_time": 0.12517619132995605 + }, + { + "epoch": 2.720489501953125e-05, + "model_forward_time": 0.025359392166137695, + "step": 17829 + }, + { + "epoch": 2.720489501953125e-05, + "step": 17829, + "training_step_time": 0.13460326194763184 + }, + { + "epoch": 2.72064208984375e-05, + "grad_norm": 0.160170778632164, + "learning_rate": 3.863612286466396e-05, + "loss": 0.0124, + "step": 17830 + }, + { + "epoch": 2.72064208984375e-05, + "model_forward_time": 0.024671554565429688, + "step": 17830 + }, + { + "epoch": 2.72064208984375e-05, + "step": 17830, + "training_step_time": 0.11482381820678711 + }, + { + "epoch": 2.720794677734375e-05, + "model_forward_time": 0.025677204132080078, + "step": 17831 + }, + { + "epoch": 2.720794677734375e-05, + "step": 17831, + "training_step_time": 0.11292362213134766 + }, + { + "epoch": 2.720947265625e-05, + "model_forward_time": 0.025136947631835938, + "step": 17832 + }, + { + "epoch": 2.720947265625e-05, + "step": 17832, + "training_step_time": 0.15885281562805176 + }, + { + "epoch": 2.721099853515625e-05, + "model_forward_time": 0.024775028228759766, + "step": 17833 + }, + { + "epoch": 2.721099853515625e-05, + "step": 17833, + "training_step_time": 0.18265700340270996 + }, + { + "epoch": 2.72125244140625e-05, + "model_forward_time": 0.024469614028930664, + "step": 17834 + }, + { + "epoch": 2.72125244140625e-05, + "step": 17834, + "training_step_time": 0.16023707389831543 + }, + { + "epoch": 2.721405029296875e-05, + "model_forward_time": 0.024408340454101562, + "step": 17835 + }, + { + "epoch": 2.721405029296875e-05, + "step": 17835, + "training_step_time": 0.14282965660095215 + }, + { + "epoch": 2.7215576171875e-05, + "model_forward_time": 0.02514362335205078, + "step": 17836 + }, + { + "epoch": 2.7215576171875e-05, + "step": 17836, + "training_step_time": 0.13952112197875977 + }, + { + "epoch": 2.721710205078125e-05, + "model_forward_time": 0.024619102478027344, + "step": 17837 + }, + { + "epoch": 2.721710205078125e-05, + "step": 17837, + "training_step_time": 0.12865781784057617 + }, + { + "epoch": 2.72186279296875e-05, + "model_forward_time": 0.02448105812072754, + "step": 17838 + }, + { + "epoch": 2.72186279296875e-05, + "step": 17838, + "training_step_time": 0.12499737739562988 + }, + { + "epoch": 2.722015380859375e-05, + "model_forward_time": 0.02493143081665039, + "step": 17839 + }, + { + "epoch": 2.722015380859375e-05, + "step": 17839, + "training_step_time": 0.13923406600952148 + }, + { + "epoch": 2.72216796875e-05, + "grad_norm": 0.20724281668663025, + "learning_rate": 3.858245649446721e-05, + "loss": 0.0089, + "step": 17840 + }, + { + "epoch": 2.72216796875e-05, + "model_forward_time": 0.02482128143310547, + "step": 17840 + }, + { + "epoch": 2.72216796875e-05, + "step": 17840, + "training_step_time": 0.13233351707458496 + }, + { + "epoch": 2.722320556640625e-05, + "model_forward_time": 0.0250244140625, + "step": 17841 + }, + { + "epoch": 2.722320556640625e-05, + "step": 17841, + "training_step_time": 0.11634016036987305 + }, + { + "epoch": 2.72247314453125e-05, + "model_forward_time": 0.02407383918762207, + "step": 17842 + }, + { + "epoch": 2.72247314453125e-05, + "step": 17842, + "training_step_time": 0.1212615966796875 + }, + { + "epoch": 2.722625732421875e-05, + "model_forward_time": 0.024248838424682617, + "step": 17843 + }, + { + "epoch": 2.722625732421875e-05, + "step": 17843, + "training_step_time": 0.1099236011505127 + }, + { + "epoch": 2.7227783203125e-05, + "model_forward_time": 0.029558897018432617, + "step": 17844 + }, + { + "epoch": 2.7227783203125e-05, + "step": 17844, + "training_step_time": 0.12109065055847168 + }, + { + "epoch": 2.722930908203125e-05, + "model_forward_time": 0.02521991729736328, + "step": 17845 + }, + { + "epoch": 2.722930908203125e-05, + "step": 17845, + "training_step_time": 0.18286466598510742 + }, + { + "epoch": 2.72308349609375e-05, + "model_forward_time": 0.0246889591217041, + "step": 17846 + }, + { + "epoch": 2.72308349609375e-05, + "step": 17846, + "training_step_time": 0.10530400276184082 + }, + { + "epoch": 2.723236083984375e-05, + "model_forward_time": 0.024507999420166016, + "step": 17847 + }, + { + "epoch": 2.723236083984375e-05, + "step": 17847, + "training_step_time": 0.10642695426940918 + }, + { + "epoch": 2.723388671875e-05, + "model_forward_time": 0.025201082229614258, + "step": 17848 + }, + { + "epoch": 2.723388671875e-05, + "step": 17848, + "training_step_time": 0.10555386543273926 + }, + { + "epoch": 2.723541259765625e-05, + "model_forward_time": 0.02525019645690918, + "step": 17849 + }, + { + "epoch": 2.723541259765625e-05, + "step": 17849, + "training_step_time": 0.11086893081665039 + }, + { + "epoch": 2.72369384765625e-05, + "grad_norm": 0.29175248742103577, + "learning_rate": 3.852880399766243e-05, + "loss": 0.0102, + "step": 17850 + }, + { + "epoch": 2.72369384765625e-05, + "model_forward_time": 0.023540735244750977, + "step": 17850 + }, + { + "epoch": 2.72369384765625e-05, + "step": 17850, + "training_step_time": 0.1093754768371582 + }, + { + "epoch": 2.723846435546875e-05, + "model_forward_time": 0.02463531494140625, + "step": 17851 + }, + { + "epoch": 2.723846435546875e-05, + "step": 17851, + "training_step_time": 0.11007237434387207 + }, + { + "epoch": 2.7239990234375e-05, + "model_forward_time": 0.02586674690246582, + "step": 17852 + }, + { + "epoch": 2.7239990234375e-05, + "step": 17852, + "training_step_time": 0.10846757888793945 + }, + { + "epoch": 2.724151611328125e-05, + "model_forward_time": 0.0252835750579834, + "step": 17853 + }, + { + "epoch": 2.724151611328125e-05, + "step": 17853, + "training_step_time": 0.1046297550201416 + }, + { + "epoch": 2.72430419921875e-05, + "model_forward_time": 0.025349855422973633, + "step": 17854 + }, + { + "epoch": 2.72430419921875e-05, + "step": 17854, + "training_step_time": 0.10582780838012695 + }, + { + "epoch": 2.724456787109375e-05, + "model_forward_time": 0.02541637420654297, + "step": 17855 + }, + { + "epoch": 2.724456787109375e-05, + "step": 17855, + "training_step_time": 0.1049504280090332 + }, + { + "epoch": 2.724609375e-05, + "model_forward_time": 0.025146484375, + "step": 17856 + }, + { + "epoch": 2.724609375e-05, + "step": 17856, + "training_step_time": 0.1041109561920166 + }, + { + "epoch": 2.724761962890625e-05, + "model_forward_time": 0.025187015533447266, + "step": 17857 + }, + { + "epoch": 2.724761962890625e-05, + "step": 17857, + "training_step_time": 0.1051790714263916 + }, + { + "epoch": 2.72491455078125e-05, + "model_forward_time": 0.025150299072265625, + "step": 17858 + }, + { + "epoch": 2.72491455078125e-05, + "step": 17858, + "training_step_time": 0.10592508316040039 + }, + { + "epoch": 2.725067138671875e-05, + "model_forward_time": 0.026232481002807617, + "step": 17859 + }, + { + "epoch": 2.725067138671875e-05, + "step": 17859, + "training_step_time": 0.10644984245300293 + }, + { + "epoch": 2.7252197265625e-05, + "grad_norm": 0.2431711107492447, + "learning_rate": 3.8475165439442446e-05, + "loss": 0.0091, + "step": 17860 + }, + { + "epoch": 2.7252197265625e-05, + "model_forward_time": 0.025551557540893555, + "step": 17860 + }, + { + "epoch": 2.7252197265625e-05, + "step": 17860, + "training_step_time": 0.10732293128967285 + }, + { + "epoch": 2.725372314453125e-05, + "model_forward_time": 0.026672840118408203, + "step": 17861 + }, + { + "epoch": 2.725372314453125e-05, + "step": 17861, + "training_step_time": 0.10648775100708008 + }, + { + "epoch": 2.72552490234375e-05, + "model_forward_time": 0.02519536018371582, + "step": 17862 + }, + { + "epoch": 2.72552490234375e-05, + "step": 17862, + "training_step_time": 0.10601353645324707 + }, + { + "epoch": 2.725677490234375e-05, + "model_forward_time": 0.02534031867980957, + "step": 17863 + }, + { + "epoch": 2.725677490234375e-05, + "step": 17863, + "training_step_time": 0.18191146850585938 + }, + { + "epoch": 2.725830078125e-05, + "model_forward_time": 0.024652481079101562, + "step": 17864 + }, + { + "epoch": 2.725830078125e-05, + "step": 17864, + "training_step_time": 0.12701034545898438 + }, + { + "epoch": 2.725982666015625e-05, + "model_forward_time": 0.02532815933227539, + "step": 17865 + }, + { + "epoch": 2.725982666015625e-05, + "step": 17865, + "training_step_time": 0.10142159461975098 + }, + { + "epoch": 2.72613525390625e-05, + "model_forward_time": 0.024637937545776367, + "step": 17866 + }, + { + "epoch": 2.72613525390625e-05, + "step": 17866, + "training_step_time": 0.1498427391052246 + }, + { + "epoch": 2.726287841796875e-05, + "model_forward_time": 0.02797865867614746, + "step": 17867 + }, + { + "epoch": 2.726287841796875e-05, + "step": 17867, + "training_step_time": 0.19466686248779297 + }, + { + "epoch": 2.7264404296875e-05, + "model_forward_time": 0.024108171463012695, + "step": 17868 + }, + { + "epoch": 2.7264404296875e-05, + "step": 17868, + "training_step_time": 0.21257948875427246 + }, + { + "epoch": 2.726593017578125e-05, + "model_forward_time": 0.024782657623291016, + "step": 17869 + }, + { + "epoch": 2.726593017578125e-05, + "step": 17869, + "training_step_time": 0.16902542114257812 + }, + { + "epoch": 2.72674560546875e-05, + "grad_norm": 0.6194291710853577, + "learning_rate": 3.842154088498316e-05, + "loss": 0.0118, + "step": 17870 + }, + { + "epoch": 2.72674560546875e-05, + "model_forward_time": 0.024775028228759766, + "step": 17870 + }, + { + "epoch": 2.72674560546875e-05, + "step": 17870, + "training_step_time": 0.11042070388793945 + }, + { + "epoch": 2.726898193359375e-05, + "model_forward_time": 0.024674654006958008, + "step": 17871 + }, + { + "epoch": 2.726898193359375e-05, + "step": 17871, + "training_step_time": 0.1410846710205078 + }, + { + "epoch": 2.72705078125e-05, + "model_forward_time": 0.024624347686767578, + "step": 17872 + }, + { + "epoch": 2.72705078125e-05, + "step": 17872, + "training_step_time": 0.10656380653381348 + }, + { + "epoch": 2.727203369140625e-05, + "model_forward_time": 0.02496647834777832, + "step": 17873 + }, + { + "epoch": 2.727203369140625e-05, + "step": 17873, + "training_step_time": 0.12030458450317383 + }, + { + "epoch": 2.72735595703125e-05, + "model_forward_time": 0.02531147003173828, + "step": 17874 + }, + { + "epoch": 2.72735595703125e-05, + "step": 17874, + "training_step_time": 0.1288294792175293 + }, + { + "epoch": 2.727508544921875e-05, + "model_forward_time": 0.02532505989074707, + "step": 17875 + }, + { + "epoch": 2.727508544921875e-05, + "step": 17875, + "training_step_time": 0.11631083488464355 + }, + { + "epoch": 2.7276611328125e-05, + "model_forward_time": 0.025744915008544922, + "step": 17876 + }, + { + "epoch": 2.7276611328125e-05, + "step": 17876, + "training_step_time": 0.11756610870361328 + }, + { + "epoch": 2.727813720703125e-05, + "model_forward_time": 0.02522730827331543, + "step": 17877 + }, + { + "epoch": 2.727813720703125e-05, + "step": 17877, + "training_step_time": 0.10687875747680664 + }, + { + "epoch": 2.72796630859375e-05, + "model_forward_time": 0.025696277618408203, + "step": 17878 + }, + { + "epoch": 2.72796630859375e-05, + "step": 17878, + "training_step_time": 0.10690808296203613 + }, + { + "epoch": 2.728118896484375e-05, + "model_forward_time": 0.025755882263183594, + "step": 17879 + }, + { + "epoch": 2.728118896484375e-05, + "step": 17879, + "training_step_time": 0.10820436477661133 + }, + { + "epoch": 2.728271484375e-05, + "grad_norm": 0.22942398488521576, + "learning_rate": 3.836793039944349e-05, + "loss": 0.0097, + "step": 17880 + }, + { + "epoch": 2.728271484375e-05, + "model_forward_time": 0.02470231056213379, + "step": 17880 + }, + { + "epoch": 2.728271484375e-05, + "step": 17880, + "training_step_time": 0.10743188858032227 + }, + { + "epoch": 2.728424072265625e-05, + "model_forward_time": 0.02498030662536621, + "step": 17881 + }, + { + "epoch": 2.728424072265625e-05, + "step": 17881, + "training_step_time": 0.10877490043640137 + }, + { + "epoch": 2.72857666015625e-05, + "model_forward_time": 0.025319814682006836, + "step": 17882 + }, + { + "epoch": 2.72857666015625e-05, + "step": 17882, + "training_step_time": 0.10659074783325195 + }, + { + "epoch": 2.728729248046875e-05, + "model_forward_time": 0.025788307189941406, + "step": 17883 + }, + { + "epoch": 2.728729248046875e-05, + "step": 17883, + "training_step_time": 0.10712337493896484 + }, + { + "epoch": 2.7288818359375e-05, + "model_forward_time": 0.0251615047454834, + "step": 17884 + }, + { + "epoch": 2.7288818359375e-05, + "step": 17884, + "training_step_time": 0.10642099380493164 + }, + { + "epoch": 2.729034423828125e-05, + "model_forward_time": 0.025527000427246094, + "step": 17885 + }, + { + "epoch": 2.729034423828125e-05, + "step": 17885, + "training_step_time": 0.1554703712463379 + }, + { + "epoch": 2.72918701171875e-05, + "model_forward_time": 0.02492237091064453, + "step": 17886 + }, + { + "epoch": 2.72918701171875e-05, + "step": 17886, + "training_step_time": 0.1442704200744629 + }, + { + "epoch": 2.729339599609375e-05, + "model_forward_time": 0.024675607681274414, + "step": 17887 + }, + { + "epoch": 2.729339599609375e-05, + "step": 17887, + "training_step_time": 0.10928940773010254 + }, + { + "epoch": 2.7294921875e-05, + "model_forward_time": 0.024618864059448242, + "step": 17888 + }, + { + "epoch": 2.7294921875e-05, + "step": 17888, + "training_step_time": 0.11086416244506836 + }, + { + "epoch": 2.729644775390625e-05, + "model_forward_time": 0.02772665023803711, + "step": 17889 + }, + { + "epoch": 2.729644775390625e-05, + "step": 17889, + "training_step_time": 0.10779953002929688 + }, + { + "epoch": 2.72979736328125e-05, + "grad_norm": 0.21543103456497192, + "learning_rate": 3.831433404796521e-05, + "loss": 0.0108, + "step": 17890 + }, + { + "epoch": 2.72979736328125e-05, + "model_forward_time": 0.02519989013671875, + "step": 17890 + }, + { + "epoch": 2.72979736328125e-05, + "step": 17890, + "training_step_time": 0.10872244834899902 + }, + { + "epoch": 2.729949951171875e-05, + "model_forward_time": 0.02528548240661621, + "step": 17891 + }, + { + "epoch": 2.729949951171875e-05, + "step": 17891, + "training_step_time": 0.10813093185424805 + }, + { + "epoch": 2.7301025390625e-05, + "model_forward_time": 0.025361299514770508, + "step": 17892 + }, + { + "epoch": 2.7301025390625e-05, + "step": 17892, + "training_step_time": 0.11327600479125977 + }, + { + "epoch": 2.730255126953125e-05, + "model_forward_time": 0.025127887725830078, + "step": 17893 + }, + { + "epoch": 2.730255126953125e-05, + "step": 17893, + "training_step_time": 0.10529470443725586 + }, + { + "epoch": 2.73040771484375e-05, + "model_forward_time": 0.025068998336791992, + "step": 17894 + }, + { + "epoch": 2.73040771484375e-05, + "step": 17894, + "training_step_time": 0.1077580451965332 + }, + { + "epoch": 2.730560302734375e-05, + "model_forward_time": 0.025731801986694336, + "step": 17895 + }, + { + "epoch": 2.730560302734375e-05, + "step": 17895, + "training_step_time": 0.10904932022094727 + }, + { + "epoch": 2.730712890625e-05, + "model_forward_time": 0.025609970092773438, + "step": 17896 + }, + { + "epoch": 2.730712890625e-05, + "step": 17896, + "training_step_time": 0.10838103294372559 + }, + { + "epoch": 2.730865478515625e-05, + "model_forward_time": 0.02516007423400879, + "step": 17897 + }, + { + "epoch": 2.730865478515625e-05, + "step": 17897, + "training_step_time": 0.1088261604309082 + }, + { + "epoch": 2.73101806640625e-05, + "model_forward_time": 0.025600194931030273, + "step": 17898 + }, + { + "epoch": 2.73101806640625e-05, + "step": 17898, + "training_step_time": 0.10634136199951172 + }, + { + "epoch": 2.731170654296875e-05, + "model_forward_time": 0.025068044662475586, + "step": 17899 + }, + { + "epoch": 2.731170654296875e-05, + "step": 17899, + "training_step_time": 0.1076054573059082 + }, + { + "epoch": 2.7313232421875e-05, + "grad_norm": 0.270064115524292, + "learning_rate": 3.826075189567296e-05, + "loss": 0.0085, + "step": 17900 + }, + { + "epoch": 2.7313232421875e-05, + "model_forward_time": 0.02731156349182129, + "step": 17900 + }, + { + "epoch": 2.7313232421875e-05, + "step": 17900, + "training_step_time": 0.10802221298217773 + }, + { + "epoch": 2.731475830078125e-05, + "model_forward_time": 0.02545309066772461, + "step": 17901 + }, + { + "epoch": 2.731475830078125e-05, + "step": 17901, + "training_step_time": 0.10477757453918457 + }, + { + "epoch": 2.73162841796875e-05, + "model_forward_time": 0.025415897369384766, + "step": 17902 + }, + { + "epoch": 2.73162841796875e-05, + "step": 17902, + "training_step_time": 0.1063234806060791 + }, + { + "epoch": 2.731781005859375e-05, + "model_forward_time": 0.024984121322631836, + "step": 17903 + }, + { + "epoch": 2.731781005859375e-05, + "step": 17903, + "training_step_time": 0.1046607494354248 + }, + { + "epoch": 2.73193359375e-05, + "model_forward_time": 0.025334596633911133, + "step": 17904 + }, + { + "epoch": 2.73193359375e-05, + "step": 17904, + "training_step_time": 0.10480475425720215 + }, + { + "epoch": 2.732086181640625e-05, + "model_forward_time": 0.025446176528930664, + "step": 17905 + }, + { + "epoch": 2.732086181640625e-05, + "step": 17905, + "training_step_time": 0.10482096672058105 + }, + { + "epoch": 2.73223876953125e-05, + "model_forward_time": 0.02720952033996582, + "step": 17906 + }, + { + "epoch": 2.73223876953125e-05, + "step": 17906, + "training_step_time": 0.10787391662597656 + }, + { + "epoch": 2.732391357421875e-05, + "model_forward_time": 0.025163650512695312, + "step": 17907 + }, + { + "epoch": 2.732391357421875e-05, + "step": 17907, + "training_step_time": 0.10352063179016113 + }, + { + "epoch": 2.7325439453125e-05, + "model_forward_time": 0.02638530731201172, + "step": 17908 + }, + { + "epoch": 2.7325439453125e-05, + "step": 17908, + "training_step_time": 0.10435891151428223 + }, + { + "epoch": 2.732696533203125e-05, + "model_forward_time": 0.025278329849243164, + "step": 17909 + }, + { + "epoch": 2.732696533203125e-05, + "step": 17909, + "training_step_time": 0.10368895530700684 + }, + { + "epoch": 2.73284912109375e-05, + "grad_norm": 0.5367772579193115, + "learning_rate": 3.820718400767409e-05, + "loss": 0.0128, + "step": 17910 + }, + { + "epoch": 2.73284912109375e-05, + "model_forward_time": 0.02409958839416504, + "step": 17910 + }, + { + "epoch": 2.73284912109375e-05, + "step": 17910, + "training_step_time": 0.14976286888122559 + }, + { + "epoch": 2.733001708984375e-05, + "model_forward_time": 0.025003910064697266, + "step": 17911 + }, + { + "epoch": 2.733001708984375e-05, + "step": 17911, + "training_step_time": 0.11549615859985352 + }, + { + "epoch": 2.733154296875e-05, + "model_forward_time": 0.025644302368164062, + "step": 17912 + }, + { + "epoch": 2.733154296875e-05, + "step": 17912, + "training_step_time": 0.12738299369812012 + }, + { + "epoch": 2.733306884765625e-05, + "model_forward_time": 0.025639057159423828, + "step": 17913 + }, + { + "epoch": 2.733306884765625e-05, + "step": 17913, + "training_step_time": 0.1114192008972168 + }, + { + "epoch": 2.73345947265625e-05, + "model_forward_time": 0.0251767635345459, + "step": 17914 + }, + { + "epoch": 2.73345947265625e-05, + "step": 17914, + "training_step_time": 0.17258620262145996 + }, + { + "epoch": 2.733612060546875e-05, + "model_forward_time": 0.024928569793701172, + "step": 17915 + }, + { + "epoch": 2.733612060546875e-05, + "step": 17915, + "training_step_time": 0.1775527000427246 + }, + { + "epoch": 2.7337646484375e-05, + "model_forward_time": 0.02510976791381836, + "step": 17916 + }, + { + "epoch": 2.7337646484375e-05, + "step": 17916, + "training_step_time": 0.11106157302856445 + }, + { + "epoch": 2.733917236328125e-05, + "model_forward_time": 0.02491140365600586, + "step": 17917 + }, + { + "epoch": 2.733917236328125e-05, + "step": 17917, + "training_step_time": 0.11459517478942871 + }, + { + "epoch": 2.73406982421875e-05, + "model_forward_time": 0.025683879852294922, + "step": 17918 + }, + { + "epoch": 2.73406982421875e-05, + "step": 17918, + "training_step_time": 0.14354419708251953 + }, + { + "epoch": 2.734222412109375e-05, + "model_forward_time": 0.02560591697692871, + "step": 17919 + }, + { + "epoch": 2.734222412109375e-05, + "step": 17919, + "training_step_time": 0.1070866584777832 + }, + { + "epoch": 2.734375e-05, + "grad_norm": 0.33692288398742676, + "learning_rate": 3.8153630449058646e-05, + "loss": 0.009, + "step": 17920 + }, + { + "epoch": 2.734375e-05, + "model_forward_time": 0.026272296905517578, + "step": 17920 + }, + { + "epoch": 2.734375e-05, + "step": 17920, + "training_step_time": 0.12343764305114746 + }, + { + "epoch": 2.734527587890625e-05, + "model_forward_time": 0.02541065216064453, + "step": 17921 + }, + { + "epoch": 2.734527587890625e-05, + "step": 17921, + "training_step_time": 0.1226193904876709 + }, + { + "epoch": 2.73468017578125e-05, + "model_forward_time": 0.025192975997924805, + "step": 17922 + }, + { + "epoch": 2.73468017578125e-05, + "step": 17922, + "training_step_time": 0.13296270370483398 + }, + { + "epoch": 2.734832763671875e-05, + "model_forward_time": 0.02533698081970215, + "step": 17923 + }, + { + "epoch": 2.734832763671875e-05, + "step": 17923, + "training_step_time": 0.12338542938232422 + }, + { + "epoch": 2.7349853515625e-05, + "model_forward_time": 0.024743080139160156, + "step": 17924 + }, + { + "epoch": 2.7349853515625e-05, + "step": 17924, + "training_step_time": 0.1304161548614502 + }, + { + "epoch": 2.735137939453125e-05, + "model_forward_time": 0.024888038635253906, + "step": 17925 + }, + { + "epoch": 2.735137939453125e-05, + "step": 17925, + "training_step_time": 0.10959291458129883 + }, + { + "epoch": 2.73529052734375e-05, + "model_forward_time": 0.025396347045898438, + "step": 17926 + }, + { + "epoch": 2.73529052734375e-05, + "step": 17926, + "training_step_time": 0.10519862174987793 + }, + { + "epoch": 2.735443115234375e-05, + "model_forward_time": 0.02593398094177246, + "step": 17927 + }, + { + "epoch": 2.735443115234375e-05, + "step": 17927, + "training_step_time": 0.10903263092041016 + }, + { + "epoch": 2.735595703125e-05, + "model_forward_time": 0.025213956832885742, + "step": 17928 + }, + { + "epoch": 2.735595703125e-05, + "step": 17928, + "training_step_time": 0.11295914649963379 + }, + { + "epoch": 2.735748291015625e-05, + "model_forward_time": 0.0255584716796875, + "step": 17929 + }, + { + "epoch": 2.735748291015625e-05, + "step": 17929, + "training_step_time": 0.1051168441772461 + }, + { + "epoch": 2.73590087890625e-05, + "grad_norm": 0.14932318031787872, + "learning_rate": 3.810009128489925e-05, + "loss": 0.0095, + "step": 17930 + }, + { + "epoch": 2.73590087890625e-05, + "model_forward_time": 0.025115966796875, + "step": 17930 + }, + { + "epoch": 2.73590087890625e-05, + "step": 17930, + "training_step_time": 0.10474348068237305 + }, + { + "epoch": 2.736053466796875e-05, + "model_forward_time": 0.02556324005126953, + "step": 17931 + }, + { + "epoch": 2.736053466796875e-05, + "step": 17931, + "training_step_time": 0.10521578788757324 + }, + { + "epoch": 2.7362060546875e-05, + "model_forward_time": 0.025293827056884766, + "step": 17932 + }, + { + "epoch": 2.7362060546875e-05, + "step": 17932, + "training_step_time": 0.1053309440612793 + }, + { + "epoch": 2.736358642578125e-05, + "model_forward_time": 0.025243282318115234, + "step": 17933 + }, + { + "epoch": 2.736358642578125e-05, + "step": 17933, + "training_step_time": 0.1839134693145752 + }, + { + "epoch": 2.73651123046875e-05, + "model_forward_time": 0.024533987045288086, + "step": 17934 + }, + { + "epoch": 2.73651123046875e-05, + "step": 17934, + "training_step_time": 0.14489030838012695 + }, + { + "epoch": 2.736663818359375e-05, + "model_forward_time": 0.02425980567932129, + "step": 17935 + }, + { + "epoch": 2.736663818359375e-05, + "step": 17935, + "training_step_time": 0.1082301139831543 + }, + { + "epoch": 2.73681640625e-05, + "model_forward_time": 0.02870798110961914, + "step": 17936 + }, + { + "epoch": 2.73681640625e-05, + "step": 17936, + "training_step_time": 0.11541581153869629 + }, + { + "epoch": 2.736968994140625e-05, + "model_forward_time": 0.025124073028564453, + "step": 17937 + }, + { + "epoch": 2.736968994140625e-05, + "step": 17937, + "training_step_time": 0.10977864265441895 + }, + { + "epoch": 2.73712158203125e-05, + "model_forward_time": 0.025575876235961914, + "step": 17938 + }, + { + "epoch": 2.73712158203125e-05, + "step": 17938, + "training_step_time": 0.1134188175201416 + }, + { + "epoch": 2.737274169921875e-05, + "model_forward_time": 0.025717973709106445, + "step": 17939 + }, + { + "epoch": 2.737274169921875e-05, + "step": 17939, + "training_step_time": 0.13526415824890137 + }, + { + "epoch": 2.7374267578125e-05, + "grad_norm": 0.3307208716869354, + "learning_rate": 3.8046566580251e-05, + "loss": 0.0085, + "step": 17940 + }, + { + "epoch": 2.7374267578125e-05, + "model_forward_time": 0.025039196014404297, + "step": 17940 + }, + { + "epoch": 2.7374267578125e-05, + "step": 17940, + "training_step_time": 0.1084134578704834 + }, + { + "epoch": 2.737579345703125e-05, + "model_forward_time": 0.02523946762084961, + "step": 17941 + }, + { + "epoch": 2.737579345703125e-05, + "step": 17941, + "training_step_time": 0.10616540908813477 + }, + { + "epoch": 2.73773193359375e-05, + "model_forward_time": 0.0255124568939209, + "step": 17942 + }, + { + "epoch": 2.73773193359375e-05, + "step": 17942, + "training_step_time": 0.10609555244445801 + }, + { + "epoch": 2.737884521484375e-05, + "model_forward_time": 0.025499343872070312, + "step": 17943 + }, + { + "epoch": 2.737884521484375e-05, + "step": 17943, + "training_step_time": 0.10842013359069824 + }, + { + "epoch": 2.738037109375e-05, + "model_forward_time": 0.02535271644592285, + "step": 17944 + }, + { + "epoch": 2.738037109375e-05, + "step": 17944, + "training_step_time": 0.1105194091796875 + }, + { + "epoch": 2.738189697265625e-05, + "model_forward_time": 0.025133609771728516, + "step": 17945 + }, + { + "epoch": 2.738189697265625e-05, + "step": 17945, + "training_step_time": 0.10741400718688965 + }, + { + "epoch": 2.73834228515625e-05, + "model_forward_time": 0.024821996688842773, + "step": 17946 + }, + { + "epoch": 2.73834228515625e-05, + "step": 17946, + "training_step_time": 0.1054847240447998 + }, + { + "epoch": 2.738494873046875e-05, + "model_forward_time": 0.025104999542236328, + "step": 17947 + }, + { + "epoch": 2.738494873046875e-05, + "step": 17947, + "training_step_time": 0.10679817199707031 + }, + { + "epoch": 2.7386474609375e-05, + "model_forward_time": 0.02493739128112793, + "step": 17948 + }, + { + "epoch": 2.7386474609375e-05, + "step": 17948, + "training_step_time": 0.10469698905944824 + }, + { + "epoch": 2.738800048828125e-05, + "model_forward_time": 0.02559208869934082, + "step": 17949 + }, + { + "epoch": 2.738800048828125e-05, + "step": 17949, + "training_step_time": 0.10902833938598633 + }, + { + "epoch": 2.73895263671875e-05, + "grad_norm": 0.2965926229953766, + "learning_rate": 3.799305640015152e-05, + "loss": 0.0154, + "step": 17950 + }, + { + "epoch": 2.73895263671875e-05, + "model_forward_time": 0.025310754776000977, + "step": 17950 + }, + { + "epoch": 2.73895263671875e-05, + "step": 17950, + "training_step_time": 0.11017346382141113 + }, + { + "epoch": 2.739105224609375e-05, + "model_forward_time": 0.025224685668945312, + "step": 17951 + }, + { + "epoch": 2.739105224609375e-05, + "step": 17951, + "training_step_time": 0.10585355758666992 + }, + { + "epoch": 2.7392578125e-05, + "model_forward_time": 0.025029659271240234, + "step": 17952 + }, + { + "epoch": 2.7392578125e-05, + "step": 17952, + "training_step_time": 0.10971832275390625 + }, + { + "epoch": 2.739410400390625e-05, + "model_forward_time": 0.025522947311401367, + "step": 17953 + }, + { + "epoch": 2.739410400390625e-05, + "step": 17953, + "training_step_time": 0.11266779899597168 + }, + { + "epoch": 2.73956298828125e-05, + "model_forward_time": 0.025726795196533203, + "step": 17954 + }, + { + "epoch": 2.73956298828125e-05, + "step": 17954, + "training_step_time": 0.10637211799621582 + }, + { + "epoch": 2.739715576171875e-05, + "model_forward_time": 0.02517223358154297, + "step": 17955 + }, + { + "epoch": 2.739715576171875e-05, + "step": 17955, + "training_step_time": 0.10816764831542969 + }, + { + "epoch": 2.7398681640625e-05, + "model_forward_time": 0.025997400283813477, + "step": 17956 + }, + { + "epoch": 2.7398681640625e-05, + "step": 17956, + "training_step_time": 0.1063072681427002 + }, + { + "epoch": 2.740020751953125e-05, + "model_forward_time": 0.02524256706237793, + "step": 17957 + }, + { + "epoch": 2.740020751953125e-05, + "step": 17957, + "training_step_time": 0.1907942295074463 + }, + { + "epoch": 2.74017333984375e-05, + "model_forward_time": 0.02437615394592285, + "step": 17958 + }, + { + "epoch": 2.74017333984375e-05, + "step": 17958, + "training_step_time": 0.11754894256591797 + }, + { + "epoch": 2.740325927734375e-05, + "model_forward_time": 0.024527549743652344, + "step": 17959 + }, + { + "epoch": 2.740325927734375e-05, + "step": 17959, + "training_step_time": 0.12745976448059082 + }, + { + "epoch": 2.740478515625e-05, + "grad_norm": 0.2102188616991043, + "learning_rate": 3.793956080962068e-05, + "loss": 0.0122, + "step": 17960 + }, + { + "epoch": 2.740478515625e-05, + "model_forward_time": 0.025849580764770508, + "step": 17960 + }, + { + "epoch": 2.740478515625e-05, + "step": 17960, + "training_step_time": 0.10631322860717773 + }, + { + "epoch": 2.740631103515625e-05, + "model_forward_time": 0.024773836135864258, + "step": 17961 + }, + { + "epoch": 2.740631103515625e-05, + "step": 17961, + "training_step_time": 0.2002706527709961 + }, + { + "epoch": 2.74078369140625e-05, + "model_forward_time": 0.024735450744628906, + "step": 17962 + }, + { + "epoch": 2.74078369140625e-05, + "step": 17962, + "training_step_time": 0.1320209503173828 + }, + { + "epoch": 2.740936279296875e-05, + "model_forward_time": 0.024775266647338867, + "step": 17963 + }, + { + "epoch": 2.740936279296875e-05, + "step": 17963, + "training_step_time": 0.11639237403869629 + }, + { + "epoch": 2.7410888671875e-05, + "model_forward_time": 0.023922443389892578, + "step": 17964 + }, + { + "epoch": 2.7410888671875e-05, + "step": 17964, + "training_step_time": 0.1317157745361328 + }, + { + "epoch": 2.741241455078125e-05, + "model_forward_time": 0.024988412857055664, + "step": 17965 + }, + { + "epoch": 2.741241455078125e-05, + "step": 17965, + "training_step_time": 0.10732078552246094 + }, + { + "epoch": 2.74139404296875e-05, + "model_forward_time": 0.025366783142089844, + "step": 17966 + }, + { + "epoch": 2.74139404296875e-05, + "step": 17966, + "training_step_time": 0.12041521072387695 + }, + { + "epoch": 2.741546630859375e-05, + "model_forward_time": 0.0282289981842041, + "step": 17967 + }, + { + "epoch": 2.741546630859375e-05, + "step": 17967, + "training_step_time": 0.11133360862731934 + }, + { + "epoch": 2.74169921875e-05, + "model_forward_time": 0.02516341209411621, + "step": 17968 + }, + { + "epoch": 2.74169921875e-05, + "step": 17968, + "training_step_time": 0.10429811477661133 + }, + { + "epoch": 2.741851806640625e-05, + "model_forward_time": 0.025118112564086914, + "step": 17969 + }, + { + "epoch": 2.741851806640625e-05, + "step": 17969, + "training_step_time": 0.1212007999420166 + }, + { + "epoch": 2.74200439453125e-05, + "grad_norm": 0.18284453451633453, + "learning_rate": 3.788607987366069e-05, + "loss": 0.0076, + "step": 17970 + }, + { + "epoch": 2.74200439453125e-05, + "model_forward_time": 0.025450944900512695, + "step": 17970 + }, + { + "epoch": 2.74200439453125e-05, + "step": 17970, + "training_step_time": 0.12494087219238281 + }, + { + "epoch": 2.742156982421875e-05, + "model_forward_time": 0.025272130966186523, + "step": 17971 + }, + { + "epoch": 2.742156982421875e-05, + "step": 17971, + "training_step_time": 0.11694717407226562 + }, + { + "epoch": 2.7423095703125e-05, + "model_forward_time": 0.025257349014282227, + "step": 17972 + }, + { + "epoch": 2.7423095703125e-05, + "step": 17972, + "training_step_time": 0.11249542236328125 + }, + { + "epoch": 2.742462158203125e-05, + "model_forward_time": 0.02526712417602539, + "step": 17973 + }, + { + "epoch": 2.742462158203125e-05, + "step": 17973, + "training_step_time": 0.10704183578491211 + }, + { + "epoch": 2.74261474609375e-05, + "model_forward_time": 0.025203704833984375, + "step": 17974 + }, + { + "epoch": 2.74261474609375e-05, + "step": 17974, + "training_step_time": 0.11341476440429688 + }, + { + "epoch": 2.742767333984375e-05, + "model_forward_time": 0.028165340423583984, + "step": 17975 + }, + { + "epoch": 2.742767333984375e-05, + "step": 17975, + "training_step_time": 0.11616992950439453 + }, + { + "epoch": 2.742919921875e-05, + "model_forward_time": 0.025283336639404297, + "step": 17976 + }, + { + "epoch": 2.742919921875e-05, + "step": 17976, + "training_step_time": 0.1097259521484375 + }, + { + "epoch": 2.743072509765625e-05, + "model_forward_time": 0.02525615692138672, + "step": 17977 + }, + { + "epoch": 2.743072509765625e-05, + "step": 17977, + "training_step_time": 0.11013531684875488 + }, + { + "epoch": 2.74322509765625e-05, + "model_forward_time": 0.024924278259277344, + "step": 17978 + }, + { + "epoch": 2.74322509765625e-05, + "step": 17978, + "training_step_time": 0.11000370979309082 + }, + { + "epoch": 2.743377685546875e-05, + "model_forward_time": 0.024070262908935547, + "step": 17979 + }, + { + "epoch": 2.743377685546875e-05, + "step": 17979, + "training_step_time": 0.10774922370910645 + }, + { + "epoch": 2.7435302734375e-05, + "grad_norm": 0.27353590726852417, + "learning_rate": 3.783261365725592e-05, + "loss": 0.0167, + "step": 17980 + }, + { + "epoch": 2.7435302734375e-05, + "model_forward_time": 0.025155305862426758, + "step": 17980 + }, + { + "epoch": 2.7435302734375e-05, + "step": 17980, + "training_step_time": 0.11198043823242188 + }, + { + "epoch": 2.743682861328125e-05, + "model_forward_time": 0.024308204650878906, + "step": 17981 + }, + { + "epoch": 2.743682861328125e-05, + "step": 17981, + "training_step_time": 0.15448594093322754 + }, + { + "epoch": 2.74383544921875e-05, + "model_forward_time": 0.0247194766998291, + "step": 17982 + }, + { + "epoch": 2.74383544921875e-05, + "step": 17982, + "training_step_time": 0.11015748977661133 + }, + { + "epoch": 2.743988037109375e-05, + "model_forward_time": 0.02830982208251953, + "step": 17983 + }, + { + "epoch": 2.743988037109375e-05, + "step": 17983, + "training_step_time": 0.11189126968383789 + }, + { + "epoch": 2.744140625e-05, + "model_forward_time": 0.025470495223999023, + "step": 17984 + }, + { + "epoch": 2.744140625e-05, + "step": 17984, + "training_step_time": 0.1059103012084961 + }, + { + "epoch": 2.744293212890625e-05, + "model_forward_time": 0.02527475357055664, + "step": 17985 + }, + { + "epoch": 2.744293212890625e-05, + "step": 17985, + "training_step_time": 0.11489653587341309 + }, + { + "epoch": 2.74444580078125e-05, + "model_forward_time": 0.025204181671142578, + "step": 17986 + }, + { + "epoch": 2.74444580078125e-05, + "step": 17986, + "training_step_time": 0.22503018379211426 + }, + { + "epoch": 2.744598388671875e-05, + "model_forward_time": 0.023204565048217773, + "step": 17987 + }, + { + "epoch": 2.744598388671875e-05, + "step": 17987, + "training_step_time": 0.11169886589050293 + }, + { + "epoch": 2.7447509765625e-05, + "model_forward_time": 0.024588346481323242, + "step": 17988 + }, + { + "epoch": 2.7447509765625e-05, + "step": 17988, + "training_step_time": 0.10314106941223145 + }, + { + "epoch": 2.744903564453125e-05, + "model_forward_time": 0.02552056312561035, + "step": 17989 + }, + { + "epoch": 2.744903564453125e-05, + "step": 17989, + "training_step_time": 0.10691571235656738 + }, + { + "epoch": 2.74505615234375e-05, + "grad_norm": 0.4213157892227173, + "learning_rate": 3.777916222537285e-05, + "loss": 0.0076, + "step": 17990 + }, + { + "epoch": 2.74505615234375e-05, + "model_forward_time": 0.023868560791015625, + "step": 17990 + }, + { + "epoch": 2.74505615234375e-05, + "step": 17990, + "training_step_time": 0.1145319938659668 + }, + { + "epoch": 2.745208740234375e-05, + "model_forward_time": 0.02421736717224121, + "step": 17991 + }, + { + "epoch": 2.745208740234375e-05, + "step": 17991, + "training_step_time": 0.11203432083129883 + }, + { + "epoch": 2.745361328125e-05, + "model_forward_time": 0.02419567108154297, + "step": 17992 + }, + { + "epoch": 2.745361328125e-05, + "step": 17992, + "training_step_time": 0.10560011863708496 + }, + { + "epoch": 2.745513916015625e-05, + "model_forward_time": 0.0252225399017334, + "step": 17993 + }, + { + "epoch": 2.745513916015625e-05, + "step": 17993, + "training_step_time": 0.11844587326049805 + }, + { + "epoch": 2.74566650390625e-05, + "model_forward_time": 0.02525806427001953, + "step": 17994 + }, + { + "epoch": 2.74566650390625e-05, + "step": 17994, + "training_step_time": 0.10957527160644531 + }, + { + "epoch": 2.745819091796875e-05, + "model_forward_time": 0.025019168853759766, + "step": 17995 + }, + { + "epoch": 2.745819091796875e-05, + "step": 17995, + "training_step_time": 0.10733246803283691 + }, + { + "epoch": 2.7459716796875e-05, + "model_forward_time": 0.02508258819580078, + "step": 17996 + }, + { + "epoch": 2.7459716796875e-05, + "step": 17996, + "training_step_time": 0.11580681800842285 + }, + { + "epoch": 2.746124267578125e-05, + "model_forward_time": 0.025430917739868164, + "step": 17997 + }, + { + "epoch": 2.746124267578125e-05, + "step": 17997, + "training_step_time": 0.1108696460723877 + }, + { + "epoch": 2.74627685546875e-05, + "model_forward_time": 0.025517940521240234, + "step": 17998 + }, + { + "epoch": 2.74627685546875e-05, + "step": 17998, + "training_step_time": 0.10732054710388184 + }, + { + "epoch": 2.746429443359375e-05, + "model_forward_time": 0.025665998458862305, + "step": 17999 + }, + { + "epoch": 2.746429443359375e-05, + "step": 17999, + "training_step_time": 0.1076362133026123 + }, + { + "epoch": 2.74658203125e-05, + "grad_norm": 0.18084366619586945, + "learning_rate": 3.772572564296005e-05, + "loss": 0.014, + "step": 18000 + }, + { + "epoch": 2.74658203125e-05, + "model_forward_time": 0.025009632110595703, + "step": 18000 + }, + { + "epoch": 2.74658203125e-05, + "step": 18000, + "training_step_time": 0.10673260688781738 + }, + { + "epoch": 2.746734619140625e-05, + "model_forward_time": 0.024454832077026367, + "step": 18001 + }, + { + "epoch": 2.746734619140625e-05, + "step": 18001, + "training_step_time": 0.1043856143951416 + }, + { + "epoch": 2.74688720703125e-05, + "model_forward_time": 0.02487635612487793, + "step": 18002 + }, + { + "epoch": 2.74688720703125e-05, + "step": 18002, + "training_step_time": 0.10207295417785645 + }, + { + "epoch": 2.747039794921875e-05, + "model_forward_time": 0.025430679321289062, + "step": 18003 + }, + { + "epoch": 2.747039794921875e-05, + "step": 18003, + "training_step_time": 0.10685372352600098 + }, + { + "epoch": 2.7471923828125e-05, + "model_forward_time": 0.02514052391052246, + "step": 18004 + }, + { + "epoch": 2.7471923828125e-05, + "step": 18004, + "training_step_time": 0.10427451133728027 + }, + { + "epoch": 2.747344970703125e-05, + "model_forward_time": 0.024594545364379883, + "step": 18005 + }, + { + "epoch": 2.747344970703125e-05, + "step": 18005, + "training_step_time": 0.10706639289855957 + }, + { + "epoch": 2.74749755859375e-05, + "model_forward_time": 0.025272130966186523, + "step": 18006 + }, + { + "epoch": 2.74749755859375e-05, + "step": 18006, + "training_step_time": 0.10577702522277832 + }, + { + "epoch": 2.747650146484375e-05, + "model_forward_time": 0.025304317474365234, + "step": 18007 + }, + { + "epoch": 2.747650146484375e-05, + "step": 18007, + "training_step_time": 0.1071314811706543 + }, + { + "epoch": 2.747802734375e-05, + "model_forward_time": 0.02533888816833496, + "step": 18008 + }, + { + "epoch": 2.747802734375e-05, + "step": 18008, + "training_step_time": 0.10979104042053223 + }, + { + "epoch": 2.747955322265625e-05, + "model_forward_time": 0.0255126953125, + "step": 18009 + }, + { + "epoch": 2.747955322265625e-05, + "step": 18009, + "training_step_time": 0.10956525802612305 + }, + { + "epoch": 2.74810791015625e-05, + "grad_norm": 0.2783471345901489, + "learning_rate": 3.767230397494798e-05, + "loss": 0.0188, + "step": 18010 + }, + { + "epoch": 2.74810791015625e-05, + "model_forward_time": 0.02611517906188965, + "step": 18010 + }, + { + "epoch": 2.74810791015625e-05, + "step": 18010, + "training_step_time": 0.10888242721557617 + }, + { + "epoch": 2.748260498046875e-05, + "model_forward_time": 0.025205612182617188, + "step": 18011 + }, + { + "epoch": 2.748260498046875e-05, + "step": 18011, + "training_step_time": 0.1067039966583252 + }, + { + "epoch": 2.7484130859375e-05, + "model_forward_time": 0.025316715240478516, + "step": 18012 + }, + { + "epoch": 2.7484130859375e-05, + "step": 18012, + "training_step_time": 0.10681939125061035 + }, + { + "epoch": 2.748565673828125e-05, + "model_forward_time": 0.025093793869018555, + "step": 18013 + }, + { + "epoch": 2.748565673828125e-05, + "step": 18013, + "training_step_time": 0.10825729370117188 + }, + { + "epoch": 2.74871826171875e-05, + "model_forward_time": 0.025198936462402344, + "step": 18014 + }, + { + "epoch": 2.74871826171875e-05, + "step": 18014, + "training_step_time": 0.10617876052856445 + }, + { + "epoch": 2.748870849609375e-05, + "model_forward_time": 0.025570392608642578, + "step": 18015 + }, + { + "epoch": 2.748870849609375e-05, + "step": 18015, + "training_step_time": 0.1208188533782959 + }, + { + "epoch": 2.7490234375e-05, + "model_forward_time": 0.02491021156311035, + "step": 18016 + }, + { + "epoch": 2.7490234375e-05, + "step": 18016, + "training_step_time": 0.10976386070251465 + }, + { + "epoch": 2.749176025390625e-05, + "model_forward_time": 0.025936365127563477, + "step": 18017 + }, + { + "epoch": 2.749176025390625e-05, + "step": 18017, + "training_step_time": 0.11401891708374023 + }, + { + "epoch": 2.74932861328125e-05, + "model_forward_time": 0.025758981704711914, + "step": 18018 + }, + { + "epoch": 2.74932861328125e-05, + "step": 18018, + "training_step_time": 0.12434673309326172 + }, + { + "epoch": 2.749481201171875e-05, + "model_forward_time": 0.02500152587890625, + "step": 18019 + }, + { + "epoch": 2.749481201171875e-05, + "step": 18019, + "training_step_time": 0.1058804988861084 + }, + { + "epoch": 2.7496337890625e-05, + "grad_norm": 0.2061215192079544, + "learning_rate": 3.761889728624899e-05, + "loss": 0.0112, + "step": 18020 + }, + { + "epoch": 2.7496337890625e-05, + "model_forward_time": 0.02477264404296875, + "step": 18020 + }, + { + "epoch": 2.7496337890625e-05, + "step": 18020, + "training_step_time": 0.11475586891174316 + }, + { + "epoch": 2.749786376953125e-05, + "model_forward_time": 0.02521204948425293, + "step": 18021 + }, + { + "epoch": 2.749786376953125e-05, + "step": 18021, + "training_step_time": 0.12122535705566406 + }, + { + "epoch": 2.74993896484375e-05, + "model_forward_time": 0.025592803955078125, + "step": 18022 + }, + { + "epoch": 2.74993896484375e-05, + "step": 18022, + "training_step_time": 0.11378741264343262 + }, + { + "epoch": 2.750091552734375e-05, + "model_forward_time": 0.025400876998901367, + "step": 18023 + }, + { + "epoch": 2.750091552734375e-05, + "step": 18023, + "training_step_time": 0.12374234199523926 + }, + { + "epoch": 2.750244140625e-05, + "model_forward_time": 0.025568008422851562, + "step": 18024 + }, + { + "epoch": 2.750244140625e-05, + "step": 18024, + "training_step_time": 0.1431748867034912 + }, + { + "epoch": 2.750396728515625e-05, + "model_forward_time": 0.026306867599487305, + "step": 18025 + }, + { + "epoch": 2.750396728515625e-05, + "step": 18025, + "training_step_time": 0.14913010597229004 + }, + { + "epoch": 2.75054931640625e-05, + "model_forward_time": 0.024827003479003906, + "step": 18026 + }, + { + "epoch": 2.75054931640625e-05, + "step": 18026, + "training_step_time": 0.10567975044250488 + }, + { + "epoch": 2.750701904296875e-05, + "model_forward_time": 0.025075197219848633, + "step": 18027 + }, + { + "epoch": 2.750701904296875e-05, + "step": 18027, + "training_step_time": 0.10854268074035645 + }, + { + "epoch": 2.7508544921875e-05, + "model_forward_time": 0.025426149368286133, + "step": 18028 + }, + { + "epoch": 2.7508544921875e-05, + "step": 18028, + "training_step_time": 0.13500595092773438 + }, + { + "epoch": 2.751007080078125e-05, + "model_forward_time": 0.025188922882080078, + "step": 18029 + }, + { + "epoch": 2.751007080078125e-05, + "step": 18029, + "training_step_time": 0.1299581527709961 + }, + { + "epoch": 2.75115966796875e-05, + "grad_norm": 0.2631520628929138, + "learning_rate": 3.756550564175727e-05, + "loss": 0.0193, + "step": 18030 + }, + { + "epoch": 2.75115966796875e-05, + "model_forward_time": 0.0247802734375, + "step": 18030 + }, + { + "epoch": 2.75115966796875e-05, + "step": 18030, + "training_step_time": 0.10551571846008301 + }, + { + "epoch": 2.751312255859375e-05, + "model_forward_time": 0.025038480758666992, + "step": 18031 + }, + { + "epoch": 2.751312255859375e-05, + "step": 18031, + "training_step_time": 0.11161971092224121 + }, + { + "epoch": 2.75146484375e-05, + "model_forward_time": 0.025191783905029297, + "step": 18032 + }, + { + "epoch": 2.75146484375e-05, + "step": 18032, + "training_step_time": 0.11246919631958008 + }, + { + "epoch": 2.751617431640625e-05, + "model_forward_time": 0.025264501571655273, + "step": 18033 + }, + { + "epoch": 2.751617431640625e-05, + "step": 18033, + "training_step_time": 0.10839033126831055 + }, + { + "epoch": 2.75177001953125e-05, + "model_forward_time": 0.02579522132873535, + "step": 18034 + }, + { + "epoch": 2.75177001953125e-05, + "step": 18034, + "training_step_time": 0.10805821418762207 + }, + { + "epoch": 2.751922607421875e-05, + "model_forward_time": 0.025609493255615234, + "step": 18035 + }, + { + "epoch": 2.751922607421875e-05, + "step": 18035, + "training_step_time": 0.10558676719665527 + }, + { + "epoch": 2.7520751953125e-05, + "model_forward_time": 0.025491952896118164, + "step": 18036 + }, + { + "epoch": 2.7520751953125e-05, + "step": 18036, + "training_step_time": 0.10474371910095215 + }, + { + "epoch": 2.752227783203125e-05, + "model_forward_time": 0.02527594566345215, + "step": 18037 + }, + { + "epoch": 2.752227783203125e-05, + "step": 18037, + "training_step_time": 0.10887598991394043 + }, + { + "epoch": 2.75238037109375e-05, + "model_forward_time": 0.02546977996826172, + "step": 18038 + }, + { + "epoch": 2.75238037109375e-05, + "step": 18038, + "training_step_time": 0.1085824966430664 + }, + { + "epoch": 2.752532958984375e-05, + "model_forward_time": 0.02501201629638672, + "step": 18039 + }, + { + "epoch": 2.752532958984375e-05, + "step": 18039, + "training_step_time": 0.10491251945495605 + }, + { + "epoch": 2.752685546875e-05, + "grad_norm": 0.13272154331207275, + "learning_rate": 3.751212910634867e-05, + "loss": 0.0127, + "step": 18040 + }, + { + "epoch": 2.752685546875e-05, + "model_forward_time": 0.025127410888671875, + "step": 18040 + }, + { + "epoch": 2.752685546875e-05, + "step": 18040, + "training_step_time": 0.11132955551147461 + }, + { + "epoch": 2.752838134765625e-05, + "model_forward_time": 0.027968168258666992, + "step": 18041 + }, + { + "epoch": 2.752838134765625e-05, + "step": 18041, + "training_step_time": 0.1144716739654541 + }, + { + "epoch": 2.75299072265625e-05, + "model_forward_time": 0.02566051483154297, + "step": 18042 + }, + { + "epoch": 2.75299072265625e-05, + "step": 18042, + "training_step_time": 0.16414642333984375 + }, + { + "epoch": 2.753143310546875e-05, + "model_forward_time": 0.024952411651611328, + "step": 18043 + }, + { + "epoch": 2.753143310546875e-05, + "step": 18043, + "training_step_time": 0.1578686237335205 + }, + { + "epoch": 2.7532958984375e-05, + "model_forward_time": 0.026008129119873047, + "step": 18044 + }, + { + "epoch": 2.7532958984375e-05, + "step": 18044, + "training_step_time": 0.11167311668395996 + }, + { + "epoch": 2.753448486328125e-05, + "model_forward_time": 0.024770021438598633, + "step": 18045 + }, + { + "epoch": 2.753448486328125e-05, + "step": 18045, + "training_step_time": 0.1226344108581543 + }, + { + "epoch": 2.75360107421875e-05, + "model_forward_time": 0.027828454971313477, + "step": 18046 + }, + { + "epoch": 2.75360107421875e-05, + "step": 18046, + "training_step_time": 0.11861753463745117 + }, + { + "epoch": 2.753753662109375e-05, + "model_forward_time": 0.025232315063476562, + "step": 18047 + }, + { + "epoch": 2.753753662109375e-05, + "step": 18047, + "training_step_time": 0.1080009937286377 + }, + { + "epoch": 2.75390625e-05, + "model_forward_time": 0.02543950080871582, + "step": 18048 + }, + { + "epoch": 2.75390625e-05, + "step": 18048, + "training_step_time": 0.10968279838562012 + }, + { + "epoch": 2.754058837890625e-05, + "model_forward_time": 0.024803876876831055, + "step": 18049 + }, + { + "epoch": 2.754058837890625e-05, + "step": 18049, + "training_step_time": 0.10719132423400879 + }, + { + "epoch": 2.75421142578125e-05, + "grad_norm": 0.4691051244735718, + "learning_rate": 3.7458767744880765e-05, + "loss": 0.0123, + "step": 18050 + }, + { + "epoch": 2.75421142578125e-05, + "model_forward_time": 0.0252687931060791, + "step": 18050 + }, + { + "epoch": 2.75421142578125e-05, + "step": 18050, + "training_step_time": 0.10846114158630371 + }, + { + "epoch": 2.754364013671875e-05, + "model_forward_time": 0.025236129760742188, + "step": 18051 + }, + { + "epoch": 2.754364013671875e-05, + "step": 18051, + "training_step_time": 0.10599970817565918 + }, + { + "epoch": 2.7545166015625e-05, + "model_forward_time": 0.02538013458251953, + "step": 18052 + }, + { + "epoch": 2.7545166015625e-05, + "step": 18052, + "training_step_time": 0.10949397087097168 + }, + { + "epoch": 2.754669189453125e-05, + "model_forward_time": 0.02544379234313965, + "step": 18053 + }, + { + "epoch": 2.754669189453125e-05, + "step": 18053, + "training_step_time": 0.10610055923461914 + }, + { + "epoch": 2.75482177734375e-05, + "model_forward_time": 0.025038480758666992, + "step": 18054 + }, + { + "epoch": 2.75482177734375e-05, + "step": 18054, + "training_step_time": 0.10511040687561035 + }, + { + "epoch": 2.754974365234375e-05, + "model_forward_time": 0.025271892547607422, + "step": 18055 + }, + { + "epoch": 2.754974365234375e-05, + "step": 18055, + "training_step_time": 0.10669755935668945 + }, + { + "epoch": 2.755126953125e-05, + "model_forward_time": 0.025285959243774414, + "step": 18056 + }, + { + "epoch": 2.755126953125e-05, + "step": 18056, + "training_step_time": 0.1104896068572998 + }, + { + "epoch": 2.755279541015625e-05, + "model_forward_time": 0.025473594665527344, + "step": 18057 + }, + { + "epoch": 2.755279541015625e-05, + "step": 18057, + "training_step_time": 0.10799503326416016 + }, + { + "epoch": 2.75543212890625e-05, + "model_forward_time": 0.025156497955322266, + "step": 18058 + }, + { + "epoch": 2.75543212890625e-05, + "step": 18058, + "training_step_time": 0.10685944557189941 + }, + { + "epoch": 2.755584716796875e-05, + "model_forward_time": 0.02577948570251465, + "step": 18059 + }, + { + "epoch": 2.755584716796875e-05, + "step": 18059, + "training_step_time": 0.10591387748718262 + }, + { + "epoch": 2.7557373046875e-05, + "grad_norm": 0.4659165143966675, + "learning_rate": 3.74054216221926e-05, + "loss": 0.0102, + "step": 18060 + }, + { + "epoch": 2.7557373046875e-05, + "model_forward_time": 0.025429964065551758, + "step": 18060 + }, + { + "epoch": 2.7557373046875e-05, + "step": 18060, + "training_step_time": 0.10542178153991699 + }, + { + "epoch": 2.755889892578125e-05, + "model_forward_time": 0.02537059783935547, + "step": 18061 + }, + { + "epoch": 2.755889892578125e-05, + "step": 18061, + "training_step_time": 0.10715532302856445 + }, + { + "epoch": 2.75604248046875e-05, + "model_forward_time": 0.02581000328063965, + "step": 18062 + }, + { + "epoch": 2.75604248046875e-05, + "step": 18062, + "training_step_time": 0.10545921325683594 + }, + { + "epoch": 2.756195068359375e-05, + "model_forward_time": 0.025296926498413086, + "step": 18063 + }, + { + "epoch": 2.756195068359375e-05, + "step": 18063, + "training_step_time": 0.15051770210266113 + }, + { + "epoch": 2.75634765625e-05, + "model_forward_time": 0.0249478816986084, + "step": 18064 + }, + { + "epoch": 2.75634765625e-05, + "step": 18064, + "training_step_time": 0.10940718650817871 + }, + { + "epoch": 2.756500244140625e-05, + "model_forward_time": 0.027528047561645508, + "step": 18065 + }, + { + "epoch": 2.756500244140625e-05, + "step": 18065, + "training_step_time": 0.13128972053527832 + }, + { + "epoch": 2.75665283203125e-05, + "model_forward_time": 0.025417089462280273, + "step": 18066 + }, + { + "epoch": 2.75665283203125e-05, + "step": 18066, + "training_step_time": 0.16022133827209473 + }, + { + "epoch": 2.756805419921875e-05, + "model_forward_time": 0.024984121322631836, + "step": 18067 + }, + { + "epoch": 2.756805419921875e-05, + "step": 18067, + "training_step_time": 0.10365819931030273 + }, + { + "epoch": 2.7569580078125e-05, + "model_forward_time": 0.024680376052856445, + "step": 18068 + }, + { + "epoch": 2.7569580078125e-05, + "step": 18068, + "training_step_time": 0.14702725410461426 + }, + { + "epoch": 2.757110595703125e-05, + "model_forward_time": 0.02456974983215332, + "step": 18069 + }, + { + "epoch": 2.757110595703125e-05, + "step": 18069, + "training_step_time": 0.1059417724609375 + }, + { + "epoch": 2.75726318359375e-05, + "grad_norm": 0.21708019077777863, + "learning_rate": 3.7352090803104765e-05, + "loss": 0.013, + "step": 18070 + }, + { + "epoch": 2.75726318359375e-05, + "model_forward_time": 0.025348186492919922, + "step": 18070 + }, + { + "epoch": 2.75726318359375e-05, + "step": 18070, + "training_step_time": 0.1965010166168213 + }, + { + "epoch": 2.757415771484375e-05, + "model_forward_time": 0.02434229850769043, + "step": 18071 + }, + { + "epoch": 2.757415771484375e-05, + "step": 18071, + "training_step_time": 0.13995099067687988 + }, + { + "epoch": 2.757568359375e-05, + "model_forward_time": 0.024852752685546875, + "step": 18072 + }, + { + "epoch": 2.757568359375e-05, + "step": 18072, + "training_step_time": 0.1960604190826416 + }, + { + "epoch": 2.757720947265625e-05, + "model_forward_time": 0.024295806884765625, + "step": 18073 + }, + { + "epoch": 2.757720947265625e-05, + "step": 18073, + "training_step_time": 0.10580563545227051 + }, + { + "epoch": 2.75787353515625e-05, + "model_forward_time": 0.02414679527282715, + "step": 18074 + }, + { + "epoch": 2.75787353515625e-05, + "step": 18074, + "training_step_time": 0.10179758071899414 + }, + { + "epoch": 2.758026123046875e-05, + "model_forward_time": 0.02497386932373047, + "step": 18075 + }, + { + "epoch": 2.758026123046875e-05, + "step": 18075, + "training_step_time": 0.12715911865234375 + }, + { + "epoch": 2.7581787109375e-05, + "model_forward_time": 0.025264978408813477, + "step": 18076 + }, + { + "epoch": 2.7581787109375e-05, + "step": 18076, + "training_step_time": 0.15166163444519043 + }, + { + "epoch": 2.758331298828125e-05, + "model_forward_time": 0.024080514907836914, + "step": 18077 + }, + { + "epoch": 2.758331298828125e-05, + "step": 18077, + "training_step_time": 0.2084980010986328 + }, + { + "epoch": 2.75848388671875e-05, + "model_forward_time": 0.023659229278564453, + "step": 18078 + }, + { + "epoch": 2.75848388671875e-05, + "step": 18078, + "training_step_time": 0.1961688995361328 + }, + { + "epoch": 2.758636474609375e-05, + "model_forward_time": 0.02367377281188965, + "step": 18079 + }, + { + "epoch": 2.758636474609375e-05, + "step": 18079, + "training_step_time": 0.18773531913757324 + }, + { + "epoch": 2.7587890625e-05, + "grad_norm": 0.2568994462490082, + "learning_rate": 3.7298775352419206e-05, + "loss": 0.0082, + "step": 18080 + }, + { + "epoch": 2.7587890625e-05, + "model_forward_time": 0.023906469345092773, + "step": 18080 + }, + { + "epoch": 2.7587890625e-05, + "step": 18080, + "training_step_time": 0.17955398559570312 + }, + { + "epoch": 2.758941650390625e-05, + "model_forward_time": 0.02474188804626465, + "step": 18081 + }, + { + "epoch": 2.758941650390625e-05, + "step": 18081, + "training_step_time": 0.1676037311553955 + }, + { + "epoch": 2.75909423828125e-05, + "model_forward_time": 0.024628639221191406, + "step": 18082 + }, + { + "epoch": 2.75909423828125e-05, + "step": 18082, + "training_step_time": 0.15869379043579102 + }, + { + "epoch": 2.759246826171875e-05, + "model_forward_time": 0.023293018341064453, + "step": 18083 + }, + { + "epoch": 2.759246826171875e-05, + "step": 18083, + "training_step_time": 0.19436335563659668 + }, + { + "epoch": 2.7593994140625e-05, + "model_forward_time": 0.02470088005065918, + "step": 18084 + }, + { + "epoch": 2.7593994140625e-05, + "step": 18084, + "training_step_time": 0.1279900074005127 + }, + { + "epoch": 2.759552001953125e-05, + "model_forward_time": 0.024484634399414062, + "step": 18085 + }, + { + "epoch": 2.759552001953125e-05, + "step": 18085, + "training_step_time": 0.12510132789611816 + }, + { + "epoch": 2.75970458984375e-05, + "model_forward_time": 0.025023698806762695, + "step": 18086 + }, + { + "epoch": 2.75970458984375e-05, + "step": 18086, + "training_step_time": 0.12277531623840332 + }, + { + "epoch": 2.759857177734375e-05, + "model_forward_time": 0.0250091552734375, + "step": 18087 + }, + { + "epoch": 2.759857177734375e-05, + "step": 18087, + "training_step_time": 0.1942758560180664 + }, + { + "epoch": 2.760009765625e-05, + "model_forward_time": 0.024823427200317383, + "step": 18088 + }, + { + "epoch": 2.760009765625e-05, + "step": 18088, + "training_step_time": 0.12148809432983398 + }, + { + "epoch": 2.760162353515625e-05, + "model_forward_time": 0.027415990829467773, + "step": 18089 + }, + { + "epoch": 2.760162353515625e-05, + "step": 18089, + "training_step_time": 0.11230754852294922 + }, + { + "epoch": 2.76031494140625e-05, + "grad_norm": 0.19425970315933228, + "learning_rate": 3.7245475334919246e-05, + "loss": 0.011, + "step": 18090 + }, + { + "epoch": 2.76031494140625e-05, + "model_forward_time": 0.02588486671447754, + "step": 18090 + }, + { + "epoch": 2.76031494140625e-05, + "step": 18090, + "training_step_time": 0.10913205146789551 + }, + { + "epoch": 2.760467529296875e-05, + "model_forward_time": 0.025197267532348633, + "step": 18091 + }, + { + "epoch": 2.760467529296875e-05, + "step": 18091, + "training_step_time": 0.10808968544006348 + }, + { + "epoch": 2.7606201171875e-05, + "model_forward_time": 0.025452613830566406, + "step": 18092 + }, + { + "epoch": 2.7606201171875e-05, + "step": 18092, + "training_step_time": 0.1070561408996582 + }, + { + "epoch": 2.760772705078125e-05, + "model_forward_time": 0.025223970413208008, + "step": 18093 + }, + { + "epoch": 2.760772705078125e-05, + "step": 18093, + "training_step_time": 0.10733151435852051 + }, + { + "epoch": 2.76092529296875e-05, + "model_forward_time": 0.02486133575439453, + "step": 18094 + }, + { + "epoch": 2.76092529296875e-05, + "step": 18094, + "training_step_time": 0.10876965522766113 + }, + { + "epoch": 2.761077880859375e-05, + "model_forward_time": 0.024648666381835938, + "step": 18095 + }, + { + "epoch": 2.761077880859375e-05, + "step": 18095, + "training_step_time": 0.11098361015319824 + }, + { + "epoch": 2.76123046875e-05, + "model_forward_time": 0.024187088012695312, + "step": 18096 + }, + { + "epoch": 2.76123046875e-05, + "step": 18096, + "training_step_time": 0.10561871528625488 + }, + { + "epoch": 2.761383056640625e-05, + "model_forward_time": 0.025218963623046875, + "step": 18097 + }, + { + "epoch": 2.761383056640625e-05, + "step": 18097, + "training_step_time": 0.11185169219970703 + }, + { + "epoch": 2.76153564453125e-05, + "model_forward_time": 0.02584671974182129, + "step": 18098 + }, + { + "epoch": 2.76153564453125e-05, + "step": 18098, + "training_step_time": 0.1062629222869873 + }, + { + "epoch": 2.761688232421875e-05, + "model_forward_time": 0.025019407272338867, + "step": 18099 + }, + { + "epoch": 2.761688232421875e-05, + "step": 18099, + "training_step_time": 0.10460329055786133 + }, + { + "epoch": 2.7618408203125e-05, + "grad_norm": 0.2011028677225113, + "learning_rate": 3.719219081536942e-05, + "loss": 0.0172, + "step": 18100 + }, + { + "epoch": 2.7618408203125e-05, + "model_forward_time": 0.028470277786254883, + "step": 18100 + }, + { + "epoch": 2.7618408203125e-05, + "step": 18100, + "training_step_time": 0.10840082168579102 + }, + { + "epoch": 2.761993408203125e-05, + "model_forward_time": 0.02505207061767578, + "step": 18101 + }, + { + "epoch": 2.761993408203125e-05, + "step": 18101, + "training_step_time": 0.10689377784729004 + }, + { + "epoch": 2.76214599609375e-05, + "model_forward_time": 0.025411367416381836, + "step": 18102 + }, + { + "epoch": 2.76214599609375e-05, + "step": 18102, + "training_step_time": 0.10832095146179199 + }, + { + "epoch": 2.762298583984375e-05, + "model_forward_time": 0.02538132667541504, + "step": 18103 + }, + { + "epoch": 2.762298583984375e-05, + "step": 18103, + "training_step_time": 0.10732579231262207 + }, + { + "epoch": 2.762451171875e-05, + "model_forward_time": 0.025362491607666016, + "step": 18104 + }, + { + "epoch": 2.762451171875e-05, + "step": 18104, + "training_step_time": 0.17307043075561523 + }, + { + "epoch": 2.762603759765625e-05, + "model_forward_time": 0.0247347354888916, + "step": 18105 + }, + { + "epoch": 2.762603759765625e-05, + "step": 18105, + "training_step_time": 0.11890149116516113 + }, + { + "epoch": 2.76275634765625e-05, + "model_forward_time": 0.02479076385498047, + "step": 18106 + }, + { + "epoch": 2.76275634765625e-05, + "step": 18106, + "training_step_time": 0.1668558120727539 + }, + { + "epoch": 2.762908935546875e-05, + "model_forward_time": 0.02463555335998535, + "step": 18107 + }, + { + "epoch": 2.762908935546875e-05, + "step": 18107, + "training_step_time": 0.14542913436889648 + }, + { + "epoch": 2.7630615234375e-05, + "model_forward_time": 0.02484893798828125, + "step": 18108 + }, + { + "epoch": 2.7630615234375e-05, + "step": 18108, + "training_step_time": 0.12649965286254883 + }, + { + "epoch": 2.763214111328125e-05, + "model_forward_time": 0.025589466094970703, + "step": 18109 + }, + { + "epoch": 2.763214111328125e-05, + "step": 18109, + "training_step_time": 0.10438394546508789 + }, + { + "epoch": 2.76336669921875e-05, + "grad_norm": 0.4140009582042694, + "learning_rate": 3.713892185851548e-05, + "loss": 0.0087, + "step": 18110 + }, + { + "epoch": 2.76336669921875e-05, + "model_forward_time": 0.02533268928527832, + "step": 18110 + }, + { + "epoch": 2.76336669921875e-05, + "step": 18110, + "training_step_time": 0.14310765266418457 + }, + { + "epoch": 2.763519287109375e-05, + "model_forward_time": 0.025089263916015625, + "step": 18111 + }, + { + "epoch": 2.763519287109375e-05, + "step": 18111, + "training_step_time": 0.14928841590881348 + }, + { + "epoch": 2.763671875e-05, + "model_forward_time": 0.025203227996826172, + "step": 18112 + }, + { + "epoch": 2.763671875e-05, + "step": 18112, + "training_step_time": 0.1381986141204834 + }, + { + "epoch": 2.763824462890625e-05, + "model_forward_time": 0.02474689483642578, + "step": 18113 + }, + { + "epoch": 2.763824462890625e-05, + "step": 18113, + "training_step_time": 0.1374964714050293 + }, + { + "epoch": 2.76397705078125e-05, + "model_forward_time": 0.024968862533569336, + "step": 18114 + }, + { + "epoch": 2.76397705078125e-05, + "step": 18114, + "training_step_time": 0.20070242881774902 + }, + { + "epoch": 2.764129638671875e-05, + "model_forward_time": 0.024248838424682617, + "step": 18115 + }, + { + "epoch": 2.764129638671875e-05, + "step": 18115, + "training_step_time": 0.14917302131652832 + }, + { + "epoch": 2.7642822265625e-05, + "model_forward_time": 0.024539709091186523, + "step": 18116 + }, + { + "epoch": 2.7642822265625e-05, + "step": 18116, + "training_step_time": 0.13200664520263672 + }, + { + "epoch": 2.764434814453125e-05, + "model_forward_time": 0.02427816390991211, + "step": 18117 + }, + { + "epoch": 2.764434814453125e-05, + "step": 18117, + "training_step_time": 0.13616585731506348 + }, + { + "epoch": 2.76458740234375e-05, + "model_forward_time": 0.027407169342041016, + "step": 18118 + }, + { + "epoch": 2.76458740234375e-05, + "step": 18118, + "training_step_time": 0.1273195743560791 + }, + { + "epoch": 2.764739990234375e-05, + "model_forward_time": 0.025042057037353516, + "step": 18119 + }, + { + "epoch": 2.764739990234375e-05, + "step": 18119, + "training_step_time": 0.11828994750976562 + }, + { + "epoch": 2.764892578125e-05, + "grad_norm": 0.5197622776031494, + "learning_rate": 3.7085668529084184e-05, + "loss": 0.0103, + "step": 18120 + }, + { + "epoch": 2.764892578125e-05, + "model_forward_time": 0.025221586227416992, + "step": 18120 + }, + { + "epoch": 2.764892578125e-05, + "step": 18120, + "training_step_time": 0.1313326358795166 + }, + { + "epoch": 2.765045166015625e-05, + "model_forward_time": 0.02507805824279785, + "step": 18121 + }, + { + "epoch": 2.765045166015625e-05, + "step": 18121, + "training_step_time": 0.10732698440551758 + }, + { + "epoch": 2.76519775390625e-05, + "model_forward_time": 0.025742769241333008, + "step": 18122 + }, + { + "epoch": 2.76519775390625e-05, + "step": 18122, + "training_step_time": 0.10694074630737305 + }, + { + "epoch": 2.765350341796875e-05, + "model_forward_time": 0.02510380744934082, + "step": 18123 + }, + { + "epoch": 2.765350341796875e-05, + "step": 18123, + "training_step_time": 0.10821866989135742 + }, + { + "epoch": 2.7655029296875e-05, + "model_forward_time": 0.025131940841674805, + "step": 18124 + }, + { + "epoch": 2.7655029296875e-05, + "step": 18124, + "training_step_time": 0.10664939880371094 + }, + { + "epoch": 2.765655517578125e-05, + "model_forward_time": 0.02499532699584961, + "step": 18125 + }, + { + "epoch": 2.765655517578125e-05, + "step": 18125, + "training_step_time": 0.10856246948242188 + }, + { + "epoch": 2.76580810546875e-05, + "model_forward_time": 0.02704596519470215, + "step": 18126 + }, + { + "epoch": 2.76580810546875e-05, + "step": 18126, + "training_step_time": 0.11043643951416016 + }, + { + "epoch": 2.765960693359375e-05, + "model_forward_time": 0.025505781173706055, + "step": 18127 + }, + { + "epoch": 2.765960693359375e-05, + "step": 18127, + "training_step_time": 0.1676006317138672 + }, + { + "epoch": 2.76611328125e-05, + "model_forward_time": 0.025732040405273438, + "step": 18128 + }, + { + "epoch": 2.76611328125e-05, + "step": 18128, + "training_step_time": 0.1321582794189453 + }, + { + "epoch": 2.766265869140625e-05, + "model_forward_time": 0.024463891983032227, + "step": 18129 + }, + { + "epoch": 2.766265869140625e-05, + "step": 18129, + "training_step_time": 0.1138925552368164 + }, + { + "epoch": 2.76641845703125e-05, + "grad_norm": 0.3108060359954834, + "learning_rate": 3.703243089178337e-05, + "loss": 0.013, + "step": 18130 + }, + { + "epoch": 2.76641845703125e-05, + "model_forward_time": 0.02553868293762207, + "step": 18130 + }, + { + "epoch": 2.76641845703125e-05, + "step": 18130, + "training_step_time": 0.10431957244873047 + }, + { + "epoch": 2.766571044921875e-05, + "model_forward_time": 0.025703907012939453, + "step": 18131 + }, + { + "epoch": 2.766571044921875e-05, + "step": 18131, + "training_step_time": 0.11706137657165527 + }, + { + "epoch": 2.7667236328125e-05, + "model_forward_time": 0.02522444725036621, + "step": 18132 + }, + { + "epoch": 2.7667236328125e-05, + "step": 18132, + "training_step_time": 0.1340196132659912 + }, + { + "epoch": 2.766876220703125e-05, + "model_forward_time": 0.024974346160888672, + "step": 18133 + }, + { + "epoch": 2.766876220703125e-05, + "step": 18133, + "training_step_time": 0.19586873054504395 + }, + { + "epoch": 2.76702880859375e-05, + "model_forward_time": 0.024336814880371094, + "step": 18134 + }, + { + "epoch": 2.76702880859375e-05, + "step": 18134, + "training_step_time": 0.153656005859375 + }, + { + "epoch": 2.767181396484375e-05, + "model_forward_time": 0.024678707122802734, + "step": 18135 + }, + { + "epoch": 2.767181396484375e-05, + "step": 18135, + "training_step_time": 0.1350870132446289 + }, + { + "epoch": 2.767333984375e-05, + "model_forward_time": 0.024815082550048828, + "step": 18136 + }, + { + "epoch": 2.767333984375e-05, + "step": 18136, + "training_step_time": 0.12999582290649414 + }, + { + "epoch": 2.767486572265625e-05, + "model_forward_time": 0.025023937225341797, + "step": 18137 + }, + { + "epoch": 2.767486572265625e-05, + "step": 18137, + "training_step_time": 0.12358975410461426 + }, + { + "epoch": 2.76763916015625e-05, + "model_forward_time": 0.02514791488647461, + "step": 18138 + }, + { + "epoch": 2.76763916015625e-05, + "step": 18138, + "training_step_time": 0.12296390533447266 + }, + { + "epoch": 2.767791748046875e-05, + "model_forward_time": 0.02565932273864746, + "step": 18139 + }, + { + "epoch": 2.767791748046875e-05, + "step": 18139, + "training_step_time": 0.11563706398010254 + }, + { + "epoch": 2.7679443359375e-05, + "grad_norm": 0.18115819990634918, + "learning_rate": 3.697920901130178e-05, + "loss": 0.007, + "step": 18140 + }, + { + "epoch": 2.7679443359375e-05, + "model_forward_time": 0.027875900268554688, + "step": 18140 + }, + { + "epoch": 2.7679443359375e-05, + "step": 18140, + "training_step_time": 0.11524224281311035 + }, + { + "epoch": 2.768096923828125e-05, + "model_forward_time": 0.025517940521240234, + "step": 18141 + }, + { + "epoch": 2.768096923828125e-05, + "step": 18141, + "training_step_time": 0.11596441268920898 + }, + { + "epoch": 2.76824951171875e-05, + "model_forward_time": 0.025122880935668945, + "step": 18142 + }, + { + "epoch": 2.76824951171875e-05, + "step": 18142, + "training_step_time": 0.11036348342895508 + }, + { + "epoch": 2.768402099609375e-05, + "model_forward_time": 0.02572941780090332, + "step": 18143 + }, + { + "epoch": 2.768402099609375e-05, + "step": 18143, + "training_step_time": 0.10387086868286133 + }, + { + "epoch": 2.7685546875e-05, + "model_forward_time": 0.02544093132019043, + "step": 18144 + }, + { + "epoch": 2.7685546875e-05, + "step": 18144, + "training_step_time": 0.10453033447265625 + }, + { + "epoch": 2.768707275390625e-05, + "model_forward_time": 0.02531719207763672, + "step": 18145 + }, + { + "epoch": 2.768707275390625e-05, + "step": 18145, + "training_step_time": 0.10442113876342773 + }, + { + "epoch": 2.76885986328125e-05, + "model_forward_time": 0.02498149871826172, + "step": 18146 + }, + { + "epoch": 2.76885986328125e-05, + "step": 18146, + "training_step_time": 0.10443997383117676 + }, + { + "epoch": 2.769012451171875e-05, + "model_forward_time": 0.025177001953125, + "step": 18147 + }, + { + "epoch": 2.769012451171875e-05, + "step": 18147, + "training_step_time": 0.10314130783081055 + }, + { + "epoch": 2.7691650390625e-05, + "model_forward_time": 0.024719715118408203, + "step": 18148 + }, + { + "epoch": 2.7691650390625e-05, + "step": 18148, + "training_step_time": 0.17686700820922852 + }, + { + "epoch": 2.769317626953125e-05, + "model_forward_time": 0.024550676345825195, + "step": 18149 + }, + { + "epoch": 2.769317626953125e-05, + "step": 18149, + "training_step_time": 0.12951898574829102 + }, + { + "epoch": 2.76947021484375e-05, + "grad_norm": 0.4410998821258545, + "learning_rate": 3.6926002952309016e-05, + "loss": 0.0094, + "step": 18150 + }, + { + "epoch": 2.76947021484375e-05, + "model_forward_time": 0.024393558502197266, + "step": 18150 + }, + { + "epoch": 2.76947021484375e-05, + "step": 18150, + "training_step_time": 0.1180570125579834 + }, + { + "epoch": 2.769622802734375e-05, + "model_forward_time": 0.02485823631286621, + "step": 18151 + }, + { + "epoch": 2.769622802734375e-05, + "step": 18151, + "training_step_time": 0.11525511741638184 + }, + { + "epoch": 2.769775390625e-05, + "model_forward_time": 0.025414228439331055, + "step": 18152 + }, + { + "epoch": 2.769775390625e-05, + "step": 18152, + "training_step_time": 0.18957757949829102 + }, + { + "epoch": 2.769927978515625e-05, + "model_forward_time": 0.025480031967163086, + "step": 18153 + }, + { + "epoch": 2.769927978515625e-05, + "step": 18153, + "training_step_time": 0.10802483558654785 + }, + { + "epoch": 2.77008056640625e-05, + "model_forward_time": 0.024160385131835938, + "step": 18154 + }, + { + "epoch": 2.77008056640625e-05, + "step": 18154, + "training_step_time": 0.14633464813232422 + }, + { + "epoch": 2.770233154296875e-05, + "model_forward_time": 0.025056838989257812, + "step": 18155 + }, + { + "epoch": 2.770233154296875e-05, + "step": 18155, + "training_step_time": 0.1696178913116455 + }, + { + "epoch": 2.7703857421875e-05, + "model_forward_time": 0.02483367919921875, + "step": 18156 + }, + { + "epoch": 2.7703857421875e-05, + "step": 18156, + "training_step_time": 0.11711668968200684 + }, + { + "epoch": 2.770538330078125e-05, + "model_forward_time": 0.024790048599243164, + "step": 18157 + }, + { + "epoch": 2.770538330078125e-05, + "step": 18157, + "training_step_time": 0.11730551719665527 + }, + { + "epoch": 2.77069091796875e-05, + "model_forward_time": 0.02529311180114746, + "step": 18158 + }, + { + "epoch": 2.77069091796875e-05, + "step": 18158, + "training_step_time": 0.12294721603393555 + }, + { + "epoch": 2.770843505859375e-05, + "model_forward_time": 0.02542591094970703, + "step": 18159 + }, + { + "epoch": 2.770843505859375e-05, + "step": 18159, + "training_step_time": 0.1450653076171875 + }, + { + "epoch": 2.77099609375e-05, + "grad_norm": 0.18899288773536682, + "learning_rate": 3.687281277945547e-05, + "loss": 0.0105, + "step": 18160 + }, + { + "epoch": 2.77099609375e-05, + "model_forward_time": 0.02483510971069336, + "step": 18160 + }, + { + "epoch": 2.77099609375e-05, + "step": 18160, + "training_step_time": 0.11096620559692383 + }, + { + "epoch": 2.771148681640625e-05, + "model_forward_time": 0.025243520736694336, + "step": 18161 + }, + { + "epoch": 2.771148681640625e-05, + "step": 18161, + "training_step_time": 0.10970807075500488 + }, + { + "epoch": 2.77130126953125e-05, + "model_forward_time": 0.024886608123779297, + "step": 18162 + }, + { + "epoch": 2.77130126953125e-05, + "step": 18162, + "training_step_time": 0.12086653709411621 + }, + { + "epoch": 2.771453857421875e-05, + "model_forward_time": 0.025333166122436523, + "step": 18163 + }, + { + "epoch": 2.771453857421875e-05, + "step": 18163, + "training_step_time": 0.125319242477417 + }, + { + "epoch": 2.7716064453125e-05, + "model_forward_time": 0.02520155906677246, + "step": 18164 + }, + { + "epoch": 2.7716064453125e-05, + "step": 18164, + "training_step_time": 0.11907243728637695 + }, + { + "epoch": 2.771759033203125e-05, + "model_forward_time": 0.025361061096191406, + "step": 18165 + }, + { + "epoch": 2.771759033203125e-05, + "step": 18165, + "training_step_time": 0.12551188468933105 + }, + { + "epoch": 2.77191162109375e-05, + "model_forward_time": 0.025522947311401367, + "step": 18166 + }, + { + "epoch": 2.77191162109375e-05, + "step": 18166, + "training_step_time": 0.11586213111877441 + }, + { + "epoch": 2.772064208984375e-05, + "model_forward_time": 0.027037382125854492, + "step": 18167 + }, + { + "epoch": 2.772064208984375e-05, + "step": 18167, + "training_step_time": 0.13370609283447266 + }, + { + "epoch": 2.772216796875e-05, + "model_forward_time": 0.024961233139038086, + "step": 18168 + }, + { + "epoch": 2.772216796875e-05, + "step": 18168, + "training_step_time": 0.10550260543823242 + }, + { + "epoch": 2.772369384765625e-05, + "model_forward_time": 0.025358200073242188, + "step": 18169 + }, + { + "epoch": 2.772369384765625e-05, + "step": 18169, + "training_step_time": 0.10926318168640137 + }, + { + "epoch": 2.77252197265625e-05, + "grad_norm": 0.39983922243118286, + "learning_rate": 3.68196385573722e-05, + "loss": 0.0121, + "step": 18170 + }, + { + "epoch": 2.77252197265625e-05, + "model_forward_time": 0.02585315704345703, + "step": 18170 + }, + { + "epoch": 2.77252197265625e-05, + "step": 18170, + "training_step_time": 0.10893988609313965 + }, + { + "epoch": 2.772674560546875e-05, + "model_forward_time": 0.02562117576599121, + "step": 18171 + }, + { + "epoch": 2.772674560546875e-05, + "step": 18171, + "training_step_time": 0.10671329498291016 + }, + { + "epoch": 2.7728271484375e-05, + "model_forward_time": 0.025365114212036133, + "step": 18172 + }, + { + "epoch": 2.7728271484375e-05, + "step": 18172, + "training_step_time": 0.1687023639678955 + }, + { + "epoch": 2.772979736328125e-05, + "model_forward_time": 0.02462172508239746, + "step": 18173 + }, + { + "epoch": 2.772979736328125e-05, + "step": 18173, + "training_step_time": 0.14101552963256836 + }, + { + "epoch": 2.77313232421875e-05, + "model_forward_time": 0.024817466735839844, + "step": 18174 + }, + { + "epoch": 2.77313232421875e-05, + "step": 18174, + "training_step_time": 0.11067318916320801 + }, + { + "epoch": 2.773284912109375e-05, + "model_forward_time": 0.025244951248168945, + "step": 18175 + }, + { + "epoch": 2.773284912109375e-05, + "step": 18175, + "training_step_time": 0.10966300964355469 + }, + { + "epoch": 2.7734375e-05, + "model_forward_time": 0.026389360427856445, + "step": 18176 + }, + { + "epoch": 2.7734375e-05, + "step": 18176, + "training_step_time": 0.10791897773742676 + }, + { + "epoch": 2.773590087890625e-05, + "model_forward_time": 0.025504589080810547, + "step": 18177 + }, + { + "epoch": 2.773590087890625e-05, + "step": 18177, + "training_step_time": 0.1130526065826416 + }, + { + "epoch": 2.77374267578125e-05, + "model_forward_time": 0.026491880416870117, + "step": 18178 + }, + { + "epoch": 2.77374267578125e-05, + "step": 18178, + "training_step_time": 0.12763357162475586 + }, + { + "epoch": 2.773895263671875e-05, + "model_forward_time": 0.02591109275817871, + "step": 18179 + }, + { + "epoch": 2.773895263671875e-05, + "step": 18179, + "training_step_time": 0.13400602340698242 + }, + { + "epoch": 2.7740478515625e-05, + "grad_norm": 0.19778363406658173, + "learning_rate": 3.676648035067093e-05, + "loss": 0.0074, + "step": 18180 + }, + { + "epoch": 2.7740478515625e-05, + "model_forward_time": 0.02493882179260254, + "step": 18180 + }, + { + "epoch": 2.7740478515625e-05, + "step": 18180, + "training_step_time": 0.10508608818054199 + }, + { + "epoch": 2.774200439453125e-05, + "model_forward_time": 0.02560257911682129, + "step": 18181 + }, + { + "epoch": 2.774200439453125e-05, + "step": 18181, + "training_step_time": 0.1059730052947998 + }, + { + "epoch": 2.77435302734375e-05, + "model_forward_time": 0.025562047958374023, + "step": 18182 + }, + { + "epoch": 2.77435302734375e-05, + "step": 18182, + "training_step_time": 0.10698080062866211 + }, + { + "epoch": 2.774505615234375e-05, + "model_forward_time": 0.025084972381591797, + "step": 18183 + }, + { + "epoch": 2.774505615234375e-05, + "step": 18183, + "training_step_time": 0.10579085350036621 + }, + { + "epoch": 2.774658203125e-05, + "model_forward_time": 0.025585174560546875, + "step": 18184 + }, + { + "epoch": 2.774658203125e-05, + "step": 18184, + "training_step_time": 0.10533595085144043 + }, + { + "epoch": 2.774810791015625e-05, + "model_forward_time": 0.026225566864013672, + "step": 18185 + }, + { + "epoch": 2.774810791015625e-05, + "step": 18185, + "training_step_time": 0.10784554481506348 + }, + { + "epoch": 2.77496337890625e-05, + "model_forward_time": 0.02575397491455078, + "step": 18186 + }, + { + "epoch": 2.77496337890625e-05, + "step": 18186, + "training_step_time": 0.10736346244812012 + }, + { + "epoch": 2.775115966796875e-05, + "model_forward_time": 0.025572776794433594, + "step": 18187 + }, + { + "epoch": 2.775115966796875e-05, + "step": 18187, + "training_step_time": 0.10652446746826172 + }, + { + "epoch": 2.7752685546875e-05, + "model_forward_time": 0.025231122970581055, + "step": 18188 + }, + { + "epoch": 2.7752685546875e-05, + "step": 18188, + "training_step_time": 0.10638213157653809 + }, + { + "epoch": 2.775421142578125e-05, + "model_forward_time": 0.025702953338623047, + "step": 18189 + }, + { + "epoch": 2.775421142578125e-05, + "step": 18189, + "training_step_time": 0.10791540145874023 + }, + { + "epoch": 2.77557373046875e-05, + "grad_norm": 0.18023350834846497, + "learning_rate": 3.6713338223943867e-05, + "loss": 0.0112, + "step": 18190 + }, + { + "epoch": 2.77557373046875e-05, + "model_forward_time": 0.025295495986938477, + "step": 18190 + }, + { + "epoch": 2.77557373046875e-05, + "step": 18190, + "training_step_time": 0.10558056831359863 + }, + { + "epoch": 2.775726318359375e-05, + "model_forward_time": 0.025256872177124023, + "step": 18191 + }, + { + "epoch": 2.775726318359375e-05, + "step": 18191, + "training_step_time": 0.10480761528015137 + }, + { + "epoch": 2.77587890625e-05, + "model_forward_time": 0.02544093132019043, + "step": 18192 + }, + { + "epoch": 2.77587890625e-05, + "step": 18192, + "training_step_time": 0.10573649406433105 + }, + { + "epoch": 2.776031494140625e-05, + "model_forward_time": 0.025029420852661133, + "step": 18193 + }, + { + "epoch": 2.776031494140625e-05, + "step": 18193, + "training_step_time": 0.10547256469726562 + }, + { + "epoch": 2.77618408203125e-05, + "model_forward_time": 0.025355815887451172, + "step": 18194 + }, + { + "epoch": 2.77618408203125e-05, + "step": 18194, + "training_step_time": 0.11199140548706055 + }, + { + "epoch": 2.776336669921875e-05, + "model_forward_time": 0.025072336196899414, + "step": 18195 + }, + { + "epoch": 2.776336669921875e-05, + "step": 18195, + "training_step_time": 0.17957091331481934 + }, + { + "epoch": 2.7764892578125e-05, + "model_forward_time": 0.024799585342407227, + "step": 18196 + }, + { + "epoch": 2.7764892578125e-05, + "step": 18196, + "training_step_time": 0.10981535911560059 + }, + { + "epoch": 2.776641845703125e-05, + "model_forward_time": 0.025828838348388672, + "step": 18197 + }, + { + "epoch": 2.776641845703125e-05, + "step": 18197, + "training_step_time": 0.1475992202758789 + }, + { + "epoch": 2.77679443359375e-05, + "model_forward_time": 0.025468826293945312, + "step": 18198 + }, + { + "epoch": 2.77679443359375e-05, + "step": 18198, + "training_step_time": 0.15376830101013184 + }, + { + "epoch": 2.776947021484375e-05, + "model_forward_time": 0.0243833065032959, + "step": 18199 + }, + { + "epoch": 2.776947021484375e-05, + "step": 18199, + "training_step_time": 0.22815275192260742 + }, + { + "epoch": 2.777099609375e-05, + "grad_norm": 0.1301114410161972, + "learning_rate": 3.666021224176369e-05, + "loss": 0.0066, + "step": 18200 + }, + { + "epoch": 2.777099609375e-05, + "model_forward_time": 0.025046348571777344, + "step": 18200 + }, + { + "epoch": 2.777099609375e-05, + "step": 18200, + "training_step_time": 0.16370177268981934 + }, + { + "epoch": 2.777252197265625e-05, + "model_forward_time": 0.024945497512817383, + "step": 18201 + }, + { + "epoch": 2.777252197265625e-05, + "step": 18201, + "training_step_time": 0.12728333473205566 + }, + { + "epoch": 2.77740478515625e-05, + "model_forward_time": 0.025346040725708008, + "step": 18202 + }, + { + "epoch": 2.77740478515625e-05, + "step": 18202, + "training_step_time": 0.1124577522277832 + }, + { + "epoch": 2.777557373046875e-05, + "model_forward_time": 0.028141260147094727, + "step": 18203 + }, + { + "epoch": 2.777557373046875e-05, + "step": 18203, + "training_step_time": 0.11623358726501465 + }, + { + "epoch": 2.7777099609375e-05, + "model_forward_time": 0.02539801597595215, + "step": 18204 + }, + { + "epoch": 2.7777099609375e-05, + "step": 18204, + "training_step_time": 0.1962277889251709 + }, + { + "epoch": 2.777862548828125e-05, + "model_forward_time": 0.025658845901489258, + "step": 18205 + }, + { + "epoch": 2.777862548828125e-05, + "step": 18205, + "training_step_time": 0.19392824172973633 + }, + { + "epoch": 2.77801513671875e-05, + "model_forward_time": 0.024623870849609375, + "step": 18206 + }, + { + "epoch": 2.77801513671875e-05, + "step": 18206, + "training_step_time": 0.10860705375671387 + }, + { + "epoch": 2.778167724609375e-05, + "model_forward_time": 0.025190353393554688, + "step": 18207 + }, + { + "epoch": 2.778167724609375e-05, + "step": 18207, + "training_step_time": 0.11414504051208496 + }, + { + "epoch": 2.7783203125e-05, + "model_forward_time": 0.02712726593017578, + "step": 18208 + }, + { + "epoch": 2.7783203125e-05, + "step": 18208, + "training_step_time": 0.11818647384643555 + }, + { + "epoch": 2.778472900390625e-05, + "model_forward_time": 0.025449752807617188, + "step": 18209 + }, + { + "epoch": 2.778472900390625e-05, + "step": 18209, + "training_step_time": 0.12958788871765137 + }, + { + "epoch": 2.77862548828125e-05, + "grad_norm": 0.24471884965896606, + "learning_rate": 3.6607102468683526e-05, + "loss": 0.0103, + "step": 18210 + }, + { + "epoch": 2.77862548828125e-05, + "model_forward_time": 0.025458097457885742, + "step": 18210 + }, + { + "epoch": 2.77862548828125e-05, + "step": 18210, + "training_step_time": 0.1079249382019043 + }, + { + "epoch": 2.778778076171875e-05, + "model_forward_time": 0.025824785232543945, + "step": 18211 + }, + { + "epoch": 2.778778076171875e-05, + "step": 18211, + "training_step_time": 0.11292338371276855 + }, + { + "epoch": 2.7789306640625e-05, + "model_forward_time": 0.025556564331054688, + "step": 18212 + }, + { + "epoch": 2.7789306640625e-05, + "step": 18212, + "training_step_time": 0.10806417465209961 + }, + { + "epoch": 2.779083251953125e-05, + "model_forward_time": 0.025482177734375, + "step": 18213 + }, + { + "epoch": 2.779083251953125e-05, + "step": 18213, + "training_step_time": 0.10474801063537598 + }, + { + "epoch": 2.77923583984375e-05, + "model_forward_time": 0.02543783187866211, + "step": 18214 + }, + { + "epoch": 2.77923583984375e-05, + "step": 18214, + "training_step_time": 0.1116631031036377 + }, + { + "epoch": 2.779388427734375e-05, + "model_forward_time": 0.02560591697692871, + "step": 18215 + }, + { + "epoch": 2.779388427734375e-05, + "step": 18215, + "training_step_time": 0.10955500602722168 + }, + { + "epoch": 2.779541015625e-05, + "model_forward_time": 0.02567601203918457, + "step": 18216 + }, + { + "epoch": 2.779541015625e-05, + "step": 18216, + "training_step_time": 0.10474205017089844 + }, + { + "epoch": 2.779693603515625e-05, + "model_forward_time": 0.025799036026000977, + "step": 18217 + }, + { + "epoch": 2.779693603515625e-05, + "step": 18217, + "training_step_time": 0.10496282577514648 + }, + { + "epoch": 2.77984619140625e-05, + "model_forward_time": 0.025357484817504883, + "step": 18218 + }, + { + "epoch": 2.77984619140625e-05, + "step": 18218, + "training_step_time": 0.10508894920349121 + }, + { + "epoch": 2.779998779296875e-05, + "model_forward_time": 0.02510690689086914, + "step": 18219 + }, + { + "epoch": 2.779998779296875e-05, + "step": 18219, + "training_step_time": 0.12858891487121582 + }, + { + "epoch": 2.7801513671875e-05, + "grad_norm": 0.1059555932879448, + "learning_rate": 3.655400896923672e-05, + "loss": 0.0053, + "step": 18220 + }, + { + "epoch": 2.7801513671875e-05, + "model_forward_time": 0.025163650512695312, + "step": 18220 + }, + { + "epoch": 2.7801513671875e-05, + "step": 18220, + "training_step_time": 0.13014960289001465 + }, + { + "epoch": 2.780303955078125e-05, + "model_forward_time": 0.024983644485473633, + "step": 18221 + }, + { + "epoch": 2.780303955078125e-05, + "step": 18221, + "training_step_time": 0.10358047485351562 + }, + { + "epoch": 2.78045654296875e-05, + "model_forward_time": 0.02560138702392578, + "step": 18222 + }, + { + "epoch": 2.78045654296875e-05, + "step": 18222, + "training_step_time": 0.12018156051635742 + }, + { + "epoch": 2.780609130859375e-05, + "model_forward_time": 0.026764392852783203, + "step": 18223 + }, + { + "epoch": 2.780609130859375e-05, + "step": 18223, + "training_step_time": 0.1076352596282959 + }, + { + "epoch": 2.78076171875e-05, + "model_forward_time": 0.025504589080810547, + "step": 18224 + }, + { + "epoch": 2.78076171875e-05, + "step": 18224, + "training_step_time": 0.10963940620422363 + }, + { + "epoch": 2.780914306640625e-05, + "model_forward_time": 0.025185346603393555, + "step": 18225 + }, + { + "epoch": 2.780914306640625e-05, + "step": 18225, + "training_step_time": 0.11594223976135254 + }, + { + "epoch": 2.78106689453125e-05, + "model_forward_time": 0.02525615692138672, + "step": 18226 + }, + { + "epoch": 2.78106689453125e-05, + "step": 18226, + "training_step_time": 0.11114740371704102 + }, + { + "epoch": 2.781219482421875e-05, + "model_forward_time": 0.025641202926635742, + "step": 18227 + }, + { + "epoch": 2.781219482421875e-05, + "step": 18227, + "training_step_time": 0.10615658760070801 + }, + { + "epoch": 2.7813720703125e-05, + "model_forward_time": 0.02593827247619629, + "step": 18228 + }, + { + "epoch": 2.7813720703125e-05, + "step": 18228, + "training_step_time": 0.1060175895690918 + }, + { + "epoch": 2.781524658203125e-05, + "model_forward_time": 0.02541208267211914, + "step": 18229 + }, + { + "epoch": 2.781524658203125e-05, + "step": 18229, + "training_step_time": 0.1056976318359375 + }, + { + "epoch": 2.78167724609375e-05, + "grad_norm": 0.1257658302783966, + "learning_rate": 3.650093180793689e-05, + "loss": 0.0081, + "step": 18230 + }, + { + "epoch": 2.78167724609375e-05, + "model_forward_time": 0.025366783142089844, + "step": 18230 + }, + { + "epoch": 2.78167724609375e-05, + "step": 18230, + "training_step_time": 0.10752391815185547 + }, + { + "epoch": 2.781829833984375e-05, + "model_forward_time": 0.025318145751953125, + "step": 18231 + }, + { + "epoch": 2.781829833984375e-05, + "step": 18231, + "training_step_time": 0.11149001121520996 + }, + { + "epoch": 2.781982421875e-05, + "model_forward_time": 0.02529764175415039, + "step": 18232 + }, + { + "epoch": 2.781982421875e-05, + "step": 18232, + "training_step_time": 0.10521078109741211 + }, + { + "epoch": 2.782135009765625e-05, + "model_forward_time": 0.025228500366210938, + "step": 18233 + }, + { + "epoch": 2.782135009765625e-05, + "step": 18233, + "training_step_time": 0.10685896873474121 + }, + { + "epoch": 2.78228759765625e-05, + "model_forward_time": 0.02555370330810547, + "step": 18234 + }, + { + "epoch": 2.78228759765625e-05, + "step": 18234, + "training_step_time": 0.10959029197692871 + }, + { + "epoch": 2.782440185546875e-05, + "model_forward_time": 0.025466442108154297, + "step": 18235 + }, + { + "epoch": 2.782440185546875e-05, + "step": 18235, + "training_step_time": 0.10811138153076172 + }, + { + "epoch": 2.7825927734375e-05, + "model_forward_time": 0.025349855422973633, + "step": 18236 + }, + { + "epoch": 2.7825927734375e-05, + "step": 18236, + "training_step_time": 0.10788393020629883 + }, + { + "epoch": 2.782745361328125e-05, + "model_forward_time": 0.02436208724975586, + "step": 18237 + }, + { + "epoch": 2.782745361328125e-05, + "step": 18237, + "training_step_time": 0.10637521743774414 + }, + { + "epoch": 2.78289794921875e-05, + "model_forward_time": 0.02462029457092285, + "step": 18238 + }, + { + "epoch": 2.78289794921875e-05, + "step": 18238, + "training_step_time": 0.10635614395141602 + }, + { + "epoch": 2.783050537109375e-05, + "model_forward_time": 0.024852514266967773, + "step": 18239 + }, + { + "epoch": 2.783050537109375e-05, + "step": 18239, + "training_step_time": 0.11096882820129395 + }, + { + "epoch": 2.783203125e-05, + "grad_norm": 0.3065794110298157, + "learning_rate": 3.6447871049277796e-05, + "loss": 0.0086, + "step": 18240 + }, + { + "epoch": 2.783203125e-05, + "model_forward_time": 0.025746822357177734, + "step": 18240 + }, + { + "epoch": 2.783203125e-05, + "step": 18240, + "training_step_time": 0.1088871955871582 + }, + { + "epoch": 2.783355712890625e-05, + "model_forward_time": 0.02533864974975586, + "step": 18241 + }, + { + "epoch": 2.783355712890625e-05, + "step": 18241, + "training_step_time": 0.21394085884094238 + }, + { + "epoch": 2.78350830078125e-05, + "model_forward_time": 0.024507761001586914, + "step": 18242 + }, + { + "epoch": 2.78350830078125e-05, + "step": 18242, + "training_step_time": 0.11600279808044434 + }, + { + "epoch": 2.783660888671875e-05, + "model_forward_time": 0.024621009826660156, + "step": 18243 + }, + { + "epoch": 2.783660888671875e-05, + "step": 18243, + "training_step_time": 0.11689233779907227 + }, + { + "epoch": 2.7838134765625e-05, + "model_forward_time": 0.025287866592407227, + "step": 18244 + }, + { + "epoch": 2.7838134765625e-05, + "step": 18244, + "training_step_time": 0.15595769882202148 + }, + { + "epoch": 2.783966064453125e-05, + "model_forward_time": 0.02460789680480957, + "step": 18245 + }, + { + "epoch": 2.783966064453125e-05, + "step": 18245, + "training_step_time": 0.1749579906463623 + }, + { + "epoch": 2.78411865234375e-05, + "model_forward_time": 0.028271198272705078, + "step": 18246 + }, + { + "epoch": 2.78411865234375e-05, + "step": 18246, + "training_step_time": 0.12980031967163086 + }, + { + "epoch": 2.784271240234375e-05, + "model_forward_time": 0.024895906448364258, + "step": 18247 + }, + { + "epoch": 2.784271240234375e-05, + "step": 18247, + "training_step_time": 0.20570969581604004 + }, + { + "epoch": 2.784423828125e-05, + "model_forward_time": 0.024450302124023438, + "step": 18248 + }, + { + "epoch": 2.784423828125e-05, + "step": 18248, + "training_step_time": 0.11033034324645996 + }, + { + "epoch": 2.784576416015625e-05, + "model_forward_time": 0.025096654891967773, + "step": 18249 + }, + { + "epoch": 2.784576416015625e-05, + "step": 18249, + "training_step_time": 0.11541080474853516 + }, + { + "epoch": 2.78472900390625e-05, + "grad_norm": 0.24270372092723846, + "learning_rate": 3.639482675773324e-05, + "loss": 0.0137, + "step": 18250 + }, + { + "epoch": 2.78472900390625e-05, + "model_forward_time": 0.025186538696289062, + "step": 18250 + }, + { + "epoch": 2.78472900390625e-05, + "step": 18250, + "training_step_time": 0.13156437873840332 + }, + { + "epoch": 2.784881591796875e-05, + "model_forward_time": 0.025368928909301758, + "step": 18251 + }, + { + "epoch": 2.784881591796875e-05, + "step": 18251, + "training_step_time": 0.1927814483642578 + }, + { + "epoch": 2.7850341796875e-05, + "model_forward_time": 0.024472475051879883, + "step": 18252 + }, + { + "epoch": 2.7850341796875e-05, + "step": 18252, + "training_step_time": 0.12273263931274414 + }, + { + "epoch": 2.785186767578125e-05, + "model_forward_time": 0.024443626403808594, + "step": 18253 + }, + { + "epoch": 2.785186767578125e-05, + "step": 18253, + "training_step_time": 0.10458683967590332 + }, + { + "epoch": 2.78533935546875e-05, + "model_forward_time": 0.025203943252563477, + "step": 18254 + }, + { + "epoch": 2.78533935546875e-05, + "step": 18254, + "training_step_time": 0.11017036437988281 + }, + { + "epoch": 2.785491943359375e-05, + "model_forward_time": 0.025699138641357422, + "step": 18255 + }, + { + "epoch": 2.785491943359375e-05, + "step": 18255, + "training_step_time": 0.12563204765319824 + }, + { + "epoch": 2.78564453125e-05, + "model_forward_time": 0.02537393569946289, + "step": 18256 + }, + { + "epoch": 2.78564453125e-05, + "step": 18256, + "training_step_time": 0.11905717849731445 + }, + { + "epoch": 2.785797119140625e-05, + "model_forward_time": 0.025377988815307617, + "step": 18257 + }, + { + "epoch": 2.785797119140625e-05, + "step": 18257, + "training_step_time": 0.11957621574401855 + }, + { + "epoch": 2.78594970703125e-05, + "model_forward_time": 0.025751829147338867, + "step": 18258 + }, + { + "epoch": 2.78594970703125e-05, + "step": 18258, + "training_step_time": 0.11984014511108398 + }, + { + "epoch": 2.786102294921875e-05, + "model_forward_time": 0.025429248809814453, + "step": 18259 + }, + { + "epoch": 2.786102294921875e-05, + "step": 18259, + "training_step_time": 0.10334491729736328 + }, + { + "epoch": 2.7862548828125e-05, + "grad_norm": 0.25312596559524536, + "learning_rate": 3.634179899775708e-05, + "loss": 0.0089, + "step": 18260 + }, + { + "epoch": 2.7862548828125e-05, + "model_forward_time": 0.025567293167114258, + "step": 18260 + }, + { + "epoch": 2.7862548828125e-05, + "step": 18260, + "training_step_time": 0.10787129402160645 + }, + { + "epoch": 2.786407470703125e-05, + "model_forward_time": 0.025156497955322266, + "step": 18261 + }, + { + "epoch": 2.786407470703125e-05, + "step": 18261, + "training_step_time": 0.10639476776123047 + }, + { + "epoch": 2.78656005859375e-05, + "model_forward_time": 0.02548050880432129, + "step": 18262 + }, + { + "epoch": 2.78656005859375e-05, + "step": 18262, + "training_step_time": 0.10518908500671387 + }, + { + "epoch": 2.786712646484375e-05, + "model_forward_time": 0.025381088256835938, + "step": 18263 + }, + { + "epoch": 2.786712646484375e-05, + "step": 18263, + "training_step_time": 0.10773634910583496 + }, + { + "epoch": 2.786865234375e-05, + "model_forward_time": 0.025629281997680664, + "step": 18264 + }, + { + "epoch": 2.786865234375e-05, + "step": 18264, + "training_step_time": 0.17014408111572266 + }, + { + "epoch": 2.787017822265625e-05, + "model_forward_time": 0.025063514709472656, + "step": 18265 + }, + { + "epoch": 2.787017822265625e-05, + "step": 18265, + "training_step_time": 0.21938323974609375 + }, + { + "epoch": 2.78717041015625e-05, + "model_forward_time": 0.024457693099975586, + "step": 18266 + }, + { + "epoch": 2.78717041015625e-05, + "step": 18266, + "training_step_time": 0.18399739265441895 + }, + { + "epoch": 2.787322998046875e-05, + "model_forward_time": 0.02755141258239746, + "step": 18267 + }, + { + "epoch": 2.787322998046875e-05, + "step": 18267, + "training_step_time": 0.17259716987609863 + }, + { + "epoch": 2.7874755859375e-05, + "model_forward_time": 0.024562358856201172, + "step": 18268 + }, + { + "epoch": 2.7874755859375e-05, + "step": 18268, + "training_step_time": 0.1928555965423584 + }, + { + "epoch": 2.787628173828125e-05, + "model_forward_time": 0.02499222755432129, + "step": 18269 + }, + { + "epoch": 2.787628173828125e-05, + "step": 18269, + "training_step_time": 0.2004554271697998 + }, + { + "epoch": 2.78778076171875e-05, + "grad_norm": 0.4324885606765747, + "learning_rate": 3.628878783378302e-05, + "loss": 0.0144, + "step": 18270 + }, + { + "epoch": 2.78778076171875e-05, + "model_forward_time": 0.02541065216064453, + "step": 18270 + }, + { + "epoch": 2.78778076171875e-05, + "step": 18270, + "training_step_time": 0.13161182403564453 + }, + { + "epoch": 2.787933349609375e-05, + "model_forward_time": 0.023491621017456055, + "step": 18271 + }, + { + "epoch": 2.787933349609375e-05, + "step": 18271, + "training_step_time": 0.12691235542297363 + }, + { + "epoch": 2.7880859375e-05, + "model_forward_time": 0.024225234985351562, + "step": 18272 + }, + { + "epoch": 2.7880859375e-05, + "step": 18272, + "training_step_time": 0.12451887130737305 + }, + { + "epoch": 2.788238525390625e-05, + "model_forward_time": 0.02633523941040039, + "step": 18273 + }, + { + "epoch": 2.788238525390625e-05, + "step": 18273, + "training_step_time": 0.11860537528991699 + }, + { + "epoch": 2.78839111328125e-05, + "model_forward_time": 0.025267839431762695, + "step": 18274 + }, + { + "epoch": 2.78839111328125e-05, + "step": 18274, + "training_step_time": 0.11661529541015625 + }, + { + "epoch": 2.788543701171875e-05, + "model_forward_time": 0.025425195693969727, + "step": 18275 + }, + { + "epoch": 2.788543701171875e-05, + "step": 18275, + "training_step_time": 0.10848474502563477 + }, + { + "epoch": 2.7886962890625e-05, + "model_forward_time": 0.025292396545410156, + "step": 18276 + }, + { + "epoch": 2.7886962890625e-05, + "step": 18276, + "training_step_time": 0.10890531539916992 + }, + { + "epoch": 2.788848876953125e-05, + "model_forward_time": 0.02562260627746582, + "step": 18277 + }, + { + "epoch": 2.788848876953125e-05, + "step": 18277, + "training_step_time": 0.1089627742767334 + }, + { + "epoch": 2.78900146484375e-05, + "model_forward_time": 0.02563309669494629, + "step": 18278 + }, + { + "epoch": 2.78900146484375e-05, + "step": 18278, + "training_step_time": 0.10908651351928711 + }, + { + "epoch": 2.789154052734375e-05, + "model_forward_time": 0.025481700897216797, + "step": 18279 + }, + { + "epoch": 2.789154052734375e-05, + "step": 18279, + "training_step_time": 0.10716485977172852 + }, + { + "epoch": 2.789306640625e-05, + "grad_norm": 0.16999660432338715, + "learning_rate": 3.6235793330224635e-05, + "loss": 0.0064, + "step": 18280 + }, + { + "epoch": 2.789306640625e-05, + "model_forward_time": 0.02515864372253418, + "step": 18280 + }, + { + "epoch": 2.789306640625e-05, + "step": 18280, + "training_step_time": 0.10670804977416992 + }, + { + "epoch": 2.789459228515625e-05, + "model_forward_time": 0.025453805923461914, + "step": 18281 + }, + { + "epoch": 2.789459228515625e-05, + "step": 18281, + "training_step_time": 0.11113977432250977 + }, + { + "epoch": 2.78961181640625e-05, + "model_forward_time": 0.02538895606994629, + "step": 18282 + }, + { + "epoch": 2.78961181640625e-05, + "step": 18282, + "training_step_time": 0.10705280303955078 + }, + { + "epoch": 2.789764404296875e-05, + "model_forward_time": 0.02567315101623535, + "step": 18283 + }, + { + "epoch": 2.789764404296875e-05, + "step": 18283, + "training_step_time": 0.10621809959411621 + }, + { + "epoch": 2.7899169921875e-05, + "model_forward_time": 0.02526998519897461, + "step": 18284 + }, + { + "epoch": 2.7899169921875e-05, + "step": 18284, + "training_step_time": 0.12465810775756836 + }, + { + "epoch": 2.790069580078125e-05, + "model_forward_time": 0.025272369384765625, + "step": 18285 + }, + { + "epoch": 2.790069580078125e-05, + "step": 18285, + "training_step_time": 0.11791062355041504 + }, + { + "epoch": 2.79022216796875e-05, + "model_forward_time": 0.02541375160217285, + "step": 18286 + }, + { + "epoch": 2.79022216796875e-05, + "step": 18286, + "training_step_time": 0.13313007354736328 + }, + { + "epoch": 2.790374755859375e-05, + "model_forward_time": 0.02515268325805664, + "step": 18287 + }, + { + "epoch": 2.790374755859375e-05, + "step": 18287, + "training_step_time": 0.15256285667419434 + }, + { + "epoch": 2.79052734375e-05, + "model_forward_time": 0.024395227432250977, + "step": 18288 + }, + { + "epoch": 2.79052734375e-05, + "step": 18288, + "training_step_time": 0.10927748680114746 + }, + { + "epoch": 2.790679931640625e-05, + "model_forward_time": 0.02699589729309082, + "step": 18289 + }, + { + "epoch": 2.790679931640625e-05, + "step": 18289, + "training_step_time": 0.11343097686767578 + }, + { + "epoch": 2.79083251953125e-05, + "grad_norm": 0.3422635495662689, + "learning_rate": 3.618281555147522e-05, + "loss": 0.0111, + "step": 18290 + }, + { + "epoch": 2.79083251953125e-05, + "model_forward_time": 0.02630162239074707, + "step": 18290 + }, + { + "epoch": 2.79083251953125e-05, + "step": 18290, + "training_step_time": 0.10517454147338867 + }, + { + "epoch": 2.790985107421875e-05, + "model_forward_time": 0.025934457778930664, + "step": 18291 + }, + { + "epoch": 2.790985107421875e-05, + "step": 18291, + "training_step_time": 0.1387767791748047 + }, + { + "epoch": 2.7911376953125e-05, + "model_forward_time": 0.024608850479125977, + "step": 18292 + }, + { + "epoch": 2.7911376953125e-05, + "step": 18292, + "training_step_time": 0.12232422828674316 + }, + { + "epoch": 2.791290283203125e-05, + "model_forward_time": 0.025117158889770508, + "step": 18293 + }, + { + "epoch": 2.791290283203125e-05, + "step": 18293, + "training_step_time": 0.10627508163452148 + }, + { + "epoch": 2.79144287109375e-05, + "model_forward_time": 0.025742292404174805, + "step": 18294 + }, + { + "epoch": 2.79144287109375e-05, + "step": 18294, + "training_step_time": 0.12194013595581055 + }, + { + "epoch": 2.791595458984375e-05, + "model_forward_time": 0.025872468948364258, + "step": 18295 + }, + { + "epoch": 2.791595458984375e-05, + "step": 18295, + "training_step_time": 0.12984180450439453 + }, + { + "epoch": 2.791748046875e-05, + "model_forward_time": 0.025594711303710938, + "step": 18296 + }, + { + "epoch": 2.791748046875e-05, + "step": 18296, + "training_step_time": 0.13345718383789062 + }, + { + "epoch": 2.791900634765625e-05, + "model_forward_time": 0.0255889892578125, + "step": 18297 + }, + { + "epoch": 2.791900634765625e-05, + "step": 18297, + "training_step_time": 0.1466231346130371 + }, + { + "epoch": 2.79205322265625e-05, + "model_forward_time": 0.024729490280151367, + "step": 18298 + }, + { + "epoch": 2.79205322265625e-05, + "step": 18298, + "training_step_time": 0.12056779861450195 + }, + { + "epoch": 2.792205810546875e-05, + "model_forward_time": 0.024597883224487305, + "step": 18299 + }, + { + "epoch": 2.792205810546875e-05, + "step": 18299, + "training_step_time": 0.21015191078186035 + }, + { + "epoch": 2.7923583984375e-05, + "grad_norm": 0.3045021891593933, + "learning_rate": 3.612985456190778e-05, + "loss": 0.0083, + "step": 18300 + }, + { + "epoch": 2.7923583984375e-05, + "model_forward_time": 0.024926424026489258, + "step": 18300 + }, + { + "epoch": 2.7923583984375e-05, + "step": 18300, + "training_step_time": 0.1305677890777588 + }, + { + "epoch": 2.792510986328125e-05, + "model_forward_time": 0.027011632919311523, + "step": 18301 + }, + { + "epoch": 2.792510986328125e-05, + "step": 18301, + "training_step_time": 0.11493659019470215 + }, + { + "epoch": 2.79266357421875e-05, + "model_forward_time": 0.025228023529052734, + "step": 18302 + }, + { + "epoch": 2.79266357421875e-05, + "step": 18302, + "training_step_time": 0.11197972297668457 + }, + { + "epoch": 2.792816162109375e-05, + "model_forward_time": 0.025374889373779297, + "step": 18303 + }, + { + "epoch": 2.792816162109375e-05, + "step": 18303, + "training_step_time": 0.11043453216552734 + }, + { + "epoch": 2.79296875e-05, + "model_forward_time": 0.02577376365661621, + "step": 18304 + }, + { + "epoch": 2.79296875e-05, + "step": 18304, + "training_step_time": 0.10995841026306152 + }, + { + "epoch": 2.793121337890625e-05, + "model_forward_time": 0.02533745765686035, + "step": 18305 + }, + { + "epoch": 2.793121337890625e-05, + "step": 18305, + "training_step_time": 0.10722613334655762 + }, + { + "epoch": 2.79327392578125e-05, + "model_forward_time": 0.027135848999023438, + "step": 18306 + }, + { + "epoch": 2.79327392578125e-05, + "step": 18306, + "training_step_time": 0.10967206954956055 + }, + { + "epoch": 2.793426513671875e-05, + "model_forward_time": 0.02510976791381836, + "step": 18307 + }, + { + "epoch": 2.793426513671875e-05, + "step": 18307, + "training_step_time": 0.10702037811279297 + }, + { + "epoch": 2.7935791015625e-05, + "model_forward_time": 0.025091886520385742, + "step": 18308 + }, + { + "epoch": 2.7935791015625e-05, + "step": 18308, + "training_step_time": 0.10604023933410645 + }, + { + "epoch": 2.793731689453125e-05, + "model_forward_time": 0.025442123413085938, + "step": 18309 + }, + { + "epoch": 2.793731689453125e-05, + "step": 18309, + "training_step_time": 0.1079108715057373 + }, + { + "epoch": 2.79388427734375e-05, + "grad_norm": 0.1470704823732376, + "learning_rate": 3.607691042587492e-05, + "loss": 0.0116, + "step": 18310 + }, + { + "epoch": 2.79388427734375e-05, + "model_forward_time": 0.024973392486572266, + "step": 18310 + }, + { + "epoch": 2.79388427734375e-05, + "step": 18310, + "training_step_time": 0.10716629028320312 + }, + { + "epoch": 2.794036865234375e-05, + "model_forward_time": 0.02525019645690918, + "step": 18311 + }, + { + "epoch": 2.794036865234375e-05, + "step": 18311, + "training_step_time": 0.12679576873779297 + }, + { + "epoch": 2.794189453125e-05, + "model_forward_time": 0.025838851928710938, + "step": 18312 + }, + { + "epoch": 2.794189453125e-05, + "step": 18312, + "training_step_time": 0.1123208999633789 + }, + { + "epoch": 2.794342041015625e-05, + "model_forward_time": 0.02532339096069336, + "step": 18313 + }, + { + "epoch": 2.794342041015625e-05, + "step": 18313, + "training_step_time": 0.11084699630737305 + }, + { + "epoch": 2.79449462890625e-05, + "model_forward_time": 0.025145769119262695, + "step": 18314 + }, + { + "epoch": 2.79449462890625e-05, + "step": 18314, + "training_step_time": 0.11454176902770996 + }, + { + "epoch": 2.794647216796875e-05, + "model_forward_time": 0.02503037452697754, + "step": 18315 + }, + { + "epoch": 2.794647216796875e-05, + "step": 18315, + "training_step_time": 0.10746407508850098 + }, + { + "epoch": 2.7947998046875e-05, + "model_forward_time": 0.025521516799926758, + "step": 18316 + }, + { + "epoch": 2.7947998046875e-05, + "step": 18316, + "training_step_time": 0.19453072547912598 + }, + { + "epoch": 2.794952392578125e-05, + "model_forward_time": 0.02474212646484375, + "step": 18317 + }, + { + "epoch": 2.794952392578125e-05, + "step": 18317, + "training_step_time": 0.11647605895996094 + }, + { + "epoch": 2.79510498046875e-05, + "model_forward_time": 0.025281667709350586, + "step": 18318 + }, + { + "epoch": 2.79510498046875e-05, + "step": 18318, + "training_step_time": 0.10419130325317383 + }, + { + "epoch": 2.795257568359375e-05, + "model_forward_time": 0.025305747985839844, + "step": 18319 + }, + { + "epoch": 2.795257568359375e-05, + "step": 18319, + "training_step_time": 0.10953283309936523 + }, + { + "epoch": 2.79541015625e-05, + "grad_norm": 0.2538145184516907, + "learning_rate": 3.602398320770875e-05, + "loss": 0.0102, + "step": 18320 + }, + { + "epoch": 2.79541015625e-05, + "model_forward_time": 0.025603532791137695, + "step": 18320 + }, + { + "epoch": 2.79541015625e-05, + "step": 18320, + "training_step_time": 0.10664629936218262 + }, + { + "epoch": 2.795562744140625e-05, + "model_forward_time": 0.02505016326904297, + "step": 18321 + }, + { + "epoch": 2.795562744140625e-05, + "step": 18321, + "training_step_time": 0.10572004318237305 + }, + { + "epoch": 2.79571533203125e-05, + "model_forward_time": 0.025905609130859375, + "step": 18322 + }, + { + "epoch": 2.79571533203125e-05, + "step": 18322, + "training_step_time": 0.10814404487609863 + }, + { + "epoch": 2.795867919921875e-05, + "model_forward_time": 0.026244401931762695, + "step": 18323 + }, + { + "epoch": 2.795867919921875e-05, + "step": 18323, + "training_step_time": 0.11467742919921875 + }, + { + "epoch": 2.7960205078125e-05, + "model_forward_time": 0.025305986404418945, + "step": 18324 + }, + { + "epoch": 2.7960205078125e-05, + "step": 18324, + "training_step_time": 0.10579109191894531 + }, + { + "epoch": 2.796173095703125e-05, + "model_forward_time": 0.025427579879760742, + "step": 18325 + }, + { + "epoch": 2.796173095703125e-05, + "step": 18325, + "training_step_time": 0.10454297065734863 + }, + { + "epoch": 2.79632568359375e-05, + "model_forward_time": 0.024906158447265625, + "step": 18326 + }, + { + "epoch": 2.79632568359375e-05, + "step": 18326, + "training_step_time": 0.10759568214416504 + }, + { + "epoch": 2.796478271484375e-05, + "model_forward_time": 0.025582075119018555, + "step": 18327 + }, + { + "epoch": 2.796478271484375e-05, + "step": 18327, + "training_step_time": 0.10872650146484375 + }, + { + "epoch": 2.796630859375e-05, + "model_forward_time": 0.025388717651367188, + "step": 18328 + }, + { + "epoch": 2.796630859375e-05, + "step": 18328, + "training_step_time": 0.1077432632446289 + }, + { + "epoch": 2.796783447265625e-05, + "model_forward_time": 0.025267362594604492, + "step": 18329 + }, + { + "epoch": 2.796783447265625e-05, + "step": 18329, + "training_step_time": 0.1051628589630127 + }, + { + "epoch": 2.79693603515625e-05, + "grad_norm": 0.1745704561471939, + "learning_rate": 3.597107297172084e-05, + "loss": 0.009, + "step": 18330 + }, + { + "epoch": 2.79693603515625e-05, + "model_forward_time": 0.02561354637145996, + "step": 18330 + }, + { + "epoch": 2.79693603515625e-05, + "step": 18330, + "training_step_time": 0.10848546028137207 + }, + { + "epoch": 2.797088623046875e-05, + "model_forward_time": 0.02574777603149414, + "step": 18331 + }, + { + "epoch": 2.797088623046875e-05, + "step": 18331, + "training_step_time": 0.17898988723754883 + }, + { + "epoch": 2.7972412109375e-05, + "model_forward_time": 0.025195837020874023, + "step": 18332 + }, + { + "epoch": 2.7972412109375e-05, + "step": 18332, + "training_step_time": 0.1186821460723877 + }, + { + "epoch": 2.797393798828125e-05, + "model_forward_time": 0.024928808212280273, + "step": 18333 + }, + { + "epoch": 2.797393798828125e-05, + "step": 18333, + "training_step_time": 0.1316087245941162 + }, + { + "epoch": 2.79754638671875e-05, + "model_forward_time": 0.02517390251159668, + "step": 18334 + }, + { + "epoch": 2.79754638671875e-05, + "step": 18334, + "training_step_time": 0.16114306449890137 + }, + { + "epoch": 2.797698974609375e-05, + "model_forward_time": 0.025583982467651367, + "step": 18335 + }, + { + "epoch": 2.797698974609375e-05, + "step": 18335, + "training_step_time": 0.17955422401428223 + }, + { + "epoch": 2.7978515625e-05, + "model_forward_time": 0.024855852127075195, + "step": 18336 + }, + { + "epoch": 2.7978515625e-05, + "step": 18336, + "training_step_time": 0.15543723106384277 + }, + { + "epoch": 2.798004150390625e-05, + "model_forward_time": 0.024886369705200195, + "step": 18337 + }, + { + "epoch": 2.798004150390625e-05, + "step": 18337, + "training_step_time": 0.20273065567016602 + }, + { + "epoch": 2.79815673828125e-05, + "model_forward_time": 0.02460026741027832, + "step": 18338 + }, + { + "epoch": 2.79815673828125e-05, + "step": 18338, + "training_step_time": 0.12636876106262207 + }, + { + "epoch": 2.798309326171875e-05, + "model_forward_time": 0.026241302490234375, + "step": 18339 + }, + { + "epoch": 2.798309326171875e-05, + "step": 18339, + "training_step_time": 0.11115455627441406 + }, + { + "epoch": 2.7984619140625e-05, + "grad_norm": 0.25465038418769836, + "learning_rate": 3.591817978220212e-05, + "loss": 0.0105, + "step": 18340 + }, + { + "epoch": 2.7984619140625e-05, + "model_forward_time": 0.026186227798461914, + "step": 18340 + }, + { + "epoch": 2.7984619140625e-05, + "step": 18340, + "training_step_time": 0.1095130443572998 + }, + { + "epoch": 2.798614501953125e-05, + "model_forward_time": 0.025721073150634766, + "step": 18341 + }, + { + "epoch": 2.798614501953125e-05, + "step": 18341, + "training_step_time": 0.21494841575622559 + }, + { + "epoch": 2.79876708984375e-05, + "model_forward_time": 0.025144338607788086, + "step": 18342 + }, + { + "epoch": 2.79876708984375e-05, + "step": 18342, + "training_step_time": 0.16524791717529297 + }, + { + "epoch": 2.798919677734375e-05, + "model_forward_time": 0.024668455123901367, + "step": 18343 + }, + { + "epoch": 2.798919677734375e-05, + "step": 18343, + "training_step_time": 0.11128783226013184 + }, + { + "epoch": 2.799072265625e-05, + "model_forward_time": 0.02464151382446289, + "step": 18344 + }, + { + "epoch": 2.799072265625e-05, + "step": 18344, + "training_step_time": 0.10782194137573242 + }, + { + "epoch": 2.799224853515625e-05, + "model_forward_time": 0.025577545166015625, + "step": 18345 + }, + { + "epoch": 2.799224853515625e-05, + "step": 18345, + "training_step_time": 0.11761832237243652 + }, + { + "epoch": 2.79937744140625e-05, + "model_forward_time": 0.024912118911743164, + "step": 18346 + }, + { + "epoch": 2.79937744140625e-05, + "step": 18346, + "training_step_time": 0.12977242469787598 + }, + { + "epoch": 2.799530029296875e-05, + "model_forward_time": 0.02516937255859375, + "step": 18347 + }, + { + "epoch": 2.799530029296875e-05, + "step": 18347, + "training_step_time": 0.12907767295837402 + }, + { + "epoch": 2.7996826171875e-05, + "model_forward_time": 0.02736210823059082, + "step": 18348 + }, + { + "epoch": 2.7996826171875e-05, + "step": 18348, + "training_step_time": 0.11854243278503418 + }, + { + "epoch": 2.799835205078125e-05, + "model_forward_time": 0.02537250518798828, + "step": 18349 + }, + { + "epoch": 2.799835205078125e-05, + "step": 18349, + "training_step_time": 0.10651755332946777 + }, + { + "epoch": 2.79998779296875e-05, + "grad_norm": 0.14040468633174896, + "learning_rate": 3.586530370342279e-05, + "loss": 0.0071, + "step": 18350 + }, + { + "epoch": 2.79998779296875e-05, + "model_forward_time": 0.025244951248168945, + "step": 18350 + }, + { + "epoch": 2.79998779296875e-05, + "step": 18350, + "training_step_time": 0.10501599311828613 + }, + { + "epoch": 2.800140380859375e-05, + "model_forward_time": 0.025095224380493164, + "step": 18351 + }, + { + "epoch": 2.800140380859375e-05, + "step": 18351, + "training_step_time": 0.10441708564758301 + }, + { + "epoch": 2.80029296875e-05, + "model_forward_time": 0.02532815933227539, + "step": 18352 + }, + { + "epoch": 2.80029296875e-05, + "step": 18352, + "training_step_time": 0.10570383071899414 + }, + { + "epoch": 2.800445556640625e-05, + "model_forward_time": 0.0251924991607666, + "step": 18353 + }, + { + "epoch": 2.800445556640625e-05, + "step": 18353, + "training_step_time": 0.10441207885742188 + }, + { + "epoch": 2.80059814453125e-05, + "model_forward_time": 0.025303125381469727, + "step": 18354 + }, + { + "epoch": 2.80059814453125e-05, + "step": 18354, + "training_step_time": 0.10609126091003418 + }, + { + "epoch": 2.800750732421875e-05, + "model_forward_time": 0.025273561477661133, + "step": 18355 + }, + { + "epoch": 2.800750732421875e-05, + "step": 18355, + "training_step_time": 0.10513925552368164 + }, + { + "epoch": 2.8009033203125e-05, + "model_forward_time": 0.025633811950683594, + "step": 18356 + }, + { + "epoch": 2.8009033203125e-05, + "step": 18356, + "training_step_time": 0.10897946357727051 + }, + { + "epoch": 2.801055908203125e-05, + "model_forward_time": 0.025460004806518555, + "step": 18357 + }, + { + "epoch": 2.801055908203125e-05, + "step": 18357, + "training_step_time": 0.12163329124450684 + }, + { + "epoch": 2.80120849609375e-05, + "model_forward_time": 0.025160789489746094, + "step": 18358 + }, + { + "epoch": 2.80120849609375e-05, + "step": 18358, + "training_step_time": 0.10438084602355957 + }, + { + "epoch": 2.801361083984375e-05, + "model_forward_time": 0.024986743927001953, + "step": 18359 + }, + { + "epoch": 2.801361083984375e-05, + "step": 18359, + "training_step_time": 0.12178850173950195 + }, + { + "epoch": 2.801513671875e-05, + "grad_norm": 0.25987479090690613, + "learning_rate": 3.581244479963225e-05, + "loss": 0.0088, + "step": 18360 + }, + { + "epoch": 2.801513671875e-05, + "model_forward_time": 0.02523040771484375, + "step": 18360 + }, + { + "epoch": 2.801513671875e-05, + "step": 18360, + "training_step_time": 0.1124269962310791 + }, + { + "epoch": 2.801666259765625e-05, + "model_forward_time": 0.02678990364074707, + "step": 18361 + }, + { + "epoch": 2.801666259765625e-05, + "step": 18361, + "training_step_time": 0.10658144950866699 + }, + { + "epoch": 2.80181884765625e-05, + "model_forward_time": 0.025498628616333008, + "step": 18362 + }, + { + "epoch": 2.80181884765625e-05, + "step": 18362, + "training_step_time": 0.19771933555603027 + }, + { + "epoch": 2.801971435546875e-05, + "model_forward_time": 0.024520158767700195, + "step": 18363 + }, + { + "epoch": 2.801971435546875e-05, + "step": 18363, + "training_step_time": 0.1043539047241211 + }, + { + "epoch": 2.8021240234375e-05, + "model_forward_time": 0.02482295036315918, + "step": 18364 + }, + { + "epoch": 2.8021240234375e-05, + "step": 18364, + "training_step_time": 0.10703754425048828 + }, + { + "epoch": 2.802276611328125e-05, + "model_forward_time": 0.025222063064575195, + "step": 18365 + }, + { + "epoch": 2.802276611328125e-05, + "step": 18365, + "training_step_time": 0.11151504516601562 + }, + { + "epoch": 2.80242919921875e-05, + "model_forward_time": 0.024646997451782227, + "step": 18366 + }, + { + "epoch": 2.80242919921875e-05, + "step": 18366, + "training_step_time": 0.1063683032989502 + }, + { + "epoch": 2.802581787109375e-05, + "model_forward_time": 0.024829387664794922, + "step": 18367 + }, + { + "epoch": 2.802581787109375e-05, + "step": 18367, + "training_step_time": 0.10512495040893555 + }, + { + "epoch": 2.802734375e-05, + "model_forward_time": 0.02535533905029297, + "step": 18368 + }, + { + "epoch": 2.802734375e-05, + "step": 18368, + "training_step_time": 0.10500288009643555 + }, + { + "epoch": 2.802886962890625e-05, + "model_forward_time": 0.025400161743164062, + "step": 18369 + }, + { + "epoch": 2.802886962890625e-05, + "step": 18369, + "training_step_time": 0.1052854061126709 + }, + { + "epoch": 2.80303955078125e-05, + "grad_norm": 0.42455539107322693, + "learning_rate": 3.57596031350591e-05, + "loss": 0.007, + "step": 18370 + }, + { + "epoch": 2.80303955078125e-05, + "model_forward_time": 0.02508378028869629, + "step": 18370 + }, + { + "epoch": 2.80303955078125e-05, + "step": 18370, + "training_step_time": 0.10556197166442871 + }, + { + "epoch": 2.803192138671875e-05, + "model_forward_time": 0.025608539581298828, + "step": 18371 + }, + { + "epoch": 2.803192138671875e-05, + "step": 18371, + "training_step_time": 0.14202189445495605 + }, + { + "epoch": 2.8033447265625e-05, + "model_forward_time": 0.025231599807739258, + "step": 18372 + }, + { + "epoch": 2.8033447265625e-05, + "step": 18372, + "training_step_time": 0.15815329551696777 + }, + { + "epoch": 2.803497314453125e-05, + "model_forward_time": 0.024489164352416992, + "step": 18373 + }, + { + "epoch": 2.803497314453125e-05, + "step": 18373, + "training_step_time": 0.14473247528076172 + }, + { + "epoch": 2.80364990234375e-05, + "model_forward_time": 0.024794816970825195, + "step": 18374 + }, + { + "epoch": 2.80364990234375e-05, + "step": 18374, + "training_step_time": 0.14179587364196777 + }, + { + "epoch": 2.803802490234375e-05, + "model_forward_time": 0.02445530891418457, + "step": 18375 + }, + { + "epoch": 2.803802490234375e-05, + "step": 18375, + "training_step_time": 0.1239786148071289 + }, + { + "epoch": 2.803955078125e-05, + "model_forward_time": 0.024916648864746094, + "step": 18376 + }, + { + "epoch": 2.803955078125e-05, + "step": 18376, + "training_step_time": 0.12299704551696777 + }, + { + "epoch": 2.804107666015625e-05, + "model_forward_time": 0.025094032287597656, + "step": 18377 + }, + { + "epoch": 2.804107666015625e-05, + "step": 18377, + "training_step_time": 0.1181344985961914 + }, + { + "epoch": 2.80426025390625e-05, + "model_forward_time": 0.025272369384765625, + "step": 18378 + }, + { + "epoch": 2.80426025390625e-05, + "step": 18378, + "training_step_time": 0.11593127250671387 + }, + { + "epoch": 2.804412841796875e-05, + "model_forward_time": 0.02548527717590332, + "step": 18379 + }, + { + "epoch": 2.804412841796875e-05, + "step": 18379, + "training_step_time": 0.15390753746032715 + }, + { + "epoch": 2.8045654296875e-05, + "grad_norm": 0.20815393328666687, + "learning_rate": 3.570677877391092e-05, + "loss": 0.0093, + "step": 18380 + }, + { + "epoch": 2.8045654296875e-05, + "model_forward_time": 0.024593591690063477, + "step": 18380 + }, + { + "epoch": 2.8045654296875e-05, + "step": 18380, + "training_step_time": 0.20925521850585938 + }, + { + "epoch": 2.804718017578125e-05, + "model_forward_time": 0.024608373641967773, + "step": 18381 + }, + { + "epoch": 2.804718017578125e-05, + "step": 18381, + "training_step_time": 0.12543153762817383 + }, + { + "epoch": 2.80487060546875e-05, + "model_forward_time": 0.02474665641784668, + "step": 18382 + }, + { + "epoch": 2.80487060546875e-05, + "step": 18382, + "training_step_time": 0.19297575950622559 + }, + { + "epoch": 2.805023193359375e-05, + "model_forward_time": 0.024935245513916016, + "step": 18383 + }, + { + "epoch": 2.805023193359375e-05, + "step": 18383, + "training_step_time": 0.11851978302001953 + }, + { + "epoch": 2.80517578125e-05, + "model_forward_time": 0.02477264404296875, + "step": 18384 + }, + { + "epoch": 2.80517578125e-05, + "step": 18384, + "training_step_time": 0.10373735427856445 + }, + { + "epoch": 2.805328369140625e-05, + "model_forward_time": 0.02557992935180664, + "step": 18385 + }, + { + "epoch": 2.805328369140625e-05, + "step": 18385, + "training_step_time": 0.11870336532592773 + }, + { + "epoch": 2.80548095703125e-05, + "model_forward_time": 0.025134801864624023, + "step": 18386 + }, + { + "epoch": 2.80548095703125e-05, + "step": 18386, + "training_step_time": 0.10883426666259766 + }, + { + "epoch": 2.805633544921875e-05, + "model_forward_time": 0.02520585060119629, + "step": 18387 + }, + { + "epoch": 2.805633544921875e-05, + "step": 18387, + "training_step_time": 0.22242188453674316 + }, + { + "epoch": 2.8057861328125e-05, + "model_forward_time": 0.024539470672607422, + "step": 18388 + }, + { + "epoch": 2.8057861328125e-05, + "step": 18388, + "training_step_time": 0.1521773338317871 + }, + { + "epoch": 2.805938720703125e-05, + "model_forward_time": 0.024480819702148438, + "step": 18389 + }, + { + "epoch": 2.805938720703125e-05, + "step": 18389, + "training_step_time": 0.15972447395324707 + }, + { + "epoch": 2.80609130859375e-05, + "grad_norm": 0.11801661550998688, + "learning_rate": 3.5653971780374295e-05, + "loss": 0.0073, + "step": 18390 + }, + { + "epoch": 2.80609130859375e-05, + "model_forward_time": 0.024174213409423828, + "step": 18390 + }, + { + "epoch": 2.80609130859375e-05, + "step": 18390, + "training_step_time": 0.16575264930725098 + }, + { + "epoch": 2.806243896484375e-05, + "model_forward_time": 0.024132251739501953, + "step": 18391 + }, + { + "epoch": 2.806243896484375e-05, + "step": 18391, + "training_step_time": 0.13094019889831543 + }, + { + "epoch": 2.806396484375e-05, + "model_forward_time": 0.024740934371948242, + "step": 18392 + }, + { + "epoch": 2.806396484375e-05, + "step": 18392, + "training_step_time": 0.11393380165100098 + }, + { + "epoch": 2.806549072265625e-05, + "model_forward_time": 0.025031089782714844, + "step": 18393 + }, + { + "epoch": 2.806549072265625e-05, + "step": 18393, + "training_step_time": 0.12122154235839844 + }, + { + "epoch": 2.80670166015625e-05, + "model_forward_time": 0.025151491165161133, + "step": 18394 + }, + { + "epoch": 2.80670166015625e-05, + "step": 18394, + "training_step_time": 0.1111593246459961 + }, + { + "epoch": 2.806854248046875e-05, + "model_forward_time": 0.024866580963134766, + "step": 18395 + }, + { + "epoch": 2.806854248046875e-05, + "step": 18395, + "training_step_time": 0.10981321334838867 + }, + { + "epoch": 2.8070068359375e-05, + "model_forward_time": 0.02387523651123047, + "step": 18396 + }, + { + "epoch": 2.8070068359375e-05, + "step": 18396, + "training_step_time": 0.10695528984069824 + }, + { + "epoch": 2.807159423828125e-05, + "model_forward_time": 0.02476215362548828, + "step": 18397 + }, + { + "epoch": 2.807159423828125e-05, + "step": 18397, + "training_step_time": 0.1074059009552002 + }, + { + "epoch": 2.80731201171875e-05, + "model_forward_time": 0.024916410446166992, + "step": 18398 + }, + { + "epoch": 2.80731201171875e-05, + "step": 18398, + "training_step_time": 0.10904502868652344 + }, + { + "epoch": 2.807464599609375e-05, + "model_forward_time": 0.02487945556640625, + "step": 18399 + }, + { + "epoch": 2.807464599609375e-05, + "step": 18399, + "training_step_time": 0.10808920860290527 + }, + { + "epoch": 2.8076171875e-05, + "grad_norm": 0.6371890902519226, + "learning_rate": 3.56011822186147e-05, + "loss": 0.0156, + "step": 18400 + }, + { + "epoch": 2.8076171875e-05, + "model_forward_time": 0.025880098342895508, + "step": 18400 + }, + { + "epoch": 2.8076171875e-05, + "step": 18400, + "training_step_time": 0.1888105869293213 + }, + { + "epoch": 2.807769775390625e-05, + "model_forward_time": 0.024862051010131836, + "step": 18401 + }, + { + "epoch": 2.807769775390625e-05, + "step": 18401, + "training_step_time": 0.1420910358428955 + }, + { + "epoch": 2.80792236328125e-05, + "model_forward_time": 0.02399420738220215, + "step": 18402 + }, + { + "epoch": 2.80792236328125e-05, + "step": 18402, + "training_step_time": 0.10989713668823242 + }, + { + "epoch": 2.808074951171875e-05, + "model_forward_time": 0.02477264404296875, + "step": 18403 + }, + { + "epoch": 2.808074951171875e-05, + "step": 18403, + "training_step_time": 0.11352252960205078 + }, + { + "epoch": 2.8082275390625e-05, + "model_forward_time": 0.024726152420043945, + "step": 18404 + }, + { + "epoch": 2.8082275390625e-05, + "step": 18404, + "training_step_time": 0.11578106880187988 + }, + { + "epoch": 2.808380126953125e-05, + "model_forward_time": 0.024883031845092773, + "step": 18405 + }, + { + "epoch": 2.808380126953125e-05, + "step": 18405, + "training_step_time": 0.1070871353149414 + }, + { + "epoch": 2.80853271484375e-05, + "model_forward_time": 0.024727582931518555, + "step": 18406 + }, + { + "epoch": 2.80853271484375e-05, + "step": 18406, + "training_step_time": 0.2001943588256836 + }, + { + "epoch": 2.808685302734375e-05, + "model_forward_time": 0.024663209915161133, + "step": 18407 + }, + { + "epoch": 2.808685302734375e-05, + "step": 18407, + "training_step_time": 0.10566210746765137 + }, + { + "epoch": 2.808837890625e-05, + "model_forward_time": 0.024473905563354492, + "step": 18408 + }, + { + "epoch": 2.808837890625e-05, + "step": 18408, + "training_step_time": 0.10317206382751465 + }, + { + "epoch": 2.808990478515625e-05, + "model_forward_time": 0.024999380111694336, + "step": 18409 + }, + { + "epoch": 2.808990478515625e-05, + "step": 18409, + "training_step_time": 0.10439682006835938 + }, + { + "epoch": 2.80914306640625e-05, + "grad_norm": 0.3864893913269043, + "learning_rate": 3.554841015277641e-05, + "loss": 0.0127, + "step": 18410 + }, + { + "epoch": 2.80914306640625e-05, + "model_forward_time": 0.025127649307250977, + "step": 18410 + }, + { + "epoch": 2.80914306640625e-05, + "step": 18410, + "training_step_time": 0.10350608825683594 + }, + { + "epoch": 2.809295654296875e-05, + "model_forward_time": 0.02503228187561035, + "step": 18411 + }, + { + "epoch": 2.809295654296875e-05, + "step": 18411, + "training_step_time": 0.10484910011291504 + }, + { + "epoch": 2.8094482421875e-05, + "model_forward_time": 0.025030136108398438, + "step": 18412 + }, + { + "epoch": 2.8094482421875e-05, + "step": 18412, + "training_step_time": 0.10496020317077637 + }, + { + "epoch": 2.809600830078125e-05, + "model_forward_time": 0.025463342666625977, + "step": 18413 + }, + { + "epoch": 2.809600830078125e-05, + "step": 18413, + "training_step_time": 0.10821986198425293 + }, + { + "epoch": 2.80975341796875e-05, + "model_forward_time": 0.024854183197021484, + "step": 18414 + }, + { + "epoch": 2.80975341796875e-05, + "step": 18414, + "training_step_time": 0.10462737083435059 + }, + { + "epoch": 2.809906005859375e-05, + "model_forward_time": 0.02503800392150879, + "step": 18415 + }, + { + "epoch": 2.809906005859375e-05, + "step": 18415, + "training_step_time": 0.11236786842346191 + }, + { + "epoch": 2.81005859375e-05, + "model_forward_time": 0.02520465850830078, + "step": 18416 + }, + { + "epoch": 2.81005859375e-05, + "step": 18416, + "training_step_time": 0.10600948333740234 + }, + { + "epoch": 2.810211181640625e-05, + "model_forward_time": 0.024947643280029297, + "step": 18417 + }, + { + "epoch": 2.810211181640625e-05, + "step": 18417, + "training_step_time": 0.10386013984680176 + }, + { + "epoch": 2.81036376953125e-05, + "model_forward_time": 0.025141239166259766, + "step": 18418 + }, + { + "epoch": 2.81036376953125e-05, + "step": 18418, + "training_step_time": 0.1051034927368164 + }, + { + "epoch": 2.810516357421875e-05, + "model_forward_time": 0.024964094161987305, + "step": 18419 + }, + { + "epoch": 2.810516357421875e-05, + "step": 18419, + "training_step_time": 0.10456967353820801 + }, + { + "epoch": 2.8106689453125e-05, + "grad_norm": 0.2138444483280182, + "learning_rate": 3.5495655646982505e-05, + "loss": 0.0133, + "step": 18420 + }, + { + "epoch": 2.8106689453125e-05, + "model_forward_time": 0.0247342586517334, + "step": 18420 + }, + { + "epoch": 2.8106689453125e-05, + "step": 18420, + "training_step_time": 0.21396708488464355 + }, + { + "epoch": 2.810821533203125e-05, + "model_forward_time": 0.02487802505493164, + "step": 18421 + }, + { + "epoch": 2.810821533203125e-05, + "step": 18421, + "training_step_time": 0.12329959869384766 + }, + { + "epoch": 2.81097412109375e-05, + "model_forward_time": 0.02423095703125, + "step": 18422 + }, + { + "epoch": 2.81097412109375e-05, + "step": 18422, + "training_step_time": 0.13443446159362793 + }, + { + "epoch": 2.811126708984375e-05, + "model_forward_time": 0.025026321411132812, + "step": 18423 + }, + { + "epoch": 2.811126708984375e-05, + "step": 18423, + "training_step_time": 0.16050934791564941 + }, + { + "epoch": 2.811279296875e-05, + "model_forward_time": 0.024158239364624023, + "step": 18424 + }, + { + "epoch": 2.811279296875e-05, + "step": 18424, + "training_step_time": 0.21318364143371582 + }, + { + "epoch": 2.811431884765625e-05, + "model_forward_time": 0.024852991104125977, + "step": 18425 + }, + { + "epoch": 2.811431884765625e-05, + "step": 18425, + "training_step_time": 0.11339306831359863 + }, + { + "epoch": 2.81158447265625e-05, + "model_forward_time": 0.02451014518737793, + "step": 18426 + }, + { + "epoch": 2.81158447265625e-05, + "step": 18426, + "training_step_time": 0.10437846183776855 + }, + { + "epoch": 2.811737060546875e-05, + "model_forward_time": 0.025655031204223633, + "step": 18427 + }, + { + "epoch": 2.811737060546875e-05, + "step": 18427, + "training_step_time": 0.1085519790649414 + }, + { + "epoch": 2.8118896484375e-05, + "model_forward_time": 0.0254213809967041, + "step": 18428 + }, + { + "epoch": 2.8118896484375e-05, + "step": 18428, + "training_step_time": 0.11316752433776855 + }, + { + "epoch": 2.812042236328125e-05, + "model_forward_time": 0.025228023529052734, + "step": 18429 + }, + { + "epoch": 2.812042236328125e-05, + "step": 18429, + "training_step_time": 0.11049842834472656 + }, + { + "epoch": 2.81219482421875e-05, + "grad_norm": 0.17229513823986053, + "learning_rate": 3.544291876533466e-05, + "loss": 0.0084, + "step": 18430 + }, + { + "epoch": 2.81219482421875e-05, + "model_forward_time": 0.02562689781188965, + "step": 18430 + }, + { + "epoch": 2.81219482421875e-05, + "step": 18430, + "training_step_time": 0.1096339225769043 + }, + { + "epoch": 2.812347412109375e-05, + "model_forward_time": 0.025182008743286133, + "step": 18431 + }, + { + "epoch": 2.812347412109375e-05, + "step": 18431, + "training_step_time": 0.22024297714233398 + }, + { + "epoch": 2.8125e-05, + "model_forward_time": 0.024271488189697266, + "step": 18432 + }, + { + "epoch": 2.8125e-05, + "step": 18432, + "training_step_time": 0.13884210586547852 + }, + { + "epoch": 2.812652587890625e-05, + "model_forward_time": 0.024174928665161133, + "step": 18433 + }, + { + "epoch": 2.812652587890625e-05, + "step": 18433, + "training_step_time": 0.14784836769104004 + }, + { + "epoch": 2.81280517578125e-05, + "model_forward_time": 0.024707317352294922, + "step": 18434 + }, + { + "epoch": 2.81280517578125e-05, + "step": 18434, + "training_step_time": 0.1278843879699707 + }, + { + "epoch": 2.812957763671875e-05, + "model_forward_time": 0.024101734161376953, + "step": 18435 + }, + { + "epoch": 2.812957763671875e-05, + "step": 18435, + "training_step_time": 0.2028806209564209 + }, + { + "epoch": 2.8131103515625e-05, + "model_forward_time": 0.025168657302856445, + "step": 18436 + }, + { + "epoch": 2.8131103515625e-05, + "step": 18436, + "training_step_time": 0.13534832000732422 + }, + { + "epoch": 2.813262939453125e-05, + "model_forward_time": 0.024213314056396484, + "step": 18437 + }, + { + "epoch": 2.813262939453125e-05, + "step": 18437, + "training_step_time": 0.11329245567321777 + }, + { + "epoch": 2.81341552734375e-05, + "model_forward_time": 0.02493906021118164, + "step": 18438 + }, + { + "epoch": 2.81341552734375e-05, + "step": 18438, + "training_step_time": 0.11572647094726562 + }, + { + "epoch": 2.813568115234375e-05, + "model_forward_time": 0.023944377899169922, + "step": 18439 + }, + { + "epoch": 2.813568115234375e-05, + "step": 18439, + "training_step_time": 0.11264848709106445 + }, + { + "epoch": 2.813720703125e-05, + "grad_norm": 0.3152703642845154, + "learning_rate": 3.539019957191315e-05, + "loss": 0.0082, + "step": 18440 + }, + { + "epoch": 2.813720703125e-05, + "model_forward_time": 0.025650501251220703, + "step": 18440 + }, + { + "epoch": 2.813720703125e-05, + "step": 18440, + "training_step_time": 0.10782742500305176 + }, + { + "epoch": 2.813873291015625e-05, + "model_forward_time": 0.024502277374267578, + "step": 18441 + }, + { + "epoch": 2.813873291015625e-05, + "step": 18441, + "training_step_time": 0.11281180381774902 + }, + { + "epoch": 2.81402587890625e-05, + "model_forward_time": 0.025213956832885742, + "step": 18442 + }, + { + "epoch": 2.81402587890625e-05, + "step": 18442, + "training_step_time": 0.10926604270935059 + }, + { + "epoch": 2.814178466796875e-05, + "model_forward_time": 0.024883508682250977, + "step": 18443 + }, + { + "epoch": 2.814178466796875e-05, + "step": 18443, + "training_step_time": 0.10839986801147461 + }, + { + "epoch": 2.8143310546875e-05, + "model_forward_time": 0.025763988494873047, + "step": 18444 + }, + { + "epoch": 2.8143310546875e-05, + "step": 18444, + "training_step_time": 0.11085271835327148 + }, + { + "epoch": 2.814483642578125e-05, + "model_forward_time": 0.02500605583190918, + "step": 18445 + }, + { + "epoch": 2.814483642578125e-05, + "step": 18445, + "training_step_time": 0.16804289817810059 + }, + { + "epoch": 2.81463623046875e-05, + "model_forward_time": 0.02451467514038086, + "step": 18446 + }, + { + "epoch": 2.81463623046875e-05, + "step": 18446, + "training_step_time": 0.14084267616271973 + }, + { + "epoch": 2.814788818359375e-05, + "model_forward_time": 0.0244596004486084, + "step": 18447 + }, + { + "epoch": 2.814788818359375e-05, + "step": 18447, + "training_step_time": 0.10860490798950195 + }, + { + "epoch": 2.81494140625e-05, + "model_forward_time": 0.02468395233154297, + "step": 18448 + }, + { + "epoch": 2.81494140625e-05, + "step": 18448, + "training_step_time": 0.1138613224029541 + }, + { + "epoch": 2.815093994140625e-05, + "model_forward_time": 0.024578571319580078, + "step": 18449 + }, + { + "epoch": 2.815093994140625e-05, + "step": 18449, + "training_step_time": 0.11719465255737305 + }, + { + "epoch": 2.81524658203125e-05, + "grad_norm": 0.1702016443014145, + "learning_rate": 3.533749813077677e-05, + "loss": 0.0144, + "step": 18450 + }, + { + "epoch": 2.81524658203125e-05, + "model_forward_time": 0.024478435516357422, + "step": 18450 + }, + { + "epoch": 2.81524658203125e-05, + "step": 18450, + "training_step_time": 0.10366058349609375 + }, + { + "epoch": 2.815399169921875e-05, + "model_forward_time": 0.02488422393798828, + "step": 18451 + }, + { + "epoch": 2.815399169921875e-05, + "step": 18451, + "training_step_time": 0.20134234428405762 + }, + { + "epoch": 2.8155517578125e-05, + "model_forward_time": 0.02420210838317871, + "step": 18452 + }, + { + "epoch": 2.8155517578125e-05, + "step": 18452, + "training_step_time": 0.10092902183532715 + }, + { + "epoch": 2.815704345703125e-05, + "model_forward_time": 0.024678945541381836, + "step": 18453 + }, + { + "epoch": 2.815704345703125e-05, + "step": 18453, + "training_step_time": 0.10239624977111816 + }, + { + "epoch": 2.81585693359375e-05, + "model_forward_time": 0.024515151977539062, + "step": 18454 + }, + { + "epoch": 2.81585693359375e-05, + "step": 18454, + "training_step_time": 0.10326099395751953 + }, + { + "epoch": 2.816009521484375e-05, + "model_forward_time": 0.024840116500854492, + "step": 18455 + }, + { + "epoch": 2.816009521484375e-05, + "step": 18455, + "training_step_time": 0.10522174835205078 + }, + { + "epoch": 2.816162109375e-05, + "model_forward_time": 0.024948596954345703, + "step": 18456 + }, + { + "epoch": 2.816162109375e-05, + "step": 18456, + "training_step_time": 0.10436892509460449 + }, + { + "epoch": 2.816314697265625e-05, + "model_forward_time": 0.024748802185058594, + "step": 18457 + }, + { + "epoch": 2.816314697265625e-05, + "step": 18457, + "training_step_time": 0.10403943061828613 + }, + { + "epoch": 2.81646728515625e-05, + "model_forward_time": 0.0251004695892334, + "step": 18458 + }, + { + "epoch": 2.81646728515625e-05, + "step": 18458, + "training_step_time": 0.10621166229248047 + }, + { + "epoch": 2.816619873046875e-05, + "model_forward_time": 0.025872230529785156, + "step": 18459 + }, + { + "epoch": 2.816619873046875e-05, + "step": 18459, + "training_step_time": 0.11031723022460938 + }, + { + "epoch": 2.8167724609375e-05, + "grad_norm": 0.4336320161819458, + "learning_rate": 3.528481450596274e-05, + "loss": 0.0117, + "step": 18460 + }, + { + "epoch": 2.8167724609375e-05, + "model_forward_time": 0.02498030662536621, + "step": 18460 + }, + { + "epoch": 2.8167724609375e-05, + "step": 18460, + "training_step_time": 0.10858559608459473 + }, + { + "epoch": 2.816925048828125e-05, + "model_forward_time": 0.024907350540161133, + "step": 18461 + }, + { + "epoch": 2.816925048828125e-05, + "step": 18461, + "training_step_time": 0.10492515563964844 + }, + { + "epoch": 2.81707763671875e-05, + "model_forward_time": 0.025012731552124023, + "step": 18462 + }, + { + "epoch": 2.81707763671875e-05, + "step": 18462, + "training_step_time": 0.10420846939086914 + }, + { + "epoch": 2.817230224609375e-05, + "model_forward_time": 0.025034189224243164, + "step": 18463 + }, + { + "epoch": 2.817230224609375e-05, + "step": 18463, + "training_step_time": 0.10517311096191406 + }, + { + "epoch": 2.8173828125e-05, + "model_forward_time": 0.024698734283447266, + "step": 18464 + }, + { + "epoch": 2.8173828125e-05, + "step": 18464, + "training_step_time": 0.10434794425964355 + }, + { + "epoch": 2.817535400390625e-05, + "model_forward_time": 0.02493000030517578, + "step": 18465 + }, + { + "epoch": 2.817535400390625e-05, + "step": 18465, + "training_step_time": 0.2137312889099121 + }, + { + "epoch": 2.81768798828125e-05, + "model_forward_time": 0.023679494857788086, + "step": 18466 + }, + { + "epoch": 2.81768798828125e-05, + "step": 18466, + "training_step_time": 0.11928844451904297 + }, + { + "epoch": 2.817840576171875e-05, + "model_forward_time": 0.027657270431518555, + "step": 18467 + }, + { + "epoch": 2.817840576171875e-05, + "step": 18467, + "training_step_time": 0.12653756141662598 + }, + { + "epoch": 2.8179931640625e-05, + "model_forward_time": 0.025334835052490234, + "step": 18468 + }, + { + "epoch": 2.8179931640625e-05, + "step": 18468, + "training_step_time": 0.150923490524292 + }, + { + "epoch": 2.818145751953125e-05, + "model_forward_time": 0.025049686431884766, + "step": 18469 + }, + { + "epoch": 2.818145751953125e-05, + "step": 18469, + "training_step_time": 0.10610723495483398 + }, + { + "epoch": 2.81829833984375e-05, + "grad_norm": 0.6569622755050659, + "learning_rate": 3.523214876148664e-05, + "loss": 0.016, + "step": 18470 + }, + { + "epoch": 2.81829833984375e-05, + "model_forward_time": 0.024706602096557617, + "step": 18470 + }, + { + "epoch": 2.81829833984375e-05, + "step": 18470, + "training_step_time": 0.11812901496887207 + }, + { + "epoch": 2.818450927734375e-05, + "model_forward_time": 0.024754047393798828, + "step": 18471 + }, + { + "epoch": 2.818450927734375e-05, + "step": 18471, + "training_step_time": 0.11610174179077148 + }, + { + "epoch": 2.818603515625e-05, + "model_forward_time": 0.024856090545654297, + "step": 18472 + }, + { + "epoch": 2.818603515625e-05, + "step": 18472, + "training_step_time": 0.1052849292755127 + }, + { + "epoch": 2.818756103515625e-05, + "model_forward_time": 0.0242919921875, + "step": 18473 + }, + { + "epoch": 2.818756103515625e-05, + "step": 18473, + "training_step_time": 0.10423660278320312 + }, + { + "epoch": 2.81890869140625e-05, + "model_forward_time": 0.02505803108215332, + "step": 18474 + }, + { + "epoch": 2.81890869140625e-05, + "step": 18474, + "training_step_time": 0.11999034881591797 + }, + { + "epoch": 2.819061279296875e-05, + "model_forward_time": 0.02505970001220703, + "step": 18475 + }, + { + "epoch": 2.819061279296875e-05, + "step": 18475, + "training_step_time": 0.1090238094329834 + }, + { + "epoch": 2.8192138671875e-05, + "model_forward_time": 0.025188207626342773, + "step": 18476 + }, + { + "epoch": 2.8192138671875e-05, + "step": 18476, + "training_step_time": 0.10657024383544922 + }, + { + "epoch": 2.819366455078125e-05, + "model_forward_time": 0.024991989135742188, + "step": 18477 + }, + { + "epoch": 2.819366455078125e-05, + "step": 18477, + "training_step_time": 0.10558724403381348 + }, + { + "epoch": 2.81951904296875e-05, + "model_forward_time": 0.025236845016479492, + "step": 18478 + }, + { + "epoch": 2.81951904296875e-05, + "step": 18478, + "training_step_time": 0.11783647537231445 + }, + { + "epoch": 2.819671630859375e-05, + "model_forward_time": 0.024996280670166016, + "step": 18479 + }, + { + "epoch": 2.819671630859375e-05, + "step": 18479, + "training_step_time": 0.11097168922424316 + }, + { + "epoch": 2.81982421875e-05, + "grad_norm": 0.3495062589645386, + "learning_rate": 3.517950096134232e-05, + "loss": 0.0114, + "step": 18480 + }, + { + "epoch": 2.81982421875e-05, + "model_forward_time": 0.025040149688720703, + "step": 18480 + }, + { + "epoch": 2.81982421875e-05, + "step": 18480, + "training_step_time": 0.15271759033203125 + }, + { + "epoch": 2.819976806640625e-05, + "model_forward_time": 0.024540424346923828, + "step": 18481 + }, + { + "epoch": 2.819976806640625e-05, + "step": 18481, + "training_step_time": 0.10719776153564453 + }, + { + "epoch": 2.82012939453125e-05, + "model_forward_time": 0.024373531341552734, + "step": 18482 + }, + { + "epoch": 2.82012939453125e-05, + "step": 18482, + "training_step_time": 0.11297273635864258 + }, + { + "epoch": 2.820281982421875e-05, + "model_forward_time": 0.024776458740234375, + "step": 18483 + }, + { + "epoch": 2.820281982421875e-05, + "step": 18483, + "training_step_time": 0.12767314910888672 + }, + { + "epoch": 2.8204345703125e-05, + "model_forward_time": 0.02525472640991211, + "step": 18484 + }, + { + "epoch": 2.8204345703125e-05, + "step": 18484, + "training_step_time": 0.12236428260803223 + }, + { + "epoch": 2.820587158203125e-05, + "model_forward_time": 0.025211334228515625, + "step": 18485 + }, + { + "epoch": 2.820587158203125e-05, + "step": 18485, + "training_step_time": 0.10983157157897949 + }, + { + "epoch": 2.82073974609375e-05, + "model_forward_time": 0.025716781616210938, + "step": 18486 + }, + { + "epoch": 2.82073974609375e-05, + "step": 18486, + "training_step_time": 0.1118934154510498 + }, + { + "epoch": 2.820892333984375e-05, + "model_forward_time": 0.024672985076904297, + "step": 18487 + }, + { + "epoch": 2.820892333984375e-05, + "step": 18487, + "training_step_time": 0.10914731025695801 + }, + { + "epoch": 2.821044921875e-05, + "model_forward_time": 0.024858951568603516, + "step": 18488 + }, + { + "epoch": 2.821044921875e-05, + "step": 18488, + "training_step_time": 0.10629916191101074 + }, + { + "epoch": 2.821197509765625e-05, + "model_forward_time": 0.024823665618896484, + "step": 18489 + }, + { + "epoch": 2.821197509765625e-05, + "step": 18489, + "training_step_time": 0.10515975952148438 + }, + { + "epoch": 2.82135009765625e-05, + "grad_norm": 0.4059763550758362, + "learning_rate": 3.512687116950182e-05, + "loss": 0.0091, + "step": 18490 + }, + { + "epoch": 2.82135009765625e-05, + "model_forward_time": 0.02484583854675293, + "step": 18490 + }, + { + "epoch": 2.82135009765625e-05, + "step": 18490, + "training_step_time": 0.11062741279602051 + }, + { + "epoch": 2.821502685546875e-05, + "model_forward_time": 0.02500009536743164, + "step": 18491 + }, + { + "epoch": 2.821502685546875e-05, + "step": 18491, + "training_step_time": 0.10881686210632324 + }, + { + "epoch": 2.8216552734375e-05, + "model_forward_time": 0.024543285369873047, + "step": 18492 + }, + { + "epoch": 2.8216552734375e-05, + "step": 18492, + "training_step_time": 0.10438656806945801 + }, + { + "epoch": 2.821807861328125e-05, + "model_forward_time": 0.02492070198059082, + "step": 18493 + }, + { + "epoch": 2.821807861328125e-05, + "step": 18493, + "training_step_time": 0.17460227012634277 + }, + { + "epoch": 2.82196044921875e-05, + "model_forward_time": 0.026612520217895508, + "step": 18494 + }, + { + "epoch": 2.82196044921875e-05, + "step": 18494, + "training_step_time": 0.13511252403259277 + }, + { + "epoch": 2.822113037109375e-05, + "model_forward_time": 0.024184465408325195, + "step": 18495 + }, + { + "epoch": 2.822113037109375e-05, + "step": 18495, + "training_step_time": 0.11011481285095215 + }, + { + "epoch": 2.822265625e-05, + "model_forward_time": 0.025092363357543945, + "step": 18496 + }, + { + "epoch": 2.822265625e-05, + "step": 18496, + "training_step_time": 0.11318206787109375 + }, + { + "epoch": 2.822418212890625e-05, + "model_forward_time": 0.02474689483642578, + "step": 18497 + }, + { + "epoch": 2.822418212890625e-05, + "step": 18497, + "training_step_time": 0.11718869209289551 + }, + { + "epoch": 2.82257080078125e-05, + "model_forward_time": 0.025321006774902344, + "step": 18498 + }, + { + "epoch": 2.82257080078125e-05, + "step": 18498, + "training_step_time": 0.10908770561218262 + }, + { + "epoch": 2.822723388671875e-05, + "model_forward_time": 0.025540590286254883, + "step": 18499 + }, + { + "epoch": 2.822723388671875e-05, + "step": 18499, + "training_step_time": 0.19304633140563965 + }, + { + "epoch": 2.8228759765625e-05, + "grad_norm": 0.19981293380260468, + "learning_rate": 3.5074259449915284e-05, + "loss": 0.0187, + "step": 18500 + }, + { + "epoch": 2.8228759765625e-05, + "model_forward_time": 0.02621603012084961, + "step": 18500 + }, + { + "epoch": 2.8228759765625e-05, + "step": 18500, + "training_step_time": 0.10569477081298828 + }, + { + "epoch": 2.823028564453125e-05, + "model_forward_time": 0.024576902389526367, + "step": 18501 + }, + { + "epoch": 2.823028564453125e-05, + "step": 18501, + "training_step_time": 0.10567927360534668 + }, + { + "epoch": 2.82318115234375e-05, + "model_forward_time": 0.025256872177124023, + "step": 18502 + }, + { + "epoch": 2.82318115234375e-05, + "step": 18502, + "training_step_time": 0.10662603378295898 + }, + { + "epoch": 2.823333740234375e-05, + "model_forward_time": 0.02559208869934082, + "step": 18503 + }, + { + "epoch": 2.823333740234375e-05, + "step": 18503, + "training_step_time": 0.10790085792541504 + }, + { + "epoch": 2.823486328125e-05, + "model_forward_time": 0.02549600601196289, + "step": 18504 + }, + { + "epoch": 2.823486328125e-05, + "step": 18504, + "training_step_time": 0.10847711563110352 + }, + { + "epoch": 2.823638916015625e-05, + "model_forward_time": 0.025139331817626953, + "step": 18505 + }, + { + "epoch": 2.823638916015625e-05, + "step": 18505, + "training_step_time": 0.11087536811828613 + }, + { + "epoch": 2.82379150390625e-05, + "model_forward_time": 0.02811431884765625, + "step": 18506 + }, + { + "epoch": 2.82379150390625e-05, + "step": 18506, + "training_step_time": 0.1909770965576172 + }, + { + "epoch": 2.823944091796875e-05, + "model_forward_time": 0.024459362030029297, + "step": 18507 + }, + { + "epoch": 2.823944091796875e-05, + "step": 18507, + "training_step_time": 0.20830273628234863 + }, + { + "epoch": 2.8240966796875e-05, + "model_forward_time": 0.023904800415039062, + "step": 18508 + }, + { + "epoch": 2.8240966796875e-05, + "step": 18508, + "training_step_time": 0.20653676986694336 + }, + { + "epoch": 2.824249267578125e-05, + "model_forward_time": 0.02499079704284668, + "step": 18509 + }, + { + "epoch": 2.824249267578125e-05, + "step": 18509, + "training_step_time": 0.19432973861694336 + }, + { + "epoch": 2.82440185546875e-05, + "grad_norm": 0.16926681995391846, + "learning_rate": 3.5021665866510925e-05, + "loss": 0.0111, + "step": 18510 + }, + { + "epoch": 2.82440185546875e-05, + "model_forward_time": 0.02367877960205078, + "step": 18510 + }, + { + "epoch": 2.82440185546875e-05, + "step": 18510, + "training_step_time": 0.1886577606201172 + }, + { + "epoch": 2.824554443359375e-05, + "model_forward_time": 0.024843454360961914, + "step": 18511 + }, + { + "epoch": 2.824554443359375e-05, + "step": 18511, + "training_step_time": 0.2145678997039795 + }, + { + "epoch": 2.82470703125e-05, + "model_forward_time": 0.024446964263916016, + "step": 18512 + }, + { + "epoch": 2.82470703125e-05, + "step": 18512, + "training_step_time": 0.17141127586364746 + }, + { + "epoch": 2.824859619140625e-05, + "model_forward_time": 0.024988889694213867, + "step": 18513 + }, + { + "epoch": 2.824859619140625e-05, + "step": 18513, + "training_step_time": 0.18264245986938477 + }, + { + "epoch": 2.82501220703125e-05, + "model_forward_time": 0.024652481079101562, + "step": 18514 + }, + { + "epoch": 2.82501220703125e-05, + "step": 18514, + "training_step_time": 0.11088371276855469 + }, + { + "epoch": 2.825164794921875e-05, + "model_forward_time": 0.025249719619750977, + "step": 18515 + }, + { + "epoch": 2.825164794921875e-05, + "step": 18515, + "training_step_time": 0.10362815856933594 + }, + { + "epoch": 2.8253173828125e-05, + "model_forward_time": 0.024415016174316406, + "step": 18516 + }, + { + "epoch": 2.8253173828125e-05, + "step": 18516, + "training_step_time": 0.13913512229919434 + }, + { + "epoch": 2.825469970703125e-05, + "model_forward_time": 0.025102615356445312, + "step": 18517 + }, + { + "epoch": 2.825469970703125e-05, + "step": 18517, + "training_step_time": 0.11839723587036133 + }, + { + "epoch": 2.82562255859375e-05, + "model_forward_time": 0.025087356567382812, + "step": 18518 + }, + { + "epoch": 2.82562255859375e-05, + "step": 18518, + "training_step_time": 0.10636663436889648 + }, + { + "epoch": 2.825775146484375e-05, + "model_forward_time": 0.025368928909301758, + "step": 18519 + }, + { + "epoch": 2.825775146484375e-05, + "step": 18519, + "training_step_time": 0.12046194076538086 + }, + { + "epoch": 2.825927734375e-05, + "grad_norm": 0.24763013422489166, + "learning_rate": 3.496909048319489e-05, + "loss": 0.0086, + "step": 18520 + }, + { + "epoch": 2.825927734375e-05, + "model_forward_time": 0.025498151779174805, + "step": 18520 + }, + { + "epoch": 2.825927734375e-05, + "step": 18520, + "training_step_time": 0.2077922821044922 + }, + { + "epoch": 2.826080322265625e-05, + "model_forward_time": 0.024425983428955078, + "step": 18521 + }, + { + "epoch": 2.826080322265625e-05, + "step": 18521, + "training_step_time": 0.11708188056945801 + }, + { + "epoch": 2.82623291015625e-05, + "model_forward_time": 0.02465057373046875, + "step": 18522 + }, + { + "epoch": 2.82623291015625e-05, + "step": 18522, + "training_step_time": 0.10424351692199707 + }, + { + "epoch": 2.826385498046875e-05, + "model_forward_time": 0.025422334671020508, + "step": 18523 + }, + { + "epoch": 2.826385498046875e-05, + "step": 18523, + "training_step_time": 0.1126410961151123 + }, + { + "epoch": 2.8265380859375e-05, + "model_forward_time": 0.02601146697998047, + "step": 18524 + }, + { + "epoch": 2.8265380859375e-05, + "step": 18524, + "training_step_time": 0.11643123626708984 + }, + { + "epoch": 2.826690673828125e-05, + "model_forward_time": 0.025059223175048828, + "step": 18525 + }, + { + "epoch": 2.826690673828125e-05, + "step": 18525, + "training_step_time": 0.10929226875305176 + }, + { + "epoch": 2.82684326171875e-05, + "model_forward_time": 0.025072336196899414, + "step": 18526 + }, + { + "epoch": 2.82684326171875e-05, + "step": 18526, + "training_step_time": 0.12286496162414551 + }, + { + "epoch": 2.826995849609375e-05, + "model_forward_time": 0.025107145309448242, + "step": 18527 + }, + { + "epoch": 2.826995849609375e-05, + "step": 18527, + "training_step_time": 0.12392258644104004 + }, + { + "epoch": 2.8271484375e-05, + "model_forward_time": 0.02485823631286621, + "step": 18528 + }, + { + "epoch": 2.8271484375e-05, + "step": 18528, + "training_step_time": 0.11281752586364746 + }, + { + "epoch": 2.827301025390625e-05, + "model_forward_time": 0.02467823028564453, + "step": 18529 + }, + { + "epoch": 2.827301025390625e-05, + "step": 18529, + "training_step_time": 0.1154017448425293 + }, + { + "epoch": 2.82745361328125e-05, + "grad_norm": 0.2802363336086273, + "learning_rate": 3.491653336385124e-05, + "loss": 0.0134, + "step": 18530 + }, + { + "epoch": 2.82745361328125e-05, + "model_forward_time": 0.02510380744934082, + "step": 18530 + }, + { + "epoch": 2.82745361328125e-05, + "step": 18530, + "training_step_time": 0.11180448532104492 + }, + { + "epoch": 2.827606201171875e-05, + "model_forward_time": 0.02531266212463379, + "step": 18531 + }, + { + "epoch": 2.827606201171875e-05, + "step": 18531, + "training_step_time": 0.11049199104309082 + }, + { + "epoch": 2.8277587890625e-05, + "model_forward_time": 0.025597810745239258, + "step": 18532 + }, + { + "epoch": 2.8277587890625e-05, + "step": 18532, + "training_step_time": 0.10997414588928223 + }, + { + "epoch": 2.827911376953125e-05, + "model_forward_time": 0.025289058685302734, + "step": 18533 + }, + { + "epoch": 2.827911376953125e-05, + "step": 18533, + "training_step_time": 0.11003828048706055 + }, + { + "epoch": 2.82806396484375e-05, + "model_forward_time": 0.025589704513549805, + "step": 18534 + }, + { + "epoch": 2.82806396484375e-05, + "step": 18534, + "training_step_time": 0.10817790031433105 + }, + { + "epoch": 2.828216552734375e-05, + "model_forward_time": 0.025180339813232422, + "step": 18535 + }, + { + "epoch": 2.828216552734375e-05, + "step": 18535, + "training_step_time": 0.17873191833496094 + }, + { + "epoch": 2.828369140625e-05, + "model_forward_time": 0.024404525756835938, + "step": 18536 + }, + { + "epoch": 2.828369140625e-05, + "step": 18536, + "training_step_time": 0.14315271377563477 + }, + { + "epoch": 2.828521728515625e-05, + "model_forward_time": 0.02426004409790039, + "step": 18537 + }, + { + "epoch": 2.828521728515625e-05, + "step": 18537, + "training_step_time": 0.11201834678649902 + }, + { + "epoch": 2.82867431640625e-05, + "model_forward_time": 0.024942636489868164, + "step": 18538 + }, + { + "epoch": 2.82867431640625e-05, + "step": 18538, + "training_step_time": 0.10585713386535645 + }, + { + "epoch": 2.828826904296875e-05, + "model_forward_time": 0.025745630264282227, + "step": 18539 + }, + { + "epoch": 2.828826904296875e-05, + "step": 18539, + "training_step_time": 0.11813926696777344 + }, + { + "epoch": 2.8289794921875e-05, + "grad_norm": 0.1632130742073059, + "learning_rate": 3.4863994572341843e-05, + "loss": 0.0071, + "step": 18540 + }, + { + "epoch": 2.8289794921875e-05, + "model_forward_time": 0.025328874588012695, + "step": 18540 + }, + { + "epoch": 2.8289794921875e-05, + "step": 18540, + "training_step_time": 0.18405508995056152 + }, + { + "epoch": 2.829132080078125e-05, + "model_forward_time": 0.024628162384033203, + "step": 18541 + }, + { + "epoch": 2.829132080078125e-05, + "step": 18541, + "training_step_time": 0.10892105102539062 + }, + { + "epoch": 2.82928466796875e-05, + "model_forward_time": 0.024564504623413086, + "step": 18542 + }, + { + "epoch": 2.82928466796875e-05, + "step": 18542, + "training_step_time": 0.10254669189453125 + }, + { + "epoch": 2.829437255859375e-05, + "model_forward_time": 0.025267839431762695, + "step": 18543 + }, + { + "epoch": 2.829437255859375e-05, + "step": 18543, + "training_step_time": 0.10536646842956543 + }, + { + "epoch": 2.82958984375e-05, + "model_forward_time": 0.025273799896240234, + "step": 18544 + }, + { + "epoch": 2.82958984375e-05, + "step": 18544, + "training_step_time": 0.10665297508239746 + }, + { + "epoch": 2.829742431640625e-05, + "model_forward_time": 0.0256502628326416, + "step": 18545 + }, + { + "epoch": 2.829742431640625e-05, + "step": 18545, + "training_step_time": 0.10631251335144043 + }, + { + "epoch": 2.82989501953125e-05, + "model_forward_time": 0.024371862411499023, + "step": 18546 + }, + { + "epoch": 2.82989501953125e-05, + "step": 18546, + "training_step_time": 0.10538434982299805 + }, + { + "epoch": 2.830047607421875e-05, + "model_forward_time": 0.025464773178100586, + "step": 18547 + }, + { + "epoch": 2.830047607421875e-05, + "step": 18547, + "training_step_time": 0.10851716995239258 + }, + { + "epoch": 2.8302001953125e-05, + "model_forward_time": 0.026028871536254883, + "step": 18548 + }, + { + "epoch": 2.8302001953125e-05, + "step": 18548, + "training_step_time": 0.10712552070617676 + }, + { + "epoch": 2.830352783203125e-05, + "model_forward_time": 0.025630712509155273, + "step": 18549 + }, + { + "epoch": 2.830352783203125e-05, + "step": 18549, + "training_step_time": 0.10640525817871094 + }, + { + "epoch": 2.83050537109375e-05, + "grad_norm": 0.3838638961315155, + "learning_rate": 3.4811474172506275e-05, + "loss": 0.0095, + "step": 18550 + }, + { + "epoch": 2.83050537109375e-05, + "model_forward_time": 0.02506422996520996, + "step": 18550 + }, + { + "epoch": 2.83050537109375e-05, + "step": 18550, + "training_step_time": 0.10631585121154785 + }, + { + "epoch": 2.830657958984375e-05, + "model_forward_time": 0.02508687973022461, + "step": 18551 + }, + { + "epoch": 2.830657958984375e-05, + "step": 18551, + "training_step_time": 0.10429120063781738 + }, + { + "epoch": 2.830810546875e-05, + "model_forward_time": 0.02454686164855957, + "step": 18552 + }, + { + "epoch": 2.830810546875e-05, + "step": 18552, + "training_step_time": 0.1060032844543457 + }, + { + "epoch": 2.830963134765625e-05, + "model_forward_time": 0.024301528930664062, + "step": 18553 + }, + { + "epoch": 2.830963134765625e-05, + "step": 18553, + "training_step_time": 0.10813164710998535 + }, + { + "epoch": 2.83111572265625e-05, + "model_forward_time": 0.025612831115722656, + "step": 18554 + }, + { + "epoch": 2.83111572265625e-05, + "step": 18554, + "training_step_time": 0.10598039627075195 + }, + { + "epoch": 2.831268310546875e-05, + "model_forward_time": 0.025598764419555664, + "step": 18555 + }, + { + "epoch": 2.831268310546875e-05, + "step": 18555, + "training_step_time": 0.15174555778503418 + }, + { + "epoch": 2.8314208984375e-05, + "model_forward_time": 0.025571823120117188, + "step": 18556 + }, + { + "epoch": 2.8314208984375e-05, + "step": 18556, + "training_step_time": 0.11921095848083496 + }, + { + "epoch": 2.831573486328125e-05, + "model_forward_time": 0.025020122528076172, + "step": 18557 + }, + { + "epoch": 2.831573486328125e-05, + "step": 18557, + "training_step_time": 0.10972023010253906 + }, + { + "epoch": 2.83172607421875e-05, + "model_forward_time": 0.025304317474365234, + "step": 18558 + }, + { + "epoch": 2.83172607421875e-05, + "step": 18558, + "training_step_time": 0.12078213691711426 + }, + { + "epoch": 2.831878662109375e-05, + "model_forward_time": 0.02517104148864746, + "step": 18559 + }, + { + "epoch": 2.831878662109375e-05, + "step": 18559, + "training_step_time": 0.10640954971313477 + }, + { + "epoch": 2.83203125e-05, + "grad_norm": 0.13953132927417755, + "learning_rate": 3.475897222816178e-05, + "loss": 0.0197, + "step": 18560 + }, + { + "epoch": 2.83203125e-05, + "model_forward_time": 0.025437593460083008, + "step": 18560 + }, + { + "epoch": 2.83203125e-05, + "step": 18560, + "training_step_time": 0.11421418190002441 + }, + { + "epoch": 2.832183837890625e-05, + "model_forward_time": 0.025550127029418945, + "step": 18561 + }, + { + "epoch": 2.832183837890625e-05, + "step": 18561, + "training_step_time": 0.1122283935546875 + }, + { + "epoch": 2.83233642578125e-05, + "model_forward_time": 0.025095701217651367, + "step": 18562 + }, + { + "epoch": 2.83233642578125e-05, + "step": 18562, + "training_step_time": 0.1156926155090332 + }, + { + "epoch": 2.832489013671875e-05, + "model_forward_time": 0.025767803192138672, + "step": 18563 + }, + { + "epoch": 2.832489013671875e-05, + "step": 18563, + "training_step_time": 0.10941243171691895 + }, + { + "epoch": 2.8326416015625e-05, + "model_forward_time": 0.0251617431640625, + "step": 18564 + }, + { + "epoch": 2.8326416015625e-05, + "step": 18564, + "training_step_time": 0.20952510833740234 + }, + { + "epoch": 2.832794189453125e-05, + "model_forward_time": 0.0247495174407959, + "step": 18565 + }, + { + "epoch": 2.832794189453125e-05, + "step": 18565, + "training_step_time": 0.1151437759399414 + }, + { + "epoch": 2.83294677734375e-05, + "model_forward_time": 0.024554014205932617, + "step": 18566 + }, + { + "epoch": 2.83294677734375e-05, + "step": 18566, + "training_step_time": 0.11259174346923828 + }, + { + "epoch": 2.833099365234375e-05, + "model_forward_time": 0.02532482147216797, + "step": 18567 + }, + { + "epoch": 2.833099365234375e-05, + "step": 18567, + "training_step_time": 0.11679863929748535 + }, + { + "epoch": 2.833251953125e-05, + "model_forward_time": 0.025187969207763672, + "step": 18568 + }, + { + "epoch": 2.833251953125e-05, + "step": 18568, + "training_step_time": 0.12688899040222168 + }, + { + "epoch": 2.833404541015625e-05, + "model_forward_time": 0.025196075439453125, + "step": 18569 + }, + { + "epoch": 2.833404541015625e-05, + "step": 18569, + "training_step_time": 0.10839724540710449 + }, + { + "epoch": 2.83355712890625e-05, + "grad_norm": 0.23392415046691895, + "learning_rate": 3.470648880310313e-05, + "loss": 0.0083, + "step": 18570 + }, + { + "epoch": 2.83355712890625e-05, + "model_forward_time": 0.025681018829345703, + "step": 18570 + }, + { + "epoch": 2.83355712890625e-05, + "step": 18570, + "training_step_time": 0.10769271850585938 + }, + { + "epoch": 2.833709716796875e-05, + "model_forward_time": 0.025830984115600586, + "step": 18571 + }, + { + "epoch": 2.833709716796875e-05, + "step": 18571, + "training_step_time": 0.12471246719360352 + }, + { + "epoch": 2.8338623046875e-05, + "model_forward_time": 0.02533745765686035, + "step": 18572 + }, + { + "epoch": 2.8338623046875e-05, + "step": 18572, + "training_step_time": 0.10793638229370117 + }, + { + "epoch": 2.834014892578125e-05, + "model_forward_time": 0.025129079818725586, + "step": 18573 + }, + { + "epoch": 2.834014892578125e-05, + "step": 18573, + "training_step_time": 0.11579632759094238 + }, + { + "epoch": 2.83416748046875e-05, + "model_forward_time": 0.025225400924682617, + "step": 18574 + }, + { + "epoch": 2.83416748046875e-05, + "step": 18574, + "training_step_time": 0.13214659690856934 + }, + { + "epoch": 2.834320068359375e-05, + "model_forward_time": 0.02486109733581543, + "step": 18575 + }, + { + "epoch": 2.834320068359375e-05, + "step": 18575, + "training_step_time": 0.11676359176635742 + }, + { + "epoch": 2.83447265625e-05, + "model_forward_time": 0.025183439254760742, + "step": 18576 + }, + { + "epoch": 2.83447265625e-05, + "step": 18576, + "training_step_time": 0.12166857719421387 + }, + { + "epoch": 2.834625244140625e-05, + "model_forward_time": 0.024954795837402344, + "step": 18577 + }, + { + "epoch": 2.834625244140625e-05, + "step": 18577, + "training_step_time": 0.10669779777526855 + }, + { + "epoch": 2.83477783203125e-05, + "model_forward_time": 0.025155067443847656, + "step": 18578 + }, + { + "epoch": 2.83477783203125e-05, + "step": 18578, + "training_step_time": 0.11272668838500977 + }, + { + "epoch": 2.834930419921875e-05, + "model_forward_time": 0.025492429733276367, + "step": 18579 + }, + { + "epoch": 2.834930419921875e-05, + "step": 18579, + "training_step_time": 0.10658526420593262 + }, + { + "epoch": 2.8350830078125e-05, + "grad_norm": 0.3779175579547882, + "learning_rate": 3.465402396110269e-05, + "loss": 0.0074, + "step": 18580 + }, + { + "epoch": 2.8350830078125e-05, + "model_forward_time": 0.025673866271972656, + "step": 18580 + }, + { + "epoch": 2.8350830078125e-05, + "step": 18580, + "training_step_time": 0.10643339157104492 + }, + { + "epoch": 2.835235595703125e-05, + "model_forward_time": 0.025476455688476562, + "step": 18581 + }, + { + "epoch": 2.835235595703125e-05, + "step": 18581, + "training_step_time": 0.10725212097167969 + }, + { + "epoch": 2.83538818359375e-05, + "model_forward_time": 0.026587486267089844, + "step": 18582 + }, + { + "epoch": 2.83538818359375e-05, + "step": 18582, + "training_step_time": 0.10766339302062988 + }, + { + "epoch": 2.835540771484375e-05, + "model_forward_time": 0.024753570556640625, + "step": 18583 + }, + { + "epoch": 2.835540771484375e-05, + "step": 18583, + "training_step_time": 0.13511013984680176 + }, + { + "epoch": 2.835693359375e-05, + "model_forward_time": 0.024773836135864258, + "step": 18584 + }, + { + "epoch": 2.835693359375e-05, + "step": 18584, + "training_step_time": 0.13988494873046875 + }, + { + "epoch": 2.835845947265625e-05, + "model_forward_time": 0.024242877960205078, + "step": 18585 + }, + { + "epoch": 2.835845947265625e-05, + "step": 18585, + "training_step_time": 0.10854220390319824 + }, + { + "epoch": 2.83599853515625e-05, + "model_forward_time": 0.024918079376220703, + "step": 18586 + }, + { + "epoch": 2.83599853515625e-05, + "step": 18586, + "training_step_time": 0.1144874095916748 + }, + { + "epoch": 2.836151123046875e-05, + "model_forward_time": 0.025079727172851562, + "step": 18587 + }, + { + "epoch": 2.836151123046875e-05, + "step": 18587, + "training_step_time": 0.11253857612609863 + }, + { + "epoch": 2.8363037109375e-05, + "model_forward_time": 0.024905681610107422, + "step": 18588 + }, + { + "epoch": 2.8363037109375e-05, + "step": 18588, + "training_step_time": 0.10449695587158203 + }, + { + "epoch": 2.836456298828125e-05, + "model_forward_time": 0.024859189987182617, + "step": 18589 + }, + { + "epoch": 2.836456298828125e-05, + "step": 18589, + "training_step_time": 0.1983489990234375 + }, + { + "epoch": 2.83660888671875e-05, + "grad_norm": 0.27629610896110535, + "learning_rate": 3.460157776591018e-05, + "loss": 0.0089, + "step": 18590 + }, + { + "epoch": 2.83660888671875e-05, + "model_forward_time": 0.023938655853271484, + "step": 18590 + }, + { + "epoch": 2.83660888671875e-05, + "step": 18590, + "training_step_time": 0.10195159912109375 + }, + { + "epoch": 2.836761474609375e-05, + "model_forward_time": 0.024169445037841797, + "step": 18591 + }, + { + "epoch": 2.836761474609375e-05, + "step": 18591, + "training_step_time": 0.10474252700805664 + }, + { + "epoch": 2.8369140625e-05, + "model_forward_time": 0.024951696395874023, + "step": 18592 + }, + { + "epoch": 2.8369140625e-05, + "step": 18592, + "training_step_time": 0.10837626457214355 + }, + { + "epoch": 2.837066650390625e-05, + "model_forward_time": 0.024923086166381836, + "step": 18593 + }, + { + "epoch": 2.837066650390625e-05, + "step": 18593, + "training_step_time": 0.1066136360168457 + }, + { + "epoch": 2.83721923828125e-05, + "model_forward_time": 0.02521538734436035, + "step": 18594 + }, + { + "epoch": 2.83721923828125e-05, + "step": 18594, + "training_step_time": 0.1096501350402832 + }, + { + "epoch": 2.837371826171875e-05, + "model_forward_time": 0.02505970001220703, + "step": 18595 + }, + { + "epoch": 2.837371826171875e-05, + "step": 18595, + "training_step_time": 0.1084134578704834 + }, + { + "epoch": 2.8375244140625e-05, + "model_forward_time": 0.02508401870727539, + "step": 18596 + }, + { + "epoch": 2.8375244140625e-05, + "step": 18596, + "training_step_time": 0.10593843460083008 + }, + { + "epoch": 2.837677001953125e-05, + "model_forward_time": 0.024842023849487305, + "step": 18597 + }, + { + "epoch": 2.837677001953125e-05, + "step": 18597, + "training_step_time": 0.10787582397460938 + }, + { + "epoch": 2.83782958984375e-05, + "model_forward_time": 0.02503204345703125, + "step": 18598 + }, + { + "epoch": 2.83782958984375e-05, + "step": 18598, + "training_step_time": 0.10834431648254395 + }, + { + "epoch": 2.837982177734375e-05, + "model_forward_time": 0.02506256103515625, + "step": 18599 + }, + { + "epoch": 2.837982177734375e-05, + "step": 18599, + "training_step_time": 0.10694622993469238 + }, + { + "epoch": 2.838134765625e-05, + "grad_norm": 0.2892727255821228, + "learning_rate": 3.4549150281252636e-05, + "loss": 0.0184, + "step": 18600 + }, + { + "epoch": 2.838134765625e-05, + "model_forward_time": 0.02483367919921875, + "step": 18600 + }, + { + "epoch": 2.838134765625e-05, + "step": 18600, + "training_step_time": 0.10897397994995117 + }, + { + "epoch": 2.838287353515625e-05, + "model_forward_time": 0.024903535842895508, + "step": 18601 + }, + { + "epoch": 2.838287353515625e-05, + "step": 18601, + "training_step_time": 0.10528945922851562 + }, + { + "epoch": 2.83843994140625e-05, + "model_forward_time": 0.02633047103881836, + "step": 18602 + }, + { + "epoch": 2.83843994140625e-05, + "step": 18602, + "training_step_time": 0.10734415054321289 + }, + { + "epoch": 2.838592529296875e-05, + "model_forward_time": 0.029154539108276367, + "step": 18603 + }, + { + "epoch": 2.838592529296875e-05, + "step": 18603, + "training_step_time": 0.13930416107177734 + }, + { + "epoch": 2.8387451171875e-05, + "model_forward_time": 0.025093555450439453, + "step": 18604 + }, + { + "epoch": 2.8387451171875e-05, + "step": 18604, + "training_step_time": 0.19516634941101074 + }, + { + "epoch": 2.838897705078125e-05, + "model_forward_time": 0.02456045150756836, + "step": 18605 + }, + { + "epoch": 2.838897705078125e-05, + "step": 18605, + "training_step_time": 0.16394400596618652 + }, + { + "epoch": 2.83905029296875e-05, + "model_forward_time": 0.024827003479003906, + "step": 18606 + }, + { + "epoch": 2.83905029296875e-05, + "step": 18606, + "training_step_time": 0.1476726531982422 + }, + { + "epoch": 2.839202880859375e-05, + "model_forward_time": 0.024030208587646484, + "step": 18607 + }, + { + "epoch": 2.839202880859375e-05, + "step": 18607, + "training_step_time": 0.11099028587341309 + }, + { + "epoch": 2.83935546875e-05, + "model_forward_time": 0.02470231056213379, + "step": 18608 + }, + { + "epoch": 2.83935546875e-05, + "step": 18608, + "training_step_time": 0.13009953498840332 + }, + { + "epoch": 2.839508056640625e-05, + "model_forward_time": 0.025391340255737305, + "step": 18609 + }, + { + "epoch": 2.839508056640625e-05, + "step": 18609, + "training_step_time": 0.12215685844421387 + }, + { + "epoch": 2.83966064453125e-05, + "grad_norm": 0.1822260022163391, + "learning_rate": 3.449674157083443e-05, + "loss": 0.0066, + "step": 18610 + }, + { + "epoch": 2.83966064453125e-05, + "model_forward_time": 0.025275230407714844, + "step": 18610 + }, + { + "epoch": 2.83966064453125e-05, + "step": 18610, + "training_step_time": 0.10413718223571777 + }, + { + "epoch": 2.839813232421875e-05, + "model_forward_time": 0.02517557144165039, + "step": 18611 + }, + { + "epoch": 2.839813232421875e-05, + "step": 18611, + "training_step_time": 0.1081998348236084 + }, + { + "epoch": 2.8399658203125e-05, + "model_forward_time": 0.02533578872680664, + "step": 18612 + }, + { + "epoch": 2.8399658203125e-05, + "step": 18612, + "training_step_time": 0.11701369285583496 + }, + { + "epoch": 2.840118408203125e-05, + "model_forward_time": 0.02523946762084961, + "step": 18613 + }, + { + "epoch": 2.840118408203125e-05, + "step": 18613, + "training_step_time": 0.10722160339355469 + }, + { + "epoch": 2.84027099609375e-05, + "model_forward_time": 0.02516007423400879, + "step": 18614 + }, + { + "epoch": 2.84027099609375e-05, + "step": 18614, + "training_step_time": 0.10469698905944824 + }, + { + "epoch": 2.840423583984375e-05, + "model_forward_time": 0.02542257308959961, + "step": 18615 + }, + { + "epoch": 2.840423583984375e-05, + "step": 18615, + "training_step_time": 0.16954612731933594 + }, + { + "epoch": 2.840576171875e-05, + "model_forward_time": 0.02453470230102539, + "step": 18616 + }, + { + "epoch": 2.840576171875e-05, + "step": 18616, + "training_step_time": 0.16470098495483398 + }, + { + "epoch": 2.840728759765625e-05, + "model_forward_time": 0.025079011917114258, + "step": 18617 + }, + { + "epoch": 2.840728759765625e-05, + "step": 18617, + "training_step_time": 0.10504436492919922 + }, + { + "epoch": 2.84088134765625e-05, + "model_forward_time": 0.024733781814575195, + "step": 18618 + }, + { + "epoch": 2.84088134765625e-05, + "step": 18618, + "training_step_time": 0.1741955280303955 + }, + { + "epoch": 2.841033935546875e-05, + "model_forward_time": 0.024254560470581055, + "step": 18619 + }, + { + "epoch": 2.841033935546875e-05, + "step": 18619, + "training_step_time": 0.1267390251159668 + }, + { + "epoch": 2.8411865234375e-05, + "grad_norm": 0.1415482759475708, + "learning_rate": 3.444435169833706e-05, + "loss": 0.0178, + "step": 18620 + }, + { + "epoch": 2.8411865234375e-05, + "model_forward_time": 0.024969100952148438, + "step": 18620 + }, + { + "epoch": 2.8411865234375e-05, + "step": 18620, + "training_step_time": 0.2275407314300537 + }, + { + "epoch": 2.841339111328125e-05, + "model_forward_time": 0.024435997009277344, + "step": 18621 + }, + { + "epoch": 2.841339111328125e-05, + "step": 18621, + "training_step_time": 0.14788198471069336 + }, + { + "epoch": 2.84149169921875e-05, + "model_forward_time": 0.024081945419311523, + "step": 18622 + }, + { + "epoch": 2.84149169921875e-05, + "step": 18622, + "training_step_time": 0.19982457160949707 + }, + { + "epoch": 2.841644287109375e-05, + "model_forward_time": 0.02442622184753418, + "step": 18623 + }, + { + "epoch": 2.841644287109375e-05, + "step": 18623, + "training_step_time": 0.1292562484741211 + }, + { + "epoch": 2.841796875e-05, + "model_forward_time": 0.024149179458618164, + "step": 18624 + }, + { + "epoch": 2.841796875e-05, + "step": 18624, + "training_step_time": 0.12444806098937988 + }, + { + "epoch": 2.841949462890625e-05, + "model_forward_time": 0.02448892593383789, + "step": 18625 + }, + { + "epoch": 2.841949462890625e-05, + "step": 18625, + "training_step_time": 0.11645960807800293 + }, + { + "epoch": 2.84210205078125e-05, + "model_forward_time": 0.025170326232910156, + "step": 18626 + }, + { + "epoch": 2.84210205078125e-05, + "step": 18626, + "training_step_time": 0.11583590507507324 + }, + { + "epoch": 2.842254638671875e-05, + "model_forward_time": 0.025203704833984375, + "step": 18627 + }, + { + "epoch": 2.842254638671875e-05, + "step": 18627, + "training_step_time": 0.16457271575927734 + }, + { + "epoch": 2.8424072265625e-05, + "model_forward_time": 0.02428889274597168, + "step": 18628 + }, + { + "epoch": 2.8424072265625e-05, + "step": 18628, + "training_step_time": 0.13883423805236816 + }, + { + "epoch": 2.842559814453125e-05, + "model_forward_time": 0.024995803833007812, + "step": 18629 + }, + { + "epoch": 2.842559814453125e-05, + "step": 18629, + "training_step_time": 0.10922646522521973 + }, + { + "epoch": 2.84271240234375e-05, + "grad_norm": 0.6063808798789978, + "learning_rate": 3.439198072741921e-05, + "loss": 0.0328, + "step": 18630 + }, + { + "epoch": 2.84271240234375e-05, + "model_forward_time": 0.025669336318969727, + "step": 18630 + }, + { + "epoch": 2.84271240234375e-05, + "step": 18630, + "training_step_time": 0.10818219184875488 + }, + { + "epoch": 2.842864990234375e-05, + "model_forward_time": 0.025558948516845703, + "step": 18631 + }, + { + "epoch": 2.842864990234375e-05, + "step": 18631, + "training_step_time": 0.11385750770568848 + }, + { + "epoch": 2.843017578125e-05, + "model_forward_time": 0.02555370330810547, + "step": 18632 + }, + { + "epoch": 2.843017578125e-05, + "step": 18632, + "training_step_time": 0.11066532135009766 + }, + { + "epoch": 2.843170166015625e-05, + "model_forward_time": 0.025353193283081055, + "step": 18633 + }, + { + "epoch": 2.843170166015625e-05, + "step": 18633, + "training_step_time": 0.18738579750061035 + }, + { + "epoch": 2.84332275390625e-05, + "model_forward_time": 0.02469611167907715, + "step": 18634 + }, + { + "epoch": 2.84332275390625e-05, + "step": 18634, + "training_step_time": 0.10416841506958008 + }, + { + "epoch": 2.843475341796875e-05, + "model_forward_time": 0.024324417114257812, + "step": 18635 + }, + { + "epoch": 2.843475341796875e-05, + "step": 18635, + "training_step_time": 0.10544133186340332 + }, + { + "epoch": 2.8436279296875e-05, + "model_forward_time": 0.0242311954498291, + "step": 18636 + }, + { + "epoch": 2.8436279296875e-05, + "step": 18636, + "training_step_time": 0.10328340530395508 + }, + { + "epoch": 2.843780517578125e-05, + "model_forward_time": 0.02495861053466797, + "step": 18637 + }, + { + "epoch": 2.843780517578125e-05, + "step": 18637, + "training_step_time": 0.10842561721801758 + }, + { + "epoch": 2.84393310546875e-05, + "model_forward_time": 0.025214433670043945, + "step": 18638 + }, + { + "epoch": 2.84393310546875e-05, + "step": 18638, + "training_step_time": 0.1093759536743164 + }, + { + "epoch": 2.844085693359375e-05, + "model_forward_time": 0.024940013885498047, + "step": 18639 + }, + { + "epoch": 2.844085693359375e-05, + "step": 18639, + "training_step_time": 0.10430526733398438 + }, + { + "epoch": 2.84423828125e-05, + "grad_norm": 0.240308478474617, + "learning_rate": 3.4339628721716505e-05, + "loss": 0.0121, + "step": 18640 + }, + { + "epoch": 2.84423828125e-05, + "model_forward_time": 0.024692296981811523, + "step": 18640 + }, + { + "epoch": 2.84423828125e-05, + "step": 18640, + "training_step_time": 0.10426831245422363 + }, + { + "epoch": 2.844390869140625e-05, + "model_forward_time": 0.024988174438476562, + "step": 18641 + }, + { + "epoch": 2.844390869140625e-05, + "step": 18641, + "training_step_time": 0.10498380661010742 + }, + { + "epoch": 2.84454345703125e-05, + "model_forward_time": 0.025749683380126953, + "step": 18642 + }, + { + "epoch": 2.84454345703125e-05, + "step": 18642, + "training_step_time": 0.10556173324584961 + }, + { + "epoch": 2.844696044921875e-05, + "model_forward_time": 0.027891159057617188, + "step": 18643 + }, + { + "epoch": 2.844696044921875e-05, + "step": 18643, + "training_step_time": 0.10684370994567871 + }, + { + "epoch": 2.8448486328125e-05, + "model_forward_time": 0.025338172912597656, + "step": 18644 + }, + { + "epoch": 2.8448486328125e-05, + "step": 18644, + "training_step_time": 0.10548973083496094 + }, + { + "epoch": 2.845001220703125e-05, + "model_forward_time": 0.02480316162109375, + "step": 18645 + }, + { + "epoch": 2.845001220703125e-05, + "step": 18645, + "training_step_time": 0.10641169548034668 + }, + { + "epoch": 2.84515380859375e-05, + "model_forward_time": 0.025333166122436523, + "step": 18646 + }, + { + "epoch": 2.84515380859375e-05, + "step": 18646, + "training_step_time": 0.10857963562011719 + }, + { + "epoch": 2.845306396484375e-05, + "model_forward_time": 0.02534008026123047, + "step": 18647 + }, + { + "epoch": 2.845306396484375e-05, + "step": 18647, + "training_step_time": 0.10523009300231934 + }, + { + "epoch": 2.845458984375e-05, + "model_forward_time": 0.025205373764038086, + "step": 18648 + }, + { + "epoch": 2.845458984375e-05, + "step": 18648, + "training_step_time": 0.15774059295654297 + }, + { + "epoch": 2.845611572265625e-05, + "model_forward_time": 0.024193763732910156, + "step": 18649 + }, + { + "epoch": 2.845611572265625e-05, + "step": 18649, + "training_step_time": 0.13544654846191406 + }, + { + "epoch": 2.84576416015625e-05, + "grad_norm": 0.28116506338119507, + "learning_rate": 3.4287295744841586e-05, + "loss": 0.0095, + "step": 18650 + }, + { + "epoch": 2.84576416015625e-05, + "model_forward_time": 0.0254056453704834, + "step": 18650 + }, + { + "epoch": 2.84576416015625e-05, + "step": 18650, + "training_step_time": 0.14436936378479004 + }, + { + "epoch": 2.845916748046875e-05, + "model_forward_time": 0.024399757385253906, + "step": 18651 + }, + { + "epoch": 2.845916748046875e-05, + "step": 18651, + "training_step_time": 0.16254973411560059 + }, + { + "epoch": 2.8460693359375e-05, + "model_forward_time": 0.024152755737304688, + "step": 18652 + }, + { + "epoch": 2.8460693359375e-05, + "step": 18652, + "training_step_time": 0.18091177940368652 + }, + { + "epoch": 2.846221923828125e-05, + "model_forward_time": 0.02463817596435547, + "step": 18653 + }, + { + "epoch": 2.846221923828125e-05, + "step": 18653, + "training_step_time": 0.15601706504821777 + }, + { + "epoch": 2.84637451171875e-05, + "model_forward_time": 0.024683475494384766, + "step": 18654 + }, + { + "epoch": 2.84637451171875e-05, + "step": 18654, + "training_step_time": 0.11061573028564453 + }, + { + "epoch": 2.846527099609375e-05, + "model_forward_time": 0.02415919303894043, + "step": 18655 + }, + { + "epoch": 2.846527099609375e-05, + "step": 18655, + "training_step_time": 0.20746517181396484 + }, + { + "epoch": 2.8466796875e-05, + "model_forward_time": 0.024567842483520508, + "step": 18656 + }, + { + "epoch": 2.8466796875e-05, + "step": 18656, + "training_step_time": 0.11252570152282715 + }, + { + "epoch": 2.846832275390625e-05, + "model_forward_time": 0.02443671226501465, + "step": 18657 + }, + { + "epoch": 2.846832275390625e-05, + "step": 18657, + "training_step_time": 0.11144828796386719 + }, + { + "epoch": 2.84698486328125e-05, + "model_forward_time": 0.025071382522583008, + "step": 18658 + }, + { + "epoch": 2.84698486328125e-05, + "step": 18658, + "training_step_time": 0.2093505859375 + }, + { + "epoch": 2.847137451171875e-05, + "model_forward_time": 0.02839946746826172, + "step": 18659 + }, + { + "epoch": 2.847137451171875e-05, + "step": 18659, + "training_step_time": 0.12821197509765625 + }, + { + "epoch": 2.8472900390625e-05, + "grad_norm": 0.22209402918815613, + "learning_rate": 3.423498186038393e-05, + "loss": 0.0105, + "step": 18660 + }, + { + "epoch": 2.8472900390625e-05, + "model_forward_time": 0.024376392364501953, + "step": 18660 + }, + { + "epoch": 2.8472900390625e-05, + "step": 18660, + "training_step_time": 0.10211396217346191 + }, + { + "epoch": 2.847442626953125e-05, + "model_forward_time": 0.025356054306030273, + "step": 18661 + }, + { + "epoch": 2.847442626953125e-05, + "step": 18661, + "training_step_time": 0.10447525978088379 + }, + { + "epoch": 2.84759521484375e-05, + "model_forward_time": 0.025217533111572266, + "step": 18662 + }, + { + "epoch": 2.84759521484375e-05, + "step": 18662, + "training_step_time": 0.10500764846801758 + }, + { + "epoch": 2.847747802734375e-05, + "model_forward_time": 0.025259971618652344, + "step": 18663 + }, + { + "epoch": 2.847747802734375e-05, + "step": 18663, + "training_step_time": 0.11558294296264648 + }, + { + "epoch": 2.847900390625e-05, + "model_forward_time": 0.024763107299804688, + "step": 18664 + }, + { + "epoch": 2.847900390625e-05, + "step": 18664, + "training_step_time": 0.10416650772094727 + }, + { + "epoch": 2.848052978515625e-05, + "model_forward_time": 0.025094032287597656, + "step": 18665 + }, + { + "epoch": 2.848052978515625e-05, + "step": 18665, + "training_step_time": 0.11466670036315918 + }, + { + "epoch": 2.84820556640625e-05, + "model_forward_time": 0.025075674057006836, + "step": 18666 + }, + { + "epoch": 2.84820556640625e-05, + "step": 18666, + "training_step_time": 0.1301407814025879 + }, + { + "epoch": 2.848358154296875e-05, + "model_forward_time": 0.025715351104736328, + "step": 18667 + }, + { + "epoch": 2.848358154296875e-05, + "step": 18667, + "training_step_time": 0.11510562896728516 + }, + { + "epoch": 2.8485107421875e-05, + "model_forward_time": 0.024806976318359375, + "step": 18668 + }, + { + "epoch": 2.8485107421875e-05, + "step": 18668, + "training_step_time": 0.12157678604125977 + }, + { + "epoch": 2.848663330078125e-05, + "model_forward_time": 0.025059938430786133, + "step": 18669 + }, + { + "epoch": 2.848663330078125e-05, + "step": 18669, + "training_step_time": 0.11524724960327148 + }, + { + "epoch": 2.84881591796875e-05, + "grad_norm": 0.22405150532722473, + "learning_rate": 3.418268713190986e-05, + "loss": 0.0117, + "step": 18670 + }, + { + "epoch": 2.84881591796875e-05, + "model_forward_time": 0.024973392486572266, + "step": 18670 + }, + { + "epoch": 2.84881591796875e-05, + "step": 18670, + "training_step_time": 0.1145637035369873 + }, + { + "epoch": 2.848968505859375e-05, + "model_forward_time": 0.02456974983215332, + "step": 18671 + }, + { + "epoch": 2.848968505859375e-05, + "step": 18671, + "training_step_time": 0.1123507022857666 + }, + { + "epoch": 2.84912109375e-05, + "model_forward_time": 0.02492213249206543, + "step": 18672 + }, + { + "epoch": 2.84912109375e-05, + "step": 18672, + "training_step_time": 0.11482691764831543 + }, + { + "epoch": 2.849273681640625e-05, + "model_forward_time": 0.02473759651184082, + "step": 18673 + }, + { + "epoch": 2.849273681640625e-05, + "step": 18673, + "training_step_time": 0.18982887268066406 + }, + { + "epoch": 2.84942626953125e-05, + "model_forward_time": 0.023865461349487305, + "step": 18674 + }, + { + "epoch": 2.84942626953125e-05, + "step": 18674, + "training_step_time": 0.12369060516357422 + }, + { + "epoch": 2.849578857421875e-05, + "model_forward_time": 0.023891925811767578, + "step": 18675 + }, + { + "epoch": 2.849578857421875e-05, + "step": 18675, + "training_step_time": 0.1091470718383789 + }, + { + "epoch": 2.8497314453125e-05, + "model_forward_time": 0.025587797164916992, + "step": 18676 + }, + { + "epoch": 2.8497314453125e-05, + "step": 18676, + "training_step_time": 0.10956287384033203 + }, + { + "epoch": 2.849884033203125e-05, + "model_forward_time": 0.02534174919128418, + "step": 18677 + }, + { + "epoch": 2.849884033203125e-05, + "step": 18677, + "training_step_time": 0.12403154373168945 + }, + { + "epoch": 2.85003662109375e-05, + "model_forward_time": 0.025243520736694336, + "step": 18678 + }, + { + "epoch": 2.85003662109375e-05, + "step": 18678, + "training_step_time": 0.10702896118164062 + }, + { + "epoch": 2.850189208984375e-05, + "model_forward_time": 0.024883270263671875, + "step": 18679 + }, + { + "epoch": 2.850189208984375e-05, + "step": 18679, + "training_step_time": 0.19613409042358398 + }, + { + "epoch": 2.850341796875e-05, + "grad_norm": 0.30229589343070984, + "learning_rate": 3.413041162296241e-05, + "loss": 0.0101, + "step": 18680 + }, + { + "epoch": 2.850341796875e-05, + "model_forward_time": 0.0244295597076416, + "step": 18680 + }, + { + "epoch": 2.850341796875e-05, + "step": 18680, + "training_step_time": 0.10483694076538086 + }, + { + "epoch": 2.850494384765625e-05, + "model_forward_time": 0.024383068084716797, + "step": 18681 + }, + { + "epoch": 2.850494384765625e-05, + "step": 18681, + "training_step_time": 0.10775184631347656 + }, + { + "epoch": 2.85064697265625e-05, + "model_forward_time": 0.025890111923217773, + "step": 18682 + }, + { + "epoch": 2.85064697265625e-05, + "step": 18682, + "training_step_time": 0.10780072212219238 + }, + { + "epoch": 2.850799560546875e-05, + "model_forward_time": 0.02549433708190918, + "step": 18683 + }, + { + "epoch": 2.850799560546875e-05, + "step": 18683, + "training_step_time": 0.1060638427734375 + }, + { + "epoch": 2.8509521484375e-05, + "model_forward_time": 0.025543689727783203, + "step": 18684 + }, + { + "epoch": 2.8509521484375e-05, + "step": 18684, + "training_step_time": 0.10519814491271973 + }, + { + "epoch": 2.851104736328125e-05, + "model_forward_time": 0.024728059768676758, + "step": 18685 + }, + { + "epoch": 2.851104736328125e-05, + "step": 18685, + "training_step_time": 0.10474085807800293 + }, + { + "epoch": 2.85125732421875e-05, + "model_forward_time": 0.024762630462646484, + "step": 18686 + }, + { + "epoch": 2.85125732421875e-05, + "step": 18686, + "training_step_time": 0.10592412948608398 + }, + { + "epoch": 2.851409912109375e-05, + "model_forward_time": 0.025117158889770508, + "step": 18687 + }, + { + "epoch": 2.851409912109375e-05, + "step": 18687, + "training_step_time": 0.10524106025695801 + }, + { + "epoch": 2.8515625e-05, + "model_forward_time": 0.025257349014282227, + "step": 18688 + }, + { + "epoch": 2.8515625e-05, + "step": 18688, + "training_step_time": 0.10725855827331543 + }, + { + "epoch": 2.851715087890625e-05, + "model_forward_time": 0.02482295036315918, + "step": 18689 + }, + { + "epoch": 2.851715087890625e-05, + "step": 18689, + "training_step_time": 0.10512137413024902 + }, + { + "epoch": 2.85186767578125e-05, + "grad_norm": 0.148757666349411, + "learning_rate": 3.407815539706124e-05, + "loss": 0.0097, + "step": 18690 + }, + { + "epoch": 2.85186767578125e-05, + "model_forward_time": 0.024880647659301758, + "step": 18690 + }, + { + "epoch": 2.85186767578125e-05, + "step": 18690, + "training_step_time": 0.10797357559204102 + }, + { + "epoch": 2.852020263671875e-05, + "model_forward_time": 0.025351285934448242, + "step": 18691 + }, + { + "epoch": 2.852020263671875e-05, + "step": 18691, + "training_step_time": 0.10604691505432129 + }, + { + "epoch": 2.8521728515625e-05, + "model_forward_time": 0.024843215942382812, + "step": 18692 + }, + { + "epoch": 2.8521728515625e-05, + "step": 18692, + "training_step_time": 0.10420584678649902 + }, + { + "epoch": 2.852325439453125e-05, + "model_forward_time": 0.025293588638305664, + "step": 18693 + }, + { + "epoch": 2.852325439453125e-05, + "step": 18693, + "training_step_time": 0.1042935848236084 + }, + { + "epoch": 2.85247802734375e-05, + "model_forward_time": 0.025359392166137695, + "step": 18694 + }, + { + "epoch": 2.85247802734375e-05, + "step": 18694, + "training_step_time": 0.19097590446472168 + }, + { + "epoch": 2.852630615234375e-05, + "model_forward_time": 0.0243072509765625, + "step": 18695 + }, + { + "epoch": 2.852630615234375e-05, + "step": 18695, + "training_step_time": 0.12299060821533203 + }, + { + "epoch": 2.852783203125e-05, + "model_forward_time": 0.024074554443359375, + "step": 18696 + }, + { + "epoch": 2.852783203125e-05, + "step": 18696, + "training_step_time": 0.131317138671875 + }, + { + "epoch": 2.852935791015625e-05, + "model_forward_time": 0.02502727508544922, + "step": 18697 + }, + { + "epoch": 2.852935791015625e-05, + "step": 18697, + "training_step_time": 0.16652536392211914 + }, + { + "epoch": 2.85308837890625e-05, + "model_forward_time": 0.024404525756835938, + "step": 18698 + }, + { + "epoch": 2.85308837890625e-05, + "step": 18698, + "training_step_time": 0.21195220947265625 + }, + { + "epoch": 2.853240966796875e-05, + "model_forward_time": 0.024669408798217773, + "step": 18699 + }, + { + "epoch": 2.853240966796875e-05, + "step": 18699, + "training_step_time": 0.10062789916992188 + }, + { + "epoch": 2.8533935546875e-05, + "grad_norm": 0.18390384316444397, + "learning_rate": 3.40259185177026e-05, + "loss": 0.0056, + "step": 18700 + }, + { + "epoch": 2.8533935546875e-05, + "model_forward_time": 0.024452686309814453, + "step": 18700 + }, + { + "epoch": 2.8533935546875e-05, + "step": 18700, + "training_step_time": 0.10303497314453125 + }, + { + "epoch": 2.853546142578125e-05, + "model_forward_time": 0.02494215965270996, + "step": 18701 + }, + { + "epoch": 2.853546142578125e-05, + "step": 18701, + "training_step_time": 0.11635208129882812 + }, + { + "epoch": 2.85369873046875e-05, + "model_forward_time": 0.025289297103881836, + "step": 18702 + }, + { + "epoch": 2.85369873046875e-05, + "step": 18702, + "training_step_time": 0.10718560218811035 + }, + { + "epoch": 2.853851318359375e-05, + "model_forward_time": 0.025139808654785156, + "step": 18703 + }, + { + "epoch": 2.853851318359375e-05, + "step": 18703, + "training_step_time": 0.10726165771484375 + }, + { + "epoch": 2.85400390625e-05, + "model_forward_time": 0.02516341209411621, + "step": 18704 + }, + { + "epoch": 2.85400390625e-05, + "step": 18704, + "training_step_time": 0.21512174606323242 + }, + { + "epoch": 2.854156494140625e-05, + "model_forward_time": 0.024864673614501953, + "step": 18705 + }, + { + "epoch": 2.854156494140625e-05, + "step": 18705, + "training_step_time": 0.11003494262695312 + }, + { + "epoch": 2.85430908203125e-05, + "model_forward_time": 0.024806499481201172, + "step": 18706 + }, + { + "epoch": 2.85430908203125e-05, + "step": 18706, + "training_step_time": 0.10397219657897949 + }, + { + "epoch": 2.854461669921875e-05, + "model_forward_time": 0.02498030662536621, + "step": 18707 + }, + { + "epoch": 2.854461669921875e-05, + "step": 18707, + "training_step_time": 0.10543298721313477 + }, + { + "epoch": 2.8546142578125e-05, + "model_forward_time": 0.02501392364501953, + "step": 18708 + }, + { + "epoch": 2.8546142578125e-05, + "step": 18708, + "training_step_time": 0.1060023307800293 + }, + { + "epoch": 2.854766845703125e-05, + "model_forward_time": 0.02534174919128418, + "step": 18709 + }, + { + "epoch": 2.854766845703125e-05, + "step": 18709, + "training_step_time": 0.20882630348205566 + }, + { + "epoch": 2.85491943359375e-05, + "grad_norm": 0.152579203248024, + "learning_rate": 3.397370104835922e-05, + "loss": 0.0089, + "step": 18710 + }, + { + "epoch": 2.85491943359375e-05, + "model_forward_time": 0.02409815788269043, + "step": 18710 + }, + { + "epoch": 2.85491943359375e-05, + "step": 18710, + "training_step_time": 0.10209155082702637 + }, + { + "epoch": 2.855072021484375e-05, + "model_forward_time": 0.024341583251953125, + "step": 18711 + }, + { + "epoch": 2.855072021484375e-05, + "step": 18711, + "training_step_time": 0.11507081985473633 + }, + { + "epoch": 2.855224609375e-05, + "model_forward_time": 0.02759838104248047, + "step": 18712 + }, + { + "epoch": 2.855224609375e-05, + "step": 18712, + "training_step_time": 0.1255021095275879 + }, + { + "epoch": 2.855377197265625e-05, + "model_forward_time": 0.025249004364013672, + "step": 18713 + }, + { + "epoch": 2.855377197265625e-05, + "step": 18713, + "training_step_time": 0.12906789779663086 + }, + { + "epoch": 2.85552978515625e-05, + "model_forward_time": 0.025418996810913086, + "step": 18714 + }, + { + "epoch": 2.85552978515625e-05, + "step": 18714, + "training_step_time": 0.11107993125915527 + }, + { + "epoch": 2.855682373046875e-05, + "model_forward_time": 0.025378942489624023, + "step": 18715 + }, + { + "epoch": 2.855682373046875e-05, + "step": 18715, + "training_step_time": 0.11041736602783203 + }, + { + "epoch": 2.8558349609375e-05, + "model_forward_time": 0.024824142456054688, + "step": 18716 + }, + { + "epoch": 2.8558349609375e-05, + "step": 18716, + "training_step_time": 0.10512495040893555 + }, + { + "epoch": 2.855987548828125e-05, + "model_forward_time": 0.025124073028564453, + "step": 18717 + }, + { + "epoch": 2.855987548828125e-05, + "step": 18717, + "training_step_time": 0.10397648811340332 + }, + { + "epoch": 2.85614013671875e-05, + "model_forward_time": 0.02553248405456543, + "step": 18718 + }, + { + "epoch": 2.85614013671875e-05, + "step": 18718, + "training_step_time": 0.10438919067382812 + }, + { + "epoch": 2.856292724609375e-05, + "model_forward_time": 0.02518320083618164, + "step": 18719 + }, + { + "epoch": 2.856292724609375e-05, + "step": 18719, + "training_step_time": 0.16293001174926758 + }, + { + "epoch": 2.8564453125e-05, + "grad_norm": 0.24660253524780273, + "learning_rate": 3.392150305248024e-05, + "loss": 0.0056, + "step": 18720 + }, + { + "epoch": 2.8564453125e-05, + "model_forward_time": 0.024881601333618164, + "step": 18720 + }, + { + "epoch": 2.8564453125e-05, + "step": 18720, + "training_step_time": 0.13608694076538086 + }, + { + "epoch": 2.856597900390625e-05, + "model_forward_time": 0.024523496627807617, + "step": 18721 + }, + { + "epoch": 2.856597900390625e-05, + "step": 18721, + "training_step_time": 0.1143651008605957 + }, + { + "epoch": 2.85675048828125e-05, + "model_forward_time": 0.024621009826660156, + "step": 18722 + }, + { + "epoch": 2.85675048828125e-05, + "step": 18722, + "training_step_time": 0.10692644119262695 + }, + { + "epoch": 2.856903076171875e-05, + "model_forward_time": 0.025336265563964844, + "step": 18723 + }, + { + "epoch": 2.856903076171875e-05, + "step": 18723, + "training_step_time": 0.10908961296081543 + }, + { + "epoch": 2.8570556640625e-05, + "model_forward_time": 0.02513432502746582, + "step": 18724 + }, + { + "epoch": 2.8570556640625e-05, + "step": 18724, + "training_step_time": 0.1632089614868164 + }, + { + "epoch": 2.857208251953125e-05, + "model_forward_time": 0.02430558204650879, + "step": 18725 + }, + { + "epoch": 2.857208251953125e-05, + "step": 18725, + "training_step_time": 0.10711979866027832 + }, + { + "epoch": 2.85736083984375e-05, + "model_forward_time": 0.024551868438720703, + "step": 18726 + }, + { + "epoch": 2.85736083984375e-05, + "step": 18726, + "training_step_time": 0.10663676261901855 + }, + { + "epoch": 2.857513427734375e-05, + "model_forward_time": 0.026419401168823242, + "step": 18727 + }, + { + "epoch": 2.857513427734375e-05, + "step": 18727, + "training_step_time": 0.11676025390625 + }, + { + "epoch": 2.857666015625e-05, + "model_forward_time": 0.025155305862426758, + "step": 18728 + }, + { + "epoch": 2.857666015625e-05, + "step": 18728, + "training_step_time": 0.14812874794006348 + }, + { + "epoch": 2.857818603515625e-05, + "model_forward_time": 0.023887157440185547, + "step": 18729 + }, + { + "epoch": 2.857818603515625e-05, + "step": 18729, + "training_step_time": 0.12357473373413086 + }, + { + "epoch": 2.85797119140625e-05, + "grad_norm": 0.420911580324173, + "learning_rate": 3.386932459349114e-05, + "loss": 0.0301, + "step": 18730 + }, + { + "epoch": 2.85797119140625e-05, + "model_forward_time": 0.023508548736572266, + "step": 18730 + }, + { + "epoch": 2.85797119140625e-05, + "step": 18730, + "training_step_time": 0.12253522872924805 + }, + { + "epoch": 2.858123779296875e-05, + "model_forward_time": 0.024149417877197266, + "step": 18731 + }, + { + "epoch": 2.858123779296875e-05, + "step": 18731, + "training_step_time": 0.12899017333984375 + }, + { + "epoch": 2.8582763671875e-05, + "model_forward_time": 0.024247407913208008, + "step": 18732 + }, + { + "epoch": 2.8582763671875e-05, + "step": 18732, + "training_step_time": 0.12786555290222168 + }, + { + "epoch": 2.858428955078125e-05, + "model_forward_time": 0.023737430572509766, + "step": 18733 + }, + { + "epoch": 2.858428955078125e-05, + "step": 18733, + "training_step_time": 0.1237335205078125 + }, + { + "epoch": 2.85858154296875e-05, + "model_forward_time": 0.024068832397460938, + "step": 18734 + }, + { + "epoch": 2.85858154296875e-05, + "step": 18734, + "training_step_time": 0.1225881576538086 + }, + { + "epoch": 2.858734130859375e-05, + "model_forward_time": 0.024452924728393555, + "step": 18735 + }, + { + "epoch": 2.858734130859375e-05, + "step": 18735, + "training_step_time": 0.12265539169311523 + }, + { + "epoch": 2.85888671875e-05, + "model_forward_time": 0.024499893188476562, + "step": 18736 + }, + { + "epoch": 2.85888671875e-05, + "step": 18736, + "training_step_time": 0.12138748168945312 + }, + { + "epoch": 2.859039306640625e-05, + "model_forward_time": 0.024144411087036133, + "step": 18737 + }, + { + "epoch": 2.859039306640625e-05, + "step": 18737, + "training_step_time": 0.11595535278320312 + }, + { + "epoch": 2.85919189453125e-05, + "model_forward_time": 0.024477243423461914, + "step": 18738 + }, + { + "epoch": 2.85919189453125e-05, + "step": 18738, + "training_step_time": 0.12108898162841797 + }, + { + "epoch": 2.859344482421875e-05, + "model_forward_time": 0.025574684143066406, + "step": 18739 + }, + { + "epoch": 2.859344482421875e-05, + "step": 18739, + "training_step_time": 0.17233800888061523 + }, + { + "epoch": 2.8594970703125e-05, + "grad_norm": 0.2880837619304657, + "learning_rate": 3.3817165734793705e-05, + "loss": 0.0067, + "step": 18740 + }, + { + "epoch": 2.8594970703125e-05, + "model_forward_time": 0.026782512664794922, + "step": 18740 + }, + { + "epoch": 2.8594970703125e-05, + "step": 18740, + "training_step_time": 0.1655445098876953 + }, + { + "epoch": 2.859649658203125e-05, + "model_forward_time": 0.027322769165039062, + "step": 18741 + }, + { + "epoch": 2.859649658203125e-05, + "step": 18741, + "training_step_time": 0.2655484676361084 + }, + { + "epoch": 2.85980224609375e-05, + "model_forward_time": 0.029373884201049805, + "step": 18742 + }, + { + "epoch": 2.85980224609375e-05, + "step": 18742, + "training_step_time": 0.22951984405517578 + }, + { + "epoch": 2.859954833984375e-05, + "model_forward_time": 0.029935598373413086, + "step": 18743 + }, + { + "epoch": 2.859954833984375e-05, + "step": 18743, + "training_step_time": 0.3666236400604248 + }, + { + "epoch": 2.860107421875e-05, + "model_forward_time": 0.03392672538757324, + "step": 18744 + }, + { + "epoch": 2.860107421875e-05, + "step": 18744, + "training_step_time": 0.305267333984375 + }, + { + "epoch": 2.860260009765625e-05, + "model_forward_time": 0.030944347381591797, + "step": 18745 + }, + { + "epoch": 2.860260009765625e-05, + "step": 18745, + "training_step_time": 0.3896608352661133 + }, + { + "epoch": 2.86041259765625e-05, + "model_forward_time": 0.03164196014404297, + "step": 18746 + }, + { + "epoch": 2.86041259765625e-05, + "step": 18746, + "training_step_time": 0.27266907691955566 + }, + { + "epoch": 2.860565185546875e-05, + "model_forward_time": 0.030896902084350586, + "step": 18747 + }, + { + "epoch": 2.860565185546875e-05, + "step": 18747, + "training_step_time": 0.3975076675415039 + }, + { + "epoch": 2.8607177734375e-05, + "model_forward_time": 0.028563976287841797, + "step": 18748 + }, + { + "epoch": 2.8607177734375e-05, + "step": 18748, + "training_step_time": 0.31119751930236816 + }, + { + "epoch": 2.860870361328125e-05, + "model_forward_time": 0.03395700454711914, + "step": 18749 + }, + { + "epoch": 2.860870361328125e-05, + "step": 18749, + "training_step_time": 0.3223867416381836 + }, + { + "epoch": 2.86102294921875e-05, + "grad_norm": 0.2670065760612488, + "learning_rate": 3.3765026539765834e-05, + "loss": 0.0063, + "step": 18750 + }, + { + "epoch": 2.86102294921875e-05, + "model_forward_time": 0.033078670501708984, + "step": 18750 + }, + { + "epoch": 2.86102294921875e-05, + "step": 18750, + "training_step_time": 0.29189157485961914 + }, + { + "epoch": 2.861175537109375e-05, + "model_forward_time": 0.02948737144470215, + "step": 18751 + }, + { + "epoch": 2.861175537109375e-05, + "step": 18751, + "training_step_time": 0.21408891677856445 + }, + { + "epoch": 2.861328125e-05, + "model_forward_time": 0.02936577796936035, + "step": 18752 + }, + { + "epoch": 2.861328125e-05, + "step": 18752, + "training_step_time": 0.2611715793609619 + }, + { + "epoch": 2.861480712890625e-05, + "model_forward_time": 0.029602766036987305, + "step": 18753 + }, + { + "epoch": 2.861480712890625e-05, + "step": 18753, + "training_step_time": 0.17420554161071777 + }, + { + "epoch": 2.86163330078125e-05, + "model_forward_time": 0.030362606048583984, + "step": 18754 + }, + { + "epoch": 2.86163330078125e-05, + "step": 18754, + "training_step_time": 0.24542832374572754 + }, + { + "epoch": 2.861785888671875e-05, + "model_forward_time": 0.03265523910522461, + "step": 18755 + }, + { + "epoch": 2.861785888671875e-05, + "step": 18755, + "training_step_time": 0.1388874053955078 + }, + { + "epoch": 2.8619384765625e-05, + "model_forward_time": 0.0278778076171875, + "step": 18756 + }, + { + "epoch": 2.8619384765625e-05, + "step": 18756, + "training_step_time": 0.12894439697265625 + }, + { + "epoch": 2.862091064453125e-05, + "model_forward_time": 0.027524948120117188, + "step": 18757 + }, + { + "epoch": 2.862091064453125e-05, + "step": 18757, + "training_step_time": 0.1291515827178955 + }, + { + "epoch": 2.86224365234375e-05, + "model_forward_time": 0.02694082260131836, + "step": 18758 + }, + { + "epoch": 2.86224365234375e-05, + "step": 18758, + "training_step_time": 0.12988972663879395 + }, + { + "epoch": 2.862396240234375e-05, + "model_forward_time": 0.026509761810302734, + "step": 18759 + }, + { + "epoch": 2.862396240234375e-05, + "step": 18759, + "training_step_time": 0.1183319091796875 + }, + { + "epoch": 2.862548828125e-05, + "grad_norm": 0.3997817933559418, + "learning_rate": 3.371290707176158e-05, + "loss": 0.0092, + "step": 18760 + }, + { + "epoch": 2.862548828125e-05, + "model_forward_time": 0.026118040084838867, + "step": 18760 + }, + { + "epoch": 2.862548828125e-05, + "step": 18760, + "training_step_time": 0.11597084999084473 + }, + { + "epoch": 2.862701416015625e-05, + "model_forward_time": 0.025996923446655273, + "step": 18761 + }, + { + "epoch": 2.862701416015625e-05, + "step": 18761, + "training_step_time": 0.11576342582702637 + }, + { + "epoch": 2.86285400390625e-05, + "model_forward_time": 0.025664329528808594, + "step": 18762 + }, + { + "epoch": 2.86285400390625e-05, + "step": 18762, + "training_step_time": 0.1083831787109375 + }, + { + "epoch": 2.863006591796875e-05, + "model_forward_time": 0.02584528923034668, + "step": 18763 + }, + { + "epoch": 2.863006591796875e-05, + "step": 18763, + "training_step_time": 0.10778164863586426 + }, + { + "epoch": 2.8631591796875e-05, + "model_forward_time": 0.024608135223388672, + "step": 18764 + }, + { + "epoch": 2.8631591796875e-05, + "step": 18764, + "training_step_time": 0.1079263687133789 + }, + { + "epoch": 2.863311767578125e-05, + "model_forward_time": 0.024626970291137695, + "step": 18765 + }, + { + "epoch": 2.863311767578125e-05, + "step": 18765, + "training_step_time": 0.10841751098632812 + }, + { + "epoch": 2.86346435546875e-05, + "model_forward_time": 0.024639129638671875, + "step": 18766 + }, + { + "epoch": 2.86346435546875e-05, + "step": 18766, + "training_step_time": 0.11050081253051758 + }, + { + "epoch": 2.863616943359375e-05, + "model_forward_time": 0.024825334548950195, + "step": 18767 + }, + { + "epoch": 2.863616943359375e-05, + "step": 18767, + "training_step_time": 0.10764741897583008 + }, + { + "epoch": 2.86376953125e-05, + "model_forward_time": 0.02548837661743164, + "step": 18768 + }, + { + "epoch": 2.86376953125e-05, + "step": 18768, + "training_step_time": 0.1622178554534912 + }, + { + "epoch": 2.863922119140625e-05, + "model_forward_time": 0.02474236488342285, + "step": 18769 + }, + { + "epoch": 2.863922119140625e-05, + "step": 18769, + "training_step_time": 0.12578749656677246 + }, + { + "epoch": 2.86407470703125e-05, + "grad_norm": 0.10352246463298798, + "learning_rate": 3.366080739411101e-05, + "loss": 0.0225, + "step": 18770 + }, + { + "epoch": 2.86407470703125e-05, + "model_forward_time": 0.024154186248779297, + "step": 18770 + }, + { + "epoch": 2.86407470703125e-05, + "step": 18770, + "training_step_time": 0.12054085731506348 + }, + { + "epoch": 2.864227294921875e-05, + "model_forward_time": 0.02568507194519043, + "step": 18771 + }, + { + "epoch": 2.864227294921875e-05, + "step": 18771, + "training_step_time": 0.10467982292175293 + }, + { + "epoch": 2.8643798828125e-05, + "model_forward_time": 0.024552583694458008, + "step": 18772 + }, + { + "epoch": 2.8643798828125e-05, + "step": 18772, + "training_step_time": 0.1508169174194336 + }, + { + "epoch": 2.864532470703125e-05, + "model_forward_time": 0.025323152542114258, + "step": 18773 + }, + { + "epoch": 2.864532470703125e-05, + "step": 18773, + "training_step_time": 0.13198232650756836 + }, + { + "epoch": 2.86468505859375e-05, + "model_forward_time": 0.024127483367919922, + "step": 18774 + }, + { + "epoch": 2.86468505859375e-05, + "step": 18774, + "training_step_time": 0.11276078224182129 + }, + { + "epoch": 2.864837646484375e-05, + "model_forward_time": 0.024816274642944336, + "step": 18775 + }, + { + "epoch": 2.864837646484375e-05, + "step": 18775, + "training_step_time": 0.10796546936035156 + }, + { + "epoch": 2.864990234375e-05, + "model_forward_time": 0.02396845817565918, + "step": 18776 + }, + { + "epoch": 2.864990234375e-05, + "step": 18776, + "training_step_time": 0.14810919761657715 + }, + { + "epoch": 2.865142822265625e-05, + "model_forward_time": 0.024390459060668945, + "step": 18777 + }, + { + "epoch": 2.865142822265625e-05, + "step": 18777, + "training_step_time": 0.1488649845123291 + }, + { + "epoch": 2.86529541015625e-05, + "model_forward_time": 0.024006128311157227, + "step": 18778 + }, + { + "epoch": 2.86529541015625e-05, + "step": 18778, + "training_step_time": 0.13861393928527832 + }, + { + "epoch": 2.865447998046875e-05, + "model_forward_time": 0.02414989471435547, + "step": 18779 + }, + { + "epoch": 2.865447998046875e-05, + "step": 18779, + "training_step_time": 0.1290268898010254 + }, + { + "epoch": 2.8656005859375e-05, + "grad_norm": 0.3721567392349243, + "learning_rate": 3.360872757012011e-05, + "loss": 0.0195, + "step": 18780 + }, + { + "epoch": 2.8656005859375e-05, + "model_forward_time": 0.024308443069458008, + "step": 18780 + }, + { + "epoch": 2.8656005859375e-05, + "step": 18780, + "training_step_time": 0.19645261764526367 + }, + { + "epoch": 2.865753173828125e-05, + "model_forward_time": 0.02265334129333496, + "step": 18781 + }, + { + "epoch": 2.865753173828125e-05, + "step": 18781, + "training_step_time": 0.11078906059265137 + }, + { + "epoch": 2.86590576171875e-05, + "model_forward_time": 0.024447202682495117, + "step": 18782 + }, + { + "epoch": 2.86590576171875e-05, + "step": 18782, + "training_step_time": 0.1097869873046875 + }, + { + "epoch": 2.866058349609375e-05, + "model_forward_time": 0.025259733200073242, + "step": 18783 + }, + { + "epoch": 2.866058349609375e-05, + "step": 18783, + "training_step_time": 0.11209535598754883 + }, + { + "epoch": 2.8662109375e-05, + "model_forward_time": 0.02499842643737793, + "step": 18784 + }, + { + "epoch": 2.8662109375e-05, + "step": 18784, + "training_step_time": 0.1071779727935791 + }, + { + "epoch": 2.866363525390625e-05, + "model_forward_time": 0.0244600772857666, + "step": 18785 + }, + { + "epoch": 2.866363525390625e-05, + "step": 18785, + "training_step_time": 0.10940384864807129 + }, + { + "epoch": 2.86651611328125e-05, + "model_forward_time": 0.024889230728149414, + "step": 18786 + }, + { + "epoch": 2.86651611328125e-05, + "step": 18786, + "training_step_time": 0.15638971328735352 + }, + { + "epoch": 2.866668701171875e-05, + "model_forward_time": 0.024312496185302734, + "step": 18787 + }, + { + "epoch": 2.866668701171875e-05, + "step": 18787, + "training_step_time": 0.1111001968383789 + }, + { + "epoch": 2.8668212890625e-05, + "model_forward_time": 0.02421259880065918, + "step": 18788 + }, + { + "epoch": 2.8668212890625e-05, + "step": 18788, + "training_step_time": 0.10922741889953613 + }, + { + "epoch": 2.866973876953125e-05, + "model_forward_time": 0.02550053596496582, + "step": 18789 + }, + { + "epoch": 2.866973876953125e-05, + "step": 18789, + "training_step_time": 0.11527729034423828 + }, + { + "epoch": 2.86712646484375e-05, + "grad_norm": 0.3040342926979065, + "learning_rate": 3.355666766307084e-05, + "loss": 0.0082, + "step": 18790 + }, + { + "epoch": 2.86712646484375e-05, + "model_forward_time": 0.025507450103759766, + "step": 18790 + }, + { + "epoch": 2.86712646484375e-05, + "step": 18790, + "training_step_time": 0.12912964820861816 + }, + { + "epoch": 2.867279052734375e-05, + "model_forward_time": 0.02561044692993164, + "step": 18791 + }, + { + "epoch": 2.867279052734375e-05, + "step": 18791, + "training_step_time": 0.10654735565185547 + }, + { + "epoch": 2.867431640625e-05, + "model_forward_time": 0.02562093734741211, + "step": 18792 + }, + { + "epoch": 2.867431640625e-05, + "step": 18792, + "training_step_time": 0.11953258514404297 + }, + { + "epoch": 2.867584228515625e-05, + "model_forward_time": 0.02463698387145996, + "step": 18793 + }, + { + "epoch": 2.867584228515625e-05, + "step": 18793, + "training_step_time": 0.10654902458190918 + }, + { + "epoch": 2.86773681640625e-05, + "model_forward_time": 0.025615215301513672, + "step": 18794 + }, + { + "epoch": 2.86773681640625e-05, + "step": 18794, + "training_step_time": 0.12594366073608398 + }, + { + "epoch": 2.867889404296875e-05, + "model_forward_time": 0.024945497512817383, + "step": 18795 + }, + { + "epoch": 2.867889404296875e-05, + "step": 18795, + "training_step_time": 0.13231778144836426 + }, + { + "epoch": 2.8680419921875e-05, + "model_forward_time": 0.02504277229309082, + "step": 18796 + }, + { + "epoch": 2.8680419921875e-05, + "step": 18796, + "training_step_time": 0.1078488826751709 + }, + { + "epoch": 2.868194580078125e-05, + "model_forward_time": 0.025126934051513672, + "step": 18797 + }, + { + "epoch": 2.868194580078125e-05, + "step": 18797, + "training_step_time": 0.11077523231506348 + }, + { + "epoch": 2.86834716796875e-05, + "model_forward_time": 0.02547144889831543, + "step": 18798 + }, + { + "epoch": 2.86834716796875e-05, + "step": 18798, + "training_step_time": 0.12065887451171875 + }, + { + "epoch": 2.868499755859375e-05, + "model_forward_time": 0.025050640106201172, + "step": 18799 + }, + { + "epoch": 2.868499755859375e-05, + "step": 18799, + "training_step_time": 0.10941720008850098 + }, + { + "epoch": 2.86865234375e-05, + "grad_norm": 0.2032412737607956, + "learning_rate": 3.350462773622086e-05, + "loss": 0.0081, + "step": 18800 + }, + { + "epoch": 2.86865234375e-05, + "model_forward_time": 0.025215864181518555, + "step": 18800 + }, + { + "epoch": 2.86865234375e-05, + "step": 18800, + "training_step_time": 0.19737601280212402 + }, + { + "epoch": 2.868804931640625e-05, + "model_forward_time": 0.025456666946411133, + "step": 18801 + }, + { + "epoch": 2.868804931640625e-05, + "step": 18801, + "training_step_time": 0.1844949722290039 + }, + { + "epoch": 2.86895751953125e-05, + "model_forward_time": 0.023549556732177734, + "step": 18802 + }, + { + "epoch": 2.86895751953125e-05, + "step": 18802, + "training_step_time": 0.17142558097839355 + }, + { + "epoch": 2.869110107421875e-05, + "model_forward_time": 0.023768186569213867, + "step": 18803 + }, + { + "epoch": 2.869110107421875e-05, + "step": 18803, + "training_step_time": 0.14828705787658691 + }, + { + "epoch": 2.8692626953125e-05, + "model_forward_time": 0.0239255428314209, + "step": 18804 + }, + { + "epoch": 2.8692626953125e-05, + "step": 18804, + "training_step_time": 0.13823962211608887 + }, + { + "epoch": 2.869415283203125e-05, + "model_forward_time": 0.02364063262939453, + "step": 18805 + }, + { + "epoch": 2.869415283203125e-05, + "step": 18805, + "training_step_time": 0.13443231582641602 + }, + { + "epoch": 2.86956787109375e-05, + "model_forward_time": 0.02341628074645996, + "step": 18806 + }, + { + "epoch": 2.86956787109375e-05, + "step": 18806, + "training_step_time": 0.12470197677612305 + }, + { + "epoch": 2.869720458984375e-05, + "model_forward_time": 0.02453446388244629, + "step": 18807 + }, + { + "epoch": 2.869720458984375e-05, + "step": 18807, + "training_step_time": 0.12043285369873047 + }, + { + "epoch": 2.869873046875e-05, + "model_forward_time": 0.025365829467773438, + "step": 18808 + }, + { + "epoch": 2.869873046875e-05, + "step": 18808, + "training_step_time": 0.11916899681091309 + }, + { + "epoch": 2.870025634765625e-05, + "model_forward_time": 0.02422189712524414, + "step": 18809 + }, + { + "epoch": 2.870025634765625e-05, + "step": 18809, + "training_step_time": 0.11345839500427246 + }, + { + "epoch": 2.87017822265625e-05, + "grad_norm": 0.24520441889762878, + "learning_rate": 3.3452607852803584e-05, + "loss": 0.0102, + "step": 18810 + }, + { + "epoch": 2.87017822265625e-05, + "model_forward_time": 0.024350881576538086, + "step": 18810 + }, + { + "epoch": 2.87017822265625e-05, + "step": 18810, + "training_step_time": 0.11013674736022949 + }, + { + "epoch": 2.870330810546875e-05, + "model_forward_time": 0.0273134708404541, + "step": 18811 + }, + { + "epoch": 2.870330810546875e-05, + "step": 18811, + "training_step_time": 0.19591379165649414 + }, + { + "epoch": 2.8704833984375e-05, + "model_forward_time": 0.02513575553894043, + "step": 18812 + }, + { + "epoch": 2.8704833984375e-05, + "step": 18812, + "training_step_time": 0.12509989738464355 + }, + { + "epoch": 2.870635986328125e-05, + "model_forward_time": 0.0239107608795166, + "step": 18813 + }, + { + "epoch": 2.870635986328125e-05, + "step": 18813, + "training_step_time": 0.12426233291625977 + }, + { + "epoch": 2.87078857421875e-05, + "model_forward_time": 0.025073528289794922, + "step": 18814 + }, + { + "epoch": 2.87078857421875e-05, + "step": 18814, + "training_step_time": 0.13442611694335938 + }, + { + "epoch": 2.870941162109375e-05, + "model_forward_time": 0.02499532699584961, + "step": 18815 + }, + { + "epoch": 2.870941162109375e-05, + "step": 18815, + "training_step_time": 0.11906003952026367 + }, + { + "epoch": 2.87109375e-05, + "model_forward_time": 0.0249788761138916, + "step": 18816 + }, + { + "epoch": 2.87109375e-05, + "step": 18816, + "training_step_time": 0.1353907585144043 + }, + { + "epoch": 2.871246337890625e-05, + "model_forward_time": 0.025841474533081055, + "step": 18817 + }, + { + "epoch": 2.871246337890625e-05, + "step": 18817, + "training_step_time": 0.10914278030395508 + }, + { + "epoch": 2.87139892578125e-05, + "model_forward_time": 0.025089502334594727, + "step": 18818 + }, + { + "epoch": 2.87139892578125e-05, + "step": 18818, + "training_step_time": 0.10712289810180664 + }, + { + "epoch": 2.871551513671875e-05, + "model_forward_time": 0.025639057159423828, + "step": 18819 + }, + { + "epoch": 2.871551513671875e-05, + "step": 18819, + "training_step_time": 0.10757946968078613 + }, + { + "epoch": 2.8717041015625e-05, + "grad_norm": 0.2976893186569214, + "learning_rate": 3.3400608076028094e-05, + "loss": 0.011, + "step": 18820 + }, + { + "epoch": 2.8717041015625e-05, + "model_forward_time": 0.024344921112060547, + "step": 18820 + }, + { + "epoch": 2.8717041015625e-05, + "step": 18820, + "training_step_time": 0.14330577850341797 + }, + { + "epoch": 2.871856689453125e-05, + "model_forward_time": 0.024757862091064453, + "step": 18821 + }, + { + "epoch": 2.871856689453125e-05, + "step": 18821, + "training_step_time": 0.16858768463134766 + }, + { + "epoch": 2.87200927734375e-05, + "model_forward_time": 0.02429342269897461, + "step": 18822 + }, + { + "epoch": 2.87200927734375e-05, + "step": 18822, + "training_step_time": 0.1139528751373291 + }, + { + "epoch": 2.872161865234375e-05, + "model_forward_time": 0.02384352684020996, + "step": 18823 + }, + { + "epoch": 2.872161865234375e-05, + "step": 18823, + "training_step_time": 0.12934017181396484 + }, + { + "epoch": 2.872314453125e-05, + "model_forward_time": 0.025946617126464844, + "step": 18824 + }, + { + "epoch": 2.872314453125e-05, + "step": 18824, + "training_step_time": 0.21086716651916504 + }, + { + "epoch": 2.872467041015625e-05, + "model_forward_time": 0.024882793426513672, + "step": 18825 + }, + { + "epoch": 2.872467041015625e-05, + "step": 18825, + "training_step_time": 0.11085939407348633 + }, + { + "epoch": 2.87261962890625e-05, + "model_forward_time": 0.02462482452392578, + "step": 18826 + }, + { + "epoch": 2.87261962890625e-05, + "step": 18826, + "training_step_time": 0.10790777206420898 + }, + { + "epoch": 2.872772216796875e-05, + "model_forward_time": 0.02523350715637207, + "step": 18827 + }, + { + "epoch": 2.872772216796875e-05, + "step": 18827, + "training_step_time": 0.10863780975341797 + }, + { + "epoch": 2.8729248046875e-05, + "model_forward_time": 0.025264978408813477, + "step": 18828 + }, + { + "epoch": 2.8729248046875e-05, + "step": 18828, + "training_step_time": 0.10814142227172852 + }, + { + "epoch": 2.873077392578125e-05, + "model_forward_time": 0.024739742279052734, + "step": 18829 + }, + { + "epoch": 2.873077392578125e-05, + "step": 18829, + "training_step_time": 0.10726213455200195 + }, + { + "epoch": 2.87322998046875e-05, + "grad_norm": 0.14117495715618134, + "learning_rate": 3.3348628469079e-05, + "loss": 0.0074, + "step": 18830 + }, + { + "epoch": 2.87322998046875e-05, + "model_forward_time": 0.024840831756591797, + "step": 18830 + }, + { + "epoch": 2.87322998046875e-05, + "step": 18830, + "training_step_time": 0.1357409954071045 + }, + { + "epoch": 2.873382568359375e-05, + "model_forward_time": 0.025346040725708008, + "step": 18831 + }, + { + "epoch": 2.873382568359375e-05, + "step": 18831, + "training_step_time": 0.1106564998626709 + }, + { + "epoch": 2.87353515625e-05, + "model_forward_time": 0.0249481201171875, + "step": 18832 + }, + { + "epoch": 2.87353515625e-05, + "step": 18832, + "training_step_time": 0.11199545860290527 + }, + { + "epoch": 2.873687744140625e-05, + "model_forward_time": 0.02504134178161621, + "step": 18833 + }, + { + "epoch": 2.873687744140625e-05, + "step": 18833, + "training_step_time": 0.12252545356750488 + }, + { + "epoch": 2.87384033203125e-05, + "model_forward_time": 0.02536916732788086, + "step": 18834 + }, + { + "epoch": 2.87384033203125e-05, + "step": 18834, + "training_step_time": 0.13723444938659668 + }, + { + "epoch": 2.873992919921875e-05, + "model_forward_time": 0.02453160285949707, + "step": 18835 + }, + { + "epoch": 2.873992919921875e-05, + "step": 18835, + "training_step_time": 0.10772299766540527 + }, + { + "epoch": 2.8741455078125e-05, + "model_forward_time": 0.0253903865814209, + "step": 18836 + }, + { + "epoch": 2.8741455078125e-05, + "step": 18836, + "training_step_time": 0.11319422721862793 + }, + { + "epoch": 2.874298095703125e-05, + "model_forward_time": 0.024934768676757812, + "step": 18837 + }, + { + "epoch": 2.874298095703125e-05, + "step": 18837, + "training_step_time": 0.18870186805725098 + }, + { + "epoch": 2.87445068359375e-05, + "model_forward_time": 0.024133920669555664, + "step": 18838 + }, + { + "epoch": 2.87445068359375e-05, + "step": 18838, + "training_step_time": 0.13958191871643066 + }, + { + "epoch": 2.874603271484375e-05, + "model_forward_time": 0.02398085594177246, + "step": 18839 + }, + { + "epoch": 2.874603271484375e-05, + "step": 18839, + "training_step_time": 0.1088249683380127 + }, + { + "epoch": 2.874755859375e-05, + "grad_norm": 0.34608063101768494, + "learning_rate": 3.329666909511645e-05, + "loss": 0.0085, + "step": 18840 + }, + { + "epoch": 2.874755859375e-05, + "model_forward_time": 0.024825572967529297, + "step": 18840 + }, + { + "epoch": 2.874755859375e-05, + "step": 18840, + "training_step_time": 0.10867643356323242 + }, + { + "epoch": 2.874908447265625e-05, + "model_forward_time": 0.025487661361694336, + "step": 18841 + }, + { + "epoch": 2.874908447265625e-05, + "step": 18841, + "training_step_time": 0.11101269721984863 + }, + { + "epoch": 2.87506103515625e-05, + "model_forward_time": 0.025203704833984375, + "step": 18842 + }, + { + "epoch": 2.87506103515625e-05, + "step": 18842, + "training_step_time": 0.15793371200561523 + }, + { + "epoch": 2.875213623046875e-05, + "model_forward_time": 0.025016307830810547, + "step": 18843 + }, + { + "epoch": 2.875213623046875e-05, + "step": 18843, + "training_step_time": 0.1491403579711914 + }, + { + "epoch": 2.8753662109375e-05, + "model_forward_time": 0.023955821990966797, + "step": 18844 + }, + { + "epoch": 2.8753662109375e-05, + "step": 18844, + "training_step_time": 0.10569334030151367 + }, + { + "epoch": 2.875518798828125e-05, + "model_forward_time": 0.024773597717285156, + "step": 18845 + }, + { + "epoch": 2.875518798828125e-05, + "step": 18845, + "training_step_time": 0.1034698486328125 + }, + { + "epoch": 2.87567138671875e-05, + "model_forward_time": 0.025126218795776367, + "step": 18846 + }, + { + "epoch": 2.87567138671875e-05, + "step": 18846, + "training_step_time": 0.10629153251647949 + }, + { + "epoch": 2.875823974609375e-05, + "model_forward_time": 0.02525019645690918, + "step": 18847 + }, + { + "epoch": 2.875823974609375e-05, + "step": 18847, + "training_step_time": 0.10510468482971191 + }, + { + "epoch": 2.8759765625e-05, + "model_forward_time": 0.02504134178161621, + "step": 18848 + }, + { + "epoch": 2.8759765625e-05, + "step": 18848, + "training_step_time": 0.1090080738067627 + }, + { + "epoch": 2.876129150390625e-05, + "model_forward_time": 0.025463581085205078, + "step": 18849 + }, + { + "epoch": 2.876129150390625e-05, + "step": 18849, + "training_step_time": 0.10808038711547852 + }, + { + "epoch": 2.87628173828125e-05, + "grad_norm": 0.21739520132541656, + "learning_rate": 3.324473001727597e-05, + "loss": 0.0102, + "step": 18850 + }, + { + "epoch": 2.87628173828125e-05, + "model_forward_time": 0.025060176849365234, + "step": 18850 + }, + { + "epoch": 2.87628173828125e-05, + "step": 18850, + "training_step_time": 0.10759091377258301 + }, + { + "epoch": 2.876434326171875e-05, + "model_forward_time": 0.024988174438476562, + "step": 18851 + }, + { + "epoch": 2.876434326171875e-05, + "step": 18851, + "training_step_time": 0.11188125610351562 + }, + { + "epoch": 2.8765869140625e-05, + "model_forward_time": 0.02524566650390625, + "step": 18852 + }, + { + "epoch": 2.8765869140625e-05, + "step": 18852, + "training_step_time": 0.11302471160888672 + }, + { + "epoch": 2.876739501953125e-05, + "model_forward_time": 0.02375006675720215, + "step": 18853 + }, + { + "epoch": 2.876739501953125e-05, + "step": 18853, + "training_step_time": 0.1068568229675293 + }, + { + "epoch": 2.87689208984375e-05, + "model_forward_time": 0.02502727508544922, + "step": 18854 + }, + { + "epoch": 2.87689208984375e-05, + "step": 18854, + "training_step_time": 0.11102986335754395 + }, + { + "epoch": 2.877044677734375e-05, + "model_forward_time": 0.027033090591430664, + "step": 18855 + }, + { + "epoch": 2.877044677734375e-05, + "step": 18855, + "training_step_time": 0.11098694801330566 + }, + { + "epoch": 2.877197265625e-05, + "model_forward_time": 0.02537369728088379, + "step": 18856 + }, + { + "epoch": 2.877197265625e-05, + "step": 18856, + "training_step_time": 0.11242079734802246 + }, + { + "epoch": 2.877349853515625e-05, + "model_forward_time": 0.025435686111450195, + "step": 18857 + }, + { + "epoch": 2.877349853515625e-05, + "step": 18857, + "training_step_time": 0.14428019523620605 + }, + { + "epoch": 2.87750244140625e-05, + "model_forward_time": 0.02502298355102539, + "step": 18858 + }, + { + "epoch": 2.87750244140625e-05, + "step": 18858, + "training_step_time": 0.11732602119445801 + }, + { + "epoch": 2.877655029296875e-05, + "model_forward_time": 0.024841785430908203, + "step": 18859 + }, + { + "epoch": 2.877655029296875e-05, + "step": 18859, + "training_step_time": 0.12676048278808594 + }, + { + "epoch": 2.8778076171875e-05, + "grad_norm": 0.25564438104629517, + "learning_rate": 3.3192811298668434e-05, + "loss": 0.0134, + "step": 18860 + }, + { + "epoch": 2.8778076171875e-05, + "model_forward_time": 0.024605751037597656, + "step": 18860 + }, + { + "epoch": 2.8778076171875e-05, + "step": 18860, + "training_step_time": 0.16477441787719727 + }, + { + "epoch": 2.877960205078125e-05, + "model_forward_time": 0.024392366409301758, + "step": 18861 + }, + { + "epoch": 2.877960205078125e-05, + "step": 18861, + "training_step_time": 0.2183387279510498 + }, + { + "epoch": 2.87811279296875e-05, + "model_forward_time": 0.02406620979309082, + "step": 18862 + }, + { + "epoch": 2.87811279296875e-05, + "step": 18862, + "training_step_time": 0.11933612823486328 + }, + { + "epoch": 2.878265380859375e-05, + "model_forward_time": 0.02436375617980957, + "step": 18863 + }, + { + "epoch": 2.878265380859375e-05, + "step": 18863, + "training_step_time": 0.10696196556091309 + }, + { + "epoch": 2.87841796875e-05, + "model_forward_time": 0.024748802185058594, + "step": 18864 + }, + { + "epoch": 2.87841796875e-05, + "step": 18864, + "training_step_time": 0.10242009162902832 + }, + { + "epoch": 2.878570556640625e-05, + "model_forward_time": 0.023896455764770508, + "step": 18865 + }, + { + "epoch": 2.878570556640625e-05, + "step": 18865, + "training_step_time": 0.13183164596557617 + }, + { + "epoch": 2.87872314453125e-05, + "model_forward_time": 0.02444171905517578, + "step": 18866 + }, + { + "epoch": 2.87872314453125e-05, + "step": 18866, + "training_step_time": 0.12638354301452637 + }, + { + "epoch": 2.878875732421875e-05, + "model_forward_time": 0.024751901626586914, + "step": 18867 + }, + { + "epoch": 2.878875732421875e-05, + "step": 18867, + "training_step_time": 0.10514569282531738 + }, + { + "epoch": 2.8790283203125e-05, + "model_forward_time": 0.025467872619628906, + "step": 18868 + }, + { + "epoch": 2.8790283203125e-05, + "step": 18868, + "training_step_time": 0.10662603378295898 + }, + { + "epoch": 2.879180908203125e-05, + "model_forward_time": 0.026114702224731445, + "step": 18869 + }, + { + "epoch": 2.879180908203125e-05, + "step": 18869, + "training_step_time": 0.12800121307373047 + }, + { + "epoch": 2.87933349609375e-05, + "grad_norm": 0.2927517592906952, + "learning_rate": 3.3140913002379995e-05, + "loss": 0.0118, + "step": 18870 + }, + { + "epoch": 2.87933349609375e-05, + "model_forward_time": 0.025313854217529297, + "step": 18870 + }, + { + "epoch": 2.87933349609375e-05, + "step": 18870, + "training_step_time": 0.20552921295166016 + }, + { + "epoch": 2.879486083984375e-05, + "model_forward_time": 0.0244293212890625, + "step": 18871 + }, + { + "epoch": 2.879486083984375e-05, + "step": 18871, + "training_step_time": 0.1035158634185791 + }, + { + "epoch": 2.879638671875e-05, + "model_forward_time": 0.02451610565185547, + "step": 18872 + }, + { + "epoch": 2.879638671875e-05, + "step": 18872, + "training_step_time": 0.10397028923034668 + }, + { + "epoch": 2.879791259765625e-05, + "model_forward_time": 0.02529168128967285, + "step": 18873 + }, + { + "epoch": 2.879791259765625e-05, + "step": 18873, + "training_step_time": 0.10483884811401367 + }, + { + "epoch": 2.87994384765625e-05, + "model_forward_time": 0.025180816650390625, + "step": 18874 + }, + { + "epoch": 2.87994384765625e-05, + "step": 18874, + "training_step_time": 0.10379862785339355 + }, + { + "epoch": 2.880096435546875e-05, + "model_forward_time": 0.02538132667541504, + "step": 18875 + }, + { + "epoch": 2.880096435546875e-05, + "step": 18875, + "training_step_time": 0.20954585075378418 + }, + { + "epoch": 2.8802490234375e-05, + "model_forward_time": 0.02449345588684082, + "step": 18876 + }, + { + "epoch": 2.8802490234375e-05, + "step": 18876, + "training_step_time": 0.10798287391662598 + }, + { + "epoch": 2.880401611328125e-05, + "model_forward_time": 0.024219989776611328, + "step": 18877 + }, + { + "epoch": 2.880401611328125e-05, + "step": 18877, + "training_step_time": 0.11140227317810059 + }, + { + "epoch": 2.88055419921875e-05, + "model_forward_time": 0.02522587776184082, + "step": 18878 + }, + { + "epoch": 2.88055419921875e-05, + "step": 18878, + "training_step_time": 0.12182855606079102 + }, + { + "epoch": 2.880706787109375e-05, + "model_forward_time": 0.024939775466918945, + "step": 18879 + }, + { + "epoch": 2.880706787109375e-05, + "step": 18879, + "training_step_time": 0.1306607723236084 + }, + { + "epoch": 2.880859375e-05, + "grad_norm": 0.17261019349098206, + "learning_rate": 3.308903519147194e-05, + "loss": 0.0082, + "step": 18880 + }, + { + "epoch": 2.880859375e-05, + "model_forward_time": 0.02469038963317871, + "step": 18880 + }, + { + "epoch": 2.880859375e-05, + "step": 18880, + "training_step_time": 0.10517716407775879 + }, + { + "epoch": 2.881011962890625e-05, + "model_forward_time": 0.025018930435180664, + "step": 18881 + }, + { + "epoch": 2.881011962890625e-05, + "step": 18881, + "training_step_time": 0.11643123626708984 + }, + { + "epoch": 2.88116455078125e-05, + "model_forward_time": 0.0251157283782959, + "step": 18882 + }, + { + "epoch": 2.88116455078125e-05, + "step": 18882, + "training_step_time": 0.12379169464111328 + }, + { + "epoch": 2.881317138671875e-05, + "model_forward_time": 0.025612831115722656, + "step": 18883 + }, + { + "epoch": 2.881317138671875e-05, + "step": 18883, + "training_step_time": 0.143751859664917 + }, + { + "epoch": 2.8814697265625e-05, + "model_forward_time": 0.024871349334716797, + "step": 18884 + }, + { + "epoch": 2.8814697265625e-05, + "step": 18884, + "training_step_time": 0.13404083251953125 + }, + { + "epoch": 2.881622314453125e-05, + "model_forward_time": 0.024524688720703125, + "step": 18885 + }, + { + "epoch": 2.881622314453125e-05, + "step": 18885, + "training_step_time": 0.19545984268188477 + }, + { + "epoch": 2.88177490234375e-05, + "model_forward_time": 0.02385711669921875, + "step": 18886 + }, + { + "epoch": 2.88177490234375e-05, + "step": 18886, + "training_step_time": 0.10447454452514648 + }, + { + "epoch": 2.881927490234375e-05, + "model_forward_time": 0.024504899978637695, + "step": 18887 + }, + { + "epoch": 2.881927490234375e-05, + "step": 18887, + "training_step_time": 0.10370159149169922 + }, + { + "epoch": 2.882080078125e-05, + "model_forward_time": 0.025098562240600586, + "step": 18888 + }, + { + "epoch": 2.882080078125e-05, + "step": 18888, + "training_step_time": 0.10895490646362305 + }, + { + "epoch": 2.882232666015625e-05, + "model_forward_time": 0.02541327476501465, + "step": 18889 + }, + { + "epoch": 2.882232666015625e-05, + "step": 18889, + "training_step_time": 0.1052088737487793 + }, + { + "epoch": 2.88238525390625e-05, + "grad_norm": 0.3890341520309448, + "learning_rate": 3.3037177928980735e-05, + "loss": 0.009, + "step": 18890 + }, + { + "epoch": 2.88238525390625e-05, + "model_forward_time": 0.025043010711669922, + "step": 18890 + }, + { + "epoch": 2.88238525390625e-05, + "step": 18890, + "training_step_time": 0.10892295837402344 + }, + { + "epoch": 2.882537841796875e-05, + "model_forward_time": 0.024786949157714844, + "step": 18891 + }, + { + "epoch": 2.882537841796875e-05, + "step": 18891, + "training_step_time": 0.10414409637451172 + }, + { + "epoch": 2.8826904296875e-05, + "model_forward_time": 0.024820566177368164, + "step": 18892 + }, + { + "epoch": 2.8826904296875e-05, + "step": 18892, + "training_step_time": 0.11092233657836914 + }, + { + "epoch": 2.882843017578125e-05, + "model_forward_time": 0.025178194046020508, + "step": 18893 + }, + { + "epoch": 2.882843017578125e-05, + "step": 18893, + "training_step_time": 0.1123661994934082 + }, + { + "epoch": 2.88299560546875e-05, + "model_forward_time": 0.02521991729736328, + "step": 18894 + }, + { + "epoch": 2.88299560546875e-05, + "step": 18894, + "training_step_time": 0.13781023025512695 + }, + { + "epoch": 2.883148193359375e-05, + "model_forward_time": 0.024681806564331055, + "step": 18895 + }, + { + "epoch": 2.883148193359375e-05, + "step": 18895, + "training_step_time": 0.15827655792236328 + }, + { + "epoch": 2.88330078125e-05, + "model_forward_time": 0.024061203002929688, + "step": 18896 + }, + { + "epoch": 2.88330078125e-05, + "step": 18896, + "training_step_time": 0.14672541618347168 + }, + { + "epoch": 2.883453369140625e-05, + "model_forward_time": 0.024347782135009766, + "step": 18897 + }, + { + "epoch": 2.883453369140625e-05, + "step": 18897, + "training_step_time": 0.12808585166931152 + }, + { + "epoch": 2.88360595703125e-05, + "model_forward_time": 0.024402379989624023, + "step": 18898 + }, + { + "epoch": 2.88360595703125e-05, + "step": 18898, + "training_step_time": 0.12591242790222168 + }, + { + "epoch": 2.883758544921875e-05, + "model_forward_time": 0.024922609329223633, + "step": 18899 + }, + { + "epoch": 2.883758544921875e-05, + "step": 18899, + "training_step_time": 0.11777973175048828 + }, + { + "epoch": 2.8839111328125e-05, + "grad_norm": 0.12064553052186966, + "learning_rate": 3.298534127791785e-05, + "loss": 0.0209, + "step": 18900 + }, + { + "epoch": 2.8839111328125e-05, + "model_forward_time": 0.025566577911376953, + "step": 18900 + }, + { + "epoch": 2.8839111328125e-05, + "step": 18900, + "training_step_time": 0.22234463691711426 + }, + { + "epoch": 2.884063720703125e-05, + "model_forward_time": 0.024265766143798828, + "step": 18901 + }, + { + "epoch": 2.884063720703125e-05, + "step": 18901, + "training_step_time": 0.11425304412841797 + }, + { + "epoch": 2.88421630859375e-05, + "model_forward_time": 0.024271249771118164, + "step": 18902 + }, + { + "epoch": 2.88421630859375e-05, + "step": 18902, + "training_step_time": 0.13135027885437012 + }, + { + "epoch": 2.884368896484375e-05, + "model_forward_time": 0.024553298950195312, + "step": 18903 + }, + { + "epoch": 2.884368896484375e-05, + "step": 18903, + "training_step_time": 0.10552382469177246 + }, + { + "epoch": 2.884521484375e-05, + "model_forward_time": 0.0251922607421875, + "step": 18904 + }, + { + "epoch": 2.884521484375e-05, + "step": 18904, + "training_step_time": 0.15097498893737793 + }, + { + "epoch": 2.884674072265625e-05, + "model_forward_time": 0.024839162826538086, + "step": 18905 + }, + { + "epoch": 2.884674072265625e-05, + "step": 18905, + "training_step_time": 0.12385249137878418 + }, + { + "epoch": 2.88482666015625e-05, + "model_forward_time": 0.02461862564086914, + "step": 18906 + }, + { + "epoch": 2.88482666015625e-05, + "step": 18906, + "training_step_time": 0.1228024959564209 + }, + { + "epoch": 2.884979248046875e-05, + "model_forward_time": 0.025215625762939453, + "step": 18907 + }, + { + "epoch": 2.884979248046875e-05, + "step": 18907, + "training_step_time": 0.11106204986572266 + }, + { + "epoch": 2.8851318359375e-05, + "model_forward_time": 0.025383949279785156, + "step": 18908 + }, + { + "epoch": 2.8851318359375e-05, + "step": 18908, + "training_step_time": 0.10761308670043945 + }, + { + "epoch": 2.885284423828125e-05, + "model_forward_time": 0.025107622146606445, + "step": 18909 + }, + { + "epoch": 2.885284423828125e-05, + "step": 18909, + "training_step_time": 0.10526371002197266 + }, + { + "epoch": 2.88543701171875e-05, + "grad_norm": 0.14067591726779938, + "learning_rate": 3.2933525301269684e-05, + "loss": 0.0062, + "step": 18910 + }, + { + "epoch": 2.88543701171875e-05, + "model_forward_time": 0.02489185333251953, + "step": 18910 + }, + { + "epoch": 2.88543701171875e-05, + "step": 18910, + "training_step_time": 0.11405158042907715 + }, + { + "epoch": 2.885589599609375e-05, + "model_forward_time": 0.02472662925720215, + "step": 18911 + }, + { + "epoch": 2.885589599609375e-05, + "step": 18911, + "training_step_time": 0.11493587493896484 + }, + { + "epoch": 2.8857421875e-05, + "model_forward_time": 0.025727272033691406, + "step": 18912 + }, + { + "epoch": 2.8857421875e-05, + "step": 18912, + "training_step_time": 0.10759878158569336 + }, + { + "epoch": 2.885894775390625e-05, + "model_forward_time": 0.02562856674194336, + "step": 18913 + }, + { + "epoch": 2.885894775390625e-05, + "step": 18913, + "training_step_time": 0.12086153030395508 + }, + { + "epoch": 2.88604736328125e-05, + "model_forward_time": 0.025790929794311523, + "step": 18914 + }, + { + "epoch": 2.88604736328125e-05, + "step": 18914, + "training_step_time": 0.11515378952026367 + }, + { + "epoch": 2.886199951171875e-05, + "model_forward_time": 0.02579212188720703, + "step": 18915 + }, + { + "epoch": 2.886199951171875e-05, + "step": 18915, + "training_step_time": 0.11622762680053711 + }, + { + "epoch": 2.8863525390625e-05, + "model_forward_time": 0.02542877197265625, + "step": 18916 + }, + { + "epoch": 2.8863525390625e-05, + "step": 18916, + "training_step_time": 0.11065387725830078 + }, + { + "epoch": 2.886505126953125e-05, + "model_forward_time": 0.025629520416259766, + "step": 18917 + }, + { + "epoch": 2.886505126953125e-05, + "step": 18917, + "training_step_time": 0.10785579681396484 + }, + { + "epoch": 2.88665771484375e-05, + "model_forward_time": 0.025242090225219727, + "step": 18918 + }, + { + "epoch": 2.88665771484375e-05, + "step": 18918, + "training_step_time": 0.10523772239685059 + }, + { + "epoch": 2.886810302734375e-05, + "model_forward_time": 0.02515697479248047, + "step": 18919 + }, + { + "epoch": 2.886810302734375e-05, + "step": 18919, + "training_step_time": 0.10547852516174316 + }, + { + "epoch": 2.886962890625e-05, + "grad_norm": 0.22273331880569458, + "learning_rate": 3.288173006199755e-05, + "loss": 0.0078, + "step": 18920 + }, + { + "epoch": 2.886962890625e-05, + "model_forward_time": 0.025501251220703125, + "step": 18920 + }, + { + "epoch": 2.886962890625e-05, + "step": 18920, + "training_step_time": 0.1068274974822998 + }, + { + "epoch": 2.887115478515625e-05, + "model_forward_time": 0.024096965789794922, + "step": 18921 + }, + { + "epoch": 2.887115478515625e-05, + "step": 18921, + "training_step_time": 0.18000006675720215 + }, + { + "epoch": 2.88726806640625e-05, + "model_forward_time": 0.02391815185546875, + "step": 18922 + }, + { + "epoch": 2.88726806640625e-05, + "step": 18922, + "training_step_time": 0.1116325855255127 + }, + { + "epoch": 2.887420654296875e-05, + "model_forward_time": 0.023923873901367188, + "step": 18923 + }, + { + "epoch": 2.887420654296875e-05, + "step": 18923, + "training_step_time": 0.11114668846130371 + }, + { + "epoch": 2.8875732421875e-05, + "model_forward_time": 0.0239408016204834, + "step": 18924 + }, + { + "epoch": 2.8875732421875e-05, + "step": 18924, + "training_step_time": 0.12334465980529785 + }, + { + "epoch": 2.887725830078125e-05, + "model_forward_time": 0.0251007080078125, + "step": 18925 + }, + { + "epoch": 2.887725830078125e-05, + "step": 18925, + "training_step_time": 0.11938762664794922 + }, + { + "epoch": 2.88787841796875e-05, + "model_forward_time": 0.02538275718688965, + "step": 18926 + }, + { + "epoch": 2.88787841796875e-05, + "step": 18926, + "training_step_time": 0.12231326103210449 + }, + { + "epoch": 2.888031005859375e-05, + "model_forward_time": 0.02518010139465332, + "step": 18927 + }, + { + "epoch": 2.888031005859375e-05, + "step": 18927, + "training_step_time": 0.18668341636657715 + }, + { + "epoch": 2.88818359375e-05, + "model_forward_time": 0.025701045989990234, + "step": 18928 + }, + { + "epoch": 2.88818359375e-05, + "step": 18928, + "training_step_time": 0.13675308227539062 + }, + { + "epoch": 2.888336181640625e-05, + "model_forward_time": 0.024899721145629883, + "step": 18929 + }, + { + "epoch": 2.888336181640625e-05, + "step": 18929, + "training_step_time": 0.10778069496154785 + }, + { + "epoch": 2.88848876953125e-05, + "grad_norm": 0.2892070412635803, + "learning_rate": 3.282995562303754e-05, + "loss": 0.0125, + "step": 18930 + }, + { + "epoch": 2.88848876953125e-05, + "model_forward_time": 0.025589704513549805, + "step": 18930 + }, + { + "epoch": 2.88848876953125e-05, + "step": 18930, + "training_step_time": 0.1076207160949707 + }, + { + "epoch": 2.888641357421875e-05, + "model_forward_time": 0.025669336318969727, + "step": 18931 + }, + { + "epoch": 2.888641357421875e-05, + "step": 18931, + "training_step_time": 0.11063218116760254 + }, + { + "epoch": 2.8887939453125e-05, + "model_forward_time": 0.02666759490966797, + "step": 18932 + }, + { + "epoch": 2.8887939453125e-05, + "step": 18932, + "training_step_time": 0.10848879814147949 + }, + { + "epoch": 2.888946533203125e-05, + "model_forward_time": 0.025424480438232422, + "step": 18933 + }, + { + "epoch": 2.888946533203125e-05, + "step": 18933, + "training_step_time": 0.194899320602417 + }, + { + "epoch": 2.88909912109375e-05, + "model_forward_time": 0.024498701095581055, + "step": 18934 + }, + { + "epoch": 2.88909912109375e-05, + "step": 18934, + "training_step_time": 0.10633397102355957 + }, + { + "epoch": 2.889251708984375e-05, + "model_forward_time": 0.02478957176208496, + "step": 18935 + }, + { + "epoch": 2.889251708984375e-05, + "step": 18935, + "training_step_time": 0.10169696807861328 + }, + { + "epoch": 2.889404296875e-05, + "model_forward_time": 0.025592565536499023, + "step": 18936 + }, + { + "epoch": 2.889404296875e-05, + "step": 18936, + "training_step_time": 0.10772538185119629 + }, + { + "epoch": 2.889556884765625e-05, + "model_forward_time": 0.025572538375854492, + "step": 18937 + }, + { + "epoch": 2.889556884765625e-05, + "step": 18937, + "training_step_time": 0.10413980484008789 + }, + { + "epoch": 2.88970947265625e-05, + "model_forward_time": 0.025280475616455078, + "step": 18938 + }, + { + "epoch": 2.88970947265625e-05, + "step": 18938, + "training_step_time": 0.10274791717529297 + }, + { + "epoch": 2.889862060546875e-05, + "model_forward_time": 0.02498912811279297, + "step": 18939 + }, + { + "epoch": 2.889862060546875e-05, + "step": 18939, + "training_step_time": 0.10342025756835938 + }, + { + "epoch": 2.8900146484375e-05, + "grad_norm": 0.3651007115840912, + "learning_rate": 3.2778202047300444e-05, + "loss": 0.0068, + "step": 18940 + }, + { + "epoch": 2.8900146484375e-05, + "model_forward_time": 0.02526116371154785, + "step": 18940 + }, + { + "epoch": 2.8900146484375e-05, + "step": 18940, + "training_step_time": 0.10467076301574707 + }, + { + "epoch": 2.890167236328125e-05, + "model_forward_time": 0.025110960006713867, + "step": 18941 + }, + { + "epoch": 2.890167236328125e-05, + "step": 18941, + "training_step_time": 0.10449719429016113 + }, + { + "epoch": 2.89031982421875e-05, + "model_forward_time": 0.025330305099487305, + "step": 18942 + }, + { + "epoch": 2.89031982421875e-05, + "step": 18942, + "training_step_time": 0.10906863212585449 + }, + { + "epoch": 2.890472412109375e-05, + "model_forward_time": 0.024963855743408203, + "step": 18943 + }, + { + "epoch": 2.890472412109375e-05, + "step": 18943, + "training_step_time": 0.10756993293762207 + }, + { + "epoch": 2.890625e-05, + "model_forward_time": 0.02512216567993164, + "step": 18944 + }, + { + "epoch": 2.890625e-05, + "step": 18944, + "training_step_time": 0.1048429012298584 + }, + { + "epoch": 2.890777587890625e-05, + "model_forward_time": 0.025414705276489258, + "step": 18945 + }, + { + "epoch": 2.890777587890625e-05, + "step": 18945, + "training_step_time": 0.10666513442993164 + }, + { + "epoch": 2.89093017578125e-05, + "model_forward_time": 0.02587747573852539, + "step": 18946 + }, + { + "epoch": 2.89093017578125e-05, + "step": 18946, + "training_step_time": 0.10871744155883789 + }, + { + "epoch": 2.891082763671875e-05, + "model_forward_time": 0.025266170501708984, + "step": 18947 + }, + { + "epoch": 2.891082763671875e-05, + "step": 18947, + "training_step_time": 0.10972309112548828 + }, + { + "epoch": 2.8912353515625e-05, + "model_forward_time": 0.026609420776367188, + "step": 18948 + }, + { + "epoch": 2.8912353515625e-05, + "step": 18948, + "training_step_time": 0.12633204460144043 + }, + { + "epoch": 2.891387939453125e-05, + "model_forward_time": 0.025199413299560547, + "step": 18949 + }, + { + "epoch": 2.891387939453125e-05, + "step": 18949, + "training_step_time": 0.11192011833190918 + }, + { + "epoch": 2.89154052734375e-05, + "grad_norm": 0.21987299621105194, + "learning_rate": 3.272646939767179e-05, + "loss": 0.0062, + "step": 18950 + }, + { + "epoch": 2.89154052734375e-05, + "model_forward_time": 0.02509331703186035, + "step": 18950 + }, + { + "epoch": 2.89154052734375e-05, + "step": 18950, + "training_step_time": 0.1367940902709961 + }, + { + "epoch": 2.891693115234375e-05, + "model_forward_time": 0.024996042251586914, + "step": 18951 + }, + { + "epoch": 2.891693115234375e-05, + "step": 18951, + "training_step_time": 0.16524410247802734 + }, + { + "epoch": 2.891845703125e-05, + "model_forward_time": 0.02559947967529297, + "step": 18952 + }, + { + "epoch": 2.891845703125e-05, + "step": 18952, + "training_step_time": 0.21694207191467285 + }, + { + "epoch": 2.891998291015625e-05, + "model_forward_time": 0.02449488639831543, + "step": 18953 + }, + { + "epoch": 2.891998291015625e-05, + "step": 18953, + "training_step_time": 0.1099400520324707 + }, + { + "epoch": 2.89215087890625e-05, + "model_forward_time": 0.024363994598388672, + "step": 18954 + }, + { + "epoch": 2.89215087890625e-05, + "step": 18954, + "training_step_time": 0.10453367233276367 + }, + { + "epoch": 2.892303466796875e-05, + "model_forward_time": 0.024936914443969727, + "step": 18955 + }, + { + "epoch": 2.892303466796875e-05, + "step": 18955, + "training_step_time": 0.10587525367736816 + }, + { + "epoch": 2.8924560546875e-05, + "model_forward_time": 0.024736881256103516, + "step": 18956 + }, + { + "epoch": 2.8924560546875e-05, + "step": 18956, + "training_step_time": 0.10306644439697266 + }, + { + "epoch": 2.892608642578125e-05, + "model_forward_time": 0.02451920509338379, + "step": 18957 + }, + { + "epoch": 2.892608642578125e-05, + "step": 18957, + "training_step_time": 0.1050574779510498 + }, + { + "epoch": 2.89276123046875e-05, + "model_forward_time": 0.024784326553344727, + "step": 18958 + }, + { + "epoch": 2.89276123046875e-05, + "step": 18958, + "training_step_time": 0.11274957656860352 + }, + { + "epoch": 2.892913818359375e-05, + "model_forward_time": 0.02568531036376953, + "step": 18959 + }, + { + "epoch": 2.892913818359375e-05, + "step": 18959, + "training_step_time": 0.1206810474395752 + }, + { + "epoch": 2.89306640625e-05, + "grad_norm": 0.28759485483169556, + "learning_rate": 3.267475773701161e-05, + "loss": 0.011, + "step": 18960 + }, + { + "epoch": 2.89306640625e-05, + "model_forward_time": 0.025647401809692383, + "step": 18960 + }, + { + "epoch": 2.89306640625e-05, + "step": 18960, + "training_step_time": 0.10648632049560547 + }, + { + "epoch": 2.893218994140625e-05, + "model_forward_time": 0.025524139404296875, + "step": 18961 + }, + { + "epoch": 2.893218994140625e-05, + "step": 18961, + "training_step_time": 0.23932456970214844 + }, + { + "epoch": 2.89337158203125e-05, + "model_forward_time": 0.024968385696411133, + "step": 18962 + }, + { + "epoch": 2.89337158203125e-05, + "step": 18962, + "training_step_time": 0.2108609676361084 + }, + { + "epoch": 2.893524169921875e-05, + "model_forward_time": 0.024247169494628906, + "step": 18963 + }, + { + "epoch": 2.893524169921875e-05, + "step": 18963, + "training_step_time": 0.21035385131835938 + }, + { + "epoch": 2.8936767578125e-05, + "model_forward_time": 0.024971485137939453, + "step": 18964 + }, + { + "epoch": 2.8936767578125e-05, + "step": 18964, + "training_step_time": 0.2032630443572998 + }, + { + "epoch": 2.893829345703125e-05, + "model_forward_time": 0.028270721435546875, + "step": 18965 + }, + { + "epoch": 2.893829345703125e-05, + "step": 18965, + "training_step_time": 0.20017123222351074 + }, + { + "epoch": 2.89398193359375e-05, + "model_forward_time": 0.024212360382080078, + "step": 18966 + }, + { + "epoch": 2.89398193359375e-05, + "step": 18966, + "training_step_time": 0.19023537635803223 + }, + { + "epoch": 2.894134521484375e-05, + "model_forward_time": 0.024563312530517578, + "step": 18967 + }, + { + "epoch": 2.894134521484375e-05, + "step": 18967, + "training_step_time": 0.21092939376831055 + }, + { + "epoch": 2.894287109375e-05, + "model_forward_time": 0.02434849739074707, + "step": 18968 + }, + { + "epoch": 2.894287109375e-05, + "step": 18968, + "training_step_time": 0.17253661155700684 + }, + { + "epoch": 2.894439697265625e-05, + "model_forward_time": 0.024616241455078125, + "step": 18969 + }, + { + "epoch": 2.894439697265625e-05, + "step": 18969, + "training_step_time": 0.1607038974761963 + }, + { + "epoch": 2.89459228515625e-05, + "grad_norm": 0.33754733204841614, + "learning_rate": 3.262306712815444e-05, + "loss": 0.0106, + "step": 18970 + }, + { + "epoch": 2.89459228515625e-05, + "model_forward_time": 0.0240936279296875, + "step": 18970 + }, + { + "epoch": 2.89459228515625e-05, + "step": 18970, + "training_step_time": 0.11397457122802734 + }, + { + "epoch": 2.894744873046875e-05, + "model_forward_time": 0.025021076202392578, + "step": 18971 + }, + { + "epoch": 2.894744873046875e-05, + "step": 18971, + "training_step_time": 0.10851359367370605 + }, + { + "epoch": 2.8948974609375e-05, + "model_forward_time": 0.025980472564697266, + "step": 18972 + }, + { + "epoch": 2.8948974609375e-05, + "step": 18972, + "training_step_time": 0.11702823638916016 + }, + { + "epoch": 2.895050048828125e-05, + "model_forward_time": 0.025939464569091797, + "step": 18973 + }, + { + "epoch": 2.895050048828125e-05, + "step": 18973, + "training_step_time": 0.10886120796203613 + }, + { + "epoch": 2.89520263671875e-05, + "model_forward_time": 0.025954484939575195, + "step": 18974 + }, + { + "epoch": 2.89520263671875e-05, + "step": 18974, + "training_step_time": 0.10896635055541992 + }, + { + "epoch": 2.895355224609375e-05, + "model_forward_time": 0.025478601455688477, + "step": 18975 + }, + { + "epoch": 2.895355224609375e-05, + "step": 18975, + "training_step_time": 0.19779515266418457 + }, + { + "epoch": 2.8955078125e-05, + "model_forward_time": 0.02449941635131836, + "step": 18976 + }, + { + "epoch": 2.8955078125e-05, + "step": 18976, + "training_step_time": 0.10136103630065918 + }, + { + "epoch": 2.895660400390625e-05, + "model_forward_time": 0.024809598922729492, + "step": 18977 + }, + { + "epoch": 2.895660400390625e-05, + "step": 18977, + "training_step_time": 0.10315537452697754 + }, + { + "epoch": 2.89581298828125e-05, + "model_forward_time": 0.027747631072998047, + "step": 18978 + }, + { + "epoch": 2.89581298828125e-05, + "step": 18978, + "training_step_time": 0.10747432708740234 + }, + { + "epoch": 2.895965576171875e-05, + "model_forward_time": 0.025395631790161133, + "step": 18979 + }, + { + "epoch": 2.895965576171875e-05, + "step": 18979, + "training_step_time": 0.10577106475830078 + }, + { + "epoch": 2.8961181640625e-05, + "grad_norm": 0.3815549612045288, + "learning_rate": 3.257139763390925e-05, + "loss": 0.0107, + "step": 18980 + }, + { + "epoch": 2.8961181640625e-05, + "model_forward_time": 0.02577948570251465, + "step": 18980 + }, + { + "epoch": 2.8961181640625e-05, + "step": 18980, + "training_step_time": 0.10616731643676758 + }, + { + "epoch": 2.896270751953125e-05, + "model_forward_time": 0.025363683700561523, + "step": 18981 + }, + { + "epoch": 2.896270751953125e-05, + "step": 18981, + "training_step_time": 0.1056203842163086 + }, + { + "epoch": 2.89642333984375e-05, + "model_forward_time": 0.025207042694091797, + "step": 18982 + }, + { + "epoch": 2.89642333984375e-05, + "step": 18982, + "training_step_time": 0.10625195503234863 + }, + { + "epoch": 2.896575927734375e-05, + "model_forward_time": 0.025478601455688477, + "step": 18983 + }, + { + "epoch": 2.896575927734375e-05, + "step": 18983, + "training_step_time": 0.10497713088989258 + }, + { + "epoch": 2.896728515625e-05, + "model_forward_time": 0.024956226348876953, + "step": 18984 + }, + { + "epoch": 2.896728515625e-05, + "step": 18984, + "training_step_time": 0.1054391860961914 + }, + { + "epoch": 2.896881103515625e-05, + "model_forward_time": 0.02547168731689453, + "step": 18985 + }, + { + "epoch": 2.896881103515625e-05, + "step": 18985, + "training_step_time": 0.10461187362670898 + }, + { + "epoch": 2.89703369140625e-05, + "model_forward_time": 0.025010347366333008, + "step": 18986 + }, + { + "epoch": 2.89703369140625e-05, + "step": 18986, + "training_step_time": 0.10944485664367676 + }, + { + "epoch": 2.897186279296875e-05, + "model_forward_time": 0.028072357177734375, + "step": 18987 + }, + { + "epoch": 2.897186279296875e-05, + "step": 18987, + "training_step_time": 0.11066174507141113 + }, + { + "epoch": 2.8973388671875e-05, + "model_forward_time": 0.0254364013671875, + "step": 18988 + }, + { + "epoch": 2.8973388671875e-05, + "step": 18988, + "training_step_time": 0.16875052452087402 + }, + { + "epoch": 2.897491455078125e-05, + "model_forward_time": 0.024350643157958984, + "step": 18989 + }, + { + "epoch": 2.897491455078125e-05, + "step": 18989, + "training_step_time": 0.1988825798034668 + }, + { + "epoch": 2.89764404296875e-05, + "grad_norm": 0.26489847898483276, + "learning_rate": 3.251974931705933e-05, + "loss": 0.0064, + "step": 18990 + }, + { + "epoch": 2.89764404296875e-05, + "model_forward_time": 0.024118423461914062, + "step": 18990 + }, + { + "epoch": 2.89764404296875e-05, + "step": 18990, + "training_step_time": 0.2319011688232422 + }, + { + "epoch": 2.897796630859375e-05, + "model_forward_time": 0.024244070053100586, + "step": 18991 + }, + { + "epoch": 2.897796630859375e-05, + "step": 18991, + "training_step_time": 0.18625211715698242 + }, + { + "epoch": 2.89794921875e-05, + "model_forward_time": 0.030688762664794922, + "step": 18992 + }, + { + "epoch": 2.89794921875e-05, + "step": 18992, + "training_step_time": 0.20748448371887207 + }, + { + "epoch": 2.898101806640625e-05, + "model_forward_time": 0.0240786075592041, + "step": 18993 + }, + { + "epoch": 2.898101806640625e-05, + "step": 18993, + "training_step_time": 0.16120147705078125 + }, + { + "epoch": 2.89825439453125e-05, + "model_forward_time": 0.023673534393310547, + "step": 18994 + }, + { + "epoch": 2.89825439453125e-05, + "step": 18994, + "training_step_time": 0.13706064224243164 + }, + { + "epoch": 2.898406982421875e-05, + "model_forward_time": 0.02485370635986328, + "step": 18995 + }, + { + "epoch": 2.898406982421875e-05, + "step": 18995, + "training_step_time": 0.10296082496643066 + }, + { + "epoch": 2.8985595703125e-05, + "model_forward_time": 0.02431321144104004, + "step": 18996 + }, + { + "epoch": 2.8985595703125e-05, + "step": 18996, + "training_step_time": 0.10668230056762695 + }, + { + "epoch": 2.898712158203125e-05, + "model_forward_time": 0.02469182014465332, + "step": 18997 + }, + { + "epoch": 2.898712158203125e-05, + "step": 18997, + "training_step_time": 0.11056208610534668 + }, + { + "epoch": 2.89886474609375e-05, + "model_forward_time": 0.025389909744262695, + "step": 18998 + }, + { + "epoch": 2.89886474609375e-05, + "step": 18998, + "training_step_time": 0.11526060104370117 + }, + { + "epoch": 2.899017333984375e-05, + "model_forward_time": 0.025658607482910156, + "step": 18999 + }, + { + "epoch": 2.899017333984375e-05, + "step": 18999, + "training_step_time": 0.10592842102050781 + }, + { + "epoch": 2.899169921875e-05, + "grad_norm": 0.339771568775177, + "learning_rate": 3.2468122240362284e-05, + "loss": 0.0139, + "step": 19000 + }, + { + "epoch": 2.899169921875e-05, + "model_forward_time": 0.02521228790283203, + "step": 19000 + }, + { + "epoch": 2.899169921875e-05, + "step": 19000, + "training_step_time": 0.0976862907409668 + }, + { + "epoch": 2.899322509765625e-05, + "model_forward_time": 0.023215532302856445, + "step": 19001 + }, + { + "epoch": 2.899322509765625e-05, + "step": 19001, + "training_step_time": 0.09881019592285156 + }, + { + "epoch": 2.89947509765625e-05, + "model_forward_time": 0.024942874908447266, + "step": 19002 + }, + { + "epoch": 2.89947509765625e-05, + "step": 19002, + "training_step_time": 0.10318708419799805 + }, + { + "epoch": 2.899627685546875e-05, + "model_forward_time": 0.025781631469726562, + "step": 19003 + }, + { + "epoch": 2.899627685546875e-05, + "step": 19003, + "training_step_time": 0.10401296615600586 + }, + { + "epoch": 2.8997802734375e-05, + "model_forward_time": 0.025183439254760742, + "step": 19004 + }, + { + "epoch": 2.8997802734375e-05, + "step": 19004, + "training_step_time": 0.11464095115661621 + }, + { + "epoch": 2.899932861328125e-05, + "model_forward_time": 0.027678966522216797, + "step": 19005 + }, + { + "epoch": 2.899932861328125e-05, + "step": 19005, + "training_step_time": 0.13311052322387695 + }, + { + "epoch": 2.90008544921875e-05, + "model_forward_time": 0.025727033615112305, + "step": 19006 + }, + { + "epoch": 2.90008544921875e-05, + "step": 19006, + "training_step_time": 0.10665631294250488 + }, + { + "epoch": 2.900238037109375e-05, + "model_forward_time": 0.025464296340942383, + "step": 19007 + }, + { + "epoch": 2.900238037109375e-05, + "step": 19007, + "training_step_time": 0.11227750778198242 + }, + { + "epoch": 2.900390625e-05, + "model_forward_time": 0.025686264038085938, + "step": 19008 + }, + { + "epoch": 2.900390625e-05, + "step": 19008, + "training_step_time": 0.12848639488220215 + }, + { + "epoch": 2.900543212890625e-05, + "model_forward_time": 0.025217056274414062, + "step": 19009 + }, + { + "epoch": 2.900543212890625e-05, + "step": 19009, + "training_step_time": 0.19919180870056152 + }, + { + "epoch": 2.90069580078125e-05, + "grad_norm": 0.24529385566711426, + "learning_rate": 3.241651646654986e-05, + "loss": 0.0111, + "step": 19010 + }, + { + "epoch": 2.90069580078125e-05, + "model_forward_time": 0.024768590927124023, + "step": 19010 + }, + { + "epoch": 2.90069580078125e-05, + "step": 19010, + "training_step_time": 0.10742807388305664 + }, + { + "epoch": 2.900848388671875e-05, + "model_forward_time": 0.02458667755126953, + "step": 19011 + }, + { + "epoch": 2.900848388671875e-05, + "step": 19011, + "training_step_time": 0.10912013053894043 + }, + { + "epoch": 2.9010009765625e-05, + "model_forward_time": 0.025495290756225586, + "step": 19012 + }, + { + "epoch": 2.9010009765625e-05, + "step": 19012, + "training_step_time": 0.11466550827026367 + }, + { + "epoch": 2.901153564453125e-05, + "model_forward_time": 0.02555561065673828, + "step": 19013 + }, + { + "epoch": 2.901153564453125e-05, + "step": 19013, + "training_step_time": 0.1107628345489502 + }, + { + "epoch": 2.90130615234375e-05, + "model_forward_time": 0.025579452514648438, + "step": 19014 + }, + { + "epoch": 2.90130615234375e-05, + "step": 19014, + "training_step_time": 0.1791071891784668 + }, + { + "epoch": 2.901458740234375e-05, + "model_forward_time": 0.02455282211303711, + "step": 19015 + }, + { + "epoch": 2.901458740234375e-05, + "step": 19015, + "training_step_time": 0.1121516227722168 + }, + { + "epoch": 2.901611328125e-05, + "model_forward_time": 0.024873733520507812, + "step": 19016 + }, + { + "epoch": 2.901611328125e-05, + "step": 19016, + "training_step_time": 0.1127316951751709 + }, + { + "epoch": 2.901763916015625e-05, + "model_forward_time": 0.02541375160217285, + "step": 19017 + }, + { + "epoch": 2.901763916015625e-05, + "step": 19017, + "training_step_time": 0.12691140174865723 + }, + { + "epoch": 2.90191650390625e-05, + "model_forward_time": 0.02562570571899414, + "step": 19018 + }, + { + "epoch": 2.90191650390625e-05, + "step": 19018, + "training_step_time": 0.1147010326385498 + }, + { + "epoch": 2.902069091796875e-05, + "model_forward_time": 0.025420427322387695, + "step": 19019 + }, + { + "epoch": 2.902069091796875e-05, + "step": 19019, + "training_step_time": 0.12376856803894043 + }, + { + "epoch": 2.9022216796875e-05, + "grad_norm": 0.27364978194236755, + "learning_rate": 3.236493205832795e-05, + "loss": 0.009, + "step": 19020 + }, + { + "epoch": 2.9022216796875e-05, + "model_forward_time": 0.02543807029724121, + "step": 19020 + }, + { + "epoch": 2.9022216796875e-05, + "step": 19020, + "training_step_time": 0.1524949073791504 + }, + { + "epoch": 2.902374267578125e-05, + "model_forward_time": 0.0251312255859375, + "step": 19021 + }, + { + "epoch": 2.902374267578125e-05, + "step": 19021, + "training_step_time": 0.11231803894042969 + }, + { + "epoch": 2.90252685546875e-05, + "model_forward_time": 0.025129079818725586, + "step": 19022 + }, + { + "epoch": 2.90252685546875e-05, + "step": 19022, + "training_step_time": 0.11091828346252441 + }, + { + "epoch": 2.902679443359375e-05, + "model_forward_time": 0.024714231491088867, + "step": 19023 + }, + { + "epoch": 2.902679443359375e-05, + "step": 19023, + "training_step_time": 0.10529446601867676 + }, + { + "epoch": 2.90283203125e-05, + "model_forward_time": 0.025334596633911133, + "step": 19024 + }, + { + "epoch": 2.90283203125e-05, + "step": 19024, + "training_step_time": 0.11040306091308594 + }, + { + "epoch": 2.902984619140625e-05, + "model_forward_time": 0.025220155715942383, + "step": 19025 + }, + { + "epoch": 2.902984619140625e-05, + "step": 19025, + "training_step_time": 0.18441152572631836 + }, + { + "epoch": 2.90313720703125e-05, + "model_forward_time": 0.024129390716552734, + "step": 19026 + }, + { + "epoch": 2.90313720703125e-05, + "step": 19026, + "training_step_time": 0.16012978553771973 + }, + { + "epoch": 2.903289794921875e-05, + "model_forward_time": 0.024023771286010742, + "step": 19027 + }, + { + "epoch": 2.903289794921875e-05, + "step": 19027, + "training_step_time": 0.10694622993469238 + }, + { + "epoch": 2.9034423828125e-05, + "model_forward_time": 0.024895191192626953, + "step": 19028 + }, + { + "epoch": 2.9034423828125e-05, + "step": 19028, + "training_step_time": 0.10486650466918945 + }, + { + "epoch": 2.903594970703125e-05, + "model_forward_time": 0.025260448455810547, + "step": 19029 + }, + { + "epoch": 2.903594970703125e-05, + "step": 19029, + "training_step_time": 0.10531473159790039 + }, + { + "epoch": 2.90374755859375e-05, + "grad_norm": 0.24423016607761383, + "learning_rate": 3.231336907837646e-05, + "loss": 0.0058, + "step": 19030 + }, + { + "epoch": 2.90374755859375e-05, + "model_forward_time": 0.02558612823486328, + "step": 19030 + }, + { + "epoch": 2.90374755859375e-05, + "step": 19030, + "training_step_time": 0.10730934143066406 + }, + { + "epoch": 2.903900146484375e-05, + "model_forward_time": 0.02528691291809082, + "step": 19031 + }, + { + "epoch": 2.903900146484375e-05, + "step": 19031, + "training_step_time": 0.11119508743286133 + }, + { + "epoch": 2.904052734375e-05, + "model_forward_time": 0.025862932205200195, + "step": 19032 + }, + { + "epoch": 2.904052734375e-05, + "step": 19032, + "training_step_time": 0.10508871078491211 + }, + { + "epoch": 2.904205322265625e-05, + "model_forward_time": 0.02496814727783203, + "step": 19033 + }, + { + "epoch": 2.904205322265625e-05, + "step": 19033, + "training_step_time": 0.10631990432739258 + }, + { + "epoch": 2.90435791015625e-05, + "model_forward_time": 0.025215864181518555, + "step": 19034 + }, + { + "epoch": 2.90435791015625e-05, + "step": 19034, + "training_step_time": 0.10454440116882324 + }, + { + "epoch": 2.904510498046875e-05, + "model_forward_time": 0.025423288345336914, + "step": 19035 + }, + { + "epoch": 2.904510498046875e-05, + "step": 19035, + "training_step_time": 0.10506534576416016 + }, + { + "epoch": 2.9046630859375e-05, + "model_forward_time": 0.02563023567199707, + "step": 19036 + }, + { + "epoch": 2.9046630859375e-05, + "step": 19036, + "training_step_time": 0.10559296607971191 + }, + { + "epoch": 2.904815673828125e-05, + "model_forward_time": 0.025359153747558594, + "step": 19037 + }, + { + "epoch": 2.904815673828125e-05, + "step": 19037, + "training_step_time": 0.10909175872802734 + }, + { + "epoch": 2.90496826171875e-05, + "model_forward_time": 0.025300979614257812, + "step": 19038 + }, + { + "epoch": 2.90496826171875e-05, + "step": 19038, + "training_step_time": 0.10549330711364746 + }, + { + "epoch": 2.905120849609375e-05, + "model_forward_time": 0.025072574615478516, + "step": 19039 + }, + { + "epoch": 2.905120849609375e-05, + "step": 19039, + "training_step_time": 0.10524821281433105 + }, + { + "epoch": 2.9052734375e-05, + "grad_norm": 0.18859590590000153, + "learning_rate": 3.226182758934927e-05, + "loss": 0.0125, + "step": 19040 + }, + { + "epoch": 2.9052734375e-05, + "model_forward_time": 0.02525925636291504, + "step": 19040 + }, + { + "epoch": 2.9052734375e-05, + "step": 19040, + "training_step_time": 0.10652732849121094 + }, + { + "epoch": 2.905426025390625e-05, + "model_forward_time": 0.025722742080688477, + "step": 19041 + }, + { + "epoch": 2.905426025390625e-05, + "step": 19041, + "training_step_time": 0.19288396835327148 + }, + { + "epoch": 2.90557861328125e-05, + "model_forward_time": 0.024660110473632812, + "step": 19042 + }, + { + "epoch": 2.90557861328125e-05, + "step": 19042, + "training_step_time": 0.11337065696716309 + }, + { + "epoch": 2.905731201171875e-05, + "model_forward_time": 0.024973630905151367, + "step": 19043 + }, + { + "epoch": 2.905731201171875e-05, + "step": 19043, + "training_step_time": 0.11511659622192383 + }, + { + "epoch": 2.9058837890625e-05, + "model_forward_time": 0.025489330291748047, + "step": 19044 + }, + { + "epoch": 2.9058837890625e-05, + "step": 19044, + "training_step_time": 0.12489986419677734 + }, + { + "epoch": 2.906036376953125e-05, + "model_forward_time": 0.025351762771606445, + "step": 19045 + }, + { + "epoch": 2.906036376953125e-05, + "step": 19045, + "training_step_time": 0.15376496315002441 + }, + { + "epoch": 2.90618896484375e-05, + "model_forward_time": 0.025669097900390625, + "step": 19046 + }, + { + "epoch": 2.90618896484375e-05, + "step": 19046, + "training_step_time": 0.1374986171722412 + }, + { + "epoch": 2.906341552734375e-05, + "model_forward_time": 0.024574756622314453, + "step": 19047 + }, + { + "epoch": 2.906341552734375e-05, + "step": 19047, + "training_step_time": 0.12349486351013184 + }, + { + "epoch": 2.906494140625e-05, + "model_forward_time": 0.02469158172607422, + "step": 19048 + }, + { + "epoch": 2.906494140625e-05, + "step": 19048, + "training_step_time": 0.11615395545959473 + }, + { + "epoch": 2.906646728515625e-05, + "model_forward_time": 0.0249788761138916, + "step": 19049 + }, + { + "epoch": 2.906646728515625e-05, + "step": 19049, + "training_step_time": 0.10844206809997559 + }, + { + "epoch": 2.90679931640625e-05, + "grad_norm": 0.14127777516841888, + "learning_rate": 3.221030765387417e-05, + "loss": 0.012, + "step": 19050 + }, + { + "epoch": 2.90679931640625e-05, + "model_forward_time": 0.025008678436279297, + "step": 19050 + }, + { + "epoch": 2.90679931640625e-05, + "step": 19050, + "training_step_time": 0.13796615600585938 + }, + { + "epoch": 2.906951904296875e-05, + "model_forward_time": 0.024981260299682617, + "step": 19051 + }, + { + "epoch": 2.906951904296875e-05, + "step": 19051, + "training_step_time": 0.10809111595153809 + }, + { + "epoch": 2.9071044921875e-05, + "model_forward_time": 0.02520895004272461, + "step": 19052 + }, + { + "epoch": 2.9071044921875e-05, + "step": 19052, + "training_step_time": 0.19357848167419434 + }, + { + "epoch": 2.907257080078125e-05, + "model_forward_time": 0.024652957916259766, + "step": 19053 + }, + { + "epoch": 2.907257080078125e-05, + "step": 19053, + "training_step_time": 0.12835478782653809 + }, + { + "epoch": 2.90740966796875e-05, + "model_forward_time": 0.02606368064880371, + "step": 19054 + }, + { + "epoch": 2.90740966796875e-05, + "step": 19054, + "training_step_time": 0.16054463386535645 + }, + { + "epoch": 2.907562255859375e-05, + "model_forward_time": 0.024468660354614258, + "step": 19055 + }, + { + "epoch": 2.907562255859375e-05, + "step": 19055, + "training_step_time": 0.10909700393676758 + }, + { + "epoch": 2.90771484375e-05, + "model_forward_time": 0.0249786376953125, + "step": 19056 + }, + { + "epoch": 2.90771484375e-05, + "step": 19056, + "training_step_time": 0.11012411117553711 + }, + { + "epoch": 2.907867431640625e-05, + "model_forward_time": 0.025438785552978516, + "step": 19057 + }, + { + "epoch": 2.907867431640625e-05, + "step": 19057, + "training_step_time": 0.10492491722106934 + }, + { + "epoch": 2.90802001953125e-05, + "model_forward_time": 0.025462865829467773, + "step": 19058 + }, + { + "epoch": 2.90802001953125e-05, + "step": 19058, + "training_step_time": 0.10672140121459961 + }, + { + "epoch": 2.908172607421875e-05, + "model_forward_time": 0.02576756477355957, + "step": 19059 + }, + { + "epoch": 2.908172607421875e-05, + "step": 19059, + "training_step_time": 0.2056431770324707 + }, + { + "epoch": 2.9083251953125e-05, + "grad_norm": 0.243336021900177, + "learning_rate": 3.2158809334552745e-05, + "loss": 0.0105, + "step": 19060 + }, + { + "epoch": 2.9083251953125e-05, + "model_forward_time": 0.024601221084594727, + "step": 19060 + }, + { + "epoch": 2.9083251953125e-05, + "step": 19060, + "training_step_time": 0.10589480400085449 + }, + { + "epoch": 2.908477783203125e-05, + "model_forward_time": 0.023767471313476562, + "step": 19061 + }, + { + "epoch": 2.908477783203125e-05, + "step": 19061, + "training_step_time": 0.10448718070983887 + }, + { + "epoch": 2.90863037109375e-05, + "model_forward_time": 0.025023221969604492, + "step": 19062 + }, + { + "epoch": 2.90863037109375e-05, + "step": 19062, + "training_step_time": 0.12453031539916992 + }, + { + "epoch": 2.908782958984375e-05, + "model_forward_time": 0.025569915771484375, + "step": 19063 + }, + { + "epoch": 2.908782958984375e-05, + "step": 19063, + "training_step_time": 0.12300658226013184 + }, + { + "epoch": 2.908935546875e-05, + "model_forward_time": 0.025118589401245117, + "step": 19064 + }, + { + "epoch": 2.908935546875e-05, + "step": 19064, + "training_step_time": 0.1176755428314209 + }, + { + "epoch": 2.909088134765625e-05, + "model_forward_time": 0.025014162063598633, + "step": 19065 + }, + { + "epoch": 2.909088134765625e-05, + "step": 19065, + "training_step_time": 0.18131256103515625 + }, + { + "epoch": 2.90924072265625e-05, + "model_forward_time": 0.02466559410095215, + "step": 19066 + }, + { + "epoch": 2.90924072265625e-05, + "step": 19066, + "training_step_time": 0.10652422904968262 + }, + { + "epoch": 2.909393310546875e-05, + "model_forward_time": 0.02463507652282715, + "step": 19067 + }, + { + "epoch": 2.909393310546875e-05, + "step": 19067, + "training_step_time": 0.19243121147155762 + }, + { + "epoch": 2.9095458984375e-05, + "model_forward_time": 0.024670124053955078, + "step": 19068 + }, + { + "epoch": 2.9095458984375e-05, + "step": 19068, + "training_step_time": 0.12286734580993652 + }, + { + "epoch": 2.909698486328125e-05, + "model_forward_time": 0.023886442184448242, + "step": 19069 + }, + { + "epoch": 2.909698486328125e-05, + "step": 19069, + "training_step_time": 0.10750627517700195 + }, + { + "epoch": 2.90985107421875e-05, + "grad_norm": 0.22614695131778717, + "learning_rate": 3.210733269396028e-05, + "loss": 0.0127, + "step": 19070 + }, + { + "epoch": 2.90985107421875e-05, + "model_forward_time": 0.025374889373779297, + "step": 19070 + }, + { + "epoch": 2.90985107421875e-05, + "step": 19070, + "training_step_time": 0.11409902572631836 + }, + { + "epoch": 2.910003662109375e-05, + "model_forward_time": 0.025227785110473633, + "step": 19071 + }, + { + "epoch": 2.910003662109375e-05, + "step": 19071, + "training_step_time": 0.11868739128112793 + }, + { + "epoch": 2.91015625e-05, + "model_forward_time": 0.025865554809570312, + "step": 19072 + }, + { + "epoch": 2.91015625e-05, + "step": 19072, + "training_step_time": 0.10512804985046387 + }, + { + "epoch": 2.910308837890625e-05, + "model_forward_time": 0.025388002395629883, + "step": 19073 + }, + { + "epoch": 2.910308837890625e-05, + "step": 19073, + "training_step_time": 0.10745716094970703 + }, + { + "epoch": 2.91046142578125e-05, + "model_forward_time": 0.025316476821899414, + "step": 19074 + }, + { + "epoch": 2.91046142578125e-05, + "step": 19074, + "training_step_time": 0.10283613204956055 + }, + { + "epoch": 2.910614013671875e-05, + "model_forward_time": 0.02558445930480957, + "step": 19075 + }, + { + "epoch": 2.910614013671875e-05, + "step": 19075, + "training_step_time": 0.10521364212036133 + }, + { + "epoch": 2.9107666015625e-05, + "model_forward_time": 0.025439023971557617, + "step": 19076 + }, + { + "epoch": 2.9107666015625e-05, + "step": 19076, + "training_step_time": 0.10368180274963379 + }, + { + "epoch": 2.910919189453125e-05, + "model_forward_time": 0.02516627311706543, + "step": 19077 + }, + { + "epoch": 2.910919189453125e-05, + "step": 19077, + "training_step_time": 0.10654211044311523 + }, + { + "epoch": 2.91107177734375e-05, + "model_forward_time": 0.025326251983642578, + "step": 19078 + }, + { + "epoch": 2.91107177734375e-05, + "step": 19078, + "training_step_time": 0.16875123977661133 + }, + { + "epoch": 2.911224365234375e-05, + "model_forward_time": 0.02497720718383789, + "step": 19079 + }, + { + "epoch": 2.911224365234375e-05, + "step": 19079, + "training_step_time": 0.19500088691711426 + }, + { + "epoch": 2.911376953125e-05, + "grad_norm": 0.1602323353290558, + "learning_rate": 3.205587779464576e-05, + "loss": 0.0065, + "step": 19080 + }, + { + "epoch": 2.911376953125e-05, + "model_forward_time": 0.024251461029052734, + "step": 19080 + }, + { + "epoch": 2.911376953125e-05, + "step": 19080, + "training_step_time": 0.18586254119873047 + }, + { + "epoch": 2.911529541015625e-05, + "model_forward_time": 0.024263620376586914, + "step": 19081 + }, + { + "epoch": 2.911529541015625e-05, + "step": 19081, + "training_step_time": 0.18398070335388184 + }, + { + "epoch": 2.91168212890625e-05, + "model_forward_time": 0.02419567108154297, + "step": 19082 + }, + { + "epoch": 2.91168212890625e-05, + "step": 19082, + "training_step_time": 0.16943860054016113 + }, + { + "epoch": 2.911834716796875e-05, + "model_forward_time": 0.024126529693603516, + "step": 19083 + }, + { + "epoch": 2.911834716796875e-05, + "step": 19083, + "training_step_time": 0.21080756187438965 + }, + { + "epoch": 2.9119873046875e-05, + "model_forward_time": 0.025321483612060547, + "step": 19084 + }, + { + "epoch": 2.9119873046875e-05, + "step": 19084, + "training_step_time": 0.13036131858825684 + }, + { + "epoch": 2.912139892578125e-05, + "model_forward_time": 0.024187803268432617, + "step": 19085 + }, + { + "epoch": 2.912139892578125e-05, + "step": 19085, + "training_step_time": 0.12222146987915039 + }, + { + "epoch": 2.91229248046875e-05, + "model_forward_time": 0.025311708450317383, + "step": 19086 + }, + { + "epoch": 2.91229248046875e-05, + "step": 19086, + "training_step_time": 0.1370401382446289 + }, + { + "epoch": 2.912445068359375e-05, + "model_forward_time": 0.024813175201416016, + "step": 19087 + }, + { + "epoch": 2.912445068359375e-05, + "step": 19087, + "training_step_time": 0.1163029670715332 + }, + { + "epoch": 2.91259765625e-05, + "model_forward_time": 0.02492690086364746, + "step": 19088 + }, + { + "epoch": 2.91259765625e-05, + "step": 19088, + "training_step_time": 0.13187146186828613 + }, + { + "epoch": 2.912750244140625e-05, + "model_forward_time": 0.02523636817932129, + "step": 19089 + }, + { + "epoch": 2.912750244140625e-05, + "step": 19089, + "training_step_time": 0.11110043525695801 + }, + { + "epoch": 2.91290283203125e-05, + "grad_norm": 0.10157324373722076, + "learning_rate": 3.2004444699131727e-05, + "loss": 0.0142, + "step": 19090 + }, + { + "epoch": 2.91290283203125e-05, + "model_forward_time": 0.02501225471496582, + "step": 19090 + }, + { + "epoch": 2.91290283203125e-05, + "step": 19090, + "training_step_time": 0.10602593421936035 + }, + { + "epoch": 2.913055419921875e-05, + "model_forward_time": 0.02499246597290039, + "step": 19091 + }, + { + "epoch": 2.913055419921875e-05, + "step": 19091, + "training_step_time": 0.10865950584411621 + }, + { + "epoch": 2.9132080078125e-05, + "model_forward_time": 0.02554464340209961, + "step": 19092 + }, + { + "epoch": 2.9132080078125e-05, + "step": 19092, + "training_step_time": 0.1098332405090332 + }, + { + "epoch": 2.913360595703125e-05, + "model_forward_time": 0.024596214294433594, + "step": 19093 + }, + { + "epoch": 2.913360595703125e-05, + "step": 19093, + "training_step_time": 0.13742828369140625 + }, + { + "epoch": 2.91351318359375e-05, + "model_forward_time": 0.02461695671081543, + "step": 19094 + }, + { + "epoch": 2.91351318359375e-05, + "step": 19094, + "training_step_time": 0.16405320167541504 + }, + { + "epoch": 2.913665771484375e-05, + "model_forward_time": 0.025019407272338867, + "step": 19095 + }, + { + "epoch": 2.913665771484375e-05, + "step": 19095, + "training_step_time": 0.11177968978881836 + }, + { + "epoch": 2.913818359375e-05, + "model_forward_time": 0.024298667907714844, + "step": 19096 + }, + { + "epoch": 2.913818359375e-05, + "step": 19096, + "training_step_time": 0.1304616928100586 + }, + { + "epoch": 2.913970947265625e-05, + "model_forward_time": 0.024941444396972656, + "step": 19097 + }, + { + "epoch": 2.913970947265625e-05, + "step": 19097, + "training_step_time": 0.21898126602172852 + }, + { + "epoch": 2.91412353515625e-05, + "model_forward_time": 0.024863719940185547, + "step": 19098 + }, + { + "epoch": 2.91412353515625e-05, + "step": 19098, + "training_step_time": 0.11178970336914062 + }, + { + "epoch": 2.914276123046875e-05, + "model_forward_time": 0.024909257888793945, + "step": 19099 + }, + { + "epoch": 2.914276123046875e-05, + "step": 19099, + "training_step_time": 0.10520005226135254 + }, + { + "epoch": 2.9144287109375e-05, + "grad_norm": 0.14794135093688965, + "learning_rate": 3.1953033469914276e-05, + "loss": 0.0151, + "step": 19100 + }, + { + "epoch": 2.9144287109375e-05, + "model_forward_time": 0.025289535522460938, + "step": 19100 + }, + { + "epoch": 2.9144287109375e-05, + "step": 19100, + "training_step_time": 0.10608530044555664 + }, + { + "epoch": 2.914581298828125e-05, + "model_forward_time": 0.025264263153076172, + "step": 19101 + }, + { + "epoch": 2.914581298828125e-05, + "step": 19101, + "training_step_time": 0.10851550102233887 + }, + { + "epoch": 2.91473388671875e-05, + "model_forward_time": 0.025126218795776367, + "step": 19102 + }, + { + "epoch": 2.91473388671875e-05, + "step": 19102, + "training_step_time": 0.1536405086517334 + }, + { + "epoch": 2.914886474609375e-05, + "model_forward_time": 0.024659156799316406, + "step": 19103 + }, + { + "epoch": 2.914886474609375e-05, + "step": 19103, + "training_step_time": 0.10673069953918457 + }, + { + "epoch": 2.9150390625e-05, + "model_forward_time": 0.026833534240722656, + "step": 19104 + }, + { + "epoch": 2.9150390625e-05, + "step": 19104, + "training_step_time": 0.10928058624267578 + }, + { + "epoch": 2.915191650390625e-05, + "model_forward_time": 0.025177717208862305, + "step": 19105 + }, + { + "epoch": 2.915191650390625e-05, + "step": 19105, + "training_step_time": 0.12807059288024902 + }, + { + "epoch": 2.91534423828125e-05, + "model_forward_time": 0.025484323501586914, + "step": 19106 + }, + { + "epoch": 2.91534423828125e-05, + "step": 19106, + "training_step_time": 0.12203383445739746 + }, + { + "epoch": 2.915496826171875e-05, + "model_forward_time": 0.02539825439453125, + "step": 19107 + }, + { + "epoch": 2.915496826171875e-05, + "step": 19107, + "training_step_time": 0.12215495109558105 + }, + { + "epoch": 2.9156494140625e-05, + "model_forward_time": 0.02493143081665039, + "step": 19108 + }, + { + "epoch": 2.9156494140625e-05, + "step": 19108, + "training_step_time": 0.148115873336792 + }, + { + "epoch": 2.915802001953125e-05, + "model_forward_time": 0.02460765838623047, + "step": 19109 + }, + { + "epoch": 2.915802001953125e-05, + "step": 19109, + "training_step_time": 0.14225530624389648 + }, + { + "epoch": 2.91595458984375e-05, + "grad_norm": 0.10831483453512192, + "learning_rate": 3.190164416946285e-05, + "loss": 0.0081, + "step": 19110 + }, + { + "epoch": 2.91595458984375e-05, + "model_forward_time": 0.02434086799621582, + "step": 19110 + }, + { + "epoch": 2.91595458984375e-05, + "step": 19110, + "training_step_time": 0.10690784454345703 + }, + { + "epoch": 2.916107177734375e-05, + "model_forward_time": 0.024959564208984375, + "step": 19111 + }, + { + "epoch": 2.916107177734375e-05, + "step": 19111, + "training_step_time": 0.12288403511047363 + }, + { + "epoch": 2.916259765625e-05, + "model_forward_time": 0.025275468826293945, + "step": 19112 + }, + { + "epoch": 2.916259765625e-05, + "step": 19112, + "training_step_time": 0.12377786636352539 + }, + { + "epoch": 2.916412353515625e-05, + "model_forward_time": 0.025150299072265625, + "step": 19113 + }, + { + "epoch": 2.916412353515625e-05, + "step": 19113, + "training_step_time": 0.10667181015014648 + }, + { + "epoch": 2.91656494140625e-05, + "model_forward_time": 0.02527022361755371, + "step": 19114 + }, + { + "epoch": 2.91656494140625e-05, + "step": 19114, + "training_step_time": 0.19100117683410645 + }, + { + "epoch": 2.916717529296875e-05, + "model_forward_time": 0.024626493453979492, + "step": 19115 + }, + { + "epoch": 2.916717529296875e-05, + "step": 19115, + "training_step_time": 0.10141468048095703 + }, + { + "epoch": 2.9168701171875e-05, + "model_forward_time": 0.024565458297729492, + "step": 19116 + }, + { + "epoch": 2.9168701171875e-05, + "step": 19116, + "training_step_time": 0.10399723052978516 + }, + { + "epoch": 2.917022705078125e-05, + "model_forward_time": 0.02532052993774414, + "step": 19117 + }, + { + "epoch": 2.917022705078125e-05, + "step": 19117, + "training_step_time": 0.10423493385314941 + }, + { + "epoch": 2.91717529296875e-05, + "model_forward_time": 0.02520275115966797, + "step": 19118 + }, + { + "epoch": 2.91717529296875e-05, + "step": 19118, + "training_step_time": 0.10827040672302246 + }, + { + "epoch": 2.917327880859375e-05, + "model_forward_time": 0.025252819061279297, + "step": 19119 + }, + { + "epoch": 2.917327880859375e-05, + "step": 19119, + "training_step_time": 0.10921502113342285 + }, + { + "epoch": 2.91748046875e-05, + "grad_norm": 0.07747375965118408, + "learning_rate": 3.1850276860220346e-05, + "loss": 0.0057, + "step": 19120 + }, + { + "epoch": 2.91748046875e-05, + "model_forward_time": 0.024976253509521484, + "step": 19120 + }, + { + "epoch": 2.91748046875e-05, + "step": 19120, + "training_step_time": 0.1065826416015625 + }, + { + "epoch": 2.917633056640625e-05, + "model_forward_time": 0.025442123413085938, + "step": 19121 + }, + { + "epoch": 2.917633056640625e-05, + "step": 19121, + "training_step_time": 0.10892963409423828 + }, + { + "epoch": 2.91778564453125e-05, + "model_forward_time": 0.02556586265563965, + "step": 19122 + }, + { + "epoch": 2.91778564453125e-05, + "step": 19122, + "training_step_time": 0.10950636863708496 + }, + { + "epoch": 2.917938232421875e-05, + "model_forward_time": 0.02536463737487793, + "step": 19123 + }, + { + "epoch": 2.917938232421875e-05, + "step": 19123, + "training_step_time": 0.10784459114074707 + }, + { + "epoch": 2.9180908203125e-05, + "model_forward_time": 0.02524089813232422, + "step": 19124 + }, + { + "epoch": 2.9180908203125e-05, + "step": 19124, + "training_step_time": 0.10599493980407715 + }, + { + "epoch": 2.918243408203125e-05, + "model_forward_time": 0.025662660598754883, + "step": 19125 + }, + { + "epoch": 2.918243408203125e-05, + "step": 19125, + "training_step_time": 0.1109166145324707 + }, + { + "epoch": 2.91839599609375e-05, + "model_forward_time": 0.025733470916748047, + "step": 19126 + }, + { + "epoch": 2.91839599609375e-05, + "step": 19126, + "training_step_time": 0.10693073272705078 + }, + { + "epoch": 2.918548583984375e-05, + "model_forward_time": 0.02564382553100586, + "step": 19127 + }, + { + "epoch": 2.918548583984375e-05, + "step": 19127, + "training_step_time": 0.10742783546447754 + }, + { + "epoch": 2.918701171875e-05, + "model_forward_time": 0.025267362594604492, + "step": 19128 + }, + { + "epoch": 2.918701171875e-05, + "step": 19128, + "training_step_time": 0.10490202903747559 + }, + { + "epoch": 2.918853759765625e-05, + "model_forward_time": 0.02510547637939453, + "step": 19129 + }, + { + "epoch": 2.918853759765625e-05, + "step": 19129, + "training_step_time": 0.16948533058166504 + }, + { + "epoch": 2.91900634765625e-05, + "grad_norm": 0.09368924051523209, + "learning_rate": 3.1798931604602864e-05, + "loss": 0.0173, + "step": 19130 + }, + { + "epoch": 2.91900634765625e-05, + "model_forward_time": 0.025140762329101562, + "step": 19130 + }, + { + "epoch": 2.91900634765625e-05, + "step": 19130, + "training_step_time": 0.11592221260070801 + }, + { + "epoch": 2.919158935546875e-05, + "model_forward_time": 0.024262428283691406, + "step": 19131 + }, + { + "epoch": 2.919158935546875e-05, + "step": 19131, + "training_step_time": 0.10565185546875 + }, + { + "epoch": 2.9193115234375e-05, + "model_forward_time": 0.025305986404418945, + "step": 19132 + }, + { + "epoch": 2.9193115234375e-05, + "step": 19132, + "training_step_time": 0.12161064147949219 + }, + { + "epoch": 2.919464111328125e-05, + "model_forward_time": 0.025467395782470703, + "step": 19133 + }, + { + "epoch": 2.919464111328125e-05, + "step": 19133, + "training_step_time": 0.10840535163879395 + }, + { + "epoch": 2.91961669921875e-05, + "model_forward_time": 0.02503204345703125, + "step": 19134 + }, + { + "epoch": 2.91961669921875e-05, + "step": 19134, + "training_step_time": 0.11540889739990234 + }, + { + "epoch": 2.919769287109375e-05, + "model_forward_time": 0.025942564010620117, + "step": 19135 + }, + { + "epoch": 2.919769287109375e-05, + "step": 19135, + "training_step_time": 0.11507081985473633 + }, + { + "epoch": 2.919921875e-05, + "model_forward_time": 0.025327205657958984, + "step": 19136 + }, + { + "epoch": 2.919921875e-05, + "step": 19136, + "training_step_time": 0.11298227310180664 + }, + { + "epoch": 2.920074462890625e-05, + "model_forward_time": 0.025176286697387695, + "step": 19137 + }, + { + "epoch": 2.920074462890625e-05, + "step": 19137, + "training_step_time": 0.10883688926696777 + }, + { + "epoch": 2.92022705078125e-05, + "model_forward_time": 0.025403261184692383, + "step": 19138 + }, + { + "epoch": 2.92022705078125e-05, + "step": 19138, + "training_step_time": 0.10388469696044922 + }, + { + "epoch": 2.920379638671875e-05, + "model_forward_time": 0.025003671646118164, + "step": 19139 + }, + { + "epoch": 2.920379638671875e-05, + "step": 19139, + "training_step_time": 0.12038946151733398 + }, + { + "epoch": 2.9205322265625e-05, + "grad_norm": 0.12927605211734772, + "learning_rate": 3.1747608464999725e-05, + "loss": 0.0055, + "step": 19140 + }, + { + "epoch": 2.9205322265625e-05, + "model_forward_time": 0.02436971664428711, + "step": 19140 + }, + { + "epoch": 2.9205322265625e-05, + "step": 19140, + "training_step_time": 0.11724567413330078 + }, + { + "epoch": 2.920684814453125e-05, + "model_forward_time": 0.025753259658813477, + "step": 19141 + }, + { + "epoch": 2.920684814453125e-05, + "step": 19141, + "training_step_time": 0.10664081573486328 + }, + { + "epoch": 2.92083740234375e-05, + "model_forward_time": 0.025559425354003906, + "step": 19142 + }, + { + "epoch": 2.92083740234375e-05, + "step": 19142, + "training_step_time": 0.1165318489074707 + }, + { + "epoch": 2.920989990234375e-05, + "model_forward_time": 0.0253753662109375, + "step": 19143 + }, + { + "epoch": 2.920989990234375e-05, + "step": 19143, + "training_step_time": 0.11254453659057617 + }, + { + "epoch": 2.921142578125e-05, + "model_forward_time": 0.025761127471923828, + "step": 19144 + }, + { + "epoch": 2.921142578125e-05, + "step": 19144, + "training_step_time": 0.12736988067626953 + }, + { + "epoch": 2.921295166015625e-05, + "model_forward_time": 0.02578902244567871, + "step": 19145 + }, + { + "epoch": 2.921295166015625e-05, + "step": 19145, + "training_step_time": 0.12256217002868652 + }, + { + "epoch": 2.92144775390625e-05, + "model_forward_time": 0.02558135986328125, + "step": 19146 + }, + { + "epoch": 2.92144775390625e-05, + "step": 19146, + "training_step_time": 0.10568642616271973 + }, + { + "epoch": 2.921600341796875e-05, + "model_forward_time": 0.025492429733276367, + "step": 19147 + }, + { + "epoch": 2.921600341796875e-05, + "step": 19147, + "training_step_time": 0.10626721382141113 + }, + { + "epoch": 2.9217529296875e-05, + "model_forward_time": 0.025780916213989258, + "step": 19148 + }, + { + "epoch": 2.9217529296875e-05, + "step": 19148, + "training_step_time": 0.11117005348205566 + }, + { + "epoch": 2.921905517578125e-05, + "model_forward_time": 0.025304317474365234, + "step": 19149 + }, + { + "epoch": 2.921905517578125e-05, + "step": 19149, + "training_step_time": 0.10689902305603027 + }, + { + "epoch": 2.92205810546875e-05, + "grad_norm": 0.25562000274658203, + "learning_rate": 3.169630750377337e-05, + "loss": 0.0036, + "step": 19150 + }, + { + "epoch": 2.92205810546875e-05, + "model_forward_time": 0.025221824645996094, + "step": 19150 + }, + { + "epoch": 2.92205810546875e-05, + "step": 19150, + "training_step_time": 0.1511378288269043 + }, + { + "epoch": 2.922210693359375e-05, + "model_forward_time": 0.025392770767211914, + "step": 19151 + }, + { + "epoch": 2.922210693359375e-05, + "step": 19151, + "training_step_time": 0.10863232612609863 + }, + { + "epoch": 2.92236328125e-05, + "model_forward_time": 0.025344371795654297, + "step": 19152 + }, + { + "epoch": 2.92236328125e-05, + "step": 19152, + "training_step_time": 0.1107943058013916 + }, + { + "epoch": 2.922515869140625e-05, + "model_forward_time": 0.02565622329711914, + "step": 19153 + }, + { + "epoch": 2.922515869140625e-05, + "step": 19153, + "training_step_time": 0.12649130821228027 + }, + { + "epoch": 2.92266845703125e-05, + "model_forward_time": 0.025661945343017578, + "step": 19154 + }, + { + "epoch": 2.92266845703125e-05, + "step": 19154, + "training_step_time": 0.1254258155822754 + }, + { + "epoch": 2.922821044921875e-05, + "model_forward_time": 0.0252377986907959, + "step": 19155 + }, + { + "epoch": 2.922821044921875e-05, + "step": 19155, + "training_step_time": 0.11474895477294922 + }, + { + "epoch": 2.9229736328125e-05, + "model_forward_time": 0.025528430938720703, + "step": 19156 + }, + { + "epoch": 2.9229736328125e-05, + "step": 19156, + "training_step_time": 0.13181471824645996 + }, + { + "epoch": 2.923126220703125e-05, + "model_forward_time": 0.025496959686279297, + "step": 19157 + }, + { + "epoch": 2.923126220703125e-05, + "step": 19157, + "training_step_time": 0.13738679885864258 + }, + { + "epoch": 2.92327880859375e-05, + "model_forward_time": 0.024787187576293945, + "step": 19158 + }, + { + "epoch": 2.92327880859375e-05, + "step": 19158, + "training_step_time": 0.11504459381103516 + }, + { + "epoch": 2.923431396484375e-05, + "model_forward_time": 0.02484440803527832, + "step": 19159 + }, + { + "epoch": 2.923431396484375e-05, + "step": 19159, + "training_step_time": 0.10437202453613281 + }, + { + "epoch": 2.923583984375e-05, + "grad_norm": 0.20046649873256683, + "learning_rate": 3.1645028783259345e-05, + "loss": 0.0087, + "step": 19160 + }, + { + "epoch": 2.923583984375e-05, + "model_forward_time": 0.025221824645996094, + "step": 19160 + }, + { + "epoch": 2.923583984375e-05, + "step": 19160, + "training_step_time": 0.1115577220916748 + }, + { + "epoch": 2.923736572265625e-05, + "model_forward_time": 0.02541637420654297, + "step": 19161 + }, + { + "epoch": 2.923736572265625e-05, + "step": 19161, + "training_step_time": 0.10995650291442871 + }, + { + "epoch": 2.92388916015625e-05, + "model_forward_time": 0.025473356246948242, + "step": 19162 + }, + { + "epoch": 2.92388916015625e-05, + "step": 19162, + "training_step_time": 0.1905820369720459 + }, + { + "epoch": 2.924041748046875e-05, + "model_forward_time": 0.024717092514038086, + "step": 19163 + }, + { + "epoch": 2.924041748046875e-05, + "step": 19163, + "training_step_time": 0.10255980491638184 + }, + { + "epoch": 2.9241943359375e-05, + "model_forward_time": 0.024859905242919922, + "step": 19164 + }, + { + "epoch": 2.9241943359375e-05, + "step": 19164, + "training_step_time": 0.1027383804321289 + }, + { + "epoch": 2.924346923828125e-05, + "model_forward_time": 0.025934934616088867, + "step": 19165 + }, + { + "epoch": 2.924346923828125e-05, + "step": 19165, + "training_step_time": 0.10698938369750977 + }, + { + "epoch": 2.92449951171875e-05, + "model_forward_time": 0.0251619815826416, + "step": 19166 + }, + { + "epoch": 2.92449951171875e-05, + "step": 19166, + "training_step_time": 0.10729384422302246 + }, + { + "epoch": 2.924652099609375e-05, + "model_forward_time": 0.025444746017456055, + "step": 19167 + }, + { + "epoch": 2.924652099609375e-05, + "step": 19167, + "training_step_time": 0.10352468490600586 + }, + { + "epoch": 2.9248046875e-05, + "model_forward_time": 0.025356531143188477, + "step": 19168 + }, + { + "epoch": 2.9248046875e-05, + "step": 19168, + "training_step_time": 0.1057133674621582 + }, + { + "epoch": 2.924957275390625e-05, + "model_forward_time": 0.025600671768188477, + "step": 19169 + }, + { + "epoch": 2.924957275390625e-05, + "step": 19169, + "training_step_time": 0.10941004753112793 + }, + { + "epoch": 2.92510986328125e-05, + "grad_norm": 0.11145558208227158, + "learning_rate": 3.1593772365766105e-05, + "loss": 0.0063, + "step": 19170 + }, + { + "epoch": 2.92510986328125e-05, + "model_forward_time": 0.02516627311706543, + "step": 19170 + }, + { + "epoch": 2.92510986328125e-05, + "step": 19170, + "training_step_time": 0.10455107688903809 + }, + { + "epoch": 2.925262451171875e-05, + "model_forward_time": 0.026804685592651367, + "step": 19171 + }, + { + "epoch": 2.925262451171875e-05, + "step": 19171, + "training_step_time": 0.10468578338623047 + }, + { + "epoch": 2.9254150390625e-05, + "model_forward_time": 0.025454282760620117, + "step": 19172 + }, + { + "epoch": 2.9254150390625e-05, + "step": 19172, + "training_step_time": 0.10536623001098633 + }, + { + "epoch": 2.925567626953125e-05, + "model_forward_time": 0.025073528289794922, + "step": 19173 + }, + { + "epoch": 2.925567626953125e-05, + "step": 19173, + "training_step_time": 0.1084897518157959 + }, + { + "epoch": 2.92572021484375e-05, + "model_forward_time": 0.025296926498413086, + "step": 19174 + }, + { + "epoch": 2.92572021484375e-05, + "step": 19174, + "training_step_time": 0.10695695877075195 + }, + { + "epoch": 2.925872802734375e-05, + "model_forward_time": 0.02520275115966797, + "step": 19175 + }, + { + "epoch": 2.925872802734375e-05, + "step": 19175, + "training_step_time": 0.10709023475646973 + }, + { + "epoch": 2.926025390625e-05, + "model_forward_time": 0.02548527717590332, + "step": 19176 + }, + { + "epoch": 2.926025390625e-05, + "step": 19176, + "training_step_time": 0.10820221900939941 + }, + { + "epoch": 2.926177978515625e-05, + "model_forward_time": 0.025419235229492188, + "step": 19177 + }, + { + "epoch": 2.926177978515625e-05, + "step": 19177, + "training_step_time": 0.17939376831054688 + }, + { + "epoch": 2.92633056640625e-05, + "model_forward_time": 0.024326801300048828, + "step": 19178 + }, + { + "epoch": 2.92633056640625e-05, + "step": 19178, + "training_step_time": 0.24491429328918457 + }, + { + "epoch": 2.926483154296875e-05, + "model_forward_time": 0.024651288986206055, + "step": 19179 + }, + { + "epoch": 2.926483154296875e-05, + "step": 19179, + "training_step_time": 0.21010470390319824 + }, + { + "epoch": 2.9266357421875e-05, + "grad_norm": 0.27768412232398987, + "learning_rate": 3.1542538313575035e-05, + "loss": 0.0071, + "step": 19180 + }, + { + "epoch": 2.9266357421875e-05, + "model_forward_time": 0.024679899215698242, + "step": 19180 + }, + { + "epoch": 2.9266357421875e-05, + "step": 19180, + "training_step_time": 0.2001194953918457 + }, + { + "epoch": 2.926788330078125e-05, + "model_forward_time": 0.024219036102294922, + "step": 19181 + }, + { + "epoch": 2.926788330078125e-05, + "step": 19181, + "training_step_time": 0.22867369651794434 + }, + { + "epoch": 2.92694091796875e-05, + "model_forward_time": 0.023774385452270508, + "step": 19182 + }, + { + "epoch": 2.92694091796875e-05, + "step": 19182, + "training_step_time": 0.18787074089050293 + }, + { + "epoch": 2.927093505859375e-05, + "model_forward_time": 0.02380228042602539, + "step": 19183 + }, + { + "epoch": 2.927093505859375e-05, + "step": 19183, + "training_step_time": 0.17838454246520996 + }, + { + "epoch": 2.92724609375e-05, + "model_forward_time": 0.02523493766784668, + "step": 19184 + }, + { + "epoch": 2.92724609375e-05, + "step": 19184, + "training_step_time": 0.1281139850616455 + }, + { + "epoch": 2.927398681640625e-05, + "model_forward_time": 0.024333715438842773, + "step": 19185 + }, + { + "epoch": 2.927398681640625e-05, + "step": 19185, + "training_step_time": 0.1433396339416504 + }, + { + "epoch": 2.92755126953125e-05, + "model_forward_time": 0.02483534812927246, + "step": 19186 + }, + { + "epoch": 2.92755126953125e-05, + "step": 19186, + "training_step_time": 0.17440414428710938 + }, + { + "epoch": 2.927703857421875e-05, + "model_forward_time": 0.0246124267578125, + "step": 19187 + }, + { + "epoch": 2.927703857421875e-05, + "step": 19187, + "training_step_time": 0.1164860725402832 + }, + { + "epoch": 2.9278564453125e-05, + "model_forward_time": 0.024529457092285156, + "step": 19188 + }, + { + "epoch": 2.9278564453125e-05, + "step": 19188, + "training_step_time": 0.13065767288208008 + }, + { + "epoch": 2.928009033203125e-05, + "model_forward_time": 0.02524852752685547, + "step": 19189 + }, + { + "epoch": 2.928009033203125e-05, + "step": 19189, + "training_step_time": 0.10392308235168457 + }, + { + "epoch": 2.92816162109375e-05, + "grad_norm": 0.4354826509952545, + "learning_rate": 3.1491326688940345e-05, + "loss": 0.02, + "step": 19190 + }, + { + "epoch": 2.92816162109375e-05, + "model_forward_time": 0.02585911750793457, + "step": 19190 + }, + { + "epoch": 2.92816162109375e-05, + "step": 19190, + "training_step_time": 0.11215877532958984 + }, + { + "epoch": 2.928314208984375e-05, + "model_forward_time": 0.02551579475402832, + "step": 19191 + }, + { + "epoch": 2.928314208984375e-05, + "step": 19191, + "training_step_time": 0.10480785369873047 + }, + { + "epoch": 2.928466796875e-05, + "model_forward_time": 0.02552652359008789, + "step": 19192 + }, + { + "epoch": 2.928466796875e-05, + "step": 19192, + "training_step_time": 0.10409688949584961 + }, + { + "epoch": 2.928619384765625e-05, + "model_forward_time": 0.025800466537475586, + "step": 19193 + }, + { + "epoch": 2.928619384765625e-05, + "step": 19193, + "training_step_time": 0.17763137817382812 + }, + { + "epoch": 2.92877197265625e-05, + "model_forward_time": 0.025051116943359375, + "step": 19194 + }, + { + "epoch": 2.92877197265625e-05, + "step": 19194, + "training_step_time": 0.1086416244506836 + }, + { + "epoch": 2.928924560546875e-05, + "model_forward_time": 0.025941133499145508, + "step": 19195 + }, + { + "epoch": 2.928924560546875e-05, + "step": 19195, + "training_step_time": 0.11163568496704102 + }, + { + "epoch": 2.9290771484375e-05, + "model_forward_time": 0.025202035903930664, + "step": 19196 + }, + { + "epoch": 2.9290771484375e-05, + "step": 19196, + "training_step_time": 0.13249540328979492 + }, + { + "epoch": 2.929229736328125e-05, + "model_forward_time": 0.02553558349609375, + "step": 19197 + }, + { + "epoch": 2.929229736328125e-05, + "step": 19197, + "training_step_time": 0.13285374641418457 + }, + { + "epoch": 2.92938232421875e-05, + "model_forward_time": 0.02493453025817871, + "step": 19198 + }, + { + "epoch": 2.92938232421875e-05, + "step": 19198, + "training_step_time": 0.10608410835266113 + }, + { + "epoch": 2.929534912109375e-05, + "model_forward_time": 0.02508378028869629, + "step": 19199 + }, + { + "epoch": 2.929534912109375e-05, + "step": 19199, + "training_step_time": 0.10982751846313477 + }, + { + "epoch": 2.9296875e-05, + "grad_norm": 0.15480276942253113, + "learning_rate": 3.144013755408895e-05, + "loss": 0.0088, + "step": 19200 + }, + { + "epoch": 2.9296875e-05, + "model_forward_time": 0.025064468383789062, + "step": 19200 + }, + { + "epoch": 2.9296875e-05, + "step": 19200, + "training_step_time": 0.13068723678588867 + }, + { + "epoch": 2.929840087890625e-05, + "model_forward_time": 0.02538585662841797, + "step": 19201 + }, + { + "epoch": 2.929840087890625e-05, + "step": 19201, + "training_step_time": 0.10827827453613281 + }, + { + "epoch": 2.92999267578125e-05, + "model_forward_time": 0.025325298309326172, + "step": 19202 + }, + { + "epoch": 2.92999267578125e-05, + "step": 19202, + "training_step_time": 0.11127042770385742 + }, + { + "epoch": 2.930145263671875e-05, + "model_forward_time": 0.0254213809967041, + "step": 19203 + }, + { + "epoch": 2.930145263671875e-05, + "step": 19203, + "training_step_time": 0.12284207344055176 + }, + { + "epoch": 2.9302978515625e-05, + "model_forward_time": 0.025451183319091797, + "step": 19204 + }, + { + "epoch": 2.9302978515625e-05, + "step": 19204, + "training_step_time": 0.18102502822875977 + }, + { + "epoch": 2.930450439453125e-05, + "model_forward_time": 0.024601459503173828, + "step": 19205 + }, + { + "epoch": 2.930450439453125e-05, + "step": 19205, + "training_step_time": 0.11902356147766113 + }, + { + "epoch": 2.93060302734375e-05, + "model_forward_time": 0.024265766143798828, + "step": 19206 + }, + { + "epoch": 2.93060302734375e-05, + "step": 19206, + "training_step_time": 0.1156916618347168 + }, + { + "epoch": 2.930755615234375e-05, + "model_forward_time": 0.024874210357666016, + "step": 19207 + }, + { + "epoch": 2.930755615234375e-05, + "step": 19207, + "training_step_time": 0.11552190780639648 + }, + { + "epoch": 2.930908203125e-05, + "model_forward_time": 0.025356769561767578, + "step": 19208 + }, + { + "epoch": 2.930908203125e-05, + "step": 19208, + "training_step_time": 0.1118307113647461 + }, + { + "epoch": 2.931060791015625e-05, + "model_forward_time": 0.025333166122436523, + "step": 19209 + }, + { + "epoch": 2.931060791015625e-05, + "step": 19209, + "training_step_time": 0.11358356475830078 + }, + { + "epoch": 2.93121337890625e-05, + "grad_norm": 0.21166066825389862, + "learning_rate": 3.1388970971220546e-05, + "loss": 0.0064, + "step": 19210 + }, + { + "epoch": 2.93121337890625e-05, + "model_forward_time": 0.028171539306640625, + "step": 19210 + }, + { + "epoch": 2.93121337890625e-05, + "step": 19210, + "training_step_time": 0.11153912544250488 + }, + { + "epoch": 2.931365966796875e-05, + "model_forward_time": 0.026267528533935547, + "step": 19211 + }, + { + "epoch": 2.931365966796875e-05, + "step": 19211, + "training_step_time": 0.10839009284973145 + }, + { + "epoch": 2.9315185546875e-05, + "model_forward_time": 0.025510549545288086, + "step": 19212 + }, + { + "epoch": 2.9315185546875e-05, + "step": 19212, + "training_step_time": 0.10545110702514648 + }, + { + "epoch": 2.931671142578125e-05, + "model_forward_time": 0.025348186492919922, + "step": 19213 + }, + { + "epoch": 2.931671142578125e-05, + "step": 19213, + "training_step_time": 0.10542774200439453 + }, + { + "epoch": 2.93182373046875e-05, + "model_forward_time": 0.0253450870513916, + "step": 19214 + }, + { + "epoch": 2.93182373046875e-05, + "step": 19214, + "training_step_time": 0.10511970520019531 + }, + { + "epoch": 2.931976318359375e-05, + "model_forward_time": 0.025597333908081055, + "step": 19215 + }, + { + "epoch": 2.931976318359375e-05, + "step": 19215, + "training_step_time": 0.10569214820861816 + }, + { + "epoch": 2.93212890625e-05, + "model_forward_time": 0.02530694007873535, + "step": 19216 + }, + { + "epoch": 2.93212890625e-05, + "step": 19216, + "training_step_time": 0.10933709144592285 + }, + { + "epoch": 2.932281494140625e-05, + "model_forward_time": 0.025393009185791016, + "step": 19217 + }, + { + "epoch": 2.932281494140625e-05, + "step": 19217, + "training_step_time": 0.10900425910949707 + }, + { + "epoch": 2.93243408203125e-05, + "model_forward_time": 0.025661468505859375, + "step": 19218 + }, + { + "epoch": 2.93243408203125e-05, + "step": 19218, + "training_step_time": 0.11128401756286621 + }, + { + "epoch": 2.932586669921875e-05, + "model_forward_time": 0.025545120239257812, + "step": 19219 + }, + { + "epoch": 2.932586669921875e-05, + "step": 19219, + "training_step_time": 0.10916948318481445 + }, + { + "epoch": 2.9327392578125e-05, + "grad_norm": 0.3978164494037628, + "learning_rate": 3.133782700250731e-05, + "loss": 0.0061, + "step": 19220 + }, + { + "epoch": 2.9327392578125e-05, + "model_forward_time": 0.025174856185913086, + "step": 19220 + }, + { + "epoch": 2.9327392578125e-05, + "step": 19220, + "training_step_time": 0.10708427429199219 + }, + { + "epoch": 2.932891845703125e-05, + "model_forward_time": 0.025544404983520508, + "step": 19221 + }, + { + "epoch": 2.932891845703125e-05, + "step": 19221, + "training_step_time": 0.11556410789489746 + }, + { + "epoch": 2.93304443359375e-05, + "model_forward_time": 0.0253140926361084, + "step": 19222 + }, + { + "epoch": 2.93304443359375e-05, + "step": 19222, + "training_step_time": 0.11324119567871094 + }, + { + "epoch": 2.933197021484375e-05, + "model_forward_time": 0.0253143310546875, + "step": 19223 + }, + { + "epoch": 2.933197021484375e-05, + "step": 19223, + "training_step_time": 0.1232759952545166 + }, + { + "epoch": 2.933349609375e-05, + "model_forward_time": 0.025258779525756836, + "step": 19224 + }, + { + "epoch": 2.933349609375e-05, + "step": 19224, + "training_step_time": 0.11106419563293457 + }, + { + "epoch": 2.933502197265625e-05, + "model_forward_time": 0.025220632553100586, + "step": 19225 + }, + { + "epoch": 2.933502197265625e-05, + "step": 19225, + "training_step_time": 0.16109538078308105 + }, + { + "epoch": 2.93365478515625e-05, + "model_forward_time": 0.025310039520263672, + "step": 19226 + }, + { + "epoch": 2.93365478515625e-05, + "step": 19226, + "training_step_time": 0.13904047012329102 + }, + { + "epoch": 2.933807373046875e-05, + "model_forward_time": 0.024864912033081055, + "step": 19227 + }, + { + "epoch": 2.933807373046875e-05, + "step": 19227, + "training_step_time": 0.1054375171661377 + }, + { + "epoch": 2.9339599609375e-05, + "model_forward_time": 0.025435209274291992, + "step": 19228 + }, + { + "epoch": 2.9339599609375e-05, + "step": 19228, + "training_step_time": 0.10371661186218262 + }, + { + "epoch": 2.934112548828125e-05, + "model_forward_time": 0.024962663650512695, + "step": 19229 + }, + { + "epoch": 2.934112548828125e-05, + "step": 19229, + "training_step_time": 0.11001133918762207 + }, + { + "epoch": 2.93426513671875e-05, + "grad_norm": 0.21541450917720795, + "learning_rate": 3.128670571009399e-05, + "loss": 0.0107, + "step": 19230 + }, + { + "epoch": 2.93426513671875e-05, + "model_forward_time": 0.025121688842773438, + "step": 19230 + }, + { + "epoch": 2.93426513671875e-05, + "step": 19230, + "training_step_time": 0.10422134399414062 + }, + { + "epoch": 2.934417724609375e-05, + "model_forward_time": 0.02557516098022461, + "step": 19231 + }, + { + "epoch": 2.934417724609375e-05, + "step": 19231, + "training_step_time": 0.10710906982421875 + }, + { + "epoch": 2.9345703125e-05, + "model_forward_time": 0.024446725845336914, + "step": 19232 + }, + { + "epoch": 2.9345703125e-05, + "step": 19232, + "training_step_time": 0.14668703079223633 + }, + { + "epoch": 2.934722900390625e-05, + "model_forward_time": 0.02451014518737793, + "step": 19233 + }, + { + "epoch": 2.934722900390625e-05, + "step": 19233, + "training_step_time": 0.17050552368164062 + }, + { + "epoch": 2.93487548828125e-05, + "model_forward_time": 0.02489471435546875, + "step": 19234 + }, + { + "epoch": 2.93487548828125e-05, + "step": 19234, + "training_step_time": 0.11841702461242676 + }, + { + "epoch": 2.935028076171875e-05, + "model_forward_time": 0.025107383728027344, + "step": 19235 + }, + { + "epoch": 2.935028076171875e-05, + "step": 19235, + "training_step_time": 0.13881921768188477 + }, + { + "epoch": 2.9351806640625e-05, + "model_forward_time": 0.025017738342285156, + "step": 19236 + }, + { + "epoch": 2.9351806640625e-05, + "step": 19236, + "training_step_time": 0.18053746223449707 + }, + { + "epoch": 2.935333251953125e-05, + "model_forward_time": 0.02479696273803711, + "step": 19237 + }, + { + "epoch": 2.935333251953125e-05, + "step": 19237, + "training_step_time": 0.14353537559509277 + }, + { + "epoch": 2.93548583984375e-05, + "model_forward_time": 0.023694276809692383, + "step": 19238 + }, + { + "epoch": 2.93548583984375e-05, + "step": 19238, + "training_step_time": 0.12405109405517578 + }, + { + "epoch": 2.935638427734375e-05, + "model_forward_time": 0.023906946182250977, + "step": 19239 + }, + { + "epoch": 2.935638427734375e-05, + "step": 19239, + "training_step_time": 0.18067145347595215 + }, + { + "epoch": 2.935791015625e-05, + "grad_norm": 0.18409797549247742, + "learning_rate": 3.123560715609777e-05, + "loss": 0.0088, + "step": 19240 + }, + { + "epoch": 2.935791015625e-05, + "model_forward_time": 0.024544477462768555, + "step": 19240 + }, + { + "epoch": 2.935791015625e-05, + "step": 19240, + "training_step_time": 0.11346936225891113 + }, + { + "epoch": 2.935943603515625e-05, + "model_forward_time": 0.02455306053161621, + "step": 19241 + }, + { + "epoch": 2.935943603515625e-05, + "step": 19241, + "training_step_time": 0.17413115501403809 + }, + { + "epoch": 2.93609619140625e-05, + "model_forward_time": 0.02481818199157715, + "step": 19242 + }, + { + "epoch": 2.93609619140625e-05, + "step": 19242, + "training_step_time": 0.17853260040283203 + }, + { + "epoch": 2.936248779296875e-05, + "model_forward_time": 0.024631738662719727, + "step": 19243 + }, + { + "epoch": 2.936248779296875e-05, + "step": 19243, + "training_step_time": 0.17427682876586914 + }, + { + "epoch": 2.9364013671875e-05, + "model_forward_time": 0.024524211883544922, + "step": 19244 + }, + { + "epoch": 2.9364013671875e-05, + "step": 19244, + "training_step_time": 0.15829873085021973 + }, + { + "epoch": 2.936553955078125e-05, + "model_forward_time": 0.024396181106567383, + "step": 19245 + }, + { + "epoch": 2.936553955078125e-05, + "step": 19245, + "training_step_time": 0.18975448608398438 + }, + { + "epoch": 2.93670654296875e-05, + "model_forward_time": 0.025730133056640625, + "step": 19246 + }, + { + "epoch": 2.93670654296875e-05, + "step": 19246, + "training_step_time": 0.1287071704864502 + }, + { + "epoch": 2.936859130859375e-05, + "model_forward_time": 0.024869203567504883, + "step": 19247 + }, + { + "epoch": 2.936859130859375e-05, + "step": 19247, + "training_step_time": 0.10538482666015625 + }, + { + "epoch": 2.93701171875e-05, + "model_forward_time": 0.025168657302856445, + "step": 19248 + }, + { + "epoch": 2.93701171875e-05, + "step": 19248, + "training_step_time": 0.19133901596069336 + }, + { + "epoch": 2.937164306640625e-05, + "model_forward_time": 0.02466106414794922, + "step": 19249 + }, + { + "epoch": 2.937164306640625e-05, + "step": 19249, + "training_step_time": 0.10312056541442871 + }, + { + "epoch": 2.93731689453125e-05, + "grad_norm": 0.24338629841804504, + "learning_rate": 3.118453140260823e-05, + "loss": 0.008, + "step": 19250 + }, + { + "epoch": 2.93731689453125e-05, + "model_forward_time": 0.024415016174316406, + "step": 19250 + }, + { + "epoch": 2.93731689453125e-05, + "step": 19250, + "training_step_time": 0.10437583923339844 + }, + { + "epoch": 2.937469482421875e-05, + "model_forward_time": 0.02513909339904785, + "step": 19251 + }, + { + "epoch": 2.937469482421875e-05, + "step": 19251, + "training_step_time": 0.10594749450683594 + }, + { + "epoch": 2.9376220703125e-05, + "model_forward_time": 0.027559757232666016, + "step": 19252 + }, + { + "epoch": 2.9376220703125e-05, + "step": 19252, + "training_step_time": 0.1090250015258789 + }, + { + "epoch": 2.937774658203125e-05, + "model_forward_time": 0.02554798126220703, + "step": 19253 + }, + { + "epoch": 2.937774658203125e-05, + "step": 19253, + "training_step_time": 0.11000180244445801 + }, + { + "epoch": 2.93792724609375e-05, + "model_forward_time": 0.024583101272583008, + "step": 19254 + }, + { + "epoch": 2.93792724609375e-05, + "step": 19254, + "training_step_time": 0.10407161712646484 + }, + { + "epoch": 2.938079833984375e-05, + "model_forward_time": 0.026499032974243164, + "step": 19255 + }, + { + "epoch": 2.938079833984375e-05, + "step": 19255, + "training_step_time": 0.10717511177062988 + }, + { + "epoch": 2.938232421875e-05, + "model_forward_time": 0.02484273910522461, + "step": 19256 + }, + { + "epoch": 2.938232421875e-05, + "step": 19256, + "training_step_time": 0.10487723350524902 + }, + { + "epoch": 2.938385009765625e-05, + "model_forward_time": 0.02489328384399414, + "step": 19257 + }, + { + "epoch": 2.938385009765625e-05, + "step": 19257, + "training_step_time": 0.10401272773742676 + }, + { + "epoch": 2.93853759765625e-05, + "model_forward_time": 0.02525162696838379, + "step": 19258 + }, + { + "epoch": 2.93853759765625e-05, + "step": 19258, + "training_step_time": 0.10507607460021973 + }, + { + "epoch": 2.938690185546875e-05, + "model_forward_time": 0.025394678115844727, + "step": 19259 + }, + { + "epoch": 2.938690185546875e-05, + "step": 19259, + "training_step_time": 0.10581135749816895 + }, + { + "epoch": 2.9388427734375e-05, + "grad_norm": 0.21936672925949097, + "learning_rate": 3.113347851168721e-05, + "loss": 0.0064, + "step": 19260 + }, + { + "epoch": 2.9388427734375e-05, + "model_forward_time": 0.02534008026123047, + "step": 19260 + }, + { + "epoch": 2.9388427734375e-05, + "step": 19260, + "training_step_time": 0.10618782043457031 + }, + { + "epoch": 2.938995361328125e-05, + "model_forward_time": 0.025007009506225586, + "step": 19261 + }, + { + "epoch": 2.938995361328125e-05, + "step": 19261, + "training_step_time": 0.10863447189331055 + }, + { + "epoch": 2.93914794921875e-05, + "model_forward_time": 0.025121688842773438, + "step": 19262 + }, + { + "epoch": 2.93914794921875e-05, + "step": 19262, + "training_step_time": 0.10734128952026367 + }, + { + "epoch": 2.939300537109375e-05, + "model_forward_time": 0.02500629425048828, + "step": 19263 + }, + { + "epoch": 2.939300537109375e-05, + "step": 19263, + "training_step_time": 0.104827880859375 + }, + { + "epoch": 2.939453125e-05, + "model_forward_time": 0.025671005249023438, + "step": 19264 + }, + { + "epoch": 2.939453125e-05, + "step": 19264, + "training_step_time": 0.22210192680358887 + }, + { + "epoch": 2.939605712890625e-05, + "model_forward_time": 0.024561643600463867, + "step": 19265 + }, + { + "epoch": 2.939605712890625e-05, + "step": 19265, + "training_step_time": 0.11351919174194336 + }, + { + "epoch": 2.93975830078125e-05, + "model_forward_time": 0.024650096893310547, + "step": 19266 + }, + { + "epoch": 2.93975830078125e-05, + "step": 19266, + "training_step_time": 0.19347000122070312 + }, + { + "epoch": 2.939910888671875e-05, + "model_forward_time": 0.02416682243347168, + "step": 19267 + }, + { + "epoch": 2.939910888671875e-05, + "step": 19267, + "training_step_time": 0.16080927848815918 + }, + { + "epoch": 2.9400634765625e-05, + "model_forward_time": 0.025736570358276367, + "step": 19268 + }, + { + "epoch": 2.9400634765625e-05, + "step": 19268, + "training_step_time": 0.13562917709350586 + }, + { + "epoch": 2.940216064453125e-05, + "model_forward_time": 0.024166584014892578, + "step": 19269 + }, + { + "epoch": 2.940216064453125e-05, + "step": 19269, + "training_step_time": 0.10534954071044922 + }, + { + "epoch": 2.94036865234375e-05, + "grad_norm": 0.1270855814218521, + "learning_rate": 3.1082448545368814e-05, + "loss": 0.0053, + "step": 19270 + }, + { + "epoch": 2.94036865234375e-05, + "model_forward_time": 0.025366783142089844, + "step": 19270 + }, + { + "epoch": 2.94036865234375e-05, + "step": 19270, + "training_step_time": 0.10294461250305176 + }, + { + "epoch": 2.940521240234375e-05, + "model_forward_time": 0.025060415267944336, + "step": 19271 + }, + { + "epoch": 2.940521240234375e-05, + "step": 19271, + "training_step_time": 0.10331988334655762 + }, + { + "epoch": 2.940673828125e-05, + "model_forward_time": 0.025148391723632812, + "step": 19272 + }, + { + "epoch": 2.940673828125e-05, + "step": 19272, + "training_step_time": 0.10428619384765625 + }, + { + "epoch": 2.940826416015625e-05, + "model_forward_time": 0.025613069534301758, + "step": 19273 + }, + { + "epoch": 2.940826416015625e-05, + "step": 19273, + "training_step_time": 0.10636520385742188 + }, + { + "epoch": 2.94097900390625e-05, + "model_forward_time": 0.025888919830322266, + "step": 19274 + }, + { + "epoch": 2.94097900390625e-05, + "step": 19274, + "training_step_time": 0.10785150527954102 + }, + { + "epoch": 2.941131591796875e-05, + "model_forward_time": 0.024164199829101562, + "step": 19275 + }, + { + "epoch": 2.941131591796875e-05, + "step": 19275, + "training_step_time": 0.19426488876342773 + }, + { + "epoch": 2.9412841796875e-05, + "model_forward_time": 0.025041818618774414, + "step": 19276 + }, + { + "epoch": 2.9412841796875e-05, + "step": 19276, + "training_step_time": 0.21092438697814941 + }, + { + "epoch": 2.941436767578125e-05, + "model_forward_time": 0.02442622184753418, + "step": 19277 + }, + { + "epoch": 2.941436767578125e-05, + "step": 19277, + "training_step_time": 0.22937846183776855 + }, + { + "epoch": 2.94158935546875e-05, + "model_forward_time": 0.024703502655029297, + "step": 19278 + }, + { + "epoch": 2.94158935546875e-05, + "step": 19278, + "training_step_time": 0.21656465530395508 + }, + { + "epoch": 2.941741943359375e-05, + "model_forward_time": 0.024412155151367188, + "step": 19279 + }, + { + "epoch": 2.941741943359375e-05, + "step": 19279, + "training_step_time": 0.20065855979919434 + }, + { + "epoch": 2.94189453125e-05, + "grad_norm": 0.15332883596420288, + "learning_rate": 3.1031441565659235e-05, + "loss": 0.0094, + "step": 19280 + }, + { + "epoch": 2.94189453125e-05, + "model_forward_time": 0.028171300888061523, + "step": 19280 + }, + { + "epoch": 2.94189453125e-05, + "step": 19280, + "training_step_time": 0.22831201553344727 + }, + { + "epoch": 2.942047119140625e-05, + "model_forward_time": 0.02398991584777832, + "step": 19281 + }, + { + "epoch": 2.942047119140625e-05, + "step": 19281, + "training_step_time": 0.2129652500152588 + }, + { + "epoch": 2.94219970703125e-05, + "model_forward_time": 0.02465367317199707, + "step": 19282 + }, + { + "epoch": 2.94219970703125e-05, + "step": 19282, + "training_step_time": 0.1670525074005127 + }, + { + "epoch": 2.942352294921875e-05, + "model_forward_time": 0.024639606475830078, + "step": 19283 + }, + { + "epoch": 2.942352294921875e-05, + "step": 19283, + "training_step_time": 0.1670832633972168 + }, + { + "epoch": 2.9425048828125e-05, + "model_forward_time": 0.024389266967773438, + "step": 19284 + }, + { + "epoch": 2.9425048828125e-05, + "step": 19284, + "training_step_time": 0.20344996452331543 + }, + { + "epoch": 2.942657470703125e-05, + "model_forward_time": 0.024549484252929688, + "step": 19285 + }, + { + "epoch": 2.942657470703125e-05, + "step": 19285, + "training_step_time": 0.10524249076843262 + }, + { + "epoch": 2.94281005859375e-05, + "model_forward_time": 0.023940563201904297, + "step": 19286 + }, + { + "epoch": 2.94281005859375e-05, + "step": 19286, + "training_step_time": 0.1128244400024414 + }, + { + "epoch": 2.942962646484375e-05, + "model_forward_time": 0.0252225399017334, + "step": 19287 + }, + { + "epoch": 2.942962646484375e-05, + "step": 19287, + "training_step_time": 0.11997103691101074 + }, + { + "epoch": 2.943115234375e-05, + "model_forward_time": 0.025533437728881836, + "step": 19288 + }, + { + "epoch": 2.943115234375e-05, + "step": 19288, + "training_step_time": 0.1034090518951416 + }, + { + "epoch": 2.943267822265625e-05, + "model_forward_time": 0.025382041931152344, + "step": 19289 + }, + { + "epoch": 2.943267822265625e-05, + "step": 19289, + "training_step_time": 0.19354677200317383 + }, + { + "epoch": 2.94342041015625e-05, + "grad_norm": 0.4299980103969574, + "learning_rate": 3.098045763453678e-05, + "loss": 0.0137, + "step": 19290 + }, + { + "epoch": 2.94342041015625e-05, + "model_forward_time": 0.024603843688964844, + "step": 19290 + }, + { + "epoch": 2.94342041015625e-05, + "step": 19290, + "training_step_time": 0.10291409492492676 + }, + { + "epoch": 2.943572998046875e-05, + "model_forward_time": 0.02489471435546875, + "step": 19291 + }, + { + "epoch": 2.943572998046875e-05, + "step": 19291, + "training_step_time": 0.10221529006958008 + }, + { + "epoch": 2.9437255859375e-05, + "model_forward_time": 0.025199174880981445, + "step": 19292 + }, + { + "epoch": 2.9437255859375e-05, + "step": 19292, + "training_step_time": 0.11051440238952637 + }, + { + "epoch": 2.943878173828125e-05, + "model_forward_time": 0.0252225399017334, + "step": 19293 + }, + { + "epoch": 2.943878173828125e-05, + "step": 19293, + "training_step_time": 0.10925531387329102 + }, + { + "epoch": 2.94403076171875e-05, + "model_forward_time": 0.02550983428955078, + "step": 19294 + }, + { + "epoch": 2.94403076171875e-05, + "step": 19294, + "training_step_time": 0.10435199737548828 + }, + { + "epoch": 2.944183349609375e-05, + "model_forward_time": 0.025310993194580078, + "step": 19295 + }, + { + "epoch": 2.944183349609375e-05, + "step": 19295, + "training_step_time": 0.11001157760620117 + }, + { + "epoch": 2.9443359375e-05, + "model_forward_time": 0.025522232055664062, + "step": 19296 + }, + { + "epoch": 2.9443359375e-05, + "step": 19296, + "training_step_time": 0.10387015342712402 + }, + { + "epoch": 2.944488525390625e-05, + "model_forward_time": 0.0252993106842041, + "step": 19297 + }, + { + "epoch": 2.944488525390625e-05, + "step": 19297, + "training_step_time": 0.10490798950195312 + }, + { + "epoch": 2.94464111328125e-05, + "model_forward_time": 0.02535867691040039, + "step": 19298 + }, + { + "epoch": 2.94464111328125e-05, + "step": 19298, + "training_step_time": 0.10591840744018555 + }, + { + "epoch": 2.944793701171875e-05, + "model_forward_time": 0.025668621063232422, + "step": 19299 + }, + { + "epoch": 2.944793701171875e-05, + "step": 19299, + "training_step_time": 0.10705065727233887 + }, + { + "epoch": 2.9449462890625e-05, + "grad_norm": 0.22110362350940704, + "learning_rate": 3.0929496813951694e-05, + "loss": 0.0096, + "step": 19300 + }, + { + "epoch": 2.9449462890625e-05, + "model_forward_time": 0.025661706924438477, + "step": 19300 + }, + { + "epoch": 2.9449462890625e-05, + "step": 19300, + "training_step_time": 0.10493612289428711 + }, + { + "epoch": 2.945098876953125e-05, + "model_forward_time": 0.025415658950805664, + "step": 19301 + }, + { + "epoch": 2.945098876953125e-05, + "step": 19301, + "training_step_time": 0.1504533290863037 + }, + { + "epoch": 2.94525146484375e-05, + "model_forward_time": 0.025029897689819336, + "step": 19302 + }, + { + "epoch": 2.94525146484375e-05, + "step": 19302, + "training_step_time": 0.18638205528259277 + }, + { + "epoch": 2.945404052734375e-05, + "model_forward_time": 0.024095773696899414, + "step": 19303 + }, + { + "epoch": 2.945404052734375e-05, + "step": 19303, + "training_step_time": 0.1727466583251953 + }, + { + "epoch": 2.945556640625e-05, + "model_forward_time": 0.024483680725097656, + "step": 19304 + }, + { + "epoch": 2.945556640625e-05, + "step": 19304, + "training_step_time": 0.19438791275024414 + }, + { + "epoch": 2.945709228515625e-05, + "model_forward_time": 0.024800539016723633, + "step": 19305 + }, + { + "epoch": 2.945709228515625e-05, + "step": 19305, + "training_step_time": 0.21100258827209473 + }, + { + "epoch": 2.94586181640625e-05, + "model_forward_time": 0.024143695831298828, + "step": 19306 + }, + { + "epoch": 2.94586181640625e-05, + "step": 19306, + "training_step_time": 0.13538241386413574 + }, + { + "epoch": 2.946014404296875e-05, + "model_forward_time": 0.023680925369262695, + "step": 19307 + }, + { + "epoch": 2.946014404296875e-05, + "step": 19307, + "training_step_time": 0.20956993103027344 + }, + { + "epoch": 2.9461669921875e-05, + "model_forward_time": 0.024394989013671875, + "step": 19308 + }, + { + "epoch": 2.9461669921875e-05, + "step": 19308, + "training_step_time": 0.12218403816223145 + }, + { + "epoch": 2.946319580078125e-05, + "model_forward_time": 0.024245023727416992, + "step": 19309 + }, + { + "epoch": 2.946319580078125e-05, + "step": 19309, + "training_step_time": 0.11400556564331055 + }, + { + "epoch": 2.94647216796875e-05, + "grad_norm": 0.4304065406322479, + "learning_rate": 3.0878559165826236e-05, + "loss": 0.0074, + "step": 19310 + }, + { + "epoch": 2.94647216796875e-05, + "model_forward_time": 0.025407075881958008, + "step": 19310 + }, + { + "epoch": 2.94647216796875e-05, + "step": 19310, + "training_step_time": 0.11446046829223633 + }, + { + "epoch": 2.946624755859375e-05, + "model_forward_time": 0.024799346923828125, + "step": 19311 + }, + { + "epoch": 2.946624755859375e-05, + "step": 19311, + "training_step_time": 0.11042380332946777 + }, + { + "epoch": 2.94677734375e-05, + "model_forward_time": 0.025327444076538086, + "step": 19312 + }, + { + "epoch": 2.94677734375e-05, + "step": 19312, + "training_step_time": 0.10468006134033203 + }, + { + "epoch": 2.946929931640625e-05, + "model_forward_time": 0.02501654624938965, + "step": 19313 + }, + { + "epoch": 2.946929931640625e-05, + "step": 19313, + "training_step_time": 0.14600300788879395 + }, + { + "epoch": 2.94708251953125e-05, + "model_forward_time": 0.024906635284423828, + "step": 19314 + }, + { + "epoch": 2.94708251953125e-05, + "step": 19314, + "training_step_time": 0.1129465103149414 + }, + { + "epoch": 2.947235107421875e-05, + "model_forward_time": 0.024831533432006836, + "step": 19315 + }, + { + "epoch": 2.947235107421875e-05, + "step": 19315, + "training_step_time": 0.11356425285339355 + }, + { + "epoch": 2.9473876953125e-05, + "model_forward_time": 0.025443315505981445, + "step": 19316 + }, + { + "epoch": 2.9473876953125e-05, + "step": 19316, + "training_step_time": 0.11955547332763672 + }, + { + "epoch": 2.947540283203125e-05, + "model_forward_time": 0.025089263916015625, + "step": 19317 + }, + { + "epoch": 2.947540283203125e-05, + "step": 19317, + "training_step_time": 0.12111520767211914 + }, + { + "epoch": 2.94769287109375e-05, + "model_forward_time": 0.02630305290222168, + "step": 19318 + }, + { + "epoch": 2.94769287109375e-05, + "step": 19318, + "training_step_time": 0.12194705009460449 + }, + { + "epoch": 2.947845458984375e-05, + "model_forward_time": 0.02557659149169922, + "step": 19319 + }, + { + "epoch": 2.947845458984375e-05, + "step": 19319, + "training_step_time": 0.1193840503692627 + }, + { + "epoch": 2.947998046875e-05, + "grad_norm": 0.2681468427181244, + "learning_rate": 3.082764475205442e-05, + "loss": 0.0152, + "step": 19320 + }, + { + "epoch": 2.947998046875e-05, + "model_forward_time": 0.02525162696838379, + "step": 19320 + }, + { + "epoch": 2.947998046875e-05, + "step": 19320, + "training_step_time": 0.11241459846496582 + }, + { + "epoch": 2.948150634765625e-05, + "model_forward_time": 0.025516271591186523, + "step": 19321 + }, + { + "epoch": 2.948150634765625e-05, + "step": 19321, + "training_step_time": 0.1083076000213623 + }, + { + "epoch": 2.94830322265625e-05, + "model_forward_time": 0.025372982025146484, + "step": 19322 + }, + { + "epoch": 2.94830322265625e-05, + "step": 19322, + "training_step_time": 0.16967296600341797 + }, + { + "epoch": 2.948455810546875e-05, + "model_forward_time": 0.024569034576416016, + "step": 19323 + }, + { + "epoch": 2.948455810546875e-05, + "step": 19323, + "training_step_time": 0.10860252380371094 + }, + { + "epoch": 2.9486083984375e-05, + "model_forward_time": 0.02464604377746582, + "step": 19324 + }, + { + "epoch": 2.9486083984375e-05, + "step": 19324, + "training_step_time": 0.10744929313659668 + }, + { + "epoch": 2.948760986328125e-05, + "model_forward_time": 0.02521657943725586, + "step": 19325 + }, + { + "epoch": 2.948760986328125e-05, + "step": 19325, + "training_step_time": 0.13216662406921387 + }, + { + "epoch": 2.94891357421875e-05, + "model_forward_time": 0.0252835750579834, + "step": 19326 + }, + { + "epoch": 2.94891357421875e-05, + "step": 19326, + "training_step_time": 0.12733864784240723 + }, + { + "epoch": 2.949066162109375e-05, + "model_forward_time": 0.024904966354370117, + "step": 19327 + }, + { + "epoch": 2.949066162109375e-05, + "step": 19327, + "training_step_time": 0.13619756698608398 + }, + { + "epoch": 2.94921875e-05, + "model_forward_time": 0.02569127082824707, + "step": 19328 + }, + { + "epoch": 2.94921875e-05, + "step": 19328, + "training_step_time": 0.13528990745544434 + }, + { + "epoch": 2.949371337890625e-05, + "model_forward_time": 0.02448296546936035, + "step": 19329 + }, + { + "epoch": 2.949371337890625e-05, + "step": 19329, + "training_step_time": 0.10435247421264648 + }, + { + "epoch": 2.94952392578125e-05, + "grad_norm": 0.3525830805301666, + "learning_rate": 3.077675363450207e-05, + "loss": 0.0211, + "step": 19330 + }, + { + "epoch": 2.94952392578125e-05, + "model_forward_time": 0.025161266326904297, + "step": 19330 + }, + { + "epoch": 2.94952392578125e-05, + "step": 19330, + "training_step_time": 0.10590624809265137 + }, + { + "epoch": 2.949676513671875e-05, + "model_forward_time": 0.025621891021728516, + "step": 19331 + }, + { + "epoch": 2.949676513671875e-05, + "step": 19331, + "training_step_time": 0.11840987205505371 + }, + { + "epoch": 2.9498291015625e-05, + "model_forward_time": 0.02527165412902832, + "step": 19332 + }, + { + "epoch": 2.9498291015625e-05, + "step": 19332, + "training_step_time": 0.11003470420837402 + }, + { + "epoch": 2.949981689453125e-05, + "model_forward_time": 0.026260852813720703, + "step": 19333 + }, + { + "epoch": 2.949981689453125e-05, + "step": 19333, + "training_step_time": 0.1905043125152588 + }, + { + "epoch": 2.95013427734375e-05, + "model_forward_time": 0.02552008628845215, + "step": 19334 + }, + { + "epoch": 2.95013427734375e-05, + "step": 19334, + "training_step_time": 0.10716557502746582 + }, + { + "epoch": 2.950286865234375e-05, + "model_forward_time": 0.02435016632080078, + "step": 19335 + }, + { + "epoch": 2.950286865234375e-05, + "step": 19335, + "training_step_time": 0.10715746879577637 + }, + { + "epoch": 2.950439453125e-05, + "model_forward_time": 0.025578022003173828, + "step": 19336 + }, + { + "epoch": 2.950439453125e-05, + "step": 19336, + "training_step_time": 0.11125636100769043 + }, + { + "epoch": 2.950592041015625e-05, + "model_forward_time": 0.025347232818603516, + "step": 19337 + }, + { + "epoch": 2.950592041015625e-05, + "step": 19337, + "training_step_time": 0.10583090782165527 + }, + { + "epoch": 2.95074462890625e-05, + "model_forward_time": 0.025775432586669922, + "step": 19338 + }, + { + "epoch": 2.95074462890625e-05, + "step": 19338, + "training_step_time": 0.11068367958068848 + }, + { + "epoch": 2.950897216796875e-05, + "model_forward_time": 0.025562286376953125, + "step": 19339 + }, + { + "epoch": 2.950897216796875e-05, + "step": 19339, + "training_step_time": 0.11296343803405762 + }, + { + "epoch": 2.9510498046875e-05, + "grad_norm": 0.36293911933898926, + "learning_rate": 3.072588587500669e-05, + "loss": 0.026, + "step": 19340 + }, + { + "epoch": 2.9510498046875e-05, + "model_forward_time": 0.024964094161987305, + "step": 19340 + }, + { + "epoch": 2.9510498046875e-05, + "step": 19340, + "training_step_time": 0.11462855339050293 + }, + { + "epoch": 2.951202392578125e-05, + "model_forward_time": 0.02418375015258789, + "step": 19341 + }, + { + "epoch": 2.951202392578125e-05, + "step": 19341, + "training_step_time": 0.11475729942321777 + }, + { + "epoch": 2.95135498046875e-05, + "model_forward_time": 0.025511980056762695, + "step": 19342 + }, + { + "epoch": 2.95135498046875e-05, + "step": 19342, + "training_step_time": 0.11611342430114746 + }, + { + "epoch": 2.951507568359375e-05, + "model_forward_time": 0.02539515495300293, + "step": 19343 + }, + { + "epoch": 2.951507568359375e-05, + "step": 19343, + "training_step_time": 0.11210036277770996 + }, + { + "epoch": 2.95166015625e-05, + "model_forward_time": 0.025484800338745117, + "step": 19344 + }, + { + "epoch": 2.95166015625e-05, + "step": 19344, + "training_step_time": 0.10908985137939453 + }, + { + "epoch": 2.951812744140625e-05, + "model_forward_time": 0.025290727615356445, + "step": 19345 + }, + { + "epoch": 2.951812744140625e-05, + "step": 19345, + "training_step_time": 0.10505819320678711 + }, + { + "epoch": 2.95196533203125e-05, + "model_forward_time": 0.025363445281982422, + "step": 19346 + }, + { + "epoch": 2.95196533203125e-05, + "step": 19346, + "training_step_time": 0.10651993751525879 + }, + { + "epoch": 2.952117919921875e-05, + "model_forward_time": 0.025542259216308594, + "step": 19347 + }, + { + "epoch": 2.952117919921875e-05, + "step": 19347, + "training_step_time": 0.1069936752319336 + }, + { + "epoch": 2.9522705078125e-05, + "model_forward_time": 0.02524089813232422, + "step": 19348 + }, + { + "epoch": 2.9522705078125e-05, + "step": 19348, + "training_step_time": 0.10680007934570312 + }, + { + "epoch": 2.952423095703125e-05, + "model_forward_time": 0.025068044662475586, + "step": 19349 + }, + { + "epoch": 2.952423095703125e-05, + "step": 19349, + "training_step_time": 0.10931158065795898 + }, + { + "epoch": 2.95257568359375e-05, + "grad_norm": 0.17749305069446564, + "learning_rate": 3.0675041535377405e-05, + "loss": 0.0135, + "step": 19350 + }, + { + "epoch": 2.95257568359375e-05, + "model_forward_time": 0.02513885498046875, + "step": 19350 + }, + { + "epoch": 2.95257568359375e-05, + "step": 19350, + "training_step_time": 0.10729122161865234 + }, + { + "epoch": 2.952728271484375e-05, + "model_forward_time": 0.025162935256958008, + "step": 19351 + }, + { + "epoch": 2.952728271484375e-05, + "step": 19351, + "training_step_time": 0.2151949405670166 + }, + { + "epoch": 2.952880859375e-05, + "model_forward_time": 0.025155305862426758, + "step": 19352 + }, + { + "epoch": 2.952880859375e-05, + "step": 19352, + "training_step_time": 0.10356616973876953 + }, + { + "epoch": 2.953033447265625e-05, + "model_forward_time": 0.02480030059814453, + "step": 19353 + }, + { + "epoch": 2.953033447265625e-05, + "step": 19353, + "training_step_time": 0.12329268455505371 + }, + { + "epoch": 2.95318603515625e-05, + "model_forward_time": 0.025142431259155273, + "step": 19354 + }, + { + "epoch": 2.95318603515625e-05, + "step": 19354, + "training_step_time": 0.19245362281799316 + }, + { + "epoch": 2.953338623046875e-05, + "model_forward_time": 0.0240933895111084, + "step": 19355 + }, + { + "epoch": 2.953338623046875e-05, + "step": 19355, + "training_step_time": 0.10188412666320801 + }, + { + "epoch": 2.9534912109375e-05, + "model_forward_time": 0.024489164352416992, + "step": 19356 + }, + { + "epoch": 2.9534912109375e-05, + "step": 19356, + "training_step_time": 0.10262680053710938 + }, + { + "epoch": 2.953643798828125e-05, + "model_forward_time": 0.024477720260620117, + "step": 19357 + }, + { + "epoch": 2.953643798828125e-05, + "step": 19357, + "training_step_time": 0.10724163055419922 + }, + { + "epoch": 2.95379638671875e-05, + "model_forward_time": 0.02522134780883789, + "step": 19358 + }, + { + "epoch": 2.95379638671875e-05, + "step": 19358, + "training_step_time": 0.10643744468688965 + }, + { + "epoch": 2.953948974609375e-05, + "model_forward_time": 0.02412104606628418, + "step": 19359 + }, + { + "epoch": 2.953948974609375e-05, + "step": 19359, + "training_step_time": 0.10055661201477051 + }, + { + "epoch": 2.9541015625e-05, + "grad_norm": 0.21281743049621582, + "learning_rate": 3.062422067739485e-05, + "loss": 0.0047, + "step": 19360 + }, + { + "epoch": 2.9541015625e-05, + "model_forward_time": 0.024124860763549805, + "step": 19360 + }, + { + "epoch": 2.9541015625e-05, + "step": 19360, + "training_step_time": 0.14296603202819824 + }, + { + "epoch": 2.954254150390625e-05, + "model_forward_time": 0.02445220947265625, + "step": 19361 + }, + { + "epoch": 2.954254150390625e-05, + "step": 19361, + "training_step_time": 0.10367655754089355 + }, + { + "epoch": 2.95440673828125e-05, + "model_forward_time": 0.024925947189331055, + "step": 19362 + }, + { + "epoch": 2.95440673828125e-05, + "step": 19362, + "training_step_time": 0.19614362716674805 + }, + { + "epoch": 2.954559326171875e-05, + "model_forward_time": 0.024068117141723633, + "step": 19363 + }, + { + "epoch": 2.954559326171875e-05, + "step": 19363, + "training_step_time": 0.1382288932800293 + }, + { + "epoch": 2.9547119140625e-05, + "model_forward_time": 0.024335622787475586, + "step": 19364 + }, + { + "epoch": 2.9547119140625e-05, + "step": 19364, + "training_step_time": 0.11353278160095215 + }, + { + "epoch": 2.954864501953125e-05, + "model_forward_time": 0.026988744735717773, + "step": 19365 + }, + { + "epoch": 2.954864501953125e-05, + "step": 19365, + "training_step_time": 0.10896944999694824 + }, + { + "epoch": 2.95501708984375e-05, + "model_forward_time": 0.025338411331176758, + "step": 19366 + }, + { + "epoch": 2.95501708984375e-05, + "step": 19366, + "training_step_time": 0.1040339469909668 + }, + { + "epoch": 2.955169677734375e-05, + "model_forward_time": 0.025185346603393555, + "step": 19367 + }, + { + "epoch": 2.955169677734375e-05, + "step": 19367, + "training_step_time": 0.1069643497467041 + }, + { + "epoch": 2.955322265625e-05, + "model_forward_time": 0.025547266006469727, + "step": 19368 + }, + { + "epoch": 2.955322265625e-05, + "step": 19368, + "training_step_time": 0.10523128509521484 + }, + { + "epoch": 2.955474853515625e-05, + "model_forward_time": 0.02496027946472168, + "step": 19369 + }, + { + "epoch": 2.955474853515625e-05, + "step": 19369, + "training_step_time": 0.11645388603210449 + }, + { + "epoch": 2.95562744140625e-05, + "grad_norm": 0.25978705286979675, + "learning_rate": 3.057342336281122e-05, + "loss": 0.0075, + "step": 19370 + }, + { + "epoch": 2.95562744140625e-05, + "model_forward_time": 0.025130510330200195, + "step": 19370 + }, + { + "epoch": 2.95562744140625e-05, + "step": 19370, + "training_step_time": 0.1526503562927246 + }, + { + "epoch": 2.955780029296875e-05, + "model_forward_time": 0.024779319763183594, + "step": 19371 + }, + { + "epoch": 2.955780029296875e-05, + "step": 19371, + "training_step_time": 0.1971452236175537 + }, + { + "epoch": 2.9559326171875e-05, + "model_forward_time": 0.025882720947265625, + "step": 19372 + }, + { + "epoch": 2.9559326171875e-05, + "step": 19372, + "training_step_time": 0.14332938194274902 + }, + { + "epoch": 2.956085205078125e-05, + "model_forward_time": 0.024105310440063477, + "step": 19373 + }, + { + "epoch": 2.956085205078125e-05, + "step": 19373, + "training_step_time": 0.19579315185546875 + }, + { + "epoch": 2.95623779296875e-05, + "model_forward_time": 0.024492263793945312, + "step": 19374 + }, + { + "epoch": 2.95623779296875e-05, + "step": 19374, + "training_step_time": 0.12819886207580566 + }, + { + "epoch": 2.956390380859375e-05, + "model_forward_time": 0.02418375015258789, + "step": 19375 + }, + { + "epoch": 2.956390380859375e-05, + "step": 19375, + "training_step_time": 0.16190361976623535 + }, + { + "epoch": 2.95654296875e-05, + "model_forward_time": 0.024300575256347656, + "step": 19376 + }, + { + "epoch": 2.95654296875e-05, + "step": 19376, + "training_step_time": 0.1443929672241211 + }, + { + "epoch": 2.956695556640625e-05, + "model_forward_time": 0.02418231964111328, + "step": 19377 + }, + { + "epoch": 2.956695556640625e-05, + "step": 19377, + "training_step_time": 0.10849189758300781 + }, + { + "epoch": 2.95684814453125e-05, + "model_forward_time": 0.02497243881225586, + "step": 19378 + }, + { + "epoch": 2.95684814453125e-05, + "step": 19378, + "training_step_time": 0.10912656784057617 + }, + { + "epoch": 2.957000732421875e-05, + "model_forward_time": 0.028501510620117188, + "step": 19379 + }, + { + "epoch": 2.957000732421875e-05, + "step": 19379, + "training_step_time": 0.1096353530883789 + }, + { + "epoch": 2.9571533203125e-05, + "grad_norm": 0.16195520758628845, + "learning_rate": 3.052264965335e-05, + "loss": 0.0167, + "step": 19380 + }, + { + "epoch": 2.9571533203125e-05, + "model_forward_time": 0.025025367736816406, + "step": 19380 + }, + { + "epoch": 2.9571533203125e-05, + "step": 19380, + "training_step_time": 0.10773277282714844 + }, + { + "epoch": 2.957305908203125e-05, + "model_forward_time": 0.025027751922607422, + "step": 19381 + }, + { + "epoch": 2.957305908203125e-05, + "step": 19381, + "training_step_time": 0.10712695121765137 + }, + { + "epoch": 2.95745849609375e-05, + "model_forward_time": 0.025179386138916016, + "step": 19382 + }, + { + "epoch": 2.95745849609375e-05, + "step": 19382, + "training_step_time": 0.10597777366638184 + }, + { + "epoch": 2.957611083984375e-05, + "model_forward_time": 0.02498602867126465, + "step": 19383 + }, + { + "epoch": 2.957611083984375e-05, + "step": 19383, + "training_step_time": 0.10631966590881348 + }, + { + "epoch": 2.957763671875e-05, + "model_forward_time": 0.026386022567749023, + "step": 19384 + }, + { + "epoch": 2.957763671875e-05, + "step": 19384, + "training_step_time": 0.1076200008392334 + }, + { + "epoch": 2.957916259765625e-05, + "model_forward_time": 0.025328397750854492, + "step": 19385 + }, + { + "epoch": 2.957916259765625e-05, + "step": 19385, + "training_step_time": 0.10784387588500977 + }, + { + "epoch": 2.95806884765625e-05, + "model_forward_time": 0.025004148483276367, + "step": 19386 + }, + { + "epoch": 2.95806884765625e-05, + "step": 19386, + "training_step_time": 0.10548710823059082 + }, + { + "epoch": 2.958221435546875e-05, + "model_forward_time": 0.025218963623046875, + "step": 19387 + }, + { + "epoch": 2.958221435546875e-05, + "step": 19387, + "training_step_time": 0.10984134674072266 + }, + { + "epoch": 2.9583740234375e-05, + "model_forward_time": 0.02541637420654297, + "step": 19388 + }, + { + "epoch": 2.9583740234375e-05, + "step": 19388, + "training_step_time": 0.11308121681213379 + }, + { + "epoch": 2.958526611328125e-05, + "model_forward_time": 0.02499866485595703, + "step": 19389 + }, + { + "epoch": 2.958526611328125e-05, + "step": 19389, + "training_step_time": 0.10491418838500977 + }, + { + "epoch": 2.95867919921875e-05, + "grad_norm": 0.1905003786087036, + "learning_rate": 3.0471899610706038e-05, + "loss": 0.0099, + "step": 19390 + }, + { + "epoch": 2.95867919921875e-05, + "model_forward_time": 0.024810314178466797, + "step": 19390 + }, + { + "epoch": 2.95867919921875e-05, + "step": 19390, + "training_step_time": 0.10346698760986328 + }, + { + "epoch": 2.958831787109375e-05, + "model_forward_time": 0.024114608764648438, + "step": 19391 + }, + { + "epoch": 2.958831787109375e-05, + "step": 19391, + "training_step_time": 0.10409021377563477 + }, + { + "epoch": 2.958984375e-05, + "model_forward_time": 0.024738550186157227, + "step": 19392 + }, + { + "epoch": 2.958984375e-05, + "step": 19392, + "training_step_time": 0.10511088371276855 + }, + { + "epoch": 2.959136962890625e-05, + "model_forward_time": 0.02518153190612793, + "step": 19393 + }, + { + "epoch": 2.959136962890625e-05, + "step": 19393, + "training_step_time": 0.10576367378234863 + }, + { + "epoch": 2.95928955078125e-05, + "model_forward_time": 0.02478194236755371, + "step": 19394 + }, + { + "epoch": 2.95928955078125e-05, + "step": 19394, + "training_step_time": 0.1092371940612793 + }, + { + "epoch": 2.959442138671875e-05, + "model_forward_time": 0.024996280670166016, + "step": 19395 + }, + { + "epoch": 2.959442138671875e-05, + "step": 19395, + "training_step_time": 0.10967040061950684 + }, + { + "epoch": 2.9595947265625e-05, + "model_forward_time": 0.02518177032470703, + "step": 19396 + }, + { + "epoch": 2.9595947265625e-05, + "step": 19396, + "training_step_time": 0.11954307556152344 + }, + { + "epoch": 2.959747314453125e-05, + "model_forward_time": 0.024852514266967773, + "step": 19397 + }, + { + "epoch": 2.959747314453125e-05, + "step": 19397, + "training_step_time": 0.10904169082641602 + }, + { + "epoch": 2.95989990234375e-05, + "model_forward_time": 0.025056838989257812, + "step": 19398 + }, + { + "epoch": 2.95989990234375e-05, + "step": 19398, + "training_step_time": 0.17362737655639648 + }, + { + "epoch": 2.960052490234375e-05, + "model_forward_time": 0.024135112762451172, + "step": 19399 + }, + { + "epoch": 2.960052490234375e-05, + "step": 19399, + "training_step_time": 0.13387346267700195 + }, + { + "epoch": 2.960205078125e-05, + "grad_norm": 0.25740137696266174, + "learning_rate": 3.042117329654544e-05, + "loss": 0.0078, + "step": 19400 + }, + { + "epoch": 2.960205078125e-05, + "model_forward_time": 0.024519920349121094, + "step": 19400 + }, + { + "epoch": 2.960205078125e-05, + "step": 19400, + "training_step_time": 0.11896014213562012 + }, + { + "epoch": 2.960357666015625e-05, + "model_forward_time": 0.024959802627563477, + "step": 19401 + }, + { + "epoch": 2.960357666015625e-05, + "step": 19401, + "training_step_time": 0.10890579223632812 + }, + { + "epoch": 2.96051025390625e-05, + "model_forward_time": 0.02514481544494629, + "step": 19402 + }, + { + "epoch": 2.96051025390625e-05, + "step": 19402, + "training_step_time": 0.10313129425048828 + }, + { + "epoch": 2.960662841796875e-05, + "model_forward_time": 0.025104045867919922, + "step": 19403 + }, + { + "epoch": 2.960662841796875e-05, + "step": 19403, + "training_step_time": 0.10685443878173828 + }, + { + "epoch": 2.9608154296875e-05, + "model_forward_time": 0.025518417358398438, + "step": 19404 + }, + { + "epoch": 2.9608154296875e-05, + "step": 19404, + "training_step_time": 0.10692310333251953 + }, + { + "epoch": 2.960968017578125e-05, + "model_forward_time": 0.025260448455810547, + "step": 19405 + }, + { + "epoch": 2.960968017578125e-05, + "step": 19405, + "training_step_time": 0.12213921546936035 + }, + { + "epoch": 2.96112060546875e-05, + "model_forward_time": 0.025303363800048828, + "step": 19406 + }, + { + "epoch": 2.96112060546875e-05, + "step": 19406, + "training_step_time": 0.13008928298950195 + }, + { + "epoch": 2.961273193359375e-05, + "model_forward_time": 0.02525043487548828, + "step": 19407 + }, + { + "epoch": 2.961273193359375e-05, + "step": 19407, + "training_step_time": 0.14973711967468262 + }, + { + "epoch": 2.96142578125e-05, + "model_forward_time": 0.024605274200439453, + "step": 19408 + }, + { + "epoch": 2.96142578125e-05, + "step": 19408, + "training_step_time": 0.15762591361999512 + }, + { + "epoch": 2.961578369140625e-05, + "model_forward_time": 0.024689197540283203, + "step": 19409 + }, + { + "epoch": 2.961578369140625e-05, + "step": 19409, + "training_step_time": 0.1307682991027832 + }, + { + "epoch": 2.96173095703125e-05, + "grad_norm": 0.2595687806606293, + "learning_rate": 3.0370470772505433e-05, + "loss": 0.0061, + "step": 19410 + }, + { + "epoch": 2.96173095703125e-05, + "model_forward_time": 0.024315595626831055, + "step": 19410 + }, + { + "epoch": 2.96173095703125e-05, + "step": 19410, + "training_step_time": 0.1808323860168457 + }, + { + "epoch": 2.961883544921875e-05, + "model_forward_time": 0.024743318557739258, + "step": 19411 + }, + { + "epoch": 2.961883544921875e-05, + "step": 19411, + "training_step_time": 0.11928200721740723 + }, + { + "epoch": 2.9620361328125e-05, + "model_forward_time": 0.023906946182250977, + "step": 19412 + }, + { + "epoch": 2.9620361328125e-05, + "step": 19412, + "training_step_time": 0.1158149242401123 + }, + { + "epoch": 2.962188720703125e-05, + "model_forward_time": 0.023882627487182617, + "step": 19413 + }, + { + "epoch": 2.962188720703125e-05, + "step": 19413, + "training_step_time": 0.1131587028503418 + }, + { + "epoch": 2.96234130859375e-05, + "model_forward_time": 0.023974895477294922, + "step": 19414 + }, + { + "epoch": 2.96234130859375e-05, + "step": 19414, + "training_step_time": 0.10978293418884277 + }, + { + "epoch": 2.962493896484375e-05, + "model_forward_time": 0.024952173233032227, + "step": 19415 + }, + { + "epoch": 2.962493896484375e-05, + "step": 19415, + "training_step_time": 0.12639260292053223 + }, + { + "epoch": 2.962646484375e-05, + "model_forward_time": 0.024823904037475586, + "step": 19416 + }, + { + "epoch": 2.962646484375e-05, + "step": 19416, + "training_step_time": 0.11259102821350098 + }, + { + "epoch": 2.962799072265625e-05, + "model_forward_time": 0.02462005615234375, + "step": 19417 + }, + { + "epoch": 2.962799072265625e-05, + "step": 19417, + "training_step_time": 0.11641240119934082 + }, + { + "epoch": 2.96295166015625e-05, + "model_forward_time": 0.024954795837402344, + "step": 19418 + }, + { + "epoch": 2.96295166015625e-05, + "step": 19418, + "training_step_time": 0.11258506774902344 + }, + { + "epoch": 2.963104248046875e-05, + "model_forward_time": 0.025301456451416016, + "step": 19419 + }, + { + "epoch": 2.963104248046875e-05, + "step": 19419, + "training_step_time": 0.16359305381774902 + }, + { + "epoch": 2.9632568359375e-05, + "grad_norm": 0.14640112221240997, + "learning_rate": 3.03197921001944e-05, + "loss": 0.0068, + "step": 19420 + }, + { + "epoch": 2.9632568359375e-05, + "model_forward_time": 0.024810791015625, + "step": 19420 + }, + { + "epoch": 2.9632568359375e-05, + "step": 19420, + "training_step_time": 0.19085407257080078 + }, + { + "epoch": 2.963409423828125e-05, + "model_forward_time": 0.024271726608276367, + "step": 19421 + }, + { + "epoch": 2.963409423828125e-05, + "step": 19421, + "training_step_time": 0.16853785514831543 + }, + { + "epoch": 2.96356201171875e-05, + "model_forward_time": 0.024113893508911133, + "step": 19422 + }, + { + "epoch": 2.96356201171875e-05, + "step": 19422, + "training_step_time": 0.11551141738891602 + }, + { + "epoch": 2.963714599609375e-05, + "model_forward_time": 0.024299144744873047, + "step": 19423 + }, + { + "epoch": 2.963714599609375e-05, + "step": 19423, + "training_step_time": 0.1065371036529541 + }, + { + "epoch": 2.9638671875e-05, + "model_forward_time": 0.025440216064453125, + "step": 19424 + }, + { + "epoch": 2.9638671875e-05, + "step": 19424, + "training_step_time": 0.1945357322692871 + }, + { + "epoch": 2.964019775390625e-05, + "model_forward_time": 0.024390220642089844, + "step": 19425 + }, + { + "epoch": 2.964019775390625e-05, + "step": 19425, + "training_step_time": 0.10519695281982422 + }, + { + "epoch": 2.96417236328125e-05, + "model_forward_time": 0.024646759033203125, + "step": 19426 + }, + { + "epoch": 2.96417236328125e-05, + "step": 19426, + "training_step_time": 0.10261917114257812 + }, + { + "epoch": 2.964324951171875e-05, + "model_forward_time": 0.024927377700805664, + "step": 19427 + }, + { + "epoch": 2.964324951171875e-05, + "step": 19427, + "training_step_time": 0.10620450973510742 + }, + { + "epoch": 2.9644775390625e-05, + "model_forward_time": 0.02482771873474121, + "step": 19428 + }, + { + "epoch": 2.9644775390625e-05, + "step": 19428, + "training_step_time": 0.10649251937866211 + }, + { + "epoch": 2.964630126953125e-05, + "model_forward_time": 0.025658369064331055, + "step": 19429 + }, + { + "epoch": 2.964630126953125e-05, + "step": 19429, + "training_step_time": 0.10568952560424805 + }, + { + "epoch": 2.96478271484375e-05, + "grad_norm": 0.319711297750473, + "learning_rate": 3.0269137341191677e-05, + "loss": 0.0195, + "step": 19430 + }, + { + "epoch": 2.96478271484375e-05, + "model_forward_time": 0.024947643280029297, + "step": 19430 + }, + { + "epoch": 2.96478271484375e-05, + "step": 19430, + "training_step_time": 0.11019659042358398 + }, + { + "epoch": 2.964935302734375e-05, + "model_forward_time": 0.02520585060119629, + "step": 19431 + }, + { + "epoch": 2.964935302734375e-05, + "step": 19431, + "training_step_time": 0.10590481758117676 + }, + { + "epoch": 2.965087890625e-05, + "model_forward_time": 0.024945974349975586, + "step": 19432 + }, + { + "epoch": 2.965087890625e-05, + "step": 19432, + "training_step_time": 0.10547399520874023 + }, + { + "epoch": 2.965240478515625e-05, + "model_forward_time": 0.026547908782958984, + "step": 19433 + }, + { + "epoch": 2.965240478515625e-05, + "step": 19433, + "training_step_time": 0.10596537590026855 + }, + { + "epoch": 2.96539306640625e-05, + "model_forward_time": 0.025240659713745117, + "step": 19434 + }, + { + "epoch": 2.96539306640625e-05, + "step": 19434, + "training_step_time": 0.10838079452514648 + }, + { + "epoch": 2.965545654296875e-05, + "model_forward_time": 0.025402307510375977, + "step": 19435 + }, + { + "epoch": 2.965545654296875e-05, + "step": 19435, + "training_step_time": 0.10556578636169434 + }, + { + "epoch": 2.9656982421875e-05, + "model_forward_time": 0.025099515914916992, + "step": 19436 + }, + { + "epoch": 2.9656982421875e-05, + "step": 19436, + "training_step_time": 0.10491061210632324 + }, + { + "epoch": 2.965850830078125e-05, + "model_forward_time": 0.02480316162109375, + "step": 19437 + }, + { + "epoch": 2.965850830078125e-05, + "step": 19437, + "training_step_time": 0.1044769287109375 + }, + { + "epoch": 2.96600341796875e-05, + "model_forward_time": 0.02489781379699707, + "step": 19438 + }, + { + "epoch": 2.96600341796875e-05, + "step": 19438, + "training_step_time": 0.1067502498626709 + }, + { + "epoch": 2.966156005859375e-05, + "model_forward_time": 0.025623083114624023, + "step": 19439 + }, + { + "epoch": 2.966156005859375e-05, + "step": 19439, + "training_step_time": 0.11151862144470215 + }, + { + "epoch": 2.96630859375e-05, + "grad_norm": 0.15751205384731293, + "learning_rate": 3.0218506557047598e-05, + "loss": 0.0068, + "step": 19440 + }, + { + "epoch": 2.96630859375e-05, + "model_forward_time": 0.025697946548461914, + "step": 19440 + }, + { + "epoch": 2.96630859375e-05, + "step": 19440, + "training_step_time": 0.14061498641967773 + }, + { + "epoch": 2.966461181640625e-05, + "model_forward_time": 0.02424907684326172, + "step": 19441 + }, + { + "epoch": 2.966461181640625e-05, + "step": 19441, + "training_step_time": 0.1075296401977539 + }, + { + "epoch": 2.96661376953125e-05, + "model_forward_time": 0.02544999122619629, + "step": 19442 + }, + { + "epoch": 2.96661376953125e-05, + "step": 19442, + "training_step_time": 0.10875582695007324 + }, + { + "epoch": 2.966766357421875e-05, + "model_forward_time": 0.02531599998474121, + "step": 19443 + }, + { + "epoch": 2.966766357421875e-05, + "step": 19443, + "training_step_time": 0.12152576446533203 + }, + { + "epoch": 2.9669189453125e-05, + "model_forward_time": 0.024504423141479492, + "step": 19444 + }, + { + "epoch": 2.9669189453125e-05, + "step": 19444, + "training_step_time": 0.1664745807647705 + }, + { + "epoch": 2.967071533203125e-05, + "model_forward_time": 0.02435302734375, + "step": 19445 + }, + { + "epoch": 2.967071533203125e-05, + "step": 19445, + "training_step_time": 0.16179132461547852 + }, + { + "epoch": 2.96722412109375e-05, + "model_forward_time": 0.023926973342895508, + "step": 19446 + }, + { + "epoch": 2.96722412109375e-05, + "step": 19446, + "training_step_time": 0.11522841453552246 + }, + { + "epoch": 2.967376708984375e-05, + "model_forward_time": 0.024471282958984375, + "step": 19447 + }, + { + "epoch": 2.967376708984375e-05, + "step": 19447, + "training_step_time": 0.1034231185913086 + }, + { + "epoch": 2.967529296875e-05, + "model_forward_time": 0.02530074119567871, + "step": 19448 + }, + { + "epoch": 2.967529296875e-05, + "step": 19448, + "training_step_time": 0.10602331161499023 + }, + { + "epoch": 2.967681884765625e-05, + "model_forward_time": 0.025386810302734375, + "step": 19449 + }, + { + "epoch": 2.967681884765625e-05, + "step": 19449, + "training_step_time": 0.10744285583496094 + }, + { + "epoch": 2.96783447265625e-05, + "grad_norm": 0.1679406464099884, + "learning_rate": 3.0167899809283308e-05, + "loss": 0.0089, + "step": 19450 + }, + { + "epoch": 2.96783447265625e-05, + "model_forward_time": 0.02478933334350586, + "step": 19450 + }, + { + "epoch": 2.96783447265625e-05, + "step": 19450, + "training_step_time": 0.10740232467651367 + }, + { + "epoch": 2.967987060546875e-05, + "model_forward_time": 0.025142192840576172, + "step": 19451 + }, + { + "epoch": 2.967987060546875e-05, + "step": 19451, + "training_step_time": 0.10234665870666504 + }, + { + "epoch": 2.9681396484375e-05, + "model_forward_time": 0.024281978607177734, + "step": 19452 + }, + { + "epoch": 2.9681396484375e-05, + "step": 19452, + "training_step_time": 0.14697265625 + }, + { + "epoch": 2.968292236328125e-05, + "model_forward_time": 0.024203777313232422, + "step": 19453 + }, + { + "epoch": 2.968292236328125e-05, + "step": 19453, + "training_step_time": 0.15729093551635742 + }, + { + "epoch": 2.96844482421875e-05, + "model_forward_time": 0.024791955947875977, + "step": 19454 + }, + { + "epoch": 2.96844482421875e-05, + "step": 19454, + "training_step_time": 0.11343812942504883 + }, + { + "epoch": 2.968597412109375e-05, + "model_forward_time": 0.02443385124206543, + "step": 19455 + }, + { + "epoch": 2.968597412109375e-05, + "step": 19455, + "training_step_time": 0.13426446914672852 + }, + { + "epoch": 2.96875e-05, + "model_forward_time": 0.025545835494995117, + "step": 19456 + }, + { + "epoch": 2.96875e-05, + "step": 19456, + "training_step_time": 0.2019481658935547 + }, + { + "epoch": 2.968902587890625e-05, + "model_forward_time": 0.024195432662963867, + "step": 19457 + }, + { + "epoch": 2.968902587890625e-05, + "step": 19457, + "training_step_time": 0.11447429656982422 + }, + { + "epoch": 2.96905517578125e-05, + "model_forward_time": 0.024137258529663086, + "step": 19458 + }, + { + "epoch": 2.96905517578125e-05, + "step": 19458, + "training_step_time": 0.12416815757751465 + }, + { + "epoch": 2.969207763671875e-05, + "model_forward_time": 0.02571415901184082, + "step": 19459 + }, + { + "epoch": 2.969207763671875e-05, + "step": 19459, + "training_step_time": 0.12586021423339844 + }, + { + "epoch": 2.9693603515625e-05, + "grad_norm": 0.08849591016769409, + "learning_rate": 3.0117317159390794e-05, + "loss": 0.0076, + "step": 19460 + }, + { + "epoch": 2.9693603515625e-05, + "model_forward_time": 0.0251004695892334, + "step": 19460 + }, + { + "epoch": 2.9693603515625e-05, + "step": 19460, + "training_step_time": 0.16143274307250977 + }, + { + "epoch": 2.969512939453125e-05, + "model_forward_time": 0.02446913719177246, + "step": 19461 + }, + { + "epoch": 2.969512939453125e-05, + "step": 19461, + "training_step_time": 0.11586260795593262 + }, + { + "epoch": 2.96966552734375e-05, + "model_forward_time": 0.02630615234375, + "step": 19462 + }, + { + "epoch": 2.96966552734375e-05, + "step": 19462, + "training_step_time": 0.11466217041015625 + }, + { + "epoch": 2.969818115234375e-05, + "model_forward_time": 0.024039745330810547, + "step": 19463 + }, + { + "epoch": 2.969818115234375e-05, + "step": 19463, + "training_step_time": 0.11170721054077148 + }, + { + "epoch": 2.969970703125e-05, + "model_forward_time": 0.026463985443115234, + "step": 19464 + }, + { + "epoch": 2.969970703125e-05, + "step": 19464, + "training_step_time": 0.13919734954833984 + }, + { + "epoch": 2.970123291015625e-05, + "model_forward_time": 0.026562213897705078, + "step": 19465 + }, + { + "epoch": 2.970123291015625e-05, + "step": 19465, + "training_step_time": 0.13667678833007812 + }, + { + "epoch": 2.97027587890625e-05, + "model_forward_time": 0.026032686233520508, + "step": 19466 + }, + { + "epoch": 2.97027587890625e-05, + "step": 19466, + "training_step_time": 0.17267203330993652 + }, + { + "epoch": 2.970428466796875e-05, + "model_forward_time": 0.023658275604248047, + "step": 19467 + }, + { + "epoch": 2.970428466796875e-05, + "step": 19467, + "training_step_time": 0.17602777481079102 + }, + { + "epoch": 2.9705810546875e-05, + "model_forward_time": 0.023304462432861328, + "step": 19468 + }, + { + "epoch": 2.9705810546875e-05, + "step": 19468, + "training_step_time": 0.10495972633361816 + }, + { + "epoch": 2.970733642578125e-05, + "model_forward_time": 0.027095317840576172, + "step": 19469 + }, + { + "epoch": 2.970733642578125e-05, + "step": 19469, + "training_step_time": 0.19354033470153809 + }, + { + "epoch": 2.97088623046875e-05, + "grad_norm": 0.1485823690891266, + "learning_rate": 3.006675866883275e-05, + "loss": 0.0134, + "step": 19470 + }, + { + "epoch": 2.97088623046875e-05, + "model_forward_time": 0.02401113510131836, + "step": 19470 + }, + { + "epoch": 2.97088623046875e-05, + "step": 19470, + "training_step_time": 0.10397219657897949 + }, + { + "epoch": 2.971038818359375e-05, + "model_forward_time": 0.024376869201660156, + "step": 19471 + }, + { + "epoch": 2.971038818359375e-05, + "step": 19471, + "training_step_time": 0.10249710083007812 + }, + { + "epoch": 2.97119140625e-05, + "model_forward_time": 0.025582075119018555, + "step": 19472 + }, + { + "epoch": 2.97119140625e-05, + "step": 19472, + "training_step_time": 0.1060645580291748 + }, + { + "epoch": 2.971343994140625e-05, + "model_forward_time": 0.02597808837890625, + "step": 19473 + }, + { + "epoch": 2.971343994140625e-05, + "step": 19473, + "training_step_time": 0.10935664176940918 + }, + { + "epoch": 2.97149658203125e-05, + "model_forward_time": 0.025186538696289062, + "step": 19474 + }, + { + "epoch": 2.97149658203125e-05, + "step": 19474, + "training_step_time": 0.10999536514282227 + }, + { + "epoch": 2.971649169921875e-05, + "model_forward_time": 0.02534031867980957, + "step": 19475 + }, + { + "epoch": 2.971649169921875e-05, + "step": 19475, + "training_step_time": 0.10572290420532227 + }, + { + "epoch": 2.9718017578125e-05, + "model_forward_time": 0.025584697723388672, + "step": 19476 + }, + { + "epoch": 2.9718017578125e-05, + "step": 19476, + "training_step_time": 0.10885095596313477 + }, + { + "epoch": 2.971954345703125e-05, + "model_forward_time": 0.025394916534423828, + "step": 19477 + }, + { + "epoch": 2.971954345703125e-05, + "step": 19477, + "training_step_time": 0.1063392162322998 + }, + { + "epoch": 2.97210693359375e-05, + "model_forward_time": 0.025259733200073242, + "step": 19478 + }, + { + "epoch": 2.97210693359375e-05, + "step": 19478, + "training_step_time": 0.1060783863067627 + }, + { + "epoch": 2.972259521484375e-05, + "model_forward_time": 0.02486705780029297, + "step": 19479 + }, + { + "epoch": 2.972259521484375e-05, + "step": 19479, + "training_step_time": 0.10518336296081543 + }, + { + "epoch": 2.972412109375e-05, + "grad_norm": 0.2886893153190613, + "learning_rate": 3.0016224399042515e-05, + "loss": 0.006, + "step": 19480 + }, + { + "epoch": 2.972412109375e-05, + "model_forward_time": 0.02496337890625, + "step": 19480 + }, + { + "epoch": 2.972412109375e-05, + "step": 19480, + "training_step_time": 0.10764956474304199 + }, + { + "epoch": 2.972564697265625e-05, + "model_forward_time": 0.025459766387939453, + "step": 19481 + }, + { + "epoch": 2.972564697265625e-05, + "step": 19481, + "training_step_time": 0.10448813438415527 + }, + { + "epoch": 2.97271728515625e-05, + "model_forward_time": 0.02514934539794922, + "step": 19482 + }, + { + "epoch": 2.97271728515625e-05, + "step": 19482, + "training_step_time": 0.10755276679992676 + }, + { + "epoch": 2.972869873046875e-05, + "model_forward_time": 0.025012493133544922, + "step": 19483 + }, + { + "epoch": 2.972869873046875e-05, + "step": 19483, + "training_step_time": 0.10634422302246094 + }, + { + "epoch": 2.9730224609375e-05, + "model_forward_time": 0.025177717208862305, + "step": 19484 + }, + { + "epoch": 2.9730224609375e-05, + "step": 19484, + "training_step_time": 0.10847735404968262 + }, + { + "epoch": 2.973175048828125e-05, + "model_forward_time": 0.02456188201904297, + "step": 19485 + }, + { + "epoch": 2.973175048828125e-05, + "step": 19485, + "training_step_time": 0.10931944847106934 + }, + { + "epoch": 2.97332763671875e-05, + "model_forward_time": 0.02558279037475586, + "step": 19486 + }, + { + "epoch": 2.97332763671875e-05, + "step": 19486, + "training_step_time": 0.10979151725769043 + }, + { + "epoch": 2.973480224609375e-05, + "model_forward_time": 0.025223493576049805, + "step": 19487 + }, + { + "epoch": 2.973480224609375e-05, + "step": 19487, + "training_step_time": 0.20734429359436035 + }, + { + "epoch": 2.9736328125e-05, + "model_forward_time": 0.02458047866821289, + "step": 19488 + }, + { + "epoch": 2.9736328125e-05, + "step": 19488, + "training_step_time": 0.10582351684570312 + }, + { + "epoch": 2.973785400390625e-05, + "model_forward_time": 0.02407550811767578, + "step": 19489 + }, + { + "epoch": 2.973785400390625e-05, + "step": 19489, + "training_step_time": 0.12486124038696289 + }, + { + "epoch": 2.97393798828125e-05, + "grad_norm": 0.23423996567726135, + "learning_rate": 2.9965714411423972e-05, + "loss": 0.0188, + "step": 19490 + }, + { + "epoch": 2.97393798828125e-05, + "model_forward_time": 0.025362730026245117, + "step": 19490 + }, + { + "epoch": 2.97393798828125e-05, + "step": 19490, + "training_step_time": 0.16048336029052734 + }, + { + "epoch": 2.974090576171875e-05, + "model_forward_time": 0.024205923080444336, + "step": 19491 + }, + { + "epoch": 2.974090576171875e-05, + "step": 19491, + "training_step_time": 0.10168719291687012 + }, + { + "epoch": 2.9742431640625e-05, + "model_forward_time": 0.0248870849609375, + "step": 19492 + }, + { + "epoch": 2.9742431640625e-05, + "step": 19492, + "training_step_time": 0.10680079460144043 + }, + { + "epoch": 2.974395751953125e-05, + "model_forward_time": 0.024321794509887695, + "step": 19493 + }, + { + "epoch": 2.974395751953125e-05, + "step": 19493, + "training_step_time": 0.10918498039245605 + }, + { + "epoch": 2.97454833984375e-05, + "model_forward_time": 0.02410435676574707, + "step": 19494 + }, + { + "epoch": 2.97454833984375e-05, + "step": 19494, + "training_step_time": 0.11081957817077637 + }, + { + "epoch": 2.974700927734375e-05, + "model_forward_time": 0.02460026741027832, + "step": 19495 + }, + { + "epoch": 2.974700927734375e-05, + "step": 19495, + "training_step_time": 0.10891437530517578 + }, + { + "epoch": 2.974853515625e-05, + "model_forward_time": 0.025681018829345703, + "step": 19496 + }, + { + "epoch": 2.974853515625e-05, + "step": 19496, + "training_step_time": 0.10690689086914062 + }, + { + "epoch": 2.975006103515625e-05, + "model_forward_time": 0.02449774742126465, + "step": 19497 + }, + { + "epoch": 2.975006103515625e-05, + "step": 19497, + "training_step_time": 0.13414430618286133 + }, + { + "epoch": 2.97515869140625e-05, + "model_forward_time": 0.024885177612304688, + "step": 19498 + }, + { + "epoch": 2.97515869140625e-05, + "step": 19498, + "training_step_time": 0.12016129493713379 + }, + { + "epoch": 2.975311279296875e-05, + "model_forward_time": 0.024668216705322266, + "step": 19499 + }, + { + "epoch": 2.975311279296875e-05, + "step": 19499, + "training_step_time": 0.19413399696350098 + }, + { + "epoch": 2.9754638671875e-05, + "grad_norm": 0.26104629039764404, + "learning_rate": 2.991522876735154e-05, + "loss": 0.0066, + "step": 19500 + }, + { + "epoch": 2.9754638671875e-05, + "model_forward_time": 0.024544715881347656, + "step": 19500 + }, + { + "epoch": 2.9754638671875e-05, + "step": 19500, + "training_step_time": 0.12636113166809082 + }, + { + "epoch": 2.975616455078125e-05, + "model_forward_time": 0.02457118034362793, + "step": 19501 + }, + { + "epoch": 2.975616455078125e-05, + "step": 19501, + "training_step_time": 0.19796109199523926 + }, + { + "epoch": 2.97576904296875e-05, + "model_forward_time": 0.024531841278076172, + "step": 19502 + }, + { + "epoch": 2.97576904296875e-05, + "step": 19502, + "training_step_time": 0.10522866249084473 + }, + { + "epoch": 2.975921630859375e-05, + "model_forward_time": 0.024940967559814453, + "step": 19503 + }, + { + "epoch": 2.975921630859375e-05, + "step": 19503, + "training_step_time": 0.1037909984588623 + }, + { + "epoch": 2.97607421875e-05, + "model_forward_time": 0.025203466415405273, + "step": 19504 + }, + { + "epoch": 2.97607421875e-05, + "step": 19504, + "training_step_time": 0.10757970809936523 + }, + { + "epoch": 2.976226806640625e-05, + "model_forward_time": 0.025115966796875, + "step": 19505 + }, + { + "epoch": 2.976226806640625e-05, + "step": 19505, + "training_step_time": 0.17981576919555664 + }, + { + "epoch": 2.97637939453125e-05, + "model_forward_time": 0.026972055435180664, + "step": 19506 + }, + { + "epoch": 2.97637939453125e-05, + "step": 19506, + "training_step_time": 0.14259886741638184 + }, + { + "epoch": 2.976531982421875e-05, + "model_forward_time": 0.024760961532592773, + "step": 19507 + }, + { + "epoch": 2.976531982421875e-05, + "step": 19507, + "training_step_time": 0.11158537864685059 + }, + { + "epoch": 2.9766845703125e-05, + "model_forward_time": 0.02513885498046875, + "step": 19508 + }, + { + "epoch": 2.9766845703125e-05, + "step": 19508, + "training_step_time": 0.1312885284423828 + }, + { + "epoch": 2.976837158203125e-05, + "model_forward_time": 0.025066614151000977, + "step": 19509 + }, + { + "epoch": 2.976837158203125e-05, + "step": 19509, + "training_step_time": 0.15006566047668457 + }, + { + "epoch": 2.97698974609375e-05, + "grad_norm": 0.3384643793106079, + "learning_rate": 2.9864767528170002e-05, + "loss": 0.0171, + "step": 19510 + }, + { + "epoch": 2.97698974609375e-05, + "model_forward_time": 0.024920225143432617, + "step": 19510 + }, + { + "epoch": 2.97698974609375e-05, + "step": 19510, + "training_step_time": 0.17083978652954102 + }, + { + "epoch": 2.977142333984375e-05, + "model_forward_time": 0.02491450309753418, + "step": 19511 + }, + { + "epoch": 2.977142333984375e-05, + "step": 19511, + "training_step_time": 0.18915295600891113 + }, + { + "epoch": 2.977294921875e-05, + "model_forward_time": 0.02492499351501465, + "step": 19512 + }, + { + "epoch": 2.977294921875e-05, + "step": 19512, + "training_step_time": 0.11075377464294434 + }, + { + "epoch": 2.977447509765625e-05, + "model_forward_time": 0.024904966354370117, + "step": 19513 + }, + { + "epoch": 2.977447509765625e-05, + "step": 19513, + "training_step_time": 0.1067497730255127 + }, + { + "epoch": 2.97760009765625e-05, + "model_forward_time": 0.0250701904296875, + "step": 19514 + }, + { + "epoch": 2.97760009765625e-05, + "step": 19514, + "training_step_time": 0.10480237007141113 + }, + { + "epoch": 2.977752685546875e-05, + "model_forward_time": 0.024966955184936523, + "step": 19515 + }, + { + "epoch": 2.977752685546875e-05, + "step": 19515, + "training_step_time": 0.10515904426574707 + }, + { + "epoch": 2.9779052734375e-05, + "model_forward_time": 0.025329113006591797, + "step": 19516 + }, + { + "epoch": 2.9779052734375e-05, + "step": 19516, + "training_step_time": 0.10393309593200684 + }, + { + "epoch": 2.978057861328125e-05, + "model_forward_time": 0.02542710304260254, + "step": 19517 + }, + { + "epoch": 2.978057861328125e-05, + "step": 19517, + "training_step_time": 0.10538196563720703 + }, + { + "epoch": 2.97821044921875e-05, + "model_forward_time": 0.025069713592529297, + "step": 19518 + }, + { + "epoch": 2.97821044921875e-05, + "step": 19518, + "training_step_time": 0.10492539405822754 + }, + { + "epoch": 2.978363037109375e-05, + "model_forward_time": 0.025235891342163086, + "step": 19519 + }, + { + "epoch": 2.978363037109375e-05, + "step": 19519, + "training_step_time": 0.10514688491821289 + }, + { + "epoch": 2.978515625e-05, + "grad_norm": 0.31845393776893616, + "learning_rate": 2.9814330755194564e-05, + "loss": 0.0064, + "step": 19520 + }, + { + "epoch": 2.978515625e-05, + "model_forward_time": 0.025456905364990234, + "step": 19520 + }, + { + "epoch": 2.978515625e-05, + "step": 19520, + "training_step_time": 0.10809993743896484 + }, + { + "epoch": 2.978668212890625e-05, + "model_forward_time": 0.024985790252685547, + "step": 19521 + }, + { + "epoch": 2.978668212890625e-05, + "step": 19521, + "training_step_time": 0.1073908805847168 + }, + { + "epoch": 2.97882080078125e-05, + "model_forward_time": 0.025693178176879883, + "step": 19522 + }, + { + "epoch": 2.97882080078125e-05, + "step": 19522, + "training_step_time": 0.10714316368103027 + }, + { + "epoch": 2.978973388671875e-05, + "model_forward_time": 0.02463841438293457, + "step": 19523 + }, + { + "epoch": 2.978973388671875e-05, + "step": 19523, + "training_step_time": 0.1046140193939209 + }, + { + "epoch": 2.9791259765625e-05, + "model_forward_time": 0.024739503860473633, + "step": 19524 + }, + { + "epoch": 2.9791259765625e-05, + "step": 19524, + "training_step_time": 0.10309576988220215 + }, + { + "epoch": 2.979278564453125e-05, + "model_forward_time": 0.02532958984375, + "step": 19525 + }, + { + "epoch": 2.979278564453125e-05, + "step": 19525, + "training_step_time": 0.1053006649017334 + }, + { + "epoch": 2.97943115234375e-05, + "model_forward_time": 0.025354862213134766, + "step": 19526 + }, + { + "epoch": 2.97943115234375e-05, + "step": 19526, + "training_step_time": 0.1072547435760498 + }, + { + "epoch": 2.979583740234375e-05, + "model_forward_time": 0.025319576263427734, + "step": 19527 + }, + { + "epoch": 2.979583740234375e-05, + "step": 19527, + "training_step_time": 0.10787343978881836 + }, + { + "epoch": 2.979736328125e-05, + "model_forward_time": 0.025336503982543945, + "step": 19528 + }, + { + "epoch": 2.979736328125e-05, + "step": 19528, + "training_step_time": 0.10618138313293457 + }, + { + "epoch": 2.979888916015625e-05, + "model_forward_time": 0.024941682815551758, + "step": 19529 + }, + { + "epoch": 2.979888916015625e-05, + "step": 19529, + "training_step_time": 0.10894060134887695 + }, + { + "epoch": 2.98004150390625e-05, + "grad_norm": 0.18962590396404266, + "learning_rate": 2.976391850971065e-05, + "loss": 0.0066, + "step": 19530 + }, + { + "epoch": 2.98004150390625e-05, + "model_forward_time": 0.028411865234375, + "step": 19530 + }, + { + "epoch": 2.98004150390625e-05, + "step": 19530, + "training_step_time": 0.15261578559875488 + }, + { + "epoch": 2.980194091796875e-05, + "model_forward_time": 0.025447607040405273, + "step": 19531 + }, + { + "epoch": 2.980194091796875e-05, + "step": 19531, + "training_step_time": 0.11745095252990723 + }, + { + "epoch": 2.9803466796875e-05, + "model_forward_time": 0.024374723434448242, + "step": 19532 + }, + { + "epoch": 2.9803466796875e-05, + "step": 19532, + "training_step_time": 0.21504592895507812 + }, + { + "epoch": 2.980499267578125e-05, + "model_forward_time": 0.024563074111938477, + "step": 19533 + }, + { + "epoch": 2.980499267578125e-05, + "step": 19533, + "training_step_time": 0.11453509330749512 + }, + { + "epoch": 2.98065185546875e-05, + "model_forward_time": 0.024665117263793945, + "step": 19534 + }, + { + "epoch": 2.98065185546875e-05, + "step": 19534, + "training_step_time": 0.11942505836486816 + }, + { + "epoch": 2.980804443359375e-05, + "model_forward_time": 0.02391505241394043, + "step": 19535 + }, + { + "epoch": 2.980804443359375e-05, + "step": 19535, + "training_step_time": 0.19457292556762695 + }, + { + "epoch": 2.98095703125e-05, + "model_forward_time": 0.02424454689025879, + "step": 19536 + }, + { + "epoch": 2.98095703125e-05, + "step": 19536, + "training_step_time": 0.11475229263305664 + }, + { + "epoch": 2.981109619140625e-05, + "model_forward_time": 0.024344921112060547, + "step": 19537 + }, + { + "epoch": 2.981109619140625e-05, + "step": 19537, + "training_step_time": 0.11418747901916504 + }, + { + "epoch": 2.98126220703125e-05, + "model_forward_time": 0.02504277229309082, + "step": 19538 + }, + { + "epoch": 2.98126220703125e-05, + "step": 19538, + "training_step_time": 0.11642599105834961 + }, + { + "epoch": 2.981414794921875e-05, + "model_forward_time": 0.0252532958984375, + "step": 19539 + }, + { + "epoch": 2.981414794921875e-05, + "step": 19539, + "training_step_time": 0.11401176452636719 + }, + { + "epoch": 2.9815673828125e-05, + "grad_norm": 0.15322192013263702, + "learning_rate": 2.971353085297387e-05, + "loss": 0.0071, + "step": 19540 + }, + { + "epoch": 2.9815673828125e-05, + "model_forward_time": 0.0251615047454834, + "step": 19540 + }, + { + "epoch": 2.9815673828125e-05, + "step": 19540, + "training_step_time": 0.11014580726623535 + }, + { + "epoch": 2.981719970703125e-05, + "model_forward_time": 0.025318384170532227, + "step": 19541 + }, + { + "epoch": 2.981719970703125e-05, + "step": 19541, + "training_step_time": 0.1055152416229248 + }, + { + "epoch": 2.98187255859375e-05, + "model_forward_time": 0.024315834045410156, + "step": 19542 + }, + { + "epoch": 2.98187255859375e-05, + "step": 19542, + "training_step_time": 0.14560866355895996 + }, + { + "epoch": 2.982025146484375e-05, + "model_forward_time": 0.024071216583251953, + "step": 19543 + }, + { + "epoch": 2.982025146484375e-05, + "step": 19543, + "training_step_time": 0.10537433624267578 + }, + { + "epoch": 2.982177734375e-05, + "model_forward_time": 0.025277137756347656, + "step": 19544 + }, + { + "epoch": 2.982177734375e-05, + "step": 19544, + "training_step_time": 0.18425846099853516 + }, + { + "epoch": 2.982330322265625e-05, + "model_forward_time": 0.025066614151000977, + "step": 19545 + }, + { + "epoch": 2.982330322265625e-05, + "step": 19545, + "training_step_time": 0.13294649124145508 + }, + { + "epoch": 2.98248291015625e-05, + "model_forward_time": 0.024482250213623047, + "step": 19546 + }, + { + "epoch": 2.98248291015625e-05, + "step": 19546, + "training_step_time": 0.19315671920776367 + }, + { + "epoch": 2.982635498046875e-05, + "model_forward_time": 0.023893117904663086, + "step": 19547 + }, + { + "epoch": 2.982635498046875e-05, + "step": 19547, + "training_step_time": 0.10288095474243164 + }, + { + "epoch": 2.9827880859375e-05, + "model_forward_time": 0.024553775787353516, + "step": 19548 + }, + { + "epoch": 2.9827880859375e-05, + "step": 19548, + "training_step_time": 0.10717582702636719 + }, + { + "epoch": 2.982940673828125e-05, + "model_forward_time": 0.025120019912719727, + "step": 19549 + }, + { + "epoch": 2.982940673828125e-05, + "step": 19549, + "training_step_time": 0.10562658309936523 + }, + { + "epoch": 2.98309326171875e-05, + "grad_norm": 0.20454606413841248, + "learning_rate": 2.9663167846209998e-05, + "loss": 0.0085, + "step": 19550 + }, + { + "epoch": 2.98309326171875e-05, + "model_forward_time": 0.025198698043823242, + "step": 19550 + }, + { + "epoch": 2.98309326171875e-05, + "step": 19550, + "training_step_time": 0.10779333114624023 + }, + { + "epoch": 2.983245849609375e-05, + "model_forward_time": 0.025770902633666992, + "step": 19551 + }, + { + "epoch": 2.983245849609375e-05, + "step": 19551, + "training_step_time": 0.11975407600402832 + }, + { + "epoch": 2.9833984375e-05, + "model_forward_time": 0.025444746017456055, + "step": 19552 + }, + { + "epoch": 2.9833984375e-05, + "step": 19552, + "training_step_time": 0.14971399307250977 + }, + { + "epoch": 2.983551025390625e-05, + "model_forward_time": 0.024919748306274414, + "step": 19553 + }, + { + "epoch": 2.983551025390625e-05, + "step": 19553, + "training_step_time": 0.13924837112426758 + }, + { + "epoch": 2.98370361328125e-05, + "model_forward_time": 0.024295806884765625, + "step": 19554 + }, + { + "epoch": 2.98370361328125e-05, + "step": 19554, + "training_step_time": 0.16065597534179688 + }, + { + "epoch": 2.983856201171875e-05, + "model_forward_time": 0.024028539657592773, + "step": 19555 + }, + { + "epoch": 2.983856201171875e-05, + "step": 19555, + "training_step_time": 0.15027952194213867 + }, + { + "epoch": 2.9840087890625e-05, + "model_forward_time": 0.0243227481842041, + "step": 19556 + }, + { + "epoch": 2.9840087890625e-05, + "step": 19556, + "training_step_time": 0.10836601257324219 + }, + { + "epoch": 2.984161376953125e-05, + "model_forward_time": 0.024856090545654297, + "step": 19557 + }, + { + "epoch": 2.984161376953125e-05, + "step": 19557, + "training_step_time": 0.10556936264038086 + }, + { + "epoch": 2.98431396484375e-05, + "model_forward_time": 0.02561330795288086, + "step": 19558 + }, + { + "epoch": 2.98431396484375e-05, + "step": 19558, + "training_step_time": 0.1068260669708252 + }, + { + "epoch": 2.984466552734375e-05, + "model_forward_time": 0.024974584579467773, + "step": 19559 + }, + { + "epoch": 2.984466552734375e-05, + "step": 19559, + "training_step_time": 0.10716128349304199 + }, + { + "epoch": 2.984619140625e-05, + "grad_norm": 0.1670469492673874, + "learning_rate": 2.9612829550614836e-05, + "loss": 0.0145, + "step": 19560 + }, + { + "epoch": 2.984619140625e-05, + "model_forward_time": 0.026113271713256836, + "step": 19560 + }, + { + "epoch": 2.984619140625e-05, + "step": 19560, + "training_step_time": 0.19251441955566406 + }, + { + "epoch": 2.984771728515625e-05, + "model_forward_time": 0.024308443069458008, + "step": 19561 + }, + { + "epoch": 2.984771728515625e-05, + "step": 19561, + "training_step_time": 0.10585212707519531 + }, + { + "epoch": 2.98492431640625e-05, + "model_forward_time": 0.024365901947021484, + "step": 19562 + }, + { + "epoch": 2.98492431640625e-05, + "step": 19562, + "training_step_time": 0.10660457611083984 + }, + { + "epoch": 2.985076904296875e-05, + "model_forward_time": 0.025064468383789062, + "step": 19563 + }, + { + "epoch": 2.985076904296875e-05, + "step": 19563, + "training_step_time": 0.11022615432739258 + }, + { + "epoch": 2.9852294921875e-05, + "model_forward_time": 0.025190114974975586, + "step": 19564 + }, + { + "epoch": 2.9852294921875e-05, + "step": 19564, + "training_step_time": 0.11057066917419434 + }, + { + "epoch": 2.985382080078125e-05, + "model_forward_time": 0.024777650833129883, + "step": 19565 + }, + { + "epoch": 2.985382080078125e-05, + "step": 19565, + "training_step_time": 0.11058807373046875 + }, + { + "epoch": 2.98553466796875e-05, + "model_forward_time": 0.025362491607666016, + "step": 19566 + }, + { + "epoch": 2.98553466796875e-05, + "step": 19566, + "training_step_time": 0.10823297500610352 + }, + { + "epoch": 2.985687255859375e-05, + "model_forward_time": 0.024789810180664062, + "step": 19567 + }, + { + "epoch": 2.985687255859375e-05, + "step": 19567, + "training_step_time": 0.10738968849182129 + }, + { + "epoch": 2.98583984375e-05, + "model_forward_time": 0.025118112564086914, + "step": 19568 + }, + { + "epoch": 2.98583984375e-05, + "step": 19568, + "training_step_time": 0.10932087898254395 + }, + { + "epoch": 2.985992431640625e-05, + "model_forward_time": 0.024895906448364258, + "step": 19569 + }, + { + "epoch": 2.985992431640625e-05, + "step": 19569, + "training_step_time": 0.10595917701721191 + }, + { + "epoch": 2.98614501953125e-05, + "grad_norm": 0.13820356130599976, + "learning_rate": 2.956251602735413e-05, + "loss": 0.0089, + "step": 19570 + }, + { + "epoch": 2.98614501953125e-05, + "model_forward_time": 0.02538585662841797, + "step": 19570 + }, + { + "epoch": 2.98614501953125e-05, + "step": 19570, + "training_step_time": 0.10759091377258301 + }, + { + "epoch": 2.986297607421875e-05, + "model_forward_time": 0.025141239166259766, + "step": 19571 + }, + { + "epoch": 2.986297607421875e-05, + "step": 19571, + "training_step_time": 0.10559511184692383 + }, + { + "epoch": 2.9864501953125e-05, + "model_forward_time": 0.02512812614440918, + "step": 19572 + }, + { + "epoch": 2.9864501953125e-05, + "step": 19572, + "training_step_time": 0.10876679420471191 + }, + { + "epoch": 2.986602783203125e-05, + "model_forward_time": 0.02518773078918457, + "step": 19573 + }, + { + "epoch": 2.986602783203125e-05, + "step": 19573, + "training_step_time": 0.1080482006072998 + }, + { + "epoch": 2.98675537109375e-05, + "model_forward_time": 0.029588937759399414, + "step": 19574 + }, + { + "epoch": 2.98675537109375e-05, + "step": 19574, + "training_step_time": 0.11270737648010254 + }, + { + "epoch": 2.986907958984375e-05, + "model_forward_time": 0.02529764175415039, + "step": 19575 + }, + { + "epoch": 2.986907958984375e-05, + "step": 19575, + "training_step_time": 0.16208887100219727 + }, + { + "epoch": 2.987060546875e-05, + "model_forward_time": 0.02442193031311035, + "step": 19576 + }, + { + "epoch": 2.987060546875e-05, + "step": 19576, + "training_step_time": 0.10684776306152344 + }, + { + "epoch": 2.987213134765625e-05, + "model_forward_time": 0.025663137435913086, + "step": 19577 + }, + { + "epoch": 2.987213134765625e-05, + "step": 19577, + "training_step_time": 0.1072843074798584 + }, + { + "epoch": 2.98736572265625e-05, + "model_forward_time": 0.025342226028442383, + "step": 19578 + }, + { + "epoch": 2.98736572265625e-05, + "step": 19578, + "training_step_time": 0.15394115447998047 + }, + { + "epoch": 2.987518310546875e-05, + "model_forward_time": 0.024675846099853516, + "step": 19579 + }, + { + "epoch": 2.987518310546875e-05, + "step": 19579, + "training_step_time": 0.1824800968170166 + }, + { + "epoch": 2.9876708984375e-05, + "grad_norm": 0.16472890973091125, + "learning_rate": 2.9512227337563604e-05, + "loss": 0.01, + "step": 19580 + }, + { + "epoch": 2.9876708984375e-05, + "model_forward_time": 0.024648666381835938, + "step": 19580 + }, + { + "epoch": 2.9876708984375e-05, + "step": 19580, + "training_step_time": 0.1094059944152832 + }, + { + "epoch": 2.987823486328125e-05, + "model_forward_time": 0.02759575843811035, + "step": 19581 + }, + { + "epoch": 2.987823486328125e-05, + "step": 19581, + "training_step_time": 0.10732865333557129 + }, + { + "epoch": 2.98797607421875e-05, + "model_forward_time": 0.02532219886779785, + "step": 19582 + }, + { + "epoch": 2.98797607421875e-05, + "step": 19582, + "training_step_time": 0.10691976547241211 + }, + { + "epoch": 2.988128662109375e-05, + "model_forward_time": 0.02490830421447754, + "step": 19583 + }, + { + "epoch": 2.988128662109375e-05, + "step": 19583, + "training_step_time": 0.10532093048095703 + }, + { + "epoch": 2.98828125e-05, + "model_forward_time": 0.02523517608642578, + "step": 19584 + }, + { + "epoch": 2.98828125e-05, + "step": 19584, + "training_step_time": 0.10985898971557617 + }, + { + "epoch": 2.988433837890625e-05, + "model_forward_time": 0.025485515594482422, + "step": 19585 + }, + { + "epoch": 2.988433837890625e-05, + "step": 19585, + "training_step_time": 0.1072542667388916 + }, + { + "epoch": 2.98858642578125e-05, + "model_forward_time": 0.025235414505004883, + "step": 19586 + }, + { + "epoch": 2.98858642578125e-05, + "step": 19586, + "training_step_time": 0.10715627670288086 + }, + { + "epoch": 2.988739013671875e-05, + "model_forward_time": 0.025811195373535156, + "step": 19587 + }, + { + "epoch": 2.988739013671875e-05, + "step": 19587, + "training_step_time": 0.10572934150695801 + }, + { + "epoch": 2.9888916015625e-05, + "model_forward_time": 0.02470254898071289, + "step": 19588 + }, + { + "epoch": 2.9888916015625e-05, + "step": 19588, + "training_step_time": 0.15378355979919434 + }, + { + "epoch": 2.989044189453125e-05, + "model_forward_time": 0.025005102157592773, + "step": 19589 + }, + { + "epoch": 2.989044189453125e-05, + "step": 19589, + "training_step_time": 0.16457486152648926 + }, + { + "epoch": 2.98919677734375e-05, + "grad_norm": 0.1409149467945099, + "learning_rate": 2.9461963542348737e-05, + "loss": 0.0073, + "step": 19590 + }, + { + "epoch": 2.98919677734375e-05, + "model_forward_time": 0.024463415145874023, + "step": 19590 + }, + { + "epoch": 2.98919677734375e-05, + "step": 19590, + "training_step_time": 0.1068716049194336 + }, + { + "epoch": 2.989349365234375e-05, + "model_forward_time": 0.025065183639526367, + "step": 19591 + }, + { + "epoch": 2.989349365234375e-05, + "step": 19591, + "training_step_time": 0.12686634063720703 + }, + { + "epoch": 2.989501953125e-05, + "model_forward_time": 0.025710105895996094, + "step": 19592 + }, + { + "epoch": 2.989501953125e-05, + "step": 19592, + "training_step_time": 0.1659994125366211 + }, + { + "epoch": 2.989654541015625e-05, + "model_forward_time": 0.02454066276550293, + "step": 19593 + }, + { + "epoch": 2.989654541015625e-05, + "step": 19593, + "training_step_time": 0.10453462600708008 + }, + { + "epoch": 2.98980712890625e-05, + "model_forward_time": 0.02484726905822754, + "step": 19594 + }, + { + "epoch": 2.98980712890625e-05, + "step": 19594, + "training_step_time": 0.10209393501281738 + }, + { + "epoch": 2.989959716796875e-05, + "model_forward_time": 0.02641892433166504, + "step": 19595 + }, + { + "epoch": 2.989959716796875e-05, + "step": 19595, + "training_step_time": 0.10591387748718262 + }, + { + "epoch": 2.9901123046875e-05, + "model_forward_time": 0.02702498435974121, + "step": 19596 + }, + { + "epoch": 2.9901123046875e-05, + "step": 19596, + "training_step_time": 0.17159152030944824 + }, + { + "epoch": 2.990264892578125e-05, + "model_forward_time": 0.024260997772216797, + "step": 19597 + }, + { + "epoch": 2.990264892578125e-05, + "step": 19597, + "training_step_time": 0.10137414932250977 + }, + { + "epoch": 2.99041748046875e-05, + "model_forward_time": 0.024391651153564453, + "step": 19598 + }, + { + "epoch": 2.99041748046875e-05, + "step": 19598, + "training_step_time": 0.18944954872131348 + }, + { + "epoch": 2.990570068359375e-05, + "model_forward_time": 0.023992538452148438, + "step": 19599 + }, + { + "epoch": 2.990570068359375e-05, + "step": 19599, + "training_step_time": 0.10377621650695801 + }, + { + "epoch": 2.99072265625e-05, + "grad_norm": 0.32406118512153625, + "learning_rate": 2.9411724702784758e-05, + "loss": 0.0122, + "step": 19600 + }, + { + "epoch": 2.99072265625e-05, + "model_forward_time": 0.023989439010620117, + "step": 19600 + }, + { + "epoch": 2.99072265625e-05, + "step": 19600, + "training_step_time": 0.15044164657592773 + }, + { + "epoch": 2.990875244140625e-05, + "model_forward_time": 0.02483534812927246, + "step": 19601 + }, + { + "epoch": 2.990875244140625e-05, + "step": 19601, + "training_step_time": 0.19647574424743652 + }, + { + "epoch": 2.99102783203125e-05, + "model_forward_time": 0.024031400680541992, + "step": 19602 + }, + { + "epoch": 2.99102783203125e-05, + "step": 19602, + "training_step_time": 0.10714411735534668 + }, + { + "epoch": 2.991180419921875e-05, + "model_forward_time": 0.024303913116455078, + "step": 19603 + }, + { + "epoch": 2.991180419921875e-05, + "step": 19603, + "training_step_time": 0.12185359001159668 + }, + { + "epoch": 2.9913330078125e-05, + "model_forward_time": 0.025064468383789062, + "step": 19604 + }, + { + "epoch": 2.9913330078125e-05, + "step": 19604, + "training_step_time": 0.10674762725830078 + }, + { + "epoch": 2.991485595703125e-05, + "model_forward_time": 0.025110244750976562, + "step": 19605 + }, + { + "epoch": 2.991485595703125e-05, + "step": 19605, + "training_step_time": 0.188767671585083 + }, + { + "epoch": 2.99163818359375e-05, + "model_forward_time": 0.024672746658325195, + "step": 19606 + }, + { + "epoch": 2.99163818359375e-05, + "step": 19606, + "training_step_time": 0.10637378692626953 + }, + { + "epoch": 2.991790771484375e-05, + "model_forward_time": 0.024598121643066406, + "step": 19607 + }, + { + "epoch": 2.991790771484375e-05, + "step": 19607, + "training_step_time": 0.1020803451538086 + }, + { + "epoch": 2.991943359375e-05, + "model_forward_time": 0.024354219436645508, + "step": 19608 + }, + { + "epoch": 2.991943359375e-05, + "step": 19608, + "training_step_time": 0.10612273216247559 + }, + { + "epoch": 2.992095947265625e-05, + "model_forward_time": 0.025546789169311523, + "step": 19609 + }, + { + "epoch": 2.992095947265625e-05, + "step": 19609, + "training_step_time": 0.10814785957336426 + }, + { + "epoch": 2.99224853515625e-05, + "grad_norm": 0.14034727215766907, + "learning_rate": 2.936151087991663e-05, + "loss": 0.0077, + "step": 19610 + }, + { + "epoch": 2.99224853515625e-05, + "model_forward_time": 0.03080463409423828, + "step": 19610 + }, + { + "epoch": 2.99224853515625e-05, + "step": 19610, + "training_step_time": 0.11222672462463379 + }, + { + "epoch": 2.992401123046875e-05, + "model_forward_time": 0.025627851486206055, + "step": 19611 + }, + { + "epoch": 2.992401123046875e-05, + "step": 19611, + "training_step_time": 0.11129617691040039 + }, + { + "epoch": 2.9925537109375e-05, + "model_forward_time": 0.026285409927368164, + "step": 19612 + }, + { + "epoch": 2.9925537109375e-05, + "step": 19612, + "training_step_time": 0.11401128768920898 + }, + { + "epoch": 2.992706298828125e-05, + "model_forward_time": 0.024807214736938477, + "step": 19613 + }, + { + "epoch": 2.992706298828125e-05, + "step": 19613, + "training_step_time": 0.10528182983398438 + }, + { + "epoch": 2.99285888671875e-05, + "model_forward_time": 0.025075435638427734, + "step": 19614 + }, + { + "epoch": 2.99285888671875e-05, + "step": 19614, + "training_step_time": 0.10641241073608398 + }, + { + "epoch": 2.993011474609375e-05, + "model_forward_time": 0.025201797485351562, + "step": 19615 + }, + { + "epoch": 2.993011474609375e-05, + "step": 19615, + "training_step_time": 0.10706758499145508 + }, + { + "epoch": 2.9931640625e-05, + "model_forward_time": 0.025257110595703125, + "step": 19616 + }, + { + "epoch": 2.9931640625e-05, + "step": 19616, + "training_step_time": 0.10868287086486816 + }, + { + "epoch": 2.993316650390625e-05, + "model_forward_time": 0.024718761444091797, + "step": 19617 + }, + { + "epoch": 2.993316650390625e-05, + "step": 19617, + "training_step_time": 0.10501384735107422 + }, + { + "epoch": 2.99346923828125e-05, + "model_forward_time": 0.025174379348754883, + "step": 19618 + }, + { + "epoch": 2.99346923828125e-05, + "step": 19618, + "training_step_time": 0.10816812515258789 + }, + { + "epoch": 2.993621826171875e-05, + "model_forward_time": 0.025418996810913086, + "step": 19619 + }, + { + "epoch": 2.993621826171875e-05, + "step": 19619, + "training_step_time": 0.10870885848999023 + }, + { + "epoch": 2.9937744140625e-05, + "grad_norm": 0.42201992869377136, + "learning_rate": 2.931132213475884e-05, + "loss": 0.0116, + "step": 19620 + }, + { + "epoch": 2.9937744140625e-05, + "model_forward_time": 0.025223970413208008, + "step": 19620 + }, + { + "epoch": 2.9937744140625e-05, + "step": 19620, + "training_step_time": 0.10993289947509766 + }, + { + "epoch": 2.993927001953125e-05, + "model_forward_time": 0.025276660919189453, + "step": 19621 + }, + { + "epoch": 2.993927001953125e-05, + "step": 19621, + "training_step_time": 0.11061716079711914 + }, + { + "epoch": 2.99407958984375e-05, + "model_forward_time": 0.02518606185913086, + "step": 19622 + }, + { + "epoch": 2.99407958984375e-05, + "step": 19622, + "training_step_time": 0.11000728607177734 + }, + { + "epoch": 2.994232177734375e-05, + "model_forward_time": 0.025216102600097656, + "step": 19623 + }, + { + "epoch": 2.994232177734375e-05, + "step": 19623, + "training_step_time": 0.12096762657165527 + }, + { + "epoch": 2.994384765625e-05, + "model_forward_time": 0.02507948875427246, + "step": 19624 + }, + { + "epoch": 2.994384765625e-05, + "step": 19624, + "training_step_time": 0.10816836357116699 + }, + { + "epoch": 2.994537353515625e-05, + "model_forward_time": 0.025690793991088867, + "step": 19625 + }, + { + "epoch": 2.994537353515625e-05, + "step": 19625, + "training_step_time": 0.21753358840942383 + }, + { + "epoch": 2.99468994140625e-05, + "model_forward_time": 0.02683711051940918, + "step": 19626 + }, + { + "epoch": 2.99468994140625e-05, + "step": 19626, + "training_step_time": 0.1206669807434082 + }, + { + "epoch": 2.994842529296875e-05, + "model_forward_time": 0.024258136749267578, + "step": 19627 + }, + { + "epoch": 2.994842529296875e-05, + "step": 19627, + "training_step_time": 0.1037757396697998 + }, + { + "epoch": 2.9949951171875e-05, + "model_forward_time": 0.025513172149658203, + "step": 19628 + }, + { + "epoch": 2.9949951171875e-05, + "step": 19628, + "training_step_time": 0.10775232315063477 + }, + { + "epoch": 2.995147705078125e-05, + "model_forward_time": 0.025191783905029297, + "step": 19629 + }, + { + "epoch": 2.995147705078125e-05, + "step": 19629, + "training_step_time": 0.10503959655761719 + }, + { + "epoch": 2.99530029296875e-05, + "grad_norm": 0.14935193955898285, + "learning_rate": 2.9261158528295495e-05, + "loss": 0.0114, + "step": 19630 + }, + { + "epoch": 2.99530029296875e-05, + "model_forward_time": 0.025160551071166992, + "step": 19630 + }, + { + "epoch": 2.99530029296875e-05, + "step": 19630, + "training_step_time": 0.10604548454284668 + }, + { + "epoch": 2.995452880859375e-05, + "model_forward_time": 0.02519392967224121, + "step": 19631 + }, + { + "epoch": 2.995452880859375e-05, + "step": 19631, + "training_step_time": 0.10671377182006836 + }, + { + "epoch": 2.99560546875e-05, + "model_forward_time": 0.025049924850463867, + "step": 19632 + }, + { + "epoch": 2.99560546875e-05, + "step": 19632, + "training_step_time": 0.10622239112854004 + }, + { + "epoch": 2.995758056640625e-05, + "model_forward_time": 0.025531291961669922, + "step": 19633 + }, + { + "epoch": 2.995758056640625e-05, + "step": 19633, + "training_step_time": 0.10392546653747559 + }, + { + "epoch": 2.99591064453125e-05, + "model_forward_time": 0.02480769157409668, + "step": 19634 + }, + { + "epoch": 2.99591064453125e-05, + "step": 19634, + "training_step_time": 0.15025877952575684 + }, + { + "epoch": 2.996063232421875e-05, + "model_forward_time": 0.02473902702331543, + "step": 19635 + }, + { + "epoch": 2.996063232421875e-05, + "step": 19635, + "training_step_time": 0.16156482696533203 + }, + { + "epoch": 2.9962158203125e-05, + "model_forward_time": 0.024419307708740234, + "step": 19636 + }, + { + "epoch": 2.9962158203125e-05, + "step": 19636, + "training_step_time": 0.10918855667114258 + }, + { + "epoch": 2.996368408203125e-05, + "model_forward_time": 0.0249786376953125, + "step": 19637 + }, + { + "epoch": 2.996368408203125e-05, + "step": 19637, + "training_step_time": 0.1332385540008545 + }, + { + "epoch": 2.99652099609375e-05, + "model_forward_time": 0.025173664093017578, + "step": 19638 + }, + { + "epoch": 2.99652099609375e-05, + "step": 19638, + "training_step_time": 0.19610214233398438 + }, + { + "epoch": 2.996673583984375e-05, + "model_forward_time": 0.024643659591674805, + "step": 19639 + }, + { + "epoch": 2.996673583984375e-05, + "step": 19639, + "training_step_time": 0.10800290107727051 + }, + { + "epoch": 2.996826171875e-05, + "grad_norm": 0.22325342893600464, + "learning_rate": 2.9211020121480083e-05, + "loss": 0.0073, + "step": 19640 + }, + { + "epoch": 2.996826171875e-05, + "model_forward_time": 0.024869441986083984, + "step": 19640 + }, + { + "epoch": 2.996826171875e-05, + "step": 19640, + "training_step_time": 0.19586706161499023 + }, + { + "epoch": 2.996978759765625e-05, + "model_forward_time": 0.024135828018188477, + "step": 19641 + }, + { + "epoch": 2.996978759765625e-05, + "step": 19641, + "training_step_time": 0.19405770301818848 + }, + { + "epoch": 2.99713134765625e-05, + "model_forward_time": 0.024590492248535156, + "step": 19642 + }, + { + "epoch": 2.99713134765625e-05, + "step": 19642, + "training_step_time": 0.16583728790283203 + }, + { + "epoch": 2.997283935546875e-05, + "model_forward_time": 0.023678064346313477, + "step": 19643 + }, + { + "epoch": 2.997283935546875e-05, + "step": 19643, + "training_step_time": 0.14778590202331543 + }, + { + "epoch": 2.9974365234375e-05, + "model_forward_time": 0.0245211124420166, + "step": 19644 + }, + { + "epoch": 2.9974365234375e-05, + "step": 19644, + "training_step_time": 0.173325777053833 + }, + { + "epoch": 2.997589111328125e-05, + "model_forward_time": 0.024397850036621094, + "step": 19645 + }, + { + "epoch": 2.997589111328125e-05, + "step": 19645, + "training_step_time": 0.14224982261657715 + }, + { + "epoch": 2.99774169921875e-05, + "model_forward_time": 0.027773618698120117, + "step": 19646 + }, + { + "epoch": 2.99774169921875e-05, + "step": 19646, + "training_step_time": 0.19606757164001465 + }, + { + "epoch": 2.997894287109375e-05, + "model_forward_time": 0.02436542510986328, + "step": 19647 + }, + { + "epoch": 2.997894287109375e-05, + "step": 19647, + "training_step_time": 0.1328599452972412 + }, + { + "epoch": 2.998046875e-05, + "model_forward_time": 0.025101423263549805, + "step": 19648 + }, + { + "epoch": 2.998046875e-05, + "step": 19648, + "training_step_time": 0.1195838451385498 + }, + { + "epoch": 2.998199462890625e-05, + "model_forward_time": 0.025168657302856445, + "step": 19649 + }, + { + "epoch": 2.998199462890625e-05, + "step": 19649, + "training_step_time": 0.18176770210266113 + }, + { + "epoch": 2.99835205078125e-05, + "grad_norm": 0.23292744159698486, + "learning_rate": 2.916090697523549e-05, + "loss": 0.0097, + "step": 19650 + }, + { + "epoch": 2.99835205078125e-05, + "model_forward_time": 0.02465987205505371, + "step": 19650 + }, + { + "epoch": 2.99835205078125e-05, + "step": 19650, + "training_step_time": 0.11229872703552246 + }, + { + "epoch": 2.998504638671875e-05, + "model_forward_time": 0.024750947952270508, + "step": 19651 + }, + { + "epoch": 2.998504638671875e-05, + "step": 19651, + "training_step_time": 0.11211442947387695 + }, + { + "epoch": 2.9986572265625e-05, + "model_forward_time": 0.024960041046142578, + "step": 19652 + }, + { + "epoch": 2.9986572265625e-05, + "step": 19652, + "training_step_time": 0.11193346977233887 + }, + { + "epoch": 2.998809814453125e-05, + "model_forward_time": 0.025344133377075195, + "step": 19653 + }, + { + "epoch": 2.998809814453125e-05, + "step": 19653, + "training_step_time": 0.10980868339538574 + }, + { + "epoch": 2.99896240234375e-05, + "model_forward_time": 0.025328636169433594, + "step": 19654 + }, + { + "epoch": 2.99896240234375e-05, + "step": 19654, + "training_step_time": 0.1088714599609375 + }, + { + "epoch": 2.999114990234375e-05, + "model_forward_time": 0.02562117576599121, + "step": 19655 + }, + { + "epoch": 2.999114990234375e-05, + "step": 19655, + "training_step_time": 0.10650014877319336 + }, + { + "epoch": 2.999267578125e-05, + "model_forward_time": 0.025331735610961914, + "step": 19656 + }, + { + "epoch": 2.999267578125e-05, + "step": 19656, + "training_step_time": 0.10725831985473633 + }, + { + "epoch": 2.999420166015625e-05, + "model_forward_time": 0.02508068084716797, + "step": 19657 + }, + { + "epoch": 2.999420166015625e-05, + "step": 19657, + "training_step_time": 0.10398125648498535 + }, + { + "epoch": 2.99957275390625e-05, + "model_forward_time": 0.025248050689697266, + "step": 19658 + }, + { + "epoch": 2.99957275390625e-05, + "step": 19658, + "training_step_time": 0.10518431663513184 + }, + { + "epoch": 2.999725341796875e-05, + "model_forward_time": 0.025169849395751953, + "step": 19659 + }, + { + "epoch": 2.999725341796875e-05, + "step": 19659, + "training_step_time": 0.10518312454223633 + }, + { + "epoch": 2.9998779296875e-05, + "grad_norm": 0.17813117802143097, + "learning_rate": 2.9110819150453927e-05, + "loss": 0.0066, + "step": 19660 + }, + { + "epoch": 2.9998779296875e-05, + "model_forward_time": 0.025392532348632812, + "step": 19660 + }, + { + "epoch": 2.9998779296875e-05, + "step": 19660, + "training_step_time": 0.11640501022338867 + }, + { + "epoch": 3.000030517578125e-05, + "model_forward_time": 0.025184154510498047, + "step": 19661 + }, + { + "epoch": 3.000030517578125e-05, + "step": 19661, + "training_step_time": 0.11286234855651855 + }, + { + "epoch": 3.00018310546875e-05, + "model_forward_time": 0.02505040168762207, + "step": 19662 + }, + { + "epoch": 3.00018310546875e-05, + "step": 19662, + "training_step_time": 0.10632491111755371 + }, + { + "epoch": 3.000335693359375e-05, + "model_forward_time": 0.024850845336914062, + "step": 19663 + }, + { + "epoch": 3.000335693359375e-05, + "step": 19663, + "training_step_time": 0.1812114715576172 + }, + { + "epoch": 3.00048828125e-05, + "model_forward_time": 0.02472686767578125, + "step": 19664 + }, + { + "epoch": 3.00048828125e-05, + "step": 19664, + "training_step_time": 0.18145751953125 + }, + { + "epoch": 3.000640869140625e-05, + "model_forward_time": 0.025064706802368164, + "step": 19665 + }, + { + "epoch": 3.000640869140625e-05, + "step": 19665, + "training_step_time": 0.13115763664245605 + }, + { + "epoch": 3.00079345703125e-05, + "model_forward_time": 0.024035215377807617, + "step": 19666 + }, + { + "epoch": 3.00079345703125e-05, + "step": 19666, + "training_step_time": 0.10888504981994629 + }, + { + "epoch": 3.000946044921875e-05, + "model_forward_time": 0.025581836700439453, + "step": 19667 + }, + { + "epoch": 3.000946044921875e-05, + "step": 19667, + "training_step_time": 0.18021655082702637 + }, + { + "epoch": 3.0010986328125e-05, + "model_forward_time": 0.025094270706176758, + "step": 19668 + }, + { + "epoch": 3.0010986328125e-05, + "step": 19668, + "training_step_time": 0.13316726684570312 + }, + { + "epoch": 3.001251220703125e-05, + "model_forward_time": 0.024628400802612305, + "step": 19669 + }, + { + "epoch": 3.001251220703125e-05, + "step": 19669, + "training_step_time": 0.11265873908996582 + }, + { + "epoch": 3.00140380859375e-05, + "grad_norm": 0.19238826632499695, + "learning_rate": 2.9060756707996796e-05, + "loss": 0.007, + "step": 19670 + }, + { + "epoch": 3.00140380859375e-05, + "model_forward_time": 0.02514052391052246, + "step": 19670 + }, + { + "epoch": 3.00140380859375e-05, + "step": 19670, + "training_step_time": 0.11207342147827148 + }, + { + "epoch": 3.001556396484375e-05, + "model_forward_time": 0.0253603458404541, + "step": 19671 + }, + { + "epoch": 3.001556396484375e-05, + "step": 19671, + "training_step_time": 0.11382198333740234 + }, + { + "epoch": 3.001708984375e-05, + "model_forward_time": 0.025166034698486328, + "step": 19672 + }, + { + "epoch": 3.001708984375e-05, + "step": 19672, + "training_step_time": 0.11002564430236816 + }, + { + "epoch": 3.001861572265625e-05, + "model_forward_time": 0.025319337844848633, + "step": 19673 + }, + { + "epoch": 3.001861572265625e-05, + "step": 19673, + "training_step_time": 0.11128664016723633 + }, + { + "epoch": 3.00201416015625e-05, + "model_forward_time": 0.025250673294067383, + "step": 19674 + }, + { + "epoch": 3.00201416015625e-05, + "step": 19674, + "training_step_time": 0.10958981513977051 + }, + { + "epoch": 3.002166748046875e-05, + "model_forward_time": 0.026613473892211914, + "step": 19675 + }, + { + "epoch": 3.002166748046875e-05, + "step": 19675, + "training_step_time": 0.11628365516662598 + }, + { + "epoch": 3.0023193359375e-05, + "model_forward_time": 0.024479389190673828, + "step": 19676 + }, + { + "epoch": 3.0023193359375e-05, + "step": 19676, + "training_step_time": 0.13940095901489258 + }, + { + "epoch": 3.002471923828125e-05, + "model_forward_time": 0.024273157119750977, + "step": 19677 + }, + { + "epoch": 3.002471923828125e-05, + "step": 19677, + "training_step_time": 0.15883731842041016 + }, + { + "epoch": 3.00262451171875e-05, + "model_forward_time": 0.02582263946533203, + "step": 19678 + }, + { + "epoch": 3.00262451171875e-05, + "step": 19678, + "training_step_time": 0.11578488349914551 + }, + { + "epoch": 3.002777099609375e-05, + "model_forward_time": 0.024660348892211914, + "step": 19679 + }, + { + "epoch": 3.002777099609375e-05, + "step": 19679, + "training_step_time": 0.13032269477844238 + }, + { + "epoch": 3.0029296875e-05, + "grad_norm": 0.17850922048091888, + "learning_rate": 2.9010719708694722e-05, + "loss": 0.0061, + "step": 19680 + }, + { + "epoch": 3.0029296875e-05, + "model_forward_time": 0.025351285934448242, + "step": 19680 + }, + { + "epoch": 3.0029296875e-05, + "step": 19680, + "training_step_time": 0.19426560401916504 + }, + { + "epoch": 3.003082275390625e-05, + "model_forward_time": 0.02360057830810547, + "step": 19681 + }, + { + "epoch": 3.003082275390625e-05, + "step": 19681, + "training_step_time": 0.10700106620788574 + }, + { + "epoch": 3.00323486328125e-05, + "model_forward_time": 0.025031328201293945, + "step": 19682 + }, + { + "epoch": 3.00323486328125e-05, + "step": 19682, + "training_step_time": 0.15339183807373047 + }, + { + "epoch": 3.003387451171875e-05, + "model_forward_time": 0.026000261306762695, + "step": 19683 + }, + { + "epoch": 3.003387451171875e-05, + "step": 19683, + "training_step_time": 0.11007094383239746 + }, + { + "epoch": 3.0035400390625e-05, + "model_forward_time": 0.024726152420043945, + "step": 19684 + }, + { + "epoch": 3.0035400390625e-05, + "step": 19684, + "training_step_time": 0.10822200775146484 + }, + { + "epoch": 3.003692626953125e-05, + "model_forward_time": 0.024831295013427734, + "step": 19685 + }, + { + "epoch": 3.003692626953125e-05, + "step": 19685, + "training_step_time": 0.11777949333190918 + }, + { + "epoch": 3.00384521484375e-05, + "model_forward_time": 0.025411128997802734, + "step": 19686 + }, + { + "epoch": 3.00384521484375e-05, + "step": 19686, + "training_step_time": 0.12631964683532715 + }, + { + "epoch": 3.003997802734375e-05, + "model_forward_time": 0.025126218795776367, + "step": 19687 + }, + { + "epoch": 3.003997802734375e-05, + "step": 19687, + "training_step_time": 0.1203007698059082 + }, + { + "epoch": 3.004150390625e-05, + "model_forward_time": 0.024927854537963867, + "step": 19688 + }, + { + "epoch": 3.004150390625e-05, + "step": 19688, + "training_step_time": 0.157027006149292 + }, + { + "epoch": 3.004302978515625e-05, + "model_forward_time": 0.024726390838623047, + "step": 19689 + }, + { + "epoch": 3.004302978515625e-05, + "step": 19689, + "training_step_time": 0.15761280059814453 + }, + { + "epoch": 3.00445556640625e-05, + "grad_norm": 0.11738183349370956, + "learning_rate": 2.8960708213347366e-05, + "loss": 0.0124, + "step": 19690 + }, + { + "epoch": 3.00445556640625e-05, + "model_forward_time": 0.024455785751342773, + "step": 19690 + }, + { + "epoch": 3.00445556640625e-05, + "step": 19690, + "training_step_time": 0.10438394546508789 + }, + { + "epoch": 3.004608154296875e-05, + "model_forward_time": 0.02504253387451172, + "step": 19691 + }, + { + "epoch": 3.004608154296875e-05, + "step": 19691, + "training_step_time": 0.10595107078552246 + }, + { + "epoch": 3.0047607421875e-05, + "model_forward_time": 0.025060653686523438, + "step": 19692 + }, + { + "epoch": 3.0047607421875e-05, + "step": 19692, + "training_step_time": 0.11174416542053223 + }, + { + "epoch": 3.004913330078125e-05, + "model_forward_time": 0.024715423583984375, + "step": 19693 + }, + { + "epoch": 3.004913330078125e-05, + "step": 19693, + "training_step_time": 0.1072533130645752 + }, + { + "epoch": 3.00506591796875e-05, + "model_forward_time": 0.025342702865600586, + "step": 19694 + }, + { + "epoch": 3.00506591796875e-05, + "step": 19694, + "training_step_time": 0.20294976234436035 + }, + { + "epoch": 3.005218505859375e-05, + "model_forward_time": 0.02500438690185547, + "step": 19695 + }, + { + "epoch": 3.005218505859375e-05, + "step": 19695, + "training_step_time": 0.10686826705932617 + }, + { + "epoch": 3.00537109375e-05, + "model_forward_time": 0.02388310432434082, + "step": 19696 + }, + { + "epoch": 3.00537109375e-05, + "step": 19696, + "training_step_time": 0.10616850852966309 + }, + { + "epoch": 3.005523681640625e-05, + "model_forward_time": 0.025279760360717773, + "step": 19697 + }, + { + "epoch": 3.005523681640625e-05, + "step": 19697, + "training_step_time": 0.10956525802612305 + }, + { + "epoch": 3.00567626953125e-05, + "model_forward_time": 0.025069713592529297, + "step": 19698 + }, + { + "epoch": 3.00567626953125e-05, + "step": 19698, + "training_step_time": 0.1048283576965332 + }, + { + "epoch": 3.005828857421875e-05, + "model_forward_time": 0.024960756301879883, + "step": 19699 + }, + { + "epoch": 3.005828857421875e-05, + "step": 19699, + "training_step_time": 0.10488224029541016 + }, + { + "epoch": 3.0059814453125e-05, + "grad_norm": 0.10064233839511871, + "learning_rate": 2.89107222827234e-05, + "loss": 0.0069, + "step": 19700 + }, + { + "epoch": 3.0059814453125e-05, + "model_forward_time": 0.025174617767333984, + "step": 19700 + }, + { + "epoch": 3.0059814453125e-05, + "step": 19700, + "training_step_time": 0.10704684257507324 + }, + { + "epoch": 3.006134033203125e-05, + "model_forward_time": 0.02544999122619629, + "step": 19701 + }, + { + "epoch": 3.006134033203125e-05, + "step": 19701, + "training_step_time": 0.1064295768737793 + }, + { + "epoch": 3.00628662109375e-05, + "model_forward_time": 0.025246381759643555, + "step": 19702 + }, + { + "epoch": 3.00628662109375e-05, + "step": 19702, + "training_step_time": 0.10637378692626953 + }, + { + "epoch": 3.006439208984375e-05, + "model_forward_time": 0.025086641311645508, + "step": 19703 + }, + { + "epoch": 3.006439208984375e-05, + "step": 19703, + "training_step_time": 0.10762429237365723 + }, + { + "epoch": 3.006591796875e-05, + "model_forward_time": 0.025159120559692383, + "step": 19704 + }, + { + "epoch": 3.006591796875e-05, + "step": 19704, + "training_step_time": 0.10619640350341797 + }, + { + "epoch": 3.006744384765625e-05, + "model_forward_time": 0.027978897094726562, + "step": 19705 + }, + { + "epoch": 3.006744384765625e-05, + "step": 19705, + "training_step_time": 0.10939788818359375 + }, + { + "epoch": 3.00689697265625e-05, + "model_forward_time": 0.025289297103881836, + "step": 19706 + }, + { + "epoch": 3.00689697265625e-05, + "step": 19706, + "training_step_time": 0.10675477981567383 + }, + { + "epoch": 3.007049560546875e-05, + "model_forward_time": 0.025128602981567383, + "step": 19707 + }, + { + "epoch": 3.007049560546875e-05, + "step": 19707, + "training_step_time": 0.10494041442871094 + }, + { + "epoch": 3.0072021484375e-05, + "model_forward_time": 0.0251772403717041, + "step": 19708 + }, + { + "epoch": 3.0072021484375e-05, + "step": 19708, + "training_step_time": 0.10567831993103027 + }, + { + "epoch": 3.007354736328125e-05, + "model_forward_time": 0.02515888214111328, + "step": 19709 + }, + { + "epoch": 3.007354736328125e-05, + "step": 19709, + "training_step_time": 0.13473892211914062 + }, + { + "epoch": 3.00750732421875e-05, + "grad_norm": 0.08206330239772797, + "learning_rate": 2.8860761977560436e-05, + "loss": 0.0101, + "step": 19710 + }, + { + "epoch": 3.00750732421875e-05, + "model_forward_time": 0.025106430053710938, + "step": 19710 + }, + { + "epoch": 3.00750732421875e-05, + "step": 19710, + "training_step_time": 0.10725617408752441 + }, + { + "epoch": 3.007659912109375e-05, + "model_forward_time": 0.02534770965576172, + "step": 19711 + }, + { + "epoch": 3.007659912109375e-05, + "step": 19711, + "training_step_time": 0.13705062866210938 + }, + { + "epoch": 3.0078125e-05, + "model_forward_time": 0.025715351104736328, + "step": 19712 + }, + { + "epoch": 3.0078125e-05, + "step": 19712, + "training_step_time": 0.1617591381072998 + }, + { + "epoch": 3.007965087890625e-05, + "model_forward_time": 0.02455592155456543, + "step": 19713 + }, + { + "epoch": 3.007965087890625e-05, + "step": 19713, + "training_step_time": 0.21434688568115234 + }, + { + "epoch": 3.00811767578125e-05, + "model_forward_time": 0.024642229080200195, + "step": 19714 + }, + { + "epoch": 3.00811767578125e-05, + "step": 19714, + "training_step_time": 0.11678814888000488 + }, + { + "epoch": 3.008270263671875e-05, + "model_forward_time": 0.024316787719726562, + "step": 19715 + }, + { + "epoch": 3.008270263671875e-05, + "step": 19715, + "training_step_time": 0.1009368896484375 + }, + { + "epoch": 3.0084228515625e-05, + "model_forward_time": 0.025431394577026367, + "step": 19716 + }, + { + "epoch": 3.0084228515625e-05, + "step": 19716, + "training_step_time": 0.10310125350952148 + }, + { + "epoch": 3.008575439453125e-05, + "model_forward_time": 0.02516651153564453, + "step": 19717 + }, + { + "epoch": 3.008575439453125e-05, + "step": 19717, + "training_step_time": 0.10382294654846191 + }, + { + "epoch": 3.00872802734375e-05, + "model_forward_time": 0.025153636932373047, + "step": 19718 + }, + { + "epoch": 3.00872802734375e-05, + "step": 19718, + "training_step_time": 0.10476183891296387 + }, + { + "epoch": 3.008880615234375e-05, + "model_forward_time": 0.025460481643676758, + "step": 19719 + }, + { + "epoch": 3.008880615234375e-05, + "step": 19719, + "training_step_time": 0.1080467700958252 + }, + { + "epoch": 3.009033203125e-05, + "grad_norm": 0.3016381859779358, + "learning_rate": 2.881082735856499e-05, + "loss": 0.0107, + "step": 19720 + }, + { + "epoch": 3.009033203125e-05, + "model_forward_time": 0.025172948837280273, + "step": 19720 + }, + { + "epoch": 3.009033203125e-05, + "step": 19720, + "training_step_time": 0.11602568626403809 + }, + { + "epoch": 3.009185791015625e-05, + "model_forward_time": 0.025673627853393555, + "step": 19721 + }, + { + "epoch": 3.009185791015625e-05, + "step": 19721, + "training_step_time": 0.1671898365020752 + }, + { + "epoch": 3.00933837890625e-05, + "model_forward_time": 0.02429342269897461, + "step": 19722 + }, + { + "epoch": 3.00933837890625e-05, + "step": 19722, + "training_step_time": 0.26038360595703125 + }, + { + "epoch": 3.009490966796875e-05, + "model_forward_time": 0.02397298812866211, + "step": 19723 + }, + { + "epoch": 3.009490966796875e-05, + "step": 19723, + "training_step_time": 0.2115795612335205 + }, + { + "epoch": 3.0096435546875e-05, + "model_forward_time": 0.024787187576293945, + "step": 19724 + }, + { + "epoch": 3.0096435546875e-05, + "step": 19724, + "training_step_time": 0.21196889877319336 + }, + { + "epoch": 3.009796142578125e-05, + "model_forward_time": 0.02483201026916504, + "step": 19725 + }, + { + "epoch": 3.009796142578125e-05, + "step": 19725, + "training_step_time": 0.20707225799560547 + }, + { + "epoch": 3.00994873046875e-05, + "model_forward_time": 0.025256872177124023, + "step": 19726 + }, + { + "epoch": 3.00994873046875e-05, + "step": 19726, + "training_step_time": 0.19124531745910645 + }, + { + "epoch": 3.010101318359375e-05, + "model_forward_time": 0.02504706382751465, + "step": 19727 + }, + { + "epoch": 3.010101318359375e-05, + "step": 19727, + "training_step_time": 0.10524868965148926 + }, + { + "epoch": 3.01025390625e-05, + "model_forward_time": 0.024422645568847656, + "step": 19728 + }, + { + "epoch": 3.01025390625e-05, + "step": 19728, + "training_step_time": 0.13104915618896484 + }, + { + "epoch": 3.010406494140625e-05, + "model_forward_time": 0.025175094604492188, + "step": 19729 + }, + { + "epoch": 3.010406494140625e-05, + "step": 19729, + "training_step_time": 0.13187837600708008 + }, + { + "epoch": 3.01055908203125e-05, + "grad_norm": 0.3020986020565033, + "learning_rate": 2.8760918486412292e-05, + "loss": 0.0082, + "step": 19730 + }, + { + "epoch": 3.01055908203125e-05, + "model_forward_time": 0.02501058578491211, + "step": 19730 + }, + { + "epoch": 3.01055908203125e-05, + "step": 19730, + "training_step_time": 0.11344385147094727 + }, + { + "epoch": 3.010711669921875e-05, + "model_forward_time": 0.02485489845275879, + "step": 19731 + }, + { + "epoch": 3.010711669921875e-05, + "step": 19731, + "training_step_time": 0.1938316822052002 + }, + { + "epoch": 3.0108642578125e-05, + "model_forward_time": 0.023894071578979492, + "step": 19732 + }, + { + "epoch": 3.0108642578125e-05, + "step": 19732, + "training_step_time": 0.14981698989868164 + }, + { + "epoch": 3.011016845703125e-05, + "model_forward_time": 0.02461981773376465, + "step": 19733 + }, + { + "epoch": 3.011016845703125e-05, + "step": 19733, + "training_step_time": 0.12836933135986328 + }, + { + "epoch": 3.01116943359375e-05, + "model_forward_time": 0.022846460342407227, + "step": 19734 + }, + { + "epoch": 3.01116943359375e-05, + "step": 19734, + "training_step_time": 0.19185757637023926 + }, + { + "epoch": 3.011322021484375e-05, + "model_forward_time": 0.02409219741821289, + "step": 19735 + }, + { + "epoch": 3.011322021484375e-05, + "step": 19735, + "training_step_time": 0.11886119842529297 + }, + { + "epoch": 3.011474609375e-05, + "model_forward_time": 0.022912025451660156, + "step": 19736 + }, + { + "epoch": 3.011474609375e-05, + "step": 19736, + "training_step_time": 0.19974970817565918 + }, + { + "epoch": 3.011627197265625e-05, + "model_forward_time": 0.024472475051879883, + "step": 19737 + }, + { + "epoch": 3.011627197265625e-05, + "step": 19737, + "training_step_time": 0.11124706268310547 + }, + { + "epoch": 3.01177978515625e-05, + "model_forward_time": 0.024484872817993164, + "step": 19738 + }, + { + "epoch": 3.01177978515625e-05, + "step": 19738, + "training_step_time": 0.10616135597229004 + }, + { + "epoch": 3.011932373046875e-05, + "model_forward_time": 0.023968935012817383, + "step": 19739 + }, + { + "epoch": 3.011932373046875e-05, + "step": 19739, + "training_step_time": 0.10559701919555664 + }, + { + "epoch": 3.0120849609375e-05, + "grad_norm": 0.18054917454719543, + "learning_rate": 2.8711035421746367e-05, + "loss": 0.008, + "step": 19740 + }, + { + "epoch": 3.0120849609375e-05, + "model_forward_time": 0.025109529495239258, + "step": 19740 + }, + { + "epoch": 3.0120849609375e-05, + "step": 19740, + "training_step_time": 0.10874342918395996 + }, + { + "epoch": 3.012237548828125e-05, + "model_forward_time": 0.02528238296508789, + "step": 19741 + }, + { + "epoch": 3.012237548828125e-05, + "step": 19741, + "training_step_time": 0.10560011863708496 + }, + { + "epoch": 3.01239013671875e-05, + "model_forward_time": 0.025239229202270508, + "step": 19742 + }, + { + "epoch": 3.01239013671875e-05, + "step": 19742, + "training_step_time": 0.10644865036010742 + }, + { + "epoch": 3.012542724609375e-05, + "model_forward_time": 0.025270938873291016, + "step": 19743 + }, + { + "epoch": 3.012542724609375e-05, + "step": 19743, + "training_step_time": 0.10535812377929688 + }, + { + "epoch": 3.0126953125e-05, + "model_forward_time": 0.02537393569946289, + "step": 19744 + }, + { + "epoch": 3.0126953125e-05, + "step": 19744, + "training_step_time": 0.1062171459197998 + }, + { + "epoch": 3.012847900390625e-05, + "model_forward_time": 0.02492380142211914, + "step": 19745 + }, + { + "epoch": 3.012847900390625e-05, + "step": 19745, + "training_step_time": 0.10589098930358887 + }, + { + "epoch": 3.01300048828125e-05, + "model_forward_time": 0.02548384666442871, + "step": 19746 + }, + { + "epoch": 3.01300048828125e-05, + "step": 19746, + "training_step_time": 0.10793066024780273 + }, + { + "epoch": 3.013153076171875e-05, + "model_forward_time": 0.02533864974975586, + "step": 19747 + }, + { + "epoch": 3.013153076171875e-05, + "step": 19747, + "training_step_time": 0.10680818557739258 + }, + { + "epoch": 3.0133056640625e-05, + "model_forward_time": 0.02514958381652832, + "step": 19748 + }, + { + "epoch": 3.0133056640625e-05, + "step": 19748, + "training_step_time": 0.10398435592651367 + }, + { + "epoch": 3.013458251953125e-05, + "model_forward_time": 0.025442838668823242, + "step": 19749 + }, + { + "epoch": 3.013458251953125e-05, + "step": 19749, + "training_step_time": 0.10552382469177246 + }, + { + "epoch": 3.01361083984375e-05, + "grad_norm": 0.2245168834924698, + "learning_rate": 2.866117822517982e-05, + "loss": 0.0055, + "step": 19750 + }, + { + "epoch": 3.01361083984375e-05, + "model_forward_time": 0.024711132049560547, + "step": 19750 + }, + { + "epoch": 3.01361083984375e-05, + "step": 19750, + "training_step_time": 0.18805241584777832 + }, + { + "epoch": 3.013763427734375e-05, + "model_forward_time": 0.024533748626708984, + "step": 19751 + }, + { + "epoch": 3.013763427734375e-05, + "step": 19751, + "training_step_time": 0.11631083488464355 + }, + { + "epoch": 3.013916015625e-05, + "model_forward_time": 0.02417159080505371, + "step": 19752 + }, + { + "epoch": 3.013916015625e-05, + "step": 19752, + "training_step_time": 0.132887601852417 + }, + { + "epoch": 3.014068603515625e-05, + "model_forward_time": 0.02510380744934082, + "step": 19753 + }, + { + "epoch": 3.014068603515625e-05, + "step": 19753, + "training_step_time": 0.1390397548675537 + }, + { + "epoch": 3.01422119140625e-05, + "model_forward_time": 0.024521827697753906, + "step": 19754 + }, + { + "epoch": 3.01422119140625e-05, + "step": 19754, + "training_step_time": 0.11911988258361816 + }, + { + "epoch": 3.014373779296875e-05, + "model_forward_time": 0.024769306182861328, + "step": 19755 + }, + { + "epoch": 3.014373779296875e-05, + "step": 19755, + "training_step_time": 0.12621855735778809 + }, + { + "epoch": 3.0145263671875e-05, + "model_forward_time": 0.025578975677490234, + "step": 19756 + }, + { + "epoch": 3.0145263671875e-05, + "step": 19756, + "training_step_time": 0.10920333862304688 + }, + { + "epoch": 3.014678955078125e-05, + "model_forward_time": 0.02537822723388672, + "step": 19757 + }, + { + "epoch": 3.014678955078125e-05, + "step": 19757, + "training_step_time": 0.10340428352355957 + }, + { + "epoch": 3.01483154296875e-05, + "model_forward_time": 0.02498316764831543, + "step": 19758 + }, + { + "epoch": 3.01483154296875e-05, + "step": 19758, + "training_step_time": 0.10287833213806152 + }, + { + "epoch": 3.014984130859375e-05, + "model_forward_time": 0.02574920654296875, + "step": 19759 + }, + { + "epoch": 3.014984130859375e-05, + "step": 19759, + "training_step_time": 0.11110448837280273 + }, + { + "epoch": 3.01513671875e-05, + "grad_norm": 0.29145142436027527, + "learning_rate": 2.861134695729385e-05, + "loss": 0.0146, + "step": 19760 + }, + { + "epoch": 3.01513671875e-05, + "model_forward_time": 0.025334596633911133, + "step": 19760 + }, + { + "epoch": 3.01513671875e-05, + "step": 19760, + "training_step_time": 0.1078188419342041 + }, + { + "epoch": 3.015289306640625e-05, + "model_forward_time": 0.025133609771728516, + "step": 19761 + }, + { + "epoch": 3.015289306640625e-05, + "step": 19761, + "training_step_time": 0.14405155181884766 + }, + { + "epoch": 3.01544189453125e-05, + "model_forward_time": 0.025588035583496094, + "step": 19762 + }, + { + "epoch": 3.01544189453125e-05, + "step": 19762, + "training_step_time": 0.11105871200561523 + }, + { + "epoch": 3.015594482421875e-05, + "model_forward_time": 0.0244295597076416, + "step": 19763 + }, + { + "epoch": 3.015594482421875e-05, + "step": 19763, + "training_step_time": 0.15865612030029297 + }, + { + "epoch": 3.0157470703125e-05, + "model_forward_time": 0.024277210235595703, + "step": 19764 + }, + { + "epoch": 3.0157470703125e-05, + "step": 19764, + "training_step_time": 0.15872550010681152 + }, + { + "epoch": 3.015899658203125e-05, + "model_forward_time": 0.02430272102355957, + "step": 19765 + }, + { + "epoch": 3.015899658203125e-05, + "step": 19765, + "training_step_time": 0.12417864799499512 + }, + { + "epoch": 3.01605224609375e-05, + "model_forward_time": 0.024353742599487305, + "step": 19766 + }, + { + "epoch": 3.01605224609375e-05, + "step": 19766, + "training_step_time": 0.11871552467346191 + }, + { + "epoch": 3.016204833984375e-05, + "model_forward_time": 0.024990081787109375, + "step": 19767 + }, + { + "epoch": 3.016204833984375e-05, + "step": 19767, + "training_step_time": 0.19194269180297852 + }, + { + "epoch": 3.016357421875e-05, + "model_forward_time": 0.024880647659301758, + "step": 19768 + }, + { + "epoch": 3.016357421875e-05, + "step": 19768, + "training_step_time": 0.11201882362365723 + }, + { + "epoch": 3.016510009765625e-05, + "model_forward_time": 0.024860858917236328, + "step": 19769 + }, + { + "epoch": 3.016510009765625e-05, + "step": 19769, + "training_step_time": 0.15588951110839844 + }, + { + "epoch": 3.01666259765625e-05, + "grad_norm": 0.25361666083335876, + "learning_rate": 2.8561541678638142e-05, + "loss": 0.0076, + "step": 19770 + }, + { + "epoch": 3.01666259765625e-05, + "model_forward_time": 0.024788618087768555, + "step": 19770 + }, + { + "epoch": 3.01666259765625e-05, + "step": 19770, + "training_step_time": 0.14987468719482422 + }, + { + "epoch": 3.016815185546875e-05, + "model_forward_time": 0.024391651153564453, + "step": 19771 + }, + { + "epoch": 3.016815185546875e-05, + "step": 19771, + "training_step_time": 0.14062976837158203 + }, + { + "epoch": 3.0169677734375e-05, + "model_forward_time": 0.02464890480041504, + "step": 19772 + }, + { + "epoch": 3.0169677734375e-05, + "step": 19772, + "training_step_time": 0.10789680480957031 + }, + { + "epoch": 3.017120361328125e-05, + "model_forward_time": 0.02524733543395996, + "step": 19773 + }, + { + "epoch": 3.017120361328125e-05, + "step": 19773, + "training_step_time": 0.18741989135742188 + }, + { + "epoch": 3.01727294921875e-05, + "model_forward_time": 0.02436065673828125, + "step": 19774 + }, + { + "epoch": 3.01727294921875e-05, + "step": 19774, + "training_step_time": 0.1067206859588623 + }, + { + "epoch": 3.017425537109375e-05, + "model_forward_time": 0.024643898010253906, + "step": 19775 + }, + { + "epoch": 3.017425537109375e-05, + "step": 19775, + "training_step_time": 0.11038470268249512 + }, + { + "epoch": 3.017578125e-05, + "model_forward_time": 0.025953054428100586, + "step": 19776 + }, + { + "epoch": 3.017578125e-05, + "step": 19776, + "training_step_time": 0.13510823249816895 + }, + { + "epoch": 3.017730712890625e-05, + "model_forward_time": 0.025248050689697266, + "step": 19777 + }, + { + "epoch": 3.017730712890625e-05, + "step": 19777, + "training_step_time": 0.10892057418823242 + }, + { + "epoch": 3.01788330078125e-05, + "model_forward_time": 0.025916099548339844, + "step": 19778 + }, + { + "epoch": 3.01788330078125e-05, + "step": 19778, + "training_step_time": 0.1108860969543457 + }, + { + "epoch": 3.018035888671875e-05, + "model_forward_time": 0.025270938873291016, + "step": 19779 + }, + { + "epoch": 3.018035888671875e-05, + "step": 19779, + "training_step_time": 0.11475658416748047 + }, + { + "epoch": 3.0181884765625e-05, + "grad_norm": 0.3906853497028351, + "learning_rate": 2.8511762449730795e-05, + "loss": 0.0115, + "step": 19780 + }, + { + "epoch": 3.0181884765625e-05, + "model_forward_time": 0.025171518325805664, + "step": 19780 + }, + { + "epoch": 3.0181884765625e-05, + "step": 19780, + "training_step_time": 0.10463690757751465 + }, + { + "epoch": 3.018341064453125e-05, + "model_forward_time": 0.025354862213134766, + "step": 19781 + }, + { + "epoch": 3.018341064453125e-05, + "step": 19781, + "training_step_time": 0.1950528621673584 + }, + { + "epoch": 3.01849365234375e-05, + "model_forward_time": 0.024790287017822266, + "step": 19782 + }, + { + "epoch": 3.01849365234375e-05, + "step": 19782, + "training_step_time": 0.10386395454406738 + }, + { + "epoch": 3.018646240234375e-05, + "model_forward_time": 0.024918794631958008, + "step": 19783 + }, + { + "epoch": 3.018646240234375e-05, + "step": 19783, + "training_step_time": 0.1031959056854248 + }, + { + "epoch": 3.018798828125e-05, + "model_forward_time": 0.0251462459564209, + "step": 19784 + }, + { + "epoch": 3.018798828125e-05, + "step": 19784, + "training_step_time": 0.10685253143310547 + }, + { + "epoch": 3.018951416015625e-05, + "model_forward_time": 0.0254213809967041, + "step": 19785 + }, + { + "epoch": 3.018951416015625e-05, + "step": 19785, + "training_step_time": 0.10639286041259766 + }, + { + "epoch": 3.01910400390625e-05, + "model_forward_time": 0.024895429611206055, + "step": 19786 + }, + { + "epoch": 3.01910400390625e-05, + "step": 19786, + "training_step_time": 0.10412025451660156 + }, + { + "epoch": 3.019256591796875e-05, + "model_forward_time": 0.025119304656982422, + "step": 19787 + }, + { + "epoch": 3.019256591796875e-05, + "step": 19787, + "training_step_time": 0.10592508316040039 + }, + { + "epoch": 3.0194091796875e-05, + "model_forward_time": 0.025200366973876953, + "step": 19788 + }, + { + "epoch": 3.0194091796875e-05, + "step": 19788, + "training_step_time": 0.10486984252929688 + }, + { + "epoch": 3.019561767578125e-05, + "model_forward_time": 0.025241613388061523, + "step": 19789 + }, + { + "epoch": 3.019561767578125e-05, + "step": 19789, + "training_step_time": 0.11138772964477539 + }, + { + "epoch": 3.01971435546875e-05, + "grad_norm": 0.23596696555614471, + "learning_rate": 2.846200933105829e-05, + "loss": 0.0141, + "step": 19790 + }, + { + "epoch": 3.01971435546875e-05, + "model_forward_time": 0.02523493766784668, + "step": 19790 + }, + { + "epoch": 3.01971435546875e-05, + "step": 19790, + "training_step_time": 0.10589385032653809 + }, + { + "epoch": 3.019866943359375e-05, + "model_forward_time": 0.0256655216217041, + "step": 19791 + }, + { + "epoch": 3.019866943359375e-05, + "step": 19791, + "training_step_time": 0.10993361473083496 + }, + { + "epoch": 3.02001953125e-05, + "model_forward_time": 0.024235963821411133, + "step": 19792 + }, + { + "epoch": 3.02001953125e-05, + "step": 19792, + "training_step_time": 0.12194037437438965 + }, + { + "epoch": 3.020172119140625e-05, + "model_forward_time": 0.025089740753173828, + "step": 19793 + }, + { + "epoch": 3.020172119140625e-05, + "step": 19793, + "training_step_time": 0.1191873550415039 + }, + { + "epoch": 3.02032470703125e-05, + "model_forward_time": 0.025371789932250977, + "step": 19794 + }, + { + "epoch": 3.02032470703125e-05, + "step": 19794, + "training_step_time": 0.11788129806518555 + }, + { + "epoch": 3.020477294921875e-05, + "model_forward_time": 0.025250911712646484, + "step": 19795 + }, + { + "epoch": 3.020477294921875e-05, + "step": 19795, + "training_step_time": 0.14443731307983398 + }, + { + "epoch": 3.0206298828125e-05, + "model_forward_time": 0.025148630142211914, + "step": 19796 + }, + { + "epoch": 3.0206298828125e-05, + "step": 19796, + "training_step_time": 0.12986183166503906 + }, + { + "epoch": 3.020782470703125e-05, + "model_forward_time": 0.02463364601135254, + "step": 19797 + }, + { + "epoch": 3.020782470703125e-05, + "step": 19797, + "training_step_time": 0.11286616325378418 + }, + { + "epoch": 3.02093505859375e-05, + "model_forward_time": 0.025294065475463867, + "step": 19798 + }, + { + "epoch": 3.02093505859375e-05, + "step": 19798, + "training_step_time": 0.11988234519958496 + }, + { + "epoch": 3.021087646484375e-05, + "model_forward_time": 0.025386810302734375, + "step": 19799 + }, + { + "epoch": 3.021087646484375e-05, + "step": 19799, + "training_step_time": 0.11135029792785645 + }, + { + "epoch": 3.021240234375e-05, + "grad_norm": 0.16461493074893951, + "learning_rate": 2.8412282383075363e-05, + "loss": 0.0088, + "step": 19800 + }, + { + "epoch": 3.021240234375e-05, + "model_forward_time": 0.024890422821044922, + "step": 19800 + }, + { + "epoch": 3.021240234375e-05, + "step": 19800, + "training_step_time": 0.11123418807983398 + }, + { + "epoch": 3.021392822265625e-05, + "model_forward_time": 0.0250089168548584, + "step": 19801 + }, + { + "epoch": 3.021392822265625e-05, + "step": 19801, + "training_step_time": 0.11397910118103027 + }, + { + "epoch": 3.02154541015625e-05, + "model_forward_time": 0.02497100830078125, + "step": 19802 + }, + { + "epoch": 3.02154541015625e-05, + "step": 19802, + "training_step_time": 0.11146736145019531 + }, + { + "epoch": 3.021697998046875e-05, + "model_forward_time": 0.02507925033569336, + "step": 19803 + }, + { + "epoch": 3.021697998046875e-05, + "step": 19803, + "training_step_time": 0.10859322547912598 + }, + { + "epoch": 3.0218505859375e-05, + "model_forward_time": 0.024953603744506836, + "step": 19804 + }, + { + "epoch": 3.0218505859375e-05, + "step": 19804, + "training_step_time": 0.10517644882202148 + }, + { + "epoch": 3.022003173828125e-05, + "model_forward_time": 0.02515411376953125, + "step": 19805 + }, + { + "epoch": 3.022003173828125e-05, + "step": 19805, + "training_step_time": 0.11144542694091797 + }, + { + "epoch": 3.02215576171875e-05, + "model_forward_time": 0.02464437484741211, + "step": 19806 + }, + { + "epoch": 3.02215576171875e-05, + "step": 19806, + "training_step_time": 0.10696268081665039 + }, + { + "epoch": 3.022308349609375e-05, + "model_forward_time": 0.024903297424316406, + "step": 19807 + }, + { + "epoch": 3.022308349609375e-05, + "step": 19807, + "training_step_time": 0.10630345344543457 + }, + { + "epoch": 3.0224609375e-05, + "model_forward_time": 0.02466726303100586, + "step": 19808 + }, + { + "epoch": 3.0224609375e-05, + "step": 19808, + "training_step_time": 0.10550141334533691 + }, + { + "epoch": 3.022613525390625e-05, + "model_forward_time": 0.02500176429748535, + "step": 19809 + }, + { + "epoch": 3.022613525390625e-05, + "step": 19809, + "training_step_time": 0.10499358177185059 + }, + { + "epoch": 3.02276611328125e-05, + "grad_norm": 0.19356225430965424, + "learning_rate": 2.8362581666204918e-05, + "loss": 0.0079, + "step": 19810 + }, + { + "epoch": 3.02276611328125e-05, + "model_forward_time": 0.026335716247558594, + "step": 19810 + }, + { + "epoch": 3.02276611328125e-05, + "step": 19810, + "training_step_time": 0.10510730743408203 + }, + { + "epoch": 3.022918701171875e-05, + "model_forward_time": 0.024923324584960938, + "step": 19811 + }, + { + "epoch": 3.022918701171875e-05, + "step": 19811, + "training_step_time": 0.1270143985748291 + }, + { + "epoch": 3.0230712890625e-05, + "model_forward_time": 0.024589061737060547, + "step": 19812 + }, + { + "epoch": 3.0230712890625e-05, + "step": 19812, + "training_step_time": 0.11534667015075684 + }, + { + "epoch": 3.023223876953125e-05, + "model_forward_time": 0.025110960006713867, + "step": 19813 + }, + { + "epoch": 3.023223876953125e-05, + "step": 19813, + "training_step_time": 0.2022702693939209 + }, + { + "epoch": 3.02337646484375e-05, + "model_forward_time": 0.024135351181030273, + "step": 19814 + }, + { + "epoch": 3.02337646484375e-05, + "step": 19814, + "training_step_time": 0.13481831550598145 + }, + { + "epoch": 3.023529052734375e-05, + "model_forward_time": 0.024395465850830078, + "step": 19815 + }, + { + "epoch": 3.023529052734375e-05, + "step": 19815, + "training_step_time": 0.2055823802947998 + }, + { + "epoch": 3.023681640625e-05, + "model_forward_time": 0.024216651916503906, + "step": 19816 + }, + { + "epoch": 3.023681640625e-05, + "step": 19816, + "training_step_time": 0.12240171432495117 + }, + { + "epoch": 3.023834228515625e-05, + "model_forward_time": 0.024003267288208008, + "step": 19817 + }, + { + "epoch": 3.023834228515625e-05, + "step": 19817, + "training_step_time": 0.10724472999572754 + }, + { + "epoch": 3.02398681640625e-05, + "model_forward_time": 0.025269269943237305, + "step": 19818 + }, + { + "epoch": 3.02398681640625e-05, + "step": 19818, + "training_step_time": 0.10634040832519531 + }, + { + "epoch": 3.024139404296875e-05, + "model_forward_time": 0.025299787521362305, + "step": 19819 + }, + { + "epoch": 3.024139404296875e-05, + "step": 19819, + "training_step_time": 0.152357816696167 + }, + { + "epoch": 3.0242919921875e-05, + "grad_norm": 0.12694837152957916, + "learning_rate": 2.8312907240838027e-05, + "loss": 0.0085, + "step": 19820 + }, + { + "epoch": 3.0242919921875e-05, + "model_forward_time": 0.02709197998046875, + "step": 19820 + }, + { + "epoch": 3.0242919921875e-05, + "step": 19820, + "training_step_time": 0.12467336654663086 + }, + { + "epoch": 3.024444580078125e-05, + "model_forward_time": 0.024280548095703125, + "step": 19821 + }, + { + "epoch": 3.024444580078125e-05, + "step": 19821, + "training_step_time": 0.11790108680725098 + }, + { + "epoch": 3.02459716796875e-05, + "model_forward_time": 0.023859739303588867, + "step": 19822 + }, + { + "epoch": 3.02459716796875e-05, + "step": 19822, + "training_step_time": 0.1369180679321289 + }, + { + "epoch": 3.024749755859375e-05, + "model_forward_time": 0.02498769760131836, + "step": 19823 + }, + { + "epoch": 3.024749755859375e-05, + "step": 19823, + "training_step_time": 0.12589168548583984 + }, + { + "epoch": 3.02490234375e-05, + "model_forward_time": 0.023369312286376953, + "step": 19824 + }, + { + "epoch": 3.02490234375e-05, + "step": 19824, + "training_step_time": 0.20339035987854004 + }, + { + "epoch": 3.025054931640625e-05, + "model_forward_time": 0.02436661720275879, + "step": 19825 + }, + { + "epoch": 3.025054931640625e-05, + "step": 19825, + "training_step_time": 0.12917017936706543 + }, + { + "epoch": 3.02520751953125e-05, + "model_forward_time": 0.023841142654418945, + "step": 19826 + }, + { + "epoch": 3.02520751953125e-05, + "step": 19826, + "training_step_time": 0.1820850372314453 + }, + { + "epoch": 3.025360107421875e-05, + "model_forward_time": 0.024190425872802734, + "step": 19827 + }, + { + "epoch": 3.025360107421875e-05, + "step": 19827, + "training_step_time": 0.1123194694519043 + }, + { + "epoch": 3.0255126953125e-05, + "model_forward_time": 0.024533748626708984, + "step": 19828 + }, + { + "epoch": 3.0255126953125e-05, + "step": 19828, + "training_step_time": 0.10678887367248535 + }, + { + "epoch": 3.025665283203125e-05, + "model_forward_time": 0.02523946762084961, + "step": 19829 + }, + { + "epoch": 3.025665283203125e-05, + "step": 19829, + "training_step_time": 0.10825634002685547 + }, + { + "epoch": 3.02581787109375e-05, + "grad_norm": 0.1579444855451584, + "learning_rate": 2.8263259167333777e-05, + "loss": 0.0038, + "step": 19830 + }, + { + "epoch": 3.02581787109375e-05, + "model_forward_time": 0.024755001068115234, + "step": 19830 + }, + { + "epoch": 3.02581787109375e-05, + "step": 19830, + "training_step_time": 0.10955500602722168 + }, + { + "epoch": 3.025970458984375e-05, + "model_forward_time": 0.025429964065551758, + "step": 19831 + }, + { + "epoch": 3.025970458984375e-05, + "step": 19831, + "training_step_time": 0.10622549057006836 + }, + { + "epoch": 3.026123046875e-05, + "model_forward_time": 0.025336742401123047, + "step": 19832 + }, + { + "epoch": 3.026123046875e-05, + "step": 19832, + "training_step_time": 0.10773372650146484 + }, + { + "epoch": 3.026275634765625e-05, + "model_forward_time": 0.025203227996826172, + "step": 19833 + }, + { + "epoch": 3.026275634765625e-05, + "step": 19833, + "training_step_time": 0.10687088966369629 + }, + { + "epoch": 3.02642822265625e-05, + "model_forward_time": 0.025011301040649414, + "step": 19834 + }, + { + "epoch": 3.02642822265625e-05, + "step": 19834, + "training_step_time": 0.10758781433105469 + }, + { + "epoch": 3.026580810546875e-05, + "model_forward_time": 0.025038719177246094, + "step": 19835 + }, + { + "epoch": 3.026580810546875e-05, + "step": 19835, + "training_step_time": 0.10650014877319336 + }, + { + "epoch": 3.0267333984375e-05, + "model_forward_time": 0.025045394897460938, + "step": 19836 + }, + { + "epoch": 3.0267333984375e-05, + "step": 19836, + "training_step_time": 0.10807585716247559 + }, + { + "epoch": 3.026885986328125e-05, + "model_forward_time": 0.0250246524810791, + "step": 19837 + }, + { + "epoch": 3.026885986328125e-05, + "step": 19837, + "training_step_time": 0.10412359237670898 + }, + { + "epoch": 3.02703857421875e-05, + "model_forward_time": 0.0250241756439209, + "step": 19838 + }, + { + "epoch": 3.02703857421875e-05, + "step": 19838, + "training_step_time": 0.10488319396972656 + }, + { + "epoch": 3.027191162109375e-05, + "model_forward_time": 0.025052785873413086, + "step": 19839 + }, + { + "epoch": 3.027191162109375e-05, + "step": 19839, + "training_step_time": 0.10551786422729492 + }, + { + "epoch": 3.02734375e-05, + "grad_norm": 0.19443681836128235, + "learning_rate": 2.8213637506019304e-05, + "loss": 0.0059, + "step": 19840 + }, + { + "epoch": 3.02734375e-05, + "model_forward_time": 0.025023460388183594, + "step": 19840 + }, + { + "epoch": 3.02734375e-05, + "step": 19840, + "training_step_time": 0.10430026054382324 + }, + { + "epoch": 3.027496337890625e-05, + "model_forward_time": 0.025120258331298828, + "step": 19841 + }, + { + "epoch": 3.027496337890625e-05, + "step": 19841, + "training_step_time": 0.10541749000549316 + }, + { + "epoch": 3.02764892578125e-05, + "model_forward_time": 0.024962663650512695, + "step": 19842 + }, + { + "epoch": 3.02764892578125e-05, + "step": 19842, + "training_step_time": 0.1202249526977539 + }, + { + "epoch": 3.027801513671875e-05, + "model_forward_time": 0.02823781967163086, + "step": 19843 + }, + { + "epoch": 3.027801513671875e-05, + "step": 19843, + "training_step_time": 0.11669015884399414 + }, + { + "epoch": 3.0279541015625e-05, + "model_forward_time": 0.024492979049682617, + "step": 19844 + }, + { + "epoch": 3.0279541015625e-05, + "step": 19844, + "training_step_time": 0.13006806373596191 + }, + { + "epoch": 3.028106689453125e-05, + "model_forward_time": 0.024974584579467773, + "step": 19845 + }, + { + "epoch": 3.028106689453125e-05, + "step": 19845, + "training_step_time": 0.1073763370513916 + }, + { + "epoch": 3.02825927734375e-05, + "model_forward_time": 0.025258541107177734, + "step": 19846 + }, + { + "epoch": 3.02825927734375e-05, + "step": 19846, + "training_step_time": 0.18275856971740723 + }, + { + "epoch": 3.028411865234375e-05, + "model_forward_time": 0.02449321746826172, + "step": 19847 + }, + { + "epoch": 3.028411865234375e-05, + "step": 19847, + "training_step_time": 0.13634634017944336 + }, + { + "epoch": 3.028564453125e-05, + "model_forward_time": 0.02402663230895996, + "step": 19848 + }, + { + "epoch": 3.028564453125e-05, + "step": 19848, + "training_step_time": 0.1191554069519043 + }, + { + "epoch": 3.028717041015625e-05, + "model_forward_time": 0.02448868751525879, + "step": 19849 + }, + { + "epoch": 3.028717041015625e-05, + "step": 19849, + "training_step_time": 0.10529541969299316 + }, + { + "epoch": 3.02886962890625e-05, + "grad_norm": 0.631939709186554, + "learning_rate": 2.8164042317189575e-05, + "loss": 0.0165, + "step": 19850 + }, + { + "epoch": 3.02886962890625e-05, + "model_forward_time": 0.025736093521118164, + "step": 19850 + }, + { + "epoch": 3.02886962890625e-05, + "step": 19850, + "training_step_time": 0.10680341720581055 + }, + { + "epoch": 3.029022216796875e-05, + "model_forward_time": 0.025399208068847656, + "step": 19851 + }, + { + "epoch": 3.029022216796875e-05, + "step": 19851, + "training_step_time": 0.10507965087890625 + }, + { + "epoch": 3.0291748046875e-05, + "model_forward_time": 0.02488112449645996, + "step": 19852 + }, + { + "epoch": 3.0291748046875e-05, + "step": 19852, + "training_step_time": 0.10835671424865723 + }, + { + "epoch": 3.029327392578125e-05, + "model_forward_time": 0.024398088455200195, + "step": 19853 + }, + { + "epoch": 3.029327392578125e-05, + "step": 19853, + "training_step_time": 0.10718417167663574 + }, + { + "epoch": 3.02947998046875e-05, + "model_forward_time": 0.024988174438476562, + "step": 19854 + }, + { + "epoch": 3.02947998046875e-05, + "step": 19854, + "training_step_time": 0.10629868507385254 + }, + { + "epoch": 3.029632568359375e-05, + "model_forward_time": 0.02503824234008789, + "step": 19855 + }, + { + "epoch": 3.029632568359375e-05, + "step": 19855, + "training_step_time": 0.1041262149810791 + }, + { + "epoch": 3.02978515625e-05, + "model_forward_time": 0.025023937225341797, + "step": 19856 + }, + { + "epoch": 3.02978515625e-05, + "step": 19856, + "training_step_time": 0.10327363014221191 + }, + { + "epoch": 3.029937744140625e-05, + "model_forward_time": 0.02416539192199707, + "step": 19857 + }, + { + "epoch": 3.029937744140625e-05, + "step": 19857, + "training_step_time": 0.10901689529418945 + }, + { + "epoch": 3.03009033203125e-05, + "model_forward_time": 0.02634429931640625, + "step": 19858 + }, + { + "epoch": 3.03009033203125e-05, + "step": 19858, + "training_step_time": 0.1147758960723877 + }, + { + "epoch": 3.030242919921875e-05, + "model_forward_time": 0.025261402130126953, + "step": 19859 + }, + { + "epoch": 3.030242919921875e-05, + "step": 19859, + "training_step_time": 0.11034345626831055 + }, + { + "epoch": 3.0303955078125e-05, + "grad_norm": 0.3624440133571625, + "learning_rate": 2.811447366110741e-05, + "loss": 0.0159, + "step": 19860 + }, + { + "epoch": 3.0303955078125e-05, + "model_forward_time": 0.025297164916992188, + "step": 19860 + }, + { + "epoch": 3.0303955078125e-05, + "step": 19860, + "training_step_time": 0.21103334426879883 + }, + { + "epoch": 3.030548095703125e-05, + "model_forward_time": 0.024688005447387695, + "step": 19861 + }, + { + "epoch": 3.030548095703125e-05, + "step": 19861, + "training_step_time": 0.15964627265930176 + }, + { + "epoch": 3.03070068359375e-05, + "model_forward_time": 0.024794816970825195, + "step": 19862 + }, + { + "epoch": 3.03070068359375e-05, + "step": 19862, + "training_step_time": 0.18436074256896973 + }, + { + "epoch": 3.030853271484375e-05, + "model_forward_time": 0.024187564849853516, + "step": 19863 + }, + { + "epoch": 3.030853271484375e-05, + "step": 19863, + "training_step_time": 0.13009309768676758 + }, + { + "epoch": 3.031005859375e-05, + "model_forward_time": 0.02405405044555664, + "step": 19864 + }, + { + "epoch": 3.031005859375e-05, + "step": 19864, + "training_step_time": 0.11369442939758301 + }, + { + "epoch": 3.031158447265625e-05, + "model_forward_time": 0.024793148040771484, + "step": 19865 + }, + { + "epoch": 3.031158447265625e-05, + "step": 19865, + "training_step_time": 0.11489129066467285 + }, + { + "epoch": 3.03131103515625e-05, + "model_forward_time": 0.025031566619873047, + "step": 19866 + }, + { + "epoch": 3.03131103515625e-05, + "step": 19866, + "training_step_time": 0.10381817817687988 + }, + { + "epoch": 3.031463623046875e-05, + "model_forward_time": 0.025305986404418945, + "step": 19867 + }, + { + "epoch": 3.031463623046875e-05, + "step": 19867, + "training_step_time": 0.1776280403137207 + }, + { + "epoch": 3.0316162109375e-05, + "model_forward_time": 0.02449512481689453, + "step": 19868 + }, + { + "epoch": 3.0316162109375e-05, + "step": 19868, + "training_step_time": 0.17885732650756836 + }, + { + "epoch": 3.031768798828125e-05, + "model_forward_time": 0.02428150177001953, + "step": 19869 + }, + { + "epoch": 3.031768798828125e-05, + "step": 19869, + "training_step_time": 0.2342391014099121 + }, + { + "epoch": 3.03192138671875e-05, + "grad_norm": 0.1861395537853241, + "learning_rate": 2.8064931598003436e-05, + "loss": 0.0062, + "step": 19870 + }, + { + "epoch": 3.03192138671875e-05, + "model_forward_time": 0.024134159088134766, + "step": 19870 + }, + { + "epoch": 3.03192138671875e-05, + "step": 19870, + "training_step_time": 0.19604158401489258 + }, + { + "epoch": 3.032073974609375e-05, + "model_forward_time": 0.02410888671875, + "step": 19871 + }, + { + "epoch": 3.032073974609375e-05, + "step": 19871, + "training_step_time": 0.1863994598388672 + }, + { + "epoch": 3.0322265625e-05, + "model_forward_time": 0.02411341667175293, + "step": 19872 + }, + { + "epoch": 3.0322265625e-05, + "step": 19872, + "training_step_time": 0.1749098300933838 + }, + { + "epoch": 3.032379150390625e-05, + "model_forward_time": 0.024414539337158203, + "step": 19873 + }, + { + "epoch": 3.032379150390625e-05, + "step": 19873, + "training_step_time": 0.15276575088500977 + }, + { + "epoch": 3.03253173828125e-05, + "model_forward_time": 0.024059057235717773, + "step": 19874 + }, + { + "epoch": 3.03253173828125e-05, + "step": 19874, + "training_step_time": 0.13388323783874512 + }, + { + "epoch": 3.032684326171875e-05, + "model_forward_time": 0.024216175079345703, + "step": 19875 + }, + { + "epoch": 3.032684326171875e-05, + "step": 19875, + "training_step_time": 0.12927699089050293 + }, + { + "epoch": 3.0328369140625e-05, + "model_forward_time": 0.02425861358642578, + "step": 19876 + }, + { + "epoch": 3.0328369140625e-05, + "step": 19876, + "training_step_time": 0.1265885829925537 + }, + { + "epoch": 3.032989501953125e-05, + "model_forward_time": 0.024297714233398438, + "step": 19877 + }, + { + "epoch": 3.032989501953125e-05, + "step": 19877, + "training_step_time": 0.12507915496826172 + }, + { + "epoch": 3.03314208984375e-05, + "model_forward_time": 0.02460479736328125, + "step": 19878 + }, + { + "epoch": 3.03314208984375e-05, + "step": 19878, + "training_step_time": 0.11876940727233887 + }, + { + "epoch": 3.033294677734375e-05, + "model_forward_time": 0.024849891662597656, + "step": 19879 + }, + { + "epoch": 3.033294677734375e-05, + "step": 19879, + "training_step_time": 0.11263322830200195 + }, + { + "epoch": 3.033447265625e-05, + "grad_norm": 0.2712714672088623, + "learning_rate": 2.8015416188075893e-05, + "loss": 0.0168, + "step": 19880 + }, + { + "epoch": 3.033447265625e-05, + "model_forward_time": 0.02477431297302246, + "step": 19880 + }, + { + "epoch": 3.033447265625e-05, + "step": 19880, + "training_step_time": 0.11446380615234375 + }, + { + "epoch": 3.033599853515625e-05, + "model_forward_time": 0.025043725967407227, + "step": 19881 + }, + { + "epoch": 3.033599853515625e-05, + "step": 19881, + "training_step_time": 0.11242508888244629 + }, + { + "epoch": 3.03375244140625e-05, + "model_forward_time": 0.02487659454345703, + "step": 19882 + }, + { + "epoch": 3.03375244140625e-05, + "step": 19882, + "training_step_time": 0.11130809783935547 + }, + { + "epoch": 3.033905029296875e-05, + "model_forward_time": 0.02488875389099121, + "step": 19883 + }, + { + "epoch": 3.033905029296875e-05, + "step": 19883, + "training_step_time": 0.10930323600769043 + }, + { + "epoch": 3.0340576171875e-05, + "model_forward_time": 0.025063276290893555, + "step": 19884 + }, + { + "epoch": 3.0340576171875e-05, + "step": 19884, + "training_step_time": 0.18225908279418945 + }, + { + "epoch": 3.034210205078125e-05, + "model_forward_time": 0.024791240692138672, + "step": 19885 + }, + { + "epoch": 3.034210205078125e-05, + "step": 19885, + "training_step_time": 0.11960983276367188 + }, + { + "epoch": 3.03436279296875e-05, + "model_forward_time": 0.02390146255493164, + "step": 19886 + }, + { + "epoch": 3.03436279296875e-05, + "step": 19886, + "training_step_time": 0.13649940490722656 + }, + { + "epoch": 3.034515380859375e-05, + "model_forward_time": 0.02491307258605957, + "step": 19887 + }, + { + "epoch": 3.034515380859375e-05, + "step": 19887, + "training_step_time": 0.16174101829528809 + }, + { + "epoch": 3.03466796875e-05, + "model_forward_time": 0.024428367614746094, + "step": 19888 + }, + { + "epoch": 3.03466796875e-05, + "step": 19888, + "training_step_time": 0.21346759796142578 + }, + { + "epoch": 3.034820556640625e-05, + "model_forward_time": 0.023756742477416992, + "step": 19889 + }, + { + "epoch": 3.034820556640625e-05, + "step": 19889, + "training_step_time": 0.11511659622192383 + }, + { + "epoch": 3.03497314453125e-05, + "grad_norm": 0.1906876266002655, + "learning_rate": 2.7965927491490705e-05, + "loss": 0.0105, + "step": 19890 + }, + { + "epoch": 3.03497314453125e-05, + "model_forward_time": 0.02425861358642578, + "step": 19890 + }, + { + "epoch": 3.03497314453125e-05, + "step": 19890, + "training_step_time": 0.10442972183227539 + }, + { + "epoch": 3.035125732421875e-05, + "model_forward_time": 0.024774789810180664, + "step": 19891 + }, + { + "epoch": 3.035125732421875e-05, + "step": 19891, + "training_step_time": 0.10831618309020996 + }, + { + "epoch": 3.0352783203125e-05, + "model_forward_time": 0.025073528289794922, + "step": 19892 + }, + { + "epoch": 3.0352783203125e-05, + "step": 19892, + "training_step_time": 0.10739636421203613 + }, + { + "epoch": 3.035430908203125e-05, + "model_forward_time": 0.02703714370727539, + "step": 19893 + }, + { + "epoch": 3.035430908203125e-05, + "step": 19893, + "training_step_time": 0.10930705070495605 + }, + { + "epoch": 3.03558349609375e-05, + "model_forward_time": 0.025168895721435547, + "step": 19894 + }, + { + "epoch": 3.03558349609375e-05, + "step": 19894, + "training_step_time": 0.10419726371765137 + }, + { + "epoch": 3.035736083984375e-05, + "model_forward_time": 0.024979591369628906, + "step": 19895 + }, + { + "epoch": 3.035736083984375e-05, + "step": 19895, + "training_step_time": 0.10649728775024414 + }, + { + "epoch": 3.035888671875e-05, + "model_forward_time": 0.02600574493408203, + "step": 19896 + }, + { + "epoch": 3.035888671875e-05, + "step": 19896, + "training_step_time": 0.10592317581176758 + }, + { + "epoch": 3.036041259765625e-05, + "model_forward_time": 0.025988101959228516, + "step": 19897 + }, + { + "epoch": 3.036041259765625e-05, + "step": 19897, + "training_step_time": 0.11029648780822754 + }, + { + "epoch": 3.03619384765625e-05, + "model_forward_time": 0.026530027389526367, + "step": 19898 + }, + { + "epoch": 3.03619384765625e-05, + "step": 19898, + "training_step_time": 0.10671520233154297 + }, + { + "epoch": 3.036346435546875e-05, + "model_forward_time": 0.025300979614257812, + "step": 19899 + }, + { + "epoch": 3.036346435546875e-05, + "step": 19899, + "training_step_time": 0.14664077758789062 + }, + { + "epoch": 3.0364990234375e-05, + "grad_norm": 0.3329494297504425, + "learning_rate": 2.79164655683813e-05, + "loss": 0.025, + "step": 19900 + }, + { + "epoch": 3.0364990234375e-05, + "model_forward_time": 0.02467799186706543, + "step": 19900 + }, + { + "epoch": 3.0364990234375e-05, + "step": 19900, + "training_step_time": 0.1531839370727539 + }, + { + "epoch": 3.036651611328125e-05, + "model_forward_time": 0.024692058563232422, + "step": 19901 + }, + { + "epoch": 3.036651611328125e-05, + "step": 19901, + "training_step_time": 0.12768340110778809 + }, + { + "epoch": 3.03680419921875e-05, + "model_forward_time": 0.02463817596435547, + "step": 19902 + }, + { + "epoch": 3.03680419921875e-05, + "step": 19902, + "training_step_time": 0.13075494766235352 + }, + { + "epoch": 3.036956787109375e-05, + "model_forward_time": 0.025205612182617188, + "step": 19903 + }, + { + "epoch": 3.036956787109375e-05, + "step": 19903, + "training_step_time": 0.16486191749572754 + }, + { + "epoch": 3.037109375e-05, + "model_forward_time": 0.024573564529418945, + "step": 19904 + }, + { + "epoch": 3.037109375e-05, + "step": 19904, + "training_step_time": 0.1816844940185547 + }, + { + "epoch": 3.037261962890625e-05, + "model_forward_time": 0.024337291717529297, + "step": 19905 + }, + { + "epoch": 3.037261962890625e-05, + "step": 19905, + "training_step_time": 0.18549823760986328 + }, + { + "epoch": 3.03741455078125e-05, + "model_forward_time": 0.023953914642333984, + "step": 19906 + }, + { + "epoch": 3.03741455078125e-05, + "step": 19906, + "training_step_time": 0.11421084403991699 + }, + { + "epoch": 3.037567138671875e-05, + "model_forward_time": 0.024344682693481445, + "step": 19907 + }, + { + "epoch": 3.037567138671875e-05, + "step": 19907, + "training_step_time": 0.10966300964355469 + }, + { + "epoch": 3.0377197265625e-05, + "model_forward_time": 0.024970054626464844, + "step": 19908 + }, + { + "epoch": 3.0377197265625e-05, + "step": 19908, + "training_step_time": 0.1424696445465088 + }, + { + "epoch": 3.037872314453125e-05, + "model_forward_time": 0.02541661262512207, + "step": 19909 + }, + { + "epoch": 3.037872314453125e-05, + "step": 19909, + "training_step_time": 0.14164495468139648 + }, + { + "epoch": 3.03802490234375e-05, + "grad_norm": 0.27687445282936096, + "learning_rate": 2.7867030478848577e-05, + "loss": 0.0087, + "step": 19910 + }, + { + "epoch": 3.03802490234375e-05, + "model_forward_time": 0.02445840835571289, + "step": 19910 + }, + { + "epoch": 3.03802490234375e-05, + "step": 19910, + "training_step_time": 0.11130261421203613 + }, + { + "epoch": 3.038177490234375e-05, + "model_forward_time": 0.024960041046142578, + "step": 19911 + }, + { + "epoch": 3.038177490234375e-05, + "step": 19911, + "training_step_time": 0.10811614990234375 + }, + { + "epoch": 3.038330078125e-05, + "model_forward_time": 0.025246143341064453, + "step": 19912 + }, + { + "epoch": 3.038330078125e-05, + "step": 19912, + "training_step_time": 0.1452932357788086 + }, + { + "epoch": 3.038482666015625e-05, + "model_forward_time": 0.025028705596923828, + "step": 19913 + }, + { + "epoch": 3.038482666015625e-05, + "step": 19913, + "training_step_time": 0.19161510467529297 + }, + { + "epoch": 3.03863525390625e-05, + "model_forward_time": 0.0239865779876709, + "step": 19914 + }, + { + "epoch": 3.03863525390625e-05, + "step": 19914, + "training_step_time": 0.1677379608154297 + }, + { + "epoch": 3.038787841796875e-05, + "model_forward_time": 0.02416229248046875, + "step": 19915 + }, + { + "epoch": 3.038787841796875e-05, + "step": 19915, + "training_step_time": 0.1450815200805664 + }, + { + "epoch": 3.0389404296875e-05, + "model_forward_time": 0.024320125579833984, + "step": 19916 + }, + { + "epoch": 3.0389404296875e-05, + "step": 19916, + "training_step_time": 0.14340710639953613 + }, + { + "epoch": 3.039093017578125e-05, + "model_forward_time": 0.024419784545898438, + "step": 19917 + }, + { + "epoch": 3.039093017578125e-05, + "step": 19917, + "training_step_time": 0.1277015209197998 + }, + { + "epoch": 3.03924560546875e-05, + "model_forward_time": 0.024507761001586914, + "step": 19918 + }, + { + "epoch": 3.03924560546875e-05, + "step": 19918, + "training_step_time": 0.12740445137023926 + }, + { + "epoch": 3.039398193359375e-05, + "model_forward_time": 0.02516627311706543, + "step": 19919 + }, + { + "epoch": 3.039398193359375e-05, + "step": 19919, + "training_step_time": 0.12455558776855469 + }, + { + "epoch": 3.03955078125e-05, + "grad_norm": 0.1977100521326065, + "learning_rate": 2.7817622282960815e-05, + "loss": 0.0089, + "step": 19920 + }, + { + "epoch": 3.03955078125e-05, + "model_forward_time": 0.024837493896484375, + "step": 19920 + }, + { + "epoch": 3.03955078125e-05, + "step": 19920, + "training_step_time": 0.11716985702514648 + }, + { + "epoch": 3.039703369140625e-05, + "model_forward_time": 0.025469541549682617, + "step": 19921 + }, + { + "epoch": 3.039703369140625e-05, + "step": 19921, + "training_step_time": 0.11849093437194824 + }, + { + "epoch": 3.03985595703125e-05, + "model_forward_time": 0.02434563636779785, + "step": 19922 + }, + { + "epoch": 3.03985595703125e-05, + "step": 19922, + "training_step_time": 0.1139073371887207 + }, + { + "epoch": 3.040008544921875e-05, + "model_forward_time": 0.026251792907714844, + "step": 19923 + }, + { + "epoch": 3.040008544921875e-05, + "step": 19923, + "training_step_time": 0.10900497436523438 + }, + { + "epoch": 3.0401611328125e-05, + "model_forward_time": 0.025104045867919922, + "step": 19924 + }, + { + "epoch": 3.0401611328125e-05, + "step": 19924, + "training_step_time": 0.11059975624084473 + }, + { + "epoch": 3.040313720703125e-05, + "model_forward_time": 0.025193452835083008, + "step": 19925 + }, + { + "epoch": 3.040313720703125e-05, + "step": 19925, + "training_step_time": 0.10805130004882812 + }, + { + "epoch": 3.04046630859375e-05, + "model_forward_time": 0.02548384666442871, + "step": 19926 + }, + { + "epoch": 3.04046630859375e-05, + "step": 19926, + "training_step_time": 0.1086888313293457 + }, + { + "epoch": 3.040618896484375e-05, + "model_forward_time": 0.02505779266357422, + "step": 19927 + }, + { + "epoch": 3.040618896484375e-05, + "step": 19927, + "training_step_time": 0.1521008014678955 + }, + { + "epoch": 3.040771484375e-05, + "model_forward_time": 0.025010347366333008, + "step": 19928 + }, + { + "epoch": 3.040771484375e-05, + "step": 19928, + "training_step_time": 0.12192797660827637 + }, + { + "epoch": 3.040924072265625e-05, + "model_forward_time": 0.02416682243347168, + "step": 19929 + }, + { + "epoch": 3.040924072265625e-05, + "step": 19929, + "training_step_time": 0.10888171195983887 + }, + { + "epoch": 3.04107666015625e-05, + "grad_norm": 0.19256380200386047, + "learning_rate": 2.776824104075364e-05, + "loss": 0.0081, + "step": 19930 + }, + { + "epoch": 3.04107666015625e-05, + "model_forward_time": 0.024959087371826172, + "step": 19930 + }, + { + "epoch": 3.04107666015625e-05, + "step": 19930, + "training_step_time": 0.11455535888671875 + }, + { + "epoch": 3.041229248046875e-05, + "model_forward_time": 0.025134801864624023, + "step": 19931 + }, + { + "epoch": 3.041229248046875e-05, + "step": 19931, + "training_step_time": 0.10491061210632324 + }, + { + "epoch": 3.0413818359375e-05, + "model_forward_time": 0.025040864944458008, + "step": 19932 + }, + { + "epoch": 3.0413818359375e-05, + "step": 19932, + "training_step_time": 0.12108254432678223 + }, + { + "epoch": 3.041534423828125e-05, + "model_forward_time": 0.02526068687438965, + "step": 19933 + }, + { + "epoch": 3.041534423828125e-05, + "step": 19933, + "training_step_time": 0.10828113555908203 + }, + { + "epoch": 3.04168701171875e-05, + "model_forward_time": 0.02504873275756836, + "step": 19934 + }, + { + "epoch": 3.04168701171875e-05, + "step": 19934, + "training_step_time": 0.10734868049621582 + }, + { + "epoch": 3.041839599609375e-05, + "model_forward_time": 0.02499246597290039, + "step": 19935 + }, + { + "epoch": 3.041839599609375e-05, + "step": 19935, + "training_step_time": 0.10853242874145508 + }, + { + "epoch": 3.0419921875e-05, + "model_forward_time": 0.025500774383544922, + "step": 19936 + }, + { + "epoch": 3.0419921875e-05, + "step": 19936, + "training_step_time": 0.10510516166687012 + }, + { + "epoch": 3.042144775390625e-05, + "model_forward_time": 0.024642229080200195, + "step": 19937 + }, + { + "epoch": 3.042144775390625e-05, + "step": 19937, + "training_step_time": 0.10414648056030273 + }, + { + "epoch": 3.04229736328125e-05, + "model_forward_time": 0.025037288665771484, + "step": 19938 + }, + { + "epoch": 3.04229736328125e-05, + "step": 19938, + "training_step_time": 0.1120145320892334 + }, + { + "epoch": 3.042449951171875e-05, + "model_forward_time": 0.0251767635345459, + "step": 19939 + }, + { + "epoch": 3.042449951171875e-05, + "step": 19939, + "training_step_time": 0.10376644134521484 + }, + { + "epoch": 3.0426025390625e-05, + "grad_norm": 0.24935036897659302, + "learning_rate": 2.7718886812229907e-05, + "loss": 0.0078, + "step": 19940 + }, + { + "epoch": 3.0426025390625e-05, + "model_forward_time": 0.02485370635986328, + "step": 19940 + }, + { + "epoch": 3.0426025390625e-05, + "step": 19940, + "training_step_time": 0.10911083221435547 + }, + { + "epoch": 3.042755126953125e-05, + "model_forward_time": 0.02517533302307129, + "step": 19941 + }, + { + "epoch": 3.042755126953125e-05, + "step": 19941, + "training_step_time": 0.11028742790222168 + }, + { + "epoch": 3.04290771484375e-05, + "model_forward_time": 0.02545928955078125, + "step": 19942 + }, + { + "epoch": 3.04290771484375e-05, + "step": 19942, + "training_step_time": 0.10753965377807617 + }, + { + "epoch": 3.043060302734375e-05, + "model_forward_time": 0.02494192123413086, + "step": 19943 + }, + { + "epoch": 3.043060302734375e-05, + "step": 19943, + "training_step_time": 0.18908476829528809 + }, + { + "epoch": 3.043212890625e-05, + "model_forward_time": 0.024820327758789062, + "step": 19944 + }, + { + "epoch": 3.043212890625e-05, + "step": 19944, + "training_step_time": 0.16127562522888184 + }, + { + "epoch": 3.043365478515625e-05, + "model_forward_time": 0.024576187133789062, + "step": 19945 + }, + { + "epoch": 3.043365478515625e-05, + "step": 19945, + "training_step_time": 0.11820197105407715 + }, + { + "epoch": 3.04351806640625e-05, + "model_forward_time": 0.025038480758666992, + "step": 19946 + }, + { + "epoch": 3.04351806640625e-05, + "step": 19946, + "training_step_time": 0.18862605094909668 + }, + { + "epoch": 3.043670654296875e-05, + "model_forward_time": 0.02382349967956543, + "step": 19947 + }, + { + "epoch": 3.043670654296875e-05, + "step": 19947, + "training_step_time": 0.14104557037353516 + }, + { + "epoch": 3.0438232421875e-05, + "model_forward_time": 0.024343252182006836, + "step": 19948 + }, + { + "epoch": 3.0438232421875e-05, + "step": 19948, + "training_step_time": 0.21198773384094238 + }, + { + "epoch": 3.043975830078125e-05, + "model_forward_time": 0.024395465850830078, + "step": 19949 + }, + { + "epoch": 3.043975830078125e-05, + "step": 19949, + "training_step_time": 0.1260221004486084 + }, + { + "epoch": 3.04412841796875e-05, + "grad_norm": 0.24325866997241974, + "learning_rate": 2.766955965735968e-05, + "loss": 0.0075, + "step": 19950 + }, + { + "epoch": 3.04412841796875e-05, + "model_forward_time": 0.024066686630249023, + "step": 19950 + }, + { + "epoch": 3.04412841796875e-05, + "step": 19950, + "training_step_time": 0.11644554138183594 + }, + { + "epoch": 3.044281005859375e-05, + "model_forward_time": 0.026113033294677734, + "step": 19951 + }, + { + "epoch": 3.044281005859375e-05, + "step": 19951, + "training_step_time": 0.10965204238891602 + }, + { + "epoch": 3.04443359375e-05, + "model_forward_time": 0.025269508361816406, + "step": 19952 + }, + { + "epoch": 3.04443359375e-05, + "step": 19952, + "training_step_time": 0.16093039512634277 + }, + { + "epoch": 3.044586181640625e-05, + "model_forward_time": 0.0242764949798584, + "step": 19953 + }, + { + "epoch": 3.044586181640625e-05, + "step": 19953, + "training_step_time": 0.13271808624267578 + }, + { + "epoch": 3.04473876953125e-05, + "model_forward_time": 0.02443671226501465, + "step": 19954 + }, + { + "epoch": 3.04473876953125e-05, + "step": 19954, + "training_step_time": 0.18363428115844727 + }, + { + "epoch": 3.044891357421875e-05, + "model_forward_time": 0.024775028228759766, + "step": 19955 + }, + { + "epoch": 3.044891357421875e-05, + "step": 19955, + "training_step_time": 0.17467808723449707 + }, + { + "epoch": 3.0450439453125e-05, + "model_forward_time": 0.024431705474853516, + "step": 19956 + }, + { + "epoch": 3.0450439453125e-05, + "step": 19956, + "training_step_time": 0.10280227661132812 + }, + { + "epoch": 3.045196533203125e-05, + "model_forward_time": 0.024753093719482422, + "step": 19957 + }, + { + "epoch": 3.045196533203125e-05, + "step": 19957, + "training_step_time": 0.1028439998626709 + }, + { + "epoch": 3.04534912109375e-05, + "model_forward_time": 0.025008201599121094, + "step": 19958 + }, + { + "epoch": 3.04534912109375e-05, + "step": 19958, + "training_step_time": 0.10391712188720703 + }, + { + "epoch": 3.045501708984375e-05, + "model_forward_time": 0.025335311889648438, + "step": 19959 + }, + { + "epoch": 3.045501708984375e-05, + "step": 19959, + "training_step_time": 0.10502505302429199 + }, + { + "epoch": 3.045654296875e-05, + "grad_norm": 0.3688250184059143, + "learning_rate": 2.762025963608009e-05, + "loss": 0.0096, + "step": 19960 + }, + { + "epoch": 3.045654296875e-05, + "model_forward_time": 0.02461075782775879, + "step": 19960 + }, + { + "epoch": 3.045654296875e-05, + "step": 19960, + "training_step_time": 0.10373973846435547 + }, + { + "epoch": 3.045806884765625e-05, + "model_forward_time": 0.02527332305908203, + "step": 19961 + }, + { + "epoch": 3.045806884765625e-05, + "step": 19961, + "training_step_time": 0.10569429397583008 + }, + { + "epoch": 3.04595947265625e-05, + "model_forward_time": 0.02544546127319336, + "step": 19962 + }, + { + "epoch": 3.04595947265625e-05, + "step": 19962, + "training_step_time": 0.10693192481994629 + }, + { + "epoch": 3.046112060546875e-05, + "model_forward_time": 0.0256040096282959, + "step": 19963 + }, + { + "epoch": 3.046112060546875e-05, + "step": 19963, + "training_step_time": 0.1060795783996582 + }, + { + "epoch": 3.0462646484375e-05, + "model_forward_time": 0.025006532669067383, + "step": 19964 + }, + { + "epoch": 3.0462646484375e-05, + "step": 19964, + "training_step_time": 0.10557985305786133 + }, + { + "epoch": 3.046417236328125e-05, + "model_forward_time": 0.025159120559692383, + "step": 19965 + }, + { + "epoch": 3.046417236328125e-05, + "step": 19965, + "training_step_time": 0.10832500457763672 + }, + { + "epoch": 3.04656982421875e-05, + "model_forward_time": 0.025431394577026367, + "step": 19966 + }, + { + "epoch": 3.04656982421875e-05, + "step": 19966, + "training_step_time": 0.10777664184570312 + }, + { + "epoch": 3.046722412109375e-05, + "model_forward_time": 0.02511429786682129, + "step": 19967 + }, + { + "epoch": 3.046722412109375e-05, + "step": 19967, + "training_step_time": 0.10891389846801758 + }, + { + "epoch": 3.046875e-05, + "model_forward_time": 0.02514362335205078, + "step": 19968 + }, + { + "epoch": 3.046875e-05, + "step": 19968, + "training_step_time": 0.10719871520996094 + }, + { + "epoch": 3.047027587890625e-05, + "model_forward_time": 0.02505946159362793, + "step": 19969 + }, + { + "epoch": 3.047027587890625e-05, + "step": 19969, + "training_step_time": 0.10890316963195801 + }, + { + "epoch": 3.04718017578125e-05, + "grad_norm": 0.3474755585193634, + "learning_rate": 2.7570986808295324e-05, + "loss": 0.0085, + "step": 19970 + }, + { + "epoch": 3.04718017578125e-05, + "model_forward_time": 0.02526688575744629, + "step": 19970 + }, + { + "epoch": 3.04718017578125e-05, + "step": 19970, + "training_step_time": 0.10830068588256836 + }, + { + "epoch": 3.047332763671875e-05, + "model_forward_time": 0.025133371353149414, + "step": 19971 + }, + { + "epoch": 3.047332763671875e-05, + "step": 19971, + "training_step_time": 0.13037681579589844 + }, + { + "epoch": 3.0474853515625e-05, + "model_forward_time": 0.02466583251953125, + "step": 19972 + }, + { + "epoch": 3.0474853515625e-05, + "step": 19972, + "training_step_time": 0.148085355758667 + }, + { + "epoch": 3.047637939453125e-05, + "model_forward_time": 0.02457594871520996, + "step": 19973 + }, + { + "epoch": 3.047637939453125e-05, + "step": 19973, + "training_step_time": 0.1656947135925293 + }, + { + "epoch": 3.04779052734375e-05, + "model_forward_time": 0.02419137954711914, + "step": 19974 + }, + { + "epoch": 3.04779052734375e-05, + "step": 19974, + "training_step_time": 0.14847254753112793 + }, + { + "epoch": 3.047943115234375e-05, + "model_forward_time": 0.024055957794189453, + "step": 19975 + }, + { + "epoch": 3.047943115234375e-05, + "step": 19975, + "training_step_time": 0.16578888893127441 + }, + { + "epoch": 3.048095703125e-05, + "model_forward_time": 0.024364233016967773, + "step": 19976 + }, + { + "epoch": 3.048095703125e-05, + "step": 19976, + "training_step_time": 0.1230156421661377 + }, + { + "epoch": 3.048248291015625e-05, + "model_forward_time": 0.024200439453125, + "step": 19977 + }, + { + "epoch": 3.048248291015625e-05, + "step": 19977, + "training_step_time": 0.1225137710571289 + }, + { + "epoch": 3.04840087890625e-05, + "model_forward_time": 0.025120258331298828, + "step": 19978 + }, + { + "epoch": 3.04840087890625e-05, + "step": 19978, + "training_step_time": 0.11369657516479492 + }, + { + "epoch": 3.048553466796875e-05, + "model_forward_time": 0.024889707565307617, + "step": 19979 + }, + { + "epoch": 3.048553466796875e-05, + "step": 19979, + "training_step_time": 0.11343550682067871 + }, + { + "epoch": 3.0487060546875e-05, + "grad_norm": 0.4701537489891052, + "learning_rate": 2.7521741233876496e-05, + "loss": 0.0104, + "step": 19980 + }, + { + "epoch": 3.0487060546875e-05, + "model_forward_time": 0.025390148162841797, + "step": 19980 + }, + { + "epoch": 3.0487060546875e-05, + "step": 19980, + "training_step_time": 0.11195564270019531 + }, + { + "epoch": 3.048858642578125e-05, + "model_forward_time": 0.024222850799560547, + "step": 19981 + }, + { + "epoch": 3.048858642578125e-05, + "step": 19981, + "training_step_time": 0.10916447639465332 + }, + { + "epoch": 3.04901123046875e-05, + "model_forward_time": 0.02498769760131836, + "step": 19982 + }, + { + "epoch": 3.04901123046875e-05, + "step": 19982, + "training_step_time": 0.10727262496948242 + }, + { + "epoch": 3.049163818359375e-05, + "model_forward_time": 0.024943113327026367, + "step": 19983 + }, + { + "epoch": 3.049163818359375e-05, + "step": 19983, + "training_step_time": 0.10680890083312988 + }, + { + "epoch": 3.04931640625e-05, + "model_forward_time": 0.025546550750732422, + "step": 19984 + }, + { + "epoch": 3.04931640625e-05, + "step": 19984, + "training_step_time": 0.10891270637512207 + }, + { + "epoch": 3.049468994140625e-05, + "model_forward_time": 0.025310993194580078, + "step": 19985 + }, + { + "epoch": 3.049468994140625e-05, + "step": 19985, + "training_step_time": 0.10904097557067871 + }, + { + "epoch": 3.04962158203125e-05, + "model_forward_time": 0.025338172912597656, + "step": 19986 + }, + { + "epoch": 3.04962158203125e-05, + "step": 19986, + "training_step_time": 0.10748505592346191 + }, + { + "epoch": 3.049774169921875e-05, + "model_forward_time": 0.024489402770996094, + "step": 19987 + }, + { + "epoch": 3.049774169921875e-05, + "step": 19987, + "training_step_time": 0.16831278800964355 + }, + { + "epoch": 3.0499267578125e-05, + "model_forward_time": 0.024480342864990234, + "step": 19988 + }, + { + "epoch": 3.0499267578125e-05, + "step": 19988, + "training_step_time": 0.1105961799621582 + }, + { + "epoch": 3.050079345703125e-05, + "model_forward_time": 0.024851560592651367, + "step": 19989 + }, + { + "epoch": 3.050079345703125e-05, + "step": 19989, + "training_step_time": 0.19509172439575195 + }, + { + "epoch": 3.05023193359375e-05, + "grad_norm": 0.23995867371559143, + "learning_rate": 2.747252297266162e-05, + "loss": 0.01, + "step": 19990 + }, + { + "epoch": 3.05023193359375e-05, + "model_forward_time": 0.024470090866088867, + "step": 19990 + }, + { + "epoch": 3.05023193359375e-05, + "step": 19990, + "training_step_time": 0.19739103317260742 + }, + { + "epoch": 3.050384521484375e-05, + "model_forward_time": 0.024302959442138672, + "step": 19991 + }, + { + "epoch": 3.050384521484375e-05, + "step": 19991, + "training_step_time": 0.1313340663909912 + }, + { + "epoch": 3.050537109375e-05, + "model_forward_time": 0.02444601058959961, + "step": 19992 + }, + { + "epoch": 3.050537109375e-05, + "step": 19992, + "training_step_time": 0.221099853515625 + }, + { + "epoch": 3.050689697265625e-05, + "model_forward_time": 0.024151325225830078, + "step": 19993 + }, + { + "epoch": 3.050689697265625e-05, + "step": 19993, + "training_step_time": 0.1308457851409912 + }, + { + "epoch": 3.05084228515625e-05, + "model_forward_time": 0.024014711380004883, + "step": 19994 + }, + { + "epoch": 3.05084228515625e-05, + "step": 19994, + "training_step_time": 0.14746379852294922 + }, + { + "epoch": 3.050994873046875e-05, + "model_forward_time": 0.024657726287841797, + "step": 19995 + }, + { + "epoch": 3.050994873046875e-05, + "step": 19995, + "training_step_time": 0.17785954475402832 + }, + { + "epoch": 3.0511474609375e-05, + "model_forward_time": 0.024407148361206055, + "step": 19996 + }, + { + "epoch": 3.0511474609375e-05, + "step": 19996, + "training_step_time": 0.1804971694946289 + }, + { + "epoch": 3.051300048828125e-05, + "model_forward_time": 0.02358102798461914, + "step": 19997 + }, + { + "epoch": 3.051300048828125e-05, + "step": 19997, + "training_step_time": 0.12000608444213867 + }, + { + "epoch": 3.05145263671875e-05, + "model_forward_time": 0.02477407455444336, + "step": 19998 + }, + { + "epoch": 3.05145263671875e-05, + "step": 19998, + "training_step_time": 0.10666346549987793 + }, + { + "epoch": 3.051605224609375e-05, + "model_forward_time": 0.025479555130004883, + "step": 19999 + }, + { + "epoch": 3.051605224609375e-05, + "step": 19999, + "training_step_time": 0.19030046463012695 + }, + { + "epoch": 3.0517578125e-05, + "grad_norm": 0.28397536277770996, + "learning_rate": 2.7423332084455544e-05, + "loss": 0.0096, + "step": 20000 + }, + { + "epoch": 3.0517578125e-05, + "model_forward_time": 0.02337193489074707, + "step": 20000 + }, + { + "epoch": 3.0517578125e-05, + "step": 20000, + "training_step_time": 0.09570574760437012 + }, + { + "epoch": 3.051910400390625e-05, + "model_forward_time": 0.0223236083984375, + "step": 20001 + }, + { + "epoch": 3.051910400390625e-05, + "step": 20001, + "training_step_time": 0.14720726013183594 + }, + { + "epoch": 3.05206298828125e-05, + "model_forward_time": 0.024743318557739258, + "step": 20002 + }, + { + "epoch": 3.05206298828125e-05, + "step": 20002, + "training_step_time": 0.11738300323486328 + }, + { + "epoch": 3.052215576171875e-05, + "model_forward_time": 0.024571895599365234, + "step": 20003 + }, + { + "epoch": 3.052215576171875e-05, + "step": 20003, + "training_step_time": 0.1035306453704834 + }, + { + "epoch": 3.0523681640625e-05, + "model_forward_time": 0.024451017379760742, + "step": 20004 + }, + { + "epoch": 3.0523681640625e-05, + "step": 20004, + "training_step_time": 0.10531020164489746 + }, + { + "epoch": 3.052520751953125e-05, + "model_forward_time": 0.02780914306640625, + "step": 20005 + }, + { + "epoch": 3.052520751953125e-05, + "step": 20005, + "training_step_time": 0.11121726036071777 + }, + { + "epoch": 3.05267333984375e-05, + "model_forward_time": 0.024915695190429688, + "step": 20006 + }, + { + "epoch": 3.05267333984375e-05, + "step": 20006, + "training_step_time": 0.10805940628051758 + }, + { + "epoch": 3.052825927734375e-05, + "model_forward_time": 0.024686336517333984, + "step": 20007 + }, + { + "epoch": 3.052825927734375e-05, + "step": 20007, + "training_step_time": 0.1971290111541748 + }, + { + "epoch": 3.052978515625e-05, + "model_forward_time": 0.023880958557128906, + "step": 20008 + }, + { + "epoch": 3.052978515625e-05, + "step": 20008, + "training_step_time": 0.1034994125366211 + }, + { + "epoch": 3.053131103515625e-05, + "model_forward_time": 0.024181842803955078, + "step": 20009 + }, + { + "epoch": 3.053131103515625e-05, + "step": 20009, + "training_step_time": 0.10066819190979004 + }, + { + "epoch": 3.05328369140625e-05, + "grad_norm": 0.14979436993598938, + "learning_rate": 2.7374168629029813e-05, + "loss": 0.007, + "step": 20010 + }, + { + "epoch": 3.05328369140625e-05, + "model_forward_time": 0.024131298065185547, + "step": 20010 + }, + { + "epoch": 3.05328369140625e-05, + "step": 20010, + "training_step_time": 0.10579872131347656 + }, + { + "epoch": 3.053436279296875e-05, + "model_forward_time": 0.024957895278930664, + "step": 20011 + }, + { + "epoch": 3.053436279296875e-05, + "step": 20011, + "training_step_time": 0.10393357276916504 + }, + { + "epoch": 3.0535888671875e-05, + "model_forward_time": 0.024820327758789062, + "step": 20012 + }, + { + "epoch": 3.0535888671875e-05, + "step": 20012, + "training_step_time": 0.10638189315795898 + }, + { + "epoch": 3.053741455078125e-05, + "model_forward_time": 0.025218725204467773, + "step": 20013 + }, + { + "epoch": 3.053741455078125e-05, + "step": 20013, + "training_step_time": 0.10672807693481445 + }, + { + "epoch": 3.05389404296875e-05, + "model_forward_time": 0.02453017234802246, + "step": 20014 + }, + { + "epoch": 3.05389404296875e-05, + "step": 20014, + "training_step_time": 0.17987537384033203 + }, + { + "epoch": 3.054046630859375e-05, + "model_forward_time": 0.024103403091430664, + "step": 20015 + }, + { + "epoch": 3.054046630859375e-05, + "step": 20015, + "training_step_time": 0.18902587890625 + }, + { + "epoch": 3.05419921875e-05, + "model_forward_time": 0.023686885833740234, + "step": 20016 + }, + { + "epoch": 3.05419921875e-05, + "step": 20016, + "training_step_time": 0.17943453788757324 + }, + { + "epoch": 3.054351806640625e-05, + "model_forward_time": 0.023665189743041992, + "step": 20017 + }, + { + "epoch": 3.054351806640625e-05, + "step": 20017, + "training_step_time": 0.16382527351379395 + }, + { + "epoch": 3.05450439453125e-05, + "model_forward_time": 0.023807048797607422, + "step": 20018 + }, + { + "epoch": 3.05450439453125e-05, + "step": 20018, + "training_step_time": 0.11962771415710449 + }, + { + "epoch": 3.054656982421875e-05, + "model_forward_time": 0.027333974838256836, + "step": 20019 + }, + { + "epoch": 3.054656982421875e-05, + "step": 20019, + "training_step_time": 0.10613679885864258 + }, + { + "epoch": 3.0548095703125e-05, + "grad_norm": 0.14195986092090607, + "learning_rate": 2.7325032666122686e-05, + "loss": 0.0072, + "step": 20020 + }, + { + "epoch": 3.0548095703125e-05, + "model_forward_time": 0.024239778518676758, + "step": 20020 + }, + { + "epoch": 3.0548095703125e-05, + "step": 20020, + "training_step_time": 0.10755038261413574 + }, + { + "epoch": 3.054962158203125e-05, + "model_forward_time": 0.024358034133911133, + "step": 20021 + }, + { + "epoch": 3.054962158203125e-05, + "step": 20021, + "training_step_time": 0.13631629943847656 + }, + { + "epoch": 3.05511474609375e-05, + "model_forward_time": 0.024330854415893555, + "step": 20022 + }, + { + "epoch": 3.05511474609375e-05, + "step": 20022, + "training_step_time": 0.11111330986022949 + }, + { + "epoch": 3.055267333984375e-05, + "model_forward_time": 0.0246884822845459, + "step": 20023 + }, + { + "epoch": 3.055267333984375e-05, + "step": 20023, + "training_step_time": 0.133697509765625 + }, + { + "epoch": 3.055419921875e-05, + "model_forward_time": 0.024876117706298828, + "step": 20024 + }, + { + "epoch": 3.055419921875e-05, + "step": 20024, + "training_step_time": 0.14897513389587402 + }, + { + "epoch": 3.055572509765625e-05, + "model_forward_time": 0.023957490921020508, + "step": 20025 + }, + { + "epoch": 3.055572509765625e-05, + "step": 20025, + "training_step_time": 0.11231374740600586 + }, + { + "epoch": 3.05572509765625e-05, + "model_forward_time": 0.02454519271850586, + "step": 20026 + }, + { + "epoch": 3.05572509765625e-05, + "step": 20026, + "training_step_time": 0.12746739387512207 + }, + { + "epoch": 3.055877685546875e-05, + "model_forward_time": 0.026970386505126953, + "step": 20027 + }, + { + "epoch": 3.055877685546875e-05, + "step": 20027, + "training_step_time": 0.12401461601257324 + }, + { + "epoch": 3.0560302734375e-05, + "model_forward_time": 0.024517536163330078, + "step": 20028 + }, + { + "epoch": 3.0560302734375e-05, + "step": 20028, + "training_step_time": 0.10865044593811035 + }, + { + "epoch": 3.056182861328125e-05, + "model_forward_time": 0.024792909622192383, + "step": 20029 + }, + { + "epoch": 3.056182861328125e-05, + "step": 20029, + "training_step_time": 0.11149740219116211 + }, + { + "epoch": 3.05633544921875e-05, + "grad_norm": 0.20012709498405457, + "learning_rate": 2.727592425543899e-05, + "loss": 0.0066, + "step": 20030 + }, + { + "epoch": 3.05633544921875e-05, + "model_forward_time": 0.024751663208007812, + "step": 20030 + }, + { + "epoch": 3.05633544921875e-05, + "step": 20030, + "training_step_time": 0.10913801193237305 + }, + { + "epoch": 3.056488037109375e-05, + "model_forward_time": 0.024646997451782227, + "step": 20031 + }, + { + "epoch": 3.056488037109375e-05, + "step": 20031, + "training_step_time": 0.10865902900695801 + }, + { + "epoch": 3.056640625e-05, + "model_forward_time": 0.02458953857421875, + "step": 20032 + }, + { + "epoch": 3.056640625e-05, + "step": 20032, + "training_step_time": 0.10888004302978516 + }, + { + "epoch": 3.056793212890625e-05, + "model_forward_time": 0.02497553825378418, + "step": 20033 + }, + { + "epoch": 3.056793212890625e-05, + "step": 20033, + "training_step_time": 0.11115121841430664 + }, + { + "epoch": 3.05694580078125e-05, + "model_forward_time": 0.024533748626708984, + "step": 20034 + }, + { + "epoch": 3.05694580078125e-05, + "step": 20034, + "training_step_time": 0.11658573150634766 + }, + { + "epoch": 3.057098388671875e-05, + "model_forward_time": 0.025025606155395508, + "step": 20035 + }, + { + "epoch": 3.057098388671875e-05, + "step": 20035, + "training_step_time": 0.10935258865356445 + }, + { + "epoch": 3.0572509765625e-05, + "model_forward_time": 0.025971174240112305, + "step": 20036 + }, + { + "epoch": 3.0572509765625e-05, + "step": 20036, + "training_step_time": 0.10526800155639648 + }, + { + "epoch": 3.057403564453125e-05, + "model_forward_time": 0.024916410446166992, + "step": 20037 + }, + { + "epoch": 3.057403564453125e-05, + "step": 20037, + "training_step_time": 0.12465143203735352 + }, + { + "epoch": 3.05755615234375e-05, + "model_forward_time": 0.025053024291992188, + "step": 20038 + }, + { + "epoch": 3.05755615234375e-05, + "step": 20038, + "training_step_time": 0.1185305118560791 + }, + { + "epoch": 3.057708740234375e-05, + "model_forward_time": 0.025161266326904297, + "step": 20039 + }, + { + "epoch": 3.057708740234375e-05, + "step": 20039, + "training_step_time": 0.11285591125488281 + }, + { + "epoch": 3.057861328125e-05, + "grad_norm": 0.39375030994415283, + "learning_rate": 2.7226843456650037e-05, + "loss": 0.0204, + "step": 20040 + }, + { + "epoch": 3.057861328125e-05, + "model_forward_time": 0.025297880172729492, + "step": 20040 + }, + { + "epoch": 3.057861328125e-05, + "step": 20040, + "training_step_time": 0.1164710521697998 + }, + { + "epoch": 3.058013916015625e-05, + "model_forward_time": 0.02543926239013672, + "step": 20041 + }, + { + "epoch": 3.058013916015625e-05, + "step": 20041, + "training_step_time": 0.1324000358581543 + }, + { + "epoch": 3.05816650390625e-05, + "model_forward_time": 0.027437210083007812, + "step": 20042 + }, + { + "epoch": 3.05816650390625e-05, + "step": 20042, + "training_step_time": 0.20728349685668945 + }, + { + "epoch": 3.058319091796875e-05, + "model_forward_time": 0.023511409759521484, + "step": 20043 + }, + { + "epoch": 3.058319091796875e-05, + "step": 20043, + "training_step_time": 0.18297863006591797 + }, + { + "epoch": 3.0584716796875e-05, + "model_forward_time": 0.023816347122192383, + "step": 20044 + }, + { + "epoch": 3.0584716796875e-05, + "step": 20044, + "training_step_time": 0.12964749336242676 + }, + { + "epoch": 3.058624267578125e-05, + "model_forward_time": 0.023683786392211914, + "step": 20045 + }, + { + "epoch": 3.058624267578125e-05, + "step": 20045, + "training_step_time": 0.13047409057617188 + }, + { + "epoch": 3.05877685546875e-05, + "model_forward_time": 0.024052858352661133, + "step": 20046 + }, + { + "epoch": 3.05877685546875e-05, + "step": 20046, + "training_step_time": 0.2108476161956787 + }, + { + "epoch": 3.058929443359375e-05, + "model_forward_time": 0.02394723892211914, + "step": 20047 + }, + { + "epoch": 3.058929443359375e-05, + "step": 20047, + "training_step_time": 0.13980579376220703 + }, + { + "epoch": 3.05908203125e-05, + "model_forward_time": 0.02375960350036621, + "step": 20048 + }, + { + "epoch": 3.05908203125e-05, + "step": 20048, + "training_step_time": 0.19109320640563965 + }, + { + "epoch": 3.059234619140625e-05, + "model_forward_time": 0.0247194766998291, + "step": 20049 + }, + { + "epoch": 3.059234619140625e-05, + "step": 20049, + "training_step_time": 0.10919475555419922 + }, + { + "epoch": 3.05938720703125e-05, + "grad_norm": 0.2476683109998703, + "learning_rate": 2.717779032939367e-05, + "loss": 0.0105, + "step": 20050 + }, + { + "epoch": 3.05938720703125e-05, + "model_forward_time": 0.023656129837036133, + "step": 20050 + }, + { + "epoch": 3.05938720703125e-05, + "step": 20050, + "training_step_time": 0.18899941444396973 + }, + { + "epoch": 3.059539794921875e-05, + "model_forward_time": 0.02353692054748535, + "step": 20051 + }, + { + "epoch": 3.059539794921875e-05, + "step": 20051, + "training_step_time": 0.10837650299072266 + }, + { + "epoch": 3.0596923828125e-05, + "model_forward_time": 0.023532390594482422, + "step": 20052 + }, + { + "epoch": 3.0596923828125e-05, + "step": 20052, + "training_step_time": 0.10756468772888184 + }, + { + "epoch": 3.059844970703125e-05, + "model_forward_time": 0.0244600772857666, + "step": 20053 + }, + { + "epoch": 3.059844970703125e-05, + "step": 20053, + "training_step_time": 0.10891985893249512 + }, + { + "epoch": 3.05999755859375e-05, + "model_forward_time": 0.024141550064086914, + "step": 20054 + }, + { + "epoch": 3.05999755859375e-05, + "step": 20054, + "training_step_time": 0.11549162864685059 + }, + { + "epoch": 3.060150146484375e-05, + "model_forward_time": 0.024569034576416016, + "step": 20055 + }, + { + "epoch": 3.060150146484375e-05, + "step": 20055, + "training_step_time": 0.1092219352722168 + }, + { + "epoch": 3.060302734375e-05, + "model_forward_time": 0.024714946746826172, + "step": 20056 + }, + { + "epoch": 3.060302734375e-05, + "step": 20056, + "training_step_time": 0.10893487930297852 + }, + { + "epoch": 3.060455322265625e-05, + "model_forward_time": 0.02446269989013672, + "step": 20057 + }, + { + "epoch": 3.060455322265625e-05, + "step": 20057, + "training_step_time": 0.10633087158203125 + }, + { + "epoch": 3.06060791015625e-05, + "model_forward_time": 0.0245668888092041, + "step": 20058 + }, + { + "epoch": 3.06060791015625e-05, + "step": 20058, + "training_step_time": 0.11205506324768066 + }, + { + "epoch": 3.060760498046875e-05, + "model_forward_time": 0.024166345596313477, + "step": 20059 + }, + { + "epoch": 3.060760498046875e-05, + "step": 20059, + "training_step_time": 0.1069033145904541 + }, + { + "epoch": 3.0609130859375e-05, + "grad_norm": 0.11443420499563217, + "learning_rate": 2.7128764933274052e-05, + "loss": 0.0047, + "step": 20060 + }, + { + "epoch": 3.0609130859375e-05, + "model_forward_time": 0.024232149124145508, + "step": 20060 + }, + { + "epoch": 3.0609130859375e-05, + "step": 20060, + "training_step_time": 0.1068110466003418 + }, + { + "epoch": 3.061065673828125e-05, + "model_forward_time": 0.025106191635131836, + "step": 20061 + }, + { + "epoch": 3.061065673828125e-05, + "step": 20061, + "training_step_time": 0.17304205894470215 + }, + { + "epoch": 3.06121826171875e-05, + "model_forward_time": 0.024400711059570312, + "step": 20062 + }, + { + "epoch": 3.06121826171875e-05, + "step": 20062, + "training_step_time": 0.19573450088500977 + }, + { + "epoch": 3.061370849609375e-05, + "model_forward_time": 0.023867130279541016, + "step": 20063 + }, + { + "epoch": 3.061370849609375e-05, + "step": 20063, + "training_step_time": 0.18181300163269043 + }, + { + "epoch": 3.0615234375e-05, + "model_forward_time": 0.02433180809020996, + "step": 20064 + }, + { + "epoch": 3.0615234375e-05, + "step": 20064, + "training_step_time": 0.19516205787658691 + }, + { + "epoch": 3.061676025390625e-05, + "model_forward_time": 0.023671865463256836, + "step": 20065 + }, + { + "epoch": 3.061676025390625e-05, + "step": 20065, + "training_step_time": 0.18101906776428223 + }, + { + "epoch": 3.06182861328125e-05, + "model_forward_time": 0.024179458618164062, + "step": 20066 + }, + { + "epoch": 3.06182861328125e-05, + "step": 20066, + "training_step_time": 0.14071011543273926 + }, + { + "epoch": 3.061981201171875e-05, + "model_forward_time": 0.024497270584106445, + "step": 20067 + }, + { + "epoch": 3.061981201171875e-05, + "step": 20067, + "training_step_time": 0.15570378303527832 + }, + { + "epoch": 3.0621337890625e-05, + "model_forward_time": 0.02552008628845215, + "step": 20068 + }, + { + "epoch": 3.0621337890625e-05, + "step": 20068, + "training_step_time": 0.1303102970123291 + }, + { + "epoch": 3.062286376953125e-05, + "model_forward_time": 0.024104595184326172, + "step": 20069 + }, + { + "epoch": 3.062286376953125e-05, + "step": 20069, + "training_step_time": 0.11702728271484375 + }, + { + "epoch": 3.06243896484375e-05, + "grad_norm": 0.36181890964508057, + "learning_rate": 2.707976732786166e-05, + "loss": 0.0146, + "step": 20070 + }, + { + "epoch": 3.06243896484375e-05, + "model_forward_time": 0.02505803108215332, + "step": 20070 + }, + { + "epoch": 3.06243896484375e-05, + "step": 20070, + "training_step_time": 0.10621809959411621 + }, + { + "epoch": 3.062591552734375e-05, + "model_forward_time": 0.025150775909423828, + "step": 20071 + }, + { + "epoch": 3.062591552734375e-05, + "step": 20071, + "training_step_time": 0.10820198059082031 + }, + { + "epoch": 3.062744140625e-05, + "model_forward_time": 0.025429248809814453, + "step": 20072 + }, + { + "epoch": 3.062744140625e-05, + "step": 20072, + "training_step_time": 0.11041092872619629 + }, + { + "epoch": 3.062896728515625e-05, + "model_forward_time": 0.025026798248291016, + "step": 20073 + }, + { + "epoch": 3.062896728515625e-05, + "step": 20073, + "training_step_time": 0.10819387435913086 + }, + { + "epoch": 3.06304931640625e-05, + "model_forward_time": 0.025316238403320312, + "step": 20074 + }, + { + "epoch": 3.06304931640625e-05, + "step": 20074, + "training_step_time": 0.10929751396179199 + }, + { + "epoch": 3.063201904296875e-05, + "model_forward_time": 0.025164365768432617, + "step": 20075 + }, + { + "epoch": 3.063201904296875e-05, + "step": 20075, + "training_step_time": 0.1129920482635498 + }, + { + "epoch": 3.0633544921875e-05, + "model_forward_time": 0.02754378318786621, + "step": 20076 + }, + { + "epoch": 3.0633544921875e-05, + "step": 20076, + "training_step_time": 0.11178350448608398 + }, + { + "epoch": 3.063507080078125e-05, + "model_forward_time": 0.024785757064819336, + "step": 20077 + }, + { + "epoch": 3.063507080078125e-05, + "step": 20077, + "training_step_time": 0.11131596565246582 + }, + { + "epoch": 3.06365966796875e-05, + "model_forward_time": 0.025014638900756836, + "step": 20078 + }, + { + "epoch": 3.06365966796875e-05, + "step": 20078, + "training_step_time": 0.11169075965881348 + }, + { + "epoch": 3.063812255859375e-05, + "model_forward_time": 0.025621652603149414, + "step": 20079 + }, + { + "epoch": 3.063812255859375e-05, + "step": 20079, + "training_step_time": 0.11114239692687988 + }, + { + "epoch": 3.06396484375e-05, + "grad_norm": 0.23736076056957245, + "learning_rate": 2.703079757269319e-05, + "loss": 0.0064, + "step": 20080 + }, + { + "epoch": 3.06396484375e-05, + "model_forward_time": 0.02550959587097168, + "step": 20080 + }, + { + "epoch": 3.06396484375e-05, + "step": 20080, + "training_step_time": 0.13739943504333496 + }, + { + "epoch": 3.064117431640625e-05, + "model_forward_time": 0.024765491485595703, + "step": 20081 + }, + { + "epoch": 3.064117431640625e-05, + "step": 20081, + "training_step_time": 0.10868144035339355 + }, + { + "epoch": 3.06427001953125e-05, + "model_forward_time": 0.025235891342163086, + "step": 20082 + }, + { + "epoch": 3.06427001953125e-05, + "step": 20082, + "training_step_time": 0.19417905807495117 + }, + { + "epoch": 3.064422607421875e-05, + "model_forward_time": 0.025873422622680664, + "step": 20083 + }, + { + "epoch": 3.064422607421875e-05, + "step": 20083, + "training_step_time": 0.13622450828552246 + }, + { + "epoch": 3.0645751953125e-05, + "model_forward_time": 0.02423882484436035, + "step": 20084 + }, + { + "epoch": 3.0645751953125e-05, + "step": 20084, + "training_step_time": 0.19639825820922852 + }, + { + "epoch": 3.064727783203125e-05, + "model_forward_time": 0.024513721466064453, + "step": 20085 + }, + { + "epoch": 3.064727783203125e-05, + "step": 20085, + "training_step_time": 0.17439770698547363 + }, + { + "epoch": 3.06488037109375e-05, + "model_forward_time": 0.024694442749023438, + "step": 20086 + }, + { + "epoch": 3.06488037109375e-05, + "step": 20086, + "training_step_time": 0.11016201972961426 + }, + { + "epoch": 3.065032958984375e-05, + "model_forward_time": 0.024710655212402344, + "step": 20087 + }, + { + "epoch": 3.065032958984375e-05, + "step": 20087, + "training_step_time": 0.10719513893127441 + }, + { + "epoch": 3.065185546875e-05, + "model_forward_time": 0.02510523796081543, + "step": 20088 + }, + { + "epoch": 3.065185546875e-05, + "step": 20088, + "training_step_time": 0.18665575981140137 + }, + { + "epoch": 3.065338134765625e-05, + "model_forward_time": 0.0243532657623291, + "step": 20089 + }, + { + "epoch": 3.065338134765625e-05, + "step": 20089, + "training_step_time": 0.21629023551940918 + }, + { + "epoch": 3.06549072265625e-05, + "grad_norm": 0.1674196422100067, + "learning_rate": 2.698185572727151e-05, + "loss": 0.0116, + "step": 20090 + }, + { + "epoch": 3.06549072265625e-05, + "model_forward_time": 0.024390459060668945, + "step": 20090 + }, + { + "epoch": 3.06549072265625e-05, + "step": 20090, + "training_step_time": 0.10967803001403809 + }, + { + "epoch": 3.065643310546875e-05, + "model_forward_time": 0.024274110794067383, + "step": 20091 + }, + { + "epoch": 3.065643310546875e-05, + "step": 20091, + "training_step_time": 0.11041402816772461 + }, + { + "epoch": 3.0657958984375e-05, + "model_forward_time": 0.02630305290222168, + "step": 20092 + }, + { + "epoch": 3.0657958984375e-05, + "step": 20092, + "training_step_time": 0.11366748809814453 + }, + { + "epoch": 3.065948486328125e-05, + "model_forward_time": 0.025084257125854492, + "step": 20093 + }, + { + "epoch": 3.065948486328125e-05, + "step": 20093, + "training_step_time": 0.1096503734588623 + }, + { + "epoch": 3.06610107421875e-05, + "model_forward_time": 0.025183916091918945, + "step": 20094 + }, + { + "epoch": 3.06610107421875e-05, + "step": 20094, + "training_step_time": 0.19602465629577637 + }, + { + "epoch": 3.066253662109375e-05, + "model_forward_time": 0.02431344985961914, + "step": 20095 + }, + { + "epoch": 3.066253662109375e-05, + "step": 20095, + "training_step_time": 0.1074831485748291 + }, + { + "epoch": 3.06640625e-05, + "model_forward_time": 0.024272441864013672, + "step": 20096 + }, + { + "epoch": 3.06640625e-05, + "step": 20096, + "training_step_time": 0.10624408721923828 + }, + { + "epoch": 3.066558837890625e-05, + "model_forward_time": 0.025000810623168945, + "step": 20097 + }, + { + "epoch": 3.066558837890625e-05, + "step": 20097, + "training_step_time": 0.10942268371582031 + }, + { + "epoch": 3.06671142578125e-05, + "model_forward_time": 0.02573251724243164, + "step": 20098 + }, + { + "epoch": 3.06671142578125e-05, + "step": 20098, + "training_step_time": 0.10874247550964355 + }, + { + "epoch": 3.066864013671875e-05, + "model_forward_time": 0.025012493133544922, + "step": 20099 + }, + { + "epoch": 3.066864013671875e-05, + "step": 20099, + "training_step_time": 0.10790872573852539 + }, + { + "epoch": 3.0670166015625e-05, + "grad_norm": 0.19107389450073242, + "learning_rate": 2.693294185106562e-05, + "loss": 0.0079, + "step": 20100 + }, + { + "epoch": 3.0670166015625e-05, + "model_forward_time": 0.024210214614868164, + "step": 20100 + }, + { + "epoch": 3.0670166015625e-05, + "step": 20100, + "training_step_time": 0.10798215866088867 + }, + { + "epoch": 3.067169189453125e-05, + "model_forward_time": 0.023985862731933594, + "step": 20101 + }, + { + "epoch": 3.067169189453125e-05, + "step": 20101, + "training_step_time": 0.10751986503601074 + }, + { + "epoch": 3.06732177734375e-05, + "model_forward_time": 0.025285005569458008, + "step": 20102 + }, + { + "epoch": 3.06732177734375e-05, + "step": 20102, + "training_step_time": 0.11167526245117188 + }, + { + "epoch": 3.067474365234375e-05, + "model_forward_time": 0.026564836502075195, + "step": 20103 + }, + { + "epoch": 3.067474365234375e-05, + "step": 20103, + "training_step_time": 0.10903596878051758 + }, + { + "epoch": 3.067626953125e-05, + "model_forward_time": 0.025388717651367188, + "step": 20104 + }, + { + "epoch": 3.067626953125e-05, + "step": 20104, + "training_step_time": 0.10700821876525879 + }, + { + "epoch": 3.067779541015625e-05, + "model_forward_time": 0.025379657745361328, + "step": 20105 + }, + { + "epoch": 3.067779541015625e-05, + "step": 20105, + "training_step_time": 0.11047744750976562 + }, + { + "epoch": 3.06793212890625e-05, + "model_forward_time": 0.02525806427001953, + "step": 20106 + }, + { + "epoch": 3.06793212890625e-05, + "step": 20106, + "training_step_time": 0.10770463943481445 + }, + { + "epoch": 3.068084716796875e-05, + "model_forward_time": 0.025288820266723633, + "step": 20107 + }, + { + "epoch": 3.068084716796875e-05, + "step": 20107, + "training_step_time": 0.1085062026977539 + }, + { + "epoch": 3.0682373046875e-05, + "model_forward_time": 0.025474071502685547, + "step": 20108 + }, + { + "epoch": 3.0682373046875e-05, + "step": 20108, + "training_step_time": 0.1755831241607666 + }, + { + "epoch": 3.068389892578125e-05, + "model_forward_time": 0.024499177932739258, + "step": 20109 + }, + { + "epoch": 3.068389892578125e-05, + "step": 20109, + "training_step_time": 0.12106704711914062 + }, + { + "epoch": 3.06854248046875e-05, + "grad_norm": 0.19927628338336945, + "learning_rate": 2.688405600351045e-05, + "loss": 0.0078, + "step": 20110 + }, + { + "epoch": 3.06854248046875e-05, + "model_forward_time": 0.024152517318725586, + "step": 20110 + }, + { + "epoch": 3.06854248046875e-05, + "step": 20110, + "training_step_time": 0.13187289237976074 + }, + { + "epoch": 3.068695068359375e-05, + "model_forward_time": 0.025029659271240234, + "step": 20111 + }, + { + "epoch": 3.068695068359375e-05, + "step": 20111, + "training_step_time": 0.16140413284301758 + }, + { + "epoch": 3.06884765625e-05, + "model_forward_time": 0.024066686630249023, + "step": 20112 + }, + { + "epoch": 3.06884765625e-05, + "step": 20112, + "training_step_time": 0.23043465614318848 + }, + { + "epoch": 3.069000244140625e-05, + "model_forward_time": 0.023512601852416992, + "step": 20113 + }, + { + "epoch": 3.069000244140625e-05, + "step": 20113, + "training_step_time": 0.12192201614379883 + }, + { + "epoch": 3.06915283203125e-05, + "model_forward_time": 0.023984432220458984, + "step": 20114 + }, + { + "epoch": 3.06915283203125e-05, + "step": 20114, + "training_step_time": 0.1234583854675293 + }, + { + "epoch": 3.069305419921875e-05, + "model_forward_time": 0.024393320083618164, + "step": 20115 + }, + { + "epoch": 3.069305419921875e-05, + "step": 20115, + "training_step_time": 0.12225008010864258 + }, + { + "epoch": 3.0694580078125e-05, + "model_forward_time": 0.024712085723876953, + "step": 20116 + }, + { + "epoch": 3.0694580078125e-05, + "step": 20116, + "training_step_time": 0.12128353118896484 + }, + { + "epoch": 3.069610595703125e-05, + "model_forward_time": 0.02452254295349121, + "step": 20117 + }, + { + "epoch": 3.069610595703125e-05, + "step": 20117, + "training_step_time": 0.11886334419250488 + }, + { + "epoch": 3.06976318359375e-05, + "model_forward_time": 0.024532318115234375, + "step": 20118 + }, + { + "epoch": 3.06976318359375e-05, + "step": 20118, + "training_step_time": 0.12436580657958984 + }, + { + "epoch": 3.069915771484375e-05, + "model_forward_time": 0.025753498077392578, + "step": 20119 + }, + { + "epoch": 3.069915771484375e-05, + "step": 20119, + "training_step_time": 0.11226177215576172 + }, + { + "epoch": 3.070068359375e-05, + "grad_norm": 0.2917441725730896, + "learning_rate": 2.6835198244006927e-05, + "loss": 0.009, + "step": 20120 + }, + { + "epoch": 3.070068359375e-05, + "model_forward_time": 0.025027751922607422, + "step": 20120 + }, + { + "epoch": 3.070068359375e-05, + "step": 20120, + "training_step_time": 0.11216235160827637 + }, + { + "epoch": 3.070220947265625e-05, + "model_forward_time": 0.024962902069091797, + "step": 20121 + }, + { + "epoch": 3.070220947265625e-05, + "step": 20121, + "training_step_time": 0.11505413055419922 + }, + { + "epoch": 3.07037353515625e-05, + "model_forward_time": 0.02503514289855957, + "step": 20122 + }, + { + "epoch": 3.07037353515625e-05, + "step": 20122, + "training_step_time": 0.10608220100402832 + }, + { + "epoch": 3.070526123046875e-05, + "model_forward_time": 0.024297475814819336, + "step": 20123 + }, + { + "epoch": 3.070526123046875e-05, + "step": 20123, + "training_step_time": 0.14914870262145996 + }, + { + "epoch": 3.0706787109375e-05, + "model_forward_time": 0.02433180809020996, + "step": 20124 + }, + { + "epoch": 3.0706787109375e-05, + "step": 20124, + "training_step_time": 0.16347765922546387 + }, + { + "epoch": 3.070831298828125e-05, + "model_forward_time": 0.024506092071533203, + "step": 20125 + }, + { + "epoch": 3.070831298828125e-05, + "step": 20125, + "training_step_time": 0.1226353645324707 + }, + { + "epoch": 3.07098388671875e-05, + "model_forward_time": 0.024466514587402344, + "step": 20126 + }, + { + "epoch": 3.07098388671875e-05, + "step": 20126, + "training_step_time": 0.2050187587738037 + }, + { + "epoch": 3.071136474609375e-05, + "model_forward_time": 0.024794816970825195, + "step": 20127 + }, + { + "epoch": 3.071136474609375e-05, + "step": 20127, + "training_step_time": 0.16788792610168457 + }, + { + "epoch": 3.0712890625e-05, + "model_forward_time": 0.024058103561401367, + "step": 20128 + }, + { + "epoch": 3.0712890625e-05, + "step": 20128, + "training_step_time": 0.11533808708190918 + }, + { + "epoch": 3.071441650390625e-05, + "model_forward_time": 0.024599552154541016, + "step": 20129 + }, + { + "epoch": 3.071441650390625e-05, + "step": 20129, + "training_step_time": 0.10707664489746094 + }, + { + "epoch": 3.07159423828125e-05, + "grad_norm": 0.26147332787513733, + "learning_rate": 2.6786368631921836e-05, + "loss": 0.0101, + "step": 20130 + }, + { + "epoch": 3.07159423828125e-05, + "model_forward_time": 0.02516651153564453, + "step": 20130 + }, + { + "epoch": 3.07159423828125e-05, + "step": 20130, + "training_step_time": 0.17835164070129395 + }, + { + "epoch": 3.071746826171875e-05, + "model_forward_time": 0.024977684020996094, + "step": 20131 + }, + { + "epoch": 3.071746826171875e-05, + "step": 20131, + "training_step_time": 0.1799161434173584 + }, + { + "epoch": 3.0718994140625e-05, + "model_forward_time": 0.02429032325744629, + "step": 20132 + }, + { + "epoch": 3.0718994140625e-05, + "step": 20132, + "training_step_time": 0.1367645263671875 + }, + { + "epoch": 3.072052001953125e-05, + "model_forward_time": 0.02455615997314453, + "step": 20133 + }, + { + "epoch": 3.072052001953125e-05, + "step": 20133, + "training_step_time": 0.1148984432220459 + }, + { + "epoch": 3.07220458984375e-05, + "model_forward_time": 0.025042057037353516, + "step": 20134 + }, + { + "epoch": 3.07220458984375e-05, + "step": 20134, + "training_step_time": 0.1097564697265625 + }, + { + "epoch": 3.072357177734375e-05, + "model_forward_time": 0.025260448455810547, + "step": 20135 + }, + { + "epoch": 3.072357177734375e-05, + "step": 20135, + "training_step_time": 0.10812830924987793 + }, + { + "epoch": 3.072509765625e-05, + "model_forward_time": 0.02514958381652832, + "step": 20136 + }, + { + "epoch": 3.072509765625e-05, + "step": 20136, + "training_step_time": 0.11067843437194824 + }, + { + "epoch": 3.072662353515625e-05, + "model_forward_time": 0.025125980377197266, + "step": 20137 + }, + { + "epoch": 3.072662353515625e-05, + "step": 20137, + "training_step_time": 0.19734907150268555 + }, + { + "epoch": 3.07281494140625e-05, + "model_forward_time": 0.024164676666259766, + "step": 20138 + }, + { + "epoch": 3.07281494140625e-05, + "step": 20138, + "training_step_time": 0.11006879806518555 + }, + { + "epoch": 3.072967529296875e-05, + "model_forward_time": 0.02487635612487793, + "step": 20139 + }, + { + "epoch": 3.072967529296875e-05, + "step": 20139, + "training_step_time": 0.10609126091003418 + }, + { + "epoch": 3.0731201171875e-05, + "grad_norm": 0.2607508599758148, + "learning_rate": 2.6737567226587747e-05, + "loss": 0.0057, + "step": 20140 + }, + { + "epoch": 3.0731201171875e-05, + "model_forward_time": 0.025351762771606445, + "step": 20140 + }, + { + "epoch": 3.0731201171875e-05, + "step": 20140, + "training_step_time": 0.10692882537841797 + }, + { + "epoch": 3.073272705078125e-05, + "model_forward_time": 0.025469303131103516, + "step": 20141 + }, + { + "epoch": 3.073272705078125e-05, + "step": 20141, + "training_step_time": 0.10724258422851562 + }, + { + "epoch": 3.07342529296875e-05, + "model_forward_time": 0.025270462036132812, + "step": 20142 + }, + { + "epoch": 3.07342529296875e-05, + "step": 20142, + "training_step_time": 0.1138763427734375 + }, + { + "epoch": 3.073577880859375e-05, + "model_forward_time": 0.025310754776000977, + "step": 20143 + }, + { + "epoch": 3.073577880859375e-05, + "step": 20143, + "training_step_time": 0.11436271667480469 + }, + { + "epoch": 3.07373046875e-05, + "model_forward_time": 0.025474071502685547, + "step": 20144 + }, + { + "epoch": 3.07373046875e-05, + "step": 20144, + "training_step_time": 0.11069488525390625 + }, + { + "epoch": 3.073883056640625e-05, + "model_forward_time": 0.0251162052154541, + "step": 20145 + }, + { + "epoch": 3.073883056640625e-05, + "step": 20145, + "training_step_time": 0.11208844184875488 + }, + { + "epoch": 3.07403564453125e-05, + "model_forward_time": 0.025007963180541992, + "step": 20146 + }, + { + "epoch": 3.07403564453125e-05, + "step": 20146, + "training_step_time": 0.11350512504577637 + }, + { + "epoch": 3.074188232421875e-05, + "model_forward_time": 0.02541518211364746, + "step": 20147 + }, + { + "epoch": 3.074188232421875e-05, + "step": 20147, + "training_step_time": 0.11229419708251953 + }, + { + "epoch": 3.0743408203125e-05, + "model_forward_time": 0.02498459815979004, + "step": 20148 + }, + { + "epoch": 3.0743408203125e-05, + "step": 20148, + "training_step_time": 0.10809683799743652 + }, + { + "epoch": 3.074493408203125e-05, + "model_forward_time": 0.025279760360717773, + "step": 20149 + }, + { + "epoch": 3.074493408203125e-05, + "step": 20149, + "training_step_time": 0.1080009937286377 + }, + { + "epoch": 3.07464599609375e-05, + "grad_norm": 0.13364791870117188, + "learning_rate": 2.6688794087302993e-05, + "loss": 0.0085, + "step": 20150 + }, + { + "epoch": 3.07464599609375e-05, + "model_forward_time": 0.025080204010009766, + "step": 20150 + }, + { + "epoch": 3.07464599609375e-05, + "step": 20150, + "training_step_time": 0.11108160018920898 + }, + { + "epoch": 3.074798583984375e-05, + "model_forward_time": 0.025337934494018555, + "step": 20151 + }, + { + "epoch": 3.074798583984375e-05, + "step": 20151, + "training_step_time": 0.1914665699005127 + }, + { + "epoch": 3.074951171875e-05, + "model_forward_time": 0.024394512176513672, + "step": 20152 + }, + { + "epoch": 3.074951171875e-05, + "step": 20152, + "training_step_time": 0.11838960647583008 + }, + { + "epoch": 3.075103759765625e-05, + "model_forward_time": 0.024555206298828125, + "step": 20153 + }, + { + "epoch": 3.075103759765625e-05, + "step": 20153, + "training_step_time": 0.12694668769836426 + }, + { + "epoch": 3.07525634765625e-05, + "model_forward_time": 0.025015592575073242, + "step": 20154 + }, + { + "epoch": 3.07525634765625e-05, + "step": 20154, + "training_step_time": 0.1599881649017334 + }, + { + "epoch": 3.075408935546875e-05, + "model_forward_time": 0.024646759033203125, + "step": 20155 + }, + { + "epoch": 3.075408935546875e-05, + "step": 20155, + "training_step_time": 0.17630243301391602 + }, + { + "epoch": 3.0755615234375e-05, + "model_forward_time": 0.024446725845336914, + "step": 20156 + }, + { + "epoch": 3.0755615234375e-05, + "step": 20156, + "training_step_time": 0.16309022903442383 + }, + { + "epoch": 3.075714111328125e-05, + "model_forward_time": 0.02433037757873535, + "step": 20157 + }, + { + "epoch": 3.075714111328125e-05, + "step": 20157, + "training_step_time": 0.10565042495727539 + }, + { + "epoch": 3.07586669921875e-05, + "model_forward_time": 0.0246889591217041, + "step": 20158 + }, + { + "epoch": 3.07586669921875e-05, + "step": 20158, + "training_step_time": 0.11262702941894531 + }, + { + "epoch": 3.076019287109375e-05, + "model_forward_time": 0.025122880935668945, + "step": 20159 + }, + { + "epoch": 3.076019287109375e-05, + "step": 20159, + "training_step_time": 0.10605311393737793 + }, + { + "epoch": 3.076171875e-05, + "grad_norm": 0.1781671792268753, + "learning_rate": 2.6640049273331515e-05, + "loss": 0.0092, + "step": 20160 + }, + { + "epoch": 3.076171875e-05, + "model_forward_time": 0.025907516479492188, + "step": 20160 + }, + { + "epoch": 3.076171875e-05, + "step": 20160, + "training_step_time": 0.11122298240661621 + }, + { + "epoch": 3.076324462890625e-05, + "model_forward_time": 0.024812698364257812, + "step": 20161 + }, + { + "epoch": 3.076324462890625e-05, + "step": 20161, + "training_step_time": 0.11050534248352051 + }, + { + "epoch": 3.07647705078125e-05, + "model_forward_time": 0.02444601058959961, + "step": 20162 + }, + { + "epoch": 3.07647705078125e-05, + "step": 20162, + "training_step_time": 0.1074838638305664 + }, + { + "epoch": 3.076629638671875e-05, + "model_forward_time": 0.02477550506591797, + "step": 20163 + }, + { + "epoch": 3.076629638671875e-05, + "step": 20163, + "training_step_time": 0.10726523399353027 + }, + { + "epoch": 3.0767822265625e-05, + "model_forward_time": 0.024585485458374023, + "step": 20164 + }, + { + "epoch": 3.0767822265625e-05, + "step": 20164, + "training_step_time": 0.11070656776428223 + }, + { + "epoch": 3.076934814453125e-05, + "model_forward_time": 0.024741649627685547, + "step": 20165 + }, + { + "epoch": 3.076934814453125e-05, + "step": 20165, + "training_step_time": 0.10494041442871094 + }, + { + "epoch": 3.07708740234375e-05, + "model_forward_time": 0.02466273307800293, + "step": 20166 + }, + { + "epoch": 3.07708740234375e-05, + "step": 20166, + "training_step_time": 0.10512232780456543 + }, + { + "epoch": 3.077239990234375e-05, + "model_forward_time": 0.023527860641479492, + "step": 20167 + }, + { + "epoch": 3.077239990234375e-05, + "step": 20167, + "training_step_time": 0.15217900276184082 + }, + { + "epoch": 3.077392578125e-05, + "model_forward_time": 0.023688316345214844, + "step": 20168 + }, + { + "epoch": 3.077392578125e-05, + "step": 20168, + "training_step_time": 0.1640167236328125 + }, + { + "epoch": 3.077545166015625e-05, + "model_forward_time": 0.02411794662475586, + "step": 20169 + }, + { + "epoch": 3.077545166015625e-05, + "step": 20169, + "training_step_time": 0.11856651306152344 + }, + { + "epoch": 3.07769775390625e-05, + "grad_norm": 0.20678578317165375, + "learning_rate": 2.6591332843902884e-05, + "loss": 0.0126, + "step": 20170 + }, + { + "epoch": 3.07769775390625e-05, + "model_forward_time": 0.023735523223876953, + "step": 20170 + }, + { + "epoch": 3.07769775390625e-05, + "step": 20170, + "training_step_time": 0.1717686653137207 + }, + { + "epoch": 3.077850341796875e-05, + "model_forward_time": 0.023598670959472656, + "step": 20171 + }, + { + "epoch": 3.077850341796875e-05, + "step": 20171, + "training_step_time": 0.19781208038330078 + }, + { + "epoch": 3.0780029296875e-05, + "model_forward_time": 0.024016380310058594, + "step": 20172 + }, + { + "epoch": 3.0780029296875e-05, + "step": 20172, + "training_step_time": 0.11575102806091309 + }, + { + "epoch": 3.078155517578125e-05, + "model_forward_time": 0.023656845092773438, + "step": 20173 + }, + { + "epoch": 3.078155517578125e-05, + "step": 20173, + "training_step_time": 0.1806182861328125 + }, + { + "epoch": 3.07830810546875e-05, + "model_forward_time": 0.023870229721069336, + "step": 20174 + }, + { + "epoch": 3.07830810546875e-05, + "step": 20174, + "training_step_time": 0.11176514625549316 + }, + { + "epoch": 3.078460693359375e-05, + "model_forward_time": 0.02383899688720703, + "step": 20175 + }, + { + "epoch": 3.078460693359375e-05, + "step": 20175, + "training_step_time": 0.17157578468322754 + }, + { + "epoch": 3.07861328125e-05, + "model_forward_time": 0.02377796173095703, + "step": 20176 + }, + { + "epoch": 3.07861328125e-05, + "step": 20176, + "training_step_time": 0.13790416717529297 + }, + { + "epoch": 3.078765869140625e-05, + "model_forward_time": 0.0238645076751709, + "step": 20177 + }, + { + "epoch": 3.078765869140625e-05, + "step": 20177, + "training_step_time": 0.10911083221435547 + }, + { + "epoch": 3.07891845703125e-05, + "model_forward_time": 0.02460002899169922, + "step": 20178 + }, + { + "epoch": 3.07891845703125e-05, + "step": 20178, + "training_step_time": 0.11597537994384766 + }, + { + "epoch": 3.079071044921875e-05, + "model_forward_time": 0.024309635162353516, + "step": 20179 + }, + { + "epoch": 3.079071044921875e-05, + "step": 20179, + "training_step_time": 0.11704421043395996 + }, + { + "epoch": 3.0792236328125e-05, + "grad_norm": 0.24393905699253082, + "learning_rate": 2.654264485821214e-05, + "loss": 0.0113, + "step": 20180 + }, + { + "epoch": 3.0792236328125e-05, + "model_forward_time": 0.024632692337036133, + "step": 20180 + }, + { + "epoch": 3.0792236328125e-05, + "step": 20180, + "training_step_time": 0.1798086166381836 + }, + { + "epoch": 3.079376220703125e-05, + "model_forward_time": 0.023427724838256836, + "step": 20181 + }, + { + "epoch": 3.079376220703125e-05, + "step": 20181, + "training_step_time": 0.12470054626464844 + }, + { + "epoch": 3.07952880859375e-05, + "model_forward_time": 0.02408123016357422, + "step": 20182 + }, + { + "epoch": 3.07952880859375e-05, + "step": 20182, + "training_step_time": 0.10941123962402344 + }, + { + "epoch": 3.079681396484375e-05, + "model_forward_time": 0.02460336685180664, + "step": 20183 + }, + { + "epoch": 3.079681396484375e-05, + "step": 20183, + "training_step_time": 0.11217641830444336 + }, + { + "epoch": 3.079833984375e-05, + "model_forward_time": 0.024121999740600586, + "step": 20184 + }, + { + "epoch": 3.079833984375e-05, + "step": 20184, + "training_step_time": 0.1081087589263916 + }, + { + "epoch": 3.079986572265625e-05, + "model_forward_time": 0.023935556411743164, + "step": 20185 + }, + { + "epoch": 3.079986572265625e-05, + "step": 20185, + "training_step_time": 0.1061856746673584 + }, + { + "epoch": 3.08013916015625e-05, + "model_forward_time": 0.024003028869628906, + "step": 20186 + }, + { + "epoch": 3.08013916015625e-05, + "step": 20186, + "training_step_time": 0.1091306209564209 + }, + { + "epoch": 3.080291748046875e-05, + "model_forward_time": 0.026261091232299805, + "step": 20187 + }, + { + "epoch": 3.080291748046875e-05, + "step": 20187, + "training_step_time": 0.10797309875488281 + }, + { + "epoch": 3.0804443359375e-05, + "model_forward_time": 0.024617671966552734, + "step": 20188 + }, + { + "epoch": 3.0804443359375e-05, + "step": 20188, + "training_step_time": 0.11060404777526855 + }, + { + "epoch": 3.080596923828125e-05, + "model_forward_time": 0.023793458938598633, + "step": 20189 + }, + { + "epoch": 3.080596923828125e-05, + "step": 20189, + "training_step_time": 0.11011838912963867 + }, + { + "epoch": 3.08074951171875e-05, + "grad_norm": 0.14980293810367584, + "learning_rate": 2.6493985375419778e-05, + "loss": 0.0057, + "step": 20190 + }, + { + "epoch": 3.08074951171875e-05, + "model_forward_time": 0.024554729461669922, + "step": 20190 + }, + { + "epoch": 3.08074951171875e-05, + "step": 20190, + "training_step_time": 0.1127471923828125 + }, + { + "epoch": 3.080902099609375e-05, + "model_forward_time": 0.024039030075073242, + "step": 20191 + }, + { + "epoch": 3.080902099609375e-05, + "step": 20191, + "training_step_time": 0.11475777626037598 + }, + { + "epoch": 3.0810546875e-05, + "model_forward_time": 0.024587631225585938, + "step": 20192 + }, + { + "epoch": 3.0810546875e-05, + "step": 20192, + "training_step_time": 0.1079854965209961 + }, + { + "epoch": 3.081207275390625e-05, + "model_forward_time": 0.02412128448486328, + "step": 20193 + }, + { + "epoch": 3.081207275390625e-05, + "step": 20193, + "training_step_time": 0.10799217224121094 + }, + { + "epoch": 3.08135986328125e-05, + "model_forward_time": 0.024086713790893555, + "step": 20194 + }, + { + "epoch": 3.08135986328125e-05, + "step": 20194, + "training_step_time": 0.10820508003234863 + }, + { + "epoch": 3.081512451171875e-05, + "model_forward_time": 0.024826526641845703, + "step": 20195 + }, + { + "epoch": 3.081512451171875e-05, + "step": 20195, + "training_step_time": 0.2284867763519287 + }, + { + "epoch": 3.0816650390625e-05, + "model_forward_time": 0.023824214935302734, + "step": 20196 + }, + { + "epoch": 3.0816650390625e-05, + "step": 20196, + "training_step_time": 0.11643433570861816 + }, + { + "epoch": 3.081817626953125e-05, + "model_forward_time": 0.023323774337768555, + "step": 20197 + }, + { + "epoch": 3.081817626953125e-05, + "step": 20197, + "training_step_time": 0.1398162841796875 + }, + { + "epoch": 3.08197021484375e-05, + "model_forward_time": 0.024189233779907227, + "step": 20198 + }, + { + "epoch": 3.08197021484375e-05, + "step": 20198, + "training_step_time": 0.161545991897583 + }, + { + "epoch": 3.082122802734375e-05, + "model_forward_time": 0.023430824279785156, + "step": 20199 + }, + { + "epoch": 3.082122802734375e-05, + "step": 20199, + "training_step_time": 0.2218492031097412 + }, + { + "epoch": 3.082275390625e-05, + "grad_norm": 0.18001802265644073, + "learning_rate": 2.644535445465164e-05, + "loss": 0.0064, + "step": 20200 + }, + { + "epoch": 3.082275390625e-05, + "model_forward_time": 0.02357935905456543, + "step": 20200 + }, + { + "epoch": 3.082275390625e-05, + "step": 20200, + "training_step_time": 0.11756253242492676 + }, + { + "epoch": 3.082427978515625e-05, + "model_forward_time": 0.023369789123535156, + "step": 20201 + }, + { + "epoch": 3.082427978515625e-05, + "step": 20201, + "training_step_time": 0.11740565299987793 + }, + { + "epoch": 3.08258056640625e-05, + "model_forward_time": 0.02399921417236328, + "step": 20202 + }, + { + "epoch": 3.08258056640625e-05, + "step": 20202, + "training_step_time": 0.11145997047424316 + }, + { + "epoch": 3.082733154296875e-05, + "model_forward_time": 0.026042938232421875, + "step": 20203 + }, + { + "epoch": 3.082733154296875e-05, + "step": 20203, + "training_step_time": 0.11573982238769531 + }, + { + "epoch": 3.0828857421875e-05, + "model_forward_time": 0.024569272994995117, + "step": 20204 + }, + { + "epoch": 3.0828857421875e-05, + "step": 20204, + "training_step_time": 0.10821700096130371 + }, + { + "epoch": 3.083038330078125e-05, + "model_forward_time": 0.02445816993713379, + "step": 20205 + }, + { + "epoch": 3.083038330078125e-05, + "step": 20205, + "training_step_time": 0.1122283935546875 + }, + { + "epoch": 3.08319091796875e-05, + "model_forward_time": 0.025043249130249023, + "step": 20206 + }, + { + "epoch": 3.08319091796875e-05, + "step": 20206, + "training_step_time": 0.10926246643066406 + }, + { + "epoch": 3.083343505859375e-05, + "model_forward_time": 0.024710893630981445, + "step": 20207 + }, + { + "epoch": 3.083343505859375e-05, + "step": 20207, + "training_step_time": 0.10887002944946289 + }, + { + "epoch": 3.08349609375e-05, + "model_forward_time": 0.02412724494934082, + "step": 20208 + }, + { + "epoch": 3.08349609375e-05, + "step": 20208, + "training_step_time": 0.1103203296661377 + }, + { + "epoch": 3.083648681640625e-05, + "model_forward_time": 0.02427530288696289, + "step": 20209 + }, + { + "epoch": 3.083648681640625e-05, + "step": 20209, + "training_step_time": 0.1092996597290039 + }, + { + "epoch": 3.08380126953125e-05, + "grad_norm": 0.0846259742975235, + "learning_rate": 2.6396752154998915e-05, + "loss": 0.0051, + "step": 20210 + }, + { + "epoch": 3.08380126953125e-05, + "model_forward_time": 0.024137258529663086, + "step": 20210 + }, + { + "epoch": 3.08380126953125e-05, + "step": 20210, + "training_step_time": 0.10398650169372559 + }, + { + "epoch": 3.083953857421875e-05, + "model_forward_time": 0.023733854293823242, + "step": 20211 + }, + { + "epoch": 3.083953857421875e-05, + "step": 20211, + "training_step_time": 0.14608263969421387 + }, + { + "epoch": 3.0841064453125e-05, + "model_forward_time": 0.024096012115478516, + "step": 20212 + }, + { + "epoch": 3.0841064453125e-05, + "step": 20212, + "training_step_time": 0.1712636947631836 + }, + { + "epoch": 3.084259033203125e-05, + "model_forward_time": 0.0238494873046875, + "step": 20213 + }, + { + "epoch": 3.084259033203125e-05, + "step": 20213, + "training_step_time": 0.1312401294708252 + }, + { + "epoch": 3.08441162109375e-05, + "model_forward_time": 0.023639917373657227, + "step": 20214 + }, + { + "epoch": 3.08441162109375e-05, + "step": 20214, + "training_step_time": 0.20663022994995117 + }, + { + "epoch": 3.084564208984375e-05, + "model_forward_time": 0.02353048324584961, + "step": 20215 + }, + { + "epoch": 3.084564208984375e-05, + "step": 20215, + "training_step_time": 0.10597443580627441 + }, + { + "epoch": 3.084716796875e-05, + "model_forward_time": 0.023611783981323242, + "step": 20216 + }, + { + "epoch": 3.084716796875e-05, + "step": 20216, + "training_step_time": 0.18341851234436035 + }, + { + "epoch": 3.084869384765625e-05, + "model_forward_time": 0.023435354232788086, + "step": 20217 + }, + { + "epoch": 3.084869384765625e-05, + "step": 20217, + "training_step_time": 0.10608863830566406 + }, + { + "epoch": 3.08502197265625e-05, + "model_forward_time": 0.02347254753112793, + "step": 20218 + }, + { + "epoch": 3.08502197265625e-05, + "step": 20218, + "training_step_time": 0.10412979125976562 + }, + { + "epoch": 3.085174560546875e-05, + "model_forward_time": 0.024471044540405273, + "step": 20219 + }, + { + "epoch": 3.085174560546875e-05, + "step": 20219, + "training_step_time": 0.18188953399658203 + }, + { + "epoch": 3.0853271484375e-05, + "grad_norm": 0.11704346537590027, + "learning_rate": 2.6348178535517966e-05, + "loss": 0.0077, + "step": 20220 + }, + { + "epoch": 3.0853271484375e-05, + "model_forward_time": 0.02349066734313965, + "step": 20220 + }, + { + "epoch": 3.0853271484375e-05, + "step": 20220, + "training_step_time": 0.13672637939453125 + }, + { + "epoch": 3.085479736328125e-05, + "model_forward_time": 0.02340984344482422, + "step": 20221 + }, + { + "epoch": 3.085479736328125e-05, + "step": 20221, + "training_step_time": 0.10885834693908691 + }, + { + "epoch": 3.08563232421875e-05, + "model_forward_time": 0.024364471435546875, + "step": 20222 + }, + { + "epoch": 3.08563232421875e-05, + "step": 20222, + "training_step_time": 0.1128389835357666 + }, + { + "epoch": 3.085784912109375e-05, + "model_forward_time": 0.024009227752685547, + "step": 20223 + }, + { + "epoch": 3.085784912109375e-05, + "step": 20223, + "training_step_time": 0.116302490234375 + }, + { + "epoch": 3.0859375e-05, + "model_forward_time": 0.024618864059448242, + "step": 20224 + }, + { + "epoch": 3.0859375e-05, + "step": 20224, + "training_step_time": 0.10491657257080078 + }, + { + "epoch": 3.086090087890625e-05, + "model_forward_time": 0.02428150177001953, + "step": 20225 + }, + { + "epoch": 3.086090087890625e-05, + "step": 20225, + "training_step_time": 0.19633841514587402 + }, + { + "epoch": 3.08624267578125e-05, + "model_forward_time": 0.023705005645751953, + "step": 20226 + }, + { + "epoch": 3.08624267578125e-05, + "step": 20226, + "training_step_time": 0.1023871898651123 + }, + { + "epoch": 3.086395263671875e-05, + "model_forward_time": 0.02349567413330078, + "step": 20227 + }, + { + "epoch": 3.086395263671875e-05, + "step": 20227, + "training_step_time": 0.10074210166931152 + }, + { + "epoch": 3.0865478515625e-05, + "model_forward_time": 0.024228811264038086, + "step": 20228 + }, + { + "epoch": 3.0865478515625e-05, + "step": 20228, + "training_step_time": 0.10529589653015137 + }, + { + "epoch": 3.086700439453125e-05, + "model_forward_time": 0.024242877960205078, + "step": 20229 + }, + { + "epoch": 3.086700439453125e-05, + "step": 20229, + "training_step_time": 0.10521268844604492 + }, + { + "epoch": 3.08685302734375e-05, + "grad_norm": 0.28940412402153015, + "learning_rate": 2.629963365523031e-05, + "loss": 0.0143, + "step": 20230 + }, + { + "epoch": 3.08685302734375e-05, + "model_forward_time": 0.024315357208251953, + "step": 20230 + }, + { + "epoch": 3.08685302734375e-05, + "step": 20230, + "training_step_time": 0.10441899299621582 + }, + { + "epoch": 3.087005615234375e-05, + "model_forward_time": 0.024458646774291992, + "step": 20231 + }, + { + "epoch": 3.087005615234375e-05, + "step": 20231, + "training_step_time": 0.10368585586547852 + }, + { + "epoch": 3.087158203125e-05, + "model_forward_time": 0.02495121955871582, + "step": 20232 + }, + { + "epoch": 3.087158203125e-05, + "step": 20232, + "training_step_time": 0.10915946960449219 + }, + { + "epoch": 3.087310791015625e-05, + "model_forward_time": 0.024271488189697266, + "step": 20233 + }, + { + "epoch": 3.087310791015625e-05, + "step": 20233, + "training_step_time": 0.10931921005249023 + }, + { + "epoch": 3.08746337890625e-05, + "model_forward_time": 0.024724721908569336, + "step": 20234 + }, + { + "epoch": 3.08746337890625e-05, + "step": 20234, + "training_step_time": 0.10394024848937988 + }, + { + "epoch": 3.087615966796875e-05, + "model_forward_time": 0.024185895919799805, + "step": 20235 + }, + { + "epoch": 3.087615966796875e-05, + "step": 20235, + "training_step_time": 0.1086115837097168 + }, + { + "epoch": 3.0877685546875e-05, + "model_forward_time": 0.024412870407104492, + "step": 20236 + }, + { + "epoch": 3.0877685546875e-05, + "step": 20236, + "training_step_time": 0.10397005081176758 + }, + { + "epoch": 3.087921142578125e-05, + "model_forward_time": 0.024617433547973633, + "step": 20237 + }, + { + "epoch": 3.087921142578125e-05, + "step": 20237, + "training_step_time": 0.10375833511352539 + }, + { + "epoch": 3.08807373046875e-05, + "model_forward_time": 0.024225950241088867, + "step": 20238 + }, + { + "epoch": 3.08807373046875e-05, + "step": 20238, + "training_step_time": 0.10857629776000977 + }, + { + "epoch": 3.088226318359375e-05, + "model_forward_time": 0.02434706687927246, + "step": 20239 + }, + { + "epoch": 3.088226318359375e-05, + "step": 20239, + "training_step_time": 0.10439443588256836 + }, + { + "epoch": 3.08837890625e-05, + "grad_norm": 0.3185952603816986, + "learning_rate": 2.6251117573122563e-05, + "loss": 0.0082, + "step": 20240 + }, + { + "epoch": 3.08837890625e-05, + "model_forward_time": 0.02418375015258789, + "step": 20240 + }, + { + "epoch": 3.08837890625e-05, + "step": 20240, + "training_step_time": 0.19379448890686035 + }, + { + "epoch": 3.088531494140625e-05, + "model_forward_time": 0.023304224014282227, + "step": 20241 + }, + { + "epoch": 3.088531494140625e-05, + "step": 20241, + "training_step_time": 0.12569713592529297 + }, + { + "epoch": 3.08868408203125e-05, + "model_forward_time": 0.023907184600830078, + "step": 20242 + }, + { + "epoch": 3.08868408203125e-05, + "step": 20242, + "training_step_time": 0.13182806968688965 + }, + { + "epoch": 3.088836669921875e-05, + "model_forward_time": 0.024083852767944336, + "step": 20243 + }, + { + "epoch": 3.088836669921875e-05, + "step": 20243, + "training_step_time": 0.16073083877563477 + }, + { + "epoch": 3.0889892578125e-05, + "model_forward_time": 0.023714780807495117, + "step": 20244 + }, + { + "epoch": 3.0889892578125e-05, + "step": 20244, + "training_step_time": 0.2216653823852539 + }, + { + "epoch": 3.089141845703125e-05, + "model_forward_time": 0.022882699966430664, + "step": 20245 + }, + { + "epoch": 3.089141845703125e-05, + "step": 20245, + "training_step_time": 0.12031817436218262 + }, + { + "epoch": 3.08929443359375e-05, + "model_forward_time": 0.02342391014099121, + "step": 20246 + }, + { + "epoch": 3.08929443359375e-05, + "step": 20246, + "training_step_time": 0.10500669479370117 + }, + { + "epoch": 3.089447021484375e-05, + "model_forward_time": 0.023952245712280273, + "step": 20247 + }, + { + "epoch": 3.089447021484375e-05, + "step": 20247, + "training_step_time": 0.11027407646179199 + }, + { + "epoch": 3.089599609375e-05, + "model_forward_time": 0.024573802947998047, + "step": 20248 + }, + { + "epoch": 3.089599609375e-05, + "step": 20248, + "training_step_time": 0.11267662048339844 + }, + { + "epoch": 3.089752197265625e-05, + "model_forward_time": 0.024187803268432617, + "step": 20249 + }, + { + "epoch": 3.089752197265625e-05, + "step": 20249, + "training_step_time": 0.10289573669433594 + }, + { + "epoch": 3.08990478515625e-05, + "grad_norm": 0.20640796422958374, + "learning_rate": 2.6202630348146324e-05, + "loss": 0.0072, + "step": 20250 + }, + { + "epoch": 3.08990478515625e-05, + "model_forward_time": 0.02412557601928711, + "step": 20250 + }, + { + "epoch": 3.08990478515625e-05, + "step": 20250, + "training_step_time": 0.10513067245483398 + }, + { + "epoch": 3.090057373046875e-05, + "model_forward_time": 0.024743080139160156, + "step": 20251 + }, + { + "epoch": 3.090057373046875e-05, + "step": 20251, + "training_step_time": 0.10699677467346191 + }, + { + "epoch": 3.0902099609375e-05, + "model_forward_time": 0.024134159088134766, + "step": 20252 + }, + { + "epoch": 3.0902099609375e-05, + "step": 20252, + "training_step_time": 0.10776329040527344 + }, + { + "epoch": 3.090362548828125e-05, + "model_forward_time": 0.02420806884765625, + "step": 20253 + }, + { + "epoch": 3.090362548828125e-05, + "step": 20253, + "training_step_time": 0.10670280456542969 + }, + { + "epoch": 3.09051513671875e-05, + "model_forward_time": 0.02433323860168457, + "step": 20254 + }, + { + "epoch": 3.09051513671875e-05, + "step": 20254, + "training_step_time": 0.1046149730682373 + }, + { + "epoch": 3.090667724609375e-05, + "model_forward_time": 0.024295806884765625, + "step": 20255 + }, + { + "epoch": 3.090667724609375e-05, + "step": 20255, + "training_step_time": 0.10667967796325684 + }, + { + "epoch": 3.0908203125e-05, + "model_forward_time": 0.023836612701416016, + "step": 20256 + }, + { + "epoch": 3.0908203125e-05, + "step": 20256, + "training_step_time": 0.14601755142211914 + }, + { + "epoch": 3.090972900390625e-05, + "model_forward_time": 0.023718595504760742, + "step": 20257 + }, + { + "epoch": 3.090972900390625e-05, + "step": 20257, + "training_step_time": 0.16224145889282227 + }, + { + "epoch": 3.09112548828125e-05, + "model_forward_time": 0.02347111701965332, + "step": 20258 + }, + { + "epoch": 3.09112548828125e-05, + "step": 20258, + "training_step_time": 0.15749239921569824 + }, + { + "epoch": 3.091278076171875e-05, + "model_forward_time": 0.02343440055847168, + "step": 20259 + }, + { + "epoch": 3.091278076171875e-05, + "step": 20259, + "training_step_time": 0.18209409713745117 + }, + { + "epoch": 3.0914306640625e-05, + "grad_norm": 0.1057221069931984, + "learning_rate": 2.6154172039218172e-05, + "loss": 0.006, + "step": 20260 + }, + { + "epoch": 3.0914306640625e-05, + "model_forward_time": 0.023080825805664062, + "step": 20260 + }, + { + "epoch": 3.0914306640625e-05, + "step": 20260, + "training_step_time": 0.18787193298339844 + }, + { + "epoch": 3.091583251953125e-05, + "model_forward_time": 0.023444414138793945, + "step": 20261 + }, + { + "epoch": 3.091583251953125e-05, + "step": 20261, + "training_step_time": 0.12043380737304688 + }, + { + "epoch": 3.09173583984375e-05, + "model_forward_time": 0.023785114288330078, + "step": 20262 + }, + { + "epoch": 3.09173583984375e-05, + "step": 20262, + "training_step_time": 0.10840916633605957 + }, + { + "epoch": 3.091888427734375e-05, + "model_forward_time": 0.024281978607177734, + "step": 20263 + }, + { + "epoch": 3.091888427734375e-05, + "step": 20263, + "training_step_time": 0.10541462898254395 + }, + { + "epoch": 3.092041015625e-05, + "model_forward_time": 0.024016380310058594, + "step": 20264 + }, + { + "epoch": 3.092041015625e-05, + "step": 20264, + "training_step_time": 0.13516879081726074 + }, + { + "epoch": 3.092193603515625e-05, + "model_forward_time": 0.02454376220703125, + "step": 20265 + }, + { + "epoch": 3.092193603515625e-05, + "step": 20265, + "training_step_time": 0.1362764835357666 + }, + { + "epoch": 3.09234619140625e-05, + "model_forward_time": 0.024135589599609375, + "step": 20266 + }, + { + "epoch": 3.09234619140625e-05, + "step": 20266, + "training_step_time": 0.11280608177185059 + }, + { + "epoch": 3.092498779296875e-05, + "model_forward_time": 0.02664041519165039, + "step": 20267 + }, + { + "epoch": 3.092498779296875e-05, + "step": 20267, + "training_step_time": 0.1115257740020752 + }, + { + "epoch": 3.0926513671875e-05, + "model_forward_time": 0.02452826499938965, + "step": 20268 + }, + { + "epoch": 3.0926513671875e-05, + "step": 20268, + "training_step_time": 0.11456084251403809 + }, + { + "epoch": 3.092803955078125e-05, + "model_forward_time": 0.02424454689025879, + "step": 20269 + }, + { + "epoch": 3.092803955078125e-05, + "step": 20269, + "training_step_time": 0.11041831970214844 + }, + { + "epoch": 3.09295654296875e-05, + "grad_norm": 0.12096409499645233, + "learning_rate": 2.6105742705219515e-05, + "loss": 0.0067, + "step": 20270 + }, + { + "epoch": 3.09295654296875e-05, + "model_forward_time": 0.024512767791748047, + "step": 20270 + }, + { + "epoch": 3.09295654296875e-05, + "step": 20270, + "training_step_time": 0.18552136421203613 + }, + { + "epoch": 3.093109130859375e-05, + "model_forward_time": 0.02405095100402832, + "step": 20271 + }, + { + "epoch": 3.093109130859375e-05, + "step": 20271, + "training_step_time": 0.14959192276000977 + }, + { + "epoch": 3.09326171875e-05, + "model_forward_time": 0.023129701614379883, + "step": 20272 + }, + { + "epoch": 3.09326171875e-05, + "step": 20272, + "training_step_time": 0.1291344165802002 + }, + { + "epoch": 3.093414306640625e-05, + "model_forward_time": 0.02367091178894043, + "step": 20273 + }, + { + "epoch": 3.093414306640625e-05, + "step": 20273, + "training_step_time": 0.12607407569885254 + }, + { + "epoch": 3.09356689453125e-05, + "model_forward_time": 0.02408885955810547, + "step": 20274 + }, + { + "epoch": 3.09356689453125e-05, + "step": 20274, + "training_step_time": 0.12212395668029785 + }, + { + "epoch": 3.093719482421875e-05, + "model_forward_time": 0.024029254913330078, + "step": 20275 + }, + { + "epoch": 3.093719482421875e-05, + "step": 20275, + "training_step_time": 0.1178750991821289 + }, + { + "epoch": 3.0938720703125e-05, + "model_forward_time": 0.024106502532958984, + "step": 20276 + }, + { + "epoch": 3.0938720703125e-05, + "step": 20276, + "training_step_time": 0.11334967613220215 + }, + { + "epoch": 3.094024658203125e-05, + "model_forward_time": 0.02433490753173828, + "step": 20277 + }, + { + "epoch": 3.094024658203125e-05, + "step": 20277, + "training_step_time": 0.1134939193725586 + }, + { + "epoch": 3.09417724609375e-05, + "model_forward_time": 0.024335384368896484, + "step": 20278 + }, + { + "epoch": 3.09417724609375e-05, + "step": 20278, + "training_step_time": 0.11189961433410645 + }, + { + "epoch": 3.094329833984375e-05, + "model_forward_time": 0.024118900299072266, + "step": 20279 + }, + { + "epoch": 3.094329833984375e-05, + "step": 20279, + "training_step_time": 0.10966181755065918 + }, + { + "epoch": 3.094482421875e-05, + "grad_norm": 0.14801107347011566, + "learning_rate": 2.6057342404996522e-05, + "loss": 0.005, + "step": 20280 + }, + { + "epoch": 3.094482421875e-05, + "model_forward_time": 0.02424788475036621, + "step": 20280 + }, + { + "epoch": 3.094482421875e-05, + "step": 20280, + "training_step_time": 0.10901403427124023 + }, + { + "epoch": 3.094635009765625e-05, + "model_forward_time": 0.023990631103515625, + "step": 20281 + }, + { + "epoch": 3.094635009765625e-05, + "step": 20281, + "training_step_time": 0.10921907424926758 + }, + { + "epoch": 3.09478759765625e-05, + "model_forward_time": 0.024436473846435547, + "step": 20282 + }, + { + "epoch": 3.09478759765625e-05, + "step": 20282, + "training_step_time": 0.10672926902770996 + }, + { + "epoch": 3.094940185546875e-05, + "model_forward_time": 0.025330305099487305, + "step": 20283 + }, + { + "epoch": 3.094940185546875e-05, + "step": 20283, + "training_step_time": 0.11105847358703613 + }, + { + "epoch": 3.0950927734375e-05, + "model_forward_time": 0.025043725967407227, + "step": 20284 + }, + { + "epoch": 3.0950927734375e-05, + "step": 20284, + "training_step_time": 0.17059707641601562 + }, + { + "epoch": 3.095245361328125e-05, + "model_forward_time": 0.024131298065185547, + "step": 20285 + }, + { + "epoch": 3.095245361328125e-05, + "step": 20285, + "training_step_time": 0.11204361915588379 + }, + { + "epoch": 3.09539794921875e-05, + "model_forward_time": 0.023813247680664062, + "step": 20286 + }, + { + "epoch": 3.09539794921875e-05, + "step": 20286, + "training_step_time": 0.12683463096618652 + }, + { + "epoch": 3.095550537109375e-05, + "model_forward_time": 0.02431654930114746, + "step": 20287 + }, + { + "epoch": 3.095550537109375e-05, + "step": 20287, + "training_step_time": 0.1630098819732666 + }, + { + "epoch": 3.095703125e-05, + "model_forward_time": 0.023221254348754883, + "step": 20288 + }, + { + "epoch": 3.095703125e-05, + "step": 20288, + "training_step_time": 0.17503952980041504 + }, + { + "epoch": 3.095855712890625e-05, + "model_forward_time": 0.023313283920288086, + "step": 20289 + }, + { + "epoch": 3.095855712890625e-05, + "step": 20289, + "training_step_time": 0.16805672645568848 + }, + { + "epoch": 3.09600830078125e-05, + "grad_norm": 0.12908320128917694, + "learning_rate": 2.6008971197360176e-05, + "loss": 0.0125, + "step": 20290 + }, + { + "epoch": 3.09600830078125e-05, + "model_forward_time": 0.023643970489501953, + "step": 20290 + }, + { + "epoch": 3.09600830078125e-05, + "step": 20290, + "training_step_time": 0.10753607749938965 + }, + { + "epoch": 3.096160888671875e-05, + "model_forward_time": 0.023777484893798828, + "step": 20291 + }, + { + "epoch": 3.096160888671875e-05, + "step": 20291, + "training_step_time": 0.10880208015441895 + }, + { + "epoch": 3.0963134765625e-05, + "model_forward_time": 0.02480483055114746, + "step": 20292 + }, + { + "epoch": 3.0963134765625e-05, + "step": 20292, + "training_step_time": 0.10862445831298828 + }, + { + "epoch": 3.096466064453125e-05, + "model_forward_time": 0.02412867546081543, + "step": 20293 + }, + { + "epoch": 3.096466064453125e-05, + "step": 20293, + "training_step_time": 0.1114046573638916 + }, + { + "epoch": 3.09661865234375e-05, + "model_forward_time": 0.02380204200744629, + "step": 20294 + }, + { + "epoch": 3.09661865234375e-05, + "step": 20294, + "training_step_time": 0.10920524597167969 + }, + { + "epoch": 3.096771240234375e-05, + "model_forward_time": 0.02398395538330078, + "step": 20295 + }, + { + "epoch": 3.096771240234375e-05, + "step": 20295, + "training_step_time": 0.10948562622070312 + }, + { + "epoch": 3.096923828125e-05, + "model_forward_time": 0.02419734001159668, + "step": 20296 + }, + { + "epoch": 3.096923828125e-05, + "step": 20296, + "training_step_time": 0.11516356468200684 + }, + { + "epoch": 3.097076416015625e-05, + "model_forward_time": 0.024021387100219727, + "step": 20297 + }, + { + "epoch": 3.097076416015625e-05, + "step": 20297, + "training_step_time": 0.11346101760864258 + }, + { + "epoch": 3.09722900390625e-05, + "model_forward_time": 0.02448582649230957, + "step": 20298 + }, + { + "epoch": 3.09722900390625e-05, + "step": 20298, + "training_step_time": 0.11006927490234375 + }, + { + "epoch": 3.097381591796875e-05, + "model_forward_time": 0.024636268615722656, + "step": 20299 + }, + { + "epoch": 3.097381591796875e-05, + "step": 20299, + "training_step_time": 0.10822272300720215 + }, + { + "epoch": 3.0975341796875e-05, + "grad_norm": 0.1334718018770218, + "learning_rate": 2.5960629141086012e-05, + "loss": 0.0058, + "step": 20300 + }, + { + "epoch": 3.0975341796875e-05, + "model_forward_time": 0.024556636810302734, + "step": 20300 + }, + { + "epoch": 3.0975341796875e-05, + "step": 20300, + "training_step_time": 0.13094854354858398 + }, + { + "epoch": 3.097686767578125e-05, + "model_forward_time": 0.02364492416381836, + "step": 20301 + }, + { + "epoch": 3.097686767578125e-05, + "step": 20301, + "training_step_time": 0.11255407333374023 + }, + { + "epoch": 3.09783935546875e-05, + "model_forward_time": 0.024955034255981445, + "step": 20302 + }, + { + "epoch": 3.09783935546875e-05, + "step": 20302, + "training_step_time": 0.11995339393615723 + }, + { + "epoch": 3.097991943359375e-05, + "model_forward_time": 0.024550676345825195, + "step": 20303 + }, + { + "epoch": 3.097991943359375e-05, + "step": 20303, + "training_step_time": 0.11147499084472656 + }, + { + "epoch": 3.09814453125e-05, + "model_forward_time": 0.024528980255126953, + "step": 20304 + }, + { + "epoch": 3.09814453125e-05, + "step": 20304, + "training_step_time": 0.1755213737487793 + }, + { + "epoch": 3.098297119140625e-05, + "model_forward_time": 0.023474693298339844, + "step": 20305 + }, + { + "epoch": 3.098297119140625e-05, + "step": 20305, + "training_step_time": 0.18876004219055176 + }, + { + "epoch": 3.09844970703125e-05, + "model_forward_time": 0.023133516311645508, + "step": 20306 + }, + { + "epoch": 3.09844970703125e-05, + "step": 20306, + "training_step_time": 0.11075806617736816 + }, + { + "epoch": 3.098602294921875e-05, + "model_forward_time": 0.02364802360534668, + "step": 20307 + }, + { + "epoch": 3.098602294921875e-05, + "step": 20307, + "training_step_time": 0.10815691947937012 + }, + { + "epoch": 3.0987548828125e-05, + "model_forward_time": 0.02443242073059082, + "step": 20308 + }, + { + "epoch": 3.0987548828125e-05, + "step": 20308, + "training_step_time": 0.11348772048950195 + }, + { + "epoch": 3.098907470703125e-05, + "model_forward_time": 0.024419307708740234, + "step": 20309 + }, + { + "epoch": 3.098907470703125e-05, + "step": 20309, + "training_step_time": 0.1411147117614746 + }, + { + "epoch": 3.09906005859375e-05, + "grad_norm": 0.18202942609786987, + "learning_rate": 2.591231629491423e-05, + "loss": 0.0114, + "step": 20310 + }, + { + "epoch": 3.09906005859375e-05, + "model_forward_time": 0.024049997329711914, + "step": 20310 + }, + { + "epoch": 3.09906005859375e-05, + "step": 20310, + "training_step_time": 0.13256025314331055 + }, + { + "epoch": 3.099212646484375e-05, + "model_forward_time": 0.023494243621826172, + "step": 20311 + }, + { + "epoch": 3.099212646484375e-05, + "step": 20311, + "training_step_time": 0.12480521202087402 + }, + { + "epoch": 3.099365234375e-05, + "model_forward_time": 0.024001359939575195, + "step": 20312 + }, + { + "epoch": 3.099365234375e-05, + "step": 20312, + "training_step_time": 0.10767269134521484 + }, + { + "epoch": 3.099517822265625e-05, + "model_forward_time": 0.024210453033447266, + "step": 20313 + }, + { + "epoch": 3.099517822265625e-05, + "step": 20313, + "training_step_time": 0.13320374488830566 + }, + { + "epoch": 3.09967041015625e-05, + "model_forward_time": 0.02465343475341797, + "step": 20314 + }, + { + "epoch": 3.09967041015625e-05, + "step": 20314, + "training_step_time": 0.10839223861694336 + }, + { + "epoch": 3.099822998046875e-05, + "model_forward_time": 0.024294137954711914, + "step": 20315 + }, + { + "epoch": 3.099822998046875e-05, + "step": 20315, + "training_step_time": 0.19416213035583496 + }, + { + "epoch": 3.0999755859375e-05, + "model_forward_time": 0.023432016372680664, + "step": 20316 + }, + { + "epoch": 3.0999755859375e-05, + "step": 20316, + "training_step_time": 0.10625100135803223 + }, + { + "epoch": 3.100128173828125e-05, + "model_forward_time": 0.02371978759765625, + "step": 20317 + }, + { + "epoch": 3.100128173828125e-05, + "step": 20317, + "training_step_time": 0.10913538932800293 + }, + { + "epoch": 3.10028076171875e-05, + "model_forward_time": 0.02446293830871582, + "step": 20318 + }, + { + "epoch": 3.10028076171875e-05, + "step": 20318, + "training_step_time": 0.10594367980957031 + }, + { + "epoch": 3.100433349609375e-05, + "model_forward_time": 0.024090051651000977, + "step": 20319 + }, + { + "epoch": 3.100433349609375e-05, + "step": 20319, + "training_step_time": 0.10794448852539062 + }, + { + "epoch": 3.1005859375e-05, + "grad_norm": 0.280348539352417, + "learning_rate": 2.586403271754947e-05, + "loss": 0.0094, + "step": 20320 + }, + { + "epoch": 3.1005859375e-05, + "model_forward_time": 0.02423381805419922, + "step": 20320 + }, + { + "epoch": 3.1005859375e-05, + "step": 20320, + "training_step_time": 0.10675668716430664 + }, + { + "epoch": 3.100738525390625e-05, + "model_forward_time": 0.024366140365600586, + "step": 20321 + }, + { + "epoch": 3.100738525390625e-05, + "step": 20321, + "training_step_time": 0.10655331611633301 + }, + { + "epoch": 3.10089111328125e-05, + "model_forward_time": 0.024501323699951172, + "step": 20322 + }, + { + "epoch": 3.10089111328125e-05, + "step": 20322, + "training_step_time": 0.10703468322753906 + }, + { + "epoch": 3.101043701171875e-05, + "model_forward_time": 0.02444767951965332, + "step": 20323 + }, + { + "epoch": 3.101043701171875e-05, + "step": 20323, + "training_step_time": 0.1436612606048584 + }, + { + "epoch": 3.1011962890625e-05, + "model_forward_time": 0.02377605438232422, + "step": 20324 + }, + { + "epoch": 3.1011962890625e-05, + "step": 20324, + "training_step_time": 0.1664588451385498 + }, + { + "epoch": 3.101348876953125e-05, + "model_forward_time": 0.023504972457885742, + "step": 20325 + }, + { + "epoch": 3.101348876953125e-05, + "step": 20325, + "training_step_time": 0.14690947532653809 + }, + { + "epoch": 3.10150146484375e-05, + "model_forward_time": 0.023774147033691406, + "step": 20326 + }, + { + "epoch": 3.10150146484375e-05, + "step": 20326, + "training_step_time": 0.13970136642456055 + }, + { + "epoch": 3.101654052734375e-05, + "model_forward_time": 0.023624181747436523, + "step": 20327 + }, + { + "epoch": 3.101654052734375e-05, + "step": 20327, + "training_step_time": 0.12900924682617188 + }, + { + "epoch": 3.101806640625e-05, + "model_forward_time": 0.02335190773010254, + "step": 20328 + }, + { + "epoch": 3.101806640625e-05, + "step": 20328, + "training_step_time": 0.2120654582977295 + }, + { + "epoch": 3.101959228515625e-05, + "model_forward_time": 0.023449182510375977, + "step": 20329 + }, + { + "epoch": 3.101959228515625e-05, + "step": 20329, + "training_step_time": 0.12731385231018066 + }, + { + "epoch": 3.10211181640625e-05, + "grad_norm": 0.2727183699607849, + "learning_rate": 2.5815778467660823e-05, + "loss": 0.0072, + "step": 20330 + }, + { + "epoch": 3.10211181640625e-05, + "model_forward_time": 0.023593664169311523, + "step": 20330 + }, + { + "epoch": 3.10211181640625e-05, + "step": 20330, + "training_step_time": 0.11457300186157227 + }, + { + "epoch": 3.102264404296875e-05, + "model_forward_time": 0.02353072166442871, + "step": 20331 + }, + { + "epoch": 3.102264404296875e-05, + "step": 20331, + "training_step_time": 0.11472558975219727 + }, + { + "epoch": 3.1024169921875e-05, + "model_forward_time": 0.02430438995361328, + "step": 20332 + }, + { + "epoch": 3.1024169921875e-05, + "step": 20332, + "training_step_time": 0.11632394790649414 + }, + { + "epoch": 3.102569580078125e-05, + "model_forward_time": 0.024750709533691406, + "step": 20333 + }, + { + "epoch": 3.102569580078125e-05, + "step": 20333, + "training_step_time": 0.11632966995239258 + }, + { + "epoch": 3.10272216796875e-05, + "model_forward_time": 0.024159908294677734, + "step": 20334 + }, + { + "epoch": 3.10272216796875e-05, + "step": 20334, + "training_step_time": 0.12450456619262695 + }, + { + "epoch": 3.102874755859375e-05, + "model_forward_time": 0.024199724197387695, + "step": 20335 + }, + { + "epoch": 3.102874755859375e-05, + "step": 20335, + "training_step_time": 0.11366581916809082 + }, + { + "epoch": 3.10302734375e-05, + "model_forward_time": 0.024562597274780273, + "step": 20336 + }, + { + "epoch": 3.10302734375e-05, + "step": 20336, + "training_step_time": 0.1057136058807373 + }, + { + "epoch": 3.103179931640625e-05, + "model_forward_time": 0.024151325225830078, + "step": 20337 + }, + { + "epoch": 3.103179931640625e-05, + "step": 20337, + "training_step_time": 0.11228084564208984 + }, + { + "epoch": 3.10333251953125e-05, + "model_forward_time": 0.024050235748291016, + "step": 20338 + }, + { + "epoch": 3.10333251953125e-05, + "step": 20338, + "training_step_time": 0.11221432685852051 + }, + { + "epoch": 3.103485107421875e-05, + "model_forward_time": 0.0242464542388916, + "step": 20339 + }, + { + "epoch": 3.103485107421875e-05, + "step": 20339, + "training_step_time": 0.11094522476196289 + }, + { + "epoch": 3.1036376953125e-05, + "grad_norm": 0.2212766855955124, + "learning_rate": 2.5767553603881767e-05, + "loss": 0.0102, + "step": 20340 + }, + { + "epoch": 3.1036376953125e-05, + "model_forward_time": 0.024171113967895508, + "step": 20340 + }, + { + "epoch": 3.1036376953125e-05, + "step": 20340, + "training_step_time": 0.10960769653320312 + }, + { + "epoch": 3.103790283203125e-05, + "model_forward_time": 0.024101972579956055, + "step": 20341 + }, + { + "epoch": 3.103790283203125e-05, + "step": 20341, + "training_step_time": 0.10882878303527832 + }, + { + "epoch": 3.10394287109375e-05, + "model_forward_time": 0.024158954620361328, + "step": 20342 + }, + { + "epoch": 3.10394287109375e-05, + "step": 20342, + "training_step_time": 0.10892081260681152 + }, + { + "epoch": 3.104095458984375e-05, + "model_forward_time": 0.024196386337280273, + "step": 20343 + }, + { + "epoch": 3.104095458984375e-05, + "step": 20343, + "training_step_time": 0.11417460441589355 + }, + { + "epoch": 3.104248046875e-05, + "model_forward_time": 0.024295806884765625, + "step": 20344 + }, + { + "epoch": 3.104248046875e-05, + "step": 20344, + "training_step_time": 0.1095283031463623 + }, + { + "epoch": 3.104400634765625e-05, + "model_forward_time": 0.02423095703125, + "step": 20345 + }, + { + "epoch": 3.104400634765625e-05, + "step": 20345, + "training_step_time": 0.10478639602661133 + }, + { + "epoch": 3.10455322265625e-05, + "model_forward_time": 0.023151397705078125, + "step": 20346 + }, + { + "epoch": 3.10455322265625e-05, + "step": 20346, + "training_step_time": 0.14491510391235352 + }, + { + "epoch": 3.104705810546875e-05, + "model_forward_time": 0.023964405059814453, + "step": 20347 + }, + { + "epoch": 3.104705810546875e-05, + "step": 20347, + "training_step_time": 0.147871732711792 + }, + { + "epoch": 3.1048583984375e-05, + "model_forward_time": 0.024170637130737305, + "step": 20348 + }, + { + "epoch": 3.1048583984375e-05, + "step": 20348, + "training_step_time": 0.1676180362701416 + }, + { + "epoch": 3.105010986328125e-05, + "model_forward_time": 0.024091005325317383, + "step": 20349 + }, + { + "epoch": 3.105010986328125e-05, + "step": 20349, + "training_step_time": 0.18203401565551758 + }, + { + "epoch": 3.10516357421875e-05, + "grad_norm": 0.1759641170501709, + "learning_rate": 2.571935818481005e-05, + "loss": 0.0094, + "step": 20350 + }, + { + "epoch": 3.10516357421875e-05, + "model_forward_time": 0.023740530014038086, + "step": 20350 + }, + { + "epoch": 3.10516357421875e-05, + "step": 20350, + "training_step_time": 0.16592907905578613 + }, + { + "epoch": 3.105316162109375e-05, + "model_forward_time": 0.024566650390625, + "step": 20351 + }, + { + "epoch": 3.105316162109375e-05, + "step": 20351, + "training_step_time": 0.1929769515991211 + }, + { + "epoch": 3.10546875e-05, + "model_forward_time": 0.02453923225402832, + "step": 20352 + }, + { + "epoch": 3.10546875e-05, + "step": 20352, + "training_step_time": 0.11443471908569336 + }, + { + "epoch": 3.105621337890625e-05, + "model_forward_time": 0.024609088897705078, + "step": 20353 + }, + { + "epoch": 3.105621337890625e-05, + "step": 20353, + "training_step_time": 0.1074824333190918 + }, + { + "epoch": 3.10577392578125e-05, + "model_forward_time": 0.02525019645690918, + "step": 20354 + }, + { + "epoch": 3.10577392578125e-05, + "step": 20354, + "training_step_time": 0.13378024101257324 + }, + { + "epoch": 3.105926513671875e-05, + "model_forward_time": 0.025290727615356445, + "step": 20355 + }, + { + "epoch": 3.105926513671875e-05, + "step": 20355, + "training_step_time": 0.11923718452453613 + }, + { + "epoch": 3.1060791015625e-05, + "model_forward_time": 0.024960994720458984, + "step": 20356 + }, + { + "epoch": 3.1060791015625e-05, + "step": 20356, + "training_step_time": 0.11603713035583496 + }, + { + "epoch": 3.106231689453125e-05, + "model_forward_time": 0.0251615047454834, + "step": 20357 + }, + { + "epoch": 3.106231689453125e-05, + "step": 20357, + "training_step_time": 0.11896753311157227 + }, + { + "epoch": 3.10638427734375e-05, + "model_forward_time": 0.02486395835876465, + "step": 20358 + }, + { + "epoch": 3.10638427734375e-05, + "step": 20358, + "training_step_time": 0.11026811599731445 + }, + { + "epoch": 3.106536865234375e-05, + "model_forward_time": 0.025456666946411133, + "step": 20359 + }, + { + "epoch": 3.106536865234375e-05, + "step": 20359, + "training_step_time": 0.19449186325073242 + }, + { + "epoch": 3.106689453125e-05, + "grad_norm": 0.13494102656841278, + "learning_rate": 2.567119226900764e-05, + "loss": 0.0098, + "step": 20360 + }, + { + "epoch": 3.106689453125e-05, + "model_forward_time": 0.024602890014648438, + "step": 20360 + }, + { + "epoch": 3.106689453125e-05, + "step": 20360, + "training_step_time": 0.10335564613342285 + }, + { + "epoch": 3.106842041015625e-05, + "model_forward_time": 0.0241241455078125, + "step": 20361 + }, + { + "epoch": 3.106842041015625e-05, + "step": 20361, + "training_step_time": 0.10336542129516602 + }, + { + "epoch": 3.10699462890625e-05, + "model_forward_time": 0.024919986724853516, + "step": 20362 + }, + { + "epoch": 3.10699462890625e-05, + "step": 20362, + "training_step_time": 0.10613608360290527 + }, + { + "epoch": 3.107147216796875e-05, + "model_forward_time": 0.02514171600341797, + "step": 20363 + }, + { + "epoch": 3.107147216796875e-05, + "step": 20363, + "training_step_time": 0.10741162300109863 + }, + { + "epoch": 3.1072998046875e-05, + "model_forward_time": 0.025303363800048828, + "step": 20364 + }, + { + "epoch": 3.1072998046875e-05, + "step": 20364, + "training_step_time": 0.1091318130493164 + }, + { + "epoch": 3.107452392578125e-05, + "model_forward_time": 0.027009248733520508, + "step": 20365 + }, + { + "epoch": 3.107452392578125e-05, + "step": 20365, + "training_step_time": 0.12375521659851074 + }, + { + "epoch": 3.10760498046875e-05, + "model_forward_time": 0.02488088607788086, + "step": 20366 + }, + { + "epoch": 3.10760498046875e-05, + "step": 20366, + "training_step_time": 0.12371492385864258 + }, + { + "epoch": 3.107757568359375e-05, + "model_forward_time": 0.024896621704101562, + "step": 20367 + }, + { + "epoch": 3.107757568359375e-05, + "step": 20367, + "training_step_time": 0.12636899948120117 + }, + { + "epoch": 3.10791015625e-05, + "model_forward_time": 0.0253446102142334, + "step": 20368 + }, + { + "epoch": 3.10791015625e-05, + "step": 20368, + "training_step_time": 0.12727046012878418 + }, + { + "epoch": 3.108062744140625e-05, + "model_forward_time": 0.024823427200317383, + "step": 20369 + }, + { + "epoch": 3.108062744140625e-05, + "step": 20369, + "training_step_time": 0.12270164489746094 + }, + { + "epoch": 3.10821533203125e-05, + "grad_norm": 0.20362550020217896, + "learning_rate": 2.562305591500069e-05, + "loss": 0.0126, + "step": 20370 + }, + { + "epoch": 3.10821533203125e-05, + "model_forward_time": 0.024915695190429688, + "step": 20370 + }, + { + "epoch": 3.10821533203125e-05, + "step": 20370, + "training_step_time": 0.12064719200134277 + }, + { + "epoch": 3.108367919921875e-05, + "model_forward_time": 0.024836063385009766, + "step": 20371 + }, + { + "epoch": 3.108367919921875e-05, + "step": 20371, + "training_step_time": 0.11359333992004395 + }, + { + "epoch": 3.1085205078125e-05, + "model_forward_time": 0.025261402130126953, + "step": 20372 + }, + { + "epoch": 3.1085205078125e-05, + "step": 20372, + "training_step_time": 0.11461138725280762 + }, + { + "epoch": 3.108673095703125e-05, + "model_forward_time": 0.025026798248291016, + "step": 20373 + }, + { + "epoch": 3.108673095703125e-05, + "step": 20373, + "training_step_time": 0.11183285713195801 + }, + { + "epoch": 3.10882568359375e-05, + "model_forward_time": 0.025376319885253906, + "step": 20374 + }, + { + "epoch": 3.10882568359375e-05, + "step": 20374, + "training_step_time": 0.18063712120056152 + }, + { + "epoch": 3.108978271484375e-05, + "model_forward_time": 0.026760578155517578, + "step": 20375 + }, + { + "epoch": 3.108978271484375e-05, + "step": 20375, + "training_step_time": 0.11977458000183105 + }, + { + "epoch": 3.109130859375e-05, + "model_forward_time": 0.02406001091003418, + "step": 20376 + }, + { + "epoch": 3.109130859375e-05, + "step": 20376, + "training_step_time": 0.12819242477416992 + }, + { + "epoch": 3.109283447265625e-05, + "model_forward_time": 0.024762392044067383, + "step": 20377 + }, + { + "epoch": 3.109283447265625e-05, + "step": 20377, + "training_step_time": 0.11462664604187012 + }, + { + "epoch": 3.10943603515625e-05, + "model_forward_time": 0.025179386138916016, + "step": 20378 + }, + { + "epoch": 3.10943603515625e-05, + "step": 20378, + "training_step_time": 0.17092275619506836 + }, + { + "epoch": 3.109588623046875e-05, + "model_forward_time": 0.02478337287902832, + "step": 20379 + }, + { + "epoch": 3.109588623046875e-05, + "step": 20379, + "training_step_time": 0.13606810569763184 + }, + { + "epoch": 3.1097412109375e-05, + "grad_norm": 0.22211630642414093, + "learning_rate": 2.55749491812794e-05, + "loss": 0.0197, + "step": 20380 + }, + { + "epoch": 3.1097412109375e-05, + "model_forward_time": 0.024234533309936523, + "step": 20380 + }, + { + "epoch": 3.1097412109375e-05, + "step": 20380, + "training_step_time": 0.11030793190002441 + }, + { + "epoch": 3.109893798828125e-05, + "model_forward_time": 0.024780750274658203, + "step": 20381 + }, + { + "epoch": 3.109893798828125e-05, + "step": 20381, + "training_step_time": 0.10662150382995605 + }, + { + "epoch": 3.11004638671875e-05, + "model_forward_time": 0.025136470794677734, + "step": 20382 + }, + { + "epoch": 3.11004638671875e-05, + "step": 20382, + "training_step_time": 0.10565996170043945 + }, + { + "epoch": 3.110198974609375e-05, + "model_forward_time": 0.02466106414794922, + "step": 20383 + }, + { + "epoch": 3.110198974609375e-05, + "step": 20383, + "training_step_time": 0.10759520530700684 + }, + { + "epoch": 3.1103515625e-05, + "model_forward_time": 0.024927854537963867, + "step": 20384 + }, + { + "epoch": 3.1103515625e-05, + "step": 20384, + "training_step_time": 0.10775351524353027 + }, + { + "epoch": 3.110504150390625e-05, + "model_forward_time": 0.024290084838867188, + "step": 20385 + }, + { + "epoch": 3.110504150390625e-05, + "step": 20385, + "training_step_time": 0.10499739646911621 + }, + { + "epoch": 3.11065673828125e-05, + "model_forward_time": 0.02501368522644043, + "step": 20386 + }, + { + "epoch": 3.11065673828125e-05, + "step": 20386, + "training_step_time": 0.10746598243713379 + }, + { + "epoch": 3.110809326171875e-05, + "model_forward_time": 0.02521681785583496, + "step": 20387 + }, + { + "epoch": 3.110809326171875e-05, + "step": 20387, + "training_step_time": 0.10686516761779785 + }, + { + "epoch": 3.1109619140625e-05, + "model_forward_time": 0.025272130966186523, + "step": 20388 + }, + { + "epoch": 3.1109619140625e-05, + "step": 20388, + "training_step_time": 0.10399365425109863 + }, + { + "epoch": 3.111114501953125e-05, + "model_forward_time": 0.024933338165283203, + "step": 20389 + }, + { + "epoch": 3.111114501953125e-05, + "step": 20389, + "training_step_time": 0.10834860801696777 + }, + { + "epoch": 3.11126708984375e-05, + "grad_norm": 0.25785398483276367, + "learning_rate": 2.552687212629799e-05, + "loss": 0.0093, + "step": 20390 + }, + { + "epoch": 3.11126708984375e-05, + "model_forward_time": 0.02491903305053711, + "step": 20390 + }, + { + "epoch": 3.11126708984375e-05, + "step": 20390, + "training_step_time": 0.11054420471191406 + }, + { + "epoch": 3.111419677734375e-05, + "model_forward_time": 0.024378299713134766, + "step": 20391 + }, + { + "epoch": 3.111419677734375e-05, + "step": 20391, + "training_step_time": 0.14413833618164062 + }, + { + "epoch": 3.111572265625e-05, + "model_forward_time": 0.026362180709838867, + "step": 20392 + }, + { + "epoch": 3.111572265625e-05, + "step": 20392, + "training_step_time": 0.161116361618042 + }, + { + "epoch": 3.111724853515625e-05, + "model_forward_time": 0.025140762329101562, + "step": 20393 + }, + { + "epoch": 3.111724853515625e-05, + "step": 20393, + "training_step_time": 0.11197495460510254 + }, + { + "epoch": 3.11187744140625e-05, + "model_forward_time": 0.024820327758789062, + "step": 20394 + }, + { + "epoch": 3.11187744140625e-05, + "step": 20394, + "training_step_time": 0.1340630054473877 + }, + { + "epoch": 3.112030029296875e-05, + "model_forward_time": 0.025660037994384766, + "step": 20395 + }, + { + "epoch": 3.112030029296875e-05, + "step": 20395, + "training_step_time": 0.20145702362060547 + }, + { + "epoch": 3.1121826171875e-05, + "model_forward_time": 0.024155139923095703, + "step": 20396 + }, + { + "epoch": 3.1121826171875e-05, + "step": 20396, + "training_step_time": 0.13207507133483887 + }, + { + "epoch": 3.112335205078125e-05, + "model_forward_time": 0.02453136444091797, + "step": 20397 + }, + { + "epoch": 3.112335205078125e-05, + "step": 20397, + "training_step_time": 0.12000131607055664 + }, + { + "epoch": 3.11248779296875e-05, + "model_forward_time": 0.024870634078979492, + "step": 20398 + }, + { + "epoch": 3.11248779296875e-05, + "step": 20398, + "training_step_time": 0.1681993007659912 + }, + { + "epoch": 3.112640380859375e-05, + "model_forward_time": 0.025240421295166016, + "step": 20399 + }, + { + "epoch": 3.112640380859375e-05, + "step": 20399, + "training_step_time": 0.1349015235900879 + }, + { + "epoch": 3.11279296875e-05, + "grad_norm": 0.2017369121313095, + "learning_rate": 2.547882480847461e-05, + "loss": 0.0064, + "step": 20400 + }, + { + "epoch": 3.11279296875e-05, + "model_forward_time": 0.02423548698425293, + "step": 20400 + }, + { + "epoch": 3.11279296875e-05, + "step": 20400, + "training_step_time": 0.11487293243408203 + }, + { + "epoch": 3.112945556640625e-05, + "model_forward_time": 0.02504730224609375, + "step": 20401 + }, + { + "epoch": 3.112945556640625e-05, + "step": 20401, + "training_step_time": 0.10878634452819824 + }, + { + "epoch": 3.11309814453125e-05, + "model_forward_time": 0.024625062942504883, + "step": 20402 + }, + { + "epoch": 3.11309814453125e-05, + "step": 20402, + "training_step_time": 0.11632490158081055 + }, + { + "epoch": 3.113250732421875e-05, + "model_forward_time": 0.025011539459228516, + "step": 20403 + }, + { + "epoch": 3.113250732421875e-05, + "step": 20403, + "training_step_time": 0.1076359748840332 + }, + { + "epoch": 3.1134033203125e-05, + "model_forward_time": 0.024991989135742188, + "step": 20404 + }, + { + "epoch": 3.1134033203125e-05, + "step": 20404, + "training_step_time": 0.13332653045654297 + }, + { + "epoch": 3.113555908203125e-05, + "model_forward_time": 0.025496721267700195, + "step": 20405 + }, + { + "epoch": 3.113555908203125e-05, + "step": 20405, + "training_step_time": 0.11874866485595703 + }, + { + "epoch": 3.11370849609375e-05, + "model_forward_time": 0.02481532096862793, + "step": 20406 + }, + { + "epoch": 3.11370849609375e-05, + "step": 20406, + "training_step_time": 0.10432553291320801 + }, + { + "epoch": 3.113861083984375e-05, + "model_forward_time": 0.025310039520263672, + "step": 20407 + }, + { + "epoch": 3.113861083984375e-05, + "step": 20407, + "training_step_time": 0.10714411735534668 + }, + { + "epoch": 3.114013671875e-05, + "model_forward_time": 0.025255680084228516, + "step": 20408 + }, + { + "epoch": 3.114013671875e-05, + "step": 20408, + "training_step_time": 0.10868120193481445 + }, + { + "epoch": 3.114166259765625e-05, + "model_forward_time": 0.02530050277709961, + "step": 20409 + }, + { + "epoch": 3.114166259765625e-05, + "step": 20409, + "training_step_time": 0.10892844200134277 + }, + { + "epoch": 3.11431884765625e-05, + "grad_norm": 0.38811054825782776, + "learning_rate": 2.543080728619127e-05, + "loss": 0.009, + "step": 20410 + }, + { + "epoch": 3.11431884765625e-05, + "model_forward_time": 0.02608513832092285, + "step": 20410 + }, + { + "epoch": 3.11431884765625e-05, + "step": 20410, + "training_step_time": 0.1089780330657959 + }, + { + "epoch": 3.114471435546875e-05, + "model_forward_time": 0.02552962303161621, + "step": 20411 + }, + { + "epoch": 3.114471435546875e-05, + "step": 20411, + "training_step_time": 0.10712456703186035 + }, + { + "epoch": 3.1146240234375e-05, + "model_forward_time": 0.025484800338745117, + "step": 20412 + }, + { + "epoch": 3.1146240234375e-05, + "step": 20412, + "training_step_time": 0.10709404945373535 + }, + { + "epoch": 3.114776611328125e-05, + "model_forward_time": 0.025324344635009766, + "step": 20413 + }, + { + "epoch": 3.114776611328125e-05, + "step": 20413, + "training_step_time": 0.10573792457580566 + }, + { + "epoch": 3.11492919921875e-05, + "model_forward_time": 0.02557086944580078, + "step": 20414 + }, + { + "epoch": 3.11492919921875e-05, + "step": 20414, + "training_step_time": 0.10743427276611328 + }, + { + "epoch": 3.115081787109375e-05, + "model_forward_time": 0.025432109832763672, + "step": 20415 + }, + { + "epoch": 3.115081787109375e-05, + "step": 20415, + "training_step_time": 0.10666823387145996 + }, + { + "epoch": 3.115234375e-05, + "model_forward_time": 0.025096893310546875, + "step": 20416 + }, + { + "epoch": 3.115234375e-05, + "step": 20416, + "training_step_time": 0.10606789588928223 + }, + { + "epoch": 3.115386962890625e-05, + "model_forward_time": 0.025214433670043945, + "step": 20417 + }, + { + "epoch": 3.115386962890625e-05, + "step": 20417, + "training_step_time": 0.10618472099304199 + }, + { + "epoch": 3.11553955078125e-05, + "model_forward_time": 0.02852654457092285, + "step": 20418 + }, + { + "epoch": 3.11553955078125e-05, + "step": 20418, + "training_step_time": 0.10781025886535645 + }, + { + "epoch": 3.115692138671875e-05, + "model_forward_time": 0.025450468063354492, + "step": 20419 + }, + { + "epoch": 3.115692138671875e-05, + "step": 20419, + "training_step_time": 0.10933136940002441 + }, + { + "epoch": 3.1158447265625e-05, + "grad_norm": 0.14591389894485474, + "learning_rate": 2.5382819617793813e-05, + "loss": 0.0068, + "step": 20420 + }, + { + "epoch": 3.1158447265625e-05, + "model_forward_time": 0.025165081024169922, + "step": 20420 + }, + { + "epoch": 3.1158447265625e-05, + "step": 20420, + "training_step_time": 0.10795736312866211 + }, + { + "epoch": 3.115997314453125e-05, + "model_forward_time": 0.025066137313842773, + "step": 20421 + }, + { + "epoch": 3.115997314453125e-05, + "step": 20421, + "training_step_time": 0.12075114250183105 + }, + { + "epoch": 3.11614990234375e-05, + "model_forward_time": 0.024999618530273438, + "step": 20422 + }, + { + "epoch": 3.11614990234375e-05, + "step": 20422, + "training_step_time": 0.12048649787902832 + }, + { + "epoch": 3.116302490234375e-05, + "model_forward_time": 0.025182008743286133, + "step": 20423 + }, + { + "epoch": 3.116302490234375e-05, + "step": 20423, + "training_step_time": 0.13148736953735352 + }, + { + "epoch": 3.116455078125e-05, + "model_forward_time": 0.024830341339111328, + "step": 20424 + }, + { + "epoch": 3.116455078125e-05, + "step": 20424, + "training_step_time": 0.15788650512695312 + }, + { + "epoch": 3.116607666015625e-05, + "model_forward_time": 0.024180173873901367, + "step": 20425 + }, + { + "epoch": 3.116607666015625e-05, + "step": 20425, + "training_step_time": 0.21845483779907227 + }, + { + "epoch": 3.11676025390625e-05, + "model_forward_time": 0.02472400665283203, + "step": 20426 + }, + { + "epoch": 3.11676025390625e-05, + "step": 20426, + "training_step_time": 0.1189262866973877 + }, + { + "epoch": 3.116912841796875e-05, + "model_forward_time": 0.024558544158935547, + "step": 20427 + }, + { + "epoch": 3.116912841796875e-05, + "step": 20427, + "training_step_time": 0.10732340812683105 + }, + { + "epoch": 3.1170654296875e-05, + "model_forward_time": 0.025431156158447266, + "step": 20428 + }, + { + "epoch": 3.1170654296875e-05, + "step": 20428, + "training_step_time": 0.10600423812866211 + }, + { + "epoch": 3.117218017578125e-05, + "model_forward_time": 0.025560617446899414, + "step": 20429 + }, + { + "epoch": 3.117218017578125e-05, + "step": 20429, + "training_step_time": 0.11062955856323242 + }, + { + "epoch": 3.11737060546875e-05, + "grad_norm": 0.17885607481002808, + "learning_rate": 2.5334861861591753e-05, + "loss": 0.0056, + "step": 20430 + }, + { + "epoch": 3.11737060546875e-05, + "model_forward_time": 0.024947404861450195, + "step": 20430 + }, + { + "epoch": 3.11737060546875e-05, + "step": 20430, + "training_step_time": 0.10522913932800293 + }, + { + "epoch": 3.117523193359375e-05, + "model_forward_time": 0.02517414093017578, + "step": 20431 + }, + { + "epoch": 3.117523193359375e-05, + "step": 20431, + "training_step_time": 0.10863375663757324 + }, + { + "epoch": 3.11767578125e-05, + "model_forward_time": 0.025266647338867188, + "step": 20432 + }, + { + "epoch": 3.11767578125e-05, + "step": 20432, + "training_step_time": 0.10877370834350586 + }, + { + "epoch": 3.117828369140625e-05, + "model_forward_time": 0.026983261108398438, + "step": 20433 + }, + { + "epoch": 3.117828369140625e-05, + "step": 20433, + "training_step_time": 0.1101226806640625 + }, + { + "epoch": 3.11798095703125e-05, + "model_forward_time": 0.024824857711791992, + "step": 20434 + }, + { + "epoch": 3.11798095703125e-05, + "step": 20434, + "training_step_time": 0.10632467269897461 + }, + { + "epoch": 3.118133544921875e-05, + "model_forward_time": 0.024945974349975586, + "step": 20435 + }, + { + "epoch": 3.118133544921875e-05, + "step": 20435, + "training_step_time": 0.10532402992248535 + }, + { + "epoch": 3.1182861328125e-05, + "model_forward_time": 0.02496480941772461, + "step": 20436 + }, + { + "epoch": 3.1182861328125e-05, + "step": 20436, + "training_step_time": 0.10535764694213867 + }, + { + "epoch": 3.118438720703125e-05, + "model_forward_time": 0.02482128143310547, + "step": 20437 + }, + { + "epoch": 3.118438720703125e-05, + "step": 20437, + "training_step_time": 0.1461029052734375 + }, + { + "epoch": 3.11859130859375e-05, + "model_forward_time": 0.024402379989624023, + "step": 20438 + }, + { + "epoch": 3.11859130859375e-05, + "step": 20438, + "training_step_time": 0.16138124465942383 + }, + { + "epoch": 3.118743896484375e-05, + "model_forward_time": 0.024184703826904297, + "step": 20439 + }, + { + "epoch": 3.118743896484375e-05, + "step": 20439, + "training_step_time": 0.16146373748779297 + }, + { + "epoch": 3.118896484375e-05, + "grad_norm": 0.37250664830207825, + "learning_rate": 2.528693407585832e-05, + "loss": 0.0126, + "step": 20440 + }, + { + "epoch": 3.118896484375e-05, + "model_forward_time": 0.025819063186645508, + "step": 20440 + }, + { + "epoch": 3.118896484375e-05, + "step": 20440, + "training_step_time": 0.17675518989562988 + }, + { + "epoch": 3.119049072265625e-05, + "model_forward_time": 0.02501201629638672, + "step": 20441 + }, + { + "epoch": 3.119049072265625e-05, + "step": 20441, + "training_step_time": 0.1863386631011963 + }, + { + "epoch": 3.11920166015625e-05, + "model_forward_time": 0.024637460708618164, + "step": 20442 + }, + { + "epoch": 3.11920166015625e-05, + "step": 20442, + "training_step_time": 0.11068177223205566 + }, + { + "epoch": 3.119354248046875e-05, + "model_forward_time": 0.0246427059173584, + "step": 20443 + }, + { + "epoch": 3.119354248046875e-05, + "step": 20443, + "training_step_time": 0.11658453941345215 + }, + { + "epoch": 3.1195068359375e-05, + "model_forward_time": 0.02465057373046875, + "step": 20444 + }, + { + "epoch": 3.1195068359375e-05, + "step": 20444, + "training_step_time": 0.1710047721862793 + }, + { + "epoch": 3.119659423828125e-05, + "model_forward_time": 0.024183273315429688, + "step": 20445 + }, + { + "epoch": 3.119659423828125e-05, + "step": 20445, + "training_step_time": 0.13242077827453613 + }, + { + "epoch": 3.11981201171875e-05, + "model_forward_time": 0.02753901481628418, + "step": 20446 + }, + { + "epoch": 3.11981201171875e-05, + "step": 20446, + "training_step_time": 0.1101231575012207 + }, + { + "epoch": 3.119964599609375e-05, + "model_forward_time": 0.02529597282409668, + "step": 20447 + }, + { + "epoch": 3.119964599609375e-05, + "step": 20447, + "training_step_time": 0.13007760047912598 + }, + { + "epoch": 3.1201171875e-05, + "model_forward_time": 0.024972200393676758, + "step": 20448 + }, + { + "epoch": 3.1201171875e-05, + "step": 20448, + "training_step_time": 0.12068939208984375 + }, + { + "epoch": 3.120269775390625e-05, + "model_forward_time": 0.025066137313842773, + "step": 20449 + }, + { + "epoch": 3.120269775390625e-05, + "step": 20449, + "training_step_time": 0.10463428497314453 + }, + { + "epoch": 3.12042236328125e-05, + "grad_norm": 0.26953527331352234, + "learning_rate": 2.5239036318830278e-05, + "loss": 0.0097, + "step": 20450 + }, + { + "epoch": 3.12042236328125e-05, + "model_forward_time": 0.02538156509399414, + "step": 20450 + }, + { + "epoch": 3.12042236328125e-05, + "step": 20450, + "training_step_time": 0.10718417167663574 + }, + { + "epoch": 3.120574951171875e-05, + "model_forward_time": 0.024911880493164062, + "step": 20451 + }, + { + "epoch": 3.120574951171875e-05, + "step": 20451, + "training_step_time": 0.11488485336303711 + }, + { + "epoch": 3.1207275390625e-05, + "model_forward_time": 0.025112152099609375, + "step": 20452 + }, + { + "epoch": 3.1207275390625e-05, + "step": 20452, + "training_step_time": 0.10546040534973145 + }, + { + "epoch": 3.120880126953125e-05, + "model_forward_time": 0.025464296340942383, + "step": 20453 + }, + { + "epoch": 3.120880126953125e-05, + "step": 20453, + "training_step_time": 0.10883331298828125 + }, + { + "epoch": 3.12103271484375e-05, + "model_forward_time": 0.02742290496826172, + "step": 20454 + }, + { + "epoch": 3.12103271484375e-05, + "step": 20454, + "training_step_time": 0.10835576057434082 + }, + { + "epoch": 3.121185302734375e-05, + "model_forward_time": 0.02506113052368164, + "step": 20455 + }, + { + "epoch": 3.121185302734375e-05, + "step": 20455, + "training_step_time": 0.10542154312133789 + }, + { + "epoch": 3.121337890625e-05, + "model_forward_time": 0.025527477264404297, + "step": 20456 + }, + { + "epoch": 3.121337890625e-05, + "step": 20456, + "training_step_time": 0.10657930374145508 + }, + { + "epoch": 3.121490478515625e-05, + "model_forward_time": 0.02516484260559082, + "step": 20457 + }, + { + "epoch": 3.121490478515625e-05, + "step": 20457, + "training_step_time": 0.10913705825805664 + }, + { + "epoch": 3.12164306640625e-05, + "model_forward_time": 0.02512335777282715, + "step": 20458 + }, + { + "epoch": 3.12164306640625e-05, + "step": 20458, + "training_step_time": 0.10603690147399902 + }, + { + "epoch": 3.121795654296875e-05, + "model_forward_time": 0.025205612182617188, + "step": 20459 + }, + { + "epoch": 3.121795654296875e-05, + "step": 20459, + "training_step_time": 0.10616254806518555 + }, + { + "epoch": 3.1219482421875e-05, + "grad_norm": 0.18705344200134277, + "learning_rate": 2.5191168648707887e-05, + "loss": 0.0297, + "step": 20460 + }, + { + "epoch": 3.1219482421875e-05, + "model_forward_time": 0.0256044864654541, + "step": 20460 + }, + { + "epoch": 3.1219482421875e-05, + "step": 20460, + "training_step_time": 0.11236572265625 + }, + { + "epoch": 3.122100830078125e-05, + "model_forward_time": 0.025088787078857422, + "step": 20461 + }, + { + "epoch": 3.122100830078125e-05, + "step": 20461, + "training_step_time": 0.10549378395080566 + }, + { + "epoch": 3.12225341796875e-05, + "model_forward_time": 0.025125503540039062, + "step": 20462 + }, + { + "epoch": 3.12225341796875e-05, + "step": 20462, + "training_step_time": 0.10924959182739258 + }, + { + "epoch": 3.122406005859375e-05, + "model_forward_time": 0.02544260025024414, + "step": 20463 + }, + { + "epoch": 3.122406005859375e-05, + "step": 20463, + "training_step_time": 0.10643911361694336 + }, + { + "epoch": 3.12255859375e-05, + "model_forward_time": 0.025136232376098633, + "step": 20464 + }, + { + "epoch": 3.12255859375e-05, + "step": 20464, + "training_step_time": 0.10842609405517578 + }, + { + "epoch": 3.122711181640625e-05, + "model_forward_time": 0.025003433227539062, + "step": 20465 + }, + { + "epoch": 3.122711181640625e-05, + "step": 20465, + "training_step_time": 0.10810065269470215 + }, + { + "epoch": 3.12286376953125e-05, + "model_forward_time": 0.02527785301208496, + "step": 20466 + }, + { + "epoch": 3.12286376953125e-05, + "step": 20466, + "training_step_time": 0.2023637294769287 + }, + { + "epoch": 3.123016357421875e-05, + "model_forward_time": 0.024599552154541016, + "step": 20467 + }, + { + "epoch": 3.123016357421875e-05, + "step": 20467, + "training_step_time": 0.11918759346008301 + }, + { + "epoch": 3.1231689453125e-05, + "model_forward_time": 0.024722576141357422, + "step": 20468 + }, + { + "epoch": 3.1231689453125e-05, + "step": 20468, + "training_step_time": 0.11128616333007812 + }, + { + "epoch": 3.123321533203125e-05, + "model_forward_time": 0.025234222412109375, + "step": 20469 + }, + { + "epoch": 3.123321533203125e-05, + "step": 20469, + "training_step_time": 0.10904908180236816 + }, + { + "epoch": 3.12347412109375e-05, + "grad_norm": 0.1729920357465744, + "learning_rate": 2.5143331123654933e-05, + "loss": 0.0158, + "step": 20470 + }, + { + "epoch": 3.12347412109375e-05, + "model_forward_time": 0.027698516845703125, + "step": 20470 + }, + { + "epoch": 3.12347412109375e-05, + "step": 20470, + "training_step_time": 0.14938783645629883 + }, + { + "epoch": 3.123626708984375e-05, + "model_forward_time": 0.02508854866027832, + "step": 20471 + }, + { + "epoch": 3.123626708984375e-05, + "step": 20471, + "training_step_time": 0.13103365898132324 + }, + { + "epoch": 3.123779296875e-05, + "model_forward_time": 0.02626776695251465, + "step": 20472 + }, + { + "epoch": 3.123779296875e-05, + "step": 20472, + "training_step_time": 0.12303757667541504 + }, + { + "epoch": 3.123931884765625e-05, + "model_forward_time": 0.024585723876953125, + "step": 20473 + }, + { + "epoch": 3.123931884765625e-05, + "step": 20473, + "training_step_time": 0.1123814582824707 + }, + { + "epoch": 3.12408447265625e-05, + "model_forward_time": 0.02526712417602539, + "step": 20474 + }, + { + "epoch": 3.12408447265625e-05, + "step": 20474, + "training_step_time": 0.11015558242797852 + }, + { + "epoch": 3.124237060546875e-05, + "model_forward_time": 0.025051116943359375, + "step": 20475 + }, + { + "epoch": 3.124237060546875e-05, + "step": 20475, + "training_step_time": 0.10771322250366211 + }, + { + "epoch": 3.1243896484375e-05, + "model_forward_time": 0.02417159080505371, + "step": 20476 + }, + { + "epoch": 3.1243896484375e-05, + "step": 20476, + "training_step_time": 0.10909819602966309 + }, + { + "epoch": 3.124542236328125e-05, + "model_forward_time": 0.024007558822631836, + "step": 20477 + }, + { + "epoch": 3.124542236328125e-05, + "step": 20477, + "training_step_time": 0.11355733871459961 + }, + { + "epoch": 3.12469482421875e-05, + "model_forward_time": 0.027773380279541016, + "step": 20478 + }, + { + "epoch": 3.12469482421875e-05, + "step": 20478, + "training_step_time": 0.11075711250305176 + }, + { + "epoch": 3.124847412109375e-05, + "model_forward_time": 0.024991512298583984, + "step": 20479 + }, + { + "epoch": 3.124847412109375e-05, + "step": 20479, + "training_step_time": 0.10997939109802246 + }, + { + "epoch": 3.125e-05, + "grad_norm": 0.1775788962841034, + "learning_rate": 2.5095523801798495e-05, + "loss": 0.0079, + "step": 20480 + }, + { + "epoch": 3.125e-05, + "model_forward_time": 0.02524280548095703, + "step": 20480 + }, + { + "epoch": 3.125e-05, + "step": 20480, + "training_step_time": 0.11041545867919922 + }, + { + "epoch": 3.125152587890625e-05, + "model_forward_time": 0.025027751922607422, + "step": 20481 + }, + { + "epoch": 3.125152587890625e-05, + "step": 20481, + "training_step_time": 0.10884451866149902 + }, + { + "epoch": 3.12530517578125e-05, + "model_forward_time": 0.02544403076171875, + "step": 20482 + }, + { + "epoch": 3.12530517578125e-05, + "step": 20482, + "training_step_time": 0.10685873031616211 + }, + { + "epoch": 3.125457763671875e-05, + "model_forward_time": 0.024260759353637695, + "step": 20483 + }, + { + "epoch": 3.125457763671875e-05, + "step": 20483, + "training_step_time": 0.14116597175598145 + }, + { + "epoch": 3.1256103515625e-05, + "model_forward_time": 0.025297880172729492, + "step": 20484 + }, + { + "epoch": 3.1256103515625e-05, + "step": 20484, + "training_step_time": 0.1682753562927246 + }, + { + "epoch": 3.125762939453125e-05, + "model_forward_time": 0.02643132209777832, + "step": 20485 + }, + { + "epoch": 3.125762939453125e-05, + "step": 20485, + "training_step_time": 0.17473149299621582 + }, + { + "epoch": 3.12591552734375e-05, + "model_forward_time": 0.024599313735961914, + "step": 20486 + }, + { + "epoch": 3.12591552734375e-05, + "step": 20486, + "training_step_time": 0.1556859016418457 + }, + { + "epoch": 3.126068115234375e-05, + "model_forward_time": 0.024196147918701172, + "step": 20487 + }, + { + "epoch": 3.126068115234375e-05, + "step": 20487, + "training_step_time": 0.20816659927368164 + }, + { + "epoch": 3.126220703125e-05, + "model_forward_time": 0.023787260055541992, + "step": 20488 + }, + { + "epoch": 3.126220703125e-05, + "step": 20488, + "training_step_time": 0.11965799331665039 + }, + { + "epoch": 3.126373291015625e-05, + "model_forward_time": 0.02520608901977539, + "step": 20489 + }, + { + "epoch": 3.126373291015625e-05, + "step": 20489, + "training_step_time": 0.11709427833557129 + }, + { + "epoch": 3.12652587890625e-05, + "grad_norm": 0.41922852396965027, + "learning_rate": 2.5047746741228978e-05, + "loss": 0.0063, + "step": 20490 + }, + { + "epoch": 3.12652587890625e-05, + "model_forward_time": 0.02466607093811035, + "step": 20490 + }, + { + "epoch": 3.12652587890625e-05, + "step": 20490, + "training_step_time": 0.17425942420959473 + }, + { + "epoch": 3.126678466796875e-05, + "model_forward_time": 0.026747465133666992, + "step": 20491 + }, + { + "epoch": 3.126678466796875e-05, + "step": 20491, + "training_step_time": 0.16351938247680664 + }, + { + "epoch": 3.1268310546875e-05, + "model_forward_time": 0.024102210998535156, + "step": 20492 + }, + { + "epoch": 3.1268310546875e-05, + "step": 20492, + "training_step_time": 0.11326384544372559 + }, + { + "epoch": 3.126983642578125e-05, + "model_forward_time": 0.02425074577331543, + "step": 20493 + }, + { + "epoch": 3.126983642578125e-05, + "step": 20493, + "training_step_time": 0.12543535232543945 + }, + { + "epoch": 3.12713623046875e-05, + "model_forward_time": 0.024608612060546875, + "step": 20494 + }, + { + "epoch": 3.12713623046875e-05, + "step": 20494, + "training_step_time": 0.15553593635559082 + }, + { + "epoch": 3.127288818359375e-05, + "model_forward_time": 0.024268150329589844, + "step": 20495 + }, + { + "epoch": 3.127288818359375e-05, + "step": 20495, + "training_step_time": 0.16475629806518555 + }, + { + "epoch": 3.12744140625e-05, + "model_forward_time": 0.024024248123168945, + "step": 20496 + }, + { + "epoch": 3.12744140625e-05, + "step": 20496, + "training_step_time": 0.13995671272277832 + }, + { + "epoch": 3.127593994140625e-05, + "model_forward_time": 0.024458646774291992, + "step": 20497 + }, + { + "epoch": 3.127593994140625e-05, + "step": 20497, + "training_step_time": 0.13447070121765137 + }, + { + "epoch": 3.12774658203125e-05, + "model_forward_time": 0.024914264678955078, + "step": 20498 + }, + { + "epoch": 3.12774658203125e-05, + "step": 20498, + "training_step_time": 0.12515974044799805 + }, + { + "epoch": 3.127899169921875e-05, + "model_forward_time": 0.025028467178344727, + "step": 20499 + }, + { + "epoch": 3.127899169921875e-05, + "step": 20499, + "training_step_time": 0.12459230422973633 + }, + { + "epoch": 3.1280517578125e-05, + "grad_norm": 0.17294204235076904, + "learning_rate": 2.500000000000001e-05, + "loss": 0.0074, + "step": 20500 + }, + { + "epoch": 3.1280517578125e-05, + "model_forward_time": 0.024628162384033203, + "step": 20500 + }, + { + "epoch": 3.1280517578125e-05, + "step": 20500, + "training_step_time": 0.1190495491027832 + }, + { + "epoch": 3.128204345703125e-05, + "model_forward_time": 0.02515697479248047, + "step": 20501 + }, + { + "epoch": 3.128204345703125e-05, + "step": 20501, + "training_step_time": 0.11376786231994629 + }, + { + "epoch": 3.12835693359375e-05, + "model_forward_time": 0.02513432502746582, + "step": 20502 + }, + { + "epoch": 3.12835693359375e-05, + "step": 20502, + "training_step_time": 0.11221599578857422 + }, + { + "epoch": 3.128509521484375e-05, + "model_forward_time": 0.025011539459228516, + "step": 20503 + }, + { + "epoch": 3.128509521484375e-05, + "step": 20503, + "training_step_time": 0.11024188995361328 + }, + { + "epoch": 3.128662109375e-05, + "model_forward_time": 0.02504253387451172, + "step": 20504 + }, + { + "epoch": 3.128662109375e-05, + "step": 20504, + "training_step_time": 0.11214470863342285 + }, + { + "epoch": 3.128814697265625e-05, + "model_forward_time": 0.025163888931274414, + "step": 20505 + }, + { + "epoch": 3.128814697265625e-05, + "step": 20505, + "training_step_time": 0.11267471313476562 + }, + { + "epoch": 3.12896728515625e-05, + "model_forward_time": 0.025268077850341797, + "step": 20506 + }, + { + "epoch": 3.12896728515625e-05, + "step": 20506, + "training_step_time": 0.11237025260925293 + }, + { + "epoch": 3.129119873046875e-05, + "model_forward_time": 0.024979352951049805, + "step": 20507 + }, + { + "epoch": 3.129119873046875e-05, + "step": 20507, + "training_step_time": 0.10994982719421387 + }, + { + "epoch": 3.1292724609375e-05, + "model_forward_time": 0.025050878524780273, + "step": 20508 + }, + { + "epoch": 3.1292724609375e-05, + "step": 20508, + "training_step_time": 0.10926580429077148 + }, + { + "epoch": 3.129425048828125e-05, + "model_forward_time": 0.02486133575439453, + "step": 20509 + }, + { + "epoch": 3.129425048828125e-05, + "step": 20509, + "training_step_time": 0.10756564140319824 + }, + { + "epoch": 3.12957763671875e-05, + "grad_norm": 0.17006202042102814, + "learning_rate": 2.4952283636128372e-05, + "loss": 0.0079, + "step": 20510 + }, + { + "epoch": 3.12957763671875e-05, + "model_forward_time": 0.025213956832885742, + "step": 20510 + }, + { + "epoch": 3.12957763671875e-05, + "step": 20510, + "training_step_time": 0.16357707977294922 + }, + { + "epoch": 3.129730224609375e-05, + "model_forward_time": 0.02446150779724121, + "step": 20511 + }, + { + "epoch": 3.129730224609375e-05, + "step": 20511, + "training_step_time": 0.12177085876464844 + }, + { + "epoch": 3.1298828125e-05, + "model_forward_time": 0.02486109733581543, + "step": 20512 + }, + { + "epoch": 3.1298828125e-05, + "step": 20512, + "training_step_time": 0.12957143783569336 + }, + { + "epoch": 3.130035400390625e-05, + "model_forward_time": 0.02473735809326172, + "step": 20513 + }, + { + "epoch": 3.130035400390625e-05, + "step": 20513, + "training_step_time": 0.15950775146484375 + }, + { + "epoch": 3.13018798828125e-05, + "model_forward_time": 0.02431797981262207, + "step": 20514 + }, + { + "epoch": 3.13018798828125e-05, + "step": 20514, + "training_step_time": 0.2212824821472168 + }, + { + "epoch": 3.130340576171875e-05, + "model_forward_time": 0.02412700653076172, + "step": 20515 + }, + { + "epoch": 3.130340576171875e-05, + "step": 20515, + "training_step_time": 0.12479186058044434 + }, + { + "epoch": 3.1304931640625e-05, + "model_forward_time": 0.02414107322692871, + "step": 20516 + }, + { + "epoch": 3.1304931640625e-05, + "step": 20516, + "training_step_time": 0.10607624053955078 + }, + { + "epoch": 3.130645751953125e-05, + "model_forward_time": 0.025371313095092773, + "step": 20517 + }, + { + "epoch": 3.130645751953125e-05, + "step": 20517, + "training_step_time": 0.11039113998413086 + }, + { + "epoch": 3.13079833984375e-05, + "model_forward_time": 0.02474355697631836, + "step": 20518 + }, + { + "epoch": 3.13079833984375e-05, + "step": 20518, + "training_step_time": 0.11175107955932617 + }, + { + "epoch": 3.130950927734375e-05, + "model_forward_time": 0.024724721908569336, + "step": 20519 + }, + { + "epoch": 3.130950927734375e-05, + "step": 20519, + "training_step_time": 0.11142158508300781 + }, + { + "epoch": 3.131103515625e-05, + "grad_norm": 0.2066083699464798, + "learning_rate": 2.490459770759398e-05, + "loss": 0.0057, + "step": 20520 + }, + { + "epoch": 3.131103515625e-05, + "model_forward_time": 0.025858402252197266, + "step": 20520 + }, + { + "epoch": 3.131103515625e-05, + "step": 20520, + "training_step_time": 0.11350607872009277 + }, + { + "epoch": 3.131256103515625e-05, + "model_forward_time": 0.02526545524597168, + "step": 20521 + }, + { + "epoch": 3.131256103515625e-05, + "step": 20521, + "training_step_time": 0.10991644859313965 + }, + { + "epoch": 3.13140869140625e-05, + "model_forward_time": 0.025092601776123047, + "step": 20522 + }, + { + "epoch": 3.13140869140625e-05, + "step": 20522, + "training_step_time": 0.10931754112243652 + }, + { + "epoch": 3.131561279296875e-05, + "model_forward_time": 0.024916410446166992, + "step": 20523 + }, + { + "epoch": 3.131561279296875e-05, + "step": 20523, + "training_step_time": 0.10927438735961914 + }, + { + "epoch": 3.1317138671875e-05, + "model_forward_time": 0.02543497085571289, + "step": 20524 + }, + { + "epoch": 3.1317138671875e-05, + "step": 20524, + "training_step_time": 0.10972452163696289 + }, + { + "epoch": 3.131866455078125e-05, + "model_forward_time": 0.025475502014160156, + "step": 20525 + }, + { + "epoch": 3.131866455078125e-05, + "step": 20525, + "training_step_time": 0.1106874942779541 + }, + { + "epoch": 3.13201904296875e-05, + "model_forward_time": 0.025233030319213867, + "step": 20526 + }, + { + "epoch": 3.13201904296875e-05, + "step": 20526, + "training_step_time": 0.13881492614746094 + }, + { + "epoch": 3.132171630859375e-05, + "model_forward_time": 0.025217056274414062, + "step": 20527 + }, + { + "epoch": 3.132171630859375e-05, + "step": 20527, + "training_step_time": 0.17469143867492676 + }, + { + "epoch": 3.13232421875e-05, + "model_forward_time": 0.024817705154418945, + "step": 20528 + }, + { + "epoch": 3.13232421875e-05, + "step": 20528, + "training_step_time": 0.12029361724853516 + }, + { + "epoch": 3.132476806640625e-05, + "model_forward_time": 0.02449321746826172, + "step": 20529 + }, + { + "epoch": 3.132476806640625e-05, + "step": 20529, + "training_step_time": 0.11452078819274902 + }, + { + "epoch": 3.13262939453125e-05, + "grad_norm": 0.2253899872303009, + "learning_rate": 2.485694227233971e-05, + "loss": 0.0138, + "step": 20530 + }, + { + "epoch": 3.13262939453125e-05, + "model_forward_time": 0.02542901039123535, + "step": 20530 + }, + { + "epoch": 3.13262939453125e-05, + "step": 20530, + "training_step_time": 0.1992936134338379 + }, + { + "epoch": 3.132781982421875e-05, + "model_forward_time": 0.02497410774230957, + "step": 20531 + }, + { + "epoch": 3.132781982421875e-05, + "step": 20531, + "training_step_time": 0.15787363052368164 + }, + { + "epoch": 3.1329345703125e-05, + "model_forward_time": 0.025403738021850586, + "step": 20532 + }, + { + "epoch": 3.1329345703125e-05, + "step": 20532, + "training_step_time": 0.10877013206481934 + }, + { + "epoch": 3.133087158203125e-05, + "model_forward_time": 0.024821043014526367, + "step": 20533 + }, + { + "epoch": 3.133087158203125e-05, + "step": 20533, + "training_step_time": 0.11808180809020996 + }, + { + "epoch": 3.13323974609375e-05, + "model_forward_time": 0.025028705596923828, + "step": 20534 + }, + { + "epoch": 3.13323974609375e-05, + "step": 20534, + "training_step_time": 0.19439148902893066 + }, + { + "epoch": 3.133392333984375e-05, + "model_forward_time": 0.024146080017089844, + "step": 20535 + }, + { + "epoch": 3.133392333984375e-05, + "step": 20535, + "training_step_time": 0.13718771934509277 + }, + { + "epoch": 3.133544921875e-05, + "model_forward_time": 0.024811744689941406, + "step": 20536 + }, + { + "epoch": 3.133544921875e-05, + "step": 20536, + "training_step_time": 0.1084432601928711 + }, + { + "epoch": 3.133697509765625e-05, + "model_forward_time": 0.025049448013305664, + "step": 20537 + }, + { + "epoch": 3.133697509765625e-05, + "step": 20537, + "training_step_time": 0.1129453182220459 + }, + { + "epoch": 3.13385009765625e-05, + "model_forward_time": 0.025290250778198242, + "step": 20538 + }, + { + "epoch": 3.13385009765625e-05, + "step": 20538, + "training_step_time": 0.11046242713928223 + }, + { + "epoch": 3.134002685546875e-05, + "model_forward_time": 0.025930404663085938, + "step": 20539 + }, + { + "epoch": 3.134002685546875e-05, + "step": 20539, + "training_step_time": 0.10640788078308105 + }, + { + "epoch": 3.1341552734375e-05, + "grad_norm": 0.26835936307907104, + "learning_rate": 2.4809317388271426e-05, + "loss": 0.005, + "step": 20540 + }, + { + "epoch": 3.1341552734375e-05, + "model_forward_time": 0.02527618408203125, + "step": 20540 + }, + { + "epoch": 3.1341552734375e-05, + "step": 20540, + "training_step_time": 0.19098901748657227 + }, + { + "epoch": 3.134307861328125e-05, + "model_forward_time": 0.024337291717529297, + "step": 20541 + }, + { + "epoch": 3.134307861328125e-05, + "step": 20541, + "training_step_time": 0.10726499557495117 + }, + { + "epoch": 3.13446044921875e-05, + "model_forward_time": 0.024818897247314453, + "step": 20542 + }, + { + "epoch": 3.13446044921875e-05, + "step": 20542, + "training_step_time": 0.1032404899597168 + }, + { + "epoch": 3.134613037109375e-05, + "model_forward_time": 0.02489018440246582, + "step": 20543 + }, + { + "epoch": 3.134613037109375e-05, + "step": 20543, + "training_step_time": 0.10628080368041992 + }, + { + "epoch": 3.134765625e-05, + "model_forward_time": 0.02514505386352539, + "step": 20544 + }, + { + "epoch": 3.134765625e-05, + "step": 20544, + "training_step_time": 0.1063683032989502 + }, + { + "epoch": 3.134918212890625e-05, + "model_forward_time": 0.025470733642578125, + "step": 20545 + }, + { + "epoch": 3.134918212890625e-05, + "step": 20545, + "training_step_time": 0.10642457008361816 + }, + { + "epoch": 3.13507080078125e-05, + "model_forward_time": 0.025505542755126953, + "step": 20546 + }, + { + "epoch": 3.13507080078125e-05, + "step": 20546, + "training_step_time": 0.1062922477722168 + }, + { + "epoch": 3.135223388671875e-05, + "model_forward_time": 0.024820327758789062, + "step": 20547 + }, + { + "epoch": 3.135223388671875e-05, + "step": 20547, + "training_step_time": 0.10829019546508789 + }, + { + "epoch": 3.1353759765625e-05, + "model_forward_time": 0.02498030662536621, + "step": 20548 + }, + { + "epoch": 3.1353759765625e-05, + "step": 20548, + "training_step_time": 0.10569643974304199 + }, + { + "epoch": 3.135528564453125e-05, + "model_forward_time": 0.024504423141479492, + "step": 20549 + }, + { + "epoch": 3.135528564453125e-05, + "step": 20549, + "training_step_time": 0.10878753662109375 + }, + { + "epoch": 3.13568115234375e-05, + "grad_norm": 0.21904797852039337, + "learning_rate": 2.476172311325783e-05, + "loss": 0.0084, + "step": 20550 + }, + { + "epoch": 3.13568115234375e-05, + "model_forward_time": 0.025470733642578125, + "step": 20550 + }, + { + "epoch": 3.13568115234375e-05, + "step": 20550, + "training_step_time": 0.10930371284484863 + }, + { + "epoch": 3.135833740234375e-05, + "model_forward_time": 0.024024248123168945, + "step": 20551 + }, + { + "epoch": 3.135833740234375e-05, + "step": 20551, + "training_step_time": 0.10760855674743652 + }, + { + "epoch": 3.135986328125e-05, + "model_forward_time": 0.024329423904418945, + "step": 20552 + }, + { + "epoch": 3.135986328125e-05, + "step": 20552, + "training_step_time": 0.11164093017578125 + }, + { + "epoch": 3.136138916015625e-05, + "model_forward_time": 0.025037527084350586, + "step": 20553 + }, + { + "epoch": 3.136138916015625e-05, + "step": 20553, + "training_step_time": 0.1100456714630127 + }, + { + "epoch": 3.13629150390625e-05, + "model_forward_time": 0.02472090721130371, + "step": 20554 + }, + { + "epoch": 3.13629150390625e-05, + "step": 20554, + "training_step_time": 0.10709953308105469 + }, + { + "epoch": 3.136444091796875e-05, + "model_forward_time": 0.024889707565307617, + "step": 20555 + }, + { + "epoch": 3.136444091796875e-05, + "step": 20555, + "training_step_time": 0.19934821128845215 + }, + { + "epoch": 3.1365966796875e-05, + "model_forward_time": 0.023773670196533203, + "step": 20556 + }, + { + "epoch": 3.1365966796875e-05, + "step": 20556, + "training_step_time": 0.12378811836242676 + }, + { + "epoch": 3.136749267578125e-05, + "model_forward_time": 0.024443626403808594, + "step": 20557 + }, + { + "epoch": 3.136749267578125e-05, + "step": 20557, + "training_step_time": 0.11337018013000488 + }, + { + "epoch": 3.13690185546875e-05, + "model_forward_time": 0.027028560638427734, + "step": 20558 + }, + { + "epoch": 3.13690185546875e-05, + "step": 20558, + "training_step_time": 0.1594407558441162 + }, + { + "epoch": 3.137054443359375e-05, + "model_forward_time": 0.02451944351196289, + "step": 20559 + }, + { + "epoch": 3.137054443359375e-05, + "step": 20559, + "training_step_time": 0.17675256729125977 + }, + { + "epoch": 3.13720703125e-05, + "grad_norm": 0.3118372857570648, + "learning_rate": 2.4714159505130452e-05, + "loss": 0.0068, + "step": 20560 + }, + { + "epoch": 3.13720703125e-05, + "model_forward_time": 0.024271726608276367, + "step": 20560 + }, + { + "epoch": 3.13720703125e-05, + "step": 20560, + "training_step_time": 0.16133379936218262 + }, + { + "epoch": 3.137359619140625e-05, + "model_forward_time": 0.02428436279296875, + "step": 20561 + }, + { + "epoch": 3.137359619140625e-05, + "step": 20561, + "training_step_time": 0.11382842063903809 + }, + { + "epoch": 3.13751220703125e-05, + "model_forward_time": 0.024514436721801758, + "step": 20562 + }, + { + "epoch": 3.13751220703125e-05, + "step": 20562, + "training_step_time": 0.10971617698669434 + }, + { + "epoch": 3.137664794921875e-05, + "model_forward_time": 0.024964332580566406, + "step": 20563 + }, + { + "epoch": 3.137664794921875e-05, + "step": 20563, + "training_step_time": 0.10756063461303711 + }, + { + "epoch": 3.1378173828125e-05, + "model_forward_time": 0.025181055068969727, + "step": 20564 + }, + { + "epoch": 3.1378173828125e-05, + "step": 20564, + "training_step_time": 0.10816645622253418 + }, + { + "epoch": 3.137969970703125e-05, + "model_forward_time": 0.026098251342773438, + "step": 20565 + }, + { + "epoch": 3.137969970703125e-05, + "step": 20565, + "training_step_time": 0.10790491104125977 + }, + { + "epoch": 3.13812255859375e-05, + "model_forward_time": 0.024805068969726562, + "step": 20566 + }, + { + "epoch": 3.13812255859375e-05, + "step": 20566, + "training_step_time": 0.10875105857849121 + }, + { + "epoch": 3.138275146484375e-05, + "model_forward_time": 0.02469658851623535, + "step": 20567 + }, + { + "epoch": 3.138275146484375e-05, + "step": 20567, + "training_step_time": 0.11125516891479492 + }, + { + "epoch": 3.138427734375e-05, + "model_forward_time": 0.024399280548095703, + "step": 20568 + }, + { + "epoch": 3.138427734375e-05, + "step": 20568, + "training_step_time": 0.1059722900390625 + }, + { + "epoch": 3.138580322265625e-05, + "model_forward_time": 0.024599790573120117, + "step": 20569 + }, + { + "epoch": 3.138580322265625e-05, + "step": 20569, + "training_step_time": 0.11080336570739746 + }, + { + "epoch": 3.13873291015625e-05, + "grad_norm": 0.11126571148633957, + "learning_rate": 2.4666626621683592e-05, + "loss": 0.0051, + "step": 20570 + }, + { + "epoch": 3.13873291015625e-05, + "model_forward_time": 0.024835586547851562, + "step": 20570 + }, + { + "epoch": 3.13873291015625e-05, + "step": 20570, + "training_step_time": 0.10753250122070312 + }, + { + "epoch": 3.138885498046875e-05, + "model_forward_time": 0.024793624877929688, + "step": 20571 + }, + { + "epoch": 3.138885498046875e-05, + "step": 20571, + "training_step_time": 0.15090513229370117 + }, + { + "epoch": 3.1390380859375e-05, + "model_forward_time": 0.023921728134155273, + "step": 20572 + }, + { + "epoch": 3.1390380859375e-05, + "step": 20572, + "training_step_time": 0.1585249900817871 + }, + { + "epoch": 3.139190673828125e-05, + "model_forward_time": 0.02491164207458496, + "step": 20573 + }, + { + "epoch": 3.139190673828125e-05, + "step": 20573, + "training_step_time": 0.11076211929321289 + }, + { + "epoch": 3.13934326171875e-05, + "model_forward_time": 0.026210784912109375, + "step": 20574 + }, + { + "epoch": 3.13934326171875e-05, + "step": 20574, + "training_step_time": 0.12554454803466797 + }, + { + "epoch": 3.139495849609375e-05, + "model_forward_time": 0.025132417678833008, + "step": 20575 + }, + { + "epoch": 3.139495849609375e-05, + "step": 20575, + "training_step_time": 0.20638251304626465 + }, + { + "epoch": 3.1396484375e-05, + "model_forward_time": 0.024350881576538086, + "step": 20576 + }, + { + "epoch": 3.1396484375e-05, + "step": 20576, + "training_step_time": 0.1293320655822754 + }, + { + "epoch": 3.139801025390625e-05, + "model_forward_time": 0.02468276023864746, + "step": 20577 + }, + { + "epoch": 3.139801025390625e-05, + "step": 20577, + "training_step_time": 0.11166143417358398 + }, + { + "epoch": 3.13995361328125e-05, + "model_forward_time": 0.0249941349029541, + "step": 20578 + }, + { + "epoch": 3.13995361328125e-05, + "step": 20578, + "training_step_time": 0.12047719955444336 + }, + { + "epoch": 3.140106201171875e-05, + "model_forward_time": 0.027432680130004883, + "step": 20579 + }, + { + "epoch": 3.140106201171875e-05, + "step": 20579, + "training_step_time": 0.1604154109954834 + }, + { + "epoch": 3.1402587890625e-05, + "grad_norm": 0.17350897192955017, + "learning_rate": 2.4619124520674146e-05, + "loss": 0.0047, + "step": 20580 + }, + { + "epoch": 3.1402587890625e-05, + "model_forward_time": 0.02452540397644043, + "step": 20580 + }, + { + "epoch": 3.1402587890625e-05, + "step": 20580, + "training_step_time": 0.17169570922851562 + }, + { + "epoch": 3.140411376953125e-05, + "model_forward_time": 0.02397894859313965, + "step": 20581 + }, + { + "epoch": 3.140411376953125e-05, + "step": 20581, + "training_step_time": 0.10414290428161621 + }, + { + "epoch": 3.14056396484375e-05, + "model_forward_time": 0.024307966232299805, + "step": 20582 + }, + { + "epoch": 3.14056396484375e-05, + "step": 20582, + "training_step_time": 0.11286067962646484 + }, + { + "epoch": 3.140716552734375e-05, + "model_forward_time": 0.025502443313598633, + "step": 20583 + }, + { + "epoch": 3.140716552734375e-05, + "step": 20583, + "training_step_time": 0.1166226863861084 + }, + { + "epoch": 3.140869140625e-05, + "model_forward_time": 0.025084733963012695, + "step": 20584 + }, + { + "epoch": 3.140869140625e-05, + "step": 20584, + "training_step_time": 0.10914802551269531 + }, + { + "epoch": 3.141021728515625e-05, + "model_forward_time": 0.024925947189331055, + "step": 20585 + }, + { + "epoch": 3.141021728515625e-05, + "step": 20585, + "training_step_time": 0.19291996955871582 + }, + { + "epoch": 3.14117431640625e-05, + "model_forward_time": 0.025218725204467773, + "step": 20586 + }, + { + "epoch": 3.14117431640625e-05, + "step": 20586, + "training_step_time": 0.11324524879455566 + }, + { + "epoch": 3.141326904296875e-05, + "model_forward_time": 0.024022817611694336, + "step": 20587 + }, + { + "epoch": 3.141326904296875e-05, + "step": 20587, + "training_step_time": 0.10542106628417969 + }, + { + "epoch": 3.1414794921875e-05, + "model_forward_time": 0.024758100509643555, + "step": 20588 + }, + { + "epoch": 3.1414794921875e-05, + "step": 20588, + "training_step_time": 0.13703036308288574 + }, + { + "epoch": 3.141632080078125e-05, + "model_forward_time": 0.02486705780029297, + "step": 20589 + }, + { + "epoch": 3.141632080078125e-05, + "step": 20589, + "training_step_time": 0.18683743476867676 + }, + { + "epoch": 3.14178466796875e-05, + "grad_norm": 0.21014504134655, + "learning_rate": 2.4571653259821694e-05, + "loss": 0.008, + "step": 20590 + }, + { + "epoch": 3.14178466796875e-05, + "model_forward_time": 0.023346424102783203, + "step": 20590 + }, + { + "epoch": 3.14178466796875e-05, + "step": 20590, + "training_step_time": 0.18623614311218262 + }, + { + "epoch": 3.141937255859375e-05, + "model_forward_time": 0.024047374725341797, + "step": 20591 + }, + { + "epoch": 3.141937255859375e-05, + "step": 20591, + "training_step_time": 0.177565336227417 + }, + { + "epoch": 3.14208984375e-05, + "model_forward_time": 0.024545907974243164, + "step": 20592 + }, + { + "epoch": 3.14208984375e-05, + "step": 20592, + "training_step_time": 0.15633153915405273 + }, + { + "epoch": 3.142242431640625e-05, + "model_forward_time": 0.023782968521118164, + "step": 20593 + }, + { + "epoch": 3.142242431640625e-05, + "step": 20593, + "training_step_time": 0.14544677734375 + }, + { + "epoch": 3.14239501953125e-05, + "model_forward_time": 0.024092435836791992, + "step": 20594 + }, + { + "epoch": 3.14239501953125e-05, + "step": 20594, + "training_step_time": 0.12963390350341797 + }, + { + "epoch": 3.142547607421875e-05, + "model_forward_time": 0.024512290954589844, + "step": 20595 + }, + { + "epoch": 3.142547607421875e-05, + "step": 20595, + "training_step_time": 0.1268603801727295 + }, + { + "epoch": 3.1427001953125e-05, + "model_forward_time": 0.025098323822021484, + "step": 20596 + }, + { + "epoch": 3.1427001953125e-05, + "step": 20596, + "training_step_time": 0.11785292625427246 + }, + { + "epoch": 3.142852783203125e-05, + "model_forward_time": 0.02449774742126465, + "step": 20597 + }, + { + "epoch": 3.142852783203125e-05, + "step": 20597, + "training_step_time": 0.11635851860046387 + }, + { + "epoch": 3.14300537109375e-05, + "model_forward_time": 0.025457382202148438, + "step": 20598 + }, + { + "epoch": 3.14300537109375e-05, + "step": 20598, + "training_step_time": 0.1170046329498291 + }, + { + "epoch": 3.143157958984375e-05, + "model_forward_time": 0.025466442108154297, + "step": 20599 + }, + { + "epoch": 3.143157958984375e-05, + "step": 20599, + "training_step_time": 0.2063913345336914 + }, + { + "epoch": 3.143310546875e-05, + "grad_norm": 0.23332205414772034, + "learning_rate": 2.4524212896808263e-05, + "loss": 0.0097, + "step": 20600 + }, + { + "epoch": 3.143310546875e-05, + "model_forward_time": 0.024246692657470703, + "step": 20600 + }, + { + "epoch": 3.143310546875e-05, + "step": 20600, + "training_step_time": 0.10812997817993164 + }, + { + "epoch": 3.143463134765625e-05, + "model_forward_time": 0.025010347366333008, + "step": 20601 + }, + { + "epoch": 3.143463134765625e-05, + "step": 20601, + "training_step_time": 0.21222186088562012 + }, + { + "epoch": 3.14361572265625e-05, + "model_forward_time": 0.024252891540527344, + "step": 20602 + }, + { + "epoch": 3.14361572265625e-05, + "step": 20602, + "training_step_time": 0.11905908584594727 + }, + { + "epoch": 3.143768310546875e-05, + "model_forward_time": 0.02419567108154297, + "step": 20603 + }, + { + "epoch": 3.143768310546875e-05, + "step": 20603, + "training_step_time": 0.10321712493896484 + }, + { + "epoch": 3.1439208984375e-05, + "model_forward_time": 0.025074005126953125, + "step": 20604 + }, + { + "epoch": 3.1439208984375e-05, + "step": 20604, + "training_step_time": 0.10217881202697754 + }, + { + "epoch": 3.144073486328125e-05, + "model_forward_time": 0.02528691291809082, + "step": 20605 + }, + { + "epoch": 3.144073486328125e-05, + "step": 20605, + "training_step_time": 0.11067414283752441 + }, + { + "epoch": 3.14422607421875e-05, + "model_forward_time": 0.025298595428466797, + "step": 20606 + }, + { + "epoch": 3.14422607421875e-05, + "step": 20606, + "training_step_time": 0.10805320739746094 + }, + { + "epoch": 3.144378662109375e-05, + "model_forward_time": 0.025137901306152344, + "step": 20607 + }, + { + "epoch": 3.144378662109375e-05, + "step": 20607, + "training_step_time": 0.10650444030761719 + }, + { + "epoch": 3.14453125e-05, + "model_forward_time": 0.0255892276763916, + "step": 20608 + }, + { + "epoch": 3.14453125e-05, + "step": 20608, + "training_step_time": 0.10478973388671875 + }, + { + "epoch": 3.144683837890625e-05, + "model_forward_time": 0.02488112449645996, + "step": 20609 + }, + { + "epoch": 3.144683837890625e-05, + "step": 20609, + "training_step_time": 0.10443401336669922 + }, + { + "epoch": 3.14483642578125e-05, + "grad_norm": 0.29580172896385193, + "learning_rate": 2.447680348927837e-05, + "loss": 0.0138, + "step": 20610 + }, + { + "epoch": 3.14483642578125e-05, + "model_forward_time": 0.025202035903930664, + "step": 20610 + }, + { + "epoch": 3.14483642578125e-05, + "step": 20610, + "training_step_time": 0.10323429107666016 + }, + { + "epoch": 3.144989013671875e-05, + "model_forward_time": 0.024883270263671875, + "step": 20611 + }, + { + "epoch": 3.144989013671875e-05, + "step": 20611, + "training_step_time": 0.10295319557189941 + }, + { + "epoch": 3.1451416015625e-05, + "model_forward_time": 0.0256807804107666, + "step": 20612 + }, + { + "epoch": 3.1451416015625e-05, + "step": 20612, + "training_step_time": 0.10535454750061035 + }, + { + "epoch": 3.145294189453125e-05, + "model_forward_time": 0.02463674545288086, + "step": 20613 + }, + { + "epoch": 3.145294189453125e-05, + "step": 20613, + "training_step_time": 0.15311002731323242 + }, + { + "epoch": 3.14544677734375e-05, + "model_forward_time": 0.024644136428833008, + "step": 20614 + }, + { + "epoch": 3.14544677734375e-05, + "step": 20614, + "training_step_time": 0.16457605361938477 + }, + { + "epoch": 3.145599365234375e-05, + "model_forward_time": 0.024947404861450195, + "step": 20615 + }, + { + "epoch": 3.145599365234375e-05, + "step": 20615, + "training_step_time": 0.13673186302185059 + }, + { + "epoch": 3.145751953125e-05, + "model_forward_time": 0.024286508560180664, + "step": 20616 + }, + { + "epoch": 3.145751953125e-05, + "step": 20616, + "training_step_time": 0.10977506637573242 + }, + { + "epoch": 3.145904541015625e-05, + "model_forward_time": 0.025252819061279297, + "step": 20617 + }, + { + "epoch": 3.145904541015625e-05, + "step": 20617, + "training_step_time": 0.1898491382598877 + }, + { + "epoch": 3.14605712890625e-05, + "model_forward_time": 0.02456378936767578, + "step": 20618 + }, + { + "epoch": 3.14605712890625e-05, + "step": 20618, + "training_step_time": 0.17006754875183105 + }, + { + "epoch": 3.146209716796875e-05, + "model_forward_time": 0.024471044540405273, + "step": 20619 + }, + { + "epoch": 3.146209716796875e-05, + "step": 20619, + "training_step_time": 0.11527538299560547 + }, + { + "epoch": 3.1463623046875e-05, + "grad_norm": 0.1555105447769165, + "learning_rate": 2.4429425094838903e-05, + "loss": 0.0066, + "step": 20620 + }, + { + "epoch": 3.1463623046875e-05, + "model_forward_time": 0.0266721248626709, + "step": 20620 + }, + { + "epoch": 3.1463623046875e-05, + "step": 20620, + "training_step_time": 0.10814833641052246 + }, + { + "epoch": 3.146514892578125e-05, + "model_forward_time": 0.02454686164855957, + "step": 20621 + }, + { + "epoch": 3.146514892578125e-05, + "step": 20621, + "training_step_time": 0.17139506340026855 + }, + { + "epoch": 3.14666748046875e-05, + "model_forward_time": 0.02490687370300293, + "step": 20622 + }, + { + "epoch": 3.14666748046875e-05, + "step": 20622, + "training_step_time": 0.13705754280090332 + }, + { + "epoch": 3.146820068359375e-05, + "model_forward_time": 0.024480104446411133, + "step": 20623 + }, + { + "epoch": 3.146820068359375e-05, + "step": 20623, + "training_step_time": 0.1067051887512207 + }, + { + "epoch": 3.14697265625e-05, + "model_forward_time": 0.02626800537109375, + "step": 20624 + }, + { + "epoch": 3.14697265625e-05, + "step": 20624, + "training_step_time": 0.11019659042358398 + }, + { + "epoch": 3.147125244140625e-05, + "model_forward_time": 0.025109291076660156, + "step": 20625 + }, + { + "epoch": 3.147125244140625e-05, + "step": 20625, + "training_step_time": 0.12220072746276855 + }, + { + "epoch": 3.14727783203125e-05, + "model_forward_time": 0.0249173641204834, + "step": 20626 + }, + { + "epoch": 3.14727783203125e-05, + "step": 20626, + "training_step_time": 0.10727715492248535 + }, + { + "epoch": 3.147430419921875e-05, + "model_forward_time": 0.0248563289642334, + "step": 20627 + }, + { + "epoch": 3.147430419921875e-05, + "step": 20627, + "training_step_time": 0.1877002716064453 + }, + { + "epoch": 3.1475830078125e-05, + "model_forward_time": 0.024515867233276367, + "step": 20628 + }, + { + "epoch": 3.1475830078125e-05, + "step": 20628, + "training_step_time": 0.10427975654602051 + }, + { + "epoch": 3.147735595703125e-05, + "model_forward_time": 0.024553298950195312, + "step": 20629 + }, + { + "epoch": 3.147735595703125e-05, + "step": 20629, + "training_step_time": 0.10350561141967773 + }, + { + "epoch": 3.14788818359375e-05, + "grad_norm": 0.27353760600090027, + "learning_rate": 2.438207777105911e-05, + "loss": 0.0079, + "step": 20630 + }, + { + "epoch": 3.14788818359375e-05, + "model_forward_time": 0.024940013885498047, + "step": 20630 + }, + { + "epoch": 3.14788818359375e-05, + "step": 20630, + "training_step_time": 0.10677170753479004 + }, + { + "epoch": 3.148040771484375e-05, + "model_forward_time": 0.025317907333374023, + "step": 20631 + }, + { + "epoch": 3.148040771484375e-05, + "step": 20631, + "training_step_time": 0.10391521453857422 + }, + { + "epoch": 3.148193359375e-05, + "model_forward_time": 0.02529740333557129, + "step": 20632 + }, + { + "epoch": 3.148193359375e-05, + "step": 20632, + "training_step_time": 0.10454297065734863 + }, + { + "epoch": 3.148345947265625e-05, + "model_forward_time": 0.025235891342163086, + "step": 20633 + }, + { + "epoch": 3.148345947265625e-05, + "step": 20633, + "training_step_time": 0.11024594306945801 + }, + { + "epoch": 3.14849853515625e-05, + "model_forward_time": 0.025067806243896484, + "step": 20634 + }, + { + "epoch": 3.14849853515625e-05, + "step": 20634, + "training_step_time": 0.10528302192687988 + }, + { + "epoch": 3.148651123046875e-05, + "model_forward_time": 0.025241851806640625, + "step": 20635 + }, + { + "epoch": 3.148651123046875e-05, + "step": 20635, + "training_step_time": 0.10708189010620117 + }, + { + "epoch": 3.1488037109375e-05, + "model_forward_time": 0.025351762771606445, + "step": 20636 + }, + { + "epoch": 3.1488037109375e-05, + "step": 20636, + "training_step_time": 0.10694408416748047 + }, + { + "epoch": 3.148956298828125e-05, + "model_forward_time": 0.025554895401000977, + "step": 20637 + }, + { + "epoch": 3.148956298828125e-05, + "step": 20637, + "training_step_time": 0.10595417022705078 + }, + { + "epoch": 3.14910888671875e-05, + "model_forward_time": 0.02491593360900879, + "step": 20638 + }, + { + "epoch": 3.14910888671875e-05, + "step": 20638, + "training_step_time": 0.10448026657104492 + }, + { + "epoch": 3.149261474609375e-05, + "model_forward_time": 0.025191783905029297, + "step": 20639 + }, + { + "epoch": 3.149261474609375e-05, + "step": 20639, + "training_step_time": 0.10908889770507812 + }, + { + "epoch": 3.1494140625e-05, + "grad_norm": 0.3734072744846344, + "learning_rate": 2.433476157547044e-05, + "loss": 0.0076, + "step": 20640 + }, + { + "epoch": 3.1494140625e-05, + "model_forward_time": 0.025458812713623047, + "step": 20640 + }, + { + "epoch": 3.1494140625e-05, + "step": 20640, + "training_step_time": 0.18061208724975586 + }, + { + "epoch": 3.149566650390625e-05, + "model_forward_time": 0.0249783992767334, + "step": 20641 + }, + { + "epoch": 3.149566650390625e-05, + "step": 20641, + "training_step_time": 0.20447015762329102 + }, + { + "epoch": 3.14971923828125e-05, + "model_forward_time": 0.024279117584228516, + "step": 20642 + }, + { + "epoch": 3.14971923828125e-05, + "step": 20642, + "training_step_time": 0.2408463954925537 + }, + { + "epoch": 3.149871826171875e-05, + "model_forward_time": 0.024524211883544922, + "step": 20643 + }, + { + "epoch": 3.149871826171875e-05, + "step": 20643, + "training_step_time": 0.2088308334350586 + }, + { + "epoch": 3.1500244140625e-05, + "model_forward_time": 0.024237394332885742, + "step": 20644 + }, + { + "epoch": 3.1500244140625e-05, + "step": 20644, + "training_step_time": 0.1875467300415039 + }, + { + "epoch": 3.150177001953125e-05, + "model_forward_time": 0.02464771270751953, + "step": 20645 + }, + { + "epoch": 3.150177001953125e-05, + "step": 20645, + "training_step_time": 0.1774120330810547 + }, + { + "epoch": 3.15032958984375e-05, + "model_forward_time": 0.0245361328125, + "step": 20646 + }, + { + "epoch": 3.15032958984375e-05, + "step": 20646, + "training_step_time": 0.16726207733154297 + }, + { + "epoch": 3.150482177734375e-05, + "model_forward_time": 0.024179935455322266, + "step": 20647 + }, + { + "epoch": 3.150482177734375e-05, + "step": 20647, + "training_step_time": 0.10902929306030273 + }, + { + "epoch": 3.150634765625e-05, + "model_forward_time": 0.024752378463745117, + "step": 20648 + }, + { + "epoch": 3.150634765625e-05, + "step": 20648, + "training_step_time": 0.10428237915039062 + }, + { + "epoch": 3.150787353515625e-05, + "model_forward_time": 0.02612757682800293, + "step": 20649 + }, + { + "epoch": 3.150787353515625e-05, + "step": 20649, + "training_step_time": 0.585674524307251 + }, + { + "epoch": 3.15093994140625e-05, + "grad_norm": 0.25616809725761414, + "learning_rate": 2.4287476565566527e-05, + "loss": 0.0043, + "step": 20650 + }, + { + "epoch": 3.15093994140625e-05, + "model_forward_time": 0.02333807945251465, + "step": 20650 + }, + { + "epoch": 3.15093994140625e-05, + "step": 20650, + "training_step_time": 0.10599160194396973 + }, + { + "epoch": 3.151092529296875e-05, + "model_forward_time": 0.024001598358154297, + "step": 20651 + }, + { + "epoch": 3.151092529296875e-05, + "step": 20651, + "training_step_time": 0.19608545303344727 + }, + { + "epoch": 3.1512451171875e-05, + "model_forward_time": 0.024433135986328125, + "step": 20652 + }, + { + "epoch": 3.1512451171875e-05, + "step": 20652, + "training_step_time": 0.15146446228027344 + }, + { + "epoch": 3.151397705078125e-05, + "model_forward_time": 0.024441003799438477, + "step": 20653 + }, + { + "epoch": 3.151397705078125e-05, + "step": 20653, + "training_step_time": 0.16575884819030762 + }, + { + "epoch": 3.15155029296875e-05, + "model_forward_time": 0.02436518669128418, + "step": 20654 + }, + { + "epoch": 3.15155029296875e-05, + "step": 20654, + "training_step_time": 0.17833542823791504 + }, + { + "epoch": 3.151702880859375e-05, + "model_forward_time": 0.024562358856201172, + "step": 20655 + }, + { + "epoch": 3.151702880859375e-05, + "step": 20655, + "training_step_time": 0.16649699211120605 + }, + { + "epoch": 3.15185546875e-05, + "model_forward_time": 0.02464604377746582, + "step": 20656 + }, + { + "epoch": 3.15185546875e-05, + "step": 20656, + "training_step_time": 0.13065242767333984 + }, + { + "epoch": 3.152008056640625e-05, + "model_forward_time": 0.026152372360229492, + "step": 20657 + }, + { + "epoch": 3.152008056640625e-05, + "step": 20657, + "training_step_time": 0.11854267120361328 + }, + { + "epoch": 3.15216064453125e-05, + "model_forward_time": 0.024641990661621094, + "step": 20658 + }, + { + "epoch": 3.15216064453125e-05, + "step": 20658, + "training_step_time": 0.1112678050994873 + }, + { + "epoch": 3.152313232421875e-05, + "model_forward_time": 0.02551412582397461, + "step": 20659 + }, + { + "epoch": 3.152313232421875e-05, + "step": 20659, + "training_step_time": 0.1777348518371582 + }, + { + "epoch": 3.1524658203125e-05, + "grad_norm": 0.09712230414152145, + "learning_rate": 2.424022279880312e-05, + "loss": 0.0062, + "step": 20660 + }, + { + "epoch": 3.1524658203125e-05, + "model_forward_time": 0.024485111236572266, + "step": 20660 + }, + { + "epoch": 3.1524658203125e-05, + "step": 20660, + "training_step_time": 0.13455533981323242 + }, + { + "epoch": 3.152618408203125e-05, + "model_forward_time": 0.024139881134033203, + "step": 20661 + }, + { + "epoch": 3.152618408203125e-05, + "step": 20661, + "training_step_time": 0.10887455940246582 + }, + { + "epoch": 3.15277099609375e-05, + "model_forward_time": 0.02494955062866211, + "step": 20662 + }, + { + "epoch": 3.15277099609375e-05, + "step": 20662, + "training_step_time": 0.11508631706237793 + }, + { + "epoch": 3.152923583984375e-05, + "model_forward_time": 0.025473833084106445, + "step": 20663 + }, + { + "epoch": 3.152923583984375e-05, + "step": 20663, + "training_step_time": 0.11283230781555176 + }, + { + "epoch": 3.153076171875e-05, + "model_forward_time": 0.025490999221801758, + "step": 20664 + }, + { + "epoch": 3.153076171875e-05, + "step": 20664, + "training_step_time": 0.11524796485900879 + }, + { + "epoch": 3.153228759765625e-05, + "model_forward_time": 0.025023698806762695, + "step": 20665 + }, + { + "epoch": 3.153228759765625e-05, + "step": 20665, + "training_step_time": 0.19072484970092773 + }, + { + "epoch": 3.15338134765625e-05, + "model_forward_time": 0.024636030197143555, + "step": 20666 + }, + { + "epoch": 3.15338134765625e-05, + "step": 20666, + "training_step_time": 0.10351252555847168 + }, + { + "epoch": 3.153533935546875e-05, + "model_forward_time": 0.024320125579833984, + "step": 20667 + }, + { + "epoch": 3.153533935546875e-05, + "step": 20667, + "training_step_time": 0.10203838348388672 + }, + { + "epoch": 3.1536865234375e-05, + "model_forward_time": 0.025452375411987305, + "step": 20668 + }, + { + "epoch": 3.1536865234375e-05, + "step": 20668, + "training_step_time": 0.10771369934082031 + }, + { + "epoch": 3.153839111328125e-05, + "model_forward_time": 0.025574207305908203, + "step": 20669 + }, + { + "epoch": 3.153839111328125e-05, + "step": 20669, + "training_step_time": 0.10489010810852051 + }, + { + "epoch": 3.15399169921875e-05, + "grad_norm": 0.10359963774681091, + "learning_rate": 2.419300033259798e-05, + "loss": 0.0039, + "step": 20670 + }, + { + "epoch": 3.15399169921875e-05, + "model_forward_time": 0.025453805923461914, + "step": 20670 + }, + { + "epoch": 3.15399169921875e-05, + "step": 20670, + "training_step_time": 0.11071538925170898 + }, + { + "epoch": 3.154144287109375e-05, + "model_forward_time": 0.025765180587768555, + "step": 20671 + }, + { + "epoch": 3.154144287109375e-05, + "step": 20671, + "training_step_time": 0.10737776756286621 + }, + { + "epoch": 3.154296875e-05, + "model_forward_time": 0.024924278259277344, + "step": 20672 + }, + { + "epoch": 3.154296875e-05, + "step": 20672, + "training_step_time": 0.10734248161315918 + }, + { + "epoch": 3.154449462890625e-05, + "model_forward_time": 0.025188922882080078, + "step": 20673 + }, + { + "epoch": 3.154449462890625e-05, + "step": 20673, + "training_step_time": 0.10474538803100586 + }, + { + "epoch": 3.15460205078125e-05, + "model_forward_time": 0.02511906623840332, + "step": 20674 + }, + { + "epoch": 3.15460205078125e-05, + "step": 20674, + "training_step_time": 0.1050870418548584 + }, + { + "epoch": 3.154754638671875e-05, + "model_forward_time": 0.02557992935180664, + "step": 20675 + }, + { + "epoch": 3.154754638671875e-05, + "step": 20675, + "training_step_time": 0.1031033992767334 + }, + { + "epoch": 3.1549072265625e-05, + "model_forward_time": 0.02510690689086914, + "step": 20676 + }, + { + "epoch": 3.1549072265625e-05, + "step": 20676, + "training_step_time": 0.10477352142333984 + }, + { + "epoch": 3.155059814453125e-05, + "model_forward_time": 0.024929285049438477, + "step": 20677 + }, + { + "epoch": 3.155059814453125e-05, + "step": 20677, + "training_step_time": 0.10323190689086914 + }, + { + "epoch": 3.15521240234375e-05, + "model_forward_time": 0.024713993072509766, + "step": 20678 + }, + { + "epoch": 3.15521240234375e-05, + "step": 20678, + "training_step_time": 0.10405468940734863 + }, + { + "epoch": 3.155364990234375e-05, + "model_forward_time": 0.0244596004486084, + "step": 20679 + }, + { + "epoch": 3.155364990234375e-05, + "step": 20679, + "training_step_time": 0.10951375961303711 + }, + { + "epoch": 3.155517578125e-05, + "grad_norm": 0.1519148200750351, + "learning_rate": 2.4145809224330896e-05, + "loss": 0.0073, + "step": 20680 + }, + { + "epoch": 3.155517578125e-05, + "model_forward_time": 0.02497267723083496, + "step": 20680 + }, + { + "epoch": 3.155517578125e-05, + "step": 20680, + "training_step_time": 0.18686270713806152 + }, + { + "epoch": 3.155670166015625e-05, + "model_forward_time": 0.02387070655822754, + "step": 20681 + }, + { + "epoch": 3.155670166015625e-05, + "step": 20681, + "training_step_time": 0.21013927459716797 + }, + { + "epoch": 3.15582275390625e-05, + "model_forward_time": 0.023865222930908203, + "step": 20682 + }, + { + "epoch": 3.15582275390625e-05, + "step": 20682, + "training_step_time": 0.20511341094970703 + }, + { + "epoch": 3.155975341796875e-05, + "model_forward_time": 0.024127483367919922, + "step": 20683 + }, + { + "epoch": 3.155975341796875e-05, + "step": 20683, + "training_step_time": 0.23737788200378418 + }, + { + "epoch": 3.1561279296875e-05, + "model_forward_time": 0.02369999885559082, + "step": 20684 + }, + { + "epoch": 3.1561279296875e-05, + "step": 20684, + "training_step_time": 0.2355637550354004 + }, + { + "epoch": 3.156280517578125e-05, + "model_forward_time": 0.023929119110107422, + "step": 20685 + }, + { + "epoch": 3.156280517578125e-05, + "step": 20685, + "training_step_time": 0.19212818145751953 + }, + { + "epoch": 3.15643310546875e-05, + "model_forward_time": 0.024302005767822266, + "step": 20686 + }, + { + "epoch": 3.15643310546875e-05, + "step": 20686, + "training_step_time": 0.1812591552734375 + }, + { + "epoch": 3.156585693359375e-05, + "model_forward_time": 0.02437567710876465, + "step": 20687 + }, + { + "epoch": 3.156585693359375e-05, + "step": 20687, + "training_step_time": 0.16174936294555664 + }, + { + "epoch": 3.15673828125e-05, + "model_forward_time": 0.0240023136138916, + "step": 20688 + }, + { + "epoch": 3.15673828125e-05, + "step": 20688, + "training_step_time": 0.10582351684570312 + }, + { + "epoch": 3.156890869140625e-05, + "model_forward_time": 0.02437877655029297, + "step": 20689 + }, + { + "epoch": 3.156890869140625e-05, + "step": 20689, + "training_step_time": 0.10163307189941406 + }, + { + "epoch": 3.15704345703125e-05, + "grad_norm": 0.33185362815856934, + "learning_rate": 2.4098649531343497e-05, + "loss": 0.0061, + "step": 20690 + }, + { + "epoch": 3.15704345703125e-05, + "model_forward_time": 0.024849414825439453, + "step": 20690 + }, + { + "epoch": 3.15704345703125e-05, + "step": 20690, + "training_step_time": 0.10292291641235352 + }, + { + "epoch": 3.157196044921875e-05, + "model_forward_time": 0.02507305145263672, + "step": 20691 + }, + { + "epoch": 3.157196044921875e-05, + "step": 20691, + "training_step_time": 0.10837793350219727 + }, + { + "epoch": 3.1573486328125e-05, + "model_forward_time": 0.02445054054260254, + "step": 20692 + }, + { + "epoch": 3.1573486328125e-05, + "step": 20692, + "training_step_time": 0.14047598838806152 + }, + { + "epoch": 3.157501220703125e-05, + "model_forward_time": 0.025241851806640625, + "step": 20693 + }, + { + "epoch": 3.157501220703125e-05, + "step": 20693, + "training_step_time": 0.16272282600402832 + }, + { + "epoch": 3.15765380859375e-05, + "model_forward_time": 0.0245513916015625, + "step": 20694 + }, + { + "epoch": 3.15765380859375e-05, + "step": 20694, + "training_step_time": 0.19331026077270508 + }, + { + "epoch": 3.157806396484375e-05, + "model_forward_time": 0.024148941040039062, + "step": 20695 + }, + { + "epoch": 3.157806396484375e-05, + "step": 20695, + "training_step_time": 0.14586973190307617 + }, + { + "epoch": 3.157958984375e-05, + "model_forward_time": 0.02462601661682129, + "step": 20696 + }, + { + "epoch": 3.157958984375e-05, + "step": 20696, + "training_step_time": 0.2112882137298584 + }, + { + "epoch": 3.158111572265625e-05, + "model_forward_time": 0.024312257766723633, + "step": 20697 + }, + { + "epoch": 3.158111572265625e-05, + "step": 20697, + "training_step_time": 0.12217164039611816 + }, + { + "epoch": 3.15826416015625e-05, + "model_forward_time": 0.02426624298095703, + "step": 20698 + }, + { + "epoch": 3.15826416015625e-05, + "step": 20698, + "training_step_time": 0.11878561973571777 + }, + { + "epoch": 3.158416748046875e-05, + "model_forward_time": 0.02573680877685547, + "step": 20699 + }, + { + "epoch": 3.158416748046875e-05, + "step": 20699, + "training_step_time": 0.1198875904083252 + }, + { + "epoch": 3.1585693359375e-05, + "grad_norm": 0.12606415152549744, + "learning_rate": 2.405152131093926e-05, + "loss": 0.009, + "step": 20700 + }, + { + "epoch": 3.1585693359375e-05, + "model_forward_time": 0.025359630584716797, + "step": 20700 + }, + { + "epoch": 3.1585693359375e-05, + "step": 20700, + "training_step_time": 0.13372564315795898 + }, + { + "epoch": 3.158721923828125e-05, + "model_forward_time": 0.025069475173950195, + "step": 20701 + }, + { + "epoch": 3.158721923828125e-05, + "step": 20701, + "training_step_time": 0.13780617713928223 + }, + { + "epoch": 3.15887451171875e-05, + "model_forward_time": 0.02647089958190918, + "step": 20702 + }, + { + "epoch": 3.15887451171875e-05, + "step": 20702, + "training_step_time": 0.10906100273132324 + }, + { + "epoch": 3.159027099609375e-05, + "model_forward_time": 0.025393247604370117, + "step": 20703 + }, + { + "epoch": 3.159027099609375e-05, + "step": 20703, + "training_step_time": 0.10835838317871094 + }, + { + "epoch": 3.1591796875e-05, + "model_forward_time": 0.025197267532348633, + "step": 20704 + }, + { + "epoch": 3.1591796875e-05, + "step": 20704, + "training_step_time": 0.11574506759643555 + }, + { + "epoch": 3.159332275390625e-05, + "model_forward_time": 0.02531147003173828, + "step": 20705 + }, + { + "epoch": 3.159332275390625e-05, + "step": 20705, + "training_step_time": 0.10699319839477539 + }, + { + "epoch": 3.15948486328125e-05, + "model_forward_time": 0.026636362075805664, + "step": 20706 + }, + { + "epoch": 3.15948486328125e-05, + "step": 20706, + "training_step_time": 0.19372963905334473 + }, + { + "epoch": 3.159637451171875e-05, + "model_forward_time": 0.024389982223510742, + "step": 20707 + }, + { + "epoch": 3.159637451171875e-05, + "step": 20707, + "training_step_time": 0.10294604301452637 + }, + { + "epoch": 3.1597900390625e-05, + "model_forward_time": 0.024550914764404297, + "step": 20708 + }, + { + "epoch": 3.1597900390625e-05, + "step": 20708, + "training_step_time": 0.10239434242248535 + }, + { + "epoch": 3.159942626953125e-05, + "model_forward_time": 0.025450468063354492, + "step": 20709 + }, + { + "epoch": 3.159942626953125e-05, + "step": 20709, + "training_step_time": 0.10691118240356445 + }, + { + "epoch": 3.16009521484375e-05, + "grad_norm": 0.1511950045824051, + "learning_rate": 2.4004424620383386e-05, + "loss": 0.0048, + "step": 20710 + }, + { + "epoch": 3.16009521484375e-05, + "model_forward_time": 0.025044679641723633, + "step": 20710 + }, + { + "epoch": 3.16009521484375e-05, + "step": 20710, + "training_step_time": 0.11053848266601562 + }, + { + "epoch": 3.160247802734375e-05, + "model_forward_time": 0.02503657341003418, + "step": 20711 + }, + { + "epoch": 3.160247802734375e-05, + "step": 20711, + "training_step_time": 0.11397504806518555 + }, + { + "epoch": 3.160400390625e-05, + "model_forward_time": 0.025638580322265625, + "step": 20712 + }, + { + "epoch": 3.160400390625e-05, + "step": 20712, + "training_step_time": 0.11626815795898438 + }, + { + "epoch": 3.160552978515625e-05, + "model_forward_time": 0.025356054306030273, + "step": 20713 + }, + { + "epoch": 3.160552978515625e-05, + "step": 20713, + "training_step_time": 0.1189579963684082 + }, + { + "epoch": 3.16070556640625e-05, + "model_forward_time": 0.025310516357421875, + "step": 20714 + }, + { + "epoch": 3.16070556640625e-05, + "step": 20714, + "training_step_time": 0.11876296997070312 + }, + { + "epoch": 3.160858154296875e-05, + "model_forward_time": 0.02493119239807129, + "step": 20715 + }, + { + "epoch": 3.160858154296875e-05, + "step": 20715, + "training_step_time": 0.11573910713195801 + }, + { + "epoch": 3.1610107421875e-05, + "model_forward_time": 0.025492191314697266, + "step": 20716 + }, + { + "epoch": 3.1610107421875e-05, + "step": 20716, + "training_step_time": 0.11744976043701172 + }, + { + "epoch": 3.161163330078125e-05, + "model_forward_time": 0.024964332580566406, + "step": 20717 + }, + { + "epoch": 3.161163330078125e-05, + "step": 20717, + "training_step_time": 0.11475944519042969 + }, + { + "epoch": 3.16131591796875e-05, + "model_forward_time": 0.02512955665588379, + "step": 20718 + }, + { + "epoch": 3.16131591796875e-05, + "step": 20718, + "training_step_time": 0.11520218849182129 + }, + { + "epoch": 3.161468505859375e-05, + "model_forward_time": 0.025191307067871094, + "step": 20719 + }, + { + "epoch": 3.161468505859375e-05, + "step": 20719, + "training_step_time": 0.11188721656799316 + }, + { + "epoch": 3.16162109375e-05, + "grad_norm": 0.11043795943260193, + "learning_rate": 2.3957359516902845e-05, + "loss": 0.0109, + "step": 20720 + }, + { + "epoch": 3.16162109375e-05, + "model_forward_time": 0.025226831436157227, + "step": 20720 + }, + { + "epoch": 3.16162109375e-05, + "step": 20720, + "training_step_time": 0.11263847351074219 + }, + { + "epoch": 3.161773681640625e-05, + "model_forward_time": 0.025321483612060547, + "step": 20721 + }, + { + "epoch": 3.161773681640625e-05, + "step": 20721, + "training_step_time": 0.18237900733947754 + }, + { + "epoch": 3.16192626953125e-05, + "model_forward_time": 0.024611473083496094, + "step": 20722 + }, + { + "epoch": 3.16192626953125e-05, + "step": 20722, + "training_step_time": 0.11699271202087402 + }, + { + "epoch": 3.162078857421875e-05, + "model_forward_time": 0.02420520782470703, + "step": 20723 + }, + { + "epoch": 3.162078857421875e-05, + "step": 20723, + "training_step_time": 0.12441778182983398 + }, + { + "epoch": 3.1622314453125e-05, + "model_forward_time": 0.02517223358154297, + "step": 20724 + }, + { + "epoch": 3.1622314453125e-05, + "step": 20724, + "training_step_time": 0.14185619354248047 + }, + { + "epoch": 3.162384033203125e-05, + "model_forward_time": 0.02467513084411621, + "step": 20725 + }, + { + "epoch": 3.162384033203125e-05, + "step": 20725, + "training_step_time": 0.1175391674041748 + }, + { + "epoch": 3.16253662109375e-05, + "model_forward_time": 0.024672746658325195, + "step": 20726 + }, + { + "epoch": 3.16253662109375e-05, + "step": 20726, + "training_step_time": 0.1289510726928711 + }, + { + "epoch": 3.162689208984375e-05, + "model_forward_time": 0.025783777236938477, + "step": 20727 + }, + { + "epoch": 3.162689208984375e-05, + "step": 20727, + "training_step_time": 0.11605048179626465 + }, + { + "epoch": 3.162841796875e-05, + "model_forward_time": 0.025492429733276367, + "step": 20728 + }, + { + "epoch": 3.162841796875e-05, + "step": 20728, + "training_step_time": 0.1086890697479248 + }, + { + "epoch": 3.162994384765625e-05, + "model_forward_time": 0.02489447593688965, + "step": 20729 + }, + { + "epoch": 3.162994384765625e-05, + "step": 20729, + "training_step_time": 0.11183643341064453 + }, + { + "epoch": 3.16314697265625e-05, + "grad_norm": 0.16555260121822357, + "learning_rate": 2.3910326057686127e-05, + "loss": 0.0165, + "step": 20730 + }, + { + "epoch": 3.16314697265625e-05, + "model_forward_time": 0.02549123764038086, + "step": 20730 + }, + { + "epoch": 3.16314697265625e-05, + "step": 20730, + "training_step_time": 0.11242341995239258 + }, + { + "epoch": 3.163299560546875e-05, + "model_forward_time": 0.02541184425354004, + "step": 20731 + }, + { + "epoch": 3.163299560546875e-05, + "step": 20731, + "training_step_time": 0.10867094993591309 + }, + { + "epoch": 3.1634521484375e-05, + "model_forward_time": 0.025127172470092773, + "step": 20732 + }, + { + "epoch": 3.1634521484375e-05, + "step": 20732, + "training_step_time": 0.10868978500366211 + }, + { + "epoch": 3.163604736328125e-05, + "model_forward_time": 0.025144577026367188, + "step": 20733 + }, + { + "epoch": 3.163604736328125e-05, + "step": 20733, + "training_step_time": 0.11056303977966309 + }, + { + "epoch": 3.16375732421875e-05, + "model_forward_time": 0.025198698043823242, + "step": 20734 + }, + { + "epoch": 3.16375732421875e-05, + "step": 20734, + "training_step_time": 0.11150646209716797 + }, + { + "epoch": 3.163909912109375e-05, + "model_forward_time": 0.025629520416259766, + "step": 20735 + }, + { + "epoch": 3.163909912109375e-05, + "step": 20735, + "training_step_time": 0.1132807731628418 + }, + { + "epoch": 3.1640625e-05, + "model_forward_time": 0.026111602783203125, + "step": 20736 + }, + { + "epoch": 3.1640625e-05, + "step": 20736, + "training_step_time": 0.1094660758972168 + }, + { + "epoch": 3.164215087890625e-05, + "model_forward_time": 0.025235652923583984, + "step": 20737 + }, + { + "epoch": 3.164215087890625e-05, + "step": 20737, + "training_step_time": 0.14861845970153809 + }, + { + "epoch": 3.16436767578125e-05, + "model_forward_time": 0.025146961212158203, + "step": 20738 + }, + { + "epoch": 3.16436767578125e-05, + "step": 20738, + "training_step_time": 0.16847944259643555 + }, + { + "epoch": 3.164520263671875e-05, + "model_forward_time": 0.024373292922973633, + "step": 20739 + }, + { + "epoch": 3.164520263671875e-05, + "step": 20739, + "training_step_time": 0.10924839973449707 + }, + { + "epoch": 3.1646728515625e-05, + "grad_norm": 0.11490935832262039, + "learning_rate": 2.3863324299883366e-05, + "loss": 0.004, + "step": 20740 + }, + { + "epoch": 3.1646728515625e-05, + "model_forward_time": 0.024765729904174805, + "step": 20740 + }, + { + "epoch": 3.1646728515625e-05, + "step": 20740, + "training_step_time": 0.13894963264465332 + }, + { + "epoch": 3.164825439453125e-05, + "model_forward_time": 0.025066375732421875, + "step": 20741 + }, + { + "epoch": 3.164825439453125e-05, + "step": 20741, + "training_step_time": 0.19254350662231445 + }, + { + "epoch": 3.16497802734375e-05, + "model_forward_time": 0.024647235870361328, + "step": 20742 + }, + { + "epoch": 3.16497802734375e-05, + "step": 20742, + "training_step_time": 0.15812039375305176 + }, + { + "epoch": 3.165130615234375e-05, + "model_forward_time": 0.024166107177734375, + "step": 20743 + }, + { + "epoch": 3.165130615234375e-05, + "step": 20743, + "training_step_time": 0.12479496002197266 + }, + { + "epoch": 3.165283203125e-05, + "model_forward_time": 0.024815797805786133, + "step": 20744 + }, + { + "epoch": 3.165283203125e-05, + "step": 20744, + "training_step_time": 0.1082921028137207 + }, + { + "epoch": 3.165435791015625e-05, + "model_forward_time": 0.02497696876525879, + "step": 20745 + }, + { + "epoch": 3.165435791015625e-05, + "step": 20745, + "training_step_time": 0.12818360328674316 + }, + { + "epoch": 3.16558837890625e-05, + "model_forward_time": 0.024749755859375, + "step": 20746 + }, + { + "epoch": 3.16558837890625e-05, + "step": 20746, + "training_step_time": 0.11229085922241211 + }, + { + "epoch": 3.165740966796875e-05, + "model_forward_time": 0.025187253952026367, + "step": 20747 + }, + { + "epoch": 3.165740966796875e-05, + "step": 20747, + "training_step_time": 0.1298367977142334 + }, + { + "epoch": 3.1658935546875e-05, + "model_forward_time": 0.025503873825073242, + "step": 20748 + }, + { + "epoch": 3.1658935546875e-05, + "step": 20748, + "training_step_time": 0.11685800552368164 + }, + { + "epoch": 3.166046142578125e-05, + "model_forward_time": 0.02421259880065918, + "step": 20749 + }, + { + "epoch": 3.166046142578125e-05, + "step": 20749, + "training_step_time": 0.18486928939819336 + }, + { + "epoch": 3.16619873046875e-05, + "grad_norm": 0.2567179799079895, + "learning_rate": 2.381635430060611e-05, + "loss": 0.0123, + "step": 20750 + }, + { + "epoch": 3.16619873046875e-05, + "model_forward_time": 0.024132490158081055, + "step": 20750 + }, + { + "epoch": 3.16619873046875e-05, + "step": 20750, + "training_step_time": 0.20169949531555176 + }, + { + "epoch": 3.166351318359375e-05, + "model_forward_time": 0.02469801902770996, + "step": 20751 + }, + { + "epoch": 3.166351318359375e-05, + "step": 20751, + "training_step_time": 0.21213150024414062 + }, + { + "epoch": 3.16650390625e-05, + "model_forward_time": 0.02429342269897461, + "step": 20752 + }, + { + "epoch": 3.16650390625e-05, + "step": 20752, + "training_step_time": 0.1971750259399414 + }, + { + "epoch": 3.166656494140625e-05, + "model_forward_time": 0.02386188507080078, + "step": 20753 + }, + { + "epoch": 3.166656494140625e-05, + "step": 20753, + "training_step_time": 0.19209718704223633 + }, + { + "epoch": 3.16680908203125e-05, + "model_forward_time": 0.024041175842285156, + "step": 20754 + }, + { + "epoch": 3.16680908203125e-05, + "step": 20754, + "training_step_time": 0.18244719505310059 + }, + { + "epoch": 3.166961669921875e-05, + "model_forward_time": 0.02425408363342285, + "step": 20755 + }, + { + "epoch": 3.166961669921875e-05, + "step": 20755, + "training_step_time": 0.1083681583404541 + }, + { + "epoch": 3.1671142578125e-05, + "model_forward_time": 0.024461984634399414, + "step": 20756 + }, + { + "epoch": 3.1671142578125e-05, + "step": 20756, + "training_step_time": 0.1056220531463623 + }, + { + "epoch": 3.167266845703125e-05, + "model_forward_time": 0.024915218353271484, + "step": 20757 + }, + { + "epoch": 3.167266845703125e-05, + "step": 20757, + "training_step_time": 0.10996294021606445 + }, + { + "epoch": 3.16741943359375e-05, + "model_forward_time": 0.025127887725830078, + "step": 20758 + }, + { + "epoch": 3.16741943359375e-05, + "step": 20758, + "training_step_time": 0.11100053787231445 + }, + { + "epoch": 3.167572021484375e-05, + "model_forward_time": 0.025577306747436523, + "step": 20759 + }, + { + "epoch": 3.167572021484375e-05, + "step": 20759, + "training_step_time": 0.11177563667297363 + }, + { + "epoch": 3.167724609375e-05, + "grad_norm": 0.24263069033622742, + "learning_rate": 2.3769416116927335e-05, + "loss": 0.009, + "step": 20760 + }, + { + "epoch": 3.167724609375e-05, + "model_forward_time": 0.02530074119567871, + "step": 20760 + }, + { + "epoch": 3.167724609375e-05, + "step": 20760, + "training_step_time": 0.10830831527709961 + }, + { + "epoch": 3.167877197265625e-05, + "model_forward_time": 0.024767637252807617, + "step": 20761 + }, + { + "epoch": 3.167877197265625e-05, + "step": 20761, + "training_step_time": 0.11713171005249023 + }, + { + "epoch": 3.16802978515625e-05, + "model_forward_time": 0.024916887283325195, + "step": 20762 + }, + { + "epoch": 3.16802978515625e-05, + "step": 20762, + "training_step_time": 0.11265373229980469 + }, + { + "epoch": 3.168182373046875e-05, + "model_forward_time": 0.025521278381347656, + "step": 20763 + }, + { + "epoch": 3.168182373046875e-05, + "step": 20763, + "training_step_time": 0.11465263366699219 + }, + { + "epoch": 3.1683349609375e-05, + "model_forward_time": 0.0250091552734375, + "step": 20764 + }, + { + "epoch": 3.1683349609375e-05, + "step": 20764, + "training_step_time": 0.12122321128845215 + }, + { + "epoch": 3.168487548828125e-05, + "model_forward_time": 0.025263547897338867, + "step": 20765 + }, + { + "epoch": 3.168487548828125e-05, + "step": 20765, + "training_step_time": 0.1185762882232666 + }, + { + "epoch": 3.16864013671875e-05, + "model_forward_time": 0.02532172203063965, + "step": 20766 + }, + { + "epoch": 3.16864013671875e-05, + "step": 20766, + "training_step_time": 0.13256096839904785 + }, + { + "epoch": 3.168792724609375e-05, + "model_forward_time": 0.02491307258605957, + "step": 20767 + }, + { + "epoch": 3.168792724609375e-05, + "step": 20767, + "training_step_time": 0.1627037525177002 + }, + { + "epoch": 3.1689453125e-05, + "model_forward_time": 0.024481534957885742, + "step": 20768 + }, + { + "epoch": 3.1689453125e-05, + "step": 20768, + "training_step_time": 0.22065401077270508 + }, + { + "epoch": 3.169097900390625e-05, + "model_forward_time": 0.025542020797729492, + "step": 20769 + }, + { + "epoch": 3.169097900390625e-05, + "step": 20769, + "training_step_time": 0.12055444717407227 + }, + { + "epoch": 3.16925048828125e-05, + "grad_norm": 0.17910423874855042, + "learning_rate": 2.3722509805881356e-05, + "loss": 0.0143, + "step": 20770 + }, + { + "epoch": 3.16925048828125e-05, + "model_forward_time": 0.02405571937561035, + "step": 20770 + }, + { + "epoch": 3.16925048828125e-05, + "step": 20770, + "training_step_time": 0.11540937423706055 + }, + { + "epoch": 3.169403076171875e-05, + "model_forward_time": 0.02532505989074707, + "step": 20771 + }, + { + "epoch": 3.169403076171875e-05, + "step": 20771, + "training_step_time": 0.1151125431060791 + }, + { + "epoch": 3.1695556640625e-05, + "model_forward_time": 0.02534317970275879, + "step": 20772 + }, + { + "epoch": 3.1695556640625e-05, + "step": 20772, + "training_step_time": 0.1123661994934082 + }, + { + "epoch": 3.169708251953125e-05, + "model_forward_time": 0.02692890167236328, + "step": 20773 + }, + { + "epoch": 3.169708251953125e-05, + "step": 20773, + "training_step_time": 0.11022472381591797 + }, + { + "epoch": 3.16986083984375e-05, + "model_forward_time": 0.02493143081665039, + "step": 20774 + }, + { + "epoch": 3.16986083984375e-05, + "step": 20774, + "training_step_time": 0.10936379432678223 + }, + { + "epoch": 3.170013427734375e-05, + "model_forward_time": 0.027202606201171875, + "step": 20775 + }, + { + "epoch": 3.170013427734375e-05, + "step": 20775, + "training_step_time": 0.10977315902709961 + }, + { + "epoch": 3.170166015625e-05, + "model_forward_time": 0.026238441467285156, + "step": 20776 + }, + { + "epoch": 3.170166015625e-05, + "step": 20776, + "training_step_time": 0.10866665840148926 + }, + { + "epoch": 3.170318603515625e-05, + "model_forward_time": 0.0288393497467041, + "step": 20777 + }, + { + "epoch": 3.170318603515625e-05, + "step": 20777, + "training_step_time": 0.11051654815673828 + }, + { + "epoch": 3.17047119140625e-05, + "model_forward_time": 0.025334596633911133, + "step": 20778 + }, + { + "epoch": 3.17047119140625e-05, + "step": 20778, + "training_step_time": 0.10550570487976074 + }, + { + "epoch": 3.170623779296875e-05, + "model_forward_time": 0.02440190315246582, + "step": 20779 + }, + { + "epoch": 3.170623779296875e-05, + "step": 20779, + "training_step_time": 0.13516521453857422 + }, + { + "epoch": 3.1707763671875e-05, + "grad_norm": 0.238984152674675, + "learning_rate": 2.3675635424463754e-05, + "loss": 0.0124, + "step": 20780 + }, + { + "epoch": 3.1707763671875e-05, + "model_forward_time": 0.025301456451416016, + "step": 20780 + }, + { + "epoch": 3.1707763671875e-05, + "step": 20780, + "training_step_time": 0.11484432220458984 + }, + { + "epoch": 3.170928955078125e-05, + "model_forward_time": 0.025111913681030273, + "step": 20781 + }, + { + "epoch": 3.170928955078125e-05, + "step": 20781, + "training_step_time": 0.11349177360534668 + }, + { + "epoch": 3.17108154296875e-05, + "model_forward_time": 0.025307655334472656, + "step": 20782 + }, + { + "epoch": 3.17108154296875e-05, + "step": 20782, + "training_step_time": 0.11896395683288574 + }, + { + "epoch": 3.171234130859375e-05, + "model_forward_time": 0.024878501892089844, + "step": 20783 + }, + { + "epoch": 3.171234130859375e-05, + "step": 20783, + "training_step_time": 0.10982036590576172 + }, + { + "epoch": 3.17138671875e-05, + "model_forward_time": 0.02583789825439453, + "step": 20784 + }, + { + "epoch": 3.17138671875e-05, + "step": 20784, + "training_step_time": 0.12938475608825684 + }, + { + "epoch": 3.171539306640625e-05, + "model_forward_time": 0.025821924209594727, + "step": 20785 + }, + { + "epoch": 3.171539306640625e-05, + "step": 20785, + "training_step_time": 0.20596623420715332 + }, + { + "epoch": 3.17169189453125e-05, + "model_forward_time": 0.024789810180664062, + "step": 20786 + }, + { + "epoch": 3.17169189453125e-05, + "step": 20786, + "training_step_time": 0.1269512176513672 + }, + { + "epoch": 3.171844482421875e-05, + "model_forward_time": 0.024674654006958008, + "step": 20787 + }, + { + "epoch": 3.171844482421875e-05, + "step": 20787, + "training_step_time": 0.1154944896697998 + }, + { + "epoch": 3.1719970703125e-05, + "model_forward_time": 0.02520585060119629, + "step": 20788 + }, + { + "epoch": 3.1719970703125e-05, + "step": 20788, + "training_step_time": 0.14835119247436523 + }, + { + "epoch": 3.172149658203125e-05, + "model_forward_time": 0.025281190872192383, + "step": 20789 + }, + { + "epoch": 3.172149658203125e-05, + "step": 20789, + "training_step_time": 0.14440512657165527 + }, + { + "epoch": 3.17230224609375e-05, + "grad_norm": 0.12975092232227325, + "learning_rate": 2.362879302963135e-05, + "loss": 0.0047, + "step": 20790 + }, + { + "epoch": 3.17230224609375e-05, + "model_forward_time": 0.024744033813476562, + "step": 20790 + }, + { + "epoch": 3.17230224609375e-05, + "step": 20790, + "training_step_time": 0.11063838005065918 + }, + { + "epoch": 3.172454833984375e-05, + "model_forward_time": 0.026821613311767578, + "step": 20791 + }, + { + "epoch": 3.172454833984375e-05, + "step": 20791, + "training_step_time": 0.11112427711486816 + }, + { + "epoch": 3.172607421875e-05, + "model_forward_time": 0.02529740333557129, + "step": 20792 + }, + { + "epoch": 3.172607421875e-05, + "step": 20792, + "training_step_time": 0.10890007019042969 + }, + { + "epoch": 3.172760009765625e-05, + "model_forward_time": 0.024281024932861328, + "step": 20793 + }, + { + "epoch": 3.172760009765625e-05, + "step": 20793, + "training_step_time": 0.1810460090637207 + }, + { + "epoch": 3.17291259765625e-05, + "model_forward_time": 0.024836301803588867, + "step": 20794 + }, + { + "epoch": 3.17291259765625e-05, + "step": 20794, + "training_step_time": 0.11632943153381348 + }, + { + "epoch": 3.173065185546875e-05, + "model_forward_time": 0.024623394012451172, + "step": 20795 + }, + { + "epoch": 3.173065185546875e-05, + "step": 20795, + "training_step_time": 0.11379265785217285 + }, + { + "epoch": 3.1732177734375e-05, + "model_forward_time": 0.025310039520263672, + "step": 20796 + }, + { + "epoch": 3.1732177734375e-05, + "step": 20796, + "training_step_time": 0.11301898956298828 + }, + { + "epoch": 3.173370361328125e-05, + "model_forward_time": 0.025574922561645508, + "step": 20797 + }, + { + "epoch": 3.173370361328125e-05, + "step": 20797, + "training_step_time": 0.1125340461730957 + }, + { + "epoch": 3.17352294921875e-05, + "model_forward_time": 0.025353193283081055, + "step": 20798 + }, + { + "epoch": 3.17352294921875e-05, + "step": 20798, + "training_step_time": 0.1081991195678711 + }, + { + "epoch": 3.173675537109375e-05, + "model_forward_time": 0.02575516700744629, + "step": 20799 + }, + { + "epoch": 3.173675537109375e-05, + "step": 20799, + "training_step_time": 0.10750007629394531 + }, + { + "epoch": 3.173828125e-05, + "grad_norm": 0.1220598816871643, + "learning_rate": 2.3581982678302063e-05, + "loss": 0.0123, + "step": 20800 + }, + { + "epoch": 3.173828125e-05, + "model_forward_time": 0.025641679763793945, + "step": 20800 + }, + { + "epoch": 3.173828125e-05, + "step": 20800, + "training_step_time": 0.11035013198852539 + }, + { + "epoch": 3.173980712890625e-05, + "model_forward_time": 0.025315284729003906, + "step": 20801 + }, + { + "epoch": 3.173980712890625e-05, + "step": 20801, + "training_step_time": 0.10729002952575684 + }, + { + "epoch": 3.17413330078125e-05, + "model_forward_time": 0.025231361389160156, + "step": 20802 + }, + { + "epoch": 3.17413330078125e-05, + "step": 20802, + "training_step_time": 0.10660552978515625 + }, + { + "epoch": 3.174285888671875e-05, + "model_forward_time": 0.0252687931060791, + "step": 20803 + }, + { + "epoch": 3.174285888671875e-05, + "step": 20803, + "training_step_time": 0.10571980476379395 + }, + { + "epoch": 3.1744384765625e-05, + "model_forward_time": 0.02560281753540039, + "step": 20804 + }, + { + "epoch": 3.1744384765625e-05, + "step": 20804, + "training_step_time": 0.10579252243041992 + }, + { + "epoch": 3.174591064453125e-05, + "model_forward_time": 0.024433612823486328, + "step": 20805 + }, + { + "epoch": 3.174591064453125e-05, + "step": 20805, + "training_step_time": 0.10732126235961914 + }, + { + "epoch": 3.17474365234375e-05, + "model_forward_time": 0.025250911712646484, + "step": 20806 + }, + { + "epoch": 3.17474365234375e-05, + "step": 20806, + "training_step_time": 0.10862851142883301 + }, + { + "epoch": 3.174896240234375e-05, + "model_forward_time": 0.02541804313659668, + "step": 20807 + }, + { + "epoch": 3.174896240234375e-05, + "step": 20807, + "training_step_time": 0.11018991470336914 + }, + { + "epoch": 3.175048828125e-05, + "model_forward_time": 0.025758028030395508, + "step": 20808 + }, + { + "epoch": 3.175048828125e-05, + "step": 20808, + "training_step_time": 0.10549664497375488 + }, + { + "epoch": 3.175201416015625e-05, + "model_forward_time": 0.02521538734436035, + "step": 20809 + }, + { + "epoch": 3.175201416015625e-05, + "step": 20809, + "training_step_time": 0.10509085655212402 + }, + { + "epoch": 3.17535400390625e-05, + "grad_norm": 0.1310320496559143, + "learning_rate": 2.353520442735488e-05, + "loss": 0.0043, + "step": 20810 + }, + { + "epoch": 3.17535400390625e-05, + "model_forward_time": 0.0251462459564209, + "step": 20810 + }, + { + "epoch": 3.17535400390625e-05, + "step": 20810, + "training_step_time": 0.12888407707214355 + }, + { + "epoch": 3.175506591796875e-05, + "model_forward_time": 0.02557086944580078, + "step": 20811 + }, + { + "epoch": 3.175506591796875e-05, + "step": 20811, + "training_step_time": 0.11256814002990723 + }, + { + "epoch": 3.1756591796875e-05, + "model_forward_time": 0.025347232818603516, + "step": 20812 + }, + { + "epoch": 3.1756591796875e-05, + "step": 20812, + "training_step_time": 0.12863755226135254 + }, + { + "epoch": 3.175811767578125e-05, + "model_forward_time": 0.02519989013671875, + "step": 20813 + }, + { + "epoch": 3.175811767578125e-05, + "step": 20813, + "training_step_time": 0.13690567016601562 + }, + { + "epoch": 3.17596435546875e-05, + "model_forward_time": 0.02509593963623047, + "step": 20814 + }, + { + "epoch": 3.17596435546875e-05, + "step": 20814, + "training_step_time": 0.11874055862426758 + }, + { + "epoch": 3.176116943359375e-05, + "model_forward_time": 0.0266873836517334, + "step": 20815 + }, + { + "epoch": 3.176116943359375e-05, + "step": 20815, + "training_step_time": 0.12831377983093262 + }, + { + "epoch": 3.17626953125e-05, + "model_forward_time": 0.02518773078918457, + "step": 20816 + }, + { + "epoch": 3.17626953125e-05, + "step": 20816, + "training_step_time": 0.1165003776550293 + }, + { + "epoch": 3.176422119140625e-05, + "model_forward_time": 0.02521657943725586, + "step": 20817 + }, + { + "epoch": 3.176422119140625e-05, + "step": 20817, + "training_step_time": 0.10573840141296387 + }, + { + "epoch": 3.17657470703125e-05, + "model_forward_time": 0.025226354598999023, + "step": 20818 + }, + { + "epoch": 3.17657470703125e-05, + "step": 20818, + "training_step_time": 0.11005496978759766 + }, + { + "epoch": 3.176727294921875e-05, + "model_forward_time": 0.02493596076965332, + "step": 20819 + }, + { + "epoch": 3.176727294921875e-05, + "step": 20819, + "training_step_time": 0.1055607795715332 + }, + { + "epoch": 3.1768798828125e-05, + "grad_norm": 0.12881946563720703, + "learning_rate": 2.3488458333629777e-05, + "loss": 0.0153, + "step": 20820 + }, + { + "epoch": 3.1768798828125e-05, + "model_forward_time": 0.024974346160888672, + "step": 20820 + }, + { + "epoch": 3.1768798828125e-05, + "step": 20820, + "training_step_time": 0.10694003105163574 + }, + { + "epoch": 3.177032470703125e-05, + "model_forward_time": 0.02532172203063965, + "step": 20821 + }, + { + "epoch": 3.177032470703125e-05, + "step": 20821, + "training_step_time": 0.10769891738891602 + }, + { + "epoch": 3.17718505859375e-05, + "model_forward_time": 0.025367259979248047, + "step": 20822 + }, + { + "epoch": 3.17718505859375e-05, + "step": 20822, + "training_step_time": 0.10882449150085449 + }, + { + "epoch": 3.177337646484375e-05, + "model_forward_time": 0.02500772476196289, + "step": 20823 + }, + { + "epoch": 3.177337646484375e-05, + "step": 20823, + "training_step_time": 0.10730719566345215 + }, + { + "epoch": 3.177490234375e-05, + "model_forward_time": 0.025046586990356445, + "step": 20824 + }, + { + "epoch": 3.177490234375e-05, + "step": 20824, + "training_step_time": 0.1069173812866211 + }, + { + "epoch": 3.177642822265625e-05, + "model_forward_time": 0.024770498275756836, + "step": 20825 + }, + { + "epoch": 3.177642822265625e-05, + "step": 20825, + "training_step_time": 0.10652613639831543 + }, + { + "epoch": 3.17779541015625e-05, + "model_forward_time": 0.025076627731323242, + "step": 20826 + }, + { + "epoch": 3.17779541015625e-05, + "step": 20826, + "training_step_time": 0.15584397315979004 + }, + { + "epoch": 3.177947998046875e-05, + "model_forward_time": 0.024164915084838867, + "step": 20827 + }, + { + "epoch": 3.177947998046875e-05, + "step": 20827, + "training_step_time": 0.19965028762817383 + }, + { + "epoch": 3.1781005859375e-05, + "model_forward_time": 0.024282455444335938, + "step": 20828 + }, + { + "epoch": 3.1781005859375e-05, + "step": 20828, + "training_step_time": 0.20502305030822754 + }, + { + "epoch": 3.178253173828125e-05, + "model_forward_time": 0.0240018367767334, + "step": 20829 + }, + { + "epoch": 3.178253173828125e-05, + "step": 20829, + "training_step_time": 0.1793513298034668 + }, + { + "epoch": 3.17840576171875e-05, + "grad_norm": 0.2658945918083191, + "learning_rate": 2.344174445392766e-05, + "loss": 0.0046, + "step": 20830 + }, + { + "epoch": 3.17840576171875e-05, + "model_forward_time": 0.024616003036499023, + "step": 20830 + }, + { + "epoch": 3.17840576171875e-05, + "step": 20830, + "training_step_time": 0.20130038261413574 + }, + { + "epoch": 3.178558349609375e-05, + "model_forward_time": 0.023886680603027344, + "step": 20831 + }, + { + "epoch": 3.178558349609375e-05, + "step": 20831, + "training_step_time": 0.21325325965881348 + }, + { + "epoch": 3.1787109375e-05, + "model_forward_time": 0.024231433868408203, + "step": 20832 + }, + { + "epoch": 3.1787109375e-05, + "step": 20832, + "training_step_time": 0.14525938034057617 + }, + { + "epoch": 3.178863525390625e-05, + "model_forward_time": 0.024719715118408203, + "step": 20833 + }, + { + "epoch": 3.178863525390625e-05, + "step": 20833, + "training_step_time": 0.17214727401733398 + }, + { + "epoch": 3.17901611328125e-05, + "model_forward_time": 0.024571895599365234, + "step": 20834 + }, + { + "epoch": 3.17901611328125e-05, + "step": 20834, + "training_step_time": 0.16259336471557617 + }, + { + "epoch": 3.179168701171875e-05, + "model_forward_time": 0.024523258209228516, + "step": 20835 + }, + { + "epoch": 3.179168701171875e-05, + "step": 20835, + "training_step_time": 0.10352969169616699 + }, + { + "epoch": 3.1793212890625e-05, + "model_forward_time": 0.025072336196899414, + "step": 20836 + }, + { + "epoch": 3.1793212890625e-05, + "step": 20836, + "training_step_time": 0.12267017364501953 + }, + { + "epoch": 3.179473876953125e-05, + "model_forward_time": 0.02525019645690918, + "step": 20837 + }, + { + "epoch": 3.179473876953125e-05, + "step": 20837, + "training_step_time": 0.10623574256896973 + }, + { + "epoch": 3.17962646484375e-05, + "model_forward_time": 0.025304079055786133, + "step": 20838 + }, + { + "epoch": 3.17962646484375e-05, + "step": 20838, + "training_step_time": 0.19823503494262695 + }, + { + "epoch": 3.179779052734375e-05, + "model_forward_time": 0.023879528045654297, + "step": 20839 + }, + { + "epoch": 3.179779052734375e-05, + "step": 20839, + "training_step_time": 0.10929751396179199 + }, + { + "epoch": 3.179931640625e-05, + "grad_norm": 0.13971443474292755, + "learning_rate": 2.339506284501033e-05, + "loss": 0.0087, + "step": 20840 + }, + { + "epoch": 3.179931640625e-05, + "model_forward_time": 0.02488565444946289, + "step": 20840 + }, + { + "epoch": 3.179931640625e-05, + "step": 20840, + "training_step_time": 0.10435342788696289 + }, + { + "epoch": 3.180084228515625e-05, + "model_forward_time": 0.025182008743286133, + "step": 20841 + }, + { + "epoch": 3.180084228515625e-05, + "step": 20841, + "training_step_time": 0.10995030403137207 + }, + { + "epoch": 3.18023681640625e-05, + "model_forward_time": 0.026810407638549805, + "step": 20842 + }, + { + "epoch": 3.18023681640625e-05, + "step": 20842, + "training_step_time": 0.10855436325073242 + }, + { + "epoch": 3.180389404296875e-05, + "model_forward_time": 0.02527141571044922, + "step": 20843 + }, + { + "epoch": 3.180389404296875e-05, + "step": 20843, + "training_step_time": 0.10493826866149902 + }, + { + "epoch": 3.1805419921875e-05, + "model_forward_time": 0.025180339813232422, + "step": 20844 + }, + { + "epoch": 3.1805419921875e-05, + "step": 20844, + "training_step_time": 0.10901880264282227 + }, + { + "epoch": 3.180694580078125e-05, + "model_forward_time": 0.02506566047668457, + "step": 20845 + }, + { + "epoch": 3.180694580078125e-05, + "step": 20845, + "training_step_time": 0.10476016998291016 + }, + { + "epoch": 3.18084716796875e-05, + "model_forward_time": 0.02523016929626465, + "step": 20846 + }, + { + "epoch": 3.18084716796875e-05, + "step": 20846, + "training_step_time": 0.10540056228637695 + }, + { + "epoch": 3.180999755859375e-05, + "model_forward_time": 0.025115013122558594, + "step": 20847 + }, + { + "epoch": 3.180999755859375e-05, + "step": 20847, + "training_step_time": 0.10620665550231934 + }, + { + "epoch": 3.18115234375e-05, + "model_forward_time": 0.02496027946472168, + "step": 20848 + }, + { + "epoch": 3.18115234375e-05, + "step": 20848, + "training_step_time": 0.10468721389770508 + }, + { + "epoch": 3.181304931640625e-05, + "model_forward_time": 0.024091243743896484, + "step": 20849 + }, + { + "epoch": 3.181304931640625e-05, + "step": 20849, + "training_step_time": 0.10612297058105469 + }, + { + "epoch": 3.18145751953125e-05, + "grad_norm": 0.128709614276886, + "learning_rate": 2.3348413563600325e-05, + "loss": 0.0072, + "step": 20850 + }, + { + "epoch": 3.18145751953125e-05, + "model_forward_time": 0.0250856876373291, + "step": 20850 + }, + { + "epoch": 3.18145751953125e-05, + "step": 20850, + "training_step_time": 0.11839056015014648 + }, + { + "epoch": 3.181610107421875e-05, + "model_forward_time": 0.025763988494873047, + "step": 20851 + }, + { + "epoch": 3.181610107421875e-05, + "step": 20851, + "training_step_time": 0.14472746849060059 + }, + { + "epoch": 3.1817626953125e-05, + "model_forward_time": 0.024891376495361328, + "step": 20852 + }, + { + "epoch": 3.1817626953125e-05, + "step": 20852, + "training_step_time": 0.13581252098083496 + }, + { + "epoch": 3.181915283203125e-05, + "model_forward_time": 0.02466750144958496, + "step": 20853 + }, + { + "epoch": 3.181915283203125e-05, + "step": 20853, + "training_step_time": 0.22120118141174316 + }, + { + "epoch": 3.18206787109375e-05, + "model_forward_time": 0.024425029754638672, + "step": 20854 + }, + { + "epoch": 3.18206787109375e-05, + "step": 20854, + "training_step_time": 0.13273167610168457 + }, + { + "epoch": 3.182220458984375e-05, + "model_forward_time": 0.024687767028808594, + "step": 20855 + }, + { + "epoch": 3.182220458984375e-05, + "step": 20855, + "training_step_time": 0.15469598770141602 + }, + { + "epoch": 3.182373046875e-05, + "model_forward_time": 0.024582386016845703, + "step": 20856 + }, + { + "epoch": 3.182373046875e-05, + "step": 20856, + "training_step_time": 0.13431477546691895 + }, + { + "epoch": 3.182525634765625e-05, + "model_forward_time": 0.026174306869506836, + "step": 20857 + }, + { + "epoch": 3.182525634765625e-05, + "step": 20857, + "training_step_time": 0.21352767944335938 + }, + { + "epoch": 3.18267822265625e-05, + "model_forward_time": 0.024483919143676758, + "step": 20858 + }, + { + "epoch": 3.18267822265625e-05, + "step": 20858, + "training_step_time": 0.12007617950439453 + }, + { + "epoch": 3.182830810546875e-05, + "model_forward_time": 0.02412128448486328, + "step": 20859 + }, + { + "epoch": 3.182830810546875e-05, + "step": 20859, + "training_step_time": 0.10243535041809082 + }, + { + "epoch": 3.1829833984375e-05, + "grad_norm": 0.23636561632156372, + "learning_rate": 2.3301796666380898e-05, + "loss": 0.0111, + "step": 20860 + }, + { + "epoch": 3.1829833984375e-05, + "model_forward_time": 0.025049209594726562, + "step": 20860 + }, + { + "epoch": 3.1829833984375e-05, + "step": 20860, + "training_step_time": 0.10692572593688965 + }, + { + "epoch": 3.183135986328125e-05, + "model_forward_time": 0.025345325469970703, + "step": 20861 + }, + { + "epoch": 3.183135986328125e-05, + "step": 20861, + "training_step_time": 0.10640907287597656 + }, + { + "epoch": 3.18328857421875e-05, + "model_forward_time": 0.025022268295288086, + "step": 20862 + }, + { + "epoch": 3.18328857421875e-05, + "step": 20862, + "training_step_time": 0.10808515548706055 + }, + { + "epoch": 3.183441162109375e-05, + "model_forward_time": 0.025142192840576172, + "step": 20863 + }, + { + "epoch": 3.183441162109375e-05, + "step": 20863, + "training_step_time": 0.10917782783508301 + }, + { + "epoch": 3.18359375e-05, + "model_forward_time": 0.024912595748901367, + "step": 20864 + }, + { + "epoch": 3.18359375e-05, + "step": 20864, + "training_step_time": 0.10842394828796387 + }, + { + "epoch": 3.183746337890625e-05, + "model_forward_time": 0.02498030662536621, + "step": 20865 + }, + { + "epoch": 3.183746337890625e-05, + "step": 20865, + "training_step_time": 0.10465693473815918 + }, + { + "epoch": 3.18389892578125e-05, + "model_forward_time": 0.02520918846130371, + "step": 20866 + }, + { + "epoch": 3.18389892578125e-05, + "step": 20866, + "training_step_time": 0.10567617416381836 + }, + { + "epoch": 3.184051513671875e-05, + "model_forward_time": 0.02533578872680664, + "step": 20867 + }, + { + "epoch": 3.184051513671875e-05, + "step": 20867, + "training_step_time": 0.10598874092102051 + }, + { + "epoch": 3.1842041015625e-05, + "model_forward_time": 0.025208711624145508, + "step": 20868 + }, + { + "epoch": 3.1842041015625e-05, + "step": 20868, + "training_step_time": 0.10860109329223633 + }, + { + "epoch": 3.184356689453125e-05, + "model_forward_time": 0.02535724639892578, + "step": 20869 + }, + { + "epoch": 3.184356689453125e-05, + "step": 20869, + "training_step_time": 0.10575580596923828 + }, + { + "epoch": 3.18450927734375e-05, + "grad_norm": 0.22090311348438263, + "learning_rate": 2.3255212209996025e-05, + "loss": 0.0069, + "step": 20870 + }, + { + "epoch": 3.18450927734375e-05, + "model_forward_time": 0.024407148361206055, + "step": 20870 + }, + { + "epoch": 3.18450927734375e-05, + "step": 20870, + "training_step_time": 0.14859771728515625 + }, + { + "epoch": 3.184661865234375e-05, + "model_forward_time": 0.024658679962158203, + "step": 20871 + }, + { + "epoch": 3.184661865234375e-05, + "step": 20871, + "training_step_time": 0.16710686683654785 + }, + { + "epoch": 3.184814453125e-05, + "model_forward_time": 0.026210784912109375, + "step": 20872 + }, + { + "epoch": 3.184814453125e-05, + "step": 20872, + "training_step_time": 0.1180417537689209 + }, + { + "epoch": 3.184967041015625e-05, + "model_forward_time": 0.024649381637573242, + "step": 20873 + }, + { + "epoch": 3.184967041015625e-05, + "step": 20873, + "training_step_time": 0.11768913269042969 + }, + { + "epoch": 3.18511962890625e-05, + "model_forward_time": 0.025539875030517578, + "step": 20874 + }, + { + "epoch": 3.18511962890625e-05, + "step": 20874, + "training_step_time": 0.20099544525146484 + }, + { + "epoch": 3.185272216796875e-05, + "model_forward_time": 0.024906158447265625, + "step": 20875 + }, + { + "epoch": 3.185272216796875e-05, + "step": 20875, + "training_step_time": 0.16398024559020996 + }, + { + "epoch": 3.1854248046875e-05, + "model_forward_time": 0.0240478515625, + "step": 20876 + }, + { + "epoch": 3.1854248046875e-05, + "step": 20876, + "training_step_time": 0.19109416007995605 + }, + { + "epoch": 3.185577392578125e-05, + "model_forward_time": 0.023636579513549805, + "step": 20877 + }, + { + "epoch": 3.185577392578125e-05, + "step": 20877, + "training_step_time": 0.13818693161010742 + }, + { + "epoch": 3.18572998046875e-05, + "model_forward_time": 0.023825407028198242, + "step": 20878 + }, + { + "epoch": 3.18572998046875e-05, + "step": 20878, + "training_step_time": 0.11240768432617188 + }, + { + "epoch": 3.185882568359375e-05, + "model_forward_time": 0.024588823318481445, + "step": 20879 + }, + { + "epoch": 3.185882568359375e-05, + "step": 20879, + "training_step_time": 0.1066887378692627 + }, + { + "epoch": 3.18603515625e-05, + "grad_norm": 0.3050976097583771, + "learning_rate": 2.3208660251050158e-05, + "loss": 0.0111, + "step": 20880 + }, + { + "epoch": 3.18603515625e-05, + "model_forward_time": 0.024410724639892578, + "step": 20880 + }, + { + "epoch": 3.18603515625e-05, + "step": 20880, + "training_step_time": 0.1069645881652832 + }, + { + "epoch": 3.186187744140625e-05, + "model_forward_time": 0.024531841278076172, + "step": 20881 + }, + { + "epoch": 3.186187744140625e-05, + "step": 20881, + "training_step_time": 0.1074671745300293 + }, + { + "epoch": 3.18634033203125e-05, + "model_forward_time": 0.024474620819091797, + "step": 20882 + }, + { + "epoch": 3.18634033203125e-05, + "step": 20882, + "training_step_time": 0.1943802833557129 + }, + { + "epoch": 3.186492919921875e-05, + "model_forward_time": 0.023926973342895508, + "step": 20883 + }, + { + "epoch": 3.186492919921875e-05, + "step": 20883, + "training_step_time": 0.11274051666259766 + }, + { + "epoch": 3.1866455078125e-05, + "model_forward_time": 0.023557186126708984, + "step": 20884 + }, + { + "epoch": 3.1866455078125e-05, + "step": 20884, + "training_step_time": 0.11159205436706543 + }, + { + "epoch": 3.186798095703125e-05, + "model_forward_time": 0.02423262596130371, + "step": 20885 + }, + { + "epoch": 3.186798095703125e-05, + "step": 20885, + "training_step_time": 0.11328935623168945 + }, + { + "epoch": 3.18695068359375e-05, + "model_forward_time": 0.02410435676574707, + "step": 20886 + }, + { + "epoch": 3.18695068359375e-05, + "step": 20886, + "training_step_time": 0.11318278312683105 + }, + { + "epoch": 3.187103271484375e-05, + "model_forward_time": 0.024291515350341797, + "step": 20887 + }, + { + "epoch": 3.187103271484375e-05, + "step": 20887, + "training_step_time": 0.11020183563232422 + }, + { + "epoch": 3.187255859375e-05, + "model_forward_time": 0.02452254295349121, + "step": 20888 + }, + { + "epoch": 3.187255859375e-05, + "step": 20888, + "training_step_time": 0.10756850242614746 + }, + { + "epoch": 3.187408447265625e-05, + "model_forward_time": 0.023779630661010742, + "step": 20889 + }, + { + "epoch": 3.187408447265625e-05, + "step": 20889, + "training_step_time": 0.1083061695098877 + }, + { + "epoch": 3.18756103515625e-05, + "grad_norm": 0.2992817163467407, + "learning_rate": 2.3162140846108366e-05, + "loss": 0.0104, + "step": 20890 + }, + { + "epoch": 3.18756103515625e-05, + "model_forward_time": 0.024184465408325195, + "step": 20890 + }, + { + "epoch": 3.18756103515625e-05, + "step": 20890, + "training_step_time": 0.1102597713470459 + }, + { + "epoch": 3.187713623046875e-05, + "model_forward_time": 0.024748802185058594, + "step": 20891 + }, + { + "epoch": 3.187713623046875e-05, + "step": 20891, + "training_step_time": 0.11478614807128906 + }, + { + "epoch": 3.1878662109375e-05, + "model_forward_time": 0.024597644805908203, + "step": 20892 + }, + { + "epoch": 3.1878662109375e-05, + "step": 20892, + "training_step_time": 0.11287713050842285 + }, + { + "epoch": 3.188018798828125e-05, + "model_forward_time": 0.024138450622558594, + "step": 20893 + }, + { + "epoch": 3.188018798828125e-05, + "step": 20893, + "training_step_time": 0.11603927612304688 + }, + { + "epoch": 3.18817138671875e-05, + "model_forward_time": 0.024454832077026367, + "step": 20894 + }, + { + "epoch": 3.18817138671875e-05, + "step": 20894, + "training_step_time": 0.10783600807189941 + }, + { + "epoch": 3.188323974609375e-05, + "model_forward_time": 0.02420639991760254, + "step": 20895 + }, + { + "epoch": 3.188323974609375e-05, + "step": 20895, + "training_step_time": 0.11224961280822754 + }, + { + "epoch": 3.1884765625e-05, + "model_forward_time": 0.02467942237854004, + "step": 20896 + }, + { + "epoch": 3.1884765625e-05, + "step": 20896, + "training_step_time": 0.10582423210144043 + }, + { + "epoch": 3.188629150390625e-05, + "model_forward_time": 0.02448248863220215, + "step": 20897 + }, + { + "epoch": 3.188629150390625e-05, + "step": 20897, + "training_step_time": 0.10831928253173828 + }, + { + "epoch": 3.18878173828125e-05, + "model_forward_time": 0.02448272705078125, + "step": 20898 + }, + { + "epoch": 3.18878173828125e-05, + "step": 20898, + "training_step_time": 0.15099310874938965 + }, + { + "epoch": 3.188934326171875e-05, + "model_forward_time": 0.0241396427154541, + "step": 20899 + }, + { + "epoch": 3.188934326171875e-05, + "step": 20899, + "training_step_time": 0.11275005340576172 + }, + { + "epoch": 3.1890869140625e-05, + "grad_norm": 0.48044052720069885, + "learning_rate": 2.3115654051696095e-05, + "loss": 0.0097, + "step": 20900 + }, + { + "epoch": 3.1890869140625e-05, + "model_forward_time": 0.024339914321899414, + "step": 20900 + }, + { + "epoch": 3.1890869140625e-05, + "step": 20900, + "training_step_time": 0.2097020149230957 + }, + { + "epoch": 3.189239501953125e-05, + "model_forward_time": 0.02384018898010254, + "step": 20901 + }, + { + "epoch": 3.189239501953125e-05, + "step": 20901, + "training_step_time": 0.1087653636932373 + }, + { + "epoch": 3.18939208984375e-05, + "model_forward_time": 0.024075984954833984, + "step": 20902 + }, + { + "epoch": 3.18939208984375e-05, + "step": 20902, + "training_step_time": 0.1195986270904541 + }, + { + "epoch": 3.189544677734375e-05, + "model_forward_time": 0.02469944953918457, + "step": 20903 + }, + { + "epoch": 3.189544677734375e-05, + "step": 20903, + "training_step_time": 0.11021018028259277 + }, + { + "epoch": 3.189697265625e-05, + "model_forward_time": 0.024339914321899414, + "step": 20904 + }, + { + "epoch": 3.189697265625e-05, + "step": 20904, + "training_step_time": 0.11266660690307617 + }, + { + "epoch": 3.189849853515625e-05, + "model_forward_time": 0.024362802505493164, + "step": 20905 + }, + { + "epoch": 3.189849853515625e-05, + "step": 20905, + "training_step_time": 0.10747003555297852 + }, + { + "epoch": 3.19000244140625e-05, + "model_forward_time": 0.02451014518737793, + "step": 20906 + }, + { + "epoch": 3.19000244140625e-05, + "step": 20906, + "training_step_time": 0.10698914527893066 + }, + { + "epoch": 3.190155029296875e-05, + "model_forward_time": 0.024125337600708008, + "step": 20907 + }, + { + "epoch": 3.190155029296875e-05, + "step": 20907, + "training_step_time": 0.10592007637023926 + }, + { + "epoch": 3.1903076171875e-05, + "model_forward_time": 0.0242464542388916, + "step": 20908 + }, + { + "epoch": 3.1903076171875e-05, + "step": 20908, + "training_step_time": 0.11037421226501465 + }, + { + "epoch": 3.190460205078125e-05, + "model_forward_time": 0.024834632873535156, + "step": 20909 + }, + { + "epoch": 3.190460205078125e-05, + "step": 20909, + "training_step_time": 0.1159520149230957 + }, + { + "epoch": 3.19061279296875e-05, + "grad_norm": 0.33849474787712097, + "learning_rate": 2.3069199924299174e-05, + "loss": 0.0103, + "step": 20910 + }, + { + "epoch": 3.19061279296875e-05, + "model_forward_time": 0.026947021484375, + "step": 20910 + }, + { + "epoch": 3.19061279296875e-05, + "step": 20910, + "training_step_time": 0.1456005573272705 + }, + { + "epoch": 3.190765380859375e-05, + "model_forward_time": 0.02509140968322754, + "step": 20911 + }, + { + "epoch": 3.190765380859375e-05, + "step": 20911, + "training_step_time": 0.17772555351257324 + }, + { + "epoch": 3.19091796875e-05, + "model_forward_time": 0.02367234230041504, + "step": 20912 + }, + { + "epoch": 3.19091796875e-05, + "step": 20912, + "training_step_time": 0.15875506401062012 + }, + { + "epoch": 3.191070556640625e-05, + "model_forward_time": 0.023256540298461914, + "step": 20913 + }, + { + "epoch": 3.191070556640625e-05, + "step": 20913, + "training_step_time": 0.15701699256896973 + }, + { + "epoch": 3.19122314453125e-05, + "model_forward_time": 0.023736238479614258, + "step": 20914 + }, + { + "epoch": 3.19122314453125e-05, + "step": 20914, + "training_step_time": 0.1899254322052002 + }, + { + "epoch": 3.191375732421875e-05, + "model_forward_time": 0.02338862419128418, + "step": 20915 + }, + { + "epoch": 3.191375732421875e-05, + "step": 20915, + "training_step_time": 0.14279985427856445 + }, + { + "epoch": 3.1915283203125e-05, + "model_forward_time": 0.02374577522277832, + "step": 20916 + }, + { + "epoch": 3.1915283203125e-05, + "step": 20916, + "training_step_time": 0.21148443222045898 + }, + { + "epoch": 3.191680908203125e-05, + "model_forward_time": 0.02332019805908203, + "step": 20917 + }, + { + "epoch": 3.191680908203125e-05, + "step": 20917, + "training_step_time": 0.12008976936340332 + }, + { + "epoch": 3.19183349609375e-05, + "model_forward_time": 0.023340225219726562, + "step": 20918 + }, + { + "epoch": 3.19183349609375e-05, + "step": 20918, + "training_step_time": 0.17731785774230957 + }, + { + "epoch": 3.191986083984375e-05, + "model_forward_time": 0.02382206916809082, + "step": 20919 + }, + { + "epoch": 3.191986083984375e-05, + "step": 20919, + "training_step_time": 0.17373394966125488 + }, + { + "epoch": 3.192138671875e-05, + "grad_norm": 0.2071171998977661, + "learning_rate": 2.3022778520363753e-05, + "loss": 0.0086, + "step": 20920 + }, + { + "epoch": 3.192138671875e-05, + "model_forward_time": 0.023595094680786133, + "step": 20920 + }, + { + "epoch": 3.192138671875e-05, + "step": 20920, + "training_step_time": 0.18980860710144043 + }, + { + "epoch": 3.192291259765625e-05, + "model_forward_time": 0.023751258850097656, + "step": 20921 + }, + { + "epoch": 3.192291259765625e-05, + "step": 20921, + "training_step_time": 0.14075398445129395 + }, + { + "epoch": 3.19244384765625e-05, + "model_forward_time": 0.023923873901367188, + "step": 20922 + }, + { + "epoch": 3.19244384765625e-05, + "step": 20922, + "training_step_time": 0.10462522506713867 + }, + { + "epoch": 3.192596435546875e-05, + "model_forward_time": 0.024194955825805664, + "step": 20923 + }, + { + "epoch": 3.192596435546875e-05, + "step": 20923, + "training_step_time": 0.11624288558959961 + }, + { + "epoch": 3.1927490234375e-05, + "model_forward_time": 0.024443626403808594, + "step": 20924 + }, + { + "epoch": 3.1927490234375e-05, + "step": 20924, + "training_step_time": 0.10983943939208984 + }, + { + "epoch": 3.192901611328125e-05, + "model_forward_time": 0.024003028869628906, + "step": 20925 + }, + { + "epoch": 3.192901611328125e-05, + "step": 20925, + "training_step_time": 0.10519266128540039 + }, + { + "epoch": 3.19305419921875e-05, + "model_forward_time": 0.024275779724121094, + "step": 20926 + }, + { + "epoch": 3.19305419921875e-05, + "step": 20926, + "training_step_time": 0.1604933738708496 + }, + { + "epoch": 3.193206787109375e-05, + "model_forward_time": 0.024112939834594727, + "step": 20927 + }, + { + "epoch": 3.193206787109375e-05, + "step": 20927, + "training_step_time": 0.11036133766174316 + }, + { + "epoch": 3.193359375e-05, + "model_forward_time": 0.023923873901367188, + "step": 20928 + }, + { + "epoch": 3.193359375e-05, + "step": 20928, + "training_step_time": 0.11069965362548828 + }, + { + "epoch": 3.193511962890625e-05, + "model_forward_time": 0.024476051330566406, + "step": 20929 + }, + { + "epoch": 3.193511962890625e-05, + "step": 20929, + "training_step_time": 0.1078493595123291 + }, + { + "epoch": 3.19366455078125e-05, + "grad_norm": 0.18889905512332916, + "learning_rate": 2.2976389896296203e-05, + "loss": 0.0048, + "step": 20930 + }, + { + "epoch": 3.19366455078125e-05, + "model_forward_time": 0.0243380069732666, + "step": 20930 + }, + { + "epoch": 3.19366455078125e-05, + "step": 20930, + "training_step_time": 0.10697484016418457 + }, + { + "epoch": 3.193817138671875e-05, + "model_forward_time": 0.02434515953063965, + "step": 20931 + }, + { + "epoch": 3.193817138671875e-05, + "step": 20931, + "training_step_time": 0.10756659507751465 + }, + { + "epoch": 3.1939697265625e-05, + "model_forward_time": 0.024018526077270508, + "step": 20932 + }, + { + "epoch": 3.1939697265625e-05, + "step": 20932, + "training_step_time": 0.10716009140014648 + }, + { + "epoch": 3.194122314453125e-05, + "model_forward_time": 0.024338960647583008, + "step": 20933 + }, + { + "epoch": 3.194122314453125e-05, + "step": 20933, + "training_step_time": 0.1056208610534668 + }, + { + "epoch": 3.19427490234375e-05, + "model_forward_time": 0.02461075782775879, + "step": 20934 + }, + { + "epoch": 3.19427490234375e-05, + "step": 20934, + "training_step_time": 0.1045067310333252 + }, + { + "epoch": 3.194427490234375e-05, + "model_forward_time": 0.024304866790771484, + "step": 20935 + }, + { + "epoch": 3.194427490234375e-05, + "step": 20935, + "training_step_time": 0.1055455207824707 + }, + { + "epoch": 3.194580078125e-05, + "model_forward_time": 0.024506807327270508, + "step": 20936 + }, + { + "epoch": 3.194580078125e-05, + "step": 20936, + "training_step_time": 0.10832381248474121 + }, + { + "epoch": 3.194732666015625e-05, + "model_forward_time": 0.02446126937866211, + "step": 20937 + }, + { + "epoch": 3.194732666015625e-05, + "step": 20937, + "training_step_time": 0.1055595874786377 + }, + { + "epoch": 3.19488525390625e-05, + "model_forward_time": 0.02409672737121582, + "step": 20938 + }, + { + "epoch": 3.19488525390625e-05, + "step": 20938, + "training_step_time": 0.10618400573730469 + }, + { + "epoch": 3.195037841796875e-05, + "model_forward_time": 0.024507761001586914, + "step": 20939 + }, + { + "epoch": 3.195037841796875e-05, + "step": 20939, + "training_step_time": 0.10516548156738281 + }, + { + "epoch": 3.1951904296875e-05, + "grad_norm": 0.16985678672790527, + "learning_rate": 2.29300341084631e-05, + "loss": 0.0064, + "step": 20940 + }, + { + "epoch": 3.1951904296875e-05, + "model_forward_time": 0.024466991424560547, + "step": 20940 + }, + { + "epoch": 3.1951904296875e-05, + "step": 20940, + "training_step_time": 0.10623669624328613 + }, + { + "epoch": 3.195343017578125e-05, + "model_forward_time": 0.025005340576171875, + "step": 20941 + }, + { + "epoch": 3.195343017578125e-05, + "step": 20941, + "training_step_time": 0.10666084289550781 + }, + { + "epoch": 3.19549560546875e-05, + "model_forward_time": 0.024379491806030273, + "step": 20942 + }, + { + "epoch": 3.19549560546875e-05, + "step": 20942, + "training_step_time": 0.19836187362670898 + }, + { + "epoch": 3.195648193359375e-05, + "model_forward_time": 0.02278614044189453, + "step": 20943 + }, + { + "epoch": 3.195648193359375e-05, + "step": 20943, + "training_step_time": 0.1080026626586914 + }, + { + "epoch": 3.19580078125e-05, + "model_forward_time": 0.023691654205322266, + "step": 20944 + }, + { + "epoch": 3.19580078125e-05, + "step": 20944, + "training_step_time": 0.16800737380981445 + }, + { + "epoch": 3.195953369140625e-05, + "model_forward_time": 0.024271249771118164, + "step": 20945 + }, + { + "epoch": 3.195953369140625e-05, + "step": 20945, + "training_step_time": 0.12114119529724121 + }, + { + "epoch": 3.19610595703125e-05, + "model_forward_time": 0.023670673370361328, + "step": 20946 + }, + { + "epoch": 3.19610595703125e-05, + "step": 20946, + "training_step_time": 0.10887718200683594 + }, + { + "epoch": 3.196258544921875e-05, + "model_forward_time": 0.02483820915222168, + "step": 20947 + }, + { + "epoch": 3.196258544921875e-05, + "step": 20947, + "training_step_time": 0.12133979797363281 + }, + { + "epoch": 3.1964111328125e-05, + "model_forward_time": 0.024420976638793945, + "step": 20948 + }, + { + "epoch": 3.1964111328125e-05, + "step": 20948, + "training_step_time": 0.12640786170959473 + }, + { + "epoch": 3.196563720703125e-05, + "model_forward_time": 0.024159908294677734, + "step": 20949 + }, + { + "epoch": 3.196563720703125e-05, + "step": 20949, + "training_step_time": 0.10400915145874023 + }, + { + "epoch": 3.19671630859375e-05, + "grad_norm": 0.2694411873817444, + "learning_rate": 2.288371121319109e-05, + "loss": 0.0098, + "step": 20950 + }, + { + "epoch": 3.19671630859375e-05, + "model_forward_time": 0.024370193481445312, + "step": 20950 + }, + { + "epoch": 3.19671630859375e-05, + "step": 20950, + "training_step_time": 0.10578036308288574 + }, + { + "epoch": 3.196868896484375e-05, + "model_forward_time": 0.02436995506286621, + "step": 20951 + }, + { + "epoch": 3.196868896484375e-05, + "step": 20951, + "training_step_time": 0.10386824607849121 + }, + { + "epoch": 3.197021484375e-05, + "model_forward_time": 0.02568984031677246, + "step": 20952 + }, + { + "epoch": 3.197021484375e-05, + "step": 20952, + "training_step_time": 0.10805225372314453 + }, + { + "epoch": 3.197174072265625e-05, + "model_forward_time": 0.02538466453552246, + "step": 20953 + }, + { + "epoch": 3.197174072265625e-05, + "step": 20953, + "training_step_time": 0.1058800220489502 + }, + { + "epoch": 3.19732666015625e-05, + "model_forward_time": 0.024991989135742188, + "step": 20954 + }, + { + "epoch": 3.19732666015625e-05, + "step": 20954, + "training_step_time": 0.10443902015686035 + }, + { + "epoch": 3.197479248046875e-05, + "model_forward_time": 0.0249178409576416, + "step": 20955 + }, + { + "epoch": 3.197479248046875e-05, + "step": 20955, + "training_step_time": 0.10324454307556152 + }, + { + "epoch": 3.1976318359375e-05, + "model_forward_time": 0.025630712509155273, + "step": 20956 + }, + { + "epoch": 3.1976318359375e-05, + "step": 20956, + "training_step_time": 0.10499811172485352 + }, + { + "epoch": 3.197784423828125e-05, + "model_forward_time": 0.02494072914123535, + "step": 20957 + }, + { + "epoch": 3.197784423828125e-05, + "step": 20957, + "training_step_time": 0.10555219650268555 + }, + { + "epoch": 3.19793701171875e-05, + "model_forward_time": 0.025252103805541992, + "step": 20958 + }, + { + "epoch": 3.19793701171875e-05, + "step": 20958, + "training_step_time": 0.10450410842895508 + }, + { + "epoch": 3.198089599609375e-05, + "model_forward_time": 0.024678945541381836, + "step": 20959 + }, + { + "epoch": 3.198089599609375e-05, + "step": 20959, + "training_step_time": 0.10578131675720215 + }, + { + "epoch": 3.1982421875e-05, + "grad_norm": 0.1979796290397644, + "learning_rate": 2.2837421266766857e-05, + "loss": 0.0085, + "step": 20960 + }, + { + "epoch": 3.1982421875e-05, + "model_forward_time": 0.02428460121154785, + "step": 20960 + }, + { + "epoch": 3.1982421875e-05, + "step": 20960, + "training_step_time": 0.14855360984802246 + }, + { + "epoch": 3.198394775390625e-05, + "model_forward_time": 0.02537226676940918, + "step": 20961 + }, + { + "epoch": 3.198394775390625e-05, + "step": 20961, + "training_step_time": 0.15253806114196777 + }, + { + "epoch": 3.19854736328125e-05, + "model_forward_time": 0.02459263801574707, + "step": 20962 + }, + { + "epoch": 3.19854736328125e-05, + "step": 20962, + "training_step_time": 0.1141505241394043 + }, + { + "epoch": 3.198699951171875e-05, + "model_forward_time": 0.024610042572021484, + "step": 20963 + }, + { + "epoch": 3.198699951171875e-05, + "step": 20963, + "training_step_time": 0.12652587890625 + }, + { + "epoch": 3.1988525390625e-05, + "model_forward_time": 0.024911880493164062, + "step": 20964 + }, + { + "epoch": 3.1988525390625e-05, + "step": 20964, + "training_step_time": 0.16631388664245605 + }, + { + "epoch": 3.199005126953125e-05, + "model_forward_time": 0.024394989013671875, + "step": 20965 + }, + { + "epoch": 3.199005126953125e-05, + "step": 20965, + "training_step_time": 0.16535353660583496 + }, + { + "epoch": 3.19915771484375e-05, + "model_forward_time": 0.023775100708007812, + "step": 20966 + }, + { + "epoch": 3.19915771484375e-05, + "step": 20966, + "training_step_time": 0.13075518608093262 + }, + { + "epoch": 3.199310302734375e-05, + "model_forward_time": 0.024721384048461914, + "step": 20967 + }, + { + "epoch": 3.199310302734375e-05, + "step": 20967, + "training_step_time": 0.11049699783325195 + }, + { + "epoch": 3.199462890625e-05, + "model_forward_time": 0.02506113052368164, + "step": 20968 + }, + { + "epoch": 3.199462890625e-05, + "step": 20968, + "training_step_time": 0.1205441951751709 + }, + { + "epoch": 3.199615478515625e-05, + "model_forward_time": 0.025595664978027344, + "step": 20969 + }, + { + "epoch": 3.199615478515625e-05, + "step": 20969, + "training_step_time": 0.10684013366699219 + }, + { + "epoch": 3.19976806640625e-05, + "grad_norm": 0.1606241762638092, + "learning_rate": 2.279116432543705e-05, + "loss": 0.0058, + "step": 20970 + }, + { + "epoch": 3.19976806640625e-05, + "model_forward_time": 0.025014400482177734, + "step": 20970 + }, + { + "epoch": 3.19976806640625e-05, + "step": 20970, + "training_step_time": 0.10595464706420898 + }, + { + "epoch": 3.199920654296875e-05, + "model_forward_time": 0.025043249130249023, + "step": 20971 + }, + { + "epoch": 3.199920654296875e-05, + "step": 20971, + "training_step_time": 0.11782336235046387 + }, + { + "epoch": 3.2000732421875e-05, + "model_forward_time": 0.025203943252563477, + "step": 20972 + }, + { + "epoch": 3.2000732421875e-05, + "step": 20972, + "training_step_time": 0.10997772216796875 + }, + { + "epoch": 3.200225830078125e-05, + "model_forward_time": 0.02482891082763672, + "step": 20973 + }, + { + "epoch": 3.200225830078125e-05, + "step": 20973, + "training_step_time": 0.18892621994018555 + }, + { + "epoch": 3.20037841796875e-05, + "model_forward_time": 0.024064302444458008, + "step": 20974 + }, + { + "epoch": 3.20037841796875e-05, + "step": 20974, + "training_step_time": 0.10538268089294434 + }, + { + "epoch": 3.200531005859375e-05, + "model_forward_time": 0.024271488189697266, + "step": 20975 + }, + { + "epoch": 3.200531005859375e-05, + "step": 20975, + "training_step_time": 0.10281777381896973 + }, + { + "epoch": 3.20068359375e-05, + "model_forward_time": 0.02514481544494629, + "step": 20976 + }, + { + "epoch": 3.20068359375e-05, + "step": 20976, + "training_step_time": 0.10588502883911133 + }, + { + "epoch": 3.200836181640625e-05, + "model_forward_time": 0.024989604949951172, + "step": 20977 + }, + { + "epoch": 3.200836181640625e-05, + "step": 20977, + "training_step_time": 0.18580889701843262 + }, + { + "epoch": 3.20098876953125e-05, + "model_forward_time": 0.024666309356689453, + "step": 20978 + }, + { + "epoch": 3.20098876953125e-05, + "step": 20978, + "training_step_time": 0.20614242553710938 + }, + { + "epoch": 3.201141357421875e-05, + "model_forward_time": 0.024148225784301758, + "step": 20979 + }, + { + "epoch": 3.201141357421875e-05, + "step": 20979, + "training_step_time": 0.20559000968933105 + }, + { + "epoch": 3.2012939453125e-05, + "grad_norm": 0.11838044226169586, + "learning_rate": 2.2744940445408202e-05, + "loss": 0.0042, + "step": 20980 + }, + { + "epoch": 3.2012939453125e-05, + "model_forward_time": 0.024283409118652344, + "step": 20980 + }, + { + "epoch": 3.2012939453125e-05, + "step": 20980, + "training_step_time": 0.193495512008667 + }, + { + "epoch": 3.201446533203125e-05, + "model_forward_time": 0.024239301681518555, + "step": 20981 + }, + { + "epoch": 3.201446533203125e-05, + "step": 20981, + "training_step_time": 0.18219327926635742 + }, + { + "epoch": 3.20159912109375e-05, + "model_forward_time": 0.024628639221191406, + "step": 20982 + }, + { + "epoch": 3.20159912109375e-05, + "step": 20982, + "training_step_time": 0.17626619338989258 + }, + { + "epoch": 3.201751708984375e-05, + "model_forward_time": 0.024436235427856445, + "step": 20983 + }, + { + "epoch": 3.201751708984375e-05, + "step": 20983, + "training_step_time": 0.16077065467834473 + }, + { + "epoch": 3.201904296875e-05, + "model_forward_time": 0.024114370346069336, + "step": 20984 + }, + { + "epoch": 3.201904296875e-05, + "step": 20984, + "training_step_time": 0.11462759971618652 + }, + { + "epoch": 3.202056884765625e-05, + "model_forward_time": 0.024523019790649414, + "step": 20985 + }, + { + "epoch": 3.202056884765625e-05, + "step": 20985, + "training_step_time": 0.11400294303894043 + }, + { + "epoch": 3.20220947265625e-05, + "model_forward_time": 0.02523493766784668, + "step": 20986 + }, + { + "epoch": 3.20220947265625e-05, + "step": 20986, + "training_step_time": 0.12998533248901367 + }, + { + "epoch": 3.202362060546875e-05, + "model_forward_time": 0.025480270385742188, + "step": 20987 + }, + { + "epoch": 3.202362060546875e-05, + "step": 20987, + "training_step_time": 0.12911486625671387 + }, + { + "epoch": 3.2025146484375e-05, + "model_forward_time": 0.024914026260375977, + "step": 20988 + }, + { + "epoch": 3.2025146484375e-05, + "step": 20988, + "training_step_time": 0.10532855987548828 + }, + { + "epoch": 3.202667236328125e-05, + "model_forward_time": 0.02524280548095703, + "step": 20989 + }, + { + "epoch": 3.202667236328125e-05, + "step": 20989, + "training_step_time": 0.14786529541015625 + }, + { + "epoch": 3.20281982421875e-05, + "grad_norm": 0.1383422315120697, + "learning_rate": 2.2698749682846687e-05, + "loss": 0.0046, + "step": 20990 + }, + { + "epoch": 3.20281982421875e-05, + "model_forward_time": 0.024940967559814453, + "step": 20990 + }, + { + "epoch": 3.20281982421875e-05, + "step": 20990, + "training_step_time": 0.12484574317932129 + }, + { + "epoch": 3.202972412109375e-05, + "model_forward_time": 0.024914264678955078, + "step": 20991 + }, + { + "epoch": 3.202972412109375e-05, + "step": 20991, + "training_step_time": 0.12060928344726562 + }, + { + "epoch": 3.203125e-05, + "model_forward_time": 0.025206327438354492, + "step": 20992 + }, + { + "epoch": 3.203125e-05, + "step": 20992, + "training_step_time": 0.10564804077148438 + }, + { + "epoch": 3.203277587890625e-05, + "model_forward_time": 0.025262832641601562, + "step": 20993 + }, + { + "epoch": 3.203277587890625e-05, + "step": 20993, + "training_step_time": 0.10614347457885742 + }, + { + "epoch": 3.20343017578125e-05, + "model_forward_time": 0.02471613883972168, + "step": 20994 + }, + { + "epoch": 3.20343017578125e-05, + "step": 20994, + "training_step_time": 0.10723352432250977 + }, + { + "epoch": 3.203582763671875e-05, + "model_forward_time": 0.025940656661987305, + "step": 20995 + }, + { + "epoch": 3.203582763671875e-05, + "step": 20995, + "training_step_time": 0.10569024085998535 + }, + { + "epoch": 3.2037353515625e-05, + "model_forward_time": 0.025329113006591797, + "step": 20996 + }, + { + "epoch": 3.2037353515625e-05, + "step": 20996, + "training_step_time": 0.10524249076843262 + }, + { + "epoch": 3.203887939453125e-05, + "model_forward_time": 0.02527451515197754, + "step": 20997 + }, + { + "epoch": 3.203887939453125e-05, + "step": 20997, + "training_step_time": 0.1073911190032959 + }, + { + "epoch": 3.20404052734375e-05, + "model_forward_time": 0.024895429611206055, + "step": 20998 + }, + { + "epoch": 3.20404052734375e-05, + "step": 20998, + "training_step_time": 0.10617876052856445 + }, + { + "epoch": 3.204193115234375e-05, + "model_forward_time": 0.024880170822143555, + "step": 20999 + }, + { + "epoch": 3.204193115234375e-05, + "step": 20999, + "training_step_time": 0.10453915596008301 + }, + { + "epoch": 3.204345703125e-05, + "grad_norm": 0.12018714845180511, + "learning_rate": 2.2652592093878666e-05, + "loss": 0.0082, + "step": 21000 + }, + { + "epoch": 3.204345703125e-05, + "model_forward_time": 0.02322530746459961, + "step": 21000 + }, + { + "epoch": 3.204345703125e-05, + "step": 21000, + "training_step_time": 0.09423136711120605 + }, + { + "epoch": 3.204498291015625e-05, + "model_forward_time": 0.023467063903808594, + "step": 21001 + }, + { + "epoch": 3.204498291015625e-05, + "step": 21001, + "training_step_time": 0.0988612174987793 + }, + { + "epoch": 3.20465087890625e-05, + "model_forward_time": 0.024544477462768555, + "step": 21002 + }, + { + "epoch": 3.20465087890625e-05, + "step": 21002, + "training_step_time": 0.1031191349029541 + }, + { + "epoch": 3.204803466796875e-05, + "model_forward_time": 0.024768829345703125, + "step": 21003 + }, + { + "epoch": 3.204803466796875e-05, + "step": 21003, + "training_step_time": 0.10243654251098633 + }, + { + "epoch": 3.2049560546875e-05, + "model_forward_time": 0.02525782585144043, + "step": 21004 + }, + { + "epoch": 3.2049560546875e-05, + "step": 21004, + "training_step_time": 0.10604310035705566 + }, + { + "epoch": 3.205108642578125e-05, + "model_forward_time": 0.025297164916992188, + "step": 21005 + }, + { + "epoch": 3.205108642578125e-05, + "step": 21005, + "training_step_time": 0.10829520225524902 + }, + { + "epoch": 3.20526123046875e-05, + "model_forward_time": 0.02553415298461914, + "step": 21006 + }, + { + "epoch": 3.20526123046875e-05, + "step": 21006, + "training_step_time": 0.10339927673339844 + }, + { + "epoch": 3.205413818359375e-05, + "model_forward_time": 0.024680614471435547, + "step": 21007 + }, + { + "epoch": 3.205413818359375e-05, + "step": 21007, + "training_step_time": 0.150040864944458 + }, + { + "epoch": 3.20556640625e-05, + "model_forward_time": 0.024448156356811523, + "step": 21008 + }, + { + "epoch": 3.20556640625e-05, + "step": 21008, + "training_step_time": 0.15775251388549805 + }, + { + "epoch": 3.205718994140625e-05, + "model_forward_time": 0.024425268173217773, + "step": 21009 + }, + { + "epoch": 3.205718994140625e-05, + "step": 21009, + "training_step_time": 0.1583404541015625 + }, + { + "epoch": 3.20587158203125e-05, + "grad_norm": 0.25087517499923706, + "learning_rate": 2.2606467734589924e-05, + "loss": 0.0118, + "step": 21010 + }, + { + "epoch": 3.20587158203125e-05, + "model_forward_time": 0.024266481399536133, + "step": 21010 + }, + { + "epoch": 3.20587158203125e-05, + "step": 21010, + "training_step_time": 0.18371105194091797 + }, + { + "epoch": 3.206024169921875e-05, + "model_forward_time": 0.0244903564453125, + "step": 21011 + }, + { + "epoch": 3.206024169921875e-05, + "step": 21011, + "training_step_time": 0.17206120491027832 + }, + { + "epoch": 3.2061767578125e-05, + "model_forward_time": 0.024683713912963867, + "step": 21012 + }, + { + "epoch": 3.2061767578125e-05, + "step": 21012, + "training_step_time": 0.22068309783935547 + }, + { + "epoch": 3.206329345703125e-05, + "model_forward_time": 0.024411439895629883, + "step": 21013 + }, + { + "epoch": 3.206329345703125e-05, + "step": 21013, + "training_step_time": 0.14562129974365234 + }, + { + "epoch": 3.20648193359375e-05, + "model_forward_time": 0.024422168731689453, + "step": 21014 + }, + { + "epoch": 3.20648193359375e-05, + "step": 21014, + "training_step_time": 0.19055461883544922 + }, + { + "epoch": 3.206634521484375e-05, + "model_forward_time": 0.024055004119873047, + "step": 21015 + }, + { + "epoch": 3.206634521484375e-05, + "step": 21015, + "training_step_time": 0.10859084129333496 + }, + { + "epoch": 3.206787109375e-05, + "model_forward_time": 0.026369571685791016, + "step": 21016 + }, + { + "epoch": 3.206787109375e-05, + "step": 21016, + "training_step_time": 0.19722390174865723 + }, + { + "epoch": 3.206939697265625e-05, + "model_forward_time": 0.024967670440673828, + "step": 21017 + }, + { + "epoch": 3.206939697265625e-05, + "step": 21017, + "training_step_time": 0.1064293384552002 + }, + { + "epoch": 3.20709228515625e-05, + "model_forward_time": 0.02445220947265625, + "step": 21018 + }, + { + "epoch": 3.20709228515625e-05, + "step": 21018, + "training_step_time": 0.10428047180175781 + }, + { + "epoch": 3.207244873046875e-05, + "model_forward_time": 0.02541661262512207, + "step": 21019 + }, + { + "epoch": 3.207244873046875e-05, + "step": 21019, + "training_step_time": 0.10465860366821289 + }, + { + "epoch": 3.2073974609375e-05, + "grad_norm": 0.19894975423812866, + "learning_rate": 2.2560376661025945e-05, + "loss": 0.0068, + "step": 21020 + }, + { + "epoch": 3.2073974609375e-05, + "model_forward_time": 0.025252342224121094, + "step": 21020 + }, + { + "epoch": 3.2073974609375e-05, + "step": 21020, + "training_step_time": 0.11088037490844727 + }, + { + "epoch": 3.207550048828125e-05, + "model_forward_time": 0.02655339241027832, + "step": 21021 + }, + { + "epoch": 3.207550048828125e-05, + "step": 21021, + "training_step_time": 0.10682010650634766 + }, + { + "epoch": 3.20770263671875e-05, + "model_forward_time": 0.025124073028564453, + "step": 21022 + }, + { + "epoch": 3.20770263671875e-05, + "step": 21022, + "training_step_time": 0.1076803207397461 + }, + { + "epoch": 3.207855224609375e-05, + "model_forward_time": 0.025011539459228516, + "step": 21023 + }, + { + "epoch": 3.207855224609375e-05, + "step": 21023, + "training_step_time": 0.10767865180969238 + }, + { + "epoch": 3.2080078125e-05, + "model_forward_time": 0.025256633758544922, + "step": 21024 + }, + { + "epoch": 3.2080078125e-05, + "step": 21024, + "training_step_time": 0.10580277442932129 + }, + { + "epoch": 3.208160400390625e-05, + "model_forward_time": 0.02512359619140625, + "step": 21025 + }, + { + "epoch": 3.208160400390625e-05, + "step": 21025, + "training_step_time": 0.10624265670776367 + }, + { + "epoch": 3.20831298828125e-05, + "model_forward_time": 0.025471210479736328, + "step": 21026 + }, + { + "epoch": 3.20831298828125e-05, + "step": 21026, + "training_step_time": 0.10708022117614746 + }, + { + "epoch": 3.208465576171875e-05, + "model_forward_time": 0.025400161743164062, + "step": 21027 + }, + { + "epoch": 3.208465576171875e-05, + "step": 21027, + "training_step_time": 0.1092379093170166 + }, + { + "epoch": 3.2086181640625e-05, + "model_forward_time": 0.025461435317993164, + "step": 21028 + }, + { + "epoch": 3.2086181640625e-05, + "step": 21028, + "training_step_time": 0.1041111946105957 + }, + { + "epoch": 3.208770751953125e-05, + "model_forward_time": 0.026166200637817383, + "step": 21029 + }, + { + "epoch": 3.208770751953125e-05, + "step": 21029, + "training_step_time": 0.10735678672790527 + }, + { + "epoch": 3.20892333984375e-05, + "grad_norm": 0.12291596829891205, + "learning_rate": 2.251431892919171e-05, + "loss": 0.0102, + "step": 21030 + }, + { + "epoch": 3.20892333984375e-05, + "model_forward_time": 0.02539205551147461, + "step": 21030 + }, + { + "epoch": 3.20892333984375e-05, + "step": 21030, + "training_step_time": 0.13961172103881836 + }, + { + "epoch": 3.209075927734375e-05, + "model_forward_time": 0.025295257568359375, + "step": 21031 + }, + { + "epoch": 3.209075927734375e-05, + "step": 21031, + "training_step_time": 0.15492606163024902 + }, + { + "epoch": 3.209228515625e-05, + "model_forward_time": 0.024661779403686523, + "step": 21032 + }, + { + "epoch": 3.209228515625e-05, + "step": 21032, + "training_step_time": 0.1365213394165039 + }, + { + "epoch": 3.209381103515625e-05, + "model_forward_time": 0.025146484375, + "step": 21033 + }, + { + "epoch": 3.209381103515625e-05, + "step": 21033, + "training_step_time": 0.15526485443115234 + }, + { + "epoch": 3.20953369140625e-05, + "model_forward_time": 0.024460554122924805, + "step": 21034 + }, + { + "epoch": 3.20953369140625e-05, + "step": 21034, + "training_step_time": 0.1095433235168457 + }, + { + "epoch": 3.209686279296875e-05, + "model_forward_time": 0.02466750144958496, + "step": 21035 + }, + { + "epoch": 3.209686279296875e-05, + "step": 21035, + "training_step_time": 0.12091970443725586 + }, + { + "epoch": 3.2098388671875e-05, + "model_forward_time": 0.02536463737487793, + "step": 21036 + }, + { + "epoch": 3.2098388671875e-05, + "step": 21036, + "training_step_time": 0.11558914184570312 + }, + { + "epoch": 3.209991455078125e-05, + "model_forward_time": 0.025399208068847656, + "step": 21037 + }, + { + "epoch": 3.209991455078125e-05, + "step": 21037, + "training_step_time": 0.1633157730102539 + }, + { + "epoch": 3.21014404296875e-05, + "model_forward_time": 0.02440190315246582, + "step": 21038 + }, + { + "epoch": 3.21014404296875e-05, + "step": 21038, + "training_step_time": 0.10747718811035156 + }, + { + "epoch": 3.210296630859375e-05, + "model_forward_time": 0.02467036247253418, + "step": 21039 + }, + { + "epoch": 3.210296630859375e-05, + "step": 21039, + "training_step_time": 0.1133279800415039 + }, + { + "epoch": 3.21044921875e-05, + "grad_norm": 0.15215924382209778, + "learning_rate": 2.2468294595051687e-05, + "loss": 0.0081, + "step": 21040 + }, + { + "epoch": 3.21044921875e-05, + "model_forward_time": 0.02494072914123535, + "step": 21040 + }, + { + "epoch": 3.21044921875e-05, + "step": 21040, + "training_step_time": 0.10536432266235352 + }, + { + "epoch": 3.210601806640625e-05, + "model_forward_time": 0.02517080307006836, + "step": 21041 + }, + { + "epoch": 3.210601806640625e-05, + "step": 21041, + "training_step_time": 0.10616922378540039 + }, + { + "epoch": 3.21075439453125e-05, + "model_forward_time": 0.02533745765686035, + "step": 21042 + }, + { + "epoch": 3.21075439453125e-05, + "step": 21042, + "training_step_time": 0.10864853858947754 + }, + { + "epoch": 3.210906982421875e-05, + "model_forward_time": 0.025047779083251953, + "step": 21043 + }, + { + "epoch": 3.210906982421875e-05, + "step": 21043, + "training_step_time": 0.1063694953918457 + }, + { + "epoch": 3.2110595703125e-05, + "model_forward_time": 0.025476932525634766, + "step": 21044 + }, + { + "epoch": 3.2110595703125e-05, + "step": 21044, + "training_step_time": 0.10620427131652832 + }, + { + "epoch": 3.211212158203125e-05, + "model_forward_time": 0.025572538375854492, + "step": 21045 + }, + { + "epoch": 3.211212158203125e-05, + "step": 21045, + "training_step_time": 0.10952425003051758 + }, + { + "epoch": 3.21136474609375e-05, + "model_forward_time": 0.025313854217529297, + "step": 21046 + }, + { + "epoch": 3.21136474609375e-05, + "step": 21046, + "training_step_time": 0.10838055610656738 + }, + { + "epoch": 3.211517333984375e-05, + "model_forward_time": 0.0254364013671875, + "step": 21047 + }, + { + "epoch": 3.211517333984375e-05, + "step": 21047, + "training_step_time": 0.10993003845214844 + }, + { + "epoch": 3.211669921875e-05, + "model_forward_time": 0.02524876594543457, + "step": 21048 + }, + { + "epoch": 3.211669921875e-05, + "step": 21048, + "training_step_time": 0.12034153938293457 + }, + { + "epoch": 3.211822509765625e-05, + "model_forward_time": 0.025159120559692383, + "step": 21049 + }, + { + "epoch": 3.211822509765625e-05, + "step": 21049, + "training_step_time": 0.14071202278137207 + }, + { + "epoch": 3.21197509765625e-05, + "grad_norm": 0.2919601798057556, + "learning_rate": 2.242230371452982e-05, + "loss": 0.0086, + "step": 21050 + }, + { + "epoch": 3.21197509765625e-05, + "model_forward_time": 0.025293827056884766, + "step": 21050 + }, + { + "epoch": 3.21197509765625e-05, + "step": 21050, + "training_step_time": 0.1121816635131836 + }, + { + "epoch": 3.212127685546875e-05, + "model_forward_time": 0.024393320083618164, + "step": 21051 + }, + { + "epoch": 3.212127685546875e-05, + "step": 21051, + "training_step_time": 0.1553206443786621 + }, + { + "epoch": 3.2122802734375e-05, + "model_forward_time": 0.024788618087768555, + "step": 21052 + }, + { + "epoch": 3.2122802734375e-05, + "step": 21052, + "training_step_time": 0.1535799503326416 + }, + { + "epoch": 3.212432861328125e-05, + "model_forward_time": 0.024834394454956055, + "step": 21053 + }, + { + "epoch": 3.212432861328125e-05, + "step": 21053, + "training_step_time": 0.18761992454528809 + }, + { + "epoch": 3.21258544921875e-05, + "model_forward_time": 0.024721622467041016, + "step": 21054 + }, + { + "epoch": 3.21258544921875e-05, + "step": 21054, + "training_step_time": 0.15810585021972656 + }, + { + "epoch": 3.212738037109375e-05, + "model_forward_time": 0.027066469192504883, + "step": 21055 + }, + { + "epoch": 3.212738037109375e-05, + "step": 21055, + "training_step_time": 0.19086170196533203 + }, + { + "epoch": 3.212890625e-05, + "model_forward_time": 0.024515628814697266, + "step": 21056 + }, + { + "epoch": 3.212890625e-05, + "step": 21056, + "training_step_time": 0.19450640678405762 + }, + { + "epoch": 3.213043212890625e-05, + "model_forward_time": 0.02426886558532715, + "step": 21057 + }, + { + "epoch": 3.213043212890625e-05, + "step": 21057, + "training_step_time": 0.1704401969909668 + }, + { + "epoch": 3.21319580078125e-05, + "model_forward_time": 0.023818254470825195, + "step": 21058 + }, + { + "epoch": 3.21319580078125e-05, + "step": 21058, + "training_step_time": 0.18068885803222656 + }, + { + "epoch": 3.213348388671875e-05, + "model_forward_time": 0.023616790771484375, + "step": 21059 + }, + { + "epoch": 3.213348388671875e-05, + "step": 21059, + "training_step_time": 0.1107473373413086 + }, + { + "epoch": 3.2135009765625e-05, + "grad_norm": 0.24266916513442993, + "learning_rate": 2.237634634350934e-05, + "loss": 0.0133, + "step": 21060 + }, + { + "epoch": 3.2135009765625e-05, + "model_forward_time": 0.024321556091308594, + "step": 21060 + }, + { + "epoch": 3.2135009765625e-05, + "step": 21060, + "training_step_time": 0.12611031532287598 + }, + { + "epoch": 3.213653564453125e-05, + "model_forward_time": 0.026088953018188477, + "step": 21061 + }, + { + "epoch": 3.213653564453125e-05, + "step": 21061, + "training_step_time": 0.11939144134521484 + }, + { + "epoch": 3.21380615234375e-05, + "model_forward_time": 0.025325298309326172, + "step": 21062 + }, + { + "epoch": 3.21380615234375e-05, + "step": 21062, + "training_step_time": 0.10639357566833496 + }, + { + "epoch": 3.213958740234375e-05, + "model_forward_time": 0.02591872215270996, + "step": 21063 + }, + { + "epoch": 3.213958740234375e-05, + "step": 21063, + "training_step_time": 0.10566163063049316 + }, + { + "epoch": 3.214111328125e-05, + "model_forward_time": 0.02567768096923828, + "step": 21064 + }, + { + "epoch": 3.214111328125e-05, + "step": 21064, + "training_step_time": 0.10636162757873535 + }, + { + "epoch": 3.214263916015625e-05, + "model_forward_time": 0.027269840240478516, + "step": 21065 + }, + { + "epoch": 3.214263916015625e-05, + "step": 21065, + "training_step_time": 0.10856080055236816 + }, + { + "epoch": 3.21441650390625e-05, + "model_forward_time": 0.025417089462280273, + "step": 21066 + }, + { + "epoch": 3.21441650390625e-05, + "step": 21066, + "training_step_time": 0.10489296913146973 + }, + { + "epoch": 3.214569091796875e-05, + "model_forward_time": 0.025609254837036133, + "step": 21067 + }, + { + "epoch": 3.214569091796875e-05, + "step": 21067, + "training_step_time": 0.10773634910583496 + }, + { + "epoch": 3.2147216796875e-05, + "model_forward_time": 0.025650978088378906, + "step": 21068 + }, + { + "epoch": 3.2147216796875e-05, + "step": 21068, + "training_step_time": 0.10774922370910645 + }, + { + "epoch": 3.214874267578125e-05, + "model_forward_time": 0.02536177635192871, + "step": 21069 + }, + { + "epoch": 3.214874267578125e-05, + "step": 21069, + "training_step_time": 0.10801386833190918 + }, + { + "epoch": 3.21502685546875e-05, + "grad_norm": 0.23565426468849182, + "learning_rate": 2.23304225378328e-05, + "loss": 0.0099, + "step": 21070 + }, + { + "epoch": 3.21502685546875e-05, + "model_forward_time": 0.025113821029663086, + "step": 21070 + }, + { + "epoch": 3.21502685546875e-05, + "step": 21070, + "training_step_time": 0.10464692115783691 + }, + { + "epoch": 3.215179443359375e-05, + "model_forward_time": 0.02554941177368164, + "step": 21071 + }, + { + "epoch": 3.215179443359375e-05, + "step": 21071, + "training_step_time": 0.10890722274780273 + }, + { + "epoch": 3.21533203125e-05, + "model_forward_time": 0.02564835548400879, + "step": 21072 + }, + { + "epoch": 3.21533203125e-05, + "step": 21072, + "training_step_time": 0.10660314559936523 + }, + { + "epoch": 3.215484619140625e-05, + "model_forward_time": 0.02496790885925293, + "step": 21073 + }, + { + "epoch": 3.215484619140625e-05, + "step": 21073, + "training_step_time": 0.10352945327758789 + }, + { + "epoch": 3.21563720703125e-05, + "model_forward_time": 0.0258331298828125, + "step": 21074 + }, + { + "epoch": 3.21563720703125e-05, + "step": 21074, + "training_step_time": 0.1051180362701416 + }, + { + "epoch": 3.215789794921875e-05, + "model_forward_time": 0.024367570877075195, + "step": 21075 + }, + { + "epoch": 3.215789794921875e-05, + "step": 21075, + "training_step_time": 0.10581111907958984 + }, + { + "epoch": 3.2159423828125e-05, + "model_forward_time": 0.02569127082824707, + "step": 21076 + }, + { + "epoch": 3.2159423828125e-05, + "step": 21076, + "training_step_time": 0.10779023170471191 + }, + { + "epoch": 3.216094970703125e-05, + "model_forward_time": 0.025057554244995117, + "step": 21077 + }, + { + "epoch": 3.216094970703125e-05, + "step": 21077, + "training_step_time": 0.10608267784118652 + }, + { + "epoch": 3.21624755859375e-05, + "model_forward_time": 0.02766108512878418, + "step": 21078 + }, + { + "epoch": 3.21624755859375e-05, + "step": 21078, + "training_step_time": 0.2060239315032959 + }, + { + "epoch": 3.216400146484375e-05, + "model_forward_time": 0.02412557601928711, + "step": 21079 + }, + { + "epoch": 3.216400146484375e-05, + "step": 21079, + "training_step_time": 0.11222267150878906 + }, + { + "epoch": 3.216552734375e-05, + "grad_norm": 0.18812458217144012, + "learning_rate": 2.2284532353301953e-05, + "loss": 0.0052, + "step": 21080 + }, + { + "epoch": 3.216552734375e-05, + "model_forward_time": 0.025162696838378906, + "step": 21080 + }, + { + "epoch": 3.216552734375e-05, + "step": 21080, + "training_step_time": 0.1305561065673828 + }, + { + "epoch": 3.216705322265625e-05, + "model_forward_time": 0.024494171142578125, + "step": 21081 + }, + { + "epoch": 3.216705322265625e-05, + "step": 21081, + "training_step_time": 0.12300372123718262 + }, + { + "epoch": 3.21685791015625e-05, + "model_forward_time": 0.02466869354248047, + "step": 21082 + }, + { + "epoch": 3.21685791015625e-05, + "step": 21082, + "training_step_time": 0.13306736946105957 + }, + { + "epoch": 3.217010498046875e-05, + "model_forward_time": 0.024712324142456055, + "step": 21083 + }, + { + "epoch": 3.217010498046875e-05, + "step": 21083, + "training_step_time": 0.12731099128723145 + }, + { + "epoch": 3.2171630859375e-05, + "model_forward_time": 0.024339675903320312, + "step": 21084 + }, + { + "epoch": 3.2171630859375e-05, + "step": 21084, + "training_step_time": 0.12717223167419434 + }, + { + "epoch": 3.217315673828125e-05, + "model_forward_time": 0.025300264358520508, + "step": 21085 + }, + { + "epoch": 3.217315673828125e-05, + "step": 21085, + "training_step_time": 0.10240507125854492 + }, + { + "epoch": 3.21746826171875e-05, + "model_forward_time": 0.025016307830810547, + "step": 21086 + }, + { + "epoch": 3.21746826171875e-05, + "step": 21086, + "training_step_time": 0.10848498344421387 + }, + { + "epoch": 3.217620849609375e-05, + "model_forward_time": 0.02575230598449707, + "step": 21087 + }, + { + "epoch": 3.217620849609375e-05, + "step": 21087, + "training_step_time": 0.11292600631713867 + }, + { + "epoch": 3.2177734375e-05, + "model_forward_time": 0.024939298629760742, + "step": 21088 + }, + { + "epoch": 3.2177734375e-05, + "step": 21088, + "training_step_time": 0.10768342018127441 + }, + { + "epoch": 3.217926025390625e-05, + "model_forward_time": 0.024736642837524414, + "step": 21089 + }, + { + "epoch": 3.217926025390625e-05, + "step": 21089, + "training_step_time": 0.10599112510681152 + }, + { + "epoch": 3.21807861328125e-05, + "grad_norm": 0.12697091698646545, + "learning_rate": 2.2238675845677663e-05, + "loss": 0.0111, + "step": 21090 + }, + { + "epoch": 3.21807861328125e-05, + "model_forward_time": 0.025030851364135742, + "step": 21090 + }, + { + "epoch": 3.21807861328125e-05, + "step": 21090, + "training_step_time": 0.1064903736114502 + }, + { + "epoch": 3.218231201171875e-05, + "model_forward_time": 0.025336742401123047, + "step": 21091 + }, + { + "epoch": 3.218231201171875e-05, + "step": 21091, + "training_step_time": 0.10695028305053711 + }, + { + "epoch": 3.2183837890625e-05, + "model_forward_time": 0.02520155906677246, + "step": 21092 + }, + { + "epoch": 3.2183837890625e-05, + "step": 21092, + "training_step_time": 0.11504364013671875 + }, + { + "epoch": 3.218536376953125e-05, + "model_forward_time": 0.02470111846923828, + "step": 21093 + }, + { + "epoch": 3.218536376953125e-05, + "step": 21093, + "training_step_time": 0.11203670501708984 + }, + { + "epoch": 3.21868896484375e-05, + "model_forward_time": 0.025255680084228516, + "step": 21094 + }, + { + "epoch": 3.21868896484375e-05, + "step": 21094, + "training_step_time": 0.10781359672546387 + }, + { + "epoch": 3.218841552734375e-05, + "model_forward_time": 0.02498912811279297, + "step": 21095 + }, + { + "epoch": 3.218841552734375e-05, + "step": 21095, + "training_step_time": 0.10896944999694824 + }, + { + "epoch": 3.218994140625e-05, + "model_forward_time": 0.024546146392822266, + "step": 21096 + }, + { + "epoch": 3.218994140625e-05, + "step": 21096, + "training_step_time": 0.14392447471618652 + }, + { + "epoch": 3.219146728515625e-05, + "model_forward_time": 0.0249326229095459, + "step": 21097 + }, + { + "epoch": 3.219146728515625e-05, + "step": 21097, + "training_step_time": 0.15697002410888672 + }, + { + "epoch": 3.21929931640625e-05, + "model_forward_time": 0.02424001693725586, + "step": 21098 + }, + { + "epoch": 3.21929931640625e-05, + "step": 21098, + "training_step_time": 0.11417055130004883 + }, + { + "epoch": 3.219451904296875e-05, + "model_forward_time": 0.024742841720581055, + "step": 21099 + }, + { + "epoch": 3.219451904296875e-05, + "step": 21099, + "training_step_time": 0.13398480415344238 + }, + { + "epoch": 3.2196044921875e-05, + "grad_norm": 0.3559652268886566, + "learning_rate": 2.219285307067997e-05, + "loss": 0.0113, + "step": 21100 + }, + { + "epoch": 3.2196044921875e-05, + "model_forward_time": 0.025150775909423828, + "step": 21100 + }, + { + "epoch": 3.2196044921875e-05, + "step": 21100, + "training_step_time": 0.19571614265441895 + }, + { + "epoch": 3.219757080078125e-05, + "model_forward_time": 0.024554014205932617, + "step": 21101 + }, + { + "epoch": 3.219757080078125e-05, + "step": 21101, + "training_step_time": 0.12973999977111816 + }, + { + "epoch": 3.21990966796875e-05, + "model_forward_time": 0.024652719497680664, + "step": 21102 + }, + { + "epoch": 3.21990966796875e-05, + "step": 21102, + "training_step_time": 0.1428537368774414 + }, + { + "epoch": 3.220062255859375e-05, + "model_forward_time": 0.023976802825927734, + "step": 21103 + }, + { + "epoch": 3.220062255859375e-05, + "step": 21103, + "training_step_time": 0.11202001571655273 + }, + { + "epoch": 3.22021484375e-05, + "model_forward_time": 0.02476644515991211, + "step": 21104 + }, + { + "epoch": 3.22021484375e-05, + "step": 21104, + "training_step_time": 0.10760617256164551 + }, + { + "epoch": 3.220367431640625e-05, + "model_forward_time": 0.025214195251464844, + "step": 21105 + }, + { + "epoch": 3.220367431640625e-05, + "step": 21105, + "training_step_time": 0.13596844673156738 + }, + { + "epoch": 3.22052001953125e-05, + "model_forward_time": 0.024297475814819336, + "step": 21106 + }, + { + "epoch": 3.22052001953125e-05, + "step": 21106, + "training_step_time": 0.2098379135131836 + }, + { + "epoch": 3.220672607421875e-05, + "model_forward_time": 0.024859189987182617, + "step": 21107 + }, + { + "epoch": 3.220672607421875e-05, + "step": 21107, + "training_step_time": 0.17070388793945312 + }, + { + "epoch": 3.2208251953125e-05, + "model_forward_time": 0.02403426170349121, + "step": 21108 + }, + { + "epoch": 3.2208251953125e-05, + "step": 21108, + "training_step_time": 0.16711640357971191 + }, + { + "epoch": 3.220977783203125e-05, + "model_forward_time": 0.024085521697998047, + "step": 21109 + }, + { + "epoch": 3.220977783203125e-05, + "step": 21109, + "training_step_time": 0.16875410079956055 + }, + { + "epoch": 3.22113037109375e-05, + "grad_norm": 0.21862581372261047, + "learning_rate": 2.2147064083987838e-05, + "loss": 0.0068, + "step": 21110 + }, + { + "epoch": 3.22113037109375e-05, + "model_forward_time": 0.024361610412597656, + "step": 21110 + }, + { + "epoch": 3.22113037109375e-05, + "step": 21110, + "training_step_time": 0.15017318725585938 + }, + { + "epoch": 3.221282958984375e-05, + "model_forward_time": 0.024413585662841797, + "step": 21111 + }, + { + "epoch": 3.221282958984375e-05, + "step": 21111, + "training_step_time": 0.13892412185668945 + }, + { + "epoch": 3.221435546875e-05, + "model_forward_time": 0.024481534957885742, + "step": 21112 + }, + { + "epoch": 3.221435546875e-05, + "step": 21112, + "training_step_time": 0.13253474235534668 + }, + { + "epoch": 3.221588134765625e-05, + "model_forward_time": 0.02751469612121582, + "step": 21113 + }, + { + "epoch": 3.221588134765625e-05, + "step": 21113, + "training_step_time": 0.1296827793121338 + }, + { + "epoch": 3.22174072265625e-05, + "model_forward_time": 0.02532815933227539, + "step": 21114 + }, + { + "epoch": 3.22174072265625e-05, + "step": 21114, + "training_step_time": 0.11786627769470215 + }, + { + "epoch": 3.221893310546875e-05, + "model_forward_time": 0.024986982345581055, + "step": 21115 + }, + { + "epoch": 3.221893310546875e-05, + "step": 21115, + "training_step_time": 0.11551904678344727 + }, + { + "epoch": 3.2220458984375e-05, + "model_forward_time": 0.02517223358154297, + "step": 21116 + }, + { + "epoch": 3.2220458984375e-05, + "step": 21116, + "training_step_time": 0.11705493927001953 + }, + { + "epoch": 3.222198486328125e-05, + "model_forward_time": 0.024164676666259766, + "step": 21117 + }, + { + "epoch": 3.222198486328125e-05, + "step": 21117, + "training_step_time": 0.11173319816589355 + }, + { + "epoch": 3.22235107421875e-05, + "model_forward_time": 0.024269580841064453, + "step": 21118 + }, + { + "epoch": 3.22235107421875e-05, + "step": 21118, + "training_step_time": 0.11076641082763672 + }, + { + "epoch": 3.222503662109375e-05, + "model_forward_time": 0.02397918701171875, + "step": 21119 + }, + { + "epoch": 3.222503662109375e-05, + "step": 21119, + "training_step_time": 0.11178779602050781 + }, + { + "epoch": 3.22265625e-05, + "grad_norm": 0.10320556908845901, + "learning_rate": 2.2101308941239203e-05, + "loss": 0.0065, + "step": 21120 + }, + { + "epoch": 3.22265625e-05, + "model_forward_time": 0.024203062057495117, + "step": 21120 + }, + { + "epoch": 3.22265625e-05, + "step": 21120, + "training_step_time": 0.11410236358642578 + }, + { + "epoch": 3.222808837890625e-05, + "model_forward_time": 0.02686333656311035, + "step": 21121 + }, + { + "epoch": 3.222808837890625e-05, + "step": 21121, + "training_step_time": 0.10918474197387695 + }, + { + "epoch": 3.22296142578125e-05, + "model_forward_time": 0.025805234909057617, + "step": 21122 + }, + { + "epoch": 3.22296142578125e-05, + "step": 21122, + "training_step_time": 0.19870972633361816 + }, + { + "epoch": 3.223114013671875e-05, + "model_forward_time": 0.024204730987548828, + "step": 21123 + }, + { + "epoch": 3.223114013671875e-05, + "step": 21123, + "training_step_time": 0.10590314865112305 + }, + { + "epoch": 3.2232666015625e-05, + "model_forward_time": 0.024292707443237305, + "step": 21124 + }, + { + "epoch": 3.2232666015625e-05, + "step": 21124, + "training_step_time": 0.12429237365722656 + }, + { + "epoch": 3.223419189453125e-05, + "model_forward_time": 0.025079727172851562, + "step": 21125 + }, + { + "epoch": 3.223419189453125e-05, + "step": 21125, + "training_step_time": 0.13730931282043457 + }, + { + "epoch": 3.22357177734375e-05, + "model_forward_time": 0.024784326553344727, + "step": 21126 + }, + { + "epoch": 3.22357177734375e-05, + "step": 21126, + "training_step_time": 0.11589646339416504 + }, + { + "epoch": 3.223724365234375e-05, + "model_forward_time": 0.024826526641845703, + "step": 21127 + }, + { + "epoch": 3.223724365234375e-05, + "step": 21127, + "training_step_time": 0.1356487274169922 + }, + { + "epoch": 3.223876953125e-05, + "model_forward_time": 0.0282132625579834, + "step": 21128 + }, + { + "epoch": 3.223876953125e-05, + "step": 21128, + "training_step_time": 0.11227989196777344 + }, + { + "epoch": 3.224029541015625e-05, + "model_forward_time": 0.0251314640045166, + "step": 21129 + }, + { + "epoch": 3.224029541015625e-05, + "step": 21129, + "training_step_time": 0.10677886009216309 + }, + { + "epoch": 3.22418212890625e-05, + "grad_norm": 0.1518149971961975, + "learning_rate": 2.2055587698030877e-05, + "loss": 0.0143, + "step": 21130 + }, + { + "epoch": 3.22418212890625e-05, + "model_forward_time": 0.02476978302001953, + "step": 21130 + }, + { + "epoch": 3.22418212890625e-05, + "step": 21130, + "training_step_time": 0.10728788375854492 + }, + { + "epoch": 3.224334716796875e-05, + "model_forward_time": 0.02576303482055664, + "step": 21131 + }, + { + "epoch": 3.224334716796875e-05, + "step": 21131, + "training_step_time": 0.1086421012878418 + }, + { + "epoch": 3.2244873046875e-05, + "model_forward_time": 0.02473735809326172, + "step": 21132 + }, + { + "epoch": 3.2244873046875e-05, + "step": 21132, + "training_step_time": 0.10787081718444824 + }, + { + "epoch": 3.224639892578125e-05, + "model_forward_time": 0.02524590492248535, + "step": 21133 + }, + { + "epoch": 3.224639892578125e-05, + "step": 21133, + "training_step_time": 0.10401725769042969 + }, + { + "epoch": 3.22479248046875e-05, + "model_forward_time": 0.024825096130371094, + "step": 21134 + }, + { + "epoch": 3.22479248046875e-05, + "step": 21134, + "training_step_time": 0.10587263107299805 + }, + { + "epoch": 3.224945068359375e-05, + "model_forward_time": 0.024815082550048828, + "step": 21135 + }, + { + "epoch": 3.224945068359375e-05, + "step": 21135, + "training_step_time": 0.10649991035461426 + }, + { + "epoch": 3.22509765625e-05, + "model_forward_time": 0.025132179260253906, + "step": 21136 + }, + { + "epoch": 3.22509765625e-05, + "step": 21136, + "training_step_time": 0.11367130279541016 + }, + { + "epoch": 3.225250244140625e-05, + "model_forward_time": 0.024682283401489258, + "step": 21137 + }, + { + "epoch": 3.225250244140625e-05, + "step": 21137, + "training_step_time": 0.1054527759552002 + }, + { + "epoch": 3.22540283203125e-05, + "model_forward_time": 0.025109291076660156, + "step": 21138 + }, + { + "epoch": 3.22540283203125e-05, + "step": 21138, + "training_step_time": 0.1035313606262207 + }, + { + "epoch": 3.225555419921875e-05, + "model_forward_time": 0.02533698081970215, + "step": 21139 + }, + { + "epoch": 3.225555419921875e-05, + "step": 21139, + "training_step_time": 0.10422301292419434 + }, + { + "epoch": 3.2257080078125e-05, + "grad_norm": 0.2994593679904938, + "learning_rate": 2.2009900409918465e-05, + "loss": 0.0105, + "step": 21140 + }, + { + "epoch": 3.2257080078125e-05, + "model_forward_time": 0.024922609329223633, + "step": 21140 + }, + { + "epoch": 3.2257080078125e-05, + "step": 21140, + "training_step_time": 0.14243650436401367 + }, + { + "epoch": 3.225860595703125e-05, + "model_forward_time": 0.024197816848754883, + "step": 21141 + }, + { + "epoch": 3.225860595703125e-05, + "step": 21141, + "training_step_time": 0.16670680046081543 + }, + { + "epoch": 3.22601318359375e-05, + "model_forward_time": 0.024222612380981445, + "step": 21142 + }, + { + "epoch": 3.22601318359375e-05, + "step": 21142, + "training_step_time": 0.10964512825012207 + }, + { + "epoch": 3.226165771484375e-05, + "model_forward_time": 0.02457904815673828, + "step": 21143 + }, + { + "epoch": 3.226165771484375e-05, + "step": 21143, + "training_step_time": 0.15297603607177734 + }, + { + "epoch": 3.226318359375e-05, + "model_forward_time": 0.024840116500854492, + "step": 21144 + }, + { + "epoch": 3.226318359375e-05, + "step": 21144, + "training_step_time": 0.17779064178466797 + }, + { + "epoch": 3.226470947265625e-05, + "model_forward_time": 0.024689674377441406, + "step": 21145 + }, + { + "epoch": 3.226470947265625e-05, + "step": 21145, + "training_step_time": 0.1655879020690918 + }, + { + "epoch": 3.22662353515625e-05, + "model_forward_time": 0.024198293685913086, + "step": 21146 + }, + { + "epoch": 3.22662353515625e-05, + "step": 21146, + "training_step_time": 0.13478851318359375 + }, + { + "epoch": 3.226776123046875e-05, + "model_forward_time": 0.02408289909362793, + "step": 21147 + }, + { + "epoch": 3.226776123046875e-05, + "step": 21147, + "training_step_time": 0.11075425148010254 + }, + { + "epoch": 3.2269287109375e-05, + "model_forward_time": 0.025351762771606445, + "step": 21148 + }, + { + "epoch": 3.2269287109375e-05, + "step": 21148, + "training_step_time": 0.12004733085632324 + }, + { + "epoch": 3.227081298828125e-05, + "model_forward_time": 0.024771451950073242, + "step": 21149 + }, + { + "epoch": 3.227081298828125e-05, + "step": 21149, + "training_step_time": 0.10836434364318848 + }, + { + "epoch": 3.22723388671875e-05, + "grad_norm": 0.215509295463562, + "learning_rate": 2.196424713241637e-05, + "loss": 0.0065, + "step": 21150 + }, + { + "epoch": 3.22723388671875e-05, + "model_forward_time": 0.025159120559692383, + "step": 21150 + }, + { + "epoch": 3.22723388671875e-05, + "step": 21150, + "training_step_time": 0.11777138710021973 + }, + { + "epoch": 3.227386474609375e-05, + "model_forward_time": 0.025344133377075195, + "step": 21151 + }, + { + "epoch": 3.227386474609375e-05, + "step": 21151, + "training_step_time": 0.10483670234680176 + }, + { + "epoch": 3.2275390625e-05, + "model_forward_time": 0.024941205978393555, + "step": 21152 + }, + { + "epoch": 3.2275390625e-05, + "step": 21152, + "training_step_time": 0.19800782203674316 + }, + { + "epoch": 3.227691650390625e-05, + "model_forward_time": 0.02415943145751953, + "step": 21153 + }, + { + "epoch": 3.227691650390625e-05, + "step": 21153, + "training_step_time": 0.10711956024169922 + }, + { + "epoch": 3.22784423828125e-05, + "model_forward_time": 0.024597644805908203, + "step": 21154 + }, + { + "epoch": 3.22784423828125e-05, + "step": 21154, + "training_step_time": 0.10780715942382812 + }, + { + "epoch": 3.227996826171875e-05, + "model_forward_time": 0.024873971939086914, + "step": 21155 + }, + { + "epoch": 3.227996826171875e-05, + "step": 21155, + "training_step_time": 0.10795164108276367 + }, + { + "epoch": 3.2281494140625e-05, + "model_forward_time": 0.024897336959838867, + "step": 21156 + }, + { + "epoch": 3.2281494140625e-05, + "step": 21156, + "training_step_time": 0.10606122016906738 + }, + { + "epoch": 3.228302001953125e-05, + "model_forward_time": 0.024784088134765625, + "step": 21157 + }, + { + "epoch": 3.228302001953125e-05, + "step": 21157, + "training_step_time": 0.10716986656188965 + }, + { + "epoch": 3.22845458984375e-05, + "model_forward_time": 0.025293350219726562, + "step": 21158 + }, + { + "epoch": 3.22845458984375e-05, + "step": 21158, + "training_step_time": 0.11139893531799316 + }, + { + "epoch": 3.228607177734375e-05, + "model_forward_time": 0.02523207664489746, + "step": 21159 + }, + { + "epoch": 3.228607177734375e-05, + "step": 21159, + "training_step_time": 0.1623075008392334 + }, + { + "epoch": 3.228759765625e-05, + "grad_norm": 0.12247727811336517, + "learning_rate": 2.1918627920997593e-05, + "loss": 0.0095, + "step": 21160 + }, + { + "epoch": 3.228759765625e-05, + "model_forward_time": 0.02364039421081543, + "step": 21160 + }, + { + "epoch": 3.228759765625e-05, + "step": 21160, + "training_step_time": 0.15630364418029785 + }, + { + "epoch": 3.228912353515625e-05, + "model_forward_time": 0.023675918579101562, + "step": 21161 + }, + { + "epoch": 3.228912353515625e-05, + "step": 21161, + "training_step_time": 0.14830517768859863 + }, + { + "epoch": 3.22906494140625e-05, + "model_forward_time": 0.023703575134277344, + "step": 21162 + }, + { + "epoch": 3.22906494140625e-05, + "step": 21162, + "training_step_time": 0.13562941551208496 + }, + { + "epoch": 3.229217529296875e-05, + "model_forward_time": 0.023604154586791992, + "step": 21163 + }, + { + "epoch": 3.229217529296875e-05, + "step": 21163, + "training_step_time": 0.12831497192382812 + }, + { + "epoch": 3.2293701171875e-05, + "model_forward_time": 0.02338719367980957, + "step": 21164 + }, + { + "epoch": 3.2293701171875e-05, + "step": 21164, + "training_step_time": 0.1271049976348877 + }, + { + "epoch": 3.229522705078125e-05, + "model_forward_time": 0.026682376861572266, + "step": 21165 + }, + { + "epoch": 3.229522705078125e-05, + "step": 21165, + "training_step_time": 0.12337350845336914 + }, + { + "epoch": 3.22967529296875e-05, + "model_forward_time": 0.025015592575073242, + "step": 21166 + }, + { + "epoch": 3.22967529296875e-05, + "step": 21166, + "training_step_time": 0.12014603614807129 + }, + { + "epoch": 3.229827880859375e-05, + "model_forward_time": 0.0242922306060791, + "step": 21167 + }, + { + "epoch": 3.229827880859375e-05, + "step": 21167, + "training_step_time": 0.18651151657104492 + }, + { + "epoch": 3.22998046875e-05, + "model_forward_time": 0.02480459213256836, + "step": 21168 + }, + { + "epoch": 3.22998046875e-05, + "step": 21168, + "training_step_time": 0.11352658271789551 + }, + { + "epoch": 3.230133056640625e-05, + "model_forward_time": 0.02449941635131836, + "step": 21169 + }, + { + "epoch": 3.230133056640625e-05, + "step": 21169, + "training_step_time": 0.11191010475158691 + }, + { + "epoch": 3.23028564453125e-05, + "grad_norm": 0.11898034065961838, + "learning_rate": 2.1873042831093803e-05, + "loss": 0.008, + "step": 21170 + }, + { + "epoch": 3.23028564453125e-05, + "model_forward_time": 0.025146007537841797, + "step": 21170 + }, + { + "epoch": 3.23028564453125e-05, + "step": 21170, + "training_step_time": 0.11211562156677246 + }, + { + "epoch": 3.230438232421875e-05, + "model_forward_time": 0.02483201026916504, + "step": 21171 + }, + { + "epoch": 3.230438232421875e-05, + "step": 21171, + "training_step_time": 0.1365222930908203 + }, + { + "epoch": 3.2305908203125e-05, + "model_forward_time": 0.025719165802001953, + "step": 21172 + }, + { + "epoch": 3.2305908203125e-05, + "step": 21172, + "training_step_time": 0.20273923873901367 + }, + { + "epoch": 3.230743408203125e-05, + "model_forward_time": 0.024067401885986328, + "step": 21173 + }, + { + "epoch": 3.230743408203125e-05, + "step": 21173, + "training_step_time": 0.11932158470153809 + }, + { + "epoch": 3.23089599609375e-05, + "model_forward_time": 0.023897409439086914, + "step": 21174 + }, + { + "epoch": 3.23089599609375e-05, + "step": 21174, + "training_step_time": 0.10608530044555664 + }, + { + "epoch": 3.231048583984375e-05, + "model_forward_time": 0.02498912811279297, + "step": 21175 + }, + { + "epoch": 3.231048583984375e-05, + "step": 21175, + "training_step_time": 0.10579419136047363 + }, + { + "epoch": 3.231201171875e-05, + "model_forward_time": 0.024982690811157227, + "step": 21176 + }, + { + "epoch": 3.231201171875e-05, + "step": 21176, + "training_step_time": 0.10849666595458984 + }, + { + "epoch": 3.231353759765625e-05, + "model_forward_time": 0.027215242385864258, + "step": 21177 + }, + { + "epoch": 3.231353759765625e-05, + "step": 21177, + "training_step_time": 0.11033916473388672 + }, + { + "epoch": 3.23150634765625e-05, + "model_forward_time": 0.02533578872680664, + "step": 21178 + }, + { + "epoch": 3.23150634765625e-05, + "step": 21178, + "training_step_time": 0.10845398902893066 + }, + { + "epoch": 3.231658935546875e-05, + "model_forward_time": 0.02730274200439453, + "step": 21179 + }, + { + "epoch": 3.231658935546875e-05, + "step": 21179, + "training_step_time": 0.15689373016357422 + }, + { + "epoch": 3.2318115234375e-05, + "grad_norm": 0.15386469662189484, + "learning_rate": 2.182749191809518e-05, + "loss": 0.0053, + "step": 21180 + }, + { + "epoch": 3.2318115234375e-05, + "model_forward_time": 0.026033878326416016, + "step": 21180 + }, + { + "epoch": 3.2318115234375e-05, + "step": 21180, + "training_step_time": 0.17985224723815918 + }, + { + "epoch": 3.231964111328125e-05, + "model_forward_time": 0.024247169494628906, + "step": 21181 + }, + { + "epoch": 3.231964111328125e-05, + "step": 21181, + "training_step_time": 0.18060541152954102 + }, + { + "epoch": 3.23211669921875e-05, + "model_forward_time": 0.02530217170715332, + "step": 21182 + }, + { + "epoch": 3.23211669921875e-05, + "step": 21182, + "training_step_time": 0.1416475772857666 + }, + { + "epoch": 3.232269287109375e-05, + "model_forward_time": 0.024146080017089844, + "step": 21183 + }, + { + "epoch": 3.232269287109375e-05, + "step": 21183, + "training_step_time": 0.15105557441711426 + }, + { + "epoch": 3.232421875e-05, + "model_forward_time": 0.024245500564575195, + "step": 21184 + }, + { + "epoch": 3.232421875e-05, + "step": 21184, + "training_step_time": 0.15365195274353027 + }, + { + "epoch": 3.232574462890625e-05, + "model_forward_time": 0.02378249168395996, + "step": 21185 + }, + { + "epoch": 3.232574462890625e-05, + "step": 21185, + "training_step_time": 0.21893095970153809 + }, + { + "epoch": 3.23272705078125e-05, + "model_forward_time": 0.024749040603637695, + "step": 21186 + }, + { + "epoch": 3.23272705078125e-05, + "step": 21186, + "training_step_time": 0.201218843460083 + }, + { + "epoch": 3.232879638671875e-05, + "model_forward_time": 0.02450847625732422, + "step": 21187 + }, + { + "epoch": 3.232879638671875e-05, + "step": 21187, + "training_step_time": 0.12005424499511719 + }, + { + "epoch": 3.2330322265625e-05, + "model_forward_time": 0.024097919464111328, + "step": 21188 + }, + { + "epoch": 3.2330322265625e-05, + "step": 21188, + "training_step_time": 0.13227605819702148 + }, + { + "epoch": 3.233184814453125e-05, + "model_forward_time": 0.02524542808532715, + "step": 21189 + }, + { + "epoch": 3.233184814453125e-05, + "step": 21189, + "training_step_time": 0.1397538185119629 + }, + { + "epoch": 3.23333740234375e-05, + "grad_norm": 0.4359036087989807, + "learning_rate": 2.1781975237350366e-05, + "loss": 0.0106, + "step": 21190 + }, + { + "epoch": 3.23333740234375e-05, + "model_forward_time": 0.024667739868164062, + "step": 21190 + }, + { + "epoch": 3.23333740234375e-05, + "step": 21190, + "training_step_time": 0.18721723556518555 + }, + { + "epoch": 3.233489990234375e-05, + "model_forward_time": 0.02505183219909668, + "step": 21191 + }, + { + "epoch": 3.233489990234375e-05, + "step": 21191, + "training_step_time": 0.13910555839538574 + }, + { + "epoch": 3.233642578125e-05, + "model_forward_time": 0.024353504180908203, + "step": 21192 + }, + { + "epoch": 3.233642578125e-05, + "step": 21192, + "training_step_time": 0.10639357566833496 + }, + { + "epoch": 3.233795166015625e-05, + "model_forward_time": 0.025227785110473633, + "step": 21193 + }, + { + "epoch": 3.233795166015625e-05, + "step": 21193, + "training_step_time": 0.20298981666564941 + }, + { + "epoch": 3.23394775390625e-05, + "model_forward_time": 0.02467060089111328, + "step": 21194 + }, + { + "epoch": 3.23394775390625e-05, + "step": 21194, + "training_step_time": 0.10439634323120117 + }, + { + "epoch": 3.234100341796875e-05, + "model_forward_time": 0.025470495223999023, + "step": 21195 + }, + { + "epoch": 3.234100341796875e-05, + "step": 21195, + "training_step_time": 0.10763025283813477 + }, + { + "epoch": 3.2342529296875e-05, + "model_forward_time": 0.02551746368408203, + "step": 21196 + }, + { + "epoch": 3.2342529296875e-05, + "step": 21196, + "training_step_time": 0.1078188419342041 + }, + { + "epoch": 3.234405517578125e-05, + "model_forward_time": 0.025696516036987305, + "step": 21197 + }, + { + "epoch": 3.234405517578125e-05, + "step": 21197, + "training_step_time": 0.11059808731079102 + }, + { + "epoch": 3.23455810546875e-05, + "model_forward_time": 0.025440216064453125, + "step": 21198 + }, + { + "epoch": 3.23455810546875e-05, + "step": 21198, + "training_step_time": 0.11473369598388672 + }, + { + "epoch": 3.234710693359375e-05, + "model_forward_time": 0.024814367294311523, + "step": 21199 + }, + { + "epoch": 3.234710693359375e-05, + "step": 21199, + "training_step_time": 0.10697698593139648 + }, + { + "epoch": 3.23486328125e-05, + "grad_norm": 0.20810121297836304, + "learning_rate": 2.1736492844166407e-05, + "loss": 0.01, + "step": 21200 + }, + { + "epoch": 3.23486328125e-05, + "model_forward_time": 0.025196313858032227, + "step": 21200 + }, + { + "epoch": 3.23486328125e-05, + "step": 21200, + "training_step_time": 0.10869431495666504 + }, + { + "epoch": 3.235015869140625e-05, + "model_forward_time": 0.024559974670410156, + "step": 21201 + }, + { + "epoch": 3.235015869140625e-05, + "step": 21201, + "training_step_time": 0.10734176635742188 + }, + { + "epoch": 3.23516845703125e-05, + "model_forward_time": 0.025150299072265625, + "step": 21202 + }, + { + "epoch": 3.23516845703125e-05, + "step": 21202, + "training_step_time": 0.10800004005432129 + }, + { + "epoch": 3.235321044921875e-05, + "model_forward_time": 0.025125980377197266, + "step": 21203 + }, + { + "epoch": 3.235321044921875e-05, + "step": 21203, + "training_step_time": 0.10706353187561035 + }, + { + "epoch": 3.2354736328125e-05, + "model_forward_time": 0.026156187057495117, + "step": 21204 + }, + { + "epoch": 3.2354736328125e-05, + "step": 21204, + "training_step_time": 0.11110472679138184 + }, + { + "epoch": 3.235626220703125e-05, + "model_forward_time": 0.026600360870361328, + "step": 21205 + }, + { + "epoch": 3.235626220703125e-05, + "step": 21205, + "training_step_time": 0.11316156387329102 + }, + { + "epoch": 3.23577880859375e-05, + "model_forward_time": 0.02500176429748535, + "step": 21206 + }, + { + "epoch": 3.23577880859375e-05, + "step": 21206, + "training_step_time": 0.11544585227966309 + }, + { + "epoch": 3.235931396484375e-05, + "model_forward_time": 0.028255224227905273, + "step": 21207 + }, + { + "epoch": 3.235931396484375e-05, + "step": 21207, + "training_step_time": 0.11554932594299316 + }, + { + "epoch": 3.236083984375e-05, + "model_forward_time": 0.025579214096069336, + "step": 21208 + }, + { + "epoch": 3.236083984375e-05, + "step": 21208, + "training_step_time": 0.1082611083984375 + }, + { + "epoch": 3.236236572265625e-05, + "model_forward_time": 0.02570509910583496, + "step": 21209 + }, + { + "epoch": 3.236236572265625e-05, + "step": 21209, + "training_step_time": 0.11014080047607422 + }, + { + "epoch": 3.23638916015625e-05, + "grad_norm": 0.19430792331695557, + "learning_rate": 2.1691044793808734e-05, + "loss": 0.0065, + "step": 21210 + }, + { + "epoch": 3.23638916015625e-05, + "model_forward_time": 0.02509617805480957, + "step": 21210 + }, + { + "epoch": 3.23638916015625e-05, + "step": 21210, + "training_step_time": 0.1860666275024414 + }, + { + "epoch": 3.236541748046875e-05, + "model_forward_time": 0.024563312530517578, + "step": 21211 + }, + { + "epoch": 3.236541748046875e-05, + "step": 21211, + "training_step_time": 0.17884588241577148 + }, + { + "epoch": 3.2366943359375e-05, + "model_forward_time": 0.024672508239746094, + "step": 21212 + }, + { + "epoch": 3.2366943359375e-05, + "step": 21212, + "training_step_time": 0.12824034690856934 + }, + { + "epoch": 3.236846923828125e-05, + "model_forward_time": 0.024639606475830078, + "step": 21213 + }, + { + "epoch": 3.236846923828125e-05, + "step": 21213, + "training_step_time": 0.10855364799499512 + }, + { + "epoch": 3.23699951171875e-05, + "model_forward_time": 0.029419660568237305, + "step": 21214 + }, + { + "epoch": 3.23699951171875e-05, + "step": 21214, + "training_step_time": 0.17448902130126953 + }, + { + "epoch": 3.237152099609375e-05, + "model_forward_time": 0.024523019790649414, + "step": 21215 + }, + { + "epoch": 3.237152099609375e-05, + "step": 21215, + "training_step_time": 0.13331341743469238 + }, + { + "epoch": 3.2373046875e-05, + "model_forward_time": 0.02500295639038086, + "step": 21216 + }, + { + "epoch": 3.2373046875e-05, + "step": 21216, + "training_step_time": 0.12585711479187012 + }, + { + "epoch": 3.237457275390625e-05, + "model_forward_time": 0.025064706802368164, + "step": 21217 + }, + { + "epoch": 3.237457275390625e-05, + "step": 21217, + "training_step_time": 0.10875320434570312 + }, + { + "epoch": 3.23760986328125e-05, + "model_forward_time": 0.026226282119750977, + "step": 21218 + }, + { + "epoch": 3.23760986328125e-05, + "step": 21218, + "training_step_time": 0.11018013954162598 + }, + { + "epoch": 3.237762451171875e-05, + "model_forward_time": 0.025165081024169922, + "step": 21219 + }, + { + "epoch": 3.237762451171875e-05, + "step": 21219, + "training_step_time": 0.13895702362060547 + }, + { + "epoch": 3.2379150390625e-05, + "grad_norm": 0.1496005356311798, + "learning_rate": 2.1645631141500994e-05, + "loss": 0.0101, + "step": 21220 + }, + { + "epoch": 3.2379150390625e-05, + "model_forward_time": 0.02419900894165039, + "step": 21220 + }, + { + "epoch": 3.2379150390625e-05, + "step": 21220, + "training_step_time": 0.16323280334472656 + }, + { + "epoch": 3.238067626953125e-05, + "model_forward_time": 0.02678084373474121, + "step": 21221 + }, + { + "epoch": 3.238067626953125e-05, + "step": 21221, + "training_step_time": 0.16374731063842773 + }, + { + "epoch": 3.23822021484375e-05, + "model_forward_time": 0.02356266975402832, + "step": 21222 + }, + { + "epoch": 3.23822021484375e-05, + "step": 21222, + "training_step_time": 0.1537158489227295 + }, + { + "epoch": 3.238372802734375e-05, + "model_forward_time": 0.024825096130371094, + "step": 21223 + }, + { + "epoch": 3.238372802734375e-05, + "step": 21223, + "training_step_time": 0.16394257545471191 + }, + { + "epoch": 3.238525390625e-05, + "model_forward_time": 0.02569103240966797, + "step": 21224 + }, + { + "epoch": 3.238525390625e-05, + "step": 21224, + "training_step_time": 0.11689400672912598 + }, + { + "epoch": 3.238677978515625e-05, + "model_forward_time": 0.02464008331298828, + "step": 21225 + }, + { + "epoch": 3.238677978515625e-05, + "step": 21225, + "training_step_time": 0.1573953628540039 + }, + { + "epoch": 3.23883056640625e-05, + "model_forward_time": 0.024379491806030273, + "step": 21226 + }, + { + "epoch": 3.23883056640625e-05, + "step": 21226, + "training_step_time": 0.14690399169921875 + }, + { + "epoch": 3.238983154296875e-05, + "model_forward_time": 0.025241851806640625, + "step": 21227 + }, + { + "epoch": 3.238983154296875e-05, + "step": 21227, + "training_step_time": 0.11838722229003906 + }, + { + "epoch": 3.2391357421875e-05, + "model_forward_time": 0.024970054626464844, + "step": 21228 + }, + { + "epoch": 3.2391357421875e-05, + "step": 21228, + "training_step_time": 0.12140345573425293 + }, + { + "epoch": 3.239288330078125e-05, + "model_forward_time": 0.026240825653076172, + "step": 21229 + }, + { + "epoch": 3.239288330078125e-05, + "step": 21229, + "training_step_time": 0.20972204208374023 + }, + { + "epoch": 3.23944091796875e-05, + "grad_norm": 0.22654598951339722, + "learning_rate": 2.1600251942425066e-05, + "loss": 0.0076, + "step": 21230 + }, + { + "epoch": 3.23944091796875e-05, + "model_forward_time": 0.02619457244873047, + "step": 21230 + }, + { + "epoch": 3.23944091796875e-05, + "step": 21230, + "training_step_time": 0.13608717918395996 + }, + { + "epoch": 3.239593505859375e-05, + "model_forward_time": 0.027230262756347656, + "step": 21231 + }, + { + "epoch": 3.239593505859375e-05, + "step": 21231, + "training_step_time": 0.22448086738586426 + }, + { + "epoch": 3.23974609375e-05, + "model_forward_time": 0.025946617126464844, + "step": 21232 + }, + { + "epoch": 3.23974609375e-05, + "step": 21232, + "training_step_time": 0.15306854248046875 + }, + { + "epoch": 3.239898681640625e-05, + "model_forward_time": 0.024697303771972656, + "step": 21233 + }, + { + "epoch": 3.239898681640625e-05, + "step": 21233, + "training_step_time": 0.17835283279418945 + }, + { + "epoch": 3.24005126953125e-05, + "model_forward_time": 0.02568984031677246, + "step": 21234 + }, + { + "epoch": 3.24005126953125e-05, + "step": 21234, + "training_step_time": 0.11004757881164551 + }, + { + "epoch": 3.240203857421875e-05, + "model_forward_time": 0.025584697723388672, + "step": 21235 + }, + { + "epoch": 3.240203857421875e-05, + "step": 21235, + "training_step_time": 0.18908262252807617 + }, + { + "epoch": 3.2403564453125e-05, + "model_forward_time": 0.025524616241455078, + "step": 21236 + }, + { + "epoch": 3.2403564453125e-05, + "step": 21236, + "training_step_time": 0.1049039363861084 + }, + { + "epoch": 3.240509033203125e-05, + "model_forward_time": 0.0254819393157959, + "step": 21237 + }, + { + "epoch": 3.240509033203125e-05, + "step": 21237, + "training_step_time": 0.1027688980102539 + }, + { + "epoch": 3.24066162109375e-05, + "model_forward_time": 0.025950908660888672, + "step": 21238 + }, + { + "epoch": 3.24066162109375e-05, + "step": 21238, + "training_step_time": 0.10636663436889648 + }, + { + "epoch": 3.240814208984375e-05, + "model_forward_time": 0.026631832122802734, + "step": 21239 + }, + { + "epoch": 3.240814208984375e-05, + "step": 21239, + "training_step_time": 0.10644984245300293 + }, + { + "epoch": 3.240966796875e-05, + "grad_norm": 0.1778857707977295, + "learning_rate": 2.1554907251720945e-05, + "loss": 0.0053, + "step": 21240 + }, + { + "epoch": 3.240966796875e-05, + "model_forward_time": 0.029713869094848633, + "step": 21240 + }, + { + "epoch": 3.240966796875e-05, + "step": 21240, + "training_step_time": 0.10948562622070312 + }, + { + "epoch": 3.241119384765625e-05, + "model_forward_time": 0.02584075927734375, + "step": 21241 + }, + { + "epoch": 3.241119384765625e-05, + "step": 21241, + "training_step_time": 0.10787534713745117 + }, + { + "epoch": 3.24127197265625e-05, + "model_forward_time": 0.02519845962524414, + "step": 21242 + }, + { + "epoch": 3.24127197265625e-05, + "step": 21242, + "training_step_time": 0.10442304611206055 + }, + { + "epoch": 3.241424560546875e-05, + "model_forward_time": 0.025231599807739258, + "step": 21243 + }, + { + "epoch": 3.241424560546875e-05, + "step": 21243, + "training_step_time": 0.10607600212097168 + }, + { + "epoch": 3.2415771484375e-05, + "model_forward_time": 0.025231122970581055, + "step": 21244 + }, + { + "epoch": 3.2415771484375e-05, + "step": 21244, + "training_step_time": 0.10566067695617676 + }, + { + "epoch": 3.241729736328125e-05, + "model_forward_time": 0.025299072265625, + "step": 21245 + }, + { + "epoch": 3.241729736328125e-05, + "step": 21245, + "training_step_time": 0.10839056968688965 + }, + { + "epoch": 3.24188232421875e-05, + "model_forward_time": 0.02610015869140625, + "step": 21246 + }, + { + "epoch": 3.24188232421875e-05, + "step": 21246, + "training_step_time": 0.10614991188049316 + }, + { + "epoch": 3.242034912109375e-05, + "model_forward_time": 0.02570366859436035, + "step": 21247 + }, + { + "epoch": 3.242034912109375e-05, + "step": 21247, + "training_step_time": 0.10657525062561035 + }, + { + "epoch": 3.2421875e-05, + "model_forward_time": 0.02432560920715332, + "step": 21248 + }, + { + "epoch": 3.2421875e-05, + "step": 21248, + "training_step_time": 0.10465145111083984 + }, + { + "epoch": 3.242340087890625e-05, + "model_forward_time": 0.024507999420166016, + "step": 21249 + }, + { + "epoch": 3.242340087890625e-05, + "step": 21249, + "training_step_time": 0.10587787628173828 + }, + { + "epoch": 3.24249267578125e-05, + "grad_norm": 0.24440622329711914, + "learning_rate": 2.150959712448669e-05, + "loss": 0.0055, + "step": 21250 + }, + { + "epoch": 3.24249267578125e-05, + "model_forward_time": 0.024590253829956055, + "step": 21250 + }, + { + "epoch": 3.24249267578125e-05, + "step": 21250, + "training_step_time": 0.1047675609588623 + }, + { + "epoch": 3.242645263671875e-05, + "model_forward_time": 0.02554488182067871, + "step": 21251 + }, + { + "epoch": 3.242645263671875e-05, + "step": 21251, + "training_step_time": 0.13676238059997559 + }, + { + "epoch": 3.2427978515625e-05, + "model_forward_time": 0.02427053451538086, + "step": 21252 + }, + { + "epoch": 3.2427978515625e-05, + "step": 21252, + "training_step_time": 0.164292573928833 + }, + { + "epoch": 3.242950439453125e-05, + "model_forward_time": 0.02374124526977539, + "step": 21253 + }, + { + "epoch": 3.242950439453125e-05, + "step": 21253, + "training_step_time": 0.16121315956115723 + }, + { + "epoch": 3.24310302734375e-05, + "model_forward_time": 0.023733854293823242, + "step": 21254 + }, + { + "epoch": 3.24310302734375e-05, + "step": 21254, + "training_step_time": 0.13312387466430664 + }, + { + "epoch": 3.243255615234375e-05, + "model_forward_time": 0.023810148239135742, + "step": 21255 + }, + { + "epoch": 3.243255615234375e-05, + "step": 21255, + "training_step_time": 0.12884521484375 + }, + { + "epoch": 3.243408203125e-05, + "model_forward_time": 0.024776697158813477, + "step": 21256 + }, + { + "epoch": 3.243408203125e-05, + "step": 21256, + "training_step_time": 0.13691043853759766 + }, + { + "epoch": 3.243560791015625e-05, + "model_forward_time": 0.024858474731445312, + "step": 21257 + }, + { + "epoch": 3.243560791015625e-05, + "step": 21257, + "training_step_time": 0.20888304710388184 + }, + { + "epoch": 3.24371337890625e-05, + "model_forward_time": 0.025579452514648438, + "step": 21258 + }, + { + "epoch": 3.24371337890625e-05, + "step": 21258, + "training_step_time": 0.1198573112487793 + }, + { + "epoch": 3.243865966796875e-05, + "model_forward_time": 0.024273157119750977, + "step": 21259 + }, + { + "epoch": 3.243865966796875e-05, + "step": 21259, + "training_step_time": 0.10398983955383301 + }, + { + "epoch": 3.2440185546875e-05, + "grad_norm": 0.20188677310943604, + "learning_rate": 2.1464321615778422e-05, + "loss": 0.0104, + "step": 21260 + }, + { + "epoch": 3.2440185546875e-05, + "model_forward_time": 0.02530837059020996, + "step": 21260 + }, + { + "epoch": 3.2440185546875e-05, + "step": 21260, + "training_step_time": 0.11028385162353516 + }, + { + "epoch": 3.244171142578125e-05, + "model_forward_time": 0.02611231803894043, + "step": 21261 + }, + { + "epoch": 3.244171142578125e-05, + "step": 21261, + "training_step_time": 0.11213850975036621 + }, + { + "epoch": 3.24432373046875e-05, + "model_forward_time": 0.0264585018157959, + "step": 21262 + }, + { + "epoch": 3.24432373046875e-05, + "step": 21262, + "training_step_time": 0.11168622970581055 + }, + { + "epoch": 3.244476318359375e-05, + "model_forward_time": 0.028642892837524414, + "step": 21263 + }, + { + "epoch": 3.244476318359375e-05, + "step": 21263, + "training_step_time": 0.12099194526672363 + }, + { + "epoch": 3.24462890625e-05, + "model_forward_time": 0.02540135383605957, + "step": 21264 + }, + { + "epoch": 3.24462890625e-05, + "step": 21264, + "training_step_time": 0.11056232452392578 + }, + { + "epoch": 3.244781494140625e-05, + "model_forward_time": 0.025817394256591797, + "step": 21265 + }, + { + "epoch": 3.244781494140625e-05, + "step": 21265, + "training_step_time": 0.11344623565673828 + }, + { + "epoch": 3.24493408203125e-05, + "model_forward_time": 0.025115013122558594, + "step": 21266 + }, + { + "epoch": 3.24493408203125e-05, + "step": 21266, + "training_step_time": 0.11269712448120117 + }, + { + "epoch": 3.245086669921875e-05, + "model_forward_time": 0.02422499656677246, + "step": 21267 + }, + { + "epoch": 3.245086669921875e-05, + "step": 21267, + "training_step_time": 0.11496448516845703 + }, + { + "epoch": 3.2452392578125e-05, + "model_forward_time": 0.025621891021728516, + "step": 21268 + }, + { + "epoch": 3.2452392578125e-05, + "step": 21268, + "training_step_time": 0.10912036895751953 + }, + { + "epoch": 3.245391845703125e-05, + "model_forward_time": 0.024749755859375, + "step": 21269 + }, + { + "epoch": 3.245391845703125e-05, + "step": 21269, + "training_step_time": 0.14738202095031738 + }, + { + "epoch": 3.24554443359375e-05, + "grad_norm": 0.12327086180448532, + "learning_rate": 2.1419080780610123e-05, + "loss": 0.0101, + "step": 21270 + }, + { + "epoch": 3.24554443359375e-05, + "model_forward_time": 0.02468729019165039, + "step": 21270 + }, + { + "epoch": 3.24554443359375e-05, + "step": 21270, + "training_step_time": 0.15407252311706543 + }, + { + "epoch": 3.245697021484375e-05, + "model_forward_time": 0.024357318878173828, + "step": 21271 + }, + { + "epoch": 3.245697021484375e-05, + "step": 21271, + "training_step_time": 0.11011791229248047 + }, + { + "epoch": 3.245849609375e-05, + "model_forward_time": 0.02506709098815918, + "step": 21272 + }, + { + "epoch": 3.245849609375e-05, + "step": 21272, + "training_step_time": 0.1347203254699707 + }, + { + "epoch": 3.246002197265625e-05, + "model_forward_time": 0.02528691291809082, + "step": 21273 + }, + { + "epoch": 3.246002197265625e-05, + "step": 21273, + "training_step_time": 0.19986915588378906 + }, + { + "epoch": 3.24615478515625e-05, + "model_forward_time": 0.02436232566833496, + "step": 21274 + }, + { + "epoch": 3.24615478515625e-05, + "step": 21274, + "training_step_time": 0.16452765464782715 + }, + { + "epoch": 3.246307373046875e-05, + "model_forward_time": 0.024370670318603516, + "step": 21275 + }, + { + "epoch": 3.246307373046875e-05, + "step": 21275, + "training_step_time": 0.20297479629516602 + }, + { + "epoch": 3.2464599609375e-05, + "model_forward_time": 0.024593353271484375, + "step": 21276 + }, + { + "epoch": 3.2464599609375e-05, + "step": 21276, + "training_step_time": 0.14850211143493652 + }, + { + "epoch": 3.246612548828125e-05, + "model_forward_time": 0.0241851806640625, + "step": 21277 + }, + { + "epoch": 3.246612548828125e-05, + "step": 21277, + "training_step_time": 0.18338561058044434 + }, + { + "epoch": 3.24676513671875e-05, + "model_forward_time": 0.024279356002807617, + "step": 21278 + }, + { + "epoch": 3.24676513671875e-05, + "step": 21278, + "training_step_time": 0.13506317138671875 + }, + { + "epoch": 3.246917724609375e-05, + "model_forward_time": 0.02419424057006836, + "step": 21279 + }, + { + "epoch": 3.246917724609375e-05, + "step": 21279, + "training_step_time": 0.1097254753112793 + }, + { + "epoch": 3.2470703125e-05, + "grad_norm": 0.12808099389076233, + "learning_rate": 2.1373874673953685e-05, + "loss": 0.0085, + "step": 21280 + }, + { + "epoch": 3.2470703125e-05, + "model_forward_time": 0.025443553924560547, + "step": 21280 + }, + { + "epoch": 3.2470703125e-05, + "step": 21280, + "training_step_time": 0.11110568046569824 + }, + { + "epoch": 3.247222900390625e-05, + "model_forward_time": 0.025237560272216797, + "step": 21281 + }, + { + "epoch": 3.247222900390625e-05, + "step": 21281, + "training_step_time": 0.10890340805053711 + }, + { + "epoch": 3.24737548828125e-05, + "model_forward_time": 0.025832176208496094, + "step": 21282 + }, + { + "epoch": 3.24737548828125e-05, + "step": 21282, + "training_step_time": 0.11000609397888184 + }, + { + "epoch": 3.247528076171875e-05, + "model_forward_time": 0.026210784912109375, + "step": 21283 + }, + { + "epoch": 3.247528076171875e-05, + "step": 21283, + "training_step_time": 0.10967636108398438 + }, + { + "epoch": 3.2476806640625e-05, + "model_forward_time": 0.02518439292907715, + "step": 21284 + }, + { + "epoch": 3.2476806640625e-05, + "step": 21284, + "training_step_time": 0.1105353832244873 + }, + { + "epoch": 3.247833251953125e-05, + "model_forward_time": 0.025671958923339844, + "step": 21285 + }, + { + "epoch": 3.247833251953125e-05, + "step": 21285, + "training_step_time": 0.10935282707214355 + }, + { + "epoch": 3.24798583984375e-05, + "model_forward_time": 0.025464296340942383, + "step": 21286 + }, + { + "epoch": 3.24798583984375e-05, + "step": 21286, + "training_step_time": 0.11247420310974121 + }, + { + "epoch": 3.248138427734375e-05, + "model_forward_time": 0.025449037551879883, + "step": 21287 + }, + { + "epoch": 3.248138427734375e-05, + "step": 21287, + "training_step_time": 0.10718512535095215 + }, + { + "epoch": 3.248291015625e-05, + "model_forward_time": 0.024883031845092773, + "step": 21288 + }, + { + "epoch": 3.248291015625e-05, + "step": 21288, + "training_step_time": 0.11532115936279297 + }, + { + "epoch": 3.248443603515625e-05, + "model_forward_time": 0.0251157283782959, + "step": 21289 + }, + { + "epoch": 3.248443603515625e-05, + "step": 21289, + "training_step_time": 0.11313629150390625 + }, + { + "epoch": 3.24859619140625e-05, + "grad_norm": 0.40604090690612793, + "learning_rate": 2.1328703350738765e-05, + "loss": 0.0075, + "step": 21290 + }, + { + "epoch": 3.24859619140625e-05, + "model_forward_time": 0.02522730827331543, + "step": 21290 + }, + { + "epoch": 3.24859619140625e-05, + "step": 21290, + "training_step_time": 0.11601376533508301 + }, + { + "epoch": 3.248748779296875e-05, + "model_forward_time": 0.02581620216369629, + "step": 21291 + }, + { + "epoch": 3.248748779296875e-05, + "step": 21291, + "training_step_time": 0.10946178436279297 + }, + { + "epoch": 3.2489013671875e-05, + "model_forward_time": 0.025052547454833984, + "step": 21292 + }, + { + "epoch": 3.2489013671875e-05, + "step": 21292, + "training_step_time": 0.10922431945800781 + }, + { + "epoch": 3.249053955078125e-05, + "model_forward_time": 0.025054454803466797, + "step": 21293 + }, + { + "epoch": 3.249053955078125e-05, + "step": 21293, + "training_step_time": 0.1067967414855957 + }, + { + "epoch": 3.24920654296875e-05, + "model_forward_time": 0.025393009185791016, + "step": 21294 + }, + { + "epoch": 3.24920654296875e-05, + "step": 21294, + "training_step_time": 0.10677576065063477 + }, + { + "epoch": 3.249359130859375e-05, + "model_forward_time": 0.025377511978149414, + "step": 21295 + }, + { + "epoch": 3.249359130859375e-05, + "step": 21295, + "training_step_time": 0.10663557052612305 + }, + { + "epoch": 3.24951171875e-05, + "model_forward_time": 0.025272846221923828, + "step": 21296 + }, + { + "epoch": 3.24951171875e-05, + "step": 21296, + "training_step_time": 0.1996934413909912 + }, + { + "epoch": 3.249664306640625e-05, + "model_forward_time": 0.02434515953063965, + "step": 21297 + }, + { + "epoch": 3.249664306640625e-05, + "step": 21297, + "training_step_time": 0.12753582000732422 + }, + { + "epoch": 3.24981689453125e-05, + "model_forward_time": 0.023917675018310547, + "step": 21298 + }, + { + "epoch": 3.24981689453125e-05, + "step": 21298, + "training_step_time": 0.1106119155883789 + }, + { + "epoch": 3.249969482421875e-05, + "model_forward_time": 0.02521204948425293, + "step": 21299 + }, + { + "epoch": 3.249969482421875e-05, + "step": 21299, + "training_step_time": 0.11374855041503906 + }, + { + "epoch": 3.2501220703125e-05, + "grad_norm": 0.1312633454799652, + "learning_rate": 2.128356686585282e-05, + "loss": 0.0057, + "step": 21300 + }, + { + "epoch": 3.2501220703125e-05, + "model_forward_time": 0.025166034698486328, + "step": 21300 + }, + { + "epoch": 3.2501220703125e-05, + "step": 21300, + "training_step_time": 0.16726255416870117 + }, + { + "epoch": 3.250274658203125e-05, + "model_forward_time": 0.024360179901123047, + "step": 21301 + }, + { + "epoch": 3.250274658203125e-05, + "step": 21301, + "training_step_time": 0.15343666076660156 + }, + { + "epoch": 3.25042724609375e-05, + "model_forward_time": 0.024587154388427734, + "step": 21302 + }, + { + "epoch": 3.25042724609375e-05, + "step": 21302, + "training_step_time": 0.11338424682617188 + }, + { + "epoch": 3.250579833984375e-05, + "model_forward_time": 0.02434992790222168, + "step": 21303 + }, + { + "epoch": 3.250579833984375e-05, + "step": 21303, + "training_step_time": 0.10473442077636719 + }, + { + "epoch": 3.250732421875e-05, + "model_forward_time": 0.025153636932373047, + "step": 21304 + }, + { + "epoch": 3.250732421875e-05, + "step": 21304, + "training_step_time": 0.10920405387878418 + }, + { + "epoch": 3.250885009765625e-05, + "model_forward_time": 0.02488112449645996, + "step": 21305 + }, + { + "epoch": 3.250885009765625e-05, + "step": 21305, + "training_step_time": 0.11178779602050781 + }, + { + "epoch": 3.25103759765625e-05, + "model_forward_time": 0.025148630142211914, + "step": 21306 + }, + { + "epoch": 3.25103759765625e-05, + "step": 21306, + "training_step_time": 0.1092221736907959 + }, + { + "epoch": 3.251190185546875e-05, + "model_forward_time": 0.02528095245361328, + "step": 21307 + }, + { + "epoch": 3.251190185546875e-05, + "step": 21307, + "training_step_time": 0.11386346817016602 + }, + { + "epoch": 3.2513427734375e-05, + "model_forward_time": 0.025056123733520508, + "step": 21308 + }, + { + "epoch": 3.2513427734375e-05, + "step": 21308, + "training_step_time": 0.1161651611328125 + }, + { + "epoch": 3.251495361328125e-05, + "model_forward_time": 0.024778366088867188, + "step": 21309 + }, + { + "epoch": 3.251495361328125e-05, + "step": 21309, + "training_step_time": 0.1216585636138916 + }, + { + "epoch": 3.25164794921875e-05, + "grad_norm": 0.2531159818172455, + "learning_rate": 2.12384652741409e-05, + "loss": 0.0064, + "step": 21310 + }, + { + "epoch": 3.25164794921875e-05, + "model_forward_time": 0.025117158889770508, + "step": 21310 + }, + { + "epoch": 3.25164794921875e-05, + "step": 21310, + "training_step_time": 0.1188044548034668 + }, + { + "epoch": 3.251800537109375e-05, + "model_forward_time": 0.02592182159423828, + "step": 21311 + }, + { + "epoch": 3.251800537109375e-05, + "step": 21311, + "training_step_time": 0.11706972122192383 + }, + { + "epoch": 3.251953125e-05, + "model_forward_time": 0.024829387664794922, + "step": 21312 + }, + { + "epoch": 3.251953125e-05, + "step": 21312, + "training_step_time": 0.1062474250793457 + }, + { + "epoch": 3.252105712890625e-05, + "model_forward_time": 0.023712873458862305, + "step": 21313 + }, + { + "epoch": 3.252105712890625e-05, + "step": 21313, + "training_step_time": 0.14829206466674805 + }, + { + "epoch": 3.25225830078125e-05, + "model_forward_time": 0.024098873138427734, + "step": 21314 + }, + { + "epoch": 3.25225830078125e-05, + "step": 21314, + "training_step_time": 0.15225481986999512 + }, + { + "epoch": 3.252410888671875e-05, + "model_forward_time": 0.024515867233276367, + "step": 21315 + }, + { + "epoch": 3.252410888671875e-05, + "step": 21315, + "training_step_time": 0.11450743675231934 + }, + { + "epoch": 3.2525634765625e-05, + "model_forward_time": 0.025091886520385742, + "step": 21316 + }, + { + "epoch": 3.2525634765625e-05, + "step": 21316, + "training_step_time": 0.13332843780517578 + }, + { + "epoch": 3.252716064453125e-05, + "model_forward_time": 0.02563762664794922, + "step": 21317 + }, + { + "epoch": 3.252716064453125e-05, + "step": 21317, + "training_step_time": 0.20834064483642578 + }, + { + "epoch": 3.25286865234375e-05, + "model_forward_time": 0.025037527084350586, + "step": 21318 + }, + { + "epoch": 3.25286865234375e-05, + "step": 21318, + "training_step_time": 0.15656590461730957 + }, + { + "epoch": 3.253021240234375e-05, + "model_forward_time": 0.02461862564086914, + "step": 21319 + }, + { + "epoch": 3.253021240234375e-05, + "step": 21319, + "training_step_time": 0.18573975563049316 + }, + { + "epoch": 3.253173828125e-05, + "grad_norm": 0.11055553704500198, + "learning_rate": 2.1193398630405725e-05, + "loss": 0.0063, + "step": 21320 + }, + { + "epoch": 3.253173828125e-05, + "model_forward_time": 0.02436089515686035, + "step": 21320 + }, + { + "epoch": 3.253173828125e-05, + "step": 21320, + "training_step_time": 0.16140294075012207 + }, + { + "epoch": 3.253326416015625e-05, + "model_forward_time": 0.024593114852905273, + "step": 21321 + }, + { + "epoch": 3.253326416015625e-05, + "step": 21321, + "training_step_time": 0.19222140312194824 + }, + { + "epoch": 3.25347900390625e-05, + "model_forward_time": 0.025799989700317383, + "step": 21322 + }, + { + "epoch": 3.25347900390625e-05, + "step": 21322, + "training_step_time": 0.12839746475219727 + }, + { + "epoch": 3.253631591796875e-05, + "model_forward_time": 0.025096654891967773, + "step": 21323 + }, + { + "epoch": 3.253631591796875e-05, + "step": 21323, + "training_step_time": 0.10694599151611328 + }, + { + "epoch": 3.2537841796875e-05, + "model_forward_time": 0.025942564010620117, + "step": 21324 + }, + { + "epoch": 3.2537841796875e-05, + "step": 21324, + "training_step_time": 0.10702133178710938 + }, + { + "epoch": 3.253936767578125e-05, + "model_forward_time": 0.026082515716552734, + "step": 21325 + }, + { + "epoch": 3.253936767578125e-05, + "step": 21325, + "training_step_time": 0.10600829124450684 + }, + { + "epoch": 3.25408935546875e-05, + "model_forward_time": 0.02938365936279297, + "step": 21326 + }, + { + "epoch": 3.25408935546875e-05, + "step": 21326, + "training_step_time": 0.10964179039001465 + }, + { + "epoch": 3.254241943359375e-05, + "model_forward_time": 0.02550220489501953, + "step": 21327 + }, + { + "epoch": 3.254241943359375e-05, + "step": 21327, + "training_step_time": 0.11289072036743164 + }, + { + "epoch": 3.25439453125e-05, + "model_forward_time": 0.02670764923095703, + "step": 21328 + }, + { + "epoch": 3.25439453125e-05, + "step": 21328, + "training_step_time": 0.10724067687988281 + }, + { + "epoch": 3.254547119140625e-05, + "model_forward_time": 0.02556586265563965, + "step": 21329 + }, + { + "epoch": 3.254547119140625e-05, + "step": 21329, + "training_step_time": 0.10686612129211426 + }, + { + "epoch": 3.25469970703125e-05, + "grad_norm": 0.27071115374565125, + "learning_rate": 2.1148366989407496e-05, + "loss": 0.0147, + "step": 21330 + }, + { + "epoch": 3.25469970703125e-05, + "model_forward_time": 0.025428056716918945, + "step": 21330 + }, + { + "epoch": 3.25469970703125e-05, + "step": 21330, + "training_step_time": 0.10884976387023926 + }, + { + "epoch": 3.254852294921875e-05, + "model_forward_time": 0.02520751953125, + "step": 21331 + }, + { + "epoch": 3.254852294921875e-05, + "step": 21331, + "training_step_time": 0.1066279411315918 + }, + { + "epoch": 3.2550048828125e-05, + "model_forward_time": 0.025646448135375977, + "step": 21332 + }, + { + "epoch": 3.2550048828125e-05, + "step": 21332, + "training_step_time": 0.10484194755554199 + }, + { + "epoch": 3.255157470703125e-05, + "model_forward_time": 0.02533721923828125, + "step": 21333 + }, + { + "epoch": 3.255157470703125e-05, + "step": 21333, + "training_step_time": 0.10896992683410645 + }, + { + "epoch": 3.25531005859375e-05, + "model_forward_time": 0.026538610458374023, + "step": 21334 + }, + { + "epoch": 3.25531005859375e-05, + "step": 21334, + "training_step_time": 0.10889697074890137 + }, + { + "epoch": 3.255462646484375e-05, + "model_forward_time": 0.025992631912231445, + "step": 21335 + }, + { + "epoch": 3.255462646484375e-05, + "step": 21335, + "training_step_time": 0.10542559623718262 + }, + { + "epoch": 3.255615234375e-05, + "model_forward_time": 0.026656389236450195, + "step": 21336 + }, + { + "epoch": 3.255615234375e-05, + "step": 21336, + "training_step_time": 0.10625886917114258 + }, + { + "epoch": 3.255767822265625e-05, + "model_forward_time": 0.02653050422668457, + "step": 21337 + }, + { + "epoch": 3.255767822265625e-05, + "step": 21337, + "training_step_time": 0.10554289817810059 + }, + { + "epoch": 3.25592041015625e-05, + "model_forward_time": 0.027411699295043945, + "step": 21338 + }, + { + "epoch": 3.25592041015625e-05, + "step": 21338, + "training_step_time": 0.10778594017028809 + }, + { + "epoch": 3.256072998046875e-05, + "model_forward_time": 0.025847911834716797, + "step": 21339 + }, + { + "epoch": 3.256072998046875e-05, + "step": 21339, + "training_step_time": 0.10598349571228027 + }, + { + "epoch": 3.2562255859375e-05, + "grad_norm": 0.29129672050476074, + "learning_rate": 2.110337040586391e-05, + "loss": 0.0071, + "step": 21340 + }, + { + "epoch": 3.2562255859375e-05, + "model_forward_time": 0.02493000030517578, + "step": 21340 + }, + { + "epoch": 3.2562255859375e-05, + "step": 21340, + "training_step_time": 0.10450482368469238 + }, + { + "epoch": 3.256378173828125e-05, + "model_forward_time": 0.024988174438476562, + "step": 21341 + }, + { + "epoch": 3.256378173828125e-05, + "step": 21341, + "training_step_time": 0.1085507869720459 + }, + { + "epoch": 3.25653076171875e-05, + "model_forward_time": 0.025181293487548828, + "step": 21342 + }, + { + "epoch": 3.25653076171875e-05, + "step": 21342, + "training_step_time": 0.22105932235717773 + }, + { + "epoch": 3.256683349609375e-05, + "model_forward_time": 0.024350404739379883, + "step": 21343 + }, + { + "epoch": 3.256683349609375e-05, + "step": 21343, + "training_step_time": 0.17120885848999023 + }, + { + "epoch": 3.2568359375e-05, + "model_forward_time": 0.02548527717590332, + "step": 21344 + }, + { + "epoch": 3.2568359375e-05, + "step": 21344, + "training_step_time": 0.18340635299682617 + }, + { + "epoch": 3.256988525390625e-05, + "model_forward_time": 0.02452707290649414, + "step": 21345 + }, + { + "epoch": 3.256988525390625e-05, + "step": 21345, + "training_step_time": 0.1776583194732666 + }, + { + "epoch": 3.25714111328125e-05, + "model_forward_time": 0.024905920028686523, + "step": 21346 + }, + { + "epoch": 3.25714111328125e-05, + "step": 21346, + "training_step_time": 0.13362383842468262 + }, + { + "epoch": 3.257293701171875e-05, + "model_forward_time": 0.025040864944458008, + "step": 21347 + }, + { + "epoch": 3.257293701171875e-05, + "step": 21347, + "training_step_time": 0.11795878410339355 + }, + { + "epoch": 3.2574462890625e-05, + "model_forward_time": 0.025641679763793945, + "step": 21348 + }, + { + "epoch": 3.2574462890625e-05, + "step": 21348, + "training_step_time": 0.10856270790100098 + }, + { + "epoch": 3.257598876953125e-05, + "model_forward_time": 0.02622532844543457, + "step": 21349 + }, + { + "epoch": 3.257598876953125e-05, + "step": 21349, + "training_step_time": 0.10607528686523438 + }, + { + "epoch": 3.25775146484375e-05, + "grad_norm": 0.28593915700912476, + "learning_rate": 2.105840893445005e-05, + "loss": 0.0101, + "step": 21350 + }, + { + "epoch": 3.25775146484375e-05, + "model_forward_time": 0.025183439254760742, + "step": 21350 + }, + { + "epoch": 3.25775146484375e-05, + "step": 21350, + "training_step_time": 0.11247539520263672 + }, + { + "epoch": 3.257904052734375e-05, + "model_forward_time": 0.02522754669189453, + "step": 21351 + }, + { + "epoch": 3.257904052734375e-05, + "step": 21351, + "training_step_time": 0.1322019100189209 + }, + { + "epoch": 3.258056640625e-05, + "model_forward_time": 0.024935007095336914, + "step": 21352 + }, + { + "epoch": 3.258056640625e-05, + "step": 21352, + "training_step_time": 0.15845584869384766 + }, + { + "epoch": 3.258209228515625e-05, + "model_forward_time": 0.024411678314208984, + "step": 21353 + }, + { + "epoch": 3.258209228515625e-05, + "step": 21353, + "training_step_time": 0.1574704647064209 + }, + { + "epoch": 3.25836181640625e-05, + "model_forward_time": 0.02443408966064453, + "step": 21354 + }, + { + "epoch": 3.25836181640625e-05, + "step": 21354, + "training_step_time": 0.1367182731628418 + }, + { + "epoch": 3.258514404296875e-05, + "model_forward_time": 0.025136709213256836, + "step": 21355 + }, + { + "epoch": 3.258514404296875e-05, + "step": 21355, + "training_step_time": 0.1079854965209961 + }, + { + "epoch": 3.2586669921875e-05, + "model_forward_time": 0.02461099624633789, + "step": 21356 + }, + { + "epoch": 3.2586669921875e-05, + "step": 21356, + "training_step_time": 0.14814305305480957 + }, + { + "epoch": 3.258819580078125e-05, + "model_forward_time": 0.024613142013549805, + "step": 21357 + }, + { + "epoch": 3.258819580078125e-05, + "step": 21357, + "training_step_time": 0.15858173370361328 + }, + { + "epoch": 3.25897216796875e-05, + "model_forward_time": 0.024570941925048828, + "step": 21358 + }, + { + "epoch": 3.25897216796875e-05, + "step": 21358, + "training_step_time": 0.11369824409484863 + }, + { + "epoch": 3.259124755859375e-05, + "model_forward_time": 0.026026487350463867, + "step": 21359 + }, + { + "epoch": 3.259124755859375e-05, + "step": 21359, + "training_step_time": 0.13006186485290527 + }, + { + "epoch": 3.25927734375e-05, + "grad_norm": 0.20840445160865784, + "learning_rate": 2.1013482629798333e-05, + "loss": 0.0122, + "step": 21360 + }, + { + "epoch": 3.25927734375e-05, + "model_forward_time": 0.02498650550842285, + "step": 21360 + }, + { + "epoch": 3.25927734375e-05, + "step": 21360, + "training_step_time": 0.19704747200012207 + }, + { + "epoch": 3.259429931640625e-05, + "model_forward_time": 0.024827241897583008, + "step": 21361 + }, + { + "epoch": 3.259429931640625e-05, + "step": 21361, + "training_step_time": 0.15967965126037598 + }, + { + "epoch": 3.25958251953125e-05, + "model_forward_time": 0.02453780174255371, + "step": 21362 + }, + { + "epoch": 3.25958251953125e-05, + "step": 21362, + "training_step_time": 0.12873101234436035 + }, + { + "epoch": 3.259735107421875e-05, + "model_forward_time": 0.0241241455078125, + "step": 21363 + }, + { + "epoch": 3.259735107421875e-05, + "step": 21363, + "training_step_time": 0.11409163475036621 + }, + { + "epoch": 3.2598876953125e-05, + "model_forward_time": 0.026187658309936523, + "step": 21364 + }, + { + "epoch": 3.2598876953125e-05, + "step": 21364, + "training_step_time": 0.12126374244689941 + }, + { + "epoch": 3.260040283203125e-05, + "model_forward_time": 0.025059223175048828, + "step": 21365 + }, + { + "epoch": 3.260040283203125e-05, + "step": 21365, + "training_step_time": 0.11343240737915039 + }, + { + "epoch": 3.26019287109375e-05, + "model_forward_time": 0.02526068687438965, + "step": 21366 + }, + { + "epoch": 3.26019287109375e-05, + "step": 21366, + "training_step_time": 0.17711997032165527 + }, + { + "epoch": 3.260345458984375e-05, + "model_forward_time": 0.024739980697631836, + "step": 21367 + }, + { + "epoch": 3.260345458984375e-05, + "step": 21367, + "training_step_time": 0.11723589897155762 + }, + { + "epoch": 3.260498046875e-05, + "model_forward_time": 0.024399280548095703, + "step": 21368 + }, + { + "epoch": 3.260498046875e-05, + "step": 21368, + "training_step_time": 0.10583162307739258 + }, + { + "epoch": 3.260650634765625e-05, + "model_forward_time": 0.026609420776367188, + "step": 21369 + }, + { + "epoch": 3.260650634765625e-05, + "step": 21369, + "training_step_time": 0.10903167724609375 + }, + { + "epoch": 3.26080322265625e-05, + "grad_norm": 0.19517971575260162, + "learning_rate": 2.0968591546498488e-05, + "loss": 0.0091, + "step": 21370 + }, + { + "epoch": 3.26080322265625e-05, + "model_forward_time": 0.02515578269958496, + "step": 21370 + }, + { + "epoch": 3.26080322265625e-05, + "step": 21370, + "training_step_time": 0.10760831832885742 + }, + { + "epoch": 3.260955810546875e-05, + "model_forward_time": 0.025435686111450195, + "step": 21371 + }, + { + "epoch": 3.260955810546875e-05, + "step": 21371, + "training_step_time": 0.10597515106201172 + }, + { + "epoch": 3.2611083984375e-05, + "model_forward_time": 0.025447845458984375, + "step": 21372 + }, + { + "epoch": 3.2611083984375e-05, + "step": 21372, + "training_step_time": 0.10650849342346191 + }, + { + "epoch": 3.261260986328125e-05, + "model_forward_time": 0.0252225399017334, + "step": 21373 + }, + { + "epoch": 3.261260986328125e-05, + "step": 21373, + "training_step_time": 0.10647249221801758 + }, + { + "epoch": 3.26141357421875e-05, + "model_forward_time": 0.024986743927001953, + "step": 21374 + }, + { + "epoch": 3.26141357421875e-05, + "step": 21374, + "training_step_time": 0.10435152053833008 + }, + { + "epoch": 3.261566162109375e-05, + "model_forward_time": 0.025393009185791016, + "step": 21375 + }, + { + "epoch": 3.261566162109375e-05, + "step": 21375, + "training_step_time": 0.1047968864440918 + }, + { + "epoch": 3.26171875e-05, + "model_forward_time": 0.026475906372070312, + "step": 21376 + }, + { + "epoch": 3.26171875e-05, + "step": 21376, + "training_step_time": 0.10819530487060547 + }, + { + "epoch": 3.261871337890625e-05, + "model_forward_time": 0.026598691940307617, + "step": 21377 + }, + { + "epoch": 3.261871337890625e-05, + "step": 21377, + "training_step_time": 0.10600662231445312 + }, + { + "epoch": 3.26202392578125e-05, + "model_forward_time": 0.026185274124145508, + "step": 21378 + }, + { + "epoch": 3.26202392578125e-05, + "step": 21378, + "training_step_time": 0.10475277900695801 + }, + { + "epoch": 3.262176513671875e-05, + "model_forward_time": 0.02539825439453125, + "step": 21379 + }, + { + "epoch": 3.262176513671875e-05, + "step": 21379, + "training_step_time": 0.10705447196960449 + }, + { + "epoch": 3.2623291015625e-05, + "grad_norm": 0.16651000082492828, + "learning_rate": 2.09237357390974e-05, + "loss": 0.0067, + "step": 21380 + }, + { + "epoch": 3.2623291015625e-05, + "model_forward_time": 0.025920867919921875, + "step": 21380 + }, + { + "epoch": 3.2623291015625e-05, + "step": 21380, + "training_step_time": 0.10423707962036133 + }, + { + "epoch": 3.262481689453125e-05, + "model_forward_time": 0.02617025375366211, + "step": 21381 + }, + { + "epoch": 3.262481689453125e-05, + "step": 21381, + "training_step_time": 0.1067051887512207 + }, + { + "epoch": 3.26263427734375e-05, + "model_forward_time": 0.0254669189453125, + "step": 21382 + }, + { + "epoch": 3.26263427734375e-05, + "step": 21382, + "training_step_time": 0.10767602920532227 + }, + { + "epoch": 3.262786865234375e-05, + "model_forward_time": 0.02643275260925293, + "step": 21383 + }, + { + "epoch": 3.262786865234375e-05, + "step": 21383, + "training_step_time": 0.10576367378234863 + }, + { + "epoch": 3.262939453125e-05, + "model_forward_time": 0.02525043487548828, + "step": 21384 + }, + { + "epoch": 3.262939453125e-05, + "step": 21384, + "training_step_time": 0.1068429946899414 + }, + { + "epoch": 3.263092041015625e-05, + "model_forward_time": 0.025199174880981445, + "step": 21385 + }, + { + "epoch": 3.263092041015625e-05, + "step": 21385, + "training_step_time": 0.11375880241394043 + }, + { + "epoch": 3.26324462890625e-05, + "model_forward_time": 0.024149179458618164, + "step": 21386 + }, + { + "epoch": 3.26324462890625e-05, + "step": 21386, + "training_step_time": 0.18388938903808594 + }, + { + "epoch": 3.263397216796875e-05, + "model_forward_time": 0.025084495544433594, + "step": 21387 + }, + { + "epoch": 3.263397216796875e-05, + "step": 21387, + "training_step_time": 0.17194175720214844 + }, + { + "epoch": 3.2635498046875e-05, + "model_forward_time": 0.024611234664916992, + "step": 21388 + }, + { + "epoch": 3.2635498046875e-05, + "step": 21388, + "training_step_time": 0.13388395309448242 + }, + { + "epoch": 3.263702392578125e-05, + "model_forward_time": 0.024343490600585938, + "step": 21389 + }, + { + "epoch": 3.263702392578125e-05, + "step": 21389, + "training_step_time": 0.1585078239440918 + }, + { + "epoch": 3.26385498046875e-05, + "grad_norm": 0.1198514774441719, + "learning_rate": 2.0878915262099098e-05, + "loss": 0.0066, + "step": 21390 + }, + { + "epoch": 3.26385498046875e-05, + "model_forward_time": 0.025183439254760742, + "step": 21390 + }, + { + "epoch": 3.26385498046875e-05, + "step": 21390, + "training_step_time": 0.21726727485656738 + }, + { + "epoch": 3.264007568359375e-05, + "model_forward_time": 0.024637460708618164, + "step": 21391 + }, + { + "epoch": 3.264007568359375e-05, + "step": 21391, + "training_step_time": 0.1228179931640625 + }, + { + "epoch": 3.26416015625e-05, + "model_forward_time": 0.028346776962280273, + "step": 21392 + }, + { + "epoch": 3.26416015625e-05, + "step": 21392, + "training_step_time": 0.11328697204589844 + }, + { + "epoch": 3.264312744140625e-05, + "model_forward_time": 0.02584218978881836, + "step": 21393 + }, + { + "epoch": 3.264312744140625e-05, + "step": 21393, + "training_step_time": 0.11167502403259277 + }, + { + "epoch": 3.26446533203125e-05, + "model_forward_time": 0.025397062301635742, + "step": 21394 + }, + { + "epoch": 3.26446533203125e-05, + "step": 21394, + "training_step_time": 0.10777449607849121 + }, + { + "epoch": 3.264617919921875e-05, + "model_forward_time": 0.025455474853515625, + "step": 21395 + }, + { + "epoch": 3.264617919921875e-05, + "step": 21395, + "training_step_time": 0.10875272750854492 + }, + { + "epoch": 3.2647705078125e-05, + "model_forward_time": 0.025969743728637695, + "step": 21396 + }, + { + "epoch": 3.2647705078125e-05, + "step": 21396, + "training_step_time": 0.1058037281036377 + }, + { + "epoch": 3.264923095703125e-05, + "model_forward_time": 0.028711557388305664, + "step": 21397 + }, + { + "epoch": 3.264923095703125e-05, + "step": 21397, + "training_step_time": 0.10924124717712402 + }, + { + "epoch": 3.26507568359375e-05, + "model_forward_time": 0.026340484619140625, + "step": 21398 + }, + { + "epoch": 3.26507568359375e-05, + "step": 21398, + "training_step_time": 0.10995912551879883 + }, + { + "epoch": 3.265228271484375e-05, + "model_forward_time": 0.025932788848876953, + "step": 21399 + }, + { + "epoch": 3.265228271484375e-05, + "step": 21399, + "training_step_time": 0.11035537719726562 + }, + { + "epoch": 3.265380859375e-05, + "grad_norm": 0.15694105625152588, + "learning_rate": 2.0834130169964692e-05, + "loss": 0.0122, + "step": 21400 + }, + { + "epoch": 3.265380859375e-05, + "model_forward_time": 0.025883197784423828, + "step": 21400 + }, + { + "epoch": 3.265380859375e-05, + "step": 21400, + "training_step_time": 0.10556793212890625 + }, + { + "epoch": 3.265533447265625e-05, + "model_forward_time": 0.02433466911315918, + "step": 21401 + }, + { + "epoch": 3.265533447265625e-05, + "step": 21401, + "training_step_time": 0.15153145790100098 + }, + { + "epoch": 3.26568603515625e-05, + "model_forward_time": 0.024839401245117188, + "step": 21402 + }, + { + "epoch": 3.26568603515625e-05, + "step": 21402, + "training_step_time": 0.15307855606079102 + }, + { + "epoch": 3.265838623046875e-05, + "model_forward_time": 0.024536609649658203, + "step": 21403 + }, + { + "epoch": 3.265838623046875e-05, + "step": 21403, + "training_step_time": 0.10989999771118164 + }, + { + "epoch": 3.2659912109375e-05, + "model_forward_time": 0.026547670364379883, + "step": 21404 + }, + { + "epoch": 3.2659912109375e-05, + "step": 21404, + "training_step_time": 0.13157272338867188 + }, + { + "epoch": 3.266143798828125e-05, + "model_forward_time": 0.02541351318359375, + "step": 21405 + }, + { + "epoch": 3.266143798828125e-05, + "step": 21405, + "training_step_time": 0.12725067138671875 + }, + { + "epoch": 3.26629638671875e-05, + "model_forward_time": 0.026391983032226562, + "step": 21406 + }, + { + "epoch": 3.26629638671875e-05, + "step": 21406, + "training_step_time": 0.12491726875305176 + }, + { + "epoch": 3.266448974609375e-05, + "model_forward_time": 0.025368213653564453, + "step": 21407 + }, + { + "epoch": 3.266448974609375e-05, + "step": 21407, + "training_step_time": 0.22081971168518066 + }, + { + "epoch": 3.2666015625e-05, + "model_forward_time": 0.02415943145751953, + "step": 21408 + }, + { + "epoch": 3.2666015625e-05, + "step": 21408, + "training_step_time": 0.21675872802734375 + }, + { + "epoch": 3.266754150390625e-05, + "model_forward_time": 0.025788545608520508, + "step": 21409 + }, + { + "epoch": 3.266754150390625e-05, + "step": 21409, + "training_step_time": 0.13145160675048828 + }, + { + "epoch": 3.26690673828125e-05, + "grad_norm": 0.1490681767463684, + "learning_rate": 2.0789380517112272e-05, + "loss": 0.014, + "step": 21410 + }, + { + "epoch": 3.26690673828125e-05, + "model_forward_time": 0.023677825927734375, + "step": 21410 + }, + { + "epoch": 3.26690673828125e-05, + "step": 21410, + "training_step_time": 0.1896672248840332 + }, + { + "epoch": 3.267059326171875e-05, + "model_forward_time": 0.024270057678222656, + "step": 21411 + }, + { + "epoch": 3.267059326171875e-05, + "step": 21411, + "training_step_time": 0.13915061950683594 + }, + { + "epoch": 3.2672119140625e-05, + "model_forward_time": 0.024865150451660156, + "step": 21412 + }, + { + "epoch": 3.2672119140625e-05, + "step": 21412, + "training_step_time": 0.10713052749633789 + }, + { + "epoch": 3.267364501953125e-05, + "model_forward_time": 0.024958133697509766, + "step": 21413 + }, + { + "epoch": 3.267364501953125e-05, + "step": 21413, + "training_step_time": 0.10623764991760254 + }, + { + "epoch": 3.26751708984375e-05, + "model_forward_time": 0.025310754776000977, + "step": 21414 + }, + { + "epoch": 3.26751708984375e-05, + "step": 21414, + "training_step_time": 0.10644912719726562 + }, + { + "epoch": 3.267669677734375e-05, + "model_forward_time": 0.025640487670898438, + "step": 21415 + }, + { + "epoch": 3.267669677734375e-05, + "step": 21415, + "training_step_time": 0.1128394603729248 + }, + { + "epoch": 3.267822265625e-05, + "model_forward_time": 0.025661468505859375, + "step": 21416 + }, + { + "epoch": 3.267822265625e-05, + "step": 21416, + "training_step_time": 0.10508561134338379 + }, + { + "epoch": 3.267974853515625e-05, + "model_forward_time": 0.02522444725036621, + "step": 21417 + }, + { + "epoch": 3.267974853515625e-05, + "step": 21417, + "training_step_time": 0.10712575912475586 + }, + { + "epoch": 3.26812744140625e-05, + "model_forward_time": 0.025450944900512695, + "step": 21418 + }, + { + "epoch": 3.26812744140625e-05, + "step": 21418, + "training_step_time": 0.17090511322021484 + }, + { + "epoch": 3.268280029296875e-05, + "model_forward_time": 0.02460646629333496, + "step": 21419 + }, + { + "epoch": 3.268280029296875e-05, + "step": 21419, + "training_step_time": 0.18145108222961426 + }, + { + "epoch": 3.2684326171875e-05, + "grad_norm": 0.2113717496395111, + "learning_rate": 2.0744666357916925e-05, + "loss": 0.0089, + "step": 21420 + }, + { + "epoch": 3.2684326171875e-05, + "model_forward_time": 0.024824142456054688, + "step": 21420 + }, + { + "epoch": 3.2684326171875e-05, + "step": 21420, + "training_step_time": 0.16096091270446777 + }, + { + "epoch": 3.268585205078125e-05, + "model_forward_time": 0.024448394775390625, + "step": 21421 + }, + { + "epoch": 3.268585205078125e-05, + "step": 21421, + "training_step_time": 0.15123939514160156 + }, + { + "epoch": 3.26873779296875e-05, + "model_forward_time": 0.02460789680480957, + "step": 21422 + }, + { + "epoch": 3.26873779296875e-05, + "step": 21422, + "training_step_time": 0.13439440727233887 + }, + { + "epoch": 3.268890380859375e-05, + "model_forward_time": 0.02446150779724121, + "step": 21423 + }, + { + "epoch": 3.268890380859375e-05, + "step": 21423, + "training_step_time": 0.1273021697998047 + }, + { + "epoch": 3.26904296875e-05, + "model_forward_time": 0.025025367736816406, + "step": 21424 + }, + { + "epoch": 3.26904296875e-05, + "step": 21424, + "training_step_time": 0.12816119194030762 + }, + { + "epoch": 3.269195556640625e-05, + "model_forward_time": 0.025610923767089844, + "step": 21425 + }, + { + "epoch": 3.269195556640625e-05, + "step": 21425, + "training_step_time": 0.12122726440429688 + }, + { + "epoch": 3.26934814453125e-05, + "model_forward_time": 0.024606943130493164, + "step": 21426 + }, + { + "epoch": 3.26934814453125e-05, + "step": 21426, + "training_step_time": 0.11835193634033203 + }, + { + "epoch": 3.269500732421875e-05, + "model_forward_time": 0.025483369827270508, + "step": 21427 + }, + { + "epoch": 3.269500732421875e-05, + "step": 21427, + "training_step_time": 0.10801959037780762 + }, + { + "epoch": 3.2696533203125e-05, + "model_forward_time": 0.025312423706054688, + "step": 21428 + }, + { + "epoch": 3.2696533203125e-05, + "step": 21428, + "training_step_time": 0.1354517936706543 + }, + { + "epoch": 3.269805908203125e-05, + "model_forward_time": 0.02529454231262207, + "step": 21429 + }, + { + "epoch": 3.269805908203125e-05, + "step": 21429, + "training_step_time": 0.11032629013061523 + }, + { + "epoch": 3.26995849609375e-05, + "grad_norm": 0.1630994975566864, + "learning_rate": 2.0699987746710554e-05, + "loss": 0.0058, + "step": 21430 + }, + { + "epoch": 3.26995849609375e-05, + "model_forward_time": 0.0262143611907959, + "step": 21430 + }, + { + "epoch": 3.26995849609375e-05, + "step": 21430, + "training_step_time": 0.13472771644592285 + }, + { + "epoch": 3.270111083984375e-05, + "model_forward_time": 0.025525808334350586, + "step": 21431 + }, + { + "epoch": 3.270111083984375e-05, + "step": 21431, + "training_step_time": 0.15726399421691895 + }, + { + "epoch": 3.270263671875e-05, + "model_forward_time": 0.02477550506591797, + "step": 21432 + }, + { + "epoch": 3.270263671875e-05, + "step": 21432, + "training_step_time": 0.10753846168518066 + }, + { + "epoch": 3.270416259765625e-05, + "model_forward_time": 0.02564263343811035, + "step": 21433 + }, + { + "epoch": 3.270416259765625e-05, + "step": 21433, + "training_step_time": 0.11534905433654785 + }, + { + "epoch": 3.27056884765625e-05, + "model_forward_time": 0.027423858642578125, + "step": 21434 + }, + { + "epoch": 3.27056884765625e-05, + "step": 21434, + "training_step_time": 0.12099885940551758 + }, + { + "epoch": 3.270721435546875e-05, + "model_forward_time": 0.0255887508392334, + "step": 21435 + }, + { + "epoch": 3.270721435546875e-05, + "step": 21435, + "training_step_time": 0.10765242576599121 + }, + { + "epoch": 3.2708740234375e-05, + "model_forward_time": 0.025589466094970703, + "step": 21436 + }, + { + "epoch": 3.2708740234375e-05, + "step": 21436, + "training_step_time": 0.10562705993652344 + }, + { + "epoch": 3.271026611328125e-05, + "model_forward_time": 0.025283098220825195, + "step": 21437 + }, + { + "epoch": 3.271026611328125e-05, + "step": 21437, + "training_step_time": 0.10678625106811523 + }, + { + "epoch": 3.27117919921875e-05, + "model_forward_time": 0.02572464942932129, + "step": 21438 + }, + { + "epoch": 3.27117919921875e-05, + "step": 21438, + "training_step_time": 0.10535931587219238 + }, + { + "epoch": 3.271331787109375e-05, + "model_forward_time": 0.025243520736694336, + "step": 21439 + }, + { + "epoch": 3.271331787109375e-05, + "step": 21439, + "training_step_time": 0.10544896125793457 + }, + { + "epoch": 3.271484375e-05, + "grad_norm": 0.1422598659992218, + "learning_rate": 2.065534473778186e-05, + "loss": 0.0074, + "step": 21440 + }, + { + "epoch": 3.271484375e-05, + "model_forward_time": 0.02503371238708496, + "step": 21440 + }, + { + "epoch": 3.271484375e-05, + "step": 21440, + "training_step_time": 0.10764217376708984 + }, + { + "epoch": 3.271636962890625e-05, + "model_forward_time": 0.025027990341186523, + "step": 21441 + }, + { + "epoch": 3.271636962890625e-05, + "step": 21441, + "training_step_time": 0.12452244758605957 + }, + { + "epoch": 3.27178955078125e-05, + "model_forward_time": 0.025783777236938477, + "step": 21442 + }, + { + "epoch": 3.27178955078125e-05, + "step": 21442, + "training_step_time": 0.14392852783203125 + }, + { + "epoch": 3.271942138671875e-05, + "model_forward_time": 0.02419590950012207, + "step": 21443 + }, + { + "epoch": 3.271942138671875e-05, + "step": 21443, + "training_step_time": 0.10866641998291016 + }, + { + "epoch": 3.2720947265625e-05, + "model_forward_time": 0.02461099624633789, + "step": 21444 + }, + { + "epoch": 3.2720947265625e-05, + "step": 21444, + "training_step_time": 0.14781641960144043 + }, + { + "epoch": 3.272247314453125e-05, + "model_forward_time": 0.02529597282409668, + "step": 21445 + }, + { + "epoch": 3.272247314453125e-05, + "step": 21445, + "training_step_time": 0.15373706817626953 + }, + { + "epoch": 3.27239990234375e-05, + "model_forward_time": 0.02459120750427246, + "step": 21446 + }, + { + "epoch": 3.27239990234375e-05, + "step": 21446, + "training_step_time": 0.1206514835357666 + }, + { + "epoch": 3.272552490234375e-05, + "model_forward_time": 0.024644136428833008, + "step": 21447 + }, + { + "epoch": 3.272552490234375e-05, + "step": 21447, + "training_step_time": 0.12737655639648438 + }, + { + "epoch": 3.272705078125e-05, + "model_forward_time": 0.025829076766967773, + "step": 21448 + }, + { + "epoch": 3.272705078125e-05, + "step": 21448, + "training_step_time": 0.11434578895568848 + }, + { + "epoch": 3.272857666015625e-05, + "model_forward_time": 0.025661230087280273, + "step": 21449 + }, + { + "epoch": 3.272857666015625e-05, + "step": 21449, + "training_step_time": 0.13219356536865234 + }, + { + "epoch": 3.27301025390625e-05, + "grad_norm": 0.2459985464811325, + "learning_rate": 2.061073738537635e-05, + "loss": 0.0158, + "step": 21450 + }, + { + "epoch": 3.27301025390625e-05, + "model_forward_time": 0.024809837341308594, + "step": 21450 + }, + { + "epoch": 3.27301025390625e-05, + "step": 21450, + "training_step_time": 0.19077396392822266 + }, + { + "epoch": 3.273162841796875e-05, + "model_forward_time": 0.024216175079345703, + "step": 21451 + }, + { + "epoch": 3.273162841796875e-05, + "step": 21451, + "training_step_time": 0.16658997535705566 + }, + { + "epoch": 3.2733154296875e-05, + "model_forward_time": 0.025172948837280273, + "step": 21452 + }, + { + "epoch": 3.2733154296875e-05, + "step": 21452, + "training_step_time": 0.20341134071350098 + }, + { + "epoch": 3.273468017578125e-05, + "model_forward_time": 0.026114940643310547, + "step": 21453 + }, + { + "epoch": 3.273468017578125e-05, + "step": 21453, + "training_step_time": 0.11648964881896973 + }, + { + "epoch": 3.27362060546875e-05, + "model_forward_time": 0.025034189224243164, + "step": 21454 + }, + { + "epoch": 3.27362060546875e-05, + "step": 21454, + "training_step_time": 0.11602163314819336 + }, + { + "epoch": 3.273773193359375e-05, + "model_forward_time": 0.025682449340820312, + "step": 21455 + }, + { + "epoch": 3.273773193359375e-05, + "step": 21455, + "training_step_time": 0.10506415367126465 + }, + { + "epoch": 3.27392578125e-05, + "model_forward_time": 0.025560855865478516, + "step": 21456 + }, + { + "epoch": 3.27392578125e-05, + "step": 21456, + "training_step_time": 0.11066937446594238 + }, + { + "epoch": 3.274078369140625e-05, + "model_forward_time": 0.025326967239379883, + "step": 21457 + }, + { + "epoch": 3.274078369140625e-05, + "step": 21457, + "training_step_time": 0.10399842262268066 + }, + { + "epoch": 3.27423095703125e-05, + "model_forward_time": 0.0252687931060791, + "step": 21458 + }, + { + "epoch": 3.27423095703125e-05, + "step": 21458, + "training_step_time": 0.1062307357788086 + }, + { + "epoch": 3.274383544921875e-05, + "model_forward_time": 0.024988174438476562, + "step": 21459 + }, + { + "epoch": 3.274383544921875e-05, + "step": 21459, + "training_step_time": 0.10452914237976074 + }, + { + "epoch": 3.2745361328125e-05, + "grad_norm": 0.22268171608448029, + "learning_rate": 2.056616574369612e-05, + "loss": 0.0065, + "step": 21460 + }, + { + "epoch": 3.2745361328125e-05, + "model_forward_time": 0.025077342987060547, + "step": 21460 + }, + { + "epoch": 3.2745361328125e-05, + "step": 21460, + "training_step_time": 0.10692977905273438 + }, + { + "epoch": 3.274688720703125e-05, + "model_forward_time": 0.025075912475585938, + "step": 21461 + }, + { + "epoch": 3.274688720703125e-05, + "step": 21461, + "training_step_time": 0.10890984535217285 + }, + { + "epoch": 3.27484130859375e-05, + "model_forward_time": 0.025015830993652344, + "step": 21462 + }, + { + "epoch": 3.27484130859375e-05, + "step": 21462, + "training_step_time": 0.10702800750732422 + }, + { + "epoch": 3.274993896484375e-05, + "model_forward_time": 0.02605724334716797, + "step": 21463 + }, + { + "epoch": 3.274993896484375e-05, + "step": 21463, + "training_step_time": 0.11018133163452148 + }, + { + "epoch": 3.275146484375e-05, + "model_forward_time": 0.025979995727539062, + "step": 21464 + }, + { + "epoch": 3.275146484375e-05, + "step": 21464, + "training_step_time": 0.10545659065246582 + }, + { + "epoch": 3.275299072265625e-05, + "model_forward_time": 0.02634453773498535, + "step": 21465 + }, + { + "epoch": 3.275299072265625e-05, + "step": 21465, + "training_step_time": 0.10638213157653809 + }, + { + "epoch": 3.27545166015625e-05, + "model_forward_time": 0.026576995849609375, + "step": 21466 + }, + { + "epoch": 3.27545166015625e-05, + "step": 21466, + "training_step_time": 0.10454201698303223 + }, + { + "epoch": 3.275604248046875e-05, + "model_forward_time": 0.02646017074584961, + "step": 21467 + }, + { + "epoch": 3.275604248046875e-05, + "step": 21467, + "training_step_time": 0.10399127006530762 + }, + { + "epoch": 3.2757568359375e-05, + "model_forward_time": 0.024941205978393555, + "step": 21468 + }, + { + "epoch": 3.2757568359375e-05, + "step": 21468, + "training_step_time": 0.10338759422302246 + }, + { + "epoch": 3.275909423828125e-05, + "model_forward_time": 0.0251467227935791, + "step": 21469 + }, + { + "epoch": 3.275909423828125e-05, + "step": 21469, + "training_step_time": 0.10410785675048828 + }, + { + "epoch": 3.27606201171875e-05, + "grad_norm": 0.4208851754665375, + "learning_rate": 2.0521629866899966e-05, + "loss": 0.0148, + "step": 21470 + }, + { + "epoch": 3.27606201171875e-05, + "model_forward_time": 0.026107072830200195, + "step": 21470 + }, + { + "epoch": 3.27606201171875e-05, + "step": 21470, + "training_step_time": 0.10523295402526855 + }, + { + "epoch": 3.276214599609375e-05, + "model_forward_time": 0.02570939064025879, + "step": 21471 + }, + { + "epoch": 3.276214599609375e-05, + "step": 21471, + "training_step_time": 0.1092078685760498 + }, + { + "epoch": 3.2763671875e-05, + "model_forward_time": 0.02927708625793457, + "step": 21472 + }, + { + "epoch": 3.2763671875e-05, + "step": 21472, + "training_step_time": 0.11399626731872559 + }, + { + "epoch": 3.276519775390625e-05, + "model_forward_time": 0.02492213249206543, + "step": 21473 + }, + { + "epoch": 3.276519775390625e-05, + "step": 21473, + "training_step_time": 0.11025333404541016 + }, + { + "epoch": 3.27667236328125e-05, + "model_forward_time": 0.0254666805267334, + "step": 21474 + }, + { + "epoch": 3.27667236328125e-05, + "step": 21474, + "training_step_time": 0.16224908828735352 + }, + { + "epoch": 3.276824951171875e-05, + "model_forward_time": 0.025496959686279297, + "step": 21475 + }, + { + "epoch": 3.276824951171875e-05, + "step": 21475, + "training_step_time": 0.12044382095336914 + }, + { + "epoch": 3.2769775390625e-05, + "model_forward_time": 0.0246427059173584, + "step": 21476 + }, + { + "epoch": 3.2769775390625e-05, + "step": 21476, + "training_step_time": 0.12644195556640625 + }, + { + "epoch": 3.277130126953125e-05, + "model_forward_time": 0.026027917861938477, + "step": 21477 + }, + { + "epoch": 3.277130126953125e-05, + "step": 21477, + "training_step_time": 0.14521265029907227 + }, + { + "epoch": 3.27728271484375e-05, + "model_forward_time": 0.02507162094116211, + "step": 21478 + }, + { + "epoch": 3.27728271484375e-05, + "step": 21478, + "training_step_time": 0.12110519409179688 + }, + { + "epoch": 3.277435302734375e-05, + "model_forward_time": 0.025676727294921875, + "step": 21479 + }, + { + "epoch": 3.277435302734375e-05, + "step": 21479, + "training_step_time": 0.1261157989501953 + }, + { + "epoch": 3.277587890625e-05, + "grad_norm": 0.19327159225940704, + "learning_rate": 2.0477129809103147e-05, + "loss": 0.0061, + "step": 21480 + }, + { + "epoch": 3.277587890625e-05, + "model_forward_time": 0.025008201599121094, + "step": 21480 + }, + { + "epoch": 3.277587890625e-05, + "step": 21480, + "training_step_time": 0.11131691932678223 + }, + { + "epoch": 3.277740478515625e-05, + "model_forward_time": 0.025173425674438477, + "step": 21481 + }, + { + "epoch": 3.277740478515625e-05, + "step": 21481, + "training_step_time": 0.10843348503112793 + }, + { + "epoch": 3.27789306640625e-05, + "model_forward_time": 0.024735450744628906, + "step": 21482 + }, + { + "epoch": 3.27789306640625e-05, + "step": 21482, + "training_step_time": 0.11014866828918457 + }, + { + "epoch": 3.278045654296875e-05, + "model_forward_time": 0.025592565536499023, + "step": 21483 + }, + { + "epoch": 3.278045654296875e-05, + "step": 21483, + "training_step_time": 0.1072089672088623 + }, + { + "epoch": 3.2781982421875e-05, + "model_forward_time": 0.024317502975463867, + "step": 21484 + }, + { + "epoch": 3.2781982421875e-05, + "step": 21484, + "training_step_time": 0.10916829109191895 + }, + { + "epoch": 3.278350830078125e-05, + "model_forward_time": 0.024244070053100586, + "step": 21485 + }, + { + "epoch": 3.278350830078125e-05, + "step": 21485, + "training_step_time": 0.11597180366516113 + }, + { + "epoch": 3.27850341796875e-05, + "model_forward_time": 0.025903940200805664, + "step": 21486 + }, + { + "epoch": 3.27850341796875e-05, + "step": 21486, + "training_step_time": 0.11055207252502441 + }, + { + "epoch": 3.278656005859375e-05, + "model_forward_time": 0.026667118072509766, + "step": 21487 + }, + { + "epoch": 3.278656005859375e-05, + "step": 21487, + "training_step_time": 0.11065983772277832 + }, + { + "epoch": 3.27880859375e-05, + "model_forward_time": 0.025974035263061523, + "step": 21488 + }, + { + "epoch": 3.27880859375e-05, + "step": 21488, + "training_step_time": 0.10856246948242188 + }, + { + "epoch": 3.278961181640625e-05, + "model_forward_time": 0.025088787078857422, + "step": 21489 + }, + { + "epoch": 3.278961181640625e-05, + "step": 21489, + "training_step_time": 0.10731220245361328 + }, + { + "epoch": 3.27911376953125e-05, + "grad_norm": 0.3131754398345947, + "learning_rate": 2.0432665624377434e-05, + "loss": 0.0089, + "step": 21490 + }, + { + "epoch": 3.27911376953125e-05, + "model_forward_time": 0.025774717330932617, + "step": 21490 + }, + { + "epoch": 3.27911376953125e-05, + "step": 21490, + "training_step_time": 0.10640788078308105 + }, + { + "epoch": 3.279266357421875e-05, + "model_forward_time": 0.024432897567749023, + "step": 21491 + }, + { + "epoch": 3.279266357421875e-05, + "step": 21491, + "training_step_time": 0.1444835662841797 + }, + { + "epoch": 3.2794189453125e-05, + "model_forward_time": 0.024487018585205078, + "step": 21492 + }, + { + "epoch": 3.2794189453125e-05, + "step": 21492, + "training_step_time": 0.15711474418640137 + }, + { + "epoch": 3.279571533203125e-05, + "model_forward_time": 0.02386331558227539, + "step": 21493 + }, + { + "epoch": 3.279571533203125e-05, + "step": 21493, + "training_step_time": 0.11394095420837402 + }, + { + "epoch": 3.27972412109375e-05, + "model_forward_time": 0.024614810943603516, + "step": 21494 + }, + { + "epoch": 3.27972412109375e-05, + "step": 21494, + "training_step_time": 0.12711286544799805 + }, + { + "epoch": 3.279876708984375e-05, + "model_forward_time": 0.025324106216430664, + "step": 21495 + }, + { + "epoch": 3.279876708984375e-05, + "step": 21495, + "training_step_time": 0.20073747634887695 + }, + { + "epoch": 3.280029296875e-05, + "model_forward_time": 0.024139881134033203, + "step": 21496 + }, + { + "epoch": 3.280029296875e-05, + "step": 21496, + "training_step_time": 0.17986392974853516 + }, + { + "epoch": 3.280181884765625e-05, + "model_forward_time": 0.024300098419189453, + "step": 21497 + }, + { + "epoch": 3.280181884765625e-05, + "step": 21497, + "training_step_time": 0.19476556777954102 + }, + { + "epoch": 3.28033447265625e-05, + "model_forward_time": 0.02441716194152832, + "step": 21498 + }, + { + "epoch": 3.28033447265625e-05, + "step": 21498, + "training_step_time": 0.15674662590026855 + }, + { + "epoch": 3.280487060546875e-05, + "model_forward_time": 0.023775577545166016, + "step": 21499 + }, + { + "epoch": 3.280487060546875e-05, + "step": 21499, + "training_step_time": 0.17407631874084473 + }, + { + "epoch": 3.2806396484375e-05, + "grad_norm": 0.15037991106510162, + "learning_rate": 2.0388237366751006e-05, + "loss": 0.0075, + "step": 21500 + }, + { + "epoch": 3.2806396484375e-05, + "model_forward_time": 0.02744579315185547, + "step": 21500 + }, + { + "epoch": 3.2806396484375e-05, + "step": 21500, + "training_step_time": 0.18578815460205078 + }, + { + "epoch": 3.280792236328125e-05, + "model_forward_time": 0.02402353286743164, + "step": 21501 + }, + { + "epoch": 3.280792236328125e-05, + "step": 21501, + "training_step_time": 0.10668253898620605 + }, + { + "epoch": 3.28094482421875e-05, + "model_forward_time": 0.024646759033203125, + "step": 21502 + }, + { + "epoch": 3.28094482421875e-05, + "step": 21502, + "training_step_time": 0.10440564155578613 + }, + { + "epoch": 3.281097412109375e-05, + "model_forward_time": 0.026366710662841797, + "step": 21503 + }, + { + "epoch": 3.281097412109375e-05, + "step": 21503, + "training_step_time": 0.10871315002441406 + }, + { + "epoch": 3.28125e-05, + "model_forward_time": 0.025136947631835938, + "step": 21504 + }, + { + "epoch": 3.28125e-05, + "step": 21504, + "training_step_time": 0.10741662979125977 + }, + { + "epoch": 3.281402587890625e-05, + "model_forward_time": 0.025295257568359375, + "step": 21505 + }, + { + "epoch": 3.281402587890625e-05, + "step": 21505, + "training_step_time": 0.10708189010620117 + }, + { + "epoch": 3.28155517578125e-05, + "model_forward_time": 0.025191307067871094, + "step": 21506 + }, + { + "epoch": 3.28155517578125e-05, + "step": 21506, + "training_step_time": 0.10668087005615234 + }, + { + "epoch": 3.281707763671875e-05, + "model_forward_time": 0.025470972061157227, + "step": 21507 + }, + { + "epoch": 3.281707763671875e-05, + "step": 21507, + "training_step_time": 0.11404705047607422 + }, + { + "epoch": 3.2818603515625e-05, + "model_forward_time": 0.025552749633789062, + "step": 21508 + }, + { + "epoch": 3.2818603515625e-05, + "step": 21508, + "training_step_time": 0.10682559013366699 + }, + { + "epoch": 3.282012939453125e-05, + "model_forward_time": 0.02451491355895996, + "step": 21509 + }, + { + "epoch": 3.282012939453125e-05, + "step": 21509, + "training_step_time": 0.1108553409576416 + }, + { + "epoch": 3.28216552734375e-05, + "grad_norm": 0.1571519672870636, + "learning_rate": 2.0343845090208368e-05, + "loss": 0.0104, + "step": 21510 + }, + { + "epoch": 3.28216552734375e-05, + "model_forward_time": 0.024941682815551758, + "step": 21510 + }, + { + "epoch": 3.28216552734375e-05, + "step": 21510, + "training_step_time": 0.10613393783569336 + }, + { + "epoch": 3.282318115234375e-05, + "model_forward_time": 0.025030851364135742, + "step": 21511 + }, + { + "epoch": 3.282318115234375e-05, + "step": 21511, + "training_step_time": 0.10718464851379395 + }, + { + "epoch": 3.282470703125e-05, + "model_forward_time": 0.025182723999023438, + "step": 21512 + }, + { + "epoch": 3.282470703125e-05, + "step": 21512, + "training_step_time": 0.10704946517944336 + }, + { + "epoch": 3.282623291015625e-05, + "model_forward_time": 0.024796009063720703, + "step": 21513 + }, + { + "epoch": 3.282623291015625e-05, + "step": 21513, + "training_step_time": 0.1072993278503418 + }, + { + "epoch": 3.28277587890625e-05, + "model_forward_time": 0.024898767471313477, + "step": 21514 + }, + { + "epoch": 3.28277587890625e-05, + "step": 21514, + "training_step_time": 0.10870814323425293 + }, + { + "epoch": 3.282928466796875e-05, + "model_forward_time": 0.02528858184814453, + "step": 21515 + }, + { + "epoch": 3.282928466796875e-05, + "step": 21515, + "training_step_time": 0.10987639427185059 + }, + { + "epoch": 3.2830810546875e-05, + "model_forward_time": 0.02509593963623047, + "step": 21516 + }, + { + "epoch": 3.2830810546875e-05, + "step": 21516, + "training_step_time": 0.10849165916442871 + }, + { + "epoch": 3.283233642578125e-05, + "model_forward_time": 0.025395870208740234, + "step": 21517 + }, + { + "epoch": 3.283233642578125e-05, + "step": 21517, + "training_step_time": 0.15361380577087402 + }, + { + "epoch": 3.28338623046875e-05, + "model_forward_time": 0.025089025497436523, + "step": 21518 + }, + { + "epoch": 3.28338623046875e-05, + "step": 21518, + "training_step_time": 0.16307711601257324 + }, + { + "epoch": 3.283538818359375e-05, + "model_forward_time": 0.024187088012695312, + "step": 21519 + }, + { + "epoch": 3.283538818359375e-05, + "step": 21519, + "training_step_time": 0.1461658477783203 + }, + { + "epoch": 3.28369140625e-05, + "grad_norm": 0.13632987439632416, + "learning_rate": 2.0299488848690355e-05, + "loss": 0.0064, + "step": 21520 + }, + { + "epoch": 3.28369140625e-05, + "model_forward_time": 0.024236440658569336, + "step": 21520 + }, + { + "epoch": 3.28369140625e-05, + "step": 21520, + "training_step_time": 0.21790504455566406 + }, + { + "epoch": 3.283843994140625e-05, + "model_forward_time": 0.02570319175720215, + "step": 21521 + }, + { + "epoch": 3.283843994140625e-05, + "step": 21521, + "training_step_time": 0.1640787124633789 + }, + { + "epoch": 3.28399658203125e-05, + "model_forward_time": 0.024812698364257812, + "step": 21522 + }, + { + "epoch": 3.28399658203125e-05, + "step": 21522, + "training_step_time": 0.22839117050170898 + }, + { + "epoch": 3.284149169921875e-05, + "model_forward_time": 0.024095535278320312, + "step": 21523 + }, + { + "epoch": 3.284149169921875e-05, + "step": 21523, + "training_step_time": 0.11899065971374512 + }, + { + "epoch": 3.2843017578125e-05, + "model_forward_time": 0.025454282760620117, + "step": 21524 + }, + { + "epoch": 3.2843017578125e-05, + "step": 21524, + "training_step_time": 0.11448097229003906 + }, + { + "epoch": 3.284454345703125e-05, + "model_forward_time": 0.02493739128112793, + "step": 21525 + }, + { + "epoch": 3.284454345703125e-05, + "step": 21525, + "training_step_time": 0.11047554016113281 + }, + { + "epoch": 3.28460693359375e-05, + "model_forward_time": 0.024954557418823242, + "step": 21526 + }, + { + "epoch": 3.28460693359375e-05, + "step": 21526, + "training_step_time": 0.10754919052124023 + }, + { + "epoch": 3.284759521484375e-05, + "model_forward_time": 0.02541208267211914, + "step": 21527 + }, + { + "epoch": 3.284759521484375e-05, + "step": 21527, + "training_step_time": 0.10722613334655762 + }, + { + "epoch": 3.284912109375e-05, + "model_forward_time": 0.02524256706237793, + "step": 21528 + }, + { + "epoch": 3.284912109375e-05, + "step": 21528, + "training_step_time": 0.10635018348693848 + }, + { + "epoch": 3.285064697265625e-05, + "model_forward_time": 0.0256345272064209, + "step": 21529 + }, + { + "epoch": 3.285064697265625e-05, + "step": 21529, + "training_step_time": 0.10648846626281738 + }, + { + "epoch": 3.28521728515625e-05, + "grad_norm": 0.1119745522737503, + "learning_rate": 2.0255168696093968e-05, + "loss": 0.0051, + "step": 21530 + }, + { + "epoch": 3.28521728515625e-05, + "model_forward_time": 0.025252342224121094, + "step": 21530 + }, + { + "epoch": 3.28521728515625e-05, + "step": 21530, + "training_step_time": 0.10613727569580078 + }, + { + "epoch": 3.285369873046875e-05, + "model_forward_time": 0.025050640106201172, + "step": 21531 + }, + { + "epoch": 3.285369873046875e-05, + "step": 21531, + "training_step_time": 0.10724830627441406 + }, + { + "epoch": 3.2855224609375e-05, + "model_forward_time": 0.02527904510498047, + "step": 21532 + }, + { + "epoch": 3.2855224609375e-05, + "step": 21532, + "training_step_time": 0.10765910148620605 + }, + { + "epoch": 3.285675048828125e-05, + "model_forward_time": 0.025311946868896484, + "step": 21533 + }, + { + "epoch": 3.285675048828125e-05, + "step": 21533, + "training_step_time": 0.10719108581542969 + }, + { + "epoch": 3.28582763671875e-05, + "model_forward_time": 0.024045228958129883, + "step": 21534 + }, + { + "epoch": 3.28582763671875e-05, + "step": 21534, + "training_step_time": 0.14964890480041504 + }, + { + "epoch": 3.285980224609375e-05, + "model_forward_time": 0.025176048278808594, + "step": 21535 + }, + { + "epoch": 3.285980224609375e-05, + "step": 21535, + "training_step_time": 0.15727472305297852 + }, + { + "epoch": 3.2861328125e-05, + "model_forward_time": 0.025552749633789062, + "step": 21536 + }, + { + "epoch": 3.2861328125e-05, + "step": 21536, + "training_step_time": 0.11104750633239746 + }, + { + "epoch": 3.286285400390625e-05, + "model_forward_time": 0.027614593505859375, + "step": 21537 + }, + { + "epoch": 3.286285400390625e-05, + "step": 21537, + "training_step_time": 0.132310152053833 + }, + { + "epoch": 3.28643798828125e-05, + "model_forward_time": 0.02612757682800293, + "step": 21538 + }, + { + "epoch": 3.28643798828125e-05, + "step": 21538, + "training_step_time": 0.19652366638183594 + }, + { + "epoch": 3.286590576171875e-05, + "model_forward_time": 0.024497032165527344, + "step": 21539 + }, + { + "epoch": 3.286590576171875e-05, + "step": 21539, + "training_step_time": 0.13765621185302734 + }, + { + "epoch": 3.2867431640625e-05, + "grad_norm": 0.1564493030309677, + "learning_rate": 2.0210884686272368e-05, + "loss": 0.0067, + "step": 21540 + }, + { + "epoch": 3.2867431640625e-05, + "model_forward_time": 0.0253298282623291, + "step": 21540 + }, + { + "epoch": 3.2867431640625e-05, + "step": 21540, + "training_step_time": 0.11155390739440918 + }, + { + "epoch": 3.286895751953125e-05, + "model_forward_time": 0.025160551071166992, + "step": 21541 + }, + { + "epoch": 3.286895751953125e-05, + "step": 21541, + "training_step_time": 0.10631823539733887 + }, + { + "epoch": 3.28704833984375e-05, + "model_forward_time": 0.025925159454345703, + "step": 21542 + }, + { + "epoch": 3.28704833984375e-05, + "step": 21542, + "training_step_time": 0.11331057548522949 + }, + { + "epoch": 3.287200927734375e-05, + "model_forward_time": 0.029433250427246094, + "step": 21543 + }, + { + "epoch": 3.287200927734375e-05, + "step": 21543, + "training_step_time": 0.1258692741394043 + }, + { + "epoch": 3.287353515625e-05, + "model_forward_time": 0.025684118270874023, + "step": 21544 + }, + { + "epoch": 3.287353515625e-05, + "step": 21544, + "training_step_time": 0.17607998847961426 + }, + { + "epoch": 3.287506103515625e-05, + "model_forward_time": 0.02514934539794922, + "step": 21545 + }, + { + "epoch": 3.287506103515625e-05, + "step": 21545, + "training_step_time": 0.1794416904449463 + }, + { + "epoch": 3.28765869140625e-05, + "model_forward_time": 0.024749040603637695, + "step": 21546 + }, + { + "epoch": 3.28765869140625e-05, + "step": 21546, + "training_step_time": 0.10444402694702148 + }, + { + "epoch": 3.287811279296875e-05, + "model_forward_time": 0.024876117706298828, + "step": 21547 + }, + { + "epoch": 3.287811279296875e-05, + "step": 21547, + "training_step_time": 0.10263228416442871 + }, + { + "epoch": 3.2879638671875e-05, + "model_forward_time": 0.02537703514099121, + "step": 21548 + }, + { + "epoch": 3.2879638671875e-05, + "step": 21548, + "training_step_time": 0.10442209243774414 + }, + { + "epoch": 3.288116455078125e-05, + "model_forward_time": 0.02559947967529297, + "step": 21549 + }, + { + "epoch": 3.288116455078125e-05, + "step": 21549, + "training_step_time": 0.1045846939086914 + }, + { + "epoch": 3.28826904296875e-05, + "grad_norm": 0.2632162868976593, + "learning_rate": 2.0166636873034805e-05, + "loss": 0.0069, + "step": 21550 + }, + { + "epoch": 3.28826904296875e-05, + "model_forward_time": 0.024941682815551758, + "step": 21550 + }, + { + "epoch": 3.28826904296875e-05, + "step": 21550, + "training_step_time": 0.10779309272766113 + }, + { + "epoch": 3.288421630859375e-05, + "model_forward_time": 0.0252687931060791, + "step": 21551 + }, + { + "epoch": 3.288421630859375e-05, + "step": 21551, + "training_step_time": 0.11011409759521484 + }, + { + "epoch": 3.28857421875e-05, + "model_forward_time": 0.02561020851135254, + "step": 21552 + }, + { + "epoch": 3.28857421875e-05, + "step": 21552, + "training_step_time": 0.12099838256835938 + }, + { + "epoch": 3.288726806640625e-05, + "model_forward_time": 0.025429248809814453, + "step": 21553 + }, + { + "epoch": 3.288726806640625e-05, + "step": 21553, + "training_step_time": 0.12843966484069824 + }, + { + "epoch": 3.28887939453125e-05, + "model_forward_time": 0.025462865829467773, + "step": 21554 + }, + { + "epoch": 3.28887939453125e-05, + "step": 21554, + "training_step_time": 0.13503742218017578 + }, + { + "epoch": 3.289031982421875e-05, + "model_forward_time": 0.02446126937866211, + "step": 21555 + }, + { + "epoch": 3.289031982421875e-05, + "step": 21555, + "training_step_time": 0.12807846069335938 + }, + { + "epoch": 3.2891845703125e-05, + "model_forward_time": 0.02577686309814453, + "step": 21556 + }, + { + "epoch": 3.2891845703125e-05, + "step": 21556, + "training_step_time": 0.12390518188476562 + }, + { + "epoch": 3.289337158203125e-05, + "model_forward_time": 0.025543212890625, + "step": 21557 + }, + { + "epoch": 3.289337158203125e-05, + "step": 21557, + "training_step_time": 0.11790609359741211 + }, + { + "epoch": 3.28948974609375e-05, + "model_forward_time": 0.026508808135986328, + "step": 21558 + }, + { + "epoch": 3.28948974609375e-05, + "step": 21558, + "training_step_time": 0.11689615249633789 + }, + { + "epoch": 3.289642333984375e-05, + "model_forward_time": 0.025481462478637695, + "step": 21559 + }, + { + "epoch": 3.289642333984375e-05, + "step": 21559, + "training_step_time": 0.11307072639465332 + }, + { + "epoch": 3.289794921875e-05, + "grad_norm": 0.10203037410974503, + "learning_rate": 2.0122425310146542e-05, + "loss": 0.0062, + "step": 21560 + }, + { + "epoch": 3.289794921875e-05, + "model_forward_time": 0.025548934936523438, + "step": 21560 + }, + { + "epoch": 3.289794921875e-05, + "step": 21560, + "training_step_time": 0.11379742622375488 + }, + { + "epoch": 3.289947509765625e-05, + "model_forward_time": 0.02515411376953125, + "step": 21561 + }, + { + "epoch": 3.289947509765625e-05, + "step": 21561, + "training_step_time": 0.10952639579772949 + }, + { + "epoch": 3.29010009765625e-05, + "model_forward_time": 0.025330543518066406, + "step": 21562 + }, + { + "epoch": 3.29010009765625e-05, + "step": 21562, + "training_step_time": 0.10773968696594238 + }, + { + "epoch": 3.290252685546875e-05, + "model_forward_time": 0.025376319885253906, + "step": 21563 + }, + { + "epoch": 3.290252685546875e-05, + "step": 21563, + "training_step_time": 0.17806482315063477 + }, + { + "epoch": 3.2904052734375e-05, + "model_forward_time": 0.02465987205505371, + "step": 21564 + }, + { + "epoch": 3.2904052734375e-05, + "step": 21564, + "training_step_time": 0.11498379707336426 + }, + { + "epoch": 3.290557861328125e-05, + "model_forward_time": 0.025045156478881836, + "step": 21565 + }, + { + "epoch": 3.290557861328125e-05, + "step": 21565, + "training_step_time": 0.13141131401062012 + }, + { + "epoch": 3.29071044921875e-05, + "model_forward_time": 0.02486419677734375, + "step": 21566 + }, + { + "epoch": 3.29071044921875e-05, + "step": 21566, + "training_step_time": 0.15852618217468262 + }, + { + "epoch": 3.290863037109375e-05, + "model_forward_time": 0.023921966552734375, + "step": 21567 + }, + { + "epoch": 3.290863037109375e-05, + "step": 21567, + "training_step_time": 0.22017359733581543 + }, + { + "epoch": 3.291015625e-05, + "model_forward_time": 0.024416208267211914, + "step": 21568 + }, + { + "epoch": 3.291015625e-05, + "step": 21568, + "training_step_time": 0.11817550659179688 + }, + { + "epoch": 3.291168212890625e-05, + "model_forward_time": 0.02428603172302246, + "step": 21569 + }, + { + "epoch": 3.291168212890625e-05, + "step": 21569, + "training_step_time": 0.10556983947753906 + }, + { + "epoch": 3.29132080078125e-05, + "grad_norm": 0.15802134573459625, + "learning_rate": 2.0078250051328784e-05, + "loss": 0.0063, + "step": 21570 + }, + { + "epoch": 3.29132080078125e-05, + "model_forward_time": 0.024266958236694336, + "step": 21570 + }, + { + "epoch": 3.29132080078125e-05, + "step": 21570, + "training_step_time": 0.10771489143371582 + }, + { + "epoch": 3.291473388671875e-05, + "model_forward_time": 0.02455925941467285, + "step": 21571 + }, + { + "epoch": 3.291473388671875e-05, + "step": 21571, + "training_step_time": 0.10815548896789551 + }, + { + "epoch": 3.2916259765625e-05, + "model_forward_time": 0.025177955627441406, + "step": 21572 + }, + { + "epoch": 3.2916259765625e-05, + "step": 21572, + "training_step_time": 0.10911297798156738 + }, + { + "epoch": 3.291778564453125e-05, + "model_forward_time": 0.025167226791381836, + "step": 21573 + }, + { + "epoch": 3.291778564453125e-05, + "step": 21573, + "training_step_time": 0.10740137100219727 + }, + { + "epoch": 3.29193115234375e-05, + "model_forward_time": 0.025327205657958984, + "step": 21574 + }, + { + "epoch": 3.29193115234375e-05, + "step": 21574, + "training_step_time": 0.10699105262756348 + }, + { + "epoch": 3.292083740234375e-05, + "model_forward_time": 0.02555680274963379, + "step": 21575 + }, + { + "epoch": 3.292083740234375e-05, + "step": 21575, + "training_step_time": 0.10927867889404297 + }, + { + "epoch": 3.292236328125e-05, + "model_forward_time": 0.02524566650390625, + "step": 21576 + }, + { + "epoch": 3.292236328125e-05, + "step": 21576, + "training_step_time": 0.10761380195617676 + }, + { + "epoch": 3.292388916015625e-05, + "model_forward_time": 0.025261640548706055, + "step": 21577 + }, + { + "epoch": 3.292388916015625e-05, + "step": 21577, + "training_step_time": 0.11361861228942871 + }, + { + "epoch": 3.29254150390625e-05, + "model_forward_time": 0.025734663009643555, + "step": 21578 + }, + { + "epoch": 3.29254150390625e-05, + "step": 21578, + "training_step_time": 0.10715246200561523 + }, + { + "epoch": 3.292694091796875e-05, + "model_forward_time": 0.025177001953125, + "step": 21579 + }, + { + "epoch": 3.292694091796875e-05, + "step": 21579, + "training_step_time": 0.1461353302001953 + }, + { + "epoch": 3.2928466796875e-05, + "grad_norm": 0.10607799142599106, + "learning_rate": 2.0034111150258666e-05, + "loss": 0.005, + "step": 21580 + }, + { + "epoch": 3.2928466796875e-05, + "model_forward_time": 0.02518153190612793, + "step": 21580 + }, + { + "epoch": 3.2928466796875e-05, + "step": 21580, + "training_step_time": 0.1569075584411621 + }, + { + "epoch": 3.292999267578125e-05, + "model_forward_time": 0.02855396270751953, + "step": 21581 + }, + { + "epoch": 3.292999267578125e-05, + "step": 21581, + "training_step_time": 0.11198163032531738 + }, + { + "epoch": 3.29315185546875e-05, + "model_forward_time": 0.025858402252197266, + "step": 21582 + }, + { + "epoch": 3.29315185546875e-05, + "step": 21582, + "training_step_time": 0.13446044921875 + }, + { + "epoch": 3.293304443359375e-05, + "model_forward_time": 0.02644944190979004, + "step": 21583 + }, + { + "epoch": 3.293304443359375e-05, + "step": 21583, + "training_step_time": 0.20357203483581543 + }, + { + "epoch": 3.29345703125e-05, + "model_forward_time": 0.025226831436157227, + "step": 21584 + }, + { + "epoch": 3.29345703125e-05, + "step": 21584, + "training_step_time": 0.13934803009033203 + }, + { + "epoch": 3.293609619140625e-05, + "model_forward_time": 0.023853778839111328, + "step": 21585 + }, + { + "epoch": 3.293609619140625e-05, + "step": 21585, + "training_step_time": 0.19888639450073242 + }, + { + "epoch": 3.29376220703125e-05, + "model_forward_time": 0.024805545806884766, + "step": 21586 + }, + { + "epoch": 3.29376220703125e-05, + "step": 21586, + "training_step_time": 0.16363954544067383 + }, + { + "epoch": 3.293914794921875e-05, + "model_forward_time": 0.02881765365600586, + "step": 21587 + }, + { + "epoch": 3.293914794921875e-05, + "step": 21587, + "training_step_time": 0.17384982109069824 + }, + { + "epoch": 3.2940673828125e-05, + "model_forward_time": 0.02469921112060547, + "step": 21588 + }, + { + "epoch": 3.2940673828125e-05, + "step": 21588, + "training_step_time": 0.19984817504882812 + }, + { + "epoch": 3.294219970703125e-05, + "model_forward_time": 0.024837255477905273, + "step": 21589 + }, + { + "epoch": 3.294219970703125e-05, + "step": 21589, + "training_step_time": 0.11052584648132324 + }, + { + "epoch": 3.29437255859375e-05, + "grad_norm": 0.18321019411087036, + "learning_rate": 1.999000866056908e-05, + "loss": 0.0109, + "step": 21590 + }, + { + "epoch": 3.29437255859375e-05, + "model_forward_time": 0.02471613883972168, + "step": 21590 + }, + { + "epoch": 3.29437255859375e-05, + "step": 21590, + "training_step_time": 0.10564446449279785 + }, + { + "epoch": 3.294525146484375e-05, + "model_forward_time": 0.0256192684173584, + "step": 21591 + }, + { + "epoch": 3.294525146484375e-05, + "step": 21591, + "training_step_time": 0.10926580429077148 + }, + { + "epoch": 3.294677734375e-05, + "model_forward_time": 0.026243209838867188, + "step": 21592 + }, + { + "epoch": 3.294677734375e-05, + "step": 21592, + "training_step_time": 0.10800313949584961 + }, + { + "epoch": 3.294830322265625e-05, + "model_forward_time": 0.025821924209594727, + "step": 21593 + }, + { + "epoch": 3.294830322265625e-05, + "step": 21593, + "training_step_time": 0.13608264923095703 + }, + { + "epoch": 3.29498291015625e-05, + "model_forward_time": 0.025051355361938477, + "step": 21594 + }, + { + "epoch": 3.29498291015625e-05, + "step": 21594, + "training_step_time": 0.1742253303527832 + }, + { + "epoch": 3.295135498046875e-05, + "model_forward_time": 0.027420759201049805, + "step": 21595 + }, + { + "epoch": 3.295135498046875e-05, + "step": 21595, + "training_step_time": 0.16759419441223145 + }, + { + "epoch": 3.2952880859375e-05, + "model_forward_time": 0.02770686149597168, + "step": 21596 + }, + { + "epoch": 3.2952880859375e-05, + "step": 21596, + "training_step_time": 0.1587679386138916 + }, + { + "epoch": 3.295440673828125e-05, + "model_forward_time": 0.02574634552001953, + "step": 21597 + }, + { + "epoch": 3.295440673828125e-05, + "step": 21597, + "training_step_time": 0.14095616340637207 + }, + { + "epoch": 3.29559326171875e-05, + "model_forward_time": 0.025413036346435547, + "step": 21598 + }, + { + "epoch": 3.29559326171875e-05, + "step": 21598, + "training_step_time": 0.12986159324645996 + }, + { + "epoch": 3.295745849609375e-05, + "model_forward_time": 0.024929523468017578, + "step": 21599 + }, + { + "epoch": 3.295745849609375e-05, + "step": 21599, + "training_step_time": 0.11888527870178223 + }, + { + "epoch": 3.2958984375e-05, + "grad_norm": 0.13645771145820618, + "learning_rate": 1.9945942635848748e-05, + "loss": 0.0053, + "step": 21600 + }, + { + "epoch": 3.2958984375e-05, + "model_forward_time": 0.02456521987915039, + "step": 21600 + }, + { + "epoch": 3.2958984375e-05, + "step": 21600, + "training_step_time": 0.12133932113647461 + }, + { + "epoch": 3.296051025390625e-05, + "model_forward_time": 0.02410602569580078, + "step": 21601 + }, + { + "epoch": 3.296051025390625e-05, + "step": 21601, + "training_step_time": 0.10464000701904297 + }, + { + "epoch": 3.29620361328125e-05, + "model_forward_time": 0.02534961700439453, + "step": 21602 + }, + { + "epoch": 3.29620361328125e-05, + "step": 21602, + "training_step_time": 0.10602521896362305 + }, + { + "epoch": 3.296356201171875e-05, + "model_forward_time": 0.025056123733520508, + "step": 21603 + }, + { + "epoch": 3.296356201171875e-05, + "step": 21603, + "training_step_time": 0.1026604175567627 + }, + { + "epoch": 3.2965087890625e-05, + "model_forward_time": 0.025554656982421875, + "step": 21604 + }, + { + "epoch": 3.2965087890625e-05, + "step": 21604, + "training_step_time": 0.10693025588989258 + }, + { + "epoch": 3.296661376953125e-05, + "model_forward_time": 0.02476644515991211, + "step": 21605 + }, + { + "epoch": 3.296661376953125e-05, + "step": 21605, + "training_step_time": 0.10554242134094238 + }, + { + "epoch": 3.29681396484375e-05, + "model_forward_time": 0.025457382202148438, + "step": 21606 + }, + { + "epoch": 3.29681396484375e-05, + "step": 21606, + "training_step_time": 0.15619301795959473 + }, + { + "epoch": 3.296966552734375e-05, + "model_forward_time": 0.025591611862182617, + "step": 21607 + }, + { + "epoch": 3.296966552734375e-05, + "step": 21607, + "training_step_time": 0.12072134017944336 + }, + { + "epoch": 3.297119140625e-05, + "model_forward_time": 0.025662660598754883, + "step": 21608 + }, + { + "epoch": 3.297119140625e-05, + "step": 21608, + "training_step_time": 0.12965917587280273 + }, + { + "epoch": 3.297271728515625e-05, + "model_forward_time": 0.025268077850341797, + "step": 21609 + }, + { + "epoch": 3.297271728515625e-05, + "step": 21609, + "training_step_time": 0.16080689430236816 + }, + { + "epoch": 3.29742431640625e-05, + "grad_norm": 0.19638510048389435, + "learning_rate": 1.9901913129642024e-05, + "loss": 0.011, + "step": 21610 + }, + { + "epoch": 3.29742431640625e-05, + "model_forward_time": 0.024733543395996094, + "step": 21610 + }, + { + "epoch": 3.29742431640625e-05, + "step": 21610, + "training_step_time": 0.22168231010437012 + }, + { + "epoch": 3.297576904296875e-05, + "model_forward_time": 0.02540135383605957, + "step": 21611 + }, + { + "epoch": 3.297576904296875e-05, + "step": 21611, + "training_step_time": 0.11881875991821289 + }, + { + "epoch": 3.2977294921875e-05, + "model_forward_time": 0.024294614791870117, + "step": 21612 + }, + { + "epoch": 3.2977294921875e-05, + "step": 21612, + "training_step_time": 0.10375833511352539 + }, + { + "epoch": 3.297882080078125e-05, + "model_forward_time": 0.025632381439208984, + "step": 21613 + }, + { + "epoch": 3.297882080078125e-05, + "step": 21613, + "training_step_time": 0.10560297966003418 + }, + { + "epoch": 3.29803466796875e-05, + "model_forward_time": 0.024953126907348633, + "step": 21614 + }, + { + "epoch": 3.29803466796875e-05, + "step": 21614, + "training_step_time": 0.1084756851196289 + }, + { + "epoch": 3.298187255859375e-05, + "model_forward_time": 0.025233745574951172, + "step": 21615 + }, + { + "epoch": 3.298187255859375e-05, + "step": 21615, + "training_step_time": 0.10884428024291992 + }, + { + "epoch": 3.29833984375e-05, + "model_forward_time": 0.02488112449645996, + "step": 21616 + }, + { + "epoch": 3.29833984375e-05, + "step": 21616, + "training_step_time": 0.10658764839172363 + }, + { + "epoch": 3.298492431640625e-05, + "model_forward_time": 0.024851322174072266, + "step": 21617 + }, + { + "epoch": 3.298492431640625e-05, + "step": 21617, + "training_step_time": 0.10854411125183105 + }, + { + "epoch": 3.29864501953125e-05, + "model_forward_time": 0.02484440803527832, + "step": 21618 + }, + { + "epoch": 3.29864501953125e-05, + "step": 21618, + "training_step_time": 0.10720610618591309 + }, + { + "epoch": 3.298797607421875e-05, + "model_forward_time": 0.025374412536621094, + "step": 21619 + }, + { + "epoch": 3.298797607421875e-05, + "step": 21619, + "training_step_time": 0.10998272895812988 + }, + { + "epoch": 3.2989501953125e-05, + "grad_norm": 0.0906025692820549, + "learning_rate": 1.98579201954489e-05, + "loss": 0.005, + "step": 21620 + }, + { + "epoch": 3.2989501953125e-05, + "model_forward_time": 0.024613380432128906, + "step": 21620 + }, + { + "epoch": 3.2989501953125e-05, + "step": 21620, + "training_step_time": 0.10365676879882812 + }, + { + "epoch": 3.299102783203125e-05, + "model_forward_time": 0.02382826805114746, + "step": 21621 + }, + { + "epoch": 3.299102783203125e-05, + "step": 21621, + "training_step_time": 0.15266036987304688 + }, + { + "epoch": 3.29925537109375e-05, + "model_forward_time": 0.024474143981933594, + "step": 21622 + }, + { + "epoch": 3.29925537109375e-05, + "step": 21622, + "training_step_time": 0.1589653491973877 + }, + { + "epoch": 3.299407958984375e-05, + "model_forward_time": 0.0257568359375, + "step": 21623 + }, + { + "epoch": 3.299407958984375e-05, + "step": 21623, + "training_step_time": 0.10695505142211914 + }, + { + "epoch": 3.299560546875e-05, + "model_forward_time": 0.024897336959838867, + "step": 21624 + }, + { + "epoch": 3.299560546875e-05, + "step": 21624, + "training_step_time": 0.13048505783081055 + }, + { + "epoch": 3.299713134765625e-05, + "model_forward_time": 0.024859189987182617, + "step": 21625 + }, + { + "epoch": 3.299713134765625e-05, + "step": 21625, + "training_step_time": 0.1951286792755127 + }, + { + "epoch": 3.29986572265625e-05, + "model_forward_time": 0.024225473403930664, + "step": 21626 + }, + { + "epoch": 3.29986572265625e-05, + "step": 21626, + "training_step_time": 0.14817070960998535 + }, + { + "epoch": 3.300018310546875e-05, + "model_forward_time": 0.024331092834472656, + "step": 21627 + }, + { + "epoch": 3.300018310546875e-05, + "step": 21627, + "training_step_time": 0.10275697708129883 + }, + { + "epoch": 3.3001708984375e-05, + "model_forward_time": 0.024472713470458984, + "step": 21628 + }, + { + "epoch": 3.3001708984375e-05, + "step": 21628, + "training_step_time": 0.1320514678955078 + }, + { + "epoch": 3.300323486328125e-05, + "model_forward_time": 0.025026559829711914, + "step": 21629 + }, + { + "epoch": 3.300323486328125e-05, + "step": 21629, + "training_step_time": 0.20738911628723145 + }, + { + "epoch": 3.30047607421875e-05, + "grad_norm": 0.15329015254974365, + "learning_rate": 1.981396388672496e-05, + "loss": 0.0084, + "step": 21630 + }, + { + "epoch": 3.30047607421875e-05, + "model_forward_time": 0.023899316787719727, + "step": 21630 + }, + { + "epoch": 3.30047607421875e-05, + "step": 21630, + "training_step_time": 0.1344316005706787 + }, + { + "epoch": 3.300628662109375e-05, + "model_forward_time": 0.024190902709960938, + "step": 21631 + }, + { + "epoch": 3.300628662109375e-05, + "step": 21631, + "training_step_time": 0.12869572639465332 + }, + { + "epoch": 3.30078125e-05, + "model_forward_time": 0.0242612361907959, + "step": 21632 + }, + { + "epoch": 3.30078125e-05, + "step": 21632, + "training_step_time": 0.10609555244445801 + }, + { + "epoch": 3.300933837890625e-05, + "model_forward_time": 0.025055646896362305, + "step": 21633 + }, + { + "epoch": 3.300933837890625e-05, + "step": 21633, + "training_step_time": 0.1178736686706543 + }, + { + "epoch": 3.30108642578125e-05, + "model_forward_time": 0.024854660034179688, + "step": 21634 + }, + { + "epoch": 3.30108642578125e-05, + "step": 21634, + "training_step_time": 0.10793042182922363 + }, + { + "epoch": 3.301239013671875e-05, + "model_forward_time": 0.024514436721801758, + "step": 21635 + }, + { + "epoch": 3.301239013671875e-05, + "step": 21635, + "training_step_time": 0.1103982925415039 + }, + { + "epoch": 3.3013916015625e-05, + "model_forward_time": 0.02469801902770996, + "step": 21636 + }, + { + "epoch": 3.3013916015625e-05, + "step": 21636, + "training_step_time": 0.11001205444335938 + }, + { + "epoch": 3.301544189453125e-05, + "model_forward_time": 0.02520918846130371, + "step": 21637 + }, + { + "epoch": 3.301544189453125e-05, + "step": 21637, + "training_step_time": 0.10795927047729492 + }, + { + "epoch": 3.30169677734375e-05, + "model_forward_time": 0.025318145751953125, + "step": 21638 + }, + { + "epoch": 3.30169677734375e-05, + "step": 21638, + "training_step_time": 0.10701751708984375 + }, + { + "epoch": 3.301849365234375e-05, + "model_forward_time": 0.025209426879882812, + "step": 21639 + }, + { + "epoch": 3.301849365234375e-05, + "step": 21639, + "training_step_time": 0.10817170143127441 + }, + { + "epoch": 3.302001953125e-05, + "grad_norm": 0.2231944501399994, + "learning_rate": 1.977004425688126e-05, + "loss": 0.0056, + "step": 21640 + }, + { + "epoch": 3.302001953125e-05, + "model_forward_time": 0.024566173553466797, + "step": 21640 + }, + { + "epoch": 3.302001953125e-05, + "step": 21640, + "training_step_time": 0.10619688034057617 + }, + { + "epoch": 3.302154541015625e-05, + "model_forward_time": 0.0248868465423584, + "step": 21641 + }, + { + "epoch": 3.302154541015625e-05, + "step": 21641, + "training_step_time": 0.10500502586364746 + }, + { + "epoch": 3.30230712890625e-05, + "model_forward_time": 0.025059223175048828, + "step": 21642 + }, + { + "epoch": 3.30230712890625e-05, + "step": 21642, + "training_step_time": 0.10553121566772461 + }, + { + "epoch": 3.302459716796875e-05, + "model_forward_time": 0.02526235580444336, + "step": 21643 + }, + { + "epoch": 3.302459716796875e-05, + "step": 21643, + "training_step_time": 0.11204266548156738 + }, + { + "epoch": 3.3026123046875e-05, + "model_forward_time": 0.02559804916381836, + "step": 21644 + }, + { + "epoch": 3.3026123046875e-05, + "step": 21644, + "training_step_time": 0.1065378189086914 + }, + { + "epoch": 3.302764892578125e-05, + "model_forward_time": 0.02774357795715332, + "step": 21645 + }, + { + "epoch": 3.302764892578125e-05, + "step": 21645, + "training_step_time": 0.10740327835083008 + }, + { + "epoch": 3.30291748046875e-05, + "model_forward_time": 0.02543187141418457, + "step": 21646 + }, + { + "epoch": 3.30291748046875e-05, + "step": 21646, + "training_step_time": 0.10715484619140625 + }, + { + "epoch": 3.303070068359375e-05, + "model_forward_time": 0.025083065032958984, + "step": 21647 + }, + { + "epoch": 3.303070068359375e-05, + "step": 21647, + "training_step_time": 0.10512447357177734 + }, + { + "epoch": 3.30322265625e-05, + "model_forward_time": 0.025011539459228516, + "step": 21648 + }, + { + "epoch": 3.30322265625e-05, + "step": 21648, + "training_step_time": 0.10540628433227539 + }, + { + "epoch": 3.303375244140625e-05, + "model_forward_time": 0.028086423873901367, + "step": 21649 + }, + { + "epoch": 3.303375244140625e-05, + "step": 21649, + "training_step_time": 0.10953235626220703 + }, + { + "epoch": 3.30352783203125e-05, + "grad_norm": 0.0979032889008522, + "learning_rate": 1.9726161359284286e-05, + "loss": 0.0045, + "step": 21650 + }, + { + "epoch": 3.30352783203125e-05, + "model_forward_time": 0.02527332305908203, + "step": 21650 + }, + { + "epoch": 3.30352783203125e-05, + "step": 21650, + "training_step_time": 0.10857629776000977 + }, + { + "epoch": 3.303680419921875e-05, + "model_forward_time": 0.025359153747558594, + "step": 21651 + }, + { + "epoch": 3.303680419921875e-05, + "step": 21651, + "training_step_time": 0.20366668701171875 + }, + { + "epoch": 3.3038330078125e-05, + "model_forward_time": 0.024358510971069336, + "step": 21652 + }, + { + "epoch": 3.3038330078125e-05, + "step": 21652, + "training_step_time": 0.11240530014038086 + }, + { + "epoch": 3.303985595703125e-05, + "model_forward_time": 0.02442026138305664, + "step": 21653 + }, + { + "epoch": 3.303985595703125e-05, + "step": 21653, + "training_step_time": 0.1396350860595703 + }, + { + "epoch": 3.30413818359375e-05, + "model_forward_time": 0.025272607803344727, + "step": 21654 + }, + { + "epoch": 3.30413818359375e-05, + "step": 21654, + "training_step_time": 0.1537952423095703 + }, + { + "epoch": 3.304290771484375e-05, + "model_forward_time": 0.024760961532592773, + "step": 21655 + }, + { + "epoch": 3.304290771484375e-05, + "step": 21655, + "training_step_time": 0.17072486877441406 + }, + { + "epoch": 3.304443359375e-05, + "model_forward_time": 0.02434086799621582, + "step": 21656 + }, + { + "epoch": 3.304443359375e-05, + "step": 21656, + "training_step_time": 0.17336511611938477 + }, + { + "epoch": 3.304595947265625e-05, + "model_forward_time": 0.02432560920715332, + "step": 21657 + }, + { + "epoch": 3.304595947265625e-05, + "step": 21657, + "training_step_time": 0.1010122299194336 + }, + { + "epoch": 3.30474853515625e-05, + "model_forward_time": 0.024654150009155273, + "step": 21658 + }, + { + "epoch": 3.30474853515625e-05, + "step": 21658, + "training_step_time": 0.10524225234985352 + }, + { + "epoch": 3.304901123046875e-05, + "model_forward_time": 0.025241851806640625, + "step": 21659 + }, + { + "epoch": 3.304901123046875e-05, + "step": 21659, + "training_step_time": 0.10726785659790039 + }, + { + "epoch": 3.3050537109375e-05, + "grad_norm": 0.2468523234128952, + "learning_rate": 1.9682315247255894e-05, + "loss": 0.0074, + "step": 21660 + }, + { + "epoch": 3.3050537109375e-05, + "model_forward_time": 0.025350093841552734, + "step": 21660 + }, + { + "epoch": 3.3050537109375e-05, + "step": 21660, + "training_step_time": 0.10625147819519043 + }, + { + "epoch": 3.305206298828125e-05, + "model_forward_time": 0.0249478816986084, + "step": 21661 + }, + { + "epoch": 3.305206298828125e-05, + "step": 21661, + "training_step_time": 0.10512232780456543 + }, + { + "epoch": 3.30535888671875e-05, + "model_forward_time": 0.025229930877685547, + "step": 21662 + }, + { + "epoch": 3.30535888671875e-05, + "step": 21662, + "training_step_time": 0.1049351692199707 + }, + { + "epoch": 3.305511474609375e-05, + "model_forward_time": 0.02510356903076172, + "step": 21663 + }, + { + "epoch": 3.305511474609375e-05, + "step": 21663, + "training_step_time": 0.1073756217956543 + }, + { + "epoch": 3.3056640625e-05, + "model_forward_time": 0.025348186492919922, + "step": 21664 + }, + { + "epoch": 3.3056640625e-05, + "step": 21664, + "training_step_time": 0.10579681396484375 + }, + { + "epoch": 3.305816650390625e-05, + "model_forward_time": 0.024752378463745117, + "step": 21665 + }, + { + "epoch": 3.305816650390625e-05, + "step": 21665, + "training_step_time": 0.10666465759277344 + }, + { + "epoch": 3.30596923828125e-05, + "model_forward_time": 0.02475595474243164, + "step": 21666 + }, + { + "epoch": 3.30596923828125e-05, + "step": 21666, + "training_step_time": 0.1146688461303711 + }, + { + "epoch": 3.306121826171875e-05, + "model_forward_time": 0.02411794662475586, + "step": 21667 + }, + { + "epoch": 3.306121826171875e-05, + "step": 21667, + "training_step_time": 0.16798686981201172 + }, + { + "epoch": 3.3062744140625e-05, + "model_forward_time": 0.02433037757873535, + "step": 21668 + }, + { + "epoch": 3.3062744140625e-05, + "step": 21668, + "training_step_time": 0.1675407886505127 + }, + { + "epoch": 3.306427001953125e-05, + "model_forward_time": 0.024592876434326172, + "step": 21669 + }, + { + "epoch": 3.306427001953125e-05, + "step": 21669, + "training_step_time": 0.2050011157989502 + }, + { + "epoch": 3.30657958984375e-05, + "grad_norm": 0.33429640531539917, + "learning_rate": 1.9638505974073234e-05, + "loss": 0.0048, + "step": 21670 + }, + { + "epoch": 3.30657958984375e-05, + "model_forward_time": 0.02393960952758789, + "step": 21670 + }, + { + "epoch": 3.30657958984375e-05, + "step": 21670, + "training_step_time": 0.19972658157348633 + }, + { + "epoch": 3.306732177734375e-05, + "model_forward_time": 0.02443075180053711, + "step": 21671 + }, + { + "epoch": 3.306732177734375e-05, + "step": 21671, + "training_step_time": 0.19254136085510254 + }, + { + "epoch": 3.306884765625e-05, + "model_forward_time": 0.02624058723449707, + "step": 21672 + }, + { + "epoch": 3.306884765625e-05, + "step": 21672, + "training_step_time": 0.19963860511779785 + }, + { + "epoch": 3.307037353515625e-05, + "model_forward_time": 0.02439260482788086, + "step": 21673 + }, + { + "epoch": 3.307037353515625e-05, + "step": 21673, + "training_step_time": 0.13341093063354492 + }, + { + "epoch": 3.30718994140625e-05, + "model_forward_time": 0.024617910385131836, + "step": 21674 + }, + { + "epoch": 3.30718994140625e-05, + "step": 21674, + "training_step_time": 0.1929759979248047 + }, + { + "epoch": 3.307342529296875e-05, + "model_forward_time": 0.025384187698364258, + "step": 21675 + }, + { + "epoch": 3.307342529296875e-05, + "step": 21675, + "training_step_time": 0.1546790599822998 + }, + { + "epoch": 3.3074951171875e-05, + "model_forward_time": 0.024454355239868164, + "step": 21676 + }, + { + "epoch": 3.3074951171875e-05, + "step": 21676, + "training_step_time": 0.11436963081359863 + }, + { + "epoch": 3.307647705078125e-05, + "model_forward_time": 0.025300025939941406, + "step": 21677 + }, + { + "epoch": 3.307647705078125e-05, + "step": 21677, + "training_step_time": 0.11842870712280273 + }, + { + "epoch": 3.30780029296875e-05, + "model_forward_time": 0.025342464447021484, + "step": 21678 + }, + { + "epoch": 3.30780029296875e-05, + "step": 21678, + "training_step_time": 0.1099100112915039 + }, + { + "epoch": 3.307952880859375e-05, + "model_forward_time": 0.027800321578979492, + "step": 21679 + }, + { + "epoch": 3.307952880859375e-05, + "step": 21679, + "training_step_time": 0.1094508171081543 + }, + { + "epoch": 3.30810546875e-05, + "grad_norm": 0.09189315140247345, + "learning_rate": 1.9594733592968733e-05, + "loss": 0.0047, + "step": 21680 + }, + { + "epoch": 3.30810546875e-05, + "model_forward_time": 0.024841785430908203, + "step": 21680 + }, + { + "epoch": 3.30810546875e-05, + "step": 21680, + "training_step_time": 0.11111736297607422 + }, + { + "epoch": 3.308258056640625e-05, + "model_forward_time": 0.02526068687438965, + "step": 21681 + }, + { + "epoch": 3.308258056640625e-05, + "step": 21681, + "training_step_time": 0.10657525062561035 + }, + { + "epoch": 3.30841064453125e-05, + "model_forward_time": 0.024965286254882812, + "step": 21682 + }, + { + "epoch": 3.30841064453125e-05, + "step": 21682, + "training_step_time": 0.10718584060668945 + }, + { + "epoch": 3.308563232421875e-05, + "model_forward_time": 0.025114059448242188, + "step": 21683 + }, + { + "epoch": 3.308563232421875e-05, + "step": 21683, + "training_step_time": 0.10442137718200684 + }, + { + "epoch": 3.3087158203125e-05, + "model_forward_time": 0.025166034698486328, + "step": 21684 + }, + { + "epoch": 3.3087158203125e-05, + "step": 21684, + "training_step_time": 0.10658693313598633 + }, + { + "epoch": 3.308868408203125e-05, + "model_forward_time": 0.02519965171813965, + "step": 21685 + }, + { + "epoch": 3.308868408203125e-05, + "step": 21685, + "training_step_time": 0.10705184936523438 + }, + { + "epoch": 3.30902099609375e-05, + "model_forward_time": 0.025802135467529297, + "step": 21686 + }, + { + "epoch": 3.30902099609375e-05, + "step": 21686, + "training_step_time": 0.11495828628540039 + }, + { + "epoch": 3.309173583984375e-05, + "model_forward_time": 0.024657249450683594, + "step": 21687 + }, + { + "epoch": 3.309173583984375e-05, + "step": 21687, + "training_step_time": 0.1120753288269043 + }, + { + "epoch": 3.309326171875e-05, + "model_forward_time": 0.025519609451293945, + "step": 21688 + }, + { + "epoch": 3.309326171875e-05, + "step": 21688, + "training_step_time": 0.11072683334350586 + }, + { + "epoch": 3.309478759765625e-05, + "model_forward_time": 0.02558302879333496, + "step": 21689 + }, + { + "epoch": 3.309478759765625e-05, + "step": 21689, + "training_step_time": 0.10842299461364746 + }, + { + "epoch": 3.30963134765625e-05, + "grad_norm": 0.31479647755622864, + "learning_rate": 1.9550998157129946e-05, + "loss": 0.0064, + "step": 21690 + }, + { + "epoch": 3.30963134765625e-05, + "model_forward_time": 0.02520442008972168, + "step": 21690 + }, + { + "epoch": 3.30963134765625e-05, + "step": 21690, + "training_step_time": 0.10814619064331055 + }, + { + "epoch": 3.309783935546875e-05, + "model_forward_time": 0.02498030662536621, + "step": 21691 + }, + { + "epoch": 3.309783935546875e-05, + "step": 21691, + "training_step_time": 0.10692453384399414 + }, + { + "epoch": 3.3099365234375e-05, + "model_forward_time": 0.026551485061645508, + "step": 21692 + }, + { + "epoch": 3.3099365234375e-05, + "step": 21692, + "training_step_time": 0.10693645477294922 + }, + { + "epoch": 3.310089111328125e-05, + "model_forward_time": 0.024756669998168945, + "step": 21693 + }, + { + "epoch": 3.310089111328125e-05, + "step": 21693, + "training_step_time": 0.1101675033569336 + }, + { + "epoch": 3.31024169921875e-05, + "model_forward_time": 0.02512979507446289, + "step": 21694 + }, + { + "epoch": 3.31024169921875e-05, + "step": 21694, + "training_step_time": 0.17913532257080078 + }, + { + "epoch": 3.310394287109375e-05, + "model_forward_time": 0.024075984954833984, + "step": 21695 + }, + { + "epoch": 3.310394287109375e-05, + "step": 21695, + "training_step_time": 0.11275863647460938 + }, + { + "epoch": 3.310546875e-05, + "model_forward_time": 0.024477243423461914, + "step": 21696 + }, + { + "epoch": 3.310546875e-05, + "step": 21696, + "training_step_time": 0.12874579429626465 + }, + { + "epoch": 3.310699462890625e-05, + "model_forward_time": 0.0252840518951416, + "step": 21697 + }, + { + "epoch": 3.310699462890625e-05, + "step": 21697, + "training_step_time": 0.15785908699035645 + }, + { + "epoch": 3.31085205078125e-05, + "model_forward_time": 0.024330854415893555, + "step": 21698 + }, + { + "epoch": 3.31085205078125e-05, + "step": 21698, + "training_step_time": 0.17954611778259277 + }, + { + "epoch": 3.311004638671875e-05, + "model_forward_time": 0.0243375301361084, + "step": 21699 + }, + { + "epoch": 3.311004638671875e-05, + "step": 21699, + "training_step_time": 0.17148637771606445 + }, + { + "epoch": 3.3111572265625e-05, + "grad_norm": 0.2427108734846115, + "learning_rate": 1.950729971969955e-05, + "loss": 0.008, + "step": 21700 + }, + { + "epoch": 3.3111572265625e-05, + "model_forward_time": 0.024054765701293945, + "step": 21700 + }, + { + "epoch": 3.3111572265625e-05, + "step": 21700, + "training_step_time": 0.11722111701965332 + }, + { + "epoch": 3.311309814453125e-05, + "model_forward_time": 0.024266958236694336, + "step": 21701 + }, + { + "epoch": 3.311309814453125e-05, + "step": 21701, + "training_step_time": 0.11567187309265137 + }, + { + "epoch": 3.31146240234375e-05, + "model_forward_time": 0.027753591537475586, + "step": 21702 + }, + { + "epoch": 3.31146240234375e-05, + "step": 21702, + "training_step_time": 0.11170411109924316 + }, + { + "epoch": 3.311614990234375e-05, + "model_forward_time": 0.025176048278808594, + "step": 21703 + }, + { + "epoch": 3.311614990234375e-05, + "step": 21703, + "training_step_time": 0.11543750762939453 + }, + { + "epoch": 3.311767578125e-05, + "model_forward_time": 0.02476215362548828, + "step": 21704 + }, + { + "epoch": 3.311767578125e-05, + "step": 21704, + "training_step_time": 0.11217379570007324 + }, + { + "epoch": 3.311920166015625e-05, + "model_forward_time": 0.02484726905822754, + "step": 21705 + }, + { + "epoch": 3.311920166015625e-05, + "step": 21705, + "training_step_time": 0.11117172241210938 + }, + { + "epoch": 3.31207275390625e-05, + "model_forward_time": 0.02486395835876465, + "step": 21706 + }, + { + "epoch": 3.31207275390625e-05, + "step": 21706, + "training_step_time": 0.11106657981872559 + }, + { + "epoch": 3.312225341796875e-05, + "model_forward_time": 0.025017499923706055, + "step": 21707 + }, + { + "epoch": 3.312225341796875e-05, + "step": 21707, + "training_step_time": 0.1107938289642334 + }, + { + "epoch": 3.3123779296875e-05, + "model_forward_time": 0.025049924850463867, + "step": 21708 + }, + { + "epoch": 3.3123779296875e-05, + "step": 21708, + "training_step_time": 0.10879778861999512 + }, + { + "epoch": 3.312530517578125e-05, + "model_forward_time": 0.024872779846191406, + "step": 21709 + }, + { + "epoch": 3.312530517578125e-05, + "step": 21709, + "training_step_time": 0.10855412483215332 + }, + { + "epoch": 3.31268310546875e-05, + "grad_norm": 0.1840215027332306, + "learning_rate": 1.9463638333775276e-05, + "loss": 0.0047, + "step": 21710 + }, + { + "epoch": 3.31268310546875e-05, + "model_forward_time": 0.024815082550048828, + "step": 21710 + }, + { + "epoch": 3.31268310546875e-05, + "step": 21710, + "training_step_time": 0.14074158668518066 + }, + { + "epoch": 3.312835693359375e-05, + "model_forward_time": 0.02416253089904785, + "step": 21711 + }, + { + "epoch": 3.312835693359375e-05, + "step": 21711, + "training_step_time": 0.15719342231750488 + }, + { + "epoch": 3.31298828125e-05, + "model_forward_time": 0.02443075180053711, + "step": 21712 + }, + { + "epoch": 3.31298828125e-05, + "step": 21712, + "training_step_time": 0.10923576354980469 + }, + { + "epoch": 3.313140869140625e-05, + "model_forward_time": 0.024692773818969727, + "step": 21713 + }, + { + "epoch": 3.313140869140625e-05, + "step": 21713, + "training_step_time": 0.13438987731933594 + }, + { + "epoch": 3.31329345703125e-05, + "model_forward_time": 0.02522134780883789, + "step": 21714 + }, + { + "epoch": 3.31329345703125e-05, + "step": 21714, + "training_step_time": 0.2067551612854004 + }, + { + "epoch": 3.313446044921875e-05, + "model_forward_time": 0.024754047393798828, + "step": 21715 + }, + { + "epoch": 3.313446044921875e-05, + "step": 21715, + "training_step_time": 0.16179752349853516 + }, + { + "epoch": 3.3135986328125e-05, + "model_forward_time": 0.023836374282836914, + "step": 21716 + }, + { + "epoch": 3.3135986328125e-05, + "step": 21716, + "training_step_time": 0.12055301666259766 + }, + { + "epoch": 3.313751220703125e-05, + "model_forward_time": 0.024665117263793945, + "step": 21717 + }, + { + "epoch": 3.313751220703125e-05, + "step": 21717, + "training_step_time": 0.12958717346191406 + }, + { + "epoch": 3.31390380859375e-05, + "model_forward_time": 0.025177717208862305, + "step": 21718 + }, + { + "epoch": 3.31390380859375e-05, + "step": 21718, + "training_step_time": 0.17693328857421875 + }, + { + "epoch": 3.314056396484375e-05, + "model_forward_time": 0.0240328311920166, + "step": 21719 + }, + { + "epoch": 3.314056396484375e-05, + "step": 21719, + "training_step_time": 0.16917681694030762 + }, + { + "epoch": 3.314208984375e-05, + "grad_norm": 0.3955047130584717, + "learning_rate": 1.942001405240979e-05, + "loss": 0.0116, + "step": 21720 + }, + { + "epoch": 3.314208984375e-05, + "model_forward_time": 0.0245208740234375, + "step": 21720 + }, + { + "epoch": 3.314208984375e-05, + "step": 21720, + "training_step_time": 0.12347674369812012 + }, + { + "epoch": 3.314361572265625e-05, + "model_forward_time": 0.024047136306762695, + "step": 21721 + }, + { + "epoch": 3.314361572265625e-05, + "step": 21721, + "training_step_time": 0.11812472343444824 + }, + { + "epoch": 3.31451416015625e-05, + "model_forward_time": 0.026342153549194336, + "step": 21722 + }, + { + "epoch": 3.31451416015625e-05, + "step": 21722, + "training_step_time": 0.11606740951538086 + }, + { + "epoch": 3.314666748046875e-05, + "model_forward_time": 0.02803349494934082, + "step": 21723 + }, + { + "epoch": 3.314666748046875e-05, + "step": 21723, + "training_step_time": 0.10847878456115723 + }, + { + "epoch": 3.3148193359375e-05, + "model_forward_time": 0.02505350112915039, + "step": 21724 + }, + { + "epoch": 3.3148193359375e-05, + "step": 21724, + "training_step_time": 0.10520052909851074 + }, + { + "epoch": 3.314971923828125e-05, + "model_forward_time": 0.02536153793334961, + "step": 21725 + }, + { + "epoch": 3.314971923828125e-05, + "step": 21725, + "training_step_time": 0.10768342018127441 + }, + { + "epoch": 3.31512451171875e-05, + "model_forward_time": 0.024854183197021484, + "step": 21726 + }, + { + "epoch": 3.31512451171875e-05, + "step": 21726, + "training_step_time": 0.11243939399719238 + }, + { + "epoch": 3.315277099609375e-05, + "model_forward_time": 0.025372743606567383, + "step": 21727 + }, + { + "epoch": 3.315277099609375e-05, + "step": 21727, + "training_step_time": 0.11742782592773438 + }, + { + "epoch": 3.3154296875e-05, + "model_forward_time": 0.025104999542236328, + "step": 21728 + }, + { + "epoch": 3.3154296875e-05, + "step": 21728, + "training_step_time": 0.11244535446166992 + }, + { + "epoch": 3.315582275390625e-05, + "model_forward_time": 0.024924516677856445, + "step": 21729 + }, + { + "epoch": 3.315582275390625e-05, + "step": 21729, + "training_step_time": 0.10927176475524902 + }, + { + "epoch": 3.31573486328125e-05, + "grad_norm": 0.18369890749454498, + "learning_rate": 1.937642692861076e-05, + "loss": 0.0046, + "step": 21730 + }, + { + "epoch": 3.31573486328125e-05, + "model_forward_time": 0.02471637725830078, + "step": 21730 + }, + { + "epoch": 3.31573486328125e-05, + "step": 21730, + "training_step_time": 0.10932779312133789 + }, + { + "epoch": 3.315887451171875e-05, + "model_forward_time": 0.02537226676940918, + "step": 21731 + }, + { + "epoch": 3.315887451171875e-05, + "step": 21731, + "training_step_time": 0.11021137237548828 + }, + { + "epoch": 3.3160400390625e-05, + "model_forward_time": 0.025327444076538086, + "step": 21732 + }, + { + "epoch": 3.3160400390625e-05, + "step": 21732, + "training_step_time": 0.11326432228088379 + }, + { + "epoch": 3.316192626953125e-05, + "model_forward_time": 0.0250551700592041, + "step": 21733 + }, + { + "epoch": 3.316192626953125e-05, + "step": 21733, + "training_step_time": 0.10610842704772949 + }, + { + "epoch": 3.31634521484375e-05, + "model_forward_time": 0.024434566497802734, + "step": 21734 + }, + { + "epoch": 3.31634521484375e-05, + "step": 21734, + "training_step_time": 0.10514402389526367 + }, + { + "epoch": 3.316497802734375e-05, + "model_forward_time": 0.025000810623168945, + "step": 21735 + }, + { + "epoch": 3.316497802734375e-05, + "step": 21735, + "training_step_time": 0.1047670841217041 + }, + { + "epoch": 3.316650390625e-05, + "model_forward_time": 0.02536153793334961, + "step": 21736 + }, + { + "epoch": 3.316650390625e-05, + "step": 21736, + "training_step_time": 0.10601592063903809 + }, + { + "epoch": 3.316802978515625e-05, + "model_forward_time": 0.02505350112915039, + "step": 21737 + }, + { + "epoch": 3.316802978515625e-05, + "step": 21737, + "training_step_time": 0.10398983955383301 + }, + { + "epoch": 3.31695556640625e-05, + "model_forward_time": 0.025296926498413086, + "step": 21738 + }, + { + "epoch": 3.31695556640625e-05, + "step": 21738, + "training_step_time": 0.10823702812194824 + }, + { + "epoch": 3.317108154296875e-05, + "model_forward_time": 0.02506732940673828, + "step": 21739 + }, + { + "epoch": 3.317108154296875e-05, + "step": 21739, + "training_step_time": 0.1528947353363037 + }, + { + "epoch": 3.3172607421875e-05, + "grad_norm": 0.16225546598434448, + "learning_rate": 1.93328770153406e-05, + "loss": 0.0068, + "step": 21740 + }, + { + "epoch": 3.3172607421875e-05, + "model_forward_time": 0.02542734146118164, + "step": 21740 + }, + { + "epoch": 3.3172607421875e-05, + "step": 21740, + "training_step_time": 0.11113405227661133 + }, + { + "epoch": 3.317413330078125e-05, + "model_forward_time": 0.0246124267578125, + "step": 21741 + }, + { + "epoch": 3.317413330078125e-05, + "step": 21741, + "training_step_time": 0.1242818832397461 + }, + { + "epoch": 3.31756591796875e-05, + "model_forward_time": 0.025226354598999023, + "step": 21742 + }, + { + "epoch": 3.31756591796875e-05, + "step": 21742, + "training_step_time": 0.13660049438476562 + }, + { + "epoch": 3.317718505859375e-05, + "model_forward_time": 0.02536463737487793, + "step": 21743 + }, + { + "epoch": 3.317718505859375e-05, + "step": 21743, + "training_step_time": 0.11639690399169922 + }, + { + "epoch": 3.31787109375e-05, + "model_forward_time": 0.024898052215576172, + "step": 21744 + }, + { + "epoch": 3.31787109375e-05, + "step": 21744, + "training_step_time": 0.12708806991577148 + }, + { + "epoch": 3.318023681640625e-05, + "model_forward_time": 0.0249330997467041, + "step": 21745 + }, + { + "epoch": 3.318023681640625e-05, + "step": 21745, + "training_step_time": 0.12420868873596191 + }, + { + "epoch": 3.31817626953125e-05, + "model_forward_time": 0.02484726905822754, + "step": 21746 + }, + { + "epoch": 3.31817626953125e-05, + "step": 21746, + "training_step_time": 0.10887384414672852 + }, + { + "epoch": 3.318328857421875e-05, + "model_forward_time": 0.025100231170654297, + "step": 21747 + }, + { + "epoch": 3.318328857421875e-05, + "step": 21747, + "training_step_time": 0.10782909393310547 + }, + { + "epoch": 3.3184814453125e-05, + "model_forward_time": 0.025473594665527344, + "step": 21748 + }, + { + "epoch": 3.3184814453125e-05, + "step": 21748, + "training_step_time": 0.10842776298522949 + }, + { + "epoch": 3.318634033203125e-05, + "model_forward_time": 0.025015592575073242, + "step": 21749 + }, + { + "epoch": 3.318634033203125e-05, + "step": 21749, + "training_step_time": 0.10453271865844727 + }, + { + "epoch": 3.31878662109375e-05, + "grad_norm": 0.14747479557991028, + "learning_rate": 1.928936436551661e-05, + "loss": 0.0042, + "step": 21750 + }, + { + "epoch": 3.31878662109375e-05, + "model_forward_time": 0.025118350982666016, + "step": 21750 + }, + { + "epoch": 3.31878662109375e-05, + "step": 21750, + "training_step_time": 0.1117696762084961 + }, + { + "epoch": 3.318939208984375e-05, + "model_forward_time": 0.02527308464050293, + "step": 21751 + }, + { + "epoch": 3.318939208984375e-05, + "step": 21751, + "training_step_time": 0.11162710189819336 + }, + { + "epoch": 3.319091796875e-05, + "model_forward_time": 0.025176048278808594, + "step": 21752 + }, + { + "epoch": 3.319091796875e-05, + "step": 21752, + "training_step_time": 0.10956978797912598 + }, + { + "epoch": 3.319244384765625e-05, + "model_forward_time": 0.02522587776184082, + "step": 21753 + }, + { + "epoch": 3.319244384765625e-05, + "step": 21753, + "training_step_time": 0.10582637786865234 + }, + { + "epoch": 3.31939697265625e-05, + "model_forward_time": 0.02535271644592285, + "step": 21754 + }, + { + "epoch": 3.31939697265625e-05, + "step": 21754, + "training_step_time": 0.11062860488891602 + }, + { + "epoch": 3.319549560546875e-05, + "model_forward_time": 0.025850772857666016, + "step": 21755 + }, + { + "epoch": 3.319549560546875e-05, + "step": 21755, + "training_step_time": 0.10478568077087402 + }, + { + "epoch": 3.3197021484375e-05, + "model_forward_time": 0.02476811408996582, + "step": 21756 + }, + { + "epoch": 3.3197021484375e-05, + "step": 21756, + "training_step_time": 0.14247465133666992 + }, + { + "epoch": 3.319854736328125e-05, + "model_forward_time": 0.02482128143310547, + "step": 21757 + }, + { + "epoch": 3.319854736328125e-05, + "step": 21757, + "training_step_time": 0.16814899444580078 + }, + { + "epoch": 3.32000732421875e-05, + "model_forward_time": 0.024598121643066406, + "step": 21758 + }, + { + "epoch": 3.32000732421875e-05, + "step": 21758, + "training_step_time": 0.1084432601928711 + }, + { + "epoch": 3.320159912109375e-05, + "model_forward_time": 0.0245974063873291, + "step": 21759 + }, + { + "epoch": 3.320159912109375e-05, + "step": 21759, + "training_step_time": 0.13166546821594238 + }, + { + "epoch": 3.3203125e-05, + "grad_norm": 0.3442796468734741, + "learning_rate": 1.924588903201074e-05, + "loss": 0.0155, + "step": 21760 + }, + { + "epoch": 3.3203125e-05, + "model_forward_time": 0.02515268325805664, + "step": 21760 + }, + { + "epoch": 3.3203125e-05, + "step": 21760, + "training_step_time": 0.2077195644378662 + }, + { + "epoch": 3.320465087890625e-05, + "model_forward_time": 0.024456024169921875, + "step": 21761 + }, + { + "epoch": 3.320465087890625e-05, + "step": 21761, + "training_step_time": 0.12128114700317383 + }, + { + "epoch": 3.32061767578125e-05, + "model_forward_time": 0.0248410701751709, + "step": 21762 + }, + { + "epoch": 3.32061767578125e-05, + "step": 21762, + "training_step_time": 0.11387801170349121 + }, + { + "epoch": 3.320770263671875e-05, + "model_forward_time": 0.02542257308959961, + "step": 21763 + }, + { + "epoch": 3.320770263671875e-05, + "step": 21763, + "training_step_time": 0.206573486328125 + }, + { + "epoch": 3.3209228515625e-05, + "model_forward_time": 0.023366451263427734, + "step": 21764 + }, + { + "epoch": 3.3209228515625e-05, + "step": 21764, + "training_step_time": 0.20067834854125977 + }, + { + "epoch": 3.321075439453125e-05, + "model_forward_time": 0.024097204208374023, + "step": 21765 + }, + { + "epoch": 3.321075439453125e-05, + "step": 21765, + "training_step_time": 0.14866328239440918 + }, + { + "epoch": 3.32122802734375e-05, + "model_forward_time": 0.024332046508789062, + "step": 21766 + }, + { + "epoch": 3.32122802734375e-05, + "step": 21766, + "training_step_time": 0.13687849044799805 + }, + { + "epoch": 3.321380615234375e-05, + "model_forward_time": 0.023769855499267578, + "step": 21767 + }, + { + "epoch": 3.321380615234375e-05, + "step": 21767, + "training_step_time": 0.11251091957092285 + }, + { + "epoch": 3.321533203125e-05, + "model_forward_time": 0.025343656539916992, + "step": 21768 + }, + { + "epoch": 3.321533203125e-05, + "step": 21768, + "training_step_time": 0.10748910903930664 + }, + { + "epoch": 3.321685791015625e-05, + "model_forward_time": 0.025216341018676758, + "step": 21769 + }, + { + "epoch": 3.321685791015625e-05, + "step": 21769, + "training_step_time": 0.10693764686584473 + }, + { + "epoch": 3.32183837890625e-05, + "grad_norm": 0.30669525265693665, + "learning_rate": 1.920245106764962e-05, + "loss": 0.0092, + "step": 21770 + }, + { + "epoch": 3.32183837890625e-05, + "model_forward_time": 0.02530050277709961, + "step": 21770 + }, + { + "epoch": 3.32183837890625e-05, + "step": 21770, + "training_step_time": 0.10506391525268555 + }, + { + "epoch": 3.321990966796875e-05, + "model_forward_time": 0.024978160858154297, + "step": 21771 + }, + { + "epoch": 3.321990966796875e-05, + "step": 21771, + "training_step_time": 0.10431337356567383 + }, + { + "epoch": 3.3221435546875e-05, + "model_forward_time": 0.02493429183959961, + "step": 21772 + }, + { + "epoch": 3.3221435546875e-05, + "step": 21772, + "training_step_time": 0.1049191951751709 + }, + { + "epoch": 3.322296142578125e-05, + "model_forward_time": 0.02518296241760254, + "step": 21773 + }, + { + "epoch": 3.322296142578125e-05, + "step": 21773, + "training_step_time": 0.10576462745666504 + }, + { + "epoch": 3.32244873046875e-05, + "model_forward_time": 0.025162935256958008, + "step": 21774 + }, + { + "epoch": 3.32244873046875e-05, + "step": 21774, + "training_step_time": 0.10674095153808594 + }, + { + "epoch": 3.322601318359375e-05, + "model_forward_time": 0.025419235229492188, + "step": 21775 + }, + { + "epoch": 3.322601318359375e-05, + "step": 21775, + "training_step_time": 0.10509443283081055 + }, + { + "epoch": 3.32275390625e-05, + "model_forward_time": 0.02595686912536621, + "step": 21776 + }, + { + "epoch": 3.32275390625e-05, + "step": 21776, + "training_step_time": 0.10840940475463867 + }, + { + "epoch": 3.322906494140625e-05, + "model_forward_time": 0.02485179901123047, + "step": 21777 + }, + { + "epoch": 3.322906494140625e-05, + "step": 21777, + "training_step_time": 0.1060950756072998 + }, + { + "epoch": 3.32305908203125e-05, + "model_forward_time": 0.02488851547241211, + "step": 21778 + }, + { + "epoch": 3.32305908203125e-05, + "step": 21778, + "training_step_time": 0.10925698280334473 + }, + { + "epoch": 3.323211669921875e-05, + "model_forward_time": 0.02502894401550293, + "step": 21779 + }, + { + "epoch": 3.323211669921875e-05, + "step": 21779, + "training_step_time": 0.10417723655700684 + }, + { + "epoch": 3.3233642578125e-05, + "grad_norm": 0.08915925770998001, + "learning_rate": 1.9159050525214452e-05, + "loss": 0.0188, + "step": 21780 + }, + { + "epoch": 3.3233642578125e-05, + "model_forward_time": 0.024925947189331055, + "step": 21780 + }, + { + "epoch": 3.3233642578125e-05, + "step": 21780, + "training_step_time": 0.10466504096984863 + }, + { + "epoch": 3.323516845703125e-05, + "model_forward_time": 0.024986982345581055, + "step": 21781 + }, + { + "epoch": 3.323516845703125e-05, + "step": 21781, + "training_step_time": 0.10445022583007812 + }, + { + "epoch": 3.32366943359375e-05, + "model_forward_time": 0.024063825607299805, + "step": 21782 + }, + { + "epoch": 3.32366943359375e-05, + "step": 21782, + "training_step_time": 0.10645222663879395 + }, + { + "epoch": 3.323822021484375e-05, + "model_forward_time": 0.024182558059692383, + "step": 21783 + }, + { + "epoch": 3.323822021484375e-05, + "step": 21783, + "training_step_time": 0.11365580558776855 + }, + { + "epoch": 3.323974609375e-05, + "model_forward_time": 0.025199413299560547, + "step": 21784 + }, + { + "epoch": 3.323974609375e-05, + "step": 21784, + "training_step_time": 0.10591363906860352 + }, + { + "epoch": 3.324127197265625e-05, + "model_forward_time": 0.025432825088500977, + "step": 21785 + }, + { + "epoch": 3.324127197265625e-05, + "step": 21785, + "training_step_time": 0.14859294891357422 + }, + { + "epoch": 3.32427978515625e-05, + "model_forward_time": 0.024664640426635742, + "step": 21786 + }, + { + "epoch": 3.32427978515625e-05, + "step": 21786, + "training_step_time": 0.11029982566833496 + }, + { + "epoch": 3.324432373046875e-05, + "model_forward_time": 0.024808406829833984, + "step": 21787 + }, + { + "epoch": 3.324432373046875e-05, + "step": 21787, + "training_step_time": 0.12851190567016602 + }, + { + "epoch": 3.3245849609375e-05, + "model_forward_time": 0.02513599395751953, + "step": 21788 + }, + { + "epoch": 3.3245849609375e-05, + "step": 21788, + "training_step_time": 0.1403498649597168 + }, + { + "epoch": 3.324737548828125e-05, + "model_forward_time": 0.024633169174194336, + "step": 21789 + }, + { + "epoch": 3.324737548828125e-05, + "step": 21789, + "training_step_time": 0.11573958396911621 + }, + { + "epoch": 3.32489013671875e-05, + "grad_norm": 0.1732388287782669, + "learning_rate": 1.9115687457441022e-05, + "loss": 0.0065, + "step": 21790 + }, + { + "epoch": 3.32489013671875e-05, + "model_forward_time": 0.025089502334594727, + "step": 21790 + }, + { + "epoch": 3.32489013671875e-05, + "step": 21790, + "training_step_time": 0.12963008880615234 + }, + { + "epoch": 3.325042724609375e-05, + "model_forward_time": 0.025164365768432617, + "step": 21791 + }, + { + "epoch": 3.325042724609375e-05, + "step": 21791, + "training_step_time": 0.12912774085998535 + }, + { + "epoch": 3.3251953125e-05, + "model_forward_time": 0.0277101993560791, + "step": 21792 + }, + { + "epoch": 3.3251953125e-05, + "step": 21792, + "training_step_time": 0.1105806827545166 + }, + { + "epoch": 3.325347900390625e-05, + "model_forward_time": 0.02494025230407715, + "step": 21793 + }, + { + "epoch": 3.325347900390625e-05, + "step": 21793, + "training_step_time": 0.11237120628356934 + }, + { + "epoch": 3.32550048828125e-05, + "model_forward_time": 0.024797439575195312, + "step": 21794 + }, + { + "epoch": 3.32550048828125e-05, + "step": 21794, + "training_step_time": 0.11377096176147461 + }, + { + "epoch": 3.325653076171875e-05, + "model_forward_time": 0.024846553802490234, + "step": 21795 + }, + { + "epoch": 3.325653076171875e-05, + "step": 21795, + "training_step_time": 0.11927103996276855 + }, + { + "epoch": 3.3258056640625e-05, + "model_forward_time": 0.02591538429260254, + "step": 21796 + }, + { + "epoch": 3.3258056640625e-05, + "step": 21796, + "training_step_time": 0.10949826240539551 + }, + { + "epoch": 3.325958251953125e-05, + "model_forward_time": 0.025827407836914062, + "step": 21797 + }, + { + "epoch": 3.325958251953125e-05, + "step": 21797, + "training_step_time": 0.10903382301330566 + }, + { + "epoch": 3.32611083984375e-05, + "model_forward_time": 0.025090694427490234, + "step": 21798 + }, + { + "epoch": 3.32611083984375e-05, + "step": 21798, + "training_step_time": 0.10832500457763672 + }, + { + "epoch": 3.326263427734375e-05, + "model_forward_time": 0.025237083435058594, + "step": 21799 + }, + { + "epoch": 3.326263427734375e-05, + "step": 21799, + "training_step_time": 0.1168668270111084 + }, + { + "epoch": 3.326416015625e-05, + "grad_norm": 0.4488953649997711, + "learning_rate": 1.9072361917019536e-05, + "loss": 0.0121, + "step": 21800 + }, + { + "epoch": 3.326416015625e-05, + "model_forward_time": 0.025271177291870117, + "step": 21800 + }, + { + "epoch": 3.326416015625e-05, + "step": 21800, + "training_step_time": 0.11556291580200195 + }, + { + "epoch": 3.326568603515625e-05, + "model_forward_time": 0.02516007423400879, + "step": 21801 + }, + { + "epoch": 3.326568603515625e-05, + "step": 21801, + "training_step_time": 0.10564208030700684 + }, + { + "epoch": 3.32672119140625e-05, + "model_forward_time": 0.02442002296447754, + "step": 21802 + }, + { + "epoch": 3.32672119140625e-05, + "step": 21802, + "training_step_time": 0.15406036376953125 + }, + { + "epoch": 3.326873779296875e-05, + "model_forward_time": 0.025583744049072266, + "step": 21803 + }, + { + "epoch": 3.326873779296875e-05, + "step": 21803, + "training_step_time": 0.15313005447387695 + }, + { + "epoch": 3.3270263671875e-05, + "model_forward_time": 0.024120807647705078, + "step": 21804 + }, + { + "epoch": 3.3270263671875e-05, + "step": 21804, + "training_step_time": 0.11084198951721191 + }, + { + "epoch": 3.327178955078125e-05, + "model_forward_time": 0.024527549743652344, + "step": 21805 + }, + { + "epoch": 3.327178955078125e-05, + "step": 21805, + "training_step_time": 0.18859076499938965 + }, + { + "epoch": 3.32733154296875e-05, + "model_forward_time": 0.024230241775512695, + "step": 21806 + }, + { + "epoch": 3.32733154296875e-05, + "step": 21806, + "training_step_time": 0.14934015274047852 + }, + { + "epoch": 3.327484130859375e-05, + "model_forward_time": 0.024615764617919922, + "step": 21807 + }, + { + "epoch": 3.327484130859375e-05, + "step": 21807, + "training_step_time": 0.199371337890625 + }, + { + "epoch": 3.32763671875e-05, + "model_forward_time": 0.023859262466430664, + "step": 21808 + }, + { + "epoch": 3.32763671875e-05, + "step": 21808, + "training_step_time": 0.1276547908782959 + }, + { + "epoch": 3.327789306640625e-05, + "model_forward_time": 0.023954153060913086, + "step": 21809 + }, + { + "epoch": 3.327789306640625e-05, + "step": 21809, + "training_step_time": 0.15076637268066406 + }, + { + "epoch": 3.32794189453125e-05, + "grad_norm": 0.09804453700780869, + "learning_rate": 1.9029073956594606e-05, + "loss": 0.0041, + "step": 21810 + }, + { + "epoch": 3.32794189453125e-05, + "model_forward_time": 0.024268388748168945, + "step": 21810 + }, + { + "epoch": 3.32794189453125e-05, + "step": 21810, + "training_step_time": 0.14701604843139648 + }, + { + "epoch": 3.328094482421875e-05, + "model_forward_time": 0.02439093589782715, + "step": 21811 + }, + { + "epoch": 3.328094482421875e-05, + "step": 21811, + "training_step_time": 0.20720624923706055 + }, + { + "epoch": 3.3282470703125e-05, + "model_forward_time": 0.024606704711914062, + "step": 21812 + }, + { + "epoch": 3.3282470703125e-05, + "step": 21812, + "training_step_time": 0.12167191505432129 + }, + { + "epoch": 3.328399658203125e-05, + "model_forward_time": 0.024305105209350586, + "step": 21813 + }, + { + "epoch": 3.328399658203125e-05, + "step": 21813, + "training_step_time": 0.11679887771606445 + }, + { + "epoch": 3.32855224609375e-05, + "model_forward_time": 0.025182247161865234, + "step": 21814 + }, + { + "epoch": 3.32855224609375e-05, + "step": 21814, + "training_step_time": 0.11378645896911621 + }, + { + "epoch": 3.328704833984375e-05, + "model_forward_time": 0.025525331497192383, + "step": 21815 + }, + { + "epoch": 3.328704833984375e-05, + "step": 21815, + "training_step_time": 0.10745811462402344 + }, + { + "epoch": 3.328857421875e-05, + "model_forward_time": 0.024863243103027344, + "step": 21816 + }, + { + "epoch": 3.328857421875e-05, + "step": 21816, + "training_step_time": 0.10639476776123047 + }, + { + "epoch": 3.329010009765625e-05, + "model_forward_time": 0.024791717529296875, + "step": 21817 + }, + { + "epoch": 3.329010009765625e-05, + "step": 21817, + "training_step_time": 0.1152498722076416 + }, + { + "epoch": 3.32916259765625e-05, + "model_forward_time": 0.024941205978393555, + "step": 21818 + }, + { + "epoch": 3.32916259765625e-05, + "step": 21818, + "training_step_time": 0.10890746116638184 + }, + { + "epoch": 3.329315185546875e-05, + "model_forward_time": 0.02552032470703125, + "step": 21819 + }, + { + "epoch": 3.329315185546875e-05, + "step": 21819, + "training_step_time": 0.11397576332092285 + }, + { + "epoch": 3.3294677734375e-05, + "grad_norm": 0.17043310403823853, + "learning_rate": 1.8985823628765188e-05, + "loss": 0.005, + "step": 21820 + }, + { + "epoch": 3.3294677734375e-05, + "model_forward_time": 0.02506732940673828, + "step": 21820 + }, + { + "epoch": 3.3294677734375e-05, + "step": 21820, + "training_step_time": 0.10653829574584961 + }, + { + "epoch": 3.329620361328125e-05, + "model_forward_time": 0.025251150131225586, + "step": 21821 + }, + { + "epoch": 3.329620361328125e-05, + "step": 21821, + "training_step_time": 0.10847806930541992 + }, + { + "epoch": 3.32977294921875e-05, + "model_forward_time": 0.02530074119567871, + "step": 21822 + }, + { + "epoch": 3.32977294921875e-05, + "step": 21822, + "training_step_time": 0.1052849292755127 + }, + { + "epoch": 3.329925537109375e-05, + "model_forward_time": 0.025892019271850586, + "step": 21823 + }, + { + "epoch": 3.329925537109375e-05, + "step": 21823, + "training_step_time": 0.10567688941955566 + }, + { + "epoch": 3.330078125e-05, + "model_forward_time": 0.02531886100769043, + "step": 21824 + }, + { + "epoch": 3.330078125e-05, + "step": 21824, + "training_step_time": 0.10826921463012695 + }, + { + "epoch": 3.330230712890625e-05, + "model_forward_time": 0.02545905113220215, + "step": 21825 + }, + { + "epoch": 3.330230712890625e-05, + "step": 21825, + "training_step_time": 0.10414624214172363 + }, + { + "epoch": 3.33038330078125e-05, + "model_forward_time": 0.024924278259277344, + "step": 21826 + }, + { + "epoch": 3.33038330078125e-05, + "step": 21826, + "training_step_time": 0.10510563850402832 + }, + { + "epoch": 3.330535888671875e-05, + "model_forward_time": 0.024805784225463867, + "step": 21827 + }, + { + "epoch": 3.330535888671875e-05, + "step": 21827, + "training_step_time": 0.1055152416229248 + }, + { + "epoch": 3.3306884765625e-05, + "model_forward_time": 0.025597572326660156, + "step": 21828 + }, + { + "epoch": 3.3306884765625e-05, + "step": 21828, + "training_step_time": 0.10855960845947266 + }, + { + "epoch": 3.330841064453125e-05, + "model_forward_time": 0.025432825088500977, + "step": 21829 + }, + { + "epoch": 3.330841064453125e-05, + "step": 21829, + "training_step_time": 0.10846161842346191 + }, + { + "epoch": 3.33099365234375e-05, + "grad_norm": 0.4265593886375427, + "learning_rate": 1.8942610986084486e-05, + "loss": 0.0117, + "step": 21830 + }, + { + "epoch": 3.33099365234375e-05, + "model_forward_time": 0.02542901039123535, + "step": 21830 + }, + { + "epoch": 3.33099365234375e-05, + "step": 21830, + "training_step_time": 0.17316174507141113 + }, + { + "epoch": 3.331146240234375e-05, + "model_forward_time": 0.02429676055908203, + "step": 21831 + }, + { + "epoch": 3.331146240234375e-05, + "step": 21831, + "training_step_time": 0.12160825729370117 + }, + { + "epoch": 3.331298828125e-05, + "model_forward_time": 0.024595975875854492, + "step": 21832 + }, + { + "epoch": 3.331298828125e-05, + "step": 21832, + "training_step_time": 0.1272883415222168 + }, + { + "epoch": 3.331451416015625e-05, + "model_forward_time": 0.02492976188659668, + "step": 21833 + }, + { + "epoch": 3.331451416015625e-05, + "step": 21833, + "training_step_time": 0.15986943244934082 + }, + { + "epoch": 3.33160400390625e-05, + "model_forward_time": 0.02434062957763672, + "step": 21834 + }, + { + "epoch": 3.33160400390625e-05, + "step": 21834, + "training_step_time": 0.18533587455749512 + }, + { + "epoch": 3.331756591796875e-05, + "model_forward_time": 0.024075031280517578, + "step": 21835 + }, + { + "epoch": 3.331756591796875e-05, + "step": 21835, + "training_step_time": 0.1640181541442871 + }, + { + "epoch": 3.3319091796875e-05, + "model_forward_time": 0.024425506591796875, + "step": 21836 + }, + { + "epoch": 3.3319091796875e-05, + "step": 21836, + "training_step_time": 0.11726951599121094 + }, + { + "epoch": 3.332061767578125e-05, + "model_forward_time": 0.02438640594482422, + "step": 21837 + }, + { + "epoch": 3.332061767578125e-05, + "step": 21837, + "training_step_time": 0.11077237129211426 + }, + { + "epoch": 3.33221435546875e-05, + "model_forward_time": 0.024880647659301758, + "step": 21838 + }, + { + "epoch": 3.33221435546875e-05, + "step": 21838, + "training_step_time": 0.11326217651367188 + }, + { + "epoch": 3.332366943359375e-05, + "model_forward_time": 0.0251467227935791, + "step": 21839 + }, + { + "epoch": 3.332366943359375e-05, + "step": 21839, + "training_step_time": 0.1108860969543457 + }, + { + "epoch": 3.33251953125e-05, + "grad_norm": 0.12453766167163849, + "learning_rate": 1.8899436081059975e-05, + "loss": 0.0061, + "step": 21840 + }, + { + "epoch": 3.33251953125e-05, + "model_forward_time": 0.02491450309753418, + "step": 21840 + }, + { + "epoch": 3.33251953125e-05, + "step": 21840, + "training_step_time": 0.11063313484191895 + }, + { + "epoch": 3.332672119140625e-05, + "model_forward_time": 0.02473902702331543, + "step": 21841 + }, + { + "epoch": 3.332672119140625e-05, + "step": 21841, + "training_step_time": 0.10833239555358887 + }, + { + "epoch": 3.33282470703125e-05, + "model_forward_time": 0.02484273910522461, + "step": 21842 + }, + { + "epoch": 3.33282470703125e-05, + "step": 21842, + "training_step_time": 0.11195898056030273 + }, + { + "epoch": 3.332977294921875e-05, + "model_forward_time": 0.02499222755432129, + "step": 21843 + }, + { + "epoch": 3.332977294921875e-05, + "step": 21843, + "training_step_time": 0.10643482208251953 + }, + { + "epoch": 3.3331298828125e-05, + "model_forward_time": 0.02524256706237793, + "step": 21844 + }, + { + "epoch": 3.3331298828125e-05, + "step": 21844, + "training_step_time": 0.10648369789123535 + }, + { + "epoch": 3.333282470703125e-05, + "model_forward_time": 0.02487492561340332, + "step": 21845 + }, + { + "epoch": 3.333282470703125e-05, + "step": 21845, + "training_step_time": 0.10500550270080566 + }, + { + "epoch": 3.33343505859375e-05, + "model_forward_time": 0.024409770965576172, + "step": 21846 + }, + { + "epoch": 3.33343505859375e-05, + "step": 21846, + "training_step_time": 0.15057015419006348 + }, + { + "epoch": 3.333587646484375e-05, + "model_forward_time": 0.024445056915283203, + "step": 21847 + }, + { + "epoch": 3.333587646484375e-05, + "step": 21847, + "training_step_time": 0.1541757583618164 + }, + { + "epoch": 3.333740234375e-05, + "model_forward_time": 0.02426743507385254, + "step": 21848 + }, + { + "epoch": 3.333740234375e-05, + "step": 21848, + "training_step_time": 0.11188340187072754 + }, + { + "epoch": 3.333892822265625e-05, + "model_forward_time": 0.024758577346801758, + "step": 21849 + }, + { + "epoch": 3.333892822265625e-05, + "step": 21849, + "training_step_time": 0.1560688018798828 + }, + { + "epoch": 3.33404541015625e-05, + "grad_norm": 0.1800937056541443, + "learning_rate": 1.8856298966153212e-05, + "loss": 0.0065, + "step": 21850 + }, + { + "epoch": 3.33404541015625e-05, + "model_forward_time": 0.024729251861572266, + "step": 21850 + }, + { + "epoch": 3.33404541015625e-05, + "step": 21850, + "training_step_time": 0.17142367362976074 + }, + { + "epoch": 3.334197998046875e-05, + "model_forward_time": 0.02407979965209961, + "step": 21851 + }, + { + "epoch": 3.334197998046875e-05, + "step": 21851, + "training_step_time": 0.17992830276489258 + }, + { + "epoch": 3.3343505859375e-05, + "model_forward_time": 0.02410149574279785, + "step": 21852 + }, + { + "epoch": 3.3343505859375e-05, + "step": 21852, + "training_step_time": 0.12167000770568848 + }, + { + "epoch": 3.334503173828125e-05, + "model_forward_time": 0.024135828018188477, + "step": 21853 + }, + { + "epoch": 3.334503173828125e-05, + "step": 21853, + "training_step_time": 0.10448479652404785 + }, + { + "epoch": 3.33465576171875e-05, + "model_forward_time": 0.025197267532348633, + "step": 21854 + }, + { + "epoch": 3.33465576171875e-05, + "step": 21854, + "training_step_time": 0.19836997985839844 + }, + { + "epoch": 3.334808349609375e-05, + "model_forward_time": 0.02428603172302246, + "step": 21855 + }, + { + "epoch": 3.334808349609375e-05, + "step": 21855, + "training_step_time": 0.16509294509887695 + }, + { + "epoch": 3.3349609375e-05, + "model_forward_time": 0.0242156982421875, + "step": 21856 + }, + { + "epoch": 3.3349609375e-05, + "step": 21856, + "training_step_time": 0.13178753852844238 + }, + { + "epoch": 3.335113525390625e-05, + "model_forward_time": 0.024380922317504883, + "step": 21857 + }, + { + "epoch": 3.335113525390625e-05, + "step": 21857, + "training_step_time": 0.12881207466125488 + }, + { + "epoch": 3.33526611328125e-05, + "model_forward_time": 0.024967670440673828, + "step": 21858 + }, + { + "epoch": 3.33526611328125e-05, + "step": 21858, + "training_step_time": 0.12106752395629883 + }, + { + "epoch": 3.335418701171875e-05, + "model_forward_time": 0.024941444396972656, + "step": 21859 + }, + { + "epoch": 3.335418701171875e-05, + "step": 21859, + "training_step_time": 0.11635017395019531 + }, + { + "epoch": 3.3355712890625e-05, + "grad_norm": 0.24301566183567047, + "learning_rate": 1.881319969377987e-05, + "loss": 0.0121, + "step": 21860 + }, + { + "epoch": 3.3355712890625e-05, + "model_forward_time": 0.02521491050720215, + "step": 21860 + }, + { + "epoch": 3.3355712890625e-05, + "step": 21860, + "training_step_time": 0.1148219108581543 + }, + { + "epoch": 3.335723876953125e-05, + "model_forward_time": 0.024951696395874023, + "step": 21861 + }, + { + "epoch": 3.335723876953125e-05, + "step": 21861, + "training_step_time": 0.11199665069580078 + }, + { + "epoch": 3.33587646484375e-05, + "model_forward_time": 0.02507495880126953, + "step": 21862 + }, + { + "epoch": 3.33587646484375e-05, + "step": 21862, + "training_step_time": 0.10988974571228027 + }, + { + "epoch": 3.336029052734375e-05, + "model_forward_time": 0.024872303009033203, + "step": 21863 + }, + { + "epoch": 3.336029052734375e-05, + "step": 21863, + "training_step_time": 0.1070561408996582 + }, + { + "epoch": 3.336181640625e-05, + "model_forward_time": 0.026783227920532227, + "step": 21864 + }, + { + "epoch": 3.336181640625e-05, + "step": 21864, + "training_step_time": 0.1087348461151123 + }, + { + "epoch": 3.336334228515625e-05, + "model_forward_time": 0.024792909622192383, + "step": 21865 + }, + { + "epoch": 3.336334228515625e-05, + "step": 21865, + "training_step_time": 0.10808944702148438 + }, + { + "epoch": 3.33648681640625e-05, + "model_forward_time": 0.023998260498046875, + "step": 21866 + }, + { + "epoch": 3.33648681640625e-05, + "step": 21866, + "training_step_time": 0.10508894920349121 + }, + { + "epoch": 3.336639404296875e-05, + "model_forward_time": 0.024866342544555664, + "step": 21867 + }, + { + "epoch": 3.336639404296875e-05, + "step": 21867, + "training_step_time": 0.10407638549804688 + }, + { + "epoch": 3.3367919921875e-05, + "model_forward_time": 0.025317668914794922, + "step": 21868 + }, + { + "epoch": 3.3367919921875e-05, + "step": 21868, + "training_step_time": 0.10530734062194824 + }, + { + "epoch": 3.336944580078125e-05, + "model_forward_time": 0.025447845458984375, + "step": 21869 + }, + { + "epoch": 3.336944580078125e-05, + "step": 21869, + "training_step_time": 0.10706901550292969 + }, + { + "epoch": 3.33709716796875e-05, + "grad_norm": 0.10550834983587265, + "learning_rate": 1.877013831630961e-05, + "loss": 0.0075, + "step": 21870 + }, + { + "epoch": 3.33709716796875e-05, + "model_forward_time": 0.024760007858276367, + "step": 21870 + }, + { + "epoch": 3.33709716796875e-05, + "step": 21870, + "training_step_time": 0.11013126373291016 + }, + { + "epoch": 3.337249755859375e-05, + "model_forward_time": 0.025163888931274414, + "step": 21871 + }, + { + "epoch": 3.337249755859375e-05, + "step": 21871, + "training_step_time": 0.10971283912658691 + }, + { + "epoch": 3.33740234375e-05, + "model_forward_time": 0.02463698387145996, + "step": 21872 + }, + { + "epoch": 3.33740234375e-05, + "step": 21872, + "training_step_time": 0.1077272891998291 + }, + { + "epoch": 3.337554931640625e-05, + "model_forward_time": 0.0252532958984375, + "step": 21873 + }, + { + "epoch": 3.337554931640625e-05, + "step": 21873, + "training_step_time": 0.10522007942199707 + }, + { + "epoch": 3.33770751953125e-05, + "model_forward_time": 0.025236129760742188, + "step": 21874 + }, + { + "epoch": 3.33770751953125e-05, + "step": 21874, + "training_step_time": 0.17871475219726562 + }, + { + "epoch": 3.337860107421875e-05, + "model_forward_time": 0.024168968200683594, + "step": 21875 + }, + { + "epoch": 3.337860107421875e-05, + "step": 21875, + "training_step_time": 0.12221479415893555 + }, + { + "epoch": 3.3380126953125e-05, + "model_forward_time": 0.02402472496032715, + "step": 21876 + }, + { + "epoch": 3.3380126953125e-05, + "step": 21876, + "training_step_time": 0.10711050033569336 + }, + { + "epoch": 3.338165283203125e-05, + "model_forward_time": 0.02476668357849121, + "step": 21877 + }, + { + "epoch": 3.338165283203125e-05, + "step": 21877, + "training_step_time": 0.10479950904846191 + }, + { + "epoch": 3.33831787109375e-05, + "model_forward_time": 0.02499079704284668, + "step": 21878 + }, + { + "epoch": 3.33831787109375e-05, + "step": 21878, + "training_step_time": 0.21601033210754395 + }, + { + "epoch": 3.338470458984375e-05, + "model_forward_time": 0.02414846420288086, + "step": 21879 + }, + { + "epoch": 3.338470458984375e-05, + "step": 21879, + "training_step_time": 0.12385129928588867 + }, + { + "epoch": 3.338623046875e-05, + "grad_norm": 0.17147311568260193, + "learning_rate": 1.872711488606609e-05, + "loss": 0.004, + "step": 21880 + }, + { + "epoch": 3.338623046875e-05, + "model_forward_time": 0.023589611053466797, + "step": 21880 + }, + { + "epoch": 3.338623046875e-05, + "step": 21880, + "training_step_time": 0.11432909965515137 + }, + { + "epoch": 3.338775634765625e-05, + "model_forward_time": 0.024756669998168945, + "step": 21881 + }, + { + "epoch": 3.338775634765625e-05, + "step": 21881, + "training_step_time": 0.10818362236022949 + }, + { + "epoch": 3.33892822265625e-05, + "model_forward_time": 0.02466726303100586, + "step": 21882 + }, + { + "epoch": 3.33892822265625e-05, + "step": 21882, + "training_step_time": 0.10753607749938965 + }, + { + "epoch": 3.339080810546875e-05, + "model_forward_time": 0.024960994720458984, + "step": 21883 + }, + { + "epoch": 3.339080810546875e-05, + "step": 21883, + "training_step_time": 0.10529494285583496 + }, + { + "epoch": 3.3392333984375e-05, + "model_forward_time": 0.024820804595947266, + "step": 21884 + }, + { + "epoch": 3.3392333984375e-05, + "step": 21884, + "training_step_time": 0.1078195571899414 + }, + { + "epoch": 3.339385986328125e-05, + "model_forward_time": 0.025045156478881836, + "step": 21885 + }, + { + "epoch": 3.339385986328125e-05, + "step": 21885, + "training_step_time": 0.10800671577453613 + }, + { + "epoch": 3.33953857421875e-05, + "model_forward_time": 0.025008201599121094, + "step": 21886 + }, + { + "epoch": 3.33953857421875e-05, + "step": 21886, + "training_step_time": 0.10856294631958008 + }, + { + "epoch": 3.339691162109375e-05, + "model_forward_time": 0.024991273880004883, + "step": 21887 + }, + { + "epoch": 3.339691162109375e-05, + "step": 21887, + "training_step_time": 0.10384631156921387 + }, + { + "epoch": 3.33984375e-05, + "model_forward_time": 0.02796316146850586, + "step": 21888 + }, + { + "epoch": 3.33984375e-05, + "step": 21888, + "training_step_time": 0.10702967643737793 + }, + { + "epoch": 3.339996337890625e-05, + "model_forward_time": 0.02507638931274414, + "step": 21889 + }, + { + "epoch": 3.339996337890625e-05, + "step": 21889, + "training_step_time": 0.10507607460021973 + }, + { + "epoch": 3.34014892578125e-05, + "grad_norm": 0.19023002684116364, + "learning_rate": 1.868412945532681e-05, + "loss": 0.0048, + "step": 21890 + }, + { + "epoch": 3.34014892578125e-05, + "model_forward_time": 0.02390909194946289, + "step": 21890 + }, + { + "epoch": 3.34014892578125e-05, + "step": 21890, + "training_step_time": 0.10432052612304688 + }, + { + "epoch": 3.340301513671875e-05, + "model_forward_time": 0.02433633804321289, + "step": 21891 + }, + { + "epoch": 3.340301513671875e-05, + "step": 21891, + "training_step_time": 0.103302001953125 + }, + { + "epoch": 3.3404541015625e-05, + "model_forward_time": 0.02447199821472168, + "step": 21892 + }, + { + "epoch": 3.3404541015625e-05, + "step": 21892, + "training_step_time": 0.14989686012268066 + }, + { + "epoch": 3.340606689453125e-05, + "model_forward_time": 0.02468252182006836, + "step": 21893 + }, + { + "epoch": 3.340606689453125e-05, + "step": 21893, + "training_step_time": 0.15548014640808105 + }, + { + "epoch": 3.34075927734375e-05, + "model_forward_time": 0.024575471878051758, + "step": 21894 + }, + { + "epoch": 3.34075927734375e-05, + "step": 21894, + "training_step_time": 0.1885085105895996 + }, + { + "epoch": 3.340911865234375e-05, + "model_forward_time": 0.024600505828857422, + "step": 21895 + }, + { + "epoch": 3.340911865234375e-05, + "step": 21895, + "training_step_time": 0.15296506881713867 + }, + { + "epoch": 3.341064453125e-05, + "model_forward_time": 0.024071216583251953, + "step": 21896 + }, + { + "epoch": 3.341064453125e-05, + "step": 21896, + "training_step_time": 0.10956358909606934 + }, + { + "epoch": 3.341217041015625e-05, + "model_forward_time": 0.025008678436279297, + "step": 21897 + }, + { + "epoch": 3.341217041015625e-05, + "step": 21897, + "training_step_time": 0.10748910903930664 + }, + { + "epoch": 3.34136962890625e-05, + "model_forward_time": 0.025221824645996094, + "step": 21898 + }, + { + "epoch": 3.34136962890625e-05, + "step": 21898, + "training_step_time": 0.11350536346435547 + }, + { + "epoch": 3.341522216796875e-05, + "model_forward_time": 0.025104999542236328, + "step": 21899 + }, + { + "epoch": 3.341522216796875e-05, + "step": 21899, + "training_step_time": 0.1551041603088379 + }, + { + "epoch": 3.3416748046875e-05, + "grad_norm": 0.1172272339463234, + "learning_rate": 1.8641182076323148e-05, + "loss": 0.0062, + "step": 21900 + }, + { + "epoch": 3.3416748046875e-05, + "model_forward_time": 0.024877071380615234, + "step": 21900 + }, + { + "epoch": 3.3416748046875e-05, + "step": 21900, + "training_step_time": 0.1443798542022705 + }, + { + "epoch": 3.341827392578125e-05, + "model_forward_time": 0.02397322654724121, + "step": 21901 + }, + { + "epoch": 3.341827392578125e-05, + "step": 21901, + "training_step_time": 0.11443853378295898 + }, + { + "epoch": 3.34197998046875e-05, + "model_forward_time": 0.025059223175048828, + "step": 21902 + }, + { + "epoch": 3.34197998046875e-05, + "step": 21902, + "training_step_time": 0.11507773399353027 + }, + { + "epoch": 3.342132568359375e-05, + "model_forward_time": 0.02497720718383789, + "step": 21903 + }, + { + "epoch": 3.342132568359375e-05, + "step": 21903, + "training_step_time": 0.11725187301635742 + }, + { + "epoch": 3.34228515625e-05, + "model_forward_time": 0.02496790885925293, + "step": 21904 + }, + { + "epoch": 3.34228515625e-05, + "step": 21904, + "training_step_time": 0.12517571449279785 + }, + { + "epoch": 3.342437744140625e-05, + "model_forward_time": 0.025188684463500977, + "step": 21905 + }, + { + "epoch": 3.342437744140625e-05, + "step": 21905, + "training_step_time": 0.1178278923034668 + }, + { + "epoch": 3.34259033203125e-05, + "model_forward_time": 0.024924516677856445, + "step": 21906 + }, + { + "epoch": 3.34259033203125e-05, + "step": 21906, + "training_step_time": 0.11422872543334961 + }, + { + "epoch": 3.342742919921875e-05, + "model_forward_time": 0.024988412857055664, + "step": 21907 + }, + { + "epoch": 3.342742919921875e-05, + "step": 21907, + "training_step_time": 0.11598563194274902 + }, + { + "epoch": 3.3428955078125e-05, + "model_forward_time": 0.026479005813598633, + "step": 21908 + }, + { + "epoch": 3.3428955078125e-05, + "step": 21908, + "training_step_time": 0.11613059043884277 + }, + { + "epoch": 3.343048095703125e-05, + "model_forward_time": 0.025203227996826172, + "step": 21909 + }, + { + "epoch": 3.343048095703125e-05, + "step": 21909, + "training_step_time": 0.11009955406188965 + }, + { + "epoch": 3.34320068359375e-05, + "grad_norm": 0.2895066738128662, + "learning_rate": 1.8598272801240213e-05, + "loss": 0.0104, + "step": 21910 + }, + { + "epoch": 3.34320068359375e-05, + "model_forward_time": 0.024789094924926758, + "step": 21910 + }, + { + "epoch": 3.34320068359375e-05, + "step": 21910, + "training_step_time": 0.10646462440490723 + }, + { + "epoch": 3.343353271484375e-05, + "model_forward_time": 0.025174856185913086, + "step": 21911 + }, + { + "epoch": 3.343353271484375e-05, + "step": 21911, + "training_step_time": 0.10925126075744629 + }, + { + "epoch": 3.343505859375e-05, + "model_forward_time": 0.024848461151123047, + "step": 21912 + }, + { + "epoch": 3.343505859375e-05, + "step": 21912, + "training_step_time": 0.10745668411254883 + }, + { + "epoch": 3.343658447265625e-05, + "model_forward_time": 0.024905681610107422, + "step": 21913 + }, + { + "epoch": 3.343658447265625e-05, + "step": 21913, + "training_step_time": 0.1065225601196289 + }, + { + "epoch": 3.34381103515625e-05, + "model_forward_time": 0.025066852569580078, + "step": 21914 + }, + { + "epoch": 3.34381103515625e-05, + "step": 21914, + "training_step_time": 0.10826826095581055 + }, + { + "epoch": 3.343963623046875e-05, + "model_forward_time": 0.024744749069213867, + "step": 21915 + }, + { + "epoch": 3.343963623046875e-05, + "step": 21915, + "training_step_time": 0.10559749603271484 + }, + { + "epoch": 3.3441162109375e-05, + "model_forward_time": 0.024999141693115234, + "step": 21916 + }, + { + "epoch": 3.3441162109375e-05, + "step": 21916, + "training_step_time": 0.10587930679321289 + }, + { + "epoch": 3.344268798828125e-05, + "model_forward_time": 0.02464127540588379, + "step": 21917 + }, + { + "epoch": 3.344268798828125e-05, + "step": 21917, + "training_step_time": 0.10648727416992188 + }, + { + "epoch": 3.34442138671875e-05, + "model_forward_time": 0.028539419174194336, + "step": 21918 + }, + { + "epoch": 3.34442138671875e-05, + "step": 21918, + "training_step_time": 0.10886812210083008 + }, + { + "epoch": 3.344573974609375e-05, + "model_forward_time": 0.02498149871826172, + "step": 21919 + }, + { + "epoch": 3.344573974609375e-05, + "step": 21919, + "training_step_time": 0.10808062553405762 + }, + { + "epoch": 3.3447265625e-05, + "grad_norm": 0.15119513869285583, + "learning_rate": 1.855540168221681e-05, + "loss": 0.0073, + "step": 21920 + }, + { + "epoch": 3.3447265625e-05, + "model_forward_time": 0.024871349334716797, + "step": 21920 + }, + { + "epoch": 3.3447265625e-05, + "step": 21920, + "training_step_time": 0.10555005073547363 + }, + { + "epoch": 3.344879150390625e-05, + "model_forward_time": 0.02491450309753418, + "step": 21921 + }, + { + "epoch": 3.344879150390625e-05, + "step": 21921, + "training_step_time": 0.13134169578552246 + }, + { + "epoch": 3.34503173828125e-05, + "model_forward_time": 0.0251920223236084, + "step": 21922 + }, + { + "epoch": 3.34503173828125e-05, + "step": 21922, + "training_step_time": 0.11861729621887207 + }, + { + "epoch": 3.345184326171875e-05, + "model_forward_time": 0.02498602867126465, + "step": 21923 + }, + { + "epoch": 3.345184326171875e-05, + "step": 21923, + "training_step_time": 0.13724589347839355 + }, + { + "epoch": 3.3453369140625e-05, + "model_forward_time": 0.024552345275878906, + "step": 21924 + }, + { + "epoch": 3.3453369140625e-05, + "step": 21924, + "training_step_time": 0.10592198371887207 + }, + { + "epoch": 3.345489501953125e-05, + "model_forward_time": 0.024903297424316406, + "step": 21925 + }, + { + "epoch": 3.345489501953125e-05, + "step": 21925, + "training_step_time": 0.1693730354309082 + }, + { + "epoch": 3.34564208984375e-05, + "model_forward_time": 0.02442336082458496, + "step": 21926 + }, + { + "epoch": 3.34564208984375e-05, + "step": 21926, + "training_step_time": 0.13880157470703125 + }, + { + "epoch": 3.345794677734375e-05, + "model_forward_time": 0.023960590362548828, + "step": 21927 + }, + { + "epoch": 3.345794677734375e-05, + "step": 21927, + "training_step_time": 0.11413073539733887 + }, + { + "epoch": 3.345947265625e-05, + "model_forward_time": 0.02466297149658203, + "step": 21928 + }, + { + "epoch": 3.345947265625e-05, + "step": 21928, + "training_step_time": 0.10674023628234863 + }, + { + "epoch": 3.346099853515625e-05, + "model_forward_time": 0.0251309871673584, + "step": 21929 + }, + { + "epoch": 3.346099853515625e-05, + "step": 21929, + "training_step_time": 0.1786787509918213 + }, + { + "epoch": 3.34625244140625e-05, + "grad_norm": 0.16395282745361328, + "learning_rate": 1.851256877134538e-05, + "loss": 0.0047, + "step": 21930 + }, + { + "epoch": 3.34625244140625e-05, + "model_forward_time": 0.02409839630126953, + "step": 21930 + }, + { + "epoch": 3.34625244140625e-05, + "step": 21930, + "training_step_time": 0.20003819465637207 + }, + { + "epoch": 3.346405029296875e-05, + "model_forward_time": 0.024295806884765625, + "step": 21931 + }, + { + "epoch": 3.346405029296875e-05, + "step": 21931, + "training_step_time": 0.19412827491760254 + }, + { + "epoch": 3.3465576171875e-05, + "model_forward_time": 0.024642229080200195, + "step": 21932 + }, + { + "epoch": 3.3465576171875e-05, + "step": 21932, + "training_step_time": 0.18524527549743652 + }, + { + "epoch": 3.346710205078125e-05, + "model_forward_time": 0.02411627769470215, + "step": 21933 + }, + { + "epoch": 3.346710205078125e-05, + "step": 21933, + "training_step_time": 0.1680307388305664 + }, + { + "epoch": 3.34686279296875e-05, + "model_forward_time": 0.023997068405151367, + "step": 21934 + }, + { + "epoch": 3.34686279296875e-05, + "step": 21934, + "training_step_time": 0.11161065101623535 + }, + { + "epoch": 3.347015380859375e-05, + "model_forward_time": 0.025363445281982422, + "step": 21935 + }, + { + "epoch": 3.347015380859375e-05, + "step": 21935, + "training_step_time": 0.10446786880493164 + }, + { + "epoch": 3.34716796875e-05, + "model_forward_time": 0.025089025497436523, + "step": 21936 + }, + { + "epoch": 3.34716796875e-05, + "step": 21936, + "training_step_time": 0.14411163330078125 + }, + { + "epoch": 3.347320556640625e-05, + "model_forward_time": 0.024474143981933594, + "step": 21937 + }, + { + "epoch": 3.347320556640625e-05, + "step": 21937, + "training_step_time": 0.15804171562194824 + }, + { + "epoch": 3.34747314453125e-05, + "model_forward_time": 0.026667356491088867, + "step": 21938 + }, + { + "epoch": 3.34747314453125e-05, + "step": 21938, + "training_step_time": 0.16880297660827637 + }, + { + "epoch": 3.347625732421875e-05, + "model_forward_time": 0.024512529373168945, + "step": 21939 + }, + { + "epoch": 3.347625732421875e-05, + "step": 21939, + "training_step_time": 0.16932439804077148 + }, + { + "epoch": 3.3477783203125e-05, + "grad_norm": 0.41240185499191284, + "learning_rate": 1.846977412067198e-05, + "loss": 0.0073, + "step": 21940 + }, + { + "epoch": 3.3477783203125e-05, + "model_forward_time": 0.024316072463989258, + "step": 21940 + }, + { + "epoch": 3.3477783203125e-05, + "step": 21940, + "training_step_time": 0.17650890350341797 + }, + { + "epoch": 3.347930908203125e-05, + "model_forward_time": 0.024280548095703125, + "step": 21941 + }, + { + "epoch": 3.347930908203125e-05, + "step": 21941, + "training_step_time": 0.11753559112548828 + }, + { + "epoch": 3.34808349609375e-05, + "model_forward_time": 0.02404046058654785, + "step": 21942 + }, + { + "epoch": 3.34808349609375e-05, + "step": 21942, + "training_step_time": 0.10599541664123535 + }, + { + "epoch": 3.348236083984375e-05, + "model_forward_time": 0.024616241455078125, + "step": 21943 + }, + { + "epoch": 3.348236083984375e-05, + "step": 21943, + "training_step_time": 0.1569383144378662 + }, + { + "epoch": 3.348388671875e-05, + "model_forward_time": 0.024314165115356445, + "step": 21944 + }, + { + "epoch": 3.348388671875e-05, + "step": 21944, + "training_step_time": 0.14064669609069824 + }, + { + "epoch": 3.348541259765625e-05, + "model_forward_time": 0.02416396141052246, + "step": 21945 + }, + { + "epoch": 3.348541259765625e-05, + "step": 21945, + "training_step_time": 0.18384146690368652 + }, + { + "epoch": 3.34869384765625e-05, + "model_forward_time": 0.0240323543548584, + "step": 21946 + }, + { + "epoch": 3.34869384765625e-05, + "step": 21946, + "training_step_time": 0.15659189224243164 + }, + { + "epoch": 3.348846435546875e-05, + "model_forward_time": 0.023503780364990234, + "step": 21947 + }, + { + "epoch": 3.348846435546875e-05, + "step": 21947, + "training_step_time": 0.12787342071533203 + }, + { + "epoch": 3.3489990234375e-05, + "model_forward_time": 0.024297714233398438, + "step": 21948 + }, + { + "epoch": 3.3489990234375e-05, + "step": 21948, + "training_step_time": 0.12018895149230957 + }, + { + "epoch": 3.349151611328125e-05, + "model_forward_time": 0.026633262634277344, + "step": 21949 + }, + { + "epoch": 3.349151611328125e-05, + "step": 21949, + "training_step_time": 0.11145448684692383 + }, + { + "epoch": 3.34930419921875e-05, + "grad_norm": 0.11369265615940094, + "learning_rate": 1.8427017782196127e-05, + "loss": 0.0037, + "step": 21950 + }, + { + "epoch": 3.34930419921875e-05, + "model_forward_time": 0.02529144287109375, + "step": 21950 + }, + { + "epoch": 3.34930419921875e-05, + "step": 21950, + "training_step_time": 0.1411571502685547 + }, + { + "epoch": 3.349456787109375e-05, + "model_forward_time": 0.028241634368896484, + "step": 21951 + }, + { + "epoch": 3.349456787109375e-05, + "step": 21951, + "training_step_time": 0.1082770824432373 + }, + { + "epoch": 3.349609375e-05, + "model_forward_time": 0.026320695877075195, + "step": 21952 + }, + { + "epoch": 3.349609375e-05, + "step": 21952, + "training_step_time": 0.1068868637084961 + }, + { + "epoch": 3.349761962890625e-05, + "model_forward_time": 0.02522420883178711, + "step": 21953 + }, + { + "epoch": 3.349761962890625e-05, + "step": 21953, + "training_step_time": 0.10429120063781738 + }, + { + "epoch": 3.34991455078125e-05, + "model_forward_time": 0.02511906623840332, + "step": 21954 + }, + { + "epoch": 3.34991455078125e-05, + "step": 21954, + "training_step_time": 0.10689425468444824 + }, + { + "epoch": 3.350067138671875e-05, + "model_forward_time": 0.02586531639099121, + "step": 21955 + }, + { + "epoch": 3.350067138671875e-05, + "step": 21955, + "training_step_time": 0.10547304153442383 + }, + { + "epoch": 3.3502197265625e-05, + "model_forward_time": 0.025156736373901367, + "step": 21956 + }, + { + "epoch": 3.3502197265625e-05, + "step": 21956, + "training_step_time": 0.10576033592224121 + }, + { + "epoch": 3.350372314453125e-05, + "model_forward_time": 0.024840354919433594, + "step": 21957 + }, + { + "epoch": 3.350372314453125e-05, + "step": 21957, + "training_step_time": 0.10561513900756836 + }, + { + "epoch": 3.35052490234375e-05, + "model_forward_time": 0.025061845779418945, + "step": 21958 + }, + { + "epoch": 3.35052490234375e-05, + "step": 21958, + "training_step_time": 0.10975503921508789 + }, + { + "epoch": 3.350677490234375e-05, + "model_forward_time": 0.02515101432800293, + "step": 21959 + }, + { + "epoch": 3.350677490234375e-05, + "step": 21959, + "training_step_time": 0.1128835678100586 + }, + { + "epoch": 3.350830078125e-05, + "grad_norm": 0.18088452517986298, + "learning_rate": 1.838429980787081e-05, + "loss": 0.0065, + "step": 21960 + }, + { + "epoch": 3.350830078125e-05, + "model_forward_time": 0.025295734405517578, + "step": 21960 + }, + { + "epoch": 3.350830078125e-05, + "step": 21960, + "training_step_time": 0.1054234504699707 + }, + { + "epoch": 3.350982666015625e-05, + "model_forward_time": 0.025385379791259766, + "step": 21961 + }, + { + "epoch": 3.350982666015625e-05, + "step": 21961, + "training_step_time": 0.1065981388092041 + }, + { + "epoch": 3.35113525390625e-05, + "model_forward_time": 0.02478647232055664, + "step": 21962 + }, + { + "epoch": 3.35113525390625e-05, + "step": 21962, + "training_step_time": 0.10497903823852539 + }, + { + "epoch": 3.351287841796875e-05, + "model_forward_time": 0.025162935256958008, + "step": 21963 + }, + { + "epoch": 3.351287841796875e-05, + "step": 21963, + "training_step_time": 0.10491132736206055 + }, + { + "epoch": 3.3514404296875e-05, + "model_forward_time": 0.024976730346679688, + "step": 21964 + }, + { + "epoch": 3.3514404296875e-05, + "step": 21964, + "training_step_time": 0.14592242240905762 + }, + { + "epoch": 3.351593017578125e-05, + "model_forward_time": 0.024506092071533203, + "step": 21965 + }, + { + "epoch": 3.351593017578125e-05, + "step": 21965, + "training_step_time": 0.12034487724304199 + }, + { + "epoch": 3.35174560546875e-05, + "model_forward_time": 0.02522444725036621, + "step": 21966 + }, + { + "epoch": 3.35174560546875e-05, + "step": 21966, + "training_step_time": 0.12931394577026367 + }, + { + "epoch": 3.351898193359375e-05, + "model_forward_time": 0.024785518646240234, + "step": 21967 + }, + { + "epoch": 3.351898193359375e-05, + "step": 21967, + "training_step_time": 0.15756988525390625 + }, + { + "epoch": 3.35205078125e-05, + "model_forward_time": 0.024558305740356445, + "step": 21968 + }, + { + "epoch": 3.35205078125e-05, + "step": 21968, + "training_step_time": 0.10171723365783691 + }, + { + "epoch": 3.352203369140625e-05, + "model_forward_time": 0.025147676467895508, + "step": 21969 + }, + { + "epoch": 3.352203369140625e-05, + "step": 21969, + "training_step_time": 0.12040376663208008 + }, + { + "epoch": 3.35235595703125e-05, + "grad_norm": 0.10777976363897324, + "learning_rate": 1.8341620249602387e-05, + "loss": 0.0073, + "step": 21970 + }, + { + "epoch": 3.35235595703125e-05, + "model_forward_time": 0.02538776397705078, + "step": 21970 + }, + { + "epoch": 3.35235595703125e-05, + "step": 21970, + "training_step_time": 0.11500930786132812 + }, + { + "epoch": 3.352508544921875e-05, + "model_forward_time": 0.02500295639038086, + "step": 21971 + }, + { + "epoch": 3.352508544921875e-05, + "step": 21971, + "training_step_time": 0.10777401924133301 + }, + { + "epoch": 3.3526611328125e-05, + "model_forward_time": 0.024872303009033203, + "step": 21972 + }, + { + "epoch": 3.3526611328125e-05, + "step": 21972, + "training_step_time": 0.1077277660369873 + }, + { + "epoch": 3.352813720703125e-05, + "model_forward_time": 0.025223493576049805, + "step": 21973 + }, + { + "epoch": 3.352813720703125e-05, + "step": 21973, + "training_step_time": 0.10329580307006836 + }, + { + "epoch": 3.35296630859375e-05, + "model_forward_time": 0.025937795639038086, + "step": 21974 + }, + { + "epoch": 3.35296630859375e-05, + "step": 21974, + "training_step_time": 0.10796785354614258 + }, + { + "epoch": 3.353118896484375e-05, + "model_forward_time": 0.02512836456298828, + "step": 21975 + }, + { + "epoch": 3.353118896484375e-05, + "step": 21975, + "training_step_time": 0.10573673248291016 + }, + { + "epoch": 3.353271484375e-05, + "model_forward_time": 0.02514505386352539, + "step": 21976 + }, + { + "epoch": 3.353271484375e-05, + "step": 21976, + "training_step_time": 0.10509777069091797 + }, + { + "epoch": 3.353424072265625e-05, + "model_forward_time": 0.02508687973022461, + "step": 21977 + }, + { + "epoch": 3.353424072265625e-05, + "step": 21977, + "training_step_time": 0.1089315414428711 + }, + { + "epoch": 3.35357666015625e-05, + "model_forward_time": 0.024575471878051758, + "step": 21978 + }, + { + "epoch": 3.35357666015625e-05, + "step": 21978, + "training_step_time": 0.10536813735961914 + }, + { + "epoch": 3.353729248046875e-05, + "model_forward_time": 0.025023698806762695, + "step": 21979 + }, + { + "epoch": 3.353729248046875e-05, + "step": 21979, + "training_step_time": 0.10579252243041992 + }, + { + "epoch": 3.3538818359375e-05, + "grad_norm": 0.1644177883863449, + "learning_rate": 1.8298979159250557e-05, + "loss": 0.0055, + "step": 21980 + }, + { + "epoch": 3.3538818359375e-05, + "model_forward_time": 0.024699926376342773, + "step": 21980 + }, + { + "epoch": 3.3538818359375e-05, + "step": 21980, + "training_step_time": 0.10270261764526367 + }, + { + "epoch": 3.354034423828125e-05, + "model_forward_time": 0.024265289306640625, + "step": 21981 + }, + { + "epoch": 3.354034423828125e-05, + "step": 21981, + "training_step_time": 0.15401196479797363 + }, + { + "epoch": 3.35418701171875e-05, + "model_forward_time": 0.025017499923706055, + "step": 21982 + }, + { + "epoch": 3.35418701171875e-05, + "step": 21982, + "training_step_time": 0.15387392044067383 + }, + { + "epoch": 3.354339599609375e-05, + "model_forward_time": 0.024634122848510742, + "step": 21983 + }, + { + "epoch": 3.354339599609375e-05, + "step": 21983, + "training_step_time": 0.1511986255645752 + }, + { + "epoch": 3.3544921875e-05, + "model_forward_time": 0.025026559829711914, + "step": 21984 + }, + { + "epoch": 3.3544921875e-05, + "step": 21984, + "training_step_time": 0.17862486839294434 + }, + { + "epoch": 3.354644775390625e-05, + "model_forward_time": 0.02434086799621582, + "step": 21985 + }, + { + "epoch": 3.354644775390625e-05, + "step": 21985, + "training_step_time": 0.1838853359222412 + }, + { + "epoch": 3.35479736328125e-05, + "model_forward_time": 0.02460789680480957, + "step": 21986 + }, + { + "epoch": 3.35479736328125e-05, + "step": 21986, + "training_step_time": 0.12901043891906738 + }, + { + "epoch": 3.354949951171875e-05, + "model_forward_time": 0.024167776107788086, + "step": 21987 + }, + { + "epoch": 3.354949951171875e-05, + "step": 21987, + "training_step_time": 0.17749977111816406 + }, + { + "epoch": 3.3551025390625e-05, + "model_forward_time": 0.024352312088012695, + "step": 21988 + }, + { + "epoch": 3.3551025390625e-05, + "step": 21988, + "training_step_time": 0.11972546577453613 + }, + { + "epoch": 3.355255126953125e-05, + "model_forward_time": 0.02433180809020996, + "step": 21989 + }, + { + "epoch": 3.355255126953125e-05, + "step": 21989, + "training_step_time": 0.17307186126708984 + }, + { + "epoch": 3.35540771484375e-05, + "grad_norm": 0.13825196027755737, + "learning_rate": 1.8256376588628238e-05, + "loss": 0.006, + "step": 21990 + }, + { + "epoch": 3.35540771484375e-05, + "model_forward_time": 0.024563074111938477, + "step": 21990 + }, + { + "epoch": 3.35540771484375e-05, + "step": 21990, + "training_step_time": 0.12290668487548828 + }, + { + "epoch": 3.355560302734375e-05, + "model_forward_time": 0.024152517318725586, + "step": 21991 + }, + { + "epoch": 3.355560302734375e-05, + "step": 21991, + "training_step_time": 0.13365983963012695 + }, + { + "epoch": 3.355712890625e-05, + "model_forward_time": 0.024890899658203125, + "step": 21992 + }, + { + "epoch": 3.355712890625e-05, + "step": 21992, + "training_step_time": 0.1719675064086914 + }, + { + "epoch": 3.355865478515625e-05, + "model_forward_time": 0.026370763778686523, + "step": 21993 + }, + { + "epoch": 3.355865478515625e-05, + "step": 21993, + "training_step_time": 0.1783008575439453 + }, + { + "epoch": 3.35601806640625e-05, + "model_forward_time": 0.02379441261291504, + "step": 21994 + }, + { + "epoch": 3.35601806640625e-05, + "step": 21994, + "training_step_time": 0.12861299514770508 + }, + { + "epoch": 3.356170654296875e-05, + "model_forward_time": 0.024871826171875, + "step": 21995 + }, + { + "epoch": 3.356170654296875e-05, + "step": 21995, + "training_step_time": 0.11438393592834473 + }, + { + "epoch": 3.3563232421875e-05, + "model_forward_time": 0.024910926818847656, + "step": 21996 + }, + { + "epoch": 3.3563232421875e-05, + "step": 21996, + "training_step_time": 0.10733199119567871 + }, + { + "epoch": 3.356475830078125e-05, + "model_forward_time": 0.025028705596923828, + "step": 21997 + }, + { + "epoch": 3.356475830078125e-05, + "step": 21997, + "training_step_time": 0.10549330711364746 + }, + { + "epoch": 3.35662841796875e-05, + "model_forward_time": 0.024802446365356445, + "step": 21998 + }, + { + "epoch": 3.35662841796875e-05, + "step": 21998, + "training_step_time": 0.10600852966308594 + }, + { + "epoch": 3.356781005859375e-05, + "model_forward_time": 0.025032520294189453, + "step": 21999 + }, + { + "epoch": 3.356781005859375e-05, + "step": 21999, + "training_step_time": 0.10514092445373535 + }, + { + "epoch": 3.35693359375e-05, + "grad_norm": 0.15530382096767426, + "learning_rate": 1.821381258950161e-05, + "loss": 0.0055, + "step": 22000 + }, + { + "epoch": 3.35693359375e-05, + "model_forward_time": 0.02475452423095703, + "step": 22000 + }, + { + "epoch": 3.35693359375e-05, + "step": 22000, + "training_step_time": 0.09588789939880371 + }, + { + "epoch": 3.357086181640625e-05, + "model_forward_time": 0.022913455963134766, + "step": 22001 + }, + { + "epoch": 3.357086181640625e-05, + "step": 22001, + "training_step_time": 0.09594488143920898 + }, + { + "epoch": 3.35723876953125e-05, + "model_forward_time": 0.024338483810424805, + "step": 22002 + }, + { + "epoch": 3.35723876953125e-05, + "step": 22002, + "training_step_time": 0.1024024486541748 + }, + { + "epoch": 3.357391357421875e-05, + "model_forward_time": 0.024985790252685547, + "step": 22003 + }, + { + "epoch": 3.357391357421875e-05, + "step": 22003, + "training_step_time": 0.1040964126586914 + }, + { + "epoch": 3.3575439453125e-05, + "model_forward_time": 0.0244293212890625, + "step": 22004 + }, + { + "epoch": 3.3575439453125e-05, + "step": 22004, + "training_step_time": 0.10966968536376953 + }, + { + "epoch": 3.357696533203125e-05, + "model_forward_time": 0.02476024627685547, + "step": 22005 + }, + { + "epoch": 3.357696533203125e-05, + "step": 22005, + "training_step_time": 0.10764956474304199 + }, + { + "epoch": 3.35784912109375e-05, + "model_forward_time": 0.02488541603088379, + "step": 22006 + }, + { + "epoch": 3.35784912109375e-05, + "step": 22006, + "training_step_time": 0.10936117172241211 + }, + { + "epoch": 3.358001708984375e-05, + "model_forward_time": 0.024812936782836914, + "step": 22007 + }, + { + "epoch": 3.358001708984375e-05, + "step": 22007, + "training_step_time": 0.10627460479736328 + }, + { + "epoch": 3.358154296875e-05, + "model_forward_time": 0.02492046356201172, + "step": 22008 + }, + { + "epoch": 3.358154296875e-05, + "step": 22008, + "training_step_time": 0.10655641555786133 + }, + { + "epoch": 3.358306884765625e-05, + "model_forward_time": 0.023703575134277344, + "step": 22009 + }, + { + "epoch": 3.358306884765625e-05, + "step": 22009, + "training_step_time": 0.10750484466552734 + }, + { + "epoch": 3.35845947265625e-05, + "grad_norm": 0.16007855534553528, + "learning_rate": 1.817128721358991e-05, + "loss": 0.0042, + "step": 22010 + }, + { + "epoch": 3.35845947265625e-05, + "model_forward_time": 0.024477481842041016, + "step": 22010 + }, + { + "epoch": 3.35845947265625e-05, + "step": 22010, + "training_step_time": 0.19192862510681152 + }, + { + "epoch": 3.358612060546875e-05, + "model_forward_time": 0.024020671844482422, + "step": 22011 + }, + { + "epoch": 3.358612060546875e-05, + "step": 22011, + "training_step_time": 0.12665152549743652 + }, + { + "epoch": 3.3587646484375e-05, + "model_forward_time": 0.02410888671875, + "step": 22012 + }, + { + "epoch": 3.3587646484375e-05, + "step": 22012, + "training_step_time": 0.1696009635925293 + }, + { + "epoch": 3.358917236328125e-05, + "model_forward_time": 0.02480459213256836, + "step": 22013 + }, + { + "epoch": 3.358917236328125e-05, + "step": 22013, + "training_step_time": 0.1442399024963379 + }, + { + "epoch": 3.35906982421875e-05, + "model_forward_time": 0.024608135223388672, + "step": 22014 + }, + { + "epoch": 3.35906982421875e-05, + "step": 22014, + "training_step_time": 0.1261286735534668 + }, + { + "epoch": 3.359222412109375e-05, + "model_forward_time": 0.024415969848632812, + "step": 22015 + }, + { + "epoch": 3.359222412109375e-05, + "step": 22015, + "training_step_time": 0.15874719619750977 + }, + { + "epoch": 3.359375e-05, + "model_forward_time": 0.024023771286010742, + "step": 22016 + }, + { + "epoch": 3.359375e-05, + "step": 22016, + "training_step_time": 0.10609292984008789 + }, + { + "epoch": 3.359527587890625e-05, + "model_forward_time": 0.024616241455078125, + "step": 22017 + }, + { + "epoch": 3.359527587890625e-05, + "step": 22017, + "training_step_time": 0.10281562805175781 + }, + { + "epoch": 3.35968017578125e-05, + "model_forward_time": 0.025092601776123047, + "step": 22018 + }, + { + "epoch": 3.35968017578125e-05, + "step": 22018, + "training_step_time": 0.10621118545532227 + }, + { + "epoch": 3.359832763671875e-05, + "model_forward_time": 0.025038957595825195, + "step": 22019 + }, + { + "epoch": 3.359832763671875e-05, + "step": 22019, + "training_step_time": 0.10753583908081055 + }, + { + "epoch": 3.3599853515625e-05, + "grad_norm": 0.12033859640359879, + "learning_rate": 1.8128800512565513e-05, + "loss": 0.0081, + "step": 22020 + }, + { + "epoch": 3.3599853515625e-05, + "model_forward_time": 0.025561809539794922, + "step": 22020 + }, + { + "epoch": 3.3599853515625e-05, + "step": 22020, + "training_step_time": 0.1090993881225586 + }, + { + "epoch": 3.360137939453125e-05, + "model_forward_time": 0.024938344955444336, + "step": 22021 + }, + { + "epoch": 3.360137939453125e-05, + "step": 22021, + "training_step_time": 0.11868143081665039 + }, + { + "epoch": 3.36029052734375e-05, + "model_forward_time": 0.025056123733520508, + "step": 22022 + }, + { + "epoch": 3.36029052734375e-05, + "step": 22022, + "training_step_time": 0.11347270011901855 + }, + { + "epoch": 3.360443115234375e-05, + "model_forward_time": 0.024766206741333008, + "step": 22023 + }, + { + "epoch": 3.360443115234375e-05, + "step": 22023, + "training_step_time": 0.11670064926147461 + }, + { + "epoch": 3.360595703125e-05, + "model_forward_time": 0.024721145629882812, + "step": 22024 + }, + { + "epoch": 3.360595703125e-05, + "step": 22024, + "training_step_time": 0.1120598316192627 + }, + { + "epoch": 3.360748291015625e-05, + "model_forward_time": 0.024762868881225586, + "step": 22025 + }, + { + "epoch": 3.360748291015625e-05, + "step": 22025, + "training_step_time": 0.10821819305419922 + }, + { + "epoch": 3.36090087890625e-05, + "model_forward_time": 0.02478957176208496, + "step": 22026 + }, + { + "epoch": 3.36090087890625e-05, + "step": 22026, + "training_step_time": 0.1033179759979248 + }, + { + "epoch": 3.361053466796875e-05, + "model_forward_time": 0.0240328311920166, + "step": 22027 + }, + { + "epoch": 3.361053466796875e-05, + "step": 22027, + "training_step_time": 0.15194106101989746 + }, + { + "epoch": 3.3612060546875e-05, + "model_forward_time": 0.024971961975097656, + "step": 22028 + }, + { + "epoch": 3.3612060546875e-05, + "step": 22028, + "training_step_time": 0.16945433616638184 + }, + { + "epoch": 3.361358642578125e-05, + "model_forward_time": 0.0243375301361084, + "step": 22029 + }, + { + "epoch": 3.361358642578125e-05, + "step": 22029, + "training_step_time": 0.17234539985656738 + }, + { + "epoch": 3.36151123046875e-05, + "grad_norm": 0.19749611616134644, + "learning_rate": 1.808635253805376e-05, + "loss": 0.0095, + "step": 22030 + }, + { + "epoch": 3.36151123046875e-05, + "model_forward_time": 0.024533748626708984, + "step": 22030 + }, + { + "epoch": 3.36151123046875e-05, + "step": 22030, + "training_step_time": 0.17444086074829102 + }, + { + "epoch": 3.361663818359375e-05, + "model_forward_time": 0.024257183074951172, + "step": 22031 + }, + { + "epoch": 3.361663818359375e-05, + "step": 22031, + "training_step_time": 0.11185789108276367 + }, + { + "epoch": 3.36181640625e-05, + "model_forward_time": 0.024164676666259766, + "step": 22032 + }, + { + "epoch": 3.36181640625e-05, + "step": 22032, + "training_step_time": 0.11118507385253906 + }, + { + "epoch": 3.361968994140625e-05, + "model_forward_time": 0.024956941604614258, + "step": 22033 + }, + { + "epoch": 3.361968994140625e-05, + "step": 22033, + "training_step_time": 0.16123604774475098 + }, + { + "epoch": 3.36212158203125e-05, + "model_forward_time": 0.024712800979614258, + "step": 22034 + }, + { + "epoch": 3.36212158203125e-05, + "step": 22034, + "training_step_time": 0.14887022972106934 + }, + { + "epoch": 3.362274169921875e-05, + "model_forward_time": 0.023926258087158203, + "step": 22035 + }, + { + "epoch": 3.362274169921875e-05, + "step": 22035, + "training_step_time": 0.10612106323242188 + }, + { + "epoch": 3.3624267578125e-05, + "model_forward_time": 0.02471637725830078, + "step": 22036 + }, + { + "epoch": 3.3624267578125e-05, + "step": 22036, + "training_step_time": 0.10458660125732422 + }, + { + "epoch": 3.362579345703125e-05, + "model_forward_time": 0.024910926818847656, + "step": 22037 + }, + { + "epoch": 3.362579345703125e-05, + "step": 22037, + "training_step_time": 0.12972354888916016 + }, + { + "epoch": 3.36273193359375e-05, + "model_forward_time": 0.02482914924621582, + "step": 22038 + }, + { + "epoch": 3.36273193359375e-05, + "step": 22038, + "training_step_time": 0.10700535774230957 + }, + { + "epoch": 3.362884521484375e-05, + "model_forward_time": 0.024960756301879883, + "step": 22039 + }, + { + "epoch": 3.362884521484375e-05, + "step": 22039, + "training_step_time": 0.10965728759765625 + }, + { + "epoch": 3.363037109375e-05, + "grad_norm": 0.17156188189983368, + "learning_rate": 1.8043943341632907e-05, + "loss": 0.0088, + "step": 22040 + }, + { + "epoch": 3.363037109375e-05, + "model_forward_time": 0.025137662887573242, + "step": 22040 + }, + { + "epoch": 3.363037109375e-05, + "step": 22040, + "training_step_time": 0.12963414192199707 + }, + { + "epoch": 3.363189697265625e-05, + "model_forward_time": 0.02479410171508789, + "step": 22041 + }, + { + "epoch": 3.363189697265625e-05, + "step": 22041, + "training_step_time": 0.12388968467712402 + }, + { + "epoch": 3.36334228515625e-05, + "model_forward_time": 0.02495741844177246, + "step": 22042 + }, + { + "epoch": 3.36334228515625e-05, + "step": 22042, + "training_step_time": 0.11787271499633789 + }, + { + "epoch": 3.363494873046875e-05, + "model_forward_time": 0.02500152587890625, + "step": 22043 + }, + { + "epoch": 3.363494873046875e-05, + "step": 22043, + "training_step_time": 0.11094880104064941 + }, + { + "epoch": 3.3636474609375e-05, + "model_forward_time": 0.024794340133666992, + "step": 22044 + }, + { + "epoch": 3.3636474609375e-05, + "step": 22044, + "training_step_time": 0.10430240631103516 + }, + { + "epoch": 3.363800048828125e-05, + "model_forward_time": 0.024932861328125, + "step": 22045 + }, + { + "epoch": 3.363800048828125e-05, + "step": 22045, + "training_step_time": 0.10292458534240723 + }, + { + "epoch": 3.36395263671875e-05, + "model_forward_time": 0.024818897247314453, + "step": 22046 + }, + { + "epoch": 3.36395263671875e-05, + "step": 22046, + "training_step_time": 0.10722613334655762 + }, + { + "epoch": 3.364105224609375e-05, + "model_forward_time": 0.025091886520385742, + "step": 22047 + }, + { + "epoch": 3.364105224609375e-05, + "step": 22047, + "training_step_time": 0.10452675819396973 + }, + { + "epoch": 3.3642578125e-05, + "model_forward_time": 0.024979829788208008, + "step": 22048 + }, + { + "epoch": 3.3642578125e-05, + "step": 22048, + "training_step_time": 0.10580730438232422 + }, + { + "epoch": 3.364410400390625e-05, + "model_forward_time": 0.024852514266967773, + "step": 22049 + }, + { + "epoch": 3.364410400390625e-05, + "step": 22049, + "training_step_time": 0.10592484474182129 + }, + { + "epoch": 3.36456298828125e-05, + "grad_norm": 0.09379428625106812, + "learning_rate": 1.800157297483417e-05, + "loss": 0.006, + "step": 22050 + }, + { + "epoch": 3.36456298828125e-05, + "model_forward_time": 0.02552485466003418, + "step": 22050 + }, + { + "epoch": 3.36456298828125e-05, + "step": 22050, + "training_step_time": 0.10558485984802246 + }, + { + "epoch": 3.364715576171875e-05, + "model_forward_time": 0.02476644515991211, + "step": 22051 + }, + { + "epoch": 3.364715576171875e-05, + "step": 22051, + "training_step_time": 0.11562156677246094 + }, + { + "epoch": 3.3648681640625e-05, + "model_forward_time": 0.02506279945373535, + "step": 22052 + }, + { + "epoch": 3.3648681640625e-05, + "step": 22052, + "training_step_time": 0.11258411407470703 + }, + { + "epoch": 3.365020751953125e-05, + "model_forward_time": 0.025385141372680664, + "step": 22053 + }, + { + "epoch": 3.365020751953125e-05, + "step": 22053, + "training_step_time": 0.1081690788269043 + }, + { + "epoch": 3.36517333984375e-05, + "model_forward_time": 0.02518296241760254, + "step": 22054 + }, + { + "epoch": 3.36517333984375e-05, + "step": 22054, + "training_step_time": 0.10996127128601074 + }, + { + "epoch": 3.365325927734375e-05, + "model_forward_time": 0.024766206741333008, + "step": 22055 + }, + { + "epoch": 3.365325927734375e-05, + "step": 22055, + "training_step_time": 0.107208251953125 + }, + { + "epoch": 3.365478515625e-05, + "model_forward_time": 0.025093555450439453, + "step": 22056 + }, + { + "epoch": 3.365478515625e-05, + "step": 22056, + "training_step_time": 0.12782049179077148 + }, + { + "epoch": 3.365631103515625e-05, + "model_forward_time": 0.025124073028564453, + "step": 22057 + }, + { + "epoch": 3.365631103515625e-05, + "step": 22057, + "training_step_time": 0.12386012077331543 + }, + { + "epoch": 3.36578369140625e-05, + "model_forward_time": 0.02506732940673828, + "step": 22058 + }, + { + "epoch": 3.36578369140625e-05, + "step": 22058, + "training_step_time": 0.132612943649292 + }, + { + "epoch": 3.365936279296875e-05, + "model_forward_time": 0.0253298282623291, + "step": 22059 + }, + { + "epoch": 3.365936279296875e-05, + "step": 22059, + "training_step_time": 0.11371159553527832 + }, + { + "epoch": 3.3660888671875e-05, + "grad_norm": 0.11976824700832367, + "learning_rate": 1.7959241489141525e-05, + "loss": 0.0043, + "step": 22060 + }, + { + "epoch": 3.3660888671875e-05, + "model_forward_time": 0.024948596954345703, + "step": 22060 + }, + { + "epoch": 3.3660888671875e-05, + "step": 22060, + "training_step_time": 0.17679500579833984 + }, + { + "epoch": 3.366241455078125e-05, + "model_forward_time": 0.024548053741455078, + "step": 22061 + }, + { + "epoch": 3.366241455078125e-05, + "step": 22061, + "training_step_time": 0.12395381927490234 + }, + { + "epoch": 3.36639404296875e-05, + "model_forward_time": 0.02429509162902832, + "step": 22062 + }, + { + "epoch": 3.36639404296875e-05, + "step": 22062, + "training_step_time": 0.12175154685974121 + }, + { + "epoch": 3.366546630859375e-05, + "model_forward_time": 0.025143861770629883, + "step": 22063 + }, + { + "epoch": 3.366546630859375e-05, + "step": 22063, + "training_step_time": 0.10907435417175293 + }, + { + "epoch": 3.36669921875e-05, + "model_forward_time": 0.02524590492248535, + "step": 22064 + }, + { + "epoch": 3.36669921875e-05, + "step": 22064, + "training_step_time": 0.11278891563415527 + }, + { + "epoch": 3.366851806640625e-05, + "model_forward_time": 0.025288105010986328, + "step": 22065 + }, + { + "epoch": 3.366851806640625e-05, + "step": 22065, + "training_step_time": 0.11050915718078613 + }, + { + "epoch": 3.36700439453125e-05, + "model_forward_time": 0.025485754013061523, + "step": 22066 + }, + { + "epoch": 3.36700439453125e-05, + "step": 22066, + "training_step_time": 0.11541104316711426 + }, + { + "epoch": 3.367156982421875e-05, + "model_forward_time": 0.025388002395629883, + "step": 22067 + }, + { + "epoch": 3.367156982421875e-05, + "step": 22067, + "training_step_time": 0.10773277282714844 + }, + { + "epoch": 3.3673095703125e-05, + "model_forward_time": 0.024590253829956055, + "step": 22068 + }, + { + "epoch": 3.3673095703125e-05, + "step": 22068, + "training_step_time": 0.11530232429504395 + }, + { + "epoch": 3.367462158203125e-05, + "model_forward_time": 0.024728775024414062, + "step": 22069 + }, + { + "epoch": 3.367462158203125e-05, + "step": 22069, + "training_step_time": 0.10981535911560059 + }, + { + "epoch": 3.36761474609375e-05, + "grad_norm": 0.27646952867507935, + "learning_rate": 1.7916948935991718e-05, + "loss": 0.0067, + "step": 22070 + }, + { + "epoch": 3.36761474609375e-05, + "model_forward_time": 0.02508258819580078, + "step": 22070 + }, + { + "epoch": 3.36761474609375e-05, + "step": 22070, + "training_step_time": 0.11499714851379395 + }, + { + "epoch": 3.367767333984375e-05, + "model_forward_time": 0.02522873878479004, + "step": 22071 + }, + { + "epoch": 3.367767333984375e-05, + "step": 22071, + "training_step_time": 0.11368536949157715 + }, + { + "epoch": 3.367919921875e-05, + "model_forward_time": 0.02501082420349121, + "step": 22072 + }, + { + "epoch": 3.367919921875e-05, + "step": 22072, + "training_step_time": 0.10786843299865723 + }, + { + "epoch": 3.368072509765625e-05, + "model_forward_time": 0.0254364013671875, + "step": 22073 + }, + { + "epoch": 3.368072509765625e-05, + "step": 22073, + "training_step_time": 0.10866641998291016 + }, + { + "epoch": 3.36822509765625e-05, + "model_forward_time": 0.024925947189331055, + "step": 22074 + }, + { + "epoch": 3.36822509765625e-05, + "step": 22074, + "training_step_time": 0.15185117721557617 + }, + { + "epoch": 3.368377685546875e-05, + "model_forward_time": 0.024457693099975586, + "step": 22075 + }, + { + "epoch": 3.368377685546875e-05, + "step": 22075, + "training_step_time": 0.16089320182800293 + }, + { + "epoch": 3.3685302734375e-05, + "model_forward_time": 0.02427530288696289, + "step": 22076 + }, + { + "epoch": 3.3685302734375e-05, + "step": 22076, + "training_step_time": 0.13083410263061523 + }, + { + "epoch": 3.368682861328125e-05, + "model_forward_time": 0.024447202682495117, + "step": 22077 + }, + { + "epoch": 3.368682861328125e-05, + "step": 22077, + "training_step_time": 0.1368699073791504 + }, + { + "epoch": 3.36883544921875e-05, + "model_forward_time": 0.026310443878173828, + "step": 22078 + }, + { + "epoch": 3.36883544921875e-05, + "step": 22078, + "training_step_time": 0.17287015914916992 + }, + { + "epoch": 3.368988037109375e-05, + "model_forward_time": 0.024475812911987305, + "step": 22079 + }, + { + "epoch": 3.368988037109375e-05, + "step": 22079, + "training_step_time": 0.1528306007385254 + }, + { + "epoch": 3.369140625e-05, + "grad_norm": 0.20525725185871124, + "learning_rate": 1.787469536677419e-05, + "loss": 0.0057, + "step": 22080 + }, + { + "epoch": 3.369140625e-05, + "model_forward_time": 0.02459096908569336, + "step": 22080 + }, + { + "epoch": 3.369140625e-05, + "step": 22080, + "training_step_time": 0.10769009590148926 + }, + { + "epoch": 3.369293212890625e-05, + "model_forward_time": 0.027686119079589844, + "step": 22081 + }, + { + "epoch": 3.369293212890625e-05, + "step": 22081, + "training_step_time": 0.19274330139160156 + }, + { + "epoch": 3.36944580078125e-05, + "model_forward_time": 0.02444601058959961, + "step": 22082 + }, + { + "epoch": 3.36944580078125e-05, + "step": 22082, + "training_step_time": 0.10213184356689453 + }, + { + "epoch": 3.369598388671875e-05, + "model_forward_time": 0.025621891021728516, + "step": 22083 + }, + { + "epoch": 3.369598388671875e-05, + "step": 22083, + "training_step_time": 0.10543990135192871 + }, + { + "epoch": 3.3697509765625e-05, + "model_forward_time": 0.025042295455932617, + "step": 22084 + }, + { + "epoch": 3.3697509765625e-05, + "step": 22084, + "training_step_time": 0.14552044868469238 + }, + { + "epoch": 3.369903564453125e-05, + "model_forward_time": 0.024957895278930664, + "step": 22085 + }, + { + "epoch": 3.369903564453125e-05, + "step": 22085, + "training_step_time": 0.11055850982666016 + }, + { + "epoch": 3.37005615234375e-05, + "model_forward_time": 0.02511310577392578, + "step": 22086 + }, + { + "epoch": 3.37005615234375e-05, + "step": 22086, + "training_step_time": 0.11157870292663574 + }, + { + "epoch": 3.370208740234375e-05, + "model_forward_time": 0.02510523796081543, + "step": 22087 + }, + { + "epoch": 3.370208740234375e-05, + "step": 22087, + "training_step_time": 0.12030315399169922 + }, + { + "epoch": 3.370361328125e-05, + "model_forward_time": 0.024698972702026367, + "step": 22088 + }, + { + "epoch": 3.370361328125e-05, + "step": 22088, + "training_step_time": 0.16036152839660645 + }, + { + "epoch": 3.370513916015625e-05, + "model_forward_time": 0.024704933166503906, + "step": 22089 + }, + { + "epoch": 3.370513916015625e-05, + "step": 22089, + "training_step_time": 0.1086881160736084 + }, + { + "epoch": 3.37066650390625e-05, + "grad_norm": 0.16050368547439575, + "learning_rate": 1.7832480832830987e-05, + "loss": 0.0106, + "step": 22090 + }, + { + "epoch": 3.37066650390625e-05, + "model_forward_time": 0.02433037757873535, + "step": 22090 + }, + { + "epoch": 3.37066650390625e-05, + "step": 22090, + "training_step_time": 0.10866379737854004 + }, + { + "epoch": 3.370819091796875e-05, + "model_forward_time": 0.024655580520629883, + "step": 22091 + }, + { + "epoch": 3.370819091796875e-05, + "step": 22091, + "training_step_time": 0.10538935661315918 + }, + { + "epoch": 3.3709716796875e-05, + "model_forward_time": 0.025115489959716797, + "step": 22092 + }, + { + "epoch": 3.3709716796875e-05, + "step": 22092, + "training_step_time": 0.10508275032043457 + }, + { + "epoch": 3.371124267578125e-05, + "model_forward_time": 0.024903297424316406, + "step": 22093 + }, + { + "epoch": 3.371124267578125e-05, + "step": 22093, + "training_step_time": 0.1112513542175293 + }, + { + "epoch": 3.37127685546875e-05, + "model_forward_time": 0.0251009464263916, + "step": 22094 + }, + { + "epoch": 3.37127685546875e-05, + "step": 22094, + "training_step_time": 0.11023330688476562 + }, + { + "epoch": 3.371429443359375e-05, + "model_forward_time": 0.02552628517150879, + "step": 22095 + }, + { + "epoch": 3.371429443359375e-05, + "step": 22095, + "training_step_time": 0.1098787784576416 + }, + { + "epoch": 3.37158203125e-05, + "model_forward_time": 0.025022506713867188, + "step": 22096 + }, + { + "epoch": 3.37158203125e-05, + "step": 22096, + "training_step_time": 0.10639142990112305 + }, + { + "epoch": 3.371734619140625e-05, + "model_forward_time": 0.024855852127075195, + "step": 22097 + }, + { + "epoch": 3.371734619140625e-05, + "step": 22097, + "training_step_time": 0.10494542121887207 + }, + { + "epoch": 3.37188720703125e-05, + "model_forward_time": 0.0251312255859375, + "step": 22098 + }, + { + "epoch": 3.37188720703125e-05, + "step": 22098, + "training_step_time": 0.10486745834350586 + }, + { + "epoch": 3.372039794921875e-05, + "model_forward_time": 0.024608135223388672, + "step": 22099 + }, + { + "epoch": 3.372039794921875e-05, + "step": 22099, + "training_step_time": 0.10403752326965332 + }, + { + "epoch": 3.3721923828125e-05, + "grad_norm": 0.1448470950126648, + "learning_rate": 1.7790305385456795e-05, + "loss": 0.0055, + "step": 22100 + }, + { + "epoch": 3.3721923828125e-05, + "model_forward_time": 0.024161338806152344, + "step": 22100 + }, + { + "epoch": 3.3721923828125e-05, + "step": 22100, + "training_step_time": 0.10615205764770508 + }, + { + "epoch": 3.372344970703125e-05, + "model_forward_time": 0.025004148483276367, + "step": 22101 + }, + { + "epoch": 3.372344970703125e-05, + "step": 22101, + "training_step_time": 0.1066277027130127 + }, + { + "epoch": 3.37249755859375e-05, + "model_forward_time": 0.024811983108520508, + "step": 22102 + }, + { + "epoch": 3.37249755859375e-05, + "step": 22102, + "training_step_time": 0.18333148956298828 + }, + { + "epoch": 3.372650146484375e-05, + "model_forward_time": 0.0244905948638916, + "step": 22103 + }, + { + "epoch": 3.372650146484375e-05, + "step": 22103, + "training_step_time": 0.1650533676147461 + }, + { + "epoch": 3.372802734375e-05, + "model_forward_time": 0.02401876449584961, + "step": 22104 + }, + { + "epoch": 3.372802734375e-05, + "step": 22104, + "training_step_time": 0.1381528377532959 + }, + { + "epoch": 3.372955322265625e-05, + "model_forward_time": 0.024289369583129883, + "step": 22105 + }, + { + "epoch": 3.372955322265625e-05, + "step": 22105, + "training_step_time": 0.1543292999267578 + }, + { + "epoch": 3.37310791015625e-05, + "model_forward_time": 0.024353981018066406, + "step": 22106 + }, + { + "epoch": 3.37310791015625e-05, + "step": 22106, + "training_step_time": 0.10492968559265137 + }, + { + "epoch": 3.373260498046875e-05, + "model_forward_time": 0.0246884822845459, + "step": 22107 + }, + { + "epoch": 3.373260498046875e-05, + "step": 22107, + "training_step_time": 0.12307620048522949 + }, + { + "epoch": 3.3734130859375e-05, + "model_forward_time": 0.024834394454956055, + "step": 22108 + }, + { + "epoch": 3.3734130859375e-05, + "step": 22108, + "training_step_time": 0.10532760620117188 + }, + { + "epoch": 3.373565673828125e-05, + "model_forward_time": 0.02520751953125, + "step": 22109 + }, + { + "epoch": 3.373565673828125e-05, + "step": 22109, + "training_step_time": 0.10733604431152344 + }, + { + "epoch": 3.37371826171875e-05, + "grad_norm": 0.20625461637973785, + "learning_rate": 1.774816907589873e-05, + "loss": 0.0171, + "step": 22110 + }, + { + "epoch": 3.37371826171875e-05, + "model_forward_time": 0.02508378028869629, + "step": 22110 + }, + { + "epoch": 3.37371826171875e-05, + "step": 22110, + "training_step_time": 0.10889363288879395 + }, + { + "epoch": 3.373870849609375e-05, + "model_forward_time": 0.025385379791259766, + "step": 22111 + }, + { + "epoch": 3.373870849609375e-05, + "step": 22111, + "training_step_time": 0.10715579986572266 + }, + { + "epoch": 3.3740234375e-05, + "model_forward_time": 0.025226116180419922, + "step": 22112 + }, + { + "epoch": 3.3740234375e-05, + "step": 22112, + "training_step_time": 0.10509681701660156 + }, + { + "epoch": 3.374176025390625e-05, + "model_forward_time": 0.024886369705200195, + "step": 22113 + }, + { + "epoch": 3.374176025390625e-05, + "step": 22113, + "training_step_time": 0.11054396629333496 + }, + { + "epoch": 3.37432861328125e-05, + "model_forward_time": 0.02523016929626465, + "step": 22114 + }, + { + "epoch": 3.37432861328125e-05, + "step": 22114, + "training_step_time": 0.10825610160827637 + }, + { + "epoch": 3.374481201171875e-05, + "model_forward_time": 0.024632692337036133, + "step": 22115 + }, + { + "epoch": 3.374481201171875e-05, + "step": 22115, + "training_step_time": 0.10953164100646973 + }, + { + "epoch": 3.3746337890625e-05, + "model_forward_time": 0.024777650833129883, + "step": 22116 + }, + { + "epoch": 3.3746337890625e-05, + "step": 22116, + "training_step_time": 0.10456180572509766 + }, + { + "epoch": 3.374786376953125e-05, + "model_forward_time": 0.02474236488342285, + "step": 22117 + }, + { + "epoch": 3.374786376953125e-05, + "step": 22117, + "training_step_time": 0.10730457305908203 + }, + { + "epoch": 3.37493896484375e-05, + "model_forward_time": 0.025014400482177734, + "step": 22118 + }, + { + "epoch": 3.37493896484375e-05, + "step": 22118, + "training_step_time": 0.10668706893920898 + }, + { + "epoch": 3.375091552734375e-05, + "model_forward_time": 0.025732755661010742, + "step": 22119 + }, + { + "epoch": 3.375091552734375e-05, + "step": 22119, + "training_step_time": 0.10856461524963379 + }, + { + "epoch": 3.375244140625e-05, + "grad_norm": 0.1136520728468895, + "learning_rate": 1.770607195535639e-05, + "loss": 0.0163, + "step": 22120 + }, + { + "epoch": 3.375244140625e-05, + "model_forward_time": 0.0243377685546875, + "step": 22120 + }, + { + "epoch": 3.375244140625e-05, + "step": 22120, + "training_step_time": 0.13463807106018066 + }, + { + "epoch": 3.375396728515625e-05, + "model_forward_time": 0.024973392486572266, + "step": 22121 + }, + { + "epoch": 3.375396728515625e-05, + "step": 22121, + "training_step_time": 0.16428232192993164 + }, + { + "epoch": 3.37554931640625e-05, + "model_forward_time": 0.02422809600830078, + "step": 22122 + }, + { + "epoch": 3.37554931640625e-05, + "step": 22122, + "training_step_time": 0.11235189437866211 + }, + { + "epoch": 3.375701904296875e-05, + "model_forward_time": 0.02428913116455078, + "step": 22123 + }, + { + "epoch": 3.375701904296875e-05, + "step": 22123, + "training_step_time": 0.13874268531799316 + }, + { + "epoch": 3.3758544921875e-05, + "model_forward_time": 0.02481818199157715, + "step": 22124 + }, + { + "epoch": 3.3758544921875e-05, + "step": 22124, + "training_step_time": 0.18659186363220215 + }, + { + "epoch": 3.376007080078125e-05, + "model_forward_time": 0.023975610733032227, + "step": 22125 + }, + { + "epoch": 3.376007080078125e-05, + "step": 22125, + "training_step_time": 0.14794087409973145 + }, + { + "epoch": 3.37615966796875e-05, + "model_forward_time": 0.024412155151367188, + "step": 22126 + }, + { + "epoch": 3.37615966796875e-05, + "step": 22126, + "training_step_time": 0.10832095146179199 + }, + { + "epoch": 3.376312255859375e-05, + "model_forward_time": 0.02550482749938965, + "step": 22127 + }, + { + "epoch": 3.376312255859375e-05, + "step": 22127, + "training_step_time": 0.10826539993286133 + }, + { + "epoch": 3.37646484375e-05, + "model_forward_time": 0.02492499351501465, + "step": 22128 + }, + { + "epoch": 3.37646484375e-05, + "step": 22128, + "training_step_time": 0.10705804824829102 + }, + { + "epoch": 3.376617431640625e-05, + "model_forward_time": 0.025075674057006836, + "step": 22129 + }, + { + "epoch": 3.376617431640625e-05, + "step": 22129, + "training_step_time": 0.1069955825805664 + }, + { + "epoch": 3.37677001953125e-05, + "grad_norm": 0.10590667277574539, + "learning_rate": 1.7664014074981742e-05, + "loss": 0.0085, + "step": 22130 + }, + { + "epoch": 3.37677001953125e-05, + "model_forward_time": 0.025167226791381836, + "step": 22130 + }, + { + "epoch": 3.37677001953125e-05, + "step": 22130, + "training_step_time": 0.10798859596252441 + }, + { + "epoch": 3.376922607421875e-05, + "model_forward_time": 0.024977684020996094, + "step": 22131 + }, + { + "epoch": 3.376922607421875e-05, + "step": 22131, + "training_step_time": 0.20845770835876465 + }, + { + "epoch": 3.3770751953125e-05, + "model_forward_time": 0.02465224266052246, + "step": 22132 + }, + { + "epoch": 3.3770751953125e-05, + "step": 22132, + "training_step_time": 0.10904312133789062 + }, + { + "epoch": 3.377227783203125e-05, + "model_forward_time": 0.024756193161010742, + "step": 22133 + }, + { + "epoch": 3.377227783203125e-05, + "step": 22133, + "training_step_time": 0.10879039764404297 + }, + { + "epoch": 3.37738037109375e-05, + "model_forward_time": 0.024891376495361328, + "step": 22134 + }, + { + "epoch": 3.37738037109375e-05, + "step": 22134, + "training_step_time": 0.1254711151123047 + }, + { + "epoch": 3.377532958984375e-05, + "model_forward_time": 0.024827003479003906, + "step": 22135 + }, + { + "epoch": 3.377532958984375e-05, + "step": 22135, + "training_step_time": 0.12429451942443848 + }, + { + "epoch": 3.377685546875e-05, + "model_forward_time": 0.025003433227539062, + "step": 22136 + }, + { + "epoch": 3.377685546875e-05, + "step": 22136, + "training_step_time": 0.15282225608825684 + }, + { + "epoch": 3.377838134765625e-05, + "model_forward_time": 0.024444580078125, + "step": 22137 + }, + { + "epoch": 3.377838134765625e-05, + "step": 22137, + "training_step_time": 0.1221153736114502 + }, + { + "epoch": 3.37799072265625e-05, + "model_forward_time": 0.024462461471557617, + "step": 22138 + }, + { + "epoch": 3.37799072265625e-05, + "step": 22138, + "training_step_time": 0.10831356048583984 + }, + { + "epoch": 3.378143310546875e-05, + "model_forward_time": 0.025004148483276367, + "step": 22139 + }, + { + "epoch": 3.378143310546875e-05, + "step": 22139, + "training_step_time": 0.11420869827270508 + }, + { + "epoch": 3.3782958984375e-05, + "grad_norm": 0.11597984284162521, + "learning_rate": 1.7621995485879062e-05, + "loss": 0.006, + "step": 22140 + }, + { + "epoch": 3.3782958984375e-05, + "model_forward_time": 0.02406454086303711, + "step": 22140 + }, + { + "epoch": 3.3782958984375e-05, + "step": 22140, + "training_step_time": 0.11019039154052734 + }, + { + "epoch": 3.378448486328125e-05, + "model_forward_time": 0.025536537170410156, + "step": 22141 + }, + { + "epoch": 3.378448486328125e-05, + "step": 22141, + "training_step_time": 0.1114048957824707 + }, + { + "epoch": 3.37860107421875e-05, + "model_forward_time": 0.025068998336791992, + "step": 22142 + }, + { + "epoch": 3.37860107421875e-05, + "step": 22142, + "training_step_time": 0.10976409912109375 + }, + { + "epoch": 3.378753662109375e-05, + "model_forward_time": 0.02652883529663086, + "step": 22143 + }, + { + "epoch": 3.378753662109375e-05, + "step": 22143, + "training_step_time": 0.11104369163513184 + }, + { + "epoch": 3.37890625e-05, + "model_forward_time": 0.02503228187561035, + "step": 22144 + }, + { + "epoch": 3.37890625e-05, + "step": 22144, + "training_step_time": 0.10775947570800781 + }, + { + "epoch": 3.379058837890625e-05, + "model_forward_time": 0.02527451515197754, + "step": 22145 + }, + { + "epoch": 3.379058837890625e-05, + "step": 22145, + "training_step_time": 0.10788321495056152 + }, + { + "epoch": 3.37921142578125e-05, + "model_forward_time": 0.02521204948425293, + "step": 22146 + }, + { + "epoch": 3.37921142578125e-05, + "step": 22146, + "training_step_time": 0.11015009880065918 + }, + { + "epoch": 3.379364013671875e-05, + "model_forward_time": 0.025229454040527344, + "step": 22147 + }, + { + "epoch": 3.379364013671875e-05, + "step": 22147, + "training_step_time": 0.1063072681427002 + }, + { + "epoch": 3.3795166015625e-05, + "model_forward_time": 0.025147676467895508, + "step": 22148 + }, + { + "epoch": 3.3795166015625e-05, + "step": 22148, + "training_step_time": 0.20652055740356445 + }, + { + "epoch": 3.379669189453125e-05, + "model_forward_time": 0.0242154598236084, + "step": 22149 + }, + { + "epoch": 3.379669189453125e-05, + "step": 22149, + "training_step_time": 0.12357640266418457 + }, + { + "epoch": 3.37982177734375e-05, + "grad_norm": 0.08215171098709106, + "learning_rate": 1.7580016239104924e-05, + "loss": 0.0066, + "step": 22150 + }, + { + "epoch": 3.37982177734375e-05, + "model_forward_time": 0.0243072509765625, + "step": 22150 + }, + { + "epoch": 3.37982177734375e-05, + "step": 22150, + "training_step_time": 0.10963940620422363 + }, + { + "epoch": 3.379974365234375e-05, + "model_forward_time": 0.024663448333740234, + "step": 22151 + }, + { + "epoch": 3.379974365234375e-05, + "step": 22151, + "training_step_time": 0.1157689094543457 + }, + { + "epoch": 3.380126953125e-05, + "model_forward_time": 0.02483534812927246, + "step": 22152 + }, + { + "epoch": 3.380126953125e-05, + "step": 22152, + "training_step_time": 0.11003780364990234 + }, + { + "epoch": 3.380279541015625e-05, + "model_forward_time": 0.024890899658203125, + "step": 22153 + }, + { + "epoch": 3.380279541015625e-05, + "step": 22153, + "training_step_time": 0.10624408721923828 + }, + { + "epoch": 3.38043212890625e-05, + "model_forward_time": 0.025231122970581055, + "step": 22154 + }, + { + "epoch": 3.38043212890625e-05, + "step": 22154, + "training_step_time": 0.1752769947052002 + }, + { + "epoch": 3.380584716796875e-05, + "model_forward_time": 0.02527904510498047, + "step": 22155 + }, + { + "epoch": 3.380584716796875e-05, + "step": 22155, + "training_step_time": 0.10510063171386719 + }, + { + "epoch": 3.3807373046875e-05, + "model_forward_time": 0.026553869247436523, + "step": 22156 + }, + { + "epoch": 3.3807373046875e-05, + "step": 22156, + "training_step_time": 0.10563945770263672 + }, + { + "epoch": 3.380889892578125e-05, + "model_forward_time": 0.02507495880126953, + "step": 22157 + }, + { + "epoch": 3.380889892578125e-05, + "step": 22157, + "training_step_time": 0.1085667610168457 + }, + { + "epoch": 3.38104248046875e-05, + "model_forward_time": 0.024009227752685547, + "step": 22158 + }, + { + "epoch": 3.38104248046875e-05, + "step": 22158, + "training_step_time": 0.10533285140991211 + }, + { + "epoch": 3.381195068359375e-05, + "model_forward_time": 0.023781776428222656, + "step": 22159 + }, + { + "epoch": 3.381195068359375e-05, + "step": 22159, + "training_step_time": 0.10631918907165527 + }, + { + "epoch": 3.38134765625e-05, + "grad_norm": 0.10888959467411041, + "learning_rate": 1.753807638566805e-05, + "loss": 0.0047, + "step": 22160 + }, + { + "epoch": 3.38134765625e-05, + "model_forward_time": 0.02486896514892578, + "step": 22160 + }, + { + "epoch": 3.38134765625e-05, + "step": 22160, + "training_step_time": 0.10628581047058105 + }, + { + "epoch": 3.381500244140625e-05, + "model_forward_time": 0.024901151657104492, + "step": 22161 + }, + { + "epoch": 3.381500244140625e-05, + "step": 22161, + "training_step_time": 0.1091153621673584 + }, + { + "epoch": 3.38165283203125e-05, + "model_forward_time": 0.025144100189208984, + "step": 22162 + }, + { + "epoch": 3.38165283203125e-05, + "step": 22162, + "training_step_time": 0.1085355281829834 + }, + { + "epoch": 3.381805419921875e-05, + "model_forward_time": 0.024973392486572266, + "step": 22163 + }, + { + "epoch": 3.381805419921875e-05, + "step": 22163, + "training_step_time": 0.17773723602294922 + }, + { + "epoch": 3.3819580078125e-05, + "model_forward_time": 0.024677515029907227, + "step": 22164 + }, + { + "epoch": 3.3819580078125e-05, + "step": 22164, + "training_step_time": 0.20174765586853027 + }, + { + "epoch": 3.382110595703125e-05, + "model_forward_time": 0.024209976196289062, + "step": 22165 + }, + { + "epoch": 3.382110595703125e-05, + "step": 22165, + "training_step_time": 0.2161257266998291 + }, + { + "epoch": 3.38226318359375e-05, + "model_forward_time": 0.024022579193115234, + "step": 22166 + }, + { + "epoch": 3.38226318359375e-05, + "step": 22166, + "training_step_time": 0.18233847618103027 + }, + { + "epoch": 3.382415771484375e-05, + "model_forward_time": 0.024090290069580078, + "step": 22167 + }, + { + "epoch": 3.382415771484375e-05, + "step": 22167, + "training_step_time": 0.1870427131652832 + }, + { + "epoch": 3.382568359375e-05, + "model_forward_time": 0.024547338485717773, + "step": 22168 + }, + { + "epoch": 3.382568359375e-05, + "step": 22168, + "training_step_time": 0.17144465446472168 + }, + { + "epoch": 3.382720947265625e-05, + "model_forward_time": 0.024591684341430664, + "step": 22169 + }, + { + "epoch": 3.382720947265625e-05, + "step": 22169, + "training_step_time": 0.17792367935180664 + }, + { + "epoch": 3.38287353515625e-05, + "grad_norm": 0.09951245784759521, + "learning_rate": 1.749617597652934e-05, + "loss": 0.0044, + "step": 22170 + }, + { + "epoch": 3.38287353515625e-05, + "model_forward_time": 0.024028539657592773, + "step": 22170 + }, + { + "epoch": 3.38287353515625e-05, + "step": 22170, + "training_step_time": 0.10448408126831055 + }, + { + "epoch": 3.383026123046875e-05, + "model_forward_time": 0.024554967880249023, + "step": 22171 + }, + { + "epoch": 3.383026123046875e-05, + "step": 22171, + "training_step_time": 0.12530231475830078 + }, + { + "epoch": 3.3831787109375e-05, + "model_forward_time": 0.024995088577270508, + "step": 22172 + }, + { + "epoch": 3.3831787109375e-05, + "step": 22172, + "training_step_time": 0.12049150466918945 + }, + { + "epoch": 3.383331298828125e-05, + "model_forward_time": 0.02483987808227539, + "step": 22173 + }, + { + "epoch": 3.383331298828125e-05, + "step": 22173, + "training_step_time": 0.10395693778991699 + }, + { + "epoch": 3.38348388671875e-05, + "model_forward_time": 0.02515554428100586, + "step": 22174 + }, + { + "epoch": 3.38348388671875e-05, + "step": 22174, + "training_step_time": 0.10426592826843262 + }, + { + "epoch": 3.383636474609375e-05, + "model_forward_time": 0.025090694427490234, + "step": 22175 + }, + { + "epoch": 3.383636474609375e-05, + "step": 22175, + "training_step_time": 0.18278145790100098 + }, + { + "epoch": 3.3837890625e-05, + "model_forward_time": 0.02492356300354004, + "step": 22176 + }, + { + "epoch": 3.3837890625e-05, + "step": 22176, + "training_step_time": 0.1110689640045166 + }, + { + "epoch": 3.383941650390625e-05, + "model_forward_time": 0.024241209030151367, + "step": 22177 + }, + { + "epoch": 3.383941650390625e-05, + "step": 22177, + "training_step_time": 0.11284708976745605 + }, + { + "epoch": 3.38409423828125e-05, + "model_forward_time": 0.024901151657104492, + "step": 22178 + }, + { + "epoch": 3.38409423828125e-05, + "step": 22178, + "training_step_time": 0.128889799118042 + }, + { + "epoch": 3.384246826171875e-05, + "model_forward_time": 0.024940967559814453, + "step": 22179 + }, + { + "epoch": 3.384246826171875e-05, + "step": 22179, + "training_step_time": 0.12832355499267578 + }, + { + "epoch": 3.3843994140625e-05, + "grad_norm": 0.16980668902397156, + "learning_rate": 1.745431506260173e-05, + "loss": 0.0047, + "step": 22180 + }, + { + "epoch": 3.3843994140625e-05, + "model_forward_time": 0.025179624557495117, + "step": 22180 + }, + { + "epoch": 3.3843994140625e-05, + "step": 22180, + "training_step_time": 0.11362218856811523 + }, + { + "epoch": 3.384552001953125e-05, + "model_forward_time": 0.025287151336669922, + "step": 22181 + }, + { + "epoch": 3.384552001953125e-05, + "step": 22181, + "training_step_time": 0.11606049537658691 + }, + { + "epoch": 3.38470458984375e-05, + "model_forward_time": 0.02514481544494629, + "step": 22182 + }, + { + "epoch": 3.38470458984375e-05, + "step": 22182, + "training_step_time": 0.10805630683898926 + }, + { + "epoch": 3.384857177734375e-05, + "model_forward_time": 0.0252840518951416, + "step": 22183 + }, + { + "epoch": 3.384857177734375e-05, + "step": 22183, + "training_step_time": 0.10616350173950195 + }, + { + "epoch": 3.385009765625e-05, + "model_forward_time": 0.025262117385864258, + "step": 22184 + }, + { + "epoch": 3.385009765625e-05, + "step": 22184, + "training_step_time": 0.10661983489990234 + }, + { + "epoch": 3.385162353515625e-05, + "model_forward_time": 0.024952173233032227, + "step": 22185 + }, + { + "epoch": 3.385162353515625e-05, + "step": 22185, + "training_step_time": 0.11705470085144043 + }, + { + "epoch": 3.38531494140625e-05, + "model_forward_time": 0.025152206420898438, + "step": 22186 + }, + { + "epoch": 3.38531494140625e-05, + "step": 22186, + "training_step_time": 0.11028861999511719 + }, + { + "epoch": 3.385467529296875e-05, + "model_forward_time": 0.025119543075561523, + "step": 22187 + }, + { + "epoch": 3.385467529296875e-05, + "step": 22187, + "training_step_time": 0.11005067825317383 + }, + { + "epoch": 3.3856201171875e-05, + "model_forward_time": 0.025055885314941406, + "step": 22188 + }, + { + "epoch": 3.3856201171875e-05, + "step": 22188, + "training_step_time": 0.11140322685241699 + }, + { + "epoch": 3.385772705078125e-05, + "model_forward_time": 0.02509784698486328, + "step": 22189 + }, + { + "epoch": 3.385772705078125e-05, + "step": 22189, + "training_step_time": 0.10689234733581543 + }, + { + "epoch": 3.38592529296875e-05, + "grad_norm": 0.10665614902973175, + "learning_rate": 1.7412493694750176e-05, + "loss": 0.0057, + "step": 22190 + }, + { + "epoch": 3.38592529296875e-05, + "model_forward_time": 0.02534031867980957, + "step": 22190 + }, + { + "epoch": 3.38592529296875e-05, + "step": 22190, + "training_step_time": 0.11113286018371582 + }, + { + "epoch": 3.386077880859375e-05, + "model_forward_time": 0.02509284019470215, + "step": 22191 + }, + { + "epoch": 3.386077880859375e-05, + "step": 22191, + "training_step_time": 0.11188507080078125 + }, + { + "epoch": 3.38623046875e-05, + "model_forward_time": 0.0254514217376709, + "step": 22192 + }, + { + "epoch": 3.38623046875e-05, + "step": 22192, + "training_step_time": 0.11149263381958008 + }, + { + "epoch": 3.386383056640625e-05, + "model_forward_time": 0.02530813217163086, + "step": 22193 + }, + { + "epoch": 3.386383056640625e-05, + "step": 22193, + "training_step_time": 0.10808205604553223 + }, + { + "epoch": 3.38653564453125e-05, + "model_forward_time": 0.025127172470092773, + "step": 22194 + }, + { + "epoch": 3.38653564453125e-05, + "step": 22194, + "training_step_time": 0.11192512512207031 + }, + { + "epoch": 3.386688232421875e-05, + "model_forward_time": 0.024953603744506836, + "step": 22195 + }, + { + "epoch": 3.386688232421875e-05, + "step": 22195, + "training_step_time": 0.2112903594970703 + }, + { + "epoch": 3.3868408203125e-05, + "model_forward_time": 0.02407050132751465, + "step": 22196 + }, + { + "epoch": 3.3868408203125e-05, + "step": 22196, + "training_step_time": 0.10657572746276855 + }, + { + "epoch": 3.386993408203125e-05, + "model_forward_time": 0.024010896682739258, + "step": 22197 + }, + { + "epoch": 3.386993408203125e-05, + "step": 22197, + "training_step_time": 0.11416506767272949 + }, + { + "epoch": 3.38714599609375e-05, + "model_forward_time": 0.025293827056884766, + "step": 22198 + }, + { + "epoch": 3.38714599609375e-05, + "step": 22198, + "training_step_time": 0.16232728958129883 + }, + { + "epoch": 3.387298583984375e-05, + "model_forward_time": 0.024297714233398438, + "step": 22199 + }, + { + "epoch": 3.387298583984375e-05, + "step": 22199, + "training_step_time": 0.10534906387329102 + }, + { + "epoch": 3.387451171875e-05, + "grad_norm": 0.07339908927679062, + "learning_rate": 1.7370711923791567e-05, + "loss": 0.0067, + "step": 22200 + }, + { + "epoch": 3.387451171875e-05, + "model_forward_time": 0.02508687973022461, + "step": 22200 + }, + { + "epoch": 3.387451171875e-05, + "step": 22200, + "training_step_time": 0.10323643684387207 + }, + { + "epoch": 3.387603759765625e-05, + "model_forward_time": 0.026215553283691406, + "step": 22201 + }, + { + "epoch": 3.387603759765625e-05, + "step": 22201, + "training_step_time": 0.10805249214172363 + }, + { + "epoch": 3.38775634765625e-05, + "model_forward_time": 0.025249481201171875, + "step": 22202 + }, + { + "epoch": 3.38775634765625e-05, + "step": 22202, + "training_step_time": 0.10402250289916992 + }, + { + "epoch": 3.387908935546875e-05, + "model_forward_time": 0.02498483657836914, + "step": 22203 + }, + { + "epoch": 3.387908935546875e-05, + "step": 22203, + "training_step_time": 0.10381770133972168 + }, + { + "epoch": 3.3880615234375e-05, + "model_forward_time": 0.02507328987121582, + "step": 22204 + }, + { + "epoch": 3.3880615234375e-05, + "step": 22204, + "training_step_time": 0.10397219657897949 + }, + { + "epoch": 3.388214111328125e-05, + "model_forward_time": 0.025532007217407227, + "step": 22205 + }, + { + "epoch": 3.388214111328125e-05, + "step": 22205, + "training_step_time": 0.10472702980041504 + }, + { + "epoch": 3.38836669921875e-05, + "model_forward_time": 0.025216102600097656, + "step": 22206 + }, + { + "epoch": 3.38836669921875e-05, + "step": 22206, + "training_step_time": 0.10619211196899414 + }, + { + "epoch": 3.388519287109375e-05, + "model_forward_time": 0.025568723678588867, + "step": 22207 + }, + { + "epoch": 3.388519287109375e-05, + "step": 22207, + "training_step_time": 0.10539555549621582 + }, + { + "epoch": 3.388671875e-05, + "model_forward_time": 0.025257349014282227, + "step": 22208 + }, + { + "epoch": 3.388671875e-05, + "step": 22208, + "training_step_time": 0.10530948638916016 + }, + { + "epoch": 3.388824462890625e-05, + "model_forward_time": 0.02499103546142578, + "step": 22209 + }, + { + "epoch": 3.388824462890625e-05, + "step": 22209, + "training_step_time": 0.10581517219543457 + }, + { + "epoch": 3.38897705078125e-05, + "grad_norm": 0.28890460729599, + "learning_rate": 1.7328969800494726e-05, + "loss": 0.0072, + "step": 22210 + }, + { + "epoch": 3.38897705078125e-05, + "model_forward_time": 0.0258481502532959, + "step": 22210 + }, + { + "epoch": 3.38897705078125e-05, + "step": 22210, + "training_step_time": 0.10332131385803223 + }, + { + "epoch": 3.389129638671875e-05, + "model_forward_time": 0.02541518211364746, + "step": 22211 + }, + { + "epoch": 3.389129638671875e-05, + "step": 22211, + "training_step_time": 0.14841604232788086 + }, + { + "epoch": 3.3892822265625e-05, + "model_forward_time": 0.024779319763183594, + "step": 22212 + }, + { + "epoch": 3.3892822265625e-05, + "step": 22212, + "training_step_time": 0.15446972846984863 + }, + { + "epoch": 3.389434814453125e-05, + "model_forward_time": 0.024474620819091797, + "step": 22213 + }, + { + "epoch": 3.389434814453125e-05, + "step": 22213, + "training_step_time": 0.22321629524230957 + }, + { + "epoch": 3.38958740234375e-05, + "model_forward_time": 0.024509906768798828, + "step": 22214 + }, + { + "epoch": 3.38958740234375e-05, + "step": 22214, + "training_step_time": 0.1200108528137207 + }, + { + "epoch": 3.389739990234375e-05, + "model_forward_time": 0.02464771270751953, + "step": 22215 + }, + { + "epoch": 3.389739990234375e-05, + "step": 22215, + "training_step_time": 0.11345171928405762 + }, + { + "epoch": 3.389892578125e-05, + "model_forward_time": 0.02524542808532715, + "step": 22216 + }, + { + "epoch": 3.389892578125e-05, + "step": 22216, + "training_step_time": 0.11906933784484863 + }, + { + "epoch": 3.390045166015625e-05, + "model_forward_time": 0.025357723236083984, + "step": 22217 + }, + { + "epoch": 3.390045166015625e-05, + "step": 22217, + "training_step_time": 0.11003661155700684 + }, + { + "epoch": 3.39019775390625e-05, + "model_forward_time": 0.02523183822631836, + "step": 22218 + }, + { + "epoch": 3.39019775390625e-05, + "step": 22218, + "training_step_time": 0.10879087448120117 + }, + { + "epoch": 3.390350341796875e-05, + "model_forward_time": 0.025473594665527344, + "step": 22219 + }, + { + "epoch": 3.390350341796875e-05, + "step": 22219, + "training_step_time": 0.11221528053283691 + }, + { + "epoch": 3.3905029296875e-05, + "grad_norm": 0.12778474390506744, + "learning_rate": 1.7287267375580256e-05, + "loss": 0.0056, + "step": 22220 + }, + { + "epoch": 3.3905029296875e-05, + "model_forward_time": 0.025042295455932617, + "step": 22220 + }, + { + "epoch": 3.3905029296875e-05, + "step": 22220, + "training_step_time": 0.10469317436218262 + }, + { + "epoch": 3.390655517578125e-05, + "model_forward_time": 0.02385544776916504, + "step": 22221 + }, + { + "epoch": 3.390655517578125e-05, + "step": 22221, + "training_step_time": 0.10703158378601074 + }, + { + "epoch": 3.39080810546875e-05, + "model_forward_time": 0.02529287338256836, + "step": 22222 + }, + { + "epoch": 3.39080810546875e-05, + "step": 22222, + "training_step_time": 0.16131997108459473 + }, + { + "epoch": 3.390960693359375e-05, + "model_forward_time": 0.024844884872436523, + "step": 22223 + }, + { + "epoch": 3.390960693359375e-05, + "step": 22223, + "training_step_time": 0.11198925971984863 + }, + { + "epoch": 3.39111328125e-05, + "model_forward_time": 0.024639368057250977, + "step": 22224 + }, + { + "epoch": 3.39111328125e-05, + "step": 22224, + "training_step_time": 0.1094520092010498 + }, + { + "epoch": 3.391265869140625e-05, + "model_forward_time": 0.024764060974121094, + "step": 22225 + }, + { + "epoch": 3.391265869140625e-05, + "step": 22225, + "training_step_time": 0.1083519458770752 + }, + { + "epoch": 3.39141845703125e-05, + "model_forward_time": 0.025512218475341797, + "step": 22226 + }, + { + "epoch": 3.39141845703125e-05, + "step": 22226, + "training_step_time": 0.12638640403747559 + }, + { + "epoch": 3.391571044921875e-05, + "model_forward_time": 0.02500152587890625, + "step": 22227 + }, + { + "epoch": 3.391571044921875e-05, + "step": 22227, + "training_step_time": 0.1152350902557373 + }, + { + "epoch": 3.3917236328125e-05, + "model_forward_time": 0.02442145347595215, + "step": 22228 + }, + { + "epoch": 3.3917236328125e-05, + "step": 22228, + "training_step_time": 0.11396646499633789 + }, + { + "epoch": 3.391876220703125e-05, + "model_forward_time": 0.02481985092163086, + "step": 22229 + }, + { + "epoch": 3.391876220703125e-05, + "step": 22229, + "training_step_time": 0.10477828979492188 + }, + { + "epoch": 3.39202880859375e-05, + "grad_norm": 0.1929740309715271, + "learning_rate": 1.7245604699720535e-05, + "loss": 0.0037, + "step": 22230 + }, + { + "epoch": 3.39202880859375e-05, + "model_forward_time": 0.025355100631713867, + "step": 22230 + }, + { + "epoch": 3.39202880859375e-05, + "step": 22230, + "training_step_time": 0.10429716110229492 + }, + { + "epoch": 3.392181396484375e-05, + "model_forward_time": 0.025200366973876953, + "step": 22231 + }, + { + "epoch": 3.392181396484375e-05, + "step": 22231, + "training_step_time": 0.10840916633605957 + }, + { + "epoch": 3.392333984375e-05, + "model_forward_time": 0.02507781982421875, + "step": 22232 + }, + { + "epoch": 3.392333984375e-05, + "step": 22232, + "training_step_time": 0.10611605644226074 + }, + { + "epoch": 3.392486572265625e-05, + "model_forward_time": 0.024096965789794922, + "step": 22233 + }, + { + "epoch": 3.392486572265625e-05, + "step": 22233, + "training_step_time": 0.10585379600524902 + }, + { + "epoch": 3.39263916015625e-05, + "model_forward_time": 0.025354385375976562, + "step": 22234 + }, + { + "epoch": 3.39263916015625e-05, + "step": 22234, + "training_step_time": 0.1062319278717041 + }, + { + "epoch": 3.392791748046875e-05, + "model_forward_time": 0.02522587776184082, + "step": 22235 + }, + { + "epoch": 3.392791748046875e-05, + "step": 22235, + "training_step_time": 0.10551214218139648 + }, + { + "epoch": 3.3929443359375e-05, + "model_forward_time": 0.024923324584960938, + "step": 22236 + }, + { + "epoch": 3.3929443359375e-05, + "step": 22236, + "training_step_time": 0.10493135452270508 + }, + { + "epoch": 3.393096923828125e-05, + "model_forward_time": 0.02538466453552246, + "step": 22237 + }, + { + "epoch": 3.393096923828125e-05, + "step": 22237, + "training_step_time": 0.11011934280395508 + }, + { + "epoch": 3.39324951171875e-05, + "model_forward_time": 0.025057315826416016, + "step": 22238 + }, + { + "epoch": 3.39324951171875e-05, + "step": 22238, + "training_step_time": 0.11074638366699219 + }, + { + "epoch": 3.393402099609375e-05, + "model_forward_time": 0.025115489959716797, + "step": 22239 + }, + { + "epoch": 3.393402099609375e-05, + "step": 22239, + "training_step_time": 0.10809516906738281 + }, + { + "epoch": 3.3935546875e-05, + "grad_norm": 0.43977484107017517, + "learning_rate": 1.7203981823539643e-05, + "loss": 0.0115, + "step": 22240 + }, + { + "epoch": 3.3935546875e-05, + "model_forward_time": 0.02527141571044922, + "step": 22240 + }, + { + "epoch": 3.3935546875e-05, + "step": 22240, + "training_step_time": 0.18401718139648438 + }, + { + "epoch": 3.393707275390625e-05, + "model_forward_time": 0.024701356887817383, + "step": 22241 + }, + { + "epoch": 3.393707275390625e-05, + "step": 22241, + "training_step_time": 0.18105816841125488 + }, + { + "epoch": 3.39385986328125e-05, + "model_forward_time": 0.024062395095825195, + "step": 22242 + }, + { + "epoch": 3.39385986328125e-05, + "step": 22242, + "training_step_time": 0.19898295402526855 + }, + { + "epoch": 3.394012451171875e-05, + "model_forward_time": 0.0249788761138916, + "step": 22243 + }, + { + "epoch": 3.394012451171875e-05, + "step": 22243, + "training_step_time": 0.1036984920501709 + }, + { + "epoch": 3.3941650390625e-05, + "model_forward_time": 0.023246288299560547, + "step": 22244 + }, + { + "epoch": 3.3941650390625e-05, + "step": 22244, + "training_step_time": 0.10319852828979492 + }, + { + "epoch": 3.394317626953125e-05, + "model_forward_time": 0.025272130966186523, + "step": 22245 + }, + { + "epoch": 3.394317626953125e-05, + "step": 22245, + "training_step_time": 0.10411477088928223 + }, + { + "epoch": 3.39447021484375e-05, + "model_forward_time": 0.024898290634155273, + "step": 22246 + }, + { + "epoch": 3.39447021484375e-05, + "step": 22246, + "training_step_time": 0.10325217247009277 + }, + { + "epoch": 3.394622802734375e-05, + "model_forward_time": 0.024779558181762695, + "step": 22247 + }, + { + "epoch": 3.394622802734375e-05, + "step": 22247, + "training_step_time": 0.10794520378112793 + }, + { + "epoch": 3.394775390625e-05, + "model_forward_time": 0.025064468383789062, + "step": 22248 + }, + { + "epoch": 3.394775390625e-05, + "step": 22248, + "training_step_time": 0.10546875 + }, + { + "epoch": 3.394927978515625e-05, + "model_forward_time": 0.02550220489501953, + "step": 22249 + }, + { + "epoch": 3.394927978515625e-05, + "step": 22249, + "training_step_time": 0.10567975044250488 + }, + { + "epoch": 3.39508056640625e-05, + "grad_norm": 0.3760296404361725, + "learning_rate": 1.7162398797613282e-05, + "loss": 0.0036, + "step": 22250 + }, + { + "epoch": 3.39508056640625e-05, + "model_forward_time": 0.02500438690185547, + "step": 22250 + }, + { + "epoch": 3.39508056640625e-05, + "step": 22250, + "training_step_time": 0.10545969009399414 + }, + { + "epoch": 3.395233154296875e-05, + "model_forward_time": 0.02496790885925293, + "step": 22251 + }, + { + "epoch": 3.395233154296875e-05, + "step": 22251, + "training_step_time": 0.10519289970397949 + }, + { + "epoch": 3.3953857421875e-05, + "model_forward_time": 0.024667978286743164, + "step": 22252 + }, + { + "epoch": 3.3953857421875e-05, + "step": 22252, + "training_step_time": 0.10599684715270996 + }, + { + "epoch": 3.395538330078125e-05, + "model_forward_time": 0.024966955184936523, + "step": 22253 + }, + { + "epoch": 3.395538330078125e-05, + "step": 22253, + "training_step_time": 0.10933423042297363 + }, + { + "epoch": 3.39569091796875e-05, + "model_forward_time": 0.025258779525756836, + "step": 22254 + }, + { + "epoch": 3.39569091796875e-05, + "step": 22254, + "training_step_time": 0.10359883308410645 + }, + { + "epoch": 3.395843505859375e-05, + "model_forward_time": 0.02504587173461914, + "step": 22255 + }, + { + "epoch": 3.395843505859375e-05, + "step": 22255, + "training_step_time": 0.10813450813293457 + }, + { + "epoch": 3.39599609375e-05, + "model_forward_time": 0.024923086166381836, + "step": 22256 + }, + { + "epoch": 3.39599609375e-05, + "step": 22256, + "training_step_time": 0.10460448265075684 + }, + { + "epoch": 3.396148681640625e-05, + "model_forward_time": 0.024698972702026367, + "step": 22257 + }, + { + "epoch": 3.396148681640625e-05, + "step": 22257, + "training_step_time": 0.10447359085083008 + }, + { + "epoch": 3.39630126953125e-05, + "model_forward_time": 0.02630758285522461, + "step": 22258 + }, + { + "epoch": 3.39630126953125e-05, + "step": 22258, + "training_step_time": 0.11006546020507812 + }, + { + "epoch": 3.396453857421875e-05, + "model_forward_time": 0.025466203689575195, + "step": 22259 + }, + { + "epoch": 3.396453857421875e-05, + "step": 22259, + "training_step_time": 0.10492157936096191 + }, + { + "epoch": 3.3966064453125e-05, + "grad_norm": 0.24082809686660767, + "learning_rate": 1.712085567246878e-05, + "loss": 0.0083, + "step": 22260 + }, + { + "epoch": 3.3966064453125e-05, + "model_forward_time": 0.02491903305053711, + "step": 22260 + }, + { + "epoch": 3.3966064453125e-05, + "step": 22260, + "training_step_time": 0.13901996612548828 + }, + { + "epoch": 3.396759033203125e-05, + "model_forward_time": 0.02578139305114746, + "step": 22261 + }, + { + "epoch": 3.396759033203125e-05, + "step": 22261, + "training_step_time": 0.16664409637451172 + }, + { + "epoch": 3.39691162109375e-05, + "model_forward_time": 0.02461981773376465, + "step": 22262 + }, + { + "epoch": 3.39691162109375e-05, + "step": 22262, + "training_step_time": 0.17667698860168457 + }, + { + "epoch": 3.397064208984375e-05, + "model_forward_time": 0.0245206356048584, + "step": 22263 + }, + { + "epoch": 3.397064208984375e-05, + "step": 22263, + "training_step_time": 0.15718841552734375 + }, + { + "epoch": 3.397216796875e-05, + "model_forward_time": 0.02418828010559082, + "step": 22264 + }, + { + "epoch": 3.397216796875e-05, + "step": 22264, + "training_step_time": 0.16265416145324707 + }, + { + "epoch": 3.397369384765625e-05, + "model_forward_time": 0.024004697799682617, + "step": 22265 + }, + { + "epoch": 3.397369384765625e-05, + "step": 22265, + "training_step_time": 0.11006522178649902 + }, + { + "epoch": 3.39752197265625e-05, + "model_forward_time": 0.024549484252929688, + "step": 22266 + }, + { + "epoch": 3.39752197265625e-05, + "step": 22266, + "training_step_time": 0.13738775253295898 + }, + { + "epoch": 3.397674560546875e-05, + "model_forward_time": 0.024753808975219727, + "step": 22267 + }, + { + "epoch": 3.397674560546875e-05, + "step": 22267, + "training_step_time": 0.13679766654968262 + }, + { + "epoch": 3.3978271484375e-05, + "model_forward_time": 0.024460315704345703, + "step": 22268 + }, + { + "epoch": 3.3978271484375e-05, + "step": 22268, + "training_step_time": 0.1369616985321045 + }, + { + "epoch": 3.397979736328125e-05, + "model_forward_time": 0.023296117782592773, + "step": 22269 + }, + { + "epoch": 3.397979736328125e-05, + "step": 22269, + "training_step_time": 0.17188215255737305 + }, + { + "epoch": 3.39813232421875e-05, + "grad_norm": 0.12613904476165771, + "learning_rate": 1.7079352498584934e-05, + "loss": 0.0044, + "step": 22270 + }, + { + "epoch": 3.39813232421875e-05, + "model_forward_time": 0.024170875549316406, + "step": 22270 + }, + { + "epoch": 3.39813232421875e-05, + "step": 22270, + "training_step_time": 0.13015198707580566 + }, + { + "epoch": 3.398284912109375e-05, + "model_forward_time": 0.023164987564086914, + "step": 22271 + }, + { + "epoch": 3.398284912109375e-05, + "step": 22271, + "training_step_time": 0.20395278930664062 + }, + { + "epoch": 3.3984375e-05, + "model_forward_time": 0.024168014526367188, + "step": 22272 + }, + { + "epoch": 3.3984375e-05, + "step": 22272, + "training_step_time": 0.1381852626800537 + }, + { + "epoch": 3.398590087890625e-05, + "model_forward_time": 0.024350643157958984, + "step": 22273 + }, + { + "epoch": 3.398590087890625e-05, + "step": 22273, + "training_step_time": 0.11548519134521484 + }, + { + "epoch": 3.39874267578125e-05, + "model_forward_time": 0.023637771606445312, + "step": 22274 + }, + { + "epoch": 3.39874267578125e-05, + "step": 22274, + "training_step_time": 0.11224865913391113 + }, + { + "epoch": 3.398895263671875e-05, + "model_forward_time": 0.025110721588134766, + "step": 22275 + }, + { + "epoch": 3.398895263671875e-05, + "step": 22275, + "training_step_time": 0.10746049880981445 + }, + { + "epoch": 3.3990478515625e-05, + "model_forward_time": 0.0253140926361084, + "step": 22276 + }, + { + "epoch": 3.3990478515625e-05, + "step": 22276, + "training_step_time": 0.10890531539916992 + }, + { + "epoch": 3.399200439453125e-05, + "model_forward_time": 0.024935007095336914, + "step": 22277 + }, + { + "epoch": 3.399200439453125e-05, + "step": 22277, + "training_step_time": 0.10553598403930664 + }, + { + "epoch": 3.39935302734375e-05, + "model_forward_time": 0.02522110939025879, + "step": 22278 + }, + { + "epoch": 3.39935302734375e-05, + "step": 22278, + "training_step_time": 0.10856199264526367 + }, + { + "epoch": 3.399505615234375e-05, + "model_forward_time": 0.025410175323486328, + "step": 22279 + }, + { + "epoch": 3.399505615234375e-05, + "step": 22279, + "training_step_time": 0.10534143447875977 + }, + { + "epoch": 3.399658203125e-05, + "grad_norm": 0.10447924584150314, + "learning_rate": 1.703788932639202e-05, + "loss": 0.0071, + "step": 22280 + }, + { + "epoch": 3.399658203125e-05, + "model_forward_time": 0.025168180465698242, + "step": 22280 + }, + { + "epoch": 3.399658203125e-05, + "step": 22280, + "training_step_time": 0.10451126098632812 + }, + { + "epoch": 3.399810791015625e-05, + "model_forward_time": 0.025101900100708008, + "step": 22281 + }, + { + "epoch": 3.399810791015625e-05, + "step": 22281, + "training_step_time": 0.10871267318725586 + }, + { + "epoch": 3.39996337890625e-05, + "model_forward_time": 0.0251007080078125, + "step": 22282 + }, + { + "epoch": 3.39996337890625e-05, + "step": 22282, + "training_step_time": 0.1053922176361084 + }, + { + "epoch": 3.400115966796875e-05, + "model_forward_time": 0.025167226791381836, + "step": 22283 + }, + { + "epoch": 3.400115966796875e-05, + "step": 22283, + "training_step_time": 0.10477089881896973 + }, + { + "epoch": 3.4002685546875e-05, + "model_forward_time": 0.025130510330200195, + "step": 22284 + }, + { + "epoch": 3.4002685546875e-05, + "step": 22284, + "training_step_time": 0.12216544151306152 + }, + { + "epoch": 3.400421142578125e-05, + "model_forward_time": 0.025210142135620117, + "step": 22285 + }, + { + "epoch": 3.400421142578125e-05, + "step": 22285, + "training_step_time": 0.10521864891052246 + }, + { + "epoch": 3.40057373046875e-05, + "model_forward_time": 0.024967193603515625, + "step": 22286 + }, + { + "epoch": 3.40057373046875e-05, + "step": 22286, + "training_step_time": 0.21430444717407227 + }, + { + "epoch": 3.400726318359375e-05, + "model_forward_time": 0.02446722984313965, + "step": 22287 + }, + { + "epoch": 3.400726318359375e-05, + "step": 22287, + "training_step_time": 0.10526037216186523 + }, + { + "epoch": 3.40087890625e-05, + "model_forward_time": 0.02478623390197754, + "step": 22288 + }, + { + "epoch": 3.40087890625e-05, + "step": 22288, + "training_step_time": 0.11983728408813477 + }, + { + "epoch": 3.401031494140625e-05, + "model_forward_time": 0.024987459182739258, + "step": 22289 + }, + { + "epoch": 3.401031494140625e-05, + "step": 22289, + "training_step_time": 0.15848779678344727 + }, + { + "epoch": 3.40118408203125e-05, + "grad_norm": 0.08365624397993088, + "learning_rate": 1.699646620627168e-05, + "loss": 0.0059, + "step": 22290 + }, + { + "epoch": 3.40118408203125e-05, + "model_forward_time": 0.02446770668029785, + "step": 22290 + }, + { + "epoch": 3.40118408203125e-05, + "step": 22290, + "training_step_time": 0.1025381088256836 + }, + { + "epoch": 3.401336669921875e-05, + "model_forward_time": 0.024798154830932617, + "step": 22291 + }, + { + "epoch": 3.401336669921875e-05, + "step": 22291, + "training_step_time": 0.10355854034423828 + }, + { + "epoch": 3.4014892578125e-05, + "model_forward_time": 0.024913787841796875, + "step": 22292 + }, + { + "epoch": 3.4014892578125e-05, + "step": 22292, + "training_step_time": 0.10693526268005371 + }, + { + "epoch": 3.401641845703125e-05, + "model_forward_time": 0.025327444076538086, + "step": 22293 + }, + { + "epoch": 3.401641845703125e-05, + "step": 22293, + "training_step_time": 0.10769486427307129 + }, + { + "epoch": 3.40179443359375e-05, + "model_forward_time": 0.02492380142211914, + "step": 22294 + }, + { + "epoch": 3.40179443359375e-05, + "step": 22294, + "training_step_time": 0.10181808471679688 + }, + { + "epoch": 3.401947021484375e-05, + "model_forward_time": 0.025163650512695312, + "step": 22295 + }, + { + "epoch": 3.401947021484375e-05, + "step": 22295, + "training_step_time": 0.1043705940246582 + }, + { + "epoch": 3.402099609375e-05, + "model_forward_time": 0.02514958381652832, + "step": 22296 + }, + { + "epoch": 3.402099609375e-05, + "step": 22296, + "training_step_time": 0.1081244945526123 + }, + { + "epoch": 3.402252197265625e-05, + "model_forward_time": 0.025298357009887695, + "step": 22297 + }, + { + "epoch": 3.402252197265625e-05, + "step": 22297, + "training_step_time": 0.10935306549072266 + }, + { + "epoch": 3.40240478515625e-05, + "model_forward_time": 0.0252687931060791, + "step": 22298 + }, + { + "epoch": 3.40240478515625e-05, + "step": 22298, + "training_step_time": 0.10853719711303711 + }, + { + "epoch": 3.402557373046875e-05, + "model_forward_time": 0.02474188804626465, + "step": 22299 + }, + { + "epoch": 3.402557373046875e-05, + "step": 22299, + "training_step_time": 0.10465073585510254 + }, + { + "epoch": 3.4027099609375e-05, + "grad_norm": 0.15075549483299255, + "learning_rate": 1.6955083188556947e-05, + "loss": 0.0055, + "step": 22300 + }, + { + "epoch": 3.4027099609375e-05, + "model_forward_time": 0.02494645118713379, + "step": 22300 + }, + { + "epoch": 3.4027099609375e-05, + "step": 22300, + "training_step_time": 0.10527801513671875 + }, + { + "epoch": 3.402862548828125e-05, + "model_forward_time": 0.02557063102722168, + "step": 22301 + }, + { + "epoch": 3.402862548828125e-05, + "step": 22301, + "training_step_time": 0.10534882545471191 + }, + { + "epoch": 3.40301513671875e-05, + "model_forward_time": 0.02717757225036621, + "step": 22302 + }, + { + "epoch": 3.40301513671875e-05, + "step": 22302, + "training_step_time": 0.10916256904602051 + }, + { + "epoch": 3.403167724609375e-05, + "model_forward_time": 0.024744749069213867, + "step": 22303 + }, + { + "epoch": 3.403167724609375e-05, + "step": 22303, + "training_step_time": 0.1046910285949707 + }, + { + "epoch": 3.4033203125e-05, + "model_forward_time": 0.02456951141357422, + "step": 22304 + }, + { + "epoch": 3.4033203125e-05, + "step": 22304, + "training_step_time": 0.10333991050720215 + }, + { + "epoch": 3.403472900390625e-05, + "model_forward_time": 0.024039745330810547, + "step": 22305 + }, + { + "epoch": 3.403472900390625e-05, + "step": 22305, + "training_step_time": 0.14157509803771973 + }, + { + "epoch": 3.40362548828125e-05, + "model_forward_time": 0.02763652801513672, + "step": 22306 + }, + { + "epoch": 3.40362548828125e-05, + "step": 22306, + "training_step_time": 0.1618332862854004 + }, + { + "epoch": 3.403778076171875e-05, + "model_forward_time": 0.024574756622314453, + "step": 22307 + }, + { + "epoch": 3.403778076171875e-05, + "step": 22307, + "training_step_time": 0.18008875846862793 + }, + { + "epoch": 3.4039306640625e-05, + "model_forward_time": 0.024396181106567383, + "step": 22308 + }, + { + "epoch": 3.4039306640625e-05, + "step": 22308, + "training_step_time": 0.1524195671081543 + }, + { + "epoch": 3.404083251953125e-05, + "model_forward_time": 0.024350881576538086, + "step": 22309 + }, + { + "epoch": 3.404083251953125e-05, + "step": 22309, + "training_step_time": 0.17176151275634766 + }, + { + "epoch": 3.40423583984375e-05, + "grad_norm": 0.2075805515050888, + "learning_rate": 1.691374032353205e-05, + "loss": 0.0075, + "step": 22310 + }, + { + "epoch": 3.40423583984375e-05, + "model_forward_time": 0.024282217025756836, + "step": 22310 + }, + { + "epoch": 3.40423583984375e-05, + "step": 22310, + "training_step_time": 0.10498762130737305 + }, + { + "epoch": 3.404388427734375e-05, + "model_forward_time": 0.024506330490112305, + "step": 22311 + }, + { + "epoch": 3.404388427734375e-05, + "step": 22311, + "training_step_time": 0.1050260066986084 + }, + { + "epoch": 3.404541015625e-05, + "model_forward_time": 0.02545166015625, + "step": 22312 + }, + { + "epoch": 3.404541015625e-05, + "step": 22312, + "training_step_time": 0.10775494575500488 + }, + { + "epoch": 3.404693603515625e-05, + "model_forward_time": 0.025567054748535156, + "step": 22313 + }, + { + "epoch": 3.404693603515625e-05, + "step": 22313, + "training_step_time": 0.1628279685974121 + }, + { + "epoch": 3.40484619140625e-05, + "model_forward_time": 0.026265859603881836, + "step": 22314 + }, + { + "epoch": 3.40484619140625e-05, + "step": 22314, + "training_step_time": 0.16417717933654785 + }, + { + "epoch": 3.404998779296875e-05, + "model_forward_time": 0.0243680477142334, + "step": 22315 + }, + { + "epoch": 3.404998779296875e-05, + "step": 22315, + "training_step_time": 0.16736769676208496 + }, + { + "epoch": 3.4051513671875e-05, + "model_forward_time": 0.024075984954833984, + "step": 22316 + }, + { + "epoch": 3.4051513671875e-05, + "step": 22316, + "training_step_time": 0.16303777694702148 + }, + { + "epoch": 3.405303955078125e-05, + "model_forward_time": 0.023956298828125, + "step": 22317 + }, + { + "epoch": 3.405303955078125e-05, + "step": 22317, + "training_step_time": 0.13863325119018555 + }, + { + "epoch": 3.40545654296875e-05, + "model_forward_time": 0.024230241775512695, + "step": 22318 + }, + { + "epoch": 3.40545654296875e-05, + "step": 22318, + "training_step_time": 0.13014554977416992 + }, + { + "epoch": 3.405609130859375e-05, + "model_forward_time": 0.024568796157836914, + "step": 22319 + }, + { + "epoch": 3.405609130859375e-05, + "step": 22319, + "training_step_time": 0.16027235984802246 + }, + { + "epoch": 3.40576171875e-05, + "grad_norm": 0.3397980034351349, + "learning_rate": 1.6872437661432517e-05, + "loss": 0.0098, + "step": 22320 + }, + { + "epoch": 3.40576171875e-05, + "model_forward_time": 0.02552008628845215, + "step": 22320 + }, + { + "epoch": 3.40576171875e-05, + "step": 22320, + "training_step_time": 0.1410675048828125 + }, + { + "epoch": 3.405914306640625e-05, + "model_forward_time": 0.024914979934692383, + "step": 22321 + }, + { + "epoch": 3.405914306640625e-05, + "step": 22321, + "training_step_time": 0.10501766204833984 + }, + { + "epoch": 3.40606689453125e-05, + "model_forward_time": 0.025391101837158203, + "step": 22322 + }, + { + "epoch": 3.40606689453125e-05, + "step": 22322, + "training_step_time": 0.10269570350646973 + }, + { + "epoch": 3.406219482421875e-05, + "model_forward_time": 0.02498149871826172, + "step": 22323 + }, + { + "epoch": 3.406219482421875e-05, + "step": 22323, + "training_step_time": 0.10476851463317871 + }, + { + "epoch": 3.4063720703125e-05, + "model_forward_time": 0.0253751277923584, + "step": 22324 + }, + { + "epoch": 3.4063720703125e-05, + "step": 22324, + "training_step_time": 0.10556697845458984 + }, + { + "epoch": 3.406524658203125e-05, + "model_forward_time": 0.02503514289855957, + "step": 22325 + }, + { + "epoch": 3.406524658203125e-05, + "step": 22325, + "training_step_time": 0.10655093193054199 + }, + { + "epoch": 3.40667724609375e-05, + "model_forward_time": 0.025315284729003906, + "step": 22326 + }, + { + "epoch": 3.40667724609375e-05, + "step": 22326, + "training_step_time": 0.10497641563415527 + }, + { + "epoch": 3.406829833984375e-05, + "model_forward_time": 0.025534629821777344, + "step": 22327 + }, + { + "epoch": 3.406829833984375e-05, + "step": 22327, + "training_step_time": 0.10580945014953613 + }, + { + "epoch": 3.406982421875e-05, + "model_forward_time": 0.02538013458251953, + "step": 22328 + }, + { + "epoch": 3.406982421875e-05, + "step": 22328, + "training_step_time": 0.11018967628479004 + }, + { + "epoch": 3.407135009765625e-05, + "model_forward_time": 0.02521347999572754, + "step": 22329 + }, + { + "epoch": 3.407135009765625e-05, + "step": 22329, + "training_step_time": 0.1338033676147461 + }, + { + "epoch": 3.40728759765625e-05, + "grad_norm": 0.21587461233139038, + "learning_rate": 1.6831175252444943e-05, + "loss": 0.0048, + "step": 22330 + }, + { + "epoch": 3.40728759765625e-05, + "model_forward_time": 0.025536060333251953, + "step": 22330 + }, + { + "epoch": 3.40728759765625e-05, + "step": 22330, + "training_step_time": 0.10819196701049805 + }, + { + "epoch": 3.407440185546875e-05, + "model_forward_time": 0.024944782257080078, + "step": 22331 + }, + { + "epoch": 3.407440185546875e-05, + "step": 22331, + "training_step_time": 0.21061944961547852 + }, + { + "epoch": 3.4075927734375e-05, + "model_forward_time": 0.024202585220336914, + "step": 22332 + }, + { + "epoch": 3.4075927734375e-05, + "step": 22332, + "training_step_time": 0.10684037208557129 + }, + { + "epoch": 3.407745361328125e-05, + "model_forward_time": 0.024291038513183594, + "step": 22333 + }, + { + "epoch": 3.407745361328125e-05, + "step": 22333, + "training_step_time": 0.11098670959472656 + }, + { + "epoch": 3.40789794921875e-05, + "model_forward_time": 0.025118350982666016, + "step": 22334 + }, + { + "epoch": 3.40789794921875e-05, + "step": 22334, + "training_step_time": 0.10969853401184082 + }, + { + "epoch": 3.408050537109375e-05, + "model_forward_time": 0.025268077850341797, + "step": 22335 + }, + { + "epoch": 3.408050537109375e-05, + "step": 22335, + "training_step_time": 0.10558390617370605 + }, + { + "epoch": 3.408203125e-05, + "model_forward_time": 0.025547266006469727, + "step": 22336 + }, + { + "epoch": 3.408203125e-05, + "step": 22336, + "training_step_time": 0.1079864501953125 + }, + { + "epoch": 3.408355712890625e-05, + "model_forward_time": 0.024543046951293945, + "step": 22337 + }, + { + "epoch": 3.408355712890625e-05, + "step": 22337, + "training_step_time": 0.10457801818847656 + }, + { + "epoch": 3.40850830078125e-05, + "model_forward_time": 0.025679588317871094, + "step": 22338 + }, + { + "epoch": 3.40850830078125e-05, + "step": 22338, + "training_step_time": 0.1058967113494873 + }, + { + "epoch": 3.408660888671875e-05, + "model_forward_time": 0.025058984756469727, + "step": 22339 + }, + { + "epoch": 3.408660888671875e-05, + "step": 22339, + "training_step_time": 0.10463333129882812 + }, + { + "epoch": 3.4088134765625e-05, + "grad_norm": 0.08035118132829666, + "learning_rate": 1.6789953146707053e-05, + "loss": 0.0121, + "step": 22340 + }, + { + "epoch": 3.4088134765625e-05, + "model_forward_time": 0.02535414695739746, + "step": 22340 + }, + { + "epoch": 3.4088134765625e-05, + "step": 22340, + "training_step_time": 0.10564303398132324 + }, + { + "epoch": 3.408966064453125e-05, + "model_forward_time": 0.02519965171813965, + "step": 22341 + }, + { + "epoch": 3.408966064453125e-05, + "step": 22341, + "training_step_time": 0.10824823379516602 + }, + { + "epoch": 3.40911865234375e-05, + "model_forward_time": 0.02512216567993164, + "step": 22342 + }, + { + "epoch": 3.40911865234375e-05, + "step": 22342, + "training_step_time": 0.10604286193847656 + }, + { + "epoch": 3.409271240234375e-05, + "model_forward_time": 0.025490760803222656, + "step": 22343 + }, + { + "epoch": 3.409271240234375e-05, + "step": 22343, + "training_step_time": 0.10480117797851562 + }, + { + "epoch": 3.409423828125e-05, + "model_forward_time": 0.0247952938079834, + "step": 22344 + }, + { + "epoch": 3.409423828125e-05, + "step": 22344, + "training_step_time": 0.10836601257324219 + }, + { + "epoch": 3.409576416015625e-05, + "model_forward_time": 0.025383710861206055, + "step": 22345 + }, + { + "epoch": 3.409576416015625e-05, + "step": 22345, + "training_step_time": 0.10429072380065918 + }, + { + "epoch": 3.40972900390625e-05, + "model_forward_time": 0.025011062622070312, + "step": 22346 + }, + { + "epoch": 3.40972900390625e-05, + "step": 22346, + "training_step_time": 0.10444331169128418 + }, + { + "epoch": 3.409881591796875e-05, + "model_forward_time": 0.02515435218811035, + "step": 22347 + }, + { + "epoch": 3.409881591796875e-05, + "step": 22347, + "training_step_time": 0.10367679595947266 + }, + { + "epoch": 3.4100341796875e-05, + "model_forward_time": 0.02533411979675293, + "step": 22348 + }, + { + "epoch": 3.4100341796875e-05, + "step": 22348, + "training_step_time": 0.10473203659057617 + }, + { + "epoch": 3.410186767578125e-05, + "model_forward_time": 0.02552628517150879, + "step": 22349 + }, + { + "epoch": 3.410186767578125e-05, + "step": 22349, + "training_step_time": 0.1061854362487793 + }, + { + "epoch": 3.41033935546875e-05, + "grad_norm": 0.15022511780261993, + "learning_rate": 1.6748771394307585e-05, + "loss": 0.0102, + "step": 22350 + }, + { + "epoch": 3.41033935546875e-05, + "model_forward_time": 0.02396869659423828, + "step": 22350 + }, + { + "epoch": 3.41033935546875e-05, + "step": 22350, + "training_step_time": 0.14788818359375 + }, + { + "epoch": 3.410491943359375e-05, + "model_forward_time": 0.025060176849365234, + "step": 22351 + }, + { + "epoch": 3.410491943359375e-05, + "step": 22351, + "training_step_time": 0.15220975875854492 + }, + { + "epoch": 3.41064453125e-05, + "model_forward_time": 0.024693012237548828, + "step": 22352 + }, + { + "epoch": 3.41064453125e-05, + "step": 22352, + "training_step_time": 0.11061310768127441 + }, + { + "epoch": 3.410797119140625e-05, + "model_forward_time": 0.024750471115112305, + "step": 22353 + }, + { + "epoch": 3.410797119140625e-05, + "step": 22353, + "training_step_time": 0.13216876983642578 + }, + { + "epoch": 3.41094970703125e-05, + "model_forward_time": 0.025804758071899414, + "step": 22354 + }, + { + "epoch": 3.41094970703125e-05, + "step": 22354, + "training_step_time": 0.20363235473632812 + }, + { + "epoch": 3.411102294921875e-05, + "model_forward_time": 0.023818254470825195, + "step": 22355 + }, + { + "epoch": 3.411102294921875e-05, + "step": 22355, + "training_step_time": 0.10530424118041992 + }, + { + "epoch": 3.4112548828125e-05, + "model_forward_time": 0.024637699127197266, + "step": 22356 + }, + { + "epoch": 3.4112548828125e-05, + "step": 22356, + "training_step_time": 0.10890626907348633 + }, + { + "epoch": 3.411407470703125e-05, + "model_forward_time": 0.025214672088623047, + "step": 22357 + }, + { + "epoch": 3.411407470703125e-05, + "step": 22357, + "training_step_time": 0.11024284362792969 + }, + { + "epoch": 3.41156005859375e-05, + "model_forward_time": 0.024851322174072266, + "step": 22358 + }, + { + "epoch": 3.41156005859375e-05, + "step": 22358, + "training_step_time": 0.10738706588745117 + }, + { + "epoch": 3.411712646484375e-05, + "model_forward_time": 0.024818897247314453, + "step": 22359 + }, + { + "epoch": 3.411712646484375e-05, + "step": 22359, + "training_step_time": 0.11810970306396484 + }, + { + "epoch": 3.411865234375e-05, + "grad_norm": 0.11675149947404861, + "learning_rate": 1.6707630045286265e-05, + "loss": 0.0043, + "step": 22360 + }, + { + "epoch": 3.411865234375e-05, + "model_forward_time": 0.025145292282104492, + "step": 22360 + }, + { + "epoch": 3.411865234375e-05, + "step": 22360, + "training_step_time": 0.10600924491882324 + }, + { + "epoch": 3.412017822265625e-05, + "model_forward_time": 0.025168180465698242, + "step": 22361 + }, + { + "epoch": 3.412017822265625e-05, + "step": 22361, + "training_step_time": 0.19519591331481934 + }, + { + "epoch": 3.41217041015625e-05, + "model_forward_time": 0.024242877960205078, + "step": 22362 + }, + { + "epoch": 3.41217041015625e-05, + "step": 22362, + "training_step_time": 0.11031818389892578 + }, + { + "epoch": 3.412322998046875e-05, + "model_forward_time": 0.024910688400268555, + "step": 22363 + }, + { + "epoch": 3.412322998046875e-05, + "step": 22363, + "training_step_time": 0.1074976921081543 + }, + { + "epoch": 3.4124755859375e-05, + "model_forward_time": 0.025353431701660156, + "step": 22364 + }, + { + "epoch": 3.4124755859375e-05, + "step": 22364, + "training_step_time": 0.13373827934265137 + }, + { + "epoch": 3.412628173828125e-05, + "model_forward_time": 0.02839493751525879, + "step": 22365 + }, + { + "epoch": 3.412628173828125e-05, + "step": 22365, + "training_step_time": 0.12445473670959473 + }, + { + "epoch": 3.41278076171875e-05, + "model_forward_time": 0.024983882904052734, + "step": 22366 + }, + { + "epoch": 3.41278076171875e-05, + "step": 22366, + "training_step_time": 0.11909723281860352 + }, + { + "epoch": 3.412933349609375e-05, + "model_forward_time": 0.023677825927734375, + "step": 22367 + }, + { + "epoch": 3.412933349609375e-05, + "step": 22367, + "training_step_time": 0.1439499855041504 + }, + { + "epoch": 3.4130859375e-05, + "model_forward_time": 0.023617267608642578, + "step": 22368 + }, + { + "epoch": 3.4130859375e-05, + "step": 22368, + "training_step_time": 0.14217066764831543 + }, + { + "epoch": 3.413238525390625e-05, + "model_forward_time": 0.024296998977661133, + "step": 22369 + }, + { + "epoch": 3.413238525390625e-05, + "step": 22369, + "training_step_time": 0.1460132598876953 + }, + { + "epoch": 3.41339111328125e-05, + "grad_norm": 0.12481307238340378, + "learning_rate": 1.666652914963371e-05, + "loss": 0.0178, + "step": 22370 + }, + { + "epoch": 3.41339111328125e-05, + "model_forward_time": 0.024140357971191406, + "step": 22370 + }, + { + "epoch": 3.41339111328125e-05, + "step": 22370, + "training_step_time": 0.14366579055786133 + }, + { + "epoch": 3.413543701171875e-05, + "model_forward_time": 0.024472713470458984, + "step": 22371 + }, + { + "epoch": 3.413543701171875e-05, + "step": 22371, + "training_step_time": 0.13518762588500977 + }, + { + "epoch": 3.4136962890625e-05, + "model_forward_time": 0.024513721466064453, + "step": 22372 + }, + { + "epoch": 3.4136962890625e-05, + "step": 22372, + "training_step_time": 0.12340426445007324 + }, + { + "epoch": 3.413848876953125e-05, + "model_forward_time": 0.024428129196166992, + "step": 22373 + }, + { + "epoch": 3.413848876953125e-05, + "step": 22373, + "training_step_time": 0.12000799179077148 + }, + { + "epoch": 3.41400146484375e-05, + "model_forward_time": 0.024433374404907227, + "step": 22374 + }, + { + "epoch": 3.41400146484375e-05, + "step": 22374, + "training_step_time": 0.18062996864318848 + }, + { + "epoch": 3.414154052734375e-05, + "model_forward_time": 0.024607419967651367, + "step": 22375 + }, + { + "epoch": 3.414154052734375e-05, + "step": 22375, + "training_step_time": 0.11812496185302734 + }, + { + "epoch": 3.414306640625e-05, + "model_forward_time": 0.024066448211669922, + "step": 22376 + }, + { + "epoch": 3.414306640625e-05, + "step": 22376, + "training_step_time": 0.20033907890319824 + }, + { + "epoch": 3.414459228515625e-05, + "model_forward_time": 0.02404165267944336, + "step": 22377 + }, + { + "epoch": 3.414459228515625e-05, + "step": 22377, + "training_step_time": 0.19022870063781738 + }, + { + "epoch": 3.41461181640625e-05, + "model_forward_time": 0.027565479278564453, + "step": 22378 + }, + { + "epoch": 3.41461181640625e-05, + "step": 22378, + "training_step_time": 0.13741850852966309 + }, + { + "epoch": 3.414764404296875e-05, + "model_forward_time": 0.024247407913208008, + "step": 22379 + }, + { + "epoch": 3.414764404296875e-05, + "step": 22379, + "training_step_time": 0.11807107925415039 + }, + { + "epoch": 3.4149169921875e-05, + "grad_norm": 0.11971784383058548, + "learning_rate": 1.662546875729138e-05, + "loss": 0.0072, + "step": 22380 + }, + { + "epoch": 3.4149169921875e-05, + "model_forward_time": 0.02432847023010254, + "step": 22380 + }, + { + "epoch": 3.4149169921875e-05, + "step": 22380, + "training_step_time": 0.10497665405273438 + }, + { + "epoch": 3.415069580078125e-05, + "model_forward_time": 0.024884462356567383, + "step": 22381 + }, + { + "epoch": 3.415069580078125e-05, + "step": 22381, + "training_step_time": 0.10737729072570801 + }, + { + "epoch": 3.41522216796875e-05, + "model_forward_time": 0.024908065795898438, + "step": 22382 + }, + { + "epoch": 3.41522216796875e-05, + "step": 22382, + "training_step_time": 0.10604476928710938 + }, + { + "epoch": 3.415374755859375e-05, + "model_forward_time": 0.024859189987182617, + "step": 22383 + }, + { + "epoch": 3.415374755859375e-05, + "step": 22383, + "training_step_time": 0.10712456703186035 + }, + { + "epoch": 3.41552734375e-05, + "model_forward_time": 0.025123119354248047, + "step": 22384 + }, + { + "epoch": 3.41552734375e-05, + "step": 22384, + "training_step_time": 0.1079111099243164 + }, + { + "epoch": 3.415679931640625e-05, + "model_forward_time": 0.024837970733642578, + "step": 22385 + }, + { + "epoch": 3.415679931640625e-05, + "step": 22385, + "training_step_time": 0.10713815689086914 + }, + { + "epoch": 3.41583251953125e-05, + "model_forward_time": 0.024560213088989258, + "step": 22386 + }, + { + "epoch": 3.41583251953125e-05, + "step": 22386, + "training_step_time": 0.10690021514892578 + }, + { + "epoch": 3.415985107421875e-05, + "model_forward_time": 0.02489185333251953, + "step": 22387 + }, + { + "epoch": 3.415985107421875e-05, + "step": 22387, + "training_step_time": 0.10455060005187988 + }, + { + "epoch": 3.4161376953125e-05, + "model_forward_time": 0.024939775466918945, + "step": 22388 + }, + { + "epoch": 3.4161376953125e-05, + "step": 22388, + "training_step_time": 0.10802173614501953 + }, + { + "epoch": 3.416290283203125e-05, + "model_forward_time": 0.025206565856933594, + "step": 22389 + }, + { + "epoch": 3.416290283203125e-05, + "step": 22389, + "training_step_time": 0.10504388809204102 + }, + { + "epoch": 3.41644287109375e-05, + "grad_norm": 0.09075043350458145, + "learning_rate": 1.658444891815152e-05, + "loss": 0.0035, + "step": 22390 + }, + { + "epoch": 3.41644287109375e-05, + "model_forward_time": 0.024889469146728516, + "step": 22390 + }, + { + "epoch": 3.41644287109375e-05, + "step": 22390, + "training_step_time": 0.10416698455810547 + }, + { + "epoch": 3.416595458984375e-05, + "model_forward_time": 0.024668216705322266, + "step": 22391 + }, + { + "epoch": 3.416595458984375e-05, + "step": 22391, + "training_step_time": 0.10533714294433594 + }, + { + "epoch": 3.416748046875e-05, + "model_forward_time": 0.024922609329223633, + "step": 22392 + }, + { + "epoch": 3.416748046875e-05, + "step": 22392, + "training_step_time": 0.10623502731323242 + }, + { + "epoch": 3.416900634765625e-05, + "model_forward_time": 0.024741411209106445, + "step": 22393 + }, + { + "epoch": 3.416900634765625e-05, + "step": 22393, + "training_step_time": 0.10268163681030273 + }, + { + "epoch": 3.41705322265625e-05, + "model_forward_time": 0.02620410919189453, + "step": 22394 + }, + { + "epoch": 3.41705322265625e-05, + "step": 22394, + "training_step_time": 0.12650251388549805 + }, + { + "epoch": 3.417205810546875e-05, + "model_forward_time": 0.025969266891479492, + "step": 22395 + }, + { + "epoch": 3.417205810546875e-05, + "step": 22395, + "training_step_time": 0.16384100914001465 + }, + { + "epoch": 3.4173583984375e-05, + "model_forward_time": 0.02457404136657715, + "step": 22396 + }, + { + "epoch": 3.4173583984375e-05, + "step": 22396, + "training_step_time": 0.10990381240844727 + }, + { + "epoch": 3.417510986328125e-05, + "model_forward_time": 0.0243833065032959, + "step": 22397 + }, + { + "epoch": 3.417510986328125e-05, + "step": 22397, + "training_step_time": 0.13201379776000977 + }, + { + "epoch": 3.41766357421875e-05, + "model_forward_time": 0.025101661682128906, + "step": 22398 + }, + { + "epoch": 3.41766357421875e-05, + "step": 22398, + "training_step_time": 0.19716978073120117 + }, + { + "epoch": 3.417816162109375e-05, + "model_forward_time": 0.024075984954833984, + "step": 22399 + }, + { + "epoch": 3.417816162109375e-05, + "step": 22399, + "training_step_time": 0.11251068115234375 + }, + { + "epoch": 3.41796875e-05, + "grad_norm": 0.396480917930603, + "learning_rate": 1.6543469682057106e-05, + "loss": 0.0116, + "step": 22400 + }, + { + "epoch": 3.41796875e-05, + "model_forward_time": 0.02446126937866211, + "step": 22400 + }, + { + "epoch": 3.41796875e-05, + "step": 22400, + "training_step_time": 0.1990652084350586 + }, + { + "epoch": 3.418121337890625e-05, + "model_forward_time": 0.024676084518432617, + "step": 22401 + }, + { + "epoch": 3.418121337890625e-05, + "step": 22401, + "training_step_time": 0.10377097129821777 + }, + { + "epoch": 3.41827392578125e-05, + "model_forward_time": 0.025716066360473633, + "step": 22402 + }, + { + "epoch": 3.41827392578125e-05, + "step": 22402, + "training_step_time": 0.14365172386169434 + }, + { + "epoch": 3.418426513671875e-05, + "model_forward_time": 0.02464747428894043, + "step": 22403 + }, + { + "epoch": 3.418426513671875e-05, + "step": 22403, + "training_step_time": 0.17332959175109863 + }, + { + "epoch": 3.4185791015625e-05, + "model_forward_time": 0.024372339248657227, + "step": 22404 + }, + { + "epoch": 3.4185791015625e-05, + "step": 22404, + "training_step_time": 0.19342589378356934 + }, + { + "epoch": 3.418731689453125e-05, + "model_forward_time": 0.02457118034362793, + "step": 22405 + }, + { + "epoch": 3.418731689453125e-05, + "step": 22405, + "training_step_time": 0.15049076080322266 + }, + { + "epoch": 3.41888427734375e-05, + "model_forward_time": 0.023029565811157227, + "step": 22406 + }, + { + "epoch": 3.41888427734375e-05, + "step": 22406, + "training_step_time": 0.21095061302185059 + }, + { + "epoch": 3.419036865234375e-05, + "model_forward_time": 0.024020671844482422, + "step": 22407 + }, + { + "epoch": 3.419036865234375e-05, + "step": 22407, + "training_step_time": 0.13986468315124512 + }, + { + "epoch": 3.419189453125e-05, + "model_forward_time": 0.023830890655517578, + "step": 22408 + }, + { + "epoch": 3.419189453125e-05, + "step": 22408, + "training_step_time": 0.10308551788330078 + }, + { + "epoch": 3.419342041015625e-05, + "model_forward_time": 0.024825096130371094, + "step": 22409 + }, + { + "epoch": 3.419342041015625e-05, + "step": 22409, + "training_step_time": 0.11868810653686523 + }, + { + "epoch": 3.41949462890625e-05, + "grad_norm": 0.10397058725357056, + "learning_rate": 1.6502531098801753e-05, + "loss": 0.0186, + "step": 22410 + }, + { + "epoch": 3.41949462890625e-05, + "model_forward_time": 0.025275468826293945, + "step": 22410 + }, + { + "epoch": 3.41949462890625e-05, + "step": 22410, + "training_step_time": 0.10603189468383789 + }, + { + "epoch": 3.419647216796875e-05, + "model_forward_time": 0.024883270263671875, + "step": 22411 + }, + { + "epoch": 3.419647216796875e-05, + "step": 22411, + "training_step_time": 0.10431623458862305 + }, + { + "epoch": 3.4197998046875e-05, + "model_forward_time": 0.02496027946472168, + "step": 22412 + }, + { + "epoch": 3.4197998046875e-05, + "step": 22412, + "training_step_time": 0.10762619972229004 + }, + { + "epoch": 3.419952392578125e-05, + "model_forward_time": 0.024773120880126953, + "step": 22413 + }, + { + "epoch": 3.419952392578125e-05, + "step": 22413, + "training_step_time": 0.1041421890258789 + }, + { + "epoch": 3.42010498046875e-05, + "model_forward_time": 0.027117490768432617, + "step": 22414 + }, + { + "epoch": 3.42010498046875e-05, + "step": 22414, + "training_step_time": 0.10729074478149414 + }, + { + "epoch": 3.420257568359375e-05, + "model_forward_time": 0.024976491928100586, + "step": 22415 + }, + { + "epoch": 3.420257568359375e-05, + "step": 22415, + "training_step_time": 0.10467028617858887 + }, + { + "epoch": 3.42041015625e-05, + "model_forward_time": 0.02530527114868164, + "step": 22416 + }, + { + "epoch": 3.42041015625e-05, + "step": 22416, + "training_step_time": 0.13192224502563477 + }, + { + "epoch": 3.420562744140625e-05, + "model_forward_time": 0.024147987365722656, + "step": 22417 + }, + { + "epoch": 3.420562744140625e-05, + "step": 22417, + "training_step_time": 0.14472723007202148 + }, + { + "epoch": 3.42071533203125e-05, + "model_forward_time": 0.023494720458984375, + "step": 22418 + }, + { + "epoch": 3.42071533203125e-05, + "step": 22418, + "training_step_time": 0.13454699516296387 + }, + { + "epoch": 3.420867919921875e-05, + "model_forward_time": 0.023492097854614258, + "step": 22419 + }, + { + "epoch": 3.420867919921875e-05, + "step": 22419, + "training_step_time": 0.21616315841674805 + }, + { + "epoch": 3.4210205078125e-05, + "grad_norm": 0.276035875082016, + "learning_rate": 1.646163321812974e-05, + "loss": 0.0079, + "step": 22420 + }, + { + "epoch": 3.4210205078125e-05, + "model_forward_time": 0.023929595947265625, + "step": 22420 + }, + { + "epoch": 3.4210205078125e-05, + "step": 22420, + "training_step_time": 0.14091014862060547 + }, + { + "epoch": 3.421173095703125e-05, + "model_forward_time": 0.02567601203918457, + "step": 22421 + }, + { + "epoch": 3.421173095703125e-05, + "step": 22421, + "training_step_time": 0.11544156074523926 + }, + { + "epoch": 3.42132568359375e-05, + "model_forward_time": 0.02512836456298828, + "step": 22422 + }, + { + "epoch": 3.42132568359375e-05, + "step": 22422, + "training_step_time": 0.12088990211486816 + }, + { + "epoch": 3.421478271484375e-05, + "model_forward_time": 0.0251615047454834, + "step": 22423 + }, + { + "epoch": 3.421478271484375e-05, + "step": 22423, + "training_step_time": 0.111785888671875 + }, + { + "epoch": 3.421630859375e-05, + "model_forward_time": 0.025393009185791016, + "step": 22424 + }, + { + "epoch": 3.421630859375e-05, + "step": 22424, + "training_step_time": 0.10827946662902832 + }, + { + "epoch": 3.421783447265625e-05, + "model_forward_time": 0.024881362915039062, + "step": 22425 + }, + { + "epoch": 3.421783447265625e-05, + "step": 22425, + "training_step_time": 0.10597825050354004 + }, + { + "epoch": 3.42193603515625e-05, + "model_forward_time": 0.025358915328979492, + "step": 22426 + }, + { + "epoch": 3.42193603515625e-05, + "step": 22426, + "training_step_time": 0.10537934303283691 + }, + { + "epoch": 3.422088623046875e-05, + "model_forward_time": 0.0249631404876709, + "step": 22427 + }, + { + "epoch": 3.422088623046875e-05, + "step": 22427, + "training_step_time": 0.11023807525634766 + }, + { + "epoch": 3.4222412109375e-05, + "model_forward_time": 0.02502751350402832, + "step": 22428 + }, + { + "epoch": 3.4222412109375e-05, + "step": 22428, + "training_step_time": 0.10494303703308105 + }, + { + "epoch": 3.422393798828125e-05, + "model_forward_time": 0.02562713623046875, + "step": 22429 + }, + { + "epoch": 3.422393798828125e-05, + "step": 22429, + "training_step_time": 0.11208915710449219 + }, + { + "epoch": 3.42254638671875e-05, + "grad_norm": 0.3813806474208832, + "learning_rate": 1.6420776089735827e-05, + "loss": 0.0051, + "step": 22430 + }, + { + "epoch": 3.42254638671875e-05, + "model_forward_time": 0.02512073516845703, + "step": 22430 + }, + { + "epoch": 3.42254638671875e-05, + "step": 22430, + "training_step_time": 0.11412310600280762 + }, + { + "epoch": 3.422698974609375e-05, + "model_forward_time": 0.024928569793701172, + "step": 22431 + }, + { + "epoch": 3.422698974609375e-05, + "step": 22431, + "training_step_time": 0.10455942153930664 + }, + { + "epoch": 3.4228515625e-05, + "model_forward_time": 0.025096416473388672, + "step": 22432 + }, + { + "epoch": 3.4228515625e-05, + "step": 22432, + "training_step_time": 0.10565042495727539 + }, + { + "epoch": 3.423004150390625e-05, + "model_forward_time": 0.025542736053466797, + "step": 22433 + }, + { + "epoch": 3.423004150390625e-05, + "step": 22433, + "training_step_time": 0.10500717163085938 + }, + { + "epoch": 3.42315673828125e-05, + "model_forward_time": 0.0256502628326416, + "step": 22434 + }, + { + "epoch": 3.42315673828125e-05, + "step": 22434, + "training_step_time": 0.10578346252441406 + }, + { + "epoch": 3.423309326171875e-05, + "model_forward_time": 0.025389671325683594, + "step": 22435 + }, + { + "epoch": 3.423309326171875e-05, + "step": 22435, + "training_step_time": 0.10383963584899902 + }, + { + "epoch": 3.4234619140625e-05, + "model_forward_time": 0.025395631790161133, + "step": 22436 + }, + { + "epoch": 3.4234619140625e-05, + "step": 22436, + "training_step_time": 0.11177444458007812 + }, + { + "epoch": 3.423614501953125e-05, + "model_forward_time": 0.02590489387512207, + "step": 22437 + }, + { + "epoch": 3.423614501953125e-05, + "step": 22437, + "training_step_time": 0.10516667366027832 + }, + { + "epoch": 3.42376708984375e-05, + "model_forward_time": 0.024885177612304688, + "step": 22438 + }, + { + "epoch": 3.42376708984375e-05, + "step": 22438, + "training_step_time": 0.15067505836486816 + }, + { + "epoch": 3.423919677734375e-05, + "model_forward_time": 0.02453756332397461, + "step": 22439 + }, + { + "epoch": 3.423919677734375e-05, + "step": 22439, + "training_step_time": 0.21863007545471191 + }, + { + "epoch": 3.424072265625e-05, + "grad_norm": 0.1491369605064392, + "learning_rate": 1.637995976326527e-05, + "loss": 0.0054, + "step": 22440 + }, + { + "epoch": 3.424072265625e-05, + "model_forward_time": 0.02414107322692871, + "step": 22440 + }, + { + "epoch": 3.424072265625e-05, + "step": 22440, + "training_step_time": 0.1854255199432373 + }, + { + "epoch": 3.424224853515625e-05, + "model_forward_time": 0.024416685104370117, + "step": 22441 + }, + { + "epoch": 3.424224853515625e-05, + "step": 22441, + "training_step_time": 0.1526956558227539 + }, + { + "epoch": 3.42437744140625e-05, + "model_forward_time": 0.02459430694580078, + "step": 22442 + }, + { + "epoch": 3.42437744140625e-05, + "step": 22442, + "training_step_time": 0.10424470901489258 + }, + { + "epoch": 3.424530029296875e-05, + "model_forward_time": 0.027668476104736328, + "step": 22443 + }, + { + "epoch": 3.424530029296875e-05, + "step": 22443, + "training_step_time": 0.19573497772216797 + }, + { + "epoch": 3.4246826171875e-05, + "model_forward_time": 0.02541494369506836, + "step": 22444 + }, + { + "epoch": 3.4246826171875e-05, + "step": 22444, + "training_step_time": 0.10525894165039062 + }, + { + "epoch": 3.424835205078125e-05, + "model_forward_time": 0.02418828010559082, + "step": 22445 + }, + { + "epoch": 3.424835205078125e-05, + "step": 22445, + "training_step_time": 0.10398197174072266 + }, + { + "epoch": 3.42498779296875e-05, + "model_forward_time": 0.025159597396850586, + "step": 22446 + }, + { + "epoch": 3.42498779296875e-05, + "step": 22446, + "training_step_time": 0.15280818939208984 + }, + { + "epoch": 3.425140380859375e-05, + "model_forward_time": 0.024649858474731445, + "step": 22447 + }, + { + "epoch": 3.425140380859375e-05, + "step": 22447, + "training_step_time": 0.18122506141662598 + }, + { + "epoch": 3.42529296875e-05, + "model_forward_time": 0.023980140686035156, + "step": 22448 + }, + { + "epoch": 3.42529296875e-05, + "step": 22448, + "training_step_time": 0.20926713943481445 + }, + { + "epoch": 3.425445556640625e-05, + "model_forward_time": 0.027878522872924805, + "step": 22449 + }, + { + "epoch": 3.425445556640625e-05, + "step": 22449, + "training_step_time": 0.1589667797088623 + }, + { + "epoch": 3.42559814453125e-05, + "grad_norm": 0.31565800309181213, + "learning_rate": 1.633918428831377e-05, + "loss": 0.0089, + "step": 22450 + }, + { + "epoch": 3.42559814453125e-05, + "model_forward_time": 0.023316144943237305, + "step": 22450 + }, + { + "epoch": 3.42559814453125e-05, + "step": 22450, + "training_step_time": 0.17870235443115234 + }, + { + "epoch": 3.425750732421875e-05, + "model_forward_time": 0.02413201332092285, + "step": 22451 + }, + { + "epoch": 3.425750732421875e-05, + "step": 22451, + "training_step_time": 0.1248466968536377 + }, + { + "epoch": 3.4259033203125e-05, + "model_forward_time": 0.02429342269897461, + "step": 22452 + }, + { + "epoch": 3.4259033203125e-05, + "step": 22452, + "training_step_time": 0.11592984199523926 + }, + { + "epoch": 3.426055908203125e-05, + "model_forward_time": 0.025311946868896484, + "step": 22453 + }, + { + "epoch": 3.426055908203125e-05, + "step": 22453, + "training_step_time": 0.11795568466186523 + }, + { + "epoch": 3.42620849609375e-05, + "model_forward_time": 0.025185585021972656, + "step": 22454 + }, + { + "epoch": 3.42620849609375e-05, + "step": 22454, + "training_step_time": 0.10916757583618164 + }, + { + "epoch": 3.426361083984375e-05, + "model_forward_time": 0.024597644805908203, + "step": 22455 + }, + { + "epoch": 3.426361083984375e-05, + "step": 22455, + "training_step_time": 0.10643172264099121 + }, + { + "epoch": 3.426513671875e-05, + "model_forward_time": 0.026105642318725586, + "step": 22456 + }, + { + "epoch": 3.426513671875e-05, + "step": 22456, + "training_step_time": 0.1096343994140625 + }, + { + "epoch": 3.426666259765625e-05, + "model_forward_time": 0.025262832641601562, + "step": 22457 + }, + { + "epoch": 3.426666259765625e-05, + "step": 22457, + "training_step_time": 0.10553240776062012 + }, + { + "epoch": 3.42681884765625e-05, + "model_forward_time": 0.025252342224121094, + "step": 22458 + }, + { + "epoch": 3.42681884765625e-05, + "step": 22458, + "training_step_time": 0.10580205917358398 + }, + { + "epoch": 3.426971435546875e-05, + "model_forward_time": 0.02510356903076172, + "step": 22459 + }, + { + "epoch": 3.426971435546875e-05, + "step": 22459, + "training_step_time": 0.10562634468078613 + }, + { + "epoch": 3.4271240234375e-05, + "grad_norm": 0.38021135330200195, + "learning_rate": 1.6298449714427355e-05, + "loss": 0.0081, + "step": 22460 + }, + { + "epoch": 3.4271240234375e-05, + "model_forward_time": 0.02519512176513672, + "step": 22460 + }, + { + "epoch": 3.4271240234375e-05, + "step": 22460, + "training_step_time": 0.10873627662658691 + }, + { + "epoch": 3.427276611328125e-05, + "model_forward_time": 0.024911880493164062, + "step": 22461 + }, + { + "epoch": 3.427276611328125e-05, + "step": 22461, + "training_step_time": 0.15426111221313477 + }, + { + "epoch": 3.42742919921875e-05, + "model_forward_time": 0.024502992630004883, + "step": 22462 + }, + { + "epoch": 3.42742919921875e-05, + "step": 22462, + "training_step_time": 0.154313325881958 + }, + { + "epoch": 3.427581787109375e-05, + "model_forward_time": 0.024194717407226562, + "step": 22463 + }, + { + "epoch": 3.427581787109375e-05, + "step": 22463, + "training_step_time": 0.10732030868530273 + }, + { + "epoch": 3.427734375e-05, + "model_forward_time": 0.02803325653076172, + "step": 22464 + }, + { + "epoch": 3.427734375e-05, + "step": 22464, + "training_step_time": 0.14589881896972656 + }, + { + "epoch": 3.427886962890625e-05, + "model_forward_time": 0.024923086166381836, + "step": 22465 + }, + { + "epoch": 3.427886962890625e-05, + "step": 22465, + "training_step_time": 0.1748206615447998 + }, + { + "epoch": 3.42803955078125e-05, + "model_forward_time": 0.024135112762451172, + "step": 22466 + }, + { + "epoch": 3.42803955078125e-05, + "step": 22466, + "training_step_time": 0.1683053970336914 + }, + { + "epoch": 3.428192138671875e-05, + "model_forward_time": 0.0239715576171875, + "step": 22467 + }, + { + "epoch": 3.428192138671875e-05, + "step": 22467, + "training_step_time": 0.10049223899841309 + }, + { + "epoch": 3.4283447265625e-05, + "model_forward_time": 0.024181842803955078, + "step": 22468 + }, + { + "epoch": 3.4283447265625e-05, + "step": 22468, + "training_step_time": 0.10140013694763184 + }, + { + "epoch": 3.428497314453125e-05, + "model_forward_time": 0.02485942840576172, + "step": 22469 + }, + { + "epoch": 3.428497314453125e-05, + "step": 22469, + "training_step_time": 0.1057901382446289 + }, + { + "epoch": 3.42864990234375e-05, + "grad_norm": 0.16420020163059235, + "learning_rate": 1.62577560911024e-05, + "loss": 0.008, + "step": 22470 + }, + { + "epoch": 3.42864990234375e-05, + "model_forward_time": 0.025231122970581055, + "step": 22470 + }, + { + "epoch": 3.42864990234375e-05, + "step": 22470, + "training_step_time": 0.10350680351257324 + }, + { + "epoch": 3.428802490234375e-05, + "model_forward_time": 0.025012969970703125, + "step": 22471 + }, + { + "epoch": 3.428802490234375e-05, + "step": 22471, + "training_step_time": 0.10654830932617188 + }, + { + "epoch": 3.428955078125e-05, + "model_forward_time": 0.025200366973876953, + "step": 22472 + }, + { + "epoch": 3.428955078125e-05, + "step": 22472, + "training_step_time": 0.10405397415161133 + }, + { + "epoch": 3.429107666015625e-05, + "model_forward_time": 0.024828672409057617, + "step": 22473 + }, + { + "epoch": 3.429107666015625e-05, + "step": 22473, + "training_step_time": 0.10475754737854004 + }, + { + "epoch": 3.42926025390625e-05, + "model_forward_time": 0.024593353271484375, + "step": 22474 + }, + { + "epoch": 3.42926025390625e-05, + "step": 22474, + "training_step_time": 0.10272073745727539 + }, + { + "epoch": 3.429412841796875e-05, + "model_forward_time": 0.025143146514892578, + "step": 22475 + }, + { + "epoch": 3.429412841796875e-05, + "step": 22475, + "training_step_time": 0.10408949851989746 + }, + { + "epoch": 3.4295654296875e-05, + "model_forward_time": 0.02501535415649414, + "step": 22476 + }, + { + "epoch": 3.4295654296875e-05, + "step": 22476, + "training_step_time": 0.10512495040893555 + }, + { + "epoch": 3.429718017578125e-05, + "model_forward_time": 0.024796485900878906, + "step": 22477 + }, + { + "epoch": 3.429718017578125e-05, + "step": 22477, + "training_step_time": 0.10399150848388672 + }, + { + "epoch": 3.42987060546875e-05, + "model_forward_time": 0.02478957176208496, + "step": 22478 + }, + { + "epoch": 3.42987060546875e-05, + "step": 22478, + "training_step_time": 0.10568785667419434 + }, + { + "epoch": 3.430023193359375e-05, + "model_forward_time": 0.024737834930419922, + "step": 22479 + }, + { + "epoch": 3.430023193359375e-05, + "step": 22479, + "training_step_time": 0.1050727367401123 + }, + { + "epoch": 3.43017578125e-05, + "grad_norm": 0.4871219992637634, + "learning_rate": 1.6217103467785484e-05, + "loss": 0.0077, + "step": 22480 + }, + { + "epoch": 3.43017578125e-05, + "model_forward_time": 0.02924799919128418, + "step": 22480 + }, + { + "epoch": 3.43017578125e-05, + "step": 22480, + "training_step_time": 0.11047172546386719 + }, + { + "epoch": 3.430328369140625e-05, + "model_forward_time": 0.02570343017578125, + "step": 22481 + }, + { + "epoch": 3.430328369140625e-05, + "step": 22481, + "training_step_time": 0.11348581314086914 + }, + { + "epoch": 3.43048095703125e-05, + "model_forward_time": 0.024979352951049805, + "step": 22482 + }, + { + "epoch": 3.43048095703125e-05, + "step": 22482, + "training_step_time": 0.13289809226989746 + }, + { + "epoch": 3.430633544921875e-05, + "model_forward_time": 0.024374961853027344, + "step": 22483 + }, + { + "epoch": 3.430633544921875e-05, + "step": 22483, + "training_step_time": 0.17105555534362793 + }, + { + "epoch": 3.4307861328125e-05, + "model_forward_time": 0.02447962760925293, + "step": 22484 + }, + { + "epoch": 3.4307861328125e-05, + "step": 22484, + "training_step_time": 0.2177739143371582 + }, + { + "epoch": 3.430938720703125e-05, + "model_forward_time": 0.024221420288085938, + "step": 22485 + }, + { + "epoch": 3.430938720703125e-05, + "step": 22485, + "training_step_time": 0.2430107593536377 + }, + { + "epoch": 3.43109130859375e-05, + "model_forward_time": 0.024776458740234375, + "step": 22486 + }, + { + "epoch": 3.43109130859375e-05, + "step": 22486, + "training_step_time": 0.2290935516357422 + }, + { + "epoch": 3.431243896484375e-05, + "model_forward_time": 0.02419757843017578, + "step": 22487 + }, + { + "epoch": 3.431243896484375e-05, + "step": 22487, + "training_step_time": 0.1952812671661377 + }, + { + "epoch": 3.431396484375e-05, + "model_forward_time": 0.024350881576538086, + "step": 22488 + }, + { + "epoch": 3.431396484375e-05, + "step": 22488, + "training_step_time": 0.17877912521362305 + }, + { + "epoch": 3.431549072265625e-05, + "model_forward_time": 0.026100635528564453, + "step": 22489 + }, + { + "epoch": 3.431549072265625e-05, + "step": 22489, + "training_step_time": 0.17620015144348145 + }, + { + "epoch": 3.43170166015625e-05, + "grad_norm": 0.41838160157203674, + "learning_rate": 1.617649189387337e-05, + "loss": 0.0065, + "step": 22490 + }, + { + "epoch": 3.43170166015625e-05, + "model_forward_time": 0.02673935890197754, + "step": 22490 + }, + { + "epoch": 3.43170166015625e-05, + "step": 22490, + "training_step_time": 0.1646428108215332 + }, + { + "epoch": 3.431854248046875e-05, + "model_forward_time": 0.027776718139648438, + "step": 22491 + }, + { + "epoch": 3.431854248046875e-05, + "step": 22491, + "training_step_time": 0.19440221786499023 + }, + { + "epoch": 3.4320068359375e-05, + "model_forward_time": 0.02987527847290039, + "step": 22492 + }, + { + "epoch": 3.4320068359375e-05, + "step": 22492, + "training_step_time": 0.2585277557373047 + }, + { + "epoch": 3.432159423828125e-05, + "model_forward_time": 0.027833938598632812, + "step": 22493 + }, + { + "epoch": 3.432159423828125e-05, + "step": 22493, + "training_step_time": 0.3719778060913086 + }, + { + "epoch": 3.43231201171875e-05, + "model_forward_time": 0.031086444854736328, + "step": 22494 + }, + { + "epoch": 3.43231201171875e-05, + "step": 22494, + "training_step_time": 0.2861180305480957 + }, + { + "epoch": 3.432464599609375e-05, + "model_forward_time": 0.0289461612701416, + "step": 22495 + }, + { + "epoch": 3.432464599609375e-05, + "step": 22495, + "training_step_time": 0.3135216236114502 + }, + { + "epoch": 3.4326171875e-05, + "model_forward_time": 0.031242847442626953, + "step": 22496 + }, + { + "epoch": 3.4326171875e-05, + "step": 22496, + "training_step_time": 0.2987210750579834 + }, + { + "epoch": 3.432769775390625e-05, + "model_forward_time": 0.030518770217895508, + "step": 22497 + }, + { + "epoch": 3.432769775390625e-05, + "step": 22497, + "training_step_time": 0.3424403667449951 + }, + { + "epoch": 3.43292236328125e-05, + "model_forward_time": 0.030771732330322266, + "step": 22498 + }, + { + "epoch": 3.43292236328125e-05, + "step": 22498, + "training_step_time": 0.3419816493988037 + }, + { + "epoch": 3.433074951171875e-05, + "model_forward_time": 0.034188032150268555, + "step": 22499 + }, + { + "epoch": 3.433074951171875e-05, + "step": 22499, + "training_step_time": 0.33868932723999023 + }, + { + "epoch": 3.4332275390625e-05, + "grad_norm": 0.1720426231622696, + "learning_rate": 1.6135921418712956e-05, + "loss": 0.0051, + "step": 22500 + }, + { + "epoch": 3.4332275390625e-05, + "model_forward_time": 0.029853343963623047, + "step": 22500 + }, + { + "epoch": 3.4332275390625e-05, + "step": 22500, + "training_step_time": 0.20296096801757812 + }, + { + "epoch": 3.433380126953125e-05, + "model_forward_time": 0.030093908309936523, + "step": 22501 + }, + { + "epoch": 3.433380126953125e-05, + "step": 22501, + "training_step_time": 0.27199268341064453 + }, + { + "epoch": 3.43353271484375e-05, + "model_forward_time": 0.030057668685913086, + "step": 22502 + }, + { + "epoch": 3.43353271484375e-05, + "step": 22502, + "training_step_time": 0.2082653045654297 + }, + { + "epoch": 3.433685302734375e-05, + "model_forward_time": 0.0322413444519043, + "step": 22503 + }, + { + "epoch": 3.433685302734375e-05, + "step": 22503, + "training_step_time": 0.17333030700683594 + }, + { + "epoch": 3.433837890625e-05, + "model_forward_time": 0.03046393394470215, + "step": 22504 + }, + { + "epoch": 3.433837890625e-05, + "step": 22504, + "training_step_time": 0.17946076393127441 + }, + { + "epoch": 3.433990478515625e-05, + "model_forward_time": 0.033746957778930664, + "step": 22505 + }, + { + "epoch": 3.433990478515625e-05, + "step": 22505, + "training_step_time": 0.1766035556793213 + }, + { + "epoch": 3.43414306640625e-05, + "model_forward_time": 0.02998948097229004, + "step": 22506 + }, + { + "epoch": 3.43414306640625e-05, + "step": 22506, + "training_step_time": 0.12865447998046875 + }, + { + "epoch": 3.434295654296875e-05, + "model_forward_time": 0.028602123260498047, + "step": 22507 + }, + { + "epoch": 3.434295654296875e-05, + "step": 22507, + "training_step_time": 0.1841588020324707 + }, + { + "epoch": 3.4344482421875e-05, + "model_forward_time": 0.02686476707458496, + "step": 22508 + }, + { + "epoch": 3.4344482421875e-05, + "step": 22508, + "training_step_time": 0.12012243270874023 + }, + { + "epoch": 3.434600830078125e-05, + "model_forward_time": 0.026732444763183594, + "step": 22509 + }, + { + "epoch": 3.434600830078125e-05, + "step": 22509, + "training_step_time": 0.20315885543823242 + }, + { + "epoch": 3.43475341796875e-05, + "grad_norm": 0.08240101486444473, + "learning_rate": 1.6095392091601175e-05, + "loss": 0.0032, + "step": 22510 + }, + { + "epoch": 3.43475341796875e-05, + "model_forward_time": 0.025716781616210938, + "step": 22510 + }, + { + "epoch": 3.43475341796875e-05, + "step": 22510, + "training_step_time": 0.14647555351257324 + }, + { + "epoch": 3.434906005859375e-05, + "model_forward_time": 0.02506089210510254, + "step": 22511 + }, + { + "epoch": 3.434906005859375e-05, + "step": 22511, + "training_step_time": 0.2005445957183838 + }, + { + "epoch": 3.43505859375e-05, + "model_forward_time": 0.023823022842407227, + "step": 22512 + }, + { + "epoch": 3.43505859375e-05, + "step": 22512, + "training_step_time": 0.11275124549865723 + }, + { + "epoch": 3.435211181640625e-05, + "model_forward_time": 0.02328014373779297, + "step": 22513 + }, + { + "epoch": 3.435211181640625e-05, + "step": 22513, + "training_step_time": 0.12714266777038574 + }, + { + "epoch": 3.43536376953125e-05, + "model_forward_time": 0.024764299392700195, + "step": 22514 + }, + { + "epoch": 3.43536376953125e-05, + "step": 22514, + "training_step_time": 0.1269221305847168 + }, + { + "epoch": 3.435516357421875e-05, + "model_forward_time": 0.02464151382446289, + "step": 22515 + }, + { + "epoch": 3.435516357421875e-05, + "step": 22515, + "training_step_time": 0.12345552444458008 + }, + { + "epoch": 3.4356689453125e-05, + "model_forward_time": 0.025038957595825195, + "step": 22516 + }, + { + "epoch": 3.4356689453125e-05, + "step": 22516, + "training_step_time": 0.11983561515808105 + }, + { + "epoch": 3.435821533203125e-05, + "model_forward_time": 0.024979591369628906, + "step": 22517 + }, + { + "epoch": 3.435821533203125e-05, + "step": 22517, + "training_step_time": 0.11545276641845703 + }, + { + "epoch": 3.43597412109375e-05, + "model_forward_time": 0.02479720115661621, + "step": 22518 + }, + { + "epoch": 3.43597412109375e-05, + "step": 22518, + "training_step_time": 0.11314558982849121 + }, + { + "epoch": 3.436126708984375e-05, + "model_forward_time": 0.025104284286499023, + "step": 22519 + }, + { + "epoch": 3.436126708984375e-05, + "step": 22519, + "training_step_time": 0.11342072486877441 + }, + { + "epoch": 3.436279296875e-05, + "grad_norm": 0.09012985974550247, + "learning_rate": 1.6054903961785013e-05, + "loss": 0.0099, + "step": 22520 + }, + { + "epoch": 3.436279296875e-05, + "model_forward_time": 0.0248565673828125, + "step": 22520 + }, + { + "epoch": 3.436279296875e-05, + "step": 22520, + "training_step_time": 0.12018513679504395 + }, + { + "epoch": 3.436431884765625e-05, + "model_forward_time": 0.024838924407958984, + "step": 22521 + }, + { + "epoch": 3.436431884765625e-05, + "step": 22521, + "training_step_time": 0.23431944847106934 + }, + { + "epoch": 3.43658447265625e-05, + "model_forward_time": 0.024327754974365234, + "step": 22522 + }, + { + "epoch": 3.43658447265625e-05, + "step": 22522, + "training_step_time": 0.10803937911987305 + }, + { + "epoch": 3.436737060546875e-05, + "model_forward_time": 0.024684667587280273, + "step": 22523 + }, + { + "epoch": 3.436737060546875e-05, + "step": 22523, + "training_step_time": 0.12930750846862793 + }, + { + "epoch": 3.4368896484375e-05, + "model_forward_time": 0.02517104148864746, + "step": 22524 + }, + { + "epoch": 3.4368896484375e-05, + "step": 22524, + "training_step_time": 0.11920332908630371 + }, + { + "epoch": 3.437042236328125e-05, + "model_forward_time": 0.024694442749023438, + "step": 22525 + }, + { + "epoch": 3.437042236328125e-05, + "step": 22525, + "training_step_time": 0.11605215072631836 + }, + { + "epoch": 3.43719482421875e-05, + "model_forward_time": 0.024904966354370117, + "step": 22526 + }, + { + "epoch": 3.43719482421875e-05, + "step": 22526, + "training_step_time": 0.14576005935668945 + }, + { + "epoch": 3.437347412109375e-05, + "model_forward_time": 0.024609804153442383, + "step": 22527 + }, + { + "epoch": 3.437347412109375e-05, + "step": 22527, + "training_step_time": 0.10378193855285645 + }, + { + "epoch": 3.4375e-05, + "model_forward_time": 0.02496051788330078, + "step": 22528 + }, + { + "epoch": 3.4375e-05, + "step": 22528, + "training_step_time": 0.10738110542297363 + }, + { + "epoch": 3.437652587890625e-05, + "model_forward_time": 0.025020360946655273, + "step": 22529 + }, + { + "epoch": 3.437652587890625e-05, + "step": 22529, + "training_step_time": 0.10481619834899902 + }, + { + "epoch": 3.43780517578125e-05, + "grad_norm": 0.2845122218132019, + "learning_rate": 1.6014457078461353e-05, + "loss": 0.0111, + "step": 22530 + }, + { + "epoch": 3.43780517578125e-05, + "model_forward_time": 0.024991750717163086, + "step": 22530 + }, + { + "epoch": 3.43780517578125e-05, + "step": 22530, + "training_step_time": 0.10554981231689453 + }, + { + "epoch": 3.437957763671875e-05, + "model_forward_time": 0.025631189346313477, + "step": 22531 + }, + { + "epoch": 3.437957763671875e-05, + "step": 22531, + "training_step_time": 0.10950636863708496 + }, + { + "epoch": 3.4381103515625e-05, + "model_forward_time": 0.025404930114746094, + "step": 22532 + }, + { + "epoch": 3.4381103515625e-05, + "step": 22532, + "training_step_time": 0.10576534271240234 + }, + { + "epoch": 3.438262939453125e-05, + "model_forward_time": 0.026144027709960938, + "step": 22533 + }, + { + "epoch": 3.438262939453125e-05, + "step": 22533, + "training_step_time": 0.1797025203704834 + }, + { + "epoch": 3.43841552734375e-05, + "model_forward_time": 0.025594234466552734, + "step": 22534 + }, + { + "epoch": 3.43841552734375e-05, + "step": 22534, + "training_step_time": 0.11345505714416504 + }, + { + "epoch": 3.438568115234375e-05, + "model_forward_time": 0.024806976318359375, + "step": 22535 + }, + { + "epoch": 3.438568115234375e-05, + "step": 22535, + "training_step_time": 0.1372518539428711 + }, + { + "epoch": 3.438720703125e-05, + "model_forward_time": 0.02469778060913086, + "step": 22536 + }, + { + "epoch": 3.438720703125e-05, + "step": 22536, + "training_step_time": 0.156052827835083 + }, + { + "epoch": 3.438873291015625e-05, + "model_forward_time": 0.02478194236755371, + "step": 22537 + }, + { + "epoch": 3.438873291015625e-05, + "step": 22537, + "training_step_time": 0.10473942756652832 + }, + { + "epoch": 3.43902587890625e-05, + "model_forward_time": 0.024973630905151367, + "step": 22538 + }, + { + "epoch": 3.43902587890625e-05, + "step": 22538, + "training_step_time": 0.11530303955078125 + }, + { + "epoch": 3.439178466796875e-05, + "model_forward_time": 0.02603745460510254, + "step": 22539 + }, + { + "epoch": 3.439178466796875e-05, + "step": 22539, + "training_step_time": 0.10576319694519043 + }, + { + "epoch": 3.4393310546875e-05, + "grad_norm": 0.1552228033542633, + "learning_rate": 1.597405149077697e-05, + "loss": 0.0033, + "step": 22540 + }, + { + "epoch": 3.4393310546875e-05, + "model_forward_time": 0.024698257446289062, + "step": 22540 + }, + { + "epoch": 3.4393310546875e-05, + "step": 22540, + "training_step_time": 0.10421323776245117 + }, + { + "epoch": 3.439483642578125e-05, + "model_forward_time": 0.024886608123779297, + "step": 22541 + }, + { + "epoch": 3.439483642578125e-05, + "step": 22541, + "training_step_time": 0.10962653160095215 + }, + { + "epoch": 3.43963623046875e-05, + "model_forward_time": 0.02501535415649414, + "step": 22542 + }, + { + "epoch": 3.43963623046875e-05, + "step": 22542, + "training_step_time": 0.11786580085754395 + }, + { + "epoch": 3.439788818359375e-05, + "model_forward_time": 0.024881601333618164, + "step": 22543 + }, + { + "epoch": 3.439788818359375e-05, + "step": 22543, + "training_step_time": 0.11621427536010742 + }, + { + "epoch": 3.43994140625e-05, + "model_forward_time": 0.025180578231811523, + "step": 22544 + }, + { + "epoch": 3.43994140625e-05, + "step": 22544, + "training_step_time": 0.11540102958679199 + }, + { + "epoch": 3.440093994140625e-05, + "model_forward_time": 0.02502751350402832, + "step": 22545 + }, + { + "epoch": 3.440093994140625e-05, + "step": 22545, + "training_step_time": 0.10991787910461426 + }, + { + "epoch": 3.44024658203125e-05, + "model_forward_time": 0.025009632110595703, + "step": 22546 + }, + { + "epoch": 3.44024658203125e-05, + "step": 22546, + "training_step_time": 0.11142349243164062 + }, + { + "epoch": 3.440399169921875e-05, + "model_forward_time": 0.02531123161315918, + "step": 22547 + }, + { + "epoch": 3.440399169921875e-05, + "step": 22547, + "training_step_time": 0.10947823524475098 + }, + { + "epoch": 3.4405517578125e-05, + "model_forward_time": 0.02483391761779785, + "step": 22548 + }, + { + "epoch": 3.4405517578125e-05, + "step": 22548, + "training_step_time": 0.1094655990600586 + }, + { + "epoch": 3.440704345703125e-05, + "model_forward_time": 0.025200605392456055, + "step": 22549 + }, + { + "epoch": 3.440704345703125e-05, + "step": 22549, + "training_step_time": 0.10857582092285156 + }, + { + "epoch": 3.44085693359375e-05, + "grad_norm": 0.07073287665843964, + "learning_rate": 1.593368724782846e-05, + "loss": 0.0123, + "step": 22550 + }, + { + "epoch": 3.44085693359375e-05, + "model_forward_time": 0.02495121955871582, + "step": 22550 + }, + { + "epoch": 3.44085693359375e-05, + "step": 22550, + "training_step_time": 0.10446429252624512 + }, + { + "epoch": 3.441009521484375e-05, + "model_forward_time": 0.025267601013183594, + "step": 22551 + }, + { + "epoch": 3.441009521484375e-05, + "step": 22551, + "training_step_time": 0.10826683044433594 + }, + { + "epoch": 3.441162109375e-05, + "model_forward_time": 0.025191068649291992, + "step": 22552 + }, + { + "epoch": 3.441162109375e-05, + "step": 22552, + "training_step_time": 0.1363527774810791 + }, + { + "epoch": 3.441314697265625e-05, + "model_forward_time": 0.024929523468017578, + "step": 22553 + }, + { + "epoch": 3.441314697265625e-05, + "step": 22553, + "training_step_time": 0.14258790016174316 + }, + { + "epoch": 3.44146728515625e-05, + "model_forward_time": 0.024688005447387695, + "step": 22554 + }, + { + "epoch": 3.44146728515625e-05, + "step": 22554, + "training_step_time": 0.13992524147033691 + }, + { + "epoch": 3.441619873046875e-05, + "model_forward_time": 0.024988412857055664, + "step": 22555 + }, + { + "epoch": 3.441619873046875e-05, + "step": 22555, + "training_step_time": 0.1925981044769287 + }, + { + "epoch": 3.4417724609375e-05, + "model_forward_time": 0.024658203125, + "step": 22556 + }, + { + "epoch": 3.4417724609375e-05, + "step": 22556, + "training_step_time": 0.14413022994995117 + }, + { + "epoch": 3.441925048828125e-05, + "model_forward_time": 0.024521350860595703, + "step": 22557 + }, + { + "epoch": 3.441925048828125e-05, + "step": 22557, + "training_step_time": 0.15850448608398438 + }, + { + "epoch": 3.44207763671875e-05, + "model_forward_time": 0.024738788604736328, + "step": 22558 + }, + { + "epoch": 3.44207763671875e-05, + "step": 22558, + "training_step_time": 0.10285162925720215 + }, + { + "epoch": 3.442230224609375e-05, + "model_forward_time": 0.02480292320251465, + "step": 22559 + }, + { + "epoch": 3.442230224609375e-05, + "step": 22559, + "training_step_time": 0.10341787338256836 + }, + { + "epoch": 3.4423828125e-05, + "grad_norm": 0.2642311155796051, + "learning_rate": 1.5893364398662176e-05, + "loss": 0.0182, + "step": 22560 + }, + { + "epoch": 3.4423828125e-05, + "model_forward_time": 0.025300025939941406, + "step": 22560 + }, + { + "epoch": 3.4423828125e-05, + "step": 22560, + "training_step_time": 0.11081051826477051 + }, + { + "epoch": 3.442535400390625e-05, + "model_forward_time": 0.025388002395629883, + "step": 22561 + }, + { + "epoch": 3.442535400390625e-05, + "step": 22561, + "training_step_time": 0.10899734497070312 + }, + { + "epoch": 3.44268798828125e-05, + "model_forward_time": 0.025207042694091797, + "step": 22562 + }, + { + "epoch": 3.44268798828125e-05, + "step": 22562, + "training_step_time": 0.10673236846923828 + }, + { + "epoch": 3.442840576171875e-05, + "model_forward_time": 0.025087833404541016, + "step": 22563 + }, + { + "epoch": 3.442840576171875e-05, + "step": 22563, + "training_step_time": 0.10633063316345215 + }, + { + "epoch": 3.4429931640625e-05, + "model_forward_time": 0.024871110916137695, + "step": 22564 + }, + { + "epoch": 3.4429931640625e-05, + "step": 22564, + "training_step_time": 0.10430598258972168 + }, + { + "epoch": 3.443145751953125e-05, + "model_forward_time": 0.025195837020874023, + "step": 22565 + }, + { + "epoch": 3.443145751953125e-05, + "step": 22565, + "training_step_time": 0.10355305671691895 + }, + { + "epoch": 3.44329833984375e-05, + "model_forward_time": 0.02477860450744629, + "step": 22566 + }, + { + "epoch": 3.44329833984375e-05, + "step": 22566, + "training_step_time": 0.10409164428710938 + }, + { + "epoch": 3.443450927734375e-05, + "model_forward_time": 0.02439284324645996, + "step": 22567 + }, + { + "epoch": 3.443450927734375e-05, + "step": 22567, + "training_step_time": 0.13987946510314941 + }, + { + "epoch": 3.443603515625e-05, + "model_forward_time": 0.02516341209411621, + "step": 22568 + }, + { + "epoch": 3.443603515625e-05, + "step": 22568, + "training_step_time": 0.11322283744812012 + }, + { + "epoch": 3.443756103515625e-05, + "model_forward_time": 0.024591922760009766, + "step": 22569 + }, + { + "epoch": 3.443756103515625e-05, + "step": 22569, + "training_step_time": 0.14094328880310059 + }, + { + "epoch": 3.44390869140625e-05, + "grad_norm": 0.17577330768108368, + "learning_rate": 1.5853082992274205e-05, + "loss": 0.0049, + "step": 22570 + }, + { + "epoch": 3.44390869140625e-05, + "model_forward_time": 0.02508997917175293, + "step": 22570 + }, + { + "epoch": 3.44390869140625e-05, + "step": 22570, + "training_step_time": 0.21321320533752441 + }, + { + "epoch": 3.444061279296875e-05, + "model_forward_time": 0.025599241256713867, + "step": 22571 + }, + { + "epoch": 3.444061279296875e-05, + "step": 22571, + "training_step_time": 0.11498737335205078 + }, + { + "epoch": 3.4442138671875e-05, + "model_forward_time": 0.024283170700073242, + "step": 22572 + }, + { + "epoch": 3.4442138671875e-05, + "step": 22572, + "training_step_time": 0.11481904983520508 + }, + { + "epoch": 3.444366455078125e-05, + "model_forward_time": 0.025135040283203125, + "step": 22573 + }, + { + "epoch": 3.444366455078125e-05, + "step": 22573, + "training_step_time": 0.14130783081054688 + }, + { + "epoch": 3.44451904296875e-05, + "model_forward_time": 0.024860143661499023, + "step": 22574 + }, + { + "epoch": 3.44451904296875e-05, + "step": 22574, + "training_step_time": 0.10347294807434082 + }, + { + "epoch": 3.444671630859375e-05, + "model_forward_time": 0.025289297103881836, + "step": 22575 + }, + { + "epoch": 3.444671630859375e-05, + "step": 22575, + "training_step_time": 0.1062467098236084 + }, + { + "epoch": 3.44482421875e-05, + "model_forward_time": 0.025598764419555664, + "step": 22576 + }, + { + "epoch": 3.44482421875e-05, + "step": 22576, + "training_step_time": 0.10679435729980469 + }, + { + "epoch": 3.444976806640625e-05, + "model_forward_time": 0.02544403076171875, + "step": 22577 + }, + { + "epoch": 3.444976806640625e-05, + "step": 22577, + "training_step_time": 0.10471463203430176 + }, + { + "epoch": 3.44512939453125e-05, + "model_forward_time": 0.024773597717285156, + "step": 22578 + }, + { + "epoch": 3.44512939453125e-05, + "step": 22578, + "training_step_time": 0.10529422760009766 + }, + { + "epoch": 3.445281982421875e-05, + "model_forward_time": 0.024950742721557617, + "step": 22579 + }, + { + "epoch": 3.445281982421875e-05, + "step": 22579, + "training_step_time": 0.13483572006225586 + }, + { + "epoch": 3.4454345703125e-05, + "grad_norm": 0.1434876024723053, + "learning_rate": 1.581284307761024e-05, + "loss": 0.0052, + "step": 22580 + }, + { + "epoch": 3.4454345703125e-05, + "model_forward_time": 0.025471210479736328, + "step": 22580 + }, + { + "epoch": 3.4454345703125e-05, + "step": 22580, + "training_step_time": 0.18225598335266113 + }, + { + "epoch": 3.445587158203125e-05, + "model_forward_time": 0.02443528175354004, + "step": 22581 + }, + { + "epoch": 3.445587158203125e-05, + "step": 22581, + "training_step_time": 0.12352919578552246 + }, + { + "epoch": 3.44573974609375e-05, + "model_forward_time": 0.024270296096801758, + "step": 22582 + }, + { + "epoch": 3.44573974609375e-05, + "step": 22582, + "training_step_time": 0.11872267723083496 + }, + { + "epoch": 3.445892333984375e-05, + "model_forward_time": 0.024873018264770508, + "step": 22583 + }, + { + "epoch": 3.445892333984375e-05, + "step": 22583, + "training_step_time": 0.20750904083251953 + }, + { + "epoch": 3.446044921875e-05, + "model_forward_time": 0.023919105529785156, + "step": 22584 + }, + { + "epoch": 3.446044921875e-05, + "step": 22584, + "training_step_time": 0.1170196533203125 + }, + { + "epoch": 3.446197509765625e-05, + "model_forward_time": 0.024164199829101562, + "step": 22585 + }, + { + "epoch": 3.446197509765625e-05, + "step": 22585, + "training_step_time": 0.11087894439697266 + }, + { + "epoch": 3.44635009765625e-05, + "model_forward_time": 0.025058984756469727, + "step": 22586 + }, + { + "epoch": 3.44635009765625e-05, + "step": 22586, + "training_step_time": 0.10880374908447266 + }, + { + "epoch": 3.446502685546875e-05, + "model_forward_time": 0.02532172203063965, + "step": 22587 + }, + { + "epoch": 3.446502685546875e-05, + "step": 22587, + "training_step_time": 0.10690116882324219 + }, + { + "epoch": 3.4466552734375e-05, + "model_forward_time": 0.025583505630493164, + "step": 22588 + }, + { + "epoch": 3.4466552734375e-05, + "step": 22588, + "training_step_time": 0.1074683666229248 + }, + { + "epoch": 3.446807861328125e-05, + "model_forward_time": 0.025719642639160156, + "step": 22589 + }, + { + "epoch": 3.446807861328125e-05, + "step": 22589, + "training_step_time": 0.11057281494140625 + }, + { + "epoch": 3.44696044921875e-05, + "grad_norm": 0.167452871799469, + "learning_rate": 1.5772644703565565e-05, + "loss": 0.0041, + "step": 22590 + }, + { + "epoch": 3.44696044921875e-05, + "model_forward_time": 0.02565145492553711, + "step": 22590 + }, + { + "epoch": 3.44696044921875e-05, + "step": 22590, + "training_step_time": 0.11219048500061035 + }, + { + "epoch": 3.447113037109375e-05, + "model_forward_time": 0.025362491607666016, + "step": 22591 + }, + { + "epoch": 3.447113037109375e-05, + "step": 22591, + "training_step_time": 0.11288070678710938 + }, + { + "epoch": 3.447265625e-05, + "model_forward_time": 0.025502681732177734, + "step": 22592 + }, + { + "epoch": 3.447265625e-05, + "step": 22592, + "training_step_time": 0.1077110767364502 + }, + { + "epoch": 3.447418212890625e-05, + "model_forward_time": 0.025272130966186523, + "step": 22593 + }, + { + "epoch": 3.447418212890625e-05, + "step": 22593, + "training_step_time": 0.1120448112487793 + }, + { + "epoch": 3.44757080078125e-05, + "model_forward_time": 0.0257110595703125, + "step": 22594 + }, + { + "epoch": 3.44757080078125e-05, + "step": 22594, + "training_step_time": 0.10993242263793945 + }, + { + "epoch": 3.447723388671875e-05, + "model_forward_time": 0.025301456451416016, + "step": 22595 + }, + { + "epoch": 3.447723388671875e-05, + "step": 22595, + "training_step_time": 0.10610342025756836 + }, + { + "epoch": 3.4478759765625e-05, + "model_forward_time": 0.025497913360595703, + "step": 22596 + }, + { + "epoch": 3.4478759765625e-05, + "step": 22596, + "training_step_time": 0.10670733451843262 + }, + { + "epoch": 3.448028564453125e-05, + "model_forward_time": 0.025922060012817383, + "step": 22597 + }, + { + "epoch": 3.448028564453125e-05, + "step": 22597, + "training_step_time": 0.10722804069519043 + }, + { + "epoch": 3.44818115234375e-05, + "model_forward_time": 0.024599552154541016, + "step": 22598 + }, + { + "epoch": 3.44818115234375e-05, + "step": 22598, + "training_step_time": 0.15507769584655762 + }, + { + "epoch": 3.448333740234375e-05, + "model_forward_time": 0.0252077579498291, + "step": 22599 + }, + { + "epoch": 3.448333740234375e-05, + "step": 22599, + "training_step_time": 0.15497732162475586 + }, + { + "epoch": 3.448486328125e-05, + "grad_norm": 0.16000565886497498, + "learning_rate": 1.5732487918985018e-05, + "loss": 0.0047, + "step": 22600 + }, + { + "epoch": 3.448486328125e-05, + "model_forward_time": 0.02480936050415039, + "step": 22600 + }, + { + "epoch": 3.448486328125e-05, + "step": 22600, + "training_step_time": 0.1786642074584961 + }, + { + "epoch": 3.448638916015625e-05, + "model_forward_time": 0.025177478790283203, + "step": 22601 + }, + { + "epoch": 3.448638916015625e-05, + "step": 22601, + "training_step_time": 0.15608811378479004 + }, + { + "epoch": 3.44879150390625e-05, + "model_forward_time": 0.02507948875427246, + "step": 22602 + }, + { + "epoch": 3.44879150390625e-05, + "step": 22602, + "training_step_time": 0.14945292472839355 + }, + { + "epoch": 3.448944091796875e-05, + "model_forward_time": 0.02465987205505371, + "step": 22603 + }, + { + "epoch": 3.448944091796875e-05, + "step": 22603, + "training_step_time": 0.10299515724182129 + }, + { + "epoch": 3.4490966796875e-05, + "model_forward_time": 0.02558159828186035, + "step": 22604 + }, + { + "epoch": 3.4490966796875e-05, + "step": 22604, + "training_step_time": 0.10664725303649902 + }, + { + "epoch": 3.449249267578125e-05, + "model_forward_time": 0.025170564651489258, + "step": 22605 + }, + { + "epoch": 3.449249267578125e-05, + "step": 22605, + "training_step_time": 0.10531949996948242 + }, + { + "epoch": 3.44940185546875e-05, + "model_forward_time": 0.02552938461303711, + "step": 22606 + }, + { + "epoch": 3.44940185546875e-05, + "step": 22606, + "training_step_time": 0.10657715797424316 + }, + { + "epoch": 3.449554443359375e-05, + "model_forward_time": 0.02535390853881836, + "step": 22607 + }, + { + "epoch": 3.449554443359375e-05, + "step": 22607, + "training_step_time": 0.10749149322509766 + }, + { + "epoch": 3.44970703125e-05, + "model_forward_time": 0.025127410888671875, + "step": 22608 + }, + { + "epoch": 3.44970703125e-05, + "step": 22608, + "training_step_time": 0.18455839157104492 + }, + { + "epoch": 3.449859619140625e-05, + "model_forward_time": 0.023488759994506836, + "step": 22609 + }, + { + "epoch": 3.449859619140625e-05, + "step": 22609, + "training_step_time": 0.21075034141540527 + }, + { + "epoch": 3.45001220703125e-05, + "grad_norm": 0.27791628241539, + "learning_rate": 1.569237277266286e-05, + "loss": 0.0041, + "step": 22610 + }, + { + "epoch": 3.45001220703125e-05, + "model_forward_time": 0.023622989654541016, + "step": 22610 + }, + { + "epoch": 3.45001220703125e-05, + "step": 22610, + "training_step_time": 0.19536995887756348 + }, + { + "epoch": 3.450164794921875e-05, + "model_forward_time": 0.023660659790039062, + "step": 22611 + }, + { + "epoch": 3.450164794921875e-05, + "step": 22611, + "training_step_time": 0.19393348693847656 + }, + { + "epoch": 3.4503173828125e-05, + "model_forward_time": 0.023743152618408203, + "step": 22612 + }, + { + "epoch": 3.4503173828125e-05, + "step": 22612, + "training_step_time": 0.18079209327697754 + }, + { + "epoch": 3.450469970703125e-05, + "model_forward_time": 0.024435758590698242, + "step": 22613 + }, + { + "epoch": 3.450469970703125e-05, + "step": 22613, + "training_step_time": 0.2048492431640625 + }, + { + "epoch": 3.45062255859375e-05, + "model_forward_time": 0.02657032012939453, + "step": 22614 + }, + { + "epoch": 3.45062255859375e-05, + "step": 22614, + "training_step_time": 0.1594257354736328 + }, + { + "epoch": 3.450775146484375e-05, + "model_forward_time": 0.024350404739379883, + "step": 22615 + }, + { + "epoch": 3.450775146484375e-05, + "step": 22615, + "training_step_time": 0.16452717781066895 + }, + { + "epoch": 3.450927734375e-05, + "model_forward_time": 0.024178743362426758, + "step": 22616 + }, + { + "epoch": 3.450927734375e-05, + "step": 22616, + "training_step_time": 0.10221052169799805 + }, + { + "epoch": 3.451080322265625e-05, + "model_forward_time": 0.02451634407043457, + "step": 22617 + }, + { + "epoch": 3.451080322265625e-05, + "step": 22617, + "training_step_time": 0.10526561737060547 + }, + { + "epoch": 3.45123291015625e-05, + "model_forward_time": 0.026382923126220703, + "step": 22618 + }, + { + "epoch": 3.45123291015625e-05, + "step": 22618, + "training_step_time": 0.10652875900268555 + }, + { + "epoch": 3.451385498046875e-05, + "model_forward_time": 0.025501012802124023, + "step": 22619 + }, + { + "epoch": 3.451385498046875e-05, + "step": 22619, + "training_step_time": 0.10457086563110352 + }, + { + "epoch": 3.4515380859375e-05, + "grad_norm": 0.12451514601707458, + "learning_rate": 1.5652299313342773e-05, + "loss": 0.0057, + "step": 22620 + }, + { + "epoch": 3.4515380859375e-05, + "model_forward_time": 0.025288105010986328, + "step": 22620 + }, + { + "epoch": 3.4515380859375e-05, + "step": 22620, + "training_step_time": 0.10478425025939941 + }, + { + "epoch": 3.451690673828125e-05, + "model_forward_time": 0.02518630027770996, + "step": 22621 + }, + { + "epoch": 3.451690673828125e-05, + "step": 22621, + "training_step_time": 0.10658121109008789 + }, + { + "epoch": 3.45184326171875e-05, + "model_forward_time": 0.02536487579345703, + "step": 22622 + }, + { + "epoch": 3.45184326171875e-05, + "step": 22622, + "training_step_time": 0.10521841049194336 + }, + { + "epoch": 3.451995849609375e-05, + "model_forward_time": 0.025150775909423828, + "step": 22623 + }, + { + "epoch": 3.451995849609375e-05, + "step": 22623, + "training_step_time": 0.10679292678833008 + }, + { + "epoch": 3.4521484375e-05, + "model_forward_time": 0.025905132293701172, + "step": 22624 + }, + { + "epoch": 3.4521484375e-05, + "step": 22624, + "training_step_time": 0.2043473720550537 + }, + { + "epoch": 3.452301025390625e-05, + "model_forward_time": 0.024646997451782227, + "step": 22625 + }, + { + "epoch": 3.452301025390625e-05, + "step": 22625, + "training_step_time": 0.10651206970214844 + }, + { + "epoch": 3.45245361328125e-05, + "model_forward_time": 0.02467942237854004, + "step": 22626 + }, + { + "epoch": 3.45245361328125e-05, + "step": 22626, + "training_step_time": 0.17296290397644043 + }, + { + "epoch": 3.452606201171875e-05, + "model_forward_time": 0.024392366409301758, + "step": 22627 + }, + { + "epoch": 3.452606201171875e-05, + "step": 22627, + "training_step_time": 0.15138888359069824 + }, + { + "epoch": 3.4527587890625e-05, + "model_forward_time": 0.02470850944519043, + "step": 22628 + }, + { + "epoch": 3.4527587890625e-05, + "step": 22628, + "training_step_time": 0.10401511192321777 + }, + { + "epoch": 3.452911376953125e-05, + "model_forward_time": 0.024695873260498047, + "step": 22629 + }, + { + "epoch": 3.452911376953125e-05, + "step": 22629, + "training_step_time": 0.10762476921081543 + }, + { + "epoch": 3.45306396484375e-05, + "grad_norm": 0.15308569371700287, + "learning_rate": 1.5612267589717805e-05, + "loss": 0.0051, + "step": 22630 + }, + { + "epoch": 3.45306396484375e-05, + "model_forward_time": 0.025216341018676758, + "step": 22630 + }, + { + "epoch": 3.45306396484375e-05, + "step": 22630, + "training_step_time": 0.10547304153442383 + }, + { + "epoch": 3.453216552734375e-05, + "model_forward_time": 0.025307893753051758, + "step": 22631 + }, + { + "epoch": 3.453216552734375e-05, + "step": 22631, + "training_step_time": 0.10705280303955078 + }, + { + "epoch": 3.453369140625e-05, + "model_forward_time": 0.024931907653808594, + "step": 22632 + }, + { + "epoch": 3.453369140625e-05, + "step": 22632, + "training_step_time": 0.10941815376281738 + }, + { + "epoch": 3.453521728515625e-05, + "model_forward_time": 0.024862289428710938, + "step": 22633 + }, + { + "epoch": 3.453521728515625e-05, + "step": 22633, + "training_step_time": 0.10802412033081055 + }, + { + "epoch": 3.45367431640625e-05, + "model_forward_time": 0.024967432022094727, + "step": 22634 + }, + { + "epoch": 3.45367431640625e-05, + "step": 22634, + "training_step_time": 0.10782623291015625 + }, + { + "epoch": 3.453826904296875e-05, + "model_forward_time": 0.025029659271240234, + "step": 22635 + }, + { + "epoch": 3.453826904296875e-05, + "step": 22635, + "training_step_time": 0.1077737808227539 + }, + { + "epoch": 3.4539794921875e-05, + "model_forward_time": 0.02444171905517578, + "step": 22636 + }, + { + "epoch": 3.4539794921875e-05, + "step": 22636, + "training_step_time": 0.10875582695007324 + }, + { + "epoch": 3.454132080078125e-05, + "model_forward_time": 0.02518606185913086, + "step": 22637 + }, + { + "epoch": 3.454132080078125e-05, + "step": 22637, + "training_step_time": 0.11926770210266113 + }, + { + "epoch": 3.45428466796875e-05, + "model_forward_time": 0.024074554443359375, + "step": 22638 + }, + { + "epoch": 3.45428466796875e-05, + "step": 22638, + "training_step_time": 0.12267231941223145 + }, + { + "epoch": 3.454437255859375e-05, + "model_forward_time": 0.023903369903564453, + "step": 22639 + }, + { + "epoch": 3.454437255859375e-05, + "step": 22639, + "training_step_time": 0.1225888729095459 + }, + { + "epoch": 3.45458984375e-05, + "grad_norm": 0.09998887032270432, + "learning_rate": 1.557227765043027e-05, + "loss": 0.013, + "step": 22640 + }, + { + "epoch": 3.45458984375e-05, + "model_forward_time": 0.024883747100830078, + "step": 22640 + }, + { + "epoch": 3.45458984375e-05, + "step": 22640, + "training_step_time": 0.17571377754211426 + }, + { + "epoch": 3.454742431640625e-05, + "model_forward_time": 0.024603605270385742, + "step": 22641 + }, + { + "epoch": 3.454742431640625e-05, + "step": 22641, + "training_step_time": 0.16199898719787598 + }, + { + "epoch": 3.45489501953125e-05, + "model_forward_time": 0.024004220962524414, + "step": 22642 + }, + { + "epoch": 3.45489501953125e-05, + "step": 22642, + "training_step_time": 0.1923356056213379 + }, + { + "epoch": 3.455047607421875e-05, + "model_forward_time": 0.02419281005859375, + "step": 22643 + }, + { + "epoch": 3.455047607421875e-05, + "step": 22643, + "training_step_time": 0.16680335998535156 + }, + { + "epoch": 3.4552001953125e-05, + "model_forward_time": 0.023984909057617188, + "step": 22644 + }, + { + "epoch": 3.4552001953125e-05, + "step": 22644, + "training_step_time": 0.1653447151184082 + }, + { + "epoch": 3.455352783203125e-05, + "model_forward_time": 0.023545503616333008, + "step": 22645 + }, + { + "epoch": 3.455352783203125e-05, + "step": 22645, + "training_step_time": 0.10384798049926758 + }, + { + "epoch": 3.45550537109375e-05, + "model_forward_time": 0.024919509887695312, + "step": 22646 + }, + { + "epoch": 3.45550537109375e-05, + "step": 22646, + "training_step_time": 0.10674858093261719 + }, + { + "epoch": 3.455657958984375e-05, + "model_forward_time": 0.02539825439453125, + "step": 22647 + }, + { + "epoch": 3.455657958984375e-05, + "step": 22647, + "training_step_time": 0.10599875450134277 + }, + { + "epoch": 3.455810546875e-05, + "model_forward_time": 0.024866580963134766, + "step": 22648 + }, + { + "epoch": 3.455810546875e-05, + "step": 22648, + "training_step_time": 0.1106269359588623 + }, + { + "epoch": 3.455963134765625e-05, + "model_forward_time": 0.02497076988220215, + "step": 22649 + }, + { + "epoch": 3.455963134765625e-05, + "step": 22649, + "training_step_time": 0.11391568183898926 + }, + { + "epoch": 3.45611572265625e-05, + "grad_norm": 0.36277642846107483, + "learning_rate": 1.553232954407171e-05, + "loss": 0.0094, + "step": 22650 + }, + { + "epoch": 3.45611572265625e-05, + "model_forward_time": 0.02516913414001465, + "step": 22650 + }, + { + "epoch": 3.45611572265625e-05, + "step": 22650, + "training_step_time": 0.10614895820617676 + }, + { + "epoch": 3.456268310546875e-05, + "model_forward_time": 0.024924278259277344, + "step": 22651 + }, + { + "epoch": 3.456268310546875e-05, + "step": 22651, + "training_step_time": 0.10732841491699219 + }, + { + "epoch": 3.4564208984375e-05, + "model_forward_time": 0.02500605583190918, + "step": 22652 + }, + { + "epoch": 3.4564208984375e-05, + "step": 22652, + "training_step_time": 0.10872411727905273 + }, + { + "epoch": 3.456573486328125e-05, + "model_forward_time": 0.024770259857177734, + "step": 22653 + }, + { + "epoch": 3.456573486328125e-05, + "step": 22653, + "training_step_time": 0.10824036598205566 + }, + { + "epoch": 3.45672607421875e-05, + "model_forward_time": 0.024863719940185547, + "step": 22654 + }, + { + "epoch": 3.45672607421875e-05, + "step": 22654, + "training_step_time": 0.10634136199951172 + }, + { + "epoch": 3.456878662109375e-05, + "model_forward_time": 0.025053739547729492, + "step": 22655 + }, + { + "epoch": 3.456878662109375e-05, + "step": 22655, + "training_step_time": 0.16725754737854004 + }, + { + "epoch": 3.45703125e-05, + "model_forward_time": 0.02429938316345215, + "step": 22656 + }, + { + "epoch": 3.45703125e-05, + "step": 22656, + "training_step_time": 0.11075115203857422 + }, + { + "epoch": 3.457183837890625e-05, + "model_forward_time": 0.024128198623657227, + "step": 22657 + }, + { + "epoch": 3.457183837890625e-05, + "step": 22657, + "training_step_time": 0.12139439582824707 + }, + { + "epoch": 3.45733642578125e-05, + "model_forward_time": 0.025157451629638672, + "step": 22658 + }, + { + "epoch": 3.45733642578125e-05, + "step": 22658, + "training_step_time": 0.13670086860656738 + }, + { + "epoch": 3.457489013671875e-05, + "model_forward_time": 0.024887800216674805, + "step": 22659 + }, + { + "epoch": 3.457489013671875e-05, + "step": 22659, + "training_step_time": 0.13590121269226074 + }, + { + "epoch": 3.4576416015625e-05, + "grad_norm": 0.27834823727607727, + "learning_rate": 1.549242331918285e-05, + "loss": 0.0069, + "step": 22660 + }, + { + "epoch": 3.4576416015625e-05, + "model_forward_time": 0.024457216262817383, + "step": 22660 + }, + { + "epoch": 3.4576416015625e-05, + "step": 22660, + "training_step_time": 0.11397528648376465 + }, + { + "epoch": 3.457794189453125e-05, + "model_forward_time": 0.02589130401611328, + "step": 22661 + }, + { + "epoch": 3.457794189453125e-05, + "step": 22661, + "training_step_time": 0.12010002136230469 + }, + { + "epoch": 3.45794677734375e-05, + "model_forward_time": 0.024899721145629883, + "step": 22662 + }, + { + "epoch": 3.45794677734375e-05, + "step": 22662, + "training_step_time": 0.1097261905670166 + }, + { + "epoch": 3.458099365234375e-05, + "model_forward_time": 0.024814844131469727, + "step": 22663 + }, + { + "epoch": 3.458099365234375e-05, + "step": 22663, + "training_step_time": 0.10960960388183594 + }, + { + "epoch": 3.458251953125e-05, + "model_forward_time": 0.02419567108154297, + "step": 22664 + }, + { + "epoch": 3.458251953125e-05, + "step": 22664, + "training_step_time": 0.10557389259338379 + }, + { + "epoch": 3.458404541015625e-05, + "model_forward_time": 0.023843050003051758, + "step": 22665 + }, + { + "epoch": 3.458404541015625e-05, + "step": 22665, + "training_step_time": 0.10669684410095215 + }, + { + "epoch": 3.45855712890625e-05, + "model_forward_time": 0.02442169189453125, + "step": 22666 + }, + { + "epoch": 3.45855712890625e-05, + "step": 22666, + "training_step_time": 0.10704565048217773 + }, + { + "epoch": 3.458709716796875e-05, + "model_forward_time": 0.023878097534179688, + "step": 22667 + }, + { + "epoch": 3.458709716796875e-05, + "step": 22667, + "training_step_time": 0.1396334171295166 + }, + { + "epoch": 3.4588623046875e-05, + "model_forward_time": 0.024099349975585938, + "step": 22668 + }, + { + "epoch": 3.4588623046875e-05, + "step": 22668, + "training_step_time": 0.11092042922973633 + }, + { + "epoch": 3.459014892578125e-05, + "model_forward_time": 0.026486873626708984, + "step": 22669 + }, + { + "epoch": 3.459014892578125e-05, + "step": 22669, + "training_step_time": 0.21377038955688477 + }, + { + "epoch": 3.45916748046875e-05, + "grad_norm": 0.12981250882148743, + "learning_rate": 1.5452559024253487e-05, + "loss": 0.0045, + "step": 22670 + }, + { + "epoch": 3.45916748046875e-05, + "model_forward_time": 0.023784875869750977, + "step": 22670 + }, + { + "epoch": 3.45916748046875e-05, + "step": 22670, + "training_step_time": 0.10393810272216797 + }, + { + "epoch": 3.459320068359375e-05, + "model_forward_time": 0.02320241928100586, + "step": 22671 + }, + { + "epoch": 3.459320068359375e-05, + "step": 22671, + "training_step_time": 0.11812043190002441 + }, + { + "epoch": 3.45947265625e-05, + "model_forward_time": 0.024200916290283203, + "step": 22672 + }, + { + "epoch": 3.45947265625e-05, + "step": 22672, + "training_step_time": 0.11285090446472168 + }, + { + "epoch": 3.459625244140625e-05, + "model_forward_time": 0.024064302444458008, + "step": 22673 + }, + { + "epoch": 3.459625244140625e-05, + "step": 22673, + "training_step_time": 0.11010909080505371 + }, + { + "epoch": 3.45977783203125e-05, + "model_forward_time": 0.024003982543945312, + "step": 22674 + }, + { + "epoch": 3.45977783203125e-05, + "step": 22674, + "training_step_time": 0.10930681228637695 + }, + { + "epoch": 3.459930419921875e-05, + "model_forward_time": 0.024041414260864258, + "step": 22675 + }, + { + "epoch": 3.459930419921875e-05, + "step": 22675, + "training_step_time": 0.10636377334594727 + }, + { + "epoch": 3.4600830078125e-05, + "model_forward_time": 0.02397012710571289, + "step": 22676 + }, + { + "epoch": 3.4600830078125e-05, + "step": 22676, + "training_step_time": 0.10981583595275879 + }, + { + "epoch": 3.460235595703125e-05, + "model_forward_time": 0.024153947830200195, + "step": 22677 + }, + { + "epoch": 3.460235595703125e-05, + "step": 22677, + "training_step_time": 0.10694766044616699 + }, + { + "epoch": 3.46038818359375e-05, + "model_forward_time": 0.023944616317749023, + "step": 22678 + }, + { + "epoch": 3.46038818359375e-05, + "step": 22678, + "training_step_time": 0.10927891731262207 + }, + { + "epoch": 3.460540771484375e-05, + "model_forward_time": 0.024237632751464844, + "step": 22679 + }, + { + "epoch": 3.460540771484375e-05, + "step": 22679, + "training_step_time": 0.10911321640014648 + }, + { + "epoch": 3.460693359375e-05, + "grad_norm": 0.16855190694332123, + "learning_rate": 1.5412736707722537e-05, + "loss": 0.0066, + "step": 22680 + }, + { + "epoch": 3.460693359375e-05, + "model_forward_time": 0.024281024932861328, + "step": 22680 + }, + { + "epoch": 3.460693359375e-05, + "step": 22680, + "training_step_time": 0.10901141166687012 + }, + { + "epoch": 3.460845947265625e-05, + "model_forward_time": 0.023990869522094727, + "step": 22681 + }, + { + "epoch": 3.460845947265625e-05, + "step": 22681, + "training_step_time": 0.10925793647766113 + }, + { + "epoch": 3.46099853515625e-05, + "model_forward_time": 0.02411961555480957, + "step": 22682 + }, + { + "epoch": 3.46099853515625e-05, + "step": 22682, + "training_step_time": 0.10846257209777832 + }, + { + "epoch": 3.461151123046875e-05, + "model_forward_time": 0.024064302444458008, + "step": 22683 + }, + { + "epoch": 3.461151123046875e-05, + "step": 22683, + "training_step_time": 0.10765647888183594 + }, + { + "epoch": 3.4613037109375e-05, + "model_forward_time": 0.025329113006591797, + "step": 22684 + }, + { + "epoch": 3.4613037109375e-05, + "step": 22684, + "training_step_time": 0.11897540092468262 + }, + { + "epoch": 3.461456298828125e-05, + "model_forward_time": 0.023889780044555664, + "step": 22685 + }, + { + "epoch": 3.461456298828125e-05, + "step": 22685, + "training_step_time": 0.16896295547485352 + }, + { + "epoch": 3.46160888671875e-05, + "model_forward_time": 0.024741411209106445, + "step": 22686 + }, + { + "epoch": 3.46160888671875e-05, + "step": 22686, + "training_step_time": 0.11070466041564941 + }, + { + "epoch": 3.461761474609375e-05, + "model_forward_time": 0.023485898971557617, + "step": 22687 + }, + { + "epoch": 3.461761474609375e-05, + "step": 22687, + "training_step_time": 0.16466975212097168 + }, + { + "epoch": 3.4619140625e-05, + "model_forward_time": 0.02356719970703125, + "step": 22688 + }, + { + "epoch": 3.4619140625e-05, + "step": 22688, + "training_step_time": 0.1440896987915039 + }, + { + "epoch": 3.462066650390625e-05, + "model_forward_time": 0.023482799530029297, + "step": 22689 + }, + { + "epoch": 3.462066650390625e-05, + "step": 22689, + "training_step_time": 0.17079472541809082 + }, + { + "epoch": 3.46221923828125e-05, + "grad_norm": 0.13016711175441742, + "learning_rate": 1.537295641797785e-05, + "loss": 0.0047, + "step": 22690 + }, + { + "epoch": 3.46221923828125e-05, + "model_forward_time": 0.02337336540222168, + "step": 22690 + }, + { + "epoch": 3.46221923828125e-05, + "step": 22690, + "training_step_time": 0.15986394882202148 + }, + { + "epoch": 3.462371826171875e-05, + "model_forward_time": 0.023081541061401367, + "step": 22691 + }, + { + "epoch": 3.462371826171875e-05, + "step": 22691, + "training_step_time": 0.11078834533691406 + }, + { + "epoch": 3.4625244140625e-05, + "model_forward_time": 0.023950576782226562, + "step": 22692 + }, + { + "epoch": 3.4625244140625e-05, + "step": 22692, + "training_step_time": 0.11227250099182129 + }, + { + "epoch": 3.462677001953125e-05, + "model_forward_time": 0.024293899536132812, + "step": 22693 + }, + { + "epoch": 3.462677001953125e-05, + "step": 22693, + "training_step_time": 0.10295867919921875 + }, + { + "epoch": 3.46282958984375e-05, + "model_forward_time": 0.02438640594482422, + "step": 22694 + }, + { + "epoch": 3.46282958984375e-05, + "step": 22694, + "training_step_time": 0.10865664482116699 + }, + { + "epoch": 3.462982177734375e-05, + "model_forward_time": 0.023991107940673828, + "step": 22695 + }, + { + "epoch": 3.462982177734375e-05, + "step": 22695, + "training_step_time": 0.10357046127319336 + }, + { + "epoch": 3.463134765625e-05, + "model_forward_time": 0.023816585540771484, + "step": 22696 + }, + { + "epoch": 3.463134765625e-05, + "step": 22696, + "training_step_time": 0.10399127006530762 + }, + { + "epoch": 3.463287353515625e-05, + "model_forward_time": 0.025452136993408203, + "step": 22697 + }, + { + "epoch": 3.463287353515625e-05, + "step": 22697, + "training_step_time": 0.10942459106445312 + }, + { + "epoch": 3.46343994140625e-05, + "model_forward_time": 0.025284290313720703, + "step": 22698 + }, + { + "epoch": 3.46343994140625e-05, + "step": 22698, + "training_step_time": 0.1551656723022461 + }, + { + "epoch": 3.463592529296875e-05, + "model_forward_time": 0.02498340606689453, + "step": 22699 + }, + { + "epoch": 3.463592529296875e-05, + "step": 22699, + "training_step_time": 0.1642286777496338 + }, + { + "epoch": 3.4637451171875e-05, + "grad_norm": 0.22802887856960297, + "learning_rate": 1.5333218203356243e-05, + "loss": 0.0053, + "step": 22700 + }, + { + "epoch": 3.4637451171875e-05, + "model_forward_time": 0.02660226821899414, + "step": 22700 + }, + { + "epoch": 3.4637451171875e-05, + "step": 22700, + "training_step_time": 0.15730714797973633 + }, + { + "epoch": 3.463897705078125e-05, + "model_forward_time": 0.02332139015197754, + "step": 22701 + }, + { + "epoch": 3.463897705078125e-05, + "step": 22701, + "training_step_time": 0.17769217491149902 + }, + { + "epoch": 3.46405029296875e-05, + "model_forward_time": 0.02424478530883789, + "step": 22702 + }, + { + "epoch": 3.46405029296875e-05, + "step": 22702, + "training_step_time": 0.1382887363433838 + }, + { + "epoch": 3.464202880859375e-05, + "model_forward_time": 0.02326035499572754, + "step": 22703 + }, + { + "epoch": 3.464202880859375e-05, + "step": 22703, + "training_step_time": 0.1977241039276123 + }, + { + "epoch": 3.46435546875e-05, + "model_forward_time": 0.024146556854248047, + "step": 22704 + }, + { + "epoch": 3.46435546875e-05, + "step": 22704, + "training_step_time": 0.13750624656677246 + }, + { + "epoch": 3.464508056640625e-05, + "model_forward_time": 0.024793624877929688, + "step": 22705 + }, + { + "epoch": 3.464508056640625e-05, + "step": 22705, + "training_step_time": 0.1199800968170166 + }, + { + "epoch": 3.46466064453125e-05, + "model_forward_time": 0.024591445922851562, + "step": 22706 + }, + { + "epoch": 3.46466064453125e-05, + "step": 22706, + "training_step_time": 0.11993622779846191 + }, + { + "epoch": 3.464813232421875e-05, + "model_forward_time": 0.024833202362060547, + "step": 22707 + }, + { + "epoch": 3.464813232421875e-05, + "step": 22707, + "training_step_time": 0.11265420913696289 + }, + { + "epoch": 3.4649658203125e-05, + "model_forward_time": 0.02526068687438965, + "step": 22708 + }, + { + "epoch": 3.4649658203125e-05, + "step": 22708, + "training_step_time": 0.11073946952819824 + }, + { + "epoch": 3.465118408203125e-05, + "model_forward_time": 0.024870872497558594, + "step": 22709 + }, + { + "epoch": 3.465118408203125e-05, + "step": 22709, + "training_step_time": 0.1105196475982666 + }, + { + "epoch": 3.46527099609375e-05, + "grad_norm": 0.259838730096817, + "learning_rate": 1.5293522112143373e-05, + "loss": 0.0168, + "step": 22710 + }, + { + "epoch": 3.46527099609375e-05, + "model_forward_time": 0.024914026260375977, + "step": 22710 + }, + { + "epoch": 3.46527099609375e-05, + "step": 22710, + "training_step_time": 0.10899138450622559 + }, + { + "epoch": 3.465423583984375e-05, + "model_forward_time": 0.0247189998626709, + "step": 22711 + }, + { + "epoch": 3.465423583984375e-05, + "step": 22711, + "training_step_time": 0.17776775360107422 + }, + { + "epoch": 3.465576171875e-05, + "model_forward_time": 0.02428889274597168, + "step": 22712 + }, + { + "epoch": 3.465576171875e-05, + "step": 22712, + "training_step_time": 0.11378073692321777 + }, + { + "epoch": 3.465728759765625e-05, + "model_forward_time": 0.024118423461914062, + "step": 22713 + }, + { + "epoch": 3.465728759765625e-05, + "step": 22713, + "training_step_time": 0.21210861206054688 + }, + { + "epoch": 3.46588134765625e-05, + "model_forward_time": 0.02452230453491211, + "step": 22714 + }, + { + "epoch": 3.46588134765625e-05, + "step": 22714, + "training_step_time": 0.10902857780456543 + }, + { + "epoch": 3.466033935546875e-05, + "model_forward_time": 0.024440288543701172, + "step": 22715 + }, + { + "epoch": 3.466033935546875e-05, + "step": 22715, + "training_step_time": 0.1206057071685791 + }, + { + "epoch": 3.4661865234375e-05, + "model_forward_time": 0.025306224822998047, + "step": 22716 + }, + { + "epoch": 3.4661865234375e-05, + "step": 22716, + "training_step_time": 0.20368123054504395 + }, + { + "epoch": 3.466339111328125e-05, + "model_forward_time": 0.024439096450805664, + "step": 22717 + }, + { + "epoch": 3.466339111328125e-05, + "step": 22717, + "training_step_time": 0.1036231517791748 + }, + { + "epoch": 3.46649169921875e-05, + "model_forward_time": 0.024217844009399414, + "step": 22718 + }, + { + "epoch": 3.46649169921875e-05, + "step": 22718, + "training_step_time": 0.10343003273010254 + }, + { + "epoch": 3.466644287109375e-05, + "model_forward_time": 0.02548384666442871, + "step": 22719 + }, + { + "epoch": 3.466644287109375e-05, + "step": 22719, + "training_step_time": 0.11218547821044922 + }, + { + "epoch": 3.466796875e-05, + "grad_norm": 0.17345885932445526, + "learning_rate": 1.5253868192573729e-05, + "loss": 0.0036, + "step": 22720 + }, + { + "epoch": 3.466796875e-05, + "model_forward_time": 0.025122642517089844, + "step": 22720 + }, + { + "epoch": 3.466796875e-05, + "step": 22720, + "training_step_time": 0.11033177375793457 + }, + { + "epoch": 3.466949462890625e-05, + "model_forward_time": 0.025322914123535156, + "step": 22721 + }, + { + "epoch": 3.466949462890625e-05, + "step": 22721, + "training_step_time": 0.10634517669677734 + }, + { + "epoch": 3.46710205078125e-05, + "model_forward_time": 0.025482892990112305, + "step": 22722 + }, + { + "epoch": 3.46710205078125e-05, + "step": 22722, + "training_step_time": 0.10773372650146484 + }, + { + "epoch": 3.467254638671875e-05, + "model_forward_time": 0.025668859481811523, + "step": 22723 + }, + { + "epoch": 3.467254638671875e-05, + "step": 22723, + "training_step_time": 0.10601925849914551 + }, + { + "epoch": 3.4674072265625e-05, + "model_forward_time": 0.024875879287719727, + "step": 22724 + }, + { + "epoch": 3.4674072265625e-05, + "step": 22724, + "training_step_time": 0.10669136047363281 + }, + { + "epoch": 3.467559814453125e-05, + "model_forward_time": 0.024714946746826172, + "step": 22725 + }, + { + "epoch": 3.467559814453125e-05, + "step": 22725, + "training_step_time": 0.10521101951599121 + }, + { + "epoch": 3.46771240234375e-05, + "model_forward_time": 0.02600264549255371, + "step": 22726 + }, + { + "epoch": 3.46771240234375e-05, + "step": 22726, + "training_step_time": 0.1052401065826416 + }, + { + "epoch": 3.467864990234375e-05, + "model_forward_time": 0.025722026824951172, + "step": 22727 + }, + { + "epoch": 3.467864990234375e-05, + "step": 22727, + "training_step_time": 0.1058499813079834 + }, + { + "epoch": 3.468017578125e-05, + "model_forward_time": 0.025041580200195312, + "step": 22728 + }, + { + "epoch": 3.468017578125e-05, + "step": 22728, + "training_step_time": 0.16454577445983887 + }, + { + "epoch": 3.468170166015625e-05, + "model_forward_time": 0.024910688400268555, + "step": 22729 + }, + { + "epoch": 3.468170166015625e-05, + "step": 22729, + "training_step_time": 0.13653278350830078 + }, + { + "epoch": 3.46832275390625e-05, + "grad_norm": 0.2135002762079239, + "learning_rate": 1.5214256492830598e-05, + "loss": 0.0053, + "step": 22730 + }, + { + "epoch": 3.46832275390625e-05, + "model_forward_time": 0.0253143310546875, + "step": 22730 + }, + { + "epoch": 3.46832275390625e-05, + "step": 22730, + "training_step_time": 0.10994267463684082 + }, + { + "epoch": 3.468475341796875e-05, + "model_forward_time": 0.025034427642822266, + "step": 22731 + }, + { + "epoch": 3.468475341796875e-05, + "step": 22731, + "training_step_time": 0.1519179344177246 + }, + { + "epoch": 3.4686279296875e-05, + "model_forward_time": 0.024996042251586914, + "step": 22732 + }, + { + "epoch": 3.4686279296875e-05, + "step": 22732, + "training_step_time": 0.15223169326782227 + }, + { + "epoch": 3.468780517578125e-05, + "model_forward_time": 0.02487492561340332, + "step": 22733 + }, + { + "epoch": 3.468780517578125e-05, + "step": 22733, + "training_step_time": 0.1062781810760498 + }, + { + "epoch": 3.46893310546875e-05, + "model_forward_time": 0.025048494338989258, + "step": 22734 + }, + { + "epoch": 3.46893310546875e-05, + "step": 22734, + "training_step_time": 0.12725567817687988 + }, + { + "epoch": 3.469085693359375e-05, + "model_forward_time": 0.025586366653442383, + "step": 22735 + }, + { + "epoch": 3.469085693359375e-05, + "step": 22735, + "training_step_time": 0.11344647407531738 + }, + { + "epoch": 3.46923828125e-05, + "model_forward_time": 0.02557849884033203, + "step": 22736 + }, + { + "epoch": 3.46923828125e-05, + "step": 22736, + "training_step_time": 0.12065982818603516 + }, + { + "epoch": 3.469390869140625e-05, + "model_forward_time": 0.025480031967163086, + "step": 22737 + }, + { + "epoch": 3.469390869140625e-05, + "step": 22737, + "training_step_time": 0.10644078254699707 + }, + { + "epoch": 3.46954345703125e-05, + "model_forward_time": 0.025440692901611328, + "step": 22738 + }, + { + "epoch": 3.46954345703125e-05, + "step": 22738, + "training_step_time": 0.11100530624389648 + }, + { + "epoch": 3.469696044921875e-05, + "model_forward_time": 0.02528977394104004, + "step": 22739 + }, + { + "epoch": 3.469696044921875e-05, + "step": 22739, + "training_step_time": 0.10768246650695801 + }, + { + "epoch": 3.4698486328125e-05, + "grad_norm": 0.15731625258922577, + "learning_rate": 1.517468706104589e-05, + "loss": 0.0068, + "step": 22740 + }, + { + "epoch": 3.4698486328125e-05, + "model_forward_time": 0.02524256706237793, + "step": 22740 + }, + { + "epoch": 3.4698486328125e-05, + "step": 22740, + "training_step_time": 0.10785794258117676 + }, + { + "epoch": 3.470001220703125e-05, + "model_forward_time": 0.025379657745361328, + "step": 22741 + }, + { + "epoch": 3.470001220703125e-05, + "step": 22741, + "training_step_time": 0.10427021980285645 + }, + { + "epoch": 3.47015380859375e-05, + "model_forward_time": 0.025455474853515625, + "step": 22742 + }, + { + "epoch": 3.47015380859375e-05, + "step": 22742, + "training_step_time": 0.10503840446472168 + }, + { + "epoch": 3.470306396484375e-05, + "model_forward_time": 0.02528548240661621, + "step": 22743 + }, + { + "epoch": 3.470306396484375e-05, + "step": 22743, + "training_step_time": 0.11847162246704102 + }, + { + "epoch": 3.470458984375e-05, + "model_forward_time": 0.02529621124267578, + "step": 22744 + }, + { + "epoch": 3.470458984375e-05, + "step": 22744, + "training_step_time": 0.13333678245544434 + }, + { + "epoch": 3.470611572265625e-05, + "model_forward_time": 0.025431394577026367, + "step": 22745 + }, + { + "epoch": 3.470611572265625e-05, + "step": 22745, + "training_step_time": 0.1319727897644043 + }, + { + "epoch": 3.47076416015625e-05, + "model_forward_time": 0.025043487548828125, + "step": 22746 + }, + { + "epoch": 3.47076416015625e-05, + "step": 22746, + "training_step_time": 0.12191534042358398 + }, + { + "epoch": 3.470916748046875e-05, + "model_forward_time": 0.024849653244018555, + "step": 22747 + }, + { + "epoch": 3.470916748046875e-05, + "step": 22747, + "training_step_time": 0.12015867233276367 + }, + { + "epoch": 3.4710693359375e-05, + "model_forward_time": 0.025035619735717773, + "step": 22748 + }, + { + "epoch": 3.4710693359375e-05, + "step": 22748, + "training_step_time": 0.11684918403625488 + }, + { + "epoch": 3.471221923828125e-05, + "model_forward_time": 0.02492809295654297, + "step": 22749 + }, + { + "epoch": 3.471221923828125e-05, + "step": 22749, + "training_step_time": 0.21842622756958008 + }, + { + "epoch": 3.47137451171875e-05, + "grad_norm": 0.24600502848625183, + "learning_rate": 1.5135159945300231e-05, + "loss": 0.0115, + "step": 22750 + }, + { + "epoch": 3.47137451171875e-05, + "model_forward_time": 0.024547338485717773, + "step": 22750 + }, + { + "epoch": 3.47137451171875e-05, + "step": 22750, + "training_step_time": 0.12700390815734863 + }, + { + "epoch": 3.471527099609375e-05, + "model_forward_time": 0.024763107299804688, + "step": 22751 + }, + { + "epoch": 3.471527099609375e-05, + "step": 22751, + "training_step_time": 0.11107373237609863 + }, + { + "epoch": 3.4716796875e-05, + "model_forward_time": 0.0253298282623291, + "step": 22752 + }, + { + "epoch": 3.4716796875e-05, + "step": 22752, + "training_step_time": 0.11928462982177734 + }, + { + "epoch": 3.471832275390625e-05, + "model_forward_time": 0.02521347999572754, + "step": 22753 + }, + { + "epoch": 3.471832275390625e-05, + "step": 22753, + "training_step_time": 0.11527156829833984 + }, + { + "epoch": 3.47198486328125e-05, + "model_forward_time": 0.025943279266357422, + "step": 22754 + }, + { + "epoch": 3.47198486328125e-05, + "step": 22754, + "training_step_time": 0.10757660865783691 + }, + { + "epoch": 3.472137451171875e-05, + "model_forward_time": 0.02525782585144043, + "step": 22755 + }, + { + "epoch": 3.472137451171875e-05, + "step": 22755, + "training_step_time": 0.10715699195861816 + }, + { + "epoch": 3.4722900390625e-05, + "model_forward_time": 0.025412559509277344, + "step": 22756 + }, + { + "epoch": 3.4722900390625e-05, + "step": 22756, + "training_step_time": 0.1069040298461914 + }, + { + "epoch": 3.472442626953125e-05, + "model_forward_time": 0.025170326232910156, + "step": 22757 + }, + { + "epoch": 3.472442626953125e-05, + "step": 22757, + "training_step_time": 0.1368885040283203 + }, + { + "epoch": 3.47259521484375e-05, + "model_forward_time": 0.02606654167175293, + "step": 22758 + }, + { + "epoch": 3.47259521484375e-05, + "step": 22758, + "training_step_time": 0.10951089859008789 + }, + { + "epoch": 3.472747802734375e-05, + "model_forward_time": 0.025597810745239258, + "step": 22759 + }, + { + "epoch": 3.472747802734375e-05, + "step": 22759, + "training_step_time": 0.20629644393920898 + }, + { + "epoch": 3.472900390625e-05, + "grad_norm": 0.3221125304698944, + "learning_rate": 1.5095675193622777e-05, + "loss": 0.0117, + "step": 22760 + }, + { + "epoch": 3.472900390625e-05, + "model_forward_time": 0.024474143981933594, + "step": 22760 + }, + { + "epoch": 3.472900390625e-05, + "step": 22760, + "training_step_time": 0.10703825950622559 + }, + { + "epoch": 3.473052978515625e-05, + "model_forward_time": 0.024786710739135742, + "step": 22761 + }, + { + "epoch": 3.473052978515625e-05, + "step": 22761, + "training_step_time": 0.10791206359863281 + }, + { + "epoch": 3.47320556640625e-05, + "model_forward_time": 0.025203466415405273, + "step": 22762 + }, + { + "epoch": 3.47320556640625e-05, + "step": 22762, + "training_step_time": 0.10649824142456055 + }, + { + "epoch": 3.473358154296875e-05, + "model_forward_time": 0.0252535343170166, + "step": 22763 + }, + { + "epoch": 3.473358154296875e-05, + "step": 22763, + "training_step_time": 0.10558271408081055 + }, + { + "epoch": 3.4735107421875e-05, + "model_forward_time": 0.025072813034057617, + "step": 22764 + }, + { + "epoch": 3.4735107421875e-05, + "step": 22764, + "training_step_time": 0.10522699356079102 + }, + { + "epoch": 3.473663330078125e-05, + "model_forward_time": 0.025418758392333984, + "step": 22765 + }, + { + "epoch": 3.473663330078125e-05, + "step": 22765, + "training_step_time": 0.10479927062988281 + }, + { + "epoch": 3.47381591796875e-05, + "model_forward_time": 0.02541208267211914, + "step": 22766 + }, + { + "epoch": 3.47381591796875e-05, + "step": 22766, + "training_step_time": 0.10793066024780273 + }, + { + "epoch": 3.473968505859375e-05, + "model_forward_time": 0.02559661865234375, + "step": 22767 + }, + { + "epoch": 3.473968505859375e-05, + "step": 22767, + "training_step_time": 0.10595011711120605 + }, + { + "epoch": 3.47412109375e-05, + "model_forward_time": 0.02535843849182129, + "step": 22768 + }, + { + "epoch": 3.47412109375e-05, + "step": 22768, + "training_step_time": 0.10576105117797852 + }, + { + "epoch": 3.474273681640625e-05, + "model_forward_time": 0.025388717651367188, + "step": 22769 + }, + { + "epoch": 3.474273681640625e-05, + "step": 22769, + "training_step_time": 0.10571813583374023 + }, + { + "epoch": 3.47442626953125e-05, + "grad_norm": 0.14750151336193085, + "learning_rate": 1.5056232853991209e-05, + "loss": 0.0068, + "step": 22770 + }, + { + "epoch": 3.47442626953125e-05, + "model_forward_time": 0.0250701904296875, + "step": 22770 + }, + { + "epoch": 3.47442626953125e-05, + "step": 22770, + "training_step_time": 0.10787010192871094 + }, + { + "epoch": 3.474578857421875e-05, + "model_forward_time": 0.025307655334472656, + "step": 22771 + }, + { + "epoch": 3.474578857421875e-05, + "step": 22771, + "training_step_time": 0.10696268081665039 + }, + { + "epoch": 3.4747314453125e-05, + "model_forward_time": 0.02534627914428711, + "step": 22772 + }, + { + "epoch": 3.4747314453125e-05, + "step": 22772, + "training_step_time": 0.11025595664978027 + }, + { + "epoch": 3.474884033203125e-05, + "model_forward_time": 0.025321483612060547, + "step": 22773 + }, + { + "epoch": 3.474884033203125e-05, + "step": 22773, + "training_step_time": 0.10450434684753418 + }, + { + "epoch": 3.47503662109375e-05, + "model_forward_time": 0.025637149810791016, + "step": 22774 + }, + { + "epoch": 3.47503662109375e-05, + "step": 22774, + "training_step_time": 0.10747432708740234 + }, + { + "epoch": 3.475189208984375e-05, + "model_forward_time": 0.025498628616333008, + "step": 22775 + }, + { + "epoch": 3.475189208984375e-05, + "step": 22775, + "training_step_time": 0.19253134727478027 + }, + { + "epoch": 3.475341796875e-05, + "model_forward_time": 0.024778127670288086, + "step": 22776 + }, + { + "epoch": 3.475341796875e-05, + "step": 22776, + "training_step_time": 0.14162492752075195 + }, + { + "epoch": 3.475494384765625e-05, + "model_forward_time": 0.025519132614135742, + "step": 22777 + }, + { + "epoch": 3.475494384765625e-05, + "step": 22777, + "training_step_time": 0.10752582550048828 + }, + { + "epoch": 3.47564697265625e-05, + "model_forward_time": 0.025004863739013672, + "step": 22778 + }, + { + "epoch": 3.47564697265625e-05, + "step": 22778, + "training_step_time": 0.18317270278930664 + }, + { + "epoch": 3.475799560546875e-05, + "model_forward_time": 0.025086641311645508, + "step": 22779 + }, + { + "epoch": 3.475799560546875e-05, + "step": 22779, + "training_step_time": 0.16836309432983398 + }, + { + "epoch": 3.4759521484375e-05, + "grad_norm": 0.13970600068569183, + "learning_rate": 1.5016832974331724e-05, + "loss": 0.0044, + "step": 22780 + }, + { + "epoch": 3.4759521484375e-05, + "model_forward_time": 0.024425029754638672, + "step": 22780 + }, + { + "epoch": 3.4759521484375e-05, + "step": 22780, + "training_step_time": 0.16679859161376953 + }, + { + "epoch": 3.476104736328125e-05, + "model_forward_time": 0.02862858772277832, + "step": 22781 + }, + { + "epoch": 3.476104736328125e-05, + "step": 22781, + "training_step_time": 0.1746206283569336 + }, + { + "epoch": 3.47625732421875e-05, + "model_forward_time": 0.024951696395874023, + "step": 22782 + }, + { + "epoch": 3.47625732421875e-05, + "step": 22782, + "training_step_time": 0.12057971954345703 + }, + { + "epoch": 3.476409912109375e-05, + "model_forward_time": 0.024610280990600586, + "step": 22783 + }, + { + "epoch": 3.476409912109375e-05, + "step": 22783, + "training_step_time": 0.11373066902160645 + }, + { + "epoch": 3.4765625e-05, + "model_forward_time": 0.025423765182495117, + "step": 22784 + }, + { + "epoch": 3.4765625e-05, + "step": 22784, + "training_step_time": 0.11100149154663086 + }, + { + "epoch": 3.476715087890625e-05, + "model_forward_time": 0.025137662887573242, + "step": 22785 + }, + { + "epoch": 3.476715087890625e-05, + "step": 22785, + "training_step_time": 0.11122798919677734 + }, + { + "epoch": 3.47686767578125e-05, + "model_forward_time": 0.0242769718170166, + "step": 22786 + }, + { + "epoch": 3.47686767578125e-05, + "step": 22786, + "training_step_time": 0.11081910133361816 + }, + { + "epoch": 3.477020263671875e-05, + "model_forward_time": 0.025257110595703125, + "step": 22787 + }, + { + "epoch": 3.477020263671875e-05, + "step": 22787, + "training_step_time": 0.10951066017150879 + }, + { + "epoch": 3.4771728515625e-05, + "model_forward_time": 0.024289369583129883, + "step": 22788 + }, + { + "epoch": 3.4771728515625e-05, + "step": 22788, + "training_step_time": 0.10925769805908203 + }, + { + "epoch": 3.477325439453125e-05, + "model_forward_time": 0.024606943130493164, + "step": 22789 + }, + { + "epoch": 3.477325439453125e-05, + "step": 22789, + "training_step_time": 0.10770869255065918 + }, + { + "epoch": 3.47747802734375e-05, + "grad_norm": 0.20419451594352722, + "learning_rate": 1.4977475602518876e-05, + "loss": 0.0036, + "step": 22790 + }, + { + "epoch": 3.47747802734375e-05, + "model_forward_time": 0.026028871536254883, + "step": 22790 + }, + { + "epoch": 3.47747802734375e-05, + "step": 22790, + "training_step_time": 0.10820865631103516 + }, + { + "epoch": 3.477630615234375e-05, + "model_forward_time": 0.025558948516845703, + "step": 22791 + }, + { + "epoch": 3.477630615234375e-05, + "step": 22791, + "training_step_time": 0.10811018943786621 + }, + { + "epoch": 3.477783203125e-05, + "model_forward_time": 0.025388240814208984, + "step": 22792 + }, + { + "epoch": 3.477783203125e-05, + "step": 22792, + "training_step_time": 0.10717916488647461 + }, + { + "epoch": 3.477935791015625e-05, + "model_forward_time": 0.02521038055419922, + "step": 22793 + }, + { + "epoch": 3.477935791015625e-05, + "step": 22793, + "training_step_time": 0.12821364402770996 + }, + { + "epoch": 3.47808837890625e-05, + "model_forward_time": 0.026283979415893555, + "step": 22794 + }, + { + "epoch": 3.47808837890625e-05, + "step": 22794, + "training_step_time": 0.1070408821105957 + }, + { + "epoch": 3.478240966796875e-05, + "model_forward_time": 0.025435209274291992, + "step": 22795 + }, + { + "epoch": 3.478240966796875e-05, + "step": 22795, + "training_step_time": 0.10952568054199219 + }, + { + "epoch": 3.4783935546875e-05, + "model_forward_time": 0.025142908096313477, + "step": 22796 + }, + { + "epoch": 3.4783935546875e-05, + "step": 22796, + "training_step_time": 0.11936044692993164 + }, + { + "epoch": 3.478546142578125e-05, + "model_forward_time": 0.027043581008911133, + "step": 22797 + }, + { + "epoch": 3.478546142578125e-05, + "step": 22797, + "training_step_time": 0.12593579292297363 + }, + { + "epoch": 3.47869873046875e-05, + "model_forward_time": 0.02536177635192871, + "step": 22798 + }, + { + "epoch": 3.47869873046875e-05, + "step": 22798, + "training_step_time": 0.12269258499145508 + }, + { + "epoch": 3.478851318359375e-05, + "model_forward_time": 0.025135517120361328, + "step": 22799 + }, + { + "epoch": 3.478851318359375e-05, + "step": 22799, + "training_step_time": 0.11824393272399902 + }, + { + "epoch": 3.47900390625e-05, + "grad_norm": 0.09725761413574219, + "learning_rate": 1.4938160786375572e-05, + "loss": 0.0035, + "step": 22800 + }, + { + "epoch": 3.47900390625e-05, + "model_forward_time": 0.02498626708984375, + "step": 22800 + }, + { + "epoch": 3.47900390625e-05, + "step": 22800, + "training_step_time": 0.10482048988342285 + }, + { + "epoch": 3.479156494140625e-05, + "model_forward_time": 0.02560734748840332, + "step": 22801 + }, + { + "epoch": 3.479156494140625e-05, + "step": 22801, + "training_step_time": 0.10585522651672363 + }, + { + "epoch": 3.47930908203125e-05, + "model_forward_time": 0.025606870651245117, + "step": 22802 + }, + { + "epoch": 3.47930908203125e-05, + "step": 22802, + "training_step_time": 0.1055445671081543 + }, + { + "epoch": 3.479461669921875e-05, + "model_forward_time": 0.02541661262512207, + "step": 22803 + }, + { + "epoch": 3.479461669921875e-05, + "step": 22803, + "training_step_time": 0.1771228313446045 + }, + { + "epoch": 3.4796142578125e-05, + "model_forward_time": 0.025107622146606445, + "step": 22804 + }, + { + "epoch": 3.4796142578125e-05, + "step": 22804, + "training_step_time": 0.1158151626586914 + }, + { + "epoch": 3.479766845703125e-05, + "model_forward_time": 0.02494525909423828, + "step": 22805 + }, + { + "epoch": 3.479766845703125e-05, + "step": 22805, + "training_step_time": 0.20346641540527344 + }, + { + "epoch": 3.47991943359375e-05, + "model_forward_time": 0.024654626846313477, + "step": 22806 + }, + { + "epoch": 3.47991943359375e-05, + "step": 22806, + "training_step_time": 0.18082714080810547 + }, + { + "epoch": 3.480072021484375e-05, + "model_forward_time": 0.024167299270629883, + "step": 22807 + }, + { + "epoch": 3.480072021484375e-05, + "step": 22807, + "training_step_time": 0.1410210132598877 + }, + { + "epoch": 3.480224609375e-05, + "model_forward_time": 0.02449512481689453, + "step": 22808 + }, + { + "epoch": 3.480224609375e-05, + "step": 22808, + "training_step_time": 0.10700321197509766 + }, + { + "epoch": 3.480377197265625e-05, + "model_forward_time": 0.025849580764770508, + "step": 22809 + }, + { + "epoch": 3.480377197265625e-05, + "step": 22809, + "training_step_time": 0.10742712020874023 + }, + { + "epoch": 3.48052978515625e-05, + "grad_norm": 0.14344458281993866, + "learning_rate": 1.4898888573673031e-05, + "loss": 0.0068, + "step": 22810 + }, + { + "epoch": 3.48052978515625e-05, + "model_forward_time": 0.02580571174621582, + "step": 22810 + }, + { + "epoch": 3.48052978515625e-05, + "step": 22810, + "training_step_time": 0.10942959785461426 + }, + { + "epoch": 3.480682373046875e-05, + "model_forward_time": 0.02559804916381836, + "step": 22811 + }, + { + "epoch": 3.480682373046875e-05, + "step": 22811, + "training_step_time": 0.10956358909606934 + }, + { + "epoch": 3.4808349609375e-05, + "model_forward_time": 0.025168895721435547, + "step": 22812 + }, + { + "epoch": 3.4808349609375e-05, + "step": 22812, + "training_step_time": 0.10886597633361816 + }, + { + "epoch": 3.480987548828125e-05, + "model_forward_time": 0.025579214096069336, + "step": 22813 + }, + { + "epoch": 3.480987548828125e-05, + "step": 22813, + "training_step_time": 0.10538458824157715 + }, + { + "epoch": 3.48114013671875e-05, + "model_forward_time": 0.025473594665527344, + "step": 22814 + }, + { + "epoch": 3.48114013671875e-05, + "step": 22814, + "training_step_time": 0.10645079612731934 + }, + { + "epoch": 3.481292724609375e-05, + "model_forward_time": 0.024853944778442383, + "step": 22815 + }, + { + "epoch": 3.481292724609375e-05, + "step": 22815, + "training_step_time": 0.10262393951416016 + }, + { + "epoch": 3.4814453125e-05, + "model_forward_time": 0.025220632553100586, + "step": 22816 + }, + { + "epoch": 3.4814453125e-05, + "step": 22816, + "training_step_time": 0.10415506362915039 + }, + { + "epoch": 3.481597900390625e-05, + "model_forward_time": 0.02528095245361328, + "step": 22817 + }, + { + "epoch": 3.481597900390625e-05, + "step": 22817, + "training_step_time": 0.10394644737243652 + }, + { + "epoch": 3.48175048828125e-05, + "model_forward_time": 0.0253903865814209, + "step": 22818 + }, + { + "epoch": 3.48175048828125e-05, + "step": 22818, + "training_step_time": 0.10526227951049805 + }, + { + "epoch": 3.481903076171875e-05, + "model_forward_time": 0.02604508399963379, + "step": 22819 + }, + { + "epoch": 3.481903076171875e-05, + "step": 22819, + "training_step_time": 0.10596489906311035 + }, + { + "epoch": 3.4820556640625e-05, + "grad_norm": 0.12907643616199493, + "learning_rate": 1.4859659012130695e-05, + "loss": 0.011, + "step": 22820 + }, + { + "epoch": 3.4820556640625e-05, + "model_forward_time": 0.028371095657348633, + "step": 22820 + }, + { + "epoch": 3.4820556640625e-05, + "step": 22820, + "training_step_time": 0.10895442962646484 + }, + { + "epoch": 3.482208251953125e-05, + "model_forward_time": 0.025696516036987305, + "step": 22821 + }, + { + "epoch": 3.482208251953125e-05, + "step": 22821, + "training_step_time": 0.10643839836120605 + }, + { + "epoch": 3.48236083984375e-05, + "model_forward_time": 0.025859832763671875, + "step": 22822 + }, + { + "epoch": 3.48236083984375e-05, + "step": 22822, + "training_step_time": 0.11932826042175293 + }, + { + "epoch": 3.482513427734375e-05, + "model_forward_time": 0.02556324005126953, + "step": 22823 + }, + { + "epoch": 3.482513427734375e-05, + "step": 22823, + "training_step_time": 0.10962700843811035 + }, + { + "epoch": 3.482666015625e-05, + "model_forward_time": 0.02634143829345703, + "step": 22824 + }, + { + "epoch": 3.482666015625e-05, + "step": 22824, + "training_step_time": 0.1068723201751709 + }, + { + "epoch": 3.482818603515625e-05, + "model_forward_time": 0.025702476501464844, + "step": 22825 + }, + { + "epoch": 3.482818603515625e-05, + "step": 22825, + "training_step_time": 0.21186447143554688 + }, + { + "epoch": 3.48297119140625e-05, + "model_forward_time": 0.02496170997619629, + "step": 22826 + }, + { + "epoch": 3.48297119140625e-05, + "step": 22826, + "training_step_time": 0.21672582626342773 + }, + { + "epoch": 3.483123779296875e-05, + "model_forward_time": 0.024808645248413086, + "step": 22827 + }, + { + "epoch": 3.483123779296875e-05, + "step": 22827, + "training_step_time": 0.20836377143859863 + }, + { + "epoch": 3.4832763671875e-05, + "model_forward_time": 0.024509429931640625, + "step": 22828 + }, + { + "epoch": 3.4832763671875e-05, + "step": 22828, + "training_step_time": 0.2055835723876953 + }, + { + "epoch": 3.483428955078125e-05, + "model_forward_time": 0.025012493133544922, + "step": 22829 + }, + { + "epoch": 3.483428955078125e-05, + "step": 22829, + "training_step_time": 0.17716026306152344 + }, + { + "epoch": 3.48358154296875e-05, + "grad_norm": 0.144840270280838, + "learning_rate": 1.4820472149416154e-05, + "loss": 0.0057, + "step": 22830 + }, + { + "epoch": 3.48358154296875e-05, + "model_forward_time": 0.02431654930114746, + "step": 22830 + }, + { + "epoch": 3.48358154296875e-05, + "step": 22830, + "training_step_time": 0.14636969566345215 + }, + { + "epoch": 3.483734130859375e-05, + "model_forward_time": 0.02350902557373047, + "step": 22831 + }, + { + "epoch": 3.483734130859375e-05, + "step": 22831, + "training_step_time": 0.13284611701965332 + }, + { + "epoch": 3.48388671875e-05, + "model_forward_time": 0.0274050235748291, + "step": 22832 + }, + { + "epoch": 3.48388671875e-05, + "step": 22832, + "training_step_time": 0.12695646286010742 + }, + { + "epoch": 3.484039306640625e-05, + "model_forward_time": 0.023859739303588867, + "step": 22833 + }, + { + "epoch": 3.484039306640625e-05, + "step": 22833, + "training_step_time": 0.12520742416381836 + }, + { + "epoch": 3.48419189453125e-05, + "model_forward_time": 0.02385878562927246, + "step": 22834 + }, + { + "epoch": 3.48419189453125e-05, + "step": 22834, + "training_step_time": 0.10430216789245605 + }, + { + "epoch": 3.484344482421875e-05, + "model_forward_time": 0.02612757682800293, + "step": 22835 + }, + { + "epoch": 3.484344482421875e-05, + "step": 22835, + "training_step_time": 0.1042177677154541 + }, + { + "epoch": 3.4844970703125e-05, + "model_forward_time": 0.02556300163269043, + "step": 22836 + }, + { + "epoch": 3.4844970703125e-05, + "step": 22836, + "training_step_time": 0.10341835021972656 + }, + { + "epoch": 3.484649658203125e-05, + "model_forward_time": 0.025195837020874023, + "step": 22837 + }, + { + "epoch": 3.484649658203125e-05, + "step": 22837, + "training_step_time": 0.17187213897705078 + }, + { + "epoch": 3.48480224609375e-05, + "model_forward_time": 0.024822473526000977, + "step": 22838 + }, + { + "epoch": 3.48480224609375e-05, + "step": 22838, + "training_step_time": 0.14420104026794434 + }, + { + "epoch": 3.484954833984375e-05, + "model_forward_time": 0.024406909942626953, + "step": 22839 + }, + { + "epoch": 3.484954833984375e-05, + "step": 22839, + "training_step_time": 0.1162106990814209 + }, + { + "epoch": 3.485107421875e-05, + "grad_norm": 0.2122463583946228, + "learning_rate": 1.4781328033145187e-05, + "loss": 0.0093, + "step": 22840 + }, + { + "epoch": 3.485107421875e-05, + "model_forward_time": 0.02503657341003418, + "step": 22840 + }, + { + "epoch": 3.485107421875e-05, + "step": 22840, + "training_step_time": 0.1175847053527832 + }, + { + "epoch": 3.485260009765625e-05, + "model_forward_time": 0.0252535343170166, + "step": 22841 + }, + { + "epoch": 3.485260009765625e-05, + "step": 22841, + "training_step_time": 0.1319141387939453 + }, + { + "epoch": 3.48541259765625e-05, + "model_forward_time": 0.02534627914428711, + "step": 22842 + }, + { + "epoch": 3.48541259765625e-05, + "step": 22842, + "training_step_time": 0.11172199249267578 + }, + { + "epoch": 3.485565185546875e-05, + "model_forward_time": 0.024980545043945312, + "step": 22843 + }, + { + "epoch": 3.485565185546875e-05, + "step": 22843, + "training_step_time": 0.11527371406555176 + }, + { + "epoch": 3.4857177734375e-05, + "model_forward_time": 0.025072813034057617, + "step": 22844 + }, + { + "epoch": 3.4857177734375e-05, + "step": 22844, + "training_step_time": 0.1045684814453125 + }, + { + "epoch": 3.485870361328125e-05, + "model_forward_time": 0.025394201278686523, + "step": 22845 + }, + { + "epoch": 3.485870361328125e-05, + "step": 22845, + "training_step_time": 0.10884904861450195 + }, + { + "epoch": 3.48602294921875e-05, + "model_forward_time": 0.024939298629760742, + "step": 22846 + }, + { + "epoch": 3.48602294921875e-05, + "step": 22846, + "training_step_time": 0.1121675968170166 + }, + { + "epoch": 3.486175537109375e-05, + "model_forward_time": 0.02510809898376465, + "step": 22847 + }, + { + "epoch": 3.486175537109375e-05, + "step": 22847, + "training_step_time": 0.11913251876831055 + }, + { + "epoch": 3.486328125e-05, + "model_forward_time": 0.025386333465576172, + "step": 22848 + }, + { + "epoch": 3.486328125e-05, + "step": 22848, + "training_step_time": 0.13124442100524902 + }, + { + "epoch": 3.486480712890625e-05, + "model_forward_time": 0.024923086166381836, + "step": 22849 + }, + { + "epoch": 3.486480712890625e-05, + "step": 22849, + "training_step_time": 0.21547269821166992 + }, + { + "epoch": 3.48663330078125e-05, + "grad_norm": 0.35279932618141174, + "learning_rate": 1.4742226710881558e-05, + "loss": 0.0187, + "step": 22850 + }, + { + "epoch": 3.48663330078125e-05, + "model_forward_time": 0.024524211883544922, + "step": 22850 + }, + { + "epoch": 3.48663330078125e-05, + "step": 22850, + "training_step_time": 0.1030418872833252 + }, + { + "epoch": 3.486785888671875e-05, + "model_forward_time": 0.02425551414489746, + "step": 22851 + }, + { + "epoch": 3.486785888671875e-05, + "step": 22851, + "training_step_time": 0.11866044998168945 + }, + { + "epoch": 3.4869384765625e-05, + "model_forward_time": 0.02402663230895996, + "step": 22852 + }, + { + "epoch": 3.4869384765625e-05, + "step": 22852, + "training_step_time": 0.11206412315368652 + }, + { + "epoch": 3.487091064453125e-05, + "model_forward_time": 0.02507328987121582, + "step": 22853 + }, + { + "epoch": 3.487091064453125e-05, + "step": 22853, + "training_step_time": 0.11282539367675781 + }, + { + "epoch": 3.48724365234375e-05, + "model_forward_time": 0.024970293045043945, + "step": 22854 + }, + { + "epoch": 3.48724365234375e-05, + "step": 22854, + "training_step_time": 0.11034035682678223 + }, + { + "epoch": 3.487396240234375e-05, + "model_forward_time": 0.02519679069519043, + "step": 22855 + }, + { + "epoch": 3.487396240234375e-05, + "step": 22855, + "training_step_time": 0.10639619827270508 + }, + { + "epoch": 3.487548828125e-05, + "model_forward_time": 0.024964570999145508, + "step": 22856 + }, + { + "epoch": 3.487548828125e-05, + "step": 22856, + "training_step_time": 0.10882806777954102 + }, + { + "epoch": 3.487701416015625e-05, + "model_forward_time": 0.02490091323852539, + "step": 22857 + }, + { + "epoch": 3.487701416015625e-05, + "step": 22857, + "training_step_time": 0.10769200325012207 + }, + { + "epoch": 3.48785400390625e-05, + "model_forward_time": 0.025066614151000977, + "step": 22858 + }, + { + "epoch": 3.48785400390625e-05, + "step": 22858, + "training_step_time": 0.1067354679107666 + }, + { + "epoch": 3.488006591796875e-05, + "model_forward_time": 0.024793386459350586, + "step": 22859 + }, + { + "epoch": 3.488006591796875e-05, + "step": 22859, + "training_step_time": 0.10522818565368652 + }, + { + "epoch": 3.4881591796875e-05, + "grad_norm": 0.10908864438533783, + "learning_rate": 1.470316823013707e-05, + "loss": 0.0047, + "step": 22860 + }, + { + "epoch": 3.4881591796875e-05, + "model_forward_time": 0.024829626083374023, + "step": 22860 + }, + { + "epoch": 3.4881591796875e-05, + "step": 22860, + "training_step_time": 0.10541844367980957 + }, + { + "epoch": 3.488311767578125e-05, + "model_forward_time": 0.02483201026916504, + "step": 22861 + }, + { + "epoch": 3.488311767578125e-05, + "step": 22861, + "training_step_time": 0.10969853401184082 + }, + { + "epoch": 3.48846435546875e-05, + "model_forward_time": 0.024955272674560547, + "step": 22862 + }, + { + "epoch": 3.48846435546875e-05, + "step": 22862, + "training_step_time": 0.10819816589355469 + }, + { + "epoch": 3.488616943359375e-05, + "model_forward_time": 0.02535223960876465, + "step": 22863 + }, + { + "epoch": 3.488616943359375e-05, + "step": 22863, + "training_step_time": 0.11041426658630371 + }, + { + "epoch": 3.48876953125e-05, + "model_forward_time": 0.025101184844970703, + "step": 22864 + }, + { + "epoch": 3.48876953125e-05, + "step": 22864, + "training_step_time": 0.10503387451171875 + }, + { + "epoch": 3.488922119140625e-05, + "model_forward_time": 0.024740934371948242, + "step": 22865 + }, + { + "epoch": 3.488922119140625e-05, + "step": 22865, + "training_step_time": 0.11049509048461914 + }, + { + "epoch": 3.48907470703125e-05, + "model_forward_time": 0.024889469146728516, + "step": 22866 + }, + { + "epoch": 3.48907470703125e-05, + "step": 22866, + "training_step_time": 0.1351768970489502 + }, + { + "epoch": 3.489227294921875e-05, + "model_forward_time": 0.025843143463134766, + "step": 22867 + }, + { + "epoch": 3.489227294921875e-05, + "step": 22867, + "training_step_time": 0.11082577705383301 + }, + { + "epoch": 3.4893798828125e-05, + "model_forward_time": 0.02529597282409668, + "step": 22868 + }, + { + "epoch": 3.4893798828125e-05, + "step": 22868, + "training_step_time": 0.10524177551269531 + }, + { + "epoch": 3.489532470703125e-05, + "model_forward_time": 0.024091720581054688, + "step": 22869 + }, + { + "epoch": 3.489532470703125e-05, + "step": 22869, + "training_step_time": 0.15042614936828613 + }, + { + "epoch": 3.48968505859375e-05, + "grad_norm": 0.2644929885864258, + "learning_rate": 1.4664152638371437e-05, + "loss": 0.0059, + "step": 22870 + }, + { + "epoch": 3.48968505859375e-05, + "model_forward_time": 0.02468395233154297, + "step": 22870 + }, + { + "epoch": 3.48968505859375e-05, + "step": 22870, + "training_step_time": 0.16089081764221191 + }, + { + "epoch": 3.489837646484375e-05, + "model_forward_time": 0.02477240562438965, + "step": 22871 + }, + { + "epoch": 3.489837646484375e-05, + "step": 22871, + "training_step_time": 0.10635781288146973 + }, + { + "epoch": 3.489990234375e-05, + "model_forward_time": 0.024487733840942383, + "step": 22872 + }, + { + "epoch": 3.489990234375e-05, + "step": 22872, + "training_step_time": 0.1285703182220459 + }, + { + "epoch": 3.490142822265625e-05, + "model_forward_time": 0.024829387664794922, + "step": 22873 + }, + { + "epoch": 3.490142822265625e-05, + "step": 22873, + "training_step_time": 0.19692635536193848 + }, + { + "epoch": 3.49029541015625e-05, + "model_forward_time": 0.024796247482299805, + "step": 22874 + }, + { + "epoch": 3.49029541015625e-05, + "step": 22874, + "training_step_time": 0.1048426628112793 + }, + { + "epoch": 3.490447998046875e-05, + "model_forward_time": 0.024367332458496094, + "step": 22875 + }, + { + "epoch": 3.490447998046875e-05, + "step": 22875, + "training_step_time": 0.10435175895690918 + }, + { + "epoch": 3.4906005859375e-05, + "model_forward_time": 0.025068283081054688, + "step": 22876 + }, + { + "epoch": 3.4906005859375e-05, + "step": 22876, + "training_step_time": 0.10718822479248047 + }, + { + "epoch": 3.490753173828125e-05, + "model_forward_time": 0.025122642517089844, + "step": 22877 + }, + { + "epoch": 3.490753173828125e-05, + "step": 22877, + "training_step_time": 0.10822153091430664 + }, + { + "epoch": 3.49090576171875e-05, + "model_forward_time": 0.025420427322387695, + "step": 22878 + }, + { + "epoch": 3.49090576171875e-05, + "step": 22878, + "training_step_time": 0.10799002647399902 + }, + { + "epoch": 3.491058349609375e-05, + "model_forward_time": 0.024608850479125977, + "step": 22879 + }, + { + "epoch": 3.491058349609375e-05, + "step": 22879, + "training_step_time": 0.10671520233154297 + }, + { + "epoch": 3.4912109375e-05, + "grad_norm": 0.20742835104465485, + "learning_rate": 1.4625179982992321e-05, + "loss": 0.0123, + "step": 22880 + }, + { + "epoch": 3.4912109375e-05, + "model_forward_time": 0.024595022201538086, + "step": 22880 + }, + { + "epoch": 3.4912109375e-05, + "step": 22880, + "training_step_time": 0.10769152641296387 + }, + { + "epoch": 3.491363525390625e-05, + "model_forward_time": 0.025026798248291016, + "step": 22881 + }, + { + "epoch": 3.491363525390625e-05, + "step": 22881, + "training_step_time": 0.10483670234680176 + }, + { + "epoch": 3.49151611328125e-05, + "model_forward_time": 0.02530217170715332, + "step": 22882 + }, + { + "epoch": 3.49151611328125e-05, + "step": 22882, + "training_step_time": 0.10591793060302734 + }, + { + "epoch": 3.491668701171875e-05, + "model_forward_time": 0.024617433547973633, + "step": 22883 + }, + { + "epoch": 3.491668701171875e-05, + "step": 22883, + "training_step_time": 0.10512900352478027 + }, + { + "epoch": 3.4918212890625e-05, + "model_forward_time": 0.025096654891967773, + "step": 22884 + }, + { + "epoch": 3.4918212890625e-05, + "step": 22884, + "training_step_time": 0.1108396053314209 + }, + { + "epoch": 3.491973876953125e-05, + "model_forward_time": 0.024796485900878906, + "step": 22885 + }, + { + "epoch": 3.491973876953125e-05, + "step": 22885, + "training_step_time": 0.13176536560058594 + }, + { + "epoch": 3.49212646484375e-05, + "model_forward_time": 0.025580883026123047, + "step": 22886 + }, + { + "epoch": 3.49212646484375e-05, + "step": 22886, + "training_step_time": 0.10870170593261719 + }, + { + "epoch": 3.492279052734375e-05, + "model_forward_time": 0.025025606155395508, + "step": 22887 + }, + { + "epoch": 3.492279052734375e-05, + "step": 22887, + "training_step_time": 0.11175060272216797 + }, + { + "epoch": 3.492431640625e-05, + "model_forward_time": 0.025172710418701172, + "step": 22888 + }, + { + "epoch": 3.492431640625e-05, + "step": 22888, + "training_step_time": 0.12007713317871094 + }, + { + "epoch": 3.492584228515625e-05, + "model_forward_time": 0.025093793869018555, + "step": 22889 + }, + { + "epoch": 3.492584228515625e-05, + "step": 22889, + "training_step_time": 0.12108016014099121 + }, + { + "epoch": 3.49273681640625e-05, + "grad_norm": 0.09625491499900818, + "learning_rate": 1.4586250311355132e-05, + "loss": 0.0043, + "step": 22890 + }, + { + "epoch": 3.49273681640625e-05, + "model_forward_time": 0.024801254272460938, + "step": 22890 + }, + { + "epoch": 3.49273681640625e-05, + "step": 22890, + "training_step_time": 0.11254644393920898 + }, + { + "epoch": 3.492889404296875e-05, + "model_forward_time": 0.025102853775024414, + "step": 22891 + }, + { + "epoch": 3.492889404296875e-05, + "step": 22891, + "training_step_time": 0.1310122013092041 + }, + { + "epoch": 3.4930419921875e-05, + "model_forward_time": 0.025182247161865234, + "step": 22892 + }, + { + "epoch": 3.4930419921875e-05, + "step": 22892, + "training_step_time": 0.14044928550720215 + }, + { + "epoch": 3.493194580078125e-05, + "model_forward_time": 0.024358510971069336, + "step": 22893 + }, + { + "epoch": 3.493194580078125e-05, + "step": 22893, + "training_step_time": 0.1328907012939453 + }, + { + "epoch": 3.49334716796875e-05, + "model_forward_time": 0.024767398834228516, + "step": 22894 + }, + { + "epoch": 3.49334716796875e-05, + "step": 22894, + "training_step_time": 0.12484884262084961 + }, + { + "epoch": 3.493499755859375e-05, + "model_forward_time": 0.024884462356567383, + "step": 22895 + }, + { + "epoch": 3.493499755859375e-05, + "step": 22895, + "training_step_time": 0.15195035934448242 + }, + { + "epoch": 3.49365234375e-05, + "model_forward_time": 0.024791479110717773, + "step": 22896 + }, + { + "epoch": 3.49365234375e-05, + "step": 22896, + "training_step_time": 0.1569521427154541 + }, + { + "epoch": 3.493804931640625e-05, + "model_forward_time": 0.024165868759155273, + "step": 22897 + }, + { + "epoch": 3.493804931640625e-05, + "step": 22897, + "training_step_time": 0.11577486991882324 + }, + { + "epoch": 3.49395751953125e-05, + "model_forward_time": 0.02445077896118164, + "step": 22898 + }, + { + "epoch": 3.49395751953125e-05, + "step": 22898, + "training_step_time": 0.1087496280670166 + }, + { + "epoch": 3.494110107421875e-05, + "model_forward_time": 0.02503037452697754, + "step": 22899 + }, + { + "epoch": 3.494110107421875e-05, + "step": 22899, + "training_step_time": 0.1879105567932129 + }, + { + "epoch": 3.4942626953125e-05, + "grad_norm": 0.14945359528064728, + "learning_rate": 1.4547363670763137e-05, + "loss": 0.0149, + "step": 22900 + }, + { + "epoch": 3.4942626953125e-05, + "model_forward_time": 0.024821996688842773, + "step": 22900 + }, + { + "epoch": 3.4942626953125e-05, + "step": 22900, + "training_step_time": 0.10581135749816895 + }, + { + "epoch": 3.494415283203125e-05, + "model_forward_time": 0.024337053298950195, + "step": 22901 + }, + { + "epoch": 3.494415283203125e-05, + "step": 22901, + "training_step_time": 0.10126185417175293 + }, + { + "epoch": 3.49456787109375e-05, + "model_forward_time": 0.02523350715637207, + "step": 22902 + }, + { + "epoch": 3.49456787109375e-05, + "step": 22902, + "training_step_time": 0.10530734062194824 + }, + { + "epoch": 3.494720458984375e-05, + "model_forward_time": 0.025290489196777344, + "step": 22903 + }, + { + "epoch": 3.494720458984375e-05, + "step": 22903, + "training_step_time": 0.10489821434020996 + }, + { + "epoch": 3.494873046875e-05, + "model_forward_time": 0.025685548782348633, + "step": 22904 + }, + { + "epoch": 3.494873046875e-05, + "step": 22904, + "training_step_time": 0.10569548606872559 + }, + { + "epoch": 3.495025634765625e-05, + "model_forward_time": 0.025206327438354492, + "step": 22905 + }, + { + "epoch": 3.495025634765625e-05, + "step": 22905, + "training_step_time": 0.10495781898498535 + }, + { + "epoch": 3.49517822265625e-05, + "model_forward_time": 0.025297880172729492, + "step": 22906 + }, + { + "epoch": 3.49517822265625e-05, + "step": 22906, + "training_step_time": 0.10867953300476074 + }, + { + "epoch": 3.495330810546875e-05, + "model_forward_time": 0.025444984436035156, + "step": 22907 + }, + { + "epoch": 3.495330810546875e-05, + "step": 22907, + "training_step_time": 0.1203775405883789 + }, + { + "epoch": 3.4954833984375e-05, + "model_forward_time": 0.025078773498535156, + "step": 22908 + }, + { + "epoch": 3.4954833984375e-05, + "step": 22908, + "training_step_time": 0.10955357551574707 + }, + { + "epoch": 3.495635986328125e-05, + "model_forward_time": 0.025688648223876953, + "step": 22909 + }, + { + "epoch": 3.495635986328125e-05, + "step": 22909, + "training_step_time": 0.11077165603637695 + }, + { + "epoch": 3.49578857421875e-05, + "grad_norm": 0.08774767816066742, + "learning_rate": 1.450852010846724e-05, + "loss": 0.0081, + "step": 22910 + }, + { + "epoch": 3.49578857421875e-05, + "model_forward_time": 0.025636672973632812, + "step": 22910 + }, + { + "epoch": 3.49578857421875e-05, + "step": 22910, + "training_step_time": 0.11340618133544922 + }, + { + "epoch": 3.495941162109375e-05, + "model_forward_time": 0.024908065795898438, + "step": 22911 + }, + { + "epoch": 3.495941162109375e-05, + "step": 22911, + "training_step_time": 0.12753081321716309 + }, + { + "epoch": 3.49609375e-05, + "model_forward_time": 0.024957656860351562, + "step": 22912 + }, + { + "epoch": 3.49609375e-05, + "step": 22912, + "training_step_time": 0.12898826599121094 + }, + { + "epoch": 3.496246337890625e-05, + "model_forward_time": 0.024945735931396484, + "step": 22913 + }, + { + "epoch": 3.496246337890625e-05, + "step": 22913, + "training_step_time": 0.10939145088195801 + }, + { + "epoch": 3.49639892578125e-05, + "model_forward_time": 0.025281906127929688, + "step": 22914 + }, + { + "epoch": 3.49639892578125e-05, + "step": 22914, + "training_step_time": 0.1091461181640625 + }, + { + "epoch": 3.496551513671875e-05, + "model_forward_time": 0.02581191062927246, + "step": 22915 + }, + { + "epoch": 3.496551513671875e-05, + "step": 22915, + "training_step_time": 0.10804224014282227 + }, + { + "epoch": 3.4967041015625e-05, + "model_forward_time": 0.025528430938720703, + "step": 22916 + }, + { + "epoch": 3.4967041015625e-05, + "step": 22916, + "training_step_time": 0.16184377670288086 + }, + { + "epoch": 3.496856689453125e-05, + "model_forward_time": 0.024809598922729492, + "step": 22917 + }, + { + "epoch": 3.496856689453125e-05, + "step": 22917, + "training_step_time": 0.10933709144592285 + }, + { + "epoch": 3.49700927734375e-05, + "model_forward_time": 0.02513909339904785, + "step": 22918 + }, + { + "epoch": 3.49700927734375e-05, + "step": 22918, + "training_step_time": 0.19405436515808105 + }, + { + "epoch": 3.497161865234375e-05, + "model_forward_time": 0.02425074577331543, + "step": 22919 + }, + { + "epoch": 3.497161865234375e-05, + "step": 22919, + "training_step_time": 0.13287615776062012 + }, + { + "epoch": 3.497314453125e-05, + "grad_norm": 0.39075523614883423, + "learning_rate": 1.4469719671666043e-05, + "loss": 0.01, + "step": 22920 + }, + { + "epoch": 3.497314453125e-05, + "model_forward_time": 0.0246734619140625, + "step": 22920 + }, + { + "epoch": 3.497314453125e-05, + "step": 22920, + "training_step_time": 0.19736862182617188 + }, + { + "epoch": 3.497467041015625e-05, + "model_forward_time": 0.024672508239746094, + "step": 22921 + }, + { + "epoch": 3.497467041015625e-05, + "step": 22921, + "training_step_time": 0.10254263877868652 + }, + { + "epoch": 3.49761962890625e-05, + "model_forward_time": 0.024676084518432617, + "step": 22922 + }, + { + "epoch": 3.49761962890625e-05, + "step": 22922, + "training_step_time": 0.10589241981506348 + }, + { + "epoch": 3.497772216796875e-05, + "model_forward_time": 0.025455236434936523, + "step": 22923 + }, + { + "epoch": 3.497772216796875e-05, + "step": 22923, + "training_step_time": 0.10478353500366211 + }, + { + "epoch": 3.4979248046875e-05, + "model_forward_time": 0.025585651397705078, + "step": 22924 + }, + { + "epoch": 3.4979248046875e-05, + "step": 22924, + "training_step_time": 0.10547375679016113 + }, + { + "epoch": 3.498077392578125e-05, + "model_forward_time": 0.02533578872680664, + "step": 22925 + }, + { + "epoch": 3.498077392578125e-05, + "step": 22925, + "training_step_time": 0.11114168167114258 + }, + { + "epoch": 3.49822998046875e-05, + "model_forward_time": 0.025166034698486328, + "step": 22926 + }, + { + "epoch": 3.49822998046875e-05, + "step": 22926, + "training_step_time": 0.10577011108398438 + }, + { + "epoch": 3.498382568359375e-05, + "model_forward_time": 0.024971961975097656, + "step": 22927 + }, + { + "epoch": 3.498382568359375e-05, + "step": 22927, + "training_step_time": 0.10732793807983398 + }, + { + "epoch": 3.49853515625e-05, + "model_forward_time": 0.025249958038330078, + "step": 22928 + }, + { + "epoch": 3.49853515625e-05, + "step": 22928, + "training_step_time": 0.10660409927368164 + }, + { + "epoch": 3.498687744140625e-05, + "model_forward_time": 0.025095224380493164, + "step": 22929 + }, + { + "epoch": 3.498687744140625e-05, + "step": 22929, + "training_step_time": 0.10963749885559082 + }, + { + "epoch": 3.49884033203125e-05, + "grad_norm": 0.1738383024930954, + "learning_rate": 1.443096240750571e-05, + "loss": 0.0045, + "step": 22930 + }, + { + "epoch": 3.49884033203125e-05, + "model_forward_time": 0.02469658851623535, + "step": 22930 + }, + { + "epoch": 3.49884033203125e-05, + "step": 22930, + "training_step_time": 0.10951495170593262 + }, + { + "epoch": 3.498992919921875e-05, + "model_forward_time": 0.025074243545532227, + "step": 22931 + }, + { + "epoch": 3.498992919921875e-05, + "step": 22931, + "training_step_time": 0.1098470687866211 + }, + { + "epoch": 3.4991455078125e-05, + "model_forward_time": 0.02485942840576172, + "step": 22932 + }, + { + "epoch": 3.4991455078125e-05, + "step": 22932, + "training_step_time": 0.1109466552734375 + }, + { + "epoch": 3.499298095703125e-05, + "model_forward_time": 0.024941444396972656, + "step": 22933 + }, + { + "epoch": 3.499298095703125e-05, + "step": 22933, + "training_step_time": 0.10934066772460938 + }, + { + "epoch": 3.49945068359375e-05, + "model_forward_time": 0.026477575302124023, + "step": 22934 + }, + { + "epoch": 3.49945068359375e-05, + "step": 22934, + "training_step_time": 0.1725609302520752 + }, + { + "epoch": 3.499603271484375e-05, + "model_forward_time": 0.024517536163330078, + "step": 22935 + }, + { + "epoch": 3.499603271484375e-05, + "step": 22935, + "training_step_time": 0.1756279468536377 + }, + { + "epoch": 3.499755859375e-05, + "model_forward_time": 0.0244600772857666, + "step": 22936 + }, + { + "epoch": 3.499755859375e-05, + "step": 22936, + "training_step_time": 0.13087034225463867 + }, + { + "epoch": 3.499908447265625e-05, + "model_forward_time": 0.024407148361206055, + "step": 22937 + }, + { + "epoch": 3.499908447265625e-05, + "step": 22937, + "training_step_time": 0.11875247955322266 + }, + { + "epoch": 3.50006103515625e-05, + "model_forward_time": 0.024937152862548828, + "step": 22938 + }, + { + "epoch": 3.50006103515625e-05, + "step": 22938, + "training_step_time": 0.13707780838012695 + }, + { + "epoch": 3.500213623046875e-05, + "model_forward_time": 0.02476358413696289, + "step": 22939 + }, + { + "epoch": 3.500213623046875e-05, + "step": 22939, + "training_step_time": 0.1017601490020752 + }, + { + "epoch": 3.5003662109375e-05, + "grad_norm": 0.18951627612113953, + "learning_rate": 1.439224836308002e-05, + "loss": 0.0035, + "step": 22940 + }, + { + "epoch": 3.5003662109375e-05, + "model_forward_time": 0.0251309871673584, + "step": 22940 + }, + { + "epoch": 3.5003662109375e-05, + "step": 22940, + "training_step_time": 0.1058347225189209 + }, + { + "epoch": 3.500518798828125e-05, + "model_forward_time": 0.0251619815826416, + "step": 22941 + }, + { + "epoch": 3.500518798828125e-05, + "step": 22941, + "training_step_time": 0.11094164848327637 + }, + { + "epoch": 3.50067138671875e-05, + "model_forward_time": 0.025304317474365234, + "step": 22942 + }, + { + "epoch": 3.50067138671875e-05, + "step": 22942, + "training_step_time": 0.2035377025604248 + }, + { + "epoch": 3.500823974609375e-05, + "model_forward_time": 0.0241241455078125, + "step": 22943 + }, + { + "epoch": 3.500823974609375e-05, + "step": 22943, + "training_step_time": 0.13550353050231934 + }, + { + "epoch": 3.5009765625e-05, + "model_forward_time": 0.02397608757019043, + "step": 22944 + }, + { + "epoch": 3.5009765625e-05, + "step": 22944, + "training_step_time": 0.19083309173583984 + }, + { + "epoch": 3.501129150390625e-05, + "model_forward_time": 0.02388620376586914, + "step": 22945 + }, + { + "epoch": 3.501129150390625e-05, + "step": 22945, + "training_step_time": 0.11823248863220215 + }, + { + "epoch": 3.50128173828125e-05, + "model_forward_time": 0.024020910263061523, + "step": 22946 + }, + { + "epoch": 3.50128173828125e-05, + "step": 22946, + "training_step_time": 0.1153266429901123 + }, + { + "epoch": 3.501434326171875e-05, + "model_forward_time": 0.02592945098876953, + "step": 22947 + }, + { + "epoch": 3.501434326171875e-05, + "step": 22947, + "training_step_time": 0.11437845230102539 + }, + { + "epoch": 3.5015869140625e-05, + "model_forward_time": 0.024970054626464844, + "step": 22948 + }, + { + "epoch": 3.5015869140625e-05, + "step": 22948, + "training_step_time": 0.11403203010559082 + }, + { + "epoch": 3.501739501953125e-05, + "model_forward_time": 0.025075435638427734, + "step": 22949 + }, + { + "epoch": 3.501739501953125e-05, + "step": 22949, + "training_step_time": 0.1113893985748291 + }, + { + "epoch": 3.50189208984375e-05, + "grad_norm": 0.3043539822101593, + "learning_rate": 1.435357758543015e-05, + "loss": 0.0156, + "step": 22950 + }, + { + "epoch": 3.50189208984375e-05, + "model_forward_time": 0.025061607360839844, + "step": 22950 + }, + { + "epoch": 3.50189208984375e-05, + "step": 22950, + "training_step_time": 0.10552120208740234 + }, + { + "epoch": 3.502044677734375e-05, + "model_forward_time": 0.025180339813232422, + "step": 22951 + }, + { + "epoch": 3.502044677734375e-05, + "step": 22951, + "training_step_time": 0.10758447647094727 + }, + { + "epoch": 3.502197265625e-05, + "model_forward_time": 0.025279760360717773, + "step": 22952 + }, + { + "epoch": 3.502197265625e-05, + "step": 22952, + "training_step_time": 0.10790395736694336 + }, + { + "epoch": 3.502349853515625e-05, + "model_forward_time": 0.02527451515197754, + "step": 22953 + }, + { + "epoch": 3.502349853515625e-05, + "step": 22953, + "training_step_time": 0.10855746269226074 + }, + { + "epoch": 3.50250244140625e-05, + "model_forward_time": 0.025063276290893555, + "step": 22954 + }, + { + "epoch": 3.50250244140625e-05, + "step": 22954, + "training_step_time": 0.10828876495361328 + }, + { + "epoch": 3.502655029296875e-05, + "model_forward_time": 0.024831771850585938, + "step": 22955 + }, + { + "epoch": 3.502655029296875e-05, + "step": 22955, + "training_step_time": 0.1081075668334961 + }, + { + "epoch": 3.5028076171875e-05, + "model_forward_time": 0.025151968002319336, + "step": 22956 + }, + { + "epoch": 3.5028076171875e-05, + "step": 22956, + "training_step_time": 0.1655285358428955 + }, + { + "epoch": 3.502960205078125e-05, + "model_forward_time": 0.02429962158203125, + "step": 22957 + }, + { + "epoch": 3.502960205078125e-05, + "step": 22957, + "training_step_time": 0.14154386520385742 + }, + { + "epoch": 3.50311279296875e-05, + "model_forward_time": 0.0243990421295166, + "step": 22958 + }, + { + "epoch": 3.50311279296875e-05, + "step": 22958, + "training_step_time": 0.1049356460571289 + }, + { + "epoch": 3.503265380859375e-05, + "model_forward_time": 0.025194644927978516, + "step": 22959 + }, + { + "epoch": 3.503265380859375e-05, + "step": 22959, + "training_step_time": 0.1134331226348877 + }, + { + "epoch": 3.50341796875e-05, + "grad_norm": 0.10754483938217163, + "learning_rate": 1.4314950121544756e-05, + "loss": 0.0034, + "step": 22960 + }, + { + "epoch": 3.50341796875e-05, + "model_forward_time": 0.02427220344543457, + "step": 22960 + }, + { + "epoch": 3.50341796875e-05, + "step": 22960, + "training_step_time": 0.10371589660644531 + }, + { + "epoch": 3.503570556640625e-05, + "model_forward_time": 0.02450108528137207, + "step": 22961 + }, + { + "epoch": 3.503570556640625e-05, + "step": 22961, + "training_step_time": 0.17071270942687988 + }, + { + "epoch": 3.50372314453125e-05, + "model_forward_time": 0.02419447898864746, + "step": 22962 + }, + { + "epoch": 3.50372314453125e-05, + "step": 22962, + "training_step_time": 0.1515488624572754 + }, + { + "epoch": 3.503875732421875e-05, + "model_forward_time": 0.0246279239654541, + "step": 22963 + }, + { + "epoch": 3.503875732421875e-05, + "step": 22963, + "training_step_time": 0.11121082305908203 + }, + { + "epoch": 3.5040283203125e-05, + "model_forward_time": 0.025051116943359375, + "step": 22964 + }, + { + "epoch": 3.5040283203125e-05, + "step": 22964, + "training_step_time": 0.13140201568603516 + }, + { + "epoch": 3.504180908203125e-05, + "model_forward_time": 0.024945974349975586, + "step": 22965 + }, + { + "epoch": 3.504180908203125e-05, + "step": 22965, + "training_step_time": 0.21582770347595215 + }, + { + "epoch": 3.50433349609375e-05, + "model_forward_time": 0.024689674377441406, + "step": 22966 + }, + { + "epoch": 3.50433349609375e-05, + "step": 22966, + "training_step_time": 0.10262036323547363 + }, + { + "epoch": 3.504486083984375e-05, + "model_forward_time": 0.024833202362060547, + "step": 22967 + }, + { + "epoch": 3.504486083984375e-05, + "step": 22967, + "training_step_time": 0.10364842414855957 + }, + { + "epoch": 3.504638671875e-05, + "model_forward_time": 0.02597808837890625, + "step": 22968 + }, + { + "epoch": 3.504638671875e-05, + "step": 22968, + "training_step_time": 0.10544991493225098 + }, + { + "epoch": 3.504791259765625e-05, + "model_forward_time": 0.025086164474487305, + "step": 22969 + }, + { + "epoch": 3.504791259765625e-05, + "step": 22969, + "training_step_time": 0.1083533763885498 + }, + { + "epoch": 3.50494384765625e-05, + "grad_norm": 0.10444493591785431, + "learning_rate": 1.4276366018359844e-05, + "loss": 0.0038, + "step": 22970 + }, + { + "epoch": 3.50494384765625e-05, + "model_forward_time": 0.024846792221069336, + "step": 22970 + }, + { + "epoch": 3.50494384765625e-05, + "step": 22970, + "training_step_time": 0.1086270809173584 + }, + { + "epoch": 3.505096435546875e-05, + "model_forward_time": 0.025808334350585938, + "step": 22971 + }, + { + "epoch": 3.505096435546875e-05, + "step": 22971, + "training_step_time": 0.1075587272644043 + }, + { + "epoch": 3.5052490234375e-05, + "model_forward_time": 0.02546072006225586, + "step": 22972 + }, + { + "epoch": 3.5052490234375e-05, + "step": 22972, + "training_step_time": 0.10783171653747559 + }, + { + "epoch": 3.505401611328125e-05, + "model_forward_time": 0.026033401489257812, + "step": 22973 + }, + { + "epoch": 3.505401611328125e-05, + "step": 22973, + "training_step_time": 0.11246109008789062 + }, + { + "epoch": 3.50555419921875e-05, + "model_forward_time": 0.025255203247070312, + "step": 22974 + }, + { + "epoch": 3.50555419921875e-05, + "step": 22974, + "training_step_time": 0.10934209823608398 + }, + { + "epoch": 3.505706787109375e-05, + "model_forward_time": 0.02425384521484375, + "step": 22975 + }, + { + "epoch": 3.505706787109375e-05, + "step": 22975, + "training_step_time": 0.10840249061584473 + }, + { + "epoch": 3.505859375e-05, + "model_forward_time": 0.025038480758666992, + "step": 22976 + }, + { + "epoch": 3.505859375e-05, + "step": 22976, + "training_step_time": 0.10570311546325684 + }, + { + "epoch": 3.506011962890625e-05, + "model_forward_time": 0.025494098663330078, + "step": 22977 + }, + { + "epoch": 3.506011962890625e-05, + "step": 22977, + "training_step_time": 0.16730833053588867 + }, + { + "epoch": 3.50616455078125e-05, + "model_forward_time": 0.02465510368347168, + "step": 22978 + }, + { + "epoch": 3.50616455078125e-05, + "step": 22978, + "training_step_time": 0.2264251708984375 + }, + { + "epoch": 3.506317138671875e-05, + "model_forward_time": 0.02477860450744629, + "step": 22979 + }, + { + "epoch": 3.506317138671875e-05, + "step": 22979, + "training_step_time": 0.11684513092041016 + }, + { + "epoch": 3.5064697265625e-05, + "grad_norm": 0.10280394554138184, + "learning_rate": 1.4237825322758736e-05, + "loss": 0.0067, + "step": 22980 + }, + { + "epoch": 3.5064697265625e-05, + "model_forward_time": 0.02480792999267578, + "step": 22980 + }, + { + "epoch": 3.5064697265625e-05, + "step": 22980, + "training_step_time": 0.1286776065826416 + }, + { + "epoch": 3.506622314453125e-05, + "model_forward_time": 0.02496623992919922, + "step": 22981 + }, + { + "epoch": 3.506622314453125e-05, + "step": 22981, + "training_step_time": 0.11635470390319824 + }, + { + "epoch": 3.50677490234375e-05, + "model_forward_time": 0.02700185775756836, + "step": 22982 + }, + { + "epoch": 3.50677490234375e-05, + "step": 22982, + "training_step_time": 0.12810301780700684 + }, + { + "epoch": 3.506927490234375e-05, + "model_forward_time": 0.0252227783203125, + "step": 22983 + }, + { + "epoch": 3.506927490234375e-05, + "step": 22983, + "training_step_time": 0.1192173957824707 + }, + { + "epoch": 3.507080078125e-05, + "model_forward_time": 0.026024341583251953, + "step": 22984 + }, + { + "epoch": 3.507080078125e-05, + "step": 22984, + "training_step_time": 0.15346455574035645 + }, + { + "epoch": 3.507232666015625e-05, + "model_forward_time": 0.024796485900878906, + "step": 22985 + }, + { + "epoch": 3.507232666015625e-05, + "step": 22985, + "training_step_time": 0.10400652885437012 + }, + { + "epoch": 3.50738525390625e-05, + "model_forward_time": 0.024823665618896484, + "step": 22986 + }, + { + "epoch": 3.50738525390625e-05, + "step": 22986, + "training_step_time": 0.1661064624786377 + }, + { + "epoch": 3.507537841796875e-05, + "model_forward_time": 0.02460503578186035, + "step": 22987 + }, + { + "epoch": 3.507537841796875e-05, + "step": 22987, + "training_step_time": 0.1453406810760498 + }, + { + "epoch": 3.5076904296875e-05, + "model_forward_time": 0.02445054054260254, + "step": 22988 + }, + { + "epoch": 3.5076904296875e-05, + "step": 22988, + "training_step_time": 0.12819671630859375 + }, + { + "epoch": 3.507843017578125e-05, + "model_forward_time": 0.02554774284362793, + "step": 22989 + }, + { + "epoch": 3.507843017578125e-05, + "step": 22989, + "training_step_time": 0.18590402603149414 + }, + { + "epoch": 3.50799560546875e-05, + "grad_norm": 0.14856906235218048, + "learning_rate": 1.4199328081572e-05, + "loss": 0.0086, + "step": 22990 + }, + { + "epoch": 3.50799560546875e-05, + "model_forward_time": 0.024710655212402344, + "step": 22990 + }, + { + "epoch": 3.50799560546875e-05, + "step": 22990, + "training_step_time": 0.10460996627807617 + }, + { + "epoch": 3.508148193359375e-05, + "model_forward_time": 0.024793624877929688, + "step": 22991 + }, + { + "epoch": 3.508148193359375e-05, + "step": 22991, + "training_step_time": 0.10935020446777344 + }, + { + "epoch": 3.50830078125e-05, + "model_forward_time": 0.025073528289794922, + "step": 22992 + }, + { + "epoch": 3.50830078125e-05, + "step": 22992, + "training_step_time": 0.11066222190856934 + }, + { + "epoch": 3.508453369140625e-05, + "model_forward_time": 0.024153947830200195, + "step": 22993 + }, + { + "epoch": 3.508453369140625e-05, + "step": 22993, + "training_step_time": 0.11229610443115234 + }, + { + "epoch": 3.50860595703125e-05, + "model_forward_time": 0.024278879165649414, + "step": 22994 + }, + { + "epoch": 3.50860595703125e-05, + "step": 22994, + "training_step_time": 0.10435652732849121 + }, + { + "epoch": 3.508758544921875e-05, + "model_forward_time": 0.024259567260742188, + "step": 22995 + }, + { + "epoch": 3.508758544921875e-05, + "step": 22995, + "training_step_time": 0.10274624824523926 + }, + { + "epoch": 3.5089111328125e-05, + "model_forward_time": 0.025200843811035156, + "step": 22996 + }, + { + "epoch": 3.5089111328125e-05, + "step": 22996, + "training_step_time": 0.11021280288696289 + }, + { + "epoch": 3.509063720703125e-05, + "model_forward_time": 0.025528669357299805, + "step": 22997 + }, + { + "epoch": 3.509063720703125e-05, + "step": 22997, + "training_step_time": 0.10526347160339355 + }, + { + "epoch": 3.50921630859375e-05, + "model_forward_time": 0.024500370025634766, + "step": 22998 + }, + { + "epoch": 3.50921630859375e-05, + "step": 22998, + "training_step_time": 0.10307455062866211 + }, + { + "epoch": 3.509368896484375e-05, + "model_forward_time": 0.027987241744995117, + "step": 22999 + }, + { + "epoch": 3.509368896484375e-05, + "step": 22999, + "training_step_time": 0.10934042930603027 + }, + { + "epoch": 3.509521484375e-05, + "grad_norm": 0.25646406412124634, + "learning_rate": 1.4160874341577446e-05, + "loss": 0.0046, + "step": 23000 + }, + { + "epoch": 3.509521484375e-05, + "model_forward_time": 0.02539801597595215, + "step": 23000 + }, + { + "epoch": 3.509521484375e-05, + "step": 23000, + "training_step_time": 0.0993657112121582 + }, + { + "epoch": 3.509674072265625e-05, + "model_forward_time": 0.02325606346130371, + "step": 23001 + }, + { + "epoch": 3.509674072265625e-05, + "step": 23001, + "training_step_time": 0.09928417205810547 + }, + { + "epoch": 3.50982666015625e-05, + "model_forward_time": 0.02511000633239746, + "step": 23002 + }, + { + "epoch": 3.50982666015625e-05, + "step": 23002, + "training_step_time": 0.1422569751739502 + }, + { + "epoch": 3.509979248046875e-05, + "model_forward_time": 0.025448083877563477, + "step": 23003 + }, + { + "epoch": 3.509979248046875e-05, + "step": 23003, + "training_step_time": 0.10786032676696777 + }, + { + "epoch": 3.5101318359375e-05, + "model_forward_time": 0.02525806427001953, + "step": 23004 + }, + { + "epoch": 3.5101318359375e-05, + "step": 23004, + "training_step_time": 0.1040186882019043 + }, + { + "epoch": 3.510284423828125e-05, + "model_forward_time": 0.02498149871826172, + "step": 23005 + }, + { + "epoch": 3.510284423828125e-05, + "step": 23005, + "training_step_time": 0.1151738166809082 + }, + { + "epoch": 3.51043701171875e-05, + "model_forward_time": 0.025829315185546875, + "step": 23006 + }, + { + "epoch": 3.51043701171875e-05, + "step": 23006, + "training_step_time": 0.1173093318939209 + }, + { + "epoch": 3.510589599609375e-05, + "model_forward_time": 0.025655269622802734, + "step": 23007 + }, + { + "epoch": 3.510589599609375e-05, + "step": 23007, + "training_step_time": 0.18257522583007812 + }, + { + "epoch": 3.5107421875e-05, + "model_forward_time": 0.02471780776977539, + "step": 23008 + }, + { + "epoch": 3.5107421875e-05, + "step": 23008, + "training_step_time": 0.12736892700195312 + }, + { + "epoch": 3.510894775390625e-05, + "model_forward_time": 0.0252838134765625, + "step": 23009 + }, + { + "epoch": 3.510894775390625e-05, + "step": 23009, + "training_step_time": 0.16756510734558105 + }, + { + "epoch": 3.51104736328125e-05, + "grad_norm": 0.09030667692422867, + "learning_rate": 1.412246414949997e-05, + "loss": 0.0045, + "step": 23010 + }, + { + "epoch": 3.51104736328125e-05, + "model_forward_time": 0.0251157283782959, + "step": 23010 + }, + { + "epoch": 3.51104736328125e-05, + "step": 23010, + "training_step_time": 0.10673999786376953 + }, + { + "epoch": 3.511199951171875e-05, + "model_forward_time": 0.024898529052734375, + "step": 23011 + }, + { + "epoch": 3.511199951171875e-05, + "step": 23011, + "training_step_time": 0.18826651573181152 + }, + { + "epoch": 3.5113525390625e-05, + "model_forward_time": 0.024615049362182617, + "step": 23012 + }, + { + "epoch": 3.5113525390625e-05, + "step": 23012, + "training_step_time": 0.10674023628234863 + }, + { + "epoch": 3.511505126953125e-05, + "model_forward_time": 0.024814844131469727, + "step": 23013 + }, + { + "epoch": 3.511505126953125e-05, + "step": 23013, + "training_step_time": 0.10528326034545898 + }, + { + "epoch": 3.51165771484375e-05, + "model_forward_time": 0.025429725646972656, + "step": 23014 + }, + { + "epoch": 3.51165771484375e-05, + "step": 23014, + "training_step_time": 0.1090695858001709 + }, + { + "epoch": 3.511810302734375e-05, + "model_forward_time": 0.025153160095214844, + "step": 23015 + }, + { + "epoch": 3.511810302734375e-05, + "step": 23015, + "training_step_time": 0.10849857330322266 + }, + { + "epoch": 3.511962890625e-05, + "model_forward_time": 0.02527451515197754, + "step": 23016 + }, + { + "epoch": 3.511962890625e-05, + "step": 23016, + "training_step_time": 0.10800957679748535 + }, + { + "epoch": 3.512115478515625e-05, + "model_forward_time": 0.02562713623046875, + "step": 23017 + }, + { + "epoch": 3.512115478515625e-05, + "step": 23017, + "training_step_time": 0.10434126853942871 + }, + { + "epoch": 3.51226806640625e-05, + "model_forward_time": 0.0255887508392334, + "step": 23018 + }, + { + "epoch": 3.51226806640625e-05, + "step": 23018, + "training_step_time": 0.10509490966796875 + }, + { + "epoch": 3.512420654296875e-05, + "model_forward_time": 0.025323867797851562, + "step": 23019 + }, + { + "epoch": 3.512420654296875e-05, + "step": 23019, + "training_step_time": 0.11030936241149902 + }, + { + "epoch": 3.5125732421875e-05, + "grad_norm": 0.21688659489154816, + "learning_rate": 1.4084097552011571e-05, + "loss": 0.0045, + "step": 23020 + }, + { + "epoch": 3.5125732421875e-05, + "model_forward_time": 0.023921966552734375, + "step": 23020 + }, + { + "epoch": 3.5125732421875e-05, + "step": 23020, + "training_step_time": 0.10755419731140137 + }, + { + "epoch": 3.512725830078125e-05, + "model_forward_time": 0.024764537811279297, + "step": 23021 + }, + { + "epoch": 3.512725830078125e-05, + "step": 23021, + "training_step_time": 0.10832977294921875 + }, + { + "epoch": 3.51287841796875e-05, + "model_forward_time": 0.025289535522460938, + "step": 23022 + }, + { + "epoch": 3.51287841796875e-05, + "step": 23022, + "training_step_time": 0.10703611373901367 + }, + { + "epoch": 3.513031005859375e-05, + "model_forward_time": 0.025289058685302734, + "step": 23023 + }, + { + "epoch": 3.513031005859375e-05, + "step": 23023, + "training_step_time": 0.10833477973937988 + }, + { + "epoch": 3.51318359375e-05, + "model_forward_time": 0.025257349014282227, + "step": 23024 + }, + { + "epoch": 3.51318359375e-05, + "step": 23024, + "training_step_time": 0.1061863899230957 + }, + { + "epoch": 3.513336181640625e-05, + "model_forward_time": 0.024889469146728516, + "step": 23025 + }, + { + "epoch": 3.513336181640625e-05, + "step": 23025, + "training_step_time": 0.13547468185424805 + }, + { + "epoch": 3.51348876953125e-05, + "model_forward_time": 0.025437593460083008, + "step": 23026 + }, + { + "epoch": 3.51348876953125e-05, + "step": 23026, + "training_step_time": 0.10654854774475098 + }, + { + "epoch": 3.513641357421875e-05, + "model_forward_time": 0.025429964065551758, + "step": 23027 + }, + { + "epoch": 3.513641357421875e-05, + "step": 23027, + "training_step_time": 0.1110081672668457 + }, + { + "epoch": 3.5137939453125e-05, + "model_forward_time": 0.02485942840576172, + "step": 23028 + }, + { + "epoch": 3.5137939453125e-05, + "step": 23028, + "training_step_time": 0.131011962890625 + }, + { + "epoch": 3.513946533203125e-05, + "model_forward_time": 0.02566695213317871, + "step": 23029 + }, + { + "epoch": 3.513946533203125e-05, + "step": 23029, + "training_step_time": 0.11715555191040039 + }, + { + "epoch": 3.51409912109375e-05, + "grad_norm": 0.1771112084388733, + "learning_rate": 1.4045774595731315e-05, + "loss": 0.0084, + "step": 23030 + }, + { + "epoch": 3.51409912109375e-05, + "model_forward_time": 0.02570199966430664, + "step": 23030 + }, + { + "epoch": 3.51409912109375e-05, + "step": 23030, + "training_step_time": 0.11069273948669434 + }, + { + "epoch": 3.514251708984375e-05, + "model_forward_time": 0.025145530700683594, + "step": 23031 + }, + { + "epoch": 3.514251708984375e-05, + "step": 23031, + "training_step_time": 0.1827259063720703 + }, + { + "epoch": 3.514404296875e-05, + "model_forward_time": 0.024221181869506836, + "step": 23032 + }, + { + "epoch": 3.514404296875e-05, + "step": 23032, + "training_step_time": 0.11976385116577148 + }, + { + "epoch": 3.514556884765625e-05, + "model_forward_time": 0.024445056915283203, + "step": 23033 + }, + { + "epoch": 3.514556884765625e-05, + "step": 23033, + "training_step_time": 0.20771121978759766 + }, + { + "epoch": 3.51470947265625e-05, + "model_forward_time": 0.024549007415771484, + "step": 23034 + }, + { + "epoch": 3.51470947265625e-05, + "step": 23034, + "training_step_time": 0.10373735427856445 + }, + { + "epoch": 3.514862060546875e-05, + "model_forward_time": 0.0247650146484375, + "step": 23035 + }, + { + "epoch": 3.514862060546875e-05, + "step": 23035, + "training_step_time": 0.11725425720214844 + }, + { + "epoch": 3.5150146484375e-05, + "model_forward_time": 0.0253908634185791, + "step": 23036 + }, + { + "epoch": 3.5150146484375e-05, + "step": 23036, + "training_step_time": 0.16239690780639648 + }, + { + "epoch": 3.515167236328125e-05, + "model_forward_time": 0.02458786964416504, + "step": 23037 + }, + { + "epoch": 3.515167236328125e-05, + "step": 23037, + "training_step_time": 0.1058499813079834 + }, + { + "epoch": 3.51531982421875e-05, + "model_forward_time": 0.02828216552734375, + "step": 23038 + }, + { + "epoch": 3.51531982421875e-05, + "step": 23038, + "training_step_time": 0.11051392555236816 + }, + { + "epoch": 3.515472412109375e-05, + "model_forward_time": 0.025793075561523438, + "step": 23039 + }, + { + "epoch": 3.515472412109375e-05, + "step": 23039, + "training_step_time": 0.10580945014953613 + }, + { + "epoch": 3.515625e-05, + "grad_norm": 0.3599991500377655, + "learning_rate": 1.4007495327225162e-05, + "loss": 0.0069, + "step": 23040 + }, + { + "epoch": 3.515625e-05, + "model_forward_time": 0.025115013122558594, + "step": 23040 + }, + { + "epoch": 3.515625e-05, + "step": 23040, + "training_step_time": 0.10543584823608398 + }, + { + "epoch": 3.515777587890625e-05, + "model_forward_time": 0.024872779846191406, + "step": 23041 + }, + { + "epoch": 3.515777587890625e-05, + "step": 23041, + "training_step_time": 0.10440373420715332 + }, + { + "epoch": 3.51593017578125e-05, + "model_forward_time": 0.02507781982421875, + "step": 23042 + }, + { + "epoch": 3.51593017578125e-05, + "step": 23042, + "training_step_time": 0.10541987419128418 + }, + { + "epoch": 3.516082763671875e-05, + "model_forward_time": 0.025162696838378906, + "step": 23043 + }, + { + "epoch": 3.516082763671875e-05, + "step": 23043, + "training_step_time": 0.11371517181396484 + }, + { + "epoch": 3.5162353515625e-05, + "model_forward_time": 0.025356531143188477, + "step": 23044 + }, + { + "epoch": 3.5162353515625e-05, + "step": 23044, + "training_step_time": 0.10441851615905762 + }, + { + "epoch": 3.516387939453125e-05, + "model_forward_time": 0.025207042694091797, + "step": 23045 + }, + { + "epoch": 3.516387939453125e-05, + "step": 23045, + "training_step_time": 0.10316824913024902 + }, + { + "epoch": 3.51654052734375e-05, + "model_forward_time": 0.024935245513916016, + "step": 23046 + }, + { + "epoch": 3.51654052734375e-05, + "step": 23046, + "training_step_time": 0.10908913612365723 + }, + { + "epoch": 3.516693115234375e-05, + "model_forward_time": 0.024891376495361328, + "step": 23047 + }, + { + "epoch": 3.516693115234375e-05, + "step": 23047, + "training_step_time": 0.10562610626220703 + }, + { + "epoch": 3.516845703125e-05, + "model_forward_time": 0.024912118911743164, + "step": 23048 + }, + { + "epoch": 3.516845703125e-05, + "step": 23048, + "training_step_time": 0.12527012825012207 + }, + { + "epoch": 3.516998291015625e-05, + "model_forward_time": 0.024872303009033203, + "step": 23049 + }, + { + "epoch": 3.516998291015625e-05, + "step": 23049, + "training_step_time": 0.14543557167053223 + }, + { + "epoch": 3.51715087890625e-05, + "grad_norm": 0.32509398460388184, + "learning_rate": 1.3969259793006079e-05, + "loss": 0.0124, + "step": 23050 + }, + { + "epoch": 3.51715087890625e-05, + "model_forward_time": 0.024800777435302734, + "step": 23050 + }, + { + "epoch": 3.51715087890625e-05, + "step": 23050, + "training_step_time": 0.10483479499816895 + }, + { + "epoch": 3.517303466796875e-05, + "model_forward_time": 0.024719953536987305, + "step": 23051 + }, + { + "epoch": 3.517303466796875e-05, + "step": 23051, + "training_step_time": 0.10918498039245605 + }, + { + "epoch": 3.5174560546875e-05, + "model_forward_time": 0.025378704071044922, + "step": 23052 + }, + { + "epoch": 3.5174560546875e-05, + "step": 23052, + "training_step_time": 0.1071021556854248 + }, + { + "epoch": 3.517608642578125e-05, + "model_forward_time": 0.02514934539794922, + "step": 23053 + }, + { + "epoch": 3.517608642578125e-05, + "step": 23053, + "training_step_time": 0.10806655883789062 + }, + { + "epoch": 3.51776123046875e-05, + "model_forward_time": 0.02511119842529297, + "step": 23054 + }, + { + "epoch": 3.51776123046875e-05, + "step": 23054, + "training_step_time": 0.15474462509155273 + }, + { + "epoch": 3.517913818359375e-05, + "model_forward_time": 0.028865814208984375, + "step": 23055 + }, + { + "epoch": 3.517913818359375e-05, + "step": 23055, + "training_step_time": 0.1471726894378662 + }, + { + "epoch": 3.51806640625e-05, + "model_forward_time": 0.024655818939208984, + "step": 23056 + }, + { + "epoch": 3.51806640625e-05, + "step": 23056, + "training_step_time": 0.11116456985473633 + }, + { + "epoch": 3.518218994140625e-05, + "model_forward_time": 0.027028322219848633, + "step": 23057 + }, + { + "epoch": 3.518218994140625e-05, + "step": 23057, + "training_step_time": 0.1247415542602539 + }, + { + "epoch": 3.51837158203125e-05, + "model_forward_time": 0.025287151336669922, + "step": 23058 + }, + { + "epoch": 3.51837158203125e-05, + "step": 23058, + "training_step_time": 0.17093443870544434 + }, + { + "epoch": 3.518524169921875e-05, + "model_forward_time": 0.024871110916137695, + "step": 23059 + }, + { + "epoch": 3.518524169921875e-05, + "step": 23059, + "training_step_time": 0.10534405708312988 + }, + { + "epoch": 3.5186767578125e-05, + "grad_norm": 0.27006369829177856, + "learning_rate": 1.3931068039533823e-05, + "loss": 0.0061, + "step": 23060 + }, + { + "epoch": 3.5186767578125e-05, + "model_forward_time": 0.024516582489013672, + "step": 23060 + }, + { + "epoch": 3.5186767578125e-05, + "step": 23060, + "training_step_time": 0.10267424583435059 + }, + { + "epoch": 3.518829345703125e-05, + "model_forward_time": 0.0251312255859375, + "step": 23061 + }, + { + "epoch": 3.518829345703125e-05, + "step": 23061, + "training_step_time": 0.10782456398010254 + }, + { + "epoch": 3.51898193359375e-05, + "model_forward_time": 0.02488994598388672, + "step": 23062 + }, + { + "epoch": 3.51898193359375e-05, + "step": 23062, + "training_step_time": 0.10932207107543945 + }, + { + "epoch": 3.519134521484375e-05, + "model_forward_time": 0.024297475814819336, + "step": 23063 + }, + { + "epoch": 3.519134521484375e-05, + "step": 23063, + "training_step_time": 0.10717177391052246 + }, + { + "epoch": 3.519287109375e-05, + "model_forward_time": 0.025592803955078125, + "step": 23064 + }, + { + "epoch": 3.519287109375e-05, + "step": 23064, + "training_step_time": 0.10600733757019043 + }, + { + "epoch": 3.519439697265625e-05, + "model_forward_time": 0.024846315383911133, + "step": 23065 + }, + { + "epoch": 3.519439697265625e-05, + "step": 23065, + "training_step_time": 0.1053462028503418 + }, + { + "epoch": 3.51959228515625e-05, + "model_forward_time": 0.024908065795898438, + "step": 23066 + }, + { + "epoch": 3.51959228515625e-05, + "step": 23066, + "training_step_time": 0.10908961296081543 + }, + { + "epoch": 3.519744873046875e-05, + "model_forward_time": 0.027004003524780273, + "step": 23067 + }, + { + "epoch": 3.519744873046875e-05, + "step": 23067, + "training_step_time": 0.10745906829833984 + }, + { + "epoch": 3.5198974609375e-05, + "model_forward_time": 0.025081872940063477, + "step": 23068 + }, + { + "epoch": 3.5198974609375e-05, + "step": 23068, + "training_step_time": 0.10512280464172363 + }, + { + "epoch": 3.520050048828125e-05, + "model_forward_time": 0.025026798248291016, + "step": 23069 + }, + { + "epoch": 3.520050048828125e-05, + "step": 23069, + "training_step_time": 0.11208939552307129 + }, + { + "epoch": 3.52020263671875e-05, + "grad_norm": 0.3704567849636078, + "learning_rate": 1.389292011321498e-05, + "loss": 0.0043, + "step": 23070 + }, + { + "epoch": 3.52020263671875e-05, + "model_forward_time": 0.024922609329223633, + "step": 23070 + }, + { + "epoch": 3.52020263671875e-05, + "step": 23070, + "training_step_time": 0.10541915893554688 + }, + { + "epoch": 3.520355224609375e-05, + "model_forward_time": 0.024843931198120117, + "step": 23071 + }, + { + "epoch": 3.520355224609375e-05, + "step": 23071, + "training_step_time": 0.10347819328308105 + }, + { + "epoch": 3.5205078125e-05, + "model_forward_time": 0.024923086166381836, + "step": 23072 + }, + { + "epoch": 3.5205078125e-05, + "step": 23072, + "training_step_time": 0.20636868476867676 + }, + { + "epoch": 3.520660400390625e-05, + "model_forward_time": 0.024051427841186523, + "step": 23073 + }, + { + "epoch": 3.520660400390625e-05, + "step": 23073, + "training_step_time": 0.10555410385131836 + }, + { + "epoch": 3.52081298828125e-05, + "model_forward_time": 0.02428746223449707, + "step": 23074 + }, + { + "epoch": 3.52081298828125e-05, + "step": 23074, + "training_step_time": 0.1076200008392334 + }, + { + "epoch": 3.520965576171875e-05, + "model_forward_time": 0.02544403076171875, + "step": 23075 + }, + { + "epoch": 3.520965576171875e-05, + "step": 23075, + "training_step_time": 0.13357901573181152 + }, + { + "epoch": 3.5211181640625e-05, + "model_forward_time": 0.024820327758789062, + "step": 23076 + }, + { + "epoch": 3.5211181640625e-05, + "step": 23076, + "training_step_time": 0.12658071517944336 + }, + { + "epoch": 3.521270751953125e-05, + "model_forward_time": 0.024715423583984375, + "step": 23077 + }, + { + "epoch": 3.521270751953125e-05, + "step": 23077, + "training_step_time": 0.10562872886657715 + }, + { + "epoch": 3.52142333984375e-05, + "model_forward_time": 0.024915218353271484, + "step": 23078 + }, + { + "epoch": 3.52142333984375e-05, + "step": 23078, + "training_step_time": 0.12357902526855469 + }, + { + "epoch": 3.521575927734375e-05, + "model_forward_time": 0.02485346794128418, + "step": 23079 + }, + { + "epoch": 3.521575927734375e-05, + "step": 23079, + "training_step_time": 0.10692644119262695 + }, + { + "epoch": 3.521728515625e-05, + "grad_norm": 0.35327544808387756, + "learning_rate": 1.385481606040287e-05, + "loss": 0.0061, + "step": 23080 + }, + { + "epoch": 3.521728515625e-05, + "model_forward_time": 0.024805307388305664, + "step": 23080 + }, + { + "epoch": 3.521728515625e-05, + "step": 23080, + "training_step_time": 0.20168209075927734 + }, + { + "epoch": 3.521881103515625e-05, + "model_forward_time": 0.024309635162353516, + "step": 23081 + }, + { + "epoch": 3.521881103515625e-05, + "step": 23081, + "training_step_time": 0.10931873321533203 + }, + { + "epoch": 3.52203369140625e-05, + "model_forward_time": 0.0267789363861084, + "step": 23082 + }, + { + "epoch": 3.52203369140625e-05, + "step": 23082, + "training_step_time": 0.12189364433288574 + }, + { + "epoch": 3.522186279296875e-05, + "model_forward_time": 0.025970458984375, + "step": 23083 + }, + { + "epoch": 3.522186279296875e-05, + "step": 23083, + "training_step_time": 0.18715143203735352 + }, + { + "epoch": 3.5223388671875e-05, + "model_forward_time": 0.02407097816467285, + "step": 23084 + }, + { + "epoch": 3.5223388671875e-05, + "step": 23084, + "training_step_time": 0.10316228866577148 + }, + { + "epoch": 3.522491455078125e-05, + "model_forward_time": 0.025057315826416016, + "step": 23085 + }, + { + "epoch": 3.522491455078125e-05, + "step": 23085, + "training_step_time": 0.10377860069274902 + }, + { + "epoch": 3.52264404296875e-05, + "model_forward_time": 0.02531886100769043, + "step": 23086 + }, + { + "epoch": 3.52264404296875e-05, + "step": 23086, + "training_step_time": 0.10595059394836426 + }, + { + "epoch": 3.522796630859375e-05, + "model_forward_time": 0.02520012855529785, + "step": 23087 + }, + { + "epoch": 3.522796630859375e-05, + "step": 23087, + "training_step_time": 0.10639595985412598 + }, + { + "epoch": 3.52294921875e-05, + "model_forward_time": 0.025005817413330078, + "step": 23088 + }, + { + "epoch": 3.52294921875e-05, + "step": 23088, + "training_step_time": 0.10934972763061523 + }, + { + "epoch": 3.523101806640625e-05, + "model_forward_time": 0.025043010711669922, + "step": 23089 + }, + { + "epoch": 3.523101806640625e-05, + "step": 23089, + "training_step_time": 0.10528826713562012 + }, + { + "epoch": 3.52325439453125e-05, + "grad_norm": 0.1638215035200119, + "learning_rate": 1.3816755927397502e-05, + "loss": 0.0049, + "step": 23090 + }, + { + "epoch": 3.52325439453125e-05, + "model_forward_time": 0.026592016220092773, + "step": 23090 + }, + { + "epoch": 3.52325439453125e-05, + "step": 23090, + "training_step_time": 0.1082158088684082 + }, + { + "epoch": 3.523406982421875e-05, + "model_forward_time": 0.025236129760742188, + "step": 23091 + }, + { + "epoch": 3.523406982421875e-05, + "step": 23091, + "training_step_time": 0.10436344146728516 + }, + { + "epoch": 3.5235595703125e-05, + "model_forward_time": 0.025329113006591797, + "step": 23092 + }, + { + "epoch": 3.5235595703125e-05, + "step": 23092, + "training_step_time": 0.13971710205078125 + }, + { + "epoch": 3.523712158203125e-05, + "model_forward_time": 0.025399446487426758, + "step": 23093 + }, + { + "epoch": 3.523712158203125e-05, + "step": 23093, + "training_step_time": 0.16233420372009277 + }, + { + "epoch": 3.52386474609375e-05, + "model_forward_time": 0.024213075637817383, + "step": 23094 + }, + { + "epoch": 3.52386474609375e-05, + "step": 23094, + "training_step_time": 0.15911197662353516 + }, + { + "epoch": 3.524017333984375e-05, + "model_forward_time": 0.024246692657470703, + "step": 23095 + }, + { + "epoch": 3.524017333984375e-05, + "step": 23095, + "training_step_time": 0.14734292030334473 + }, + { + "epoch": 3.524169921875e-05, + "model_forward_time": 0.02434086799621582, + "step": 23096 + }, + { + "epoch": 3.524169921875e-05, + "step": 23096, + "training_step_time": 0.19723749160766602 + }, + { + "epoch": 3.524322509765625e-05, + "model_forward_time": 0.024396181106567383, + "step": 23097 + }, + { + "epoch": 3.524322509765625e-05, + "step": 23097, + "training_step_time": 0.12333488464355469 + }, + { + "epoch": 3.52447509765625e-05, + "model_forward_time": 0.025194406509399414, + "step": 23098 + }, + { + "epoch": 3.52447509765625e-05, + "step": 23098, + "training_step_time": 0.1395728588104248 + }, + { + "epoch": 3.524627685546875e-05, + "model_forward_time": 0.02460932731628418, + "step": 23099 + }, + { + "epoch": 3.524627685546875e-05, + "step": 23099, + "training_step_time": 0.1540238857269287 + }, + { + "epoch": 3.5247802734375e-05, + "grad_norm": 0.16723492741584778, + "learning_rate": 1.3778739760445552e-05, + "loss": 0.0095, + "step": 23100 + }, + { + "epoch": 3.5247802734375e-05, + "model_forward_time": 0.026525259017944336, + "step": 23100 + }, + { + "epoch": 3.5247802734375e-05, + "step": 23100, + "training_step_time": 0.1522533893585205 + }, + { + "epoch": 3.524932861328125e-05, + "model_forward_time": 0.0241241455078125, + "step": 23101 + }, + { + "epoch": 3.524932861328125e-05, + "step": 23101, + "training_step_time": 0.11867475509643555 + }, + { + "epoch": 3.52508544921875e-05, + "model_forward_time": 0.025692462921142578, + "step": 23102 + }, + { + "epoch": 3.52508544921875e-05, + "step": 23102, + "training_step_time": 0.11806654930114746 + }, + { + "epoch": 3.525238037109375e-05, + "model_forward_time": 0.027798891067504883, + "step": 23103 + }, + { + "epoch": 3.525238037109375e-05, + "step": 23103, + "training_step_time": 0.1245884895324707 + }, + { + "epoch": 3.525390625e-05, + "model_forward_time": 0.025104522705078125, + "step": 23104 + }, + { + "epoch": 3.525390625e-05, + "step": 23104, + "training_step_time": 0.10880351066589355 + }, + { + "epoch": 3.525543212890625e-05, + "model_forward_time": 0.02618575096130371, + "step": 23105 + }, + { + "epoch": 3.525543212890625e-05, + "step": 23105, + "training_step_time": 0.11228346824645996 + }, + { + "epoch": 3.52569580078125e-05, + "model_forward_time": 0.025060653686523438, + "step": 23106 + }, + { + "epoch": 3.52569580078125e-05, + "step": 23106, + "training_step_time": 0.10997462272644043 + }, + { + "epoch": 3.525848388671875e-05, + "model_forward_time": 0.024953842163085938, + "step": 23107 + }, + { + "epoch": 3.525848388671875e-05, + "step": 23107, + "training_step_time": 0.1102900505065918 + }, + { + "epoch": 3.5260009765625e-05, + "model_forward_time": 0.02556014060974121, + "step": 23108 + }, + { + "epoch": 3.5260009765625e-05, + "step": 23108, + "training_step_time": 0.10829019546508789 + }, + { + "epoch": 3.526153564453125e-05, + "model_forward_time": 0.024992704391479492, + "step": 23109 + }, + { + "epoch": 3.526153564453125e-05, + "step": 23109, + "training_step_time": 0.10482978820800781 + }, + { + "epoch": 3.52630615234375e-05, + "grad_norm": 0.12765149772167206, + "learning_rate": 1.374076760574024e-05, + "loss": 0.01, + "step": 23110 + }, + { + "epoch": 3.52630615234375e-05, + "model_forward_time": 0.025246143341064453, + "step": 23110 + }, + { + "epoch": 3.52630615234375e-05, + "step": 23110, + "training_step_time": 0.10931563377380371 + }, + { + "epoch": 3.526458740234375e-05, + "model_forward_time": 0.025652408599853516, + "step": 23111 + }, + { + "epoch": 3.526458740234375e-05, + "step": 23111, + "training_step_time": 0.10915565490722656 + }, + { + "epoch": 3.526611328125e-05, + "model_forward_time": 0.025240182876586914, + "step": 23112 + }, + { + "epoch": 3.526611328125e-05, + "step": 23112, + "training_step_time": 0.10574889183044434 + }, + { + "epoch": 3.526763916015625e-05, + "model_forward_time": 0.024881601333618164, + "step": 23113 + }, + { + "epoch": 3.526763916015625e-05, + "step": 23113, + "training_step_time": 0.10488080978393555 + }, + { + "epoch": 3.52691650390625e-05, + "model_forward_time": 0.02550220489501953, + "step": 23114 + }, + { + "epoch": 3.52691650390625e-05, + "step": 23114, + "training_step_time": 0.10786986351013184 + }, + { + "epoch": 3.527069091796875e-05, + "model_forward_time": 0.025188684463500977, + "step": 23115 + }, + { + "epoch": 3.527069091796875e-05, + "step": 23115, + "training_step_time": 0.1053304672241211 + }, + { + "epoch": 3.5272216796875e-05, + "model_forward_time": 0.025440454483032227, + "step": 23116 + }, + { + "epoch": 3.5272216796875e-05, + "step": 23116, + "training_step_time": 0.10437893867492676 + }, + { + "epoch": 3.527374267578125e-05, + "model_forward_time": 0.025090932846069336, + "step": 23117 + }, + { + "epoch": 3.527374267578125e-05, + "step": 23117, + "training_step_time": 0.10689616203308105 + }, + { + "epoch": 3.52752685546875e-05, + "model_forward_time": 0.025070667266845703, + "step": 23118 + }, + { + "epoch": 3.52752685546875e-05, + "step": 23118, + "training_step_time": 0.1650702953338623 + }, + { + "epoch": 3.527679443359375e-05, + "model_forward_time": 0.024255752563476562, + "step": 23119 + }, + { + "epoch": 3.527679443359375e-05, + "step": 23119, + "training_step_time": 0.10512971878051758 + }, + { + "epoch": 3.52783203125e-05, + "grad_norm": 0.18373161554336548, + "learning_rate": 1.3702839509421305e-05, + "loss": 0.007, + "step": 23120 + }, + { + "epoch": 3.52783203125e-05, + "model_forward_time": 0.02522873878479004, + "step": 23120 + }, + { + "epoch": 3.52783203125e-05, + "step": 23120, + "training_step_time": 0.10798978805541992 + }, + { + "epoch": 3.527984619140625e-05, + "model_forward_time": 0.025073528289794922, + "step": 23121 + }, + { + "epoch": 3.527984619140625e-05, + "step": 23121, + "training_step_time": 0.12377691268920898 + }, + { + "epoch": 3.52813720703125e-05, + "model_forward_time": 0.02470088005065918, + "step": 23122 + }, + { + "epoch": 3.52813720703125e-05, + "step": 23122, + "training_step_time": 0.13213658332824707 + }, + { + "epoch": 3.528289794921875e-05, + "model_forward_time": 0.02543163299560547, + "step": 23123 + }, + { + "epoch": 3.528289794921875e-05, + "step": 23123, + "training_step_time": 0.15021991729736328 + }, + { + "epoch": 3.5284423828125e-05, + "model_forward_time": 0.024410247802734375, + "step": 23124 + }, + { + "epoch": 3.5284423828125e-05, + "step": 23124, + "training_step_time": 0.17357683181762695 + }, + { + "epoch": 3.528594970703125e-05, + "model_forward_time": 0.024524688720703125, + "step": 23125 + }, + { + "epoch": 3.528594970703125e-05, + "step": 23125, + "training_step_time": 0.14800596237182617 + }, + { + "epoch": 3.52874755859375e-05, + "model_forward_time": 0.02429962158203125, + "step": 23126 + }, + { + "epoch": 3.52874755859375e-05, + "step": 23126, + "training_step_time": 0.14898681640625 + }, + { + "epoch": 3.528900146484375e-05, + "model_forward_time": 0.024982690811157227, + "step": 23127 + }, + { + "epoch": 3.528900146484375e-05, + "step": 23127, + "training_step_time": 0.15599393844604492 + }, + { + "epoch": 3.529052734375e-05, + "model_forward_time": 0.024388551712036133, + "step": 23128 + }, + { + "epoch": 3.529052734375e-05, + "step": 23128, + "training_step_time": 0.10816407203674316 + }, + { + "epoch": 3.529205322265625e-05, + "model_forward_time": 0.024954557418823242, + "step": 23129 + }, + { + "epoch": 3.529205322265625e-05, + "step": 23129, + "training_step_time": 0.10117244720458984 + }, + { + "epoch": 3.52935791015625e-05, + "grad_norm": 0.10926108062267303, + "learning_rate": 1.3664955517574968e-05, + "loss": 0.005, + "step": 23130 + }, + { + "epoch": 3.52935791015625e-05, + "model_forward_time": 0.02512526512145996, + "step": 23130 + }, + { + "epoch": 3.52935791015625e-05, + "step": 23130, + "training_step_time": 0.10891032218933105 + }, + { + "epoch": 3.529510498046875e-05, + "model_forward_time": 0.024851083755493164, + "step": 23131 + }, + { + "epoch": 3.529510498046875e-05, + "step": 23131, + "training_step_time": 0.10560441017150879 + }, + { + "epoch": 3.5296630859375e-05, + "model_forward_time": 0.025130033493041992, + "step": 23132 + }, + { + "epoch": 3.5296630859375e-05, + "step": 23132, + "training_step_time": 0.11008644104003906 + }, + { + "epoch": 3.529815673828125e-05, + "model_forward_time": 0.024986982345581055, + "step": 23133 + }, + { + "epoch": 3.529815673828125e-05, + "step": 23133, + "training_step_time": 0.10787439346313477 + }, + { + "epoch": 3.52996826171875e-05, + "model_forward_time": 0.025223970413208008, + "step": 23134 + }, + { + "epoch": 3.52996826171875e-05, + "step": 23134, + "training_step_time": 0.10712289810180664 + }, + { + "epoch": 3.530120849609375e-05, + "model_forward_time": 0.025408506393432617, + "step": 23135 + }, + { + "epoch": 3.530120849609375e-05, + "step": 23135, + "training_step_time": 0.10667562484741211 + }, + { + "epoch": 3.5302734375e-05, + "model_forward_time": 0.02529120445251465, + "step": 23136 + }, + { + "epoch": 3.5302734375e-05, + "step": 23136, + "training_step_time": 0.10810375213623047 + }, + { + "epoch": 3.530426025390625e-05, + "model_forward_time": 0.025081872940063477, + "step": 23137 + }, + { + "epoch": 3.530426025390625e-05, + "step": 23137, + "training_step_time": 0.1043083667755127 + }, + { + "epoch": 3.53057861328125e-05, + "model_forward_time": 0.02501082420349121, + "step": 23138 + }, + { + "epoch": 3.53057861328125e-05, + "step": 23138, + "training_step_time": 0.1042017936706543 + }, + { + "epoch": 3.530731201171875e-05, + "model_forward_time": 0.02492237091064453, + "step": 23139 + }, + { + "epoch": 3.530731201171875e-05, + "step": 23139, + "training_step_time": 0.10530972480773926 + }, + { + "epoch": 3.5308837890625e-05, + "grad_norm": 0.107993483543396, + "learning_rate": 1.3627115676233832e-05, + "loss": 0.0033, + "step": 23140 + }, + { + "epoch": 3.5308837890625e-05, + "model_forward_time": 0.0258634090423584, + "step": 23140 + }, + { + "epoch": 3.5308837890625e-05, + "step": 23140, + "training_step_time": 0.11224055290222168 + }, + { + "epoch": 3.531036376953125e-05, + "model_forward_time": 0.025231599807739258, + "step": 23141 + }, + { + "epoch": 3.531036376953125e-05, + "step": 23141, + "training_step_time": 0.1389141082763672 + }, + { + "epoch": 3.53118896484375e-05, + "model_forward_time": 0.025280475616455078, + "step": 23142 + }, + { + "epoch": 3.53118896484375e-05, + "step": 23142, + "training_step_time": 0.17558932304382324 + }, + { + "epoch": 3.531341552734375e-05, + "model_forward_time": 0.025073766708374023, + "step": 23143 + }, + { + "epoch": 3.531341552734375e-05, + "step": 23143, + "training_step_time": 0.1547374725341797 + }, + { + "epoch": 3.531494140625e-05, + "model_forward_time": 0.02518177032470703, + "step": 23144 + }, + { + "epoch": 3.531494140625e-05, + "step": 23144, + "training_step_time": 0.16141605377197266 + }, + { + "epoch": 3.531646728515625e-05, + "model_forward_time": 0.02423262596130371, + "step": 23145 + }, + { + "epoch": 3.531646728515625e-05, + "step": 23145, + "training_step_time": 0.17450547218322754 + }, + { + "epoch": 3.53179931640625e-05, + "model_forward_time": 0.024927854537963867, + "step": 23146 + }, + { + "epoch": 3.53179931640625e-05, + "step": 23146, + "training_step_time": 0.2000565528869629 + }, + { + "epoch": 3.531951904296875e-05, + "model_forward_time": 0.02409815788269043, + "step": 23147 + }, + { + "epoch": 3.531951904296875e-05, + "step": 23147, + "training_step_time": 0.14000868797302246 + }, + { + "epoch": 3.5321044921875e-05, + "model_forward_time": 0.02734518051147461, + "step": 23148 + }, + { + "epoch": 3.5321044921875e-05, + "step": 23148, + "training_step_time": 0.20338034629821777 + }, + { + "epoch": 3.532257080078125e-05, + "model_forward_time": 0.02471613883972168, + "step": 23149 + }, + { + "epoch": 3.532257080078125e-05, + "step": 23149, + "training_step_time": 0.11739897727966309 + }, + { + "epoch": 3.53240966796875e-05, + "grad_norm": 0.21589864790439606, + "learning_rate": 1.3589320031376901e-05, + "loss": 0.0193, + "step": 23150 + }, + { + "epoch": 3.53240966796875e-05, + "model_forward_time": 0.023828506469726562, + "step": 23150 + }, + { + "epoch": 3.53240966796875e-05, + "step": 23150, + "training_step_time": 0.11547422409057617 + }, + { + "epoch": 3.532562255859375e-05, + "model_forward_time": 0.024300336837768555, + "step": 23151 + }, + { + "epoch": 3.532562255859375e-05, + "step": 23151, + "training_step_time": 0.1128087043762207 + }, + { + "epoch": 3.53271484375e-05, + "model_forward_time": 0.024165868759155273, + "step": 23152 + }, + { + "epoch": 3.53271484375e-05, + "step": 23152, + "training_step_time": 0.1094212532043457 + }, + { + "epoch": 3.532867431640625e-05, + "model_forward_time": 0.024923324584960938, + "step": 23153 + }, + { + "epoch": 3.532867431640625e-05, + "step": 23153, + "training_step_time": 0.10966134071350098 + }, + { + "epoch": 3.53302001953125e-05, + "model_forward_time": 0.024950504302978516, + "step": 23154 + }, + { + "epoch": 3.53302001953125e-05, + "step": 23154, + "training_step_time": 0.1122894287109375 + }, + { + "epoch": 3.533172607421875e-05, + "model_forward_time": 0.025178194046020508, + "step": 23155 + }, + { + "epoch": 3.533172607421875e-05, + "step": 23155, + "training_step_time": 0.1089625358581543 + }, + { + "epoch": 3.5333251953125e-05, + "model_forward_time": 0.025194406509399414, + "step": 23156 + }, + { + "epoch": 3.5333251953125e-05, + "step": 23156, + "training_step_time": 0.10573196411132812 + }, + { + "epoch": 3.533477783203125e-05, + "model_forward_time": 0.025018930435180664, + "step": 23157 + }, + { + "epoch": 3.533477783203125e-05, + "step": 23157, + "training_step_time": 0.10500121116638184 + }, + { + "epoch": 3.53363037109375e-05, + "model_forward_time": 0.025125503540039062, + "step": 23158 + }, + { + "epoch": 3.53363037109375e-05, + "step": 23158, + "training_step_time": 0.10570025444030762 + }, + { + "epoch": 3.533782958984375e-05, + "model_forward_time": 0.025163888931274414, + "step": 23159 + }, + { + "epoch": 3.533782958984375e-05, + "step": 23159, + "training_step_time": 0.10565829277038574 + }, + { + "epoch": 3.533935546875e-05, + "grad_norm": 0.11769426614046097, + "learning_rate": 1.3551568628929434e-05, + "loss": 0.0042, + "step": 23160 + }, + { + "epoch": 3.533935546875e-05, + "model_forward_time": 0.025023460388183594, + "step": 23160 + }, + { + "epoch": 3.533935546875e-05, + "step": 23160, + "training_step_time": 0.10630416870117188 + }, + { + "epoch": 3.534088134765625e-05, + "model_forward_time": 0.025200843811035156, + "step": 23161 + }, + { + "epoch": 3.534088134765625e-05, + "step": 23161, + "training_step_time": 0.10595250129699707 + }, + { + "epoch": 3.53424072265625e-05, + "model_forward_time": 0.02394247055053711, + "step": 23162 + }, + { + "epoch": 3.53424072265625e-05, + "step": 23162, + "training_step_time": 0.10371804237365723 + }, + { + "epoch": 3.534393310546875e-05, + "model_forward_time": 0.0249176025390625, + "step": 23163 + }, + { + "epoch": 3.534393310546875e-05, + "step": 23163, + "training_step_time": 0.10493588447570801 + }, + { + "epoch": 3.5345458984375e-05, + "model_forward_time": 0.025281190872192383, + "step": 23164 + }, + { + "epoch": 3.5345458984375e-05, + "step": 23164, + "training_step_time": 0.12078738212585449 + }, + { + "epoch": 3.534698486328125e-05, + "model_forward_time": 0.02546215057373047, + "step": 23165 + }, + { + "epoch": 3.534698486328125e-05, + "step": 23165, + "training_step_time": 0.1128091812133789 + }, + { + "epoch": 3.53485107421875e-05, + "model_forward_time": 0.02509331703186035, + "step": 23166 + }, + { + "epoch": 3.53485107421875e-05, + "step": 23166, + "training_step_time": 0.1770646572113037 + }, + { + "epoch": 3.535003662109375e-05, + "model_forward_time": 0.024168014526367188, + "step": 23167 + }, + { + "epoch": 3.535003662109375e-05, + "step": 23167, + "training_step_time": 0.17686176300048828 + }, + { + "epoch": 3.53515625e-05, + "model_forward_time": 0.02414107322692871, + "step": 23168 + }, + { + "epoch": 3.53515625e-05, + "step": 23168, + "training_step_time": 0.19973278045654297 + }, + { + "epoch": 3.535308837890625e-05, + "model_forward_time": 0.024329185485839844, + "step": 23169 + }, + { + "epoch": 3.535308837890625e-05, + "step": 23169, + "training_step_time": 0.13905692100524902 + }, + { + "epoch": 3.53546142578125e-05, + "grad_norm": 0.08424199372529984, + "learning_rate": 1.3513861514762933e-05, + "loss": 0.0063, + "step": 23170 + }, + { + "epoch": 3.53546142578125e-05, + "model_forward_time": 0.024341583251953125, + "step": 23170 + }, + { + "epoch": 3.53546142578125e-05, + "step": 23170, + "training_step_time": 0.10908222198486328 + }, + { + "epoch": 3.535614013671875e-05, + "model_forward_time": 0.02497553825378418, + "step": 23171 + }, + { + "epoch": 3.535614013671875e-05, + "step": 23171, + "training_step_time": 0.1777026653289795 + }, + { + "epoch": 3.5357666015625e-05, + "model_forward_time": 0.02461409568786621, + "step": 23172 + }, + { + "epoch": 3.5357666015625e-05, + "step": 23172, + "training_step_time": 0.12847208976745605 + }, + { + "epoch": 3.535919189453125e-05, + "model_forward_time": 0.02349257469177246, + "step": 23173 + }, + { + "epoch": 3.535919189453125e-05, + "step": 23173, + "training_step_time": 0.12204575538635254 + }, + { + "epoch": 3.53607177734375e-05, + "model_forward_time": 0.02443528175354004, + "step": 23174 + }, + { + "epoch": 3.53607177734375e-05, + "step": 23174, + "training_step_time": 0.10437226295471191 + }, + { + "epoch": 3.536224365234375e-05, + "model_forward_time": 0.02559804916381836, + "step": 23175 + }, + { + "epoch": 3.536224365234375e-05, + "step": 23175, + "training_step_time": 0.10947871208190918 + }, + { + "epoch": 3.536376953125e-05, + "model_forward_time": 0.02488088607788086, + "step": 23176 + }, + { + "epoch": 3.536376953125e-05, + "step": 23176, + "training_step_time": 0.10653328895568848 + }, + { + "epoch": 3.536529541015625e-05, + "model_forward_time": 0.024779558181762695, + "step": 23177 + }, + { + "epoch": 3.536529541015625e-05, + "step": 23177, + "training_step_time": 0.1111290454864502 + }, + { + "epoch": 3.53668212890625e-05, + "model_forward_time": 0.027599573135375977, + "step": 23178 + }, + { + "epoch": 3.53668212890625e-05, + "step": 23178, + "training_step_time": 0.10939764976501465 + }, + { + "epoch": 3.536834716796875e-05, + "model_forward_time": 0.025013208389282227, + "step": 23179 + }, + { + "epoch": 3.536834716796875e-05, + "step": 23179, + "training_step_time": 0.10781335830688477 + }, + { + "epoch": 3.5369873046875e-05, + "grad_norm": 0.08925186842679977, + "learning_rate": 1.3476198734695122e-05, + "loss": 0.0037, + "step": 23180 + }, + { + "epoch": 3.5369873046875e-05, + "model_forward_time": 0.025090456008911133, + "step": 23180 + }, + { + "epoch": 3.5369873046875e-05, + "step": 23180, + "training_step_time": 0.1077268123626709 + }, + { + "epoch": 3.537139892578125e-05, + "model_forward_time": 0.02525925636291504, + "step": 23181 + }, + { + "epoch": 3.537139892578125e-05, + "step": 23181, + "training_step_time": 0.10716080665588379 + }, + { + "epoch": 3.53729248046875e-05, + "model_forward_time": 0.02513718605041504, + "step": 23182 + }, + { + "epoch": 3.53729248046875e-05, + "step": 23182, + "training_step_time": 0.10761070251464844 + }, + { + "epoch": 3.537445068359375e-05, + "model_forward_time": 0.025086402893066406, + "step": 23183 + }, + { + "epoch": 3.537445068359375e-05, + "step": 23183, + "training_step_time": 0.18614983558654785 + }, + { + "epoch": 3.53759765625e-05, + "model_forward_time": 0.024204730987548828, + "step": 23184 + }, + { + "epoch": 3.53759765625e-05, + "step": 23184, + "training_step_time": 0.1532917022705078 + }, + { + "epoch": 3.537750244140625e-05, + "model_forward_time": 0.024193763732910156, + "step": 23185 + }, + { + "epoch": 3.537750244140625e-05, + "step": 23185, + "training_step_time": 0.11033821105957031 + }, + { + "epoch": 3.53790283203125e-05, + "model_forward_time": 0.024779558181762695, + "step": 23186 + }, + { + "epoch": 3.53790283203125e-05, + "step": 23186, + "training_step_time": 0.10337281227111816 + }, + { + "epoch": 3.538055419921875e-05, + "model_forward_time": 0.02505779266357422, + "step": 23187 + }, + { + "epoch": 3.538055419921875e-05, + "step": 23187, + "training_step_time": 0.11513972282409668 + }, + { + "epoch": 3.5382080078125e-05, + "model_forward_time": 0.02463817596435547, + "step": 23188 + }, + { + "epoch": 3.5382080078125e-05, + "step": 23188, + "training_step_time": 0.1819157600402832 + }, + { + "epoch": 3.538360595703125e-05, + "model_forward_time": 0.026914119720458984, + "step": 23189 + }, + { + "epoch": 3.538360595703125e-05, + "step": 23189, + "training_step_time": 0.2105121612548828 + }, + { + "epoch": 3.53851318359375e-05, + "grad_norm": 0.11898551881313324, + "learning_rate": 1.343858033448982e-05, + "loss": 0.0066, + "step": 23190 + }, + { + "epoch": 3.53851318359375e-05, + "model_forward_time": 0.024605274200439453, + "step": 23190 + }, + { + "epoch": 3.53851318359375e-05, + "step": 23190, + "training_step_time": 0.1592860221862793 + }, + { + "epoch": 3.538665771484375e-05, + "model_forward_time": 0.024466991424560547, + "step": 23191 + }, + { + "epoch": 3.538665771484375e-05, + "step": 23191, + "training_step_time": 0.11331367492675781 + }, + { + "epoch": 3.538818359375e-05, + "model_forward_time": 0.02729487419128418, + "step": 23192 + }, + { + "epoch": 3.538818359375e-05, + "step": 23192, + "training_step_time": 0.13415265083312988 + }, + { + "epoch": 3.538970947265625e-05, + "model_forward_time": 0.024981260299682617, + "step": 23193 + }, + { + "epoch": 3.538970947265625e-05, + "step": 23193, + "training_step_time": 0.19992852210998535 + }, + { + "epoch": 3.53912353515625e-05, + "model_forward_time": 0.024353504180908203, + "step": 23194 + }, + { + "epoch": 3.53912353515625e-05, + "step": 23194, + "training_step_time": 0.1254897117614746 + }, + { + "epoch": 3.539276123046875e-05, + "model_forward_time": 0.0244748592376709, + "step": 23195 + }, + { + "epoch": 3.539276123046875e-05, + "step": 23195, + "training_step_time": 0.12822842597961426 + }, + { + "epoch": 3.5394287109375e-05, + "model_forward_time": 0.02441883087158203, + "step": 23196 + }, + { + "epoch": 3.5394287109375e-05, + "step": 23196, + "training_step_time": 0.12202048301696777 + }, + { + "epoch": 3.539581298828125e-05, + "model_forward_time": 0.024977922439575195, + "step": 23197 + }, + { + "epoch": 3.539581298828125e-05, + "step": 23197, + "training_step_time": 0.11932706832885742 + }, + { + "epoch": 3.53973388671875e-05, + "model_forward_time": 0.0253908634185791, + "step": 23198 + }, + { + "epoch": 3.53973388671875e-05, + "step": 23198, + "training_step_time": 0.11548233032226562 + }, + { + "epoch": 3.539886474609375e-05, + "model_forward_time": 0.02502298355102539, + "step": 23199 + }, + { + "epoch": 3.539886474609375e-05, + "step": 23199, + "training_step_time": 0.11390852928161621 + }, + { + "epoch": 3.5400390625e-05, + "grad_norm": 0.1261717975139618, + "learning_rate": 1.3401006359856915e-05, + "loss": 0.0052, + "step": 23200 + }, + { + "epoch": 3.5400390625e-05, + "model_forward_time": 0.02512359619140625, + "step": 23200 + }, + { + "epoch": 3.5400390625e-05, + "step": 23200, + "training_step_time": 0.11222577095031738 + }, + { + "epoch": 3.540191650390625e-05, + "model_forward_time": 0.025458097457885742, + "step": 23201 + }, + { + "epoch": 3.540191650390625e-05, + "step": 23201, + "training_step_time": 0.11441636085510254 + }, + { + "epoch": 3.54034423828125e-05, + "model_forward_time": 0.02531599998474121, + "step": 23202 + }, + { + "epoch": 3.54034423828125e-05, + "step": 23202, + "training_step_time": 0.1075751781463623 + }, + { + "epoch": 3.540496826171875e-05, + "model_forward_time": 0.02594780921936035, + "step": 23203 + }, + { + "epoch": 3.540496826171875e-05, + "step": 23203, + "training_step_time": 0.10672950744628906 + }, + { + "epoch": 3.5406494140625e-05, + "model_forward_time": 0.02500629425048828, + "step": 23204 + }, + { + "epoch": 3.5406494140625e-05, + "step": 23204, + "training_step_time": 0.10536932945251465 + }, + { + "epoch": 3.540802001953125e-05, + "model_forward_time": 0.024811506271362305, + "step": 23205 + }, + { + "epoch": 3.540802001953125e-05, + "step": 23205, + "training_step_time": 0.10711526870727539 + }, + { + "epoch": 3.54095458984375e-05, + "model_forward_time": 0.025145292282104492, + "step": 23206 + }, + { + "epoch": 3.54095458984375e-05, + "step": 23206, + "training_step_time": 0.1084451675415039 + }, + { + "epoch": 3.541107177734375e-05, + "model_forward_time": 0.025076627731323242, + "step": 23207 + }, + { + "epoch": 3.541107177734375e-05, + "step": 23207, + "training_step_time": 0.10977387428283691 + }, + { + "epoch": 3.541259765625e-05, + "model_forward_time": 0.024968862533569336, + "step": 23208 + }, + { + "epoch": 3.541259765625e-05, + "step": 23208, + "training_step_time": 0.16614603996276855 + }, + { + "epoch": 3.541412353515625e-05, + "model_forward_time": 0.024358749389648438, + "step": 23209 + }, + { + "epoch": 3.541412353515625e-05, + "step": 23209, + "training_step_time": 0.10749959945678711 + }, + { + "epoch": 3.54156494140625e-05, + "grad_norm": 0.08713316172361374, + "learning_rate": 1.3363476856452356e-05, + "loss": 0.0046, + "step": 23210 + }, + { + "epoch": 3.54156494140625e-05, + "model_forward_time": 0.025378704071044922, + "step": 23210 + }, + { + "epoch": 3.54156494140625e-05, + "step": 23210, + "training_step_time": 0.10693883895874023 + }, + { + "epoch": 3.541717529296875e-05, + "model_forward_time": 0.025310277938842773, + "step": 23211 + }, + { + "epoch": 3.541717529296875e-05, + "step": 23211, + "training_step_time": 0.1280827522277832 + }, + { + "epoch": 3.5418701171875e-05, + "model_forward_time": 0.025011062622070312, + "step": 23212 + }, + { + "epoch": 3.5418701171875e-05, + "step": 23212, + "training_step_time": 0.11826372146606445 + }, + { + "epoch": 3.542022705078125e-05, + "model_forward_time": 0.02533102035522461, + "step": 23213 + }, + { + "epoch": 3.542022705078125e-05, + "step": 23213, + "training_step_time": 0.21614766120910645 + }, + { + "epoch": 3.54217529296875e-05, + "model_forward_time": 0.02450728416442871, + "step": 23214 + }, + { + "epoch": 3.54217529296875e-05, + "step": 23214, + "training_step_time": 0.11902379989624023 + }, + { + "epoch": 3.542327880859375e-05, + "model_forward_time": 0.02406144142150879, + "step": 23215 + }, + { + "epoch": 3.542327880859375e-05, + "step": 23215, + "training_step_time": 0.12376856803894043 + }, + { + "epoch": 3.54248046875e-05, + "model_forward_time": 0.025962114334106445, + "step": 23216 + }, + { + "epoch": 3.54248046875e-05, + "step": 23216, + "training_step_time": 0.15221571922302246 + }, + { + "epoch": 3.542633056640625e-05, + "model_forward_time": 0.024130582809448242, + "step": 23217 + }, + { + "epoch": 3.542633056640625e-05, + "step": 23217, + "training_step_time": 0.10278511047363281 + }, + { + "epoch": 3.54278564453125e-05, + "model_forward_time": 0.024770021438598633, + "step": 23218 + }, + { + "epoch": 3.54278564453125e-05, + "step": 23218, + "training_step_time": 0.11805987358093262 + }, + { + "epoch": 3.542938232421875e-05, + "model_forward_time": 0.025004863739013672, + "step": 23219 + }, + { + "epoch": 3.542938232421875e-05, + "step": 23219, + "training_step_time": 0.11264300346374512 + }, + { + "epoch": 3.5430908203125e-05, + "grad_norm": 0.07672315090894699, + "learning_rate": 1.3325991869878013e-05, + "loss": 0.0041, + "step": 23220 + }, + { + "epoch": 3.5430908203125e-05, + "model_forward_time": 0.025330543518066406, + "step": 23220 + }, + { + "epoch": 3.5430908203125e-05, + "step": 23220, + "training_step_time": 0.10419535636901855 + }, + { + "epoch": 3.543243408203125e-05, + "model_forward_time": 0.025084495544433594, + "step": 23221 + }, + { + "epoch": 3.543243408203125e-05, + "step": 23221, + "training_step_time": 0.10591363906860352 + }, + { + "epoch": 3.54339599609375e-05, + "model_forward_time": 0.027942895889282227, + "step": 23222 + }, + { + "epoch": 3.54339599609375e-05, + "step": 23222, + "training_step_time": 0.10905265808105469 + }, + { + "epoch": 3.543548583984375e-05, + "model_forward_time": 0.025374412536621094, + "step": 23223 + }, + { + "epoch": 3.543548583984375e-05, + "step": 23223, + "training_step_time": 0.1045374870300293 + }, + { + "epoch": 3.543701171875e-05, + "model_forward_time": 0.0255429744720459, + "step": 23224 + }, + { + "epoch": 3.543701171875e-05, + "step": 23224, + "training_step_time": 0.10607695579528809 + }, + { + "epoch": 3.543853759765625e-05, + "model_forward_time": 0.025141477584838867, + "step": 23225 + }, + { + "epoch": 3.543853759765625e-05, + "step": 23225, + "training_step_time": 0.10401415824890137 + }, + { + "epoch": 3.54400634765625e-05, + "model_forward_time": 0.02557659149169922, + "step": 23226 + }, + { + "epoch": 3.54400634765625e-05, + "step": 23226, + "training_step_time": 0.10540771484375 + }, + { + "epoch": 3.544158935546875e-05, + "model_forward_time": 0.025210142135620117, + "step": 23227 + }, + { + "epoch": 3.544158935546875e-05, + "step": 23227, + "training_step_time": 0.10345458984375 + }, + { + "epoch": 3.5443115234375e-05, + "model_forward_time": 0.026408672332763672, + "step": 23228 + }, + { + "epoch": 3.5443115234375e-05, + "step": 23228, + "training_step_time": 0.1813514232635498 + }, + { + "epoch": 3.544464111328125e-05, + "model_forward_time": 0.024237394332885742, + "step": 23229 + }, + { + "epoch": 3.544464111328125e-05, + "step": 23229, + "training_step_time": 0.14154505729675293 + }, + { + "epoch": 3.54461669921875e-05, + "grad_norm": 0.16616730391979218, + "learning_rate": 1.328855144568168e-05, + "loss": 0.0068, + "step": 23230 + }, + { + "epoch": 3.54461669921875e-05, + "model_forward_time": 0.024788379669189453, + "step": 23230 + }, + { + "epoch": 3.54461669921875e-05, + "step": 23230, + "training_step_time": 0.11187744140625 + }, + { + "epoch": 3.544769287109375e-05, + "model_forward_time": 0.025032758712768555, + "step": 23231 + }, + { + "epoch": 3.544769287109375e-05, + "step": 23231, + "training_step_time": 0.1047823429107666 + }, + { + "epoch": 3.544921875e-05, + "model_forward_time": 0.02505183219909668, + "step": 23232 + }, + { + "epoch": 3.544921875e-05, + "step": 23232, + "training_step_time": 0.10802388191223145 + }, + { + "epoch": 3.545074462890625e-05, + "model_forward_time": 0.024825096130371094, + "step": 23233 + }, + { + "epoch": 3.545074462890625e-05, + "step": 23233, + "training_step_time": 0.10576272010803223 + }, + { + "epoch": 3.54522705078125e-05, + "model_forward_time": 0.025194406509399414, + "step": 23234 + }, + { + "epoch": 3.54522705078125e-05, + "step": 23234, + "training_step_time": 0.12727808952331543 + }, + { + "epoch": 3.545379638671875e-05, + "model_forward_time": 0.02395176887512207, + "step": 23235 + }, + { + "epoch": 3.545379638671875e-05, + "step": 23235, + "training_step_time": 0.12461733818054199 + }, + { + "epoch": 3.5455322265625e-05, + "model_forward_time": 0.0250699520111084, + "step": 23236 + }, + { + "epoch": 3.5455322265625e-05, + "step": 23236, + "training_step_time": 0.16695499420166016 + }, + { + "epoch": 3.545684814453125e-05, + "model_forward_time": 0.025638341903686523, + "step": 23237 + }, + { + "epoch": 3.545684814453125e-05, + "step": 23237, + "training_step_time": 0.11171555519104004 + }, + { + "epoch": 3.54583740234375e-05, + "model_forward_time": 0.024732112884521484, + "step": 23238 + }, + { + "epoch": 3.54583740234375e-05, + "step": 23238, + "training_step_time": 0.14163899421691895 + }, + { + "epoch": 3.545989990234375e-05, + "model_forward_time": 0.025101900100708008, + "step": 23239 + }, + { + "epoch": 3.545989990234375e-05, + "step": 23239, + "training_step_time": 0.1862194538116455 + }, + { + "epoch": 3.546142578125e-05, + "grad_norm": 0.2733677923679352, + "learning_rate": 1.325115562935701e-05, + "loss": 0.0065, + "step": 23240 + }, + { + "epoch": 3.546142578125e-05, + "model_forward_time": 0.02449774742126465, + "step": 23240 + }, + { + "epoch": 3.546142578125e-05, + "step": 23240, + "training_step_time": 0.12102222442626953 + }, + { + "epoch": 3.546295166015625e-05, + "model_forward_time": 0.0246884822845459, + "step": 23241 + }, + { + "epoch": 3.546295166015625e-05, + "step": 23241, + "training_step_time": 0.11891484260559082 + }, + { + "epoch": 3.54644775390625e-05, + "model_forward_time": 0.02537250518798828, + "step": 23242 + }, + { + "epoch": 3.54644775390625e-05, + "step": 23242, + "training_step_time": 0.11764168739318848 + }, + { + "epoch": 3.546600341796875e-05, + "model_forward_time": 0.02504110336303711, + "step": 23243 + }, + { + "epoch": 3.546600341796875e-05, + "step": 23243, + "training_step_time": 0.11519527435302734 + }, + { + "epoch": 3.5467529296875e-05, + "model_forward_time": 0.02525162696838379, + "step": 23244 + }, + { + "epoch": 3.5467529296875e-05, + "step": 23244, + "training_step_time": 0.10953164100646973 + }, + { + "epoch": 3.546905517578125e-05, + "model_forward_time": 0.024918079376220703, + "step": 23245 + }, + { + "epoch": 3.546905517578125e-05, + "step": 23245, + "training_step_time": 0.11133337020874023 + }, + { + "epoch": 3.54705810546875e-05, + "model_forward_time": 0.02353501319885254, + "step": 23246 + }, + { + "epoch": 3.54705810546875e-05, + "step": 23246, + "training_step_time": 0.10850024223327637 + }, + { + "epoch": 3.547210693359375e-05, + "model_forward_time": 0.024735212326049805, + "step": 23247 + }, + { + "epoch": 3.547210693359375e-05, + "step": 23247, + "training_step_time": 0.10585331916809082 + }, + { + "epoch": 3.54736328125e-05, + "model_forward_time": 0.024870872497558594, + "step": 23248 + }, + { + "epoch": 3.54736328125e-05, + "step": 23248, + "training_step_time": 0.10627007484436035 + }, + { + "epoch": 3.547515869140625e-05, + "model_forward_time": 0.02475428581237793, + "step": 23249 + }, + { + "epoch": 3.547515869140625e-05, + "step": 23249, + "training_step_time": 0.10610795021057129 + }, + { + "epoch": 3.54766845703125e-05, + "grad_norm": 0.10498591512441635, + "learning_rate": 1.3213804466343421e-05, + "loss": 0.0076, + "step": 23250 + }, + { + "epoch": 3.54766845703125e-05, + "model_forward_time": 0.02497243881225586, + "step": 23250 + }, + { + "epoch": 3.54766845703125e-05, + "step": 23250, + "training_step_time": 0.10488319396972656 + }, + { + "epoch": 3.547821044921875e-05, + "model_forward_time": 0.02505207061767578, + "step": 23251 + }, + { + "epoch": 3.547821044921875e-05, + "step": 23251, + "training_step_time": 0.10893988609313965 + }, + { + "epoch": 3.5479736328125e-05, + "model_forward_time": 0.02505326271057129, + "step": 23252 + }, + { + "epoch": 3.5479736328125e-05, + "step": 23252, + "training_step_time": 0.10735774040222168 + }, + { + "epoch": 3.548126220703125e-05, + "model_forward_time": 0.025427579879760742, + "step": 23253 + }, + { + "epoch": 3.548126220703125e-05, + "step": 23253, + "training_step_time": 0.10477447509765625 + }, + { + "epoch": 3.54827880859375e-05, + "model_forward_time": 0.02494049072265625, + "step": 23254 + }, + { + "epoch": 3.54827880859375e-05, + "step": 23254, + "training_step_time": 0.1077413558959961 + }, + { + "epoch": 3.548431396484375e-05, + "model_forward_time": 0.02415156364440918, + "step": 23255 + }, + { + "epoch": 3.548431396484375e-05, + "step": 23255, + "training_step_time": 0.13237833976745605 + }, + { + "epoch": 3.548583984375e-05, + "model_forward_time": 0.025124788284301758, + "step": 23256 + }, + { + "epoch": 3.548583984375e-05, + "step": 23256, + "training_step_time": 0.11238884925842285 + }, + { + "epoch": 3.548736572265625e-05, + "model_forward_time": 0.02548837661743164, + "step": 23257 + }, + { + "epoch": 3.548736572265625e-05, + "step": 23257, + "training_step_time": 0.11195874214172363 + }, + { + "epoch": 3.54888916015625e-05, + "model_forward_time": 0.02487468719482422, + "step": 23258 + }, + { + "epoch": 3.54888916015625e-05, + "step": 23258, + "training_step_time": 0.1182868480682373 + }, + { + "epoch": 3.549041748046875e-05, + "model_forward_time": 0.024868488311767578, + "step": 23259 + }, + { + "epoch": 3.549041748046875e-05, + "step": 23259, + "training_step_time": 0.12801098823547363 + }, + { + "epoch": 3.5491943359375e-05, + "grad_norm": 0.09662266075611115, + "learning_rate": 1.3176498002026128e-05, + "loss": 0.0043, + "step": 23260 + }, + { + "epoch": 3.5491943359375e-05, + "model_forward_time": 0.024862051010131836, + "step": 23260 + }, + { + "epoch": 3.5491943359375e-05, + "step": 23260, + "training_step_time": 0.15819597244262695 + }, + { + "epoch": 3.549346923828125e-05, + "model_forward_time": 0.023737668991088867, + "step": 23261 + }, + { + "epoch": 3.549346923828125e-05, + "step": 23261, + "training_step_time": 0.16686058044433594 + }, + { + "epoch": 3.54949951171875e-05, + "model_forward_time": 0.024262666702270508, + "step": 23262 + }, + { + "epoch": 3.54949951171875e-05, + "step": 23262, + "training_step_time": 0.16208553314208984 + }, + { + "epoch": 3.549652099609375e-05, + "model_forward_time": 0.02391648292541504, + "step": 23263 + }, + { + "epoch": 3.549652099609375e-05, + "step": 23263, + "training_step_time": 0.16832447052001953 + }, + { + "epoch": 3.5498046875e-05, + "model_forward_time": 0.024211406707763672, + "step": 23264 + }, + { + "epoch": 3.5498046875e-05, + "step": 23264, + "training_step_time": 0.12025809288024902 + }, + { + "epoch": 3.549957275390625e-05, + "model_forward_time": 0.024289846420288086, + "step": 23265 + }, + { + "epoch": 3.549957275390625e-05, + "step": 23265, + "training_step_time": 0.11322546005249023 + }, + { + "epoch": 3.55010986328125e-05, + "model_forward_time": 0.025094032287597656, + "step": 23266 + }, + { + "epoch": 3.55010986328125e-05, + "step": 23266, + "training_step_time": 0.10638666152954102 + }, + { + "epoch": 3.550262451171875e-05, + "model_forward_time": 0.02642512321472168, + "step": 23267 + }, + { + "epoch": 3.550262451171875e-05, + "step": 23267, + "training_step_time": 0.10757994651794434 + }, + { + "epoch": 3.5504150390625e-05, + "model_forward_time": 0.025058984756469727, + "step": 23268 + }, + { + "epoch": 3.5504150390625e-05, + "step": 23268, + "training_step_time": 0.10490179061889648 + }, + { + "epoch": 3.550567626953125e-05, + "model_forward_time": 0.02548503875732422, + "step": 23269 + }, + { + "epoch": 3.550567626953125e-05, + "step": 23269, + "training_step_time": 0.15086650848388672 + }, + { + "epoch": 3.55072021484375e-05, + "grad_norm": 0.48639655113220215, + "learning_rate": 1.3139236281736e-05, + "loss": 0.0063, + "step": 23270 + }, + { + "epoch": 3.55072021484375e-05, + "model_forward_time": 0.024800777435302734, + "step": 23270 + }, + { + "epoch": 3.55072021484375e-05, + "step": 23270, + "training_step_time": 0.15541934967041016 + }, + { + "epoch": 3.550872802734375e-05, + "model_forward_time": 0.02434992790222168, + "step": 23271 + }, + { + "epoch": 3.550872802734375e-05, + "step": 23271, + "training_step_time": 0.1647641658782959 + }, + { + "epoch": 3.551025390625e-05, + "model_forward_time": 0.02456951141357422, + "step": 23272 + }, + { + "epoch": 3.551025390625e-05, + "step": 23272, + "training_step_time": 0.1574866771697998 + }, + { + "epoch": 3.551177978515625e-05, + "model_forward_time": 0.024190425872802734, + "step": 23273 + }, + { + "epoch": 3.551177978515625e-05, + "step": 23273, + "training_step_time": 0.1403357982635498 + }, + { + "epoch": 3.55133056640625e-05, + "model_forward_time": 0.02552008628845215, + "step": 23274 + }, + { + "epoch": 3.55133056640625e-05, + "step": 23274, + "training_step_time": 0.1301894187927246 + }, + { + "epoch": 3.551483154296875e-05, + "model_forward_time": 0.02405261993408203, + "step": 23275 + }, + { + "epoch": 3.551483154296875e-05, + "step": 23275, + "training_step_time": 0.18494534492492676 + }, + { + "epoch": 3.5516357421875e-05, + "model_forward_time": 0.024471044540405273, + "step": 23276 + }, + { + "epoch": 3.5516357421875e-05, + "step": 23276, + "training_step_time": 0.1264054775238037 + }, + { + "epoch": 3.551788330078125e-05, + "model_forward_time": 0.024277687072753906, + "step": 23277 + }, + { + "epoch": 3.551788330078125e-05, + "step": 23277, + "training_step_time": 0.18108487129211426 + }, + { + "epoch": 3.55194091796875e-05, + "model_forward_time": 0.024292707443237305, + "step": 23278 + }, + { + "epoch": 3.55194091796875e-05, + "step": 23278, + "training_step_time": 0.11571288108825684 + }, + { + "epoch": 3.552093505859375e-05, + "model_forward_time": 0.024270057678222656, + "step": 23279 + }, + { + "epoch": 3.552093505859375e-05, + "step": 23279, + "training_step_time": 0.12001895904541016 + }, + { + "epoch": 3.55224609375e-05, + "grad_norm": 0.11145589500665665, + "learning_rate": 1.3102019350749528e-05, + "loss": 0.0067, + "step": 23280 + }, + { + "epoch": 3.55224609375e-05, + "model_forward_time": 0.025935649871826172, + "step": 23280 + }, + { + "epoch": 3.55224609375e-05, + "step": 23280, + "training_step_time": 0.16225361824035645 + }, + { + "epoch": 3.552398681640625e-05, + "model_forward_time": 0.02530980110168457, + "step": 23281 + }, + { + "epoch": 3.552398681640625e-05, + "step": 23281, + "training_step_time": 0.10560727119445801 + }, + { + "epoch": 3.55255126953125e-05, + "model_forward_time": 0.024799585342407227, + "step": 23282 + }, + { + "epoch": 3.55255126953125e-05, + "step": 23282, + "training_step_time": 0.13144898414611816 + }, + { + "epoch": 3.552703857421875e-05, + "model_forward_time": 0.02505803108215332, + "step": 23283 + }, + { + "epoch": 3.552703857421875e-05, + "step": 23283, + "training_step_time": 0.1971437931060791 + }, + { + "epoch": 3.5528564453125e-05, + "model_forward_time": 0.024053573608398438, + "step": 23284 + }, + { + "epoch": 3.5528564453125e-05, + "step": 23284, + "training_step_time": 0.10184669494628906 + }, + { + "epoch": 3.553009033203125e-05, + "model_forward_time": 0.02431941032409668, + "step": 23285 + }, + { + "epoch": 3.553009033203125e-05, + "step": 23285, + "training_step_time": 0.10358595848083496 + }, + { + "epoch": 3.55316162109375e-05, + "model_forward_time": 0.024988651275634766, + "step": 23286 + }, + { + "epoch": 3.55316162109375e-05, + "step": 23286, + "training_step_time": 0.1100015640258789 + }, + { + "epoch": 3.553314208984375e-05, + "model_forward_time": 0.024855375289916992, + "step": 23287 + }, + { + "epoch": 3.553314208984375e-05, + "step": 23287, + "training_step_time": 0.1062617301940918 + }, + { + "epoch": 3.553466796875e-05, + "model_forward_time": 0.0253298282623291, + "step": 23288 + }, + { + "epoch": 3.553466796875e-05, + "step": 23288, + "training_step_time": 0.10749983787536621 + }, + { + "epoch": 3.553619384765625e-05, + "model_forward_time": 0.024973392486572266, + "step": 23289 + }, + { + "epoch": 3.553619384765625e-05, + "step": 23289, + "training_step_time": 0.10478854179382324 + }, + { + "epoch": 3.55377197265625e-05, + "grad_norm": 0.11471951007843018, + "learning_rate": 1.3064847254288797e-05, + "loss": 0.0151, + "step": 23290 + }, + { + "epoch": 3.55377197265625e-05, + "model_forward_time": 0.025330543518066406, + "step": 23290 + }, + { + "epoch": 3.55377197265625e-05, + "step": 23290, + "training_step_time": 0.10653042793273926 + }, + { + "epoch": 3.553924560546875e-05, + "model_forward_time": 0.024834871292114258, + "step": 23291 + }, + { + "epoch": 3.553924560546875e-05, + "step": 23291, + "training_step_time": 0.10373878479003906 + }, + { + "epoch": 3.5540771484375e-05, + "model_forward_time": 0.024792194366455078, + "step": 23292 + }, + { + "epoch": 3.5540771484375e-05, + "step": 23292, + "training_step_time": 0.10434103012084961 + }, + { + "epoch": 3.554229736328125e-05, + "model_forward_time": 0.025188207626342773, + "step": 23293 + }, + { + "epoch": 3.554229736328125e-05, + "step": 23293, + "training_step_time": 0.10489225387573242 + }, + { + "epoch": 3.55438232421875e-05, + "model_forward_time": 0.025982141494750977, + "step": 23294 + }, + { + "epoch": 3.55438232421875e-05, + "step": 23294, + "training_step_time": 0.10580873489379883 + }, + { + "epoch": 3.554534912109375e-05, + "model_forward_time": 0.024731874465942383, + "step": 23295 + }, + { + "epoch": 3.554534912109375e-05, + "step": 23295, + "training_step_time": 0.10597562789916992 + }, + { + "epoch": 3.5546875e-05, + "model_forward_time": 0.025089502334594727, + "step": 23296 + }, + { + "epoch": 3.5546875e-05, + "step": 23296, + "training_step_time": 0.10997128486633301 + }, + { + "epoch": 3.554840087890625e-05, + "model_forward_time": 0.025561094284057617, + "step": 23297 + }, + { + "epoch": 3.554840087890625e-05, + "step": 23297, + "training_step_time": 0.10680890083312988 + }, + { + "epoch": 3.55499267578125e-05, + "model_forward_time": 0.02472829818725586, + "step": 23298 + }, + { + "epoch": 3.55499267578125e-05, + "step": 23298, + "training_step_time": 0.11026453971862793 + }, + { + "epoch": 3.555145263671875e-05, + "model_forward_time": 0.025469541549682617, + "step": 23299 + }, + { + "epoch": 3.555145263671875e-05, + "step": 23299, + "training_step_time": 0.10811519622802734 + }, + { + "epoch": 3.5552978515625e-05, + "grad_norm": 0.13361810147762299, + "learning_rate": 1.3027720037521397e-05, + "loss": 0.0067, + "step": 23300 + }, + { + "epoch": 3.5552978515625e-05, + "model_forward_time": 0.024899005889892578, + "step": 23300 + }, + { + "epoch": 3.5552978515625e-05, + "step": 23300, + "training_step_time": 0.1316995620727539 + }, + { + "epoch": 3.555450439453125e-05, + "model_forward_time": 0.02493143081665039, + "step": 23301 + }, + { + "epoch": 3.555450439453125e-05, + "step": 23301, + "training_step_time": 0.10849690437316895 + }, + { + "epoch": 3.55560302734375e-05, + "model_forward_time": 0.025310993194580078, + "step": 23302 + }, + { + "epoch": 3.55560302734375e-05, + "step": 23302, + "training_step_time": 0.1089019775390625 + }, + { + "epoch": 3.555755615234375e-05, + "model_forward_time": 0.024958372116088867, + "step": 23303 + }, + { + "epoch": 3.555755615234375e-05, + "step": 23303, + "training_step_time": 0.13319849967956543 + }, + { + "epoch": 3.555908203125e-05, + "model_forward_time": 0.025043249130249023, + "step": 23304 + }, + { + "epoch": 3.555908203125e-05, + "step": 23304, + "training_step_time": 0.12258553504943848 + }, + { + "epoch": 3.556060791015625e-05, + "model_forward_time": 0.025115251541137695, + "step": 23305 + }, + { + "epoch": 3.556060791015625e-05, + "step": 23305, + "training_step_time": 0.110107421875 + }, + { + "epoch": 3.55621337890625e-05, + "model_forward_time": 0.025054454803466797, + "step": 23306 + }, + { + "epoch": 3.55621337890625e-05, + "step": 23306, + "training_step_time": 0.14678144454956055 + }, + { + "epoch": 3.556365966796875e-05, + "model_forward_time": 0.024631500244140625, + "step": 23307 + }, + { + "epoch": 3.556365966796875e-05, + "step": 23307, + "training_step_time": 0.11731100082397461 + }, + { + "epoch": 3.5565185546875e-05, + "model_forward_time": 0.024802684783935547, + "step": 23308 + }, + { + "epoch": 3.5565185546875e-05, + "step": 23308, + "training_step_time": 0.11899328231811523 + }, + { + "epoch": 3.556671142578125e-05, + "model_forward_time": 0.025536775588989258, + "step": 23309 + }, + { + "epoch": 3.556671142578125e-05, + "step": 23309, + "training_step_time": 0.13795137405395508 + }, + { + "epoch": 3.55682373046875e-05, + "grad_norm": 0.29144373536109924, + "learning_rate": 1.299063774556042e-05, + "loss": 0.0124, + "step": 23310 + }, + { + "epoch": 3.55682373046875e-05, + "model_forward_time": 0.02469491958618164, + "step": 23310 + }, + { + "epoch": 3.55682373046875e-05, + "step": 23310, + "training_step_time": 0.11194825172424316 + }, + { + "epoch": 3.556976318359375e-05, + "model_forward_time": 0.02476334571838379, + "step": 23311 + }, + { + "epoch": 3.556976318359375e-05, + "step": 23311, + "training_step_time": 0.13114643096923828 + }, + { + "epoch": 3.55712890625e-05, + "model_forward_time": 0.025343656539916992, + "step": 23312 + }, + { + "epoch": 3.55712890625e-05, + "step": 23312, + "training_step_time": 0.10868430137634277 + }, + { + "epoch": 3.557281494140625e-05, + "model_forward_time": 0.025021076202392578, + "step": 23313 + }, + { + "epoch": 3.557281494140625e-05, + "step": 23313, + "training_step_time": 0.10639834403991699 + }, + { + "epoch": 3.55743408203125e-05, + "model_forward_time": 0.025058507919311523, + "step": 23314 + }, + { + "epoch": 3.55743408203125e-05, + "step": 23314, + "training_step_time": 0.10808539390563965 + }, + { + "epoch": 3.557586669921875e-05, + "model_forward_time": 0.02484416961669922, + "step": 23315 + }, + { + "epoch": 3.557586669921875e-05, + "step": 23315, + "training_step_time": 0.10540509223937988 + }, + { + "epoch": 3.5577392578125e-05, + "model_forward_time": 0.025367021560668945, + "step": 23316 + }, + { + "epoch": 3.5577392578125e-05, + "step": 23316, + "training_step_time": 0.10761475563049316 + }, + { + "epoch": 3.557891845703125e-05, + "model_forward_time": 0.024781465530395508, + "step": 23317 + }, + { + "epoch": 3.557891845703125e-05, + "step": 23317, + "training_step_time": 0.1067814826965332 + }, + { + "epoch": 3.55804443359375e-05, + "model_forward_time": 0.025071144104003906, + "step": 23318 + }, + { + "epoch": 3.55804443359375e-05, + "step": 23318, + "training_step_time": 0.10760903358459473 + }, + { + "epoch": 3.558197021484375e-05, + "model_forward_time": 0.02533745765686035, + "step": 23319 + }, + { + "epoch": 3.558197021484375e-05, + "step": 23319, + "training_step_time": 0.11087560653686523 + }, + { + "epoch": 3.558349609375e-05, + "grad_norm": 0.4952482581138611, + "learning_rate": 1.2953600423464324e-05, + "loss": 0.0069, + "step": 23320 + }, + { + "epoch": 3.558349609375e-05, + "model_forward_time": 0.024710416793823242, + "step": 23320 + }, + { + "epoch": 3.558349609375e-05, + "step": 23320, + "training_step_time": 0.1100778579711914 + }, + { + "epoch": 3.558502197265625e-05, + "model_forward_time": 0.025284767150878906, + "step": 23321 + }, + { + "epoch": 3.558502197265625e-05, + "step": 23321, + "training_step_time": 0.12181758880615234 + }, + { + "epoch": 3.55865478515625e-05, + "model_forward_time": 0.02543044090270996, + "step": 23322 + }, + { + "epoch": 3.55865478515625e-05, + "step": 23322, + "training_step_time": 0.10865664482116699 + }, + { + "epoch": 3.558807373046875e-05, + "model_forward_time": 0.025342702865600586, + "step": 23323 + }, + { + "epoch": 3.558807373046875e-05, + "step": 23323, + "training_step_time": 0.1110837459564209 + }, + { + "epoch": 3.5589599609375e-05, + "model_forward_time": 0.0251007080078125, + "step": 23324 + }, + { + "epoch": 3.5589599609375e-05, + "step": 23324, + "training_step_time": 0.10879254341125488 + }, + { + "epoch": 3.559112548828125e-05, + "model_forward_time": 0.025250673294067383, + "step": 23325 + }, + { + "epoch": 3.559112548828125e-05, + "step": 23325, + "training_step_time": 0.10831046104431152 + }, + { + "epoch": 3.55926513671875e-05, + "model_forward_time": 0.02517390251159668, + "step": 23326 + }, + { + "epoch": 3.55926513671875e-05, + "step": 23326, + "training_step_time": 0.1046907901763916 + }, + { + "epoch": 3.559417724609375e-05, + "model_forward_time": 0.024714946746826172, + "step": 23327 + }, + { + "epoch": 3.559417724609375e-05, + "step": 23327, + "training_step_time": 0.11985516548156738 + }, + { + "epoch": 3.5595703125e-05, + "model_forward_time": 0.025017261505126953, + "step": 23328 + }, + { + "epoch": 3.5595703125e-05, + "step": 23328, + "training_step_time": 0.12053775787353516 + }, + { + "epoch": 3.559722900390625e-05, + "model_forward_time": 0.024998188018798828, + "step": 23329 + }, + { + "epoch": 3.559722900390625e-05, + "step": 23329, + "training_step_time": 0.19411802291870117 + }, + { + "epoch": 3.55987548828125e-05, + "grad_norm": 0.1660095751285553, + "learning_rate": 1.2916608116236977e-05, + "loss": 0.0069, + "step": 23330 + }, + { + "epoch": 3.55987548828125e-05, + "model_forward_time": 0.024116992950439453, + "step": 23330 + }, + { + "epoch": 3.55987548828125e-05, + "step": 23330, + "training_step_time": 0.1246027946472168 + }, + { + "epoch": 3.560028076171875e-05, + "model_forward_time": 0.023980140686035156, + "step": 23331 + }, + { + "epoch": 3.560028076171875e-05, + "step": 23331, + "training_step_time": 0.16336941719055176 + }, + { + "epoch": 3.5601806640625e-05, + "model_forward_time": 0.024234533309936523, + "step": 23332 + }, + { + "epoch": 3.5601806640625e-05, + "step": 23332, + "training_step_time": 0.1069941520690918 + }, + { + "epoch": 3.560333251953125e-05, + "model_forward_time": 0.024553298950195312, + "step": 23333 + }, + { + "epoch": 3.560333251953125e-05, + "step": 23333, + "training_step_time": 0.10144782066345215 + }, + { + "epoch": 3.56048583984375e-05, + "model_forward_time": 0.0250399112701416, + "step": 23334 + }, + { + "epoch": 3.56048583984375e-05, + "step": 23334, + "training_step_time": 0.10926222801208496 + }, + { + "epoch": 3.560638427734375e-05, + "model_forward_time": 0.027924537658691406, + "step": 23335 + }, + { + "epoch": 3.560638427734375e-05, + "step": 23335, + "training_step_time": 0.10864901542663574 + }, + { + "epoch": 3.560791015625e-05, + "model_forward_time": 0.02485489845275879, + "step": 23336 + }, + { + "epoch": 3.560791015625e-05, + "step": 23336, + "training_step_time": 0.10827326774597168 + }, + { + "epoch": 3.560943603515625e-05, + "model_forward_time": 0.024815797805786133, + "step": 23337 + }, + { + "epoch": 3.560943603515625e-05, + "step": 23337, + "training_step_time": 0.10715842247009277 + }, + { + "epoch": 3.56109619140625e-05, + "model_forward_time": 0.025029420852661133, + "step": 23338 + }, + { + "epoch": 3.56109619140625e-05, + "step": 23338, + "training_step_time": 0.11116528511047363 + }, + { + "epoch": 3.561248779296875e-05, + "model_forward_time": 0.025376319885253906, + "step": 23339 + }, + { + "epoch": 3.561248779296875e-05, + "step": 23339, + "training_step_time": 0.11059141159057617 + }, + { + "epoch": 3.5614013671875e-05, + "grad_norm": 0.310923308134079, + "learning_rate": 1.2879660868827508e-05, + "loss": 0.0084, + "step": 23340 + }, + { + "epoch": 3.5614013671875e-05, + "model_forward_time": 0.024525880813598633, + "step": 23340 + }, + { + "epoch": 3.5614013671875e-05, + "step": 23340, + "training_step_time": 0.19060635566711426 + }, + { + "epoch": 3.561553955078125e-05, + "model_forward_time": 0.022893428802490234, + "step": 23341 + }, + { + "epoch": 3.561553955078125e-05, + "step": 23341, + "training_step_time": 0.2080857753753662 + }, + { + "epoch": 3.56170654296875e-05, + "model_forward_time": 0.024225473403930664, + "step": 23342 + }, + { + "epoch": 3.56170654296875e-05, + "step": 23342, + "training_step_time": 0.20780491828918457 + }, + { + "epoch": 3.561859130859375e-05, + "model_forward_time": 0.022484779357910156, + "step": 23343 + }, + { + "epoch": 3.561859130859375e-05, + "step": 23343, + "training_step_time": 0.19821953773498535 + }, + { + "epoch": 3.56201171875e-05, + "model_forward_time": 0.023019790649414062, + "step": 23344 + }, + { + "epoch": 3.56201171875e-05, + "step": 23344, + "training_step_time": 0.1931760311126709 + }, + { + "epoch": 3.562164306640625e-05, + "model_forward_time": 0.02282571792602539, + "step": 23345 + }, + { + "epoch": 3.562164306640625e-05, + "step": 23345, + "training_step_time": 0.13660502433776855 + }, + { + "epoch": 3.56231689453125e-05, + "model_forward_time": 0.02583003044128418, + "step": 23346 + }, + { + "epoch": 3.56231689453125e-05, + "step": 23346, + "training_step_time": 0.22557473182678223 + }, + { + "epoch": 3.562469482421875e-05, + "model_forward_time": 0.02505970001220703, + "step": 23347 + }, + { + "epoch": 3.562469482421875e-05, + "step": 23347, + "training_step_time": 0.11062383651733398 + }, + { + "epoch": 3.5626220703125e-05, + "model_forward_time": 0.025407075881958008, + "step": 23348 + }, + { + "epoch": 3.5626220703125e-05, + "step": 23348, + "training_step_time": 0.12543535232543945 + }, + { + "epoch": 3.562774658203125e-05, + "model_forward_time": 0.025110721588134766, + "step": 23349 + }, + { + "epoch": 3.562774658203125e-05, + "step": 23349, + "training_step_time": 0.18897771835327148 + }, + { + "epoch": 3.56292724609375e-05, + "grad_norm": 0.15721100568771362, + "learning_rate": 1.2842758726130283e-05, + "loss": 0.008, + "step": 23350 + }, + { + "epoch": 3.56292724609375e-05, + "model_forward_time": 0.024692773818969727, + "step": 23350 + }, + { + "epoch": 3.56292724609375e-05, + "step": 23350, + "training_step_time": 0.18013334274291992 + }, + { + "epoch": 3.563079833984375e-05, + "model_forward_time": 0.02423381805419922, + "step": 23351 + }, + { + "epoch": 3.563079833984375e-05, + "step": 23351, + "training_step_time": 0.15563654899597168 + }, + { + "epoch": 3.563232421875e-05, + "model_forward_time": 0.024013757705688477, + "step": 23352 + }, + { + "epoch": 3.563232421875e-05, + "step": 23352, + "training_step_time": 0.16464519500732422 + }, + { + "epoch": 3.563385009765625e-05, + "model_forward_time": 0.026987075805664062, + "step": 23353 + }, + { + "epoch": 3.563385009765625e-05, + "step": 23353, + "training_step_time": 0.16717791557312012 + }, + { + "epoch": 3.56353759765625e-05, + "model_forward_time": 0.023528575897216797, + "step": 23354 + }, + { + "epoch": 3.56353759765625e-05, + "step": 23354, + "training_step_time": 0.17630600929260254 + }, + { + "epoch": 3.563690185546875e-05, + "model_forward_time": 0.02416706085205078, + "step": 23355 + }, + { + "epoch": 3.563690185546875e-05, + "step": 23355, + "training_step_time": 0.17811322212219238 + }, + { + "epoch": 3.5638427734375e-05, + "model_forward_time": 0.024021625518798828, + "step": 23356 + }, + { + "epoch": 3.5638427734375e-05, + "step": 23356, + "training_step_time": 0.16107773780822754 + }, + { + "epoch": 3.563995361328125e-05, + "model_forward_time": 0.023891210556030273, + "step": 23357 + }, + { + "epoch": 3.563995361328125e-05, + "step": 23357, + "training_step_time": 0.14253520965576172 + }, + { + "epoch": 3.56414794921875e-05, + "model_forward_time": 0.024144411087036133, + "step": 23358 + }, + { + "epoch": 3.56414794921875e-05, + "step": 23358, + "training_step_time": 0.13903450965881348 + }, + { + "epoch": 3.564300537109375e-05, + "model_forward_time": 0.027928829193115234, + "step": 23359 + }, + { + "epoch": 3.564300537109375e-05, + "step": 23359, + "training_step_time": 0.16277098655700684 + }, + { + "epoch": 3.564453125e-05, + "grad_norm": 0.18141719698905945, + "learning_rate": 1.2805901732984921e-05, + "loss": 0.0063, + "step": 23360 + }, + { + "epoch": 3.564453125e-05, + "model_forward_time": 0.024163246154785156, + "step": 23360 + }, + { + "epoch": 3.564453125e-05, + "step": 23360, + "training_step_time": 0.1347823143005371 + }, + { + "epoch": 3.564605712890625e-05, + "model_forward_time": 0.02443385124206543, + "step": 23361 + }, + { + "epoch": 3.564605712890625e-05, + "step": 23361, + "training_step_time": 0.10343670845031738 + }, + { + "epoch": 3.56475830078125e-05, + "model_forward_time": 0.025066614151000977, + "step": 23362 + }, + { + "epoch": 3.56475830078125e-05, + "step": 23362, + "training_step_time": 0.10456037521362305 + }, + { + "epoch": 3.564910888671875e-05, + "model_forward_time": 0.02501678466796875, + "step": 23363 + }, + { + "epoch": 3.564910888671875e-05, + "step": 23363, + "training_step_time": 0.11089372634887695 + }, + { + "epoch": 3.5650634765625e-05, + "model_forward_time": 0.02549910545349121, + "step": 23364 + }, + { + "epoch": 3.5650634765625e-05, + "step": 23364, + "training_step_time": 0.10844969749450684 + }, + { + "epoch": 3.565216064453125e-05, + "model_forward_time": 0.025300264358520508, + "step": 23365 + }, + { + "epoch": 3.565216064453125e-05, + "step": 23365, + "training_step_time": 0.10320210456848145 + }, + { + "epoch": 3.56536865234375e-05, + "model_forward_time": 0.024327754974365234, + "step": 23366 + }, + { + "epoch": 3.56536865234375e-05, + "step": 23366, + "training_step_time": 0.14797687530517578 + }, + { + "epoch": 3.565521240234375e-05, + "model_forward_time": 0.024629831314086914, + "step": 23367 + }, + { + "epoch": 3.565521240234375e-05, + "step": 23367, + "training_step_time": 0.10716962814331055 + }, + { + "epoch": 3.565673828125e-05, + "model_forward_time": 0.025292634963989258, + "step": 23368 + }, + { + "epoch": 3.565673828125e-05, + "step": 23368, + "training_step_time": 0.18473291397094727 + }, + { + "epoch": 3.565826416015625e-05, + "model_forward_time": 0.024432897567749023, + "step": 23369 + }, + { + "epoch": 3.565826416015625e-05, + "step": 23369, + "training_step_time": 0.1386425495147705 + }, + { + "epoch": 3.56597900390625e-05, + "grad_norm": 0.11859441548585892, + "learning_rate": 1.2769089934176126e-05, + "loss": 0.0093, + "step": 23370 + }, + { + "epoch": 3.56597900390625e-05, + "model_forward_time": 0.02429485321044922, + "step": 23370 + }, + { + "epoch": 3.56597900390625e-05, + "step": 23370, + "training_step_time": 0.19167351722717285 + }, + { + "epoch": 3.566131591796875e-05, + "model_forward_time": 0.024566173553466797, + "step": 23371 + }, + { + "epoch": 3.566131591796875e-05, + "step": 23371, + "training_step_time": 0.105224609375 + }, + { + "epoch": 3.5662841796875e-05, + "model_forward_time": 0.02485370635986328, + "step": 23372 + }, + { + "epoch": 3.5662841796875e-05, + "step": 23372, + "training_step_time": 0.1062474250793457 + }, + { + "epoch": 3.566436767578125e-05, + "model_forward_time": 0.025367021560668945, + "step": 23373 + }, + { + "epoch": 3.566436767578125e-05, + "step": 23373, + "training_step_time": 0.10510611534118652 + }, + { + "epoch": 3.56658935546875e-05, + "model_forward_time": 0.024597644805908203, + "step": 23374 + }, + { + "epoch": 3.56658935546875e-05, + "step": 23374, + "training_step_time": 0.11034798622131348 + }, + { + "epoch": 3.566741943359375e-05, + "model_forward_time": 0.0239870548248291, + "step": 23375 + }, + { + "epoch": 3.566741943359375e-05, + "step": 23375, + "training_step_time": 0.10626745223999023 + }, + { + "epoch": 3.56689453125e-05, + "model_forward_time": 0.02525162696838379, + "step": 23376 + }, + { + "epoch": 3.56689453125e-05, + "step": 23376, + "training_step_time": 0.11477017402648926 + }, + { + "epoch": 3.567047119140625e-05, + "model_forward_time": 0.023775577545166016, + "step": 23377 + }, + { + "epoch": 3.567047119140625e-05, + "step": 23377, + "training_step_time": 0.11799931526184082 + }, + { + "epoch": 3.56719970703125e-05, + "model_forward_time": 0.024049758911132812, + "step": 23378 + }, + { + "epoch": 3.56719970703125e-05, + "step": 23378, + "training_step_time": 0.11258220672607422 + }, + { + "epoch": 3.567352294921875e-05, + "model_forward_time": 0.02508234977722168, + "step": 23379 + }, + { + "epoch": 3.567352294921875e-05, + "step": 23379, + "training_step_time": 0.11038875579833984 + }, + { + "epoch": 3.5675048828125e-05, + "grad_norm": 0.30648550391197205, + "learning_rate": 1.2732323374433707e-05, + "loss": 0.0117, + "step": 23380 + }, + { + "epoch": 3.5675048828125e-05, + "model_forward_time": 0.02510237693786621, + "step": 23380 + }, + { + "epoch": 3.5675048828125e-05, + "step": 23380, + "training_step_time": 0.11193013191223145 + }, + { + "epoch": 3.567657470703125e-05, + "model_forward_time": 0.025217771530151367, + "step": 23381 + }, + { + "epoch": 3.567657470703125e-05, + "step": 23381, + "training_step_time": 0.1100320816040039 + }, + { + "epoch": 3.56781005859375e-05, + "model_forward_time": 0.024732589721679688, + "step": 23382 + }, + { + "epoch": 3.56781005859375e-05, + "step": 23382, + "training_step_time": 0.10996675491333008 + }, + { + "epoch": 3.567962646484375e-05, + "model_forward_time": 0.024745464324951172, + "step": 23383 + }, + { + "epoch": 3.567962646484375e-05, + "step": 23383, + "training_step_time": 0.10846495628356934 + }, + { + "epoch": 3.568115234375e-05, + "model_forward_time": 0.024907827377319336, + "step": 23384 + }, + { + "epoch": 3.568115234375e-05, + "step": 23384, + "training_step_time": 0.10883402824401855 + }, + { + "epoch": 3.568267822265625e-05, + "model_forward_time": 0.025298118591308594, + "step": 23385 + }, + { + "epoch": 3.568267822265625e-05, + "step": 23385, + "training_step_time": 0.10821318626403809 + }, + { + "epoch": 3.56842041015625e-05, + "model_forward_time": 0.024891138076782227, + "step": 23386 + }, + { + "epoch": 3.56842041015625e-05, + "step": 23386, + "training_step_time": 0.10923361778259277 + }, + { + "epoch": 3.568572998046875e-05, + "model_forward_time": 0.024541854858398438, + "step": 23387 + }, + { + "epoch": 3.568572998046875e-05, + "step": 23387, + "training_step_time": 0.20641350746154785 + }, + { + "epoch": 3.5687255859375e-05, + "model_forward_time": 0.02439093589782715, + "step": 23388 + }, + { + "epoch": 3.5687255859375e-05, + "step": 23388, + "training_step_time": 0.10827755928039551 + }, + { + "epoch": 3.568878173828125e-05, + "model_forward_time": 0.024199485778808594, + "step": 23389 + }, + { + "epoch": 3.568878173828125e-05, + "step": 23389, + "training_step_time": 0.12380838394165039 + }, + { + "epoch": 3.56903076171875e-05, + "grad_norm": 0.1790643036365509, + "learning_rate": 1.2695602098432502e-05, + "loss": 0.0139, + "step": 23390 + }, + { + "epoch": 3.56903076171875e-05, + "model_forward_time": 0.02472090721130371, + "step": 23390 + }, + { + "epoch": 3.56903076171875e-05, + "step": 23390, + "training_step_time": 0.12818002700805664 + }, + { + "epoch": 3.569183349609375e-05, + "model_forward_time": 0.024599313735961914, + "step": 23391 + }, + { + "epoch": 3.569183349609375e-05, + "step": 23391, + "training_step_time": 0.17270874977111816 + }, + { + "epoch": 3.5693359375e-05, + "model_forward_time": 0.02446126937866211, + "step": 23392 + }, + { + "epoch": 3.5693359375e-05, + "step": 23392, + "training_step_time": 0.19969511032104492 + }, + { + "epoch": 3.569488525390625e-05, + "model_forward_time": 0.024658679962158203, + "step": 23393 + }, + { + "epoch": 3.569488525390625e-05, + "step": 23393, + "training_step_time": 0.1544027328491211 + }, + { + "epoch": 3.56964111328125e-05, + "model_forward_time": 0.024145126342773438, + "step": 23394 + }, + { + "epoch": 3.56964111328125e-05, + "step": 23394, + "training_step_time": 0.16910266876220703 + }, + { + "epoch": 3.569793701171875e-05, + "model_forward_time": 0.023594379425048828, + "step": 23395 + }, + { + "epoch": 3.569793701171875e-05, + "step": 23395, + "training_step_time": 0.12185931205749512 + }, + { + "epoch": 3.5699462890625e-05, + "model_forward_time": 0.0244748592376709, + "step": 23396 + }, + { + "epoch": 3.5699462890625e-05, + "step": 23396, + "training_step_time": 0.1120297908782959 + }, + { + "epoch": 3.570098876953125e-05, + "model_forward_time": 0.025224924087524414, + "step": 23397 + }, + { + "epoch": 3.570098876953125e-05, + "step": 23397, + "training_step_time": 0.10351753234863281 + }, + { + "epoch": 3.57025146484375e-05, + "model_forward_time": 0.02507805824279785, + "step": 23398 + }, + { + "epoch": 3.57025146484375e-05, + "step": 23398, + "training_step_time": 0.10559582710266113 + }, + { + "epoch": 3.570404052734375e-05, + "model_forward_time": 0.02516937255859375, + "step": 23399 + }, + { + "epoch": 3.570404052734375e-05, + "step": 23399, + "training_step_time": 0.10587382316589355 + }, + { + "epoch": 3.570556640625e-05, + "grad_norm": 0.1076262816786766, + "learning_rate": 1.2658926150792322e-05, + "loss": 0.0125, + "step": 23400 + }, + { + "epoch": 3.570556640625e-05, + "model_forward_time": 0.024970293045043945, + "step": 23400 + }, + { + "epoch": 3.570556640625e-05, + "step": 23400, + "training_step_time": 0.10558700561523438 + }, + { + "epoch": 3.570709228515625e-05, + "model_forward_time": 0.024866104125976562, + "step": 23401 + }, + { + "epoch": 3.570709228515625e-05, + "step": 23401, + "training_step_time": 0.10610699653625488 + }, + { + "epoch": 3.57086181640625e-05, + "model_forward_time": 0.025146484375, + "step": 23402 + }, + { + "epoch": 3.57086181640625e-05, + "step": 23402, + "training_step_time": 0.11017489433288574 + }, + { + "epoch": 3.571014404296875e-05, + "model_forward_time": 0.025333881378173828, + "step": 23403 + }, + { + "epoch": 3.571014404296875e-05, + "step": 23403, + "training_step_time": 0.10482192039489746 + }, + { + "epoch": 3.5711669921875e-05, + "model_forward_time": 0.02502751350402832, + "step": 23404 + }, + { + "epoch": 3.5711669921875e-05, + "step": 23404, + "training_step_time": 0.13179779052734375 + }, + { + "epoch": 3.571319580078125e-05, + "model_forward_time": 0.025427579879760742, + "step": 23405 + }, + { + "epoch": 3.571319580078125e-05, + "step": 23405, + "training_step_time": 0.12712836265563965 + }, + { + "epoch": 3.57147216796875e-05, + "model_forward_time": 0.02441120147705078, + "step": 23406 + }, + { + "epoch": 3.57147216796875e-05, + "step": 23406, + "training_step_time": 0.10913872718811035 + }, + { + "epoch": 3.571624755859375e-05, + "model_forward_time": 0.02491307258605957, + "step": 23407 + }, + { + "epoch": 3.571624755859375e-05, + "step": 23407, + "training_step_time": 0.10963010787963867 + }, + { + "epoch": 3.57177734375e-05, + "model_forward_time": 0.024850845336914062, + "step": 23408 + }, + { + "epoch": 3.57177734375e-05, + "step": 23408, + "training_step_time": 0.11035490036010742 + }, + { + "epoch": 3.571929931640625e-05, + "model_forward_time": 0.025103330612182617, + "step": 23409 + }, + { + "epoch": 3.571929931640625e-05, + "step": 23409, + "training_step_time": 0.1094350814819336 + }, + { + "epoch": 3.57208251953125e-05, + "grad_norm": 0.2716013491153717, + "learning_rate": 1.2622295576077886e-05, + "loss": 0.0111, + "step": 23410 + }, + { + "epoch": 3.57208251953125e-05, + "model_forward_time": 0.024790048599243164, + "step": 23410 + }, + { + "epoch": 3.57208251953125e-05, + "step": 23410, + "training_step_time": 0.18961286544799805 + }, + { + "epoch": 3.572235107421875e-05, + "model_forward_time": 0.025233745574951172, + "step": 23411 + }, + { + "epoch": 3.572235107421875e-05, + "step": 23411, + "training_step_time": 0.10554385185241699 + }, + { + "epoch": 3.5723876953125e-05, + "model_forward_time": 0.024452686309814453, + "step": 23412 + }, + { + "epoch": 3.5723876953125e-05, + "step": 23412, + "training_step_time": 0.14497947692871094 + }, + { + "epoch": 3.572540283203125e-05, + "model_forward_time": 0.026020050048828125, + "step": 23413 + }, + { + "epoch": 3.572540283203125e-05, + "step": 23413, + "training_step_time": 0.15570807456970215 + }, + { + "epoch": 3.57269287109375e-05, + "model_forward_time": 0.024310588836669922, + "step": 23414 + }, + { + "epoch": 3.57269287109375e-05, + "step": 23414, + "training_step_time": 0.11222624778747559 + }, + { + "epoch": 3.572845458984375e-05, + "model_forward_time": 0.024678945541381836, + "step": 23415 + }, + { + "epoch": 3.572845458984375e-05, + "step": 23415, + "training_step_time": 0.13232088088989258 + }, + { + "epoch": 3.572998046875e-05, + "model_forward_time": 0.024749279022216797, + "step": 23416 + }, + { + "epoch": 3.572998046875e-05, + "step": 23416, + "training_step_time": 0.19741511344909668 + }, + { + "epoch": 3.573150634765625e-05, + "model_forward_time": 0.024762630462646484, + "step": 23417 + }, + { + "epoch": 3.573150634765625e-05, + "step": 23417, + "training_step_time": 0.10423970222473145 + }, + { + "epoch": 3.57330322265625e-05, + "model_forward_time": 0.02482008934020996, + "step": 23418 + }, + { + "epoch": 3.57330322265625e-05, + "step": 23418, + "training_step_time": 0.10256242752075195 + }, + { + "epoch": 3.573455810546875e-05, + "model_forward_time": 0.02541208267211914, + "step": 23419 + }, + { + "epoch": 3.573455810546875e-05, + "step": 23419, + "training_step_time": 0.10630273818969727 + }, + { + "epoch": 3.5736083984375e-05, + "grad_norm": 0.1038910299539566, + "learning_rate": 1.2585710418798823e-05, + "loss": 0.0033, + "step": 23420 + }, + { + "epoch": 3.5736083984375e-05, + "model_forward_time": 0.024979829788208008, + "step": 23420 + }, + { + "epoch": 3.5736083984375e-05, + "step": 23420, + "training_step_time": 0.10648488998413086 + }, + { + "epoch": 3.573760986328125e-05, + "model_forward_time": 0.025211334228515625, + "step": 23421 + }, + { + "epoch": 3.573760986328125e-05, + "step": 23421, + "training_step_time": 0.10393404960632324 + }, + { + "epoch": 3.57391357421875e-05, + "model_forward_time": 0.025221586227416992, + "step": 23422 + }, + { + "epoch": 3.57391357421875e-05, + "step": 23422, + "training_step_time": 0.1058950424194336 + }, + { + "epoch": 3.574066162109375e-05, + "model_forward_time": 0.025351524353027344, + "step": 23423 + }, + { + "epoch": 3.574066162109375e-05, + "step": 23423, + "training_step_time": 0.10654306411743164 + }, + { + "epoch": 3.57421875e-05, + "model_forward_time": 0.025364160537719727, + "step": 23424 + }, + { + "epoch": 3.57421875e-05, + "step": 23424, + "training_step_time": 0.11027932167053223 + }, + { + "epoch": 3.574371337890625e-05, + "model_forward_time": 0.02570319175720215, + "step": 23425 + }, + { + "epoch": 3.574371337890625e-05, + "step": 23425, + "training_step_time": 0.11285138130187988 + }, + { + "epoch": 3.57452392578125e-05, + "model_forward_time": 0.028299808502197266, + "step": 23426 + }, + { + "epoch": 3.57452392578125e-05, + "step": 23426, + "training_step_time": 0.10859036445617676 + }, + { + "epoch": 3.574676513671875e-05, + "model_forward_time": 0.025182485580444336, + "step": 23427 + }, + { + "epoch": 3.574676513671875e-05, + "step": 23427, + "training_step_time": 0.13878870010375977 + }, + { + "epoch": 3.5748291015625e-05, + "model_forward_time": 0.02499222755432129, + "step": 23428 + }, + { + "epoch": 3.5748291015625e-05, + "step": 23428, + "training_step_time": 0.18028974533081055 + }, + { + "epoch": 3.574981689453125e-05, + "model_forward_time": 0.02451181411743164, + "step": 23429 + }, + { + "epoch": 3.574981689453125e-05, + "step": 23429, + "training_step_time": 0.18407702445983887 + }, + { + "epoch": 3.57513427734375e-05, + "grad_norm": 0.07759720087051392, + "learning_rate": 1.2549170723409549e-05, + "loss": 0.0064, + "step": 23430 + }, + { + "epoch": 3.57513427734375e-05, + "model_forward_time": 0.02447819709777832, + "step": 23430 + }, + { + "epoch": 3.57513427734375e-05, + "step": 23430, + "training_step_time": 0.1833333969116211 + }, + { + "epoch": 3.575286865234375e-05, + "model_forward_time": 0.023698806762695312, + "step": 23431 + }, + { + "epoch": 3.575286865234375e-05, + "step": 23431, + "training_step_time": 0.1671161651611328 + }, + { + "epoch": 3.575439453125e-05, + "model_forward_time": 0.024468421936035156, + "step": 23432 + }, + { + "epoch": 3.575439453125e-05, + "step": 23432, + "training_step_time": 0.17778611183166504 + }, + { + "epoch": 3.575592041015625e-05, + "model_forward_time": 0.024341583251953125, + "step": 23433 + }, + { + "epoch": 3.575592041015625e-05, + "step": 23433, + "training_step_time": 0.14948439598083496 + }, + { + "epoch": 3.57574462890625e-05, + "model_forward_time": 0.024144649505615234, + "step": 23434 + }, + { + "epoch": 3.57574462890625e-05, + "step": 23434, + "training_step_time": 0.22899413108825684 + }, + { + "epoch": 3.575897216796875e-05, + "model_forward_time": 0.024518966674804688, + "step": 23435 + }, + { + "epoch": 3.575897216796875e-05, + "step": 23435, + "training_step_time": 0.1322798728942871 + }, + { + "epoch": 3.5760498046875e-05, + "model_forward_time": 0.024150609970092773, + "step": 23436 + }, + { + "epoch": 3.5760498046875e-05, + "step": 23436, + "training_step_time": 0.12027430534362793 + }, + { + "epoch": 3.576202392578125e-05, + "model_forward_time": 0.024500608444213867, + "step": 23437 + }, + { + "epoch": 3.576202392578125e-05, + "step": 23437, + "training_step_time": 0.15895462036132812 + }, + { + "epoch": 3.57635498046875e-05, + "model_forward_time": 0.02450418472290039, + "step": 23438 + }, + { + "epoch": 3.57635498046875e-05, + "step": 23438, + "training_step_time": 0.21920013427734375 + }, + { + "epoch": 3.576507568359375e-05, + "model_forward_time": 0.02441573143005371, + "step": 23439 + }, + { + "epoch": 3.576507568359375e-05, + "step": 23439, + "training_step_time": 0.10787749290466309 + }, + { + "epoch": 3.57666015625e-05, + "grad_norm": 0.21360817551612854, + "learning_rate": 1.251267653430922e-05, + "loss": 0.0064, + "step": 23440 + }, + { + "epoch": 3.57666015625e-05, + "model_forward_time": 0.024637937545776367, + "step": 23440 + }, + { + "epoch": 3.57666015625e-05, + "step": 23440, + "training_step_time": 0.10346484184265137 + }, + { + "epoch": 3.576812744140625e-05, + "model_forward_time": 0.025167465209960938, + "step": 23441 + }, + { + "epoch": 3.576812744140625e-05, + "step": 23441, + "training_step_time": 0.10791754722595215 + }, + { + "epoch": 3.57696533203125e-05, + "model_forward_time": 0.02518463134765625, + "step": 23442 + }, + { + "epoch": 3.57696533203125e-05, + "step": 23442, + "training_step_time": 0.10646677017211914 + }, + { + "epoch": 3.577117919921875e-05, + "model_forward_time": 0.024528026580810547, + "step": 23443 + }, + { + "epoch": 3.577117919921875e-05, + "step": 23443, + "training_step_time": 0.1049647331237793 + }, + { + "epoch": 3.5772705078125e-05, + "model_forward_time": 0.025364398956298828, + "step": 23444 + }, + { + "epoch": 3.5772705078125e-05, + "step": 23444, + "training_step_time": 0.10627293586730957 + }, + { + "epoch": 3.577423095703125e-05, + "model_forward_time": 0.02756977081298828, + "step": 23445 + }, + { + "epoch": 3.577423095703125e-05, + "step": 23445, + "training_step_time": 0.10701203346252441 + }, + { + "epoch": 3.57757568359375e-05, + "model_forward_time": 0.025101423263549805, + "step": 23446 + }, + { + "epoch": 3.57757568359375e-05, + "step": 23446, + "training_step_time": 0.10464072227478027 + }, + { + "epoch": 3.577728271484375e-05, + "model_forward_time": 0.025330781936645508, + "step": 23447 + }, + { + "epoch": 3.577728271484375e-05, + "step": 23447, + "training_step_time": 0.1168966293334961 + }, + { + "epoch": 3.577880859375e-05, + "model_forward_time": 0.024997949600219727, + "step": 23448 + }, + { + "epoch": 3.577880859375e-05, + "step": 23448, + "training_step_time": 0.1403191089630127 + }, + { + "epoch": 3.578033447265625e-05, + "model_forward_time": 0.02475595474243164, + "step": 23449 + }, + { + "epoch": 3.578033447265625e-05, + "step": 23449, + "training_step_time": 0.10434269905090332 + }, + { + "epoch": 3.57818603515625e-05, + "grad_norm": 0.2325982004404068, + "learning_rate": 1.2476227895841713e-05, + "loss": 0.0078, + "step": 23450 + }, + { + "epoch": 3.57818603515625e-05, + "model_forward_time": 0.02525162696838379, + "step": 23450 + }, + { + "epoch": 3.57818603515625e-05, + "step": 23450, + "training_step_time": 0.1096029281616211 + }, + { + "epoch": 3.578338623046875e-05, + "model_forward_time": 0.02516961097717285, + "step": 23451 + }, + { + "epoch": 3.578338623046875e-05, + "step": 23451, + "training_step_time": 0.10805034637451172 + }, + { + "epoch": 3.5784912109375e-05, + "model_forward_time": 0.025000333786010742, + "step": 23452 + }, + { + "epoch": 3.5784912109375e-05, + "step": 23452, + "training_step_time": 0.11142277717590332 + }, + { + "epoch": 3.578643798828125e-05, + "model_forward_time": 0.025150060653686523, + "step": 23453 + }, + { + "epoch": 3.578643798828125e-05, + "step": 23453, + "training_step_time": 0.1939072608947754 + }, + { + "epoch": 3.57879638671875e-05, + "model_forward_time": 0.024639368057250977, + "step": 23454 + }, + { + "epoch": 3.57879638671875e-05, + "step": 23454, + "training_step_time": 0.10454535484313965 + }, + { + "epoch": 3.578948974609375e-05, + "model_forward_time": 0.024239540100097656, + "step": 23455 + }, + { + "epoch": 3.578948974609375e-05, + "step": 23455, + "training_step_time": 0.14604640007019043 + }, + { + "epoch": 3.5791015625e-05, + "model_forward_time": 0.02385115623474121, + "step": 23456 + }, + { + "epoch": 3.5791015625e-05, + "step": 23456, + "training_step_time": 0.1557013988494873 + }, + { + "epoch": 3.579254150390625e-05, + "model_forward_time": 0.024628162384033203, + "step": 23457 + }, + { + "epoch": 3.579254150390625e-05, + "step": 23457, + "training_step_time": 0.11300039291381836 + }, + { + "epoch": 3.57940673828125e-05, + "model_forward_time": 0.024349689483642578, + "step": 23458 + }, + { + "epoch": 3.57940673828125e-05, + "step": 23458, + "training_step_time": 0.13253068923950195 + }, + { + "epoch": 3.579559326171875e-05, + "model_forward_time": 0.025249719619750977, + "step": 23459 + }, + { + "epoch": 3.579559326171875e-05, + "step": 23459, + "training_step_time": 0.1992473602294922 + }, + { + "epoch": 3.5797119140625e-05, + "grad_norm": 0.08661402016878128, + "learning_rate": 1.243982485229559e-05, + "loss": 0.0049, + "step": 23460 + }, + { + "epoch": 3.5797119140625e-05, + "model_forward_time": 0.024042844772338867, + "step": 23460 + }, + { + "epoch": 3.5797119140625e-05, + "step": 23460, + "training_step_time": 0.10460567474365234 + }, + { + "epoch": 3.579864501953125e-05, + "model_forward_time": 0.02474498748779297, + "step": 23461 + }, + { + "epoch": 3.579864501953125e-05, + "step": 23461, + "training_step_time": 0.10154438018798828 + }, + { + "epoch": 3.58001708984375e-05, + "model_forward_time": 0.025541067123413086, + "step": 23462 + }, + { + "epoch": 3.58001708984375e-05, + "step": 23462, + "training_step_time": 0.1037135124206543 + }, + { + "epoch": 3.580169677734375e-05, + "model_forward_time": 0.02479100227355957, + "step": 23463 + }, + { + "epoch": 3.580169677734375e-05, + "step": 23463, + "training_step_time": 0.10387372970581055 + }, + { + "epoch": 3.580322265625e-05, + "model_forward_time": 0.024901866912841797, + "step": 23464 + }, + { + "epoch": 3.580322265625e-05, + "step": 23464, + "training_step_time": 0.10335445404052734 + }, + { + "epoch": 3.580474853515625e-05, + "model_forward_time": 0.025379180908203125, + "step": 23465 + }, + { + "epoch": 3.580474853515625e-05, + "step": 23465, + "training_step_time": 0.10377645492553711 + }, + { + "epoch": 3.58062744140625e-05, + "model_forward_time": 0.02531743049621582, + "step": 23466 + }, + { + "epoch": 3.58062744140625e-05, + "step": 23466, + "training_step_time": 0.1050114631652832 + }, + { + "epoch": 3.580780029296875e-05, + "model_forward_time": 0.025125503540039062, + "step": 23467 + }, + { + "epoch": 3.580780029296875e-05, + "step": 23467, + "training_step_time": 0.10748982429504395 + }, + { + "epoch": 3.5809326171875e-05, + "model_forward_time": 0.024906396865844727, + "step": 23468 + }, + { + "epoch": 3.5809326171875e-05, + "step": 23468, + "training_step_time": 0.10595154762268066 + }, + { + "epoch": 3.581085205078125e-05, + "model_forward_time": 0.024980545043945312, + "step": 23469 + }, + { + "epoch": 3.581085205078125e-05, + "step": 23469, + "training_step_time": 0.10785484313964844 + }, + { + "epoch": 3.58123779296875e-05, + "grad_norm": 0.09178736060857773, + "learning_rate": 1.2403467447903943e-05, + "loss": 0.0063, + "step": 23470 + }, + { + "epoch": 3.58123779296875e-05, + "model_forward_time": 0.02494215965270996, + "step": 23470 + }, + { + "epoch": 3.58123779296875e-05, + "step": 23470, + "training_step_time": 0.10521316528320312 + }, + { + "epoch": 3.581390380859375e-05, + "model_forward_time": 0.02504110336303711, + "step": 23471 + }, + { + "epoch": 3.581390380859375e-05, + "step": 23471, + "training_step_time": 0.10410594940185547 + }, + { + "epoch": 3.58154296875e-05, + "model_forward_time": 0.02494192123413086, + "step": 23472 + }, + { + "epoch": 3.58154296875e-05, + "step": 23472, + "training_step_time": 0.10920095443725586 + }, + { + "epoch": 3.581695556640625e-05, + "model_forward_time": 0.02512049674987793, + "step": 23473 + }, + { + "epoch": 3.581695556640625e-05, + "step": 23473, + "training_step_time": 0.10818171501159668 + }, + { + "epoch": 3.58184814453125e-05, + "model_forward_time": 0.025060653686523438, + "step": 23474 + }, + { + "epoch": 3.58184814453125e-05, + "step": 23474, + "training_step_time": 0.10834622383117676 + }, + { + "epoch": 3.582000732421875e-05, + "model_forward_time": 0.024729013442993164, + "step": 23475 + }, + { + "epoch": 3.582000732421875e-05, + "step": 23475, + "training_step_time": 0.11076927185058594 + }, + { + "epoch": 3.5821533203125e-05, + "model_forward_time": 0.025833606719970703, + "step": 23476 + }, + { + "epoch": 3.5821533203125e-05, + "step": 23476, + "training_step_time": 0.10984015464782715 + }, + { + "epoch": 3.582305908203125e-05, + "model_forward_time": 0.02550506591796875, + "step": 23477 + }, + { + "epoch": 3.582305908203125e-05, + "step": 23477, + "training_step_time": 0.14144611358642578 + }, + { + "epoch": 3.58245849609375e-05, + "model_forward_time": 0.025098800659179688, + "step": 23478 + }, + { + "epoch": 3.58245849609375e-05, + "step": 23478, + "training_step_time": 0.10920071601867676 + }, + { + "epoch": 3.582611083984375e-05, + "model_forward_time": 0.025118112564086914, + "step": 23479 + }, + { + "epoch": 3.582611083984375e-05, + "step": 23479, + "training_step_time": 0.10694289207458496 + }, + { + "epoch": 3.582763671875e-05, + "grad_norm": 0.08807004988193512, + "learning_rate": 1.2367155726844492e-05, + "loss": 0.0036, + "step": 23480 + }, + { + "epoch": 3.582763671875e-05, + "model_forward_time": 0.02416706085205078, + "step": 23480 + }, + { + "epoch": 3.582763671875e-05, + "step": 23480, + "training_step_time": 0.21194005012512207 + }, + { + "epoch": 3.582916259765625e-05, + "model_forward_time": 0.024582862854003906, + "step": 23481 + }, + { + "epoch": 3.582916259765625e-05, + "step": 23481, + "training_step_time": 0.15209341049194336 + }, + { + "epoch": 3.58306884765625e-05, + "model_forward_time": 0.024405479431152344, + "step": 23482 + }, + { + "epoch": 3.58306884765625e-05, + "step": 23482, + "training_step_time": 0.14760470390319824 + }, + { + "epoch": 3.583221435546875e-05, + "model_forward_time": 0.024424314498901367, + "step": 23483 + }, + { + "epoch": 3.583221435546875e-05, + "step": 23483, + "training_step_time": 0.11039423942565918 + }, + { + "epoch": 3.5833740234375e-05, + "model_forward_time": 0.024700641632080078, + "step": 23484 + }, + { + "epoch": 3.5833740234375e-05, + "step": 23484, + "training_step_time": 0.2165203094482422 + }, + { + "epoch": 3.583526611328125e-05, + "model_forward_time": 0.024034500122070312, + "step": 23485 + }, + { + "epoch": 3.583526611328125e-05, + "step": 23485, + "training_step_time": 0.11187601089477539 + }, + { + "epoch": 3.58367919921875e-05, + "model_forward_time": 0.024262428283691406, + "step": 23486 + }, + { + "epoch": 3.58367919921875e-05, + "step": 23486, + "training_step_time": 0.10226893424987793 + }, + { + "epoch": 3.583831787109375e-05, + "model_forward_time": 0.02522110939025879, + "step": 23487 + }, + { + "epoch": 3.583831787109375e-05, + "step": 23487, + "training_step_time": 0.10602307319641113 + }, + { + "epoch": 3.583984375e-05, + "model_forward_time": 0.025101184844970703, + "step": 23488 + }, + { + "epoch": 3.583984375e-05, + "step": 23488, + "training_step_time": 0.10498356819152832 + }, + { + "epoch": 3.584136962890625e-05, + "model_forward_time": 0.025235652923583984, + "step": 23489 + }, + { + "epoch": 3.584136962890625e-05, + "step": 23489, + "training_step_time": 0.10766887664794922 + }, + { + "epoch": 3.58428955078125e-05, + "grad_norm": 0.17897766828536987, + "learning_rate": 1.233088973323937e-05, + "loss": 0.0079, + "step": 23490 + }, + { + "epoch": 3.58428955078125e-05, + "model_forward_time": 0.025496959686279297, + "step": 23490 + }, + { + "epoch": 3.58428955078125e-05, + "step": 23490, + "training_step_time": 0.10622096061706543 + }, + { + "epoch": 3.584442138671875e-05, + "model_forward_time": 0.02518320083618164, + "step": 23491 + }, + { + "epoch": 3.584442138671875e-05, + "step": 23491, + "training_step_time": 0.10700631141662598 + }, + { + "epoch": 3.5845947265625e-05, + "model_forward_time": 0.025218486785888672, + "step": 23492 + }, + { + "epoch": 3.5845947265625e-05, + "step": 23492, + "training_step_time": 0.10589241981506348 + }, + { + "epoch": 3.584747314453125e-05, + "model_forward_time": 0.0251157283782959, + "step": 23493 + }, + { + "epoch": 3.584747314453125e-05, + "step": 23493, + "training_step_time": 0.12342405319213867 + }, + { + "epoch": 3.58489990234375e-05, + "model_forward_time": 0.025136947631835938, + "step": 23494 + }, + { + "epoch": 3.58489990234375e-05, + "step": 23494, + "training_step_time": 0.13508248329162598 + }, + { + "epoch": 3.585052490234375e-05, + "model_forward_time": 0.024733781814575195, + "step": 23495 + }, + { + "epoch": 3.585052490234375e-05, + "step": 23495, + "training_step_time": 0.10801100730895996 + }, + { + "epoch": 3.585205078125e-05, + "model_forward_time": 0.02513575553894043, + "step": 23496 + }, + { + "epoch": 3.585205078125e-05, + "step": 23496, + "training_step_time": 0.10839962959289551 + }, + { + "epoch": 3.585357666015625e-05, + "model_forward_time": 0.02506279945373535, + "step": 23497 + }, + { + "epoch": 3.585357666015625e-05, + "step": 23497, + "training_step_time": 0.11402606964111328 + }, + { + "epoch": 3.58551025390625e-05, + "model_forward_time": 0.02499246597290039, + "step": 23498 + }, + { + "epoch": 3.58551025390625e-05, + "step": 23498, + "training_step_time": 0.10582470893859863 + }, + { + "epoch": 3.585662841796875e-05, + "model_forward_time": 0.024924278259277344, + "step": 23499 + }, + { + "epoch": 3.585662841796875e-05, + "step": 23499, + "training_step_time": 0.2000875473022461 + }, + { + "epoch": 3.5858154296875e-05, + "grad_norm": 0.07113680988550186, + "learning_rate": 1.2294669511155193e-05, + "loss": 0.0159, + "step": 23500 + }, + { + "epoch": 3.5858154296875e-05, + "model_forward_time": 0.024460792541503906, + "step": 23500 + }, + { + "epoch": 3.5858154296875e-05, + "step": 23500, + "training_step_time": 0.1026468276977539 + }, + { + "epoch": 3.585968017578125e-05, + "model_forward_time": 0.024387836456298828, + "step": 23501 + }, + { + "epoch": 3.585968017578125e-05, + "step": 23501, + "training_step_time": 0.12257838249206543 + }, + { + "epoch": 3.58612060546875e-05, + "model_forward_time": 0.024692058563232422, + "step": 23502 + }, + { + "epoch": 3.58612060546875e-05, + "step": 23502, + "training_step_time": 0.10624456405639648 + }, + { + "epoch": 3.586273193359375e-05, + "model_forward_time": 0.0254971981048584, + "step": 23503 + }, + { + "epoch": 3.586273193359375e-05, + "step": 23503, + "training_step_time": 0.12292885780334473 + }, + { + "epoch": 3.58642578125e-05, + "model_forward_time": 0.025409221649169922, + "step": 23504 + }, + { + "epoch": 3.58642578125e-05, + "step": 23504, + "training_step_time": 0.10971617698669434 + }, + { + "epoch": 3.586578369140625e-05, + "model_forward_time": 0.025597095489501953, + "step": 23505 + }, + { + "epoch": 3.586578369140625e-05, + "step": 23505, + "training_step_time": 0.11999964714050293 + }, + { + "epoch": 3.58673095703125e-05, + "model_forward_time": 0.025358915328979492, + "step": 23506 + }, + { + "epoch": 3.58673095703125e-05, + "step": 23506, + "training_step_time": 0.10528063774108887 + }, + { + "epoch": 3.586883544921875e-05, + "model_forward_time": 0.02526569366455078, + "step": 23507 + }, + { + "epoch": 3.586883544921875e-05, + "step": 23507, + "training_step_time": 0.1100308895111084 + }, + { + "epoch": 3.5870361328125e-05, + "model_forward_time": 0.02522730827331543, + "step": 23508 + }, + { + "epoch": 3.5870361328125e-05, + "step": 23508, + "training_step_time": 0.10607218742370605 + }, + { + "epoch": 3.587188720703125e-05, + "model_forward_time": 0.025279760360717773, + "step": 23509 + }, + { + "epoch": 3.587188720703125e-05, + "step": 23509, + "training_step_time": 0.1060476303100586 + }, + { + "epoch": 3.58734130859375e-05, + "grad_norm": 0.08744464069604874, + "learning_rate": 1.2258495104602924e-05, + "loss": 0.0061, + "step": 23510 + }, + { + "epoch": 3.58734130859375e-05, + "model_forward_time": 0.025003433227539062, + "step": 23510 + }, + { + "epoch": 3.58734130859375e-05, + "step": 23510, + "training_step_time": 0.10390901565551758 + }, + { + "epoch": 3.587493896484375e-05, + "model_forward_time": 0.025115251541137695, + "step": 23511 + }, + { + "epoch": 3.587493896484375e-05, + "step": 23511, + "training_step_time": 0.10389494895935059 + }, + { + "epoch": 3.587646484375e-05, + "model_forward_time": 0.024961471557617188, + "step": 23512 + }, + { + "epoch": 3.587646484375e-05, + "step": 23512, + "training_step_time": 0.10506272315979004 + }, + { + "epoch": 3.587799072265625e-05, + "model_forward_time": 0.02483534812927246, + "step": 23513 + }, + { + "epoch": 3.587799072265625e-05, + "step": 23513, + "training_step_time": 0.11701703071594238 + }, + { + "epoch": 3.58795166015625e-05, + "model_forward_time": 0.025045394897460938, + "step": 23514 + }, + { + "epoch": 3.58795166015625e-05, + "step": 23514, + "training_step_time": 0.12827110290527344 + }, + { + "epoch": 3.588104248046875e-05, + "model_forward_time": 0.02506256103515625, + "step": 23515 + }, + { + "epoch": 3.588104248046875e-05, + "step": 23515, + "training_step_time": 0.13115167617797852 + }, + { + "epoch": 3.5882568359375e-05, + "model_forward_time": 0.02457118034362793, + "step": 23516 + }, + { + "epoch": 3.5882568359375e-05, + "step": 23516, + "training_step_time": 0.12658286094665527 + }, + { + "epoch": 3.588409423828125e-05, + "model_forward_time": 0.024909019470214844, + "step": 23517 + }, + { + "epoch": 3.588409423828125e-05, + "step": 23517, + "training_step_time": 0.12047672271728516 + }, + { + "epoch": 3.58856201171875e-05, + "model_forward_time": 0.02512335777282715, + "step": 23518 + }, + { + "epoch": 3.58856201171875e-05, + "step": 23518, + "training_step_time": 0.1179051399230957 + }, + { + "epoch": 3.588714599609375e-05, + "model_forward_time": 0.0250241756439209, + "step": 23519 + }, + { + "epoch": 3.588714599609375e-05, + "step": 23519, + "training_step_time": 0.11281919479370117 + }, + { + "epoch": 3.5888671875e-05, + "grad_norm": 0.5377272367477417, + "learning_rate": 1.2222366557537911e-05, + "loss": 0.0197, + "step": 23520 + }, + { + "epoch": 3.5888671875e-05, + "model_forward_time": 0.02539968490600586, + "step": 23520 + }, + { + "epoch": 3.5888671875e-05, + "step": 23520, + "training_step_time": 0.11529254913330078 + }, + { + "epoch": 3.589019775390625e-05, + "model_forward_time": 0.026179075241088867, + "step": 23521 + }, + { + "epoch": 3.589019775390625e-05, + "step": 23521, + "training_step_time": 0.11334371566772461 + }, + { + "epoch": 3.58917236328125e-05, + "model_forward_time": 0.024983644485473633, + "step": 23522 + }, + { + "epoch": 3.58917236328125e-05, + "step": 23522, + "training_step_time": 0.10957646369934082 + }, + { + "epoch": 3.589324951171875e-05, + "model_forward_time": 0.025823354721069336, + "step": 23523 + }, + { + "epoch": 3.589324951171875e-05, + "step": 23523, + "training_step_time": 0.17384934425354004 + }, + { + "epoch": 3.5894775390625e-05, + "model_forward_time": 0.024407148361206055, + "step": 23524 + }, + { + "epoch": 3.5894775390625e-05, + "step": 23524, + "training_step_time": 0.10748839378356934 + }, + { + "epoch": 3.589630126953125e-05, + "model_forward_time": 0.02473306655883789, + "step": 23525 + }, + { + "epoch": 3.589630126953125e-05, + "step": 23525, + "training_step_time": 0.16157984733581543 + }, + { + "epoch": 3.58978271484375e-05, + "model_forward_time": 0.024798154830932617, + "step": 23526 + }, + { + "epoch": 3.58978271484375e-05, + "step": 23526, + "training_step_time": 0.13297271728515625 + }, + { + "epoch": 3.589935302734375e-05, + "model_forward_time": 0.024226665496826172, + "step": 23527 + }, + { + "epoch": 3.589935302734375e-05, + "step": 23527, + "training_step_time": 0.12042379379272461 + }, + { + "epoch": 3.590087890625e-05, + "model_forward_time": 0.028097867965698242, + "step": 23528 + }, + { + "epoch": 3.590087890625e-05, + "step": 23528, + "training_step_time": 0.13189435005187988 + }, + { + "epoch": 3.590240478515625e-05, + "model_forward_time": 0.02470993995666504, + "step": 23529 + }, + { + "epoch": 3.590240478515625e-05, + "step": 23529, + "training_step_time": 0.14176034927368164 + }, + { + "epoch": 3.59039306640625e-05, + "grad_norm": 0.14628897607326508, + "learning_rate": 1.2186283913859726e-05, + "loss": 0.0037, + "step": 23530 + }, + { + "epoch": 3.59039306640625e-05, + "model_forward_time": 0.02386927604675293, + "step": 23530 + }, + { + "epoch": 3.59039306640625e-05, + "step": 23530, + "training_step_time": 0.20692229270935059 + }, + { + "epoch": 3.590545654296875e-05, + "model_forward_time": 0.024326801300048828, + "step": 23531 + }, + { + "epoch": 3.590545654296875e-05, + "step": 23531, + "training_step_time": 0.1141660213470459 + }, + { + "epoch": 3.5906982421875e-05, + "model_forward_time": 0.024133682250976562, + "step": 23532 + }, + { + "epoch": 3.5906982421875e-05, + "step": 23532, + "training_step_time": 0.10202169418334961 + }, + { + "epoch": 3.590850830078125e-05, + "model_forward_time": 0.02486562728881836, + "step": 23533 + }, + { + "epoch": 3.590850830078125e-05, + "step": 23533, + "training_step_time": 0.10515332221984863 + }, + { + "epoch": 3.59100341796875e-05, + "model_forward_time": 0.025341510772705078, + "step": 23534 + }, + { + "epoch": 3.59100341796875e-05, + "step": 23534, + "training_step_time": 0.10917806625366211 + }, + { + "epoch": 3.591156005859375e-05, + "model_forward_time": 0.025040388107299805, + "step": 23535 + }, + { + "epoch": 3.591156005859375e-05, + "step": 23535, + "training_step_time": 0.10798788070678711 + }, + { + "epoch": 3.59130859375e-05, + "model_forward_time": 0.025020599365234375, + "step": 23536 + }, + { + "epoch": 3.59130859375e-05, + "step": 23536, + "training_step_time": 0.10379958152770996 + }, + { + "epoch": 3.591461181640625e-05, + "model_forward_time": 0.02504873275756836, + "step": 23537 + }, + { + "epoch": 3.591461181640625e-05, + "step": 23537, + "training_step_time": 0.10690450668334961 + }, + { + "epoch": 3.59161376953125e-05, + "model_forward_time": 0.026689529418945312, + "step": 23538 + }, + { + "epoch": 3.59161376953125e-05, + "step": 23538, + "training_step_time": 0.11020612716674805 + }, + { + "epoch": 3.591766357421875e-05, + "model_forward_time": 0.0250241756439209, + "step": 23539 + }, + { + "epoch": 3.591766357421875e-05, + "step": 23539, + "training_step_time": 0.11072087287902832 + }, + { + "epoch": 3.5919189453125e-05, + "grad_norm": 0.14447247982025146, + "learning_rate": 1.2150247217412186e-05, + "loss": 0.0087, + "step": 23540 + }, + { + "epoch": 3.5919189453125e-05, + "model_forward_time": 0.024659156799316406, + "step": 23540 + }, + { + "epoch": 3.5919189453125e-05, + "step": 23540, + "training_step_time": 0.1290600299835205 + }, + { + "epoch": 3.592071533203125e-05, + "model_forward_time": 0.024895429611206055, + "step": 23541 + }, + { + "epoch": 3.592071533203125e-05, + "step": 23541, + "training_step_time": 0.10946512222290039 + }, + { + "epoch": 3.59222412109375e-05, + "model_forward_time": 0.025244951248168945, + "step": 23542 + }, + { + "epoch": 3.59222412109375e-05, + "step": 23542, + "training_step_time": 0.1135261058807373 + }, + { + "epoch": 3.592376708984375e-05, + "model_forward_time": 0.025615930557250977, + "step": 23543 + }, + { + "epoch": 3.592376708984375e-05, + "step": 23543, + "training_step_time": 0.11053943634033203 + }, + { + "epoch": 3.592529296875e-05, + "model_forward_time": 0.02485346794128418, + "step": 23544 + }, + { + "epoch": 3.592529296875e-05, + "step": 23544, + "training_step_time": 0.1028134822845459 + }, + { + "epoch": 3.592681884765625e-05, + "model_forward_time": 0.025249958038330078, + "step": 23545 + }, + { + "epoch": 3.592681884765625e-05, + "step": 23545, + "training_step_time": 0.19267773628234863 + }, + { + "epoch": 3.59283447265625e-05, + "model_forward_time": 0.024204254150390625, + "step": 23546 + }, + { + "epoch": 3.59283447265625e-05, + "step": 23546, + "training_step_time": 0.10150718688964844 + }, + { + "epoch": 3.592987060546875e-05, + "model_forward_time": 0.024008750915527344, + "step": 23547 + }, + { + "epoch": 3.592987060546875e-05, + "step": 23547, + "training_step_time": 0.10187053680419922 + }, + { + "epoch": 3.5931396484375e-05, + "model_forward_time": 0.024293184280395508, + "step": 23548 + }, + { + "epoch": 3.5931396484375e-05, + "step": 23548, + "training_step_time": 0.15435409545898438 + }, + { + "epoch": 3.593292236328125e-05, + "model_forward_time": 0.025104999542236328, + "step": 23549 + }, + { + "epoch": 3.593292236328125e-05, + "step": 23549, + "training_step_time": 0.14942026138305664 + }, + { + "epoch": 3.59344482421875e-05, + "grad_norm": 0.1571156084537506, + "learning_rate": 1.2114256511983274e-05, + "loss": 0.0042, + "step": 23550 + }, + { + "epoch": 3.59344482421875e-05, + "model_forward_time": 0.024634361267089844, + "step": 23550 + }, + { + "epoch": 3.59344482421875e-05, + "step": 23550, + "training_step_time": 0.14546465873718262 + }, + { + "epoch": 3.593597412109375e-05, + "model_forward_time": 0.023958206176757812, + "step": 23551 + }, + { + "epoch": 3.593597412109375e-05, + "step": 23551, + "training_step_time": 0.13865375518798828 + }, + { + "epoch": 3.59375e-05, + "model_forward_time": 0.024143695831298828, + "step": 23552 + }, + { + "epoch": 3.59375e-05, + "step": 23552, + "training_step_time": 0.19643950462341309 + }, + { + "epoch": 3.593902587890625e-05, + "model_forward_time": 0.024049997329711914, + "step": 23553 + }, + { + "epoch": 3.593902587890625e-05, + "step": 23553, + "training_step_time": 0.10597777366638184 + }, + { + "epoch": 3.59405517578125e-05, + "model_forward_time": 0.023816585540771484, + "step": 23554 + }, + { + "epoch": 3.59405517578125e-05, + "step": 23554, + "training_step_time": 0.10827875137329102 + }, + { + "epoch": 3.594207763671875e-05, + "model_forward_time": 0.024288177490234375, + "step": 23555 + }, + { + "epoch": 3.594207763671875e-05, + "step": 23555, + "training_step_time": 0.10856914520263672 + }, + { + "epoch": 3.5943603515625e-05, + "model_forward_time": 0.023801565170288086, + "step": 23556 + }, + { + "epoch": 3.5943603515625e-05, + "step": 23556, + "training_step_time": 0.10976123809814453 + }, + { + "epoch": 3.594512939453125e-05, + "model_forward_time": 0.028114795684814453, + "step": 23557 + }, + { + "epoch": 3.594512939453125e-05, + "step": 23557, + "training_step_time": 0.10836434364318848 + }, + { + "epoch": 3.59466552734375e-05, + "model_forward_time": 0.026053428649902344, + "step": 23558 + }, + { + "epoch": 3.59466552734375e-05, + "step": 23558, + "training_step_time": 0.11066627502441406 + }, + { + "epoch": 3.594818115234375e-05, + "model_forward_time": 0.024656057357788086, + "step": 23559 + }, + { + "epoch": 3.594818115234375e-05, + "step": 23559, + "training_step_time": 0.10505127906799316 + }, + { + "epoch": 3.594970703125e-05, + "grad_norm": 0.1672380417585373, + "learning_rate": 1.2078311841305084e-05, + "loss": 0.0049, + "step": 23560 + }, + { + "epoch": 3.594970703125e-05, + "model_forward_time": 0.0247652530670166, + "step": 23560 + }, + { + "epoch": 3.594970703125e-05, + "step": 23560, + "training_step_time": 0.10628533363342285 + }, + { + "epoch": 3.595123291015625e-05, + "model_forward_time": 0.024989843368530273, + "step": 23561 + }, + { + "epoch": 3.595123291015625e-05, + "step": 23561, + "training_step_time": 0.10596609115600586 + }, + { + "epoch": 3.59527587890625e-05, + "model_forward_time": 0.02521538734436035, + "step": 23562 + }, + { + "epoch": 3.59527587890625e-05, + "step": 23562, + "training_step_time": 0.10956668853759766 + }, + { + "epoch": 3.595428466796875e-05, + "model_forward_time": 0.025239229202270508, + "step": 23563 + }, + { + "epoch": 3.595428466796875e-05, + "step": 23563, + "training_step_time": 0.10869622230529785 + }, + { + "epoch": 3.5955810546875e-05, + "model_forward_time": 0.024997234344482422, + "step": 23564 + }, + { + "epoch": 3.5955810546875e-05, + "step": 23564, + "training_step_time": 0.11023736000061035 + }, + { + "epoch": 3.595733642578125e-05, + "model_forward_time": 0.024806737899780273, + "step": 23565 + }, + { + "epoch": 3.595733642578125e-05, + "step": 23565, + "training_step_time": 0.10726737976074219 + }, + { + "epoch": 3.59588623046875e-05, + "model_forward_time": 0.024991989135742188, + "step": 23566 + }, + { + "epoch": 3.59588623046875e-05, + "step": 23566, + "training_step_time": 0.10702204704284668 + }, + { + "epoch": 3.596038818359375e-05, + "model_forward_time": 0.025324106216430664, + "step": 23567 + }, + { + "epoch": 3.596038818359375e-05, + "step": 23567, + "training_step_time": 0.10797786712646484 + }, + { + "epoch": 3.59619140625e-05, + "model_forward_time": 0.0250089168548584, + "step": 23568 + }, + { + "epoch": 3.59619140625e-05, + "step": 23568, + "training_step_time": 0.10377168655395508 + }, + { + "epoch": 3.596343994140625e-05, + "model_forward_time": 0.024726152420043945, + "step": 23569 + }, + { + "epoch": 3.596343994140625e-05, + "step": 23569, + "training_step_time": 0.10789346694946289 + }, + { + "epoch": 3.59649658203125e-05, + "grad_norm": 0.09695081412792206, + "learning_rate": 1.2042413249053796e-05, + "loss": 0.0041, + "step": 23570 + }, + { + "epoch": 3.59649658203125e-05, + "model_forward_time": 0.024789094924926758, + "step": 23570 + }, + { + "epoch": 3.59649658203125e-05, + "step": 23570, + "training_step_time": 0.18400168418884277 + }, + { + "epoch": 3.596649169921875e-05, + "model_forward_time": 0.02424025535583496, + "step": 23571 + }, + { + "epoch": 3.596649169921875e-05, + "step": 23571, + "training_step_time": 0.23163127899169922 + }, + { + "epoch": 3.5968017578125e-05, + "model_forward_time": 0.024302244186401367, + "step": 23572 + }, + { + "epoch": 3.5968017578125e-05, + "step": 23572, + "training_step_time": 0.15258359909057617 + }, + { + "epoch": 3.596954345703125e-05, + "model_forward_time": 0.023998022079467773, + "step": 23573 + }, + { + "epoch": 3.596954345703125e-05, + "step": 23573, + "training_step_time": 0.20294570922851562 + }, + { + "epoch": 3.59710693359375e-05, + "model_forward_time": 0.024451494216918945, + "step": 23574 + }, + { + "epoch": 3.59710693359375e-05, + "step": 23574, + "training_step_time": 0.17335939407348633 + }, + { + "epoch": 3.597259521484375e-05, + "model_forward_time": 0.024142742156982422, + "step": 23575 + }, + { + "epoch": 3.597259521484375e-05, + "step": 23575, + "training_step_time": 0.12632513046264648 + }, + { + "epoch": 3.597412109375e-05, + "model_forward_time": 0.023948192596435547, + "step": 23576 + }, + { + "epoch": 3.597412109375e-05, + "step": 23576, + "training_step_time": 0.10339212417602539 + }, + { + "epoch": 3.597564697265625e-05, + "model_forward_time": 0.025509119033813477, + "step": 23577 + }, + { + "epoch": 3.597564697265625e-05, + "step": 23577, + "training_step_time": 0.10282540321350098 + }, + { + "epoch": 3.59771728515625e-05, + "model_forward_time": 0.024596691131591797, + "step": 23578 + }, + { + "epoch": 3.59771728515625e-05, + "step": 23578, + "training_step_time": 0.10655403137207031 + }, + { + "epoch": 3.597869873046875e-05, + "model_forward_time": 0.024942398071289062, + "step": 23579 + }, + { + "epoch": 3.597869873046875e-05, + "step": 23579, + "training_step_time": 0.10764145851135254 + }, + { + "epoch": 3.5980224609375e-05, + "grad_norm": 0.20490725338459015, + "learning_rate": 1.2006560778849578e-05, + "loss": 0.0067, + "step": 23580 + }, + { + "epoch": 3.5980224609375e-05, + "model_forward_time": 0.027492046356201172, + "step": 23580 + }, + { + "epoch": 3.5980224609375e-05, + "step": 23580, + "training_step_time": 0.10685038566589355 + }, + { + "epoch": 3.598175048828125e-05, + "model_forward_time": 0.024816036224365234, + "step": 23581 + }, + { + "epoch": 3.598175048828125e-05, + "step": 23581, + "training_step_time": 0.12947964668273926 + }, + { + "epoch": 3.59832763671875e-05, + "model_forward_time": 0.024666309356689453, + "step": 23582 + }, + { + "epoch": 3.59832763671875e-05, + "step": 23582, + "training_step_time": 0.1424112319946289 + }, + { + "epoch": 3.598480224609375e-05, + "model_forward_time": 0.024393796920776367, + "step": 23583 + }, + { + "epoch": 3.598480224609375e-05, + "step": 23583, + "training_step_time": 0.1782221794128418 + }, + { + "epoch": 3.5986328125e-05, + "model_forward_time": 0.025555133819580078, + "step": 23584 + }, + { + "epoch": 3.5986328125e-05, + "step": 23584, + "training_step_time": 0.146806001663208 + }, + { + "epoch": 3.598785400390625e-05, + "model_forward_time": 0.024281024932861328, + "step": 23585 + }, + { + "epoch": 3.598785400390625e-05, + "step": 23585, + "training_step_time": 0.12274360656738281 + }, + { + "epoch": 3.59893798828125e-05, + "model_forward_time": 0.02421736717224121, + "step": 23586 + }, + { + "epoch": 3.59893798828125e-05, + "step": 23586, + "training_step_time": 0.19606637954711914 + }, + { + "epoch": 3.599090576171875e-05, + "model_forward_time": 0.024244308471679688, + "step": 23587 + }, + { + "epoch": 3.599090576171875e-05, + "step": 23587, + "training_step_time": 0.11588263511657715 + }, + { + "epoch": 3.5992431640625e-05, + "model_forward_time": 0.024840593338012695, + "step": 23588 + }, + { + "epoch": 3.5992431640625e-05, + "step": 23588, + "training_step_time": 0.19193744659423828 + }, + { + "epoch": 3.599395751953125e-05, + "model_forward_time": 0.025223493576049805, + "step": 23589 + }, + { + "epoch": 3.599395751953125e-05, + "step": 23589, + "training_step_time": 0.10455155372619629 + }, + { + "epoch": 3.59954833984375e-05, + "grad_norm": 0.09416133165359497, + "learning_rate": 1.1970754474256563e-05, + "loss": 0.0151, + "step": 23590 + }, + { + "epoch": 3.59954833984375e-05, + "model_forward_time": 0.025414466857910156, + "step": 23590 + }, + { + "epoch": 3.59954833984375e-05, + "step": 23590, + "training_step_time": 0.10431718826293945 + }, + { + "epoch": 3.599700927734375e-05, + "model_forward_time": 0.024834156036376953, + "step": 23591 + }, + { + "epoch": 3.599700927734375e-05, + "step": 23591, + "training_step_time": 0.14815783500671387 + }, + { + "epoch": 3.599853515625e-05, + "model_forward_time": 0.025567054748535156, + "step": 23592 + }, + { + "epoch": 3.599853515625e-05, + "step": 23592, + "training_step_time": 0.11618757247924805 + }, + { + "epoch": 3.600006103515625e-05, + "model_forward_time": 0.02521657943725586, + "step": 23593 + }, + { + "epoch": 3.600006103515625e-05, + "step": 23593, + "training_step_time": 0.11089920997619629 + }, + { + "epoch": 3.60015869140625e-05, + "model_forward_time": 0.02496957778930664, + "step": 23594 + }, + { + "epoch": 3.60015869140625e-05, + "step": 23594, + "training_step_time": 0.11283302307128906 + }, + { + "epoch": 3.600311279296875e-05, + "model_forward_time": 0.02503204345703125, + "step": 23595 + }, + { + "epoch": 3.600311279296875e-05, + "step": 23595, + "training_step_time": 0.11581754684448242 + }, + { + "epoch": 3.6004638671875e-05, + "model_forward_time": 0.025567293167114258, + "step": 23596 + }, + { + "epoch": 3.6004638671875e-05, + "step": 23596, + "training_step_time": 0.1293470859527588 + }, + { + "epoch": 3.600616455078125e-05, + "model_forward_time": 0.025957107543945312, + "step": 23597 + }, + { + "epoch": 3.600616455078125e-05, + "step": 23597, + "training_step_time": 0.11259126663208008 + }, + { + "epoch": 3.60076904296875e-05, + "model_forward_time": 0.02510237693786621, + "step": 23598 + }, + { + "epoch": 3.60076904296875e-05, + "step": 23598, + "training_step_time": 0.1048727035522461 + }, + { + "epoch": 3.600921630859375e-05, + "model_forward_time": 0.02497577667236328, + "step": 23599 + }, + { + "epoch": 3.600921630859375e-05, + "step": 23599, + "training_step_time": 0.1073920726776123 + }, + { + "epoch": 3.60107421875e-05, + "grad_norm": 0.11431095004081726, + "learning_rate": 1.1934994378782772e-05, + "loss": 0.0076, + "step": 23600 + }, + { + "epoch": 3.60107421875e-05, + "model_forward_time": 0.02571415901184082, + "step": 23600 + }, + { + "epoch": 3.60107421875e-05, + "step": 23600, + "training_step_time": 0.10950541496276855 + }, + { + "epoch": 3.601226806640625e-05, + "model_forward_time": 0.025052309036254883, + "step": 23601 + }, + { + "epoch": 3.601226806640625e-05, + "step": 23601, + "training_step_time": 0.10416293144226074 + }, + { + "epoch": 3.60137939453125e-05, + "model_forward_time": 0.025397539138793945, + "step": 23602 + }, + { + "epoch": 3.60137939453125e-05, + "step": 23602, + "training_step_time": 0.10831522941589355 + }, + { + "epoch": 3.601531982421875e-05, + "model_forward_time": 0.024766921997070312, + "step": 23603 + }, + { + "epoch": 3.601531982421875e-05, + "step": 23603, + "training_step_time": 0.10491561889648438 + }, + { + "epoch": 3.6016845703125e-05, + "model_forward_time": 0.025013446807861328, + "step": 23604 + }, + { + "epoch": 3.6016845703125e-05, + "step": 23604, + "training_step_time": 0.1061406135559082 + }, + { + "epoch": 3.601837158203125e-05, + "model_forward_time": 0.024492740631103516, + "step": 23605 + }, + { + "epoch": 3.601837158203125e-05, + "step": 23605, + "training_step_time": 0.10788440704345703 + }, + { + "epoch": 3.60198974609375e-05, + "model_forward_time": 0.02505970001220703, + "step": 23606 + }, + { + "epoch": 3.60198974609375e-05, + "step": 23606, + "training_step_time": 0.10908031463623047 + }, + { + "epoch": 3.602142333984375e-05, + "model_forward_time": 0.024733543395996094, + "step": 23607 + }, + { + "epoch": 3.602142333984375e-05, + "step": 23607, + "training_step_time": 0.10606050491333008 + }, + { + "epoch": 3.602294921875e-05, + "model_forward_time": 0.025064468383789062, + "step": 23608 + }, + { + "epoch": 3.602294921875e-05, + "step": 23608, + "training_step_time": 0.10462021827697754 + }, + { + "epoch": 3.602447509765625e-05, + "model_forward_time": 0.025264263153076172, + "step": 23609 + }, + { + "epoch": 3.602447509765625e-05, + "step": 23609, + "training_step_time": 0.10648608207702637 + }, + { + "epoch": 3.60260009765625e-05, + "grad_norm": 0.10195007920265198, + "learning_rate": 1.1899280535880119e-05, + "loss": 0.0085, + "step": 23610 + }, + { + "epoch": 3.60260009765625e-05, + "model_forward_time": 0.024643898010253906, + "step": 23610 + }, + { + "epoch": 3.60260009765625e-05, + "step": 23610, + "training_step_time": 0.10398316383361816 + }, + { + "epoch": 3.602752685546875e-05, + "model_forward_time": 0.0250244140625, + "step": 23611 + }, + { + "epoch": 3.602752685546875e-05, + "step": 23611, + "training_step_time": 0.10622048377990723 + }, + { + "epoch": 3.6029052734375e-05, + "model_forward_time": 0.024807453155517578, + "step": 23612 + }, + { + "epoch": 3.6029052734375e-05, + "step": 23612, + "training_step_time": 0.10375165939331055 + }, + { + "epoch": 3.603057861328125e-05, + "model_forward_time": 0.02509927749633789, + "step": 23613 + }, + { + "epoch": 3.603057861328125e-05, + "step": 23613, + "training_step_time": 0.10480713844299316 + }, + { + "epoch": 3.60321044921875e-05, + "model_forward_time": 0.02498340606689453, + "step": 23614 + }, + { + "epoch": 3.60321044921875e-05, + "step": 23614, + "training_step_time": 0.10484027862548828 + }, + { + "epoch": 3.603363037109375e-05, + "model_forward_time": 0.025099754333496094, + "step": 23615 + }, + { + "epoch": 3.603363037109375e-05, + "step": 23615, + "training_step_time": 0.18629765510559082 + }, + { + "epoch": 3.603515625e-05, + "model_forward_time": 0.02417445182800293, + "step": 23616 + }, + { + "epoch": 3.603515625e-05, + "step": 23616, + "training_step_time": 0.20795345306396484 + }, + { + "epoch": 3.603668212890625e-05, + "model_forward_time": 0.024274349212646484, + "step": 23617 + }, + { + "epoch": 3.603668212890625e-05, + "step": 23617, + "training_step_time": 0.2086319923400879 + }, + { + "epoch": 3.60382080078125e-05, + "model_forward_time": 0.024445056915283203, + "step": 23618 + }, + { + "epoch": 3.60382080078125e-05, + "step": 23618, + "training_step_time": 0.18890881538391113 + }, + { + "epoch": 3.603973388671875e-05, + "model_forward_time": 0.024297237396240234, + "step": 23619 + }, + { + "epoch": 3.603973388671875e-05, + "step": 23619, + "training_step_time": 0.19274091720581055 + }, + { + "epoch": 3.6041259765625e-05, + "grad_norm": 0.14618656039237976, + "learning_rate": 1.1863612988944267e-05, + "loss": 0.0042, + "step": 23620 + }, + { + "epoch": 3.6041259765625e-05, + "model_forward_time": 0.02428579330444336, + "step": 23620 + }, + { + "epoch": 3.6041259765625e-05, + "step": 23620, + "training_step_time": 0.10942697525024414 + }, + { + "epoch": 3.604278564453125e-05, + "model_forward_time": 0.024430513381958008, + "step": 23621 + }, + { + "epoch": 3.604278564453125e-05, + "step": 23621, + "training_step_time": 0.10673141479492188 + }, + { + "epoch": 3.60443115234375e-05, + "model_forward_time": 0.024926185607910156, + "step": 23622 + }, + { + "epoch": 3.60443115234375e-05, + "step": 23622, + "training_step_time": 0.1098170280456543 + }, + { + "epoch": 3.604583740234375e-05, + "model_forward_time": 0.02496647834777832, + "step": 23623 + }, + { + "epoch": 3.604583740234375e-05, + "step": 23623, + "training_step_time": 0.1047065258026123 + }, + { + "epoch": 3.604736328125e-05, + "model_forward_time": 0.02501058578491211, + "step": 23624 + }, + { + "epoch": 3.604736328125e-05, + "step": 23624, + "training_step_time": 0.11269998550415039 + }, + { + "epoch": 3.604888916015625e-05, + "model_forward_time": 0.025124788284301758, + "step": 23625 + }, + { + "epoch": 3.604888916015625e-05, + "step": 23625, + "training_step_time": 0.10673141479492188 + }, + { + "epoch": 3.60504150390625e-05, + "model_forward_time": 0.02520155906677246, + "step": 23626 + }, + { + "epoch": 3.60504150390625e-05, + "step": 23626, + "training_step_time": 0.10758709907531738 + }, + { + "epoch": 3.605194091796875e-05, + "model_forward_time": 0.02508234977722168, + "step": 23627 + }, + { + "epoch": 3.605194091796875e-05, + "step": 23627, + "training_step_time": 0.19153094291687012 + }, + { + "epoch": 3.6053466796875e-05, + "model_forward_time": 0.024097204208374023, + "step": 23628 + }, + { + "epoch": 3.6053466796875e-05, + "step": 23628, + "training_step_time": 0.1426694393157959 + }, + { + "epoch": 3.605499267578125e-05, + "model_forward_time": 0.024979114532470703, + "step": 23629 + }, + { + "epoch": 3.605499267578125e-05, + "step": 23629, + "training_step_time": 0.10771560668945312 + }, + { + "epoch": 3.60565185546875e-05, + "grad_norm": 0.05361221730709076, + "learning_rate": 1.1827991781314667e-05, + "loss": 0.0036, + "step": 23630 + }, + { + "epoch": 3.60565185546875e-05, + "model_forward_time": 0.026069164276123047, + "step": 23630 + }, + { + "epoch": 3.60565185546875e-05, + "step": 23630, + "training_step_time": 0.1053464412689209 + }, + { + "epoch": 3.605804443359375e-05, + "model_forward_time": 0.0251009464263916, + "step": 23631 + }, + { + "epoch": 3.605804443359375e-05, + "step": 23631, + "training_step_time": 0.1287093162536621 + }, + { + "epoch": 3.60595703125e-05, + "model_forward_time": 0.0249478816986084, + "step": 23632 + }, + { + "epoch": 3.60595703125e-05, + "step": 23632, + "training_step_time": 0.10561251640319824 + }, + { + "epoch": 3.606109619140625e-05, + "model_forward_time": 0.024882078170776367, + "step": 23633 + }, + { + "epoch": 3.606109619140625e-05, + "step": 23633, + "training_step_time": 0.12982773780822754 + }, + { + "epoch": 3.60626220703125e-05, + "model_forward_time": 0.02492380142211914, + "step": 23634 + }, + { + "epoch": 3.60626220703125e-05, + "step": 23634, + "training_step_time": 0.11460661888122559 + }, + { + "epoch": 3.606414794921875e-05, + "model_forward_time": 0.02471637725830078, + "step": 23635 + }, + { + "epoch": 3.606414794921875e-05, + "step": 23635, + "training_step_time": 0.1020212173461914 + }, + { + "epoch": 3.6065673828125e-05, + "model_forward_time": 0.025141239166259766, + "step": 23636 + }, + { + "epoch": 3.6065673828125e-05, + "step": 23636, + "training_step_time": 0.10455560684204102 + }, + { + "epoch": 3.606719970703125e-05, + "model_forward_time": 0.02605724334716797, + "step": 23637 + }, + { + "epoch": 3.606719970703125e-05, + "step": 23637, + "training_step_time": 0.1095585823059082 + }, + { + "epoch": 3.60687255859375e-05, + "model_forward_time": 0.025331974029541016, + "step": 23638 + }, + { + "epoch": 3.60687255859375e-05, + "step": 23638, + "training_step_time": 0.11555147171020508 + }, + { + "epoch": 3.607025146484375e-05, + "model_forward_time": 0.024937152862548828, + "step": 23639 + }, + { + "epoch": 3.607025146484375e-05, + "step": 23639, + "training_step_time": 0.11928367614746094 + }, + { + "epoch": 3.607177734375e-05, + "grad_norm": 0.09311248362064362, + "learning_rate": 1.1792416956274444e-05, + "loss": 0.0103, + "step": 23640 + }, + { + "epoch": 3.607177734375e-05, + "model_forward_time": 0.025140047073364258, + "step": 23640 + }, + { + "epoch": 3.607177734375e-05, + "step": 23640, + "training_step_time": 0.21143865585327148 + }, + { + "epoch": 3.607330322265625e-05, + "model_forward_time": 0.024616003036499023, + "step": 23641 + }, + { + "epoch": 3.607330322265625e-05, + "step": 23641, + "training_step_time": 0.12924838066101074 + }, + { + "epoch": 3.60748291015625e-05, + "model_forward_time": 0.024165868759155273, + "step": 23642 + }, + { + "epoch": 3.60748291015625e-05, + "step": 23642, + "training_step_time": 0.10586285591125488 + }, + { + "epoch": 3.607635498046875e-05, + "model_forward_time": 0.025098323822021484, + "step": 23643 + }, + { + "epoch": 3.607635498046875e-05, + "step": 23643, + "training_step_time": 0.1102759838104248 + }, + { + "epoch": 3.6077880859375e-05, + "model_forward_time": 0.025369644165039062, + "step": 23644 + }, + { + "epoch": 3.6077880859375e-05, + "step": 23644, + "training_step_time": 0.10628628730773926 + }, + { + "epoch": 3.607940673828125e-05, + "model_forward_time": 0.02664661407470703, + "step": 23645 + }, + { + "epoch": 3.607940673828125e-05, + "step": 23645, + "training_step_time": 0.18776988983154297 + }, + { + "epoch": 3.60809326171875e-05, + "model_forward_time": 0.024267911911010742, + "step": 23646 + }, + { + "epoch": 3.60809326171875e-05, + "step": 23646, + "training_step_time": 0.20703840255737305 + }, + { + "epoch": 3.608245849609375e-05, + "model_forward_time": 0.02430582046508789, + "step": 23647 + }, + { + "epoch": 3.608245849609375e-05, + "step": 23647, + "training_step_time": 0.19942545890808105 + }, + { + "epoch": 3.6083984375e-05, + "model_forward_time": 0.02398061752319336, + "step": 23648 + }, + { + "epoch": 3.6083984375e-05, + "step": 23648, + "training_step_time": 0.1999495029449463 + }, + { + "epoch": 3.608551025390625e-05, + "model_forward_time": 0.02431631088256836, + "step": 23649 + }, + { + "epoch": 3.608551025390625e-05, + "step": 23649, + "training_step_time": 0.1857318878173828 + }, + { + "epoch": 3.60870361328125e-05, + "grad_norm": 0.18105536699295044, + "learning_rate": 1.1756888557050355e-05, + "loss": 0.0058, + "step": 23650 + }, + { + "epoch": 3.60870361328125e-05, + "model_forward_time": 0.024442672729492188, + "step": 23650 + }, + { + "epoch": 3.60870361328125e-05, + "step": 23650, + "training_step_time": 0.1774148941040039 + }, + { + "epoch": 3.608856201171875e-05, + "model_forward_time": 0.024475812911987305, + "step": 23651 + }, + { + "epoch": 3.608856201171875e-05, + "step": 23651, + "training_step_time": 0.16650772094726562 + }, + { + "epoch": 3.6090087890625e-05, + "model_forward_time": 0.024212360382080078, + "step": 23652 + }, + { + "epoch": 3.6090087890625e-05, + "step": 23652, + "training_step_time": 0.10882425308227539 + }, + { + "epoch": 3.609161376953125e-05, + "model_forward_time": 0.02420783042907715, + "step": 23653 + }, + { + "epoch": 3.609161376953125e-05, + "step": 23653, + "training_step_time": 0.10209846496582031 + }, + { + "epoch": 3.60931396484375e-05, + "model_forward_time": 0.024633169174194336, + "step": 23654 + }, + { + "epoch": 3.60931396484375e-05, + "step": 23654, + "training_step_time": 0.10369515419006348 + }, + { + "epoch": 3.609466552734375e-05, + "model_forward_time": 0.024765968322753906, + "step": 23655 + }, + { + "epoch": 3.609466552734375e-05, + "step": 23655, + "training_step_time": 0.10445833206176758 + }, + { + "epoch": 3.609619140625e-05, + "model_forward_time": 0.02559375762939453, + "step": 23656 + }, + { + "epoch": 3.609619140625e-05, + "step": 23656, + "training_step_time": 0.10603451728820801 + }, + { + "epoch": 3.609771728515625e-05, + "model_forward_time": 0.026569604873657227, + "step": 23657 + }, + { + "epoch": 3.609771728515625e-05, + "step": 23657, + "training_step_time": 0.19005155563354492 + }, + { + "epoch": 3.60992431640625e-05, + "model_forward_time": 0.02618098258972168, + "step": 23658 + }, + { + "epoch": 3.60992431640625e-05, + "step": 23658, + "training_step_time": 0.1537942886352539 + }, + { + "epoch": 3.610076904296875e-05, + "model_forward_time": 0.024334430694580078, + "step": 23659 + }, + { + "epoch": 3.610076904296875e-05, + "step": 23659, + "training_step_time": 0.13201189041137695 + }, + { + "epoch": 3.6102294921875e-05, + "grad_norm": 0.12258859723806381, + "learning_rate": 1.1721406626812764e-05, + "loss": 0.0049, + "step": 23660 + }, + { + "epoch": 3.6102294921875e-05, + "model_forward_time": 0.024624347686767578, + "step": 23660 + }, + { + "epoch": 3.6102294921875e-05, + "step": 23660, + "training_step_time": 0.12688970565795898 + }, + { + "epoch": 3.610382080078125e-05, + "model_forward_time": 0.024461984634399414, + "step": 23661 + }, + { + "epoch": 3.610382080078125e-05, + "step": 23661, + "training_step_time": 0.21181607246398926 + }, + { + "epoch": 3.61053466796875e-05, + "model_forward_time": 0.024272680282592773, + "step": 23662 + }, + { + "epoch": 3.61053466796875e-05, + "step": 23662, + "training_step_time": 0.11909985542297363 + }, + { + "epoch": 3.610687255859375e-05, + "model_forward_time": 0.024024009704589844, + "step": 23663 + }, + { + "epoch": 3.610687255859375e-05, + "step": 23663, + "training_step_time": 0.10667204856872559 + }, + { + "epoch": 3.61083984375e-05, + "model_forward_time": 0.025383949279785156, + "step": 23664 + }, + { + "epoch": 3.61083984375e-05, + "step": 23664, + "training_step_time": 0.10444784164428711 + }, + { + "epoch": 3.610992431640625e-05, + "model_forward_time": 0.024931669235229492, + "step": 23665 + }, + { + "epoch": 3.610992431640625e-05, + "step": 23665, + "training_step_time": 0.1108388900756836 + }, + { + "epoch": 3.61114501953125e-05, + "model_forward_time": 0.024664640426635742, + "step": 23666 + }, + { + "epoch": 3.61114501953125e-05, + "step": 23666, + "training_step_time": 0.10907602310180664 + }, + { + "epoch": 3.611297607421875e-05, + "model_forward_time": 0.026160717010498047, + "step": 23667 + }, + { + "epoch": 3.611297607421875e-05, + "step": 23667, + "training_step_time": 0.10678267478942871 + }, + { + "epoch": 3.6114501953125e-05, + "model_forward_time": 0.023955821990966797, + "step": 23668 + }, + { + "epoch": 3.6114501953125e-05, + "step": 23668, + "training_step_time": 0.1043848991394043 + }, + { + "epoch": 3.611602783203125e-05, + "model_forward_time": 0.02502608299255371, + "step": 23669 + }, + { + "epoch": 3.611602783203125e-05, + "step": 23669, + "training_step_time": 0.10542917251586914 + }, + { + "epoch": 3.61175537109375e-05, + "grad_norm": 0.11462750285863876, + "learning_rate": 1.1685971208675539e-05, + "loss": 0.0065, + "step": 23670 + }, + { + "epoch": 3.61175537109375e-05, + "model_forward_time": 0.0245058536529541, + "step": 23670 + }, + { + "epoch": 3.61175537109375e-05, + "step": 23670, + "training_step_time": 0.18134307861328125 + }, + { + "epoch": 3.611907958984375e-05, + "model_forward_time": 0.02442145347595215, + "step": 23671 + }, + { + "epoch": 3.611907958984375e-05, + "step": 23671, + "training_step_time": 0.1262824535369873 + }, + { + "epoch": 3.612060546875e-05, + "model_forward_time": 0.024320125579833984, + "step": 23672 + }, + { + "epoch": 3.612060546875e-05, + "step": 23672, + "training_step_time": 0.10498189926147461 + }, + { + "epoch": 3.612213134765625e-05, + "model_forward_time": 0.024841785430908203, + "step": 23673 + }, + { + "epoch": 3.612213134765625e-05, + "step": 23673, + "training_step_time": 0.12695598602294922 + }, + { + "epoch": 3.61236572265625e-05, + "model_forward_time": 0.024845600128173828, + "step": 23674 + }, + { + "epoch": 3.61236572265625e-05, + "step": 23674, + "training_step_time": 0.12056541442871094 + }, + { + "epoch": 3.612518310546875e-05, + "model_forward_time": 0.024847745895385742, + "step": 23675 + }, + { + "epoch": 3.612518310546875e-05, + "step": 23675, + "training_step_time": 0.1084890365600586 + }, + { + "epoch": 3.6126708984375e-05, + "model_forward_time": 0.025242328643798828, + "step": 23676 + }, + { + "epoch": 3.6126708984375e-05, + "step": 23676, + "training_step_time": 0.11405134201049805 + }, + { + "epoch": 3.612823486328125e-05, + "model_forward_time": 0.02510547637939453, + "step": 23677 + }, + { + "epoch": 3.612823486328125e-05, + "step": 23677, + "training_step_time": 0.11397027969360352 + }, + { + "epoch": 3.61297607421875e-05, + "model_forward_time": 0.025266170501708984, + "step": 23678 + }, + { + "epoch": 3.61297607421875e-05, + "step": 23678, + "training_step_time": 0.10998272895812988 + }, + { + "epoch": 3.613128662109375e-05, + "model_forward_time": 0.025412797927856445, + "step": 23679 + }, + { + "epoch": 3.613128662109375e-05, + "step": 23679, + "training_step_time": 0.11212825775146484 + }, + { + "epoch": 3.61328125e-05, + "grad_norm": 0.06080739200115204, + "learning_rate": 1.1650582345696088e-05, + "loss": 0.0114, + "step": 23680 + }, + { + "epoch": 3.61328125e-05, + "model_forward_time": 0.025837421417236328, + "step": 23680 + }, + { + "epoch": 3.61328125e-05, + "step": 23680, + "training_step_time": 0.10612130165100098 + }, + { + "epoch": 3.613433837890625e-05, + "model_forward_time": 0.025225400924682617, + "step": 23681 + }, + { + "epoch": 3.613433837890625e-05, + "step": 23681, + "training_step_time": 0.10998797416687012 + }, + { + "epoch": 3.61358642578125e-05, + "model_forward_time": 0.02528214454650879, + "step": 23682 + }, + { + "epoch": 3.61358642578125e-05, + "step": 23682, + "training_step_time": 0.11130142211914062 + }, + { + "epoch": 3.613739013671875e-05, + "model_forward_time": 0.024903059005737305, + "step": 23683 + }, + { + "epoch": 3.613739013671875e-05, + "step": 23683, + "training_step_time": 0.11158251762390137 + }, + { + "epoch": 3.6138916015625e-05, + "model_forward_time": 0.025099515914916992, + "step": 23684 + }, + { + "epoch": 3.6138916015625e-05, + "step": 23684, + "training_step_time": 0.11320805549621582 + }, + { + "epoch": 3.614044189453125e-05, + "model_forward_time": 0.025210857391357422, + "step": 23685 + }, + { + "epoch": 3.614044189453125e-05, + "step": 23685, + "training_step_time": 0.10645222663879395 + }, + { + "epoch": 3.61419677734375e-05, + "model_forward_time": 0.02637791633605957, + "step": 23686 + }, + { + "epoch": 3.61419677734375e-05, + "step": 23686, + "training_step_time": 0.1257326602935791 + }, + { + "epoch": 3.614349365234375e-05, + "model_forward_time": 0.025270700454711914, + "step": 23687 + }, + { + "epoch": 3.614349365234375e-05, + "step": 23687, + "training_step_time": 0.11284756660461426 + }, + { + "epoch": 3.614501953125e-05, + "model_forward_time": 0.025583267211914062, + "step": 23688 + }, + { + "epoch": 3.614501953125e-05, + "step": 23688, + "training_step_time": 0.10629606246948242 + }, + { + "epoch": 3.614654541015625e-05, + "model_forward_time": 0.025105953216552734, + "step": 23689 + }, + { + "epoch": 3.614654541015625e-05, + "step": 23689, + "training_step_time": 0.10973095893859863 + }, + { + "epoch": 3.61480712890625e-05, + "grad_norm": 0.1435283124446869, + "learning_rate": 1.16152400808752e-05, + "loss": 0.004, + "step": 23690 + }, + { + "epoch": 3.61480712890625e-05, + "model_forward_time": 0.025171995162963867, + "step": 23690 + }, + { + "epoch": 3.61480712890625e-05, + "step": 23690, + "training_step_time": 0.1071023941040039 + }, + { + "epoch": 3.614959716796875e-05, + "model_forward_time": 0.02489447593688965, + "step": 23691 + }, + { + "epoch": 3.614959716796875e-05, + "step": 23691, + "training_step_time": 0.10735821723937988 + }, + { + "epoch": 3.6151123046875e-05, + "model_forward_time": 0.024934768676757812, + "step": 23692 + }, + { + "epoch": 3.6151123046875e-05, + "step": 23692, + "training_step_time": 0.10444879531860352 + }, + { + "epoch": 3.615264892578125e-05, + "model_forward_time": 0.02548670768737793, + "step": 23693 + }, + { + "epoch": 3.615264892578125e-05, + "step": 23693, + "training_step_time": 0.10754895210266113 + }, + { + "epoch": 3.61541748046875e-05, + "model_forward_time": 0.02497076988220215, + "step": 23694 + }, + { + "epoch": 3.61541748046875e-05, + "step": 23694, + "training_step_time": 0.1065518856048584 + }, + { + "epoch": 3.615570068359375e-05, + "model_forward_time": 0.02497124671936035, + "step": 23695 + }, + { + "epoch": 3.615570068359375e-05, + "step": 23695, + "training_step_time": 0.1099853515625 + }, + { + "epoch": 3.61572265625e-05, + "model_forward_time": 0.025304317474365234, + "step": 23696 + }, + { + "epoch": 3.61572265625e-05, + "step": 23696, + "training_step_time": 0.1062924861907959 + }, + { + "epoch": 3.615875244140625e-05, + "model_forward_time": 0.024989843368530273, + "step": 23697 + }, + { + "epoch": 3.615875244140625e-05, + "step": 23697, + "training_step_time": 0.10691165924072266 + }, + { + "epoch": 3.61602783203125e-05, + "model_forward_time": 0.025228261947631836, + "step": 23698 + }, + { + "epoch": 3.61602783203125e-05, + "step": 23698, + "training_step_time": 0.10608458518981934 + }, + { + "epoch": 3.616180419921875e-05, + "model_forward_time": 0.025272607803344727, + "step": 23699 + }, + { + "epoch": 3.616180419921875e-05, + "step": 23699, + "training_step_time": 0.10791301727294922 + }, + { + "epoch": 3.6163330078125e-05, + "grad_norm": 0.2573447525501251, + "learning_rate": 1.157994445715706e-05, + "loss": 0.0072, + "step": 23700 + }, + { + "epoch": 3.6163330078125e-05, + "model_forward_time": 0.02500152587890625, + "step": 23700 + }, + { + "epoch": 3.6163330078125e-05, + "step": 23700, + "training_step_time": 0.10466694831848145 + }, + { + "epoch": 3.616485595703125e-05, + "model_forward_time": 0.02492690086364746, + "step": 23701 + }, + { + "epoch": 3.616485595703125e-05, + "step": 23701, + "training_step_time": 0.10827350616455078 + }, + { + "epoch": 3.61663818359375e-05, + "model_forward_time": 0.02497076988220215, + "step": 23702 + }, + { + "epoch": 3.61663818359375e-05, + "step": 23702, + "training_step_time": 0.10385775566101074 + }, + { + "epoch": 3.616790771484375e-05, + "model_forward_time": 0.025234699249267578, + "step": 23703 + }, + { + "epoch": 3.616790771484375e-05, + "step": 23703, + "training_step_time": 0.10654640197753906 + }, + { + "epoch": 3.616943359375e-05, + "model_forward_time": 0.025424480438232422, + "step": 23704 + }, + { + "epoch": 3.616943359375e-05, + "step": 23704, + "training_step_time": 0.10701417922973633 + }, + { + "epoch": 3.617095947265625e-05, + "model_forward_time": 0.025267839431762695, + "step": 23705 + }, + { + "epoch": 3.617095947265625e-05, + "step": 23705, + "training_step_time": 0.10860729217529297 + }, + { + "epoch": 3.61724853515625e-05, + "model_forward_time": 0.025504589080810547, + "step": 23706 + }, + { + "epoch": 3.61724853515625e-05, + "step": 23706, + "training_step_time": 0.1421358585357666 + }, + { + "epoch": 3.617401123046875e-05, + "model_forward_time": 0.027516841888427734, + "step": 23707 + }, + { + "epoch": 3.617401123046875e-05, + "step": 23707, + "training_step_time": 0.11456799507141113 + }, + { + "epoch": 3.6175537109375e-05, + "model_forward_time": 0.0250546932220459, + "step": 23708 + }, + { + "epoch": 3.6175537109375e-05, + "step": 23708, + "training_step_time": 0.12677907943725586 + }, + { + "epoch": 3.617706298828125e-05, + "model_forward_time": 0.024838924407958984, + "step": 23709 + }, + { + "epoch": 3.617706298828125e-05, + "step": 23709, + "training_step_time": 0.15665555000305176 + }, + { + "epoch": 3.61785888671875e-05, + "grad_norm": 0.13485924899578094, + "learning_rate": 1.1544695517429178e-05, + "loss": 0.0042, + "step": 23710 + }, + { + "epoch": 3.61785888671875e-05, + "model_forward_time": 0.024256467819213867, + "step": 23710 + }, + { + "epoch": 3.61785888671875e-05, + "step": 23710, + "training_step_time": 0.22320318222045898 + }, + { + "epoch": 3.618011474609375e-05, + "model_forward_time": 0.023978710174560547, + "step": 23711 + }, + { + "epoch": 3.618011474609375e-05, + "step": 23711, + "training_step_time": 0.11704254150390625 + }, + { + "epoch": 3.6181640625e-05, + "model_forward_time": 0.024837255477905273, + "step": 23712 + }, + { + "epoch": 3.6181640625e-05, + "step": 23712, + "training_step_time": 0.10869765281677246 + }, + { + "epoch": 3.618316650390625e-05, + "model_forward_time": 0.024811744689941406, + "step": 23713 + }, + { + "epoch": 3.618316650390625e-05, + "step": 23713, + "training_step_time": 0.10901141166687012 + }, + { + "epoch": 3.61846923828125e-05, + "model_forward_time": 0.024979829788208008, + "step": 23714 + }, + { + "epoch": 3.61846923828125e-05, + "step": 23714, + "training_step_time": 0.11147952079772949 + }, + { + "epoch": 3.618621826171875e-05, + "model_forward_time": 0.02472519874572754, + "step": 23715 + }, + { + "epoch": 3.618621826171875e-05, + "step": 23715, + "training_step_time": 0.11027288436889648 + }, + { + "epoch": 3.6187744140625e-05, + "model_forward_time": 0.0242159366607666, + "step": 23716 + }, + { + "epoch": 3.6187744140625e-05, + "step": 23716, + "training_step_time": 0.11115813255310059 + }, + { + "epoch": 3.618927001953125e-05, + "model_forward_time": 0.024096965789794922, + "step": 23717 + }, + { + "epoch": 3.618927001953125e-05, + "step": 23717, + "training_step_time": 0.10878133773803711 + }, + { + "epoch": 3.61907958984375e-05, + "model_forward_time": 0.026072263717651367, + "step": 23718 + }, + { + "epoch": 3.61907958984375e-05, + "step": 23718, + "training_step_time": 0.10933971405029297 + }, + { + "epoch": 3.619232177734375e-05, + "model_forward_time": 0.025133132934570312, + "step": 23719 + }, + { + "epoch": 3.619232177734375e-05, + "step": 23719, + "training_step_time": 0.19052672386169434 + }, + { + "epoch": 3.619384765625e-05, + "grad_norm": 0.11386612057685852, + "learning_rate": 1.1509493304522329e-05, + "loss": 0.0046, + "step": 23720 + }, + { + "epoch": 3.619384765625e-05, + "model_forward_time": 0.024660110473632812, + "step": 23720 + }, + { + "epoch": 3.619384765625e-05, + "step": 23720, + "training_step_time": 0.11484551429748535 + }, + { + "epoch": 3.619537353515625e-05, + "model_forward_time": 0.02398228645324707, + "step": 23721 + }, + { + "epoch": 3.619537353515625e-05, + "step": 23721, + "training_step_time": 0.10903787612915039 + }, + { + "epoch": 3.61968994140625e-05, + "model_forward_time": 0.024988412857055664, + "step": 23722 + }, + { + "epoch": 3.61968994140625e-05, + "step": 23722, + "training_step_time": 0.10947918891906738 + }, + { + "epoch": 3.619842529296875e-05, + "model_forward_time": 0.025120019912719727, + "step": 23723 + }, + { + "epoch": 3.619842529296875e-05, + "step": 23723, + "training_step_time": 0.12113475799560547 + }, + { + "epoch": 3.6199951171875e-05, + "model_forward_time": 0.025365114212036133, + "step": 23724 + }, + { + "epoch": 3.6199951171875e-05, + "step": 23724, + "training_step_time": 0.10991501808166504 + }, + { + "epoch": 3.620147705078125e-05, + "model_forward_time": 0.025114059448242188, + "step": 23725 + }, + { + "epoch": 3.620147705078125e-05, + "step": 23725, + "training_step_time": 0.15848755836486816 + }, + { + "epoch": 3.62030029296875e-05, + "model_forward_time": 0.024485349655151367, + "step": 23726 + }, + { + "epoch": 3.62030029296875e-05, + "step": 23726, + "training_step_time": 0.10464096069335938 + }, + { + "epoch": 3.620452880859375e-05, + "model_forward_time": 0.026644468307495117, + "step": 23727 + }, + { + "epoch": 3.620452880859375e-05, + "step": 23727, + "training_step_time": 0.10659384727478027 + }, + { + "epoch": 3.62060546875e-05, + "model_forward_time": 0.025087833404541016, + "step": 23728 + }, + { + "epoch": 3.62060546875e-05, + "step": 23728, + "training_step_time": 0.1043539047241211 + }, + { + "epoch": 3.620758056640625e-05, + "model_forward_time": 0.024904727935791016, + "step": 23729 + }, + { + "epoch": 3.620758056640625e-05, + "step": 23729, + "training_step_time": 0.1120903491973877 + }, + { + "epoch": 3.62091064453125e-05, + "grad_norm": 0.34860873222351074, + "learning_rate": 1.1474337861210543e-05, + "loss": 0.0126, + "step": 23730 + }, + { + "epoch": 3.62091064453125e-05, + "model_forward_time": 0.02493906021118164, + "step": 23730 + }, + { + "epoch": 3.62091064453125e-05, + "step": 23730, + "training_step_time": 0.10784339904785156 + }, + { + "epoch": 3.621063232421875e-05, + "model_forward_time": 0.025957345962524414, + "step": 23731 + }, + { + "epoch": 3.621063232421875e-05, + "step": 23731, + "training_step_time": 0.10765457153320312 + }, + { + "epoch": 3.6212158203125e-05, + "model_forward_time": 0.025282621383666992, + "step": 23732 + }, + { + "epoch": 3.6212158203125e-05, + "step": 23732, + "training_step_time": 0.1086280345916748 + }, + { + "epoch": 3.621368408203125e-05, + "model_forward_time": 0.025543212890625, + "step": 23733 + }, + { + "epoch": 3.621368408203125e-05, + "step": 23733, + "training_step_time": 0.11067533493041992 + }, + { + "epoch": 3.62152099609375e-05, + "model_forward_time": 0.025578022003173828, + "step": 23734 + }, + { + "epoch": 3.62152099609375e-05, + "step": 23734, + "training_step_time": 0.1094663143157959 + }, + { + "epoch": 3.621673583984375e-05, + "model_forward_time": 0.02633523941040039, + "step": 23735 + }, + { + "epoch": 3.621673583984375e-05, + "step": 23735, + "training_step_time": 0.11171650886535645 + }, + { + "epoch": 3.621826171875e-05, + "model_forward_time": 0.02539205551147461, + "step": 23736 + }, + { + "epoch": 3.621826171875e-05, + "step": 23736, + "training_step_time": 0.10599112510681152 + }, + { + "epoch": 3.621978759765625e-05, + "model_forward_time": 0.02544379234313965, + "step": 23737 + }, + { + "epoch": 3.621978759765625e-05, + "step": 23737, + "training_step_time": 0.10599732398986816 + }, + { + "epoch": 3.62213134765625e-05, + "model_forward_time": 0.02514195442199707, + "step": 23738 + }, + { + "epoch": 3.62213134765625e-05, + "step": 23738, + "training_step_time": 0.10503172874450684 + }, + { + "epoch": 3.622283935546875e-05, + "model_forward_time": 0.025303125381469727, + "step": 23739 + }, + { + "epoch": 3.622283935546875e-05, + "step": 23739, + "training_step_time": 0.10688209533691406 + }, + { + "epoch": 3.6224365234375e-05, + "grad_norm": 0.10724588483572006, + "learning_rate": 1.143922923021099e-05, + "loss": 0.0042, + "step": 23740 + }, + { + "epoch": 3.6224365234375e-05, + "model_forward_time": 0.02524280548095703, + "step": 23740 + }, + { + "epoch": 3.6224365234375e-05, + "step": 23740, + "training_step_time": 0.10619878768920898 + }, + { + "epoch": 3.622589111328125e-05, + "model_forward_time": 0.024982213973999023, + "step": 23741 + }, + { + "epoch": 3.622589111328125e-05, + "step": 23741, + "training_step_time": 0.10676074028015137 + }, + { + "epoch": 3.62274169921875e-05, + "model_forward_time": 0.024823427200317383, + "step": 23742 + }, + { + "epoch": 3.62274169921875e-05, + "step": 23742, + "training_step_time": 0.10700488090515137 + }, + { + "epoch": 3.622894287109375e-05, + "model_forward_time": 0.02497386932373047, + "step": 23743 + }, + { + "epoch": 3.622894287109375e-05, + "step": 23743, + "training_step_time": 0.10875320434570312 + }, + { + "epoch": 3.623046875e-05, + "model_forward_time": 0.025289297103881836, + "step": 23744 + }, + { + "epoch": 3.623046875e-05, + "step": 23744, + "training_step_time": 0.1081380844116211 + }, + { + "epoch": 3.623199462890625e-05, + "model_forward_time": 0.024966716766357422, + "step": 23745 + }, + { + "epoch": 3.623199462890625e-05, + "step": 23745, + "training_step_time": 0.17196917533874512 + }, + { + "epoch": 3.62335205078125e-05, + "model_forward_time": 0.02452874183654785, + "step": 23746 + }, + { + "epoch": 3.62335205078125e-05, + "step": 23746, + "training_step_time": 0.2034451961517334 + }, + { + "epoch": 3.623504638671875e-05, + "model_forward_time": 0.02291083335876465, + "step": 23747 + }, + { + "epoch": 3.623504638671875e-05, + "step": 23747, + "training_step_time": 0.1963338851928711 + }, + { + "epoch": 3.6236572265625e-05, + "model_forward_time": 0.02420520782470703, + "step": 23748 + }, + { + "epoch": 3.6236572265625e-05, + "step": 23748, + "training_step_time": 0.18754792213439941 + }, + { + "epoch": 3.623809814453125e-05, + "model_forward_time": 0.023858070373535156, + "step": 23749 + }, + { + "epoch": 3.623809814453125e-05, + "step": 23749, + "training_step_time": 0.16849780082702637 + }, + { + "epoch": 3.62396240234375e-05, + "grad_norm": 0.0854354128241539, + "learning_rate": 1.1404167454183957e-05, + "loss": 0.005, + "step": 23750 + }, + { + "epoch": 3.62396240234375e-05, + "model_forward_time": 0.02428889274597168, + "step": 23750 + }, + { + "epoch": 3.62396240234375e-05, + "step": 23750, + "training_step_time": 0.21044492721557617 + }, + { + "epoch": 3.624114990234375e-05, + "model_forward_time": 0.02402639389038086, + "step": 23751 + }, + { + "epoch": 3.624114990234375e-05, + "step": 23751, + "training_step_time": 0.16014957427978516 + }, + { + "epoch": 3.624267578125e-05, + "model_forward_time": 0.024404048919677734, + "step": 23752 + }, + { + "epoch": 3.624267578125e-05, + "step": 23752, + "training_step_time": 0.1611499786376953 + }, + { + "epoch": 3.624420166015625e-05, + "model_forward_time": 0.02448582649230957, + "step": 23753 + }, + { + "epoch": 3.624420166015625e-05, + "step": 23753, + "training_step_time": 0.16031861305236816 + }, + { + "epoch": 3.62457275390625e-05, + "model_forward_time": 0.024632930755615234, + "step": 23754 + }, + { + "epoch": 3.62457275390625e-05, + "step": 23754, + "training_step_time": 0.16802358627319336 + }, + { + "epoch": 3.624725341796875e-05, + "model_forward_time": 0.024485349655151367, + "step": 23755 + }, + { + "epoch": 3.624725341796875e-05, + "step": 23755, + "training_step_time": 0.15432405471801758 + }, + { + "epoch": 3.6248779296875e-05, + "model_forward_time": 0.023845195770263672, + "step": 23756 + }, + { + "epoch": 3.6248779296875e-05, + "step": 23756, + "training_step_time": 0.10907173156738281 + }, + { + "epoch": 3.625030517578125e-05, + "model_forward_time": 0.024504423141479492, + "step": 23757 + }, + { + "epoch": 3.625030517578125e-05, + "step": 23757, + "training_step_time": 0.10340380668640137 + }, + { + "epoch": 3.62518310546875e-05, + "model_forward_time": 0.0244903564453125, + "step": 23758 + }, + { + "epoch": 3.62518310546875e-05, + "step": 23758, + "training_step_time": 0.10374259948730469 + }, + { + "epoch": 3.625335693359375e-05, + "model_forward_time": 0.025090456008911133, + "step": 23759 + }, + { + "epoch": 3.625335693359375e-05, + "step": 23759, + "training_step_time": 0.10402321815490723 + }, + { + "epoch": 3.62548828125e-05, + "grad_norm": 0.13724148273468018, + "learning_rate": 1.1369152575732822e-05, + "loss": 0.0066, + "step": 23760 + }, + { + "epoch": 3.62548828125e-05, + "model_forward_time": 0.02530074119567871, + "step": 23760 + }, + { + "epoch": 3.62548828125e-05, + "step": 23760, + "training_step_time": 0.10542869567871094 + }, + { + "epoch": 3.625640869140625e-05, + "model_forward_time": 0.024912595748901367, + "step": 23761 + }, + { + "epoch": 3.625640869140625e-05, + "step": 23761, + "training_step_time": 0.10939908027648926 + }, + { + "epoch": 3.62579345703125e-05, + "model_forward_time": 0.02526402473449707, + "step": 23762 + }, + { + "epoch": 3.62579345703125e-05, + "step": 23762, + "training_step_time": 0.10358381271362305 + }, + { + "epoch": 3.625946044921875e-05, + "model_forward_time": 0.026082754135131836, + "step": 23763 + }, + { + "epoch": 3.625946044921875e-05, + "step": 23763, + "training_step_time": 0.1731853485107422 + }, + { + "epoch": 3.6260986328125e-05, + "model_forward_time": 0.024230003356933594, + "step": 23764 + }, + { + "epoch": 3.6260986328125e-05, + "step": 23764, + "training_step_time": 0.1359238624572754 + }, + { + "epoch": 3.626251220703125e-05, + "model_forward_time": 0.02398991584777832, + "step": 23765 + }, + { + "epoch": 3.626251220703125e-05, + "step": 23765, + "training_step_time": 0.10796141624450684 + }, + { + "epoch": 3.62640380859375e-05, + "model_forward_time": 0.025463342666625977, + "step": 23766 + }, + { + "epoch": 3.62640380859375e-05, + "step": 23766, + "training_step_time": 0.13053488731384277 + }, + { + "epoch": 3.626556396484375e-05, + "model_forward_time": 0.024906396865844727, + "step": 23767 + }, + { + "epoch": 3.626556396484375e-05, + "step": 23767, + "training_step_time": 0.11472010612487793 + }, + { + "epoch": 3.626708984375e-05, + "model_forward_time": 0.0249788761138916, + "step": 23768 + }, + { + "epoch": 3.626708984375e-05, + "step": 23768, + "training_step_time": 0.10928010940551758 + }, + { + "epoch": 3.626861572265625e-05, + "model_forward_time": 0.02431035041809082, + "step": 23769 + }, + { + "epoch": 3.626861572265625e-05, + "step": 23769, + "training_step_time": 0.10584259033203125 + }, + { + "epoch": 3.62701416015625e-05, + "grad_norm": 0.1321377158164978, + "learning_rate": 1.133418463740395e-05, + "loss": 0.0038, + "step": 23770 + }, + { + "epoch": 3.62701416015625e-05, + "model_forward_time": 0.024942398071289062, + "step": 23770 + }, + { + "epoch": 3.62701416015625e-05, + "step": 23770, + "training_step_time": 0.10840415954589844 + }, + { + "epoch": 3.627166748046875e-05, + "model_forward_time": 0.02516007423400879, + "step": 23771 + }, + { + "epoch": 3.627166748046875e-05, + "step": 23771, + "training_step_time": 0.10595846176147461 + }, + { + "epoch": 3.6273193359375e-05, + "model_forward_time": 0.024936437606811523, + "step": 23772 + }, + { + "epoch": 3.6273193359375e-05, + "step": 23772, + "training_step_time": 0.10771822929382324 + }, + { + "epoch": 3.627471923828125e-05, + "model_forward_time": 0.0252683162689209, + "step": 23773 + }, + { + "epoch": 3.627471923828125e-05, + "step": 23773, + "training_step_time": 0.10668611526489258 + }, + { + "epoch": 3.62762451171875e-05, + "model_forward_time": 0.025457143783569336, + "step": 23774 + }, + { + "epoch": 3.62762451171875e-05, + "step": 23774, + "training_step_time": 0.12391829490661621 + }, + { + "epoch": 3.627777099609375e-05, + "model_forward_time": 0.02526545524597168, + "step": 23775 + }, + { + "epoch": 3.627777099609375e-05, + "step": 23775, + "training_step_time": 0.11473202705383301 + }, + { + "epoch": 3.6279296875e-05, + "model_forward_time": 0.025341033935546875, + "step": 23776 + }, + { + "epoch": 3.6279296875e-05, + "step": 23776, + "training_step_time": 0.1972365379333496 + }, + { + "epoch": 3.628082275390625e-05, + "model_forward_time": 0.02441263198852539, + "step": 23777 + }, + { + "epoch": 3.628082275390625e-05, + "step": 23777, + "training_step_time": 0.11819148063659668 + }, + { + "epoch": 3.62823486328125e-05, + "model_forward_time": 0.024697065353393555, + "step": 23778 + }, + { + "epoch": 3.62823486328125e-05, + "step": 23778, + "training_step_time": 0.12651538848876953 + }, + { + "epoch": 3.628387451171875e-05, + "model_forward_time": 0.024565458297729492, + "step": 23779 + }, + { + "epoch": 3.628387451171875e-05, + "step": 23779, + "training_step_time": 0.11623597145080566 + }, + { + "epoch": 3.6285400390625e-05, + "grad_norm": 0.2529659867286682, + "learning_rate": 1.1299263681686706e-05, + "loss": 0.0058, + "step": 23780 + }, + { + "epoch": 3.6285400390625e-05, + "model_forward_time": 0.02516651153564453, + "step": 23780 + }, + { + "epoch": 3.6285400390625e-05, + "step": 23780, + "training_step_time": 0.10579752922058105 + }, + { + "epoch": 3.628692626953125e-05, + "model_forward_time": 0.025121212005615234, + "step": 23781 + }, + { + "epoch": 3.628692626953125e-05, + "step": 23781, + "training_step_time": 0.10657811164855957 + }, + { + "epoch": 3.62884521484375e-05, + "model_forward_time": 0.025098323822021484, + "step": 23782 + }, + { + "epoch": 3.62884521484375e-05, + "step": 23782, + "training_step_time": 0.10640263557434082 + }, + { + "epoch": 3.628997802734375e-05, + "model_forward_time": 0.025368452072143555, + "step": 23783 + }, + { + "epoch": 3.628997802734375e-05, + "step": 23783, + "training_step_time": 0.10695052146911621 + }, + { + "epoch": 3.629150390625e-05, + "model_forward_time": 0.025005578994750977, + "step": 23784 + }, + { + "epoch": 3.629150390625e-05, + "step": 23784, + "training_step_time": 0.10613131523132324 + }, + { + "epoch": 3.629302978515625e-05, + "model_forward_time": 0.025133609771728516, + "step": 23785 + }, + { + "epoch": 3.629302978515625e-05, + "step": 23785, + "training_step_time": 0.1042792797088623 + }, + { + "epoch": 3.62945556640625e-05, + "model_forward_time": 0.027497053146362305, + "step": 23786 + }, + { + "epoch": 3.62945556640625e-05, + "step": 23786, + "training_step_time": 0.10802006721496582 + }, + { + "epoch": 3.629608154296875e-05, + "model_forward_time": 0.024399280548095703, + "step": 23787 + }, + { + "epoch": 3.629608154296875e-05, + "step": 23787, + "training_step_time": 0.1041872501373291 + }, + { + "epoch": 3.6297607421875e-05, + "model_forward_time": 0.025021791458129883, + "step": 23788 + }, + { + "epoch": 3.6297607421875e-05, + "step": 23788, + "training_step_time": 0.1070566177368164 + }, + { + "epoch": 3.629913330078125e-05, + "model_forward_time": 0.025044918060302734, + "step": 23789 + }, + { + "epoch": 3.629913330078125e-05, + "step": 23789, + "training_step_time": 0.10471248626708984 + }, + { + "epoch": 3.63006591796875e-05, + "grad_norm": 0.15855908393859863, + "learning_rate": 1.1264389751013326e-05, + "loss": 0.0045, + "step": 23790 + }, + { + "epoch": 3.63006591796875e-05, + "model_forward_time": 0.025020599365234375, + "step": 23790 + }, + { + "epoch": 3.63006591796875e-05, + "step": 23790, + "training_step_time": 0.10537505149841309 + }, + { + "epoch": 3.630218505859375e-05, + "model_forward_time": 0.025368213653564453, + "step": 23791 + }, + { + "epoch": 3.630218505859375e-05, + "step": 23791, + "training_step_time": 0.10362935066223145 + }, + { + "epoch": 3.63037109375e-05, + "model_forward_time": 0.025111913681030273, + "step": 23792 + }, + { + "epoch": 3.63037109375e-05, + "step": 23792, + "training_step_time": 0.10421872138977051 + }, + { + "epoch": 3.630523681640625e-05, + "model_forward_time": 0.024875879287719727, + "step": 23793 + }, + { + "epoch": 3.630523681640625e-05, + "step": 23793, + "training_step_time": 0.10898947715759277 + }, + { + "epoch": 3.63067626953125e-05, + "model_forward_time": 0.024104833602905273, + "step": 23794 + }, + { + "epoch": 3.63067626953125e-05, + "step": 23794, + "training_step_time": 0.10918593406677246 + }, + { + "epoch": 3.630828857421875e-05, + "model_forward_time": 0.02530384063720703, + "step": 23795 + }, + { + "epoch": 3.630828857421875e-05, + "step": 23795, + "training_step_time": 0.10532879829406738 + }, + { + "epoch": 3.6309814453125e-05, + "model_forward_time": 0.024891376495361328, + "step": 23796 + }, + { + "epoch": 3.6309814453125e-05, + "step": 23796, + "training_step_time": 0.1112673282623291 + }, + { + "epoch": 3.631134033203125e-05, + "model_forward_time": 0.024657487869262695, + "step": 23797 + }, + { + "epoch": 3.631134033203125e-05, + "step": 23797, + "training_step_time": 0.1267855167388916 + }, + { + "epoch": 3.63128662109375e-05, + "model_forward_time": 0.025045394897460938, + "step": 23798 + }, + { + "epoch": 3.63128662109375e-05, + "step": 23798, + "training_step_time": 0.1914212703704834 + }, + { + "epoch": 3.631439208984375e-05, + "model_forward_time": 0.024173974990844727, + "step": 23799 + }, + { + "epoch": 3.631439208984375e-05, + "step": 23799, + "training_step_time": 0.15396642684936523 + }, + { + "epoch": 3.631591796875e-05, + "grad_norm": 0.07535918056964874, + "learning_rate": 1.1229562887758926e-05, + "loss": 0.0061, + "step": 23800 + }, + { + "epoch": 3.631591796875e-05, + "model_forward_time": 0.023712158203125, + "step": 23800 + }, + { + "epoch": 3.631591796875e-05, + "step": 23800, + "training_step_time": 0.10831570625305176 + }, + { + "epoch": 3.631744384765625e-05, + "model_forward_time": 0.024420738220214844, + "step": 23801 + }, + { + "epoch": 3.631744384765625e-05, + "step": 23801, + "training_step_time": 0.1759941577911377 + }, + { + "epoch": 3.63189697265625e-05, + "model_forward_time": 0.024541139602661133, + "step": 23802 + }, + { + "epoch": 3.63189697265625e-05, + "step": 23802, + "training_step_time": 0.18945622444152832 + }, + { + "epoch": 3.632049560546875e-05, + "model_forward_time": 0.024092674255371094, + "step": 23803 + }, + { + "epoch": 3.632049560546875e-05, + "step": 23803, + "training_step_time": 0.11419963836669922 + }, + { + "epoch": 3.6322021484375e-05, + "model_forward_time": 0.024261951446533203, + "step": 23804 + }, + { + "epoch": 3.6322021484375e-05, + "step": 23804, + "training_step_time": 0.14516043663024902 + }, + { + "epoch": 3.632354736328125e-05, + "model_forward_time": 0.024548768997192383, + "step": 23805 + }, + { + "epoch": 3.632354736328125e-05, + "step": 23805, + "training_step_time": 0.10465645790100098 + }, + { + "epoch": 3.63250732421875e-05, + "model_forward_time": 0.025165319442749023, + "step": 23806 + }, + { + "epoch": 3.63250732421875e-05, + "step": 23806, + "training_step_time": 0.10359454154968262 + }, + { + "epoch": 3.632659912109375e-05, + "model_forward_time": 0.025231361389160156, + "step": 23807 + }, + { + "epoch": 3.632659912109375e-05, + "step": 23807, + "training_step_time": 0.10949087142944336 + }, + { + "epoch": 3.6328125e-05, + "model_forward_time": 0.025070905685424805, + "step": 23808 + }, + { + "epoch": 3.6328125e-05, + "step": 23808, + "training_step_time": 0.10441017150878906 + }, + { + "epoch": 3.632965087890625e-05, + "model_forward_time": 0.02825617790222168, + "step": 23809 + }, + { + "epoch": 3.632965087890625e-05, + "step": 23809, + "training_step_time": 0.10683703422546387 + }, + { + "epoch": 3.63311767578125e-05, + "grad_norm": 0.09677168726921082, + "learning_rate": 1.1194783134241437e-05, + "loss": 0.005, + "step": 23810 + }, + { + "epoch": 3.63311767578125e-05, + "model_forward_time": 0.024693727493286133, + "step": 23810 + }, + { + "epoch": 3.63311767578125e-05, + "step": 23810, + "training_step_time": 0.19266581535339355 + }, + { + "epoch": 3.633270263671875e-05, + "model_forward_time": 0.0240023136138916, + "step": 23811 + }, + { + "epoch": 3.633270263671875e-05, + "step": 23811, + "training_step_time": 0.14217042922973633 + }, + { + "epoch": 3.6334228515625e-05, + "model_forward_time": 0.02397942543029785, + "step": 23812 + }, + { + "epoch": 3.6334228515625e-05, + "step": 23812, + "training_step_time": 0.10151433944702148 + }, + { + "epoch": 3.633575439453125e-05, + "model_forward_time": 0.024928569793701172, + "step": 23813 + }, + { + "epoch": 3.633575439453125e-05, + "step": 23813, + "training_step_time": 0.12736773490905762 + }, + { + "epoch": 3.63372802734375e-05, + "model_forward_time": 0.02482891082763672, + "step": 23814 + }, + { + "epoch": 3.63372802734375e-05, + "step": 23814, + "training_step_time": 0.13560080528259277 + }, + { + "epoch": 3.633880615234375e-05, + "model_forward_time": 0.024355173110961914, + "step": 23815 + }, + { + "epoch": 3.633880615234375e-05, + "step": 23815, + "training_step_time": 0.1795063018798828 + }, + { + "epoch": 3.634033203125e-05, + "model_forward_time": 0.024530887603759766, + "step": 23816 + }, + { + "epoch": 3.634033203125e-05, + "step": 23816, + "training_step_time": 0.12770938873291016 + }, + { + "epoch": 3.634185791015625e-05, + "model_forward_time": 0.023942947387695312, + "step": 23817 + }, + { + "epoch": 3.634185791015625e-05, + "step": 23817, + "training_step_time": 0.11959004402160645 + }, + { + "epoch": 3.63433837890625e-05, + "model_forward_time": 0.024176359176635742, + "step": 23818 + }, + { + "epoch": 3.63433837890625e-05, + "step": 23818, + "training_step_time": 0.11889529228210449 + }, + { + "epoch": 3.634490966796875e-05, + "model_forward_time": 0.025031089782714844, + "step": 23819 + }, + { + "epoch": 3.634490966796875e-05, + "step": 23819, + "training_step_time": 0.11275529861450195 + }, + { + "epoch": 3.6346435546875e-05, + "grad_norm": 0.2195141464471817, + "learning_rate": 1.1160050532721528e-05, + "loss": 0.0041, + "step": 23820 + }, + { + "epoch": 3.6346435546875e-05, + "model_forward_time": 0.02460026741027832, + "step": 23820 + }, + { + "epoch": 3.6346435546875e-05, + "step": 23820, + "training_step_time": 0.11313271522521973 + }, + { + "epoch": 3.634796142578125e-05, + "model_forward_time": 0.02518296241760254, + "step": 23821 + }, + { + "epoch": 3.634796142578125e-05, + "step": 23821, + "training_step_time": 0.11475038528442383 + }, + { + "epoch": 3.63494873046875e-05, + "model_forward_time": 0.02507805824279785, + "step": 23822 + }, + { + "epoch": 3.63494873046875e-05, + "step": 23822, + "training_step_time": 0.11010575294494629 + }, + { + "epoch": 3.635101318359375e-05, + "model_forward_time": 0.026886940002441406, + "step": 23823 + }, + { + "epoch": 3.635101318359375e-05, + "step": 23823, + "training_step_time": 0.1113576889038086 + }, + { + "epoch": 3.63525390625e-05, + "model_forward_time": 0.025590181350708008, + "step": 23824 + }, + { + "epoch": 3.63525390625e-05, + "step": 23824, + "training_step_time": 0.12001800537109375 + }, + { + "epoch": 3.635406494140625e-05, + "model_forward_time": 0.02516341209411621, + "step": 23825 + }, + { + "epoch": 3.635406494140625e-05, + "step": 23825, + "training_step_time": 0.10926485061645508 + }, + { + "epoch": 3.63555908203125e-05, + "model_forward_time": 0.025412797927856445, + "step": 23826 + }, + { + "epoch": 3.63555908203125e-05, + "step": 23826, + "training_step_time": 0.1138448715209961 + }, + { + "epoch": 3.635711669921875e-05, + "model_forward_time": 0.024778127670288086, + "step": 23827 + }, + { + "epoch": 3.635711669921875e-05, + "step": 23827, + "training_step_time": 0.10615015029907227 + }, + { + "epoch": 3.6358642578125e-05, + "model_forward_time": 0.025089263916015625, + "step": 23828 + }, + { + "epoch": 3.6358642578125e-05, + "step": 23828, + "training_step_time": 0.10982227325439453 + }, + { + "epoch": 3.636016845703125e-05, + "model_forward_time": 0.024789094924926758, + "step": 23829 + }, + { + "epoch": 3.636016845703125e-05, + "step": 23829, + "training_step_time": 0.10493850708007812 + }, + { + "epoch": 3.63616943359375e-05, + "grad_norm": 0.08239518105983734, + "learning_rate": 1.1125365125402582e-05, + "loss": 0.0041, + "step": 23830 + }, + { + "epoch": 3.63616943359375e-05, + "model_forward_time": 0.025389909744262695, + "step": 23830 + }, + { + "epoch": 3.63616943359375e-05, + "step": 23830, + "training_step_time": 0.10818719863891602 + }, + { + "epoch": 3.636322021484375e-05, + "model_forward_time": 0.025417804718017578, + "step": 23831 + }, + { + "epoch": 3.636322021484375e-05, + "step": 23831, + "training_step_time": 0.10602855682373047 + }, + { + "epoch": 3.636474609375e-05, + "model_forward_time": 0.026076078414916992, + "step": 23832 + }, + { + "epoch": 3.636474609375e-05, + "step": 23832, + "training_step_time": 0.10586094856262207 + }, + { + "epoch": 3.636627197265625e-05, + "model_forward_time": 0.02544426918029785, + "step": 23833 + }, + { + "epoch": 3.636627197265625e-05, + "step": 23833, + "training_step_time": 0.1044168472290039 + }, + { + "epoch": 3.63677978515625e-05, + "model_forward_time": 0.025240421295166016, + "step": 23834 + }, + { + "epoch": 3.63677978515625e-05, + "step": 23834, + "training_step_time": 0.10423684120178223 + }, + { + "epoch": 3.636932373046875e-05, + "model_forward_time": 0.02508091926574707, + "step": 23835 + }, + { + "epoch": 3.636932373046875e-05, + "step": 23835, + "training_step_time": 0.10547232627868652 + }, + { + "epoch": 3.6370849609375e-05, + "model_forward_time": 0.025339126586914062, + "step": 23836 + }, + { + "epoch": 3.6370849609375e-05, + "step": 23836, + "training_step_time": 0.10489869117736816 + }, + { + "epoch": 3.637237548828125e-05, + "model_forward_time": 0.024822473526000977, + "step": 23837 + }, + { + "epoch": 3.637237548828125e-05, + "step": 23837, + "training_step_time": 0.10431265830993652 + }, + { + "epoch": 3.63739013671875e-05, + "model_forward_time": 0.025453805923461914, + "step": 23838 + }, + { + "epoch": 3.63739013671875e-05, + "step": 23838, + "training_step_time": 0.10352468490600586 + }, + { + "epoch": 3.637542724609375e-05, + "model_forward_time": 0.02507305145263672, + "step": 23839 + }, + { + "epoch": 3.637542724609375e-05, + "step": 23839, + "training_step_time": 0.1081230640411377 + }, + { + "epoch": 3.6376953125e-05, + "grad_norm": 0.09703323245048523, + "learning_rate": 1.1090726954430658e-05, + "loss": 0.0064, + "step": 23840 + }, + { + "epoch": 3.6376953125e-05, + "model_forward_time": 0.0249481201171875, + "step": 23840 + }, + { + "epoch": 3.6376953125e-05, + "step": 23840, + "training_step_time": 0.10661554336547852 + }, + { + "epoch": 3.637847900390625e-05, + "model_forward_time": 0.024932384490966797, + "step": 23841 + }, + { + "epoch": 3.637847900390625e-05, + "step": 23841, + "training_step_time": 0.10624051094055176 + }, + { + "epoch": 3.63800048828125e-05, + "model_forward_time": 0.024860858917236328, + "step": 23842 + }, + { + "epoch": 3.63800048828125e-05, + "step": 23842, + "training_step_time": 0.10806059837341309 + }, + { + "epoch": 3.638153076171875e-05, + "model_forward_time": 0.02544403076171875, + "step": 23843 + }, + { + "epoch": 3.638153076171875e-05, + "step": 23843, + "training_step_time": 0.1051173210144043 + }, + { + "epoch": 3.6383056640625e-05, + "model_forward_time": 0.025183916091918945, + "step": 23844 + }, + { + "epoch": 3.6383056640625e-05, + "step": 23844, + "training_step_time": 0.17462468147277832 + }, + { + "epoch": 3.638458251953125e-05, + "model_forward_time": 0.02468109130859375, + "step": 23845 + }, + { + "epoch": 3.638458251953125e-05, + "step": 23845, + "training_step_time": 0.12604117393493652 + }, + { + "epoch": 3.63861083984375e-05, + "model_forward_time": 0.024676799774169922, + "step": 23846 + }, + { + "epoch": 3.63861083984375e-05, + "step": 23846, + "training_step_time": 0.16299724578857422 + }, + { + "epoch": 3.638763427734375e-05, + "model_forward_time": 0.02454233169555664, + "step": 23847 + }, + { + "epoch": 3.638763427734375e-05, + "step": 23847, + "training_step_time": 0.22947454452514648 + }, + { + "epoch": 3.638916015625e-05, + "model_forward_time": 0.02444744110107422, + "step": 23848 + }, + { + "epoch": 3.638916015625e-05, + "step": 23848, + "training_step_time": 0.15347719192504883 + }, + { + "epoch": 3.639068603515625e-05, + "model_forward_time": 0.024225711822509766, + "step": 23849 + }, + { + "epoch": 3.639068603515625e-05, + "step": 23849, + "training_step_time": 0.17047715187072754 + }, + { + "epoch": 3.63922119140625e-05, + "grad_norm": 0.06358692049980164, + "learning_rate": 1.1056136061894384e-05, + "loss": 0.0038, + "step": 23850 + }, + { + "epoch": 3.63922119140625e-05, + "model_forward_time": 0.02421283721923828, + "step": 23850 + }, + { + "epoch": 3.63922119140625e-05, + "step": 23850, + "training_step_time": 0.16432428359985352 + }, + { + "epoch": 3.639373779296875e-05, + "model_forward_time": 0.025168657302856445, + "step": 23851 + }, + { + "epoch": 3.639373779296875e-05, + "step": 23851, + "training_step_time": 0.12441420555114746 + }, + { + "epoch": 3.6395263671875e-05, + "model_forward_time": 0.02466297149658203, + "step": 23852 + }, + { + "epoch": 3.6395263671875e-05, + "step": 23852, + "training_step_time": 0.10793924331665039 + }, + { + "epoch": 3.639678955078125e-05, + "model_forward_time": 0.025212764739990234, + "step": 23853 + }, + { + "epoch": 3.639678955078125e-05, + "step": 23853, + "training_step_time": 0.11177611351013184 + }, + { + "epoch": 3.63983154296875e-05, + "model_forward_time": 0.025574445724487305, + "step": 23854 + }, + { + "epoch": 3.63983154296875e-05, + "step": 23854, + "training_step_time": 0.10936093330383301 + }, + { + "epoch": 3.639984130859375e-05, + "model_forward_time": 0.024920940399169922, + "step": 23855 + }, + { + "epoch": 3.639984130859375e-05, + "step": 23855, + "training_step_time": 0.10675215721130371 + }, + { + "epoch": 3.64013671875e-05, + "model_forward_time": 0.02535247802734375, + "step": 23856 + }, + { + "epoch": 3.64013671875e-05, + "step": 23856, + "training_step_time": 0.15739750862121582 + }, + { + "epoch": 3.640289306640625e-05, + "model_forward_time": 0.024805784225463867, + "step": 23857 + }, + { + "epoch": 3.640289306640625e-05, + "step": 23857, + "training_step_time": 0.12184476852416992 + }, + { + "epoch": 3.64044189453125e-05, + "model_forward_time": 0.02467823028564453, + "step": 23858 + }, + { + "epoch": 3.64044189453125e-05, + "step": 23858, + "training_step_time": 0.1768941879272461 + }, + { + "epoch": 3.640594482421875e-05, + "model_forward_time": 0.024393081665039062, + "step": 23859 + }, + { + "epoch": 3.640594482421875e-05, + "step": 23859, + "training_step_time": 0.10309553146362305 + }, + { + "epoch": 3.6407470703125e-05, + "grad_norm": 0.107351154088974, + "learning_rate": 1.1021592489824967e-05, + "loss": 0.0038, + "step": 23860 + }, + { + "epoch": 3.6407470703125e-05, + "model_forward_time": 0.02430891990661621, + "step": 23860 + }, + { + "epoch": 3.6407470703125e-05, + "step": 23860, + "training_step_time": 0.1591331958770752 + }, + { + "epoch": 3.640899658203125e-05, + "model_forward_time": 0.02558588981628418, + "step": 23861 + }, + { + "epoch": 3.640899658203125e-05, + "step": 23861, + "training_step_time": 0.10560297966003418 + }, + { + "epoch": 3.64105224609375e-05, + "model_forward_time": 0.024373769760131836, + "step": 23862 + }, + { + "epoch": 3.64105224609375e-05, + "step": 23862, + "training_step_time": 0.10432147979736328 + }, + { + "epoch": 3.641204833984375e-05, + "model_forward_time": 0.024957895278930664, + "step": 23863 + }, + { + "epoch": 3.641204833984375e-05, + "step": 23863, + "training_step_time": 0.10355734825134277 + }, + { + "epoch": 3.641357421875e-05, + "model_forward_time": 0.025213003158569336, + "step": 23864 + }, + { + "epoch": 3.641357421875e-05, + "step": 23864, + "training_step_time": 0.10899066925048828 + }, + { + "epoch": 3.641510009765625e-05, + "model_forward_time": 0.02597975730895996, + "step": 23865 + }, + { + "epoch": 3.641510009765625e-05, + "step": 23865, + "training_step_time": 0.10654568672180176 + }, + { + "epoch": 3.64166259765625e-05, + "model_forward_time": 0.0258181095123291, + "step": 23866 + }, + { + "epoch": 3.64166259765625e-05, + "step": 23866, + "training_step_time": 0.10535192489624023 + }, + { + "epoch": 3.641815185546875e-05, + "model_forward_time": 0.025333404541015625, + "step": 23867 + }, + { + "epoch": 3.641815185546875e-05, + "step": 23867, + "training_step_time": 0.15082883834838867 + }, + { + "epoch": 3.6419677734375e-05, + "model_forward_time": 0.0247955322265625, + "step": 23868 + }, + { + "epoch": 3.6419677734375e-05, + "step": 23868, + "training_step_time": 0.17311906814575195 + }, + { + "epoch": 3.642120361328125e-05, + "model_forward_time": 0.024601221084594727, + "step": 23869 + }, + { + "epoch": 3.642120361328125e-05, + "step": 23869, + "training_step_time": 0.13028287887573242 + }, + { + "epoch": 3.64227294921875e-05, + "grad_norm": 0.17584475874900818, + "learning_rate": 1.0987096280196086e-05, + "loss": 0.0083, + "step": 23870 + }, + { + "epoch": 3.64227294921875e-05, + "model_forward_time": 0.02426910400390625, + "step": 23870 + }, + { + "epoch": 3.64227294921875e-05, + "step": 23870, + "training_step_time": 0.1327214241027832 + }, + { + "epoch": 3.642425537109375e-05, + "model_forward_time": 0.026093006134033203, + "step": 23871 + }, + { + "epoch": 3.642425537109375e-05, + "step": 23871, + "training_step_time": 0.1962425708770752 + }, + { + "epoch": 3.642578125e-05, + "model_forward_time": 0.02451348304748535, + "step": 23872 + }, + { + "epoch": 3.642578125e-05, + "step": 23872, + "training_step_time": 0.10332131385803223 + }, + { + "epoch": 3.642730712890625e-05, + "model_forward_time": 0.02447223663330078, + "step": 23873 + }, + { + "epoch": 3.642730712890625e-05, + "step": 23873, + "training_step_time": 0.1018836498260498 + }, + { + "epoch": 3.64288330078125e-05, + "model_forward_time": 0.025185346603393555, + "step": 23874 + }, + { + "epoch": 3.64288330078125e-05, + "step": 23874, + "training_step_time": 0.10932707786560059 + }, + { + "epoch": 3.643035888671875e-05, + "model_forward_time": 0.025146007537841797, + "step": 23875 + }, + { + "epoch": 3.643035888671875e-05, + "step": 23875, + "training_step_time": 0.1065664291381836 + }, + { + "epoch": 3.6431884765625e-05, + "model_forward_time": 0.02554178237915039, + "step": 23876 + }, + { + "epoch": 3.6431884765625e-05, + "step": 23876, + "training_step_time": 0.10508227348327637 + }, + { + "epoch": 3.643341064453125e-05, + "model_forward_time": 0.02531123161315918, + "step": 23877 + }, + { + "epoch": 3.643341064453125e-05, + "step": 23877, + "training_step_time": 0.11355972290039062 + }, + { + "epoch": 3.64349365234375e-05, + "model_forward_time": 0.024814128875732422, + "step": 23878 + }, + { + "epoch": 3.64349365234375e-05, + "step": 23878, + "training_step_time": 0.10688900947570801 + }, + { + "epoch": 3.643646240234375e-05, + "model_forward_time": 0.024965763092041016, + "step": 23879 + }, + { + "epoch": 3.643646240234375e-05, + "step": 23879, + "training_step_time": 0.10531783103942871 + }, + { + "epoch": 3.643798828125e-05, + "grad_norm": 0.10480281710624695, + "learning_rate": 1.095264747492391e-05, + "loss": 0.0032, + "step": 23880 + }, + { + "epoch": 3.643798828125e-05, + "model_forward_time": 0.024976253509521484, + "step": 23880 + }, + { + "epoch": 3.643798828125e-05, + "step": 23880, + "training_step_time": 0.10486531257629395 + }, + { + "epoch": 3.643951416015625e-05, + "model_forward_time": 0.025159120559692383, + "step": 23881 + }, + { + "epoch": 3.643951416015625e-05, + "step": 23881, + "training_step_time": 0.10704445838928223 + }, + { + "epoch": 3.64410400390625e-05, + "model_forward_time": 0.0252077579498291, + "step": 23882 + }, + { + "epoch": 3.64410400390625e-05, + "step": 23882, + "training_step_time": 0.10481810569763184 + }, + { + "epoch": 3.644256591796875e-05, + "model_forward_time": 0.024898767471313477, + "step": 23883 + }, + { + "epoch": 3.644256591796875e-05, + "step": 23883, + "training_step_time": 0.10427570343017578 + }, + { + "epoch": 3.6444091796875e-05, + "model_forward_time": 0.0252382755279541, + "step": 23884 + }, + { + "epoch": 3.6444091796875e-05, + "step": 23884, + "training_step_time": 0.1077737808227539 + }, + { + "epoch": 3.644561767578125e-05, + "model_forward_time": 0.025249242782592773, + "step": 23885 + }, + { + "epoch": 3.644561767578125e-05, + "step": 23885, + "training_step_time": 0.1056058406829834 + }, + { + "epoch": 3.64471435546875e-05, + "model_forward_time": 0.025231599807739258, + "step": 23886 + }, + { + "epoch": 3.64471435546875e-05, + "step": 23886, + "training_step_time": 0.1046438217163086 + }, + { + "epoch": 3.644866943359375e-05, + "model_forward_time": 0.02515578269958496, + "step": 23887 + }, + { + "epoch": 3.644866943359375e-05, + "step": 23887, + "training_step_time": 0.10477852821350098 + }, + { + "epoch": 3.64501953125e-05, + "model_forward_time": 0.02551889419555664, + "step": 23888 + }, + { + "epoch": 3.64501953125e-05, + "step": 23888, + "training_step_time": 0.10579419136047363 + }, + { + "epoch": 3.645172119140625e-05, + "model_forward_time": 0.02513885498046875, + "step": 23889 + }, + { + "epoch": 3.645172119140625e-05, + "step": 23889, + "training_step_time": 0.1233360767364502 + }, + { + "epoch": 3.64532470703125e-05, + "grad_norm": 0.09248506277799606, + "learning_rate": 1.0918246115866964e-05, + "loss": 0.0085, + "step": 23890 + }, + { + "epoch": 3.64532470703125e-05, + "model_forward_time": 0.02534198760986328, + "step": 23890 + }, + { + "epoch": 3.64532470703125e-05, + "step": 23890, + "training_step_time": 0.11260700225830078 + }, + { + "epoch": 3.645477294921875e-05, + "model_forward_time": 0.02507758140563965, + "step": 23891 + }, + { + "epoch": 3.645477294921875e-05, + "step": 23891, + "training_step_time": 0.1282210350036621 + }, + { + "epoch": 3.6456298828125e-05, + "model_forward_time": 0.025000810623168945, + "step": 23892 + }, + { + "epoch": 3.6456298828125e-05, + "step": 23892, + "training_step_time": 0.1393587589263916 + }, + { + "epoch": 3.645782470703125e-05, + "model_forward_time": 0.02394723892211914, + "step": 23893 + }, + { + "epoch": 3.645782470703125e-05, + "step": 23893, + "training_step_time": 0.11671829223632812 + }, + { + "epoch": 3.64593505859375e-05, + "model_forward_time": 0.025250911712646484, + "step": 23894 + }, + { + "epoch": 3.64593505859375e-05, + "step": 23894, + "training_step_time": 0.12872838973999023 + }, + { + "epoch": 3.646087646484375e-05, + "model_forward_time": 0.025410890579223633, + "step": 23895 + }, + { + "epoch": 3.646087646484375e-05, + "step": 23895, + "training_step_time": 0.11078715324401855 + }, + { + "epoch": 3.646240234375e-05, + "model_forward_time": 0.025033950805664062, + "step": 23896 + }, + { + "epoch": 3.646240234375e-05, + "step": 23896, + "training_step_time": 0.13236141204833984 + }, + { + "epoch": 3.646392822265625e-05, + "model_forward_time": 0.025265932083129883, + "step": 23897 + }, + { + "epoch": 3.646392822265625e-05, + "step": 23897, + "training_step_time": 0.11030721664428711 + }, + { + "epoch": 3.64654541015625e-05, + "model_forward_time": 0.02521538734436035, + "step": 23898 + }, + { + "epoch": 3.64654541015625e-05, + "step": 23898, + "training_step_time": 0.11383914947509766 + }, + { + "epoch": 3.646697998046875e-05, + "model_forward_time": 0.025220155715942383, + "step": 23899 + }, + { + "epoch": 3.646697998046875e-05, + "step": 23899, + "training_step_time": 0.10855746269226074 + }, + { + "epoch": 3.6468505859375e-05, + "grad_norm": 0.07143854349851608, + "learning_rate": 1.0883892244826172e-05, + "loss": 0.0035, + "step": 23900 + }, + { + "epoch": 3.6468505859375e-05, + "model_forward_time": 0.02645730972290039, + "step": 23900 + }, + { + "epoch": 3.6468505859375e-05, + "step": 23900, + "training_step_time": 0.18007969856262207 + }, + { + "epoch": 3.647003173828125e-05, + "model_forward_time": 0.024250030517578125, + "step": 23901 + }, + { + "epoch": 3.647003173828125e-05, + "step": 23901, + "training_step_time": 0.21277213096618652 + }, + { + "epoch": 3.64715576171875e-05, + "model_forward_time": 0.024412155151367188, + "step": 23902 + }, + { + "epoch": 3.64715576171875e-05, + "step": 23902, + "training_step_time": 0.20573067665100098 + }, + { + "epoch": 3.647308349609375e-05, + "model_forward_time": 0.024324893951416016, + "step": 23903 + }, + { + "epoch": 3.647308349609375e-05, + "step": 23903, + "training_step_time": 0.1971428394317627 + }, + { + "epoch": 3.6474609375e-05, + "model_forward_time": 0.02409052848815918, + "step": 23904 + }, + { + "epoch": 3.6474609375e-05, + "step": 23904, + "training_step_time": 0.21251535415649414 + }, + { + "epoch": 3.647613525390625e-05, + "model_forward_time": 0.024709463119506836, + "step": 23905 + }, + { + "epoch": 3.647613525390625e-05, + "step": 23905, + "training_step_time": 0.19163894653320312 + }, + { + "epoch": 3.64776611328125e-05, + "model_forward_time": 0.02375054359436035, + "step": 23906 + }, + { + "epoch": 3.64776611328125e-05, + "step": 23906, + "training_step_time": 0.177598237991333 + }, + { + "epoch": 3.647918701171875e-05, + "model_forward_time": 0.024397611618041992, + "step": 23907 + }, + { + "epoch": 3.647918701171875e-05, + "step": 23907, + "training_step_time": 0.09996867179870605 + }, + { + "epoch": 3.6480712890625e-05, + "model_forward_time": 0.02400803565979004, + "step": 23908 + }, + { + "epoch": 3.6480712890625e-05, + "step": 23908, + "training_step_time": 0.10216712951660156 + }, + { + "epoch": 3.648223876953125e-05, + "model_forward_time": 0.025124311447143555, + "step": 23909 + }, + { + "epoch": 3.648223876953125e-05, + "step": 23909, + "training_step_time": 0.10426497459411621 + }, + { + "epoch": 3.64837646484375e-05, + "grad_norm": 0.2721317410469055, + "learning_rate": 1.0849585903544706e-05, + "loss": 0.005, + "step": 23910 + }, + { + "epoch": 3.64837646484375e-05, + "model_forward_time": 0.025544166564941406, + "step": 23910 + }, + { + "epoch": 3.64837646484375e-05, + "step": 23910, + "training_step_time": 0.10668063163757324 + }, + { + "epoch": 3.648529052734375e-05, + "model_forward_time": 0.024471521377563477, + "step": 23911 + }, + { + "epoch": 3.648529052734375e-05, + "step": 23911, + "training_step_time": 0.14273571968078613 + }, + { + "epoch": 3.648681640625e-05, + "model_forward_time": 0.024634122848510742, + "step": 23912 + }, + { + "epoch": 3.648681640625e-05, + "step": 23912, + "training_step_time": 0.16340875625610352 + }, + { + "epoch": 3.648834228515625e-05, + "model_forward_time": 0.024509191513061523, + "step": 23913 + }, + { + "epoch": 3.648834228515625e-05, + "step": 23913, + "training_step_time": 0.11221194267272949 + }, + { + "epoch": 3.64898681640625e-05, + "model_forward_time": 0.024311065673828125, + "step": 23914 + }, + { + "epoch": 3.64898681640625e-05, + "step": 23914, + "training_step_time": 0.13302969932556152 + }, + { + "epoch": 3.649139404296875e-05, + "model_forward_time": 0.025389432907104492, + "step": 23915 + }, + { + "epoch": 3.649139404296875e-05, + "step": 23915, + "training_step_time": 0.20921683311462402 + }, + { + "epoch": 3.6492919921875e-05, + "model_forward_time": 0.02478766441345215, + "step": 23916 + }, + { + "epoch": 3.6492919921875e-05, + "step": 23916, + "training_step_time": 0.10566043853759766 + }, + { + "epoch": 3.649444580078125e-05, + "model_forward_time": 0.02468585968017578, + "step": 23917 + }, + { + "epoch": 3.649444580078125e-05, + "step": 23917, + "training_step_time": 0.10468053817749023 + }, + { + "epoch": 3.64959716796875e-05, + "model_forward_time": 0.025393009185791016, + "step": 23918 + }, + { + "epoch": 3.64959716796875e-05, + "step": 23918, + "training_step_time": 0.10818934440612793 + }, + { + "epoch": 3.649749755859375e-05, + "model_forward_time": 0.025679588317871094, + "step": 23919 + }, + { + "epoch": 3.649749755859375e-05, + "step": 23919, + "training_step_time": 0.10683751106262207 + }, + { + "epoch": 3.64990234375e-05, + "grad_norm": 0.17563557624816895, + "learning_rate": 1.0815327133708015e-05, + "loss": 0.0104, + "step": 23920 + }, + { + "epoch": 3.64990234375e-05, + "model_forward_time": 0.024729251861572266, + "step": 23920 + }, + { + "epoch": 3.64990234375e-05, + "step": 23920, + "training_step_time": 0.10470199584960938 + }, + { + "epoch": 3.650054931640625e-05, + "model_forward_time": 0.025269031524658203, + "step": 23921 + }, + { + "epoch": 3.650054931640625e-05, + "step": 23921, + "training_step_time": 0.10552239418029785 + }, + { + "epoch": 3.65020751953125e-05, + "model_forward_time": 0.025113582611083984, + "step": 23922 + }, + { + "epoch": 3.65020751953125e-05, + "step": 23922, + "training_step_time": 0.10371565818786621 + }, + { + "epoch": 3.650360107421875e-05, + "model_forward_time": 0.024776220321655273, + "step": 23923 + }, + { + "epoch": 3.650360107421875e-05, + "step": 23923, + "training_step_time": 0.10338854789733887 + }, + { + "epoch": 3.6505126953125e-05, + "model_forward_time": 0.024939298629760742, + "step": 23924 + }, + { + "epoch": 3.6505126953125e-05, + "step": 23924, + "training_step_time": 0.10313034057617188 + }, + { + "epoch": 3.650665283203125e-05, + "model_forward_time": 0.025725841522216797, + "step": 23925 + }, + { + "epoch": 3.650665283203125e-05, + "step": 23925, + "training_step_time": 0.10590362548828125 + }, + { + "epoch": 3.65081787109375e-05, + "model_forward_time": 0.02538156509399414, + "step": 23926 + }, + { + "epoch": 3.65081787109375e-05, + "step": 23926, + "training_step_time": 0.10534048080444336 + }, + { + "epoch": 3.650970458984375e-05, + "model_forward_time": 0.028499126434326172, + "step": 23927 + }, + { + "epoch": 3.650970458984375e-05, + "step": 23927, + "training_step_time": 0.10802626609802246 + }, + { + "epoch": 3.651123046875e-05, + "model_forward_time": 0.025043964385986328, + "step": 23928 + }, + { + "epoch": 3.651123046875e-05, + "step": 23928, + "training_step_time": 0.1066586971282959 + }, + { + "epoch": 3.651275634765625e-05, + "model_forward_time": 0.025124549865722656, + "step": 23929 + }, + { + "epoch": 3.651275634765625e-05, + "step": 23929, + "training_step_time": 0.10372424125671387 + }, + { + "epoch": 3.65142822265625e-05, + "grad_norm": 0.08833561837673187, + "learning_rate": 1.0781115976943717e-05, + "loss": 0.0036, + "step": 23930 + }, + { + "epoch": 3.65142822265625e-05, + "model_forward_time": 0.025107383728027344, + "step": 23930 + }, + { + "epoch": 3.65142822265625e-05, + "step": 23930, + "training_step_time": 0.10421562194824219 + }, + { + "epoch": 3.651580810546875e-05, + "model_forward_time": 0.024898529052734375, + "step": 23931 + }, + { + "epoch": 3.651580810546875e-05, + "step": 23931, + "training_step_time": 0.10519123077392578 + }, + { + "epoch": 3.6517333984375e-05, + "model_forward_time": 0.02464604377746582, + "step": 23932 + }, + { + "epoch": 3.6517333984375e-05, + "step": 23932, + "training_step_time": 0.19600939750671387 + }, + { + "epoch": 3.651885986328125e-05, + "model_forward_time": 0.024336576461791992, + "step": 23933 + }, + { + "epoch": 3.651885986328125e-05, + "step": 23933, + "training_step_time": 0.11583089828491211 + }, + { + "epoch": 3.65203857421875e-05, + "model_forward_time": 0.024325132369995117, + "step": 23934 + }, + { + "epoch": 3.65203857421875e-05, + "step": 23934, + "training_step_time": 0.1271059513092041 + }, + { + "epoch": 3.652191162109375e-05, + "model_forward_time": 0.025506019592285156, + "step": 23935 + }, + { + "epoch": 3.652191162109375e-05, + "step": 23935, + "training_step_time": 0.1427474021911621 + }, + { + "epoch": 3.65234375e-05, + "model_forward_time": 0.0247039794921875, + "step": 23936 + }, + { + "epoch": 3.65234375e-05, + "step": 23936, + "training_step_time": 0.20601701736450195 + }, + { + "epoch": 3.652496337890625e-05, + "model_forward_time": 0.02445197105407715, + "step": 23937 + }, + { + "epoch": 3.652496337890625e-05, + "step": 23937, + "training_step_time": 0.14899563789367676 + }, + { + "epoch": 3.65264892578125e-05, + "model_forward_time": 0.024459123611450195, + "step": 23938 + }, + { + "epoch": 3.65264892578125e-05, + "step": 23938, + "training_step_time": 0.20193839073181152 + }, + { + "epoch": 3.652801513671875e-05, + "model_forward_time": 0.024033069610595703, + "step": 23939 + }, + { + "epoch": 3.652801513671875e-05, + "step": 23939, + "training_step_time": 0.1343083381652832 + }, + { + "epoch": 3.6529541015625e-05, + "grad_norm": 0.11135763674974442, + "learning_rate": 1.0746952474821614e-05, + "loss": 0.0029, + "step": 23940 + }, + { + "epoch": 3.6529541015625e-05, + "model_forward_time": 0.024798870086669922, + "step": 23940 + }, + { + "epoch": 3.6529541015625e-05, + "step": 23940, + "training_step_time": 0.10696172714233398 + }, + { + "epoch": 3.653106689453125e-05, + "model_forward_time": 0.025301694869995117, + "step": 23941 + }, + { + "epoch": 3.653106689453125e-05, + "step": 23941, + "training_step_time": 0.11362552642822266 + }, + { + "epoch": 3.65325927734375e-05, + "model_forward_time": 0.025057077407836914, + "step": 23942 + }, + { + "epoch": 3.65325927734375e-05, + "step": 23942, + "training_step_time": 0.13768839836120605 + }, + { + "epoch": 3.653411865234375e-05, + "model_forward_time": 0.024944543838500977, + "step": 23943 + }, + { + "epoch": 3.653411865234375e-05, + "step": 23943, + "training_step_time": 0.1921687126159668 + }, + { + "epoch": 3.653564453125e-05, + "model_forward_time": 0.024840831756591797, + "step": 23944 + }, + { + "epoch": 3.653564453125e-05, + "step": 23944, + "training_step_time": 0.10192513465881348 + }, + { + "epoch": 3.653717041015625e-05, + "model_forward_time": 0.024465084075927734, + "step": 23945 + }, + { + "epoch": 3.653717041015625e-05, + "step": 23945, + "training_step_time": 0.2050459384918213 + }, + { + "epoch": 3.65386962890625e-05, + "model_forward_time": 0.025182723999023438, + "step": 23946 + }, + { + "epoch": 3.65386962890625e-05, + "step": 23946, + "training_step_time": 0.13811922073364258 + }, + { + "epoch": 3.654022216796875e-05, + "model_forward_time": 0.024602890014648438, + "step": 23947 + }, + { + "epoch": 3.654022216796875e-05, + "step": 23947, + "training_step_time": 0.10910487174987793 + }, + { + "epoch": 3.6541748046875e-05, + "model_forward_time": 0.025218725204467773, + "step": 23948 + }, + { + "epoch": 3.6541748046875e-05, + "step": 23948, + "training_step_time": 0.10687851905822754 + }, + { + "epoch": 3.654327392578125e-05, + "model_forward_time": 0.02527618408203125, + "step": 23949 + }, + { + "epoch": 3.654327392578125e-05, + "step": 23949, + "training_step_time": 0.10986137390136719 + }, + { + "epoch": 3.65447998046875e-05, + "grad_norm": 0.10144961625337601, + "learning_rate": 1.0712836668853582e-05, + "loss": 0.0029, + "step": 23950 + }, + { + "epoch": 3.65447998046875e-05, + "model_forward_time": 0.025519609451293945, + "step": 23950 + }, + { + "epoch": 3.65447998046875e-05, + "step": 23950, + "training_step_time": 0.10483956336975098 + }, + { + "epoch": 3.654632568359375e-05, + "model_forward_time": 0.025439977645874023, + "step": 23951 + }, + { + "epoch": 3.654632568359375e-05, + "step": 23951, + "training_step_time": 0.10793709754943848 + }, + { + "epoch": 3.65478515625e-05, + "model_forward_time": 0.025325775146484375, + "step": 23952 + }, + { + "epoch": 3.65478515625e-05, + "step": 23952, + "training_step_time": 0.10506129264831543 + }, + { + "epoch": 3.654937744140625e-05, + "model_forward_time": 0.025190114974975586, + "step": 23953 + }, + { + "epoch": 3.654937744140625e-05, + "step": 23953, + "training_step_time": 0.10608434677124023 + }, + { + "epoch": 3.65509033203125e-05, + "model_forward_time": 0.024977684020996094, + "step": 23954 + }, + { + "epoch": 3.65509033203125e-05, + "step": 23954, + "training_step_time": 0.10709381103515625 + }, + { + "epoch": 3.655242919921875e-05, + "model_forward_time": 0.025832176208496094, + "step": 23955 + }, + { + "epoch": 3.655242919921875e-05, + "step": 23955, + "training_step_time": 0.10762643814086914 + }, + { + "epoch": 3.6553955078125e-05, + "model_forward_time": 0.025383472442626953, + "step": 23956 + }, + { + "epoch": 3.6553955078125e-05, + "step": 23956, + "training_step_time": 0.14487195014953613 + }, + { + "epoch": 3.655548095703125e-05, + "model_forward_time": 0.024576663970947266, + "step": 23957 + }, + { + "epoch": 3.655548095703125e-05, + "step": 23957, + "training_step_time": 0.15160918235778809 + }, + { + "epoch": 3.65570068359375e-05, + "model_forward_time": 0.025400638580322266, + "step": 23958 + }, + { + "epoch": 3.65570068359375e-05, + "step": 23958, + "training_step_time": 0.11628866195678711 + }, + { + "epoch": 3.655853271484375e-05, + "model_forward_time": 0.02435898780822754, + "step": 23959 + }, + { + "epoch": 3.655853271484375e-05, + "step": 23959, + "training_step_time": 0.1323254108428955 + }, + { + "epoch": 3.656005859375e-05, + "grad_norm": 0.3526037931442261, + "learning_rate": 1.0678768600493528e-05, + "loss": 0.0139, + "step": 23960 + }, + { + "epoch": 3.656005859375e-05, + "model_forward_time": 0.025356292724609375, + "step": 23960 + }, + { + "epoch": 3.656005859375e-05, + "step": 23960, + "training_step_time": 0.10399365425109863 + }, + { + "epoch": 3.656158447265625e-05, + "model_forward_time": 0.02509784698486328, + "step": 23961 + }, + { + "epoch": 3.656158447265625e-05, + "step": 23961, + "training_step_time": 0.12304854393005371 + }, + { + "epoch": 3.65631103515625e-05, + "model_forward_time": 0.02531599998474121, + "step": 23962 + }, + { + "epoch": 3.65631103515625e-05, + "step": 23962, + "training_step_time": 0.10485243797302246 + }, + { + "epoch": 3.656463623046875e-05, + "model_forward_time": 0.025495529174804688, + "step": 23963 + }, + { + "epoch": 3.656463623046875e-05, + "step": 23963, + "training_step_time": 0.10354065895080566 + }, + { + "epoch": 3.6566162109375e-05, + "model_forward_time": 0.024891376495361328, + "step": 23964 + }, + { + "epoch": 3.6566162109375e-05, + "step": 23964, + "training_step_time": 0.1065068244934082 + }, + { + "epoch": 3.656768798828125e-05, + "model_forward_time": 0.02472209930419922, + "step": 23965 + }, + { + "epoch": 3.656768798828125e-05, + "step": 23965, + "training_step_time": 0.10564446449279785 + }, + { + "epoch": 3.65692138671875e-05, + "model_forward_time": 0.025160551071166992, + "step": 23966 + }, + { + "epoch": 3.65692138671875e-05, + "step": 23966, + "training_step_time": 0.10565853118896484 + }, + { + "epoch": 3.657073974609375e-05, + "model_forward_time": 0.02499699592590332, + "step": 23967 + }, + { + "epoch": 3.657073974609375e-05, + "step": 23967, + "training_step_time": 0.10349559783935547 + }, + { + "epoch": 3.6572265625e-05, + "model_forward_time": 0.02480483055114746, + "step": 23968 + }, + { + "epoch": 3.6572265625e-05, + "step": 23968, + "training_step_time": 0.105194091796875 + }, + { + "epoch": 3.657379150390625e-05, + "model_forward_time": 0.024136066436767578, + "step": 23969 + }, + { + "epoch": 3.657379150390625e-05, + "step": 23969, + "training_step_time": 0.10750436782836914 + }, + { + "epoch": 3.65753173828125e-05, + "grad_norm": 0.11314871907234192, + "learning_rate": 1.0644748311137376e-05, + "loss": 0.008, + "step": 23970 + }, + { + "epoch": 3.65753173828125e-05, + "model_forward_time": 0.025174617767333984, + "step": 23970 + }, + { + "epoch": 3.65753173828125e-05, + "step": 23970, + "training_step_time": 0.10592532157897949 + }, + { + "epoch": 3.657684326171875e-05, + "model_forward_time": 0.0250093936920166, + "step": 23971 + }, + { + "epoch": 3.657684326171875e-05, + "step": 23971, + "training_step_time": 0.10729050636291504 + }, + { + "epoch": 3.6578369140625e-05, + "model_forward_time": 0.02558279037475586, + "step": 23972 + }, + { + "epoch": 3.6578369140625e-05, + "step": 23972, + "training_step_time": 0.10511183738708496 + }, + { + "epoch": 3.657989501953125e-05, + "model_forward_time": 0.025240421295166016, + "step": 23973 + }, + { + "epoch": 3.657989501953125e-05, + "step": 23973, + "training_step_time": 0.1531233787536621 + }, + { + "epoch": 3.65814208984375e-05, + "model_forward_time": 0.02479243278503418, + "step": 23974 + }, + { + "epoch": 3.65814208984375e-05, + "step": 23974, + "training_step_time": 0.18065738677978516 + }, + { + "epoch": 3.658294677734375e-05, + "model_forward_time": 0.024245500564575195, + "step": 23975 + }, + { + "epoch": 3.658294677734375e-05, + "step": 23975, + "training_step_time": 0.18618035316467285 + }, + { + "epoch": 3.658447265625e-05, + "model_forward_time": 0.024186134338378906, + "step": 23976 + }, + { + "epoch": 3.658447265625e-05, + "step": 23976, + "training_step_time": 0.17686915397644043 + }, + { + "epoch": 3.658599853515625e-05, + "model_forward_time": 0.02416706085205078, + "step": 23977 + }, + { + "epoch": 3.658599853515625e-05, + "step": 23977, + "training_step_time": 0.2102205753326416 + }, + { + "epoch": 3.65875244140625e-05, + "model_forward_time": 0.02425098419189453, + "step": 23978 + }, + { + "epoch": 3.65875244140625e-05, + "step": 23978, + "training_step_time": 0.13461613655090332 + }, + { + "epoch": 3.658905029296875e-05, + "model_forward_time": 0.02405571937561035, + "step": 23979 + }, + { + "epoch": 3.658905029296875e-05, + "step": 23979, + "training_step_time": 0.1157839298248291 + }, + { + "epoch": 3.6590576171875e-05, + "grad_norm": 0.07854122668504715, + "learning_rate": 1.0610775842122972e-05, + "loss": 0.0039, + "step": 23980 + }, + { + "epoch": 3.6590576171875e-05, + "model_forward_time": 0.02569127082824707, + "step": 23980 + }, + { + "epoch": 3.6590576171875e-05, + "step": 23980, + "training_step_time": 0.19122552871704102 + }, + { + "epoch": 3.659210205078125e-05, + "model_forward_time": 0.024831533432006836, + "step": 23981 + }, + { + "epoch": 3.659210205078125e-05, + "step": 23981, + "training_step_time": 0.11115050315856934 + }, + { + "epoch": 3.65936279296875e-05, + "model_forward_time": 0.023618221282958984, + "step": 23982 + }, + { + "epoch": 3.65936279296875e-05, + "step": 23982, + "training_step_time": 0.11294746398925781 + }, + { + "epoch": 3.659515380859375e-05, + "model_forward_time": 0.02515387535095215, + "step": 23983 + }, + { + "epoch": 3.659515380859375e-05, + "step": 23983, + "training_step_time": 0.13150382041931152 + }, + { + "epoch": 3.65966796875e-05, + "model_forward_time": 0.024895429611206055, + "step": 23984 + }, + { + "epoch": 3.65966796875e-05, + "step": 23984, + "training_step_time": 0.11939787864685059 + }, + { + "epoch": 3.659820556640625e-05, + "model_forward_time": 0.025341272354125977, + "step": 23985 + }, + { + "epoch": 3.659820556640625e-05, + "step": 23985, + "training_step_time": 0.11521649360656738 + }, + { + "epoch": 3.65997314453125e-05, + "model_forward_time": 0.025594472885131836, + "step": 23986 + }, + { + "epoch": 3.65997314453125e-05, + "step": 23986, + "training_step_time": 0.11291790008544922 + }, + { + "epoch": 3.660125732421875e-05, + "model_forward_time": 0.024996280670166016, + "step": 23987 + }, + { + "epoch": 3.660125732421875e-05, + "step": 23987, + "training_step_time": 0.1248483657836914 + }, + { + "epoch": 3.6602783203125e-05, + "model_forward_time": 0.025066375732421875, + "step": 23988 + }, + { + "epoch": 3.6602783203125e-05, + "step": 23988, + "training_step_time": 0.10714173316955566 + }, + { + "epoch": 3.660430908203125e-05, + "model_forward_time": 0.025182247161865234, + "step": 23989 + }, + { + "epoch": 3.660430908203125e-05, + "step": 23989, + "training_step_time": 0.13114523887634277 + }, + { + "epoch": 3.66058349609375e-05, + "grad_norm": 0.0672249123454094, + "learning_rate": 1.0576851234730095e-05, + "loss": 0.0075, + "step": 23990 + }, + { + "epoch": 3.66058349609375e-05, + "model_forward_time": 0.025022268295288086, + "step": 23990 + }, + { + "epoch": 3.66058349609375e-05, + "step": 23990, + "training_step_time": 0.16492366790771484 + }, + { + "epoch": 3.660736083984375e-05, + "model_forward_time": 0.024395465850830078, + "step": 23991 + }, + { + "epoch": 3.660736083984375e-05, + "step": 23991, + "training_step_time": 0.12519478797912598 + }, + { + "epoch": 3.660888671875e-05, + "model_forward_time": 0.024024009704589844, + "step": 23992 + }, + { + "epoch": 3.660888671875e-05, + "step": 23992, + "training_step_time": 0.10299420356750488 + }, + { + "epoch": 3.661041259765625e-05, + "model_forward_time": 0.02498936653137207, + "step": 23993 + }, + { + "epoch": 3.661041259765625e-05, + "step": 23993, + "training_step_time": 0.10996794700622559 + }, + { + "epoch": 3.66119384765625e-05, + "model_forward_time": 0.024919509887695312, + "step": 23994 + }, + { + "epoch": 3.66119384765625e-05, + "step": 23994, + "training_step_time": 0.18923044204711914 + }, + { + "epoch": 3.661346435546875e-05, + "model_forward_time": 0.024245500564575195, + "step": 23995 + }, + { + "epoch": 3.661346435546875e-05, + "step": 23995, + "training_step_time": 0.10231709480285645 + }, + { + "epoch": 3.6614990234375e-05, + "model_forward_time": 0.024425029754638672, + "step": 23996 + }, + { + "epoch": 3.6614990234375e-05, + "step": 23996, + "training_step_time": 0.10216951370239258 + }, + { + "epoch": 3.661651611328125e-05, + "model_forward_time": 0.024704694747924805, + "step": 23997 + }, + { + "epoch": 3.661651611328125e-05, + "step": 23997, + "training_step_time": 0.10421514511108398 + }, + { + "epoch": 3.66180419921875e-05, + "model_forward_time": 0.025384187698364258, + "step": 23998 + }, + { + "epoch": 3.66180419921875e-05, + "step": 23998, + "training_step_time": 0.10660743713378906 + }, + { + "epoch": 3.661956787109375e-05, + "model_forward_time": 0.024947643280029297, + "step": 23999 + }, + { + "epoch": 3.661956787109375e-05, + "step": 23999, + "training_step_time": 0.10779118537902832 + }, + { + "epoch": 3.662109375e-05, + "grad_norm": 0.1890254020690918, + "learning_rate": 1.0542974530180327e-05, + "loss": 0.0041, + "step": 24000 + }, + { + "epoch": 3.662109375e-05, + "model_forward_time": 0.026225566864013672, + "step": 24000 + }, + { + "epoch": 3.662109375e-05, + "step": 24000, + "training_step_time": 0.09939074516296387 + }, + { + "epoch": 3.662261962890625e-05, + "model_forward_time": 0.023174762725830078, + "step": 24001 + }, + { + "epoch": 3.662261962890625e-05, + "step": 24001, + "training_step_time": 0.10173487663269043 + }, + { + "epoch": 3.66241455078125e-05, + "model_forward_time": 0.02447819709777832, + "step": 24002 + }, + { + "epoch": 3.66241455078125e-05, + "step": 24002, + "training_step_time": 0.10319709777832031 + }, + { + "epoch": 3.662567138671875e-05, + "model_forward_time": 0.026036977767944336, + "step": 24003 + }, + { + "epoch": 3.662567138671875e-05, + "step": 24003, + "training_step_time": 0.10711503028869629 + }, + { + "epoch": 3.6627197265625e-05, + "model_forward_time": 0.02511429786682129, + "step": 24004 + }, + { + "epoch": 3.6627197265625e-05, + "step": 24004, + "training_step_time": 0.14751219749450684 + }, + { + "epoch": 3.662872314453125e-05, + "model_forward_time": 0.02524852752685547, + "step": 24005 + }, + { + "epoch": 3.662872314453125e-05, + "step": 24005, + "training_step_time": 0.10389828681945801 + }, + { + "epoch": 3.66302490234375e-05, + "model_forward_time": 0.02554488182067871, + "step": 24006 + }, + { + "epoch": 3.66302490234375e-05, + "step": 24006, + "training_step_time": 0.1060783863067627 + }, + { + "epoch": 3.663177490234375e-05, + "model_forward_time": 0.02532052993774414, + "step": 24007 + }, + { + "epoch": 3.663177490234375e-05, + "step": 24007, + "training_step_time": 0.11531424522399902 + }, + { + "epoch": 3.663330078125e-05, + "model_forward_time": 0.02546381950378418, + "step": 24008 + }, + { + "epoch": 3.663330078125e-05, + "step": 24008, + "training_step_time": 0.1126258373260498 + }, + { + "epoch": 3.663482666015625e-05, + "model_forward_time": 0.025568008422851562, + "step": 24009 + }, + { + "epoch": 3.663482666015625e-05, + "step": 24009, + "training_step_time": 0.10782456398010254 + }, + { + "epoch": 3.66363525390625e-05, + "grad_norm": 0.06362523883581161, + "learning_rate": 1.0509145769637057e-05, + "loss": 0.0027, + "step": 24010 + }, + { + "epoch": 3.66363525390625e-05, + "model_forward_time": 0.025328397750854492, + "step": 24010 + }, + { + "epoch": 3.66363525390625e-05, + "step": 24010, + "training_step_time": 0.10945868492126465 + }, + { + "epoch": 3.663787841796875e-05, + "model_forward_time": 0.025268077850341797, + "step": 24011 + }, + { + "epoch": 3.663787841796875e-05, + "step": 24011, + "training_step_time": 0.10518336296081543 + }, + { + "epoch": 3.6639404296875e-05, + "model_forward_time": 0.025100231170654297, + "step": 24012 + }, + { + "epoch": 3.6639404296875e-05, + "step": 24012, + "training_step_time": 0.10564041137695312 + }, + { + "epoch": 3.664093017578125e-05, + "model_forward_time": 0.025072336196899414, + "step": 24013 + }, + { + "epoch": 3.664093017578125e-05, + "step": 24013, + "training_step_time": 0.1044607162475586 + }, + { + "epoch": 3.66424560546875e-05, + "model_forward_time": 0.025456905364990234, + "step": 24014 + }, + { + "epoch": 3.66424560546875e-05, + "step": 24014, + "training_step_time": 0.10441899299621582 + }, + { + "epoch": 3.664398193359375e-05, + "model_forward_time": 0.024611234664916992, + "step": 24015 + }, + { + "epoch": 3.664398193359375e-05, + "step": 24015, + "training_step_time": 0.10735774040222168 + }, + { + "epoch": 3.66455078125e-05, + "model_forward_time": 0.02546977996826172, + "step": 24016 + }, + { + "epoch": 3.66455078125e-05, + "step": 24016, + "training_step_time": 0.1321725845336914 + }, + { + "epoch": 3.664703369140625e-05, + "model_forward_time": 0.024175643920898438, + "step": 24017 + }, + { + "epoch": 3.664703369140625e-05, + "step": 24017, + "training_step_time": 0.15592145919799805 + }, + { + "epoch": 3.66485595703125e-05, + "model_forward_time": 0.02366948127746582, + "step": 24018 + }, + { + "epoch": 3.66485595703125e-05, + "step": 24018, + "training_step_time": 0.1436929702758789 + }, + { + "epoch": 3.665008544921875e-05, + "model_forward_time": 0.024038076400756836, + "step": 24019 + }, + { + "epoch": 3.665008544921875e-05, + "step": 24019, + "training_step_time": 0.15226960182189941 + }, + { + "epoch": 3.6651611328125e-05, + "grad_norm": 0.07818132638931274, + "learning_rate": 1.0475364994205411e-05, + "loss": 0.005, + "step": 24020 + }, + { + "epoch": 3.6651611328125e-05, + "model_forward_time": 0.023895978927612305, + "step": 24020 + }, + { + "epoch": 3.6651611328125e-05, + "step": 24020, + "training_step_time": 0.13908672332763672 + }, + { + "epoch": 3.665313720703125e-05, + "model_forward_time": 0.0238955020904541, + "step": 24021 + }, + { + "epoch": 3.665313720703125e-05, + "step": 24021, + "training_step_time": 0.12632393836975098 + }, + { + "epoch": 3.66546630859375e-05, + "model_forward_time": 0.02394866943359375, + "step": 24022 + }, + { + "epoch": 3.66546630859375e-05, + "step": 24022, + "training_step_time": 0.12409543991088867 + }, + { + "epoch": 3.665618896484375e-05, + "model_forward_time": 0.0240480899810791, + "step": 24023 + }, + { + "epoch": 3.665618896484375e-05, + "step": 24023, + "training_step_time": 0.17068696022033691 + }, + { + "epoch": 3.665771484375e-05, + "model_forward_time": 0.02702617645263672, + "step": 24024 + }, + { + "epoch": 3.665771484375e-05, + "step": 24024, + "training_step_time": 0.11970996856689453 + }, + { + "epoch": 3.665924072265625e-05, + "model_forward_time": 0.024559736251831055, + "step": 24025 + }, + { + "epoch": 3.665924072265625e-05, + "step": 24025, + "training_step_time": 0.11860418319702148 + }, + { + "epoch": 3.66607666015625e-05, + "model_forward_time": 0.02488398551940918, + "step": 24026 + }, + { + "epoch": 3.66607666015625e-05, + "step": 24026, + "training_step_time": 0.13718271255493164 + }, + { + "epoch": 3.666229248046875e-05, + "model_forward_time": 0.02512383460998535, + "step": 24027 + }, + { + "epoch": 3.666229248046875e-05, + "step": 24027, + "training_step_time": 0.1224820613861084 + }, + { + "epoch": 3.6663818359375e-05, + "model_forward_time": 0.025180339813232422, + "step": 24028 + }, + { + "epoch": 3.6663818359375e-05, + "step": 24028, + "training_step_time": 0.11996817588806152 + }, + { + "epoch": 3.666534423828125e-05, + "model_forward_time": 0.025171279907226562, + "step": 24029 + }, + { + "epoch": 3.666534423828125e-05, + "step": 24029, + "training_step_time": 0.11020040512084961 + }, + { + "epoch": 3.66668701171875e-05, + "grad_norm": 0.09496715664863586, + "learning_rate": 1.0441632244932237e-05, + "loss": 0.0044, + "step": 24030 + }, + { + "epoch": 3.66668701171875e-05, + "model_forward_time": 0.02509140968322754, + "step": 24030 + }, + { + "epoch": 3.66668701171875e-05, + "step": 24030, + "training_step_time": 0.11746025085449219 + }, + { + "epoch": 3.666839599609375e-05, + "model_forward_time": 0.02589702606201172, + "step": 24031 + }, + { + "epoch": 3.666839599609375e-05, + "step": 24031, + "training_step_time": 0.10252928733825684 + }, + { + "epoch": 3.6669921875e-05, + "model_forward_time": 0.025146007537841797, + "step": 24032 + }, + { + "epoch": 3.6669921875e-05, + "step": 24032, + "training_step_time": 0.10325884819030762 + }, + { + "epoch": 3.667144775390625e-05, + "model_forward_time": 0.025122880935668945, + "step": 24033 + }, + { + "epoch": 3.667144775390625e-05, + "step": 24033, + "training_step_time": 0.12383675575256348 + }, + { + "epoch": 3.66729736328125e-05, + "model_forward_time": 0.024909019470214844, + "step": 24034 + }, + { + "epoch": 3.66729736328125e-05, + "step": 24034, + "training_step_time": 0.1274123191833496 + }, + { + "epoch": 3.667449951171875e-05, + "model_forward_time": 0.02518606185913086, + "step": 24035 + }, + { + "epoch": 3.667449951171875e-05, + "step": 24035, + "training_step_time": 0.11529898643493652 + }, + { + "epoch": 3.6676025390625e-05, + "model_forward_time": 0.02518153190612793, + "step": 24036 + }, + { + "epoch": 3.6676025390625e-05, + "step": 24036, + "training_step_time": 0.10953950881958008 + }, + { + "epoch": 3.667755126953125e-05, + "model_forward_time": 0.025110960006713867, + "step": 24037 + }, + { + "epoch": 3.667755126953125e-05, + "step": 24037, + "training_step_time": 0.11234617233276367 + }, + { + "epoch": 3.66790771484375e-05, + "model_forward_time": 0.02500295639038086, + "step": 24038 + }, + { + "epoch": 3.66790771484375e-05, + "step": 24038, + "training_step_time": 0.14154791831970215 + }, + { + "epoch": 3.668060302734375e-05, + "model_forward_time": 0.025320053100585938, + "step": 24039 + }, + { + "epoch": 3.668060302734375e-05, + "step": 24039, + "training_step_time": 0.10712647438049316 + }, + { + "epoch": 3.668212890625e-05, + "grad_norm": 0.29370254278182983, + "learning_rate": 1.0407947562805986e-05, + "loss": 0.0063, + "step": 24040 + }, + { + "epoch": 3.668212890625e-05, + "model_forward_time": 0.0256500244140625, + "step": 24040 + }, + { + "epoch": 3.668212890625e-05, + "step": 24040, + "training_step_time": 0.10539865493774414 + }, + { + "epoch": 3.668365478515625e-05, + "model_forward_time": 0.024700403213500977, + "step": 24041 + }, + { + "epoch": 3.668365478515625e-05, + "step": 24041, + "training_step_time": 0.11933088302612305 + }, + { + "epoch": 3.66851806640625e-05, + "model_forward_time": 0.02482438087463379, + "step": 24042 + }, + { + "epoch": 3.66851806640625e-05, + "step": 24042, + "training_step_time": 0.10567212104797363 + }, + { + "epoch": 3.668670654296875e-05, + "model_forward_time": 0.025246858596801758, + "step": 24043 + }, + { + "epoch": 3.668670654296875e-05, + "step": 24043, + "training_step_time": 0.203230619430542 + }, + { + "epoch": 3.6688232421875e-05, + "model_forward_time": 0.023929119110107422, + "step": 24044 + }, + { + "epoch": 3.6688232421875e-05, + "step": 24044, + "training_step_time": 0.13109374046325684 + }, + { + "epoch": 3.668975830078125e-05, + "model_forward_time": 0.02285599708557129, + "step": 24045 + }, + { + "epoch": 3.668975830078125e-05, + "step": 24045, + "training_step_time": 0.13663125038146973 + }, + { + "epoch": 3.66912841796875e-05, + "model_forward_time": 0.025371074676513672, + "step": 24046 + }, + { + "epoch": 3.66912841796875e-05, + "step": 24046, + "training_step_time": 0.12169337272644043 + }, + { + "epoch": 3.669281005859375e-05, + "model_forward_time": 0.024966955184936523, + "step": 24047 + }, + { + "epoch": 3.669281005859375e-05, + "step": 24047, + "training_step_time": 0.12405586242675781 + }, + { + "epoch": 3.66943359375e-05, + "model_forward_time": 0.025287628173828125, + "step": 24048 + }, + { + "epoch": 3.66943359375e-05, + "step": 24048, + "training_step_time": 0.11823272705078125 + }, + { + "epoch": 3.669586181640625e-05, + "model_forward_time": 0.024600744247436523, + "step": 24049 + }, + { + "epoch": 3.669586181640625e-05, + "step": 24049, + "training_step_time": 0.10256004333496094 + }, + { + "epoch": 3.66973876953125e-05, + "grad_norm": 0.08640720695257187, + "learning_rate": 1.0374310988756747e-05, + "loss": 0.0049, + "step": 24050 + }, + { + "epoch": 3.66973876953125e-05, + "model_forward_time": 0.024354219436645508, + "step": 24050 + }, + { + "epoch": 3.66973876953125e-05, + "step": 24050, + "training_step_time": 0.14849400520324707 + }, + { + "epoch": 3.669891357421875e-05, + "model_forward_time": 0.024599313735961914, + "step": 24051 + }, + { + "epoch": 3.669891357421875e-05, + "step": 24051, + "training_step_time": 0.17339777946472168 + }, + { + "epoch": 3.6700439453125e-05, + "model_forward_time": 0.024622201919555664, + "step": 24052 + }, + { + "epoch": 3.6700439453125e-05, + "step": 24052, + "training_step_time": 0.12133049964904785 + }, + { + "epoch": 3.670196533203125e-05, + "model_forward_time": 0.027564048767089844, + "step": 24053 + }, + { + "epoch": 3.670196533203125e-05, + "step": 24053, + "training_step_time": 0.13035273551940918 + }, + { + "epoch": 3.67034912109375e-05, + "model_forward_time": 0.025209426879882812, + "step": 24054 + }, + { + "epoch": 3.67034912109375e-05, + "step": 24054, + "training_step_time": 0.10977888107299805 + }, + { + "epoch": 3.670501708984375e-05, + "model_forward_time": 0.025180339813232422, + "step": 24055 + }, + { + "epoch": 3.670501708984375e-05, + "step": 24055, + "training_step_time": 0.10635042190551758 + }, + { + "epoch": 3.670654296875e-05, + "model_forward_time": 0.02494502067565918, + "step": 24056 + }, + { + "epoch": 3.670654296875e-05, + "step": 24056, + "training_step_time": 0.1087186336517334 + }, + { + "epoch": 3.670806884765625e-05, + "model_forward_time": 0.024883270263671875, + "step": 24057 + }, + { + "epoch": 3.670806884765625e-05, + "step": 24057, + "training_step_time": 0.10693931579589844 + }, + { + "epoch": 3.67095947265625e-05, + "model_forward_time": 0.02525186538696289, + "step": 24058 + }, + { + "epoch": 3.67095947265625e-05, + "step": 24058, + "training_step_time": 0.10685396194458008 + }, + { + "epoch": 3.671112060546875e-05, + "model_forward_time": 0.0251619815826416, + "step": 24059 + }, + { + "epoch": 3.671112060546875e-05, + "step": 24059, + "training_step_time": 0.10617518424987793 + }, + { + "epoch": 3.6712646484375e-05, + "grad_norm": 0.07657795399427414, + "learning_rate": 1.0340722563656107e-05, + "loss": 0.0083, + "step": 24060 + }, + { + "epoch": 3.6712646484375e-05, + "model_forward_time": 0.024883270263671875, + "step": 24060 + }, + { + "epoch": 3.6712646484375e-05, + "step": 24060, + "training_step_time": 0.11042499542236328 + }, + { + "epoch": 3.671417236328125e-05, + "model_forward_time": 0.0251467227935791, + "step": 24061 + }, + { + "epoch": 3.671417236328125e-05, + "step": 24061, + "training_step_time": 0.11004877090454102 + }, + { + "epoch": 3.67156982421875e-05, + "model_forward_time": 0.02520895004272461, + "step": 24062 + }, + { + "epoch": 3.67156982421875e-05, + "step": 24062, + "training_step_time": 0.1058967113494873 + }, + { + "epoch": 3.671722412109375e-05, + "model_forward_time": 0.02517247200012207, + "step": 24063 + }, + { + "epoch": 3.671722412109375e-05, + "step": 24063, + "training_step_time": 0.10699629783630371 + }, + { + "epoch": 3.671875e-05, + "model_forward_time": 0.024727821350097656, + "step": 24064 + }, + { + "epoch": 3.671875e-05, + "step": 24064, + "training_step_time": 0.11183881759643555 + }, + { + "epoch": 3.672027587890625e-05, + "model_forward_time": 0.024860143661499023, + "step": 24065 + }, + { + "epoch": 3.672027587890625e-05, + "step": 24065, + "training_step_time": 0.10896825790405273 + }, + { + "epoch": 3.67218017578125e-05, + "model_forward_time": 0.025084495544433594, + "step": 24066 + }, + { + "epoch": 3.67218017578125e-05, + "step": 24066, + "training_step_time": 0.10719466209411621 + }, + { + "epoch": 3.672332763671875e-05, + "model_forward_time": 0.025223493576049805, + "step": 24067 + }, + { + "epoch": 3.672332763671875e-05, + "step": 24067, + "training_step_time": 0.10651993751525879 + }, + { + "epoch": 3.6724853515625e-05, + "model_forward_time": 0.025406837463378906, + "step": 24068 + }, + { + "epoch": 3.6724853515625e-05, + "step": 24068, + "training_step_time": 0.11262702941894531 + }, + { + "epoch": 3.672637939453125e-05, + "model_forward_time": 0.02527165412902832, + "step": 24069 + }, + { + "epoch": 3.672637939453125e-05, + "step": 24069, + "training_step_time": 0.15921354293823242 + }, + { + "epoch": 3.67279052734375e-05, + "grad_norm": 0.10217463225126266, + "learning_rate": 1.0307182328317188e-05, + "loss": 0.0035, + "step": 24070 + }, + { + "epoch": 3.67279052734375e-05, + "model_forward_time": 0.024503707885742188, + "step": 24070 + }, + { + "epoch": 3.67279052734375e-05, + "step": 24070, + "training_step_time": 0.10456991195678711 + }, + { + "epoch": 3.672943115234375e-05, + "model_forward_time": 0.024464130401611328, + "step": 24071 + }, + { + "epoch": 3.672943115234375e-05, + "step": 24071, + "training_step_time": 0.13351702690124512 + }, + { + "epoch": 3.673095703125e-05, + "model_forward_time": 0.025415897369384766, + "step": 24072 + }, + { + "epoch": 3.673095703125e-05, + "step": 24072, + "training_step_time": 0.1625065803527832 + }, + { + "epoch": 3.673248291015625e-05, + "model_forward_time": 0.024666309356689453, + "step": 24073 + }, + { + "epoch": 3.673248291015625e-05, + "step": 24073, + "training_step_time": 0.2170882225036621 + }, + { + "epoch": 3.67340087890625e-05, + "model_forward_time": 0.024489164352416992, + "step": 24074 + }, + { + "epoch": 3.67340087890625e-05, + "step": 24074, + "training_step_time": 0.11351466178894043 + }, + { + "epoch": 3.673553466796875e-05, + "model_forward_time": 0.024282217025756836, + "step": 24075 + }, + { + "epoch": 3.673553466796875e-05, + "step": 24075, + "training_step_time": 0.20580601692199707 + }, + { + "epoch": 3.6737060546875e-05, + "model_forward_time": 0.02455735206604004, + "step": 24076 + }, + { + "epoch": 3.6737060546875e-05, + "step": 24076, + "training_step_time": 0.10820841789245605 + }, + { + "epoch": 3.673858642578125e-05, + "model_forward_time": 0.024129867553710938, + "step": 24077 + }, + { + "epoch": 3.673858642578125e-05, + "step": 24077, + "training_step_time": 0.14322400093078613 + }, + { + "epoch": 3.67401123046875e-05, + "model_forward_time": 0.024748802185058594, + "step": 24078 + }, + { + "epoch": 3.67401123046875e-05, + "step": 24078, + "training_step_time": 0.11382627487182617 + }, + { + "epoch": 3.674163818359375e-05, + "model_forward_time": 0.0245363712310791, + "step": 24079 + }, + { + "epoch": 3.674163818359375e-05, + "step": 24079, + "training_step_time": 0.1187901496887207 + }, + { + "epoch": 3.67431640625e-05, + "grad_norm": 0.24320381879806519, + "learning_rate": 1.0273690323494523e-05, + "loss": 0.0081, + "step": 24080 + }, + { + "epoch": 3.67431640625e-05, + "model_forward_time": 0.025028467178344727, + "step": 24080 + }, + { + "epoch": 3.67431640625e-05, + "step": 24080, + "training_step_time": 0.12297463417053223 + }, + { + "epoch": 3.674468994140625e-05, + "model_forward_time": 0.02501964569091797, + "step": 24081 + }, + { + "epoch": 3.674468994140625e-05, + "step": 24081, + "training_step_time": 0.17127132415771484 + }, + { + "epoch": 3.67462158203125e-05, + "model_forward_time": 0.024918556213378906, + "step": 24082 + }, + { + "epoch": 3.67462158203125e-05, + "step": 24082, + "training_step_time": 0.14585256576538086 + }, + { + "epoch": 3.674774169921875e-05, + "model_forward_time": 0.02424168586730957, + "step": 24083 + }, + { + "epoch": 3.674774169921875e-05, + "step": 24083, + "training_step_time": 0.10650968551635742 + }, + { + "epoch": 3.6749267578125e-05, + "model_forward_time": 0.024976253509521484, + "step": 24084 + }, + { + "epoch": 3.6749267578125e-05, + "step": 24084, + "training_step_time": 0.10765480995178223 + }, + { + "epoch": 3.675079345703125e-05, + "model_forward_time": 0.025284290313720703, + "step": 24085 + }, + { + "epoch": 3.675079345703125e-05, + "step": 24085, + "training_step_time": 0.11167001724243164 + }, + { + "epoch": 3.67523193359375e-05, + "model_forward_time": 0.025534868240356445, + "step": 24086 + }, + { + "epoch": 3.67523193359375e-05, + "step": 24086, + "training_step_time": 0.10933351516723633 + }, + { + "epoch": 3.675384521484375e-05, + "model_forward_time": 0.025104284286499023, + "step": 24087 + }, + { + "epoch": 3.675384521484375e-05, + "step": 24087, + "training_step_time": 0.19156122207641602 + }, + { + "epoch": 3.675537109375e-05, + "model_forward_time": 0.026508331298828125, + "step": 24088 + }, + { + "epoch": 3.675537109375e-05, + "step": 24088, + "training_step_time": 0.10448050498962402 + }, + { + "epoch": 3.675689697265625e-05, + "model_forward_time": 0.0250244140625, + "step": 24089 + }, + { + "epoch": 3.675689697265625e-05, + "step": 24089, + "training_step_time": 0.10562634468078613 + }, + { + "epoch": 3.67584228515625e-05, + "grad_norm": 0.1064799502491951, + "learning_rate": 1.0240246589884044e-05, + "loss": 0.0033, + "step": 24090 + }, + { + "epoch": 3.67584228515625e-05, + "model_forward_time": 0.026424884796142578, + "step": 24090 + }, + { + "epoch": 3.67584228515625e-05, + "step": 24090, + "training_step_time": 0.11082077026367188 + }, + { + "epoch": 3.675994873046875e-05, + "model_forward_time": 0.0261383056640625, + "step": 24091 + }, + { + "epoch": 3.675994873046875e-05, + "step": 24091, + "training_step_time": 0.10821914672851562 + }, + { + "epoch": 3.6761474609375e-05, + "model_forward_time": 0.025788307189941406, + "step": 24092 + }, + { + "epoch": 3.6761474609375e-05, + "step": 24092, + "training_step_time": 0.10869264602661133 + }, + { + "epoch": 3.676300048828125e-05, + "model_forward_time": 0.0257418155670166, + "step": 24093 + }, + { + "epoch": 3.676300048828125e-05, + "step": 24093, + "training_step_time": 0.10656070709228516 + }, + { + "epoch": 3.67645263671875e-05, + "model_forward_time": 0.02525186538696289, + "step": 24094 + }, + { + "epoch": 3.67645263671875e-05, + "step": 24094, + "training_step_time": 0.10399937629699707 + }, + { + "epoch": 3.676605224609375e-05, + "model_forward_time": 0.024868488311767578, + "step": 24095 + }, + { + "epoch": 3.676605224609375e-05, + "step": 24095, + "training_step_time": 0.13977551460266113 + }, + { + "epoch": 3.6767578125e-05, + "model_forward_time": 0.025123119354248047, + "step": 24096 + }, + { + "epoch": 3.6767578125e-05, + "step": 24096, + "training_step_time": 0.16968226432800293 + }, + { + "epoch": 3.676910400390625e-05, + "model_forward_time": 0.024972200393676758, + "step": 24097 + }, + { + "epoch": 3.676910400390625e-05, + "step": 24097, + "training_step_time": 0.10448741912841797 + }, + { + "epoch": 3.67706298828125e-05, + "model_forward_time": 0.02499556541442871, + "step": 24098 + }, + { + "epoch": 3.67706298828125e-05, + "step": 24098, + "training_step_time": 0.13866209983825684 + }, + { + "epoch": 3.677215576171875e-05, + "model_forward_time": 0.02551865577697754, + "step": 24099 + }, + { + "epoch": 3.677215576171875e-05, + "step": 24099, + "training_step_time": 0.1945338249206543 + }, + { + "epoch": 3.6773681640625e-05, + "grad_norm": 0.13784803450107574, + "learning_rate": 1.0206851168123077e-05, + "loss": 0.0073, + "step": 24100 + }, + { + "epoch": 3.6773681640625e-05, + "model_forward_time": 0.024990320205688477, + "step": 24100 + }, + { + "epoch": 3.6773681640625e-05, + "step": 24100, + "training_step_time": 0.10333991050720215 + }, + { + "epoch": 3.677520751953125e-05, + "model_forward_time": 0.025194168090820312, + "step": 24101 + }, + { + "epoch": 3.677520751953125e-05, + "step": 24101, + "training_step_time": 0.10208559036254883 + }, + { + "epoch": 3.67767333984375e-05, + "model_forward_time": 0.02661895751953125, + "step": 24102 + }, + { + "epoch": 3.67767333984375e-05, + "step": 24102, + "training_step_time": 0.10495328903198242 + }, + { + "epoch": 3.677825927734375e-05, + "model_forward_time": 0.026109695434570312, + "step": 24103 + }, + { + "epoch": 3.677825927734375e-05, + "step": 24103, + "training_step_time": 0.10863399505615234 + }, + { + "epoch": 3.677978515625e-05, + "model_forward_time": 0.026014328002929688, + "step": 24104 + }, + { + "epoch": 3.677978515625e-05, + "step": 24104, + "training_step_time": 0.11199164390563965 + }, + { + "epoch": 3.678131103515625e-05, + "model_forward_time": 0.025629520416259766, + "step": 24105 + }, + { + "epoch": 3.678131103515625e-05, + "step": 24105, + "training_step_time": 0.10424065589904785 + }, + { + "epoch": 3.67828369140625e-05, + "model_forward_time": 0.02532219886779785, + "step": 24106 + }, + { + "epoch": 3.67828369140625e-05, + "step": 24106, + "training_step_time": 0.10615921020507812 + }, + { + "epoch": 3.678436279296875e-05, + "model_forward_time": 0.025867462158203125, + "step": 24107 + }, + { + "epoch": 3.678436279296875e-05, + "step": 24107, + "training_step_time": 0.10522961616516113 + }, + { + "epoch": 3.6785888671875e-05, + "model_forward_time": 0.025787830352783203, + "step": 24108 + }, + { + "epoch": 3.6785888671875e-05, + "step": 24108, + "training_step_time": 0.10486817359924316 + }, + { + "epoch": 3.678741455078125e-05, + "model_forward_time": 0.02577495574951172, + "step": 24109 + }, + { + "epoch": 3.678741455078125e-05, + "step": 24109, + "training_step_time": 0.10868716239929199 + }, + { + "epoch": 3.67889404296875e-05, + "grad_norm": 0.11030536144971848, + "learning_rate": 1.0173504098790187e-05, + "loss": 0.0044, + "step": 24110 + }, + { + "epoch": 3.67889404296875e-05, + "model_forward_time": 0.02561783790588379, + "step": 24110 + }, + { + "epoch": 3.67889404296875e-05, + "step": 24110, + "training_step_time": 0.1055595874786377 + }, + { + "epoch": 3.679046630859375e-05, + "model_forward_time": 0.025632858276367188, + "step": 24111 + }, + { + "epoch": 3.679046630859375e-05, + "step": 24111, + "training_step_time": 0.10642528533935547 + }, + { + "epoch": 3.67919921875e-05, + "model_forward_time": 0.026107311248779297, + "step": 24112 + }, + { + "epoch": 3.67919921875e-05, + "step": 24112, + "training_step_time": 0.10688638687133789 + }, + { + "epoch": 3.679351806640625e-05, + "model_forward_time": 0.025941133499145508, + "step": 24113 + }, + { + "epoch": 3.679351806640625e-05, + "step": 24113, + "training_step_time": 0.21828055381774902 + }, + { + "epoch": 3.67950439453125e-05, + "model_forward_time": 0.025509357452392578, + "step": 24114 + }, + { + "epoch": 3.67950439453125e-05, + "step": 24114, + "training_step_time": 0.12597942352294922 + }, + { + "epoch": 3.679656982421875e-05, + "model_forward_time": 0.02488994598388672, + "step": 24115 + }, + { + "epoch": 3.679656982421875e-05, + "step": 24115, + "training_step_time": 0.1425619125366211 + }, + { + "epoch": 3.6798095703125e-05, + "model_forward_time": 0.025248050689697266, + "step": 24116 + }, + { + "epoch": 3.6798095703125e-05, + "step": 24116, + "training_step_time": 0.14768481254577637 + }, + { + "epoch": 3.679962158203125e-05, + "model_forward_time": 0.02538466453552246, + "step": 24117 + }, + { + "epoch": 3.679962158203125e-05, + "step": 24117, + "training_step_time": 0.22845911979675293 + }, + { + "epoch": 3.68011474609375e-05, + "model_forward_time": 0.025609493255615234, + "step": 24118 + }, + { + "epoch": 3.68011474609375e-05, + "step": 24118, + "training_step_time": 0.12885069847106934 + }, + { + "epoch": 3.680267333984375e-05, + "model_forward_time": 0.02861762046813965, + "step": 24119 + }, + { + "epoch": 3.680267333984375e-05, + "step": 24119, + "training_step_time": 0.1259927749633789 + }, + { + "epoch": 3.680419921875e-05, + "grad_norm": 0.0730748251080513, + "learning_rate": 1.0140205422405214e-05, + "loss": 0.0031, + "step": 24120 + }, + { + "epoch": 3.680419921875e-05, + "model_forward_time": 0.025124549865722656, + "step": 24120 + }, + { + "epoch": 3.680419921875e-05, + "step": 24120, + "training_step_time": 0.1855602264404297 + }, + { + "epoch": 3.680572509765625e-05, + "model_forward_time": 0.02525949478149414, + "step": 24121 + }, + { + "epoch": 3.680572509765625e-05, + "step": 24121, + "training_step_time": 0.2160933017730713 + }, + { + "epoch": 3.68072509765625e-05, + "model_forward_time": 0.025052547454833984, + "step": 24122 + }, + { + "epoch": 3.68072509765625e-05, + "step": 24122, + "training_step_time": 0.1080467700958252 + }, + { + "epoch": 3.680877685546875e-05, + "model_forward_time": 0.024290084838867188, + "step": 24123 + }, + { + "epoch": 3.680877685546875e-05, + "step": 24123, + "training_step_time": 0.12437939643859863 + }, + { + "epoch": 3.6810302734375e-05, + "model_forward_time": 0.025772571563720703, + "step": 24124 + }, + { + "epoch": 3.6810302734375e-05, + "step": 24124, + "training_step_time": 0.12514424324035645 + }, + { + "epoch": 3.681182861328125e-05, + "model_forward_time": 0.025779008865356445, + "step": 24125 + }, + { + "epoch": 3.681182861328125e-05, + "step": 24125, + "training_step_time": 0.21355390548706055 + }, + { + "epoch": 3.68133544921875e-05, + "model_forward_time": 0.025203466415405273, + "step": 24126 + }, + { + "epoch": 3.68133544921875e-05, + "step": 24126, + "training_step_time": 0.11249518394470215 + }, + { + "epoch": 3.681488037109375e-05, + "model_forward_time": 0.026461124420166016, + "step": 24127 + }, + { + "epoch": 3.681488037109375e-05, + "step": 24127, + "training_step_time": 0.10911107063293457 + }, + { + "epoch": 3.681640625e-05, + "model_forward_time": 0.02622675895690918, + "step": 24128 + }, + { + "epoch": 3.681640625e-05, + "step": 24128, + "training_step_time": 0.11655497550964355 + }, + { + "epoch": 3.681793212890625e-05, + "model_forward_time": 0.025938749313354492, + "step": 24129 + }, + { + "epoch": 3.681793212890625e-05, + "step": 24129, + "training_step_time": 0.10959720611572266 + }, + { + "epoch": 3.68194580078125e-05, + "grad_norm": 0.1205686703324318, + "learning_rate": 1.0106955179429183e-05, + "loss": 0.0138, + "step": 24130 + }, + { + "epoch": 3.68194580078125e-05, + "model_forward_time": 0.02564716339111328, + "step": 24130 + }, + { + "epoch": 3.68194580078125e-05, + "step": 24130, + "training_step_time": 0.18543100357055664 + }, + { + "epoch": 3.682098388671875e-05, + "model_forward_time": 0.025803565979003906, + "step": 24131 + }, + { + "epoch": 3.682098388671875e-05, + "step": 24131, + "training_step_time": 0.11078238487243652 + }, + { + "epoch": 3.6822509765625e-05, + "model_forward_time": 0.025426864624023438, + "step": 24132 + }, + { + "epoch": 3.6822509765625e-05, + "step": 24132, + "training_step_time": 0.10596632957458496 + }, + { + "epoch": 3.682403564453125e-05, + "model_forward_time": 0.0252993106842041, + "step": 24133 + }, + { + "epoch": 3.682403564453125e-05, + "step": 24133, + "training_step_time": 0.10480976104736328 + }, + { + "epoch": 3.68255615234375e-05, + "model_forward_time": 0.025949954986572266, + "step": 24134 + }, + { + "epoch": 3.68255615234375e-05, + "step": 24134, + "training_step_time": 0.10796904563903809 + }, + { + "epoch": 3.682708740234375e-05, + "model_forward_time": 0.02588677406311035, + "step": 24135 + }, + { + "epoch": 3.682708740234375e-05, + "step": 24135, + "training_step_time": 0.10493755340576172 + }, + { + "epoch": 3.682861328125e-05, + "model_forward_time": 0.026416778564453125, + "step": 24136 + }, + { + "epoch": 3.682861328125e-05, + "step": 24136, + "training_step_time": 0.11046147346496582 + }, + { + "epoch": 3.683013916015625e-05, + "model_forward_time": 0.025254487991333008, + "step": 24137 + }, + { + "epoch": 3.683013916015625e-05, + "step": 24137, + "training_step_time": 0.1023256778717041 + }, + { + "epoch": 3.68316650390625e-05, + "model_forward_time": 0.025026321411132812, + "step": 24138 + }, + { + "epoch": 3.68316650390625e-05, + "step": 24138, + "training_step_time": 0.15315842628479004 + }, + { + "epoch": 3.683319091796875e-05, + "model_forward_time": 0.025055408477783203, + "step": 24139 + }, + { + "epoch": 3.683319091796875e-05, + "step": 24139, + "training_step_time": 0.16449975967407227 + }, + { + "epoch": 3.6834716796875e-05, + "grad_norm": 0.11838909238576889, + "learning_rate": 1.0073753410264263e-05, + "loss": 0.0096, + "step": 24140 + }, + { + "epoch": 3.6834716796875e-05, + "model_forward_time": 0.02541828155517578, + "step": 24140 + }, + { + "epoch": 3.6834716796875e-05, + "step": 24140, + "training_step_time": 0.10796213150024414 + }, + { + "epoch": 3.683624267578125e-05, + "model_forward_time": 0.02563643455505371, + "step": 24141 + }, + { + "epoch": 3.683624267578125e-05, + "step": 24141, + "training_step_time": 0.12060427665710449 + }, + { + "epoch": 3.68377685546875e-05, + "model_forward_time": 0.025612354278564453, + "step": 24142 + }, + { + "epoch": 3.68377685546875e-05, + "step": 24142, + "training_step_time": 0.19051265716552734 + }, + { + "epoch": 3.683929443359375e-05, + "model_forward_time": 0.025469064712524414, + "step": 24143 + }, + { + "epoch": 3.683929443359375e-05, + "step": 24143, + "training_step_time": 0.10236263275146484 + }, + { + "epoch": 3.68408203125e-05, + "model_forward_time": 0.025121212005615234, + "step": 24144 + }, + { + "epoch": 3.68408203125e-05, + "step": 24144, + "training_step_time": 0.10287714004516602 + }, + { + "epoch": 3.684234619140625e-05, + "model_forward_time": 0.026030302047729492, + "step": 24145 + }, + { + "epoch": 3.684234619140625e-05, + "step": 24145, + "training_step_time": 0.10700154304504395 + }, + { + "epoch": 3.68438720703125e-05, + "model_forward_time": 0.025827646255493164, + "step": 24146 + }, + { + "epoch": 3.68438720703125e-05, + "step": 24146, + "training_step_time": 0.10430169105529785 + }, + { + "epoch": 3.684539794921875e-05, + "model_forward_time": 0.029242515563964844, + "step": 24147 + }, + { + "epoch": 3.684539794921875e-05, + "step": 24147, + "training_step_time": 0.1156926155090332 + }, + { + "epoch": 3.6846923828125e-05, + "model_forward_time": 0.025898218154907227, + "step": 24148 + }, + { + "epoch": 3.6846923828125e-05, + "step": 24148, + "training_step_time": 0.1133279800415039 + }, + { + "epoch": 3.684844970703125e-05, + "model_forward_time": 0.025713682174682617, + "step": 24149 + }, + { + "epoch": 3.684844970703125e-05, + "step": 24149, + "training_step_time": 0.11545777320861816 + }, + { + "epoch": 3.68499755859375e-05, + "grad_norm": 0.26032018661499023, + "learning_rate": 1.0040600155253765e-05, + "loss": 0.005, + "step": 24150 + }, + { + "epoch": 3.68499755859375e-05, + "model_forward_time": 0.025847673416137695, + "step": 24150 + }, + { + "epoch": 3.68499755859375e-05, + "step": 24150, + "training_step_time": 0.11114215850830078 + }, + { + "epoch": 3.685150146484375e-05, + "model_forward_time": 0.02635669708251953, + "step": 24151 + }, + { + "epoch": 3.685150146484375e-05, + "step": 24151, + "training_step_time": 0.11066985130310059 + }, + { + "epoch": 3.685302734375e-05, + "model_forward_time": 0.026253700256347656, + "step": 24152 + }, + { + "epoch": 3.685302734375e-05, + "step": 24152, + "training_step_time": 0.10852861404418945 + }, + { + "epoch": 3.685455322265625e-05, + "model_forward_time": 0.02539205551147461, + "step": 24153 + }, + { + "epoch": 3.685455322265625e-05, + "step": 24153, + "training_step_time": 0.10694146156311035 + }, + { + "epoch": 3.68560791015625e-05, + "model_forward_time": 0.025690317153930664, + "step": 24154 + }, + { + "epoch": 3.68560791015625e-05, + "step": 24154, + "training_step_time": 0.11032772064208984 + }, + { + "epoch": 3.685760498046875e-05, + "model_forward_time": 0.026186466217041016, + "step": 24155 + }, + { + "epoch": 3.685760498046875e-05, + "step": 24155, + "training_step_time": 0.10882782936096191 + }, + { + "epoch": 3.6859130859375e-05, + "model_forward_time": 0.025942325592041016, + "step": 24156 + }, + { + "epoch": 3.6859130859375e-05, + "step": 24156, + "training_step_time": 0.19700336456298828 + }, + { + "epoch": 3.686065673828125e-05, + "model_forward_time": 0.025162458419799805, + "step": 24157 + }, + { + "epoch": 3.686065673828125e-05, + "step": 24157, + "training_step_time": 0.10867452621459961 + }, + { + "epoch": 3.68621826171875e-05, + "model_forward_time": 0.02543163299560547, + "step": 24158 + }, + { + "epoch": 3.68621826171875e-05, + "step": 24158, + "training_step_time": 0.12447953224182129 + }, + { + "epoch": 3.686370849609375e-05, + "model_forward_time": 0.025775432586669922, + "step": 24159 + }, + { + "epoch": 3.686370849609375e-05, + "step": 24159, + "training_step_time": 0.1308298110961914 + }, + { + "epoch": 3.6865234375e-05, + "grad_norm": 0.12172437459230423, + "learning_rate": 1.0007495454682004e-05, + "loss": 0.0066, + "step": 24160 + }, + { + "epoch": 3.6865234375e-05, + "model_forward_time": 0.0257568359375, + "step": 24160 + }, + { + "epoch": 3.6865234375e-05, + "step": 24160, + "training_step_time": 0.12293839454650879 + }, + { + "epoch": 3.686676025390625e-05, + "model_forward_time": 0.024862051010131836, + "step": 24161 + }, + { + "epoch": 3.686676025390625e-05, + "step": 24161, + "training_step_time": 0.11708998680114746 + }, + { + "epoch": 3.68682861328125e-05, + "model_forward_time": 0.025666475296020508, + "step": 24162 + }, + { + "epoch": 3.68682861328125e-05, + "step": 24162, + "training_step_time": 0.10637140274047852 + }, + { + "epoch": 3.686981201171875e-05, + "model_forward_time": 0.026049137115478516, + "step": 24163 + }, + { + "epoch": 3.686981201171875e-05, + "step": 24163, + "training_step_time": 0.10631394386291504 + }, + { + "epoch": 3.6871337890625e-05, + "model_forward_time": 0.02570509910583496, + "step": 24164 + }, + { + "epoch": 3.6871337890625e-05, + "step": 24164, + "training_step_time": 0.11016988754272461 + }, + { + "epoch": 3.687286376953125e-05, + "model_forward_time": 0.02590036392211914, + "step": 24165 + }, + { + "epoch": 3.687286376953125e-05, + "step": 24165, + "training_step_time": 0.10574626922607422 + }, + { + "epoch": 3.68743896484375e-05, + "model_forward_time": 0.025386333465576172, + "step": 24166 + }, + { + "epoch": 3.68743896484375e-05, + "step": 24166, + "training_step_time": 0.12269306182861328 + }, + { + "epoch": 3.687591552734375e-05, + "model_forward_time": 0.025882720947265625, + "step": 24167 + }, + { + "epoch": 3.687591552734375e-05, + "step": 24167, + "training_step_time": 0.10982513427734375 + }, + { + "epoch": 3.687744140625e-05, + "model_forward_time": 0.02607560157775879, + "step": 24168 + }, + { + "epoch": 3.687744140625e-05, + "step": 24168, + "training_step_time": 0.11633181571960449 + }, + { + "epoch": 3.687896728515625e-05, + "model_forward_time": 0.025753498077392578, + "step": 24169 + }, + { + "epoch": 3.687896728515625e-05, + "step": 24169, + "training_step_time": 0.14510607719421387 + }, + { + "epoch": 3.68804931640625e-05, + "grad_norm": 0.123519167304039, + "learning_rate": 9.974439348774295e-06, + "loss": 0.0034, + "step": 24170 + }, + { + "epoch": 3.68804931640625e-05, + "model_forward_time": 0.025382041931152344, + "step": 24170 + }, + { + "epoch": 3.68804931640625e-05, + "step": 24170, + "training_step_time": 0.12420201301574707 + }, + { + "epoch": 3.688201904296875e-05, + "model_forward_time": 0.02507162094116211, + "step": 24171 + }, + { + "epoch": 3.688201904296875e-05, + "step": 24171, + "training_step_time": 0.18939566612243652 + }, + { + "epoch": 3.6883544921875e-05, + "model_forward_time": 0.027229785919189453, + "step": 24172 + }, + { + "epoch": 3.6883544921875e-05, + "step": 24172, + "training_step_time": 0.14023590087890625 + }, + { + "epoch": 3.688507080078125e-05, + "model_forward_time": 0.025155305862426758, + "step": 24173 + }, + { + "epoch": 3.688507080078125e-05, + "step": 24173, + "training_step_time": 0.11326003074645996 + }, + { + "epoch": 3.68865966796875e-05, + "model_forward_time": 0.025065898895263672, + "step": 24174 + }, + { + "epoch": 3.68865966796875e-05, + "step": 24174, + "training_step_time": 0.11387276649475098 + }, + { + "epoch": 3.688812255859375e-05, + "model_forward_time": 0.02569413185119629, + "step": 24175 + }, + { + "epoch": 3.688812255859375e-05, + "step": 24175, + "training_step_time": 0.1069328784942627 + }, + { + "epoch": 3.68896484375e-05, + "model_forward_time": 0.025589466094970703, + "step": 24176 + }, + { + "epoch": 3.68896484375e-05, + "step": 24176, + "training_step_time": 0.15832018852233887 + }, + { + "epoch": 3.689117431640625e-05, + "model_forward_time": 0.025401830673217773, + "step": 24177 + }, + { + "epoch": 3.689117431640625e-05, + "step": 24177, + "training_step_time": 0.14029550552368164 + }, + { + "epoch": 3.68927001953125e-05, + "model_forward_time": 0.025074243545532227, + "step": 24178 + }, + { + "epoch": 3.68927001953125e-05, + "step": 24178, + "training_step_time": 0.1045222282409668 + }, + { + "epoch": 3.689422607421875e-05, + "model_forward_time": 0.02561354637145996, + "step": 24179 + }, + { + "epoch": 3.689422607421875e-05, + "step": 24179, + "training_step_time": 0.1047205924987793 + }, + { + "epoch": 3.6895751953125e-05, + "grad_norm": 0.11646206676959991, + "learning_rate": 9.941431877696955e-06, + "loss": 0.0039, + "step": 24180 + }, + { + "epoch": 3.6895751953125e-05, + "model_forward_time": 0.025722265243530273, + "step": 24180 + }, + { + "epoch": 3.6895751953125e-05, + "step": 24180, + "training_step_time": 0.11010861396789551 + }, + { + "epoch": 3.689727783203125e-05, + "model_forward_time": 0.025756120681762695, + "step": 24181 + }, + { + "epoch": 3.689727783203125e-05, + "step": 24181, + "training_step_time": 0.10695147514343262 + }, + { + "epoch": 3.68988037109375e-05, + "model_forward_time": 0.025637388229370117, + "step": 24182 + }, + { + "epoch": 3.68988037109375e-05, + "step": 24182, + "training_step_time": 0.10614395141601562 + }, + { + "epoch": 3.690032958984375e-05, + "model_forward_time": 0.02583003044128418, + "step": 24183 + }, + { + "epoch": 3.690032958984375e-05, + "step": 24183, + "training_step_time": 0.10625147819519043 + }, + { + "epoch": 3.690185546875e-05, + "model_forward_time": 0.02498459815979004, + "step": 24184 + }, + { + "epoch": 3.690185546875e-05, + "step": 24184, + "training_step_time": 0.14630532264709473 + }, + { + "epoch": 3.690338134765625e-05, + "model_forward_time": 0.026002168655395508, + "step": 24185 + }, + { + "epoch": 3.690338134765625e-05, + "step": 24185, + "training_step_time": 0.16147828102111816 + }, + { + "epoch": 3.69049072265625e-05, + "model_forward_time": 0.025487661361694336, + "step": 24186 + }, + { + "epoch": 3.69049072265625e-05, + "step": 24186, + "training_step_time": 0.10625791549682617 + }, + { + "epoch": 3.690643310546875e-05, + "model_forward_time": 0.02519059181213379, + "step": 24187 + }, + { + "epoch": 3.690643310546875e-05, + "step": 24187, + "training_step_time": 0.13268351554870605 + }, + { + "epoch": 3.6907958984375e-05, + "model_forward_time": 0.02602100372314453, + "step": 24188 + }, + { + "epoch": 3.6907958984375e-05, + "step": 24188, + "training_step_time": 0.16809439659118652 + }, + { + "epoch": 3.690948486328125e-05, + "model_forward_time": 0.025990724563598633, + "step": 24189 + }, + { + "epoch": 3.690948486328125e-05, + "step": 24189, + "training_step_time": 0.10544872283935547 + }, + { + "epoch": 3.69110107421875e-05, + "grad_norm": 0.07278118282556534, + "learning_rate": 9.908473081557151e-06, + "loss": 0.0046, + "step": 24190 + }, + { + "epoch": 3.69110107421875e-05, + "model_forward_time": 0.02511310577392578, + "step": 24190 + }, + { + "epoch": 3.69110107421875e-05, + "step": 24190, + "training_step_time": 0.10655426979064941 + }, + { + "epoch": 3.691253662109375e-05, + "model_forward_time": 0.025409698486328125, + "step": 24191 + }, + { + "epoch": 3.691253662109375e-05, + "step": 24191, + "training_step_time": 0.10576176643371582 + }, + { + "epoch": 3.69140625e-05, + "model_forward_time": 0.028889179229736328, + "step": 24192 + }, + { + "epoch": 3.69140625e-05, + "step": 24192, + "training_step_time": 0.10943078994750977 + }, + { + "epoch": 3.691558837890625e-05, + "model_forward_time": 0.025516271591186523, + "step": 24193 + }, + { + "epoch": 3.691558837890625e-05, + "step": 24193, + "training_step_time": 0.10556554794311523 + }, + { + "epoch": 3.69171142578125e-05, + "model_forward_time": 0.025563478469848633, + "step": 24194 + }, + { + "epoch": 3.69171142578125e-05, + "step": 24194, + "training_step_time": 0.10485434532165527 + }, + { + "epoch": 3.691864013671875e-05, + "model_forward_time": 0.025976896286010742, + "step": 24195 + }, + { + "epoch": 3.691864013671875e-05, + "step": 24195, + "training_step_time": 0.1061553955078125 + }, + { + "epoch": 3.6920166015625e-05, + "model_forward_time": 0.02554798126220703, + "step": 24196 + }, + { + "epoch": 3.6920166015625e-05, + "step": 24196, + "training_step_time": 0.1088249683380127 + }, + { + "epoch": 3.692169189453125e-05, + "model_forward_time": 0.025852680206298828, + "step": 24197 + }, + { + "epoch": 3.692169189453125e-05, + "step": 24197, + "training_step_time": 0.10515618324279785 + }, + { + "epoch": 3.69232177734375e-05, + "model_forward_time": 0.025780677795410156, + "step": 24198 + }, + { + "epoch": 3.69232177734375e-05, + "step": 24198, + "training_step_time": 0.1053006649017334 + }, + { + "epoch": 3.692474365234375e-05, + "model_forward_time": 0.0259549617767334, + "step": 24199 + }, + { + "epoch": 3.692474365234375e-05, + "step": 24199, + "training_step_time": 0.10728788375854492 + }, + { + "epoch": 3.692626953125e-05, + "grad_norm": 0.07233019173145294, + "learning_rate": 9.875563000402948e-06, + "loss": 0.0031, + "step": 24200 + }, + { + "epoch": 3.692626953125e-05, + "model_forward_time": 0.025726795196533203, + "step": 24200 + }, + { + "epoch": 3.692626953125e-05, + "step": 24200, + "training_step_time": 0.10914993286132812 + }, + { + "epoch": 3.692779541015625e-05, + "model_forward_time": 0.02552509307861328, + "step": 24201 + }, + { + "epoch": 3.692779541015625e-05, + "step": 24201, + "training_step_time": 0.10451960563659668 + }, + { + "epoch": 3.69293212890625e-05, + "model_forward_time": 0.026088237762451172, + "step": 24202 + }, + { + "epoch": 3.69293212890625e-05, + "step": 24202, + "training_step_time": 0.13700604438781738 + }, + { + "epoch": 3.693084716796875e-05, + "model_forward_time": 0.025475740432739258, + "step": 24203 + }, + { + "epoch": 3.693084716796875e-05, + "step": 24203, + "training_step_time": 0.11335897445678711 + }, + { + "epoch": 3.6932373046875e-05, + "model_forward_time": 0.02568197250366211, + "step": 24204 + }, + { + "epoch": 3.6932373046875e-05, + "step": 24204, + "training_step_time": 0.10724139213562012 + }, + { + "epoch": 3.693389892578125e-05, + "model_forward_time": 0.02583765983581543, + "step": 24205 + }, + { + "epoch": 3.693389892578125e-05, + "step": 24205, + "training_step_time": 0.1203920841217041 + }, + { + "epoch": 3.69354248046875e-05, + "model_forward_time": 0.025754690170288086, + "step": 24206 + }, + { + "epoch": 3.69354248046875e-05, + "step": 24206, + "training_step_time": 0.10697770118713379 + }, + { + "epoch": 3.693695068359375e-05, + "model_forward_time": 0.026006460189819336, + "step": 24207 + }, + { + "epoch": 3.693695068359375e-05, + "step": 24207, + "training_step_time": 0.12242317199707031 + }, + { + "epoch": 3.69384765625e-05, + "model_forward_time": 0.0244905948638916, + "step": 24208 + }, + { + "epoch": 3.69384765625e-05, + "step": 24208, + "training_step_time": 0.1125631332397461 + }, + { + "epoch": 3.694000244140625e-05, + "model_forward_time": 0.026227951049804688, + "step": 24209 + }, + { + "epoch": 3.694000244140625e-05, + "step": 24209, + "training_step_time": 0.10888433456420898 + }, + { + "epoch": 3.69415283203125e-05, + "grad_norm": 0.2593819200992584, + "learning_rate": 9.842701674223187e-06, + "loss": 0.0045, + "step": 24210 + }, + { + "epoch": 3.69415283203125e-05, + "model_forward_time": 0.025493860244750977, + "step": 24210 + }, + { + "epoch": 3.69415283203125e-05, + "step": 24210, + "training_step_time": 0.13140583038330078 + }, + { + "epoch": 3.694305419921875e-05, + "model_forward_time": 0.025914669036865234, + "step": 24211 + }, + { + "epoch": 3.694305419921875e-05, + "step": 24211, + "training_step_time": 0.15052556991577148 + }, + { + "epoch": 3.6944580078125e-05, + "model_forward_time": 0.025086641311645508, + "step": 24212 + }, + { + "epoch": 3.6944580078125e-05, + "step": 24212, + "training_step_time": 0.15983295440673828 + }, + { + "epoch": 3.694610595703125e-05, + "model_forward_time": 0.02484440803527832, + "step": 24213 + }, + { + "epoch": 3.694610595703125e-05, + "step": 24213, + "training_step_time": 0.19882583618164062 + }, + { + "epoch": 3.69476318359375e-05, + "model_forward_time": 0.02533435821533203, + "step": 24214 + }, + { + "epoch": 3.69476318359375e-05, + "step": 24214, + "training_step_time": 0.14635109901428223 + }, + { + "epoch": 3.694915771484375e-05, + "model_forward_time": 0.027299880981445312, + "step": 24215 + }, + { + "epoch": 3.694915771484375e-05, + "step": 24215, + "training_step_time": 0.20888018608093262 + }, + { + "epoch": 3.695068359375e-05, + "model_forward_time": 0.025694847106933594, + "step": 24216 + }, + { + "epoch": 3.695068359375e-05, + "step": 24216, + "training_step_time": 0.13883352279663086 + }, + { + "epoch": 3.695220947265625e-05, + "model_forward_time": 0.024866580963134766, + "step": 24217 + }, + { + "epoch": 3.695220947265625e-05, + "step": 24217, + "training_step_time": 0.1363205909729004 + }, + { + "epoch": 3.69537353515625e-05, + "model_forward_time": 0.02494668960571289, + "step": 24218 + }, + { + "epoch": 3.69537353515625e-05, + "step": 24218, + "training_step_time": 0.1144707202911377 + }, + { + "epoch": 3.695526123046875e-05, + "model_forward_time": 0.025547504425048828, + "step": 24219 + }, + { + "epoch": 3.695526123046875e-05, + "step": 24219, + "training_step_time": 0.11354517936706543 + }, + { + "epoch": 3.6956787109375e-05, + "grad_norm": 0.08765245974063873, + "learning_rate": 9.809889142947476e-06, + "loss": 0.0079, + "step": 24220 + }, + { + "epoch": 3.6956787109375e-05, + "model_forward_time": 0.02567148208618164, + "step": 24220 + }, + { + "epoch": 3.6956787109375e-05, + "step": 24220, + "training_step_time": 0.10988211631774902 + }, + { + "epoch": 3.695831298828125e-05, + "model_forward_time": 0.02645587921142578, + "step": 24221 + }, + { + "epoch": 3.695831298828125e-05, + "step": 24221, + "training_step_time": 0.10975050926208496 + }, + { + "epoch": 3.69598388671875e-05, + "model_forward_time": 0.028138399124145508, + "step": 24222 + }, + { + "epoch": 3.69598388671875e-05, + "step": 24222, + "training_step_time": 0.12267899513244629 + }, + { + "epoch": 3.696136474609375e-05, + "model_forward_time": 0.025582075119018555, + "step": 24223 + }, + { + "epoch": 3.696136474609375e-05, + "step": 24223, + "training_step_time": 0.12055397033691406 + }, + { + "epoch": 3.6962890625e-05, + "model_forward_time": 0.0255279541015625, + "step": 24224 + }, + { + "epoch": 3.6962890625e-05, + "step": 24224, + "training_step_time": 0.10575413703918457 + }, + { + "epoch": 3.696441650390625e-05, + "model_forward_time": 0.025485754013061523, + "step": 24225 + }, + { + "epoch": 3.696441650390625e-05, + "step": 24225, + "training_step_time": 0.10703039169311523 + }, + { + "epoch": 3.69659423828125e-05, + "model_forward_time": 0.025424718856811523, + "step": 24226 + }, + { + "epoch": 3.69659423828125e-05, + "step": 24226, + "training_step_time": 0.1045675277709961 + }, + { + "epoch": 3.696746826171875e-05, + "model_forward_time": 0.02596759796142578, + "step": 24227 + }, + { + "epoch": 3.696746826171875e-05, + "step": 24227, + "training_step_time": 0.10712075233459473 + }, + { + "epoch": 3.6968994140625e-05, + "model_forward_time": 0.025853872299194336, + "step": 24228 + }, + { + "epoch": 3.6968994140625e-05, + "step": 24228, + "training_step_time": 0.10766386985778809 + }, + { + "epoch": 3.697052001953125e-05, + "model_forward_time": 0.026806354522705078, + "step": 24229 + }, + { + "epoch": 3.697052001953125e-05, + "step": 24229, + "training_step_time": 0.10610604286193848 + }, + { + "epoch": 3.69720458984375e-05, + "grad_norm": 0.09023567289113998, + "learning_rate": 9.777125446446133e-06, + "loss": 0.0079, + "step": 24230 + }, + { + "epoch": 3.69720458984375e-05, + "model_forward_time": 0.02937483787536621, + "step": 24230 + }, + { + "epoch": 3.69720458984375e-05, + "step": 24230, + "training_step_time": 0.13452911376953125 + }, + { + "epoch": 3.697357177734375e-05, + "model_forward_time": 0.025794029235839844, + "step": 24231 + }, + { + "epoch": 3.697357177734375e-05, + "step": 24231, + "training_step_time": 0.16328811645507812 + }, + { + "epoch": 3.697509765625e-05, + "model_forward_time": 0.025360107421875, + "step": 24232 + }, + { + "epoch": 3.697509765625e-05, + "step": 24232, + "training_step_time": 0.11490869522094727 + }, + { + "epoch": 3.697662353515625e-05, + "model_forward_time": 0.02565741539001465, + "step": 24233 + }, + { + "epoch": 3.697662353515625e-05, + "step": 24233, + "training_step_time": 0.1259596347808838 + }, + { + "epoch": 3.69781494140625e-05, + "model_forward_time": 0.025437593460083008, + "step": 24234 + }, + { + "epoch": 3.69781494140625e-05, + "step": 24234, + "training_step_time": 0.19434595108032227 + }, + { + "epoch": 3.697967529296875e-05, + "model_forward_time": 0.02480769157409668, + "step": 24235 + }, + { + "epoch": 3.697967529296875e-05, + "step": 24235, + "training_step_time": 0.10785484313964844 + }, + { + "epoch": 3.6981201171875e-05, + "model_forward_time": 0.025592803955078125, + "step": 24236 + }, + { + "epoch": 3.6981201171875e-05, + "step": 24236, + "training_step_time": 0.10455536842346191 + }, + { + "epoch": 3.698272705078125e-05, + "model_forward_time": 0.027256011962890625, + "step": 24237 + }, + { + "epoch": 3.698272705078125e-05, + "step": 24237, + "training_step_time": 0.10758209228515625 + }, + { + "epoch": 3.69842529296875e-05, + "model_forward_time": 0.026010513305664062, + "step": 24238 + }, + { + "epoch": 3.69842529296875e-05, + "step": 24238, + "training_step_time": 0.10954928398132324 + }, + { + "epoch": 3.698577880859375e-05, + "model_forward_time": 0.026130199432373047, + "step": 24239 + }, + { + "epoch": 3.698577880859375e-05, + "step": 24239, + "training_step_time": 0.10959863662719727 + }, + { + "epoch": 3.69873046875e-05, + "grad_norm": 0.06264301389455795, + "learning_rate": 9.744410624530148e-06, + "loss": 0.0039, + "step": 24240 + }, + { + "epoch": 3.69873046875e-05, + "model_forward_time": 0.0256500244140625, + "step": 24240 + }, + { + "epoch": 3.69873046875e-05, + "step": 24240, + "training_step_time": 0.10589003562927246 + }, + { + "epoch": 3.698883056640625e-05, + "model_forward_time": 0.025907516479492188, + "step": 24241 + }, + { + "epoch": 3.698883056640625e-05, + "step": 24241, + "training_step_time": 0.10596799850463867 + }, + { + "epoch": 3.69903564453125e-05, + "model_forward_time": 0.025638818740844727, + "step": 24242 + }, + { + "epoch": 3.69903564453125e-05, + "step": 24242, + "training_step_time": 0.10683965682983398 + }, + { + "epoch": 3.699188232421875e-05, + "model_forward_time": 0.02575397491455078, + "step": 24243 + }, + { + "epoch": 3.699188232421875e-05, + "step": 24243, + "training_step_time": 0.10543656349182129 + }, + { + "epoch": 3.6993408203125e-05, + "model_forward_time": 0.02575373649597168, + "step": 24244 + }, + { + "epoch": 3.6993408203125e-05, + "step": 24244, + "training_step_time": 0.10977721214294434 + }, + { + "epoch": 3.699493408203125e-05, + "model_forward_time": 0.02539825439453125, + "step": 24245 + }, + { + "epoch": 3.699493408203125e-05, + "step": 24245, + "training_step_time": 0.1150813102722168 + }, + { + "epoch": 3.69964599609375e-05, + "model_forward_time": 0.02538752555847168, + "step": 24246 + }, + { + "epoch": 3.69964599609375e-05, + "step": 24246, + "training_step_time": 0.10520410537719727 + }, + { + "epoch": 3.699798583984375e-05, + "model_forward_time": 0.02498030662536621, + "step": 24247 + }, + { + "epoch": 3.699798583984375e-05, + "step": 24247, + "training_step_time": 0.10857295989990234 + }, + { + "epoch": 3.699951171875e-05, + "model_forward_time": 0.025450468063354492, + "step": 24248 + }, + { + "epoch": 3.699951171875e-05, + "step": 24248, + "training_step_time": 0.14297866821289062 + }, + { + "epoch": 3.700103759765625e-05, + "model_forward_time": 0.0248563289642334, + "step": 24249 + }, + { + "epoch": 3.700103759765625e-05, + "step": 24249, + "training_step_time": 0.12423872947692871 + }, + { + "epoch": 3.70025634765625e-05, + "grad_norm": 0.07255098223686218, + "learning_rate": 9.711744716951093e-06, + "loss": 0.0054, + "step": 24250 + }, + { + "epoch": 3.70025634765625e-05, + "model_forward_time": 0.025104284286499023, + "step": 24250 + }, + { + "epoch": 3.70025634765625e-05, + "step": 24250, + "training_step_time": 0.1201024055480957 + }, + { + "epoch": 3.700408935546875e-05, + "model_forward_time": 0.025100231170654297, + "step": 24251 + }, + { + "epoch": 3.700408935546875e-05, + "step": 24251, + "training_step_time": 0.11166095733642578 + }, + { + "epoch": 3.7005615234375e-05, + "model_forward_time": 0.024966716766357422, + "step": 24252 + }, + { + "epoch": 3.7005615234375e-05, + "step": 24252, + "training_step_time": 0.1812901496887207 + }, + { + "epoch": 3.700714111328125e-05, + "model_forward_time": 0.02512955665588379, + "step": 24253 + }, + { + "epoch": 3.700714111328125e-05, + "step": 24253, + "training_step_time": 0.13298702239990234 + }, + { + "epoch": 3.70086669921875e-05, + "model_forward_time": 0.024826526641845703, + "step": 24254 + }, + { + "epoch": 3.70086669921875e-05, + "step": 24254, + "training_step_time": 0.11890721321105957 + }, + { + "epoch": 3.701019287109375e-05, + "model_forward_time": 0.02501845359802246, + "step": 24255 + }, + { + "epoch": 3.701019287109375e-05, + "step": 24255, + "training_step_time": 0.10590553283691406 + }, + { + "epoch": 3.701171875e-05, + "model_forward_time": 0.024799108505249023, + "step": 24256 + }, + { + "epoch": 3.701171875e-05, + "step": 24256, + "training_step_time": 0.10848045349121094 + }, + { + "epoch": 3.701324462890625e-05, + "model_forward_time": 0.02550530433654785, + "step": 24257 + }, + { + "epoch": 3.701324462890625e-05, + "step": 24257, + "training_step_time": 0.10931611061096191 + }, + { + "epoch": 3.70147705078125e-05, + "model_forward_time": 0.025151729583740234, + "step": 24258 + }, + { + "epoch": 3.70147705078125e-05, + "step": 24258, + "training_step_time": 0.10706925392150879 + }, + { + "epoch": 3.701629638671875e-05, + "model_forward_time": 0.02559828758239746, + "step": 24259 + }, + { + "epoch": 3.701629638671875e-05, + "step": 24259, + "training_step_time": 0.11228418350219727 + }, + { + "epoch": 3.7017822265625e-05, + "grad_norm": 0.13656899333000183, + "learning_rate": 9.679127763401152e-06, + "loss": 0.0037, + "step": 24260 + }, + { + "epoch": 3.7017822265625e-05, + "model_forward_time": 0.02507615089416504, + "step": 24260 + }, + { + "epoch": 3.7017822265625e-05, + "step": 24260, + "training_step_time": 0.1109464168548584 + }, + { + "epoch": 3.701934814453125e-05, + "model_forward_time": 0.02564692497253418, + "step": 24261 + }, + { + "epoch": 3.701934814453125e-05, + "step": 24261, + "training_step_time": 0.17238974571228027 + }, + { + "epoch": 3.70208740234375e-05, + "model_forward_time": 0.0242769718170166, + "step": 24262 + }, + { + "epoch": 3.70208740234375e-05, + "step": 24262, + "training_step_time": 0.18323302268981934 + }, + { + "epoch": 3.702239990234375e-05, + "model_forward_time": 0.025003671646118164, + "step": 24263 + }, + { + "epoch": 3.702239990234375e-05, + "step": 24263, + "training_step_time": 0.14184880256652832 + }, + { + "epoch": 3.702392578125e-05, + "model_forward_time": 0.024840593338012695, + "step": 24264 + }, + { + "epoch": 3.702392578125e-05, + "step": 24264, + "training_step_time": 0.18009424209594727 + }, + { + "epoch": 3.702545166015625e-05, + "model_forward_time": 0.024814367294311523, + "step": 24265 + }, + { + "epoch": 3.702545166015625e-05, + "step": 24265, + "training_step_time": 0.19361662864685059 + }, + { + "epoch": 3.70269775390625e-05, + "model_forward_time": 0.026308298110961914, + "step": 24266 + }, + { + "epoch": 3.70269775390625e-05, + "step": 24266, + "training_step_time": 0.12687468528747559 + }, + { + "epoch": 3.702850341796875e-05, + "model_forward_time": 0.024618864059448242, + "step": 24267 + }, + { + "epoch": 3.702850341796875e-05, + "step": 24267, + "training_step_time": 0.10491251945495605 + }, + { + "epoch": 3.7030029296875e-05, + "model_forward_time": 0.025823354721069336, + "step": 24268 + }, + { + "epoch": 3.7030029296875e-05, + "step": 24268, + "training_step_time": 0.11254501342773438 + }, + { + "epoch": 3.703155517578125e-05, + "model_forward_time": 0.025709152221679688, + "step": 24269 + }, + { + "epoch": 3.703155517578125e-05, + "step": 24269, + "training_step_time": 0.11238861083984375 + }, + { + "epoch": 3.70330810546875e-05, + "grad_norm": 0.39457446336746216, + "learning_rate": 9.646559803512994e-06, + "loss": 0.0055, + "step": 24270 + }, + { + "epoch": 3.70330810546875e-05, + "model_forward_time": 0.025118350982666016, + "step": 24270 + }, + { + "epoch": 3.70330810546875e-05, + "step": 24270, + "training_step_time": 0.10356450080871582 + }, + { + "epoch": 3.703460693359375e-05, + "model_forward_time": 0.025590181350708008, + "step": 24271 + }, + { + "epoch": 3.703460693359375e-05, + "step": 24271, + "training_step_time": 0.10285258293151855 + }, + { + "epoch": 3.70361328125e-05, + "model_forward_time": 0.02526068687438965, + "step": 24272 + }, + { + "epoch": 3.70361328125e-05, + "step": 24272, + "training_step_time": 0.10517120361328125 + }, + { + "epoch": 3.703765869140625e-05, + "model_forward_time": 0.025374174118041992, + "step": 24273 + }, + { + "epoch": 3.703765869140625e-05, + "step": 24273, + "training_step_time": 0.10662317276000977 + }, + { + "epoch": 3.70391845703125e-05, + "model_forward_time": 0.025262117385864258, + "step": 24274 + }, + { + "epoch": 3.70391845703125e-05, + "step": 24274, + "training_step_time": 0.10935187339782715 + }, + { + "epoch": 3.704071044921875e-05, + "model_forward_time": 0.025469064712524414, + "step": 24275 + }, + { + "epoch": 3.704071044921875e-05, + "step": 24275, + "training_step_time": 0.14448213577270508 + }, + { + "epoch": 3.7042236328125e-05, + "model_forward_time": 0.025028705596923828, + "step": 24276 + }, + { + "epoch": 3.7042236328125e-05, + "step": 24276, + "training_step_time": 0.1568596363067627 + }, + { + "epoch": 3.704376220703125e-05, + "model_forward_time": 0.024761676788330078, + "step": 24277 + }, + { + "epoch": 3.704376220703125e-05, + "step": 24277, + "training_step_time": 0.10796070098876953 + }, + { + "epoch": 3.70452880859375e-05, + "model_forward_time": 0.02533578872680664, + "step": 24278 + }, + { + "epoch": 3.70452880859375e-05, + "step": 24278, + "training_step_time": 0.13182473182678223 + }, + { + "epoch": 3.704681396484375e-05, + "model_forward_time": 0.025144577026367188, + "step": 24279 + }, + { + "epoch": 3.704681396484375e-05, + "step": 24279, + "training_step_time": 0.11942291259765625 + }, + { + "epoch": 3.704833984375e-05, + "grad_norm": 0.25070905685424805, + "learning_rate": 9.614040876859748e-06, + "loss": 0.0054, + "step": 24280 + }, + { + "epoch": 3.704833984375e-05, + "model_forward_time": 0.02584052085876465, + "step": 24280 + }, + { + "epoch": 3.704833984375e-05, + "step": 24280, + "training_step_time": 0.11781001091003418 + }, + { + "epoch": 3.704986572265625e-05, + "model_forward_time": 0.025209426879882812, + "step": 24281 + }, + { + "epoch": 3.704986572265625e-05, + "step": 24281, + "training_step_time": 0.10901546478271484 + }, + { + "epoch": 3.70513916015625e-05, + "model_forward_time": 0.025323867797851562, + "step": 24282 + }, + { + "epoch": 3.70513916015625e-05, + "step": 24282, + "training_step_time": 0.10624408721923828 + }, + { + "epoch": 3.705291748046875e-05, + "model_forward_time": 0.025385141372680664, + "step": 24283 + }, + { + "epoch": 3.705291748046875e-05, + "step": 24283, + "training_step_time": 0.10848069190979004 + }, + { + "epoch": 3.7054443359375e-05, + "model_forward_time": 0.024928569793701172, + "step": 24284 + }, + { + "epoch": 3.7054443359375e-05, + "step": 24284, + "training_step_time": 0.10614657402038574 + }, + { + "epoch": 3.705596923828125e-05, + "model_forward_time": 0.025206327438354492, + "step": 24285 + }, + { + "epoch": 3.705596923828125e-05, + "step": 24285, + "training_step_time": 0.11389946937561035 + }, + { + "epoch": 3.70574951171875e-05, + "model_forward_time": 0.0251007080078125, + "step": 24286 + }, + { + "epoch": 3.70574951171875e-05, + "step": 24286, + "training_step_time": 0.10445451736450195 + }, + { + "epoch": 3.705902099609375e-05, + "model_forward_time": 0.025239944458007812, + "step": 24287 + }, + { + "epoch": 3.705902099609375e-05, + "step": 24287, + "training_step_time": 0.10410881042480469 + }, + { + "epoch": 3.7060546875e-05, + "model_forward_time": 0.02525496482849121, + "step": 24288 + }, + { + "epoch": 3.7060546875e-05, + "step": 24288, + "training_step_time": 0.10597419738769531 + }, + { + "epoch": 3.706207275390625e-05, + "model_forward_time": 0.02528691291809082, + "step": 24289 + }, + { + "epoch": 3.706207275390625e-05, + "step": 24289, + "training_step_time": 0.10604691505432129 + }, + { + "epoch": 3.70635986328125e-05, + "grad_norm": 0.14922821521759033, + "learning_rate": 9.581571022954988e-06, + "loss": 0.0094, + "step": 24290 + }, + { + "epoch": 3.70635986328125e-05, + "model_forward_time": 0.02492213249206543, + "step": 24290 + }, + { + "epoch": 3.70635986328125e-05, + "step": 24290, + "training_step_time": 0.10851073265075684 + }, + { + "epoch": 3.706512451171875e-05, + "model_forward_time": 0.025308609008789062, + "step": 24291 + }, + { + "epoch": 3.706512451171875e-05, + "step": 24291, + "training_step_time": 0.10402274131774902 + }, + { + "epoch": 3.7066650390625e-05, + "model_forward_time": 0.0250089168548584, + "step": 24292 + }, + { + "epoch": 3.7066650390625e-05, + "step": 24292, + "training_step_time": 0.10638809204101562 + }, + { + "epoch": 3.706817626953125e-05, + "model_forward_time": 0.02515721321105957, + "step": 24293 + }, + { + "epoch": 3.706817626953125e-05, + "step": 24293, + "training_step_time": 0.10640192031860352 + }, + { + "epoch": 3.70697021484375e-05, + "model_forward_time": 0.02560257911682129, + "step": 24294 + }, + { + "epoch": 3.70697021484375e-05, + "step": 24294, + "training_step_time": 0.17103195190429688 + }, + { + "epoch": 3.707122802734375e-05, + "model_forward_time": 0.02433156967163086, + "step": 24295 + }, + { + "epoch": 3.707122802734375e-05, + "step": 24295, + "training_step_time": 0.11850714683532715 + }, + { + "epoch": 3.707275390625e-05, + "model_forward_time": 0.0244901180267334, + "step": 24296 + }, + { + "epoch": 3.707275390625e-05, + "step": 24296, + "training_step_time": 0.1273956298828125 + }, + { + "epoch": 3.707427978515625e-05, + "model_forward_time": 0.025173664093017578, + "step": 24297 + }, + { + "epoch": 3.707427978515625e-05, + "step": 24297, + "training_step_time": 0.11911797523498535 + }, + { + "epoch": 3.70758056640625e-05, + "model_forward_time": 0.027826547622680664, + "step": 24298 + }, + { + "epoch": 3.70758056640625e-05, + "step": 24298, + "training_step_time": 0.10586071014404297 + }, + { + "epoch": 3.707733154296875e-05, + "model_forward_time": 0.025238990783691406, + "step": 24299 + }, + { + "epoch": 3.707733154296875e-05, + "step": 24299, + "training_step_time": 0.2242581844329834 + }, + { + "epoch": 3.7078857421875e-05, + "grad_norm": 0.1144566461443901, + "learning_rate": 9.549150281252633e-06, + "loss": 0.011, + "step": 24300 + }, + { + "epoch": 3.7078857421875e-05, + "model_forward_time": 0.024498939514160156, + "step": 24300 + }, + { + "epoch": 3.7078857421875e-05, + "step": 24300, + "training_step_time": 0.10791873931884766 + }, + { + "epoch": 3.708038330078125e-05, + "model_forward_time": 0.023784399032592773, + "step": 24301 + }, + { + "epoch": 3.708038330078125e-05, + "step": 24301, + "training_step_time": 0.10400652885437012 + }, + { + "epoch": 3.70819091796875e-05, + "model_forward_time": 0.02714848518371582, + "step": 24302 + }, + { + "epoch": 3.70819091796875e-05, + "step": 24302, + "training_step_time": 0.10984468460083008 + }, + { + "epoch": 3.708343505859375e-05, + "model_forward_time": 0.02475762367248535, + "step": 24303 + }, + { + "epoch": 3.708343505859375e-05, + "step": 24303, + "training_step_time": 0.10720181465148926 + }, + { + "epoch": 3.70849609375e-05, + "model_forward_time": 0.025285005569458008, + "step": 24304 + }, + { + "epoch": 3.70849609375e-05, + "step": 24304, + "training_step_time": 0.19831514358520508 + }, + { + "epoch": 3.708648681640625e-05, + "model_forward_time": 0.02477741241455078, + "step": 24305 + }, + { + "epoch": 3.708648681640625e-05, + "step": 24305, + "training_step_time": 0.10388517379760742 + }, + { + "epoch": 3.70880126953125e-05, + "model_forward_time": 0.024883747100830078, + "step": 24306 + }, + { + "epoch": 3.70880126953125e-05, + "step": 24306, + "training_step_time": 0.10828399658203125 + }, + { + "epoch": 3.708953857421875e-05, + "model_forward_time": 0.025094270706176758, + "step": 24307 + }, + { + "epoch": 3.708953857421875e-05, + "step": 24307, + "training_step_time": 0.13716506958007812 + }, + { + "epoch": 3.7091064453125e-05, + "model_forward_time": 0.024924278259277344, + "step": 24308 + }, + { + "epoch": 3.7091064453125e-05, + "step": 24308, + "training_step_time": 0.12142634391784668 + }, + { + "epoch": 3.709259033203125e-05, + "model_forward_time": 0.024669408798217773, + "step": 24309 + }, + { + "epoch": 3.709259033203125e-05, + "step": 24309, + "training_step_time": 0.11022329330444336 + }, + { + "epoch": 3.70941162109375e-05, + "grad_norm": 0.36761602759361267, + "learning_rate": 9.51677869114696e-06, + "loss": 0.0076, + "step": 24310 + }, + { + "epoch": 3.70941162109375e-05, + "model_forward_time": 0.024998903274536133, + "step": 24310 + }, + { + "epoch": 3.70941162109375e-05, + "step": 24310, + "training_step_time": 0.11482763290405273 + }, + { + "epoch": 3.709564208984375e-05, + "model_forward_time": 0.024855852127075195, + "step": 24311 + }, + { + "epoch": 3.709564208984375e-05, + "step": 24311, + "training_step_time": 0.1150212287902832 + }, + { + "epoch": 3.709716796875e-05, + "model_forward_time": 0.025383472442626953, + "step": 24312 + }, + { + "epoch": 3.709716796875e-05, + "step": 24312, + "training_step_time": 0.10722684860229492 + }, + { + "epoch": 3.709869384765625e-05, + "model_forward_time": 0.026248693466186523, + "step": 24313 + }, + { + "epoch": 3.709869384765625e-05, + "step": 24313, + "training_step_time": 0.11372828483581543 + }, + { + "epoch": 3.71002197265625e-05, + "model_forward_time": 0.025073766708374023, + "step": 24314 + }, + { + "epoch": 3.71002197265625e-05, + "step": 24314, + "training_step_time": 0.11293148994445801 + }, + { + "epoch": 3.710174560546875e-05, + "model_forward_time": 0.025626182556152344, + "step": 24315 + }, + { + "epoch": 3.710174560546875e-05, + "step": 24315, + "training_step_time": 0.10316872596740723 + }, + { + "epoch": 3.7103271484375e-05, + "model_forward_time": 0.02488255500793457, + "step": 24316 + }, + { + "epoch": 3.7103271484375e-05, + "step": 24316, + "training_step_time": 0.19737744331359863 + }, + { + "epoch": 3.710479736328125e-05, + "model_forward_time": 0.02450847625732422, + "step": 24317 + }, + { + "epoch": 3.710479736328125e-05, + "step": 24317, + "training_step_time": 0.1007838249206543 + }, + { + "epoch": 3.71063232421875e-05, + "model_forward_time": 0.024889469146728516, + "step": 24318 + }, + { + "epoch": 3.71063232421875e-05, + "step": 24318, + "training_step_time": 0.10272645950317383 + }, + { + "epoch": 3.710784912109375e-05, + "model_forward_time": 0.024867773056030273, + "step": 24319 + }, + { + "epoch": 3.710784912109375e-05, + "step": 24319, + "training_step_time": 0.1074526309967041 + }, + { + "epoch": 3.7109375e-05, + "grad_norm": 0.15049193799495697, + "learning_rate": 9.484456291972487e-06, + "loss": 0.0053, + "step": 24320 + }, + { + "epoch": 3.7109375e-05, + "model_forward_time": 0.025033235549926758, + "step": 24320 + }, + { + "epoch": 3.7109375e-05, + "step": 24320, + "training_step_time": 0.13138723373413086 + }, + { + "epoch": 3.711090087890625e-05, + "model_forward_time": 0.02538895606994629, + "step": 24321 + }, + { + "epoch": 3.711090087890625e-05, + "step": 24321, + "training_step_time": 0.13435578346252441 + }, + { + "epoch": 3.71124267578125e-05, + "model_forward_time": 0.024294614791870117, + "step": 24322 + }, + { + "epoch": 3.71124267578125e-05, + "step": 24322, + "training_step_time": 0.17002367973327637 + }, + { + "epoch": 3.711395263671875e-05, + "model_forward_time": 0.02425360679626465, + "step": 24323 + }, + { + "epoch": 3.711395263671875e-05, + "step": 24323, + "training_step_time": 0.17560982704162598 + }, + { + "epoch": 3.7115478515625e-05, + "model_forward_time": 0.024230480194091797, + "step": 24324 + }, + { + "epoch": 3.7115478515625e-05, + "step": 24324, + "training_step_time": 0.16555118560791016 + }, + { + "epoch": 3.711700439453125e-05, + "model_forward_time": 0.02448725700378418, + "step": 24325 + }, + { + "epoch": 3.711700439453125e-05, + "step": 24325, + "training_step_time": 0.17579197883605957 + }, + { + "epoch": 3.71185302734375e-05, + "model_forward_time": 0.024272441864013672, + "step": 24326 + }, + { + "epoch": 3.71185302734375e-05, + "step": 24326, + "training_step_time": 0.14416766166687012 + }, + { + "epoch": 3.712005615234375e-05, + "model_forward_time": 0.02452373504638672, + "step": 24327 + }, + { + "epoch": 3.712005615234375e-05, + "step": 24327, + "training_step_time": 0.1298818588256836 + }, + { + "epoch": 3.712158203125e-05, + "model_forward_time": 0.024457216262817383, + "step": 24328 + }, + { + "epoch": 3.712158203125e-05, + "step": 24328, + "training_step_time": 0.12549948692321777 + }, + { + "epoch": 3.712310791015625e-05, + "model_forward_time": 0.02436232566833496, + "step": 24329 + }, + { + "epoch": 3.712310791015625e-05, + "step": 24329, + "training_step_time": 0.11989140510559082 + }, + { + "epoch": 3.71246337890625e-05, + "grad_norm": 0.11885454505681992, + "learning_rate": 9.452183123004e-06, + "loss": 0.0027, + "step": 24330 + }, + { + "epoch": 3.71246337890625e-05, + "model_forward_time": 0.024841785430908203, + "step": 24330 + }, + { + "epoch": 3.71246337890625e-05, + "step": 24330, + "training_step_time": 0.11301326751708984 + }, + { + "epoch": 3.712615966796875e-05, + "model_forward_time": 0.02505350112915039, + "step": 24331 + }, + { + "epoch": 3.712615966796875e-05, + "step": 24331, + "training_step_time": 0.11555814743041992 + }, + { + "epoch": 3.7127685546875e-05, + "model_forward_time": 0.024890899658203125, + "step": 24332 + }, + { + "epoch": 3.7127685546875e-05, + "step": 24332, + "training_step_time": 0.11681270599365234 + }, + { + "epoch": 3.712921142578125e-05, + "model_forward_time": 0.025279760360717773, + "step": 24333 + }, + { + "epoch": 3.712921142578125e-05, + "step": 24333, + "training_step_time": 0.10809063911437988 + }, + { + "epoch": 3.71307373046875e-05, + "model_forward_time": 0.025611400604248047, + "step": 24334 + }, + { + "epoch": 3.71307373046875e-05, + "step": 24334, + "training_step_time": 0.10654711723327637 + }, + { + "epoch": 3.713226318359375e-05, + "model_forward_time": 0.025287866592407227, + "step": 24335 + }, + { + "epoch": 3.713226318359375e-05, + "step": 24335, + "training_step_time": 0.11050224304199219 + }, + { + "epoch": 3.71337890625e-05, + "model_forward_time": 0.02549290657043457, + "step": 24336 + }, + { + "epoch": 3.71337890625e-05, + "step": 24336, + "training_step_time": 0.10606861114501953 + }, + { + "epoch": 3.713531494140625e-05, + "model_forward_time": 0.02516484260559082, + "step": 24337 + }, + { + "epoch": 3.713531494140625e-05, + "step": 24337, + "training_step_time": 0.10801172256469727 + }, + { + "epoch": 3.71368408203125e-05, + "model_forward_time": 0.024900197982788086, + "step": 24338 + }, + { + "epoch": 3.71368408203125e-05, + "step": 24338, + "training_step_time": 0.10728049278259277 + }, + { + "epoch": 3.713836669921875e-05, + "model_forward_time": 0.024777889251708984, + "step": 24339 + }, + { + "epoch": 3.713836669921875e-05, + "step": 24339, + "training_step_time": 0.17107176780700684 + }, + { + "epoch": 3.7139892578125e-05, + "grad_norm": 0.059934068471193314, + "learning_rate": 9.41995922345642e-06, + "loss": 0.0063, + "step": 24340 + }, + { + "epoch": 3.7139892578125e-05, + "model_forward_time": 0.024144649505615234, + "step": 24340 + }, + { + "epoch": 3.7139892578125e-05, + "step": 24340, + "training_step_time": 0.1242973804473877 + }, + { + "epoch": 3.714141845703125e-05, + "model_forward_time": 0.024165630340576172, + "step": 24341 + }, + { + "epoch": 3.714141845703125e-05, + "step": 24341, + "training_step_time": 0.12415528297424316 + }, + { + "epoch": 3.71429443359375e-05, + "model_forward_time": 0.02523207664489746, + "step": 24342 + }, + { + "epoch": 3.71429443359375e-05, + "step": 24342, + "training_step_time": 0.10575342178344727 + }, + { + "epoch": 3.714447021484375e-05, + "model_forward_time": 0.025203704833984375, + "step": 24343 + }, + { + "epoch": 3.714447021484375e-05, + "step": 24343, + "training_step_time": 0.15754270553588867 + }, + { + "epoch": 3.714599609375e-05, + "model_forward_time": 0.024272680282592773, + "step": 24344 + }, + { + "epoch": 3.714599609375e-05, + "step": 24344, + "training_step_time": 0.1399390697479248 + }, + { + "epoch": 3.714752197265625e-05, + "model_forward_time": 0.024673938751220703, + "step": 24345 + }, + { + "epoch": 3.714752197265625e-05, + "step": 24345, + "training_step_time": 0.10219860076904297 + }, + { + "epoch": 3.71490478515625e-05, + "model_forward_time": 0.025735855102539062, + "step": 24346 + }, + { + "epoch": 3.71490478515625e-05, + "step": 24346, + "training_step_time": 0.10573911666870117 + }, + { + "epoch": 3.715057373046875e-05, + "model_forward_time": 0.02526378631591797, + "step": 24347 + }, + { + "epoch": 3.715057373046875e-05, + "step": 24347, + "training_step_time": 0.10457921028137207 + }, + { + "epoch": 3.7152099609375e-05, + "model_forward_time": 0.025121212005615234, + "step": 24348 + }, + { + "epoch": 3.7152099609375e-05, + "step": 24348, + "training_step_time": 0.10540246963500977 + }, + { + "epoch": 3.715362548828125e-05, + "model_forward_time": 0.02505970001220703, + "step": 24349 + }, + { + "epoch": 3.715362548828125e-05, + "step": 24349, + "training_step_time": 0.10681986808776855 + }, + { + "epoch": 3.71551513671875e-05, + "grad_norm": 0.08646565675735474, + "learning_rate": 9.387784632484826e-06, + "loss": 0.0082, + "step": 24350 + }, + { + "epoch": 3.71551513671875e-05, + "model_forward_time": 0.025452852249145508, + "step": 24350 + }, + { + "epoch": 3.71551513671875e-05, + "step": 24350, + "training_step_time": 0.1106417179107666 + }, + { + "epoch": 3.715667724609375e-05, + "model_forward_time": 0.025509119033813477, + "step": 24351 + }, + { + "epoch": 3.715667724609375e-05, + "step": 24351, + "training_step_time": 0.10989260673522949 + }, + { + "epoch": 3.7158203125e-05, + "model_forward_time": 0.025065898895263672, + "step": 24352 + }, + { + "epoch": 3.7158203125e-05, + "step": 24352, + "training_step_time": 0.10683155059814453 + }, + { + "epoch": 3.715972900390625e-05, + "model_forward_time": 0.024544239044189453, + "step": 24353 + }, + { + "epoch": 3.715972900390625e-05, + "step": 24353, + "training_step_time": 0.12331247329711914 + }, + { + "epoch": 3.71612548828125e-05, + "model_forward_time": 0.025012731552124023, + "step": 24354 + }, + { + "epoch": 3.71612548828125e-05, + "step": 24354, + "training_step_time": 0.1308460235595703 + }, + { + "epoch": 3.716278076171875e-05, + "model_forward_time": 0.025029420852661133, + "step": 24355 + }, + { + "epoch": 3.716278076171875e-05, + "step": 24355, + "training_step_time": 0.1532447338104248 + }, + { + "epoch": 3.7164306640625e-05, + "model_forward_time": 0.024352312088012695, + "step": 24356 + }, + { + "epoch": 3.7164306640625e-05, + "step": 24356, + "training_step_time": 0.17566919326782227 + }, + { + "epoch": 3.716583251953125e-05, + "model_forward_time": 0.02512955665588379, + "step": 24357 + }, + { + "epoch": 3.716583251953125e-05, + "step": 24357, + "training_step_time": 0.12132096290588379 + }, + { + "epoch": 3.71673583984375e-05, + "model_forward_time": 0.024603605270385742, + "step": 24358 + }, + { + "epoch": 3.71673583984375e-05, + "step": 24358, + "training_step_time": 0.15294384956359863 + }, + { + "epoch": 3.716888427734375e-05, + "model_forward_time": 0.024913311004638672, + "step": 24359 + }, + { + "epoch": 3.716888427734375e-05, + "step": 24359, + "training_step_time": 0.10367631912231445 + }, + { + "epoch": 3.717041015625e-05, + "grad_norm": 0.06944193691015244, + "learning_rate": 9.355659389184396e-06, + "loss": 0.0049, + "step": 24360 + }, + { + "epoch": 3.717041015625e-05, + "model_forward_time": 0.02496051788330078, + "step": 24360 + }, + { + "epoch": 3.717041015625e-05, + "step": 24360, + "training_step_time": 0.10398197174072266 + }, + { + "epoch": 3.717193603515625e-05, + "model_forward_time": 0.02520751953125, + "step": 24361 + }, + { + "epoch": 3.717193603515625e-05, + "step": 24361, + "training_step_time": 0.10443449020385742 + }, + { + "epoch": 3.71734619140625e-05, + "model_forward_time": 0.02516627311706543, + "step": 24362 + }, + { + "epoch": 3.71734619140625e-05, + "step": 24362, + "training_step_time": 0.10392260551452637 + }, + { + "epoch": 3.717498779296875e-05, + "model_forward_time": 0.02533245086669922, + "step": 24363 + }, + { + "epoch": 3.717498779296875e-05, + "step": 24363, + "training_step_time": 0.10829401016235352 + }, + { + "epoch": 3.7176513671875e-05, + "model_forward_time": 0.02538919448852539, + "step": 24364 + }, + { + "epoch": 3.7176513671875e-05, + "step": 24364, + "training_step_time": 0.10499691963195801 + }, + { + "epoch": 3.717803955078125e-05, + "model_forward_time": 0.024544715881347656, + "step": 24365 + }, + { + "epoch": 3.717803955078125e-05, + "step": 24365, + "training_step_time": 0.10553193092346191 + }, + { + "epoch": 3.71795654296875e-05, + "model_forward_time": 0.024393558502197266, + "step": 24366 + }, + { + "epoch": 3.71795654296875e-05, + "step": 24366, + "training_step_time": 0.10773301124572754 + }, + { + "epoch": 3.718109130859375e-05, + "model_forward_time": 0.02479386329650879, + "step": 24367 + }, + { + "epoch": 3.718109130859375e-05, + "step": 24367, + "training_step_time": 0.1049952507019043 + }, + { + "epoch": 3.71826171875e-05, + "model_forward_time": 0.025301694869995117, + "step": 24368 + }, + { + "epoch": 3.71826171875e-05, + "step": 24368, + "training_step_time": 0.1498570442199707 + }, + { + "epoch": 3.718414306640625e-05, + "model_forward_time": 0.02356433868408203, + "step": 24369 + }, + { + "epoch": 3.718414306640625e-05, + "step": 24369, + "training_step_time": 0.1030876636505127 + }, + { + "epoch": 3.71856689453125e-05, + "grad_norm": 0.12869904935359955, + "learning_rate": 9.32358353259032e-06, + "loss": 0.0062, + "step": 24370 + }, + { + "epoch": 3.71856689453125e-05, + "model_forward_time": 0.02393651008605957, + "step": 24370 + }, + { + "epoch": 3.71856689453125e-05, + "step": 24370, + "training_step_time": 0.20751118659973145 + }, + { + "epoch": 3.718719482421875e-05, + "model_forward_time": 0.022573232650756836, + "step": 24371 + }, + { + "epoch": 3.718719482421875e-05, + "step": 24371, + "training_step_time": 0.12214946746826172 + }, + { + "epoch": 3.7188720703125e-05, + "model_forward_time": 0.023389101028442383, + "step": 24372 + }, + { + "epoch": 3.7188720703125e-05, + "step": 24372, + "training_step_time": 0.1640775203704834 + }, + { + "epoch": 3.719024658203125e-05, + "model_forward_time": 0.023114442825317383, + "step": 24373 + }, + { + "epoch": 3.719024658203125e-05, + "step": 24373, + "training_step_time": 0.10543179512023926 + }, + { + "epoch": 3.71917724609375e-05, + "model_forward_time": 0.023762941360473633, + "step": 24374 + }, + { + "epoch": 3.71917724609375e-05, + "step": 24374, + "training_step_time": 0.1037454605102539 + }, + { + "epoch": 3.719329833984375e-05, + "model_forward_time": 0.023972511291503906, + "step": 24375 + }, + { + "epoch": 3.719329833984375e-05, + "step": 24375, + "training_step_time": 0.10392212867736816 + }, + { + "epoch": 3.719482421875e-05, + "model_forward_time": 0.02437138557434082, + "step": 24376 + }, + { + "epoch": 3.719482421875e-05, + "step": 24376, + "training_step_time": 0.1049506664276123 + }, + { + "epoch": 3.719635009765625e-05, + "model_forward_time": 0.024208545684814453, + "step": 24377 + }, + { + "epoch": 3.719635009765625e-05, + "step": 24377, + "training_step_time": 0.1049339771270752 + }, + { + "epoch": 3.71978759765625e-05, + "model_forward_time": 0.024483203887939453, + "step": 24378 + }, + { + "epoch": 3.71978759765625e-05, + "step": 24378, + "training_step_time": 0.10791301727294922 + }, + { + "epoch": 3.719940185546875e-05, + "model_forward_time": 0.024637222290039062, + "step": 24379 + }, + { + "epoch": 3.719940185546875e-05, + "step": 24379, + "training_step_time": 0.11005735397338867 + }, + { + "epoch": 3.7200927734375e-05, + "grad_norm": 0.19114813208580017, + "learning_rate": 9.291557101677784e-06, + "loss": 0.0033, + "step": 24380 + }, + { + "epoch": 3.7200927734375e-05, + "model_forward_time": 0.02460455894470215, + "step": 24380 + }, + { + "epoch": 3.7200927734375e-05, + "step": 24380, + "training_step_time": 0.10765218734741211 + }, + { + "epoch": 3.720245361328125e-05, + "model_forward_time": 0.023885726928710938, + "step": 24381 + }, + { + "epoch": 3.720245361328125e-05, + "step": 24381, + "training_step_time": 0.10705375671386719 + }, + { + "epoch": 3.72039794921875e-05, + "model_forward_time": 0.02405405044555664, + "step": 24382 + }, + { + "epoch": 3.72039794921875e-05, + "step": 24382, + "training_step_time": 0.10443878173828125 + }, + { + "epoch": 3.720550537109375e-05, + "model_forward_time": 0.02433943748474121, + "step": 24383 + }, + { + "epoch": 3.720550537109375e-05, + "step": 24383, + "training_step_time": 0.1049649715423584 + }, + { + "epoch": 3.720703125e-05, + "model_forward_time": 0.024621009826660156, + "step": 24384 + }, + { + "epoch": 3.720703125e-05, + "step": 24384, + "training_step_time": 0.10624170303344727 + }, + { + "epoch": 3.720855712890625e-05, + "model_forward_time": 0.024230003356933594, + "step": 24385 + }, + { + "epoch": 3.720855712890625e-05, + "step": 24385, + "training_step_time": 0.10561013221740723 + }, + { + "epoch": 3.72100830078125e-05, + "model_forward_time": 0.024133682250976562, + "step": 24386 + }, + { + "epoch": 3.72100830078125e-05, + "step": 24386, + "training_step_time": 0.18191909790039062 + }, + { + "epoch": 3.721160888671875e-05, + "model_forward_time": 0.02341628074645996, + "step": 24387 + }, + { + "epoch": 3.721160888671875e-05, + "step": 24387, + "training_step_time": 0.17897605895996094 + }, + { + "epoch": 3.7213134765625e-05, + "model_forward_time": 0.023186683654785156, + "step": 24388 + }, + { + "epoch": 3.7213134765625e-05, + "step": 24388, + "training_step_time": 0.10691070556640625 + }, + { + "epoch": 3.721466064453125e-05, + "model_forward_time": 0.023356914520263672, + "step": 24389 + }, + { + "epoch": 3.721466064453125e-05, + "step": 24389, + "training_step_time": 0.12597894668579102 + }, + { + "epoch": 3.72161865234375e-05, + "grad_norm": 0.08762897551059723, + "learning_rate": 9.259580135361929e-06, + "loss": 0.0032, + "step": 24390 + }, + { + "epoch": 3.72161865234375e-05, + "model_forward_time": 0.023990392684936523, + "step": 24390 + }, + { + "epoch": 3.72161865234375e-05, + "step": 24390, + "training_step_time": 0.10661005973815918 + }, + { + "epoch": 3.721771240234375e-05, + "model_forward_time": 0.02408909797668457, + "step": 24391 + }, + { + "epoch": 3.721771240234375e-05, + "step": 24391, + "training_step_time": 0.11261367797851562 + }, + { + "epoch": 3.721923828125e-05, + "model_forward_time": 0.023883819580078125, + "step": 24392 + }, + { + "epoch": 3.721923828125e-05, + "step": 24392, + "training_step_time": 0.1110224723815918 + }, + { + "epoch": 3.722076416015625e-05, + "model_forward_time": 0.024221420288085938, + "step": 24393 + }, + { + "epoch": 3.722076416015625e-05, + "step": 24393, + "training_step_time": 0.10687875747680664 + }, + { + "epoch": 3.72222900390625e-05, + "model_forward_time": 0.023711681365966797, + "step": 24394 + }, + { + "epoch": 3.72222900390625e-05, + "step": 24394, + "training_step_time": 0.10729122161865234 + }, + { + "epoch": 3.722381591796875e-05, + "model_forward_time": 0.024107694625854492, + "step": 24395 + }, + { + "epoch": 3.722381591796875e-05, + "step": 24395, + "training_step_time": 0.10476994514465332 + }, + { + "epoch": 3.7225341796875e-05, + "model_forward_time": 0.024468660354614258, + "step": 24396 + }, + { + "epoch": 3.7225341796875e-05, + "step": 24396, + "training_step_time": 0.10322022438049316 + }, + { + "epoch": 3.722686767578125e-05, + "model_forward_time": 0.024061203002929688, + "step": 24397 + }, + { + "epoch": 3.722686767578125e-05, + "step": 24397, + "training_step_time": 0.19710946083068848 + }, + { + "epoch": 3.72283935546875e-05, + "model_forward_time": 0.02508068084716797, + "step": 24398 + }, + { + "epoch": 3.72283935546875e-05, + "step": 24398, + "training_step_time": 0.21164894104003906 + }, + { + "epoch": 3.722991943359375e-05, + "model_forward_time": 0.023173809051513672, + "step": 24399 + }, + { + "epoch": 3.722991943359375e-05, + "step": 24399, + "training_step_time": 0.13428568840026855 + }, + { + "epoch": 3.72314453125e-05, + "grad_norm": 0.20159612596035004, + "learning_rate": 9.227652672497761e-06, + "loss": 0.0052, + "step": 24400 + }, + { + "epoch": 3.72314453125e-05, + "model_forward_time": 0.0236053466796875, + "step": 24400 + }, + { + "epoch": 3.72314453125e-05, + "step": 24400, + "training_step_time": 0.12832164764404297 + }, + { + "epoch": 3.723297119140625e-05, + "model_forward_time": 0.02382373809814453, + "step": 24401 + }, + { + "epoch": 3.723297119140625e-05, + "step": 24401, + "training_step_time": 0.1102597713470459 + }, + { + "epoch": 3.72344970703125e-05, + "model_forward_time": 0.024133920669555664, + "step": 24402 + }, + { + "epoch": 3.72344970703125e-05, + "step": 24402, + "training_step_time": 0.1150217056274414 + }, + { + "epoch": 3.723602294921875e-05, + "model_forward_time": 0.02417731285095215, + "step": 24403 + }, + { + "epoch": 3.723602294921875e-05, + "step": 24403, + "training_step_time": 0.2016441822052002 + }, + { + "epoch": 3.7237548828125e-05, + "model_forward_time": 0.024802446365356445, + "step": 24404 + }, + { + "epoch": 3.7237548828125e-05, + "step": 24404, + "training_step_time": 0.10185766220092773 + }, + { + "epoch": 3.723907470703125e-05, + "model_forward_time": 0.024893522262573242, + "step": 24405 + }, + { + "epoch": 3.723907470703125e-05, + "step": 24405, + "training_step_time": 0.1029655933380127 + }, + { + "epoch": 3.72406005859375e-05, + "model_forward_time": 0.025116920471191406, + "step": 24406 + }, + { + "epoch": 3.72406005859375e-05, + "step": 24406, + "training_step_time": 0.10986685752868652 + }, + { + "epoch": 3.724212646484375e-05, + "model_forward_time": 0.025323867797851562, + "step": 24407 + }, + { + "epoch": 3.724212646484375e-05, + "step": 24407, + "training_step_time": 0.10656118392944336 + }, + { + "epoch": 3.724365234375e-05, + "model_forward_time": 0.025153160095214844, + "step": 24408 + }, + { + "epoch": 3.724365234375e-05, + "step": 24408, + "training_step_time": 0.10987687110900879 + }, + { + "epoch": 3.724517822265625e-05, + "model_forward_time": 0.025244474411010742, + "step": 24409 + }, + { + "epoch": 3.724517822265625e-05, + "step": 24409, + "training_step_time": 0.10483217239379883 + }, + { + "epoch": 3.72467041015625e-05, + "grad_norm": 0.05615602433681488, + "learning_rate": 9.195774751880198e-06, + "loss": 0.0031, + "step": 24410 + }, + { + "epoch": 3.72467041015625e-05, + "model_forward_time": 0.02749037742614746, + "step": 24410 + }, + { + "epoch": 3.72467041015625e-05, + "step": 24410, + "training_step_time": 0.10707354545593262 + }, + { + "epoch": 3.724822998046875e-05, + "model_forward_time": 0.025300264358520508, + "step": 24411 + }, + { + "epoch": 3.724822998046875e-05, + "step": 24411, + "training_step_time": 0.10661149024963379 + }, + { + "epoch": 3.7249755859375e-05, + "model_forward_time": 0.024798870086669922, + "step": 24412 + }, + { + "epoch": 3.7249755859375e-05, + "step": 24412, + "training_step_time": 0.10559797286987305 + }, + { + "epoch": 3.725128173828125e-05, + "model_forward_time": 0.025254249572753906, + "step": 24413 + }, + { + "epoch": 3.725128173828125e-05, + "step": 24413, + "training_step_time": 0.10337686538696289 + }, + { + "epoch": 3.72528076171875e-05, + "model_forward_time": 0.024353742599487305, + "step": 24414 + }, + { + "epoch": 3.72528076171875e-05, + "step": 24414, + "training_step_time": 0.14342904090881348 + }, + { + "epoch": 3.725433349609375e-05, + "model_forward_time": 0.02440476417541504, + "step": 24415 + }, + { + "epoch": 3.725433349609375e-05, + "step": 24415, + "training_step_time": 0.1732161045074463 + }, + { + "epoch": 3.7255859375e-05, + "model_forward_time": 0.025637388229370117, + "step": 24416 + }, + { + "epoch": 3.7255859375e-05, + "step": 24416, + "training_step_time": 0.10689473152160645 + }, + { + "epoch": 3.725738525390625e-05, + "model_forward_time": 0.02557682991027832, + "step": 24417 + }, + { + "epoch": 3.725738525390625e-05, + "step": 24417, + "training_step_time": 0.13231158256530762 + }, + { + "epoch": 3.72589111328125e-05, + "model_forward_time": 0.025146007537841797, + "step": 24418 + }, + { + "epoch": 3.72589111328125e-05, + "step": 24418, + "training_step_time": 0.1983506679534912 + }, + { + "epoch": 3.726043701171875e-05, + "model_forward_time": 0.02427077293395996, + "step": 24419 + }, + { + "epoch": 3.726043701171875e-05, + "step": 24419, + "training_step_time": 0.10168337821960449 + }, + { + "epoch": 3.7261962890625e-05, + "grad_norm": 0.06053365767002106, + "learning_rate": 9.163946412243896e-06, + "loss": 0.0046, + "step": 24420 + }, + { + "epoch": 3.7261962890625e-05, + "model_forward_time": 0.023658275604248047, + "step": 24420 + }, + { + "epoch": 3.7261962890625e-05, + "step": 24420, + "training_step_time": 0.10434293746948242 + }, + { + "epoch": 3.726348876953125e-05, + "model_forward_time": 0.02411365509033203, + "step": 24421 + }, + { + "epoch": 3.726348876953125e-05, + "step": 24421, + "training_step_time": 0.10277986526489258 + }, + { + "epoch": 3.72650146484375e-05, + "model_forward_time": 0.02487945556640625, + "step": 24422 + }, + { + "epoch": 3.72650146484375e-05, + "step": 24422, + "training_step_time": 0.10518050193786621 + }, + { + "epoch": 3.726654052734375e-05, + "model_forward_time": 0.02506232261657715, + "step": 24423 + }, + { + "epoch": 3.726654052734375e-05, + "step": 24423, + "training_step_time": 0.1073160171508789 + }, + { + "epoch": 3.726806640625e-05, + "model_forward_time": 0.025388002395629883, + "step": 24424 + }, + { + "epoch": 3.726806640625e-05, + "step": 24424, + "training_step_time": 0.10558390617370605 + }, + { + "epoch": 3.726959228515625e-05, + "model_forward_time": 0.025506973266601562, + "step": 24425 + }, + { + "epoch": 3.726959228515625e-05, + "step": 24425, + "training_step_time": 0.10461640357971191 + }, + { + "epoch": 3.72711181640625e-05, + "model_forward_time": 0.02484297752380371, + "step": 24426 + }, + { + "epoch": 3.72711181640625e-05, + "step": 24426, + "training_step_time": 0.10875463485717773 + }, + { + "epoch": 3.727264404296875e-05, + "model_forward_time": 0.02524089813232422, + "step": 24427 + }, + { + "epoch": 3.727264404296875e-05, + "step": 24427, + "training_step_time": 0.10506010055541992 + }, + { + "epoch": 3.7274169921875e-05, + "model_forward_time": 0.025609731674194336, + "step": 24428 + }, + { + "epoch": 3.7274169921875e-05, + "step": 24428, + "training_step_time": 0.10543656349182129 + }, + { + "epoch": 3.727569580078125e-05, + "model_forward_time": 0.025098562240600586, + "step": 24429 + }, + { + "epoch": 3.727569580078125e-05, + "step": 24429, + "training_step_time": 0.11373162269592285 + }, + { + "epoch": 3.72772216796875e-05, + "grad_norm": 0.3164246678352356, + "learning_rate": 9.132167692263289e-06, + "loss": 0.0064, + "step": 24430 + }, + { + "epoch": 3.72772216796875e-05, + "model_forward_time": 0.025788307189941406, + "step": 24430 + }, + { + "epoch": 3.72772216796875e-05, + "step": 24430, + "training_step_time": 0.13280701637268066 + }, + { + "epoch": 3.727874755859375e-05, + "model_forward_time": 0.025031566619873047, + "step": 24431 + }, + { + "epoch": 3.727874755859375e-05, + "step": 24431, + "training_step_time": 0.13015151023864746 + }, + { + "epoch": 3.72802734375e-05, + "model_forward_time": 0.024872303009033203, + "step": 24432 + }, + { + "epoch": 3.72802734375e-05, + "step": 24432, + "training_step_time": 0.21679997444152832 + }, + { + "epoch": 3.728179931640625e-05, + "model_forward_time": 0.024282217025756836, + "step": 24433 + }, + { + "epoch": 3.728179931640625e-05, + "step": 24433, + "training_step_time": 0.13921308517456055 + }, + { + "epoch": 3.72833251953125e-05, + "model_forward_time": 0.024581193923950195, + "step": 24434 + }, + { + "epoch": 3.72833251953125e-05, + "step": 24434, + "training_step_time": 0.11746644973754883 + }, + { + "epoch": 3.728485107421875e-05, + "model_forward_time": 0.024698495864868164, + "step": 24435 + }, + { + "epoch": 3.728485107421875e-05, + "step": 24435, + "training_step_time": 0.11534976959228516 + }, + { + "epoch": 3.7286376953125e-05, + "model_forward_time": 0.02514052391052246, + "step": 24436 + }, + { + "epoch": 3.7286376953125e-05, + "step": 24436, + "training_step_time": 0.17078328132629395 + }, + { + "epoch": 3.728790283203125e-05, + "model_forward_time": 0.024447202682495117, + "step": 24437 + }, + { + "epoch": 3.728790283203125e-05, + "step": 24437, + "training_step_time": 0.1314399242401123 + }, + { + "epoch": 3.72894287109375e-05, + "model_forward_time": 0.0242919921875, + "step": 24438 + }, + { + "epoch": 3.72894287109375e-05, + "step": 24438, + "training_step_time": 0.1101677417755127 + }, + { + "epoch": 3.729095458984375e-05, + "model_forward_time": 0.025096654891967773, + "step": 24439 + }, + { + "epoch": 3.729095458984375e-05, + "step": 24439, + "training_step_time": 0.11149907112121582 + }, + { + "epoch": 3.729248046875e-05, + "grad_norm": 0.16114254295825958, + "learning_rate": 9.100438630552521e-06, + "loss": 0.0027, + "step": 24440 + }, + { + "epoch": 3.729248046875e-05, + "model_forward_time": 0.025304317474365234, + "step": 24440 + }, + { + "epoch": 3.729248046875e-05, + "step": 24440, + "training_step_time": 0.10672140121459961 + }, + { + "epoch": 3.729400634765625e-05, + "model_forward_time": 0.024938106536865234, + "step": 24441 + }, + { + "epoch": 3.729400634765625e-05, + "step": 24441, + "training_step_time": 0.10538053512573242 + }, + { + "epoch": 3.72955322265625e-05, + "model_forward_time": 0.025084495544433594, + "step": 24442 + }, + { + "epoch": 3.72955322265625e-05, + "step": 24442, + "training_step_time": 0.10523033142089844 + }, + { + "epoch": 3.729705810546875e-05, + "model_forward_time": 0.024934768676757812, + "step": 24443 + }, + { + "epoch": 3.729705810546875e-05, + "step": 24443, + "training_step_time": 0.11861705780029297 + }, + { + "epoch": 3.7298583984375e-05, + "model_forward_time": 0.024907588958740234, + "step": 24444 + }, + { + "epoch": 3.7298583984375e-05, + "step": 24444, + "training_step_time": 0.10475897789001465 + }, + { + "epoch": 3.730010986328125e-05, + "model_forward_time": 0.025435209274291992, + "step": 24445 + }, + { + "epoch": 3.730010986328125e-05, + "step": 24445, + "training_step_time": 0.12146592140197754 + }, + { + "epoch": 3.73016357421875e-05, + "model_forward_time": 0.024726152420043945, + "step": 24446 + }, + { + "epoch": 3.73016357421875e-05, + "step": 24446, + "training_step_time": 0.13011574745178223 + }, + { + "epoch": 3.730316162109375e-05, + "model_forward_time": 0.025139570236206055, + "step": 24447 + }, + { + "epoch": 3.730316162109375e-05, + "step": 24447, + "training_step_time": 0.1184072494506836 + }, + { + "epoch": 3.73046875e-05, + "model_forward_time": 0.025357723236083984, + "step": 24448 + }, + { + "epoch": 3.73046875e-05, + "step": 24448, + "training_step_time": 0.19695734977722168 + }, + { + "epoch": 3.730621337890625e-05, + "model_forward_time": 0.02479386329650879, + "step": 24449 + }, + { + "epoch": 3.730621337890625e-05, + "step": 24449, + "training_step_time": 0.13739514350891113 + }, + { + "epoch": 3.73077392578125e-05, + "grad_norm": 0.1747671365737915, + "learning_rate": 9.068759265665384e-06, + "loss": 0.0081, + "step": 24450 + }, + { + "epoch": 3.73077392578125e-05, + "model_forward_time": 0.02449202537536621, + "step": 24450 + }, + { + "epoch": 3.73077392578125e-05, + "step": 24450, + "training_step_time": 0.17432641983032227 + }, + { + "epoch": 3.730926513671875e-05, + "model_forward_time": 0.024774789810180664, + "step": 24451 + }, + { + "epoch": 3.730926513671875e-05, + "step": 24451, + "training_step_time": 0.10655570030212402 + }, + { + "epoch": 3.7310791015625e-05, + "model_forward_time": 0.024790525436401367, + "step": 24452 + }, + { + "epoch": 3.7310791015625e-05, + "step": 24452, + "training_step_time": 0.10491299629211426 + }, + { + "epoch": 3.731231689453125e-05, + "model_forward_time": 0.024010896682739258, + "step": 24453 + }, + { + "epoch": 3.731231689453125e-05, + "step": 24453, + "training_step_time": 0.10352134704589844 + }, + { + "epoch": 3.73138427734375e-05, + "model_forward_time": 0.02620077133178711, + "step": 24454 + }, + { + "epoch": 3.73138427734375e-05, + "step": 24454, + "training_step_time": 0.1097109317779541 + }, + { + "epoch": 3.731536865234375e-05, + "model_forward_time": 0.025884389877319336, + "step": 24455 + }, + { + "epoch": 3.731536865234375e-05, + "step": 24455, + "training_step_time": 0.10816216468811035 + }, + { + "epoch": 3.731689453125e-05, + "model_forward_time": 0.02610182762145996, + "step": 24456 + }, + { + "epoch": 3.731689453125e-05, + "step": 24456, + "training_step_time": 0.10862469673156738 + }, + { + "epoch": 3.731842041015625e-05, + "model_forward_time": 0.025356292724609375, + "step": 24457 + }, + { + "epoch": 3.731842041015625e-05, + "step": 24457, + "training_step_time": 0.1099553108215332 + }, + { + "epoch": 3.73199462890625e-05, + "model_forward_time": 0.025036096572875977, + "step": 24458 + }, + { + "epoch": 3.73199462890625e-05, + "step": 24458, + "training_step_time": 0.10505843162536621 + }, + { + "epoch": 3.732147216796875e-05, + "model_forward_time": 0.024212360382080078, + "step": 24459 + }, + { + "epoch": 3.732147216796875e-05, + "step": 24459, + "training_step_time": 0.14887213706970215 + }, + { + "epoch": 3.7322998046875e-05, + "grad_norm": 0.14061211049556732, + "learning_rate": 9.037129636095309e-06, + "loss": 0.006, + "step": 24460 + }, + { + "epoch": 3.7322998046875e-05, + "model_forward_time": 0.02426004409790039, + "step": 24460 + }, + { + "epoch": 3.7322998046875e-05, + "step": 24460, + "training_step_time": 0.15532779693603516 + }, + { + "epoch": 3.732452392578125e-05, + "model_forward_time": 0.026788949966430664, + "step": 24461 + }, + { + "epoch": 3.732452392578125e-05, + "step": 24461, + "training_step_time": 0.10747790336608887 + }, + { + "epoch": 3.73260498046875e-05, + "model_forward_time": 0.02523064613342285, + "step": 24462 + }, + { + "epoch": 3.73260498046875e-05, + "step": 24462, + "training_step_time": 0.14213895797729492 + }, + { + "epoch": 3.732757568359375e-05, + "model_forward_time": 0.02559971809387207, + "step": 24463 + }, + { + "epoch": 3.732757568359375e-05, + "step": 24463, + "training_step_time": 0.10694360733032227 + }, + { + "epoch": 3.73291015625e-05, + "model_forward_time": 0.02570652961730957, + "step": 24464 + }, + { + "epoch": 3.73291015625e-05, + "step": 24464, + "training_step_time": 0.11942481994628906 + }, + { + "epoch": 3.733062744140625e-05, + "model_forward_time": 0.02566814422607422, + "step": 24465 + }, + { + "epoch": 3.733062744140625e-05, + "step": 24465, + "training_step_time": 0.1065218448638916 + }, + { + "epoch": 3.73321533203125e-05, + "model_forward_time": 0.024989604949951172, + "step": 24466 + }, + { + "epoch": 3.73321533203125e-05, + "step": 24466, + "training_step_time": 0.10579180717468262 + }, + { + "epoch": 3.733367919921875e-05, + "model_forward_time": 0.025851011276245117, + "step": 24467 + }, + { + "epoch": 3.733367919921875e-05, + "step": 24467, + "training_step_time": 0.11101245880126953 + }, + { + "epoch": 3.7335205078125e-05, + "model_forward_time": 0.02535557746887207, + "step": 24468 + }, + { + "epoch": 3.7335205078125e-05, + "step": 24468, + "training_step_time": 0.10619950294494629 + }, + { + "epoch": 3.733673095703125e-05, + "model_forward_time": 0.02780938148498535, + "step": 24469 + }, + { + "epoch": 3.733673095703125e-05, + "step": 24469, + "training_step_time": 0.10766983032226562 + }, + { + "epoch": 3.73382568359375e-05, + "grad_norm": 0.08635987341403961, + "learning_rate": 9.005549780275263e-06, + "loss": 0.0076, + "step": 24470 + }, + { + "epoch": 3.73382568359375e-05, + "model_forward_time": 0.02531123161315918, + "step": 24470 + }, + { + "epoch": 3.73382568359375e-05, + "step": 24470, + "training_step_time": 0.10678935050964355 + }, + { + "epoch": 3.733978271484375e-05, + "model_forward_time": 0.025258302688598633, + "step": 24471 + }, + { + "epoch": 3.733978271484375e-05, + "step": 24471, + "training_step_time": 0.10660767555236816 + }, + { + "epoch": 3.734130859375e-05, + "model_forward_time": 0.024765729904174805, + "step": 24472 + }, + { + "epoch": 3.734130859375e-05, + "step": 24472, + "training_step_time": 0.10633611679077148 + }, + { + "epoch": 3.734283447265625e-05, + "model_forward_time": 0.02553105354309082, + "step": 24473 + }, + { + "epoch": 3.734283447265625e-05, + "step": 24473, + "training_step_time": 0.1060178279876709 + }, + { + "epoch": 3.73443603515625e-05, + "model_forward_time": 0.02529764175415039, + "step": 24474 + }, + { + "epoch": 3.73443603515625e-05, + "step": 24474, + "training_step_time": 0.10556530952453613 + }, + { + "epoch": 3.734588623046875e-05, + "model_forward_time": 0.025002717971801758, + "step": 24475 + }, + { + "epoch": 3.734588623046875e-05, + "step": 24475, + "training_step_time": 0.1084604263305664 + }, + { + "epoch": 3.7347412109375e-05, + "model_forward_time": 0.025008440017700195, + "step": 24476 + }, + { + "epoch": 3.7347412109375e-05, + "step": 24476, + "training_step_time": 0.11102771759033203 + }, + { + "epoch": 3.734893798828125e-05, + "model_forward_time": 0.02500605583190918, + "step": 24477 + }, + { + "epoch": 3.734893798828125e-05, + "step": 24477, + "training_step_time": 0.10927844047546387 + }, + { + "epoch": 3.73504638671875e-05, + "model_forward_time": 0.02552509307861328, + "step": 24478 + }, + { + "epoch": 3.73504638671875e-05, + "step": 24478, + "training_step_time": 0.11022305488586426 + }, + { + "epoch": 3.735198974609375e-05, + "model_forward_time": 0.024997472763061523, + "step": 24479 + }, + { + "epoch": 3.735198974609375e-05, + "step": 24479, + "training_step_time": 0.16833901405334473 + }, + { + "epoch": 3.7353515625e-05, + "grad_norm": 0.11926314234733582, + "learning_rate": 8.974019736577777e-06, + "loss": 0.0054, + "step": 24480 + }, + { + "epoch": 3.7353515625e-05, + "model_forward_time": 0.02425360679626465, + "step": 24480 + }, + { + "epoch": 3.7353515625e-05, + "step": 24480, + "training_step_time": 0.11188101768493652 + }, + { + "epoch": 3.735504150390625e-05, + "model_forward_time": 0.02490997314453125, + "step": 24481 + }, + { + "epoch": 3.735504150390625e-05, + "step": 24481, + "training_step_time": 0.10980653762817383 + }, + { + "epoch": 3.73565673828125e-05, + "model_forward_time": 0.025077342987060547, + "step": 24482 + }, + { + "epoch": 3.73565673828125e-05, + "step": 24482, + "training_step_time": 0.11562299728393555 + }, + { + "epoch": 3.735809326171875e-05, + "model_forward_time": 0.02558302879333496, + "step": 24483 + }, + { + "epoch": 3.735809326171875e-05, + "step": 24483, + "training_step_time": 0.11011409759521484 + }, + { + "epoch": 3.7359619140625e-05, + "model_forward_time": 0.024579286575317383, + "step": 24484 + }, + { + "epoch": 3.7359619140625e-05, + "step": 24484, + "training_step_time": 0.22366905212402344 + }, + { + "epoch": 3.736114501953125e-05, + "model_forward_time": 0.024477481842041016, + "step": 24485 + }, + { + "epoch": 3.736114501953125e-05, + "step": 24485, + "training_step_time": 0.10900163650512695 + }, + { + "epoch": 3.73626708984375e-05, + "model_forward_time": 0.024380922317504883, + "step": 24486 + }, + { + "epoch": 3.73626708984375e-05, + "step": 24486, + "training_step_time": 0.1042473316192627 + }, + { + "epoch": 3.736419677734375e-05, + "model_forward_time": 0.025431394577026367, + "step": 24487 + }, + { + "epoch": 3.736419677734375e-05, + "step": 24487, + "training_step_time": 0.10330891609191895 + }, + { + "epoch": 3.736572265625e-05, + "model_forward_time": 0.02518630027770996, + "step": 24488 + }, + { + "epoch": 3.736572265625e-05, + "step": 24488, + "training_step_time": 0.10445308685302734 + }, + { + "epoch": 3.736724853515625e-05, + "model_forward_time": 0.02534198760986328, + "step": 24489 + }, + { + "epoch": 3.736724853515625e-05, + "step": 24489, + "training_step_time": 0.2020111083984375 + }, + { + "epoch": 3.73687744140625e-05, + "grad_norm": 0.21885116398334503, + "learning_rate": 8.9425395433148e-06, + "loss": 0.0052, + "step": 24490 + }, + { + "epoch": 3.73687744140625e-05, + "model_forward_time": 0.025732040405273438, + "step": 24490 + }, + { + "epoch": 3.73687744140625e-05, + "step": 24490, + "training_step_time": 0.11340045928955078 + }, + { + "epoch": 3.737030029296875e-05, + "model_forward_time": 0.02470111846923828, + "step": 24491 + }, + { + "epoch": 3.737030029296875e-05, + "step": 24491, + "training_step_time": 0.14742755889892578 + }, + { + "epoch": 3.7371826171875e-05, + "model_forward_time": 0.027136802673339844, + "step": 24492 + }, + { + "epoch": 3.7371826171875e-05, + "step": 24492, + "training_step_time": 0.214522123336792 + }, + { + "epoch": 3.737335205078125e-05, + "model_forward_time": 0.024012327194213867, + "step": 24493 + }, + { + "epoch": 3.737335205078125e-05, + "step": 24493, + "training_step_time": 0.14451360702514648 + }, + { + "epoch": 3.73748779296875e-05, + "model_forward_time": 0.024347543716430664, + "step": 24494 + }, + { + "epoch": 3.73748779296875e-05, + "step": 24494, + "training_step_time": 0.17842626571655273 + }, + { + "epoch": 3.737640380859375e-05, + "model_forward_time": 0.024528980255126953, + "step": 24495 + }, + { + "epoch": 3.737640380859375e-05, + "step": 24495, + "training_step_time": 0.13213634490966797 + }, + { + "epoch": 3.73779296875e-05, + "model_forward_time": 0.02420806884765625, + "step": 24496 + }, + { + "epoch": 3.73779296875e-05, + "step": 24496, + "training_step_time": 0.11410212516784668 + }, + { + "epoch": 3.737945556640625e-05, + "model_forward_time": 0.02490687370300293, + "step": 24497 + }, + { + "epoch": 3.737945556640625e-05, + "step": 24497, + "training_step_time": 0.11078190803527832 + }, + { + "epoch": 3.73809814453125e-05, + "model_forward_time": 0.02488422393798828, + "step": 24498 + }, + { + "epoch": 3.73809814453125e-05, + "step": 24498, + "training_step_time": 0.1129605770111084 + }, + { + "epoch": 3.738250732421875e-05, + "model_forward_time": 0.02515125274658203, + "step": 24499 + }, + { + "epoch": 3.738250732421875e-05, + "step": 24499, + "training_step_time": 0.11073613166809082 + }, + { + "epoch": 3.7384033203125e-05, + "grad_norm": 0.20034775137901306, + "learning_rate": 8.911109238737747e-06, + "loss": 0.0062, + "step": 24500 + }, + { + "epoch": 3.7384033203125e-05, + "model_forward_time": 0.025101661682128906, + "step": 24500 + }, + { + "epoch": 3.7384033203125e-05, + "step": 24500, + "training_step_time": 0.11008238792419434 + }, + { + "epoch": 3.738555908203125e-05, + "model_forward_time": 0.025507688522338867, + "step": 24501 + }, + { + "epoch": 3.738555908203125e-05, + "step": 24501, + "training_step_time": 0.10880517959594727 + }, + { + "epoch": 3.73870849609375e-05, + "model_forward_time": 0.025183439254760742, + "step": 24502 + }, + { + "epoch": 3.73870849609375e-05, + "step": 24502, + "training_step_time": 0.11184215545654297 + }, + { + "epoch": 3.738861083984375e-05, + "model_forward_time": 0.025285959243774414, + "step": 24503 + }, + { + "epoch": 3.738861083984375e-05, + "step": 24503, + "training_step_time": 0.11021113395690918 + }, + { + "epoch": 3.739013671875e-05, + "model_forward_time": 0.025157690048217773, + "step": 24504 + }, + { + "epoch": 3.739013671875e-05, + "step": 24504, + "training_step_time": 0.10738492012023926 + }, + { + "epoch": 3.739166259765625e-05, + "model_forward_time": 0.025032758712768555, + "step": 24505 + }, + { + "epoch": 3.739166259765625e-05, + "step": 24505, + "training_step_time": 0.11628198623657227 + }, + { + "epoch": 3.73931884765625e-05, + "model_forward_time": 0.026024341583251953, + "step": 24506 + }, + { + "epoch": 3.73931884765625e-05, + "step": 24506, + "training_step_time": 0.161360502243042 + }, + { + "epoch": 3.739471435546875e-05, + "model_forward_time": 0.024524450302124023, + "step": 24507 + }, + { + "epoch": 3.739471435546875e-05, + "step": 24507, + "training_step_time": 0.13050246238708496 + }, + { + "epoch": 3.7396240234375e-05, + "model_forward_time": 0.024622678756713867, + "step": 24508 + }, + { + "epoch": 3.7396240234375e-05, + "step": 24508, + "training_step_time": 0.1219339370727539 + }, + { + "epoch": 3.739776611328125e-05, + "model_forward_time": 0.02512359619140625, + "step": 24509 + }, + { + "epoch": 3.739776611328125e-05, + "step": 24509, + "training_step_time": 0.10653972625732422 + }, + { + "epoch": 3.73992919921875e-05, + "grad_norm": 0.19504018127918243, + "learning_rate": 8.879728861037384e-06, + "loss": 0.0033, + "step": 24510 + }, + { + "epoch": 3.73992919921875e-05, + "model_forward_time": 0.026160478591918945, + "step": 24510 + }, + { + "epoch": 3.73992919921875e-05, + "step": 24510, + "training_step_time": 0.13931560516357422 + }, + { + "epoch": 3.740081787109375e-05, + "model_forward_time": 0.0252988338470459, + "step": 24511 + }, + { + "epoch": 3.740081787109375e-05, + "step": 24511, + "training_step_time": 0.15154314041137695 + }, + { + "epoch": 3.740234375e-05, + "model_forward_time": 0.024866819381713867, + "step": 24512 + }, + { + "epoch": 3.740234375e-05, + "step": 24512, + "training_step_time": 0.14258217811584473 + }, + { + "epoch": 3.740386962890625e-05, + "model_forward_time": 0.027960777282714844, + "step": 24513 + }, + { + "epoch": 3.740386962890625e-05, + "step": 24513, + "training_step_time": 0.1492781639099121 + }, + { + "epoch": 3.74053955078125e-05, + "model_forward_time": 0.024449586868286133, + "step": 24514 + }, + { + "epoch": 3.74053955078125e-05, + "step": 24514, + "training_step_time": 0.13181233406066895 + }, + { + "epoch": 3.740692138671875e-05, + "model_forward_time": 0.02438640594482422, + "step": 24515 + }, + { + "epoch": 3.740692138671875e-05, + "step": 24515, + "training_step_time": 0.12447261810302734 + }, + { + "epoch": 3.7408447265625e-05, + "model_forward_time": 0.024675607681274414, + "step": 24516 + }, + { + "epoch": 3.7408447265625e-05, + "step": 24516, + "training_step_time": 0.12322807312011719 + }, + { + "epoch": 3.740997314453125e-05, + "model_forward_time": 0.025222063064575195, + "step": 24517 + }, + { + "epoch": 3.740997314453125e-05, + "step": 24517, + "training_step_time": 0.11993098258972168 + }, + { + "epoch": 3.74114990234375e-05, + "model_forward_time": 0.02517843246459961, + "step": 24518 + }, + { + "epoch": 3.74114990234375e-05, + "step": 24518, + "training_step_time": 0.11707425117492676 + }, + { + "epoch": 3.741302490234375e-05, + "model_forward_time": 0.02510547637939453, + "step": 24519 + }, + { + "epoch": 3.741302490234375e-05, + "step": 24519, + "training_step_time": 0.11530876159667969 + }, + { + "epoch": 3.741455078125e-05, + "grad_norm": 0.12681689858436584, + "learning_rate": 8.848398448343859e-06, + "loss": 0.0087, + "step": 24520 + }, + { + "epoch": 3.741455078125e-05, + "model_forward_time": 0.02576756477355957, + "step": 24520 + }, + { + "epoch": 3.741455078125e-05, + "step": 24520, + "training_step_time": 0.11209583282470703 + }, + { + "epoch": 3.741607666015625e-05, + "model_forward_time": 0.025216341018676758, + "step": 24521 + }, + { + "epoch": 3.741607666015625e-05, + "step": 24521, + "training_step_time": 0.10951066017150879 + }, + { + "epoch": 3.74176025390625e-05, + "model_forward_time": 0.024831295013427734, + "step": 24522 + }, + { + "epoch": 3.74176025390625e-05, + "step": 24522, + "training_step_time": 0.11006712913513184 + }, + { + "epoch": 3.741912841796875e-05, + "model_forward_time": 0.02495098114013672, + "step": 24523 + }, + { + "epoch": 3.741912841796875e-05, + "step": 24523, + "training_step_time": 0.19521474838256836 + }, + { + "epoch": 3.7420654296875e-05, + "model_forward_time": 0.024242639541625977, + "step": 24524 + }, + { + "epoch": 3.7420654296875e-05, + "step": 24524, + "training_step_time": 0.12537288665771484 + }, + { + "epoch": 3.742218017578125e-05, + "model_forward_time": 0.02432560920715332, + "step": 24525 + }, + { + "epoch": 3.742218017578125e-05, + "step": 24525, + "training_step_time": 0.12476468086242676 + }, + { + "epoch": 3.74237060546875e-05, + "model_forward_time": 0.025181293487548828, + "step": 24526 + }, + { + "epoch": 3.74237060546875e-05, + "step": 24526, + "training_step_time": 0.11827707290649414 + }, + { + "epoch": 3.742523193359375e-05, + "model_forward_time": 0.02495574951171875, + "step": 24527 + }, + { + "epoch": 3.742523193359375e-05, + "step": 24527, + "training_step_time": 0.1655290126800537 + }, + { + "epoch": 3.74267578125e-05, + "model_forward_time": 0.024499177932739258, + "step": 24528 + }, + { + "epoch": 3.74267578125e-05, + "step": 24528, + "training_step_time": 0.12804722785949707 + }, + { + "epoch": 3.742828369140625e-05, + "model_forward_time": 0.024279356002807617, + "step": 24529 + }, + { + "epoch": 3.742828369140625e-05, + "step": 24529, + "training_step_time": 0.10935354232788086 + }, + { + "epoch": 3.74298095703125e-05, + "grad_norm": 0.09753312170505524, + "learning_rate": 8.817118038726558e-06, + "loss": 0.0037, + "step": 24530 + }, + { + "epoch": 3.74298095703125e-05, + "model_forward_time": 0.025224924087524414, + "step": 24530 + }, + { + "epoch": 3.74298095703125e-05, + "step": 24530, + "training_step_time": 0.10779190063476562 + }, + { + "epoch": 3.743133544921875e-05, + "model_forward_time": 0.025062084197998047, + "step": 24531 + }, + { + "epoch": 3.743133544921875e-05, + "step": 24531, + "training_step_time": 0.10641813278198242 + }, + { + "epoch": 3.7432861328125e-05, + "model_forward_time": 0.024801015853881836, + "step": 24532 + }, + { + "epoch": 3.7432861328125e-05, + "step": 24532, + "training_step_time": 0.10737800598144531 + }, + { + "epoch": 3.743438720703125e-05, + "model_forward_time": 0.025140762329101562, + "step": 24533 + }, + { + "epoch": 3.743438720703125e-05, + "step": 24533, + "training_step_time": 0.20610356330871582 + }, + { + "epoch": 3.74359130859375e-05, + "model_forward_time": 0.024512767791748047, + "step": 24534 + }, + { + "epoch": 3.74359130859375e-05, + "step": 24534, + "training_step_time": 0.18795490264892578 + }, + { + "epoch": 3.743743896484375e-05, + "model_forward_time": 0.024355173110961914, + "step": 24535 + }, + { + "epoch": 3.743743896484375e-05, + "step": 24535, + "training_step_time": 0.16538214683532715 + }, + { + "epoch": 3.743896484375e-05, + "model_forward_time": 0.024393320083618164, + "step": 24536 + }, + { + "epoch": 3.743896484375e-05, + "step": 24536, + "training_step_time": 0.1935722827911377 + }, + { + "epoch": 3.744049072265625e-05, + "model_forward_time": 0.024545907974243164, + "step": 24537 + }, + { + "epoch": 3.744049072265625e-05, + "step": 24537, + "training_step_time": 0.17475152015686035 + }, + { + "epoch": 3.74420166015625e-05, + "model_forward_time": 0.024323701858520508, + "step": 24538 + }, + { + "epoch": 3.74420166015625e-05, + "step": 24538, + "training_step_time": 0.13945984840393066 + }, + { + "epoch": 3.744354248046875e-05, + "model_forward_time": 0.02463841438293457, + "step": 24539 + }, + { + "epoch": 3.744354248046875e-05, + "step": 24539, + "training_step_time": 0.11001372337341309 + }, + { + "epoch": 3.7445068359375e-05, + "grad_norm": 0.2660139799118042, + "learning_rate": 8.785887670194138e-06, + "loss": 0.0046, + "step": 24540 + }, + { + "epoch": 3.7445068359375e-05, + "model_forward_time": 0.024878501892089844, + "step": 24540 + }, + { + "epoch": 3.7445068359375e-05, + "step": 24540, + "training_step_time": 0.11049199104309082 + }, + { + "epoch": 3.744659423828125e-05, + "model_forward_time": 0.0254669189453125, + "step": 24541 + }, + { + "epoch": 3.744659423828125e-05, + "step": 24541, + "training_step_time": 0.10457706451416016 + }, + { + "epoch": 3.74481201171875e-05, + "model_forward_time": 0.02501678466796875, + "step": 24542 + }, + { + "epoch": 3.74481201171875e-05, + "step": 24542, + "training_step_time": 0.10866785049438477 + }, + { + "epoch": 3.744964599609375e-05, + "model_forward_time": 0.02570652961730957, + "step": 24543 + }, + { + "epoch": 3.744964599609375e-05, + "step": 24543, + "training_step_time": 0.10526847839355469 + }, + { + "epoch": 3.7451171875e-05, + "model_forward_time": 0.025527000427246094, + "step": 24544 + }, + { + "epoch": 3.7451171875e-05, + "step": 24544, + "training_step_time": 0.10568881034851074 + }, + { + "epoch": 3.745269775390625e-05, + "model_forward_time": 0.025386810302734375, + "step": 24545 + }, + { + "epoch": 3.745269775390625e-05, + "step": 24545, + "training_step_time": 0.10553574562072754 + }, + { + "epoch": 3.74542236328125e-05, + "model_forward_time": 0.02509903907775879, + "step": 24546 + }, + { + "epoch": 3.74542236328125e-05, + "step": 24546, + "training_step_time": 0.10498642921447754 + }, + { + "epoch": 3.745574951171875e-05, + "model_forward_time": 0.025448322296142578, + "step": 24547 + }, + { + "epoch": 3.745574951171875e-05, + "step": 24547, + "training_step_time": 0.10721015930175781 + }, + { + "epoch": 3.7457275390625e-05, + "model_forward_time": 0.024565458297729492, + "step": 24548 + }, + { + "epoch": 3.7457275390625e-05, + "step": 24548, + "training_step_time": 0.11133456230163574 + }, + { + "epoch": 3.745880126953125e-05, + "model_forward_time": 0.026674509048461914, + "step": 24549 + }, + { + "epoch": 3.745880126953125e-05, + "step": 24549, + "training_step_time": 0.10602760314941406 + }, + { + "epoch": 3.74603271484375e-05, + "grad_norm": 0.17690864205360413, + "learning_rate": 8.754707380694427e-06, + "loss": 0.0065, + "step": 24550 + }, + { + "epoch": 3.74603271484375e-05, + "model_forward_time": 0.02669501304626465, + "step": 24550 + }, + { + "epoch": 3.74603271484375e-05, + "step": 24550, + "training_step_time": 0.18921208381652832 + }, + { + "epoch": 3.746185302734375e-05, + "model_forward_time": 0.02469015121459961, + "step": 24551 + }, + { + "epoch": 3.746185302734375e-05, + "step": 24551, + "training_step_time": 0.14371609687805176 + }, + { + "epoch": 3.746337890625e-05, + "model_forward_time": 0.024575471878051758, + "step": 24552 + }, + { + "epoch": 3.746337890625e-05, + "step": 24552, + "training_step_time": 0.19556093215942383 + }, + { + "epoch": 3.746490478515625e-05, + "model_forward_time": 0.024895191192626953, + "step": 24553 + }, + { + "epoch": 3.746490478515625e-05, + "step": 24553, + "training_step_time": 0.10324525833129883 + }, + { + "epoch": 3.74664306640625e-05, + "model_forward_time": 0.024805307388305664, + "step": 24554 + }, + { + "epoch": 3.74664306640625e-05, + "step": 24554, + "training_step_time": 0.10402107238769531 + }, + { + "epoch": 3.746795654296875e-05, + "model_forward_time": 0.025465965270996094, + "step": 24555 + }, + { + "epoch": 3.746795654296875e-05, + "step": 24555, + "training_step_time": 0.10894393920898438 + }, + { + "epoch": 3.7469482421875e-05, + "model_forward_time": 0.02540302276611328, + "step": 24556 + }, + { + "epoch": 3.7469482421875e-05, + "step": 24556, + "training_step_time": 0.10485053062438965 + }, + { + "epoch": 3.747100830078125e-05, + "model_forward_time": 0.02529740333557129, + "step": 24557 + }, + { + "epoch": 3.747100830078125e-05, + "step": 24557, + "training_step_time": 0.10475683212280273 + }, + { + "epoch": 3.74725341796875e-05, + "model_forward_time": 0.025225400924682617, + "step": 24558 + }, + { + "epoch": 3.74725341796875e-05, + "step": 24558, + "training_step_time": 0.10404729843139648 + }, + { + "epoch": 3.747406005859375e-05, + "model_forward_time": 0.0252072811126709, + "step": 24559 + }, + { + "epoch": 3.747406005859375e-05, + "step": 24559, + "training_step_time": 0.10393953323364258 + }, + { + "epoch": 3.74755859375e-05, + "grad_norm": 0.07185178995132446, + "learning_rate": 8.723577208114419e-06, + "loss": 0.0031, + "step": 24560 + }, + { + "epoch": 3.74755859375e-05, + "model_forward_time": 0.02567744255065918, + "step": 24560 + }, + { + "epoch": 3.74755859375e-05, + "step": 24560, + "training_step_time": 0.1043844223022461 + }, + { + "epoch": 3.747711181640625e-05, + "model_forward_time": 0.02534794807434082, + "step": 24561 + }, + { + "epoch": 3.747711181640625e-05, + "step": 24561, + "training_step_time": 0.10820889472961426 + }, + { + "epoch": 3.74786376953125e-05, + "model_forward_time": 0.0249326229095459, + "step": 24562 + }, + { + "epoch": 3.74786376953125e-05, + "step": 24562, + "training_step_time": 0.10959291458129883 + }, + { + "epoch": 3.748016357421875e-05, + "model_forward_time": 0.025355100631713867, + "step": 24563 + }, + { + "epoch": 3.748016357421875e-05, + "step": 24563, + "training_step_time": 0.10719919204711914 + }, + { + "epoch": 3.7481689453125e-05, + "model_forward_time": 0.02827930450439453, + "step": 24564 + }, + { + "epoch": 3.7481689453125e-05, + "step": 24564, + "training_step_time": 0.1087489128112793 + }, + { + "epoch": 3.748321533203125e-05, + "model_forward_time": 0.02530670166015625, + "step": 24565 + }, + { + "epoch": 3.748321533203125e-05, + "step": 24565, + "training_step_time": 0.10726213455200195 + }, + { + "epoch": 3.74847412109375e-05, + "model_forward_time": 0.024993896484375, + "step": 24566 + }, + { + "epoch": 3.74847412109375e-05, + "step": 24566, + "training_step_time": 0.1052238941192627 + }, + { + "epoch": 3.748626708984375e-05, + "model_forward_time": 0.02536606788635254, + "step": 24567 + }, + { + "epoch": 3.748626708984375e-05, + "step": 24567, + "training_step_time": 0.1067500114440918 + }, + { + "epoch": 3.748779296875e-05, + "model_forward_time": 0.024975299835205078, + "step": 24568 + }, + { + "epoch": 3.748779296875e-05, + "step": 24568, + "training_step_time": 0.1979060173034668 + }, + { + "epoch": 3.748931884765625e-05, + "model_forward_time": 0.024240970611572266, + "step": 24569 + }, + { + "epoch": 3.748931884765625e-05, + "step": 24569, + "training_step_time": 0.12394356727600098 + }, + { + "epoch": 3.74908447265625e-05, + "grad_norm": 0.0854315385222435, + "learning_rate": 8.692497190280224e-06, + "loss": 0.0034, + "step": 24570 + }, + { + "epoch": 3.74908447265625e-05, + "model_forward_time": 0.02436995506286621, + "step": 24570 + }, + { + "epoch": 3.74908447265625e-05, + "step": 24570, + "training_step_time": 0.11355400085449219 + }, + { + "epoch": 3.749237060546875e-05, + "model_forward_time": 0.025146007537841797, + "step": 24571 + }, + { + "epoch": 3.749237060546875e-05, + "step": 24571, + "training_step_time": 0.12517523765563965 + }, + { + "epoch": 3.7493896484375e-05, + "model_forward_time": 0.02553391456604004, + "step": 24572 + }, + { + "epoch": 3.7493896484375e-05, + "step": 24572, + "training_step_time": 0.1638336181640625 + }, + { + "epoch": 3.749542236328125e-05, + "model_forward_time": 0.02447676658630371, + "step": 24573 + }, + { + "epoch": 3.749542236328125e-05, + "step": 24573, + "training_step_time": 0.1622469425201416 + }, + { + "epoch": 3.74969482421875e-05, + "model_forward_time": 0.024545669555664062, + "step": 24574 + }, + { + "epoch": 3.74969482421875e-05, + "step": 24574, + "training_step_time": 0.11072659492492676 + }, + { + "epoch": 3.749847412109375e-05, + "model_forward_time": 0.024540424346923828, + "step": 24575 + }, + { + "epoch": 3.749847412109375e-05, + "step": 24575, + "training_step_time": 0.10304594039916992 + }, + { + "epoch": 3.75e-05, + "model_forward_time": 0.02496647834777832, + "step": 24576 + }, + { + "epoch": 3.75e-05, + "step": 24576, + "training_step_time": 0.10583209991455078 + }, + { + "epoch": 3.750152587890625e-05, + "model_forward_time": 0.0249941349029541, + "step": 24577 + }, + { + "epoch": 3.750152587890625e-05, + "step": 24577, + "training_step_time": 0.10685205459594727 + }, + { + "epoch": 3.75030517578125e-05, + "model_forward_time": 0.025499582290649414, + "step": 24578 + }, + { + "epoch": 3.75030517578125e-05, + "step": 24578, + "training_step_time": 0.2000105381011963 + }, + { + "epoch": 3.750457763671875e-05, + "model_forward_time": 0.02456212043762207, + "step": 24579 + }, + { + "epoch": 3.750457763671875e-05, + "step": 24579, + "training_step_time": 0.1266946792602539 + }, + { + "epoch": 3.7506103515625e-05, + "grad_norm": 0.30930033326148987, + "learning_rate": 8.661467364956993e-06, + "loss": 0.0062, + "step": 24580 + }, + { + "epoch": 3.7506103515625e-05, + "model_forward_time": 0.02426314353942871, + "step": 24580 + }, + { + "epoch": 3.7506103515625e-05, + "step": 24580, + "training_step_time": 0.2139286994934082 + }, + { + "epoch": 3.750762939453125e-05, + "model_forward_time": 0.024489641189575195, + "step": 24581 + }, + { + "epoch": 3.750762939453125e-05, + "step": 24581, + "training_step_time": 0.13774561882019043 + }, + { + "epoch": 3.75091552734375e-05, + "model_forward_time": 0.024532318115234375, + "step": 24582 + }, + { + "epoch": 3.75091552734375e-05, + "step": 24582, + "training_step_time": 0.11119675636291504 + }, + { + "epoch": 3.751068115234375e-05, + "model_forward_time": 0.02542281150817871, + "step": 24583 + }, + { + "epoch": 3.751068115234375e-05, + "step": 24583, + "training_step_time": 0.10898995399475098 + }, + { + "epoch": 3.751220703125e-05, + "model_forward_time": 0.0253450870513916, + "step": 24584 + }, + { + "epoch": 3.751220703125e-05, + "step": 24584, + "training_step_time": 0.19411587715148926 + }, + { + "epoch": 3.751373291015625e-05, + "model_forward_time": 0.0246124267578125, + "step": 24585 + }, + { + "epoch": 3.751373291015625e-05, + "step": 24585, + "training_step_time": 0.10469555854797363 + }, + { + "epoch": 3.75152587890625e-05, + "model_forward_time": 0.024412155151367188, + "step": 24586 + }, + { + "epoch": 3.75152587890625e-05, + "step": 24586, + "training_step_time": 0.10247659683227539 + }, + { + "epoch": 3.751678466796875e-05, + "model_forward_time": 0.024933815002441406, + "step": 24587 + }, + { + "epoch": 3.751678466796875e-05, + "step": 24587, + "training_step_time": 0.10512018203735352 + }, + { + "epoch": 3.7518310546875e-05, + "model_forward_time": 0.0258939266204834, + "step": 24588 + }, + { + "epoch": 3.7518310546875e-05, + "step": 24588, + "training_step_time": 0.10527467727661133 + }, + { + "epoch": 3.751983642578125e-05, + "model_forward_time": 0.02485203742980957, + "step": 24589 + }, + { + "epoch": 3.751983642578125e-05, + "step": 24589, + "training_step_time": 0.10344696044921875 + }, + { + "epoch": 3.75213623046875e-05, + "grad_norm": 0.07855116575956345, + "learning_rate": 8.630487769848877e-06, + "loss": 0.0042, + "step": 24590 + }, + { + "epoch": 3.75213623046875e-05, + "model_forward_time": 0.025447845458984375, + "step": 24590 + }, + { + "epoch": 3.75213623046875e-05, + "step": 24590, + "training_step_time": 0.10543131828308105 + }, + { + "epoch": 3.752288818359375e-05, + "model_forward_time": 0.02537369728088379, + "step": 24591 + }, + { + "epoch": 3.752288818359375e-05, + "step": 24591, + "training_step_time": 0.1058201789855957 + }, + { + "epoch": 3.75244140625e-05, + "model_forward_time": 0.02550053596496582, + "step": 24592 + }, + { + "epoch": 3.75244140625e-05, + "step": 24592, + "training_step_time": 0.10565638542175293 + }, + { + "epoch": 3.752593994140625e-05, + "model_forward_time": 0.027561664581298828, + "step": 24593 + }, + { + "epoch": 3.752593994140625e-05, + "step": 24593, + "training_step_time": 0.10858559608459473 + }, + { + "epoch": 3.75274658203125e-05, + "model_forward_time": 0.025360107421875, + "step": 24594 + }, + { + "epoch": 3.75274658203125e-05, + "step": 24594, + "training_step_time": 0.10986328125 + }, + { + "epoch": 3.752899169921875e-05, + "model_forward_time": 0.025528669357299805, + "step": 24595 + }, + { + "epoch": 3.752899169921875e-05, + "step": 24595, + "training_step_time": 0.11755108833312988 + }, + { + "epoch": 3.7530517578125e-05, + "model_forward_time": 0.025484800338745117, + "step": 24596 + }, + { + "epoch": 3.7530517578125e-05, + "step": 24596, + "training_step_time": 0.11875677108764648 + }, + { + "epoch": 3.753204345703125e-05, + "model_forward_time": 0.025776147842407227, + "step": 24597 + }, + { + "epoch": 3.753204345703125e-05, + "step": 24597, + "training_step_time": 0.11156797409057617 + }, + { + "epoch": 3.75335693359375e-05, + "model_forward_time": 0.02546834945678711, + "step": 24598 + }, + { + "epoch": 3.75335693359375e-05, + "step": 24598, + "training_step_time": 0.12378120422363281 + }, + { + "epoch": 3.753509521484375e-05, + "model_forward_time": 0.025303125381469727, + "step": 24599 + }, + { + "epoch": 3.753509521484375e-05, + "step": 24599, + "training_step_time": 0.11845684051513672 + }, + { + "epoch": 3.753662109375e-05, + "grad_norm": 0.11178535968065262, + "learning_rate": 8.599558442598998e-06, + "loss": 0.0046, + "step": 24600 + }, + { + "epoch": 3.753662109375e-05, + "model_forward_time": 0.025345325469970703, + "step": 24600 + }, + { + "epoch": 3.753662109375e-05, + "step": 24600, + "training_step_time": 0.10921835899353027 + }, + { + "epoch": 3.753814697265625e-05, + "model_forward_time": 0.02861499786376953, + "step": 24601 + }, + { + "epoch": 3.753814697265625e-05, + "step": 24601, + "training_step_time": 0.1116337776184082 + }, + { + "epoch": 3.75396728515625e-05, + "model_forward_time": 0.025134801864624023, + "step": 24602 + }, + { + "epoch": 3.75396728515625e-05, + "step": 24602, + "training_step_time": 0.11266422271728516 + }, + { + "epoch": 3.754119873046875e-05, + "model_forward_time": 0.02392745018005371, + "step": 24603 + }, + { + "epoch": 3.754119873046875e-05, + "step": 24603, + "training_step_time": 0.10880923271179199 + }, + { + "epoch": 3.7542724609375e-05, + "model_forward_time": 0.024075984954833984, + "step": 24604 + }, + { + "epoch": 3.7542724609375e-05, + "step": 24604, + "training_step_time": 0.10943150520324707 + }, + { + "epoch": 3.754425048828125e-05, + "model_forward_time": 0.025032997131347656, + "step": 24605 + }, + { + "epoch": 3.754425048828125e-05, + "step": 24605, + "training_step_time": 0.10854268074035645 + }, + { + "epoch": 3.75457763671875e-05, + "model_forward_time": 0.02542591094970703, + "step": 24606 + }, + { + "epoch": 3.75457763671875e-05, + "step": 24606, + "training_step_time": 0.11033391952514648 + }, + { + "epoch": 3.754730224609375e-05, + "model_forward_time": 0.0251157283782959, + "step": 24607 + }, + { + "epoch": 3.754730224609375e-05, + "step": 24607, + "training_step_time": 0.10775351524353027 + }, + { + "epoch": 3.7548828125e-05, + "model_forward_time": 0.025029420852661133, + "step": 24608 + }, + { + "epoch": 3.7548828125e-05, + "step": 24608, + "training_step_time": 0.10602116584777832 + }, + { + "epoch": 3.755035400390625e-05, + "model_forward_time": 0.025055885314941406, + "step": 24609 + }, + { + "epoch": 3.755035400390625e-05, + "step": 24609, + "training_step_time": 0.10934710502624512 + }, + { + "epoch": 3.75518798828125e-05, + "grad_norm": 0.09628903865814209, + "learning_rate": 8.568679420789433e-06, + "loss": 0.01, + "step": 24610 + }, + { + "epoch": 3.75518798828125e-05, + "model_forward_time": 0.024932861328125, + "step": 24610 + }, + { + "epoch": 3.75518798828125e-05, + "step": 24610, + "training_step_time": 0.10542559623718262 + }, + { + "epoch": 3.755340576171875e-05, + "model_forward_time": 0.02511429786682129, + "step": 24611 + }, + { + "epoch": 3.755340576171875e-05, + "step": 24611, + "training_step_time": 0.10642290115356445 + }, + { + "epoch": 3.7554931640625e-05, + "model_forward_time": 0.027379274368286133, + "step": 24612 + }, + { + "epoch": 3.7554931640625e-05, + "step": 24612, + "training_step_time": 0.10973906517028809 + }, + { + "epoch": 3.755645751953125e-05, + "model_forward_time": 0.02495551109313965, + "step": 24613 + }, + { + "epoch": 3.755645751953125e-05, + "step": 24613, + "training_step_time": 0.10364937782287598 + }, + { + "epoch": 3.75579833984375e-05, + "model_forward_time": 0.025000333786010742, + "step": 24614 + }, + { + "epoch": 3.75579833984375e-05, + "step": 24614, + "training_step_time": 0.10458111763000488 + }, + { + "epoch": 3.755950927734375e-05, + "model_forward_time": 0.02641916275024414, + "step": 24615 + }, + { + "epoch": 3.755950927734375e-05, + "step": 24615, + "training_step_time": 0.15184259414672852 + }, + { + "epoch": 3.756103515625e-05, + "model_forward_time": 0.024826526641845703, + "step": 24616 + }, + { + "epoch": 3.756103515625e-05, + "step": 24616, + "training_step_time": 0.11342358589172363 + }, + { + "epoch": 3.756256103515625e-05, + "model_forward_time": 0.024573564529418945, + "step": 24617 + }, + { + "epoch": 3.756256103515625e-05, + "step": 24617, + "training_step_time": 0.12888240814208984 + }, + { + "epoch": 3.75640869140625e-05, + "model_forward_time": 0.025684833526611328, + "step": 24618 + }, + { + "epoch": 3.75640869140625e-05, + "step": 24618, + "training_step_time": 0.1049187183380127 + }, + { + "epoch": 3.756561279296875e-05, + "model_forward_time": 0.02555990219116211, + "step": 24619 + }, + { + "epoch": 3.756561279296875e-05, + "step": 24619, + "training_step_time": 0.18158793449401855 + }, + { + "epoch": 3.7567138671875e-05, + "grad_norm": 0.11766628921031952, + "learning_rate": 8.537850741941073e-06, + "loss": 0.003, + "step": 24620 + }, + { + "epoch": 3.7567138671875e-05, + "model_forward_time": 0.02450847625732422, + "step": 24620 + }, + { + "epoch": 3.7567138671875e-05, + "step": 24620, + "training_step_time": 0.1171262264251709 + }, + { + "epoch": 3.756866455078125e-05, + "model_forward_time": 0.024476289749145508, + "step": 24621 + }, + { + "epoch": 3.756866455078125e-05, + "step": 24621, + "training_step_time": 0.11860322952270508 + }, + { + "epoch": 3.75701904296875e-05, + "model_forward_time": 0.025623559951782227, + "step": 24622 + }, + { + "epoch": 3.75701904296875e-05, + "step": 24622, + "training_step_time": 0.10486555099487305 + }, + { + "epoch": 3.757171630859375e-05, + "model_forward_time": 0.024907350540161133, + "step": 24623 + }, + { + "epoch": 3.757171630859375e-05, + "step": 24623, + "training_step_time": 0.1063544750213623 + }, + { + "epoch": 3.75732421875e-05, + "model_forward_time": 0.025706052780151367, + "step": 24624 + }, + { + "epoch": 3.75732421875e-05, + "step": 24624, + "training_step_time": 0.10533499717712402 + }, + { + "epoch": 3.757476806640625e-05, + "model_forward_time": 0.025083065032958984, + "step": 24625 + }, + { + "epoch": 3.757476806640625e-05, + "step": 24625, + "training_step_time": 0.20092034339904785 + }, + { + "epoch": 3.75762939453125e-05, + "model_forward_time": 0.023831605911254883, + "step": 24626 + }, + { + "epoch": 3.75762939453125e-05, + "step": 24626, + "training_step_time": 0.11516332626342773 + }, + { + "epoch": 3.757781982421875e-05, + "model_forward_time": 0.024568557739257812, + "step": 24627 + }, + { + "epoch": 3.757781982421875e-05, + "step": 24627, + "training_step_time": 0.13889455795288086 + }, + { + "epoch": 3.7579345703125e-05, + "model_forward_time": 0.02492380142211914, + "step": 24628 + }, + { + "epoch": 3.7579345703125e-05, + "step": 24628, + "training_step_time": 0.1076345443725586 + }, + { + "epoch": 3.758087158203125e-05, + "model_forward_time": 0.02507615089416504, + "step": 24629 + }, + { + "epoch": 3.758087158203125e-05, + "step": 24629, + "training_step_time": 0.1330573558807373 + }, + { + "epoch": 3.75823974609375e-05, + "grad_norm": 0.07475856691598892, + "learning_rate": 8.507072443513702e-06, + "loss": 0.0116, + "step": 24630 + }, + { + "epoch": 3.75823974609375e-05, + "model_forward_time": 0.025316238403320312, + "step": 24630 + }, + { + "epoch": 3.75823974609375e-05, + "step": 24630, + "training_step_time": 0.19191956520080566 + }, + { + "epoch": 3.758392333984375e-05, + "model_forward_time": 0.024086952209472656, + "step": 24631 + }, + { + "epoch": 3.758392333984375e-05, + "step": 24631, + "training_step_time": 0.19752264022827148 + }, + { + "epoch": 3.758544921875e-05, + "model_forward_time": 0.02460503578186035, + "step": 24632 + }, + { + "epoch": 3.758544921875e-05, + "step": 24632, + "training_step_time": 0.13787627220153809 + }, + { + "epoch": 3.758697509765625e-05, + "model_forward_time": 0.02401256561279297, + "step": 24633 + }, + { + "epoch": 3.758697509765625e-05, + "step": 24633, + "training_step_time": 0.14313602447509766 + }, + { + "epoch": 3.75885009765625e-05, + "model_forward_time": 0.024225234985351562, + "step": 24634 + }, + { + "epoch": 3.75885009765625e-05, + "step": 24634, + "training_step_time": 0.13921570777893066 + }, + { + "epoch": 3.759002685546875e-05, + "model_forward_time": 0.024692058563232422, + "step": 24635 + }, + { + "epoch": 3.759002685546875e-05, + "step": 24635, + "training_step_time": 0.13478803634643555 + }, + { + "epoch": 3.7591552734375e-05, + "model_forward_time": 0.024507999420166016, + "step": 24636 + }, + { + "epoch": 3.7591552734375e-05, + "step": 24636, + "training_step_time": 0.12508273124694824 + }, + { + "epoch": 3.759307861328125e-05, + "model_forward_time": 0.02461862564086914, + "step": 24637 + }, + { + "epoch": 3.759307861328125e-05, + "step": 24637, + "training_step_time": 0.12484216690063477 + }, + { + "epoch": 3.75946044921875e-05, + "model_forward_time": 0.02584218978881836, + "step": 24638 + }, + { + "epoch": 3.75946044921875e-05, + "step": 24638, + "training_step_time": 0.1053304672241211 + }, + { + "epoch": 3.759613037109375e-05, + "model_forward_time": 0.024356603622436523, + "step": 24639 + }, + { + "epoch": 3.759613037109375e-05, + "step": 24639, + "training_step_time": 0.11115026473999023 + }, + { + "epoch": 3.759765625e-05, + "grad_norm": 0.12253975868225098, + "learning_rate": 8.476344562905841e-06, + "loss": 0.0063, + "step": 24640 + }, + { + "epoch": 3.759765625e-05, + "model_forward_time": 0.025294065475463867, + "step": 24640 + }, + { + "epoch": 3.759765625e-05, + "step": 24640, + "training_step_time": 0.11259293556213379 + }, + { + "epoch": 3.759918212890625e-05, + "model_forward_time": 0.025040864944458008, + "step": 24641 + }, + { + "epoch": 3.759918212890625e-05, + "step": 24641, + "training_step_time": 0.10966968536376953 + }, + { + "epoch": 3.76007080078125e-05, + "model_forward_time": 0.0253143310546875, + "step": 24642 + }, + { + "epoch": 3.76007080078125e-05, + "step": 24642, + "training_step_time": 0.10686635971069336 + }, + { + "epoch": 3.760223388671875e-05, + "model_forward_time": 0.025565624237060547, + "step": 24643 + }, + { + "epoch": 3.760223388671875e-05, + "step": 24643, + "training_step_time": 0.1854720115661621 + }, + { + "epoch": 3.7603759765625e-05, + "model_forward_time": 0.024907350540161133, + "step": 24644 + }, + { + "epoch": 3.7603759765625e-05, + "step": 24644, + "training_step_time": 0.10588550567626953 + }, + { + "epoch": 3.760528564453125e-05, + "model_forward_time": 0.02510666847229004, + "step": 24645 + }, + { + "epoch": 3.760528564453125e-05, + "step": 24645, + "training_step_time": 0.1080477237701416 + }, + { + "epoch": 3.76068115234375e-05, + "model_forward_time": 0.025553464889526367, + "step": 24646 + }, + { + "epoch": 3.76068115234375e-05, + "step": 24646, + "training_step_time": 0.10716819763183594 + }, + { + "epoch": 3.760833740234375e-05, + "model_forward_time": 0.025299549102783203, + "step": 24647 + }, + { + "epoch": 3.760833740234375e-05, + "step": 24647, + "training_step_time": 0.10970139503479004 + }, + { + "epoch": 3.760986328125e-05, + "model_forward_time": 0.025234460830688477, + "step": 24648 + }, + { + "epoch": 3.760986328125e-05, + "step": 24648, + "training_step_time": 0.10617804527282715 + }, + { + "epoch": 3.761138916015625e-05, + "model_forward_time": 0.025331735610961914, + "step": 24649 + }, + { + "epoch": 3.761138916015625e-05, + "step": 24649, + "training_step_time": 0.10564613342285156 + }, + { + "epoch": 3.76129150390625e-05, + "grad_norm": 0.08911236375570297, + "learning_rate": 8.445667137454761e-06, + "loss": 0.0045, + "step": 24650 + }, + { + "epoch": 3.76129150390625e-05, + "model_forward_time": 0.02532219886779785, + "step": 24650 + }, + { + "epoch": 3.76129150390625e-05, + "step": 24650, + "training_step_time": 0.10675811767578125 + }, + { + "epoch": 3.761444091796875e-05, + "model_forward_time": 0.025267839431762695, + "step": 24651 + }, + { + "epoch": 3.761444091796875e-05, + "step": 24651, + "training_step_time": 0.10497117042541504 + }, + { + "epoch": 3.7615966796875e-05, + "model_forward_time": 0.0250399112701416, + "step": 24652 + }, + { + "epoch": 3.7615966796875e-05, + "step": 24652, + "training_step_time": 0.10841941833496094 + }, + { + "epoch": 3.761749267578125e-05, + "model_forward_time": 0.02491593360900879, + "step": 24653 + }, + { + "epoch": 3.761749267578125e-05, + "step": 24653, + "training_step_time": 0.10384988784790039 + }, + { + "epoch": 3.76190185546875e-05, + "model_forward_time": 0.028411388397216797, + "step": 24654 + }, + { + "epoch": 3.76190185546875e-05, + "step": 24654, + "training_step_time": 0.10800600051879883 + }, + { + "epoch": 3.762054443359375e-05, + "model_forward_time": 0.02501678466796875, + "step": 24655 + }, + { + "epoch": 3.762054443359375e-05, + "step": 24655, + "training_step_time": 0.10379433631896973 + }, + { + "epoch": 3.76220703125e-05, + "model_forward_time": 0.025246858596801758, + "step": 24656 + }, + { + "epoch": 3.76220703125e-05, + "step": 24656, + "training_step_time": 0.10580968856811523 + }, + { + "epoch": 3.762359619140625e-05, + "model_forward_time": 0.025104045867919922, + "step": 24657 + }, + { + "epoch": 3.762359619140625e-05, + "step": 24657, + "training_step_time": 0.10452532768249512 + }, + { + "epoch": 3.76251220703125e-05, + "model_forward_time": 0.025057554244995117, + "step": 24658 + }, + { + "epoch": 3.76251220703125e-05, + "step": 24658, + "training_step_time": 0.10411739349365234 + }, + { + "epoch": 3.762664794921875e-05, + "model_forward_time": 0.024983644485473633, + "step": 24659 + }, + { + "epoch": 3.762664794921875e-05, + "step": 24659, + "training_step_time": 0.10465526580810547 + }, + { + "epoch": 3.7628173828125e-05, + "grad_norm": 0.1461312621831894, + "learning_rate": 8.415040204436426e-06, + "loss": 0.008, + "step": 24660 + }, + { + "epoch": 3.7628173828125e-05, + "model_forward_time": 0.02513408660888672, + "step": 24660 + }, + { + "epoch": 3.7628173828125e-05, + "step": 24660, + "training_step_time": 0.1800389289855957 + }, + { + "epoch": 3.762969970703125e-05, + "model_forward_time": 0.024550676345825195, + "step": 24661 + }, + { + "epoch": 3.762969970703125e-05, + "step": 24661, + "training_step_time": 0.17178106307983398 + }, + { + "epoch": 3.76312255859375e-05, + "model_forward_time": 0.02426433563232422, + "step": 24662 + }, + { + "epoch": 3.76312255859375e-05, + "step": 24662, + "training_step_time": 0.13344168663024902 + }, + { + "epoch": 3.763275146484375e-05, + "model_forward_time": 0.024158954620361328, + "step": 24663 + }, + { + "epoch": 3.763275146484375e-05, + "step": 24663, + "training_step_time": 0.1519787311553955 + }, + { + "epoch": 3.763427734375e-05, + "model_forward_time": 0.024387121200561523, + "step": 24664 + }, + { + "epoch": 3.763427734375e-05, + "step": 24664, + "training_step_time": 0.10198736190795898 + }, + { + "epoch": 3.763580322265625e-05, + "model_forward_time": 0.02516317367553711, + "step": 24665 + }, + { + "epoch": 3.763580322265625e-05, + "step": 24665, + "training_step_time": 0.11841869354248047 + }, + { + "epoch": 3.76373291015625e-05, + "model_forward_time": 0.02498149871826172, + "step": 24666 + }, + { + "epoch": 3.76373291015625e-05, + "step": 24666, + "training_step_time": 0.1155397891998291 + }, + { + "epoch": 3.763885498046875e-05, + "model_forward_time": 0.025063514709472656, + "step": 24667 + }, + { + "epoch": 3.763885498046875e-05, + "step": 24667, + "training_step_time": 0.10306620597839355 + }, + { + "epoch": 3.7640380859375e-05, + "model_forward_time": 0.025312423706054688, + "step": 24668 + }, + { + "epoch": 3.7640380859375e-05, + "step": 24668, + "training_step_time": 0.10446715354919434 + }, + { + "epoch": 3.764190673828125e-05, + "model_forward_time": 0.02540874481201172, + "step": 24669 + }, + { + "epoch": 3.764190673828125e-05, + "step": 24669, + "training_step_time": 0.1122593879699707 + }, + { + "epoch": 3.76434326171875e-05, + "grad_norm": 0.11139220744371414, + "learning_rate": 8.384463801065434e-06, + "loss": 0.0091, + "step": 24670 + }, + { + "epoch": 3.76434326171875e-05, + "model_forward_time": 0.025043487548828125, + "step": 24670 + }, + { + "epoch": 3.76434326171875e-05, + "step": 24670, + "training_step_time": 0.12331867218017578 + }, + { + "epoch": 3.764495849609375e-05, + "model_forward_time": 0.02539825439453125, + "step": 24671 + }, + { + "epoch": 3.764495849609375e-05, + "step": 24671, + "training_step_time": 0.20186948776245117 + }, + { + "epoch": 3.7646484375e-05, + "model_forward_time": 0.024463891983032227, + "step": 24672 + }, + { + "epoch": 3.7646484375e-05, + "step": 24672, + "training_step_time": 0.13306069374084473 + }, + { + "epoch": 3.764801025390625e-05, + "model_forward_time": 0.024277448654174805, + "step": 24673 + }, + { + "epoch": 3.764801025390625e-05, + "step": 24673, + "training_step_time": 0.1932995319366455 + }, + { + "epoch": 3.76495361328125e-05, + "model_forward_time": 0.02436518669128418, + "step": 24674 + }, + { + "epoch": 3.76495361328125e-05, + "step": 24674, + "training_step_time": 0.16255617141723633 + }, + { + "epoch": 3.765106201171875e-05, + "model_forward_time": 0.024561166763305664, + "step": 24675 + }, + { + "epoch": 3.765106201171875e-05, + "step": 24675, + "training_step_time": 0.2116239070892334 + }, + { + "epoch": 3.7652587890625e-05, + "model_forward_time": 0.024524927139282227, + "step": 24676 + }, + { + "epoch": 3.7652587890625e-05, + "step": 24676, + "training_step_time": 0.1497359275817871 + }, + { + "epoch": 3.765411376953125e-05, + "model_forward_time": 0.024440526962280273, + "step": 24677 + }, + { + "epoch": 3.765411376953125e-05, + "step": 24677, + "training_step_time": 0.10516738891601562 + }, + { + "epoch": 3.76556396484375e-05, + "model_forward_time": 0.024184226989746094, + "step": 24678 + }, + { + "epoch": 3.76556396484375e-05, + "step": 24678, + "training_step_time": 0.10623979568481445 + }, + { + "epoch": 3.765716552734375e-05, + "model_forward_time": 0.025327682495117188, + "step": 24679 + }, + { + "epoch": 3.765716552734375e-05, + "step": 24679, + "training_step_time": 0.10723495483398438 + }, + { + "epoch": 3.765869140625e-05, + "grad_norm": 0.1620771437883377, + "learning_rate": 8.353937964495029e-06, + "loss": 0.0058, + "step": 24680 + }, + { + "epoch": 3.765869140625e-05, + "model_forward_time": 0.025048017501831055, + "step": 24680 + }, + { + "epoch": 3.765869140625e-05, + "step": 24680, + "training_step_time": 0.10497760772705078 + }, + { + "epoch": 3.766021728515625e-05, + "model_forward_time": 0.025429248809814453, + "step": 24681 + }, + { + "epoch": 3.766021728515625e-05, + "step": 24681, + "training_step_time": 0.11380624771118164 + }, + { + "epoch": 3.76617431640625e-05, + "model_forward_time": 0.025442123413085938, + "step": 24682 + }, + { + "epoch": 3.76617431640625e-05, + "step": 24682, + "training_step_time": 0.10442304611206055 + }, + { + "epoch": 3.766326904296875e-05, + "model_forward_time": 0.027236461639404297, + "step": 24683 + }, + { + "epoch": 3.766326904296875e-05, + "step": 24683, + "training_step_time": 0.10898351669311523 + }, + { + "epoch": 3.7664794921875e-05, + "model_forward_time": 0.025313138961791992, + "step": 24684 + }, + { + "epoch": 3.7664794921875e-05, + "step": 24684, + "training_step_time": 0.10648298263549805 + }, + { + "epoch": 3.766632080078125e-05, + "model_forward_time": 0.025181293487548828, + "step": 24685 + }, + { + "epoch": 3.766632080078125e-05, + "step": 24685, + "training_step_time": 0.10473275184631348 + }, + { + "epoch": 3.76678466796875e-05, + "model_forward_time": 0.02634143829345703, + "step": 24686 + }, + { + "epoch": 3.76678466796875e-05, + "step": 24686, + "training_step_time": 0.10670804977416992 + }, + { + "epoch": 3.766937255859375e-05, + "model_forward_time": 0.025484323501586914, + "step": 24687 + }, + { + "epoch": 3.766937255859375e-05, + "step": 24687, + "training_step_time": 0.11006927490234375 + }, + { + "epoch": 3.76708984375e-05, + "model_forward_time": 0.02533698081970215, + "step": 24688 + }, + { + "epoch": 3.76708984375e-05, + "step": 24688, + "training_step_time": 0.11202621459960938 + }, + { + "epoch": 3.767242431640625e-05, + "model_forward_time": 0.0254364013671875, + "step": 24689 + }, + { + "epoch": 3.767242431640625e-05, + "step": 24689, + "training_step_time": 0.10852289199829102 + }, + { + "epoch": 3.76739501953125e-05, + "grad_norm": 0.3249269127845764, + "learning_rate": 8.323462731816961e-06, + "loss": 0.0065, + "step": 24690 + }, + { + "epoch": 3.76739501953125e-05, + "model_forward_time": 0.025275707244873047, + "step": 24690 + }, + { + "epoch": 3.76739501953125e-05, + "step": 24690, + "training_step_time": 0.11052441596984863 + }, + { + "epoch": 3.767547607421875e-05, + "model_forward_time": 0.026571273803710938, + "step": 24691 + }, + { + "epoch": 3.767547607421875e-05, + "step": 24691, + "training_step_time": 0.11166548728942871 + }, + { + "epoch": 3.7677001953125e-05, + "model_forward_time": 0.025646209716796875, + "step": 24692 + }, + { + "epoch": 3.7677001953125e-05, + "step": 24692, + "training_step_time": 0.10544109344482422 + }, + { + "epoch": 3.767852783203125e-05, + "model_forward_time": 0.025808334350585938, + "step": 24693 + }, + { + "epoch": 3.767852783203125e-05, + "step": 24693, + "training_step_time": 0.10573482513427734 + }, + { + "epoch": 3.76800537109375e-05, + "model_forward_time": 0.025771141052246094, + "step": 24694 + }, + { + "epoch": 3.76800537109375e-05, + "step": 24694, + "training_step_time": 0.10361909866333008 + }, + { + "epoch": 3.768157958984375e-05, + "model_forward_time": 0.027199268341064453, + "step": 24695 + }, + { + "epoch": 3.768157958984375e-05, + "step": 24695, + "training_step_time": 0.10750150680541992 + }, + { + "epoch": 3.768310546875e-05, + "model_forward_time": 0.025446414947509766, + "step": 24696 + }, + { + "epoch": 3.768310546875e-05, + "step": 24696, + "training_step_time": 0.10345077514648438 + }, + { + "epoch": 3.768463134765625e-05, + "model_forward_time": 0.02542877197265625, + "step": 24697 + }, + { + "epoch": 3.768463134765625e-05, + "step": 24697, + "training_step_time": 0.1033928394317627 + }, + { + "epoch": 3.76861572265625e-05, + "model_forward_time": 0.025301456451416016, + "step": 24698 + }, + { + "epoch": 3.76861572265625e-05, + "step": 24698, + "training_step_time": 0.10829472541809082 + }, + { + "epoch": 3.768768310546875e-05, + "model_forward_time": 0.025367021560668945, + "step": 24699 + }, + { + "epoch": 3.768768310546875e-05, + "step": 24699, + "training_step_time": 0.10846209526062012 + }, + { + "epoch": 3.7689208984375e-05, + "grad_norm": 0.20272240042686462, + "learning_rate": 8.293038140061515e-06, + "loss": 0.0069, + "step": 24700 + }, + { + "epoch": 3.7689208984375e-05, + "model_forward_time": 0.02530694007873535, + "step": 24700 + }, + { + "epoch": 3.7689208984375e-05, + "step": 24700, + "training_step_time": 0.10509347915649414 + }, + { + "epoch": 3.769073486328125e-05, + "model_forward_time": 0.02567458152770996, + "step": 24701 + }, + { + "epoch": 3.769073486328125e-05, + "step": 24701, + "training_step_time": 0.10825347900390625 + }, + { + "epoch": 3.76922607421875e-05, + "model_forward_time": 0.026160478591918945, + "step": 24702 + }, + { + "epoch": 3.76922607421875e-05, + "step": 24702, + "training_step_time": 0.104736328125 + }, + { + "epoch": 3.769378662109375e-05, + "model_forward_time": 0.02494978904724121, + "step": 24703 + }, + { + "epoch": 3.769378662109375e-05, + "step": 24703, + "training_step_time": 0.10441446304321289 + }, + { + "epoch": 3.76953125e-05, + "model_forward_time": 0.025556325912475586, + "step": 24704 + }, + { + "epoch": 3.76953125e-05, + "step": 24704, + "training_step_time": 0.11941909790039062 + }, + { + "epoch": 3.769683837890625e-05, + "model_forward_time": 0.024611234664916992, + "step": 24705 + }, + { + "epoch": 3.769683837890625e-05, + "step": 24705, + "training_step_time": 0.15364336967468262 + }, + { + "epoch": 3.76983642578125e-05, + "model_forward_time": 0.024694204330444336, + "step": 24706 + }, + { + "epoch": 3.76983642578125e-05, + "step": 24706, + "training_step_time": 0.19009661674499512 + }, + { + "epoch": 3.769989013671875e-05, + "model_forward_time": 0.028205394744873047, + "step": 24707 + }, + { + "epoch": 3.769989013671875e-05, + "step": 24707, + "training_step_time": 0.17749595642089844 + }, + { + "epoch": 3.7701416015625e-05, + "model_forward_time": 0.024264097213745117, + "step": 24708 + }, + { + "epoch": 3.7701416015625e-05, + "step": 24708, + "training_step_time": 0.14220094680786133 + }, + { + "epoch": 3.770294189453125e-05, + "model_forward_time": 0.024566173553466797, + "step": 24709 + }, + { + "epoch": 3.770294189453125e-05, + "step": 24709, + "training_step_time": 0.1421966552734375 + }, + { + "epoch": 3.77044677734375e-05, + "grad_norm": 0.2357063889503479, + "learning_rate": 8.262664226197436e-06, + "loss": 0.0064, + "step": 24710 + }, + { + "epoch": 3.77044677734375e-05, + "model_forward_time": 0.024228572845458984, + "step": 24710 + }, + { + "epoch": 3.77044677734375e-05, + "step": 24710, + "training_step_time": 0.21212506294250488 + }, + { + "epoch": 3.770599365234375e-05, + "model_forward_time": 0.025084972381591797, + "step": 24711 + }, + { + "epoch": 3.770599365234375e-05, + "step": 24711, + "training_step_time": 0.1172323226928711 + }, + { + "epoch": 3.770751953125e-05, + "model_forward_time": 0.024891138076782227, + "step": 24712 + }, + { + "epoch": 3.770751953125e-05, + "step": 24712, + "training_step_time": 0.11734604835510254 + }, + { + "epoch": 3.770904541015625e-05, + "model_forward_time": 0.025814056396484375, + "step": 24713 + }, + { + "epoch": 3.770904541015625e-05, + "step": 24713, + "training_step_time": 0.113189697265625 + }, + { + "epoch": 3.77105712890625e-05, + "model_forward_time": 0.02547168731689453, + "step": 24714 + }, + { + "epoch": 3.77105712890625e-05, + "step": 24714, + "training_step_time": 0.11117315292358398 + }, + { + "epoch": 3.771209716796875e-05, + "model_forward_time": 0.024678707122802734, + "step": 24715 + }, + { + "epoch": 3.771209716796875e-05, + "step": 24715, + "training_step_time": 0.10904335975646973 + }, + { + "epoch": 3.7713623046875e-05, + "model_forward_time": 0.02523016929626465, + "step": 24716 + }, + { + "epoch": 3.7713623046875e-05, + "step": 24716, + "training_step_time": 0.11055374145507812 + }, + { + "epoch": 3.771514892578125e-05, + "model_forward_time": 0.025364398956298828, + "step": 24717 + }, + { + "epoch": 3.771514892578125e-05, + "step": 24717, + "training_step_time": 0.1768040657043457 + }, + { + "epoch": 3.77166748046875e-05, + "model_forward_time": 0.025030851364135742, + "step": 24718 + }, + { + "epoch": 3.77166748046875e-05, + "step": 24718, + "training_step_time": 0.11713981628417969 + }, + { + "epoch": 3.771820068359375e-05, + "model_forward_time": 0.02494072914123535, + "step": 24719 + }, + { + "epoch": 3.771820068359375e-05, + "step": 24719, + "training_step_time": 0.11198687553405762 + }, + { + "epoch": 3.77197265625e-05, + "grad_norm": 0.17401650547981262, + "learning_rate": 8.232341027131885e-06, + "loss": 0.0067, + "step": 24720 + }, + { + "epoch": 3.77197265625e-05, + "model_forward_time": 0.025256633758544922, + "step": 24720 + }, + { + "epoch": 3.77197265625e-05, + "step": 24720, + "training_step_time": 0.11959171295166016 + }, + { + "epoch": 3.772125244140625e-05, + "model_forward_time": 0.025823116302490234, + "step": 24721 + }, + { + "epoch": 3.772125244140625e-05, + "step": 24721, + "training_step_time": 0.1282958984375 + }, + { + "epoch": 3.77227783203125e-05, + "model_forward_time": 0.025454282760620117, + "step": 24722 + }, + { + "epoch": 3.77227783203125e-05, + "step": 24722, + "training_step_time": 0.11265850067138672 + }, + { + "epoch": 3.772430419921875e-05, + "model_forward_time": 0.0255277156829834, + "step": 24723 + }, + { + "epoch": 3.772430419921875e-05, + "step": 24723, + "training_step_time": 0.10761475563049316 + }, + { + "epoch": 3.7725830078125e-05, + "model_forward_time": 0.025624513626098633, + "step": 24724 + }, + { + "epoch": 3.7725830078125e-05, + "step": 24724, + "training_step_time": 0.10893058776855469 + }, + { + "epoch": 3.772735595703125e-05, + "model_forward_time": 0.025362253189086914, + "step": 24725 + }, + { + "epoch": 3.772735595703125e-05, + "step": 24725, + "training_step_time": 0.10471963882446289 + }, + { + "epoch": 3.77288818359375e-05, + "model_forward_time": 0.025725841522216797, + "step": 24726 + }, + { + "epoch": 3.77288818359375e-05, + "step": 24726, + "training_step_time": 0.10526204109191895 + }, + { + "epoch": 3.773040771484375e-05, + "model_forward_time": 0.025378704071044922, + "step": 24727 + }, + { + "epoch": 3.773040771484375e-05, + "step": 24727, + "training_step_time": 0.10561084747314453 + }, + { + "epoch": 3.773193359375e-05, + "model_forward_time": 0.025200366973876953, + "step": 24728 + }, + { + "epoch": 3.773193359375e-05, + "step": 24728, + "training_step_time": 0.10489869117736816 + }, + { + "epoch": 3.773345947265625e-05, + "model_forward_time": 0.024952173233032227, + "step": 24729 + }, + { + "epoch": 3.773345947265625e-05, + "step": 24729, + "training_step_time": 0.1089940071105957 + }, + { + "epoch": 3.77349853515625e-05, + "grad_norm": 0.09289814531803131, + "learning_rate": 8.202068579710431e-06, + "loss": 0.0054, + "step": 24730 + }, + { + "epoch": 3.77349853515625e-05, + "model_forward_time": 0.025407075881958008, + "step": 24730 + }, + { + "epoch": 3.77349853515625e-05, + "step": 24730, + "training_step_time": 0.1092367172241211 + }, + { + "epoch": 3.773651123046875e-05, + "model_forward_time": 0.02545022964477539, + "step": 24731 + }, + { + "epoch": 3.773651123046875e-05, + "step": 24731, + "training_step_time": 0.10315895080566406 + }, + { + "epoch": 3.7738037109375e-05, + "model_forward_time": 0.02464437484741211, + "step": 24732 + }, + { + "epoch": 3.7738037109375e-05, + "step": 24732, + "training_step_time": 0.14650726318359375 + }, + { + "epoch": 3.773956298828125e-05, + "model_forward_time": 0.02463507652282715, + "step": 24733 + }, + { + "epoch": 3.773956298828125e-05, + "step": 24733, + "training_step_time": 0.11372923851013184 + }, + { + "epoch": 3.77410888671875e-05, + "model_forward_time": 0.02526712417602539, + "step": 24734 + }, + { + "epoch": 3.77410888671875e-05, + "step": 24734, + "training_step_time": 0.11133694648742676 + }, + { + "epoch": 3.774261474609375e-05, + "model_forward_time": 0.02531719207763672, + "step": 24735 + }, + { + "epoch": 3.774261474609375e-05, + "step": 24735, + "training_step_time": 0.11350131034851074 + }, + { + "epoch": 3.7744140625e-05, + "model_forward_time": 0.02557826042175293, + "step": 24736 + }, + { + "epoch": 3.7744140625e-05, + "step": 24736, + "training_step_time": 0.19521737098693848 + }, + { + "epoch": 3.774566650390625e-05, + "model_forward_time": 0.02521228790283203, + "step": 24737 + }, + { + "epoch": 3.774566650390625e-05, + "step": 24737, + "training_step_time": 0.13204026222229004 + }, + { + "epoch": 3.77471923828125e-05, + "model_forward_time": 0.027198314666748047, + "step": 24738 + }, + { + "epoch": 3.77471923828125e-05, + "step": 24738, + "training_step_time": 0.10526013374328613 + }, + { + "epoch": 3.774871826171875e-05, + "model_forward_time": 0.025277376174926758, + "step": 24739 + }, + { + "epoch": 3.774871826171875e-05, + "step": 24739, + "training_step_time": 0.10287141799926758 + }, + { + "epoch": 3.7750244140625e-05, + "grad_norm": 0.11223476380109787, + "learning_rate": 8.17184692071694e-06, + "loss": 0.0061, + "step": 24740 + }, + { + "epoch": 3.7750244140625e-05, + "model_forward_time": 0.025348424911499023, + "step": 24740 + }, + { + "epoch": 3.7750244140625e-05, + "step": 24740, + "training_step_time": 0.1029973030090332 + }, + { + "epoch": 3.775177001953125e-05, + "model_forward_time": 0.025543212890625, + "step": 24741 + }, + { + "epoch": 3.775177001953125e-05, + "step": 24741, + "training_step_time": 0.10318160057067871 + }, + { + "epoch": 3.77532958984375e-05, + "model_forward_time": 0.025265216827392578, + "step": 24742 + }, + { + "epoch": 3.77532958984375e-05, + "step": 24742, + "training_step_time": 0.1047062873840332 + }, + { + "epoch": 3.775482177734375e-05, + "model_forward_time": 0.025072813034057617, + "step": 24743 + }, + { + "epoch": 3.775482177734375e-05, + "step": 24743, + "training_step_time": 0.10767769813537598 + }, + { + "epoch": 3.775634765625e-05, + "model_forward_time": 0.02535700798034668, + "step": 24744 + }, + { + "epoch": 3.775634765625e-05, + "step": 24744, + "training_step_time": 0.11373639106750488 + }, + { + "epoch": 3.775787353515625e-05, + "model_forward_time": 0.025676488876342773, + "step": 24745 + }, + { + "epoch": 3.775787353515625e-05, + "step": 24745, + "training_step_time": 0.12296128273010254 + }, + { + "epoch": 3.77593994140625e-05, + "model_forward_time": 0.02505350112915039, + "step": 24746 + }, + { + "epoch": 3.77593994140625e-05, + "step": 24746, + "training_step_time": 0.11015033721923828 + }, + { + "epoch": 3.776092529296875e-05, + "model_forward_time": 0.025101900100708008, + "step": 24747 + }, + { + "epoch": 3.776092529296875e-05, + "step": 24747, + "training_step_time": 0.11037921905517578 + }, + { + "epoch": 3.7762451171875e-05, + "model_forward_time": 0.02519989013671875, + "step": 24748 + }, + { + "epoch": 3.7762451171875e-05, + "step": 24748, + "training_step_time": 0.11372828483581543 + }, + { + "epoch": 3.776397705078125e-05, + "model_forward_time": 0.025419235229492188, + "step": 24749 + }, + { + "epoch": 3.776397705078125e-05, + "step": 24749, + "training_step_time": 0.1130373477935791 + }, + { + "epoch": 3.77655029296875e-05, + "grad_norm": 0.22124172747135162, + "learning_rate": 8.141676086873572e-06, + "loss": 0.0076, + "step": 24750 + }, + { + "epoch": 3.77655029296875e-05, + "model_forward_time": 0.02508831024169922, + "step": 24750 + }, + { + "epoch": 3.77655029296875e-05, + "step": 24750, + "training_step_time": 0.11056804656982422 + }, + { + "epoch": 3.776702880859375e-05, + "model_forward_time": 0.025234222412109375, + "step": 24751 + }, + { + "epoch": 3.776702880859375e-05, + "step": 24751, + "training_step_time": 0.11130142211914062 + }, + { + "epoch": 3.77685546875e-05, + "model_forward_time": 0.025200843811035156, + "step": 24752 + }, + { + "epoch": 3.77685546875e-05, + "step": 24752, + "training_step_time": 0.2108454704284668 + }, + { + "epoch": 3.777008056640625e-05, + "model_forward_time": 0.024896860122680664, + "step": 24753 + }, + { + "epoch": 3.777008056640625e-05, + "step": 24753, + "training_step_time": 0.1269211769104004 + }, + { + "epoch": 3.77716064453125e-05, + "model_forward_time": 0.02443552017211914, + "step": 24754 + }, + { + "epoch": 3.77716064453125e-05, + "step": 24754, + "training_step_time": 0.12057995796203613 + }, + { + "epoch": 3.777313232421875e-05, + "model_forward_time": 0.026112794876098633, + "step": 24755 + }, + { + "epoch": 3.777313232421875e-05, + "step": 24755, + "training_step_time": 0.15973544120788574 + }, + { + "epoch": 3.7774658203125e-05, + "model_forward_time": 0.024621009826660156, + "step": 24756 + }, + { + "epoch": 3.7774658203125e-05, + "step": 24756, + "training_step_time": 0.1714780330657959 + }, + { + "epoch": 3.777618408203125e-05, + "model_forward_time": 0.02417612075805664, + "step": 24757 + }, + { + "epoch": 3.777618408203125e-05, + "step": 24757, + "training_step_time": 0.16345715522766113 + }, + { + "epoch": 3.77777099609375e-05, + "model_forward_time": 0.024393081665039062, + "step": 24758 + }, + { + "epoch": 3.77777099609375e-05, + "step": 24758, + "training_step_time": 0.1044621467590332 + }, + { + "epoch": 3.777923583984375e-05, + "model_forward_time": 0.0247037410736084, + "step": 24759 + }, + { + "epoch": 3.777923583984375e-05, + "step": 24759, + "training_step_time": 0.10347580909729004 + }, + { + "epoch": 3.778076171875e-05, + "grad_norm": 0.2569579482078552, + "learning_rate": 8.111556114840746e-06, + "loss": 0.0091, + "step": 24760 + }, + { + "epoch": 3.778076171875e-05, + "model_forward_time": 0.025109529495239258, + "step": 24760 + }, + { + "epoch": 3.778076171875e-05, + "step": 24760, + "training_step_time": 0.1094667911529541 + }, + { + "epoch": 3.778228759765625e-05, + "model_forward_time": 0.025603771209716797, + "step": 24761 + }, + { + "epoch": 3.778228759765625e-05, + "step": 24761, + "training_step_time": 0.10563373565673828 + }, + { + "epoch": 3.77838134765625e-05, + "model_forward_time": 0.024660110473632812, + "step": 24762 + }, + { + "epoch": 3.77838134765625e-05, + "step": 24762, + "training_step_time": 0.10436439514160156 + }, + { + "epoch": 3.778533935546875e-05, + "model_forward_time": 0.025661706924438477, + "step": 24763 + }, + { + "epoch": 3.778533935546875e-05, + "step": 24763, + "training_step_time": 0.13325762748718262 + }, + { + "epoch": 3.7786865234375e-05, + "model_forward_time": 0.025525331497192383, + "step": 24764 + }, + { + "epoch": 3.7786865234375e-05, + "step": 24764, + "training_step_time": 0.14995694160461426 + }, + { + "epoch": 3.778839111328125e-05, + "model_forward_time": 0.025220155715942383, + "step": 24765 + }, + { + "epoch": 3.778839111328125e-05, + "step": 24765, + "training_step_time": 0.14374899864196777 + }, + { + "epoch": 3.77899169921875e-05, + "model_forward_time": 0.025132179260253906, + "step": 24766 + }, + { + "epoch": 3.77899169921875e-05, + "step": 24766, + "training_step_time": 0.18506383895874023 + }, + { + "epoch": 3.779144287109375e-05, + "model_forward_time": 0.024749279022216797, + "step": 24767 + }, + { + "epoch": 3.779144287109375e-05, + "step": 24767, + "training_step_time": 0.1897413730621338 + }, + { + "epoch": 3.779296875e-05, + "model_forward_time": 0.02484416961669922, + "step": 24768 + }, + { + "epoch": 3.779296875e-05, + "step": 24768, + "training_step_time": 0.17188239097595215 + }, + { + "epoch": 3.779449462890625e-05, + "model_forward_time": 0.024226903915405273, + "step": 24769 + }, + { + "epoch": 3.779449462890625e-05, + "step": 24769, + "training_step_time": 0.10722899436950684 + }, + { + "epoch": 3.77960205078125e-05, + "grad_norm": 0.1255207508802414, + "learning_rate": 8.08148704121705e-06, + "loss": 0.0067, + "step": 24770 + }, + { + "epoch": 3.77960205078125e-05, + "model_forward_time": 0.024807214736938477, + "step": 24770 + }, + { + "epoch": 3.77960205078125e-05, + "step": 24770, + "training_step_time": 0.1028585433959961 + }, + { + "epoch": 3.779754638671875e-05, + "model_forward_time": 0.02544236183166504, + "step": 24771 + }, + { + "epoch": 3.779754638671875e-05, + "step": 24771, + "training_step_time": 0.1083834171295166 + }, + { + "epoch": 3.7799072265625e-05, + "model_forward_time": 0.02590322494506836, + "step": 24772 + }, + { + "epoch": 3.7799072265625e-05, + "step": 24772, + "training_step_time": 0.10726642608642578 + }, + { + "epoch": 3.780059814453125e-05, + "model_forward_time": 0.028682708740234375, + "step": 24773 + }, + { + "epoch": 3.780059814453125e-05, + "step": 24773, + "training_step_time": 0.10828852653503418 + }, + { + "epoch": 3.78021240234375e-05, + "model_forward_time": 0.025840282440185547, + "step": 24774 + }, + { + "epoch": 3.78021240234375e-05, + "step": 24774, + "training_step_time": 0.10555505752563477 + }, + { + "epoch": 3.780364990234375e-05, + "model_forward_time": 0.025646448135375977, + "step": 24775 + }, + { + "epoch": 3.780364990234375e-05, + "step": 24775, + "training_step_time": 0.1038515567779541 + }, + { + "epoch": 3.780517578125e-05, + "model_forward_time": 0.02525782585144043, + "step": 24776 + }, + { + "epoch": 3.780517578125e-05, + "step": 24776, + "training_step_time": 0.11771941184997559 + }, + { + "epoch": 3.780670166015625e-05, + "model_forward_time": 0.02422308921813965, + "step": 24777 + }, + { + "epoch": 3.780670166015625e-05, + "step": 24777, + "training_step_time": 0.16733479499816895 + }, + { + "epoch": 3.78082275390625e-05, + "model_forward_time": 0.02463507652282715, + "step": 24778 + }, + { + "epoch": 3.78082275390625e-05, + "step": 24778, + "training_step_time": 0.17283844947814941 + }, + { + "epoch": 3.780975341796875e-05, + "model_forward_time": 0.024823904037475586, + "step": 24779 + }, + { + "epoch": 3.780975341796875e-05, + "step": 24779, + "training_step_time": 0.13217663764953613 + }, + { + "epoch": 3.7811279296875e-05, + "grad_norm": 0.19120188057422638, + "learning_rate": 8.051468902539272e-06, + "loss": 0.0061, + "step": 24780 + }, + { + "epoch": 3.7811279296875e-05, + "model_forward_time": 0.02465653419494629, + "step": 24780 + }, + { + "epoch": 3.7811279296875e-05, + "step": 24780, + "training_step_time": 0.11967062950134277 + }, + { + "epoch": 3.781280517578125e-05, + "model_forward_time": 0.025326967239379883, + "step": 24781 + }, + { + "epoch": 3.781280517578125e-05, + "step": 24781, + "training_step_time": 0.18398427963256836 + }, + { + "epoch": 3.78143310546875e-05, + "model_forward_time": 0.024626731872558594, + "step": 24782 + }, + { + "epoch": 3.78143310546875e-05, + "step": 24782, + "training_step_time": 0.1132512092590332 + }, + { + "epoch": 3.781585693359375e-05, + "model_forward_time": 0.025081634521484375, + "step": 24783 + }, + { + "epoch": 3.781585693359375e-05, + "step": 24783, + "training_step_time": 0.10939168930053711 + }, + { + "epoch": 3.78173828125e-05, + "model_forward_time": 0.025606393814086914, + "step": 24784 + }, + { + "epoch": 3.78173828125e-05, + "step": 24784, + "training_step_time": 0.1085500717163086 + }, + { + "epoch": 3.781890869140625e-05, + "model_forward_time": 0.024557113647460938, + "step": 24785 + }, + { + "epoch": 3.781890869140625e-05, + "step": 24785, + "training_step_time": 0.11060738563537598 + }, + { + "epoch": 3.78204345703125e-05, + "model_forward_time": 0.024101734161376953, + "step": 24786 + }, + { + "epoch": 3.78204345703125e-05, + "step": 24786, + "training_step_time": 0.10655021667480469 + }, + { + "epoch": 3.782196044921875e-05, + "model_forward_time": 0.025232315063476562, + "step": 24787 + }, + { + "epoch": 3.782196044921875e-05, + "step": 24787, + "training_step_time": 0.10672950744628906 + }, + { + "epoch": 3.7823486328125e-05, + "model_forward_time": 0.025208234786987305, + "step": 24788 + }, + { + "epoch": 3.7823486328125e-05, + "step": 24788, + "training_step_time": 0.10477900505065918 + }, + { + "epoch": 3.782501220703125e-05, + "model_forward_time": 0.024916887283325195, + "step": 24789 + }, + { + "epoch": 3.782501220703125e-05, + "step": 24789, + "training_step_time": 0.10869503021240234 + }, + { + "epoch": 3.78265380859375e-05, + "grad_norm": 0.0718858391046524, + "learning_rate": 8.021501735282266e-06, + "loss": 0.0065, + "step": 24790 + }, + { + "epoch": 3.78265380859375e-05, + "model_forward_time": 0.025175809860229492, + "step": 24790 + }, + { + "epoch": 3.78265380859375e-05, + "step": 24790, + "training_step_time": 0.10774946212768555 + }, + { + "epoch": 3.782806396484375e-05, + "model_forward_time": 0.02553534507751465, + "step": 24791 + }, + { + "epoch": 3.782806396484375e-05, + "step": 24791, + "training_step_time": 0.10527801513671875 + }, + { + "epoch": 3.782958984375e-05, + "model_forward_time": 0.025774002075195312, + "step": 24792 + }, + { + "epoch": 3.782958984375e-05, + "step": 24792, + "training_step_time": 0.10697340965270996 + }, + { + "epoch": 3.783111572265625e-05, + "model_forward_time": 0.0253450870513916, + "step": 24793 + }, + { + "epoch": 3.783111572265625e-05, + "step": 24793, + "training_step_time": 0.10596108436584473 + }, + { + "epoch": 3.78326416015625e-05, + "model_forward_time": 0.025180339813232422, + "step": 24794 + }, + { + "epoch": 3.78326416015625e-05, + "step": 24794, + "training_step_time": 0.10549402236938477 + }, + { + "epoch": 3.783416748046875e-05, + "model_forward_time": 0.025147438049316406, + "step": 24795 + }, + { + "epoch": 3.783416748046875e-05, + "step": 24795, + "training_step_time": 0.10689115524291992 + }, + { + "epoch": 3.7835693359375e-05, + "model_forward_time": 0.02551126480102539, + "step": 24796 + }, + { + "epoch": 3.7835693359375e-05, + "step": 24796, + "training_step_time": 0.1756894588470459 + }, + { + "epoch": 3.783721923828125e-05, + "model_forward_time": 0.024276256561279297, + "step": 24797 + }, + { + "epoch": 3.783721923828125e-05, + "step": 24797, + "training_step_time": 0.11551117897033691 + }, + { + "epoch": 3.78387451171875e-05, + "model_forward_time": 0.024549245834350586, + "step": 24798 + }, + { + "epoch": 3.78387451171875e-05, + "step": 24798, + "training_step_time": 0.13073468208312988 + }, + { + "epoch": 3.784027099609375e-05, + "model_forward_time": 0.024800539016723633, + "step": 24799 + }, + { + "epoch": 3.784027099609375e-05, + "step": 24799, + "training_step_time": 0.15263056755065918 + }, + { + "epoch": 3.7841796875e-05, + "grad_norm": 0.09488678723573685, + "learning_rate": 7.991585575858961e-06, + "loss": 0.0029, + "step": 24800 + }, + { + "epoch": 3.7841796875e-05, + "model_forward_time": 0.024598121643066406, + "step": 24800 + }, + { + "epoch": 3.7841796875e-05, + "step": 24800, + "training_step_time": 0.11114120483398438 + }, + { + "epoch": 3.784332275390625e-05, + "model_forward_time": 0.024631500244140625, + "step": 24801 + }, + { + "epoch": 3.784332275390625e-05, + "step": 24801, + "training_step_time": 0.11336207389831543 + }, + { + "epoch": 3.78448486328125e-05, + "model_forward_time": 0.025830745697021484, + "step": 24802 + }, + { + "epoch": 3.78448486328125e-05, + "step": 24802, + "training_step_time": 0.11868858337402344 + }, + { + "epoch": 3.784637451171875e-05, + "model_forward_time": 0.02573108673095703, + "step": 24803 + }, + { + "epoch": 3.784637451171875e-05, + "step": 24803, + "training_step_time": 0.10431504249572754 + }, + { + "epoch": 3.7847900390625e-05, + "model_forward_time": 0.025104284286499023, + "step": 24804 + }, + { + "epoch": 3.7847900390625e-05, + "step": 24804, + "training_step_time": 0.10731983184814453 + }, + { + "epoch": 3.784942626953125e-05, + "model_forward_time": 0.025099992752075195, + "step": 24805 + }, + { + "epoch": 3.784942626953125e-05, + "step": 24805, + "training_step_time": 0.10547733306884766 + }, + { + "epoch": 3.78509521484375e-05, + "model_forward_time": 0.02505350112915039, + "step": 24806 + }, + { + "epoch": 3.78509521484375e-05, + "step": 24806, + "training_step_time": 0.10595989227294922 + }, + { + "epoch": 3.785247802734375e-05, + "model_forward_time": 0.0246126651763916, + "step": 24807 + }, + { + "epoch": 3.785247802734375e-05, + "step": 24807, + "training_step_time": 0.10457277297973633 + }, + { + "epoch": 3.785400390625e-05, + "model_forward_time": 0.02515435218811035, + "step": 24808 + }, + { + "epoch": 3.785400390625e-05, + "step": 24808, + "training_step_time": 0.12071609497070312 + }, + { + "epoch": 3.785552978515625e-05, + "model_forward_time": 0.025447368621826172, + "step": 24809 + }, + { + "epoch": 3.785552978515625e-05, + "step": 24809, + "training_step_time": 0.14785480499267578 + }, + { + "epoch": 3.78570556640625e-05, + "grad_norm": 0.1033230870962143, + "learning_rate": 7.96172046062032e-06, + "loss": 0.0049, + "step": 24810 + }, + { + "epoch": 3.78570556640625e-05, + "model_forward_time": 0.024608850479125977, + "step": 24810 + }, + { + "epoch": 3.78570556640625e-05, + "step": 24810, + "training_step_time": 0.17712163925170898 + }, + { + "epoch": 3.785858154296875e-05, + "model_forward_time": 0.025165557861328125, + "step": 24811 + }, + { + "epoch": 3.785858154296875e-05, + "step": 24811, + "training_step_time": 0.17335987091064453 + }, + { + "epoch": 3.7860107421875e-05, + "model_forward_time": 0.024616241455078125, + "step": 24812 + }, + { + "epoch": 3.7860107421875e-05, + "step": 24812, + "training_step_time": 0.19646239280700684 + }, + { + "epoch": 3.786163330078125e-05, + "model_forward_time": 0.02452993392944336, + "step": 24813 + }, + { + "epoch": 3.786163330078125e-05, + "step": 24813, + "training_step_time": 0.13982915878295898 + }, + { + "epoch": 3.78631591796875e-05, + "model_forward_time": 0.025112152099609375, + "step": 24814 + }, + { + "epoch": 3.78631591796875e-05, + "step": 24814, + "training_step_time": 0.2377030849456787 + }, + { + "epoch": 3.786468505859375e-05, + "model_forward_time": 0.02540302276611328, + "step": 24815 + }, + { + "epoch": 3.786468505859375e-05, + "step": 24815, + "training_step_time": 0.10286259651184082 + }, + { + "epoch": 3.78662109375e-05, + "model_forward_time": 0.024864673614501953, + "step": 24816 + }, + { + "epoch": 3.78662109375e-05, + "step": 24816, + "training_step_time": 0.10845470428466797 + }, + { + "epoch": 3.786773681640625e-05, + "model_forward_time": 0.025322675704956055, + "step": 24817 + }, + { + "epoch": 3.786773681640625e-05, + "step": 24817, + "training_step_time": 0.10558915138244629 + }, + { + "epoch": 3.78692626953125e-05, + "model_forward_time": 0.025572776794433594, + "step": 24818 + }, + { + "epoch": 3.78692626953125e-05, + "step": 24818, + "training_step_time": 0.10982322692871094 + }, + { + "epoch": 3.787078857421875e-05, + "model_forward_time": 0.025274038314819336, + "step": 24819 + }, + { + "epoch": 3.787078857421875e-05, + "step": 24819, + "training_step_time": 0.11100578308105469 + }, + { + "epoch": 3.7872314453125e-05, + "grad_norm": 0.10404244065284729, + "learning_rate": 7.931906425855268e-06, + "loss": 0.0054, + "step": 24820 + }, + { + "epoch": 3.7872314453125e-05, + "model_forward_time": 0.02564859390258789, + "step": 24820 + }, + { + "epoch": 3.7872314453125e-05, + "step": 24820, + "training_step_time": 0.1286334991455078 + }, + { + "epoch": 3.787384033203125e-05, + "model_forward_time": 0.025827407836914062, + "step": 24821 + }, + { + "epoch": 3.787384033203125e-05, + "step": 24821, + "training_step_time": 0.11369776725769043 + }, + { + "epoch": 3.78753662109375e-05, + "model_forward_time": 0.024773597717285156, + "step": 24822 + }, + { + "epoch": 3.78753662109375e-05, + "step": 24822, + "training_step_time": 0.15262794494628906 + }, + { + "epoch": 3.787689208984375e-05, + "model_forward_time": 0.02492666244506836, + "step": 24823 + }, + { + "epoch": 3.787689208984375e-05, + "step": 24823, + "training_step_time": 0.15572428703308105 + }, + { + "epoch": 3.787841796875e-05, + "model_forward_time": 0.024875640869140625, + "step": 24824 + }, + { + "epoch": 3.787841796875e-05, + "step": 24824, + "training_step_time": 0.1204080581665039 + }, + { + "epoch": 3.787994384765625e-05, + "model_forward_time": 0.024516582489013672, + "step": 24825 + }, + { + "epoch": 3.787994384765625e-05, + "step": 24825, + "training_step_time": 0.11985611915588379 + }, + { + "epoch": 3.78814697265625e-05, + "model_forward_time": 0.025489330291748047, + "step": 24826 + }, + { + "epoch": 3.78814697265625e-05, + "step": 24826, + "training_step_time": 0.19942951202392578 + }, + { + "epoch": 3.788299560546875e-05, + "model_forward_time": 0.024477005004882812, + "step": 24827 + }, + { + "epoch": 3.788299560546875e-05, + "step": 24827, + "training_step_time": 0.10553121566772461 + }, + { + "epoch": 3.7884521484375e-05, + "model_forward_time": 0.024403810501098633, + "step": 24828 + }, + { + "epoch": 3.7884521484375e-05, + "step": 24828, + "training_step_time": 0.10562300682067871 + }, + { + "epoch": 3.788604736328125e-05, + "model_forward_time": 0.025500059127807617, + "step": 24829 + }, + { + "epoch": 3.788604736328125e-05, + "step": 24829, + "training_step_time": 0.11027669906616211 + }, + { + "epoch": 3.78875732421875e-05, + "grad_norm": 0.07293904572725296, + "learning_rate": 7.902143507790661e-06, + "loss": 0.0077, + "step": 24830 + }, + { + "epoch": 3.78875732421875e-05, + "model_forward_time": 0.02542424201965332, + "step": 24830 + }, + { + "epoch": 3.78875732421875e-05, + "step": 24830, + "training_step_time": 0.10589122772216797 + }, + { + "epoch": 3.788909912109375e-05, + "model_forward_time": 0.025682687759399414, + "step": 24831 + }, + { + "epoch": 3.788909912109375e-05, + "step": 24831, + "training_step_time": 0.10745906829833984 + }, + { + "epoch": 3.7890625e-05, + "model_forward_time": 0.025574207305908203, + "step": 24832 + }, + { + "epoch": 3.7890625e-05, + "step": 24832, + "training_step_time": 0.10569071769714355 + }, + { + "epoch": 3.789215087890625e-05, + "model_forward_time": 0.025380849838256836, + "step": 24833 + }, + { + "epoch": 3.789215087890625e-05, + "step": 24833, + "training_step_time": 0.10640430450439453 + }, + { + "epoch": 3.78936767578125e-05, + "model_forward_time": 0.02699446678161621, + "step": 24834 + }, + { + "epoch": 3.78936767578125e-05, + "step": 24834, + "training_step_time": 0.10876202583312988 + }, + { + "epoch": 3.789520263671875e-05, + "model_forward_time": 0.0253145694732666, + "step": 24835 + }, + { + "epoch": 3.789520263671875e-05, + "step": 24835, + "training_step_time": 0.10648202896118164 + }, + { + "epoch": 3.7896728515625e-05, + "model_forward_time": 0.025691986083984375, + "step": 24836 + }, + { + "epoch": 3.7896728515625e-05, + "step": 24836, + "training_step_time": 0.10921978950500488 + }, + { + "epoch": 3.789825439453125e-05, + "model_forward_time": 0.025574922561645508, + "step": 24837 + }, + { + "epoch": 3.789825439453125e-05, + "step": 24837, + "training_step_time": 0.10442423820495605 + }, + { + "epoch": 3.78997802734375e-05, + "model_forward_time": 0.02602386474609375, + "step": 24838 + }, + { + "epoch": 3.78997802734375e-05, + "step": 24838, + "training_step_time": 0.10700273513793945 + }, + { + "epoch": 3.790130615234375e-05, + "model_forward_time": 0.025583982467651367, + "step": 24839 + }, + { + "epoch": 3.790130615234375e-05, + "step": 24839, + "training_step_time": 0.1078481674194336 + }, + { + "epoch": 3.790283203125e-05, + "grad_norm": 0.2304941564798355, + "learning_rate": 7.872431742591268e-06, + "loss": 0.0055, + "step": 24840 + }, + { + "epoch": 3.790283203125e-05, + "model_forward_time": 0.025130748748779297, + "step": 24840 + }, + { + "epoch": 3.790283203125e-05, + "step": 24840, + "training_step_time": 0.10458827018737793 + }, + { + "epoch": 3.790435791015625e-05, + "model_forward_time": 0.025232791900634766, + "step": 24841 + }, + { + "epoch": 3.790435791015625e-05, + "step": 24841, + "training_step_time": 0.16073036193847656 + }, + { + "epoch": 3.79058837890625e-05, + "model_forward_time": 0.02534031867980957, + "step": 24842 + }, + { + "epoch": 3.79058837890625e-05, + "step": 24842, + "training_step_time": 0.12326836585998535 + }, + { + "epoch": 3.790740966796875e-05, + "model_forward_time": 0.024576425552368164, + "step": 24843 + }, + { + "epoch": 3.790740966796875e-05, + "step": 24843, + "training_step_time": 0.10553526878356934 + }, + { + "epoch": 3.7908935546875e-05, + "model_forward_time": 0.025333642959594727, + "step": 24844 + }, + { + "epoch": 3.7908935546875e-05, + "step": 24844, + "training_step_time": 0.1187744140625 + }, + { + "epoch": 3.791046142578125e-05, + "model_forward_time": 0.025060653686523438, + "step": 24845 + }, + { + "epoch": 3.791046142578125e-05, + "step": 24845, + "training_step_time": 0.17016363143920898 + }, + { + "epoch": 3.79119873046875e-05, + "model_forward_time": 0.02486729621887207, + "step": 24846 + }, + { + "epoch": 3.79119873046875e-05, + "step": 24846, + "training_step_time": 0.12769484519958496 + }, + { + "epoch": 3.791351318359375e-05, + "model_forward_time": 0.024884939193725586, + "step": 24847 + }, + { + "epoch": 3.791351318359375e-05, + "step": 24847, + "training_step_time": 0.10581326484680176 + }, + { + "epoch": 3.79150390625e-05, + "model_forward_time": 0.025372743606567383, + "step": 24848 + }, + { + "epoch": 3.79150390625e-05, + "step": 24848, + "training_step_time": 0.10676956176757812 + }, + { + "epoch": 3.791656494140625e-05, + "model_forward_time": 0.025522708892822266, + "step": 24849 + }, + { + "epoch": 3.791656494140625e-05, + "step": 24849, + "training_step_time": 0.10846567153930664 + }, + { + "epoch": 3.79180908203125e-05, + "grad_norm": 0.10245202481746674, + "learning_rate": 7.842771166359681e-06, + "loss": 0.0037, + "step": 24850 + }, + { + "epoch": 3.79180908203125e-05, + "model_forward_time": 0.025577783584594727, + "step": 24850 + }, + { + "epoch": 3.79180908203125e-05, + "step": 24850, + "training_step_time": 0.1047210693359375 + }, + { + "epoch": 3.791961669921875e-05, + "model_forward_time": 0.025493144989013672, + "step": 24851 + }, + { + "epoch": 3.791961669921875e-05, + "step": 24851, + "training_step_time": 0.10752224922180176 + }, + { + "epoch": 3.7921142578125e-05, + "model_forward_time": 0.025140762329101562, + "step": 24852 + }, + { + "epoch": 3.7921142578125e-05, + "step": 24852, + "training_step_time": 0.11283659934997559 + }, + { + "epoch": 3.792266845703125e-05, + "model_forward_time": 0.025024890899658203, + "step": 24853 + }, + { + "epoch": 3.792266845703125e-05, + "step": 24853, + "training_step_time": 0.15247511863708496 + }, + { + "epoch": 3.79241943359375e-05, + "model_forward_time": 0.025197505950927734, + "step": 24854 + }, + { + "epoch": 3.79241943359375e-05, + "step": 24854, + "training_step_time": 0.13590192794799805 + }, + { + "epoch": 3.792572021484375e-05, + "model_forward_time": 0.02521491050720215, + "step": 24855 + }, + { + "epoch": 3.792572021484375e-05, + "step": 24855, + "training_step_time": 0.15951108932495117 + }, + { + "epoch": 3.792724609375e-05, + "model_forward_time": 0.02447032928466797, + "step": 24856 + }, + { + "epoch": 3.792724609375e-05, + "step": 24856, + "training_step_time": 0.1755228042602539 + }, + { + "epoch": 3.792877197265625e-05, + "model_forward_time": 0.024471282958984375, + "step": 24857 + }, + { + "epoch": 3.792877197265625e-05, + "step": 24857, + "training_step_time": 0.17865204811096191 + }, + { + "epoch": 3.79302978515625e-05, + "model_forward_time": 0.025138378143310547, + "step": 24858 + }, + { + "epoch": 3.79302978515625e-05, + "step": 24858, + "training_step_time": 0.13460373878479004 + }, + { + "epoch": 3.793182373046875e-05, + "model_forward_time": 0.023801088333129883, + "step": 24859 + }, + { + "epoch": 3.793182373046875e-05, + "step": 24859, + "training_step_time": 0.10778236389160156 + }, + { + "epoch": 3.7933349609375e-05, + "grad_norm": 0.06984902173280716, + "learning_rate": 7.813161815136294e-06, + "loss": 0.0047, + "step": 24860 + }, + { + "epoch": 3.7933349609375e-05, + "model_forward_time": 0.0252077579498291, + "step": 24860 + }, + { + "epoch": 3.7933349609375e-05, + "step": 24860, + "training_step_time": 0.11913013458251953 + }, + { + "epoch": 3.793487548828125e-05, + "model_forward_time": 0.025884628295898438, + "step": 24861 + }, + { + "epoch": 3.793487548828125e-05, + "step": 24861, + "training_step_time": 0.11512041091918945 + }, + { + "epoch": 3.79364013671875e-05, + "model_forward_time": 0.025649070739746094, + "step": 24862 + }, + { + "epoch": 3.79364013671875e-05, + "step": 24862, + "training_step_time": 0.12096667289733887 + }, + { + "epoch": 3.793792724609375e-05, + "model_forward_time": 0.025758981704711914, + "step": 24863 + }, + { + "epoch": 3.793792724609375e-05, + "step": 24863, + "training_step_time": 0.14333343505859375 + }, + { + "epoch": 3.7939453125e-05, + "model_forward_time": 0.025099515914916992, + "step": 24864 + }, + { + "epoch": 3.7939453125e-05, + "step": 24864, + "training_step_time": 0.12896156311035156 + }, + { + "epoch": 3.794097900390625e-05, + "model_forward_time": 0.02467060089111328, + "step": 24865 + }, + { + "epoch": 3.794097900390625e-05, + "step": 24865, + "training_step_time": 0.1246025562286377 + }, + { + "epoch": 3.79425048828125e-05, + "model_forward_time": 0.025817394256591797, + "step": 24866 + }, + { + "epoch": 3.79425048828125e-05, + "step": 24866, + "training_step_time": 0.10906815528869629 + }, + { + "epoch": 3.794403076171875e-05, + "model_forward_time": 0.024901390075683594, + "step": 24867 + }, + { + "epoch": 3.794403076171875e-05, + "step": 24867, + "training_step_time": 0.15019822120666504 + }, + { + "epoch": 3.7945556640625e-05, + "model_forward_time": 0.025325298309326172, + "step": 24868 + }, + { + "epoch": 3.7945556640625e-05, + "step": 24868, + "training_step_time": 0.1845095157623291 + }, + { + "epoch": 3.794708251953125e-05, + "model_forward_time": 0.02500748634338379, + "step": 24869 + }, + { + "epoch": 3.794708251953125e-05, + "step": 24869, + "training_step_time": 0.11902189254760742 + }, + { + "epoch": 3.79486083984375e-05, + "grad_norm": 0.3862765431404114, + "learning_rate": 7.783603724899257e-06, + "loss": 0.0124, + "step": 24870 + }, + { + "epoch": 3.79486083984375e-05, + "model_forward_time": 0.02477431297302246, + "step": 24870 + }, + { + "epoch": 3.79486083984375e-05, + "step": 24870, + "training_step_time": 0.11454105377197266 + }, + { + "epoch": 3.795013427734375e-05, + "model_forward_time": 0.025516510009765625, + "step": 24871 + }, + { + "epoch": 3.795013427734375e-05, + "step": 24871, + "training_step_time": 0.11250638961791992 + }, + { + "epoch": 3.795166015625e-05, + "model_forward_time": 0.02574324607849121, + "step": 24872 + }, + { + "epoch": 3.795166015625e-05, + "step": 24872, + "training_step_time": 0.121307373046875 + }, + { + "epoch": 3.795318603515625e-05, + "model_forward_time": 0.027637243270874023, + "step": 24873 + }, + { + "epoch": 3.795318603515625e-05, + "step": 24873, + "training_step_time": 0.110382080078125 + }, + { + "epoch": 3.79547119140625e-05, + "model_forward_time": 0.02527165412902832, + "step": 24874 + }, + { + "epoch": 3.79547119140625e-05, + "step": 24874, + "training_step_time": 0.10771608352661133 + }, + { + "epoch": 3.795623779296875e-05, + "model_forward_time": 0.025479793548583984, + "step": 24875 + }, + { + "epoch": 3.795623779296875e-05, + "step": 24875, + "training_step_time": 0.11657094955444336 + }, + { + "epoch": 3.7957763671875e-05, + "model_forward_time": 0.025415897369384766, + "step": 24876 + }, + { + "epoch": 3.7957763671875e-05, + "step": 24876, + "training_step_time": 0.10892415046691895 + }, + { + "epoch": 3.795928955078125e-05, + "model_forward_time": 0.0256345272064209, + "step": 24877 + }, + { + "epoch": 3.795928955078125e-05, + "step": 24877, + "training_step_time": 0.10950374603271484 + }, + { + "epoch": 3.79608154296875e-05, + "model_forward_time": 0.02517533302307129, + "step": 24878 + }, + { + "epoch": 3.79608154296875e-05, + "step": 24878, + "training_step_time": 0.10645461082458496 + }, + { + "epoch": 3.796234130859375e-05, + "model_forward_time": 0.025511741638183594, + "step": 24879 + }, + { + "epoch": 3.796234130859375e-05, + "step": 24879, + "training_step_time": 0.10791182518005371 + }, + { + "epoch": 3.79638671875e-05, + "grad_norm": 0.10975080728530884, + "learning_rate": 7.754096931564431e-06, + "loss": 0.0035, + "step": 24880 + }, + { + "epoch": 3.79638671875e-05, + "model_forward_time": 0.025007247924804688, + "step": 24880 + }, + { + "epoch": 3.79638671875e-05, + "step": 24880, + "training_step_time": 0.10908365249633789 + }, + { + "epoch": 3.796539306640625e-05, + "model_forward_time": 0.025380611419677734, + "step": 24881 + }, + { + "epoch": 3.796539306640625e-05, + "step": 24881, + "training_step_time": 0.10821056365966797 + }, + { + "epoch": 3.79669189453125e-05, + "model_forward_time": 0.025385379791259766, + "step": 24882 + }, + { + "epoch": 3.79669189453125e-05, + "step": 24882, + "training_step_time": 0.10691094398498535 + }, + { + "epoch": 3.796844482421875e-05, + "model_forward_time": 0.025415658950805664, + "step": 24883 + }, + { + "epoch": 3.796844482421875e-05, + "step": 24883, + "training_step_time": 0.10950255393981934 + }, + { + "epoch": 3.7969970703125e-05, + "model_forward_time": 0.02545762062072754, + "step": 24884 + }, + { + "epoch": 3.7969970703125e-05, + "step": 24884, + "training_step_time": 0.10799217224121094 + }, + { + "epoch": 3.797149658203125e-05, + "model_forward_time": 0.025293588638305664, + "step": 24885 + }, + { + "epoch": 3.797149658203125e-05, + "step": 24885, + "training_step_time": 0.107025146484375 + }, + { + "epoch": 3.79730224609375e-05, + "model_forward_time": 0.02547144889831543, + "step": 24886 + }, + { + "epoch": 3.79730224609375e-05, + "step": 24886, + "training_step_time": 0.17642974853515625 + }, + { + "epoch": 3.797454833984375e-05, + "model_forward_time": 0.025175809860229492, + "step": 24887 + }, + { + "epoch": 3.797454833984375e-05, + "step": 24887, + "training_step_time": 0.12482285499572754 + }, + { + "epoch": 3.797607421875e-05, + "model_forward_time": 0.025087833404541016, + "step": 24888 + }, + { + "epoch": 3.797607421875e-05, + "step": 24888, + "training_step_time": 0.1306302547454834 + }, + { + "epoch": 3.797760009765625e-05, + "model_forward_time": 0.02541947364807129, + "step": 24889 + }, + { + "epoch": 3.797760009765625e-05, + "step": 24889, + "training_step_time": 0.10664749145507812 + }, + { + "epoch": 3.79791259765625e-05, + "grad_norm": 0.14474695920944214, + "learning_rate": 7.724641470985378e-06, + "loss": 0.0063, + "step": 24890 + }, + { + "epoch": 3.79791259765625e-05, + "model_forward_time": 0.02539348602294922, + "step": 24890 + }, + { + "epoch": 3.79791259765625e-05, + "step": 24890, + "training_step_time": 0.15142178535461426 + }, + { + "epoch": 3.798065185546875e-05, + "model_forward_time": 0.024908781051635742, + "step": 24891 + }, + { + "epoch": 3.798065185546875e-05, + "step": 24891, + "training_step_time": 0.12293601036071777 + }, + { + "epoch": 3.7982177734375e-05, + "model_forward_time": 0.024709463119506836, + "step": 24892 + }, + { + "epoch": 3.7982177734375e-05, + "step": 24892, + "training_step_time": 0.11086010932922363 + }, + { + "epoch": 3.798370361328125e-05, + "model_forward_time": 0.025502920150756836, + "step": 24893 + }, + { + "epoch": 3.798370361328125e-05, + "step": 24893, + "training_step_time": 0.10486412048339844 + }, + { + "epoch": 3.79852294921875e-05, + "model_forward_time": 0.025340557098388672, + "step": 24894 + }, + { + "epoch": 3.79852294921875e-05, + "step": 24894, + "training_step_time": 0.10611915588378906 + }, + { + "epoch": 3.798675537109375e-05, + "model_forward_time": 0.025301694869995117, + "step": 24895 + }, + { + "epoch": 3.798675537109375e-05, + "step": 24895, + "training_step_time": 0.10987544059753418 + }, + { + "epoch": 3.798828125e-05, + "model_forward_time": 0.02521967887878418, + "step": 24896 + }, + { + "epoch": 3.798828125e-05, + "step": 24896, + "training_step_time": 0.10521864891052246 + }, + { + "epoch": 3.798980712890625e-05, + "model_forward_time": 0.025878190994262695, + "step": 24897 + }, + { + "epoch": 3.798980712890625e-05, + "step": 24897, + "training_step_time": 0.11138153076171875 + }, + { + "epoch": 3.79913330078125e-05, + "model_forward_time": 0.0255584716796875, + "step": 24898 + }, + { + "epoch": 3.79913330078125e-05, + "step": 24898, + "training_step_time": 0.14584064483642578 + }, + { + "epoch": 3.799285888671875e-05, + "model_forward_time": 0.025267362594604492, + "step": 24899 + }, + { + "epoch": 3.799285888671875e-05, + "step": 24899, + "training_step_time": 0.14218854904174805 + }, + { + "epoch": 3.7994384765625e-05, + "grad_norm": 0.14371325075626373, + "learning_rate": 7.695237378953223e-06, + "loss": 0.0077, + "step": 24900 + }, + { + "epoch": 3.7994384765625e-05, + "model_forward_time": 0.02547931671142578, + "step": 24900 + }, + { + "epoch": 3.7994384765625e-05, + "step": 24900, + "training_step_time": 0.14834284782409668 + }, + { + "epoch": 3.799591064453125e-05, + "model_forward_time": 0.024954795837402344, + "step": 24901 + }, + { + "epoch": 3.799591064453125e-05, + "step": 24901, + "training_step_time": 0.20576214790344238 + }, + { + "epoch": 3.79974365234375e-05, + "model_forward_time": 0.024922847747802734, + "step": 24902 + }, + { + "epoch": 3.79974365234375e-05, + "step": 24902, + "training_step_time": 0.2338714599609375 + }, + { + "epoch": 3.799896240234375e-05, + "model_forward_time": 0.02437114715576172, + "step": 24903 + }, + { + "epoch": 3.799896240234375e-05, + "step": 24903, + "training_step_time": 0.14308381080627441 + }, + { + "epoch": 3.800048828125e-05, + "model_forward_time": 0.024288177490234375, + "step": 24904 + }, + { + "epoch": 3.800048828125e-05, + "step": 24904, + "training_step_time": 0.16642093658447266 + }, + { + "epoch": 3.800201416015625e-05, + "model_forward_time": 0.02491283416748047, + "step": 24905 + }, + { + "epoch": 3.800201416015625e-05, + "step": 24905, + "training_step_time": 0.13856101036071777 + }, + { + "epoch": 3.80035400390625e-05, + "model_forward_time": 0.024410724639892578, + "step": 24906 + }, + { + "epoch": 3.80035400390625e-05, + "step": 24906, + "training_step_time": 0.10593867301940918 + }, + { + "epoch": 3.800506591796875e-05, + "model_forward_time": 0.025135278701782227, + "step": 24907 + }, + { + "epoch": 3.800506591796875e-05, + "step": 24907, + "training_step_time": 0.10287213325500488 + }, + { + "epoch": 3.8006591796875e-05, + "model_forward_time": 0.025495290756225586, + "step": 24908 + }, + { + "epoch": 3.8006591796875e-05, + "step": 24908, + "training_step_time": 0.10518527030944824 + }, + { + "epoch": 3.800811767578125e-05, + "model_forward_time": 0.025587797164916992, + "step": 24909 + }, + { + "epoch": 3.800811767578125e-05, + "step": 24909, + "training_step_time": 0.10477018356323242 + }, + { + "epoch": 3.80096435546875e-05, + "grad_norm": 0.06774347275495529, + "learning_rate": 7.66588469119675e-06, + "loss": 0.0034, + "step": 24910 + }, + { + "epoch": 3.80096435546875e-05, + "model_forward_time": 0.025551795959472656, + "step": 24910 + }, + { + "epoch": 3.80096435546875e-05, + "step": 24910, + "training_step_time": 0.10431957244873047 + }, + { + "epoch": 3.801116943359375e-05, + "model_forward_time": 0.025641918182373047, + "step": 24911 + }, + { + "epoch": 3.801116943359375e-05, + "step": 24911, + "training_step_time": 0.10320639610290527 + }, + { + "epoch": 3.80126953125e-05, + "model_forward_time": 0.025409221649169922, + "step": 24912 + }, + { + "epoch": 3.80126953125e-05, + "step": 24912, + "training_step_time": 0.1884145736694336 + }, + { + "epoch": 3.801422119140625e-05, + "model_forward_time": 0.025036334991455078, + "step": 24913 + }, + { + "epoch": 3.801422119140625e-05, + "step": 24913, + "training_step_time": 0.10158181190490723 + }, + { + "epoch": 3.80157470703125e-05, + "model_forward_time": 0.024612903594970703, + "step": 24914 + }, + { + "epoch": 3.80157470703125e-05, + "step": 24914, + "training_step_time": 0.10265064239501953 + }, + { + "epoch": 3.801727294921875e-05, + "model_forward_time": 0.02495288848876953, + "step": 24915 + }, + { + "epoch": 3.801727294921875e-05, + "step": 24915, + "training_step_time": 0.1779017448425293 + }, + { + "epoch": 3.8018798828125e-05, + "model_forward_time": 0.025072097778320312, + "step": 24916 + }, + { + "epoch": 3.8018798828125e-05, + "step": 24916, + "training_step_time": 0.10364437103271484 + }, + { + "epoch": 3.802032470703125e-05, + "model_forward_time": 0.02512192726135254, + "step": 24917 + }, + { + "epoch": 3.802032470703125e-05, + "step": 24917, + "training_step_time": 0.102691650390625 + }, + { + "epoch": 3.80218505859375e-05, + "model_forward_time": 0.024658203125, + "step": 24918 + }, + { + "epoch": 3.80218505859375e-05, + "step": 24918, + "training_step_time": 0.10444974899291992 + }, + { + "epoch": 3.802337646484375e-05, + "model_forward_time": 0.024409055709838867, + "step": 24919 + }, + { + "epoch": 3.802337646484375e-05, + "step": 24919, + "training_step_time": 0.10645794868469238 + }, + { + "epoch": 3.802490234375e-05, + "grad_norm": 0.06481382250785828, + "learning_rate": 7.636583443382223e-06, + "loss": 0.0039, + "step": 24920 + }, + { + "epoch": 3.802490234375e-05, + "model_forward_time": 0.025545358657836914, + "step": 24920 + }, + { + "epoch": 3.802490234375e-05, + "step": 24920, + "training_step_time": 0.12274575233459473 + }, + { + "epoch": 3.802642822265625e-05, + "model_forward_time": 0.02527475357055664, + "step": 24921 + }, + { + "epoch": 3.802642822265625e-05, + "step": 24921, + "training_step_time": 0.16277265548706055 + }, + { + "epoch": 3.80279541015625e-05, + "model_forward_time": 0.024474620819091797, + "step": 24922 + }, + { + "epoch": 3.80279541015625e-05, + "step": 24922, + "training_step_time": 0.1651322841644287 + }, + { + "epoch": 3.802947998046875e-05, + "model_forward_time": 0.02408003807067871, + "step": 24923 + }, + { + "epoch": 3.802947998046875e-05, + "step": 24923, + "training_step_time": 0.15870404243469238 + }, + { + "epoch": 3.8031005859375e-05, + "model_forward_time": 0.024092435836791992, + "step": 24924 + }, + { + "epoch": 3.8031005859375e-05, + "step": 24924, + "training_step_time": 0.14549851417541504 + }, + { + "epoch": 3.803253173828125e-05, + "model_forward_time": 0.024658679962158203, + "step": 24925 + }, + { + "epoch": 3.803253173828125e-05, + "step": 24925, + "training_step_time": 0.14201974868774414 + }, + { + "epoch": 3.80340576171875e-05, + "model_forward_time": 0.024497270584106445, + "step": 24926 + }, + { + "epoch": 3.80340576171875e-05, + "step": 24926, + "training_step_time": 0.12845873832702637 + }, + { + "epoch": 3.803558349609375e-05, + "model_forward_time": 0.024453401565551758, + "step": 24927 + }, + { + "epoch": 3.803558349609375e-05, + "step": 24927, + "training_step_time": 0.12379312515258789 + }, + { + "epoch": 3.8037109375e-05, + "model_forward_time": 0.02467799186706543, + "step": 24928 + }, + { + "epoch": 3.8037109375e-05, + "step": 24928, + "training_step_time": 0.11667633056640625 + }, + { + "epoch": 3.803863525390625e-05, + "model_forward_time": 0.02508091926574707, + "step": 24929 + }, + { + "epoch": 3.803863525390625e-05, + "step": 24929, + "training_step_time": 0.17278504371643066 + }, + { + "epoch": 3.80401611328125e-05, + "grad_norm": 0.06115950644016266, + "learning_rate": 7.607333671113409e-06, + "loss": 0.0036, + "step": 24930 + }, + { + "epoch": 3.80401611328125e-05, + "model_forward_time": 0.02344369888305664, + "step": 24930 + }, + { + "epoch": 3.80401611328125e-05, + "step": 24930, + "training_step_time": 0.11387872695922852 + }, + { + "epoch": 3.804168701171875e-05, + "model_forward_time": 0.02404499053955078, + "step": 24931 + }, + { + "epoch": 3.804168701171875e-05, + "step": 24931, + "training_step_time": 0.13034701347351074 + }, + { + "epoch": 3.8043212890625e-05, + "model_forward_time": 0.025043249130249023, + "step": 24932 + }, + { + "epoch": 3.8043212890625e-05, + "step": 24932, + "training_step_time": 0.1595907211303711 + }, + { + "epoch": 3.804473876953125e-05, + "model_forward_time": 0.024644136428833008, + "step": 24933 + }, + { + "epoch": 3.804473876953125e-05, + "step": 24933, + "training_step_time": 0.21171975135803223 + }, + { + "epoch": 3.80462646484375e-05, + "model_forward_time": 0.02386307716369629, + "step": 24934 + }, + { + "epoch": 3.80462646484375e-05, + "step": 24934, + "training_step_time": 0.128248929977417 + }, + { + "epoch": 3.804779052734375e-05, + "model_forward_time": 0.024129629135131836, + "step": 24935 + }, + { + "epoch": 3.804779052734375e-05, + "step": 24935, + "training_step_time": 0.10307908058166504 + }, + { + "epoch": 3.804931640625e-05, + "model_forward_time": 0.025177001953125, + "step": 24936 + }, + { + "epoch": 3.804931640625e-05, + "step": 24936, + "training_step_time": 0.10569930076599121 + }, + { + "epoch": 3.805084228515625e-05, + "model_forward_time": 0.025166034698486328, + "step": 24937 + }, + { + "epoch": 3.805084228515625e-05, + "step": 24937, + "training_step_time": 0.10778021812438965 + }, + { + "epoch": 3.80523681640625e-05, + "model_forward_time": 0.02506113052368164, + "step": 24938 + }, + { + "epoch": 3.80523681640625e-05, + "step": 24938, + "training_step_time": 0.10543990135192871 + }, + { + "epoch": 3.805389404296875e-05, + "model_forward_time": 0.025112152099609375, + "step": 24939 + }, + { + "epoch": 3.805389404296875e-05, + "step": 24939, + "training_step_time": 0.10551738739013672 + }, + { + "epoch": 3.8055419921875e-05, + "grad_norm": 0.08124295622110367, + "learning_rate": 7.578135409931558e-06, + "loss": 0.0064, + "step": 24940 + }, + { + "epoch": 3.8055419921875e-05, + "model_forward_time": 0.02514934539794922, + "step": 24940 + }, + { + "epoch": 3.8055419921875e-05, + "step": 24940, + "training_step_time": 0.18249154090881348 + }, + { + "epoch": 3.805694580078125e-05, + "model_forward_time": 0.024329662322998047, + "step": 24941 + }, + { + "epoch": 3.805694580078125e-05, + "step": 24941, + "training_step_time": 0.12040019035339355 + }, + { + "epoch": 3.80584716796875e-05, + "model_forward_time": 0.02434563636779785, + "step": 24942 + }, + { + "epoch": 3.80584716796875e-05, + "step": 24942, + "training_step_time": 0.10678720474243164 + }, + { + "epoch": 3.805999755859375e-05, + "model_forward_time": 0.025267362594604492, + "step": 24943 + }, + { + "epoch": 3.805999755859375e-05, + "step": 24943, + "training_step_time": 0.19119977951049805 + }, + { + "epoch": 3.80615234375e-05, + "model_forward_time": 0.02421402931213379, + "step": 24944 + }, + { + "epoch": 3.80615234375e-05, + "step": 24944, + "training_step_time": 0.14198565483093262 + }, + { + "epoch": 3.806304931640625e-05, + "model_forward_time": 0.024152517318725586, + "step": 24945 + }, + { + "epoch": 3.806304931640625e-05, + "step": 24945, + "training_step_time": 0.2109529972076416 + }, + { + "epoch": 3.80645751953125e-05, + "model_forward_time": 0.024147987365722656, + "step": 24946 + }, + { + "epoch": 3.80645751953125e-05, + "step": 24946, + "training_step_time": 0.12692499160766602 + }, + { + "epoch": 3.806610107421875e-05, + "model_forward_time": 0.02456974983215332, + "step": 24947 + }, + { + "epoch": 3.806610107421875e-05, + "step": 24947, + "training_step_time": 0.11321187019348145 + }, + { + "epoch": 3.8067626953125e-05, + "model_forward_time": 0.02545475959777832, + "step": 24948 + }, + { + "epoch": 3.8067626953125e-05, + "step": 24948, + "training_step_time": 0.11681842803955078 + }, + { + "epoch": 3.806915283203125e-05, + "model_forward_time": 0.02518749237060547, + "step": 24949 + }, + { + "epoch": 3.806915283203125e-05, + "step": 24949, + "training_step_time": 0.10446286201477051 + }, + { + "epoch": 3.80706787109375e-05, + "grad_norm": 0.28254634141921997, + "learning_rate": 7.5489886953153125e-06, + "loss": 0.0096, + "step": 24950 + }, + { + "epoch": 3.80706787109375e-05, + "model_forward_time": 0.02513575553894043, + "step": 24950 + }, + { + "epoch": 3.80706787109375e-05, + "step": 24950, + "training_step_time": 0.1038975715637207 + }, + { + "epoch": 3.807220458984375e-05, + "model_forward_time": 0.025346040725708008, + "step": 24951 + }, + { + "epoch": 3.807220458984375e-05, + "step": 24951, + "training_step_time": 0.11147212982177734 + }, + { + "epoch": 3.807373046875e-05, + "model_forward_time": 0.024932384490966797, + "step": 24952 + }, + { + "epoch": 3.807373046875e-05, + "step": 24952, + "training_step_time": 0.11800408363342285 + }, + { + "epoch": 3.807525634765625e-05, + "model_forward_time": 0.025098323822021484, + "step": 24953 + }, + { + "epoch": 3.807525634765625e-05, + "step": 24953, + "training_step_time": 0.10401368141174316 + }, + { + "epoch": 3.80767822265625e-05, + "model_forward_time": 0.024615049362182617, + "step": 24954 + }, + { + "epoch": 3.80767822265625e-05, + "step": 24954, + "training_step_time": 0.15782952308654785 + }, + { + "epoch": 3.807830810546875e-05, + "model_forward_time": 0.024782896041870117, + "step": 24955 + }, + { + "epoch": 3.807830810546875e-05, + "step": 24955, + "training_step_time": 0.16702771186828613 + }, + { + "epoch": 3.8079833984375e-05, + "model_forward_time": 0.024484872817993164, + "step": 24956 + }, + { + "epoch": 3.8079833984375e-05, + "step": 24956, + "training_step_time": 0.10866475105285645 + }, + { + "epoch": 3.808135986328125e-05, + "model_forward_time": 0.02648019790649414, + "step": 24957 + }, + { + "epoch": 3.808135986328125e-05, + "step": 24957, + "training_step_time": 0.1665642261505127 + }, + { + "epoch": 3.80828857421875e-05, + "model_forward_time": 0.02428150177001953, + "step": 24958 + }, + { + "epoch": 3.80828857421875e-05, + "step": 24958, + "training_step_time": 0.16640949249267578 + }, + { + "epoch": 3.808441162109375e-05, + "model_forward_time": 0.02436995506286621, + "step": 24959 + }, + { + "epoch": 3.808441162109375e-05, + "step": 24959, + "training_step_time": 0.10438036918640137 + }, + { + "epoch": 3.80859375e-05, + "grad_norm": 0.4482848048210144, + "learning_rate": 7.519893562680663e-06, + "loss": 0.005, + "step": 24960 + }, + { + "epoch": 3.80859375e-05, + "model_forward_time": 0.024839401245117188, + "step": 24960 + }, + { + "epoch": 3.80859375e-05, + "step": 24960, + "training_step_time": 0.10334157943725586 + }, + { + "epoch": 3.808746337890625e-05, + "model_forward_time": 0.025321245193481445, + "step": 24961 + }, + { + "epoch": 3.808746337890625e-05, + "step": 24961, + "training_step_time": 0.11086511611938477 + }, + { + "epoch": 3.80889892578125e-05, + "model_forward_time": 0.025043725967407227, + "step": 24962 + }, + { + "epoch": 3.80889892578125e-05, + "step": 24962, + "training_step_time": 0.1073462963104248 + }, + { + "epoch": 3.809051513671875e-05, + "model_forward_time": 0.025263309478759766, + "step": 24963 + }, + { + "epoch": 3.809051513671875e-05, + "step": 24963, + "training_step_time": 0.10519695281982422 + }, + { + "epoch": 3.8092041015625e-05, + "model_forward_time": 0.026035308837890625, + "step": 24964 + }, + { + "epoch": 3.8092041015625e-05, + "step": 24964, + "training_step_time": 0.1066734790802002 + }, + { + "epoch": 3.809356689453125e-05, + "model_forward_time": 0.0254669189453125, + "step": 24965 + }, + { + "epoch": 3.809356689453125e-05, + "step": 24965, + "training_step_time": 0.1047217845916748 + }, + { + "epoch": 3.80950927734375e-05, + "model_forward_time": 0.025545358657836914, + "step": 24966 + }, + { + "epoch": 3.80950927734375e-05, + "step": 24966, + "training_step_time": 0.10612916946411133 + }, + { + "epoch": 3.809661865234375e-05, + "model_forward_time": 0.026499271392822266, + "step": 24967 + }, + { + "epoch": 3.809661865234375e-05, + "step": 24967, + "training_step_time": 0.10616946220397949 + }, + { + "epoch": 3.809814453125e-05, + "model_forward_time": 0.02509617805480957, + "step": 24968 + }, + { + "epoch": 3.809814453125e-05, + "step": 24968, + "training_step_time": 0.10578656196594238 + }, + { + "epoch": 3.809967041015625e-05, + "model_forward_time": 0.026165008544921875, + "step": 24969 + }, + { + "epoch": 3.809967041015625e-05, + "step": 24969, + "training_step_time": 0.10547590255737305 + }, + { + "epoch": 3.81011962890625e-05, + "grad_norm": 0.0776326060295105, + "learning_rate": 7.490850047380954e-06, + "loss": 0.0056, + "step": 24970 + }, + { + "epoch": 3.81011962890625e-05, + "model_forward_time": 0.025587797164916992, + "step": 24970 + }, + { + "epoch": 3.81011962890625e-05, + "step": 24970, + "training_step_time": 0.10755610466003418 + }, + { + "epoch": 3.810272216796875e-05, + "model_forward_time": 0.024991273880004883, + "step": 24971 + }, + { + "epoch": 3.810272216796875e-05, + "step": 24971, + "training_step_time": 0.10566306114196777 + }, + { + "epoch": 3.8104248046875e-05, + "model_forward_time": 0.025124549865722656, + "step": 24972 + }, + { + "epoch": 3.8104248046875e-05, + "step": 24972, + "training_step_time": 0.10666823387145996 + }, + { + "epoch": 3.810577392578125e-05, + "model_forward_time": 0.024968624114990234, + "step": 24973 + }, + { + "epoch": 3.810577392578125e-05, + "step": 24973, + "training_step_time": 0.10543656349182129 + }, + { + "epoch": 3.81072998046875e-05, + "model_forward_time": 0.02499532699584961, + "step": 24974 + }, + { + "epoch": 3.81072998046875e-05, + "step": 24974, + "training_step_time": 0.1470661163330078 + }, + { + "epoch": 3.810882568359375e-05, + "model_forward_time": 0.024958372116088867, + "step": 24975 + }, + { + "epoch": 3.810882568359375e-05, + "step": 24975, + "training_step_time": 0.1149139404296875 + }, + { + "epoch": 3.81103515625e-05, + "model_forward_time": 0.024538278579711914, + "step": 24976 + }, + { + "epoch": 3.81103515625e-05, + "step": 24976, + "training_step_time": 0.12495040893554688 + }, + { + "epoch": 3.811187744140625e-05, + "model_forward_time": 0.025423288345336914, + "step": 24977 + }, + { + "epoch": 3.811187744140625e-05, + "step": 24977, + "training_step_time": 0.14210271835327148 + }, + { + "epoch": 3.81134033203125e-05, + "model_forward_time": 0.025188922882080078, + "step": 24978 + }, + { + "epoch": 3.81134033203125e-05, + "step": 24978, + "training_step_time": 0.11399030685424805 + }, + { + "epoch": 3.811492919921875e-05, + "model_forward_time": 0.024793386459350586, + "step": 24979 + }, + { + "epoch": 3.811492919921875e-05, + "step": 24979, + "training_step_time": 0.12336969375610352 + }, + { + "epoch": 3.8116455078125e-05, + "grad_norm": 0.24275687336921692, + "learning_rate": 7.461858184706777e-06, + "loss": 0.004, + "step": 24980 + }, + { + "epoch": 3.8116455078125e-05, + "model_forward_time": 0.025673866271972656, + "step": 24980 + }, + { + "epoch": 3.8116455078125e-05, + "step": 24980, + "training_step_time": 0.11063647270202637 + }, + { + "epoch": 3.811798095703125e-05, + "model_forward_time": 0.025603532791137695, + "step": 24981 + }, + { + "epoch": 3.811798095703125e-05, + "step": 24981, + "training_step_time": 0.10349082946777344 + }, + { + "epoch": 3.81195068359375e-05, + "model_forward_time": 0.0253140926361084, + "step": 24982 + }, + { + "epoch": 3.81195068359375e-05, + "step": 24982, + "training_step_time": 0.10505938529968262 + }, + { + "epoch": 3.812103271484375e-05, + "model_forward_time": 0.02457404136657715, + "step": 24983 + }, + { + "epoch": 3.812103271484375e-05, + "step": 24983, + "training_step_time": 0.10200023651123047 + }, + { + "epoch": 3.812255859375e-05, + "model_forward_time": 0.025217533111572266, + "step": 24984 + }, + { + "epoch": 3.812255859375e-05, + "step": 24984, + "training_step_time": 0.1066584587097168 + }, + { + "epoch": 3.812408447265625e-05, + "model_forward_time": 0.025299787521362305, + "step": 24985 + }, + { + "epoch": 3.812408447265625e-05, + "step": 24985, + "training_step_time": 0.10512661933898926 + }, + { + "epoch": 3.81256103515625e-05, + "model_forward_time": 0.025567054748535156, + "step": 24986 + }, + { + "epoch": 3.81256103515625e-05, + "step": 24986, + "training_step_time": 0.10823464393615723 + }, + { + "epoch": 3.812713623046875e-05, + "model_forward_time": 0.02527451515197754, + "step": 24987 + }, + { + "epoch": 3.812713623046875e-05, + "step": 24987, + "training_step_time": 0.1365799903869629 + }, + { + "epoch": 3.8128662109375e-05, + "model_forward_time": 0.02547430992126465, + "step": 24988 + }, + { + "epoch": 3.8128662109375e-05, + "step": 24988, + "training_step_time": 0.15511178970336914 + }, + { + "epoch": 3.813018798828125e-05, + "model_forward_time": 0.02442479133605957, + "step": 24989 + }, + { + "epoch": 3.813018798828125e-05, + "step": 24989, + "training_step_time": 0.1694643497467041 + }, + { + "epoch": 3.81317138671875e-05, + "grad_norm": 0.14347414672374725, + "learning_rate": 7.432918009885997e-06, + "loss": 0.0141, + "step": 24990 + }, + { + "epoch": 3.81317138671875e-05, + "model_forward_time": 0.024274587631225586, + "step": 24990 + }, + { + "epoch": 3.81317138671875e-05, + "step": 24990, + "training_step_time": 0.2005767822265625 + }, + { + "epoch": 3.813323974609375e-05, + "model_forward_time": 0.024074077606201172, + "step": 24991 + }, + { + "epoch": 3.813323974609375e-05, + "step": 24991, + "training_step_time": 0.14263916015625 + }, + { + "epoch": 3.8134765625e-05, + "model_forward_time": 0.024479389190673828, + "step": 24992 + }, + { + "epoch": 3.8134765625e-05, + "step": 24992, + "training_step_time": 0.20880579948425293 + }, + { + "epoch": 3.813629150390625e-05, + "model_forward_time": 0.024613380432128906, + "step": 24993 + }, + { + "epoch": 3.813629150390625e-05, + "step": 24993, + "training_step_time": 0.14316463470458984 + }, + { + "epoch": 3.81378173828125e-05, + "model_forward_time": 0.024414777755737305, + "step": 24994 + }, + { + "epoch": 3.81378173828125e-05, + "step": 24994, + "training_step_time": 0.1896836757659912 + }, + { + "epoch": 3.813934326171875e-05, + "model_forward_time": 0.024456262588500977, + "step": 24995 + }, + { + "epoch": 3.813934326171875e-05, + "step": 24995, + "training_step_time": 0.12531328201293945 + }, + { + "epoch": 3.8140869140625e-05, + "model_forward_time": 0.02366161346435547, + "step": 24996 + }, + { + "epoch": 3.8140869140625e-05, + "step": 24996, + "training_step_time": 0.11908125877380371 + }, + { + "epoch": 3.814239501953125e-05, + "model_forward_time": 0.024873733520507812, + "step": 24997 + }, + { + "epoch": 3.814239501953125e-05, + "step": 24997, + "training_step_time": 0.11833357810974121 + }, + { + "epoch": 3.81439208984375e-05, + "model_forward_time": 0.025428295135498047, + "step": 24998 + }, + { + "epoch": 3.81439208984375e-05, + "step": 24998, + "training_step_time": 0.18886041641235352 + }, + { + "epoch": 3.814544677734375e-05, + "model_forward_time": 0.02479076385498047, + "step": 24999 + }, + { + "epoch": 3.814544677734375e-05, + "step": 24999, + "training_step_time": 0.11611771583557129 + }, + { + "epoch": 3.814697265625e-05, + "grad_norm": 0.24925269186496735, + "learning_rate": 7.404029558083653e-06, + "loss": 0.0049, + "step": 25000 + }, + { + "epoch": 3.814697265625e-05, + "model_forward_time": 0.02887892723083496, + "step": 25000 + }, + { + "epoch": 3.814697265625e-05, + "step": 25000, + "training_step_time": 0.10677766799926758 + }, + { + "epoch": 3.814849853515625e-05, + "model_forward_time": 0.023868560791015625, + "step": 25001 + }, + { + "epoch": 3.814849853515625e-05, + "step": 25001, + "training_step_time": 0.10685253143310547 + }, + { + "epoch": 3.81500244140625e-05, + "model_forward_time": 0.024806499481201172, + "step": 25002 + }, + { + "epoch": 3.81500244140625e-05, + "step": 25002, + "training_step_time": 0.10273003578186035 + }, + { + "epoch": 3.815155029296875e-05, + "model_forward_time": 0.02677750587463379, + "step": 25003 + }, + { + "epoch": 3.815155029296875e-05, + "step": 25003, + "training_step_time": 0.1894700527191162 + }, + { + "epoch": 3.8153076171875e-05, + "model_forward_time": 0.024010658264160156, + "step": 25004 + }, + { + "epoch": 3.8153076171875e-05, + "step": 25004, + "training_step_time": 0.1422407627105713 + }, + { + "epoch": 3.815460205078125e-05, + "model_forward_time": 0.024502038955688477, + "step": 25005 + }, + { + "epoch": 3.815460205078125e-05, + "step": 25005, + "training_step_time": 0.10293221473693848 + }, + { + "epoch": 3.81561279296875e-05, + "model_forward_time": 0.02557826042175293, + "step": 25006 + }, + { + "epoch": 3.81561279296875e-05, + "step": 25006, + "training_step_time": 0.10453414916992188 + }, + { + "epoch": 3.815765380859375e-05, + "model_forward_time": 0.025261878967285156, + "step": 25007 + }, + { + "epoch": 3.815765380859375e-05, + "step": 25007, + "training_step_time": 0.10351347923278809 + }, + { + "epoch": 3.81591796875e-05, + "model_forward_time": 0.025469303131103516, + "step": 25008 + }, + { + "epoch": 3.81591796875e-05, + "step": 25008, + "training_step_time": 0.10572147369384766 + }, + { + "epoch": 3.816070556640625e-05, + "model_forward_time": 0.02524566650390625, + "step": 25009 + }, + { + "epoch": 3.816070556640625e-05, + "step": 25009, + "training_step_time": 0.10544347763061523 + }, + { + "epoch": 3.81622314453125e-05, + "grad_norm": 0.09094515442848206, + "learning_rate": 7.375192864401931e-06, + "loss": 0.0079, + "step": 25010 + }, + { + "epoch": 3.81622314453125e-05, + "model_forward_time": 0.02510523796081543, + "step": 25010 + }, + { + "epoch": 3.81622314453125e-05, + "step": 25010, + "training_step_time": 0.10578727722167969 + }, + { + "epoch": 3.816375732421875e-05, + "model_forward_time": 0.024981975555419922, + "step": 25011 + }, + { + "epoch": 3.816375732421875e-05, + "step": 25011, + "training_step_time": 0.10748124122619629 + }, + { + "epoch": 3.8165283203125e-05, + "model_forward_time": 0.025338172912597656, + "step": 25012 + }, + { + "epoch": 3.8165283203125e-05, + "step": 25012, + "training_step_time": 0.10608077049255371 + }, + { + "epoch": 3.816680908203125e-05, + "model_forward_time": 0.025037527084350586, + "step": 25013 + }, + { + "epoch": 3.816680908203125e-05, + "step": 25013, + "training_step_time": 0.10314059257507324 + }, + { + "epoch": 3.81683349609375e-05, + "model_forward_time": 0.02476811408996582, + "step": 25014 + }, + { + "epoch": 3.81683349609375e-05, + "step": 25014, + "training_step_time": 0.13168883323669434 + }, + { + "epoch": 3.816986083984375e-05, + "model_forward_time": 0.02413010597229004, + "step": 25015 + }, + { + "epoch": 3.816986083984375e-05, + "step": 25015, + "training_step_time": 0.15410351753234863 + }, + { + "epoch": 3.817138671875e-05, + "model_forward_time": 0.023876428604125977, + "step": 25016 + }, + { + "epoch": 3.817138671875e-05, + "step": 25016, + "training_step_time": 0.1416473388671875 + }, + { + "epoch": 3.817291259765625e-05, + "model_forward_time": 0.02338433265686035, + "step": 25017 + }, + { + "epoch": 3.817291259765625e-05, + "step": 25017, + "training_step_time": 0.129685640335083 + }, + { + "epoch": 3.81744384765625e-05, + "model_forward_time": 0.02310633659362793, + "step": 25018 + }, + { + "epoch": 3.81744384765625e-05, + "step": 25018, + "training_step_time": 0.12094259262084961 + }, + { + "epoch": 3.817596435546875e-05, + "model_forward_time": 0.02378988265991211, + "step": 25019 + }, + { + "epoch": 3.817596435546875e-05, + "step": 25019, + "training_step_time": 0.11940789222717285 + }, + { + "epoch": 3.8177490234375e-05, + "grad_norm": 0.4444446265697479, + "learning_rate": 7.3464079638801365e-06, + "loss": 0.0072, + "step": 25020 + }, + { + "epoch": 3.8177490234375e-05, + "model_forward_time": 0.02408909797668457, + "step": 25020 + }, + { + "epoch": 3.8177490234375e-05, + "step": 25020, + "training_step_time": 0.15147876739501953 + }, + { + "epoch": 3.817901611328125e-05, + "model_forward_time": 0.024954557418823242, + "step": 25021 + }, + { + "epoch": 3.817901611328125e-05, + "step": 25021, + "training_step_time": 0.11741471290588379 + }, + { + "epoch": 3.81805419921875e-05, + "model_forward_time": 0.025370121002197266, + "step": 25022 + }, + { + "epoch": 3.81805419921875e-05, + "step": 25022, + "training_step_time": 0.11539936065673828 + }, + { + "epoch": 3.818206787109375e-05, + "model_forward_time": 0.025111675262451172, + "step": 25023 + }, + { + "epoch": 3.818206787109375e-05, + "step": 25023, + "training_step_time": 0.12031149864196777 + }, + { + "epoch": 3.818359375e-05, + "model_forward_time": 0.025614500045776367, + "step": 25024 + }, + { + "epoch": 3.818359375e-05, + "step": 25024, + "training_step_time": 0.20996356010437012 + }, + { + "epoch": 3.818511962890625e-05, + "model_forward_time": 0.024765729904174805, + "step": 25025 + }, + { + "epoch": 3.818511962890625e-05, + "step": 25025, + "training_step_time": 0.11164402961730957 + }, + { + "epoch": 3.81866455078125e-05, + "model_forward_time": 0.024080514907836914, + "step": 25026 + }, + { + "epoch": 3.81866455078125e-05, + "step": 25026, + "training_step_time": 0.10087704658508301 + }, + { + "epoch": 3.818817138671875e-05, + "model_forward_time": 0.02563333511352539, + "step": 25027 + }, + { + "epoch": 3.818817138671875e-05, + "step": 25027, + "training_step_time": 0.10654091835021973 + }, + { + "epoch": 3.8189697265625e-05, + "model_forward_time": 0.025554418563842773, + "step": 25028 + }, + { + "epoch": 3.8189697265625e-05, + "step": 25028, + "training_step_time": 0.10500931739807129 + }, + { + "epoch": 3.819122314453125e-05, + "model_forward_time": 0.026189327239990234, + "step": 25029 + }, + { + "epoch": 3.819122314453125e-05, + "step": 25029, + "training_step_time": 0.10583662986755371 + }, + { + "epoch": 3.81927490234375e-05, + "grad_norm": 0.3237009644508362, + "learning_rate": 7.317674891494625e-06, + "loss": 0.0146, + "step": 25030 + }, + { + "epoch": 3.81927490234375e-05, + "model_forward_time": 0.026188135147094727, + "step": 25030 + }, + { + "epoch": 3.81927490234375e-05, + "step": 25030, + "training_step_time": 0.10995769500732422 + }, + { + "epoch": 3.819427490234375e-05, + "model_forward_time": 0.025097131729125977, + "step": 25031 + }, + { + "epoch": 3.819427490234375e-05, + "step": 25031, + "training_step_time": 0.10294198989868164 + }, + { + "epoch": 3.819580078125e-05, + "model_forward_time": 0.025513887405395508, + "step": 25032 + }, + { + "epoch": 3.819580078125e-05, + "step": 25032, + "training_step_time": 0.10381102561950684 + }, + { + "epoch": 3.819732666015625e-05, + "model_forward_time": 0.025522232055664062, + "step": 25033 + }, + { + "epoch": 3.819732666015625e-05, + "step": 25033, + "training_step_time": 0.10345840454101562 + }, + { + "epoch": 3.81988525390625e-05, + "model_forward_time": 0.025183677673339844, + "step": 25034 + }, + { + "epoch": 3.81988525390625e-05, + "step": 25034, + "training_step_time": 0.17425274848937988 + }, + { + "epoch": 3.820037841796875e-05, + "model_forward_time": 0.024954557418823242, + "step": 25035 + }, + { + "epoch": 3.820037841796875e-05, + "step": 25035, + "training_step_time": 0.10679125785827637 + }, + { + "epoch": 3.8201904296875e-05, + "model_forward_time": 0.025027990341186523, + "step": 25036 + }, + { + "epoch": 3.8201904296875e-05, + "step": 25036, + "training_step_time": 0.1847996711730957 + }, + { + "epoch": 3.820343017578125e-05, + "model_forward_time": 0.02515578269958496, + "step": 25037 + }, + { + "epoch": 3.820343017578125e-05, + "step": 25037, + "training_step_time": 0.1349475383758545 + }, + { + "epoch": 3.82049560546875e-05, + "model_forward_time": 0.028076648712158203, + "step": 25038 + }, + { + "epoch": 3.82049560546875e-05, + "step": 25038, + "training_step_time": 0.2188117504119873 + }, + { + "epoch": 3.820648193359375e-05, + "model_forward_time": 0.02476954460144043, + "step": 25039 + }, + { + "epoch": 3.820648193359375e-05, + "step": 25039, + "training_step_time": 0.22105002403259277 + }, + { + "epoch": 3.82080078125e-05, + "grad_norm": 0.12376312166452408, + "learning_rate": 7.2889936821588125e-06, + "loss": 0.0047, + "step": 25040 + }, + { + "epoch": 3.82080078125e-05, + "model_forward_time": 0.024588346481323242, + "step": 25040 + }, + { + "epoch": 3.82080078125e-05, + "step": 25040, + "training_step_time": 0.12495565414428711 + }, + { + "epoch": 3.820953369140625e-05, + "model_forward_time": 0.024629831314086914, + "step": 25041 + }, + { + "epoch": 3.820953369140625e-05, + "step": 25041, + "training_step_time": 0.12820649147033691 + }, + { + "epoch": 3.82110595703125e-05, + "model_forward_time": 0.0247347354888916, + "step": 25042 + }, + { + "epoch": 3.82110595703125e-05, + "step": 25042, + "training_step_time": 0.11042594909667969 + }, + { + "epoch": 3.821258544921875e-05, + "model_forward_time": 0.025394201278686523, + "step": 25043 + }, + { + "epoch": 3.821258544921875e-05, + "step": 25043, + "training_step_time": 0.10480952262878418 + }, + { + "epoch": 3.8214111328125e-05, + "model_forward_time": 0.02598094940185547, + "step": 25044 + }, + { + "epoch": 3.8214111328125e-05, + "step": 25044, + "training_step_time": 0.10991525650024414 + }, + { + "epoch": 3.821563720703125e-05, + "model_forward_time": 0.02616095542907715, + "step": 25045 + }, + { + "epoch": 3.821563720703125e-05, + "step": 25045, + "training_step_time": 0.10643768310546875 + }, + { + "epoch": 3.82171630859375e-05, + "model_forward_time": 0.025621891021728516, + "step": 25046 + }, + { + "epoch": 3.82171630859375e-05, + "step": 25046, + "training_step_time": 0.12440085411071777 + }, + { + "epoch": 3.821868896484375e-05, + "model_forward_time": 0.02585124969482422, + "step": 25047 + }, + { + "epoch": 3.821868896484375e-05, + "step": 25047, + "training_step_time": 0.1076042652130127 + }, + { + "epoch": 3.822021484375e-05, + "model_forward_time": 0.025577545166015625, + "step": 25048 + }, + { + "epoch": 3.822021484375e-05, + "step": 25048, + "training_step_time": 0.2098526954650879 + }, + { + "epoch": 3.822174072265625e-05, + "model_forward_time": 0.02441716194152832, + "step": 25049 + }, + { + "epoch": 3.822174072265625e-05, + "step": 25049, + "training_step_time": 0.12787818908691406 + }, + { + "epoch": 3.82232666015625e-05, + "grad_norm": 0.1526920348405838, + "learning_rate": 7.260364370723044e-06, + "loss": 0.0064, + "step": 25050 + }, + { + "epoch": 3.82232666015625e-05, + "model_forward_time": 0.02455925941467285, + "step": 25050 + }, + { + "epoch": 3.82232666015625e-05, + "step": 25050, + "training_step_time": 0.21976304054260254 + }, + { + "epoch": 3.822479248046875e-05, + "model_forward_time": 0.024837970733642578, + "step": 25051 + }, + { + "epoch": 3.822479248046875e-05, + "step": 25051, + "training_step_time": 0.10841727256774902 + }, + { + "epoch": 3.8226318359375e-05, + "model_forward_time": 0.02497386932373047, + "step": 25052 + }, + { + "epoch": 3.8226318359375e-05, + "step": 25052, + "training_step_time": 0.11075544357299805 + }, + { + "epoch": 3.822784423828125e-05, + "model_forward_time": 0.0252230167388916, + "step": 25053 + }, + { + "epoch": 3.822784423828125e-05, + "step": 25053, + "training_step_time": 0.10830831527709961 + }, + { + "epoch": 3.82293701171875e-05, + "model_forward_time": 0.025828838348388672, + "step": 25054 + }, + { + "epoch": 3.82293701171875e-05, + "step": 25054, + "training_step_time": 0.11139225959777832 + }, + { + "epoch": 3.823089599609375e-05, + "model_forward_time": 0.025738000869750977, + "step": 25055 + }, + { + "epoch": 3.823089599609375e-05, + "step": 25055, + "training_step_time": 0.1082918643951416 + }, + { + "epoch": 3.8232421875e-05, + "model_forward_time": 0.025001049041748047, + "step": 25056 + }, + { + "epoch": 3.8232421875e-05, + "step": 25056, + "training_step_time": 0.10857510566711426 + }, + { + "epoch": 3.823394775390625e-05, + "model_forward_time": 0.025480985641479492, + "step": 25057 + }, + { + "epoch": 3.823394775390625e-05, + "step": 25057, + "training_step_time": 0.10761666297912598 + }, + { + "epoch": 3.82354736328125e-05, + "model_forward_time": 0.025412321090698242, + "step": 25058 + }, + { + "epoch": 3.82354736328125e-05, + "step": 25058, + "training_step_time": 0.10713672637939453 + }, + { + "epoch": 3.823699951171875e-05, + "model_forward_time": 0.025943756103515625, + "step": 25059 + }, + { + "epoch": 3.823699951171875e-05, + "step": 25059, + "training_step_time": 0.11415624618530273 + }, + { + "epoch": 3.8238525390625e-05, + "grad_norm": 0.11616542935371399, + "learning_rate": 7.2317869919746705e-06, + "loss": 0.0057, + "step": 25060 + }, + { + "epoch": 3.8238525390625e-05, + "model_forward_time": 0.025337934494018555, + "step": 25060 + }, + { + "epoch": 3.8238525390625e-05, + "step": 25060, + "training_step_time": 0.11009097099304199 + }, + { + "epoch": 3.824005126953125e-05, + "model_forward_time": 0.025023221969604492, + "step": 25061 + }, + { + "epoch": 3.824005126953125e-05, + "step": 25061, + "training_step_time": 0.10696959495544434 + }, + { + "epoch": 3.82415771484375e-05, + "model_forward_time": 0.025115489959716797, + "step": 25062 + }, + { + "epoch": 3.82415771484375e-05, + "step": 25062, + "training_step_time": 0.10770344734191895 + }, + { + "epoch": 3.824310302734375e-05, + "model_forward_time": 0.02571845054626465, + "step": 25063 + }, + { + "epoch": 3.824310302734375e-05, + "step": 25063, + "training_step_time": 0.10859918594360352 + }, + { + "epoch": 3.824462890625e-05, + "model_forward_time": 0.025569677352905273, + "step": 25064 + }, + { + "epoch": 3.824462890625e-05, + "step": 25064, + "training_step_time": 0.1079866886138916 + }, + { + "epoch": 3.824615478515625e-05, + "model_forward_time": 0.025261402130126953, + "step": 25065 + }, + { + "epoch": 3.824615478515625e-05, + "step": 25065, + "training_step_time": 0.20171332359313965 + }, + { + "epoch": 3.82476806640625e-05, + "model_forward_time": 0.0255277156829834, + "step": 25066 + }, + { + "epoch": 3.82476806640625e-05, + "step": 25066, + "training_step_time": 0.16324639320373535 + }, + { + "epoch": 3.824920654296875e-05, + "model_forward_time": 0.024769306182861328, + "step": 25067 + }, + { + "epoch": 3.824920654296875e-05, + "step": 25067, + "training_step_time": 0.1580650806427002 + }, + { + "epoch": 3.8250732421875e-05, + "model_forward_time": 0.024834156036376953, + "step": 25068 + }, + { + "epoch": 3.8250732421875e-05, + "step": 25068, + "training_step_time": 0.1171274185180664 + }, + { + "epoch": 3.825225830078125e-05, + "model_forward_time": 0.025124549865722656, + "step": 25069 + }, + { + "epoch": 3.825225830078125e-05, + "step": 25069, + "training_step_time": 0.19052863121032715 + }, + { + "epoch": 3.82537841796875e-05, + "grad_norm": 0.07350145280361176, + "learning_rate": 7.203261580637877e-06, + "loss": 0.0042, + "step": 25070 + }, + { + "epoch": 3.82537841796875e-05, + "model_forward_time": 0.0254669189453125, + "step": 25070 + }, + { + "epoch": 3.82537841796875e-05, + "step": 25070, + "training_step_time": 0.10476255416870117 + }, + { + "epoch": 3.825531005859375e-05, + "model_forward_time": 0.024896621704101562, + "step": 25071 + }, + { + "epoch": 3.825531005859375e-05, + "step": 25071, + "training_step_time": 0.10831308364868164 + }, + { + "epoch": 3.82568359375e-05, + "model_forward_time": 0.025502920150756836, + "step": 25072 + }, + { + "epoch": 3.82568359375e-05, + "step": 25072, + "training_step_time": 0.11693692207336426 + }, + { + "epoch": 3.825836181640625e-05, + "model_forward_time": 0.025450468063354492, + "step": 25073 + }, + { + "epoch": 3.825836181640625e-05, + "step": 25073, + "training_step_time": 0.11407184600830078 + }, + { + "epoch": 3.82598876953125e-05, + "model_forward_time": 0.0255887508392334, + "step": 25074 + }, + { + "epoch": 3.82598876953125e-05, + "step": 25074, + "training_step_time": 0.1074974536895752 + }, + { + "epoch": 3.826141357421875e-05, + "model_forward_time": 0.02521038055419922, + "step": 25075 + }, + { + "epoch": 3.826141357421875e-05, + "step": 25075, + "training_step_time": 0.10771012306213379 + }, + { + "epoch": 3.8262939453125e-05, + "model_forward_time": 0.02560257911682129, + "step": 25076 + }, + { + "epoch": 3.8262939453125e-05, + "step": 25076, + "training_step_time": 0.1079399585723877 + }, + { + "epoch": 3.826446533203125e-05, + "model_forward_time": 0.025557279586791992, + "step": 25077 + }, + { + "epoch": 3.826446533203125e-05, + "step": 25077, + "training_step_time": 0.1669304370880127 + }, + { + "epoch": 3.82659912109375e-05, + "model_forward_time": 0.02476668357849121, + "step": 25078 + }, + { + "epoch": 3.82659912109375e-05, + "step": 25078, + "training_step_time": 0.17777013778686523 + }, + { + "epoch": 3.826751708984375e-05, + "model_forward_time": 0.02395343780517578, + "step": 25079 + }, + { + "epoch": 3.826751708984375e-05, + "step": 25079, + "training_step_time": 0.19781708717346191 + }, + { + "epoch": 3.826904296875e-05, + "grad_norm": 0.09397833794355392, + "learning_rate": 7.174788171373731e-06, + "loss": 0.0061, + "step": 25080 + }, + { + "epoch": 3.826904296875e-05, + "model_forward_time": 0.02518463134765625, + "step": 25080 + }, + { + "epoch": 3.826904296875e-05, + "step": 25080, + "training_step_time": 0.17037034034729004 + }, + { + "epoch": 3.827056884765625e-05, + "model_forward_time": 0.02469348907470703, + "step": 25081 + }, + { + "epoch": 3.827056884765625e-05, + "step": 25081, + "training_step_time": 0.16231226921081543 + }, + { + "epoch": 3.82720947265625e-05, + "model_forward_time": 0.02478766441345215, + "step": 25082 + }, + { + "epoch": 3.82720947265625e-05, + "step": 25082, + "training_step_time": 0.19936633110046387 + }, + { + "epoch": 3.827362060546875e-05, + "model_forward_time": 0.024800777435302734, + "step": 25083 + }, + { + "epoch": 3.827362060546875e-05, + "step": 25083, + "training_step_time": 0.1291823387145996 + }, + { + "epoch": 3.8275146484375e-05, + "model_forward_time": 0.023612022399902344, + "step": 25084 + }, + { + "epoch": 3.8275146484375e-05, + "step": 25084, + "training_step_time": 0.11693453788757324 + }, + { + "epoch": 3.827667236328125e-05, + "model_forward_time": 0.0256500244140625, + "step": 25085 + }, + { + "epoch": 3.827667236328125e-05, + "step": 25085, + "training_step_time": 0.11795973777770996 + }, + { + "epoch": 3.82781982421875e-05, + "model_forward_time": 0.025600433349609375, + "step": 25086 + }, + { + "epoch": 3.82781982421875e-05, + "step": 25086, + "training_step_time": 0.11570882797241211 + }, + { + "epoch": 3.827972412109375e-05, + "model_forward_time": 0.025560855865478516, + "step": 25087 + }, + { + "epoch": 3.827972412109375e-05, + "step": 25087, + "training_step_time": 0.11034107208251953 + }, + { + "epoch": 3.828125e-05, + "model_forward_time": 0.025393009185791016, + "step": 25088 + }, + { + "epoch": 3.828125e-05, + "step": 25088, + "training_step_time": 0.1136167049407959 + }, + { + "epoch": 3.828277587890625e-05, + "model_forward_time": 0.025386810302734375, + "step": 25089 + }, + { + "epoch": 3.828277587890625e-05, + "step": 25089, + "training_step_time": 0.15917563438415527 + }, + { + "epoch": 3.82843017578125e-05, + "grad_norm": 0.29310888051986694, + "learning_rate": 7.146366798780096e-06, + "loss": 0.0061, + "step": 25090 + }, + { + "epoch": 3.82843017578125e-05, + "model_forward_time": 0.025224685668945312, + "step": 25090 + }, + { + "epoch": 3.82843017578125e-05, + "step": 25090, + "training_step_time": 0.1576242446899414 + }, + { + "epoch": 3.828582763671875e-05, + "model_forward_time": 0.024659156799316406, + "step": 25091 + }, + { + "epoch": 3.828582763671875e-05, + "step": 25091, + "training_step_time": 0.11840939521789551 + }, + { + "epoch": 3.8287353515625e-05, + "model_forward_time": 0.025058984756469727, + "step": 25092 + }, + { + "epoch": 3.8287353515625e-05, + "step": 25092, + "training_step_time": 0.10769271850585938 + }, + { + "epoch": 3.828887939453125e-05, + "model_forward_time": 0.0273892879486084, + "step": 25093 + }, + { + "epoch": 3.828887939453125e-05, + "step": 25093, + "training_step_time": 0.12745928764343262 + }, + { + "epoch": 3.82904052734375e-05, + "model_forward_time": 0.025051593780517578, + "step": 25094 + }, + { + "epoch": 3.82904052734375e-05, + "step": 25094, + "training_step_time": 0.1198267936706543 + }, + { + "epoch": 3.829193115234375e-05, + "model_forward_time": 0.025104284286499023, + "step": 25095 + }, + { + "epoch": 3.829193115234375e-05, + "step": 25095, + "training_step_time": 0.1038351058959961 + }, + { + "epoch": 3.829345703125e-05, + "model_forward_time": 0.025053977966308594, + "step": 25096 + }, + { + "epoch": 3.829345703125e-05, + "step": 25096, + "training_step_time": 0.10745882987976074 + }, + { + "epoch": 3.829498291015625e-05, + "model_forward_time": 0.025100231170654297, + "step": 25097 + }, + { + "epoch": 3.829498291015625e-05, + "step": 25097, + "training_step_time": 0.1049656867980957 + }, + { + "epoch": 3.82965087890625e-05, + "model_forward_time": 0.024740219116210938, + "step": 25098 + }, + { + "epoch": 3.82965087890625e-05, + "step": 25098, + "training_step_time": 0.10576200485229492 + }, + { + "epoch": 3.829803466796875e-05, + "model_forward_time": 0.02411651611328125, + "step": 25099 + }, + { + "epoch": 3.829803466796875e-05, + "step": 25099, + "training_step_time": 0.10433840751647949 + }, + { + "epoch": 3.8299560546875e-05, + "grad_norm": 0.09635431319475174, + "learning_rate": 7.1179974973916486e-06, + "loss": 0.0055, + "step": 25100 + }, + { + "epoch": 3.8299560546875e-05, + "model_forward_time": 0.024544954299926758, + "step": 25100 + }, + { + "epoch": 3.8299560546875e-05, + "step": 25100, + "training_step_time": 0.10450315475463867 + }, + { + "epoch": 3.830108642578125e-05, + "model_forward_time": 0.02492213249206543, + "step": 25101 + }, + { + "epoch": 3.830108642578125e-05, + "step": 25101, + "training_step_time": 0.10894417762756348 + }, + { + "epoch": 3.83026123046875e-05, + "model_forward_time": 0.024777650833129883, + "step": 25102 + }, + { + "epoch": 3.83026123046875e-05, + "step": 25102, + "training_step_time": 0.10705733299255371 + }, + { + "epoch": 3.830413818359375e-05, + "model_forward_time": 0.02514505386352539, + "step": 25103 + }, + { + "epoch": 3.830413818359375e-05, + "step": 25103, + "training_step_time": 0.10733366012573242 + }, + { + "epoch": 3.83056640625e-05, + "model_forward_time": 0.02443099021911621, + "step": 25104 + }, + { + "epoch": 3.83056640625e-05, + "step": 25104, + "training_step_time": 0.10692095756530762 + }, + { + "epoch": 3.830718994140625e-05, + "model_forward_time": 0.0247344970703125, + "step": 25105 + }, + { + "epoch": 3.830718994140625e-05, + "step": 25105, + "training_step_time": 0.10637307167053223 + }, + { + "epoch": 3.83087158203125e-05, + "model_forward_time": 0.024736881256103516, + "step": 25106 + }, + { + "epoch": 3.83087158203125e-05, + "step": 25106, + "training_step_time": 0.10445284843444824 + }, + { + "epoch": 3.831024169921875e-05, + "model_forward_time": 0.024989843368530273, + "step": 25107 + }, + { + "epoch": 3.831024169921875e-05, + "step": 25107, + "training_step_time": 0.11058211326599121 + }, + { + "epoch": 3.8311767578125e-05, + "model_forward_time": 0.02555108070373535, + "step": 25108 + }, + { + "epoch": 3.8311767578125e-05, + "step": 25108, + "training_step_time": 0.18748998641967773 + }, + { + "epoch": 3.831329345703125e-05, + "model_forward_time": 0.026265382766723633, + "step": 25109 + }, + { + "epoch": 3.831329345703125e-05, + "step": 25109, + "training_step_time": 0.16661643981933594 + }, + { + "epoch": 3.83148193359375e-05, + "grad_norm": 0.09016188234090805, + "learning_rate": 7.089680301679752e-06, + "loss": 0.0049, + "step": 25110 + }, + { + "epoch": 3.83148193359375e-05, + "model_forward_time": 0.023697853088378906, + "step": 25110 + }, + { + "epoch": 3.83148193359375e-05, + "step": 25110, + "training_step_time": 0.14166927337646484 + }, + { + "epoch": 3.831634521484375e-05, + "model_forward_time": 0.024077892303466797, + "step": 25111 + }, + { + "epoch": 3.831634521484375e-05, + "step": 25111, + "training_step_time": 0.12889647483825684 + }, + { + "epoch": 3.831787109375e-05, + "model_forward_time": 0.024435043334960938, + "step": 25112 + }, + { + "epoch": 3.831787109375e-05, + "step": 25112, + "training_step_time": 0.18923521041870117 + }, + { + "epoch": 3.831939697265625e-05, + "model_forward_time": 0.02459430694580078, + "step": 25113 + }, + { + "epoch": 3.831939697265625e-05, + "step": 25113, + "training_step_time": 0.11600804328918457 + }, + { + "epoch": 3.83209228515625e-05, + "model_forward_time": 0.02452397346496582, + "step": 25114 + }, + { + "epoch": 3.83209228515625e-05, + "step": 25114, + "training_step_time": 0.1154024600982666 + }, + { + "epoch": 3.832244873046875e-05, + "model_forward_time": 0.024894237518310547, + "step": 25115 + }, + { + "epoch": 3.832244873046875e-05, + "step": 25115, + "training_step_time": 0.11747336387634277 + }, + { + "epoch": 3.8323974609375e-05, + "model_forward_time": 0.025285959243774414, + "step": 25116 + }, + { + "epoch": 3.8323974609375e-05, + "step": 25116, + "training_step_time": 0.11216425895690918 + }, + { + "epoch": 3.832550048828125e-05, + "model_forward_time": 0.02533102035522461, + "step": 25117 + }, + { + "epoch": 3.832550048828125e-05, + "step": 25117, + "training_step_time": 0.12253761291503906 + }, + { + "epoch": 3.83270263671875e-05, + "model_forward_time": 0.02374267578125, + "step": 25118 + }, + { + "epoch": 3.83270263671875e-05, + "step": 25118, + "training_step_time": 0.11725926399230957 + }, + { + "epoch": 3.832855224609375e-05, + "model_forward_time": 0.0248870849609375, + "step": 25119 + }, + { + "epoch": 3.832855224609375e-05, + "step": 25119, + "training_step_time": 0.11341476440429688 + }, + { + "epoch": 3.8330078125e-05, + "grad_norm": 0.20135025680065155, + "learning_rate": 7.061415246052466e-06, + "loss": 0.0069, + "step": 25120 + }, + { + "epoch": 3.8330078125e-05, + "model_forward_time": 0.024172067642211914, + "step": 25120 + }, + { + "epoch": 3.8330078125e-05, + "step": 25120, + "training_step_time": 0.16681456565856934 + }, + { + "epoch": 3.833160400390625e-05, + "model_forward_time": 0.02413463592529297, + "step": 25121 + }, + { + "epoch": 3.833160400390625e-05, + "step": 25121, + "training_step_time": 0.1462693214416504 + }, + { + "epoch": 3.83331298828125e-05, + "model_forward_time": 0.024616003036499023, + "step": 25122 + }, + { + "epoch": 3.83331298828125e-05, + "step": 25122, + "training_step_time": 0.10736346244812012 + }, + { + "epoch": 3.833465576171875e-05, + "model_forward_time": 0.02481245994567871, + "step": 25123 + }, + { + "epoch": 3.833465576171875e-05, + "step": 25123, + "training_step_time": 0.1087954044342041 + }, + { + "epoch": 3.8336181640625e-05, + "model_forward_time": 0.02494955062866211, + "step": 25124 + }, + { + "epoch": 3.8336181640625e-05, + "step": 25124, + "training_step_time": 0.1054372787475586 + }, + { + "epoch": 3.833770751953125e-05, + "model_forward_time": 0.024939775466918945, + "step": 25125 + }, + { + "epoch": 3.833770751953125e-05, + "step": 25125, + "training_step_time": 0.13022804260253906 + }, + { + "epoch": 3.83392333984375e-05, + "model_forward_time": 0.024874448776245117, + "step": 25126 + }, + { + "epoch": 3.83392333984375e-05, + "step": 25126, + "training_step_time": 0.23420286178588867 + }, + { + "epoch": 3.834075927734375e-05, + "model_forward_time": 0.02402496337890625, + "step": 25127 + }, + { + "epoch": 3.834075927734375e-05, + "step": 25127, + "training_step_time": 0.1067955493927002 + }, + { + "epoch": 3.834228515625e-05, + "model_forward_time": 0.02367877960205078, + "step": 25128 + }, + { + "epoch": 3.834228515625e-05, + "step": 25128, + "training_step_time": 0.13083958625793457 + }, + { + "epoch": 3.834381103515625e-05, + "model_forward_time": 0.02477741241455078, + "step": 25129 + }, + { + "epoch": 3.834381103515625e-05, + "step": 25129, + "training_step_time": 0.12528634071350098 + }, + { + "epoch": 3.83453369140625e-05, + "grad_norm": 0.08731285482645035, + "learning_rate": 7.0332023648544965e-06, + "loss": 0.0131, + "step": 25130 + }, + { + "epoch": 3.83453369140625e-05, + "model_forward_time": 0.02522420883178711, + "step": 25130 + }, + { + "epoch": 3.83453369140625e-05, + "step": 25130, + "training_step_time": 0.11361551284790039 + }, + { + "epoch": 3.834686279296875e-05, + "model_forward_time": 0.027456283569335938, + "step": 25131 + }, + { + "epoch": 3.834686279296875e-05, + "step": 25131, + "training_step_time": 0.13729524612426758 + }, + { + "epoch": 3.8348388671875e-05, + "model_forward_time": 0.025107145309448242, + "step": 25132 + }, + { + "epoch": 3.8348388671875e-05, + "step": 25132, + "training_step_time": 0.10460591316223145 + }, + { + "epoch": 3.834991455078125e-05, + "model_forward_time": 0.02473306655883789, + "step": 25133 + }, + { + "epoch": 3.834991455078125e-05, + "step": 25133, + "training_step_time": 0.10378789901733398 + }, + { + "epoch": 3.83514404296875e-05, + "model_forward_time": 0.024318456649780273, + "step": 25134 + }, + { + "epoch": 3.83514404296875e-05, + "step": 25134, + "training_step_time": 0.14612698554992676 + }, + { + "epoch": 3.835296630859375e-05, + "model_forward_time": 0.024158000946044922, + "step": 25135 + }, + { + "epoch": 3.835296630859375e-05, + "step": 25135, + "training_step_time": 0.10166192054748535 + }, + { + "epoch": 3.83544921875e-05, + "model_forward_time": 0.02513265609741211, + "step": 25136 + }, + { + "epoch": 3.83544921875e-05, + "step": 25136, + "training_step_time": 0.20757484436035156 + }, + { + "epoch": 3.835601806640625e-05, + "model_forward_time": 0.02434396743774414, + "step": 25137 + }, + { + "epoch": 3.835601806640625e-05, + "step": 25137, + "training_step_time": 0.131483793258667 + }, + { + "epoch": 3.83575439453125e-05, + "model_forward_time": 0.024017333984375, + "step": 25138 + }, + { + "epoch": 3.83575439453125e-05, + "step": 25138, + "training_step_time": 0.10367035865783691 + }, + { + "epoch": 3.835906982421875e-05, + "model_forward_time": 0.024717092514038086, + "step": 25139 + }, + { + "epoch": 3.835906982421875e-05, + "step": 25139, + "training_step_time": 0.11057567596435547 + }, + { + "epoch": 3.8360595703125e-05, + "grad_norm": 0.12092097848653793, + "learning_rate": 7.005041692367154e-06, + "loss": 0.0064, + "step": 25140 + }, + { + "epoch": 3.8360595703125e-05, + "model_forward_time": 0.02500295639038086, + "step": 25140 + }, + { + "epoch": 3.8360595703125e-05, + "step": 25140, + "training_step_time": 0.10316228866577148 + }, + { + "epoch": 3.836212158203125e-05, + "model_forward_time": 0.027331113815307617, + "step": 25141 + }, + { + "epoch": 3.836212158203125e-05, + "step": 25141, + "training_step_time": 0.1068110466003418 + }, + { + "epoch": 3.83636474609375e-05, + "model_forward_time": 0.025352001190185547, + "step": 25142 + }, + { + "epoch": 3.83636474609375e-05, + "step": 25142, + "training_step_time": 0.10509109497070312 + }, + { + "epoch": 3.836517333984375e-05, + "model_forward_time": 0.02638864517211914, + "step": 25143 + }, + { + "epoch": 3.836517333984375e-05, + "step": 25143, + "training_step_time": 0.10861968994140625 + }, + { + "epoch": 3.836669921875e-05, + "model_forward_time": 0.02500152587890625, + "step": 25144 + }, + { + "epoch": 3.836669921875e-05, + "step": 25144, + "training_step_time": 0.10393834114074707 + }, + { + "epoch": 3.836822509765625e-05, + "model_forward_time": 0.025623083114624023, + "step": 25145 + }, + { + "epoch": 3.836822509765625e-05, + "step": 25145, + "training_step_time": 0.10502266883850098 + }, + { + "epoch": 3.83697509765625e-05, + "model_forward_time": 0.025543689727783203, + "step": 25146 + }, + { + "epoch": 3.83697509765625e-05, + "step": 25146, + "training_step_time": 0.10497379302978516 + }, + { + "epoch": 3.837127685546875e-05, + "model_forward_time": 0.02489328384399414, + "step": 25147 + }, + { + "epoch": 3.837127685546875e-05, + "step": 25147, + "training_step_time": 0.10405993461608887 + }, + { + "epoch": 3.8372802734375e-05, + "model_forward_time": 0.025033235549926758, + "step": 25148 + }, + { + "epoch": 3.8372802734375e-05, + "step": 25148, + "training_step_time": 0.1046895980834961 + }, + { + "epoch": 3.837432861328125e-05, + "model_forward_time": 0.024989843368530273, + "step": 25149 + }, + { + "epoch": 3.837432861328125e-05, + "step": 25149, + "training_step_time": 0.10675525665283203 + }, + { + "epoch": 3.83758544921875e-05, + "grad_norm": 0.09404527395963669, + "learning_rate": 6.976933262808322e-06, + "loss": 0.0044, + "step": 25150 + }, + { + "epoch": 3.83758544921875e-05, + "model_forward_time": 0.024916410446166992, + "step": 25150 + }, + { + "epoch": 3.83758544921875e-05, + "step": 25150, + "training_step_time": 0.10892200469970703 + }, + { + "epoch": 3.837738037109375e-05, + "model_forward_time": 0.024976730346679688, + "step": 25151 + }, + { + "epoch": 3.837738037109375e-05, + "step": 25151, + "training_step_time": 0.13535356521606445 + }, + { + "epoch": 3.837890625e-05, + "model_forward_time": 0.024065494537353516, + "step": 25152 + }, + { + "epoch": 3.837890625e-05, + "step": 25152, + "training_step_time": 0.1938610076904297 + }, + { + "epoch": 3.838043212890625e-05, + "model_forward_time": 0.02413201332092285, + "step": 25153 + }, + { + "epoch": 3.838043212890625e-05, + "step": 25153, + "training_step_time": 0.21370458602905273 + }, + { + "epoch": 3.83819580078125e-05, + "model_forward_time": 0.024088144302368164, + "step": 25154 + }, + { + "epoch": 3.83819580078125e-05, + "step": 25154, + "training_step_time": 0.11835098266601562 + }, + { + "epoch": 3.838348388671875e-05, + "model_forward_time": 0.023360490798950195, + "step": 25155 + }, + { + "epoch": 3.838348388671875e-05, + "step": 25155, + "training_step_time": 0.12659215927124023 + }, + { + "epoch": 3.8385009765625e-05, + "model_forward_time": 0.024137020111083984, + "step": 25156 + }, + { + "epoch": 3.8385009765625e-05, + "step": 25156, + "training_step_time": 0.16567277908325195 + }, + { + "epoch": 3.838653564453125e-05, + "model_forward_time": 0.024781465530395508, + "step": 25157 + }, + { + "epoch": 3.838653564453125e-05, + "step": 25157, + "training_step_time": 0.11098098754882812 + }, + { + "epoch": 3.83880615234375e-05, + "model_forward_time": 0.023736238479614258, + "step": 25158 + }, + { + "epoch": 3.83880615234375e-05, + "step": 25158, + "training_step_time": 0.11285543441772461 + }, + { + "epoch": 3.838958740234375e-05, + "model_forward_time": 0.02384042739868164, + "step": 25159 + }, + { + "epoch": 3.838958740234375e-05, + "step": 25159, + "training_step_time": 0.11149430274963379 + }, + { + "epoch": 3.839111328125e-05, + "grad_norm": 0.1139388158917427, + "learning_rate": 6.948877110332386e-06, + "loss": 0.0043, + "step": 25160 + }, + { + "epoch": 3.839111328125e-05, + "model_forward_time": 0.02417922019958496, + "step": 25160 + }, + { + "epoch": 3.839111328125e-05, + "step": 25160, + "training_step_time": 0.10627007484436035 + }, + { + "epoch": 3.839263916015625e-05, + "model_forward_time": 0.0244596004486084, + "step": 25161 + }, + { + "epoch": 3.839263916015625e-05, + "step": 25161, + "training_step_time": 0.10705280303955078 + }, + { + "epoch": 3.83941650390625e-05, + "model_forward_time": 0.02525639533996582, + "step": 25162 + }, + { + "epoch": 3.83941650390625e-05, + "step": 25162, + "training_step_time": 0.10651969909667969 + }, + { + "epoch": 3.839569091796875e-05, + "model_forward_time": 0.024994850158691406, + "step": 25163 + }, + { + "epoch": 3.839569091796875e-05, + "step": 25163, + "training_step_time": 0.10958194732666016 + }, + { + "epoch": 3.8397216796875e-05, + "model_forward_time": 0.024828433990478516, + "step": 25164 + }, + { + "epoch": 3.8397216796875e-05, + "step": 25164, + "training_step_time": 0.10748767852783203 + }, + { + "epoch": 3.839874267578125e-05, + "model_forward_time": 0.02532339096069336, + "step": 25165 + }, + { + "epoch": 3.839874267578125e-05, + "step": 25165, + "training_step_time": 0.1090538501739502 + }, + { + "epoch": 3.84002685546875e-05, + "model_forward_time": 0.025115489959716797, + "step": 25166 + }, + { + "epoch": 3.84002685546875e-05, + "step": 25166, + "training_step_time": 0.14368319511413574 + }, + { + "epoch": 3.840179443359375e-05, + "model_forward_time": 0.02502155303955078, + "step": 25167 + }, + { + "epoch": 3.840179443359375e-05, + "step": 25167, + "training_step_time": 0.11168575286865234 + }, + { + "epoch": 3.84033203125e-05, + "model_forward_time": 0.02512502670288086, + "step": 25168 + }, + { + "epoch": 3.84033203125e-05, + "step": 25168, + "training_step_time": 0.10748672485351562 + }, + { + "epoch": 3.840484619140625e-05, + "model_forward_time": 0.025332927703857422, + "step": 25169 + }, + { + "epoch": 3.840484619140625e-05, + "step": 25169, + "training_step_time": 0.11197352409362793 + }, + { + "epoch": 3.84063720703125e-05, + "grad_norm": 0.08475989103317261, + "learning_rate": 6.92087326903022e-06, + "loss": 0.0028, + "step": 25170 + }, + { + "epoch": 3.84063720703125e-05, + "model_forward_time": 0.026821374893188477, + "step": 25170 + }, + { + "epoch": 3.84063720703125e-05, + "step": 25170, + "training_step_time": 0.1554727554321289 + }, + { + "epoch": 3.840789794921875e-05, + "model_forward_time": 0.02486419677734375, + "step": 25171 + }, + { + "epoch": 3.840789794921875e-05, + "step": 25171, + "training_step_time": 0.15167880058288574 + }, + { + "epoch": 3.8409423828125e-05, + "model_forward_time": 0.023644447326660156, + "step": 25172 + }, + { + "epoch": 3.8409423828125e-05, + "step": 25172, + "training_step_time": 0.17271733283996582 + }, + { + "epoch": 3.841094970703125e-05, + "model_forward_time": 0.024074316024780273, + "step": 25173 + }, + { + "epoch": 3.841094970703125e-05, + "step": 25173, + "training_step_time": 0.13364219665527344 + }, + { + "epoch": 3.84124755859375e-05, + "model_forward_time": 0.024051904678344727, + "step": 25174 + }, + { + "epoch": 3.84124755859375e-05, + "step": 25174, + "training_step_time": 0.1359097957611084 + }, + { + "epoch": 3.841400146484375e-05, + "model_forward_time": 0.02477550506591797, + "step": 25175 + }, + { + "epoch": 3.841400146484375e-05, + "step": 25175, + "training_step_time": 0.12213563919067383 + }, + { + "epoch": 3.841552734375e-05, + "model_forward_time": 0.024684667587280273, + "step": 25176 + }, + { + "epoch": 3.841552734375e-05, + "step": 25176, + "training_step_time": 0.12748503684997559 + }, + { + "epoch": 3.841705322265625e-05, + "model_forward_time": 0.02458786964416504, + "step": 25177 + }, + { + "epoch": 3.841705322265625e-05, + "step": 25177, + "training_step_time": 0.11606669425964355 + }, + { + "epoch": 3.84185791015625e-05, + "model_forward_time": 0.025003910064697266, + "step": 25178 + }, + { + "epoch": 3.84185791015625e-05, + "step": 25178, + "training_step_time": 0.10651397705078125 + }, + { + "epoch": 3.842010498046875e-05, + "model_forward_time": 0.026062726974487305, + "step": 25179 + }, + { + "epoch": 3.842010498046875e-05, + "step": 25179, + "training_step_time": 0.10550117492675781 + }, + { + "epoch": 3.8421630859375e-05, + "grad_norm": 0.1294359266757965, + "learning_rate": 6.892921772929112e-06, + "loss": 0.0046, + "step": 25180 + }, + { + "epoch": 3.8421630859375e-05, + "model_forward_time": 0.02583479881286621, + "step": 25180 + }, + { + "epoch": 3.8421630859375e-05, + "step": 25180, + "training_step_time": 0.11344361305236816 + }, + { + "epoch": 3.842315673828125e-05, + "model_forward_time": 0.025258302688598633, + "step": 25181 + }, + { + "epoch": 3.842315673828125e-05, + "step": 25181, + "training_step_time": 0.1184835433959961 + }, + { + "epoch": 3.84246826171875e-05, + "model_forward_time": 0.025079011917114258, + "step": 25182 + }, + { + "epoch": 3.84246826171875e-05, + "step": 25182, + "training_step_time": 0.13540220260620117 + }, + { + "epoch": 3.842620849609375e-05, + "model_forward_time": 0.02540755271911621, + "step": 25183 + }, + { + "epoch": 3.842620849609375e-05, + "step": 25183, + "training_step_time": 0.2040727138519287 + }, + { + "epoch": 3.8427734375e-05, + "model_forward_time": 0.024396181106567383, + "step": 25184 + }, + { + "epoch": 3.8427734375e-05, + "step": 25184, + "training_step_time": 0.14046525955200195 + }, + { + "epoch": 3.842926025390625e-05, + "model_forward_time": 0.024919509887695312, + "step": 25185 + }, + { + "epoch": 3.842926025390625e-05, + "step": 25185, + "training_step_time": 0.13116455078125 + }, + { + "epoch": 3.84307861328125e-05, + "model_forward_time": 0.02443552017211914, + "step": 25186 + }, + { + "epoch": 3.84307861328125e-05, + "step": 25186, + "training_step_time": 0.12222766876220703 + }, + { + "epoch": 3.843231201171875e-05, + "model_forward_time": 0.024930477142333984, + "step": 25187 + }, + { + "epoch": 3.843231201171875e-05, + "step": 25187, + "training_step_time": 0.11912012100219727 + }, + { + "epoch": 3.8433837890625e-05, + "model_forward_time": 0.024785757064819336, + "step": 25188 + }, + { + "epoch": 3.8433837890625e-05, + "step": 25188, + "training_step_time": 0.11849308013916016 + }, + { + "epoch": 3.843536376953125e-05, + "model_forward_time": 0.025221824645996094, + "step": 25189 + }, + { + "epoch": 3.843536376953125e-05, + "step": 25189, + "training_step_time": 0.10950469970703125 + }, + { + "epoch": 3.84368896484375e-05, + "grad_norm": 0.09333091974258423, + "learning_rate": 6.865022655992798e-06, + "loss": 0.0083, + "step": 25190 + }, + { + "epoch": 3.84368896484375e-05, + "model_forward_time": 0.024535417556762695, + "step": 25190 + }, + { + "epoch": 3.84368896484375e-05, + "step": 25190, + "training_step_time": 0.11505532264709473 + }, + { + "epoch": 3.843841552734375e-05, + "model_forward_time": 0.02514815330505371, + "step": 25191 + }, + { + "epoch": 3.843841552734375e-05, + "step": 25191, + "training_step_time": 0.11290836334228516 + }, + { + "epoch": 3.843994140625e-05, + "model_forward_time": 0.024916410446166992, + "step": 25192 + }, + { + "epoch": 3.843994140625e-05, + "step": 25192, + "training_step_time": 0.8118109703063965 + }, + { + "epoch": 3.844146728515625e-05, + "model_forward_time": 0.02252984046936035, + "step": 25193 + }, + { + "epoch": 3.844146728515625e-05, + "step": 25193, + "training_step_time": 0.10952901840209961 + }, + { + "epoch": 3.84429931640625e-05, + "model_forward_time": 0.02465200424194336, + "step": 25194 + }, + { + "epoch": 3.84429931640625e-05, + "step": 25194, + "training_step_time": 0.14066648483276367 + }, + { + "epoch": 3.844451904296875e-05, + "model_forward_time": 0.024449586868286133, + "step": 25195 + }, + { + "epoch": 3.844451904296875e-05, + "step": 25195, + "training_step_time": 0.1685500144958496 + }, + { + "epoch": 3.8446044921875e-05, + "model_forward_time": 0.024286746978759766, + "step": 25196 + }, + { + "epoch": 3.8446044921875e-05, + "step": 25196, + "training_step_time": 0.10648798942565918 + }, + { + "epoch": 3.844757080078125e-05, + "model_forward_time": 0.02438497543334961, + "step": 25197 + }, + { + "epoch": 3.844757080078125e-05, + "step": 25197, + "training_step_time": 0.10839629173278809 + }, + { + "epoch": 3.84490966796875e-05, + "model_forward_time": 0.024999618530273438, + "step": 25198 + }, + { + "epoch": 3.84490966796875e-05, + "step": 25198, + "training_step_time": 0.10742521286010742 + }, + { + "epoch": 3.845062255859375e-05, + "model_forward_time": 0.028119325637817383, + "step": 25199 + }, + { + "epoch": 3.845062255859375e-05, + "step": 25199, + "training_step_time": 0.10874581336975098 + }, + { + "epoch": 3.84521484375e-05, + "grad_norm": 0.10074328631162643, + "learning_rate": 6.837175952121306e-06, + "loss": 0.0033, + "step": 25200 + }, + { + "epoch": 3.84521484375e-05, + "model_forward_time": 0.025171279907226562, + "step": 25200 + }, + { + "epoch": 3.84521484375e-05, + "step": 25200, + "training_step_time": 0.11043524742126465 + }, + { + "epoch": 3.845367431640625e-05, + "model_forward_time": 0.024697065353393555, + "step": 25201 + }, + { + "epoch": 3.845367431640625e-05, + "step": 25201, + "training_step_time": 0.11094307899475098 + }, + { + "epoch": 3.84552001953125e-05, + "model_forward_time": 0.024790287017822266, + "step": 25202 + }, + { + "epoch": 3.84552001953125e-05, + "step": 25202, + "training_step_time": 0.10855364799499512 + }, + { + "epoch": 3.845672607421875e-05, + "model_forward_time": 0.024943113327026367, + "step": 25203 + }, + { + "epoch": 3.845672607421875e-05, + "step": 25203, + "training_step_time": 0.10452532768249512 + }, + { + "epoch": 3.8458251953125e-05, + "model_forward_time": 0.025376319885253906, + "step": 25204 + }, + { + "epoch": 3.8458251953125e-05, + "step": 25204, + "training_step_time": 0.10694217681884766 + }, + { + "epoch": 3.845977783203125e-05, + "model_forward_time": 0.02469921112060547, + "step": 25205 + }, + { + "epoch": 3.845977783203125e-05, + "step": 25205, + "training_step_time": 0.10993647575378418 + }, + { + "epoch": 3.84613037109375e-05, + "model_forward_time": 0.02429652214050293, + "step": 25206 + }, + { + "epoch": 3.84613037109375e-05, + "step": 25206, + "training_step_time": 0.11123275756835938 + }, + { + "epoch": 3.846282958984375e-05, + "model_forward_time": 0.024782657623291016, + "step": 25207 + }, + { + "epoch": 3.846282958984375e-05, + "step": 25207, + "training_step_time": 0.1198720932006836 + }, + { + "epoch": 3.846435546875e-05, + "model_forward_time": 0.02489304542541504, + "step": 25208 + }, + { + "epoch": 3.846435546875e-05, + "step": 25208, + "training_step_time": 0.1151118278503418 + }, + { + "epoch": 3.846588134765625e-05, + "model_forward_time": 0.02565455436706543, + "step": 25209 + }, + { + "epoch": 3.846588134765625e-05, + "step": 25209, + "training_step_time": 0.10985040664672852 + }, + { + "epoch": 3.84674072265625e-05, + "grad_norm": 0.14441217482089996, + "learning_rate": 6.809381695151029e-06, + "loss": 0.0081, + "step": 25210 + }, + { + "epoch": 3.84674072265625e-05, + "model_forward_time": 0.024967432022094727, + "step": 25210 + }, + { + "epoch": 3.84674072265625e-05, + "step": 25210, + "training_step_time": 0.10529351234436035 + }, + { + "epoch": 3.846893310546875e-05, + "model_forward_time": 0.02514958381652832, + "step": 25211 + }, + { + "epoch": 3.846893310546875e-05, + "step": 25211, + "training_step_time": 0.16489481925964355 + }, + { + "epoch": 3.8470458984375e-05, + "model_forward_time": 0.024161815643310547, + "step": 25212 + }, + { + "epoch": 3.8470458984375e-05, + "step": 25212, + "training_step_time": 0.16814017295837402 + }, + { + "epoch": 3.847198486328125e-05, + "model_forward_time": 0.024060964584350586, + "step": 25213 + }, + { + "epoch": 3.847198486328125e-05, + "step": 25213, + "training_step_time": 0.10434556007385254 + }, + { + "epoch": 3.84735107421875e-05, + "model_forward_time": 0.02449941635131836, + "step": 25214 + }, + { + "epoch": 3.84735107421875e-05, + "step": 25214, + "training_step_time": 0.11175775527954102 + }, + { + "epoch": 3.847503662109375e-05, + "model_forward_time": 0.025772809982299805, + "step": 25215 + }, + { + "epoch": 3.847503662109375e-05, + "step": 25215, + "training_step_time": 0.12450742721557617 + }, + { + "epoch": 3.84765625e-05, + "model_forward_time": 0.025485992431640625, + "step": 25216 + }, + { + "epoch": 3.84765625e-05, + "step": 25216, + "training_step_time": 0.12683820724487305 + }, + { + "epoch": 3.847808837890625e-05, + "model_forward_time": 0.024663448333740234, + "step": 25217 + }, + { + "epoch": 3.847808837890625e-05, + "step": 25217, + "training_step_time": 0.1242976188659668 + }, + { + "epoch": 3.84796142578125e-05, + "model_forward_time": 0.02523517608642578, + "step": 25218 + }, + { + "epoch": 3.84796142578125e-05, + "step": 25218, + "training_step_time": 0.11219596862792969 + }, + { + "epoch": 3.848114013671875e-05, + "model_forward_time": 0.025315523147583008, + "step": 25219 + }, + { + "epoch": 3.848114013671875e-05, + "step": 25219, + "training_step_time": 0.10870885848999023 + }, + { + "epoch": 3.8482666015625e-05, + "grad_norm": 0.0839950442314148, + "learning_rate": 6.781639918854604e-06, + "loss": 0.0044, + "step": 25220 + }, + { + "epoch": 3.8482666015625e-05, + "model_forward_time": 0.024003982543945312, + "step": 25220 + }, + { + "epoch": 3.8482666015625e-05, + "step": 25220, + "training_step_time": 0.1184072494506836 + }, + { + "epoch": 3.848419189453125e-05, + "model_forward_time": 0.02411937713623047, + "step": 25221 + }, + { + "epoch": 3.848419189453125e-05, + "step": 25221, + "training_step_time": 0.21102666854858398 + }, + { + "epoch": 3.84857177734375e-05, + "model_forward_time": 0.026324987411499023, + "step": 25222 + }, + { + "epoch": 3.84857177734375e-05, + "step": 25222, + "training_step_time": 0.16436362266540527 + }, + { + "epoch": 3.848724365234375e-05, + "model_forward_time": 0.024543285369873047, + "step": 25223 + }, + { + "epoch": 3.848724365234375e-05, + "step": 25223, + "training_step_time": 0.17293238639831543 + }, + { + "epoch": 3.848876953125e-05, + "model_forward_time": 0.024868488311767578, + "step": 25224 + }, + { + "epoch": 3.848876953125e-05, + "step": 25224, + "training_step_time": 0.21651887893676758 + }, + { + "epoch": 3.849029541015625e-05, + "model_forward_time": 0.02461862564086914, + "step": 25225 + }, + { + "epoch": 3.849029541015625e-05, + "step": 25225, + "training_step_time": 0.14304709434509277 + }, + { + "epoch": 3.84918212890625e-05, + "model_forward_time": 0.02400660514831543, + "step": 25226 + }, + { + "epoch": 3.84918212890625e-05, + "step": 25226, + "training_step_time": 0.12431192398071289 + }, + { + "epoch": 3.849334716796875e-05, + "model_forward_time": 0.02480006217956543, + "step": 25227 + }, + { + "epoch": 3.849334716796875e-05, + "step": 25227, + "training_step_time": 0.12487483024597168 + }, + { + "epoch": 3.8494873046875e-05, + "model_forward_time": 0.027973413467407227, + "step": 25228 + }, + { + "epoch": 3.8494873046875e-05, + "step": 25228, + "training_step_time": 0.12360429763793945 + }, + { + "epoch": 3.849639892578125e-05, + "model_forward_time": 0.025107383728027344, + "step": 25229 + }, + { + "epoch": 3.849639892578125e-05, + "step": 25229, + "training_step_time": 0.11370325088500977 + }, + { + "epoch": 3.84979248046875e-05, + "grad_norm": 0.16888341307640076, + "learning_rate": 6.753950656940905e-06, + "loss": 0.0071, + "step": 25230 + }, + { + "epoch": 3.84979248046875e-05, + "model_forward_time": 0.025272130966186523, + "step": 25230 + }, + { + "epoch": 3.84979248046875e-05, + "step": 25230, + "training_step_time": 0.11623501777648926 + }, + { + "epoch": 3.849945068359375e-05, + "model_forward_time": 0.02508234977722168, + "step": 25231 + }, + { + "epoch": 3.849945068359375e-05, + "step": 25231, + "training_step_time": 0.1121664047241211 + }, + { + "epoch": 3.85009765625e-05, + "model_forward_time": 0.02481555938720703, + "step": 25232 + }, + { + "epoch": 3.85009765625e-05, + "step": 25232, + "training_step_time": 0.10651111602783203 + }, + { + "epoch": 3.850250244140625e-05, + "model_forward_time": 0.02503061294555664, + "step": 25233 + }, + { + "epoch": 3.850250244140625e-05, + "step": 25233, + "training_step_time": 0.10639071464538574 + }, + { + "epoch": 3.85040283203125e-05, + "model_forward_time": 0.02515244483947754, + "step": 25234 + }, + { + "epoch": 3.85040283203125e-05, + "step": 25234, + "training_step_time": 0.1109929084777832 + }, + { + "epoch": 3.850555419921875e-05, + "model_forward_time": 0.025001049041748047, + "step": 25235 + }, + { + "epoch": 3.850555419921875e-05, + "step": 25235, + "training_step_time": 0.15361475944519043 + }, + { + "epoch": 3.8507080078125e-05, + "model_forward_time": 0.02449822425842285, + "step": 25236 + }, + { + "epoch": 3.8507080078125e-05, + "step": 25236, + "training_step_time": 0.11384963989257812 + }, + { + "epoch": 3.850860595703125e-05, + "model_forward_time": 0.02476048469543457, + "step": 25237 + }, + { + "epoch": 3.850860595703125e-05, + "step": 25237, + "training_step_time": 0.2113199234008789 + }, + { + "epoch": 3.85101318359375e-05, + "model_forward_time": 0.023433446884155273, + "step": 25238 + }, + { + "epoch": 3.85101318359375e-05, + "step": 25238, + "training_step_time": 0.10644245147705078 + }, + { + "epoch": 3.851165771484375e-05, + "model_forward_time": 0.024072647094726562, + "step": 25239 + }, + { + "epoch": 3.851165771484375e-05, + "step": 25239, + "training_step_time": 0.12456560134887695 + }, + { + "epoch": 3.851318359375e-05, + "grad_norm": 0.20325633883476257, + "learning_rate": 6.726313943054991e-06, + "loss": 0.0037, + "step": 25240 + }, + { + "epoch": 3.851318359375e-05, + "model_forward_time": 0.024942398071289062, + "step": 25240 + }, + { + "epoch": 3.851318359375e-05, + "step": 25240, + "training_step_time": 0.1884911060333252 + }, + { + "epoch": 3.851470947265625e-05, + "model_forward_time": 0.023912429809570312, + "step": 25241 + }, + { + "epoch": 3.851470947265625e-05, + "step": 25241, + "training_step_time": 0.10571026802062988 + }, + { + "epoch": 3.85162353515625e-05, + "model_forward_time": 0.02435302734375, + "step": 25242 + }, + { + "epoch": 3.85162353515625e-05, + "step": 25242, + "training_step_time": 0.10498785972595215 + }, + { + "epoch": 3.851776123046875e-05, + "model_forward_time": 0.02514195442199707, + "step": 25243 + }, + { + "epoch": 3.851776123046875e-05, + "step": 25243, + "training_step_time": 0.10512852668762207 + }, + { + "epoch": 3.8519287109375e-05, + "model_forward_time": 0.02498173713684082, + "step": 25244 + }, + { + "epoch": 3.8519287109375e-05, + "step": 25244, + "training_step_time": 0.1062934398651123 + }, + { + "epoch": 3.852081298828125e-05, + "model_forward_time": 0.025046586990356445, + "step": 25245 + }, + { + "epoch": 3.852081298828125e-05, + "step": 25245, + "training_step_time": 0.10586977005004883 + }, + { + "epoch": 3.85223388671875e-05, + "model_forward_time": 0.025201797485351562, + "step": 25246 + }, + { + "epoch": 3.85223388671875e-05, + "step": 25246, + "training_step_time": 0.11025691032409668 + }, + { + "epoch": 3.852386474609375e-05, + "model_forward_time": 0.02483844757080078, + "step": 25247 + }, + { + "epoch": 3.852386474609375e-05, + "step": 25247, + "training_step_time": 0.10923433303833008 + }, + { + "epoch": 3.8525390625e-05, + "model_forward_time": 0.025164127349853516, + "step": 25248 + }, + { + "epoch": 3.8525390625e-05, + "step": 25248, + "training_step_time": 0.1074984073638916 + }, + { + "epoch": 3.852691650390625e-05, + "model_forward_time": 0.025119543075561523, + "step": 25249 + }, + { + "epoch": 3.852691650390625e-05, + "step": 25249, + "training_step_time": 0.10442519187927246 + }, + { + "epoch": 3.85284423828125e-05, + "grad_norm": 0.14675350487232208, + "learning_rate": 6.698729810778065e-06, + "loss": 0.0026, + "step": 25250 + }, + { + "epoch": 3.85284423828125e-05, + "model_forward_time": 0.024845600128173828, + "step": 25250 + }, + { + "epoch": 3.85284423828125e-05, + "step": 25250, + "training_step_time": 0.18504643440246582 + }, + { + "epoch": 3.852996826171875e-05, + "model_forward_time": 0.024434328079223633, + "step": 25251 + }, + { + "epoch": 3.852996826171875e-05, + "step": 25251, + "training_step_time": 0.1173238754272461 + }, + { + "epoch": 3.8531494140625e-05, + "model_forward_time": 0.025635957717895508, + "step": 25252 + }, + { + "epoch": 3.8531494140625e-05, + "step": 25252, + "training_step_time": 0.11297369003295898 + }, + { + "epoch": 3.853302001953125e-05, + "model_forward_time": 0.024103879928588867, + "step": 25253 + }, + { + "epoch": 3.853302001953125e-05, + "step": 25253, + "training_step_time": 0.1129603385925293 + }, + { + "epoch": 3.85345458984375e-05, + "model_forward_time": 0.024142026901245117, + "step": 25254 + }, + { + "epoch": 3.85345458984375e-05, + "step": 25254, + "training_step_time": 0.11487889289855957 + }, + { + "epoch": 3.853607177734375e-05, + "model_forward_time": 0.02368330955505371, + "step": 25255 + }, + { + "epoch": 3.853607177734375e-05, + "step": 25255, + "training_step_time": 0.1786808967590332 + }, + { + "epoch": 3.853759765625e-05, + "model_forward_time": 0.024399757385253906, + "step": 25256 + }, + { + "epoch": 3.853759765625e-05, + "step": 25256, + "training_step_time": 0.16494536399841309 + }, + { + "epoch": 3.853912353515625e-05, + "model_forward_time": 0.024187564849853516, + "step": 25257 + }, + { + "epoch": 3.853912353515625e-05, + "step": 25257, + "training_step_time": 0.18324923515319824 + }, + { + "epoch": 3.85406494140625e-05, + "model_forward_time": 0.024520397186279297, + "step": 25258 + }, + { + "epoch": 3.85406494140625e-05, + "step": 25258, + "training_step_time": 0.1116330623626709 + }, + { + "epoch": 3.854217529296875e-05, + "model_forward_time": 0.02411341667175293, + "step": 25259 + }, + { + "epoch": 3.854217529296875e-05, + "step": 25259, + "training_step_time": 0.12581205368041992 + }, + { + "epoch": 3.8543701171875e-05, + "grad_norm": 0.0906037762761116, + "learning_rate": 6.671198293627479e-06, + "loss": 0.0033, + "step": 25260 + }, + { + "epoch": 3.8543701171875e-05, + "model_forward_time": 0.024939298629760742, + "step": 25260 + }, + { + "epoch": 3.8543701171875e-05, + "step": 25260, + "training_step_time": 0.12609195709228516 + }, + { + "epoch": 3.854522705078125e-05, + "model_forward_time": 0.025319337844848633, + "step": 25261 + }, + { + "epoch": 3.854522705078125e-05, + "step": 25261, + "training_step_time": 0.11149287223815918 + }, + { + "epoch": 3.85467529296875e-05, + "model_forward_time": 0.02513265609741211, + "step": 25262 + }, + { + "epoch": 3.85467529296875e-05, + "step": 25262, + "training_step_time": 0.12021136283874512 + }, + { + "epoch": 3.854827880859375e-05, + "model_forward_time": 0.025261402130126953, + "step": 25263 + }, + { + "epoch": 3.854827880859375e-05, + "step": 25263, + "training_step_time": 0.10793924331665039 + }, + { + "epoch": 3.85498046875e-05, + "model_forward_time": 0.02543020248413086, + "step": 25264 + }, + { + "epoch": 3.85498046875e-05, + "step": 25264, + "training_step_time": 0.10606551170349121 + }, + { + "epoch": 3.855133056640625e-05, + "model_forward_time": 0.0254666805267334, + "step": 25265 + }, + { + "epoch": 3.855133056640625e-05, + "step": 25265, + "training_step_time": 0.10589218139648438 + }, + { + "epoch": 3.85528564453125e-05, + "model_forward_time": 0.025512218475341797, + "step": 25266 + }, + { + "epoch": 3.85528564453125e-05, + "step": 25266, + "training_step_time": 0.1088101863861084 + }, + { + "epoch": 3.855438232421875e-05, + "model_forward_time": 0.02538442611694336, + "step": 25267 + }, + { + "epoch": 3.855438232421875e-05, + "step": 25267, + "training_step_time": 0.10999250411987305 + }, + { + "epoch": 3.8555908203125e-05, + "model_forward_time": 0.02529740333557129, + "step": 25268 + }, + { + "epoch": 3.8555908203125e-05, + "step": 25268, + "training_step_time": 0.1115717887878418 + }, + { + "epoch": 3.855743408203125e-05, + "model_forward_time": 0.02535414695739746, + "step": 25269 + }, + { + "epoch": 3.855743408203125e-05, + "step": 25269, + "training_step_time": 0.2054903507232666 + }, + { + "epoch": 3.85589599609375e-05, + "grad_norm": 0.08070753514766693, + "learning_rate": 6.6437194250566e-06, + "loss": 0.0056, + "step": 25270 + }, + { + "epoch": 3.85589599609375e-05, + "model_forward_time": 0.0259246826171875, + "step": 25270 + }, + { + "epoch": 3.85589599609375e-05, + "step": 25270, + "training_step_time": 0.12135863304138184 + }, + { + "epoch": 3.856048583984375e-05, + "model_forward_time": 0.024018287658691406, + "step": 25271 + }, + { + "epoch": 3.856048583984375e-05, + "step": 25271, + "training_step_time": 0.10303616523742676 + }, + { + "epoch": 3.856201171875e-05, + "model_forward_time": 0.024944543838500977, + "step": 25272 + }, + { + "epoch": 3.856201171875e-05, + "step": 25272, + "training_step_time": 0.10857033729553223 + }, + { + "epoch": 3.856353759765625e-05, + "model_forward_time": 0.02508831024169922, + "step": 25273 + }, + { + "epoch": 3.856353759765625e-05, + "step": 25273, + "training_step_time": 0.10518026351928711 + }, + { + "epoch": 3.85650634765625e-05, + "model_forward_time": 0.025967836380004883, + "step": 25274 + }, + { + "epoch": 3.85650634765625e-05, + "step": 25274, + "training_step_time": 0.1077573299407959 + }, + { + "epoch": 3.856658935546875e-05, + "model_forward_time": 0.02474665641784668, + "step": 25275 + }, + { + "epoch": 3.856658935546875e-05, + "step": 25275, + "training_step_time": 0.10894632339477539 + }, + { + "epoch": 3.8568115234375e-05, + "model_forward_time": 0.02567911148071289, + "step": 25276 + }, + { + "epoch": 3.8568115234375e-05, + "step": 25276, + "training_step_time": 0.10602593421936035 + }, + { + "epoch": 3.856964111328125e-05, + "model_forward_time": 0.02532672882080078, + "step": 25277 + }, + { + "epoch": 3.856964111328125e-05, + "step": 25277, + "training_step_time": 0.10677409172058105 + }, + { + "epoch": 3.85711669921875e-05, + "model_forward_time": 0.02521967887878418, + "step": 25278 + }, + { + "epoch": 3.85711669921875e-05, + "step": 25278, + "training_step_time": 0.10509228706359863 + }, + { + "epoch": 3.857269287109375e-05, + "model_forward_time": 0.0249483585357666, + "step": 25279 + }, + { + "epoch": 3.857269287109375e-05, + "step": 25279, + "training_step_time": 0.10787010192871094 + }, + { + "epoch": 3.857421875e-05, + "grad_norm": 0.11665597558021545, + "learning_rate": 6.6162932384548515e-06, + "loss": 0.0056, + "step": 25280 + }, + { + "epoch": 3.857421875e-05, + "model_forward_time": 0.02484130859375, + "step": 25280 + }, + { + "epoch": 3.857421875e-05, + "step": 25280, + "training_step_time": 0.10541749000549316 + }, + { + "epoch": 3.857574462890625e-05, + "model_forward_time": 0.025354862213134766, + "step": 25281 + }, + { + "epoch": 3.857574462890625e-05, + "step": 25281, + "training_step_time": 0.19835305213928223 + }, + { + "epoch": 3.85772705078125e-05, + "model_forward_time": 0.02431964874267578, + "step": 25282 + }, + { + "epoch": 3.85772705078125e-05, + "step": 25282, + "training_step_time": 0.1661357879638672 + }, + { + "epoch": 3.857879638671875e-05, + "model_forward_time": 0.024013280868530273, + "step": 25283 + }, + { + "epoch": 3.857879638671875e-05, + "step": 25283, + "training_step_time": 0.1451733112335205 + }, + { + "epoch": 3.8580322265625e-05, + "model_forward_time": 0.024252891540527344, + "step": 25284 + }, + { + "epoch": 3.8580322265625e-05, + "step": 25284, + "training_step_time": 0.12817168235778809 + }, + { + "epoch": 3.858184814453125e-05, + "model_forward_time": 0.02437138557434082, + "step": 25285 + }, + { + "epoch": 3.858184814453125e-05, + "step": 25285, + "training_step_time": 0.15700125694274902 + }, + { + "epoch": 3.85833740234375e-05, + "model_forward_time": 0.02458333969116211, + "step": 25286 + }, + { + "epoch": 3.85833740234375e-05, + "step": 25286, + "training_step_time": 0.10454750061035156 + }, + { + "epoch": 3.858489990234375e-05, + "model_forward_time": 0.025320768356323242, + "step": 25287 + }, + { + "epoch": 3.858489990234375e-05, + "step": 25287, + "training_step_time": 0.10701179504394531 + }, + { + "epoch": 3.858642578125e-05, + "model_forward_time": 0.02560734748840332, + "step": 25288 + }, + { + "epoch": 3.858642578125e-05, + "step": 25288, + "training_step_time": 0.1059112548828125 + }, + { + "epoch": 3.858795166015625e-05, + "model_forward_time": 0.028051376342773438, + "step": 25289 + }, + { + "epoch": 3.858795166015625e-05, + "step": 25289, + "training_step_time": 0.10999107360839844 + }, + { + "epoch": 3.85894775390625e-05, + "grad_norm": 0.059240199625492096, + "learning_rate": 6.588919767147639e-06, + "loss": 0.0036, + "step": 25290 + }, + { + "epoch": 3.85894775390625e-05, + "model_forward_time": 0.02502155303955078, + "step": 25290 + }, + { + "epoch": 3.85894775390625e-05, + "step": 25290, + "training_step_time": 0.10929417610168457 + }, + { + "epoch": 3.859100341796875e-05, + "model_forward_time": 0.025186777114868164, + "step": 25291 + }, + { + "epoch": 3.859100341796875e-05, + "step": 25291, + "training_step_time": 0.10497212409973145 + }, + { + "epoch": 3.8592529296875e-05, + "model_forward_time": 0.02509284019470215, + "step": 25292 + }, + { + "epoch": 3.8592529296875e-05, + "step": 25292, + "training_step_time": 0.10451841354370117 + }, + { + "epoch": 3.859405517578125e-05, + "model_forward_time": 0.02517223358154297, + "step": 25293 + }, + { + "epoch": 3.859405517578125e-05, + "step": 25293, + "training_step_time": 0.10543155670166016 + }, + { + "epoch": 3.85955810546875e-05, + "model_forward_time": 0.025140047073364258, + "step": 25294 + }, + { + "epoch": 3.85955810546875e-05, + "step": 25294, + "training_step_time": 0.11189889907836914 + }, + { + "epoch": 3.859710693359375e-05, + "model_forward_time": 0.02509140968322754, + "step": 25295 + }, + { + "epoch": 3.859710693359375e-05, + "step": 25295, + "training_step_time": 0.10634517669677734 + }, + { + "epoch": 3.85986328125e-05, + "model_forward_time": 0.026823997497558594, + "step": 25296 + }, + { + "epoch": 3.85986328125e-05, + "step": 25296, + "training_step_time": 0.1382431983947754 + }, + { + "epoch": 3.860015869140625e-05, + "model_forward_time": 0.025195598602294922, + "step": 25297 + }, + { + "epoch": 3.860015869140625e-05, + "step": 25297, + "training_step_time": 0.1555635929107666 + }, + { + "epoch": 3.86016845703125e-05, + "model_forward_time": 0.024409055709838867, + "step": 25298 + }, + { + "epoch": 3.86016845703125e-05, + "step": 25298, + "training_step_time": 0.13388967514038086 + }, + { + "epoch": 3.860321044921875e-05, + "model_forward_time": 0.024042606353759766, + "step": 25299 + }, + { + "epoch": 3.860321044921875e-05, + "step": 25299, + "training_step_time": 0.11905717849731445 + }, + { + "epoch": 3.8604736328125e-05, + "grad_norm": 0.1335275024175644, + "learning_rate": 6.561599044396288e-06, + "loss": 0.0055, + "step": 25300 + }, + { + "epoch": 3.8604736328125e-05, + "model_forward_time": 0.025087594985961914, + "step": 25300 + }, + { + "epoch": 3.8604736328125e-05, + "step": 25300, + "training_step_time": 0.18134593963623047 + }, + { + "epoch": 3.860626220703125e-05, + "model_forward_time": 0.025032520294189453, + "step": 25301 + }, + { + "epoch": 3.860626220703125e-05, + "step": 25301, + "training_step_time": 0.1632080078125 + }, + { + "epoch": 3.86077880859375e-05, + "model_forward_time": 0.024095773696899414, + "step": 25302 + }, + { + "epoch": 3.86077880859375e-05, + "step": 25302, + "training_step_time": 0.17557477951049805 + }, + { + "epoch": 3.860931396484375e-05, + "model_forward_time": 0.024164676666259766, + "step": 25303 + }, + { + "epoch": 3.860931396484375e-05, + "step": 25303, + "training_step_time": 0.10519266128540039 + }, + { + "epoch": 3.861083984375e-05, + "model_forward_time": 0.024523258209228516, + "step": 25304 + }, + { + "epoch": 3.861083984375e-05, + "step": 25304, + "training_step_time": 0.11417293548583984 + }, + { + "epoch": 3.861236572265625e-05, + "model_forward_time": 0.024652719497680664, + "step": 25305 + }, + { + "epoch": 3.861236572265625e-05, + "step": 25305, + "training_step_time": 0.12977242469787598 + }, + { + "epoch": 3.86138916015625e-05, + "model_forward_time": 0.02606821060180664, + "step": 25306 + }, + { + "epoch": 3.86138916015625e-05, + "step": 25306, + "training_step_time": 0.12696194648742676 + }, + { + "epoch": 3.861541748046875e-05, + "model_forward_time": 0.024944305419921875, + "step": 25307 + }, + { + "epoch": 3.861541748046875e-05, + "step": 25307, + "training_step_time": 0.11117339134216309 + }, + { + "epoch": 3.8616943359375e-05, + "model_forward_time": 0.02504730224609375, + "step": 25308 + }, + { + "epoch": 3.8616943359375e-05, + "step": 25308, + "training_step_time": 0.1170191764831543 + }, + { + "epoch": 3.861846923828125e-05, + "model_forward_time": 0.025201082229614258, + "step": 25309 + }, + { + "epoch": 3.861846923828125e-05, + "step": 25309, + "training_step_time": 0.10639405250549316 + }, + { + "epoch": 3.86199951171875e-05, + "grad_norm": 0.3475443720817566, + "learning_rate": 6.5343311033980895e-06, + "loss": 0.0055, + "step": 25310 + }, + { + "epoch": 3.86199951171875e-05, + "model_forward_time": 0.025807619094848633, + "step": 25310 + }, + { + "epoch": 3.86199951171875e-05, + "step": 25310, + "training_step_time": 0.10708856582641602 + }, + { + "epoch": 3.862152099609375e-05, + "model_forward_time": 0.02544879913330078, + "step": 25311 + }, + { + "epoch": 3.862152099609375e-05, + "step": 25311, + "training_step_time": 0.15946292877197266 + }, + { + "epoch": 3.8623046875e-05, + "model_forward_time": 0.024806737899780273, + "step": 25312 + }, + { + "epoch": 3.8623046875e-05, + "step": 25312, + "training_step_time": 0.15897512435913086 + }, + { + "epoch": 3.862457275390625e-05, + "model_forward_time": 0.024178504943847656, + "step": 25313 + }, + { + "epoch": 3.862457275390625e-05, + "step": 25313, + "training_step_time": 0.11198568344116211 + }, + { + "epoch": 3.86260986328125e-05, + "model_forward_time": 0.02472519874572754, + "step": 25314 + }, + { + "epoch": 3.86260986328125e-05, + "step": 25314, + "training_step_time": 0.16807842254638672 + }, + { + "epoch": 3.862762451171875e-05, + "model_forward_time": 0.02388763427734375, + "step": 25315 + }, + { + "epoch": 3.862762451171875e-05, + "step": 25315, + "training_step_time": 0.16779780387878418 + }, + { + "epoch": 3.8629150390625e-05, + "model_forward_time": 0.02494359016418457, + "step": 25316 + }, + { + "epoch": 3.8629150390625e-05, + "step": 25316, + "training_step_time": 0.10834145545959473 + }, + { + "epoch": 3.863067626953125e-05, + "model_forward_time": 0.02557682991027832, + "step": 25317 + }, + { + "epoch": 3.863067626953125e-05, + "step": 25317, + "training_step_time": 0.10547757148742676 + }, + { + "epoch": 3.86322021484375e-05, + "model_forward_time": 0.024999380111694336, + "step": 25318 + }, + { + "epoch": 3.86322021484375e-05, + "step": 25318, + "training_step_time": 0.10612010955810547 + }, + { + "epoch": 3.863372802734375e-05, + "model_forward_time": 0.025054931640625, + "step": 25319 + }, + { + "epoch": 3.863372802734375e-05, + "step": 25319, + "training_step_time": 0.10550165176391602 + }, + { + "epoch": 3.863525390625e-05, + "grad_norm": 0.07553213089704514, + "learning_rate": 6.5071159772861436e-06, + "loss": 0.0032, + "step": 25320 + }, + { + "epoch": 3.863525390625e-05, + "model_forward_time": 0.025118589401245117, + "step": 25320 + }, + { + "epoch": 3.863525390625e-05, + "step": 25320, + "training_step_time": 0.10319638252258301 + }, + { + "epoch": 3.863677978515625e-05, + "model_forward_time": 0.02546977996826172, + "step": 25321 + }, + { + "epoch": 3.863677978515625e-05, + "step": 25321, + "training_step_time": 0.10753941535949707 + }, + { + "epoch": 3.86383056640625e-05, + "model_forward_time": 0.025127649307250977, + "step": 25322 + }, + { + "epoch": 3.86383056640625e-05, + "step": 25322, + "training_step_time": 0.10435867309570312 + }, + { + "epoch": 3.863983154296875e-05, + "model_forward_time": 0.025287628173828125, + "step": 25323 + }, + { + "epoch": 3.863983154296875e-05, + "step": 25323, + "training_step_time": 0.11372256278991699 + }, + { + "epoch": 3.8641357421875e-05, + "model_forward_time": 0.02526235580444336, + "step": 25324 + }, + { + "epoch": 3.8641357421875e-05, + "step": 25324, + "training_step_time": 0.10542845726013184 + }, + { + "epoch": 3.864288330078125e-05, + "model_forward_time": 0.028254270553588867, + "step": 25325 + }, + { + "epoch": 3.864288330078125e-05, + "step": 25325, + "training_step_time": 0.17860078811645508 + }, + { + "epoch": 3.86444091796875e-05, + "model_forward_time": 0.02472853660583496, + "step": 25326 + }, + { + "epoch": 3.86444091796875e-05, + "step": 25326, + "training_step_time": 0.1298069953918457 + }, + { + "epoch": 3.864593505859375e-05, + "model_forward_time": 0.02404642105102539, + "step": 25327 + }, + { + "epoch": 3.864593505859375e-05, + "step": 25327, + "training_step_time": 0.13077878952026367 + }, + { + "epoch": 3.86474609375e-05, + "model_forward_time": 0.024319887161254883, + "step": 25328 + }, + { + "epoch": 3.86474609375e-05, + "step": 25328, + "training_step_time": 0.10719943046569824 + }, + { + "epoch": 3.864898681640625e-05, + "model_forward_time": 0.024889230728149414, + "step": 25329 + }, + { + "epoch": 3.864898681640625e-05, + "step": 25329, + "training_step_time": 0.17375946044921875 + }, + { + "epoch": 3.86505126953125e-05, + "grad_norm": 0.16404885053634644, + "learning_rate": 6.479953699129382e-06, + "loss": 0.0111, + "step": 25330 + }, + { + "epoch": 3.86505126953125e-05, + "model_forward_time": 0.024220705032348633, + "step": 25330 + }, + { + "epoch": 3.86505126953125e-05, + "step": 25330, + "training_step_time": 0.13584685325622559 + }, + { + "epoch": 3.865203857421875e-05, + "model_forward_time": 0.02403426170349121, + "step": 25331 + }, + { + "epoch": 3.865203857421875e-05, + "step": 25331, + "training_step_time": 0.11289763450622559 + }, + { + "epoch": 3.8653564453125e-05, + "model_forward_time": 0.025214433670043945, + "step": 25332 + }, + { + "epoch": 3.8653564453125e-05, + "step": 25332, + "training_step_time": 0.10968136787414551 + }, + { + "epoch": 3.865509033203125e-05, + "model_forward_time": 0.02488541603088379, + "step": 25333 + }, + { + "epoch": 3.865509033203125e-05, + "step": 25333, + "training_step_time": 0.10695290565490723 + }, + { + "epoch": 3.86566162109375e-05, + "model_forward_time": 0.025032520294189453, + "step": 25334 + }, + { + "epoch": 3.86566162109375e-05, + "step": 25334, + "training_step_time": 0.10447812080383301 + }, + { + "epoch": 3.865814208984375e-05, + "model_forward_time": 0.02508997917175293, + "step": 25335 + }, + { + "epoch": 3.865814208984375e-05, + "step": 25335, + "training_step_time": 0.1040501594543457 + }, + { + "epoch": 3.865966796875e-05, + "model_forward_time": 0.025412321090698242, + "step": 25336 + }, + { + "epoch": 3.865966796875e-05, + "step": 25336, + "training_step_time": 0.10371184349060059 + }, + { + "epoch": 3.866119384765625e-05, + "model_forward_time": 0.02505183219909668, + "step": 25337 + }, + { + "epoch": 3.866119384765625e-05, + "step": 25337, + "training_step_time": 0.10430741310119629 + }, + { + "epoch": 3.86627197265625e-05, + "model_forward_time": 0.024888992309570312, + "step": 25338 + }, + { + "epoch": 3.86627197265625e-05, + "step": 25338, + "training_step_time": 0.10526514053344727 + }, + { + "epoch": 3.866424560546875e-05, + "model_forward_time": 0.02501511573791504, + "step": 25339 + }, + { + "epoch": 3.866424560546875e-05, + "step": 25339, + "training_step_time": 0.10430669784545898 + }, + { + "epoch": 3.8665771484375e-05, + "grad_norm": 0.061237893998622894, + "learning_rate": 6.452844301932559e-06, + "loss": 0.0028, + "step": 25340 + }, + { + "epoch": 3.8665771484375e-05, + "model_forward_time": 0.02504134178161621, + "step": 25340 + }, + { + "epoch": 3.8665771484375e-05, + "step": 25340, + "training_step_time": 0.10606074333190918 + }, + { + "epoch": 3.866729736328125e-05, + "model_forward_time": 0.024821043014526367, + "step": 25341 + }, + { + "epoch": 3.866729736328125e-05, + "step": 25341, + "training_step_time": 0.16702795028686523 + }, + { + "epoch": 3.86688232421875e-05, + "model_forward_time": 0.0244293212890625, + "step": 25342 + }, + { + "epoch": 3.86688232421875e-05, + "step": 25342, + "training_step_time": 0.24446725845336914 + }, + { + "epoch": 3.867034912109375e-05, + "model_forward_time": 0.02311396598815918, + "step": 25343 + }, + { + "epoch": 3.867034912109375e-05, + "step": 25343, + "training_step_time": 0.2396857738494873 + }, + { + "epoch": 3.8671875e-05, + "model_forward_time": 0.023109912872314453, + "step": 25344 + }, + { + "epoch": 3.8671875e-05, + "step": 25344, + "training_step_time": 0.23199129104614258 + }, + { + "epoch": 3.867340087890625e-05, + "model_forward_time": 0.0237734317779541, + "step": 25345 + }, + { + "epoch": 3.867340087890625e-05, + "step": 25345, + "training_step_time": 0.603271484375 + }, + { + "epoch": 3.86749267578125e-05, + "model_forward_time": 0.021698713302612305, + "step": 25346 + }, + { + "epoch": 3.86749267578125e-05, + "step": 25346, + "training_step_time": 0.17925190925598145 + }, + { + "epoch": 3.867645263671875e-05, + "model_forward_time": 0.022940397262573242, + "step": 25347 + }, + { + "epoch": 3.867645263671875e-05, + "step": 25347, + "training_step_time": 0.1144556999206543 + }, + { + "epoch": 3.8677978515625e-05, + "model_forward_time": 0.023488998413085938, + "step": 25348 + }, + { + "epoch": 3.8677978515625e-05, + "step": 25348, + "training_step_time": 0.11192727088928223 + }, + { + "epoch": 3.867950439453125e-05, + "model_forward_time": 0.02436518669128418, + "step": 25349 + }, + { + "epoch": 3.867950439453125e-05, + "step": 25349, + "training_step_time": 0.10484552383422852 + }, + { + "epoch": 3.86810302734375e-05, + "grad_norm": 0.14866124093532562, + "learning_rate": 6.425787818636131e-06, + "loss": 0.005, + "step": 25350 + }, + { + "epoch": 3.86810302734375e-05, + "model_forward_time": 0.024731159210205078, + "step": 25350 + }, + { + "epoch": 3.86810302734375e-05, + "step": 25350, + "training_step_time": 0.10678625106811523 + }, + { + "epoch": 3.868255615234375e-05, + "model_forward_time": 0.02425098419189453, + "step": 25351 + }, + { + "epoch": 3.868255615234375e-05, + "step": 25351, + "training_step_time": 0.22017335891723633 + }, + { + "epoch": 3.868408203125e-05, + "model_forward_time": 0.02360820770263672, + "step": 25352 + }, + { + "epoch": 3.868408203125e-05, + "step": 25352, + "training_step_time": 0.13580703735351562 + }, + { + "epoch": 3.868560791015625e-05, + "model_forward_time": 0.02344369888305664, + "step": 25353 + }, + { + "epoch": 3.868560791015625e-05, + "step": 25353, + "training_step_time": 0.1740431785583496 + }, + { + "epoch": 3.86871337890625e-05, + "model_forward_time": 0.023654937744140625, + "step": 25354 + }, + { + "epoch": 3.86871337890625e-05, + "step": 25354, + "training_step_time": 0.13831400871276855 + }, + { + "epoch": 3.868865966796875e-05, + "model_forward_time": 0.024185895919799805, + "step": 25355 + }, + { + "epoch": 3.868865966796875e-05, + "step": 25355, + "training_step_time": 0.12133073806762695 + }, + { + "epoch": 3.8690185546875e-05, + "model_forward_time": 0.02361750602722168, + "step": 25356 + }, + { + "epoch": 3.8690185546875e-05, + "step": 25356, + "training_step_time": 0.11653017997741699 + }, + { + "epoch": 3.869171142578125e-05, + "model_forward_time": 0.024273395538330078, + "step": 25357 + }, + { + "epoch": 3.869171142578125e-05, + "step": 25357, + "training_step_time": 0.11455297470092773 + }, + { + "epoch": 3.86932373046875e-05, + "model_forward_time": 0.02433919906616211, + "step": 25358 + }, + { + "epoch": 3.86932373046875e-05, + "step": 25358, + "training_step_time": 0.11362361907958984 + }, + { + "epoch": 3.869476318359375e-05, + "model_forward_time": 0.023983001708984375, + "step": 25359 + }, + { + "epoch": 3.869476318359375e-05, + "step": 25359, + "training_step_time": 0.11241531372070312 + }, + { + "epoch": 3.86962890625e-05, + "grad_norm": 0.1651516705751419, + "learning_rate": 6.398784282116293e-06, + "loss": 0.0034, + "step": 25360 + }, + { + "epoch": 3.86962890625e-05, + "model_forward_time": 0.02460503578186035, + "step": 25360 + }, + { + "epoch": 3.86962890625e-05, + "step": 25360, + "training_step_time": 0.11083292961120605 + }, + { + "epoch": 3.869781494140625e-05, + "model_forward_time": 0.024419784545898438, + "step": 25361 + }, + { + "epoch": 3.869781494140625e-05, + "step": 25361, + "training_step_time": 0.10853338241577148 + }, + { + "epoch": 3.86993408203125e-05, + "model_forward_time": 0.024364948272705078, + "step": 25362 + }, + { + "epoch": 3.86993408203125e-05, + "step": 25362, + "training_step_time": 0.11388635635375977 + }, + { + "epoch": 3.870086669921875e-05, + "model_forward_time": 0.024256229400634766, + "step": 25363 + }, + { + "epoch": 3.870086669921875e-05, + "step": 25363, + "training_step_time": 0.1103055477142334 + }, + { + "epoch": 3.8702392578125e-05, + "model_forward_time": 0.024222612380981445, + "step": 25364 + }, + { + "epoch": 3.8702392578125e-05, + "step": 25364, + "training_step_time": 0.11123013496398926 + }, + { + "epoch": 3.870391845703125e-05, + "model_forward_time": 0.024135828018188477, + "step": 25365 + }, + { + "epoch": 3.870391845703125e-05, + "step": 25365, + "training_step_time": 0.174058198928833 + }, + { + "epoch": 3.87054443359375e-05, + "model_forward_time": 0.023679733276367188, + "step": 25366 + }, + { + "epoch": 3.87054443359375e-05, + "step": 25366, + "training_step_time": 0.12102866172790527 + }, + { + "epoch": 3.870697021484375e-05, + "model_forward_time": 0.023659467697143555, + "step": 25367 + }, + { + "epoch": 3.870697021484375e-05, + "step": 25367, + "training_step_time": 0.12207388877868652 + }, + { + "epoch": 3.870849609375e-05, + "model_forward_time": 0.024297714233398438, + "step": 25368 + }, + { + "epoch": 3.870849609375e-05, + "step": 25368, + "training_step_time": 0.1427316665649414 + }, + { + "epoch": 3.871002197265625e-05, + "model_forward_time": 0.024331331253051758, + "step": 25369 + }, + { + "epoch": 3.871002197265625e-05, + "step": 25369, + "training_step_time": 0.11813926696777344 + }, + { + "epoch": 3.87115478515625e-05, + "grad_norm": 0.3118550777435303, + "learning_rate": 6.3718337251848785e-06, + "loss": 0.0062, + "step": 25370 + }, + { + "epoch": 3.87115478515625e-05, + "model_forward_time": 0.02426457405090332, + "step": 25370 + }, + { + "epoch": 3.87115478515625e-05, + "step": 25370, + "training_step_time": 0.12220478057861328 + }, + { + "epoch": 3.871307373046875e-05, + "model_forward_time": 0.02439427375793457, + "step": 25371 + }, + { + "epoch": 3.871307373046875e-05, + "step": 25371, + "training_step_time": 0.11439251899719238 + }, + { + "epoch": 3.8714599609375e-05, + "model_forward_time": 0.024241924285888672, + "step": 25372 + }, + { + "epoch": 3.8714599609375e-05, + "step": 25372, + "training_step_time": 0.11199736595153809 + }, + { + "epoch": 3.871612548828125e-05, + "model_forward_time": 0.02438664436340332, + "step": 25373 + }, + { + "epoch": 3.871612548828125e-05, + "step": 25373, + "training_step_time": 0.1089169979095459 + }, + { + "epoch": 3.87176513671875e-05, + "model_forward_time": 0.023924827575683594, + "step": 25374 + }, + { + "epoch": 3.87176513671875e-05, + "step": 25374, + "training_step_time": 0.11027193069458008 + }, + { + "epoch": 3.871917724609375e-05, + "model_forward_time": 0.024034738540649414, + "step": 25375 + }, + { + "epoch": 3.871917724609375e-05, + "step": 25375, + "training_step_time": 0.10641932487487793 + }, + { + "epoch": 3.8720703125e-05, + "model_forward_time": 0.024310588836669922, + "step": 25376 + }, + { + "epoch": 3.8720703125e-05, + "step": 25376, + "training_step_time": 0.10937619209289551 + }, + { + "epoch": 3.872222900390625e-05, + "model_forward_time": 0.02431774139404297, + "step": 25377 + }, + { + "epoch": 3.872222900390625e-05, + "step": 25377, + "training_step_time": 0.10970902442932129 + }, + { + "epoch": 3.87237548828125e-05, + "model_forward_time": 0.023674726486206055, + "step": 25378 + }, + { + "epoch": 3.87237548828125e-05, + "step": 25378, + "training_step_time": 0.1079556941986084 + }, + { + "epoch": 3.872528076171875e-05, + "model_forward_time": 0.02441573143005371, + "step": 25379 + }, + { + "epoch": 3.872528076171875e-05, + "step": 25379, + "training_step_time": 0.1071014404296875 + }, + { + "epoch": 3.8726806640625e-05, + "grad_norm": 0.05735749006271362, + "learning_rate": 6.344936180589351e-06, + "loss": 0.0028, + "step": 25380 + }, + { + "epoch": 3.8726806640625e-05, + "model_forward_time": 0.023977994918823242, + "step": 25380 + }, + { + "epoch": 3.8726806640625e-05, + "step": 25380, + "training_step_time": 0.17162346839904785 + }, + { + "epoch": 3.872833251953125e-05, + "model_forward_time": 0.023584365844726562, + "step": 25381 + }, + { + "epoch": 3.872833251953125e-05, + "step": 25381, + "training_step_time": 0.11606383323669434 + }, + { + "epoch": 3.87298583984375e-05, + "model_forward_time": 0.023459911346435547, + "step": 25382 + }, + { + "epoch": 3.87298583984375e-05, + "step": 25382, + "training_step_time": 0.10790681838989258 + }, + { + "epoch": 3.873138427734375e-05, + "model_forward_time": 0.024460792541503906, + "step": 25383 + }, + { + "epoch": 3.873138427734375e-05, + "step": 25383, + "training_step_time": 0.12716245651245117 + }, + { + "epoch": 3.873291015625e-05, + "model_forward_time": 0.024287939071655273, + "step": 25384 + }, + { + "epoch": 3.873291015625e-05, + "step": 25384, + "training_step_time": 0.12456107139587402 + }, + { + "epoch": 3.873443603515625e-05, + "model_forward_time": 0.02418804168701172, + "step": 25385 + }, + { + "epoch": 3.873443603515625e-05, + "step": 25385, + "training_step_time": 0.11058545112609863 + }, + { + "epoch": 3.87359619140625e-05, + "model_forward_time": 0.02461099624633789, + "step": 25386 + }, + { + "epoch": 3.87359619140625e-05, + "step": 25386, + "training_step_time": 0.11049628257751465 + }, + { + "epoch": 3.873748779296875e-05, + "model_forward_time": 0.025989532470703125, + "step": 25387 + }, + { + "epoch": 3.873748779296875e-05, + "step": 25387, + "training_step_time": 0.11275649070739746 + }, + { + "epoch": 3.8739013671875e-05, + "model_forward_time": 0.024090051651000977, + "step": 25388 + }, + { + "epoch": 3.8739013671875e-05, + "step": 25388, + "training_step_time": 0.1157078742980957 + }, + { + "epoch": 3.874053955078125e-05, + "model_forward_time": 0.02395319938659668, + "step": 25389 + }, + { + "epoch": 3.874053955078125e-05, + "step": 25389, + "training_step_time": 0.15762114524841309 + }, + { + "epoch": 3.87420654296875e-05, + "grad_norm": 0.04693985730409622, + "learning_rate": 6.318091681012772e-06, + "loss": 0.0033, + "step": 25390 + }, + { + "epoch": 3.87420654296875e-05, + "model_forward_time": 0.024863004684448242, + "step": 25390 + }, + { + "epoch": 3.87420654296875e-05, + "step": 25390, + "training_step_time": 0.10880446434020996 + }, + { + "epoch": 3.874359130859375e-05, + "model_forward_time": 0.023578882217407227, + "step": 25391 + }, + { + "epoch": 3.874359130859375e-05, + "step": 25391, + "training_step_time": 0.11078977584838867 + }, + { + "epoch": 3.87451171875e-05, + "model_forward_time": 0.024204254150390625, + "step": 25392 + }, + { + "epoch": 3.87451171875e-05, + "step": 25392, + "training_step_time": 0.12035489082336426 + }, + { + "epoch": 3.874664306640625e-05, + "model_forward_time": 0.025246620178222656, + "step": 25393 + }, + { + "epoch": 3.874664306640625e-05, + "step": 25393, + "training_step_time": 0.1287531852722168 + }, + { + "epoch": 3.87481689453125e-05, + "model_forward_time": 0.02458643913269043, + "step": 25394 + }, + { + "epoch": 3.87481689453125e-05, + "step": 25394, + "training_step_time": 0.1159520149230957 + }, + { + "epoch": 3.874969482421875e-05, + "model_forward_time": 0.024595260620117188, + "step": 25395 + }, + { + "epoch": 3.874969482421875e-05, + "step": 25395, + "training_step_time": 0.11445164680480957 + }, + { + "epoch": 3.8751220703125e-05, + "model_forward_time": 0.02393341064453125, + "step": 25396 + }, + { + "epoch": 3.8751220703125e-05, + "step": 25396, + "training_step_time": 0.10309743881225586 + }, + { + "epoch": 3.875274658203125e-05, + "model_forward_time": 0.023143291473388672, + "step": 25397 + }, + { + "epoch": 3.875274658203125e-05, + "step": 25397, + "training_step_time": 0.14821481704711914 + }, + { + "epoch": 3.87542724609375e-05, + "model_forward_time": 0.02362966537475586, + "step": 25398 + }, + { + "epoch": 3.87542724609375e-05, + "step": 25398, + "training_step_time": 0.10236740112304688 + }, + { + "epoch": 3.875579833984375e-05, + "model_forward_time": 0.024408578872680664, + "step": 25399 + }, + { + "epoch": 3.875579833984375e-05, + "step": 25399, + "training_step_time": 0.19769644737243652 + }, + { + "epoch": 3.875732421875e-05, + "grad_norm": 0.07606486231088638, + "learning_rate": 6.291300259073724e-06, + "loss": 0.0031, + "step": 25400 + }, + { + "epoch": 3.875732421875e-05, + "model_forward_time": 0.023204565048217773, + "step": 25400 + }, + { + "epoch": 3.875732421875e-05, + "step": 25400, + "training_step_time": 0.13474082946777344 + }, + { + "epoch": 3.875885009765625e-05, + "model_forward_time": 0.023603200912475586, + "step": 25401 + }, + { + "epoch": 3.875885009765625e-05, + "step": 25401, + "training_step_time": 0.10623502731323242 + }, + { + "epoch": 3.87603759765625e-05, + "model_forward_time": 0.02403569221496582, + "step": 25402 + }, + { + "epoch": 3.87603759765625e-05, + "step": 25402, + "training_step_time": 0.11485481262207031 + }, + { + "epoch": 3.876190185546875e-05, + "model_forward_time": 0.024296283721923828, + "step": 25403 + }, + { + "epoch": 3.876190185546875e-05, + "step": 25403, + "training_step_time": 0.10331153869628906 + }, + { + "epoch": 3.8763427734375e-05, + "model_forward_time": 0.024227380752563477, + "step": 25404 + }, + { + "epoch": 3.8763427734375e-05, + "step": 25404, + "training_step_time": 0.10404515266418457 + }, + { + "epoch": 3.876495361328125e-05, + "model_forward_time": 0.024018526077270508, + "step": 25405 + }, + { + "epoch": 3.876495361328125e-05, + "step": 25405, + "training_step_time": 0.10932493209838867 + }, + { + "epoch": 3.87664794921875e-05, + "model_forward_time": 0.02414703369140625, + "step": 25406 + }, + { + "epoch": 3.87664794921875e-05, + "step": 25406, + "training_step_time": 0.10357952117919922 + }, + { + "epoch": 3.876800537109375e-05, + "model_forward_time": 0.025178909301757812, + "step": 25407 + }, + { + "epoch": 3.876800537109375e-05, + "step": 25407, + "training_step_time": 0.1069800853729248 + }, + { + "epoch": 3.876953125e-05, + "model_forward_time": 0.023879051208496094, + "step": 25408 + }, + { + "epoch": 3.876953125e-05, + "step": 25408, + "training_step_time": 0.10327744483947754 + }, + { + "epoch": 3.877105712890625e-05, + "model_forward_time": 0.024163246154785156, + "step": 25409 + }, + { + "epoch": 3.877105712890625e-05, + "step": 25409, + "training_step_time": 0.1080482006072998 + }, + { + "epoch": 3.87725830078125e-05, + "grad_norm": 0.39555227756500244, + "learning_rate": 6.264561947326331e-06, + "loss": 0.0053, + "step": 25410 + }, + { + "epoch": 3.87725830078125e-05, + "model_forward_time": 0.0241546630859375, + "step": 25410 + }, + { + "epoch": 3.87725830078125e-05, + "step": 25410, + "training_step_time": 0.10388040542602539 + }, + { + "epoch": 3.877410888671875e-05, + "model_forward_time": 0.024187326431274414, + "step": 25411 + }, + { + "epoch": 3.877410888671875e-05, + "step": 25411, + "training_step_time": 0.20867228507995605 + }, + { + "epoch": 3.8775634765625e-05, + "model_forward_time": 0.02392411231994629, + "step": 25412 + }, + { + "epoch": 3.8775634765625e-05, + "step": 25412, + "training_step_time": 0.1299741268157959 + }, + { + "epoch": 3.877716064453125e-05, + "model_forward_time": 0.023581981658935547, + "step": 25413 + }, + { + "epoch": 3.877716064453125e-05, + "step": 25413, + "training_step_time": 0.10932707786560059 + }, + { + "epoch": 3.87786865234375e-05, + "model_forward_time": 0.024120330810546875, + "step": 25414 + }, + { + "epoch": 3.87786865234375e-05, + "step": 25414, + "training_step_time": 0.11785531044006348 + }, + { + "epoch": 3.878021240234375e-05, + "model_forward_time": 0.024335145950317383, + "step": 25415 + }, + { + "epoch": 3.878021240234375e-05, + "step": 25415, + "training_step_time": 0.10770893096923828 + }, + { + "epoch": 3.878173828125e-05, + "model_forward_time": 0.024138927459716797, + "step": 25416 + }, + { + "epoch": 3.878173828125e-05, + "step": 25416, + "training_step_time": 0.12378740310668945 + }, + { + "epoch": 3.878326416015625e-05, + "model_forward_time": 0.024194955825805664, + "step": 25417 + }, + { + "epoch": 3.878326416015625e-05, + "step": 25417, + "training_step_time": 0.11513447761535645 + }, + { + "epoch": 3.87847900390625e-05, + "model_forward_time": 0.024196863174438477, + "step": 25418 + }, + { + "epoch": 3.87847900390625e-05, + "step": 25418, + "training_step_time": 0.11105608940124512 + }, + { + "epoch": 3.878631591796875e-05, + "model_forward_time": 0.024331092834472656, + "step": 25419 + }, + { + "epoch": 3.878631591796875e-05, + "step": 25419, + "training_step_time": 0.11199045181274414 + }, + { + "epoch": 3.8787841796875e-05, + "grad_norm": 0.40201613306999207, + "learning_rate": 6.237876778260155e-06, + "loss": 0.0041, + "step": 25420 + }, + { + "epoch": 3.8787841796875e-05, + "model_forward_time": 0.02438807487487793, + "step": 25420 + }, + { + "epoch": 3.8787841796875e-05, + "step": 25420, + "training_step_time": 0.10882282257080078 + }, + { + "epoch": 3.878936767578125e-05, + "model_forward_time": 0.024309873580932617, + "step": 25421 + }, + { + "epoch": 3.878936767578125e-05, + "step": 25421, + "training_step_time": 0.10506296157836914 + }, + { + "epoch": 3.87908935546875e-05, + "model_forward_time": 0.0242612361907959, + "step": 25422 + }, + { + "epoch": 3.87908935546875e-05, + "step": 25422, + "training_step_time": 0.10626745223999023 + }, + { + "epoch": 3.879241943359375e-05, + "model_forward_time": 0.0245208740234375, + "step": 25423 + }, + { + "epoch": 3.879241943359375e-05, + "step": 25423, + "training_step_time": 0.10762453079223633 + }, + { + "epoch": 3.87939453125e-05, + "model_forward_time": 0.024312734603881836, + "step": 25424 + }, + { + "epoch": 3.87939453125e-05, + "step": 25424, + "training_step_time": 0.10991978645324707 + }, + { + "epoch": 3.879547119140625e-05, + "model_forward_time": 0.02463364601135254, + "step": 25425 + }, + { + "epoch": 3.879547119140625e-05, + "step": 25425, + "training_step_time": 0.11229228973388672 + }, + { + "epoch": 3.87969970703125e-05, + "model_forward_time": 0.024311065673828125, + "step": 25426 + }, + { + "epoch": 3.87969970703125e-05, + "step": 25426, + "training_step_time": 0.10521245002746582 + }, + { + "epoch": 3.879852294921875e-05, + "model_forward_time": 0.02433466911315918, + "step": 25427 + }, + { + "epoch": 3.879852294921875e-05, + "step": 25427, + "training_step_time": 0.18108463287353516 + }, + { + "epoch": 3.8800048828125e-05, + "model_forward_time": 0.023327112197875977, + "step": 25428 + }, + { + "epoch": 3.8800048828125e-05, + "step": 25428, + "training_step_time": 0.10900735855102539 + }, + { + "epoch": 3.880157470703125e-05, + "model_forward_time": 0.02354598045349121, + "step": 25429 + }, + { + "epoch": 3.880157470703125e-05, + "step": 25429, + "training_step_time": 0.11237430572509766 + }, + { + "epoch": 3.88031005859375e-05, + "grad_norm": 0.3016217052936554, + "learning_rate": 6.211244784300197e-06, + "loss": 0.0086, + "step": 25430 + }, + { + "epoch": 3.88031005859375e-05, + "model_forward_time": 0.0246126651763916, + "step": 25430 + }, + { + "epoch": 3.88031005859375e-05, + "step": 25430, + "training_step_time": 0.10767698287963867 + }, + { + "epoch": 3.880462646484375e-05, + "model_forward_time": 0.02469348907470703, + "step": 25431 + }, + { + "epoch": 3.880462646484375e-05, + "step": 25431, + "training_step_time": 0.12374615669250488 + }, + { + "epoch": 3.880615234375e-05, + "model_forward_time": 0.024221181869506836, + "step": 25432 + }, + { + "epoch": 3.880615234375e-05, + "step": 25432, + "training_step_time": 0.10997200012207031 + }, + { + "epoch": 3.880767822265625e-05, + "model_forward_time": 0.02444601058959961, + "step": 25433 + }, + { + "epoch": 3.880767822265625e-05, + "step": 25433, + "training_step_time": 0.12539339065551758 + }, + { + "epoch": 3.88092041015625e-05, + "model_forward_time": 0.024117469787597656, + "step": 25434 + }, + { + "epoch": 3.88092041015625e-05, + "step": 25434, + "training_step_time": 0.11922788619995117 + }, + { + "epoch": 3.881072998046875e-05, + "model_forward_time": 0.024095535278320312, + "step": 25435 + }, + { + "epoch": 3.881072998046875e-05, + "step": 25435, + "training_step_time": 0.10279631614685059 + }, + { + "epoch": 3.8812255859375e-05, + "model_forward_time": 0.024552345275878906, + "step": 25436 + }, + { + "epoch": 3.8812255859375e-05, + "step": 25436, + "training_step_time": 0.14910650253295898 + }, + { + "epoch": 3.881378173828125e-05, + "model_forward_time": 0.025129079818725586, + "step": 25437 + }, + { + "epoch": 3.881378173828125e-05, + "step": 25437, + "training_step_time": 0.10879397392272949 + }, + { + "epoch": 3.88153076171875e-05, + "model_forward_time": 0.02491164207458496, + "step": 25438 + }, + { + "epoch": 3.88153076171875e-05, + "step": 25438, + "training_step_time": 0.10946846008300781 + }, + { + "epoch": 3.881683349609375e-05, + "model_forward_time": 0.027214765548706055, + "step": 25439 + }, + { + "epoch": 3.881683349609375e-05, + "step": 25439, + "training_step_time": 0.12307286262512207 + }, + { + "epoch": 3.8818359375e-05, + "grad_norm": 0.13172994554042816, + "learning_rate": 6.184665997806832e-06, + "loss": 0.0059, + "step": 25440 + }, + { + "epoch": 3.8818359375e-05, + "model_forward_time": 0.025621652603149414, + "step": 25440 + }, + { + "epoch": 3.8818359375e-05, + "step": 25440, + "training_step_time": 0.12520098686218262 + }, + { + "epoch": 3.881988525390625e-05, + "model_forward_time": 0.025380373001098633, + "step": 25441 + }, + { + "epoch": 3.881988525390625e-05, + "step": 25441, + "training_step_time": 0.11148405075073242 + }, + { + "epoch": 3.88214111328125e-05, + "model_forward_time": 0.02640247344970703, + "step": 25442 + }, + { + "epoch": 3.88214111328125e-05, + "step": 25442, + "training_step_time": 0.12137484550476074 + }, + { + "epoch": 3.882293701171875e-05, + "model_forward_time": 0.02528095245361328, + "step": 25443 + }, + { + "epoch": 3.882293701171875e-05, + "step": 25443, + "training_step_time": 0.10832333564758301 + }, + { + "epoch": 3.8824462890625e-05, + "model_forward_time": 0.025427579879760742, + "step": 25444 + }, + { + "epoch": 3.8824462890625e-05, + "step": 25444, + "training_step_time": 0.10382676124572754 + }, + { + "epoch": 3.882598876953125e-05, + "model_forward_time": 0.025367021560668945, + "step": 25445 + }, + { + "epoch": 3.882598876953125e-05, + "step": 25445, + "training_step_time": 0.11740756034851074 + }, + { + "epoch": 3.88275146484375e-05, + "model_forward_time": 0.025234222412109375, + "step": 25446 + }, + { + "epoch": 3.88275146484375e-05, + "step": 25446, + "training_step_time": 0.11193680763244629 + }, + { + "epoch": 3.882904052734375e-05, + "model_forward_time": 0.025371074676513672, + "step": 25447 + }, + { + "epoch": 3.882904052734375e-05, + "step": 25447, + "training_step_time": 0.11610627174377441 + }, + { + "epoch": 3.883056640625e-05, + "model_forward_time": 0.025132179260253906, + "step": 25448 + }, + { + "epoch": 3.883056640625e-05, + "step": 25448, + "training_step_time": 0.1174323558807373 + }, + { + "epoch": 3.883209228515625e-05, + "model_forward_time": 0.025345563888549805, + "step": 25449 + }, + { + "epoch": 3.883209228515625e-05, + "step": 25449, + "training_step_time": 0.1506328582763672 + }, + { + "epoch": 3.88336181640625e-05, + "grad_norm": 0.10546907037496567, + "learning_rate": 6.158140451075795e-06, + "loss": 0.0098, + "step": 25450 + }, + { + "epoch": 3.88336181640625e-05, + "model_forward_time": 0.025005578994750977, + "step": 25450 + }, + { + "epoch": 3.88336181640625e-05, + "step": 25450, + "training_step_time": 0.2106471061706543 + }, + { + "epoch": 3.883514404296875e-05, + "model_forward_time": 0.0242156982421875, + "step": 25451 + }, + { + "epoch": 3.883514404296875e-05, + "step": 25451, + "training_step_time": 0.1814403533935547 + }, + { + "epoch": 3.8836669921875e-05, + "model_forward_time": 0.023371458053588867, + "step": 25452 + }, + { + "epoch": 3.8836669921875e-05, + "step": 25452, + "training_step_time": 0.15460419654846191 + }, + { + "epoch": 3.883819580078125e-05, + "model_forward_time": 0.024232149124145508, + "step": 25453 + }, + { + "epoch": 3.883819580078125e-05, + "step": 25453, + "training_step_time": 0.14319753646850586 + }, + { + "epoch": 3.88397216796875e-05, + "model_forward_time": 0.024031400680541992, + "step": 25454 + }, + { + "epoch": 3.88397216796875e-05, + "step": 25454, + "training_step_time": 0.13707470893859863 + }, + { + "epoch": 3.884124755859375e-05, + "model_forward_time": 0.024137258529663086, + "step": 25455 + }, + { + "epoch": 3.884124755859375e-05, + "step": 25455, + "training_step_time": 0.12358450889587402 + }, + { + "epoch": 3.88427734375e-05, + "model_forward_time": 0.024290084838867188, + "step": 25456 + }, + { + "epoch": 3.88427734375e-05, + "step": 25456, + "training_step_time": 0.12165403366088867 + }, + { + "epoch": 3.884429931640625e-05, + "model_forward_time": 0.024755239486694336, + "step": 25457 + }, + { + "epoch": 3.884429931640625e-05, + "step": 25457, + "training_step_time": 0.15129709243774414 + }, + { + "epoch": 3.88458251953125e-05, + "model_forward_time": 0.02418971061706543, + "step": 25458 + }, + { + "epoch": 3.88458251953125e-05, + "step": 25458, + "training_step_time": 0.1242375373840332 + }, + { + "epoch": 3.884735107421875e-05, + "model_forward_time": 0.02429056167602539, + "step": 25459 + }, + { + "epoch": 3.884735107421875e-05, + "step": 25459, + "training_step_time": 0.19938230514526367 + }, + { + "epoch": 3.8848876953125e-05, + "grad_norm": 0.13312338292598724, + "learning_rate": 6.131668176338118e-06, + "loss": 0.004, + "step": 25460 + }, + { + "epoch": 3.8848876953125e-05, + "model_forward_time": 0.02457737922668457, + "step": 25460 + }, + { + "epoch": 3.8848876953125e-05, + "step": 25460, + "training_step_time": 0.10796260833740234 + }, + { + "epoch": 3.885040283203125e-05, + "model_forward_time": 0.02477264404296875, + "step": 25461 + }, + { + "epoch": 3.885040283203125e-05, + "step": 25461, + "training_step_time": 0.11083841323852539 + }, + { + "epoch": 3.88519287109375e-05, + "model_forward_time": 0.0251312255859375, + "step": 25462 + }, + { + "epoch": 3.88519287109375e-05, + "step": 25462, + "training_step_time": 0.1939094066619873 + }, + { + "epoch": 3.885345458984375e-05, + "model_forward_time": 0.024007797241210938, + "step": 25463 + }, + { + "epoch": 3.885345458984375e-05, + "step": 25463, + "training_step_time": 0.10380721092224121 + }, + { + "epoch": 3.885498046875e-05, + "model_forward_time": 0.024460315704345703, + "step": 25464 + }, + { + "epoch": 3.885498046875e-05, + "step": 25464, + "training_step_time": 0.10663843154907227 + }, + { + "epoch": 3.885650634765625e-05, + "model_forward_time": 0.025180578231811523, + "step": 25465 + }, + { + "epoch": 3.885650634765625e-05, + "step": 25465, + "training_step_time": 0.10714197158813477 + }, + { + "epoch": 3.88580322265625e-05, + "model_forward_time": 0.024775028228759766, + "step": 25466 + }, + { + "epoch": 3.88580322265625e-05, + "step": 25466, + "training_step_time": 0.1098780632019043 + }, + { + "epoch": 3.885955810546875e-05, + "model_forward_time": 0.024919986724853516, + "step": 25467 + }, + { + "epoch": 3.885955810546875e-05, + "step": 25467, + "training_step_time": 0.10839533805847168 + }, + { + "epoch": 3.8861083984375e-05, + "model_forward_time": 0.024760007858276367, + "step": 25468 + }, + { + "epoch": 3.8861083984375e-05, + "step": 25468, + "training_step_time": 0.10966777801513672 + }, + { + "epoch": 3.886260986328125e-05, + "model_forward_time": 0.025433778762817383, + "step": 25469 + }, + { + "epoch": 3.886260986328125e-05, + "step": 25469, + "training_step_time": 0.10736584663391113 + }, + { + "epoch": 3.88641357421875e-05, + "grad_norm": 0.1521390676498413, + "learning_rate": 6.1052492057601275e-06, + "loss": 0.0096, + "step": 25470 + }, + { + "epoch": 3.88641357421875e-05, + "model_forward_time": 0.02476644515991211, + "step": 25470 + }, + { + "epoch": 3.88641357421875e-05, + "step": 25470, + "training_step_time": 0.11431407928466797 + }, + { + "epoch": 3.886566162109375e-05, + "model_forward_time": 0.02544426918029785, + "step": 25471 + }, + { + "epoch": 3.886566162109375e-05, + "step": 25471, + "training_step_time": 0.10705018043518066 + }, + { + "epoch": 3.88671875e-05, + "model_forward_time": 0.02576446533203125, + "step": 25472 + }, + { + "epoch": 3.88671875e-05, + "step": 25472, + "training_step_time": 0.15487051010131836 + }, + { + "epoch": 3.886871337890625e-05, + "model_forward_time": 0.02484726905822754, + "step": 25473 + }, + { + "epoch": 3.886871337890625e-05, + "step": 25473, + "training_step_time": 0.10788321495056152 + }, + { + "epoch": 3.88702392578125e-05, + "model_forward_time": 0.024809598922729492, + "step": 25474 + }, + { + "epoch": 3.88702392578125e-05, + "step": 25474, + "training_step_time": 0.20707416534423828 + }, + { + "epoch": 3.887176513671875e-05, + "model_forward_time": 0.02425360679626465, + "step": 25475 + }, + { + "epoch": 3.887176513671875e-05, + "step": 25475, + "training_step_time": 0.1291813850402832 + }, + { + "epoch": 3.8873291015625e-05, + "model_forward_time": 0.02449941635131836, + "step": 25476 + }, + { + "epoch": 3.8873291015625e-05, + "step": 25476, + "training_step_time": 0.10614562034606934 + }, + { + "epoch": 3.887481689453125e-05, + "model_forward_time": 0.025359392166137695, + "step": 25477 + }, + { + "epoch": 3.887481689453125e-05, + "step": 25477, + "training_step_time": 0.11648726463317871 + }, + { + "epoch": 3.88763427734375e-05, + "model_forward_time": 0.024828672409057617, + "step": 25478 + }, + { + "epoch": 3.88763427734375e-05, + "step": 25478, + "training_step_time": 0.11557388305664062 + }, + { + "epoch": 3.887786865234375e-05, + "model_forward_time": 0.024878501892089844, + "step": 25479 + }, + { + "epoch": 3.887786865234375e-05, + "step": 25479, + "training_step_time": 0.10682368278503418 + }, + { + "epoch": 3.887939453125e-05, + "grad_norm": 0.09279409795999527, + "learning_rate": 6.07888357144335e-06, + "loss": 0.0043, + "step": 25480 + }, + { + "epoch": 3.887939453125e-05, + "model_forward_time": 0.024669170379638672, + "step": 25480 + }, + { + "epoch": 3.887939453125e-05, + "step": 25480, + "training_step_time": 0.18503689765930176 + }, + { + "epoch": 3.888092041015625e-05, + "model_forward_time": 0.024567604064941406, + "step": 25481 + }, + { + "epoch": 3.888092041015625e-05, + "step": 25481, + "training_step_time": 0.11229681968688965 + }, + { + "epoch": 3.88824462890625e-05, + "model_forward_time": 0.024555206298828125, + "step": 25482 + }, + { + "epoch": 3.88824462890625e-05, + "step": 25482, + "training_step_time": 0.10936594009399414 + }, + { + "epoch": 3.888397216796875e-05, + "model_forward_time": 0.024868488311767578, + "step": 25483 + }, + { + "epoch": 3.888397216796875e-05, + "step": 25483, + "training_step_time": 0.1266651153564453 + }, + { + "epoch": 3.8885498046875e-05, + "model_forward_time": 0.025046825408935547, + "step": 25484 + }, + { + "epoch": 3.8885498046875e-05, + "step": 25484, + "training_step_time": 0.12912607192993164 + }, + { + "epoch": 3.888702392578125e-05, + "model_forward_time": 0.02477574348449707, + "step": 25485 + }, + { + "epoch": 3.888702392578125e-05, + "step": 25485, + "training_step_time": 0.11734819412231445 + }, + { + "epoch": 3.88885498046875e-05, + "model_forward_time": 0.02499842643737793, + "step": 25486 + }, + { + "epoch": 3.88885498046875e-05, + "step": 25486, + "training_step_time": 0.10732221603393555 + }, + { + "epoch": 3.889007568359375e-05, + "model_forward_time": 0.02551555633544922, + "step": 25487 + }, + { + "epoch": 3.889007568359375e-05, + "step": 25487, + "training_step_time": 0.11947464942932129 + }, + { + "epoch": 3.88916015625e-05, + "model_forward_time": 0.02565455436706543, + "step": 25488 + }, + { + "epoch": 3.88916015625e-05, + "step": 25488, + "training_step_time": 0.10640478134155273 + }, + { + "epoch": 3.889312744140625e-05, + "model_forward_time": 0.02460479736328125, + "step": 25489 + }, + { + "epoch": 3.889312744140625e-05, + "step": 25489, + "training_step_time": 0.14508295059204102 + }, + { + "epoch": 3.88946533203125e-05, + "grad_norm": 0.05056336522102356, + "learning_rate": 6.052571305424531e-06, + "loss": 0.0042, + "step": 25490 + }, + { + "epoch": 3.88946533203125e-05, + "model_forward_time": 0.024528980255126953, + "step": 25490 + }, + { + "epoch": 3.88946533203125e-05, + "step": 25490, + "training_step_time": 0.10434603691101074 + }, + { + "epoch": 3.889617919921875e-05, + "model_forward_time": 0.02530503273010254, + "step": 25491 + }, + { + "epoch": 3.889617919921875e-05, + "step": 25491, + "training_step_time": 0.20235967636108398 + }, + { + "epoch": 3.8897705078125e-05, + "model_forward_time": 0.023984193801879883, + "step": 25492 + }, + { + "epoch": 3.8897705078125e-05, + "step": 25492, + "training_step_time": 0.170928955078125 + }, + { + "epoch": 3.889923095703125e-05, + "model_forward_time": 0.023818016052246094, + "step": 25493 + }, + { + "epoch": 3.889923095703125e-05, + "step": 25493, + "training_step_time": 0.19007015228271484 + }, + { + "epoch": 3.89007568359375e-05, + "model_forward_time": 0.024364233016967773, + "step": 25494 + }, + { + "epoch": 3.89007568359375e-05, + "step": 25494, + "training_step_time": 0.15430808067321777 + }, + { + "epoch": 3.890228271484375e-05, + "model_forward_time": 0.02402782440185547, + "step": 25495 + }, + { + "epoch": 3.890228271484375e-05, + "step": 25495, + "training_step_time": 0.1305985450744629 + }, + { + "epoch": 3.890380859375e-05, + "model_forward_time": 0.024447917938232422, + "step": 25496 + }, + { + "epoch": 3.890380859375e-05, + "step": 25496, + "training_step_time": 0.12353014945983887 + }, + { + "epoch": 3.890533447265625e-05, + "model_forward_time": 0.025117874145507812, + "step": 25497 + }, + { + "epoch": 3.890533447265625e-05, + "step": 25497, + "training_step_time": 0.12730169296264648 + }, + { + "epoch": 3.89068603515625e-05, + "model_forward_time": 0.024924755096435547, + "step": 25498 + }, + { + "epoch": 3.89068603515625e-05, + "step": 25498, + "training_step_time": 0.11938858032226562 + }, + { + "epoch": 3.890838623046875e-05, + "model_forward_time": 0.028336763381958008, + "step": 25499 + }, + { + "epoch": 3.890838623046875e-05, + "step": 25499, + "training_step_time": 0.11535835266113281 + }, + { + "epoch": 3.8909912109375e-05, + "grad_norm": 0.18776196241378784, + "learning_rate": 6.026312439675552e-06, + "loss": 0.0063, + "step": 25500 + }, + { + "epoch": 3.8909912109375e-05, + "model_forward_time": 0.02557992935180664, + "step": 25500 + }, + { + "epoch": 3.8909912109375e-05, + "step": 25500, + "training_step_time": 0.1474306583404541 + }, + { + "epoch": 3.891143798828125e-05, + "model_forward_time": 0.024645328521728516, + "step": 25501 + }, + { + "epoch": 3.891143798828125e-05, + "step": 25501, + "training_step_time": 0.11099815368652344 + }, + { + "epoch": 3.89129638671875e-05, + "model_forward_time": 0.024839401245117188, + "step": 25502 + }, + { + "epoch": 3.89129638671875e-05, + "step": 25502, + "training_step_time": 0.19975566864013672 + }, + { + "epoch": 3.891448974609375e-05, + "model_forward_time": 0.024172067642211914, + "step": 25503 + }, + { + "epoch": 3.891448974609375e-05, + "step": 25503, + "training_step_time": 0.1842026710510254 + }, + { + "epoch": 3.8916015625e-05, + "model_forward_time": 0.024082422256469727, + "step": 25504 + }, + { + "epoch": 3.8916015625e-05, + "step": 25504, + "training_step_time": 0.13479351997375488 + }, + { + "epoch": 3.891754150390625e-05, + "model_forward_time": 0.023392200469970703, + "step": 25505 + }, + { + "epoch": 3.891754150390625e-05, + "step": 25505, + "training_step_time": 0.11596822738647461 + }, + { + "epoch": 3.89190673828125e-05, + "model_forward_time": 0.02487778663635254, + "step": 25506 + }, + { + "epoch": 3.89190673828125e-05, + "step": 25506, + "training_step_time": 0.10736489295959473 + }, + { + "epoch": 3.892059326171875e-05, + "model_forward_time": 0.02492833137512207, + "step": 25507 + }, + { + "epoch": 3.892059326171875e-05, + "step": 25507, + "training_step_time": 0.10467791557312012 + }, + { + "epoch": 3.8922119140625e-05, + "model_forward_time": 0.025055885314941406, + "step": 25508 + }, + { + "epoch": 3.8922119140625e-05, + "step": 25508, + "training_step_time": 0.10467839241027832 + }, + { + "epoch": 3.892364501953125e-05, + "model_forward_time": 0.024783611297607422, + "step": 25509 + }, + { + "epoch": 3.892364501953125e-05, + "step": 25509, + "training_step_time": 0.10506153106689453 + }, + { + "epoch": 3.89251708984375e-05, + "grad_norm": 0.10060159116983414, + "learning_rate": 6.0001070061033945e-06, + "loss": 0.0056, + "step": 25510 + }, + { + "epoch": 3.89251708984375e-05, + "model_forward_time": 0.02523636817932129, + "step": 25510 + }, + { + "epoch": 3.89251708984375e-05, + "step": 25510, + "training_step_time": 0.10757756233215332 + }, + { + "epoch": 3.892669677734375e-05, + "model_forward_time": 0.0253751277923584, + "step": 25511 + }, + { + "epoch": 3.892669677734375e-05, + "step": 25511, + "training_step_time": 0.10904908180236816 + }, + { + "epoch": 3.892822265625e-05, + "model_forward_time": 0.025452375411987305, + "step": 25512 + }, + { + "epoch": 3.892822265625e-05, + "step": 25512, + "training_step_time": 0.10541844367980957 + }, + { + "epoch": 3.892974853515625e-05, + "model_forward_time": 0.02606201171875, + "step": 25513 + }, + { + "epoch": 3.892974853515625e-05, + "step": 25513, + "training_step_time": 0.10569357872009277 + }, + { + "epoch": 3.89312744140625e-05, + "model_forward_time": 0.025310277938842773, + "step": 25514 + }, + { + "epoch": 3.89312744140625e-05, + "step": 25514, + "training_step_time": 0.10470843315124512 + }, + { + "epoch": 3.893280029296875e-05, + "model_forward_time": 0.02553725242614746, + "step": 25515 + }, + { + "epoch": 3.893280029296875e-05, + "step": 25515, + "training_step_time": 0.10432934761047363 + }, + { + "epoch": 3.8934326171875e-05, + "model_forward_time": 0.025233983993530273, + "step": 25516 + }, + { + "epoch": 3.8934326171875e-05, + "step": 25516, + "training_step_time": 0.1124420166015625 + }, + { + "epoch": 3.893585205078125e-05, + "model_forward_time": 0.025113344192504883, + "step": 25517 + }, + { + "epoch": 3.893585205078125e-05, + "step": 25517, + "training_step_time": 0.1388993263244629 + }, + { + "epoch": 3.89373779296875e-05, + "model_forward_time": 0.025104522705078125, + "step": 25518 + }, + { + "epoch": 3.89373779296875e-05, + "step": 25518, + "training_step_time": 0.10932540893554688 + }, + { + "epoch": 3.893890380859375e-05, + "model_forward_time": 0.02524733543395996, + "step": 25519 + }, + { + "epoch": 3.893890380859375e-05, + "step": 25519, + "training_step_time": 0.1075446605682373 + }, + { + "epoch": 3.89404296875e-05, + "grad_norm": 0.16142567992210388, + "learning_rate": 5.9739550365501494e-06, + "loss": 0.0043, + "step": 25520 + }, + { + "epoch": 3.89404296875e-05, + "model_forward_time": 0.02544569969177246, + "step": 25520 + }, + { + "epoch": 3.89404296875e-05, + "step": 25520, + "training_step_time": 0.18267250061035156 + }, + { + "epoch": 3.894195556640625e-05, + "model_forward_time": 0.024279117584228516, + "step": 25521 + }, + { + "epoch": 3.894195556640625e-05, + "step": 25521, + "training_step_time": 0.11330366134643555 + }, + { + "epoch": 3.89434814453125e-05, + "model_forward_time": 0.024857759475708008, + "step": 25522 + }, + { + "epoch": 3.89434814453125e-05, + "step": 25522, + "training_step_time": 0.10360121726989746 + }, + { + "epoch": 3.894500732421875e-05, + "model_forward_time": 0.024950742721557617, + "step": 25523 + }, + { + "epoch": 3.894500732421875e-05, + "step": 25523, + "training_step_time": 0.10548281669616699 + }, + { + "epoch": 3.8946533203125e-05, + "model_forward_time": 0.02507638931274414, + "step": 25524 + }, + { + "epoch": 3.8946533203125e-05, + "step": 25524, + "training_step_time": 0.10666632652282715 + }, + { + "epoch": 3.894805908203125e-05, + "model_forward_time": 0.024740219116210938, + "step": 25525 + }, + { + "epoch": 3.894805908203125e-05, + "step": 25525, + "training_step_time": 0.17696356773376465 + }, + { + "epoch": 3.89495849609375e-05, + "model_forward_time": 0.024800777435302734, + "step": 25526 + }, + { + "epoch": 3.89495849609375e-05, + "step": 25526, + "training_step_time": 0.11639523506164551 + }, + { + "epoch": 3.895111083984375e-05, + "model_forward_time": 0.0248258113861084, + "step": 25527 + }, + { + "epoch": 3.895111083984375e-05, + "step": 25527, + "training_step_time": 0.10590386390686035 + }, + { + "epoch": 3.895263671875e-05, + "model_forward_time": 0.025141239166259766, + "step": 25528 + }, + { + "epoch": 3.895263671875e-05, + "step": 25528, + "training_step_time": 0.12512445449829102 + }, + { + "epoch": 3.895416259765625e-05, + "model_forward_time": 0.025000810623168945, + "step": 25529 + }, + { + "epoch": 3.895416259765625e-05, + "step": 25529, + "training_step_time": 0.12553691864013672 + }, + { + "epoch": 3.89556884765625e-05, + "grad_norm": 0.19904199242591858, + "learning_rate": 5.947856562792925e-06, + "loss": 0.0089, + "step": 25530 + }, + { + "epoch": 3.89556884765625e-05, + "model_forward_time": 0.025087594985961914, + "step": 25530 + }, + { + "epoch": 3.89556884765625e-05, + "step": 25530, + "training_step_time": 0.11814475059509277 + }, + { + "epoch": 3.895721435546875e-05, + "model_forward_time": 0.025214195251464844, + "step": 25531 + }, + { + "epoch": 3.895721435546875e-05, + "step": 25531, + "training_step_time": 0.10726809501647949 + }, + { + "epoch": 3.8958740234375e-05, + "model_forward_time": 0.024995088577270508, + "step": 25532 + }, + { + "epoch": 3.8958740234375e-05, + "step": 25532, + "training_step_time": 0.10777974128723145 + }, + { + "epoch": 3.896026611328125e-05, + "model_forward_time": 0.025814533233642578, + "step": 25533 + }, + { + "epoch": 3.896026611328125e-05, + "step": 25533, + "training_step_time": 0.10593247413635254 + }, + { + "epoch": 3.89617919921875e-05, + "model_forward_time": 0.02538609504699707, + "step": 25534 + }, + { + "epoch": 3.89617919921875e-05, + "step": 25534, + "training_step_time": 0.12034368515014648 + }, + { + "epoch": 3.896331787109375e-05, + "model_forward_time": 0.025116443634033203, + "step": 25535 + }, + { + "epoch": 3.896331787109375e-05, + "step": 25535, + "training_step_time": 0.10885787010192871 + }, + { + "epoch": 3.896484375e-05, + "model_forward_time": 0.02543926239013672, + "step": 25536 + }, + { + "epoch": 3.896484375e-05, + "step": 25536, + "training_step_time": 0.11658310890197754 + }, + { + "epoch": 3.896636962890625e-05, + "model_forward_time": 0.025649547576904297, + "step": 25537 + }, + { + "epoch": 3.896636962890625e-05, + "step": 25537, + "training_step_time": 0.14539098739624023 + }, + { + "epoch": 3.89678955078125e-05, + "model_forward_time": 0.025517702102661133, + "step": 25538 + }, + { + "epoch": 3.89678955078125e-05, + "step": 25538, + "training_step_time": 0.21758008003234863 + }, + { + "epoch": 3.896942138671875e-05, + "model_forward_time": 0.024441957473754883, + "step": 25539 + }, + { + "epoch": 3.896942138671875e-05, + "step": 25539, + "training_step_time": 0.1700425148010254 + }, + { + "epoch": 3.8970947265625e-05, + "grad_norm": 0.22975318133831024, + "learning_rate": 5.921811616543821e-06, + "loss": 0.0053, + "step": 25540 + }, + { + "epoch": 3.8970947265625e-05, + "model_forward_time": 0.024410009384155273, + "step": 25540 + }, + { + "epoch": 3.8970947265625e-05, + "step": 25540, + "training_step_time": 0.1416919231414795 + }, + { + "epoch": 3.897247314453125e-05, + "model_forward_time": 0.024623394012451172, + "step": 25541 + }, + { + "epoch": 3.897247314453125e-05, + "step": 25541, + "training_step_time": 0.14141368865966797 + }, + { + "epoch": 3.89739990234375e-05, + "model_forward_time": 0.02434563636779785, + "step": 25542 + }, + { + "epoch": 3.89739990234375e-05, + "step": 25542, + "training_step_time": 0.126176118850708 + }, + { + "epoch": 3.897552490234375e-05, + "model_forward_time": 0.02461528778076172, + "step": 25543 + }, + { + "epoch": 3.897552490234375e-05, + "step": 25543, + "training_step_time": 0.1281261444091797 + }, + { + "epoch": 3.897705078125e-05, + "model_forward_time": 0.024849653244018555, + "step": 25544 + }, + { + "epoch": 3.897705078125e-05, + "step": 25544, + "training_step_time": 0.11936807632446289 + }, + { + "epoch": 3.897857666015625e-05, + "model_forward_time": 0.025206565856933594, + "step": 25545 + }, + { + "epoch": 3.897857666015625e-05, + "step": 25545, + "training_step_time": 0.13366270065307617 + }, + { + "epoch": 3.89801025390625e-05, + "model_forward_time": 0.02492046356201172, + "step": 25546 + }, + { + "epoch": 3.89801025390625e-05, + "step": 25546, + "training_step_time": 0.12001824378967285 + }, + { + "epoch": 3.898162841796875e-05, + "model_forward_time": 0.025002002716064453, + "step": 25547 + }, + { + "epoch": 3.898162841796875e-05, + "step": 25547, + "training_step_time": 0.2169508934020996 + }, + { + "epoch": 3.8983154296875e-05, + "model_forward_time": 0.024854421615600586, + "step": 25548 + }, + { + "epoch": 3.8983154296875e-05, + "step": 25548, + "training_step_time": 0.10827016830444336 + }, + { + "epoch": 3.898468017578125e-05, + "model_forward_time": 0.024919748306274414, + "step": 25549 + }, + { + "epoch": 3.898468017578125e-05, + "step": 25549, + "training_step_time": 0.11370134353637695 + }, + { + "epoch": 3.89862060546875e-05, + "grad_norm": 0.18529093265533447, + "learning_rate": 5.895820229449906e-06, + "loss": 0.0036, + "step": 25550 + }, + { + "epoch": 3.89862060546875e-05, + "model_forward_time": 0.025145769119262695, + "step": 25550 + }, + { + "epoch": 3.89862060546875e-05, + "step": 25550, + "training_step_time": 0.11251330375671387 + }, + { + "epoch": 3.898773193359375e-05, + "model_forward_time": 0.02513885498046875, + "step": 25551 + }, + { + "epoch": 3.898773193359375e-05, + "step": 25551, + "training_step_time": 0.10723400115966797 + }, + { + "epoch": 3.89892578125e-05, + "model_forward_time": 0.02491307258605957, + "step": 25552 + }, + { + "epoch": 3.89892578125e-05, + "step": 25552, + "training_step_time": 0.10551571846008301 + }, + { + "epoch": 3.899078369140625e-05, + "model_forward_time": 0.025411367416381836, + "step": 25553 + }, + { + "epoch": 3.899078369140625e-05, + "step": 25553, + "training_step_time": 0.10664844512939453 + }, + { + "epoch": 3.89923095703125e-05, + "model_forward_time": 0.025348663330078125, + "step": 25554 + }, + { + "epoch": 3.89923095703125e-05, + "step": 25554, + "training_step_time": 0.11042571067810059 + }, + { + "epoch": 3.899383544921875e-05, + "model_forward_time": 0.02509307861328125, + "step": 25555 + }, + { + "epoch": 3.899383544921875e-05, + "step": 25555, + "training_step_time": 0.10487008094787598 + }, + { + "epoch": 3.8995361328125e-05, + "model_forward_time": 0.025470972061157227, + "step": 25556 + }, + { + "epoch": 3.8995361328125e-05, + "step": 25556, + "training_step_time": 0.10513949394226074 + }, + { + "epoch": 3.899688720703125e-05, + "model_forward_time": 0.02498602867126465, + "step": 25557 + }, + { + "epoch": 3.899688720703125e-05, + "step": 25557, + "training_step_time": 0.10964155197143555 + }, + { + "epoch": 3.89984130859375e-05, + "model_forward_time": 0.02649235725402832, + "step": 25558 + }, + { + "epoch": 3.89984130859375e-05, + "step": 25558, + "training_step_time": 0.11021280288696289 + }, + { + "epoch": 3.899993896484375e-05, + "model_forward_time": 0.025105714797973633, + "step": 25559 + }, + { + "epoch": 3.899993896484375e-05, + "step": 25559, + "training_step_time": 0.1058199405670166 + }, + { + "epoch": 3.900146484375e-05, + "grad_norm": 0.29988551139831543, + "learning_rate": 5.869882433093155e-06, + "loss": 0.0076, + "step": 25560 + }, + { + "epoch": 3.900146484375e-05, + "model_forward_time": 0.025360822677612305, + "step": 25560 + }, + { + "epoch": 3.900146484375e-05, + "step": 25560, + "training_step_time": 0.10655856132507324 + }, + { + "epoch": 3.900299072265625e-05, + "model_forward_time": 0.02500176429748535, + "step": 25561 + }, + { + "epoch": 3.900299072265625e-05, + "step": 25561, + "training_step_time": 0.13875365257263184 + }, + { + "epoch": 3.90045166015625e-05, + "model_forward_time": 0.02505636215209961, + "step": 25562 + }, + { + "epoch": 3.90045166015625e-05, + "step": 25562, + "training_step_time": 0.1727886199951172 + }, + { + "epoch": 3.900604248046875e-05, + "model_forward_time": 0.02421259880065918, + "step": 25563 + }, + { + "epoch": 3.900604248046875e-05, + "step": 25563, + "training_step_time": 0.10332989692687988 + }, + { + "epoch": 3.9007568359375e-05, + "model_forward_time": 0.024941682815551758, + "step": 25564 + }, + { + "epoch": 3.9007568359375e-05, + "step": 25564, + "training_step_time": 0.14249873161315918 + }, + { + "epoch": 3.900909423828125e-05, + "model_forward_time": 0.024708986282348633, + "step": 25565 + }, + { + "epoch": 3.900909423828125e-05, + "step": 25565, + "training_step_time": 0.14078879356384277 + }, + { + "epoch": 3.90106201171875e-05, + "model_forward_time": 0.024506807327270508, + "step": 25566 + }, + { + "epoch": 3.90106201171875e-05, + "step": 25566, + "training_step_time": 0.10442519187927246 + }, + { + "epoch": 3.901214599609375e-05, + "model_forward_time": 0.0277099609375, + "step": 25567 + }, + { + "epoch": 3.901214599609375e-05, + "step": 25567, + "training_step_time": 0.10838723182678223 + }, + { + "epoch": 3.9013671875e-05, + "model_forward_time": 0.02527618408203125, + "step": 25568 + }, + { + "epoch": 3.9013671875e-05, + "step": 25568, + "training_step_time": 0.10336041450500488 + }, + { + "epoch": 3.901519775390625e-05, + "model_forward_time": 0.025417804718017578, + "step": 25569 + }, + { + "epoch": 3.901519775390625e-05, + "step": 25569, + "training_step_time": 0.10367798805236816 + }, + { + "epoch": 3.90167236328125e-05, + "grad_norm": 0.16870911419391632, + "learning_rate": 5.843998258990452e-06, + "loss": 0.0053, + "step": 25570 + }, + { + "epoch": 3.90167236328125e-05, + "model_forward_time": 0.02520155906677246, + "step": 25570 + }, + { + "epoch": 3.90167236328125e-05, + "step": 25570, + "training_step_time": 0.10549402236938477 + }, + { + "epoch": 3.901824951171875e-05, + "model_forward_time": 0.026612281799316406, + "step": 25571 + }, + { + "epoch": 3.901824951171875e-05, + "step": 25571, + "training_step_time": 0.12080693244934082 + }, + { + "epoch": 3.9019775390625e-05, + "model_forward_time": 0.02537369728088379, + "step": 25572 + }, + { + "epoch": 3.9019775390625e-05, + "step": 25572, + "training_step_time": 0.11647725105285645 + }, + { + "epoch": 3.902130126953125e-05, + "model_forward_time": 0.025553226470947266, + "step": 25573 + }, + { + "epoch": 3.902130126953125e-05, + "step": 25573, + "training_step_time": 0.11553478240966797 + }, + { + "epoch": 3.90228271484375e-05, + "model_forward_time": 0.025547504425048828, + "step": 25574 + }, + { + "epoch": 3.90228271484375e-05, + "step": 25574, + "training_step_time": 0.11660146713256836 + }, + { + "epoch": 3.902435302734375e-05, + "model_forward_time": 0.025110960006713867, + "step": 25575 + }, + { + "epoch": 3.902435302734375e-05, + "step": 25575, + "training_step_time": 0.13137245178222656 + }, + { + "epoch": 3.902587890625e-05, + "model_forward_time": 0.025641679763793945, + "step": 25576 + }, + { + "epoch": 3.902587890625e-05, + "step": 25576, + "training_step_time": 0.13186287879943848 + }, + { + "epoch": 3.902740478515625e-05, + "model_forward_time": 0.0248410701751709, + "step": 25577 + }, + { + "epoch": 3.902740478515625e-05, + "step": 25577, + "training_step_time": 0.1991100311279297 + }, + { + "epoch": 3.90289306640625e-05, + "model_forward_time": 0.025206804275512695, + "step": 25578 + }, + { + "epoch": 3.90289306640625e-05, + "step": 25578, + "training_step_time": 0.10902571678161621 + }, + { + "epoch": 3.903045654296875e-05, + "model_forward_time": 0.024801254272460938, + "step": 25579 + }, + { + "epoch": 3.903045654296875e-05, + "step": 25579, + "training_step_time": 0.11234736442565918 + }, + { + "epoch": 3.9031982421875e-05, + "grad_norm": 0.07595854252576828, + "learning_rate": 5.818167738593505e-06, + "loss": 0.0044, + "step": 25580 + }, + { + "epoch": 3.9031982421875e-05, + "model_forward_time": 0.025113344192504883, + "step": 25580 + }, + { + "epoch": 3.9031982421875e-05, + "step": 25580, + "training_step_time": 0.11252951622009277 + }, + { + "epoch": 3.903350830078125e-05, + "model_forward_time": 0.02551102638244629, + "step": 25581 + }, + { + "epoch": 3.903350830078125e-05, + "step": 25581, + "training_step_time": 0.11149215698242188 + }, + { + "epoch": 3.90350341796875e-05, + "model_forward_time": 0.02556443214416504, + "step": 25582 + }, + { + "epoch": 3.90350341796875e-05, + "step": 25582, + "training_step_time": 0.15487217903137207 + }, + { + "epoch": 3.903656005859375e-05, + "model_forward_time": 0.024904489517211914, + "step": 25583 + }, + { + "epoch": 3.903656005859375e-05, + "step": 25583, + "training_step_time": 0.15715956687927246 + }, + { + "epoch": 3.90380859375e-05, + "model_forward_time": 0.02442646026611328, + "step": 25584 + }, + { + "epoch": 3.90380859375e-05, + "step": 25584, + "training_step_time": 0.12434697151184082 + }, + { + "epoch": 3.903961181640625e-05, + "model_forward_time": 0.024468183517456055, + "step": 25585 + }, + { + "epoch": 3.903961181640625e-05, + "step": 25585, + "training_step_time": 0.10962200164794922 + }, + { + "epoch": 3.90411376953125e-05, + "model_forward_time": 0.025383949279785156, + "step": 25586 + }, + { + "epoch": 3.90411376953125e-05, + "step": 25586, + "training_step_time": 0.10750842094421387 + }, + { + "epoch": 3.904266357421875e-05, + "model_forward_time": 0.0253145694732666, + "step": 25587 + }, + { + "epoch": 3.904266357421875e-05, + "step": 25587, + "training_step_time": 0.10890340805053711 + }, + { + "epoch": 3.9044189453125e-05, + "model_forward_time": 0.024791955947875977, + "step": 25588 + }, + { + "epoch": 3.9044189453125e-05, + "step": 25588, + "training_step_time": 0.10819697380065918 + }, + { + "epoch": 3.904571533203125e-05, + "model_forward_time": 0.024837732315063477, + "step": 25589 + }, + { + "epoch": 3.904571533203125e-05, + "step": 25589, + "training_step_time": 0.10453629493713379 + }, + { + "epoch": 3.90472412109375e-05, + "grad_norm": 0.0564974881708622, + "learning_rate": 5.79239090328883e-06, + "loss": 0.0061, + "step": 25590 + }, + { + "epoch": 3.90472412109375e-05, + "model_forward_time": 0.025051355361938477, + "step": 25590 + }, + { + "epoch": 3.90472412109375e-05, + "step": 25590, + "training_step_time": 0.10926365852355957 + }, + { + "epoch": 3.904876708984375e-05, + "model_forward_time": 0.02535700798034668, + "step": 25591 + }, + { + "epoch": 3.904876708984375e-05, + "step": 25591, + "training_step_time": 0.17145252227783203 + }, + { + "epoch": 3.905029296875e-05, + "model_forward_time": 0.023957490921020508, + "step": 25592 + }, + { + "epoch": 3.905029296875e-05, + "step": 25592, + "training_step_time": 0.1307966709136963 + }, + { + "epoch": 3.905181884765625e-05, + "model_forward_time": 0.024610519409179688, + "step": 25593 + }, + { + "epoch": 3.905181884765625e-05, + "step": 25593, + "training_step_time": 0.20780324935913086 + }, + { + "epoch": 3.90533447265625e-05, + "model_forward_time": 0.023279190063476562, + "step": 25594 + }, + { + "epoch": 3.90533447265625e-05, + "step": 25594, + "training_step_time": 0.10392260551452637 + }, + { + "epoch": 3.905487060546875e-05, + "model_forward_time": 0.024003267288208008, + "step": 25595 + }, + { + "epoch": 3.905487060546875e-05, + "step": 25595, + "training_step_time": 0.10835456848144531 + }, + { + "epoch": 3.9056396484375e-05, + "model_forward_time": 0.02535080909729004, + "step": 25596 + }, + { + "epoch": 3.9056396484375e-05, + "step": 25596, + "training_step_time": 0.16095876693725586 + }, + { + "epoch": 3.905792236328125e-05, + "model_forward_time": 0.024729013442993164, + "step": 25597 + }, + { + "epoch": 3.905792236328125e-05, + "step": 25597, + "training_step_time": 0.10477471351623535 + }, + { + "epoch": 3.90594482421875e-05, + "model_forward_time": 0.024656295776367188, + "step": 25598 + }, + { + "epoch": 3.90594482421875e-05, + "step": 25598, + "training_step_time": 0.11065888404846191 + }, + { + "epoch": 3.906097412109375e-05, + "model_forward_time": 0.025243282318115234, + "step": 25599 + }, + { + "epoch": 3.906097412109375e-05, + "step": 25599, + "training_step_time": 0.10509681701660156 + }, + { + "epoch": 3.90625e-05, + "grad_norm": 0.07542379200458527, + "learning_rate": 5.766667784397706e-06, + "loss": 0.0044, + "step": 25600 + }, + { + "epoch": 3.90625e-05, + "model_forward_time": 0.025211334228515625, + "step": 25600 + }, + { + "epoch": 3.90625e-05, + "step": 25600, + "training_step_time": 0.10764813423156738 + }, + { + "epoch": 3.906402587890625e-05, + "model_forward_time": 0.025035858154296875, + "step": 25601 + }, + { + "epoch": 3.906402587890625e-05, + "step": 25601, + "training_step_time": 0.10495615005493164 + }, + { + "epoch": 3.90655517578125e-05, + "model_forward_time": 0.024874210357666016, + "step": 25602 + }, + { + "epoch": 3.90655517578125e-05, + "step": 25602, + "training_step_time": 0.10628414154052734 + }, + { + "epoch": 3.906707763671875e-05, + "model_forward_time": 0.025064706802368164, + "step": 25603 + }, + { + "epoch": 3.906707763671875e-05, + "step": 25603, + "training_step_time": 0.10490751266479492 + }, + { + "epoch": 3.9068603515625e-05, + "model_forward_time": 0.0250551700592041, + "step": 25604 + }, + { + "epoch": 3.9068603515625e-05, + "step": 25604, + "training_step_time": 0.10753703117370605 + }, + { + "epoch": 3.907012939453125e-05, + "model_forward_time": 0.025205612182617188, + "step": 25605 + }, + { + "epoch": 3.907012939453125e-05, + "step": 25605, + "training_step_time": 0.1823282241821289 + }, + { + "epoch": 3.90716552734375e-05, + "model_forward_time": 0.024515628814697266, + "step": 25606 + }, + { + "epoch": 3.90716552734375e-05, + "step": 25606, + "training_step_time": 0.16070127487182617 + }, + { + "epoch": 3.907318115234375e-05, + "model_forward_time": 0.024546384811401367, + "step": 25607 + }, + { + "epoch": 3.907318115234375e-05, + "step": 25607, + "training_step_time": 0.15581202507019043 + }, + { + "epoch": 3.907470703125e-05, + "model_forward_time": 0.024753332138061523, + "step": 25608 + }, + { + "epoch": 3.907470703125e-05, + "step": 25608, + "training_step_time": 0.11556077003479004 + }, + { + "epoch": 3.907623291015625e-05, + "model_forward_time": 0.02471303939819336, + "step": 25609 + }, + { + "epoch": 3.907623291015625e-05, + "step": 25609, + "training_step_time": 0.15000581741333008 + }, + { + "epoch": 3.90777587890625e-05, + "grad_norm": 0.06577904522418976, + "learning_rate": 5.740998413176163e-06, + "loss": 0.0073, + "step": 25610 + }, + { + "epoch": 3.90777587890625e-05, + "model_forward_time": 0.02368474006652832, + "step": 25610 + }, + { + "epoch": 3.90777587890625e-05, + "step": 25610, + "training_step_time": 0.1308121681213379 + }, + { + "epoch": 3.907928466796875e-05, + "model_forward_time": 0.024390220642089844, + "step": 25611 + }, + { + "epoch": 3.907928466796875e-05, + "step": 25611, + "training_step_time": 0.13128447532653809 + }, + { + "epoch": 3.9080810546875e-05, + "model_forward_time": 0.023720741271972656, + "step": 25612 + }, + { + "epoch": 3.9080810546875e-05, + "step": 25612, + "training_step_time": 0.12599420547485352 + }, + { + "epoch": 3.908233642578125e-05, + "model_forward_time": 0.023225784301757812, + "step": 25613 + }, + { + "epoch": 3.908233642578125e-05, + "step": 25613, + "training_step_time": 0.12048888206481934 + }, + { + "epoch": 3.90838623046875e-05, + "model_forward_time": 0.023950576782226562, + "step": 25614 + }, + { + "epoch": 3.90838623046875e-05, + "step": 25614, + "training_step_time": 0.1151275634765625 + }, + { + "epoch": 3.908538818359375e-05, + "model_forward_time": 0.024353504180908203, + "step": 25615 + }, + { + "epoch": 3.908538818359375e-05, + "step": 25615, + "training_step_time": 0.19266080856323242 + }, + { + "epoch": 3.90869140625e-05, + "model_forward_time": 0.024433612823486328, + "step": 25616 + }, + { + "epoch": 3.90869140625e-05, + "step": 25616, + "training_step_time": 0.12280988693237305 + }, + { + "epoch": 3.908843994140625e-05, + "model_forward_time": 0.02443528175354004, + "step": 25617 + }, + { + "epoch": 3.908843994140625e-05, + "step": 25617, + "training_step_time": 0.10584902763366699 + }, + { + "epoch": 3.90899658203125e-05, + "model_forward_time": 0.025110483169555664, + "step": 25618 + }, + { + "epoch": 3.90899658203125e-05, + "step": 25618, + "training_step_time": 0.12384200096130371 + }, + { + "epoch": 3.909149169921875e-05, + "model_forward_time": 0.025412321090698242, + "step": 25619 + }, + { + "epoch": 3.909149169921875e-05, + "step": 25619, + "training_step_time": 0.12585735321044922 + }, + { + "epoch": 3.9093017578125e-05, + "grad_norm": 0.11997129768133163, + "learning_rate": 5.715382820814885e-06, + "loss": 0.0072, + "step": 25620 + }, + { + "epoch": 3.9093017578125e-05, + "model_forward_time": 0.024902820587158203, + "step": 25620 + }, + { + "epoch": 3.9093017578125e-05, + "step": 25620, + "training_step_time": 0.11174440383911133 + }, + { + "epoch": 3.909454345703125e-05, + "model_forward_time": 0.02500295639038086, + "step": 25621 + }, + { + "epoch": 3.909454345703125e-05, + "step": 25621, + "training_step_time": 0.11805343627929688 + }, + { + "epoch": 3.90960693359375e-05, + "model_forward_time": 0.02550220489501953, + "step": 25622 + }, + { + "epoch": 3.90960693359375e-05, + "step": 25622, + "training_step_time": 0.10918116569519043 + }, + { + "epoch": 3.909759521484375e-05, + "model_forward_time": 0.026175260543823242, + "step": 25623 + }, + { + "epoch": 3.909759521484375e-05, + "step": 25623, + "training_step_time": 0.10597038269042969 + }, + { + "epoch": 3.909912109375e-05, + "model_forward_time": 0.025219202041625977, + "step": 25624 + }, + { + "epoch": 3.909912109375e-05, + "step": 25624, + "training_step_time": 0.1372206211090088 + }, + { + "epoch": 3.910064697265625e-05, + "model_forward_time": 0.025004148483276367, + "step": 25625 + }, + { + "epoch": 3.910064697265625e-05, + "step": 25625, + "training_step_time": 0.18448543548583984 + }, + { + "epoch": 3.91021728515625e-05, + "model_forward_time": 0.024839162826538086, + "step": 25626 + }, + { + "epoch": 3.91021728515625e-05, + "step": 25626, + "training_step_time": 0.1083991527557373 + }, + { + "epoch": 3.910369873046875e-05, + "model_forward_time": 0.024603843688964844, + "step": 25627 + }, + { + "epoch": 3.910369873046875e-05, + "step": 25627, + "training_step_time": 0.104400634765625 + }, + { + "epoch": 3.9105224609375e-05, + "model_forward_time": 0.02505183219909668, + "step": 25628 + }, + { + "epoch": 3.9105224609375e-05, + "step": 25628, + "training_step_time": 0.12014245986938477 + }, + { + "epoch": 3.910675048828125e-05, + "model_forward_time": 0.025826454162597656, + "step": 25629 + }, + { + "epoch": 3.910675048828125e-05, + "step": 25629, + "training_step_time": 0.12395095825195312 + }, + { + "epoch": 3.91082763671875e-05, + "grad_norm": 0.15930169820785522, + "learning_rate": 5.689821038439263e-06, + "loss": 0.0073, + "step": 25630 + }, + { + "epoch": 3.91082763671875e-05, + "model_forward_time": 0.027637243270874023, + "step": 25630 + }, + { + "epoch": 3.91082763671875e-05, + "step": 25630, + "training_step_time": 0.11075472831726074 + }, + { + "epoch": 3.910980224609375e-05, + "model_forward_time": 0.02682781219482422, + "step": 25631 + }, + { + "epoch": 3.910980224609375e-05, + "step": 25631, + "training_step_time": 0.11002898216247559 + }, + { + "epoch": 3.9111328125e-05, + "model_forward_time": 0.026536941528320312, + "step": 25632 + }, + { + "epoch": 3.9111328125e-05, + "step": 25632, + "training_step_time": 0.10537457466125488 + }, + { + "epoch": 3.911285400390625e-05, + "model_forward_time": 0.02462172508239746, + "step": 25633 + }, + { + "epoch": 3.911285400390625e-05, + "step": 25633, + "training_step_time": 0.10932016372680664 + }, + { + "epoch": 3.91143798828125e-05, + "model_forward_time": 0.025548696517944336, + "step": 25634 + }, + { + "epoch": 3.91143798828125e-05, + "step": 25634, + "training_step_time": 0.10496759414672852 + }, + { + "epoch": 3.911590576171875e-05, + "model_forward_time": 0.02543187141418457, + "step": 25635 + }, + { + "epoch": 3.911590576171875e-05, + "step": 25635, + "training_step_time": 0.10525250434875488 + }, + { + "epoch": 3.9117431640625e-05, + "model_forward_time": 0.025232315063476562, + "step": 25636 + }, + { + "epoch": 3.9117431640625e-05, + "step": 25636, + "training_step_time": 0.15641379356384277 + }, + { + "epoch": 3.911895751953125e-05, + "model_forward_time": 0.025319814682006836, + "step": 25637 + }, + { + "epoch": 3.911895751953125e-05, + "step": 25637, + "training_step_time": 0.11387395858764648 + }, + { + "epoch": 3.91204833984375e-05, + "model_forward_time": 0.025980472564697266, + "step": 25638 + }, + { + "epoch": 3.91204833984375e-05, + "step": 25638, + "training_step_time": 0.21283435821533203 + }, + { + "epoch": 3.912200927734375e-05, + "model_forward_time": 0.025949716567993164, + "step": 25639 + }, + { + "epoch": 3.912200927734375e-05, + "step": 25639, + "training_step_time": 0.10436105728149414 + }, + { + "epoch": 3.912353515625e-05, + "grad_norm": 0.07371170818805695, + "learning_rate": 5.6643130971092525e-06, + "loss": 0.0029, + "step": 25640 + }, + { + "epoch": 3.912353515625e-05, + "model_forward_time": 0.025353670120239258, + "step": 25640 + }, + { + "epoch": 3.912353515625e-05, + "step": 25640, + "training_step_time": 0.11340928077697754 + }, + { + "epoch": 3.912506103515625e-05, + "model_forward_time": 0.02543354034423828, + "step": 25641 + }, + { + "epoch": 3.912506103515625e-05, + "step": 25641, + "training_step_time": 0.19071650505065918 + }, + { + "epoch": 3.91265869140625e-05, + "model_forward_time": 0.024455785751342773, + "step": 25642 + }, + { + "epoch": 3.91265869140625e-05, + "step": 25642, + "training_step_time": 0.10558080673217773 + }, + { + "epoch": 3.912811279296875e-05, + "model_forward_time": 0.0236358642578125, + "step": 25643 + }, + { + "epoch": 3.912811279296875e-05, + "step": 25643, + "training_step_time": 0.10583257675170898 + }, + { + "epoch": 3.9129638671875e-05, + "model_forward_time": 0.025152921676635742, + "step": 25644 + }, + { + "epoch": 3.9129638671875e-05, + "step": 25644, + "training_step_time": 0.10885071754455566 + }, + { + "epoch": 3.913116455078125e-05, + "model_forward_time": 0.02553868293762207, + "step": 25645 + }, + { + "epoch": 3.913116455078125e-05, + "step": 25645, + "training_step_time": 0.10665583610534668 + }, + { + "epoch": 3.91326904296875e-05, + "model_forward_time": 0.02538466453552246, + "step": 25646 + }, + { + "epoch": 3.91326904296875e-05, + "step": 25646, + "training_step_time": 0.1088404655456543 + }, + { + "epoch": 3.913421630859375e-05, + "model_forward_time": 0.025075435638427734, + "step": 25647 + }, + { + "epoch": 3.913421630859375e-05, + "step": 25647, + "training_step_time": 0.10593247413635254 + }, + { + "epoch": 3.91357421875e-05, + "model_forward_time": 0.02521657943725586, + "step": 25648 + }, + { + "epoch": 3.91357421875e-05, + "step": 25648, + "training_step_time": 0.10477113723754883 + }, + { + "epoch": 3.913726806640625e-05, + "model_forward_time": 0.02536940574645996, + "step": 25649 + }, + { + "epoch": 3.913726806640625e-05, + "step": 25649, + "training_step_time": 0.12700748443603516 + }, + { + "epoch": 3.91387939453125e-05, + "grad_norm": 0.16840949654579163, + "learning_rate": 5.6388590278194096e-06, + "loss": 0.0073, + "step": 25650 + }, + { + "epoch": 3.91387939453125e-05, + "model_forward_time": 0.02490377426147461, + "step": 25650 + }, + { + "epoch": 3.91387939453125e-05, + "step": 25650, + "training_step_time": 0.15639615058898926 + }, + { + "epoch": 3.914031982421875e-05, + "model_forward_time": 0.024352073669433594, + "step": 25651 + }, + { + "epoch": 3.914031982421875e-05, + "step": 25651, + "training_step_time": 0.1851494312286377 + }, + { + "epoch": 3.9141845703125e-05, + "model_forward_time": 0.02477884292602539, + "step": 25652 + }, + { + "epoch": 3.9141845703125e-05, + "step": 25652, + "training_step_time": 0.1990673542022705 + }, + { + "epoch": 3.914337158203125e-05, + "model_forward_time": 0.024531841278076172, + "step": 25653 + }, + { + "epoch": 3.914337158203125e-05, + "step": 25653, + "training_step_time": 0.10471057891845703 + }, + { + "epoch": 3.91448974609375e-05, + "model_forward_time": 0.026381492614746094, + "step": 25654 + }, + { + "epoch": 3.91448974609375e-05, + "step": 25654, + "training_step_time": 0.10498905181884766 + }, + { + "epoch": 3.914642333984375e-05, + "model_forward_time": 0.025449037551879883, + "step": 25655 + }, + { + "epoch": 3.914642333984375e-05, + "step": 25655, + "training_step_time": 0.10742378234863281 + }, + { + "epoch": 3.914794921875e-05, + "model_forward_time": 0.025290489196777344, + "step": 25656 + }, + { + "epoch": 3.914794921875e-05, + "step": 25656, + "training_step_time": 0.10626339912414551 + }, + { + "epoch": 3.914947509765625e-05, + "model_forward_time": 0.028519153594970703, + "step": 25657 + }, + { + "epoch": 3.914947509765625e-05, + "step": 25657, + "training_step_time": 0.11069893836975098 + }, + { + "epoch": 3.91510009765625e-05, + "model_forward_time": 0.025428295135498047, + "step": 25658 + }, + { + "epoch": 3.91510009765625e-05, + "step": 25658, + "training_step_time": 0.10828733444213867 + }, + { + "epoch": 3.915252685546875e-05, + "model_forward_time": 0.025597095489501953, + "step": 25659 + }, + { + "epoch": 3.915252685546875e-05, + "step": 25659, + "training_step_time": 0.10703015327453613 + }, + { + "epoch": 3.9154052734375e-05, + "grad_norm": 0.08039369434118271, + "learning_rate": 5.613458861498832e-06, + "loss": 0.0021, + "step": 25660 + }, + { + "epoch": 3.9154052734375e-05, + "model_forward_time": 0.026313304901123047, + "step": 25660 + }, + { + "epoch": 3.9154052734375e-05, + "step": 25660, + "training_step_time": 0.1084744930267334 + }, + { + "epoch": 3.915557861328125e-05, + "model_forward_time": 0.025353193283081055, + "step": 25661 + }, + { + "epoch": 3.915557861328125e-05, + "step": 25661, + "training_step_time": 0.19197702407836914 + }, + { + "epoch": 3.91571044921875e-05, + "model_forward_time": 0.02449178695678711, + "step": 25662 + }, + { + "epoch": 3.91571044921875e-05, + "step": 25662, + "training_step_time": 0.11492657661437988 + }, + { + "epoch": 3.915863037109375e-05, + "model_forward_time": 0.02454972267150879, + "step": 25663 + }, + { + "epoch": 3.915863037109375e-05, + "step": 25663, + "training_step_time": 0.10592961311340332 + }, + { + "epoch": 3.916015625e-05, + "model_forward_time": 0.025347471237182617, + "step": 25664 + }, + { + "epoch": 3.916015625e-05, + "step": 25664, + "training_step_time": 0.12412405014038086 + }, + { + "epoch": 3.916168212890625e-05, + "model_forward_time": 0.026282548904418945, + "step": 25665 + }, + { + "epoch": 3.916168212890625e-05, + "step": 25665, + "training_step_time": 0.13102340698242188 + }, + { + "epoch": 3.91632080078125e-05, + "model_forward_time": 0.024775981903076172, + "step": 25666 + }, + { + "epoch": 3.91632080078125e-05, + "step": 25666, + "training_step_time": 0.11069440841674805 + }, + { + "epoch": 3.916473388671875e-05, + "model_forward_time": 0.0256500244140625, + "step": 25667 + }, + { + "epoch": 3.916473388671875e-05, + "step": 25667, + "training_step_time": 0.14937353134155273 + }, + { + "epoch": 3.9166259765625e-05, + "model_forward_time": 0.024778366088867188, + "step": 25668 + }, + { + "epoch": 3.9166259765625e-05, + "step": 25668, + "training_step_time": 0.1075131893157959 + }, + { + "epoch": 3.916778564453125e-05, + "model_forward_time": 0.024269819259643555, + "step": 25669 + }, + { + "epoch": 3.916778564453125e-05, + "step": 25669, + "training_step_time": 0.10494184494018555 + }, + { + "epoch": 3.91693115234375e-05, + "grad_norm": 0.10808060318231583, + "learning_rate": 5.58811262901111e-06, + "loss": 0.0138, + "step": 25670 + }, + { + "epoch": 3.91693115234375e-05, + "model_forward_time": 0.024515390396118164, + "step": 25670 + }, + { + "epoch": 3.91693115234375e-05, + "step": 25670, + "training_step_time": 0.10735058784484863 + }, + { + "epoch": 3.917083740234375e-05, + "model_forward_time": 0.02536463737487793, + "step": 25671 + }, + { + "epoch": 3.917083740234375e-05, + "step": 25671, + "training_step_time": 0.1108241081237793 + }, + { + "epoch": 3.917236328125e-05, + "model_forward_time": 0.024974346160888672, + "step": 25672 + }, + { + "epoch": 3.917236328125e-05, + "step": 25672, + "training_step_time": 0.11455512046813965 + }, + { + "epoch": 3.917388916015625e-05, + "model_forward_time": 0.0255124568939209, + "step": 25673 + }, + { + "epoch": 3.917388916015625e-05, + "step": 25673, + "training_step_time": 0.21761584281921387 + }, + { + "epoch": 3.91754150390625e-05, + "model_forward_time": 0.024854183197021484, + "step": 25674 + }, + { + "epoch": 3.91754150390625e-05, + "step": 25674, + "training_step_time": 0.11709213256835938 + }, + { + "epoch": 3.917694091796875e-05, + "model_forward_time": 0.024358510971069336, + "step": 25675 + }, + { + "epoch": 3.917694091796875e-05, + "step": 25675, + "training_step_time": 0.1076667308807373 + }, + { + "epoch": 3.9178466796875e-05, + "model_forward_time": 0.02534341812133789, + "step": 25676 + }, + { + "epoch": 3.9178466796875e-05, + "step": 25676, + "training_step_time": 0.10744547843933105 + }, + { + "epoch": 3.917999267578125e-05, + "model_forward_time": 0.02511453628540039, + "step": 25677 + }, + { + "epoch": 3.917999267578125e-05, + "step": 25677, + "training_step_time": 0.11441755294799805 + }, + { + "epoch": 3.91815185546875e-05, + "model_forward_time": 0.025025129318237305, + "step": 25678 + }, + { + "epoch": 3.91815185546875e-05, + "step": 25678, + "training_step_time": 0.10583353042602539 + }, + { + "epoch": 3.918304443359375e-05, + "model_forward_time": 0.024213552474975586, + "step": 25679 + }, + { + "epoch": 3.918304443359375e-05, + "step": 25679, + "training_step_time": 0.10575151443481445 + }, + { + "epoch": 3.91845703125e-05, + "grad_norm": 0.062384042888879776, + "learning_rate": 5.562820361154314e-06, + "loss": 0.0027, + "step": 25680 + }, + { + "epoch": 3.91845703125e-05, + "model_forward_time": 0.024627685546875, + "step": 25680 + }, + { + "epoch": 3.91845703125e-05, + "step": 25680, + "training_step_time": 0.11226844787597656 + }, + { + "epoch": 3.918609619140625e-05, + "model_forward_time": 0.024151086807250977, + "step": 25681 + }, + { + "epoch": 3.918609619140625e-05, + "step": 25681, + "training_step_time": 0.1999814510345459 + }, + { + "epoch": 3.91876220703125e-05, + "model_forward_time": 0.02432727813720703, + "step": 25682 + }, + { + "epoch": 3.91876220703125e-05, + "step": 25682, + "training_step_time": 0.21282196044921875 + }, + { + "epoch": 3.918914794921875e-05, + "model_forward_time": 0.02435016632080078, + "step": 25683 + }, + { + "epoch": 3.918914794921875e-05, + "step": 25683, + "training_step_time": 0.1020662784576416 + }, + { + "epoch": 3.9190673828125e-05, + "model_forward_time": 0.02452397346496582, + "step": 25684 + }, + { + "epoch": 3.9190673828125e-05, + "step": 25684, + "training_step_time": 0.12163519859313965 + }, + { + "epoch": 3.919219970703125e-05, + "model_forward_time": 0.024925947189331055, + "step": 25685 + }, + { + "epoch": 3.919219970703125e-05, + "step": 25685, + "training_step_time": 0.19746088981628418 + }, + { + "epoch": 3.91937255859375e-05, + "model_forward_time": 0.023735523223876953, + "step": 25686 + }, + { + "epoch": 3.91937255859375e-05, + "step": 25686, + "training_step_time": 0.10606765747070312 + }, + { + "epoch": 3.919525146484375e-05, + "model_forward_time": 0.02452254295349121, + "step": 25687 + }, + { + "epoch": 3.919525146484375e-05, + "step": 25687, + "training_step_time": 0.1051628589630127 + }, + { + "epoch": 3.919677734375e-05, + "model_forward_time": 0.025292396545410156, + "step": 25688 + }, + { + "epoch": 3.919677734375e-05, + "step": 25688, + "training_step_time": 0.10851192474365234 + }, + { + "epoch": 3.919830322265625e-05, + "model_forward_time": 0.025179147720336914, + "step": 25689 + }, + { + "epoch": 3.919830322265625e-05, + "step": 25689, + "training_step_time": 0.1076967716217041 + }, + { + "epoch": 3.91998291015625e-05, + "grad_norm": 0.056697502732276917, + "learning_rate": 5.537582088660937e-06, + "loss": 0.0038, + "step": 25690 + }, + { + "epoch": 3.91998291015625e-05, + "model_forward_time": 0.024643898010253906, + "step": 25690 + }, + { + "epoch": 3.91998291015625e-05, + "step": 25690, + "training_step_time": 0.10825562477111816 + }, + { + "epoch": 3.920135498046875e-05, + "model_forward_time": 0.02487349510192871, + "step": 25691 + }, + { + "epoch": 3.920135498046875e-05, + "step": 25691, + "training_step_time": 0.10646200180053711 + }, + { + "epoch": 3.9202880859375e-05, + "model_forward_time": 0.024039030075073242, + "step": 25692 + }, + { + "epoch": 3.9202880859375e-05, + "step": 25692, + "training_step_time": 0.14775347709655762 + }, + { + "epoch": 3.920440673828125e-05, + "model_forward_time": 0.025356769561767578, + "step": 25693 + }, + { + "epoch": 3.920440673828125e-05, + "step": 25693, + "training_step_time": 0.1188511848449707 + }, + { + "epoch": 3.92059326171875e-05, + "model_forward_time": 0.025005817413330078, + "step": 25694 + }, + { + "epoch": 3.92059326171875e-05, + "step": 25694, + "training_step_time": 0.11051774024963379 + }, + { + "epoch": 3.920745849609375e-05, + "model_forward_time": 0.025234699249267578, + "step": 25695 + }, + { + "epoch": 3.920745849609375e-05, + "step": 25695, + "training_step_time": 0.11675667762756348 + }, + { + "epoch": 3.9208984375e-05, + "model_forward_time": 0.02503037452697754, + "step": 25696 + }, + { + "epoch": 3.9208984375e-05, + "step": 25696, + "training_step_time": 0.13229918479919434 + }, + { + "epoch": 3.921051025390625e-05, + "model_forward_time": 0.027141332626342773, + "step": 25697 + }, + { + "epoch": 3.921051025390625e-05, + "step": 25697, + "training_step_time": 0.11215829849243164 + }, + { + "epoch": 3.92120361328125e-05, + "model_forward_time": 0.025089263916015625, + "step": 25698 + }, + { + "epoch": 3.92120361328125e-05, + "step": 25698, + "training_step_time": 0.10865283012390137 + }, + { + "epoch": 3.921356201171875e-05, + "model_forward_time": 0.025014638900756836, + "step": 25699 + }, + { + "epoch": 3.921356201171875e-05, + "step": 25699, + "training_step_time": 0.11489462852478027 + }, + { + "epoch": 3.9215087890625e-05, + "grad_norm": 0.22654645144939423, + "learning_rate": 5.512397842197847e-06, + "loss": 0.0035, + "step": 25700 + }, + { + "epoch": 3.9215087890625e-05, + "model_forward_time": 0.025211811065673828, + "step": 25700 + }, + { + "epoch": 3.9215087890625e-05, + "step": 25700, + "training_step_time": 0.10718441009521484 + }, + { + "epoch": 3.921661376953125e-05, + "model_forward_time": 0.025472640991210938, + "step": 25701 + }, + { + "epoch": 3.921661376953125e-05, + "step": 25701, + "training_step_time": 0.10580039024353027 + }, + { + "epoch": 3.92181396484375e-05, + "model_forward_time": 0.025278806686401367, + "step": 25702 + }, + { + "epoch": 3.92181396484375e-05, + "step": 25702, + "training_step_time": 0.10976624488830566 + }, + { + "epoch": 3.921966552734375e-05, + "model_forward_time": 0.0254058837890625, + "step": 25703 + }, + { + "epoch": 3.921966552734375e-05, + "step": 25703, + "training_step_time": 0.10440897941589355 + }, + { + "epoch": 3.922119140625e-05, + "model_forward_time": 0.025019407272338867, + "step": 25704 + }, + { + "epoch": 3.922119140625e-05, + "step": 25704, + "training_step_time": 0.10904145240783691 + }, + { + "epoch": 3.922271728515625e-05, + "model_forward_time": 0.024916410446166992, + "step": 25705 + }, + { + "epoch": 3.922271728515625e-05, + "step": 25705, + "training_step_time": 0.10468459129333496 + }, + { + "epoch": 3.92242431640625e-05, + "model_forward_time": 0.0260467529296875, + "step": 25706 + }, + { + "epoch": 3.92242431640625e-05, + "step": 25706, + "training_step_time": 0.10792136192321777 + }, + { + "epoch": 3.922576904296875e-05, + "model_forward_time": 0.02516460418701172, + "step": 25707 + }, + { + "epoch": 3.922576904296875e-05, + "step": 25707, + "training_step_time": 0.1550905704498291 + }, + { + "epoch": 3.9227294921875e-05, + "model_forward_time": 0.02447032928466797, + "step": 25708 + }, + { + "epoch": 3.9227294921875e-05, + "step": 25708, + "training_step_time": 0.11077237129211426 + }, + { + "epoch": 3.922882080078125e-05, + "model_forward_time": 0.0246274471282959, + "step": 25709 + }, + { + "epoch": 3.922882080078125e-05, + "step": 25709, + "training_step_time": 0.11900806427001953 + }, + { + "epoch": 3.92303466796875e-05, + "grad_norm": 0.09287601709365845, + "learning_rate": 5.48726765236629e-06, + "loss": 0.0045, + "step": 25710 + }, + { + "epoch": 3.92303466796875e-05, + "model_forward_time": 0.024851322174072266, + "step": 25710 + }, + { + "epoch": 3.92303466796875e-05, + "step": 25710, + "training_step_time": 0.10952281951904297 + }, + { + "epoch": 3.923187255859375e-05, + "model_forward_time": 0.025232315063476562, + "step": 25711 + }, + { + "epoch": 3.923187255859375e-05, + "step": 25711, + "training_step_time": 0.13521337509155273 + }, + { + "epoch": 3.92333984375e-05, + "model_forward_time": 0.025053977966308594, + "step": 25712 + }, + { + "epoch": 3.92333984375e-05, + "step": 25712, + "training_step_time": 0.10973072052001953 + }, + { + "epoch": 3.923492431640625e-05, + "model_forward_time": 0.024811744689941406, + "step": 25713 + }, + { + "epoch": 3.923492431640625e-05, + "step": 25713, + "training_step_time": 0.12299489974975586 + }, + { + "epoch": 3.92364501953125e-05, + "model_forward_time": 0.025449275970458984, + "step": 25714 + }, + { + "epoch": 3.92364501953125e-05, + "step": 25714, + "training_step_time": 0.12157297134399414 + }, + { + "epoch": 3.923797607421875e-05, + "model_forward_time": 0.025534629821777344, + "step": 25715 + }, + { + "epoch": 3.923797607421875e-05, + "step": 25715, + "training_step_time": 0.1352531909942627 + }, + { + "epoch": 3.9239501953125e-05, + "model_forward_time": 0.024773120880126953, + "step": 25716 + }, + { + "epoch": 3.9239501953125e-05, + "step": 25716, + "training_step_time": 0.1955418586730957 + }, + { + "epoch": 3.924102783203125e-05, + "model_forward_time": 0.025046586990356445, + "step": 25717 + }, + { + "epoch": 3.924102783203125e-05, + "step": 25717, + "training_step_time": 0.15355563163757324 + }, + { + "epoch": 3.92425537109375e-05, + "model_forward_time": 0.024736642837524414, + "step": 25718 + }, + { + "epoch": 3.92425537109375e-05, + "step": 25718, + "training_step_time": 0.1703050136566162 + }, + { + "epoch": 3.924407958984375e-05, + "model_forward_time": 0.025574445724487305, + "step": 25719 + }, + { + "epoch": 3.924407958984375e-05, + "step": 25719, + "training_step_time": 0.13472294807434082 + }, + { + "epoch": 3.924560546875e-05, + "grad_norm": 0.08618535101413727, + "learning_rate": 5.462191549701806e-06, + "loss": 0.0028, + "step": 25720 + }, + { + "epoch": 3.924560546875e-05, + "model_forward_time": 0.024643421173095703, + "step": 25720 + }, + { + "epoch": 3.924560546875e-05, + "step": 25720, + "training_step_time": 0.12391018867492676 + }, + { + "epoch": 3.924713134765625e-05, + "model_forward_time": 0.024260759353637695, + "step": 25721 + }, + { + "epoch": 3.924713134765625e-05, + "step": 25721, + "training_step_time": 0.11868810653686523 + }, + { + "epoch": 3.92486572265625e-05, + "model_forward_time": 0.025470495223999023, + "step": 25722 + }, + { + "epoch": 3.92486572265625e-05, + "step": 25722, + "training_step_time": 0.11401057243347168 + }, + { + "epoch": 3.925018310546875e-05, + "model_forward_time": 0.02627086639404297, + "step": 25723 + }, + { + "epoch": 3.925018310546875e-05, + "step": 25723, + "training_step_time": 0.11565279960632324 + }, + { + "epoch": 3.9251708984375e-05, + "model_forward_time": 0.025832653045654297, + "step": 25724 + }, + { + "epoch": 3.9251708984375e-05, + "step": 25724, + "training_step_time": 0.11051440238952637 + }, + { + "epoch": 3.925323486328125e-05, + "model_forward_time": 0.025552034378051758, + "step": 25725 + }, + { + "epoch": 3.925323486328125e-05, + "step": 25725, + "training_step_time": 0.20847535133361816 + }, + { + "epoch": 3.92547607421875e-05, + "model_forward_time": 0.025653600692749023, + "step": 25726 + }, + { + "epoch": 3.92547607421875e-05, + "step": 25726, + "training_step_time": 0.12891721725463867 + }, + { + "epoch": 3.925628662109375e-05, + "model_forward_time": 0.02463674545288086, + "step": 25727 + }, + { + "epoch": 3.925628662109375e-05, + "step": 25727, + "training_step_time": 0.1246955394744873 + }, + { + "epoch": 3.92578125e-05, + "model_forward_time": 0.025015592575073242, + "step": 25728 + }, + { + "epoch": 3.92578125e-05, + "step": 25728, + "training_step_time": 0.10704970359802246 + }, + { + "epoch": 3.925933837890625e-05, + "model_forward_time": 0.025545358657836914, + "step": 25729 + }, + { + "epoch": 3.925933837890625e-05, + "step": 25729, + "training_step_time": 0.17906618118286133 + }, + { + "epoch": 3.92608642578125e-05, + "grad_norm": 0.4258580505847931, + "learning_rate": 5.437169564674233e-06, + "loss": 0.0074, + "step": 25730 + }, + { + "epoch": 3.92608642578125e-05, + "model_forward_time": 0.02471303939819336, + "step": 25730 + }, + { + "epoch": 3.92608642578125e-05, + "step": 25730, + "training_step_time": 0.1317582130432129 + }, + { + "epoch": 3.926239013671875e-05, + "model_forward_time": 0.024985551834106445, + "step": 25731 + }, + { + "epoch": 3.926239013671875e-05, + "step": 25731, + "training_step_time": 0.12200403213500977 + }, + { + "epoch": 3.9263916015625e-05, + "model_forward_time": 0.025798797607421875, + "step": 25732 + }, + { + "epoch": 3.9263916015625e-05, + "step": 25732, + "training_step_time": 0.10599493980407715 + }, + { + "epoch": 3.926544189453125e-05, + "model_forward_time": 0.02905583381652832, + "step": 25733 + }, + { + "epoch": 3.926544189453125e-05, + "step": 25733, + "training_step_time": 0.11008810997009277 + }, + { + "epoch": 3.92669677734375e-05, + "model_forward_time": 0.025323867797851562, + "step": 25734 + }, + { + "epoch": 3.92669677734375e-05, + "step": 25734, + "training_step_time": 0.10917782783508301 + }, + { + "epoch": 3.926849365234375e-05, + "model_forward_time": 0.025103330612182617, + "step": 25735 + }, + { + "epoch": 3.926849365234375e-05, + "step": 25735, + "training_step_time": 0.10687589645385742 + }, + { + "epoch": 3.927001953125e-05, + "model_forward_time": 0.02430582046508789, + "step": 25736 + }, + { + "epoch": 3.927001953125e-05, + "step": 25736, + "training_step_time": 0.10477185249328613 + }, + { + "epoch": 3.927154541015625e-05, + "model_forward_time": 0.024344205856323242, + "step": 25737 + }, + { + "epoch": 3.927154541015625e-05, + "step": 25737, + "training_step_time": 0.10497522354125977 + }, + { + "epoch": 3.92730712890625e-05, + "model_forward_time": 0.024689435958862305, + "step": 25738 + }, + { + "epoch": 3.92730712890625e-05, + "step": 25738, + "training_step_time": 0.1224520206451416 + }, + { + "epoch": 3.927459716796875e-05, + "model_forward_time": 0.025063037872314453, + "step": 25739 + }, + { + "epoch": 3.927459716796875e-05, + "step": 25739, + "training_step_time": 0.14628863334655762 + }, + { + "epoch": 3.9276123046875e-05, + "grad_norm": 0.1529744565486908, + "learning_rate": 5.412201727687644e-06, + "loss": 0.0041, + "step": 25740 + }, + { + "epoch": 3.9276123046875e-05, + "model_forward_time": 0.02437138557434082, + "step": 25740 + }, + { + "epoch": 3.9276123046875e-05, + "step": 25740, + "training_step_time": 0.10451459884643555 + }, + { + "epoch": 3.927764892578125e-05, + "model_forward_time": 0.02879023551940918, + "step": 25741 + }, + { + "epoch": 3.927764892578125e-05, + "step": 25741, + "training_step_time": 0.10657334327697754 + }, + { + "epoch": 3.92791748046875e-05, + "model_forward_time": 0.02557826042175293, + "step": 25742 + }, + { + "epoch": 3.92791748046875e-05, + "step": 25742, + "training_step_time": 0.11675786972045898 + }, + { + "epoch": 3.928070068359375e-05, + "model_forward_time": 0.025258541107177734, + "step": 25743 + }, + { + "epoch": 3.928070068359375e-05, + "step": 25743, + "training_step_time": 0.1812269687652588 + }, + { + "epoch": 3.92822265625e-05, + "model_forward_time": 0.024876832962036133, + "step": 25744 + }, + { + "epoch": 3.92822265625e-05, + "step": 25744, + "training_step_time": 0.12037539482116699 + }, + { + "epoch": 3.928375244140625e-05, + "model_forward_time": 0.02480792999267578, + "step": 25745 + }, + { + "epoch": 3.928375244140625e-05, + "step": 25745, + "training_step_time": 0.10139083862304688 + }, + { + "epoch": 3.92852783203125e-05, + "model_forward_time": 0.025223493576049805, + "step": 25746 + }, + { + "epoch": 3.92852783203125e-05, + "step": 25746, + "training_step_time": 0.1031332015991211 + }, + { + "epoch": 3.928680419921875e-05, + "model_forward_time": 0.025554656982421875, + "step": 25747 + }, + { + "epoch": 3.928680419921875e-05, + "step": 25747, + "training_step_time": 0.10414552688598633 + }, + { + "epoch": 3.9288330078125e-05, + "model_forward_time": 0.0257260799407959, + "step": 25748 + }, + { + "epoch": 3.9288330078125e-05, + "step": 25748, + "training_step_time": 0.10771751403808594 + }, + { + "epoch": 3.928985595703125e-05, + "model_forward_time": 0.029303789138793945, + "step": 25749 + }, + { + "epoch": 3.928985595703125e-05, + "step": 25749, + "training_step_time": 0.11090660095214844 + }, + { + "epoch": 3.92913818359375e-05, + "grad_norm": 0.10374678671360016, + "learning_rate": 5.387288069080299e-06, + "loss": 0.0034, + "step": 25750 + }, + { + "epoch": 3.92913818359375e-05, + "model_forward_time": 0.026171207427978516, + "step": 25750 + }, + { + "epoch": 3.92913818359375e-05, + "step": 25750, + "training_step_time": 0.11107182502746582 + }, + { + "epoch": 3.929290771484375e-05, + "model_forward_time": 0.026156902313232422, + "step": 25751 + }, + { + "epoch": 3.929290771484375e-05, + "step": 25751, + "training_step_time": 0.1349201202392578 + }, + { + "epoch": 3.929443359375e-05, + "model_forward_time": 0.02539992332458496, + "step": 25752 + }, + { + "epoch": 3.929443359375e-05, + "step": 25752, + "training_step_time": 0.18234872817993164 + }, + { + "epoch": 3.929595947265625e-05, + "model_forward_time": 0.026047468185424805, + "step": 25753 + }, + { + "epoch": 3.929595947265625e-05, + "step": 25753, + "training_step_time": 0.1540982723236084 + }, + { + "epoch": 3.92974853515625e-05, + "model_forward_time": 0.024721145629882812, + "step": 25754 + }, + { + "epoch": 3.92974853515625e-05, + "step": 25754, + "training_step_time": 0.18913793563842773 + }, + { + "epoch": 3.929901123046875e-05, + "model_forward_time": 0.024372339248657227, + "step": 25755 + }, + { + "epoch": 3.929901123046875e-05, + "step": 25755, + "training_step_time": 0.1396317481994629 + }, + { + "epoch": 3.9300537109375e-05, + "model_forward_time": 0.025102615356445312, + "step": 25756 + }, + { + "epoch": 3.9300537109375e-05, + "step": 25756, + "training_step_time": 0.12041687965393066 + }, + { + "epoch": 3.930206298828125e-05, + "model_forward_time": 0.02565765380859375, + "step": 25757 + }, + { + "epoch": 3.930206298828125e-05, + "step": 25757, + "training_step_time": 0.15949535369873047 + }, + { + "epoch": 3.93035888671875e-05, + "model_forward_time": 0.025142431259155273, + "step": 25758 + }, + { + "epoch": 3.93035888671875e-05, + "step": 25758, + "training_step_time": 0.10625123977661133 + }, + { + "epoch": 3.930511474609375e-05, + "model_forward_time": 0.02498149871826172, + "step": 25759 + }, + { + "epoch": 3.930511474609375e-05, + "step": 25759, + "training_step_time": 0.20183086395263672 + }, + { + "epoch": 3.9306640625e-05, + "grad_norm": 0.16779078543186188, + "learning_rate": 5.362428619124666e-06, + "loss": 0.0043, + "step": 25760 + }, + { + "epoch": 3.9306640625e-05, + "model_forward_time": 0.024631977081298828, + "step": 25760 + }, + { + "epoch": 3.9306640625e-05, + "step": 25760, + "training_step_time": 0.11922073364257812 + }, + { + "epoch": 3.930816650390625e-05, + "model_forward_time": 0.025808334350585938, + "step": 25761 + }, + { + "epoch": 3.930816650390625e-05, + "step": 25761, + "training_step_time": 0.10827970504760742 + }, + { + "epoch": 3.93096923828125e-05, + "model_forward_time": 0.026005029678344727, + "step": 25762 + }, + { + "epoch": 3.93096923828125e-05, + "step": 25762, + "training_step_time": 0.17543554306030273 + }, + { + "epoch": 3.931121826171875e-05, + "model_forward_time": 0.024573087692260742, + "step": 25763 + }, + { + "epoch": 3.931121826171875e-05, + "step": 25763, + "training_step_time": 0.15593242645263672 + }, + { + "epoch": 3.9312744140625e-05, + "model_forward_time": 0.024667739868164062, + "step": 25764 + }, + { + "epoch": 3.9312744140625e-05, + "step": 25764, + "training_step_time": 0.10363101959228516 + }, + { + "epoch": 3.931427001953125e-05, + "model_forward_time": 0.025342702865600586, + "step": 25765 + }, + { + "epoch": 3.931427001953125e-05, + "step": 25765, + "training_step_time": 0.10560822486877441 + }, + { + "epoch": 3.93157958984375e-05, + "model_forward_time": 0.025417327880859375, + "step": 25766 + }, + { + "epoch": 3.93157958984375e-05, + "step": 25766, + "training_step_time": 0.10665202140808105 + }, + { + "epoch": 3.931732177734375e-05, + "model_forward_time": 0.025538206100463867, + "step": 25767 + }, + { + "epoch": 3.931732177734375e-05, + "step": 25767, + "training_step_time": 0.10861325263977051 + }, + { + "epoch": 3.931884765625e-05, + "model_forward_time": 0.025404930114746094, + "step": 25768 + }, + { + "epoch": 3.931884765625e-05, + "step": 25768, + "training_step_time": 0.10567378997802734 + }, + { + "epoch": 3.932037353515625e-05, + "model_forward_time": 0.02545166015625, + "step": 25769 + }, + { + "epoch": 3.932037353515625e-05, + "step": 25769, + "training_step_time": 0.19520998001098633 + }, + { + "epoch": 3.93218994140625e-05, + "grad_norm": 0.06318707764148712, + "learning_rate": 5.337623408027293e-06, + "loss": 0.0036, + "step": 25770 + }, + { + "epoch": 3.93218994140625e-05, + "model_forward_time": 0.02448296546936035, + "step": 25770 + }, + { + "epoch": 3.93218994140625e-05, + "step": 25770, + "training_step_time": 0.11513018608093262 + }, + { + "epoch": 3.932342529296875e-05, + "model_forward_time": 0.024393796920776367, + "step": 25771 + }, + { + "epoch": 3.932342529296875e-05, + "step": 25771, + "training_step_time": 0.10962867736816406 + }, + { + "epoch": 3.9324951171875e-05, + "model_forward_time": 0.025110244750976562, + "step": 25772 + }, + { + "epoch": 3.9324951171875e-05, + "step": 25772, + "training_step_time": 0.12342977523803711 + }, + { + "epoch": 3.932647705078125e-05, + "model_forward_time": 0.025315284729003906, + "step": 25773 + }, + { + "epoch": 3.932647705078125e-05, + "step": 25773, + "training_step_time": 0.10531783103942871 + }, + { + "epoch": 3.93280029296875e-05, + "model_forward_time": 0.024760007858276367, + "step": 25774 + }, + { + "epoch": 3.93280029296875e-05, + "step": 25774, + "training_step_time": 0.10808610916137695 + }, + { + "epoch": 3.932952880859375e-05, + "model_forward_time": 0.025420427322387695, + "step": 25775 + }, + { + "epoch": 3.932952880859375e-05, + "step": 25775, + "training_step_time": 0.11801028251647949 + }, + { + "epoch": 3.93310546875e-05, + "model_forward_time": 0.025914430618286133, + "step": 25776 + }, + { + "epoch": 3.93310546875e-05, + "step": 25776, + "training_step_time": 0.11122965812683105 + }, + { + "epoch": 3.933258056640625e-05, + "model_forward_time": 0.02548813819885254, + "step": 25777 + }, + { + "epoch": 3.933258056640625e-05, + "step": 25777, + "training_step_time": 0.10440516471862793 + }, + { + "epoch": 3.93341064453125e-05, + "model_forward_time": 0.025194644927978516, + "step": 25778 + }, + { + "epoch": 3.93341064453125e-05, + "step": 25778, + "training_step_time": 0.10861372947692871 + }, + { + "epoch": 3.933563232421875e-05, + "model_forward_time": 0.024774789810180664, + "step": 25779 + }, + { + "epoch": 3.933563232421875e-05, + "step": 25779, + "training_step_time": 0.10414600372314453 + }, + { + "epoch": 3.9337158203125e-05, + "grad_norm": 0.08112119138240814, + "learning_rate": 5.312872465928881e-06, + "loss": 0.0025, + "step": 25780 + }, + { + "epoch": 3.9337158203125e-05, + "model_forward_time": 0.024657011032104492, + "step": 25780 + }, + { + "epoch": 3.9337158203125e-05, + "step": 25780, + "training_step_time": 0.10488772392272949 + }, + { + "epoch": 3.933868408203125e-05, + "model_forward_time": 0.025719404220581055, + "step": 25781 + }, + { + "epoch": 3.933868408203125e-05, + "step": 25781, + "training_step_time": 0.10438919067382812 + }, + { + "epoch": 3.93402099609375e-05, + "model_forward_time": 0.025116920471191406, + "step": 25782 + }, + { + "epoch": 3.93402099609375e-05, + "step": 25782, + "training_step_time": 0.16245317459106445 + }, + { + "epoch": 3.934173583984375e-05, + "model_forward_time": 0.02440667152404785, + "step": 25783 + }, + { + "epoch": 3.934173583984375e-05, + "step": 25783, + "training_step_time": 0.1704566478729248 + }, + { + "epoch": 3.934326171875e-05, + "model_forward_time": 0.025737762451171875, + "step": 25784 + }, + { + "epoch": 3.934326171875e-05, + "step": 25784, + "training_step_time": 0.10982155799865723 + }, + { + "epoch": 3.934478759765625e-05, + "model_forward_time": 0.02428412437438965, + "step": 25785 + }, + { + "epoch": 3.934478759765625e-05, + "step": 25785, + "training_step_time": 0.10641980171203613 + }, + { + "epoch": 3.93463134765625e-05, + "model_forward_time": 0.02510356903076172, + "step": 25786 + }, + { + "epoch": 3.93463134765625e-05, + "step": 25786, + "training_step_time": 0.12202334403991699 + }, + { + "epoch": 3.934783935546875e-05, + "model_forward_time": 0.024773120880126953, + "step": 25787 + }, + { + "epoch": 3.934783935546875e-05, + "step": 25787, + "training_step_time": 0.11218762397766113 + }, + { + "epoch": 3.9349365234375e-05, + "model_forward_time": 0.025330781936645508, + "step": 25788 + }, + { + "epoch": 3.9349365234375e-05, + "step": 25788, + "training_step_time": 0.18675541877746582 + }, + { + "epoch": 3.935089111328125e-05, + "model_forward_time": 0.024500370025634766, + "step": 25789 + }, + { + "epoch": 3.935089111328125e-05, + "step": 25789, + "training_step_time": 0.11127114295959473 + }, + { + "epoch": 3.93524169921875e-05, + "grad_norm": 0.12586982548236847, + "learning_rate": 5.28817582290414e-06, + "loss": 0.0038, + "step": 25790 + }, + { + "epoch": 3.93524169921875e-05, + "model_forward_time": 0.024554014205932617, + "step": 25790 + }, + { + "epoch": 3.93524169921875e-05, + "step": 25790, + "training_step_time": 0.1174323558807373 + }, + { + "epoch": 3.935394287109375e-05, + "model_forward_time": 0.024805545806884766, + "step": 25791 + }, + { + "epoch": 3.935394287109375e-05, + "step": 25791, + "training_step_time": 0.11386752128601074 + }, + { + "epoch": 3.935546875e-05, + "model_forward_time": 0.02553391456604004, + "step": 25792 + }, + { + "epoch": 3.935546875e-05, + "step": 25792, + "training_step_time": 0.10772323608398438 + }, + { + "epoch": 3.935699462890625e-05, + "model_forward_time": 0.025073766708374023, + "step": 25793 + }, + { + "epoch": 3.935699462890625e-05, + "step": 25793, + "training_step_time": 0.11549782752990723 + }, + { + "epoch": 3.93585205078125e-05, + "model_forward_time": 0.024329423904418945, + "step": 25794 + }, + { + "epoch": 3.93585205078125e-05, + "step": 25794, + "training_step_time": 0.11139535903930664 + }, + { + "epoch": 3.936004638671875e-05, + "model_forward_time": 0.024079322814941406, + "step": 25795 + }, + { + "epoch": 3.936004638671875e-05, + "step": 25795, + "training_step_time": 0.11285805702209473 + }, + { + "epoch": 3.9361572265625e-05, + "model_forward_time": 0.023992538452148438, + "step": 25796 + }, + { + "epoch": 3.9361572265625e-05, + "step": 25796, + "training_step_time": 0.19027233123779297 + }, + { + "epoch": 3.936309814453125e-05, + "model_forward_time": 0.024167299270629883, + "step": 25797 + }, + { + "epoch": 3.936309814453125e-05, + "step": 25797, + "training_step_time": 0.10805416107177734 + }, + { + "epoch": 3.93646240234375e-05, + "model_forward_time": 0.02456951141357422, + "step": 25798 + }, + { + "epoch": 3.93646240234375e-05, + "step": 25798, + "training_step_time": 0.11515474319458008 + }, + { + "epoch": 3.936614990234375e-05, + "model_forward_time": 0.02531719207763672, + "step": 25799 + }, + { + "epoch": 3.936614990234375e-05, + "step": 25799, + "training_step_time": 0.12576889991760254 + }, + { + "epoch": 3.936767578125e-05, + "grad_norm": 0.06786226481199265, + "learning_rate": 5.263533508961827e-06, + "loss": 0.0037, + "step": 25800 + }, + { + "epoch": 3.936767578125e-05, + "model_forward_time": 0.025504112243652344, + "step": 25800 + }, + { + "epoch": 3.936767578125e-05, + "step": 25800, + "training_step_time": 0.12991046905517578 + }, + { + "epoch": 3.936920166015625e-05, + "model_forward_time": 0.024959087371826172, + "step": 25801 + }, + { + "epoch": 3.936920166015625e-05, + "step": 25801, + "training_step_time": 0.11065268516540527 + }, + { + "epoch": 3.93707275390625e-05, + "model_forward_time": 0.025199174880981445, + "step": 25802 + }, + { + "epoch": 3.93707275390625e-05, + "step": 25802, + "training_step_time": 0.1100606918334961 + }, + { + "epoch": 3.937225341796875e-05, + "model_forward_time": 0.025185108184814453, + "step": 25803 + }, + { + "epoch": 3.937225341796875e-05, + "step": 25803, + "training_step_time": 0.10994124412536621 + }, + { + "epoch": 3.9373779296875e-05, + "model_forward_time": 0.024806737899780273, + "step": 25804 + }, + { + "epoch": 3.9373779296875e-05, + "step": 25804, + "training_step_time": 0.10913348197937012 + }, + { + "epoch": 3.937530517578125e-05, + "model_forward_time": 0.025193214416503906, + "step": 25805 + }, + { + "epoch": 3.937530517578125e-05, + "step": 25805, + "training_step_time": 0.10763764381408691 + }, + { + "epoch": 3.93768310546875e-05, + "model_forward_time": 0.024234294891357422, + "step": 25806 + }, + { + "epoch": 3.93768310546875e-05, + "step": 25806, + "training_step_time": 0.1584615707397461 + }, + { + "epoch": 3.937835693359375e-05, + "model_forward_time": 0.02449822425842285, + "step": 25807 + }, + { + "epoch": 3.937835693359375e-05, + "step": 25807, + "training_step_time": 0.15854668617248535 + }, + { + "epoch": 3.93798828125e-05, + "model_forward_time": 0.024201154708862305, + "step": 25808 + }, + { + "epoch": 3.93798828125e-05, + "step": 25808, + "training_step_time": 0.11106729507446289 + }, + { + "epoch": 3.938140869140625e-05, + "model_forward_time": 0.0257720947265625, + "step": 25809 + }, + { + "epoch": 3.938140869140625e-05, + "step": 25809, + "training_step_time": 0.13876628875732422 + }, + { + "epoch": 3.93829345703125e-05, + "grad_norm": 0.16554300487041473, + "learning_rate": 5.238945554044672e-06, + "loss": 0.0029, + "step": 25810 + }, + { + "epoch": 3.93829345703125e-05, + "model_forward_time": 0.02499842643737793, + "step": 25810 + }, + { + "epoch": 3.93829345703125e-05, + "step": 25810, + "training_step_time": 0.10503816604614258 + }, + { + "epoch": 3.938446044921875e-05, + "model_forward_time": 0.02539682388305664, + "step": 25811 + }, + { + "epoch": 3.938446044921875e-05, + "step": 25811, + "training_step_time": 0.10937714576721191 + }, + { + "epoch": 3.9385986328125e-05, + "model_forward_time": 0.025429248809814453, + "step": 25812 + }, + { + "epoch": 3.9385986328125e-05, + "step": 25812, + "training_step_time": 0.10881733894348145 + }, + { + "epoch": 3.938751220703125e-05, + "model_forward_time": 0.024729013442993164, + "step": 25813 + }, + { + "epoch": 3.938751220703125e-05, + "step": 25813, + "training_step_time": 0.11156845092773438 + }, + { + "epoch": 3.93890380859375e-05, + "model_forward_time": 0.02475428581237793, + "step": 25814 + }, + { + "epoch": 3.93890380859375e-05, + "step": 25814, + "training_step_time": 0.10944437980651855 + }, + { + "epoch": 3.939056396484375e-05, + "model_forward_time": 0.025429248809814453, + "step": 25815 + }, + { + "epoch": 3.939056396484375e-05, + "step": 25815, + "training_step_time": 0.10462164878845215 + }, + { + "epoch": 3.939208984375e-05, + "model_forward_time": 0.02531886100769043, + "step": 25816 + }, + { + "epoch": 3.939208984375e-05, + "step": 25816, + "training_step_time": 0.11170005798339844 + }, + { + "epoch": 3.939361572265625e-05, + "model_forward_time": 0.025026559829711914, + "step": 25817 + }, + { + "epoch": 3.939361572265625e-05, + "step": 25817, + "training_step_time": 0.11561369895935059 + }, + { + "epoch": 3.93951416015625e-05, + "model_forward_time": 0.024882078170776367, + "step": 25818 + }, + { + "epoch": 3.93951416015625e-05, + "step": 25818, + "training_step_time": 0.1978001594543457 + }, + { + "epoch": 3.939666748046875e-05, + "model_forward_time": 0.023891210556030273, + "step": 25819 + }, + { + "epoch": 3.939666748046875e-05, + "step": 25819, + "training_step_time": 0.10515642166137695 + }, + { + "epoch": 3.9398193359375e-05, + "grad_norm": 0.0756898820400238, + "learning_rate": 5.214411988029355e-06, + "loss": 0.0034, + "step": 25820 + }, + { + "epoch": 3.9398193359375e-05, + "model_forward_time": 0.02410435676574707, + "step": 25820 + }, + { + "epoch": 3.9398193359375e-05, + "step": 25820, + "training_step_time": 0.1097557544708252 + }, + { + "epoch": 3.939971923828125e-05, + "model_forward_time": 0.024553298950195312, + "step": 25821 + }, + { + "epoch": 3.939971923828125e-05, + "step": 25821, + "training_step_time": 0.1083064079284668 + }, + { + "epoch": 3.94012451171875e-05, + "model_forward_time": 0.025244712829589844, + "step": 25822 + }, + { + "epoch": 3.94012451171875e-05, + "step": 25822, + "training_step_time": 0.10894370079040527 + }, + { + "epoch": 3.940277099609375e-05, + "model_forward_time": 0.025018692016601562, + "step": 25823 + }, + { + "epoch": 3.940277099609375e-05, + "step": 25823, + "training_step_time": 0.11373591423034668 + }, + { + "epoch": 3.9404296875e-05, + "model_forward_time": 0.024715423583984375, + "step": 25824 + }, + { + "epoch": 3.9404296875e-05, + "step": 25824, + "training_step_time": 0.10692095756530762 + }, + { + "epoch": 3.940582275390625e-05, + "model_forward_time": 0.024602413177490234, + "step": 25825 + }, + { + "epoch": 3.940582275390625e-05, + "step": 25825, + "training_step_time": 0.10637497901916504 + }, + { + "epoch": 3.94073486328125e-05, + "model_forward_time": 0.025034427642822266, + "step": 25826 + }, + { + "epoch": 3.94073486328125e-05, + "step": 25826, + "training_step_time": 0.10526394844055176 + }, + { + "epoch": 3.940887451171875e-05, + "model_forward_time": 0.025151491165161133, + "step": 25827 + }, + { + "epoch": 3.940887451171875e-05, + "step": 25827, + "training_step_time": 0.10574555397033691 + }, + { + "epoch": 3.9410400390625e-05, + "model_forward_time": 0.02508687973022461, + "step": 25828 + }, + { + "epoch": 3.9410400390625e-05, + "step": 25828, + "training_step_time": 0.10580277442932129 + }, + { + "epoch": 3.941192626953125e-05, + "model_forward_time": 0.02500295639038086, + "step": 25829 + }, + { + "epoch": 3.941192626953125e-05, + "step": 25829, + "training_step_time": 0.1128387451171875 + }, + { + "epoch": 3.94134521484375e-05, + "grad_norm": 0.06646464765071869, + "learning_rate": 5.1899328407264855e-06, + "loss": 0.0031, + "step": 25830 + }, + { + "epoch": 3.94134521484375e-05, + "model_forward_time": 0.024760961532592773, + "step": 25830 + }, + { + "epoch": 3.94134521484375e-05, + "step": 25830, + "training_step_time": 0.13074183464050293 + }, + { + "epoch": 3.941497802734375e-05, + "model_forward_time": 0.02509331703186035, + "step": 25831 + }, + { + "epoch": 3.941497802734375e-05, + "step": 25831, + "training_step_time": 0.10674715042114258 + }, + { + "epoch": 3.941650390625e-05, + "model_forward_time": 0.025125503540039062, + "step": 25832 + }, + { + "epoch": 3.941650390625e-05, + "step": 25832, + "training_step_time": 0.10644292831420898 + }, + { + "epoch": 3.941802978515625e-05, + "model_forward_time": 0.024548768997192383, + "step": 25833 + }, + { + "epoch": 3.941802978515625e-05, + "step": 25833, + "training_step_time": 0.11115407943725586 + }, + { + "epoch": 3.94195556640625e-05, + "model_forward_time": 0.027149438858032227, + "step": 25834 + }, + { + "epoch": 3.94195556640625e-05, + "step": 25834, + "training_step_time": 0.11219453811645508 + }, + { + "epoch": 3.942108154296875e-05, + "model_forward_time": 0.0251617431640625, + "step": 25835 + }, + { + "epoch": 3.942108154296875e-05, + "step": 25835, + "training_step_time": 0.19274139404296875 + }, + { + "epoch": 3.9422607421875e-05, + "model_forward_time": 0.024132490158081055, + "step": 25836 + }, + { + "epoch": 3.9422607421875e-05, + "step": 25836, + "training_step_time": 0.10600972175598145 + }, + { + "epoch": 3.942413330078125e-05, + "model_forward_time": 0.024664878845214844, + "step": 25837 + }, + { + "epoch": 3.942413330078125e-05, + "step": 25837, + "training_step_time": 0.10587453842163086 + }, + { + "epoch": 3.94256591796875e-05, + "model_forward_time": 0.02506852149963379, + "step": 25838 + }, + { + "epoch": 3.94256591796875e-05, + "step": 25838, + "training_step_time": 0.10826420783996582 + }, + { + "epoch": 3.942718505859375e-05, + "model_forward_time": 0.02536296844482422, + "step": 25839 + }, + { + "epoch": 3.942718505859375e-05, + "step": 25839, + "training_step_time": 0.10845065116882324 + }, + { + "epoch": 3.94287109375e-05, + "grad_norm": 0.12488628923892975, + "learning_rate": 5.165508141880526e-06, + "loss": 0.0079, + "step": 25840 + }, + { + "epoch": 3.94287109375e-05, + "model_forward_time": 0.025051116943359375, + "step": 25840 + }, + { + "epoch": 3.94287109375e-05, + "step": 25840, + "training_step_time": 0.10573840141296387 + }, + { + "epoch": 3.943023681640625e-05, + "model_forward_time": 0.02532649040222168, + "step": 25841 + }, + { + "epoch": 3.943023681640625e-05, + "step": 25841, + "training_step_time": 0.10563445091247559 + }, + { + "epoch": 3.94317626953125e-05, + "model_forward_time": 0.025229692459106445, + "step": 25842 + }, + { + "epoch": 3.94317626953125e-05, + "step": 25842, + "training_step_time": 0.10614538192749023 + }, + { + "epoch": 3.943328857421875e-05, + "model_forward_time": 0.025068998336791992, + "step": 25843 + }, + { + "epoch": 3.943328857421875e-05, + "step": 25843, + "training_step_time": 0.1072382926940918 + }, + { + "epoch": 3.9434814453125e-05, + "model_forward_time": 0.02489781379699707, + "step": 25844 + }, + { + "epoch": 3.9434814453125e-05, + "step": 25844, + "training_step_time": 0.17466282844543457 + }, + { + "epoch": 3.943634033203125e-05, + "model_forward_time": 0.0245053768157959, + "step": 25845 + }, + { + "epoch": 3.943634033203125e-05, + "step": 25845, + "training_step_time": 0.1095881462097168 + }, + { + "epoch": 3.94378662109375e-05, + "model_forward_time": 0.024741411209106445, + "step": 25846 + }, + { + "epoch": 3.94378662109375e-05, + "step": 25846, + "training_step_time": 0.11125016212463379 + }, + { + "epoch": 3.943939208984375e-05, + "model_forward_time": 0.025305986404418945, + "step": 25847 + }, + { + "epoch": 3.943939208984375e-05, + "step": 25847, + "training_step_time": 0.12735271453857422 + }, + { + "epoch": 3.944091796875e-05, + "model_forward_time": 0.025331735610961914, + "step": 25848 + }, + { + "epoch": 3.944091796875e-05, + "step": 25848, + "training_step_time": 0.12441110610961914 + }, + { + "epoch": 3.944244384765625e-05, + "model_forward_time": 0.024831056594848633, + "step": 25849 + }, + { + "epoch": 3.944244384765625e-05, + "step": 25849, + "training_step_time": 0.10965299606323242 + }, + { + "epoch": 3.94439697265625e-05, + "grad_norm": 0.07019779831171036, + "learning_rate": 5.141137921169792e-06, + "loss": 0.0046, + "step": 25850 + }, + { + "epoch": 3.94439697265625e-05, + "model_forward_time": 0.025171756744384766, + "step": 25850 + }, + { + "epoch": 3.94439697265625e-05, + "step": 25850, + "training_step_time": 0.11248254776000977 + }, + { + "epoch": 3.944549560546875e-05, + "model_forward_time": 0.02486562728881836, + "step": 25851 + }, + { + "epoch": 3.944549560546875e-05, + "step": 25851, + "training_step_time": 0.10500907897949219 + }, + { + "epoch": 3.9447021484375e-05, + "model_forward_time": 0.02517986297607422, + "step": 25852 + }, + { + "epoch": 3.9447021484375e-05, + "step": 25852, + "training_step_time": 0.10858440399169922 + }, + { + "epoch": 3.944854736328125e-05, + "model_forward_time": 0.025420188903808594, + "step": 25853 + }, + { + "epoch": 3.944854736328125e-05, + "step": 25853, + "training_step_time": 0.10614418983459473 + }, + { + "epoch": 3.94500732421875e-05, + "model_forward_time": 0.024549484252929688, + "step": 25854 + }, + { + "epoch": 3.94500732421875e-05, + "step": 25854, + "training_step_time": 0.14817190170288086 + }, + { + "epoch": 3.945159912109375e-05, + "model_forward_time": 0.024013757705688477, + "step": 25855 + }, + { + "epoch": 3.945159912109375e-05, + "step": 25855, + "training_step_time": 0.1492147445678711 + }, + { + "epoch": 3.9453125e-05, + "model_forward_time": 0.024506330490112305, + "step": 25856 + }, + { + "epoch": 3.9453125e-05, + "step": 25856, + "training_step_time": 0.15873360633850098 + }, + { + "epoch": 3.945465087890625e-05, + "model_forward_time": 0.02424478530883789, + "step": 25857 + }, + { + "epoch": 3.945465087890625e-05, + "step": 25857, + "training_step_time": 0.14077019691467285 + }, + { + "epoch": 3.94561767578125e-05, + "model_forward_time": 0.023600339889526367, + "step": 25858 + }, + { + "epoch": 3.94561767578125e-05, + "step": 25858, + "training_step_time": 0.17223834991455078 + }, + { + "epoch": 3.945770263671875e-05, + "model_forward_time": 0.024563074111938477, + "step": 25859 + }, + { + "epoch": 3.945770263671875e-05, + "step": 25859, + "training_step_time": 0.13352560997009277 + }, + { + "epoch": 3.9459228515625e-05, + "grad_norm": 0.08645788580179214, + "learning_rate": 5.116822208206396e-06, + "loss": 0.0033, + "step": 25860 + }, + { + "epoch": 3.9459228515625e-05, + "model_forward_time": 0.024778127670288086, + "step": 25860 + }, + { + "epoch": 3.9459228515625e-05, + "step": 25860, + "training_step_time": 0.11933231353759766 + }, + { + "epoch": 3.946075439453125e-05, + "model_forward_time": 0.025158166885375977, + "step": 25861 + }, + { + "epoch": 3.946075439453125e-05, + "step": 25861, + "training_step_time": 0.12124156951904297 + }, + { + "epoch": 3.94622802734375e-05, + "model_forward_time": 0.025393009185791016, + "step": 25862 + }, + { + "epoch": 3.94622802734375e-05, + "step": 25862, + "training_step_time": 0.1193697452545166 + }, + { + "epoch": 3.946380615234375e-05, + "model_forward_time": 0.025251388549804688, + "step": 25863 + }, + { + "epoch": 3.946380615234375e-05, + "step": 25863, + "training_step_time": 0.11401009559631348 + }, + { + "epoch": 3.946533203125e-05, + "model_forward_time": 0.025269031524658203, + "step": 25864 + }, + { + "epoch": 3.946533203125e-05, + "step": 25864, + "training_step_time": 0.19955134391784668 + }, + { + "epoch": 3.946685791015625e-05, + "model_forward_time": 0.02443218231201172, + "step": 25865 + }, + { + "epoch": 3.946685791015625e-05, + "step": 25865, + "training_step_time": 0.10823512077331543 + }, + { + "epoch": 3.94683837890625e-05, + "model_forward_time": 0.02444624900817871, + "step": 25866 + }, + { + "epoch": 3.94683837890625e-05, + "step": 25866, + "training_step_time": 0.11129999160766602 + }, + { + "epoch": 3.946990966796875e-05, + "model_forward_time": 0.025321006774902344, + "step": 25867 + }, + { + "epoch": 3.946990966796875e-05, + "step": 25867, + "training_step_time": 0.16054272651672363 + }, + { + "epoch": 3.9471435546875e-05, + "model_forward_time": 0.025015830993652344, + "step": 25868 + }, + { + "epoch": 3.9471435546875e-05, + "step": 25868, + "training_step_time": 0.10740137100219727 + }, + { + "epoch": 3.947296142578125e-05, + "model_forward_time": 0.024646520614624023, + "step": 25869 + }, + { + "epoch": 3.947296142578125e-05, + "step": 25869, + "training_step_time": 0.10657501220703125 + }, + { + "epoch": 3.94744873046875e-05, + "grad_norm": 0.06746818125247955, + "learning_rate": 5.092561032536225e-06, + "loss": 0.0053, + "step": 25870 + }, + { + "epoch": 3.94744873046875e-05, + "model_forward_time": 0.02529740333557129, + "step": 25870 + }, + { + "epoch": 3.94744873046875e-05, + "step": 25870, + "training_step_time": 0.1047513484954834 + }, + { + "epoch": 3.947601318359375e-05, + "model_forward_time": 0.02526378631591797, + "step": 25871 + }, + { + "epoch": 3.947601318359375e-05, + "step": 25871, + "training_step_time": 0.10605978965759277 + }, + { + "epoch": 3.94775390625e-05, + "model_forward_time": 0.025127410888671875, + "step": 25872 + }, + { + "epoch": 3.94775390625e-05, + "step": 25872, + "training_step_time": 0.106475830078125 + }, + { + "epoch": 3.947906494140625e-05, + "model_forward_time": 0.025197744369506836, + "step": 25873 + }, + { + "epoch": 3.947906494140625e-05, + "step": 25873, + "training_step_time": 0.10813331604003906 + }, + { + "epoch": 3.94805908203125e-05, + "model_forward_time": 0.02519702911376953, + "step": 25874 + }, + { + "epoch": 3.94805908203125e-05, + "step": 25874, + "training_step_time": 0.1748976707458496 + }, + { + "epoch": 3.948211669921875e-05, + "model_forward_time": 0.024928569793701172, + "step": 25875 + }, + { + "epoch": 3.948211669921875e-05, + "step": 25875, + "training_step_time": 0.14101552963256836 + }, + { + "epoch": 3.9483642578125e-05, + "model_forward_time": 0.02435612678527832, + "step": 25876 + }, + { + "epoch": 3.9483642578125e-05, + "step": 25876, + "training_step_time": 0.10667800903320312 + }, + { + "epoch": 3.948516845703125e-05, + "model_forward_time": 0.025377750396728516, + "step": 25877 + }, + { + "epoch": 3.948516845703125e-05, + "step": 25877, + "training_step_time": 0.10916304588317871 + }, + { + "epoch": 3.94866943359375e-05, + "model_forward_time": 0.025196552276611328, + "step": 25878 + }, + { + "epoch": 3.94866943359375e-05, + "step": 25878, + "training_step_time": 0.11152124404907227 + }, + { + "epoch": 3.948822021484375e-05, + "model_forward_time": 0.025081396102905273, + "step": 25879 + }, + { + "epoch": 3.948822021484375e-05, + "step": 25879, + "training_step_time": 0.11188125610351562 + }, + { + "epoch": 3.948974609375e-05, + "grad_norm": 0.2724435329437256, + "learning_rate": 5.068354423638882e-06, + "loss": 0.0144, + "step": 25880 + }, + { + "epoch": 3.948974609375e-05, + "model_forward_time": 0.0252227783203125, + "step": 25880 + }, + { + "epoch": 3.948974609375e-05, + "step": 25880, + "training_step_time": 0.18145179748535156 + }, + { + "epoch": 3.949127197265625e-05, + "model_forward_time": 0.02443838119506836, + "step": 25881 + }, + { + "epoch": 3.949127197265625e-05, + "step": 25881, + "training_step_time": 0.1037139892578125 + }, + { + "epoch": 3.94927978515625e-05, + "model_forward_time": 0.02463984489440918, + "step": 25882 + }, + { + "epoch": 3.94927978515625e-05, + "step": 25882, + "training_step_time": 0.10824704170227051 + }, + { + "epoch": 3.949432373046875e-05, + "model_forward_time": 0.02535414695739746, + "step": 25883 + }, + { + "epoch": 3.949432373046875e-05, + "step": 25883, + "training_step_time": 0.10746097564697266 + }, + { + "epoch": 3.9495849609375e-05, + "model_forward_time": 0.024901628494262695, + "step": 25884 + }, + { + "epoch": 3.9495849609375e-05, + "step": 25884, + "training_step_time": 0.10793447494506836 + }, + { + "epoch": 3.949737548828125e-05, + "model_forward_time": 0.025607585906982422, + "step": 25885 + }, + { + "epoch": 3.949737548828125e-05, + "step": 25885, + "training_step_time": 0.10767006874084473 + }, + { + "epoch": 3.94989013671875e-05, + "model_forward_time": 0.02497100830078125, + "step": 25886 + }, + { + "epoch": 3.94989013671875e-05, + "step": 25886, + "training_step_time": 0.1054372787475586 + }, + { + "epoch": 3.950042724609375e-05, + "model_forward_time": 0.025310754776000977, + "step": 25887 + }, + { + "epoch": 3.950042724609375e-05, + "step": 25887, + "training_step_time": 0.10844683647155762 + }, + { + "epoch": 3.9501953125e-05, + "model_forward_time": 0.024905920028686523, + "step": 25888 + }, + { + "epoch": 3.9501953125e-05, + "step": 25888, + "training_step_time": 0.10787725448608398 + }, + { + "epoch": 3.950347900390625e-05, + "model_forward_time": 0.025113344192504883, + "step": 25889 + }, + { + "epoch": 3.950347900390625e-05, + "step": 25889, + "training_step_time": 0.10764265060424805 + }, + { + "epoch": 3.95050048828125e-05, + "grad_norm": 0.0626010000705719, + "learning_rate": 5.044202410927706e-06, + "loss": 0.0053, + "step": 25890 + }, + { + "epoch": 3.95050048828125e-05, + "model_forward_time": 0.025409221649169922, + "step": 25890 + }, + { + "epoch": 3.95050048828125e-05, + "step": 25890, + "training_step_time": 0.15995287895202637 + }, + { + "epoch": 3.950653076171875e-05, + "model_forward_time": 0.0246884822845459, + "step": 25891 + }, + { + "epoch": 3.950653076171875e-05, + "step": 25891, + "training_step_time": 0.23072195053100586 + }, + { + "epoch": 3.9508056640625e-05, + "model_forward_time": 0.024190664291381836, + "step": 25892 + }, + { + "epoch": 3.9508056640625e-05, + "step": 25892, + "training_step_time": 0.10637116432189941 + }, + { + "epoch": 3.950958251953125e-05, + "model_forward_time": 0.02459120750427246, + "step": 25893 + }, + { + "epoch": 3.950958251953125e-05, + "step": 25893, + "training_step_time": 0.12749242782592773 + }, + { + "epoch": 3.95111083984375e-05, + "model_forward_time": 0.02506875991821289, + "step": 25894 + }, + { + "epoch": 3.95111083984375e-05, + "step": 25894, + "training_step_time": 0.12562847137451172 + }, + { + "epoch": 3.951263427734375e-05, + "model_forward_time": 0.02498769760131836, + "step": 25895 + }, + { + "epoch": 3.951263427734375e-05, + "step": 25895, + "training_step_time": 0.11781740188598633 + }, + { + "epoch": 3.951416015625e-05, + "model_forward_time": 0.025946617126464844, + "step": 25896 + }, + { + "epoch": 3.951416015625e-05, + "step": 25896, + "training_step_time": 0.1454763412475586 + }, + { + "epoch": 3.951568603515625e-05, + "model_forward_time": 0.02486252784729004, + "step": 25897 + }, + { + "epoch": 3.951568603515625e-05, + "step": 25897, + "training_step_time": 0.10721755027770996 + }, + { + "epoch": 3.95172119140625e-05, + "model_forward_time": 0.0254514217376709, + "step": 25898 + }, + { + "epoch": 3.95172119140625e-05, + "step": 25898, + "training_step_time": 0.10650753974914551 + }, + { + "epoch": 3.951873779296875e-05, + "model_forward_time": 0.024816513061523438, + "step": 25899 + }, + { + "epoch": 3.951873779296875e-05, + "step": 25899, + "training_step_time": 0.11905241012573242 + }, + { + "epoch": 3.9520263671875e-05, + "grad_norm": 0.08930026739835739, + "learning_rate": 5.020105023749644e-06, + "loss": 0.0028, + "step": 25900 + }, + { + "epoch": 3.9520263671875e-05, + "model_forward_time": 0.025156259536743164, + "step": 25900 + }, + { + "epoch": 3.9520263671875e-05, + "step": 25900, + "training_step_time": 0.10870695114135742 + }, + { + "epoch": 3.952178955078125e-05, + "model_forward_time": 0.02541208267211914, + "step": 25901 + }, + { + "epoch": 3.952178955078125e-05, + "step": 25901, + "training_step_time": 0.11130738258361816 + }, + { + "epoch": 3.95233154296875e-05, + "model_forward_time": 0.025548696517944336, + "step": 25902 + }, + { + "epoch": 3.95233154296875e-05, + "step": 25902, + "training_step_time": 0.11432433128356934 + }, + { + "epoch": 3.952484130859375e-05, + "model_forward_time": 0.025470733642578125, + "step": 25903 + }, + { + "epoch": 3.952484130859375e-05, + "step": 25903, + "training_step_time": 0.11244487762451172 + }, + { + "epoch": 3.95263671875e-05, + "model_forward_time": 0.024974584579467773, + "step": 25904 + }, + { + "epoch": 3.95263671875e-05, + "step": 25904, + "training_step_time": 0.12554359436035156 + }, + { + "epoch": 3.952789306640625e-05, + "model_forward_time": 0.02522897720336914, + "step": 25905 + }, + { + "epoch": 3.952789306640625e-05, + "step": 25905, + "training_step_time": 0.12853384017944336 + }, + { + "epoch": 3.95294189453125e-05, + "model_forward_time": 0.025153398513793945, + "step": 25906 + }, + { + "epoch": 3.95294189453125e-05, + "step": 25906, + "training_step_time": 0.14028525352478027 + }, + { + "epoch": 3.953094482421875e-05, + "model_forward_time": 0.024993181228637695, + "step": 25907 + }, + { + "epoch": 3.953094482421875e-05, + "step": 25907, + "training_step_time": 0.17972278594970703 + }, + { + "epoch": 3.9532470703125e-05, + "model_forward_time": 0.024346113204956055, + "step": 25908 + }, + { + "epoch": 3.9532470703125e-05, + "step": 25908, + "training_step_time": 0.1316385269165039 + }, + { + "epoch": 3.953399658203125e-05, + "model_forward_time": 0.024237394332885742, + "step": 25909 + }, + { + "epoch": 3.953399658203125e-05, + "step": 25909, + "training_step_time": 0.12460756301879883 + }, + { + "epoch": 3.95355224609375e-05, + "grad_norm": 0.0735696479678154, + "learning_rate": 4.996062291385317e-06, + "loss": 0.0079, + "step": 25910 + }, + { + "epoch": 3.95355224609375e-05, + "model_forward_time": 0.024611711502075195, + "step": 25910 + }, + { + "epoch": 3.95355224609375e-05, + "step": 25910, + "training_step_time": 0.16543006896972656 + }, + { + "epoch": 3.953704833984375e-05, + "model_forward_time": 0.023830652236938477, + "step": 25911 + }, + { + "epoch": 3.953704833984375e-05, + "step": 25911, + "training_step_time": 0.1216738224029541 + }, + { + "epoch": 3.953857421875e-05, + "model_forward_time": 0.024100542068481445, + "step": 25912 + }, + { + "epoch": 3.953857421875e-05, + "step": 25912, + "training_step_time": 0.1915435791015625 + }, + { + "epoch": 3.954010009765625e-05, + "model_forward_time": 0.023758649826049805, + "step": 25913 + }, + { + "epoch": 3.954010009765625e-05, + "step": 25913, + "training_step_time": 0.10660505294799805 + }, + { + "epoch": 3.95416259765625e-05, + "model_forward_time": 0.024452686309814453, + "step": 25914 + }, + { + "epoch": 3.95416259765625e-05, + "step": 25914, + "training_step_time": 0.1056051254272461 + }, + { + "epoch": 3.954315185546875e-05, + "model_forward_time": 0.025722026824951172, + "step": 25915 + }, + { + "epoch": 3.954315185546875e-05, + "step": 25915, + "training_step_time": 0.10785388946533203 + }, + { + "epoch": 3.9544677734375e-05, + "model_forward_time": 0.0253298282623291, + "step": 25916 + }, + { + "epoch": 3.9544677734375e-05, + "step": 25916, + "training_step_time": 0.1070556640625 + }, + { + "epoch": 3.954620361328125e-05, + "model_forward_time": 0.025110483169555664, + "step": 25917 + }, + { + "epoch": 3.954620361328125e-05, + "step": 25917, + "training_step_time": 0.10883021354675293 + }, + { + "epoch": 3.95477294921875e-05, + "model_forward_time": 0.025153398513793945, + "step": 25918 + }, + { + "epoch": 3.95477294921875e-05, + "step": 25918, + "training_step_time": 0.10463094711303711 + }, + { + "epoch": 3.954925537109375e-05, + "model_forward_time": 0.02500152587890625, + "step": 25919 + }, + { + "epoch": 3.954925537109375e-05, + "step": 25919, + "training_step_time": 0.13227343559265137 + }, + { + "epoch": 3.955078125e-05, + "grad_norm": 0.304913729429245, + "learning_rate": 4.972074243048897e-06, + "loss": 0.005, + "step": 25920 + }, + { + "epoch": 3.955078125e-05, + "model_forward_time": 0.025394916534423828, + "step": 25920 + }, + { + "epoch": 3.955078125e-05, + "step": 25920, + "training_step_time": 0.13139581680297852 + }, + { + "epoch": 3.955230712890625e-05, + "model_forward_time": 0.024790287017822266, + "step": 25921 + }, + { + "epoch": 3.955230712890625e-05, + "step": 25921, + "training_step_time": 0.10678887367248535 + }, + { + "epoch": 3.95538330078125e-05, + "model_forward_time": 0.024930477142333984, + "step": 25922 + }, + { + "epoch": 3.95538330078125e-05, + "step": 25922, + "training_step_time": 0.10747551918029785 + }, + { + "epoch": 3.955535888671875e-05, + "model_forward_time": 0.025041580200195312, + "step": 25923 + }, + { + "epoch": 3.955535888671875e-05, + "step": 25923, + "training_step_time": 0.12308549880981445 + }, + { + "epoch": 3.9556884765625e-05, + "model_forward_time": 0.025037527084350586, + "step": 25924 + }, + { + "epoch": 3.9556884765625e-05, + "step": 25924, + "training_step_time": 0.10683393478393555 + }, + { + "epoch": 3.955841064453125e-05, + "model_forward_time": 0.025164365768432617, + "step": 25925 + }, + { + "epoch": 3.955841064453125e-05, + "step": 25925, + "training_step_time": 0.13396239280700684 + }, + { + "epoch": 3.95599365234375e-05, + "model_forward_time": 0.02528214454650879, + "step": 25926 + }, + { + "epoch": 3.95599365234375e-05, + "step": 25926, + "training_step_time": 0.12454581260681152 + }, + { + "epoch": 3.956146240234375e-05, + "model_forward_time": 0.025037765502929688, + "step": 25927 + }, + { + "epoch": 3.956146240234375e-05, + "step": 25927, + "training_step_time": 0.10522723197937012 + }, + { + "epoch": 3.956298828125e-05, + "model_forward_time": 0.024893760681152344, + "step": 25928 + }, + { + "epoch": 3.956298828125e-05, + "step": 25928, + "training_step_time": 0.11438608169555664 + }, + { + "epoch": 3.956451416015625e-05, + "model_forward_time": 0.02494525909423828, + "step": 25929 + }, + { + "epoch": 3.956451416015625e-05, + "step": 25929, + "training_step_time": 0.10952448844909668 + }, + { + "epoch": 3.95660400390625e-05, + "grad_norm": 0.08029066026210785, + "learning_rate": 4.948140907888121e-06, + "loss": 0.0034, + "step": 25930 + }, + { + "epoch": 3.95660400390625e-05, + "model_forward_time": 0.02507638931274414, + "step": 25930 + }, + { + "epoch": 3.95660400390625e-05, + "step": 25930, + "training_step_time": 0.1027231216430664 + }, + { + "epoch": 3.956756591796875e-05, + "model_forward_time": 0.025275707244873047, + "step": 25931 + }, + { + "epoch": 3.956756591796875e-05, + "step": 25931, + "training_step_time": 0.10364890098571777 + }, + { + "epoch": 3.9569091796875e-05, + "model_forward_time": 0.025071382522583008, + "step": 25932 + }, + { + "epoch": 3.9569091796875e-05, + "step": 25932, + "training_step_time": 0.1037895679473877 + }, + { + "epoch": 3.957061767578125e-05, + "model_forward_time": 0.025154590606689453, + "step": 25933 + }, + { + "epoch": 3.957061767578125e-05, + "step": 25933, + "training_step_time": 0.10785722732543945 + }, + { + "epoch": 3.95721435546875e-05, + "model_forward_time": 0.025271892547607422, + "step": 25934 + }, + { + "epoch": 3.95721435546875e-05, + "step": 25934, + "training_step_time": 0.1046285629272461 + }, + { + "epoch": 3.957366943359375e-05, + "model_forward_time": 0.025661230087280273, + "step": 25935 + }, + { + "epoch": 3.957366943359375e-05, + "step": 25935, + "training_step_time": 0.10770130157470703 + }, + { + "epoch": 3.95751953125e-05, + "model_forward_time": 0.025554656982421875, + "step": 25936 + }, + { + "epoch": 3.95751953125e-05, + "step": 25936, + "training_step_time": 0.10528850555419922 + }, + { + "epoch": 3.957672119140625e-05, + "model_forward_time": 0.025127410888671875, + "step": 25937 + }, + { + "epoch": 3.957672119140625e-05, + "step": 25937, + "training_step_time": 0.14862775802612305 + }, + { + "epoch": 3.95782470703125e-05, + "model_forward_time": 0.024837970733642578, + "step": 25938 + }, + { + "epoch": 3.95782470703125e-05, + "step": 25938, + "training_step_time": 0.10601663589477539 + }, + { + "epoch": 3.957977294921875e-05, + "model_forward_time": 0.024684667587280273, + "step": 25939 + }, + { + "epoch": 3.957977294921875e-05, + "step": 25939, + "training_step_time": 0.11926937103271484 + }, + { + "epoch": 3.9581298828125e-05, + "grad_norm": 0.0714607685804367, + "learning_rate": 4.924262314984262e-06, + "loss": 0.0022, + "step": 25940 + }, + { + "epoch": 3.9581298828125e-05, + "model_forward_time": 0.024963855743408203, + "step": 25940 + }, + { + "epoch": 3.9581298828125e-05, + "step": 25940, + "training_step_time": 0.13007211685180664 + }, + { + "epoch": 3.958282470703125e-05, + "model_forward_time": 0.02481245994567871, + "step": 25941 + }, + { + "epoch": 3.958282470703125e-05, + "step": 25941, + "training_step_time": 0.13599586486816406 + }, + { + "epoch": 3.95843505859375e-05, + "model_forward_time": 0.02435779571533203, + "step": 25942 + }, + { + "epoch": 3.95843505859375e-05, + "step": 25942, + "training_step_time": 0.12369418144226074 + }, + { + "epoch": 3.958587646484375e-05, + "model_forward_time": 0.02498030662536621, + "step": 25943 + }, + { + "epoch": 3.958587646484375e-05, + "step": 25943, + "training_step_time": 0.12888717651367188 + }, + { + "epoch": 3.958740234375e-05, + "model_forward_time": 0.02518939971923828, + "step": 25944 + }, + { + "epoch": 3.958740234375e-05, + "step": 25944, + "training_step_time": 0.1187584400177002 + }, + { + "epoch": 3.958892822265625e-05, + "model_forward_time": 0.02643132209777832, + "step": 25945 + }, + { + "epoch": 3.958892822265625e-05, + "step": 25945, + "training_step_time": 0.10482215881347656 + }, + { + "epoch": 3.95904541015625e-05, + "model_forward_time": 0.024663686752319336, + "step": 25946 + }, + { + "epoch": 3.95904541015625e-05, + "step": 25946, + "training_step_time": 0.10834193229675293 + }, + { + "epoch": 3.959197998046875e-05, + "model_forward_time": 0.025209903717041016, + "step": 25947 + }, + { + "epoch": 3.959197998046875e-05, + "step": 25947, + "training_step_time": 0.11103463172912598 + }, + { + "epoch": 3.9593505859375e-05, + "model_forward_time": 0.025087833404541016, + "step": 25948 + }, + { + "epoch": 3.9593505859375e-05, + "step": 25948, + "training_step_time": 0.10713934898376465 + }, + { + "epoch": 3.959503173828125e-05, + "model_forward_time": 0.026057004928588867, + "step": 25949 + }, + { + "epoch": 3.959503173828125e-05, + "step": 25949, + "training_step_time": 0.1267542839050293 + }, + { + "epoch": 3.95965576171875e-05, + "grad_norm": 0.07329155504703522, + "learning_rate": 4.900438493352055e-06, + "loss": 0.0059, + "step": 25950 + }, + { + "epoch": 3.95965576171875e-05, + "model_forward_time": 0.026989459991455078, + "step": 25950 + }, + { + "epoch": 3.95965576171875e-05, + "step": 25950, + "training_step_time": 0.13417577743530273 + }, + { + "epoch": 3.959808349609375e-05, + "model_forward_time": 0.02520275115966797, + "step": 25951 + }, + { + "epoch": 3.959808349609375e-05, + "step": 25951, + "training_step_time": 0.11925911903381348 + }, + { + "epoch": 3.9599609375e-05, + "model_forward_time": 0.025282621383666992, + "step": 25952 + }, + { + "epoch": 3.9599609375e-05, + "step": 25952, + "training_step_time": 0.10979390144348145 + }, + { + "epoch": 3.960113525390625e-05, + "model_forward_time": 0.0253143310546875, + "step": 25953 + }, + { + "epoch": 3.960113525390625e-05, + "step": 25953, + "training_step_time": 0.1065518856048584 + }, + { + "epoch": 3.96026611328125e-05, + "model_forward_time": 0.02487921714782715, + "step": 25954 + }, + { + "epoch": 3.96026611328125e-05, + "step": 25954, + "training_step_time": 0.13965511322021484 + }, + { + "epoch": 3.960418701171875e-05, + "model_forward_time": 0.024860620498657227, + "step": 25955 + }, + { + "epoch": 3.960418701171875e-05, + "step": 25955, + "training_step_time": 0.1117701530456543 + }, + { + "epoch": 3.9605712890625e-05, + "model_forward_time": 0.0248110294342041, + "step": 25956 + }, + { + "epoch": 3.9605712890625e-05, + "step": 25956, + "training_step_time": 0.1071329116821289 + }, + { + "epoch": 3.960723876953125e-05, + "model_forward_time": 0.024823665618896484, + "step": 25957 + }, + { + "epoch": 3.960723876953125e-05, + "step": 25957, + "training_step_time": 0.11965560913085938 + }, + { + "epoch": 3.96087646484375e-05, + "model_forward_time": 0.024296998977661133, + "step": 25958 + }, + { + "epoch": 3.96087646484375e-05, + "step": 25958, + "training_step_time": 0.10968160629272461 + }, + { + "epoch": 3.961029052734375e-05, + "model_forward_time": 0.025357484817504883, + "step": 25959 + }, + { + "epoch": 3.961029052734375e-05, + "step": 25959, + "training_step_time": 0.10692405700683594 + }, + { + "epoch": 3.961181640625e-05, + "grad_norm": 0.0413772389292717, + "learning_rate": 4.8766694719396875e-06, + "loss": 0.0062, + "step": 25960 + }, + { + "epoch": 3.961181640625e-05, + "model_forward_time": 0.024907827377319336, + "step": 25960 + }, + { + "epoch": 3.961181640625e-05, + "step": 25960, + "training_step_time": 0.18484234809875488 + }, + { + "epoch": 3.961334228515625e-05, + "model_forward_time": 0.02427530288696289, + "step": 25961 + }, + { + "epoch": 3.961334228515625e-05, + "step": 25961, + "training_step_time": 0.1016690731048584 + }, + { + "epoch": 3.96148681640625e-05, + "model_forward_time": 0.024558067321777344, + "step": 25962 + }, + { + "epoch": 3.96148681640625e-05, + "step": 25962, + "training_step_time": 0.10198664665222168 + }, + { + "epoch": 3.961639404296875e-05, + "model_forward_time": 0.024968624114990234, + "step": 25963 + }, + { + "epoch": 3.961639404296875e-05, + "step": 25963, + "training_step_time": 0.10433244705200195 + }, + { + "epoch": 3.9617919921875e-05, + "model_forward_time": 0.024876832962036133, + "step": 25964 + }, + { + "epoch": 3.9617919921875e-05, + "step": 25964, + "training_step_time": 0.11409640312194824 + }, + { + "epoch": 3.961944580078125e-05, + "model_forward_time": 0.024922847747802734, + "step": 25965 + }, + { + "epoch": 3.961944580078125e-05, + "step": 25965, + "training_step_time": 0.11059832572937012 + }, + { + "epoch": 3.96209716796875e-05, + "model_forward_time": 0.025029420852661133, + "step": 25966 + }, + { + "epoch": 3.96209716796875e-05, + "step": 25966, + "training_step_time": 0.1095435619354248 + }, + { + "epoch": 3.962249755859375e-05, + "model_forward_time": 0.025345802307128906, + "step": 25967 + }, + { + "epoch": 3.962249755859375e-05, + "step": 25967, + "training_step_time": 0.16017556190490723 + }, + { + "epoch": 3.96240234375e-05, + "model_forward_time": 0.024622201919555664, + "step": 25968 + }, + { + "epoch": 3.96240234375e-05, + "step": 25968, + "training_step_time": 0.17154526710510254 + }, + { + "epoch": 3.962554931640625e-05, + "model_forward_time": 0.0243685245513916, + "step": 25969 + }, + { + "epoch": 3.962554931640625e-05, + "step": 25969, + "training_step_time": 0.11273956298828125 + }, + { + "epoch": 3.96270751953125e-05, + "grad_norm": 0.13631302118301392, + "learning_rate": 4.852955279628768e-06, + "loss": 0.0046, + "step": 25970 + }, + { + "epoch": 3.96270751953125e-05, + "model_forward_time": 0.024259090423583984, + "step": 25970 + }, + { + "epoch": 3.96270751953125e-05, + "step": 25970, + "training_step_time": 0.109344482421875 + }, + { + "epoch": 3.962860107421875e-05, + "model_forward_time": 0.024977445602416992, + "step": 25971 + }, + { + "epoch": 3.962860107421875e-05, + "step": 25971, + "training_step_time": 0.11905670166015625 + }, + { + "epoch": 3.9630126953125e-05, + "model_forward_time": 0.024840593338012695, + "step": 25972 + }, + { + "epoch": 3.9630126953125e-05, + "step": 25972, + "training_step_time": 0.11056184768676758 + }, + { + "epoch": 3.963165283203125e-05, + "model_forward_time": 0.024866104125976562, + "step": 25973 + }, + { + "epoch": 3.963165283203125e-05, + "step": 25973, + "training_step_time": 0.11102890968322754 + }, + { + "epoch": 3.96331787109375e-05, + "model_forward_time": 0.025064706802368164, + "step": 25974 + }, + { + "epoch": 3.96331787109375e-05, + "step": 25974, + "training_step_time": 0.11691665649414062 + }, + { + "epoch": 3.963470458984375e-05, + "model_forward_time": 0.024753332138061523, + "step": 25975 + }, + { + "epoch": 3.963470458984375e-05, + "step": 25975, + "training_step_time": 0.10817646980285645 + }, + { + "epoch": 3.963623046875e-05, + "model_forward_time": 0.02526068687438965, + "step": 25976 + }, + { + "epoch": 3.963623046875e-05, + "step": 25976, + "training_step_time": 0.10428309440612793 + }, + { + "epoch": 3.963775634765625e-05, + "model_forward_time": 0.02501201629638672, + "step": 25977 + }, + { + "epoch": 3.963775634765625e-05, + "step": 25977, + "training_step_time": 0.10531401634216309 + }, + { + "epoch": 3.96392822265625e-05, + "model_forward_time": 0.024677753448486328, + "step": 25978 + }, + { + "epoch": 3.96392822265625e-05, + "step": 25978, + "training_step_time": 0.10532999038696289 + }, + { + "epoch": 3.964080810546875e-05, + "model_forward_time": 0.02463674545288086, + "step": 25979 + }, + { + "epoch": 3.964080810546875e-05, + "step": 25979, + "training_step_time": 0.10957884788513184 + }, + { + "epoch": 3.9642333984375e-05, + "grad_norm": 0.22917073965072632, + "learning_rate": 4.829295945234258e-06, + "loss": 0.0043, + "step": 25980 + }, + { + "epoch": 3.9642333984375e-05, + "model_forward_time": 0.02520012855529785, + "step": 25980 + }, + { + "epoch": 3.9642333984375e-05, + "step": 25980, + "training_step_time": 0.10792231559753418 + }, + { + "epoch": 3.964385986328125e-05, + "model_forward_time": 0.025702953338623047, + "step": 25981 + }, + { + "epoch": 3.964385986328125e-05, + "step": 25981, + "training_step_time": 0.10661911964416504 + }, + { + "epoch": 3.96453857421875e-05, + "model_forward_time": 0.025025606155395508, + "step": 25982 + }, + { + "epoch": 3.96453857421875e-05, + "step": 25982, + "training_step_time": 0.10843038558959961 + }, + { + "epoch": 3.964691162109375e-05, + "model_forward_time": 0.025310277938842773, + "step": 25983 + }, + { + "epoch": 3.964691162109375e-05, + "step": 25983, + "training_step_time": 0.11055278778076172 + }, + { + "epoch": 3.96484375e-05, + "model_forward_time": 0.024824857711791992, + "step": 25984 + }, + { + "epoch": 3.96484375e-05, + "step": 25984, + "training_step_time": 0.10470247268676758 + }, + { + "epoch": 3.964996337890625e-05, + "model_forward_time": 0.025389909744262695, + "step": 25985 + }, + { + "epoch": 3.964996337890625e-05, + "step": 25985, + "training_step_time": 0.1932811737060547 + }, + { + "epoch": 3.96514892578125e-05, + "model_forward_time": 0.02440667152404785, + "step": 25986 + }, + { + "epoch": 3.96514892578125e-05, + "step": 25986, + "training_step_time": 0.1116487979888916 + }, + { + "epoch": 3.965301513671875e-05, + "model_forward_time": 0.024024486541748047, + "step": 25987 + }, + { + "epoch": 3.965301513671875e-05, + "step": 25987, + "training_step_time": 0.10891580581665039 + }, + { + "epoch": 3.9654541015625e-05, + "model_forward_time": 0.025049686431884766, + "step": 25988 + }, + { + "epoch": 3.9654541015625e-05, + "step": 25988, + "training_step_time": 0.12834739685058594 + }, + { + "epoch": 3.965606689453125e-05, + "model_forward_time": 0.024780988693237305, + "step": 25989 + }, + { + "epoch": 3.965606689453125e-05, + "step": 25989, + "training_step_time": 0.12465190887451172 + }, + { + "epoch": 3.96575927734375e-05, + "grad_norm": 0.06205694004893303, + "learning_rate": 4.805691497504505e-06, + "loss": 0.0045, + "step": 25990 + }, + { + "epoch": 3.96575927734375e-05, + "model_forward_time": 0.024813413619995117, + "step": 25990 + }, + { + "epoch": 3.96575927734375e-05, + "step": 25990, + "training_step_time": 0.10761165618896484 + }, + { + "epoch": 3.965911865234375e-05, + "model_forward_time": 0.025182247161865234, + "step": 25991 + }, + { + "epoch": 3.965911865234375e-05, + "step": 25991, + "training_step_time": 0.12384939193725586 + }, + { + "epoch": 3.966064453125e-05, + "model_forward_time": 0.02520585060119629, + "step": 25992 + }, + { + "epoch": 3.966064453125e-05, + "step": 25992, + "training_step_time": 0.10612058639526367 + }, + { + "epoch": 3.966217041015625e-05, + "model_forward_time": 0.0250701904296875, + "step": 25993 + }, + { + "epoch": 3.966217041015625e-05, + "step": 25993, + "training_step_time": 0.1062781810760498 + }, + { + "epoch": 3.96636962890625e-05, + "model_forward_time": 0.024768352508544922, + "step": 25994 + }, + { + "epoch": 3.96636962890625e-05, + "step": 25994, + "training_step_time": 0.11225390434265137 + }, + { + "epoch": 3.966522216796875e-05, + "model_forward_time": 0.024724721908569336, + "step": 25995 + }, + { + "epoch": 3.966522216796875e-05, + "step": 25995, + "training_step_time": 0.1076667308807373 + }, + { + "epoch": 3.9666748046875e-05, + "model_forward_time": 0.025886058807373047, + "step": 25996 + }, + { + "epoch": 3.9666748046875e-05, + "step": 25996, + "training_step_time": 0.11101484298706055 + }, + { + "epoch": 3.966827392578125e-05, + "model_forward_time": 0.025547027587890625, + "step": 25997 + }, + { + "epoch": 3.966827392578125e-05, + "step": 25997, + "training_step_time": 0.11372542381286621 + }, + { + "epoch": 3.96697998046875e-05, + "model_forward_time": 0.02540874481201172, + "step": 25998 + }, + { + "epoch": 3.96697998046875e-05, + "step": 25998, + "training_step_time": 0.10763955116271973 + }, + { + "epoch": 3.967132568359375e-05, + "model_forward_time": 0.025618791580200195, + "step": 25999 + }, + { + "epoch": 3.967132568359375e-05, + "step": 25999, + "training_step_time": 0.12445354461669922 + }, + { + "epoch": 3.96728515625e-05, + "grad_norm": 0.12980355322360992, + "learning_rate": 4.782141965121128e-06, + "loss": 0.0042, + "step": 26000 + }, + { + "epoch": 3.96728515625e-05, + "model_forward_time": 0.026137351989746094, + "step": 26000 + }, + { + "epoch": 3.96728515625e-05, + "step": 26000, + "training_step_time": 0.10444116592407227 + }, + { + "epoch": 3.967437744140625e-05, + "model_forward_time": 0.02349567413330078, + "step": 26001 + }, + { + "epoch": 3.967437744140625e-05, + "step": 26001, + "training_step_time": 0.13078618049621582 + }, + { + "epoch": 3.96759033203125e-05, + "model_forward_time": 0.024665117263793945, + "step": 26002 + }, + { + "epoch": 3.96759033203125e-05, + "step": 26002, + "training_step_time": 0.1827259063720703 + }, + { + "epoch": 3.967742919921875e-05, + "model_forward_time": 0.02470684051513672, + "step": 26003 + }, + { + "epoch": 3.967742919921875e-05, + "step": 26003, + "training_step_time": 0.11550045013427734 + }, + { + "epoch": 3.9678955078125e-05, + "model_forward_time": 0.026877403259277344, + "step": 26004 + }, + { + "epoch": 3.9678955078125e-05, + "step": 26004, + "training_step_time": 0.2109205722808838 + }, + { + "epoch": 3.968048095703125e-05, + "model_forward_time": 0.024364471435546875, + "step": 26005 + }, + { + "epoch": 3.968048095703125e-05, + "step": 26005, + "training_step_time": 0.11206769943237305 + }, + { + "epoch": 3.96820068359375e-05, + "model_forward_time": 0.025311708450317383, + "step": 26006 + }, + { + "epoch": 3.96820068359375e-05, + "step": 26006, + "training_step_time": 0.10742998123168945 + }, + { + "epoch": 3.968353271484375e-05, + "model_forward_time": 0.02502894401550293, + "step": 26007 + }, + { + "epoch": 3.968353271484375e-05, + "step": 26007, + "training_step_time": 0.1581413745880127 + }, + { + "epoch": 3.968505859375e-05, + "model_forward_time": 0.02397918701171875, + "step": 26008 + }, + { + "epoch": 3.968505859375e-05, + "step": 26008, + "training_step_time": 0.1049644947052002 + }, + { + "epoch": 3.968658447265625e-05, + "model_forward_time": 0.024847745895385742, + "step": 26009 + }, + { + "epoch": 3.968658447265625e-05, + "step": 26009, + "training_step_time": 0.10390734672546387 + }, + { + "epoch": 3.96881103515625e-05, + "grad_norm": 0.1348857283592224, + "learning_rate": 4.758647376699032e-06, + "loss": 0.0082, + "step": 26010 + }, + { + "epoch": 3.96881103515625e-05, + "model_forward_time": 0.025327682495117188, + "step": 26010 + }, + { + "epoch": 3.96881103515625e-05, + "step": 26010, + "training_step_time": 0.10662007331848145 + }, + { + "epoch": 3.968963623046875e-05, + "model_forward_time": 0.025835275650024414, + "step": 26011 + }, + { + "epoch": 3.968963623046875e-05, + "step": 26011, + "training_step_time": 0.10666418075561523 + }, + { + "epoch": 3.9691162109375e-05, + "model_forward_time": 0.02539539337158203, + "step": 26012 + }, + { + "epoch": 3.9691162109375e-05, + "step": 26012, + "training_step_time": 0.10662555694580078 + }, + { + "epoch": 3.969268798828125e-05, + "model_forward_time": 0.02554488182067871, + "step": 26013 + }, + { + "epoch": 3.969268798828125e-05, + "step": 26013, + "training_step_time": 0.1114494800567627 + }, + { + "epoch": 3.96942138671875e-05, + "model_forward_time": 0.025473833084106445, + "step": 26014 + }, + { + "epoch": 3.96942138671875e-05, + "step": 26014, + "training_step_time": 0.12279200553894043 + }, + { + "epoch": 3.969573974609375e-05, + "model_forward_time": 0.024950027465820312, + "step": 26015 + }, + { + "epoch": 3.969573974609375e-05, + "step": 26015, + "training_step_time": 0.1349194049835205 + }, + { + "epoch": 3.9697265625e-05, + "model_forward_time": 0.024435758590698242, + "step": 26016 + }, + { + "epoch": 3.9697265625e-05, + "step": 26016, + "training_step_time": 0.21700835227966309 + }, + { + "epoch": 3.969879150390625e-05, + "model_forward_time": 0.024574756622314453, + "step": 26017 + }, + { + "epoch": 3.969879150390625e-05, + "step": 26017, + "training_step_time": 0.11485719680786133 + }, + { + "epoch": 3.97003173828125e-05, + "model_forward_time": 0.024589061737060547, + "step": 26018 + }, + { + "epoch": 3.97003173828125e-05, + "step": 26018, + "training_step_time": 0.11760711669921875 + }, + { + "epoch": 3.970184326171875e-05, + "model_forward_time": 0.02534961700439453, + "step": 26019 + }, + { + "epoch": 3.970184326171875e-05, + "step": 26019, + "training_step_time": 0.16478848457336426 + }, + { + "epoch": 3.9703369140625e-05, + "grad_norm": 0.0655096098780632, + "learning_rate": 4.7352077607863475e-06, + "loss": 0.0036, + "step": 26020 + }, + { + "epoch": 3.9703369140625e-05, + "model_forward_time": 0.02577376365661621, + "step": 26020 + }, + { + "epoch": 3.9703369140625e-05, + "step": 26020, + "training_step_time": 0.13031339645385742 + }, + { + "epoch": 3.970489501953125e-05, + "model_forward_time": 0.024147987365722656, + "step": 26021 + }, + { + "epoch": 3.970489501953125e-05, + "step": 26021, + "training_step_time": 0.10988593101501465 + }, + { + "epoch": 3.97064208984375e-05, + "model_forward_time": 0.025187015533447266, + "step": 26022 + }, + { + "epoch": 3.97064208984375e-05, + "step": 26022, + "training_step_time": 0.10748648643493652 + }, + { + "epoch": 3.970794677734375e-05, + "model_forward_time": 0.024553298950195312, + "step": 26023 + }, + { + "epoch": 3.970794677734375e-05, + "step": 26023, + "training_step_time": 0.10675692558288574 + }, + { + "epoch": 3.970947265625e-05, + "model_forward_time": 0.02523493766784668, + "step": 26024 + }, + { + "epoch": 3.970947265625e-05, + "step": 26024, + "training_step_time": 0.10614490509033203 + }, + { + "epoch": 3.971099853515625e-05, + "model_forward_time": 0.025247812271118164, + "step": 26025 + }, + { + "epoch": 3.971099853515625e-05, + "step": 26025, + "training_step_time": 0.10645580291748047 + }, + { + "epoch": 3.97125244140625e-05, + "model_forward_time": 0.024952173233032227, + "step": 26026 + }, + { + "epoch": 3.97125244140625e-05, + "step": 26026, + "training_step_time": 0.10694742202758789 + }, + { + "epoch": 3.971405029296875e-05, + "model_forward_time": 0.02524423599243164, + "step": 26027 + }, + { + "epoch": 3.971405029296875e-05, + "step": 26027, + "training_step_time": 0.10592460632324219 + }, + { + "epoch": 3.9715576171875e-05, + "model_forward_time": 0.025447845458984375, + "step": 26028 + }, + { + "epoch": 3.9715576171875e-05, + "step": 26028, + "training_step_time": 0.10930728912353516 + }, + { + "epoch": 3.971710205078125e-05, + "model_forward_time": 0.02537393569946289, + "step": 26029 + }, + { + "epoch": 3.971710205078125e-05, + "step": 26029, + "training_step_time": 0.10821032524108887 + }, + { + "epoch": 3.97186279296875e-05, + "grad_norm": 0.3153933584690094, + "learning_rate": 4.711823145864419e-06, + "loss": 0.0058, + "step": 26030 + }, + { + "epoch": 3.97186279296875e-05, + "model_forward_time": 0.025151491165161133, + "step": 26030 + }, + { + "epoch": 3.97186279296875e-05, + "step": 26030, + "training_step_time": 0.10540318489074707 + }, + { + "epoch": 3.972015380859375e-05, + "model_forward_time": 0.02481532096862793, + "step": 26031 + }, + { + "epoch": 3.972015380859375e-05, + "step": 26031, + "training_step_time": 0.10720038414001465 + }, + { + "epoch": 3.97216796875e-05, + "model_forward_time": 0.024936437606811523, + "step": 26032 + }, + { + "epoch": 3.97216796875e-05, + "step": 26032, + "training_step_time": 0.10388326644897461 + }, + { + "epoch": 3.972320556640625e-05, + "model_forward_time": 0.025258302688598633, + "step": 26033 + }, + { + "epoch": 3.972320556640625e-05, + "step": 26033, + "training_step_time": 0.1055140495300293 + }, + { + "epoch": 3.97247314453125e-05, + "model_forward_time": 0.02509784698486328, + "step": 26034 + }, + { + "epoch": 3.97247314453125e-05, + "step": 26034, + "training_step_time": 0.12137413024902344 + }, + { + "epoch": 3.972625732421875e-05, + "model_forward_time": 0.025412321090698242, + "step": 26035 + }, + { + "epoch": 3.972625732421875e-05, + "step": 26035, + "training_step_time": 0.11072444915771484 + }, + { + "epoch": 3.9727783203125e-05, + "model_forward_time": 0.025491714477539062, + "step": 26036 + }, + { + "epoch": 3.9727783203125e-05, + "step": 26036, + "training_step_time": 0.11000943183898926 + }, + { + "epoch": 3.972930908203125e-05, + "model_forward_time": 0.03134632110595703, + "step": 26037 + }, + { + "epoch": 3.972930908203125e-05, + "step": 26037, + "training_step_time": 0.12406206130981445 + }, + { + "epoch": 3.97308349609375e-05, + "model_forward_time": 0.02571725845336914, + "step": 26038 + }, + { + "epoch": 3.97308349609375e-05, + "step": 26038, + "training_step_time": 0.13384413719177246 + }, + { + "epoch": 3.973236083984375e-05, + "model_forward_time": 0.025203466415405273, + "step": 26039 + }, + { + "epoch": 3.973236083984375e-05, + "step": 26039, + "training_step_time": 0.11167407035827637 + }, + { + "epoch": 3.973388671875e-05, + "grad_norm": 0.22465716302394867, + "learning_rate": 4.688493560347773e-06, + "loss": 0.0074, + "step": 26040 + }, + { + "epoch": 3.973388671875e-05, + "model_forward_time": 0.025099754333496094, + "step": 26040 + }, + { + "epoch": 3.973388671875e-05, + "step": 26040, + "training_step_time": 0.10657453536987305 + }, + { + "epoch": 3.973541259765625e-05, + "model_forward_time": 0.0262906551361084, + "step": 26041 + }, + { + "epoch": 3.973541259765625e-05, + "step": 26041, + "training_step_time": 0.10577845573425293 + }, + { + "epoch": 3.97369384765625e-05, + "model_forward_time": 0.02516031265258789, + "step": 26042 + }, + { + "epoch": 3.97369384765625e-05, + "step": 26042, + "training_step_time": 0.10335135459899902 + }, + { + "epoch": 3.973846435546875e-05, + "model_forward_time": 0.025517702102661133, + "step": 26043 + }, + { + "epoch": 3.973846435546875e-05, + "step": 26043, + "training_step_time": 0.10529327392578125 + }, + { + "epoch": 3.9739990234375e-05, + "model_forward_time": 0.02576899528503418, + "step": 26044 + }, + { + "epoch": 3.9739990234375e-05, + "step": 26044, + "training_step_time": 0.5889902114868164 + }, + { + "epoch": 3.974151611328125e-05, + "model_forward_time": 0.02310466766357422, + "step": 26045 + }, + { + "epoch": 3.974151611328125e-05, + "step": 26045, + "training_step_time": 0.1740434169769287 + }, + { + "epoch": 3.97430419921875e-05, + "model_forward_time": 0.02442336082458496, + "step": 26046 + }, + { + "epoch": 3.97430419921875e-05, + "step": 26046, + "training_step_time": 0.1050863265991211 + }, + { + "epoch": 3.974456787109375e-05, + "model_forward_time": 0.024844884872436523, + "step": 26047 + }, + { + "epoch": 3.974456787109375e-05, + "step": 26047, + "training_step_time": 0.16608238220214844 + }, + { + "epoch": 3.974609375e-05, + "model_forward_time": 0.024823665618896484, + "step": 26048 + }, + { + "epoch": 3.974609375e-05, + "step": 26048, + "training_step_time": 0.14832162857055664 + }, + { + "epoch": 3.974761962890625e-05, + "model_forward_time": 0.024488210678100586, + "step": 26049 + }, + { + "epoch": 3.974761962890625e-05, + "step": 26049, + "training_step_time": 0.11818242073059082 + }, + { + "epoch": 3.97491455078125e-05, + "grad_norm": 0.05569394305348396, + "learning_rate": 4.66521903258404e-06, + "loss": 0.0059, + "step": 26050 + }, + { + "epoch": 3.97491455078125e-05, + "model_forward_time": 0.024953126907348633, + "step": 26050 + }, + { + "epoch": 3.97491455078125e-05, + "step": 26050, + "training_step_time": 0.15697765350341797 + }, + { + "epoch": 3.975067138671875e-05, + "model_forward_time": 0.024326086044311523, + "step": 26051 + }, + { + "epoch": 3.975067138671875e-05, + "step": 26051, + "training_step_time": 0.10287904739379883 + }, + { + "epoch": 3.9752197265625e-05, + "model_forward_time": 0.024932861328125, + "step": 26052 + }, + { + "epoch": 3.9752197265625e-05, + "step": 26052, + "training_step_time": 0.10367321968078613 + }, + { + "epoch": 3.975372314453125e-05, + "model_forward_time": 0.025317907333374023, + "step": 26053 + }, + { + "epoch": 3.975372314453125e-05, + "step": 26053, + "training_step_time": 0.10651707649230957 + }, + { + "epoch": 3.97552490234375e-05, + "model_forward_time": 0.02503681182861328, + "step": 26054 + }, + { + "epoch": 3.97552490234375e-05, + "step": 26054, + "training_step_time": 0.10993552207946777 + }, + { + "epoch": 3.975677490234375e-05, + "model_forward_time": 0.024713516235351562, + "step": 26055 + }, + { + "epoch": 3.975677490234375e-05, + "step": 26055, + "training_step_time": 0.10696077346801758 + }, + { + "epoch": 3.975830078125e-05, + "model_forward_time": 0.028592348098754883, + "step": 26056 + }, + { + "epoch": 3.975830078125e-05, + "step": 26056, + "training_step_time": 0.10900521278381348 + }, + { + "epoch": 3.975982666015625e-05, + "model_forward_time": 0.0252683162689209, + "step": 26057 + }, + { + "epoch": 3.975982666015625e-05, + "step": 26057, + "training_step_time": 0.1077885627746582 + }, + { + "epoch": 3.97613525390625e-05, + "model_forward_time": 0.02539658546447754, + "step": 26058 + }, + { + "epoch": 3.97613525390625e-05, + "step": 26058, + "training_step_time": 0.10639500617980957 + }, + { + "epoch": 3.976287841796875e-05, + "model_forward_time": 0.025281190872192383, + "step": 26059 + }, + { + "epoch": 3.976287841796875e-05, + "step": 26059, + "training_step_time": 0.13728713989257812 + }, + { + "epoch": 3.9764404296875e-05, + "grad_norm": 0.06518325209617615, + "learning_rate": 4.64199959085398e-06, + "loss": 0.0031, + "step": 26060 + }, + { + "epoch": 3.9764404296875e-05, + "model_forward_time": 0.024903297424316406, + "step": 26060 + }, + { + "epoch": 3.9764404296875e-05, + "step": 26060, + "training_step_time": 0.13059306144714355 + }, + { + "epoch": 3.976593017578125e-05, + "model_forward_time": 0.02465987205505371, + "step": 26061 + }, + { + "epoch": 3.976593017578125e-05, + "step": 26061, + "training_step_time": 0.10272479057312012 + }, + { + "epoch": 3.97674560546875e-05, + "model_forward_time": 0.02546215057373047, + "step": 26062 + }, + { + "epoch": 3.97674560546875e-05, + "step": 26062, + "training_step_time": 0.11530399322509766 + }, + { + "epoch": 3.976898193359375e-05, + "model_forward_time": 0.02499675750732422, + "step": 26063 + }, + { + "epoch": 3.976898193359375e-05, + "step": 26063, + "training_step_time": 0.11090779304504395 + }, + { + "epoch": 3.97705078125e-05, + "model_forward_time": 0.02827739715576172, + "step": 26064 + }, + { + "epoch": 3.97705078125e-05, + "step": 26064, + "training_step_time": 0.10848402976989746 + }, + { + "epoch": 3.977203369140625e-05, + "model_forward_time": 0.025260448455810547, + "step": 26065 + }, + { + "epoch": 3.977203369140625e-05, + "step": 26065, + "training_step_time": 0.1930980682373047 + }, + { + "epoch": 3.97735595703125e-05, + "model_forward_time": 0.024288654327392578, + "step": 26066 + }, + { + "epoch": 3.97735595703125e-05, + "step": 26066, + "training_step_time": 0.10209774971008301 + }, + { + "epoch": 3.977508544921875e-05, + "model_forward_time": 0.0244748592376709, + "step": 26067 + }, + { + "epoch": 3.977508544921875e-05, + "step": 26067, + "training_step_time": 0.10619449615478516 + }, + { + "epoch": 3.9776611328125e-05, + "model_forward_time": 0.02485489845275879, + "step": 26068 + }, + { + "epoch": 3.9776611328125e-05, + "step": 26068, + "training_step_time": 0.10968208312988281 + }, + { + "epoch": 3.977813720703125e-05, + "model_forward_time": 0.027309417724609375, + "step": 26069 + }, + { + "epoch": 3.977813720703125e-05, + "step": 26069, + "training_step_time": 0.11208653450012207 + }, + { + "epoch": 3.97796630859375e-05, + "grad_norm": 0.1016891598701477, + "learning_rate": 4.618835263371396e-06, + "loss": 0.0075, + "step": 26070 + }, + { + "epoch": 3.97796630859375e-05, + "model_forward_time": 0.025282621383666992, + "step": 26070 + }, + { + "epoch": 3.97796630859375e-05, + "step": 26070, + "training_step_time": 0.11693763732910156 + }, + { + "epoch": 3.978118896484375e-05, + "model_forward_time": 0.02524876594543457, + "step": 26071 + }, + { + "epoch": 3.978118896484375e-05, + "step": 26071, + "training_step_time": 0.11122441291809082 + }, + { + "epoch": 3.978271484375e-05, + "model_forward_time": 0.025646686553955078, + "step": 26072 + }, + { + "epoch": 3.978271484375e-05, + "step": 26072, + "training_step_time": 0.1073143482208252 + }, + { + "epoch": 3.978424072265625e-05, + "model_forward_time": 0.02508234977722168, + "step": 26073 + }, + { + "epoch": 3.978424072265625e-05, + "step": 26073, + "training_step_time": 0.11060881614685059 + }, + { + "epoch": 3.97857666015625e-05, + "model_forward_time": 0.02503681182861328, + "step": 26074 + }, + { + "epoch": 3.97857666015625e-05, + "step": 26074, + "training_step_time": 0.1115560531616211 + }, + { + "epoch": 3.978729248046875e-05, + "model_forward_time": 0.02522110939025879, + "step": 26075 + }, + { + "epoch": 3.978729248046875e-05, + "step": 26075, + "training_step_time": 0.10809612274169922 + }, + { + "epoch": 3.9788818359375e-05, + "model_forward_time": 0.025020599365234375, + "step": 26076 + }, + { + "epoch": 3.9788818359375e-05, + "step": 26076, + "training_step_time": 0.10836076736450195 + }, + { + "epoch": 3.979034423828125e-05, + "model_forward_time": 0.02520895004272461, + "step": 26077 + }, + { + "epoch": 3.979034423828125e-05, + "step": 26077, + "training_step_time": 0.10910201072692871 + }, + { + "epoch": 3.97918701171875e-05, + "model_forward_time": 0.025068998336791992, + "step": 26078 + }, + { + "epoch": 3.97918701171875e-05, + "step": 26078, + "training_step_time": 0.15309977531433105 + }, + { + "epoch": 3.979339599609375e-05, + "model_forward_time": 0.02517390251159668, + "step": 26079 + }, + { + "epoch": 3.979339599609375e-05, + "step": 26079, + "training_step_time": 0.1118612289428711 + }, + { + "epoch": 3.9794921875e-05, + "grad_norm": 0.2546069920063019, + "learning_rate": 4.595726078283136e-06, + "loss": 0.0139, + "step": 26080 + }, + { + "epoch": 3.9794921875e-05, + "model_forward_time": 0.02528071403503418, + "step": 26080 + }, + { + "epoch": 3.9794921875e-05, + "step": 26080, + "training_step_time": 0.1091151237487793 + }, + { + "epoch": 3.979644775390625e-05, + "model_forward_time": 0.025032520294189453, + "step": 26081 + }, + { + "epoch": 3.979644775390625e-05, + "step": 26081, + "training_step_time": 0.11984515190124512 + }, + { + "epoch": 3.97979736328125e-05, + "model_forward_time": 0.02518296241760254, + "step": 26082 + }, + { + "epoch": 3.97979736328125e-05, + "step": 26082, + "training_step_time": 0.13815522193908691 + }, + { + "epoch": 3.979949951171875e-05, + "model_forward_time": 0.02478480339050293, + "step": 26083 + }, + { + "epoch": 3.979949951171875e-05, + "step": 26083, + "training_step_time": 0.11045360565185547 + }, + { + "epoch": 3.9801025390625e-05, + "model_forward_time": 0.025148868560791016, + "step": 26084 + }, + { + "epoch": 3.9801025390625e-05, + "step": 26084, + "training_step_time": 0.11093306541442871 + }, + { + "epoch": 3.980255126953125e-05, + "model_forward_time": 0.025937318801879883, + "step": 26085 + }, + { + "epoch": 3.980255126953125e-05, + "step": 26085, + "training_step_time": 0.10609292984008789 + }, + { + "epoch": 3.98040771484375e-05, + "model_forward_time": 0.025574922561645508, + "step": 26086 + }, + { + "epoch": 3.98040771484375e-05, + "step": 26086, + "training_step_time": 0.10830426216125488 + }, + { + "epoch": 3.980560302734375e-05, + "model_forward_time": 0.027197837829589844, + "step": 26087 + }, + { + "epoch": 3.980560302734375e-05, + "step": 26087, + "training_step_time": 0.11148428916931152 + }, + { + "epoch": 3.980712890625e-05, + "model_forward_time": 0.025763273239135742, + "step": 26088 + }, + { + "epoch": 3.980712890625e-05, + "step": 26088, + "training_step_time": 0.11086273193359375 + }, + { + "epoch": 3.980865478515625e-05, + "model_forward_time": 0.026128292083740234, + "step": 26089 + }, + { + "epoch": 3.980865478515625e-05, + "step": 26089, + "training_step_time": 0.10941433906555176 + }, + { + "epoch": 3.98101806640625e-05, + "grad_norm": 0.06303591281175613, + "learning_rate": 4.57267206366902e-06, + "loss": 0.0079, + "step": 26090 + }, + { + "epoch": 3.98101806640625e-05, + "model_forward_time": 0.025932788848876953, + "step": 26090 + }, + { + "epoch": 3.98101806640625e-05, + "step": 26090, + "training_step_time": 0.21492481231689453 + }, + { + "epoch": 3.981170654296875e-05, + "model_forward_time": 0.0245361328125, + "step": 26091 + }, + { + "epoch": 3.981170654296875e-05, + "step": 26091, + "training_step_time": 0.11951041221618652 + }, + { + "epoch": 3.9813232421875e-05, + "model_forward_time": 0.024872779846191406, + "step": 26092 + }, + { + "epoch": 3.9813232421875e-05, + "step": 26092, + "training_step_time": 0.18544244766235352 + }, + { + "epoch": 3.981475830078125e-05, + "model_forward_time": 0.0245816707611084, + "step": 26093 + }, + { + "epoch": 3.981475830078125e-05, + "step": 26093, + "training_step_time": 0.20705294609069824 + }, + { + "epoch": 3.98162841796875e-05, + "model_forward_time": 0.024883270263671875, + "step": 26094 + }, + { + "epoch": 3.98162841796875e-05, + "step": 26094, + "training_step_time": 0.18047523498535156 + }, + { + "epoch": 3.981781005859375e-05, + "model_forward_time": 0.024225234985351562, + "step": 26095 + }, + { + "epoch": 3.981781005859375e-05, + "step": 26095, + "training_step_time": 0.13805270195007324 + }, + { + "epoch": 3.98193359375e-05, + "model_forward_time": 0.024847030639648438, + "step": 26096 + }, + { + "epoch": 3.98193359375e-05, + "step": 26096, + "training_step_time": 0.11334061622619629 + }, + { + "epoch": 3.982086181640625e-05, + "model_forward_time": 0.02487659454345703, + "step": 26097 + }, + { + "epoch": 3.982086181640625e-05, + "step": 26097, + "training_step_time": 0.10382723808288574 + }, + { + "epoch": 3.98223876953125e-05, + "model_forward_time": 0.025098562240600586, + "step": 26098 + }, + { + "epoch": 3.98223876953125e-05, + "step": 26098, + "training_step_time": 0.10719513893127441 + }, + { + "epoch": 3.982391357421875e-05, + "model_forward_time": 0.02562999725341797, + "step": 26099 + }, + { + "epoch": 3.982391357421875e-05, + "step": 26099, + "training_step_time": 0.1083986759185791 + }, + { + "epoch": 3.9825439453125e-05, + "grad_norm": 0.4417112469673157, + "learning_rate": 4.549673247541875e-06, + "loss": 0.0074, + "step": 26100 + }, + { + "epoch": 3.9825439453125e-05, + "model_forward_time": 0.02549600601196289, + "step": 26100 + }, + { + "epoch": 3.9825439453125e-05, + "step": 26100, + "training_step_time": 0.18639373779296875 + }, + { + "epoch": 3.982696533203125e-05, + "model_forward_time": 0.0247344970703125, + "step": 26101 + }, + { + "epoch": 3.982696533203125e-05, + "step": 26101, + "training_step_time": 0.2057359218597412 + }, + { + "epoch": 3.98284912109375e-05, + "model_forward_time": 0.024443387985229492, + "step": 26102 + }, + { + "epoch": 3.98284912109375e-05, + "step": 26102, + "training_step_time": 0.21073675155639648 + }, + { + "epoch": 3.983001708984375e-05, + "model_forward_time": 0.024087905883789062, + "step": 26103 + }, + { + "epoch": 3.983001708984375e-05, + "step": 26103, + "training_step_time": 0.20769286155700684 + }, + { + "epoch": 3.983154296875e-05, + "model_forward_time": 0.024395227432250977, + "step": 26104 + }, + { + "epoch": 3.983154296875e-05, + "step": 26104, + "training_step_time": 0.2117927074432373 + }, + { + "epoch": 3.983306884765625e-05, + "model_forward_time": 0.024407625198364258, + "step": 26105 + }, + { + "epoch": 3.983306884765625e-05, + "step": 26105, + "training_step_time": 0.18606281280517578 + }, + { + "epoch": 3.98345947265625e-05, + "model_forward_time": 0.02480602264404297, + "step": 26106 + }, + { + "epoch": 3.98345947265625e-05, + "step": 26106, + "training_step_time": 0.13823771476745605 + }, + { + "epoch": 3.983612060546875e-05, + "model_forward_time": 0.024769306182861328, + "step": 26107 + }, + { + "epoch": 3.983612060546875e-05, + "step": 26107, + "training_step_time": 0.143110990524292 + }, + { + "epoch": 3.9837646484375e-05, + "model_forward_time": 0.02431321144104004, + "step": 26108 + }, + { + "epoch": 3.9837646484375e-05, + "step": 26108, + "training_step_time": 0.11896681785583496 + }, + { + "epoch": 3.983917236328125e-05, + "model_forward_time": 0.024739503860473633, + "step": 26109 + }, + { + "epoch": 3.983917236328125e-05, + "step": 26109, + "training_step_time": 0.10262560844421387 + }, + { + "epoch": 3.98406982421875e-05, + "grad_norm": 0.08231714367866516, + "learning_rate": 4.526729657847423e-06, + "loss": 0.0027, + "step": 26110 + }, + { + "epoch": 3.98406982421875e-05, + "model_forward_time": 0.025008678436279297, + "step": 26110 + }, + { + "epoch": 3.98406982421875e-05, + "step": 26110, + "training_step_time": 0.10529971122741699 + }, + { + "epoch": 3.984222412109375e-05, + "model_forward_time": 0.025465011596679688, + "step": 26111 + }, + { + "epoch": 3.984222412109375e-05, + "step": 26111, + "training_step_time": 0.10645794868469238 + }, + { + "epoch": 3.984375e-05, + "model_forward_time": 0.025362491607666016, + "step": 26112 + }, + { + "epoch": 3.984375e-05, + "step": 26112, + "training_step_time": 0.1071310043334961 + }, + { + "epoch": 3.984527587890625e-05, + "model_forward_time": 0.028286218643188477, + "step": 26113 + }, + { + "epoch": 3.984527587890625e-05, + "step": 26113, + "training_step_time": 0.10985136032104492 + }, + { + "epoch": 3.98468017578125e-05, + "model_forward_time": 0.02552056312561035, + "step": 26114 + }, + { + "epoch": 3.98468017578125e-05, + "step": 26114, + "training_step_time": 0.1099252700805664 + }, + { + "epoch": 3.984832763671875e-05, + "model_forward_time": 0.02526402473449707, + "step": 26115 + }, + { + "epoch": 3.984832763671875e-05, + "step": 26115, + "training_step_time": 0.10544943809509277 + }, + { + "epoch": 3.9849853515625e-05, + "model_forward_time": 0.025406837463378906, + "step": 26116 + }, + { + "epoch": 3.9849853515625e-05, + "step": 26116, + "training_step_time": 0.1081991195678711 + }, + { + "epoch": 3.985137939453125e-05, + "model_forward_time": 0.025668621063232422, + "step": 26117 + }, + { + "epoch": 3.985137939453125e-05, + "step": 26117, + "training_step_time": 0.11345839500427246 + }, + { + "epoch": 3.98529052734375e-05, + "model_forward_time": 0.025363683700561523, + "step": 26118 + }, + { + "epoch": 3.98529052734375e-05, + "step": 26118, + "training_step_time": 0.10754275321960449 + }, + { + "epoch": 3.985443115234375e-05, + "model_forward_time": 0.025487184524536133, + "step": 26119 + }, + { + "epoch": 3.985443115234375e-05, + "step": 26119, + "training_step_time": 0.1067957878112793 + }, + { + "epoch": 3.985595703125e-05, + "grad_norm": 0.07585153728723526, + "learning_rate": 4.5038413224642805e-06, + "loss": 0.0049, + "step": 26120 + }, + { + "epoch": 3.985595703125e-05, + "model_forward_time": 0.025071144104003906, + "step": 26120 + }, + { + "epoch": 3.985595703125e-05, + "step": 26120, + "training_step_time": 0.10821080207824707 + }, + { + "epoch": 3.985748291015625e-05, + "model_forward_time": 0.025506019592285156, + "step": 26121 + }, + { + "epoch": 3.985748291015625e-05, + "step": 26121, + "training_step_time": 0.1264328956604004 + }, + { + "epoch": 3.98590087890625e-05, + "model_forward_time": 0.025180339813232422, + "step": 26122 + }, + { + "epoch": 3.98590087890625e-05, + "step": 26122, + "training_step_time": 0.11570143699645996 + }, + { + "epoch": 3.986053466796875e-05, + "model_forward_time": 0.02397322654724121, + "step": 26123 + }, + { + "epoch": 3.986053466796875e-05, + "step": 26123, + "training_step_time": 0.17651724815368652 + }, + { + "epoch": 3.9862060546875e-05, + "model_forward_time": 0.024910688400268555, + "step": 26124 + }, + { + "epoch": 3.9862060546875e-05, + "step": 26124, + "training_step_time": 0.1839592456817627 + }, + { + "epoch": 3.986358642578125e-05, + "model_forward_time": 0.024498462677001953, + "step": 26125 + }, + { + "epoch": 3.986358642578125e-05, + "step": 26125, + "training_step_time": 0.11597871780395508 + }, + { + "epoch": 3.98651123046875e-05, + "model_forward_time": 0.025295734405517578, + "step": 26126 + }, + { + "epoch": 3.98651123046875e-05, + "step": 26126, + "training_step_time": 0.10580778121948242 + }, + { + "epoch": 3.986663818359375e-05, + "model_forward_time": 0.0245358943939209, + "step": 26127 + }, + { + "epoch": 3.986663818359375e-05, + "step": 26127, + "training_step_time": 0.10715460777282715 + }, + { + "epoch": 3.98681640625e-05, + "model_forward_time": 0.02546095848083496, + "step": 26128 + }, + { + "epoch": 3.98681640625e-05, + "step": 26128, + "training_step_time": 0.10633111000061035 + }, + { + "epoch": 3.986968994140625e-05, + "model_forward_time": 0.025569677352905273, + "step": 26129 + }, + { + "epoch": 3.986968994140625e-05, + "step": 26129, + "training_step_time": 0.1130685806274414 + }, + { + "epoch": 3.98712158203125e-05, + "grad_norm": 0.09220074862241745, + "learning_rate": 4.48100826920394e-06, + "loss": 0.0079, + "step": 26130 + }, + { + "epoch": 3.98712158203125e-05, + "model_forward_time": 0.02476215362548828, + "step": 26130 + }, + { + "epoch": 3.98712158203125e-05, + "step": 26130, + "training_step_time": 0.11162185668945312 + }, + { + "epoch": 3.987274169921875e-05, + "model_forward_time": 0.024988174438476562, + "step": 26131 + }, + { + "epoch": 3.987274169921875e-05, + "step": 26131, + "training_step_time": 0.21276402473449707 + }, + { + "epoch": 3.9874267578125e-05, + "model_forward_time": 0.024491310119628906, + "step": 26132 + }, + { + "epoch": 3.9874267578125e-05, + "step": 26132, + "training_step_time": 0.2246253490447998 + }, + { + "epoch": 3.987579345703125e-05, + "model_forward_time": 0.024363279342651367, + "step": 26133 + }, + { + "epoch": 3.987579345703125e-05, + "step": 26133, + "training_step_time": 0.12912964820861816 + }, + { + "epoch": 3.98773193359375e-05, + "model_forward_time": 0.02394723892211914, + "step": 26134 + }, + { + "epoch": 3.98773193359375e-05, + "step": 26134, + "training_step_time": 0.12833380699157715 + }, + { + "epoch": 3.987884521484375e-05, + "model_forward_time": 0.024984359741210938, + "step": 26135 + }, + { + "epoch": 3.987884521484375e-05, + "step": 26135, + "training_step_time": 0.15347051620483398 + }, + { + "epoch": 3.988037109375e-05, + "model_forward_time": 0.024308443069458008, + "step": 26136 + }, + { + "epoch": 3.988037109375e-05, + "step": 26136, + "training_step_time": 0.22919178009033203 + }, + { + "epoch": 3.988189697265625e-05, + "model_forward_time": 0.02466106414794922, + "step": 26137 + }, + { + "epoch": 3.988189697265625e-05, + "step": 26137, + "training_step_time": 0.1167445182800293 + }, + { + "epoch": 3.98834228515625e-05, + "model_forward_time": 0.025573253631591797, + "step": 26138 + }, + { + "epoch": 3.98834228515625e-05, + "step": 26138, + "training_step_time": 0.11499834060668945 + }, + { + "epoch": 3.988494873046875e-05, + "model_forward_time": 0.025053977966308594, + "step": 26139 + }, + { + "epoch": 3.988494873046875e-05, + "step": 26139, + "training_step_time": 0.11554551124572754 + }, + { + "epoch": 3.9886474609375e-05, + "grad_norm": 0.11878912150859833, + "learning_rate": 4.458230525810708e-06, + "loss": 0.0039, + "step": 26140 + }, + { + "epoch": 3.9886474609375e-05, + "model_forward_time": 0.025297880172729492, + "step": 26140 + }, + { + "epoch": 3.9886474609375e-05, + "step": 26140, + "training_step_time": 0.11057496070861816 + }, + { + "epoch": 3.988800048828125e-05, + "model_forward_time": 0.026903152465820312, + "step": 26141 + }, + { + "epoch": 3.988800048828125e-05, + "step": 26141, + "training_step_time": 0.11000514030456543 + }, + { + "epoch": 3.98895263671875e-05, + "model_forward_time": 0.024990558624267578, + "step": 26142 + }, + { + "epoch": 3.98895263671875e-05, + "step": 26142, + "training_step_time": 0.11150717735290527 + }, + { + "epoch": 3.989105224609375e-05, + "model_forward_time": 0.025076866149902344, + "step": 26143 + }, + { + "epoch": 3.989105224609375e-05, + "step": 26143, + "training_step_time": 0.10941052436828613 + }, + { + "epoch": 3.9892578125e-05, + "model_forward_time": 0.025425434112548828, + "step": 26144 + }, + { + "epoch": 3.9892578125e-05, + "step": 26144, + "training_step_time": 0.1105337142944336 + }, + { + "epoch": 3.989410400390625e-05, + "model_forward_time": 0.024982213973999023, + "step": 26145 + }, + { + "epoch": 3.989410400390625e-05, + "step": 26145, + "training_step_time": 0.10650372505187988 + }, + { + "epoch": 3.98956298828125e-05, + "model_forward_time": 0.025403738021850586, + "step": 26146 + }, + { + "epoch": 3.98956298828125e-05, + "step": 26146, + "training_step_time": 0.1954793930053711 + }, + { + "epoch": 3.989715576171875e-05, + "model_forward_time": 0.024821996688842773, + "step": 26147 + }, + { + "epoch": 3.989715576171875e-05, + "step": 26147, + "training_step_time": 0.14196300506591797 + }, + { + "epoch": 3.9898681640625e-05, + "model_forward_time": 0.02449512481689453, + "step": 26148 + }, + { + "epoch": 3.9898681640625e-05, + "step": 26148, + "training_step_time": 0.11445331573486328 + }, + { + "epoch": 3.990020751953125e-05, + "model_forward_time": 0.024861812591552734, + "step": 26149 + }, + { + "epoch": 3.990020751953125e-05, + "step": 26149, + "training_step_time": 0.10686993598937988 + }, + { + "epoch": 3.99017333984375e-05, + "grad_norm": 0.09383201599121094, + "learning_rate": 4.435508119961701e-06, + "loss": 0.0127, + "step": 26150 + }, + { + "epoch": 3.99017333984375e-05, + "model_forward_time": 0.025095701217651367, + "step": 26150 + }, + { + "epoch": 3.99017333984375e-05, + "step": 26150, + "training_step_time": 0.12297630310058594 + }, + { + "epoch": 3.990325927734375e-05, + "model_forward_time": 0.025191068649291992, + "step": 26151 + }, + { + "epoch": 3.990325927734375e-05, + "step": 26151, + "training_step_time": 0.11027121543884277 + }, + { + "epoch": 3.990478515625e-05, + "model_forward_time": 0.025113821029663086, + "step": 26152 + }, + { + "epoch": 3.990478515625e-05, + "step": 26152, + "training_step_time": 0.1085059642791748 + }, + { + "epoch": 3.990631103515625e-05, + "model_forward_time": 0.025587081909179688, + "step": 26153 + }, + { + "epoch": 3.990631103515625e-05, + "step": 26153, + "training_step_time": 0.10966634750366211 + }, + { + "epoch": 3.99078369140625e-05, + "model_forward_time": 0.025266170501708984, + "step": 26154 + }, + { + "epoch": 3.99078369140625e-05, + "step": 26154, + "training_step_time": 0.10693669319152832 + }, + { + "epoch": 3.990936279296875e-05, + "model_forward_time": 0.02513265609741211, + "step": 26155 + }, + { + "epoch": 3.990936279296875e-05, + "step": 26155, + "training_step_time": 0.10780739784240723 + }, + { + "epoch": 3.9910888671875e-05, + "model_forward_time": 0.025136947631835938, + "step": 26156 + }, + { + "epoch": 3.9910888671875e-05, + "step": 26156, + "training_step_time": 0.11208248138427734 + }, + { + "epoch": 3.991241455078125e-05, + "model_forward_time": 0.025364398956298828, + "step": 26157 + }, + { + "epoch": 3.991241455078125e-05, + "step": 26157, + "training_step_time": 0.10563969612121582 + }, + { + "epoch": 3.99139404296875e-05, + "model_forward_time": 0.02491283416748047, + "step": 26158 + }, + { + "epoch": 3.99139404296875e-05, + "step": 26158, + "training_step_time": 0.10661864280700684 + }, + { + "epoch": 3.991546630859375e-05, + "model_forward_time": 0.025142192840576172, + "step": 26159 + }, + { + "epoch": 3.991546630859375e-05, + "step": 26159, + "training_step_time": 0.10689234733581543 + }, + { + "epoch": 3.99169921875e-05, + "grad_norm": 0.06598341464996338, + "learning_rate": 4.412841079266777e-06, + "loss": 0.0028, + "step": 26160 + }, + { + "epoch": 3.99169921875e-05, + "model_forward_time": 0.024890899658203125, + "step": 26160 + }, + { + "epoch": 3.99169921875e-05, + "step": 26160, + "training_step_time": 0.1058659553527832 + }, + { + "epoch": 3.991851806640625e-05, + "model_forward_time": 0.024783611297607422, + "step": 26161 + }, + { + "epoch": 3.991851806640625e-05, + "step": 26161, + "training_step_time": 0.10630965232849121 + }, + { + "epoch": 3.99200439453125e-05, + "model_forward_time": 0.02535724639892578, + "step": 26162 + }, + { + "epoch": 3.99200439453125e-05, + "step": 26162, + "training_step_time": 0.10709619522094727 + }, + { + "epoch": 3.992156982421875e-05, + "model_forward_time": 0.025089740753173828, + "step": 26163 + }, + { + "epoch": 3.992156982421875e-05, + "step": 26163, + "training_step_time": 0.10756564140319824 + }, + { + "epoch": 3.9923095703125e-05, + "model_forward_time": 0.0255587100982666, + "step": 26164 + }, + { + "epoch": 3.9923095703125e-05, + "step": 26164, + "training_step_time": 0.11348557472229004 + }, + { + "epoch": 3.992462158203125e-05, + "model_forward_time": 0.025056123733520508, + "step": 26165 + }, + { + "epoch": 3.992462158203125e-05, + "step": 26165, + "training_step_time": 0.10958075523376465 + }, + { + "epoch": 3.99261474609375e-05, + "model_forward_time": 0.02517080307006836, + "step": 26166 + }, + { + "epoch": 3.99261474609375e-05, + "step": 26166, + "training_step_time": 0.11191010475158691 + }, + { + "epoch": 3.992767333984375e-05, + "model_forward_time": 0.025331497192382812, + "step": 26167 + }, + { + "epoch": 3.992767333984375e-05, + "step": 26167, + "training_step_time": 0.10962629318237305 + }, + { + "epoch": 3.992919921875e-05, + "model_forward_time": 0.02539205551147461, + "step": 26168 + }, + { + "epoch": 3.992919921875e-05, + "step": 26168, + "training_step_time": 0.1092219352722168 + }, + { + "epoch": 3.993072509765625e-05, + "model_forward_time": 0.025058507919311523, + "step": 26169 + }, + { + "epoch": 3.993072509765625e-05, + "step": 26169, + "training_step_time": 0.11667251586914062 + }, + { + "epoch": 3.99322509765625e-05, + "grad_norm": 0.07526319473981857, + "learning_rate": 4.390229431268534e-06, + "loss": 0.0053, + "step": 26170 + }, + { + "epoch": 3.99322509765625e-05, + "model_forward_time": 0.025171518325805664, + "step": 26170 + }, + { + "epoch": 3.99322509765625e-05, + "step": 26170, + "training_step_time": 0.14108991622924805 + }, + { + "epoch": 3.993377685546875e-05, + "model_forward_time": 0.025013208389282227, + "step": 26171 + }, + { + "epoch": 3.993377685546875e-05, + "step": 26171, + "training_step_time": 0.11156725883483887 + }, + { + "epoch": 3.9935302734375e-05, + "model_forward_time": 0.024898052215576172, + "step": 26172 + }, + { + "epoch": 3.9935302734375e-05, + "step": 26172, + "training_step_time": 0.17752766609191895 + }, + { + "epoch": 3.993682861328125e-05, + "model_forward_time": 0.02470254898071289, + "step": 26173 + }, + { + "epoch": 3.993682861328125e-05, + "step": 26173, + "training_step_time": 0.21763992309570312 + }, + { + "epoch": 3.99383544921875e-05, + "model_forward_time": 0.02448129653930664, + "step": 26174 + }, + { + "epoch": 3.99383544921875e-05, + "step": 26174, + "training_step_time": 0.20544886589050293 + }, + { + "epoch": 3.993988037109375e-05, + "model_forward_time": 0.024476051330566406, + "step": 26175 + }, + { + "epoch": 3.993988037109375e-05, + "step": 26175, + "training_step_time": 0.20569634437561035 + }, + { + "epoch": 3.994140625e-05, + "model_forward_time": 0.024744033813476562, + "step": 26176 + }, + { + "epoch": 3.994140625e-05, + "step": 26176, + "training_step_time": 0.17775177955627441 + }, + { + "epoch": 3.994293212890625e-05, + "model_forward_time": 0.024734973907470703, + "step": 26177 + }, + { + "epoch": 3.994293212890625e-05, + "step": 26177, + "training_step_time": 0.17873358726501465 + }, + { + "epoch": 3.99444580078125e-05, + "model_forward_time": 0.02461409568786621, + "step": 26178 + }, + { + "epoch": 3.99444580078125e-05, + "step": 26178, + "training_step_time": 0.13012242317199707 + }, + { + "epoch": 3.994598388671875e-05, + "model_forward_time": 0.02439403533935547, + "step": 26179 + }, + { + "epoch": 3.994598388671875e-05, + "step": 26179, + "training_step_time": 0.14882636070251465 + }, + { + "epoch": 3.9947509765625e-05, + "grad_norm": 0.08003715425729752, + "learning_rate": 4.367673203442241e-06, + "loss": 0.0052, + "step": 26180 + }, + { + "epoch": 3.9947509765625e-05, + "model_forward_time": 0.02449512481689453, + "step": 26180 + }, + { + "epoch": 3.9947509765625e-05, + "step": 26180, + "training_step_time": 0.21740436553955078 + }, + { + "epoch": 3.994903564453125e-05, + "model_forward_time": 0.024868249893188477, + "step": 26181 + }, + { + "epoch": 3.994903564453125e-05, + "step": 26181, + "training_step_time": 0.11270999908447266 + }, + { + "epoch": 3.99505615234375e-05, + "model_forward_time": 0.02455306053161621, + "step": 26182 + }, + { + "epoch": 3.99505615234375e-05, + "step": 26182, + "training_step_time": 0.10604977607727051 + }, + { + "epoch": 3.995208740234375e-05, + "model_forward_time": 0.025559425354003906, + "step": 26183 + }, + { + "epoch": 3.995208740234375e-05, + "step": 26183, + "training_step_time": 0.11426830291748047 + }, + { + "epoch": 3.995361328125e-05, + "model_forward_time": 0.024730920791625977, + "step": 26184 + }, + { + "epoch": 3.995361328125e-05, + "step": 26184, + "training_step_time": 0.10819077491760254 + }, + { + "epoch": 3.995513916015625e-05, + "model_forward_time": 0.025238037109375, + "step": 26185 + }, + { + "epoch": 3.995513916015625e-05, + "step": 26185, + "training_step_time": 0.1103212833404541 + }, + { + "epoch": 3.99566650390625e-05, + "model_forward_time": 0.02535271644592285, + "step": 26186 + }, + { + "epoch": 3.99566650390625e-05, + "step": 26186, + "training_step_time": 0.10993123054504395 + }, + { + "epoch": 3.995819091796875e-05, + "model_forward_time": 0.024819612503051758, + "step": 26187 + }, + { + "epoch": 3.995819091796875e-05, + "step": 26187, + "training_step_time": 0.12958049774169922 + }, + { + "epoch": 3.9959716796875e-05, + "model_forward_time": 0.025050878524780273, + "step": 26188 + }, + { + "epoch": 3.9959716796875e-05, + "step": 26188, + "training_step_time": 0.12558507919311523 + }, + { + "epoch": 3.996124267578125e-05, + "model_forward_time": 0.025292634963989258, + "step": 26189 + }, + { + "epoch": 3.996124267578125e-05, + "step": 26189, + "training_step_time": 0.12148118019104004 + }, + { + "epoch": 3.99627685546875e-05, + "grad_norm": 0.3032190501689911, + "learning_rate": 4.3451724231958644e-06, + "loss": 0.0053, + "step": 26190 + }, + { + "epoch": 3.99627685546875e-05, + "model_forward_time": 0.025188684463500977, + "step": 26190 + }, + { + "epoch": 3.99627685546875e-05, + "step": 26190, + "training_step_time": 0.12166810035705566 + }, + { + "epoch": 3.996429443359375e-05, + "model_forward_time": 0.02518630027770996, + "step": 26191 + }, + { + "epoch": 3.996429443359375e-05, + "step": 26191, + "training_step_time": 0.223527193069458 + }, + { + "epoch": 3.99658203125e-05, + "model_forward_time": 0.024287939071655273, + "step": 26192 + }, + { + "epoch": 3.99658203125e-05, + "step": 26192, + "training_step_time": 0.12400555610656738 + }, + { + "epoch": 3.996734619140625e-05, + "model_forward_time": 0.0239715576171875, + "step": 26193 + }, + { + "epoch": 3.996734619140625e-05, + "step": 26193, + "training_step_time": 0.1965327262878418 + }, + { + "epoch": 3.99688720703125e-05, + "model_forward_time": 0.024260282516479492, + "step": 26194 + }, + { + "epoch": 3.99688720703125e-05, + "step": 26194, + "training_step_time": 0.11929082870483398 + }, + { + "epoch": 3.997039794921875e-05, + "model_forward_time": 0.024402856826782227, + "step": 26195 + }, + { + "epoch": 3.997039794921875e-05, + "step": 26195, + "training_step_time": 0.19179344177246094 + }, + { + "epoch": 3.9971923828125e-05, + "model_forward_time": 0.025172710418701172, + "step": 26196 + }, + { + "epoch": 3.9971923828125e-05, + "step": 26196, + "training_step_time": 0.10701131820678711 + }, + { + "epoch": 3.997344970703125e-05, + "model_forward_time": 0.024416685104370117, + "step": 26197 + }, + { + "epoch": 3.997344970703125e-05, + "step": 26197, + "training_step_time": 0.11088728904724121 + }, + { + "epoch": 3.99749755859375e-05, + "model_forward_time": 0.0250546932220459, + "step": 26198 + }, + { + "epoch": 3.99749755859375e-05, + "step": 26198, + "training_step_time": 0.10824060440063477 + }, + { + "epoch": 3.997650146484375e-05, + "model_forward_time": 0.0250396728515625, + "step": 26199 + }, + { + "epoch": 3.997650146484375e-05, + "step": 26199, + "training_step_time": 0.11015963554382324 + }, + { + "epoch": 3.997802734375e-05, + "grad_norm": 0.08461698889732361, + "learning_rate": 4.322727117869951e-06, + "loss": 0.0061, + "step": 26200 + }, + { + "epoch": 3.997802734375e-05, + "model_forward_time": 0.025383472442626953, + "step": 26200 + }, + { + "epoch": 3.997802734375e-05, + "step": 26200, + "training_step_time": 0.11152458190917969 + }, + { + "epoch": 3.997955322265625e-05, + "model_forward_time": 0.025038719177246094, + "step": 26201 + }, + { + "epoch": 3.997955322265625e-05, + "step": 26201, + "training_step_time": 0.10735130310058594 + }, + { + "epoch": 3.99810791015625e-05, + "model_forward_time": 0.025051593780517578, + "step": 26202 + }, + { + "epoch": 3.99810791015625e-05, + "step": 26202, + "training_step_time": 0.1056675910949707 + }, + { + "epoch": 3.998260498046875e-05, + "model_forward_time": 0.025054454803466797, + "step": 26203 + }, + { + "epoch": 3.998260498046875e-05, + "step": 26203, + "training_step_time": 0.10694456100463867 + }, + { + "epoch": 3.9984130859375e-05, + "model_forward_time": 0.025494813919067383, + "step": 26204 + }, + { + "epoch": 3.9984130859375e-05, + "step": 26204, + "training_step_time": 0.11286544799804688 + }, + { + "epoch": 3.998565673828125e-05, + "model_forward_time": 0.025062084197998047, + "step": 26205 + }, + { + "epoch": 3.998565673828125e-05, + "step": 26205, + "training_step_time": 0.10840964317321777 + }, + { + "epoch": 3.99871826171875e-05, + "model_forward_time": 0.025116920471191406, + "step": 26206 + }, + { + "epoch": 3.99871826171875e-05, + "step": 26206, + "training_step_time": 0.10819077491760254 + }, + { + "epoch": 3.998870849609375e-05, + "model_forward_time": 0.02537989616394043, + "step": 26207 + }, + { + "epoch": 3.998870849609375e-05, + "step": 26207, + "training_step_time": 0.10566186904907227 + }, + { + "epoch": 3.9990234375e-05, + "model_forward_time": 0.02541804313659668, + "step": 26208 + }, + { + "epoch": 3.9990234375e-05, + "step": 26208, + "training_step_time": 0.18210983276367188 + }, + { + "epoch": 3.999176025390625e-05, + "model_forward_time": 0.024603843688964844, + "step": 26209 + }, + { + "epoch": 3.999176025390625e-05, + "step": 26209, + "training_step_time": 0.10646319389343262 + }, + { + "epoch": 3.99932861328125e-05, + "grad_norm": 0.0800323560833931, + "learning_rate": 4.300337314737685e-06, + "loss": 0.0072, + "step": 26210 + }, + { + "epoch": 3.99932861328125e-05, + "model_forward_time": 0.02482914924621582, + "step": 26210 + }, + { + "epoch": 3.99932861328125e-05, + "step": 26210, + "training_step_time": 0.10445237159729004 + }, + { + "epoch": 3.999481201171875e-05, + "model_forward_time": 0.025142431259155273, + "step": 26211 + }, + { + "epoch": 3.999481201171875e-05, + "step": 26211, + "training_step_time": 0.1262831687927246 + }, + { + "epoch": 3.9996337890625e-05, + "model_forward_time": 0.02533888816833496, + "step": 26212 + }, + { + "epoch": 3.9996337890625e-05, + "step": 26212, + "training_step_time": 0.13247418403625488 + }, + { + "epoch": 3.999786376953125e-05, + "model_forward_time": 0.02544379234313965, + "step": 26213 + }, + { + "epoch": 3.999786376953125e-05, + "step": 26213, + "training_step_time": 0.10804557800292969 + }, + { + "epoch": 3.99993896484375e-05, + "model_forward_time": 0.025707721710205078, + "step": 26214 + }, + { + "epoch": 3.99993896484375e-05, + "step": 26214, + "training_step_time": 0.11357831954956055 + }, + { + "epoch": 4.000091552734375e-05, + "model_forward_time": 0.025601863861083984, + "step": 26215 + }, + { + "epoch": 4.000091552734375e-05, + "step": 26215, + "training_step_time": 0.19805407524108887 + }, + { + "epoch": 4.000244140625e-05, + "model_forward_time": 0.024850845336914062, + "step": 26216 + }, + { + "epoch": 4.000244140625e-05, + "step": 26216, + "training_step_time": 0.11610579490661621 + }, + { + "epoch": 4.000396728515625e-05, + "model_forward_time": 0.024520397186279297, + "step": 26217 + }, + { + "epoch": 4.000396728515625e-05, + "step": 26217, + "training_step_time": 0.10756587982177734 + }, + { + "epoch": 4.00054931640625e-05, + "model_forward_time": 0.02550959587097168, + "step": 26218 + }, + { + "epoch": 4.00054931640625e-05, + "step": 26218, + "training_step_time": 0.10598874092102051 + }, + { + "epoch": 4.000701904296875e-05, + "model_forward_time": 0.02501988410949707, + "step": 26219 + }, + { + "epoch": 4.000701904296875e-05, + "step": 26219, + "training_step_time": 0.11534667015075684 + }, + { + "epoch": 4.0008544921875e-05, + "grad_norm": 0.17419543862342834, + "learning_rate": 4.27800304100478e-06, + "loss": 0.0053, + "step": 26220 + }, + { + "epoch": 4.0008544921875e-05, + "model_forward_time": 0.02565741539001465, + "step": 26220 + }, + { + "epoch": 4.0008544921875e-05, + "step": 26220, + "training_step_time": 0.20604896545410156 + }, + { + "epoch": 4.001007080078125e-05, + "model_forward_time": 0.024756669998168945, + "step": 26221 + }, + { + "epoch": 4.001007080078125e-05, + "step": 26221, + "training_step_time": 0.11130571365356445 + }, + { + "epoch": 4.00115966796875e-05, + "model_forward_time": 0.024776697158813477, + "step": 26222 + }, + { + "epoch": 4.00115966796875e-05, + "step": 26222, + "training_step_time": 0.11346602439880371 + }, + { + "epoch": 4.001312255859375e-05, + "model_forward_time": 0.025264739990234375, + "step": 26223 + }, + { + "epoch": 4.001312255859375e-05, + "step": 26223, + "training_step_time": 0.12429213523864746 + }, + { + "epoch": 4.00146484375e-05, + "model_forward_time": 0.025708436965942383, + "step": 26224 + }, + { + "epoch": 4.00146484375e-05, + "step": 26224, + "training_step_time": 0.15922069549560547 + }, + { + "epoch": 4.001617431640625e-05, + "model_forward_time": 0.0243837833404541, + "step": 26225 + }, + { + "epoch": 4.001617431640625e-05, + "step": 26225, + "training_step_time": 0.16666841506958008 + }, + { + "epoch": 4.00177001953125e-05, + "model_forward_time": 0.024760007858276367, + "step": 26226 + }, + { + "epoch": 4.00177001953125e-05, + "step": 26226, + "training_step_time": 0.10526013374328613 + }, + { + "epoch": 4.001922607421875e-05, + "model_forward_time": 0.02439141273498535, + "step": 26227 + }, + { + "epoch": 4.001922607421875e-05, + "step": 26227, + "training_step_time": 0.13845491409301758 + }, + { + "epoch": 4.0020751953125e-05, + "model_forward_time": 0.025290250778198242, + "step": 26228 + }, + { + "epoch": 4.0020751953125e-05, + "step": 26228, + "training_step_time": 0.17240548133850098 + }, + { + "epoch": 4.002227783203125e-05, + "model_forward_time": 0.02397012710571289, + "step": 26229 + }, + { + "epoch": 4.002227783203125e-05, + "step": 26229, + "training_step_time": 0.15689969062805176 + }, + { + "epoch": 4.00238037109375e-05, + "grad_norm": 0.17113907635211945, + "learning_rate": 4.255724323809479e-06, + "loss": 0.0049, + "step": 26230 + }, + { + "epoch": 4.00238037109375e-05, + "model_forward_time": 0.024091482162475586, + "step": 26230 + }, + { + "epoch": 4.00238037109375e-05, + "step": 26230, + "training_step_time": 0.13382649421691895 + }, + { + "epoch": 4.002532958984375e-05, + "model_forward_time": 0.024300336837768555, + "step": 26231 + }, + { + "epoch": 4.002532958984375e-05, + "step": 26231, + "training_step_time": 0.13757538795471191 + }, + { + "epoch": 4.002685546875e-05, + "model_forward_time": 0.026445865631103516, + "step": 26232 + }, + { + "epoch": 4.002685546875e-05, + "step": 26232, + "training_step_time": 0.12434101104736328 + }, + { + "epoch": 4.002838134765625e-05, + "model_forward_time": 0.024916887283325195, + "step": 26233 + }, + { + "epoch": 4.002838134765625e-05, + "step": 26233, + "training_step_time": 0.12484383583068848 + }, + { + "epoch": 4.00299072265625e-05, + "model_forward_time": 0.02515125274658203, + "step": 26234 + }, + { + "epoch": 4.00299072265625e-05, + "step": 26234, + "training_step_time": 0.12027263641357422 + }, + { + "epoch": 4.003143310546875e-05, + "model_forward_time": 0.025537490844726562, + "step": 26235 + }, + { + "epoch": 4.003143310546875e-05, + "step": 26235, + "training_step_time": 0.1382596492767334 + }, + { + "epoch": 4.0032958984375e-05, + "model_forward_time": 0.025041580200195312, + "step": 26236 + }, + { + "epoch": 4.0032958984375e-05, + "step": 26236, + "training_step_time": 0.11221122741699219 + }, + { + "epoch": 4.003448486328125e-05, + "model_forward_time": 0.02627420425415039, + "step": 26237 + }, + { + "epoch": 4.003448486328125e-05, + "step": 26237, + "training_step_time": 0.11217617988586426 + }, + { + "epoch": 4.00360107421875e-05, + "model_forward_time": 0.025739192962646484, + "step": 26238 + }, + { + "epoch": 4.00360107421875e-05, + "step": 26238, + "training_step_time": 0.17177367210388184 + }, + { + "epoch": 4.003753662109375e-05, + "model_forward_time": 0.026119232177734375, + "step": 26239 + }, + { + "epoch": 4.003753662109375e-05, + "step": 26239, + "training_step_time": 0.15797948837280273 + }, + { + "epoch": 4.00390625e-05, + "grad_norm": 0.09232489764690399, + "learning_rate": 4.233501190222533e-06, + "loss": 0.0039, + "step": 26240 + }, + { + "epoch": 4.00390625e-05, + "model_forward_time": 0.03803396224975586, + "step": 26240 + }, + { + "epoch": 4.00390625e-05, + "step": 26240, + "training_step_time": 0.16290974617004395 + }, + { + "epoch": 4.004058837890625e-05, + "model_forward_time": 0.02740168571472168, + "step": 26241 + }, + { + "epoch": 4.004058837890625e-05, + "step": 26241, + "training_step_time": 0.20537710189819336 + }, + { + "epoch": 4.00421142578125e-05, + "model_forward_time": 0.027785778045654297, + "step": 26242 + }, + { + "epoch": 4.00421142578125e-05, + "step": 26242, + "training_step_time": 0.2800023555755615 + }, + { + "epoch": 4.004364013671875e-05, + "model_forward_time": 0.028873443603515625, + "step": 26243 + }, + { + "epoch": 4.004364013671875e-05, + "step": 26243, + "training_step_time": 0.2522609233856201 + }, + { + "epoch": 4.0045166015625e-05, + "model_forward_time": 0.028441667556762695, + "step": 26244 + }, + { + "epoch": 4.0045166015625e-05, + "step": 26244, + "training_step_time": 0.2918663024902344 + }, + { + "epoch": 4.004669189453125e-05, + "model_forward_time": 0.03096938133239746, + "step": 26245 + }, + { + "epoch": 4.004669189453125e-05, + "step": 26245, + "training_step_time": 0.30705881118774414 + }, + { + "epoch": 4.00482177734375e-05, + "model_forward_time": 0.03237438201904297, + "step": 26246 + }, + { + "epoch": 4.00482177734375e-05, + "step": 26246, + "training_step_time": 0.3271634578704834 + }, + { + "epoch": 4.004974365234375e-05, + "model_forward_time": 0.03686237335205078, + "step": 26247 + }, + { + "epoch": 4.004974365234375e-05, + "step": 26247, + "training_step_time": 0.3153567314147949 + }, + { + "epoch": 4.005126953125e-05, + "model_forward_time": 0.029470443725585938, + "step": 26248 + }, + { + "epoch": 4.005126953125e-05, + "step": 26248, + "training_step_time": 0.24451017379760742 + }, + { + "epoch": 4.005279541015625e-05, + "model_forward_time": 0.031524658203125, + "step": 26249 + }, + { + "epoch": 4.005279541015625e-05, + "step": 26249, + "training_step_time": 0.2888178825378418 + }, + { + "epoch": 4.00543212890625e-05, + "grad_norm": 0.11314801126718521, + "learning_rate": 4.2113336672471245e-06, + "loss": 0.0077, + "step": 26250 + }, + { + "epoch": 4.00543212890625e-05, + "model_forward_time": 0.030463457107543945, + "step": 26250 + }, + { + "epoch": 4.00543212890625e-05, + "step": 26250, + "training_step_time": 0.2657449245452881 + }, + { + "epoch": 4.005584716796875e-05, + "model_forward_time": 0.03507041931152344, + "step": 26251 + }, + { + "epoch": 4.005584716796875e-05, + "step": 26251, + "training_step_time": 0.2915992736816406 + }, + { + "epoch": 4.0057373046875e-05, + "model_forward_time": 0.03037714958190918, + "step": 26252 + }, + { + "epoch": 4.0057373046875e-05, + "step": 26252, + "training_step_time": 0.19980287551879883 + }, + { + "epoch": 4.005889892578125e-05, + "model_forward_time": 0.03186750411987305, + "step": 26253 + }, + { + "epoch": 4.005889892578125e-05, + "step": 26253, + "training_step_time": 0.2978029251098633 + }, + { + "epoch": 4.00604248046875e-05, + "model_forward_time": 0.035239458084106445, + "step": 26254 + }, + { + "epoch": 4.00604248046875e-05, + "step": 26254, + "training_step_time": 0.2939913272857666 + }, + { + "epoch": 4.006195068359375e-05, + "model_forward_time": 0.028951644897460938, + "step": 26255 + }, + { + "epoch": 4.006195068359375e-05, + "step": 26255, + "training_step_time": 0.14375066757202148 + }, + { + "epoch": 4.00634765625e-05, + "model_forward_time": 0.03395342826843262, + "step": 26256 + }, + { + "epoch": 4.00634765625e-05, + "step": 26256, + "training_step_time": 0.13275599479675293 + }, + { + "epoch": 4.006500244140625e-05, + "model_forward_time": 0.03153800964355469, + "step": 26257 + }, + { + "epoch": 4.006500244140625e-05, + "step": 26257, + "training_step_time": 0.1263735294342041 + }, + { + "epoch": 4.00665283203125e-05, + "model_forward_time": 0.028304338455200195, + "step": 26258 + }, + { + "epoch": 4.00665283203125e-05, + "step": 26258, + "training_step_time": 0.12805747985839844 + }, + { + "epoch": 4.006805419921875e-05, + "model_forward_time": 0.02751898765563965, + "step": 26259 + }, + { + "epoch": 4.006805419921875e-05, + "step": 26259, + "training_step_time": 0.11527013778686523 + }, + { + "epoch": 4.0069580078125e-05, + "grad_norm": 0.1763758361339569, + "learning_rate": 4.189221781818914e-06, + "loss": 0.0084, + "step": 26260 + }, + { + "epoch": 4.0069580078125e-05, + "model_forward_time": 0.027667760848999023, + "step": 26260 + }, + { + "epoch": 4.0069580078125e-05, + "step": 26260, + "training_step_time": 0.12492895126342773 + }, + { + "epoch": 4.007110595703125e-05, + "model_forward_time": 0.02668285369873047, + "step": 26261 + }, + { + "epoch": 4.007110595703125e-05, + "step": 26261, + "training_step_time": 0.1195363998413086 + }, + { + "epoch": 4.00726318359375e-05, + "model_forward_time": 0.02624964714050293, + "step": 26262 + }, + { + "epoch": 4.00726318359375e-05, + "step": 26262, + "training_step_time": 0.10950708389282227 + }, + { + "epoch": 4.007415771484375e-05, + "model_forward_time": 0.025242328643798828, + "step": 26263 + }, + { + "epoch": 4.007415771484375e-05, + "step": 26263, + "training_step_time": 0.13776874542236328 + }, + { + "epoch": 4.007568359375e-05, + "model_forward_time": 0.025519132614135742, + "step": 26264 + }, + { + "epoch": 4.007568359375e-05, + "step": 26264, + "training_step_time": 0.141005277633667 + }, + { + "epoch": 4.007720947265625e-05, + "model_forward_time": 0.02430891990661621, + "step": 26265 + }, + { + "epoch": 4.007720947265625e-05, + "step": 26265, + "training_step_time": 0.10855317115783691 + }, + { + "epoch": 4.00787353515625e-05, + "model_forward_time": 0.025052309036254883, + "step": 26266 + }, + { + "epoch": 4.00787353515625e-05, + "step": 26266, + "training_step_time": 0.11240291595458984 + }, + { + "epoch": 4.008026123046875e-05, + "model_forward_time": 0.025258541107177734, + "step": 26267 + }, + { + "epoch": 4.008026123046875e-05, + "step": 26267, + "training_step_time": 0.10968446731567383 + }, + { + "epoch": 4.0081787109375e-05, + "model_forward_time": 0.025557994842529297, + "step": 26268 + }, + { + "epoch": 4.0081787109375e-05, + "step": 26268, + "training_step_time": 0.1083521842956543 + }, + { + "epoch": 4.008331298828125e-05, + "model_forward_time": 0.0256807804107666, + "step": 26269 + }, + { + "epoch": 4.008331298828125e-05, + "step": 26269, + "training_step_time": 0.19491934776306152 + }, + { + "epoch": 4.00848388671875e-05, + "grad_norm": 0.08767119795084, + "learning_rate": 4.167165560805914e-06, + "loss": 0.0035, + "step": 26270 + }, + { + "epoch": 4.00848388671875e-05, + "model_forward_time": 0.02497243881225586, + "step": 26270 + }, + { + "epoch": 4.00848388671875e-05, + "step": 26270, + "training_step_time": 0.11090731620788574 + }, + { + "epoch": 4.008636474609375e-05, + "model_forward_time": 0.023956298828125, + "step": 26271 + }, + { + "epoch": 4.008636474609375e-05, + "step": 26271, + "training_step_time": 0.1124274730682373 + }, + { + "epoch": 4.0087890625e-05, + "model_forward_time": 0.025119304656982422, + "step": 26272 + }, + { + "epoch": 4.0087890625e-05, + "step": 26272, + "training_step_time": 0.11515450477600098 + }, + { + "epoch": 4.008941650390625e-05, + "model_forward_time": 0.025896310806274414, + "step": 26273 + }, + { + "epoch": 4.008941650390625e-05, + "step": 26273, + "training_step_time": 0.11102080345153809 + }, + { + "epoch": 4.00909423828125e-05, + "model_forward_time": 0.025412321090698242, + "step": 26274 + }, + { + "epoch": 4.00909423828125e-05, + "step": 26274, + "training_step_time": 0.11011767387390137 + }, + { + "epoch": 4.009246826171875e-05, + "model_forward_time": 0.025719881057739258, + "step": 26275 + }, + { + "epoch": 4.009246826171875e-05, + "step": 26275, + "training_step_time": 0.1140134334564209 + }, + { + "epoch": 4.0093994140625e-05, + "model_forward_time": 0.02560257911682129, + "step": 26276 + }, + { + "epoch": 4.0093994140625e-05, + "step": 26276, + "training_step_time": 0.11352872848510742 + }, + { + "epoch": 4.009552001953125e-05, + "model_forward_time": 0.028265953063964844, + "step": 26277 + }, + { + "epoch": 4.009552001953125e-05, + "step": 26277, + "training_step_time": 0.1090998649597168 + }, + { + "epoch": 4.00970458984375e-05, + "model_forward_time": 0.0254209041595459, + "step": 26278 + }, + { + "epoch": 4.00970458984375e-05, + "step": 26278, + "training_step_time": 0.11704158782958984 + }, + { + "epoch": 4.009857177734375e-05, + "model_forward_time": 0.02718353271484375, + "step": 26279 + }, + { + "epoch": 4.009857177734375e-05, + "step": 26279, + "training_step_time": 0.11139988899230957 + }, + { + "epoch": 4.010009765625e-05, + "grad_norm": 0.09713966399431229, + "learning_rate": 4.145165031008508e-06, + "loss": 0.0138, + "step": 26280 + }, + { + "epoch": 4.010009765625e-05, + "model_forward_time": 0.02546834945678711, + "step": 26280 + }, + { + "epoch": 4.010009765625e-05, + "step": 26280, + "training_step_time": 0.11081910133361816 + }, + { + "epoch": 4.010162353515625e-05, + "model_forward_time": 0.025545835494995117, + "step": 26281 + }, + { + "epoch": 4.010162353515625e-05, + "step": 26281, + "training_step_time": 0.10771059989929199 + }, + { + "epoch": 4.01031494140625e-05, + "model_forward_time": 0.025603294372558594, + "step": 26282 + }, + { + "epoch": 4.01031494140625e-05, + "step": 26282, + "training_step_time": 0.10892319679260254 + }, + { + "epoch": 4.010467529296875e-05, + "model_forward_time": 0.026063919067382812, + "step": 26283 + }, + { + "epoch": 4.010467529296875e-05, + "step": 26283, + "training_step_time": 0.11043667793273926 + }, + { + "epoch": 4.0106201171875e-05, + "model_forward_time": 0.025173187255859375, + "step": 26284 + }, + { + "epoch": 4.0106201171875e-05, + "step": 26284, + "training_step_time": 0.19211983680725098 + }, + { + "epoch": 4.010772705078125e-05, + "model_forward_time": 0.02514362335205078, + "step": 26285 + }, + { + "epoch": 4.010772705078125e-05, + "step": 26285, + "training_step_time": 0.10571622848510742 + }, + { + "epoch": 4.01092529296875e-05, + "model_forward_time": 0.024885177612304688, + "step": 26286 + }, + { + "epoch": 4.01092529296875e-05, + "step": 26286, + "training_step_time": 0.11598443984985352 + }, + { + "epoch": 4.011077880859375e-05, + "model_forward_time": 0.025402307510375977, + "step": 26287 + }, + { + "epoch": 4.011077880859375e-05, + "step": 26287, + "training_step_time": 0.12391304969787598 + }, + { + "epoch": 4.01123046875e-05, + "model_forward_time": 0.024918556213378906, + "step": 26288 + }, + { + "epoch": 4.01123046875e-05, + "step": 26288, + "training_step_time": 0.1025230884552002 + }, + { + "epoch": 4.011383056640625e-05, + "model_forward_time": 0.025389432907104492, + "step": 26289 + }, + { + "epoch": 4.011383056640625e-05, + "step": 26289, + "training_step_time": 0.15187287330627441 + }, + { + "epoch": 4.01153564453125e-05, + "grad_norm": 0.08062807470560074, + "learning_rate": 4.123220219159418e-06, + "loss": 0.0121, + "step": 26290 + }, + { + "epoch": 4.01153564453125e-05, + "model_forward_time": 0.025385618209838867, + "step": 26290 + }, + { + "epoch": 4.01153564453125e-05, + "step": 26290, + "training_step_time": 0.11150217056274414 + }, + { + "epoch": 4.011688232421875e-05, + "model_forward_time": 0.025077104568481445, + "step": 26291 + }, + { + "epoch": 4.011688232421875e-05, + "step": 26291, + "training_step_time": 0.11345291137695312 + }, + { + "epoch": 4.0118408203125e-05, + "model_forward_time": 0.025686979293823242, + "step": 26292 + }, + { + "epoch": 4.0118408203125e-05, + "step": 26292, + "training_step_time": 0.14574646949768066 + }, + { + "epoch": 4.011993408203125e-05, + "model_forward_time": 0.025452136993408203, + "step": 26293 + }, + { + "epoch": 4.011993408203125e-05, + "step": 26293, + "training_step_time": 0.18892145156860352 + }, + { + "epoch": 4.01214599609375e-05, + "model_forward_time": 0.024883270263671875, + "step": 26294 + }, + { + "epoch": 4.01214599609375e-05, + "step": 26294, + "training_step_time": 0.15984296798706055 + }, + { + "epoch": 4.012298583984375e-05, + "model_forward_time": 0.02430868148803711, + "step": 26295 + }, + { + "epoch": 4.012298583984375e-05, + "step": 26295, + "training_step_time": 0.18121623992919922 + }, + { + "epoch": 4.012451171875e-05, + "model_forward_time": 0.02453923225402832, + "step": 26296 + }, + { + "epoch": 4.012451171875e-05, + "step": 26296, + "training_step_time": 0.149245023727417 + }, + { + "epoch": 4.012603759765625e-05, + "model_forward_time": 0.02492809295654297, + "step": 26297 + }, + { + "epoch": 4.012603759765625e-05, + "step": 26297, + "training_step_time": 0.16715240478515625 + }, + { + "epoch": 4.01275634765625e-05, + "model_forward_time": 0.025471210479736328, + "step": 26298 + }, + { + "epoch": 4.01275634765625e-05, + "step": 26298, + "training_step_time": 0.12182927131652832 + }, + { + "epoch": 4.012908935546875e-05, + "model_forward_time": 0.02447342872619629, + "step": 26299 + }, + { + "epoch": 4.012908935546875e-05, + "step": 26299, + "training_step_time": 0.12989473342895508 + }, + { + "epoch": 4.0130615234375e-05, + "grad_norm": 0.09044896066188812, + "learning_rate": 4.101331151923649e-06, + "loss": 0.0091, + "step": 26300 + }, + { + "epoch": 4.0130615234375e-05, + "model_forward_time": 0.025290727615356445, + "step": 26300 + }, + { + "epoch": 4.0130615234375e-05, + "step": 26300, + "training_step_time": 0.11214041709899902 + }, + { + "epoch": 4.013214111328125e-05, + "model_forward_time": 0.025206804275512695, + "step": 26301 + }, + { + "epoch": 4.013214111328125e-05, + "step": 26301, + "training_step_time": 0.10429215431213379 + }, + { + "epoch": 4.01336669921875e-05, + "model_forward_time": 0.026093721389770508, + "step": 26302 + }, + { + "epoch": 4.01336669921875e-05, + "step": 26302, + "training_step_time": 0.10824108123779297 + }, + { + "epoch": 4.013519287109375e-05, + "model_forward_time": 0.02540755271911621, + "step": 26303 + }, + { + "epoch": 4.013519287109375e-05, + "step": 26303, + "training_step_time": 0.10756969451904297 + }, + { + "epoch": 4.013671875e-05, + "model_forward_time": 0.025562763214111328, + "step": 26304 + }, + { + "epoch": 4.013671875e-05, + "step": 26304, + "training_step_time": 0.12741827964782715 + }, + { + "epoch": 4.013824462890625e-05, + "model_forward_time": 0.026329755783081055, + "step": 26305 + }, + { + "epoch": 4.013824462890625e-05, + "step": 26305, + "training_step_time": 0.17176508903503418 + }, + { + "epoch": 4.01397705078125e-05, + "model_forward_time": 0.024481773376464844, + "step": 26306 + }, + { + "epoch": 4.01397705078125e-05, + "step": 26306, + "training_step_time": 0.16790342330932617 + }, + { + "epoch": 4.014129638671875e-05, + "model_forward_time": 0.02565908432006836, + "step": 26307 + }, + { + "epoch": 4.014129638671875e-05, + "step": 26307, + "training_step_time": 0.1396794319152832 + }, + { + "epoch": 4.0142822265625e-05, + "model_forward_time": 0.0247647762298584, + "step": 26308 + }, + { + "epoch": 4.0142822265625e-05, + "step": 26308, + "training_step_time": 0.1678149700164795 + }, + { + "epoch": 4.014434814453125e-05, + "model_forward_time": 0.026042699813842773, + "step": 26309 + }, + { + "epoch": 4.014434814453125e-05, + "step": 26309, + "training_step_time": 0.14809608459472656 + }, + { + "epoch": 4.01458740234375e-05, + "grad_norm": 0.1574499011039734, + "learning_rate": 4.079497855898501e-06, + "loss": 0.014, + "step": 26310 + }, + { + "epoch": 4.01458740234375e-05, + "model_forward_time": 0.025024890899658203, + "step": 26310 + }, + { + "epoch": 4.01458740234375e-05, + "step": 26310, + "training_step_time": 0.12175464630126953 + }, + { + "epoch": 4.014739990234375e-05, + "model_forward_time": 0.024291038513183594, + "step": 26311 + }, + { + "epoch": 4.014739990234375e-05, + "step": 26311, + "training_step_time": 0.18211960792541504 + }, + { + "epoch": 4.014892578125e-05, + "model_forward_time": 0.025204896926879883, + "step": 26312 + }, + { + "epoch": 4.014892578125e-05, + "step": 26312, + "training_step_time": 0.11302924156188965 + }, + { + "epoch": 4.015045166015625e-05, + "model_forward_time": 0.024596452713012695, + "step": 26313 + }, + { + "epoch": 4.015045166015625e-05, + "step": 26313, + "training_step_time": 0.11298274993896484 + }, + { + "epoch": 4.01519775390625e-05, + "model_forward_time": 0.025521278381347656, + "step": 26314 + }, + { + "epoch": 4.01519775390625e-05, + "step": 26314, + "training_step_time": 0.11246871948242188 + }, + { + "epoch": 4.015350341796875e-05, + "model_forward_time": 0.024985074996948242, + "step": 26315 + }, + { + "epoch": 4.015350341796875e-05, + "step": 26315, + "training_step_time": 0.11112833023071289 + }, + { + "epoch": 4.0155029296875e-05, + "model_forward_time": 0.025078535079956055, + "step": 26316 + }, + { + "epoch": 4.0155029296875e-05, + "step": 26316, + "training_step_time": 0.1080927848815918 + }, + { + "epoch": 4.015655517578125e-05, + "model_forward_time": 0.025440692901611328, + "step": 26317 + }, + { + "epoch": 4.015655517578125e-05, + "step": 26317, + "training_step_time": 0.10802435874938965 + }, + { + "epoch": 4.01580810546875e-05, + "model_forward_time": 0.02532958984375, + "step": 26318 + }, + { + "epoch": 4.01580810546875e-05, + "step": 26318, + "training_step_time": 0.11025214195251465 + }, + { + "epoch": 4.015960693359375e-05, + "model_forward_time": 0.025005817413330078, + "step": 26319 + }, + { + "epoch": 4.015960693359375e-05, + "step": 26319, + "training_step_time": 0.11236977577209473 + }, + { + "epoch": 4.01611328125e-05, + "grad_norm": 0.19923123717308044, + "learning_rate": 4.057720357613482e-06, + "loss": 0.0043, + "step": 26320 + }, + { + "epoch": 4.01611328125e-05, + "model_forward_time": 0.025292634963989258, + "step": 26320 + }, + { + "epoch": 4.01611328125e-05, + "step": 26320, + "training_step_time": 0.11114263534545898 + }, + { + "epoch": 4.016265869140625e-05, + "model_forward_time": 0.02508997917175293, + "step": 26321 + }, + { + "epoch": 4.016265869140625e-05, + "step": 26321, + "training_step_time": 0.10986208915710449 + }, + { + "epoch": 4.01641845703125e-05, + "model_forward_time": 0.025566816329956055, + "step": 26322 + }, + { + "epoch": 4.01641845703125e-05, + "step": 26322, + "training_step_time": 0.10797309875488281 + }, + { + "epoch": 4.016571044921875e-05, + "model_forward_time": 0.02617669105529785, + "step": 26323 + }, + { + "epoch": 4.016571044921875e-05, + "step": 26323, + "training_step_time": 0.11313390731811523 + }, + { + "epoch": 4.0167236328125e-05, + "model_forward_time": 0.025249481201171875, + "step": 26324 + }, + { + "epoch": 4.0167236328125e-05, + "step": 26324, + "training_step_time": 0.1083984375 + }, + { + "epoch": 4.016876220703125e-05, + "model_forward_time": 0.025038719177246094, + "step": 26325 + }, + { + "epoch": 4.016876220703125e-05, + "step": 26325, + "training_step_time": 0.10836148262023926 + }, + { + "epoch": 4.01702880859375e-05, + "model_forward_time": 0.02548050880432129, + "step": 26326 + }, + { + "epoch": 4.01702880859375e-05, + "step": 26326, + "training_step_time": 0.10759091377258301 + }, + { + "epoch": 4.017181396484375e-05, + "model_forward_time": 0.024790048599243164, + "step": 26327 + }, + { + "epoch": 4.017181396484375e-05, + "step": 26327, + "training_step_time": 0.11237931251525879 + }, + { + "epoch": 4.017333984375e-05, + "model_forward_time": 0.025119781494140625, + "step": 26328 + }, + { + "epoch": 4.017333984375e-05, + "step": 26328, + "training_step_time": 0.14493417739868164 + }, + { + "epoch": 4.017486572265625e-05, + "model_forward_time": 0.025468111038208008, + "step": 26329 + }, + { + "epoch": 4.017486572265625e-05, + "step": 26329, + "training_step_time": 0.10816597938537598 + }, + { + "epoch": 4.01763916015625e-05, + "grad_norm": 0.0620352141559124, + "learning_rate": 4.03599868353029e-06, + "loss": 0.0066, + "step": 26330 + }, + { + "epoch": 4.01763916015625e-05, + "model_forward_time": 0.02503514289855957, + "step": 26330 + }, + { + "epoch": 4.01763916015625e-05, + "step": 26330, + "training_step_time": 0.11126065254211426 + }, + { + "epoch": 4.017791748046875e-05, + "model_forward_time": 0.025090932846069336, + "step": 26331 + }, + { + "epoch": 4.017791748046875e-05, + "step": 26331, + "training_step_time": 0.11881351470947266 + }, + { + "epoch": 4.0179443359375e-05, + "model_forward_time": 0.02516317367553711, + "step": 26332 + }, + { + "epoch": 4.0179443359375e-05, + "step": 26332, + "training_step_time": 0.10435605049133301 + }, + { + "epoch": 4.018096923828125e-05, + "model_forward_time": 0.02465367317199707, + "step": 26333 + }, + { + "epoch": 4.018096923828125e-05, + "step": 26333, + "training_step_time": 0.1433720588684082 + }, + { + "epoch": 4.01824951171875e-05, + "model_forward_time": 0.02499556541442871, + "step": 26334 + }, + { + "epoch": 4.01824951171875e-05, + "step": 26334, + "training_step_time": 0.14278602600097656 + }, + { + "epoch": 4.018402099609375e-05, + "model_forward_time": 0.025307416915893555, + "step": 26335 + }, + { + "epoch": 4.018402099609375e-05, + "step": 26335, + "training_step_time": 0.18710780143737793 + }, + { + "epoch": 4.0185546875e-05, + "model_forward_time": 0.02431321144104004, + "step": 26336 + }, + { + "epoch": 4.0185546875e-05, + "step": 26336, + "training_step_time": 0.1337270736694336 + }, + { + "epoch": 4.018707275390625e-05, + "model_forward_time": 0.02416229248046875, + "step": 26337 + }, + { + "epoch": 4.018707275390625e-05, + "step": 26337, + "training_step_time": 0.16265416145324707 + }, + { + "epoch": 4.01885986328125e-05, + "model_forward_time": 0.024775981903076172, + "step": 26338 + }, + { + "epoch": 4.01885986328125e-05, + "step": 26338, + "training_step_time": 0.15987110137939453 + }, + { + "epoch": 4.019012451171875e-05, + "model_forward_time": 0.02425408363342285, + "step": 26339 + }, + { + "epoch": 4.019012451171875e-05, + "step": 26339, + "training_step_time": 0.12137556076049805 + }, + { + "epoch": 4.0191650390625e-05, + "grad_norm": 0.25910672545433044, + "learning_rate": 4.01433286004283e-06, + "loss": 0.0048, + "step": 26340 + }, + { + "epoch": 4.0191650390625e-05, + "model_forward_time": 0.024231672286987305, + "step": 26340 + }, + { + "epoch": 4.0191650390625e-05, + "step": 26340, + "training_step_time": 0.21265602111816406 + }, + { + "epoch": 4.019317626953125e-05, + "model_forward_time": 0.024422645568847656, + "step": 26341 + }, + { + "epoch": 4.019317626953125e-05, + "step": 26341, + "training_step_time": 0.11136794090270996 + }, + { + "epoch": 4.01947021484375e-05, + "model_forward_time": 0.024634122848510742, + "step": 26342 + }, + { + "epoch": 4.01947021484375e-05, + "step": 26342, + "training_step_time": 0.17721796035766602 + }, + { + "epoch": 4.019622802734375e-05, + "model_forward_time": 0.02380228042602539, + "step": 26343 + }, + { + "epoch": 4.019622802734375e-05, + "step": 26343, + "training_step_time": 0.20404767990112305 + }, + { + "epoch": 4.019775390625e-05, + "model_forward_time": 0.024129152297973633, + "step": 26344 + }, + { + "epoch": 4.019775390625e-05, + "step": 26344, + "training_step_time": 0.18899965286254883 + }, + { + "epoch": 4.019927978515625e-05, + "model_forward_time": 0.02433466911315918, + "step": 26345 + }, + { + "epoch": 4.019927978515625e-05, + "step": 26345, + "training_step_time": 0.17934775352478027 + }, + { + "epoch": 4.02008056640625e-05, + "model_forward_time": 0.024580717086791992, + "step": 26346 + }, + { + "epoch": 4.02008056640625e-05, + "step": 26346, + "training_step_time": 0.1727914810180664 + }, + { + "epoch": 4.020233154296875e-05, + "model_forward_time": 0.024190902709960938, + "step": 26347 + }, + { + "epoch": 4.020233154296875e-05, + "step": 26347, + "training_step_time": 0.10953640937805176 + }, + { + "epoch": 4.0203857421875e-05, + "model_forward_time": 0.024502992630004883, + "step": 26348 + }, + { + "epoch": 4.0203857421875e-05, + "step": 26348, + "training_step_time": 0.1653285026550293 + }, + { + "epoch": 4.020538330078125e-05, + "model_forward_time": 0.024463415145874023, + "step": 26349 + }, + { + "epoch": 4.020538330078125e-05, + "step": 26349, + "training_step_time": 0.1340937614440918 + }, + { + "epoch": 4.02069091796875e-05, + "grad_norm": 0.06537821143865585, + "learning_rate": 3.9927229134771035e-06, + "loss": 0.0036, + "step": 26350 + }, + { + "epoch": 4.02069091796875e-05, + "model_forward_time": 0.024655818939208984, + "step": 26350 + }, + { + "epoch": 4.02069091796875e-05, + "step": 26350, + "training_step_time": 0.10665559768676758 + }, + { + "epoch": 4.020843505859375e-05, + "model_forward_time": 0.025043249130249023, + "step": 26351 + }, + { + "epoch": 4.020843505859375e-05, + "step": 26351, + "training_step_time": 0.10814428329467773 + }, + { + "epoch": 4.02099609375e-05, + "model_forward_time": 0.02535414695739746, + "step": 26352 + }, + { + "epoch": 4.02099609375e-05, + "step": 26352, + "training_step_time": 0.10964179039001465 + }, + { + "epoch": 4.021148681640625e-05, + "model_forward_time": 0.02823662757873535, + "step": 26353 + }, + { + "epoch": 4.021148681640625e-05, + "step": 26353, + "training_step_time": 0.10802412033081055 + }, + { + "epoch": 4.02130126953125e-05, + "model_forward_time": 0.024797439575195312, + "step": 26354 + }, + { + "epoch": 4.02130126953125e-05, + "step": 26354, + "training_step_time": 0.20056724548339844 + }, + { + "epoch": 4.021453857421875e-05, + "model_forward_time": 0.024448394775390625, + "step": 26355 + }, + { + "epoch": 4.021453857421875e-05, + "step": 26355, + "training_step_time": 0.10429954528808594 + }, + { + "epoch": 4.0216064453125e-05, + "model_forward_time": 0.024451494216918945, + "step": 26356 + }, + { + "epoch": 4.0216064453125e-05, + "step": 26356, + "training_step_time": 0.10266828536987305 + }, + { + "epoch": 4.021759033203125e-05, + "model_forward_time": 0.025339841842651367, + "step": 26357 + }, + { + "epoch": 4.021759033203125e-05, + "step": 26357, + "training_step_time": 0.10928606986999512 + }, + { + "epoch": 4.02191162109375e-05, + "model_forward_time": 0.025394439697265625, + "step": 26358 + }, + { + "epoch": 4.02191162109375e-05, + "step": 26358, + "training_step_time": 0.10451841354370117 + }, + { + "epoch": 4.022064208984375e-05, + "model_forward_time": 0.024864912033081055, + "step": 26359 + }, + { + "epoch": 4.022064208984375e-05, + "step": 26359, + "training_step_time": 0.10608720779418945 + }, + { + "epoch": 4.022216796875e-05, + "grad_norm": 0.05001050978899002, + "learning_rate": 3.971168870091247e-06, + "loss": 0.0076, + "step": 26360 + }, + { + "epoch": 4.022216796875e-05, + "model_forward_time": 0.024997472763061523, + "step": 26360 + }, + { + "epoch": 4.022216796875e-05, + "step": 26360, + "training_step_time": 0.10564303398132324 + }, + { + "epoch": 4.022369384765625e-05, + "model_forward_time": 0.026213407516479492, + "step": 26361 + }, + { + "epoch": 4.022369384765625e-05, + "step": 26361, + "training_step_time": 0.10607624053955078 + }, + { + "epoch": 4.02252197265625e-05, + "model_forward_time": 0.02534341812133789, + "step": 26362 + }, + { + "epoch": 4.02252197265625e-05, + "step": 26362, + "training_step_time": 0.1103672981262207 + }, + { + "epoch": 4.022674560546875e-05, + "model_forward_time": 0.025134801864624023, + "step": 26363 + }, + { + "epoch": 4.022674560546875e-05, + "step": 26363, + "training_step_time": 0.10824394226074219 + }, + { + "epoch": 4.0228271484375e-05, + "model_forward_time": 0.025542020797729492, + "step": 26364 + }, + { + "epoch": 4.0228271484375e-05, + "step": 26364, + "training_step_time": 0.10752582550048828 + }, + { + "epoch": 4.022979736328125e-05, + "model_forward_time": 0.026169776916503906, + "step": 26365 + }, + { + "epoch": 4.022979736328125e-05, + "step": 26365, + "training_step_time": 0.10811948776245117 + }, + { + "epoch": 4.02313232421875e-05, + "model_forward_time": 0.025082826614379883, + "step": 26366 + }, + { + "epoch": 4.02313232421875e-05, + "step": 26366, + "training_step_time": 0.10401391983032227 + }, + { + "epoch": 4.023284912109375e-05, + "model_forward_time": 0.025108814239501953, + "step": 26367 + }, + { + "epoch": 4.023284912109375e-05, + "step": 26367, + "training_step_time": 0.10463786125183105 + }, + { + "epoch": 4.0234375e-05, + "model_forward_time": 0.025428056716918945, + "step": 26368 + }, + { + "epoch": 4.0234375e-05, + "step": 26368, + "training_step_time": 0.10335922241210938 + }, + { + "epoch": 4.023590087890625e-05, + "model_forward_time": 0.02483844757080078, + "step": 26369 + }, + { + "epoch": 4.023590087890625e-05, + "step": 26369, + "training_step_time": 0.10768008232116699 + }, + { + "epoch": 4.02374267578125e-05, + "grad_norm": 0.08122275024652481, + "learning_rate": 3.949670756075447e-06, + "loss": 0.0033, + "step": 26370 + }, + { + "epoch": 4.02374267578125e-05, + "model_forward_time": 0.025269746780395508, + "step": 26370 + }, + { + "epoch": 4.02374267578125e-05, + "step": 26370, + "training_step_time": 0.11851239204406738 + }, + { + "epoch": 4.023895263671875e-05, + "model_forward_time": 0.025038719177246094, + "step": 26371 + }, + { + "epoch": 4.023895263671875e-05, + "step": 26371, + "training_step_time": 0.13715100288391113 + }, + { + "epoch": 4.0240478515625e-05, + "model_forward_time": 0.02512812614440918, + "step": 26372 + }, + { + "epoch": 4.0240478515625e-05, + "step": 26372, + "training_step_time": 0.12050580978393555 + }, + { + "epoch": 4.024200439453125e-05, + "model_forward_time": 0.025053024291992188, + "step": 26373 + }, + { + "epoch": 4.024200439453125e-05, + "step": 26373, + "training_step_time": 0.2238011360168457 + }, + { + "epoch": 4.02435302734375e-05, + "model_forward_time": 0.024322509765625, + "step": 26374 + }, + { + "epoch": 4.02435302734375e-05, + "step": 26374, + "training_step_time": 0.1360483169555664 + }, + { + "epoch": 4.024505615234375e-05, + "model_forward_time": 0.02450728416442871, + "step": 26375 + }, + { + "epoch": 4.024505615234375e-05, + "step": 26375, + "training_step_time": 0.11437273025512695 + }, + { + "epoch": 4.024658203125e-05, + "model_forward_time": 0.02482008934020996, + "step": 26376 + }, + { + "epoch": 4.024658203125e-05, + "step": 26376, + "training_step_time": 0.18973517417907715 + }, + { + "epoch": 4.024810791015625e-05, + "model_forward_time": 0.024373769760131836, + "step": 26377 + }, + { + "epoch": 4.024810791015625e-05, + "step": 26377, + "training_step_time": 0.1875293254852295 + }, + { + "epoch": 4.02496337890625e-05, + "model_forward_time": 0.024494647979736328, + "step": 26378 + }, + { + "epoch": 4.02496337890625e-05, + "step": 26378, + "training_step_time": 0.17230749130249023 + }, + { + "epoch": 4.025115966796875e-05, + "model_forward_time": 0.024723529815673828, + "step": 26379 + }, + { + "epoch": 4.025115966796875e-05, + "step": 26379, + "training_step_time": 0.1215677261352539 + }, + { + "epoch": 4.0252685546875e-05, + "grad_norm": 0.10025465488433838, + "learning_rate": 3.928228597551947e-06, + "loss": 0.0044, + "step": 26380 + }, + { + "epoch": 4.0252685546875e-05, + "model_forward_time": 0.024968862533569336, + "step": 26380 + }, + { + "epoch": 4.0252685546875e-05, + "step": 26380, + "training_step_time": 0.1457688808441162 + }, + { + "epoch": 4.025421142578125e-05, + "model_forward_time": 0.02488112449645996, + "step": 26381 + }, + { + "epoch": 4.025421142578125e-05, + "step": 26381, + "training_step_time": 0.12195348739624023 + }, + { + "epoch": 4.02557373046875e-05, + "model_forward_time": 0.024956226348876953, + "step": 26382 + }, + { + "epoch": 4.02557373046875e-05, + "step": 26382, + "training_step_time": 0.21177315711975098 + }, + { + "epoch": 4.025726318359375e-05, + "model_forward_time": 0.023922443389892578, + "step": 26383 + }, + { + "epoch": 4.025726318359375e-05, + "step": 26383, + "training_step_time": 0.18485093116760254 + }, + { + "epoch": 4.02587890625e-05, + "model_forward_time": 0.024397850036621094, + "step": 26384 + }, + { + "epoch": 4.02587890625e-05, + "step": 26384, + "training_step_time": 0.12829279899597168 + }, + { + "epoch": 4.026031494140625e-05, + "model_forward_time": 0.02417445182800293, + "step": 26385 + }, + { + "epoch": 4.026031494140625e-05, + "step": 26385, + "training_step_time": 0.1228783130645752 + }, + { + "epoch": 4.02618408203125e-05, + "model_forward_time": 0.025017261505126953, + "step": 26386 + }, + { + "epoch": 4.02618408203125e-05, + "step": 26386, + "training_step_time": 0.10435962677001953 + }, + { + "epoch": 4.026336669921875e-05, + "model_forward_time": 0.025168180465698242, + "step": 26387 + }, + { + "epoch": 4.026336669921875e-05, + "step": 26387, + "training_step_time": 0.10535383224487305 + }, + { + "epoch": 4.0264892578125e-05, + "model_forward_time": 0.025782108306884766, + "step": 26388 + }, + { + "epoch": 4.0264892578125e-05, + "step": 26388, + "training_step_time": 0.1072545051574707 + }, + { + "epoch": 4.026641845703125e-05, + "model_forward_time": 0.026050090789794922, + "step": 26389 + }, + { + "epoch": 4.026641845703125e-05, + "step": 26389, + "training_step_time": 0.10728645324707031 + }, + { + "epoch": 4.02679443359375e-05, + "grad_norm": 0.08983786404132843, + "learning_rate": 3.90684242057498e-06, + "loss": 0.0084, + "step": 26390 + }, + { + "epoch": 4.02679443359375e-05, + "model_forward_time": 0.024805068969726562, + "step": 26390 + }, + { + "epoch": 4.02679443359375e-05, + "step": 26390, + "training_step_time": 0.10869193077087402 + }, + { + "epoch": 4.026947021484375e-05, + "model_forward_time": 0.02477717399597168, + "step": 26391 + }, + { + "epoch": 4.026947021484375e-05, + "step": 26391, + "training_step_time": 0.1058351993560791 + }, + { + "epoch": 4.027099609375e-05, + "model_forward_time": 0.025067567825317383, + "step": 26392 + }, + { + "epoch": 4.027099609375e-05, + "step": 26392, + "training_step_time": 0.11029458045959473 + }, + { + "epoch": 4.027252197265625e-05, + "model_forward_time": 0.02496480941772461, + "step": 26393 + }, + { + "epoch": 4.027252197265625e-05, + "step": 26393, + "training_step_time": 0.11024212837219238 + }, + { + "epoch": 4.02740478515625e-05, + "model_forward_time": 0.024397850036621094, + "step": 26394 + }, + { + "epoch": 4.02740478515625e-05, + "step": 26394, + "training_step_time": 0.1381516456604004 + }, + { + "epoch": 4.027557373046875e-05, + "model_forward_time": 0.02532196044921875, + "step": 26395 + }, + { + "epoch": 4.027557373046875e-05, + "step": 26395, + "training_step_time": 0.13750410079956055 + }, + { + "epoch": 4.0277099609375e-05, + "model_forward_time": 0.02460765838623047, + "step": 26396 + }, + { + "epoch": 4.0277099609375e-05, + "step": 26396, + "training_step_time": 0.11692500114440918 + }, + { + "epoch": 4.027862548828125e-05, + "model_forward_time": 0.02514791488647461, + "step": 26397 + }, + { + "epoch": 4.027862548828125e-05, + "step": 26397, + "training_step_time": 0.12416553497314453 + }, + { + "epoch": 4.02801513671875e-05, + "model_forward_time": 0.024776220321655273, + "step": 26398 + }, + { + "epoch": 4.02801513671875e-05, + "step": 26398, + "training_step_time": 0.10987281799316406 + }, + { + "epoch": 4.028167724609375e-05, + "model_forward_time": 0.024753808975219727, + "step": 26399 + }, + { + "epoch": 4.028167724609375e-05, + "step": 26399, + "training_step_time": 0.10681009292602539 + }, + { + "epoch": 4.0283203125e-05, + "grad_norm": 0.1022719219326973, + "learning_rate": 3.885512251130763e-06, + "loss": 0.0051, + "step": 26400 + }, + { + "epoch": 4.0283203125e-05, + "model_forward_time": 0.024988412857055664, + "step": 26400 + }, + { + "epoch": 4.0283203125e-05, + "step": 26400, + "training_step_time": 0.1995549201965332 + }, + { + "epoch": 4.028472900390625e-05, + "model_forward_time": 0.02407240867614746, + "step": 26401 + }, + { + "epoch": 4.028472900390625e-05, + "step": 26401, + "training_step_time": 0.1286015510559082 + }, + { + "epoch": 4.02862548828125e-05, + "model_forward_time": 0.023145198822021484, + "step": 26402 + }, + { + "epoch": 4.02862548828125e-05, + "step": 26402, + "training_step_time": 0.1265878677368164 + }, + { + "epoch": 4.028778076171875e-05, + "model_forward_time": 0.02324390411376953, + "step": 26403 + }, + { + "epoch": 4.028778076171875e-05, + "step": 26403, + "training_step_time": 0.12350606918334961 + }, + { + "epoch": 4.0289306640625e-05, + "model_forward_time": 0.02460479736328125, + "step": 26404 + }, + { + "epoch": 4.0289306640625e-05, + "step": 26404, + "training_step_time": 0.11295199394226074 + }, + { + "epoch": 4.029083251953125e-05, + "model_forward_time": 0.02408003807067871, + "step": 26405 + }, + { + "epoch": 4.029083251953125e-05, + "step": 26405, + "training_step_time": 0.115081787109375 + }, + { + "epoch": 4.02923583984375e-05, + "model_forward_time": 0.024715423583984375, + "step": 26406 + }, + { + "epoch": 4.02923583984375e-05, + "step": 26406, + "training_step_time": 0.11436986923217773 + }, + { + "epoch": 4.029388427734375e-05, + "model_forward_time": 0.024031639099121094, + "step": 26407 + }, + { + "epoch": 4.029388427734375e-05, + "step": 26407, + "training_step_time": 0.6010837554931641 + }, + { + "epoch": 4.029541015625e-05, + "model_forward_time": 0.022745609283447266, + "step": 26408 + }, + { + "epoch": 4.029541015625e-05, + "step": 26408, + "training_step_time": 0.10480260848999023 + }, + { + "epoch": 4.029693603515625e-05, + "model_forward_time": 0.024315357208251953, + "step": 26409 + }, + { + "epoch": 4.029693603515625e-05, + "step": 26409, + "training_step_time": 0.1067349910736084 + }, + { + "epoch": 4.02984619140625e-05, + "grad_norm": 0.07938051968812943, + "learning_rate": 3.864238115137481e-06, + "loss": 0.0034, + "step": 26410 + }, + { + "epoch": 4.02984619140625e-05, + "model_forward_time": 0.025067806243896484, + "step": 26410 + }, + { + "epoch": 4.02984619140625e-05, + "step": 26410, + "training_step_time": 0.11238455772399902 + }, + { + "epoch": 4.029998779296875e-05, + "model_forward_time": 0.02537989616394043, + "step": 26411 + }, + { + "epoch": 4.029998779296875e-05, + "step": 26411, + "training_step_time": 0.17967605590820312 + }, + { + "epoch": 4.0301513671875e-05, + "model_forward_time": 0.024684906005859375, + "step": 26412 + }, + { + "epoch": 4.0301513671875e-05, + "step": 26412, + "training_step_time": 0.10774850845336914 + }, + { + "epoch": 4.030303955078125e-05, + "model_forward_time": 0.024586200714111328, + "step": 26413 + }, + { + "epoch": 4.030303955078125e-05, + "step": 26413, + "training_step_time": 0.11280250549316406 + }, + { + "epoch": 4.03045654296875e-05, + "model_forward_time": 0.025382280349731445, + "step": 26414 + }, + { + "epoch": 4.03045654296875e-05, + "step": 26414, + "training_step_time": 0.13556909561157227 + }, + { + "epoch": 4.030609130859375e-05, + "model_forward_time": 0.028500795364379883, + "step": 26415 + }, + { + "epoch": 4.030609130859375e-05, + "step": 26415, + "training_step_time": 0.10759377479553223 + }, + { + "epoch": 4.03076171875e-05, + "model_forward_time": 0.02465653419494629, + "step": 26416 + }, + { + "epoch": 4.03076171875e-05, + "step": 26416, + "training_step_time": 0.1723332405090332 + }, + { + "epoch": 4.030914306640625e-05, + "model_forward_time": 0.024422883987426758, + "step": 26417 + }, + { + "epoch": 4.030914306640625e-05, + "step": 26417, + "training_step_time": 0.145768404006958 + }, + { + "epoch": 4.03106689453125e-05, + "model_forward_time": 0.025044679641723633, + "step": 26418 + }, + { + "epoch": 4.03106689453125e-05, + "step": 26418, + "training_step_time": 0.10959315299987793 + }, + { + "epoch": 4.031219482421875e-05, + "model_forward_time": 0.024940013885498047, + "step": 26419 + }, + { + "epoch": 4.031219482421875e-05, + "step": 26419, + "training_step_time": 0.10508084297180176 + }, + { + "epoch": 4.0313720703125e-05, + "grad_norm": 0.08243121206760406, + "learning_rate": 3.843020038445211e-06, + "loss": 0.0028, + "step": 26420 + }, + { + "epoch": 4.0313720703125e-05, + "model_forward_time": 0.0259702205657959, + "step": 26420 + }, + { + "epoch": 4.0313720703125e-05, + "step": 26420, + "training_step_time": 0.11414027214050293 + }, + { + "epoch": 4.031524658203125e-05, + "model_forward_time": 0.026123046875, + "step": 26421 + }, + { + "epoch": 4.031524658203125e-05, + "step": 26421, + "training_step_time": 0.18726468086242676 + }, + { + "epoch": 4.03167724609375e-05, + "model_forward_time": 0.02807331085205078, + "step": 26422 + }, + { + "epoch": 4.03167724609375e-05, + "step": 26422, + "training_step_time": 0.11030387878417969 + }, + { + "epoch": 4.031829833984375e-05, + "model_forward_time": 0.024532079696655273, + "step": 26423 + }, + { + "epoch": 4.031829833984375e-05, + "step": 26423, + "training_step_time": 0.10832548141479492 + }, + { + "epoch": 4.031982421875e-05, + "model_forward_time": 0.025362253189086914, + "step": 26424 + }, + { + "epoch": 4.031982421875e-05, + "step": 26424, + "training_step_time": 0.15369796752929688 + }, + { + "epoch": 4.032135009765625e-05, + "model_forward_time": 0.024271726608276367, + "step": 26425 + }, + { + "epoch": 4.032135009765625e-05, + "step": 26425, + "training_step_time": 0.10685300827026367 + }, + { + "epoch": 4.03228759765625e-05, + "model_forward_time": 0.02485799789428711, + "step": 26426 + }, + { + "epoch": 4.03228759765625e-05, + "step": 26426, + "training_step_time": 0.1215517520904541 + }, + { + "epoch": 4.032440185546875e-05, + "model_forward_time": 0.02566695213317871, + "step": 26427 + }, + { + "epoch": 4.032440185546875e-05, + "step": 26427, + "training_step_time": 0.11046886444091797 + }, + { + "epoch": 4.0325927734375e-05, + "model_forward_time": 0.026460886001586914, + "step": 26428 + }, + { + "epoch": 4.0325927734375e-05, + "step": 26428, + "training_step_time": 0.11052560806274414 + }, + { + "epoch": 4.032745361328125e-05, + "model_forward_time": 0.0252227783203125, + "step": 26429 + }, + { + "epoch": 4.032745361328125e-05, + "step": 26429, + "training_step_time": 0.10758137702941895 + }, + { + "epoch": 4.03289794921875e-05, + "grad_norm": 0.06858857721090317, + "learning_rate": 3.821858046835913e-06, + "loss": 0.0059, + "step": 26430 + }, + { + "epoch": 4.03289794921875e-05, + "model_forward_time": 0.02806711196899414, + "step": 26430 + }, + { + "epoch": 4.03289794921875e-05, + "step": 26430, + "training_step_time": 0.10823345184326172 + }, + { + "epoch": 4.033050537109375e-05, + "model_forward_time": 0.025559186935424805, + "step": 26431 + }, + { + "epoch": 4.033050537109375e-05, + "step": 26431, + "training_step_time": 0.11278486251831055 + }, + { + "epoch": 4.033203125e-05, + "model_forward_time": 0.025243520736694336, + "step": 26432 + }, + { + "epoch": 4.033203125e-05, + "step": 26432, + "training_step_time": 0.10969781875610352 + }, + { + "epoch": 4.033355712890625e-05, + "model_forward_time": 0.025149106979370117, + "step": 26433 + }, + { + "epoch": 4.033355712890625e-05, + "step": 26433, + "training_step_time": 0.10625052452087402 + }, + { + "epoch": 4.03350830078125e-05, + "model_forward_time": 0.02505350112915039, + "step": 26434 + }, + { + "epoch": 4.03350830078125e-05, + "step": 26434, + "training_step_time": 0.10821533203125 + }, + { + "epoch": 4.033660888671875e-05, + "model_forward_time": 0.025459766387939453, + "step": 26435 + }, + { + "epoch": 4.033660888671875e-05, + "step": 26435, + "training_step_time": 0.12248015403747559 + }, + { + "epoch": 4.0338134765625e-05, + "model_forward_time": 0.025197744369506836, + "step": 26436 + }, + { + "epoch": 4.0338134765625e-05, + "step": 26436, + "training_step_time": 0.16705107688903809 + }, + { + "epoch": 4.033966064453125e-05, + "model_forward_time": 0.024364709854125977, + "step": 26437 + }, + { + "epoch": 4.033966064453125e-05, + "step": 26437, + "training_step_time": 0.13602328300476074 + }, + { + "epoch": 4.03411865234375e-05, + "model_forward_time": 0.024669885635375977, + "step": 26438 + }, + { + "epoch": 4.03411865234375e-05, + "step": 26438, + "training_step_time": 0.10366678237915039 + }, + { + "epoch": 4.034271240234375e-05, + "model_forward_time": 0.02506709098815918, + "step": 26439 + }, + { + "epoch": 4.034271240234375e-05, + "step": 26439, + "training_step_time": 0.11863088607788086 + }, + { + "epoch": 4.034423828125e-05, + "grad_norm": 0.07096494734287262, + "learning_rate": 3.8007521660234023e-06, + "loss": 0.006, + "step": 26440 + }, + { + "epoch": 4.034423828125e-05, + "model_forward_time": 0.025040626525878906, + "step": 26440 + }, + { + "epoch": 4.034423828125e-05, + "step": 26440, + "training_step_time": 0.10867071151733398 + }, + { + "epoch": 4.034576416015625e-05, + "model_forward_time": 0.025470495223999023, + "step": 26441 + }, + { + "epoch": 4.034576416015625e-05, + "step": 26441, + "training_step_time": 0.10976910591125488 + }, + { + "epoch": 4.03472900390625e-05, + "model_forward_time": 0.025163650512695312, + "step": 26442 + }, + { + "epoch": 4.03472900390625e-05, + "step": 26442, + "training_step_time": 0.19699859619140625 + }, + { + "epoch": 4.034881591796875e-05, + "model_forward_time": 0.02436351776123047, + "step": 26443 + }, + { + "epoch": 4.034881591796875e-05, + "step": 26443, + "training_step_time": 0.10846352577209473 + }, + { + "epoch": 4.0350341796875e-05, + "model_forward_time": 0.024471044540405273, + "step": 26444 + }, + { + "epoch": 4.0350341796875e-05, + "step": 26444, + "training_step_time": 0.1071784496307373 + }, + { + "epoch": 4.035186767578125e-05, + "model_forward_time": 0.025505781173706055, + "step": 26445 + }, + { + "epoch": 4.035186767578125e-05, + "step": 26445, + "training_step_time": 0.10872721672058105 + }, + { + "epoch": 4.03533935546875e-05, + "model_forward_time": 0.025043964385986328, + "step": 26446 + }, + { + "epoch": 4.03533935546875e-05, + "step": 26446, + "training_step_time": 0.1049811840057373 + }, + { + "epoch": 4.035491943359375e-05, + "model_forward_time": 0.02552008628845215, + "step": 26447 + }, + { + "epoch": 4.035491943359375e-05, + "step": 26447, + "training_step_time": 0.11053943634033203 + }, + { + "epoch": 4.03564453125e-05, + "model_forward_time": 0.025560617446899414, + "step": 26448 + }, + { + "epoch": 4.03564453125e-05, + "step": 26448, + "training_step_time": 0.12161636352539062 + }, + { + "epoch": 4.035797119140625e-05, + "model_forward_time": 0.025191545486450195, + "step": 26449 + }, + { + "epoch": 4.035797119140625e-05, + "step": 26449, + "training_step_time": 0.1221151351928711 + }, + { + "epoch": 4.03594970703125e-05, + "grad_norm": 0.08621404320001602, + "learning_rate": 3.7797024216533138e-06, + "loss": 0.0081, + "step": 26450 + }, + { + "epoch": 4.03594970703125e-05, + "model_forward_time": 0.02521681785583496, + "step": 26450 + }, + { + "epoch": 4.03594970703125e-05, + "step": 26450, + "training_step_time": 0.12389802932739258 + }, + { + "epoch": 4.036102294921875e-05, + "model_forward_time": 0.025003433227539062, + "step": 26451 + }, + { + "epoch": 4.036102294921875e-05, + "step": 26451, + "training_step_time": 0.11805891990661621 + }, + { + "epoch": 4.0362548828125e-05, + "model_forward_time": 0.025041580200195312, + "step": 26452 + }, + { + "epoch": 4.0362548828125e-05, + "step": 26452, + "training_step_time": 0.12157082557678223 + }, + { + "epoch": 4.036407470703125e-05, + "model_forward_time": 0.025294065475463867, + "step": 26453 + }, + { + "epoch": 4.036407470703125e-05, + "step": 26453, + "training_step_time": 0.12359094619750977 + }, + { + "epoch": 4.03656005859375e-05, + "model_forward_time": 0.02542257308959961, + "step": 26454 + }, + { + "epoch": 4.03656005859375e-05, + "step": 26454, + "training_step_time": 0.11923432350158691 + }, + { + "epoch": 4.036712646484375e-05, + "model_forward_time": 0.025609731674194336, + "step": 26455 + }, + { + "epoch": 4.036712646484375e-05, + "step": 26455, + "training_step_time": 0.11363005638122559 + }, + { + "epoch": 4.036865234375e-05, + "model_forward_time": 0.025257587432861328, + "step": 26456 + }, + { + "epoch": 4.036865234375e-05, + "step": 26456, + "training_step_time": 0.11240792274475098 + }, + { + "epoch": 4.037017822265625e-05, + "model_forward_time": 0.025002002716064453, + "step": 26457 + }, + { + "epoch": 4.037017822265625e-05, + "step": 26457, + "training_step_time": 0.1110074520111084 + }, + { + "epoch": 4.03717041015625e-05, + "model_forward_time": 0.025066375732421875, + "step": 26458 + }, + { + "epoch": 4.03717041015625e-05, + "step": 26458, + "training_step_time": 0.11243224143981934 + }, + { + "epoch": 4.037322998046875e-05, + "model_forward_time": 0.02495265007019043, + "step": 26459 + }, + { + "epoch": 4.037322998046875e-05, + "step": 26459, + "training_step_time": 0.11172652244567871 + }, + { + "epoch": 4.0374755859375e-05, + "grad_norm": 0.07990297675132751, + "learning_rate": 3.75870883930306e-06, + "loss": 0.0067, + "step": 26460 + }, + { + "epoch": 4.0374755859375e-05, + "model_forward_time": 0.025435447692871094, + "step": 26460 + }, + { + "epoch": 4.0374755859375e-05, + "step": 26460, + "training_step_time": 0.11028671264648438 + }, + { + "epoch": 4.037628173828125e-05, + "model_forward_time": 0.024938344955444336, + "step": 26461 + }, + { + "epoch": 4.037628173828125e-05, + "step": 26461, + "training_step_time": 0.10533738136291504 + }, + { + "epoch": 4.03778076171875e-05, + "model_forward_time": 0.024785518646240234, + "step": 26462 + }, + { + "epoch": 4.03778076171875e-05, + "step": 26462, + "training_step_time": 0.17598772048950195 + }, + { + "epoch": 4.037933349609375e-05, + "model_forward_time": 0.024675369262695312, + "step": 26463 + }, + { + "epoch": 4.037933349609375e-05, + "step": 26463, + "training_step_time": 0.1821300983428955 + }, + { + "epoch": 4.0380859375e-05, + "model_forward_time": 0.024676084518432617, + "step": 26464 + }, + { + "epoch": 4.0380859375e-05, + "step": 26464, + "training_step_time": 0.11289048194885254 + }, + { + "epoch": 4.038238525390625e-05, + "model_forward_time": 0.02447509765625, + "step": 26465 + }, + { + "epoch": 4.038238525390625e-05, + "step": 26465, + "training_step_time": 0.21402430534362793 + }, + { + "epoch": 4.03839111328125e-05, + "model_forward_time": 0.024451732635498047, + "step": 26466 + }, + { + "epoch": 4.03839111328125e-05, + "step": 26466, + "training_step_time": 0.160994291305542 + }, + { + "epoch": 4.038543701171875e-05, + "model_forward_time": 0.024286270141601562, + "step": 26467 + }, + { + "epoch": 4.038543701171875e-05, + "step": 26467, + "training_step_time": 0.1130833625793457 + }, + { + "epoch": 4.0386962890625e-05, + "model_forward_time": 0.02477288246154785, + "step": 26468 + }, + { + "epoch": 4.0386962890625e-05, + "step": 26468, + "training_step_time": 0.1181955337524414 + }, + { + "epoch": 4.038848876953125e-05, + "model_forward_time": 0.02525806427001953, + "step": 26469 + }, + { + "epoch": 4.038848876953125e-05, + "step": 26469, + "training_step_time": 0.12119412422180176 + }, + { + "epoch": 4.03900146484375e-05, + "grad_norm": 0.33543387055397034, + "learning_rate": 3.7377714444818468e-06, + "loss": 0.0048, + "step": 26470 + }, + { + "epoch": 4.03900146484375e-05, + "model_forward_time": 0.026371002197265625, + "step": 26470 + }, + { + "epoch": 4.03900146484375e-05, + "step": 26470, + "training_step_time": 0.156721830368042 + }, + { + "epoch": 4.039154052734375e-05, + "model_forward_time": 0.024292469024658203, + "step": 26471 + }, + { + "epoch": 4.039154052734375e-05, + "step": 26471, + "training_step_time": 0.16886067390441895 + }, + { + "epoch": 4.039306640625e-05, + "model_forward_time": 0.02477550506591797, + "step": 26472 + }, + { + "epoch": 4.039306640625e-05, + "step": 26472, + "training_step_time": 0.11124873161315918 + }, + { + "epoch": 4.039459228515625e-05, + "model_forward_time": 0.023722171783447266, + "step": 26473 + }, + { + "epoch": 4.039459228515625e-05, + "step": 26473, + "training_step_time": 0.12565827369689941 + }, + { + "epoch": 4.03961181640625e-05, + "model_forward_time": 0.024488449096679688, + "step": 26474 + }, + { + "epoch": 4.03961181640625e-05, + "step": 26474, + "training_step_time": 0.12562036514282227 + }, + { + "epoch": 4.039764404296875e-05, + "model_forward_time": 0.024092674255371094, + "step": 26475 + }, + { + "epoch": 4.039764404296875e-05, + "step": 26475, + "training_step_time": 0.12891721725463867 + }, + { + "epoch": 4.0399169921875e-05, + "model_forward_time": 0.024013042449951172, + "step": 26476 + }, + { + "epoch": 4.0399169921875e-05, + "step": 26476, + "training_step_time": 0.12344646453857422 + }, + { + "epoch": 4.040069580078125e-05, + "model_forward_time": 0.023740291595458984, + "step": 26477 + }, + { + "epoch": 4.040069580078125e-05, + "step": 26477, + "training_step_time": 0.11871886253356934 + }, + { + "epoch": 4.04022216796875e-05, + "model_forward_time": 0.02512645721435547, + "step": 26478 + }, + { + "epoch": 4.04022216796875e-05, + "step": 26478, + "training_step_time": 0.12000632286071777 + }, + { + "epoch": 4.040374755859375e-05, + "model_forward_time": 0.025534629821777344, + "step": 26479 + }, + { + "epoch": 4.040374755859375e-05, + "step": 26479, + "training_step_time": 0.1130666732788086 + }, + { + "epoch": 4.04052734375e-05, + "grad_norm": 0.07303806394338608, + "learning_rate": 3.7168902626305622e-06, + "loss": 0.0033, + "step": 26480 + }, + { + "epoch": 4.04052734375e-05, + "model_forward_time": 0.02535390853881836, + "step": 26480 + }, + { + "epoch": 4.04052734375e-05, + "step": 26480, + "training_step_time": 0.1931018829345703 + }, + { + "epoch": 4.040679931640625e-05, + "model_forward_time": 0.02527928352355957, + "step": 26481 + }, + { + "epoch": 4.040679931640625e-05, + "step": 26481, + "training_step_time": 0.13720417022705078 + }, + { + "epoch": 4.04083251953125e-05, + "model_forward_time": 0.02448129653930664, + "step": 26482 + }, + { + "epoch": 4.04083251953125e-05, + "step": 26482, + "training_step_time": 0.11026358604431152 + }, + { + "epoch": 4.040985107421875e-05, + "model_forward_time": 0.025110483169555664, + "step": 26483 + }, + { + "epoch": 4.040985107421875e-05, + "step": 26483, + "training_step_time": 0.10820269584655762 + }, + { + "epoch": 4.0411376953125e-05, + "model_forward_time": 0.025191068649291992, + "step": 26484 + }, + { + "epoch": 4.0411376953125e-05, + "step": 26484, + "training_step_time": 0.10896587371826172 + }, + { + "epoch": 4.041290283203125e-05, + "model_forward_time": 0.025431394577026367, + "step": 26485 + }, + { + "epoch": 4.041290283203125e-05, + "step": 26485, + "training_step_time": 0.10830354690551758 + }, + { + "epoch": 4.04144287109375e-05, + "model_forward_time": 0.02522444725036621, + "step": 26486 + }, + { + "epoch": 4.04144287109375e-05, + "step": 26486, + "training_step_time": 0.19467496871948242 + }, + { + "epoch": 4.041595458984375e-05, + "model_forward_time": 0.024538755416870117, + "step": 26487 + }, + { + "epoch": 4.041595458984375e-05, + "step": 26487, + "training_step_time": 0.10838747024536133 + }, + { + "epoch": 4.041748046875e-05, + "model_forward_time": 0.024657487869262695, + "step": 26488 + }, + { + "epoch": 4.041748046875e-05, + "step": 26488, + "training_step_time": 0.10874557495117188 + }, + { + "epoch": 4.041900634765625e-05, + "model_forward_time": 0.02512359619140625, + "step": 26489 + }, + { + "epoch": 4.041900634765625e-05, + "step": 26489, + "training_step_time": 0.10472989082336426 + }, + { + "epoch": 4.04205322265625e-05, + "grad_norm": 0.05251162871718407, + "learning_rate": 3.696065319121833e-06, + "loss": 0.0048, + "step": 26490 + }, + { + "epoch": 4.04205322265625e-05, + "model_forward_time": 0.025049209594726562, + "step": 26490 + }, + { + "epoch": 4.04205322265625e-05, + "step": 26490, + "training_step_time": 0.10488724708557129 + }, + { + "epoch": 4.042205810546875e-05, + "model_forward_time": 0.025122404098510742, + "step": 26491 + }, + { + "epoch": 4.042205810546875e-05, + "step": 26491, + "training_step_time": 0.10743975639343262 + }, + { + "epoch": 4.0423583984375e-05, + "model_forward_time": 0.02494072914123535, + "step": 26492 + }, + { + "epoch": 4.0423583984375e-05, + "step": 26492, + "training_step_time": 0.10522890090942383 + }, + { + "epoch": 4.042510986328125e-05, + "model_forward_time": 0.02551436424255371, + "step": 26493 + }, + { + "epoch": 4.042510986328125e-05, + "step": 26493, + "training_step_time": 0.10524749755859375 + }, + { + "epoch": 4.04266357421875e-05, + "model_forward_time": 0.024808168411254883, + "step": 26494 + }, + { + "epoch": 4.04266357421875e-05, + "step": 26494, + "training_step_time": 0.10324645042419434 + }, + { + "epoch": 4.042816162109375e-05, + "model_forward_time": 0.02505207061767578, + "step": 26495 + }, + { + "epoch": 4.042816162109375e-05, + "step": 26495, + "training_step_time": 0.1071326732635498 + }, + { + "epoch": 4.04296875e-05, + "model_forward_time": 0.024913787841796875, + "step": 26496 + }, + { + "epoch": 4.04296875e-05, + "step": 26496, + "training_step_time": 0.10564470291137695 + }, + { + "epoch": 4.043121337890625e-05, + "model_forward_time": 0.025305986404418945, + "step": 26497 + }, + { + "epoch": 4.043121337890625e-05, + "step": 26497, + "training_step_time": 0.10999155044555664 + }, + { + "epoch": 4.04327392578125e-05, + "model_forward_time": 0.025078773498535156, + "step": 26498 + }, + { + "epoch": 4.04327392578125e-05, + "step": 26498, + "training_step_time": 0.10939908027648926 + }, + { + "epoch": 4.043426513671875e-05, + "model_forward_time": 0.02516913414001465, + "step": 26499 + }, + { + "epoch": 4.043426513671875e-05, + "step": 26499, + "training_step_time": 0.10502409934997559 + }, + { + "epoch": 4.0435791015625e-05, + "grad_norm": 0.1022261381149292, + "learning_rate": 3.675296639259912e-06, + "loss": 0.0056, + "step": 26500 + }, + { + "epoch": 4.0435791015625e-05, + "model_forward_time": 0.025397300720214844, + "step": 26500 + }, + { + "epoch": 4.0435791015625e-05, + "step": 26500, + "training_step_time": 0.10549211502075195 + }, + { + "epoch": 4.043731689453125e-05, + "model_forward_time": 0.02512192726135254, + "step": 26501 + }, + { + "epoch": 4.043731689453125e-05, + "step": 26501, + "training_step_time": 0.105438232421875 + }, + { + "epoch": 4.04388427734375e-05, + "model_forward_time": 0.02480316162109375, + "step": 26502 + }, + { + "epoch": 4.04388427734375e-05, + "step": 26502, + "training_step_time": 0.10332369804382324 + }, + { + "epoch": 4.044036865234375e-05, + "model_forward_time": 0.02492213249206543, + "step": 26503 + }, + { + "epoch": 4.044036865234375e-05, + "step": 26503, + "training_step_time": 0.18656635284423828 + }, + { + "epoch": 4.044189453125e-05, + "model_forward_time": 0.02458357810974121, + "step": 26504 + }, + { + "epoch": 4.044189453125e-05, + "step": 26504, + "training_step_time": 0.11342453956604004 + }, + { + "epoch": 4.044342041015625e-05, + "model_forward_time": 0.024757862091064453, + "step": 26505 + }, + { + "epoch": 4.044342041015625e-05, + "step": 26505, + "training_step_time": 0.10750746726989746 + }, + { + "epoch": 4.04449462890625e-05, + "model_forward_time": 0.02530074119567871, + "step": 26506 + }, + { + "epoch": 4.04449462890625e-05, + "step": 26506, + "training_step_time": 0.10714459419250488 + }, + { + "epoch": 4.044647216796875e-05, + "model_forward_time": 0.024649620056152344, + "step": 26507 + }, + { + "epoch": 4.044647216796875e-05, + "step": 26507, + "training_step_time": 0.19402313232421875 + }, + { + "epoch": 4.0447998046875e-05, + "model_forward_time": 0.024662017822265625, + "step": 26508 + }, + { + "epoch": 4.0447998046875e-05, + "step": 26508, + "training_step_time": 0.14632773399353027 + }, + { + "epoch": 4.044952392578125e-05, + "model_forward_time": 0.024420976638793945, + "step": 26509 + }, + { + "epoch": 4.044952392578125e-05, + "step": 26509, + "training_step_time": 0.1154334545135498 + }, + { + "epoch": 4.04510498046875e-05, + "grad_norm": 0.057058922946453094, + "learning_rate": 3.654584248280707e-06, + "loss": 0.0122, + "step": 26510 + }, + { + "epoch": 4.04510498046875e-05, + "model_forward_time": 0.024847745895385742, + "step": 26510 + }, + { + "epoch": 4.04510498046875e-05, + "step": 26510, + "training_step_time": 0.12876415252685547 + }, + { + "epoch": 4.045257568359375e-05, + "model_forward_time": 0.02538323402404785, + "step": 26511 + }, + { + "epoch": 4.045257568359375e-05, + "step": 26511, + "training_step_time": 0.11016511917114258 + }, + { + "epoch": 4.04541015625e-05, + "model_forward_time": 0.025345563888549805, + "step": 26512 + }, + { + "epoch": 4.04541015625e-05, + "step": 26512, + "training_step_time": 0.15119099617004395 + }, + { + "epoch": 4.045562744140625e-05, + "model_forward_time": 0.024898529052734375, + "step": 26513 + }, + { + "epoch": 4.045562744140625e-05, + "step": 26513, + "training_step_time": 0.12807059288024902 + }, + { + "epoch": 4.04571533203125e-05, + "model_forward_time": 0.024813413619995117, + "step": 26514 + }, + { + "epoch": 4.04571533203125e-05, + "step": 26514, + "training_step_time": 0.2071993350982666 + }, + { + "epoch": 4.045867919921875e-05, + "model_forward_time": 0.024770259857177734, + "step": 26515 + }, + { + "epoch": 4.045867919921875e-05, + "step": 26515, + "training_step_time": 0.1227271556854248 + }, + { + "epoch": 4.0460205078125e-05, + "model_forward_time": 0.02407050132751465, + "step": 26516 + }, + { + "epoch": 4.0460205078125e-05, + "step": 26516, + "training_step_time": 0.16160154342651367 + }, + { + "epoch": 4.046173095703125e-05, + "model_forward_time": 0.024768590927124023, + "step": 26517 + }, + { + "epoch": 4.046173095703125e-05, + "step": 26517, + "training_step_time": 0.13242316246032715 + }, + { + "epoch": 4.04632568359375e-05, + "model_forward_time": 0.02495551109313965, + "step": 26518 + }, + { + "epoch": 4.04632568359375e-05, + "step": 26518, + "training_step_time": 0.11339974403381348 + }, + { + "epoch": 4.046478271484375e-05, + "model_forward_time": 0.02517080307006836, + "step": 26519 + }, + { + "epoch": 4.046478271484375e-05, + "step": 26519, + "training_step_time": 0.11597990989685059 + }, + { + "epoch": 4.046630859375e-05, + "grad_norm": 0.04885173216462135, + "learning_rate": 3.6339281713517303e-06, + "loss": 0.0059, + "step": 26520 + }, + { + "epoch": 4.046630859375e-05, + "model_forward_time": 0.02517080307006836, + "step": 26520 + }, + { + "epoch": 4.046630859375e-05, + "step": 26520, + "training_step_time": 0.10831117630004883 + }, + { + "epoch": 4.046783447265625e-05, + "model_forward_time": 0.026674270629882812, + "step": 26521 + }, + { + "epoch": 4.046783447265625e-05, + "step": 26521, + "training_step_time": 0.1100010871887207 + }, + { + "epoch": 4.04693603515625e-05, + "model_forward_time": 0.025594472885131836, + "step": 26522 + }, + { + "epoch": 4.04693603515625e-05, + "step": 26522, + "training_step_time": 0.10642051696777344 + }, + { + "epoch": 4.047088623046875e-05, + "model_forward_time": 0.025460243225097656, + "step": 26523 + }, + { + "epoch": 4.047088623046875e-05, + "step": 26523, + "training_step_time": 0.10879063606262207 + }, + { + "epoch": 4.0472412109375e-05, + "model_forward_time": 0.024884939193725586, + "step": 26524 + }, + { + "epoch": 4.0472412109375e-05, + "step": 26524, + "training_step_time": 0.10812640190124512 + }, + { + "epoch": 4.047393798828125e-05, + "model_forward_time": 0.02557826042175293, + "step": 26525 + }, + { + "epoch": 4.047393798828125e-05, + "step": 26525, + "training_step_time": 0.10730242729187012 + }, + { + "epoch": 4.04754638671875e-05, + "model_forward_time": 0.0276339054107666, + "step": 26526 + }, + { + "epoch": 4.04754638671875e-05, + "step": 26526, + "training_step_time": 0.15433645248413086 + }, + { + "epoch": 4.047698974609375e-05, + "model_forward_time": 0.024886608123779297, + "step": 26527 + }, + { + "epoch": 4.047698974609375e-05, + "step": 26527, + "training_step_time": 0.1389927864074707 + }, + { + "epoch": 4.0478515625e-05, + "model_forward_time": 0.0249631404876709, + "step": 26528 + }, + { + "epoch": 4.0478515625e-05, + "step": 26528, + "training_step_time": 0.10765862464904785 + }, + { + "epoch": 4.048004150390625e-05, + "model_forward_time": 0.025698423385620117, + "step": 26529 + }, + { + "epoch": 4.048004150390625e-05, + "step": 26529, + "training_step_time": 0.11196327209472656 + }, + { + "epoch": 4.04815673828125e-05, + "grad_norm": 0.08183445781469345, + "learning_rate": 3.6133284335720605e-06, + "loss": 0.0061, + "step": 26530 + }, + { + "epoch": 4.04815673828125e-05, + "model_forward_time": 0.025316476821899414, + "step": 26530 + }, + { + "epoch": 4.04815673828125e-05, + "step": 26530, + "training_step_time": 0.11513996124267578 + }, + { + "epoch": 4.048309326171875e-05, + "model_forward_time": 0.025295019149780273, + "step": 26531 + }, + { + "epoch": 4.048309326171875e-05, + "step": 26531, + "training_step_time": 0.18512296676635742 + }, + { + "epoch": 4.0484619140625e-05, + "model_forward_time": 0.023864269256591797, + "step": 26532 + }, + { + "epoch": 4.0484619140625e-05, + "step": 26532, + "training_step_time": 0.10978341102600098 + }, + { + "epoch": 4.048614501953125e-05, + "model_forward_time": 0.02470874786376953, + "step": 26533 + }, + { + "epoch": 4.048614501953125e-05, + "step": 26533, + "training_step_time": 0.12314629554748535 + }, + { + "epoch": 4.04876708984375e-05, + "model_forward_time": 0.02522110939025879, + "step": 26534 + }, + { + "epoch": 4.04876708984375e-05, + "step": 26534, + "training_step_time": 0.10663628578186035 + }, + { + "epoch": 4.048919677734375e-05, + "model_forward_time": 0.025143146514892578, + "step": 26535 + }, + { + "epoch": 4.048919677734375e-05, + "step": 26535, + "training_step_time": 0.10529589653015137 + }, + { + "epoch": 4.049072265625e-05, + "model_forward_time": 0.025355815887451172, + "step": 26536 + }, + { + "epoch": 4.049072265625e-05, + "step": 26536, + "training_step_time": 0.10533380508422852 + }, + { + "epoch": 4.049224853515625e-05, + "model_forward_time": 0.02504563331604004, + "step": 26537 + }, + { + "epoch": 4.049224853515625e-05, + "step": 26537, + "training_step_time": 0.10556244850158691 + }, + { + "epoch": 4.04937744140625e-05, + "model_forward_time": 0.02545642852783203, + "step": 26538 + }, + { + "epoch": 4.04937744140625e-05, + "step": 26538, + "training_step_time": 0.10567760467529297 + }, + { + "epoch": 4.049530029296875e-05, + "model_forward_time": 0.02527308464050293, + "step": 26539 + }, + { + "epoch": 4.049530029296875e-05, + "step": 26539, + "training_step_time": 0.10600471496582031 + }, + { + "epoch": 4.0496826171875e-05, + "grad_norm": 0.09746471792459488, + "learning_rate": 3.59278505997232e-06, + "loss": 0.0049, + "step": 26540 + }, + { + "epoch": 4.0496826171875e-05, + "model_forward_time": 0.025417566299438477, + "step": 26540 + }, + { + "epoch": 4.0496826171875e-05, + "step": 26540, + "training_step_time": 0.10756278038024902 + }, + { + "epoch": 4.049835205078125e-05, + "model_forward_time": 0.025043487548828125, + "step": 26541 + }, + { + "epoch": 4.049835205078125e-05, + "step": 26541, + "training_step_time": 0.10797357559204102 + }, + { + "epoch": 4.04998779296875e-05, + "model_forward_time": 0.02576589584350586, + "step": 26542 + }, + { + "epoch": 4.04998779296875e-05, + "step": 26542, + "training_step_time": 0.10625720024108887 + }, + { + "epoch": 4.050140380859375e-05, + "model_forward_time": 0.025609254837036133, + "step": 26543 + }, + { + "epoch": 4.050140380859375e-05, + "step": 26543, + "training_step_time": 0.1117391586303711 + }, + { + "epoch": 4.05029296875e-05, + "model_forward_time": 0.025373458862304688, + "step": 26544 + }, + { + "epoch": 4.05029296875e-05, + "step": 26544, + "training_step_time": 0.10754847526550293 + }, + { + "epoch": 4.050445556640625e-05, + "model_forward_time": 0.02575850486755371, + "step": 26545 + }, + { + "epoch": 4.050445556640625e-05, + "step": 26545, + "training_step_time": 0.10830569267272949 + }, + { + "epoch": 4.05059814453125e-05, + "model_forward_time": 0.025150299072265625, + "step": 26546 + }, + { + "epoch": 4.05059814453125e-05, + "step": 26546, + "training_step_time": 0.10763216018676758 + }, + { + "epoch": 4.050750732421875e-05, + "model_forward_time": 0.02489161491394043, + "step": 26547 + }, + { + "epoch": 4.050750732421875e-05, + "step": 26547, + "training_step_time": 0.105926513671875 + }, + { + "epoch": 4.0509033203125e-05, + "model_forward_time": 0.025455713272094727, + "step": 26548 + }, + { + "epoch": 4.0509033203125e-05, + "step": 26548, + "training_step_time": 0.10813164710998535 + }, + { + "epoch": 4.051055908203125e-05, + "model_forward_time": 0.0253446102142334, + "step": 26549 + }, + { + "epoch": 4.051055908203125e-05, + "step": 26549, + "training_step_time": 0.17074990272521973 + }, + { + "epoch": 4.05120849609375e-05, + "grad_norm": 0.0958266481757164, + "learning_rate": 3.5722980755146517e-06, + "loss": 0.0039, + "step": 26550 + }, + { + "epoch": 4.05120849609375e-05, + "model_forward_time": 0.02494359016418457, + "step": 26550 + }, + { + "epoch": 4.05120849609375e-05, + "step": 26550, + "training_step_time": 0.10689473152160645 + }, + { + "epoch": 4.051361083984375e-05, + "model_forward_time": 0.02472686767578125, + "step": 26551 + }, + { + "epoch": 4.051361083984375e-05, + "step": 26551, + "training_step_time": 0.11568999290466309 + }, + { + "epoch": 4.051513671875e-05, + "model_forward_time": 0.025069475173950195, + "step": 26552 + }, + { + "epoch": 4.051513671875e-05, + "step": 26552, + "training_step_time": 0.10408234596252441 + }, + { + "epoch": 4.051666259765625e-05, + "model_forward_time": 0.024181604385375977, + "step": 26553 + }, + { + "epoch": 4.051666259765625e-05, + "step": 26553, + "training_step_time": 0.16146063804626465 + }, + { + "epoch": 4.05181884765625e-05, + "model_forward_time": 0.02450728416442871, + "step": 26554 + }, + { + "epoch": 4.05181884765625e-05, + "step": 26554, + "training_step_time": 0.15445470809936523 + }, + { + "epoch": 4.051971435546875e-05, + "model_forward_time": 0.024485111236572266, + "step": 26555 + }, + { + "epoch": 4.051971435546875e-05, + "step": 26555, + "training_step_time": 0.10858750343322754 + }, + { + "epoch": 4.0521240234375e-05, + "model_forward_time": 0.025087594985961914, + "step": 26556 + }, + { + "epoch": 4.0521240234375e-05, + "step": 26556, + "training_step_time": 0.13478565216064453 + }, + { + "epoch": 4.052276611328125e-05, + "model_forward_time": 0.0253903865814209, + "step": 26557 + }, + { + "epoch": 4.052276611328125e-05, + "step": 26557, + "training_step_time": 0.1949324607849121 + }, + { + "epoch": 4.05242919921875e-05, + "model_forward_time": 0.024905681610107422, + "step": 26558 + }, + { + "epoch": 4.05242919921875e-05, + "step": 26558, + "training_step_time": 0.11893033981323242 + }, + { + "epoch": 4.052581787109375e-05, + "model_forward_time": 0.024436235427856445, + "step": 26559 + }, + { + "epoch": 4.052581787109375e-05, + "step": 26559, + "training_step_time": 0.21183371543884277 + }, + { + "epoch": 4.052734375e-05, + "grad_norm": 0.1997993439435959, + "learning_rate": 3.5518675050926544e-06, + "loss": 0.006, + "step": 26560 + }, + { + "epoch": 4.052734375e-05, + "model_forward_time": 0.02456831932067871, + "step": 26560 + }, + { + "epoch": 4.052734375e-05, + "step": 26560, + "training_step_time": 0.10870885848999023 + }, + { + "epoch": 4.052886962890625e-05, + "model_forward_time": 0.02470111846923828, + "step": 26561 + }, + { + "epoch": 4.052886962890625e-05, + "step": 26561, + "training_step_time": 0.10820937156677246 + }, + { + "epoch": 4.05303955078125e-05, + "model_forward_time": 0.025211572647094727, + "step": 26562 + }, + { + "epoch": 4.05303955078125e-05, + "step": 26562, + "training_step_time": 0.19577383995056152 + }, + { + "epoch": 4.053192138671875e-05, + "model_forward_time": 0.024295330047607422, + "step": 26563 + }, + { + "epoch": 4.053192138671875e-05, + "step": 26563, + "training_step_time": 0.10260367393493652 + }, + { + "epoch": 4.0533447265625e-05, + "model_forward_time": 0.02475595474243164, + "step": 26564 + }, + { + "epoch": 4.0533447265625e-05, + "step": 26564, + "training_step_time": 0.10417723655700684 + }, + { + "epoch": 4.053497314453125e-05, + "model_forward_time": 0.025842905044555664, + "step": 26565 + }, + { + "epoch": 4.053497314453125e-05, + "step": 26565, + "training_step_time": 0.10833430290222168 + }, + { + "epoch": 4.05364990234375e-05, + "model_forward_time": 0.0262451171875, + "step": 26566 + }, + { + "epoch": 4.05364990234375e-05, + "step": 26566, + "training_step_time": 0.11400413513183594 + }, + { + "epoch": 4.053802490234375e-05, + "model_forward_time": 0.025316715240478516, + "step": 26567 + }, + { + "epoch": 4.053802490234375e-05, + "step": 26567, + "training_step_time": 0.11634063720703125 + }, + { + "epoch": 4.053955078125e-05, + "model_forward_time": 0.027282238006591797, + "step": 26568 + }, + { + "epoch": 4.053955078125e-05, + "step": 26568, + "training_step_time": 0.11847662925720215 + }, + { + "epoch": 4.054107666015625e-05, + "model_forward_time": 0.025174379348754883, + "step": 26569 + }, + { + "epoch": 4.054107666015625e-05, + "step": 26569, + "training_step_time": 0.11097383499145508 + }, + { + "epoch": 4.05426025390625e-05, + "grad_norm": 0.0691823661327362, + "learning_rate": 3.531493373531419e-06, + "loss": 0.0029, + "step": 26570 + }, + { + "epoch": 4.05426025390625e-05, + "model_forward_time": 0.024933815002441406, + "step": 26570 + }, + { + "epoch": 4.05426025390625e-05, + "step": 26570, + "training_step_time": 0.1118321418762207 + }, + { + "epoch": 4.054412841796875e-05, + "model_forward_time": 0.025927305221557617, + "step": 26571 + }, + { + "epoch": 4.054412841796875e-05, + "step": 26571, + "training_step_time": 0.11410021781921387 + }, + { + "epoch": 4.0545654296875e-05, + "model_forward_time": 0.025362730026245117, + "step": 26572 + }, + { + "epoch": 4.0545654296875e-05, + "step": 26572, + "training_step_time": 0.11338162422180176 + }, + { + "epoch": 4.054718017578125e-05, + "model_forward_time": 0.026100873947143555, + "step": 26573 + }, + { + "epoch": 4.054718017578125e-05, + "step": 26573, + "training_step_time": 0.11757349967956543 + }, + { + "epoch": 4.05487060546875e-05, + "model_forward_time": 0.025223255157470703, + "step": 26574 + }, + { + "epoch": 4.05487060546875e-05, + "step": 26574, + "training_step_time": 0.11042094230651855 + }, + { + "epoch": 4.055023193359375e-05, + "model_forward_time": 0.025331735610961914, + "step": 26575 + }, + { + "epoch": 4.055023193359375e-05, + "step": 26575, + "training_step_time": 0.10987567901611328 + }, + { + "epoch": 4.05517578125e-05, + "model_forward_time": 0.0251772403717041, + "step": 26576 + }, + { + "epoch": 4.05517578125e-05, + "step": 26576, + "training_step_time": 0.10582566261291504 + }, + { + "epoch": 4.055328369140625e-05, + "model_forward_time": 0.02513265609741211, + "step": 26577 + }, + { + "epoch": 4.055328369140625e-05, + "step": 26577, + "training_step_time": 0.1081385612487793 + }, + { + "epoch": 4.05548095703125e-05, + "model_forward_time": 0.025162220001220703, + "step": 26578 + }, + { + "epoch": 4.05548095703125e-05, + "step": 26578, + "training_step_time": 0.19886493682861328 + }, + { + "epoch": 4.055633544921875e-05, + "model_forward_time": 0.024438858032226562, + "step": 26579 + }, + { + "epoch": 4.055633544921875e-05, + "step": 26579, + "training_step_time": 0.10661792755126953 + }, + { + "epoch": 4.0557861328125e-05, + "grad_norm": 0.10050533711910248, + "learning_rate": 3.511175705587433e-06, + "loss": 0.0029, + "step": 26580 + }, + { + "epoch": 4.0557861328125e-05, + "model_forward_time": 0.024654865264892578, + "step": 26580 + }, + { + "epoch": 4.0557861328125e-05, + "step": 26580, + "training_step_time": 0.10755205154418945 + }, + { + "epoch": 4.055938720703125e-05, + "model_forward_time": 0.02519965171813965, + "step": 26581 + }, + { + "epoch": 4.055938720703125e-05, + "step": 26581, + "training_step_time": 0.10943269729614258 + }, + { + "epoch": 4.05609130859375e-05, + "model_forward_time": 0.025081396102905273, + "step": 26582 + }, + { + "epoch": 4.05609130859375e-05, + "step": 26582, + "training_step_time": 0.10694456100463867 + }, + { + "epoch": 4.056243896484375e-05, + "model_forward_time": 0.025464773178100586, + "step": 26583 + }, + { + "epoch": 4.056243896484375e-05, + "step": 26583, + "training_step_time": 0.10962510108947754 + }, + { + "epoch": 4.056396484375e-05, + "model_forward_time": 0.025272130966186523, + "step": 26584 + }, + { + "epoch": 4.056396484375e-05, + "step": 26584, + "training_step_time": 0.10564017295837402 + }, + { + "epoch": 4.056549072265625e-05, + "model_forward_time": 0.025301456451416016, + "step": 26585 + }, + { + "epoch": 4.056549072265625e-05, + "step": 26585, + "training_step_time": 0.10627508163452148 + }, + { + "epoch": 4.05670166015625e-05, + "model_forward_time": 0.02558279037475586, + "step": 26586 + }, + { + "epoch": 4.05670166015625e-05, + "step": 26586, + "training_step_time": 0.10618305206298828 + }, + { + "epoch": 4.056854248046875e-05, + "model_forward_time": 0.025369644165039062, + "step": 26587 + }, + { + "epoch": 4.056854248046875e-05, + "step": 26587, + "training_step_time": 0.10348916053771973 + }, + { + "epoch": 4.0570068359375e-05, + "model_forward_time": 0.024962425231933594, + "step": 26588 + }, + { + "epoch": 4.0570068359375e-05, + "step": 26588, + "training_step_time": 0.11166763305664062 + }, + { + "epoch": 4.057159423828125e-05, + "model_forward_time": 0.02548956871032715, + "step": 26589 + }, + { + "epoch": 4.057159423828125e-05, + "step": 26589, + "training_step_time": 0.10599136352539062 + }, + { + "epoch": 4.05731201171875e-05, + "grad_norm": 0.34594714641571045, + "learning_rate": 3.4909145259485744e-06, + "loss": 0.0051, + "step": 26590 + }, + { + "epoch": 4.05731201171875e-05, + "model_forward_time": 0.025170087814331055, + "step": 26590 + }, + { + "epoch": 4.05731201171875e-05, + "step": 26590, + "training_step_time": 0.10874271392822266 + }, + { + "epoch": 4.057464599609375e-05, + "model_forward_time": 0.025215864181518555, + "step": 26591 + }, + { + "epoch": 4.057464599609375e-05, + "step": 26591, + "training_step_time": 0.11495256423950195 + }, + { + "epoch": 4.0576171875e-05, + "model_forward_time": 0.025197505950927734, + "step": 26592 + }, + { + "epoch": 4.0576171875e-05, + "step": 26592, + "training_step_time": 0.18700385093688965 + }, + { + "epoch": 4.057769775390625e-05, + "model_forward_time": 0.02479720115661621, + "step": 26593 + }, + { + "epoch": 4.057769775390625e-05, + "step": 26593, + "training_step_time": 0.21073341369628906 + }, + { + "epoch": 4.05792236328125e-05, + "model_forward_time": 0.024804115295410156, + "step": 26594 + }, + { + "epoch": 4.05792236328125e-05, + "step": 26594, + "training_step_time": 0.20987486839294434 + }, + { + "epoch": 4.058074951171875e-05, + "model_forward_time": 0.024443387985229492, + "step": 26595 + }, + { + "epoch": 4.058074951171875e-05, + "step": 26595, + "training_step_time": 0.20466256141662598 + }, + { + "epoch": 4.0582275390625e-05, + "model_forward_time": 0.025411128997802734, + "step": 26596 + }, + { + "epoch": 4.0582275390625e-05, + "step": 26596, + "training_step_time": 0.20511174201965332 + }, + { + "epoch": 4.058380126953125e-05, + "model_forward_time": 0.025126218795776367, + "step": 26597 + }, + { + "epoch": 4.058380126953125e-05, + "step": 26597, + "training_step_time": 0.2215898036956787 + }, + { + "epoch": 4.05853271484375e-05, + "model_forward_time": 0.024886369705200195, + "step": 26598 + }, + { + "epoch": 4.05853271484375e-05, + "step": 26598, + "training_step_time": 0.16161108016967773 + }, + { + "epoch": 4.058685302734375e-05, + "model_forward_time": 0.025318384170532227, + "step": 26599 + }, + { + "epoch": 4.058685302734375e-05, + "step": 26599, + "training_step_time": 0.16342496871948242 + }, + { + "epoch": 4.058837890625e-05, + "grad_norm": 0.10978017747402191, + "learning_rate": 3.470709859234084e-06, + "loss": 0.0066, + "step": 26600 + }, + { + "epoch": 4.058837890625e-05, + "model_forward_time": 0.024515628814697266, + "step": 26600 + }, + { + "epoch": 4.058837890625e-05, + "step": 26600, + "training_step_time": 0.1556835174560547 + }, + { + "epoch": 4.058990478515625e-05, + "model_forward_time": 0.023957252502441406, + "step": 26601 + }, + { + "epoch": 4.058990478515625e-05, + "step": 26601, + "training_step_time": 0.15072917938232422 + }, + { + "epoch": 4.05914306640625e-05, + "model_forward_time": 0.023457050323486328, + "step": 26602 + }, + { + "epoch": 4.05914306640625e-05, + "step": 26602, + "training_step_time": 0.15877771377563477 + }, + { + "epoch": 4.059295654296875e-05, + "model_forward_time": 0.02448248863220215, + "step": 26603 + }, + { + "epoch": 4.059295654296875e-05, + "step": 26603, + "training_step_time": 0.12450098991394043 + }, + { + "epoch": 4.0594482421875e-05, + "model_forward_time": 0.024402379989624023, + "step": 26604 + }, + { + "epoch": 4.0594482421875e-05, + "step": 26604, + "training_step_time": 0.16680693626403809 + }, + { + "epoch": 4.059600830078125e-05, + "model_forward_time": 0.0240628719329834, + "step": 26605 + }, + { + "epoch": 4.059600830078125e-05, + "step": 26605, + "training_step_time": 0.10515880584716797 + }, + { + "epoch": 4.05975341796875e-05, + "model_forward_time": 0.02437424659729004, + "step": 26606 + }, + { + "epoch": 4.05975341796875e-05, + "step": 26606, + "training_step_time": 0.10426831245422363 + }, + { + "epoch": 4.059906005859375e-05, + "model_forward_time": 0.024747371673583984, + "step": 26607 + }, + { + "epoch": 4.059906005859375e-05, + "step": 26607, + "training_step_time": 0.10286664962768555 + }, + { + "epoch": 4.06005859375e-05, + "model_forward_time": 0.025056123733520508, + "step": 26608 + }, + { + "epoch": 4.06005859375e-05, + "step": 26608, + "training_step_time": 0.10657477378845215 + }, + { + "epoch": 4.060211181640625e-05, + "model_forward_time": 0.02533125877380371, + "step": 26609 + }, + { + "epoch": 4.060211181640625e-05, + "step": 26609, + "training_step_time": 0.10516476631164551 + }, + { + "epoch": 4.06036376953125e-05, + "grad_norm": 0.05121985822916031, + "learning_rate": 3.4505617299945336e-06, + "loss": 0.0039, + "step": 26610 + }, + { + "epoch": 4.06036376953125e-05, + "model_forward_time": 0.025144100189208984, + "step": 26610 + }, + { + "epoch": 4.06036376953125e-05, + "step": 26610, + "training_step_time": 0.18141984939575195 + }, + { + "epoch": 4.060516357421875e-05, + "model_forward_time": 0.024670839309692383, + "step": 26611 + }, + { + "epoch": 4.060516357421875e-05, + "step": 26611, + "training_step_time": 0.18514800071716309 + }, + { + "epoch": 4.0606689453125e-05, + "model_forward_time": 0.025215864181518555, + "step": 26612 + }, + { + "epoch": 4.0606689453125e-05, + "step": 26612, + "training_step_time": 0.18893837928771973 + }, + { + "epoch": 4.060821533203125e-05, + "model_forward_time": 0.024189472198486328, + "step": 26613 + }, + { + "epoch": 4.060821533203125e-05, + "step": 26613, + "training_step_time": 0.18543744087219238 + }, + { + "epoch": 4.06097412109375e-05, + "model_forward_time": 0.0242002010345459, + "step": 26614 + }, + { + "epoch": 4.06097412109375e-05, + "step": 26614, + "training_step_time": 0.17374396324157715 + }, + { + "epoch": 4.061126708984375e-05, + "model_forward_time": 0.025160789489746094, + "step": 26615 + }, + { + "epoch": 4.061126708984375e-05, + "step": 26615, + "training_step_time": 0.17132878303527832 + }, + { + "epoch": 4.061279296875e-05, + "model_forward_time": 0.024704694747924805, + "step": 26616 + }, + { + "epoch": 4.061279296875e-05, + "step": 26616, + "training_step_time": 0.1777970790863037 + }, + { + "epoch": 4.061431884765625e-05, + "model_forward_time": 0.02519369125366211, + "step": 26617 + }, + { + "epoch": 4.061431884765625e-05, + "step": 26617, + "training_step_time": 0.11552858352661133 + }, + { + "epoch": 4.06158447265625e-05, + "model_forward_time": 0.02439570426940918, + "step": 26618 + }, + { + "epoch": 4.06158447265625e-05, + "step": 26618, + "training_step_time": 0.1066274642944336 + }, + { + "epoch": 4.061737060546875e-05, + "model_forward_time": 0.025094985961914062, + "step": 26619 + }, + { + "epoch": 4.061737060546875e-05, + "step": 26619, + "training_step_time": 0.10495281219482422 + }, + { + "epoch": 4.0618896484375e-05, + "grad_norm": 0.1307193785905838, + "learning_rate": 3.430470162711813e-06, + "loss": 0.0073, + "step": 26620 + }, + { + "epoch": 4.0618896484375e-05, + "model_forward_time": 0.02320241928100586, + "step": 26620 + }, + { + "epoch": 4.0618896484375e-05, + "step": 26620, + "training_step_time": 0.10384321212768555 + }, + { + "epoch": 4.062042236328125e-05, + "model_forward_time": 0.024834632873535156, + "step": 26621 + }, + { + "epoch": 4.062042236328125e-05, + "step": 26621, + "training_step_time": 0.12114906311035156 + }, + { + "epoch": 4.06219482421875e-05, + "model_forward_time": 0.025135278701782227, + "step": 26622 + }, + { + "epoch": 4.06219482421875e-05, + "step": 26622, + "training_step_time": 0.13491582870483398 + }, + { + "epoch": 4.062347412109375e-05, + "model_forward_time": 0.024900436401367188, + "step": 26623 + }, + { + "epoch": 4.062347412109375e-05, + "step": 26623, + "training_step_time": 0.13203811645507812 + }, + { + "epoch": 4.0625e-05, + "model_forward_time": 0.02464437484741211, + "step": 26624 + }, + { + "epoch": 4.0625e-05, + "step": 26624, + "training_step_time": 0.12476205825805664 + }, + { + "epoch": 4.062652587890625e-05, + "model_forward_time": 0.024642467498779297, + "step": 26625 + }, + { + "epoch": 4.062652587890625e-05, + "step": 26625, + "training_step_time": 0.12337183952331543 + }, + { + "epoch": 4.06280517578125e-05, + "model_forward_time": 0.025154829025268555, + "step": 26626 + }, + { + "epoch": 4.06280517578125e-05, + "step": 26626, + "training_step_time": 0.1196751594543457 + }, + { + "epoch": 4.062957763671875e-05, + "model_forward_time": 0.02513718605041504, + "step": 26627 + }, + { + "epoch": 4.062957763671875e-05, + "step": 26627, + "training_step_time": 0.11545872688293457 + }, + { + "epoch": 4.0631103515625e-05, + "model_forward_time": 0.025327205657958984, + "step": 26628 + }, + { + "epoch": 4.0631103515625e-05, + "step": 26628, + "training_step_time": 0.11024999618530273 + }, + { + "epoch": 4.063262939453125e-05, + "model_forward_time": 0.024974346160888672, + "step": 26629 + }, + { + "epoch": 4.063262939453125e-05, + "step": 26629, + "training_step_time": 0.10829448699951172 + }, + { + "epoch": 4.06341552734375e-05, + "grad_norm": 0.13951286673545837, + "learning_rate": 3.41043518179906e-06, + "loss": 0.0049, + "step": 26630 + }, + { + "epoch": 4.06341552734375e-05, + "model_forward_time": 0.024405956268310547, + "step": 26630 + }, + { + "epoch": 4.06341552734375e-05, + "step": 26630, + "training_step_time": 0.11149811744689941 + }, + { + "epoch": 4.063568115234375e-05, + "model_forward_time": 0.024730443954467773, + "step": 26631 + }, + { + "epoch": 4.063568115234375e-05, + "step": 26631, + "training_step_time": 0.10738492012023926 + }, + { + "epoch": 4.063720703125e-05, + "model_forward_time": 0.025090932846069336, + "step": 26632 + }, + { + "epoch": 4.063720703125e-05, + "step": 26632, + "training_step_time": 0.10735487937927246 + }, + { + "epoch": 4.063873291015625e-05, + "model_forward_time": 0.025223970413208008, + "step": 26633 + }, + { + "epoch": 4.063873291015625e-05, + "step": 26633, + "training_step_time": 0.10879302024841309 + }, + { + "epoch": 4.06402587890625e-05, + "model_forward_time": 0.025603055953979492, + "step": 26634 + }, + { + "epoch": 4.06402587890625e-05, + "step": 26634, + "training_step_time": 0.19719862937927246 + }, + { + "epoch": 4.064178466796875e-05, + "model_forward_time": 0.024847030639648438, + "step": 26635 + }, + { + "epoch": 4.064178466796875e-05, + "step": 26635, + "training_step_time": 0.10843777656555176 + }, + { + "epoch": 4.0643310546875e-05, + "model_forward_time": 0.02484440803527832, + "step": 26636 + }, + { + "epoch": 4.0643310546875e-05, + "step": 26636, + "training_step_time": 0.11187100410461426 + }, + { + "epoch": 4.064483642578125e-05, + "model_forward_time": 0.025930166244506836, + "step": 26637 + }, + { + "epoch": 4.064483642578125e-05, + "step": 26637, + "training_step_time": 0.10788345336914062 + }, + { + "epoch": 4.06463623046875e-05, + "model_forward_time": 0.0254364013671875, + "step": 26638 + }, + { + "epoch": 4.06463623046875e-05, + "step": 26638, + "training_step_time": 0.14639043807983398 + }, + { + "epoch": 4.064788818359375e-05, + "model_forward_time": 0.025045156478881836, + "step": 26639 + }, + { + "epoch": 4.064788818359375e-05, + "step": 26639, + "training_step_time": 0.16502904891967773 + }, + { + "epoch": 4.06494140625e-05, + "grad_norm": 0.07353484630584717, + "learning_rate": 3.390456811600673e-06, + "loss": 0.0034, + "step": 26640 + }, + { + "epoch": 4.06494140625e-05, + "model_forward_time": 0.027915239334106445, + "step": 26640 + }, + { + "epoch": 4.06494140625e-05, + "step": 26640, + "training_step_time": 0.13318920135498047 + }, + { + "epoch": 4.065093994140625e-05, + "model_forward_time": 0.025032997131347656, + "step": 26641 + }, + { + "epoch": 4.065093994140625e-05, + "step": 26641, + "training_step_time": 0.10764670372009277 + }, + { + "epoch": 4.06524658203125e-05, + "model_forward_time": 0.025531291961669922, + "step": 26642 + }, + { + "epoch": 4.06524658203125e-05, + "step": 26642, + "training_step_time": 0.19179439544677734 + }, + { + "epoch": 4.065399169921875e-05, + "model_forward_time": 0.02549004554748535, + "step": 26643 + }, + { + "epoch": 4.065399169921875e-05, + "step": 26643, + "training_step_time": 0.10766959190368652 + }, + { + "epoch": 4.0655517578125e-05, + "model_forward_time": 0.024964094161987305, + "step": 26644 + }, + { + "epoch": 4.0655517578125e-05, + "step": 26644, + "training_step_time": 0.10794544219970703 + }, + { + "epoch": 4.065704345703125e-05, + "model_forward_time": 0.024914264678955078, + "step": 26645 + }, + { + "epoch": 4.065704345703125e-05, + "step": 26645, + "training_step_time": 0.10828471183776855 + }, + { + "epoch": 4.06585693359375e-05, + "model_forward_time": 0.025846004486083984, + "step": 26646 + }, + { + "epoch": 4.06585693359375e-05, + "step": 26646, + "training_step_time": 0.17346644401550293 + }, + { + "epoch": 4.066009521484375e-05, + "model_forward_time": 0.02689337730407715, + "step": 26647 + }, + { + "epoch": 4.066009521484375e-05, + "step": 26647, + "training_step_time": 0.14132261276245117 + }, + { + "epoch": 4.066162109375e-05, + "model_forward_time": 0.024379968643188477, + "step": 26648 + }, + { + "epoch": 4.066162109375e-05, + "step": 26648, + "training_step_time": 0.11193156242370605 + }, + { + "epoch": 4.066314697265625e-05, + "model_forward_time": 0.025130510330200195, + "step": 26649 + }, + { + "epoch": 4.066314697265625e-05, + "step": 26649, + "training_step_time": 0.10616374015808105 + }, + { + "epoch": 4.06646728515625e-05, + "grad_norm": 0.0812462866306305, + "learning_rate": 3.3705350763922562e-06, + "loss": 0.0021, + "step": 26650 + }, + { + "epoch": 4.06646728515625e-05, + "model_forward_time": 0.025546789169311523, + "step": 26650 + }, + { + "epoch": 4.06646728515625e-05, + "step": 26650, + "training_step_time": 0.10674285888671875 + }, + { + "epoch": 4.066619873046875e-05, + "model_forward_time": 0.025378942489624023, + "step": 26651 + }, + { + "epoch": 4.066619873046875e-05, + "step": 26651, + "training_step_time": 0.10825586318969727 + }, + { + "epoch": 4.0667724609375e-05, + "model_forward_time": 0.02554154396057129, + "step": 26652 + }, + { + "epoch": 4.0667724609375e-05, + "step": 26652, + "training_step_time": 0.10586810111999512 + }, + { + "epoch": 4.066925048828125e-05, + "model_forward_time": 0.02534031867980957, + "step": 26653 + }, + { + "epoch": 4.066925048828125e-05, + "step": 26653, + "training_step_time": 0.10802030563354492 + }, + { + "epoch": 4.06707763671875e-05, + "model_forward_time": 0.025092601776123047, + "step": 26654 + }, + { + "epoch": 4.06707763671875e-05, + "step": 26654, + "training_step_time": 0.10811805725097656 + }, + { + "epoch": 4.067230224609375e-05, + "model_forward_time": 0.025314807891845703, + "step": 26655 + }, + { + "epoch": 4.067230224609375e-05, + "step": 26655, + "training_step_time": 0.11012387275695801 + }, + { + "epoch": 4.0673828125e-05, + "model_forward_time": 0.02529430389404297, + "step": 26656 + }, + { + "epoch": 4.0673828125e-05, + "step": 26656, + "training_step_time": 0.10884690284729004 + }, + { + "epoch": 4.067535400390625e-05, + "model_forward_time": 0.02531886100769043, + "step": 26657 + }, + { + "epoch": 4.067535400390625e-05, + "step": 26657, + "training_step_time": 0.10961246490478516 + }, + { + "epoch": 4.06768798828125e-05, + "model_forward_time": 0.025769948959350586, + "step": 26658 + }, + { + "epoch": 4.06768798828125e-05, + "step": 26658, + "training_step_time": 0.15196943283081055 + }, + { + "epoch": 4.067840576171875e-05, + "model_forward_time": 0.024996042251586914, + "step": 26659 + }, + { + "epoch": 4.067840576171875e-05, + "step": 26659, + "training_step_time": 0.11664271354675293 + }, + { + "epoch": 4.0679931640625e-05, + "grad_norm": 0.08683722466230392, + "learning_rate": 3.35067000038059e-06, + "loss": 0.0084, + "step": 26660 + }, + { + "epoch": 4.0679931640625e-05, + "model_forward_time": 0.024899721145629883, + "step": 26660 + }, + { + "epoch": 4.0679931640625e-05, + "step": 26660, + "training_step_time": 0.10618901252746582 + }, + { + "epoch": 4.068145751953125e-05, + "model_forward_time": 0.025148630142211914, + "step": 26661 + }, + { + "epoch": 4.068145751953125e-05, + "step": 26661, + "training_step_time": 0.10790419578552246 + }, + { + "epoch": 4.06829833984375e-05, + "model_forward_time": 0.025118350982666016, + "step": 26662 + }, + { + "epoch": 4.06829833984375e-05, + "step": 26662, + "training_step_time": 0.11268496513366699 + }, + { + "epoch": 4.068450927734375e-05, + "model_forward_time": 0.026336193084716797, + "step": 26663 + }, + { + "epoch": 4.068450927734375e-05, + "step": 26663, + "training_step_time": 0.11171507835388184 + }, + { + "epoch": 4.068603515625e-05, + "model_forward_time": 0.025574922561645508, + "step": 26664 + }, + { + "epoch": 4.068603515625e-05, + "step": 26664, + "training_step_time": 0.19835853576660156 + }, + { + "epoch": 4.068756103515625e-05, + "model_forward_time": 0.02395796775817871, + "step": 26665 + }, + { + "epoch": 4.068756103515625e-05, + "step": 26665, + "training_step_time": 0.10691976547241211 + }, + { + "epoch": 4.06890869140625e-05, + "model_forward_time": 0.024701595306396484, + "step": 26666 + }, + { + "epoch": 4.06890869140625e-05, + "step": 26666, + "training_step_time": 0.10498547554016113 + }, + { + "epoch": 4.069061279296875e-05, + "model_forward_time": 0.02523493766784668, + "step": 26667 + }, + { + "epoch": 4.069061279296875e-05, + "step": 26667, + "training_step_time": 0.10854887962341309 + }, + { + "epoch": 4.0692138671875e-05, + "model_forward_time": 0.02513575553894043, + "step": 26668 + }, + { + "epoch": 4.0692138671875e-05, + "step": 26668, + "training_step_time": 0.10857033729553223 + }, + { + "epoch": 4.069366455078125e-05, + "model_forward_time": 0.025070905685424805, + "step": 26669 + }, + { + "epoch": 4.069366455078125e-05, + "step": 26669, + "training_step_time": 0.10947585105895996 + }, + { + "epoch": 4.06951904296875e-05, + "grad_norm": 0.11693254113197327, + "learning_rate": 3.3308616077036115e-06, + "loss": 0.0033, + "step": 26670 + }, + { + "epoch": 4.06951904296875e-05, + "model_forward_time": 0.02499222755432129, + "step": 26670 + }, + { + "epoch": 4.06951904296875e-05, + "step": 26670, + "training_step_time": 0.1063694953918457 + }, + { + "epoch": 4.069671630859375e-05, + "model_forward_time": 0.028609037399291992, + "step": 26671 + }, + { + "epoch": 4.069671630859375e-05, + "step": 26671, + "training_step_time": 0.10840559005737305 + }, + { + "epoch": 4.06982421875e-05, + "model_forward_time": 0.02524876594543457, + "step": 26672 + }, + { + "epoch": 4.06982421875e-05, + "step": 26672, + "training_step_time": 0.10749602317810059 + }, + { + "epoch": 4.069976806640625e-05, + "model_forward_time": 0.025388240814208984, + "step": 26673 + }, + { + "epoch": 4.069976806640625e-05, + "step": 26673, + "training_step_time": 0.10989093780517578 + }, + { + "epoch": 4.07012939453125e-05, + "model_forward_time": 0.024699926376342773, + "step": 26674 + }, + { + "epoch": 4.07012939453125e-05, + "step": 26674, + "training_step_time": 0.10926246643066406 + }, + { + "epoch": 4.070281982421875e-05, + "model_forward_time": 0.025566816329956055, + "step": 26675 + }, + { + "epoch": 4.070281982421875e-05, + "step": 26675, + "training_step_time": 0.11078023910522461 + }, + { + "epoch": 4.0704345703125e-05, + "model_forward_time": 0.025050878524780273, + "step": 26676 + }, + { + "epoch": 4.0704345703125e-05, + "step": 26676, + "training_step_time": 0.11076092720031738 + }, + { + "epoch": 4.070587158203125e-05, + "model_forward_time": 0.025349855422973633, + "step": 26677 + }, + { + "epoch": 4.070587158203125e-05, + "step": 26677, + "training_step_time": 0.10786747932434082 + }, + { + "epoch": 4.07073974609375e-05, + "model_forward_time": 0.02698349952697754, + "step": 26678 + }, + { + "epoch": 4.07073974609375e-05, + "step": 26678, + "training_step_time": 0.1774294376373291 + }, + { + "epoch": 4.070892333984375e-05, + "model_forward_time": 0.024695396423339844, + "step": 26679 + }, + { + "epoch": 4.070892333984375e-05, + "step": 26679, + "training_step_time": 0.20139288902282715 + }, + { + "epoch": 4.071044921875e-05, + "grad_norm": 0.14898306131362915, + "learning_rate": 3.3111099224304e-06, + "loss": 0.0061, + "step": 26680 + }, + { + "epoch": 4.071044921875e-05, + "model_forward_time": 0.024250030517578125, + "step": 26680 + }, + { + "epoch": 4.071044921875e-05, + "step": 26680, + "training_step_time": 0.18573236465454102 + }, + { + "epoch": 4.071197509765625e-05, + "model_forward_time": 0.0242156982421875, + "step": 26681 + }, + { + "epoch": 4.071197509765625e-05, + "step": 26681, + "training_step_time": 0.16794943809509277 + }, + { + "epoch": 4.07135009765625e-05, + "model_forward_time": 0.024793386459350586, + "step": 26682 + }, + { + "epoch": 4.07135009765625e-05, + "step": 26682, + "training_step_time": 0.16138625144958496 + }, + { + "epoch": 4.071502685546875e-05, + "model_forward_time": 0.02394556999206543, + "step": 26683 + }, + { + "epoch": 4.071502685546875e-05, + "step": 26683, + "training_step_time": 0.1855635643005371 + }, + { + "epoch": 4.0716552734375e-05, + "model_forward_time": 0.025079011917114258, + "step": 26684 + }, + { + "epoch": 4.0716552734375e-05, + "step": 26684, + "training_step_time": 0.1862490177154541 + }, + { + "epoch": 4.071807861328125e-05, + "model_forward_time": 0.02399921417236328, + "step": 26685 + }, + { + "epoch": 4.071807861328125e-05, + "step": 26685, + "training_step_time": 0.13516879081726074 + }, + { + "epoch": 4.07196044921875e-05, + "model_forward_time": 0.02427983283996582, + "step": 26686 + }, + { + "epoch": 4.07196044921875e-05, + "step": 26686, + "training_step_time": 0.17879915237426758 + }, + { + "epoch": 4.072113037109375e-05, + "model_forward_time": 0.02459239959716797, + "step": 26687 + }, + { + "epoch": 4.072113037109375e-05, + "step": 26687, + "training_step_time": 0.14362812042236328 + }, + { + "epoch": 4.072265625e-05, + "model_forward_time": 0.02516913414001465, + "step": 26688 + }, + { + "epoch": 4.072265625e-05, + "step": 26688, + "training_step_time": 0.13523173332214355 + }, + { + "epoch": 4.072418212890625e-05, + "model_forward_time": 0.024919986724853516, + "step": 26689 + }, + { + "epoch": 4.072418212890625e-05, + "step": 26689, + "training_step_time": 0.19314074516296387 + }, + { + "epoch": 4.07257080078125e-05, + "grad_norm": 0.10780028998851776, + "learning_rate": 3.2914149685611073e-06, + "loss": 0.0052, + "step": 26690 + }, + { + "epoch": 4.07257080078125e-05, + "model_forward_time": 0.026250839233398438, + "step": 26690 + }, + { + "epoch": 4.07257080078125e-05, + "step": 26690, + "training_step_time": 0.17038750648498535 + }, + { + "epoch": 4.072723388671875e-05, + "model_forward_time": 0.02421712875366211, + "step": 26691 + }, + { + "epoch": 4.072723388671875e-05, + "step": 26691, + "training_step_time": 0.10178661346435547 + }, + { + "epoch": 4.0728759765625e-05, + "model_forward_time": 0.024585723876953125, + "step": 26692 + }, + { + "epoch": 4.0728759765625e-05, + "step": 26692, + "training_step_time": 0.10254025459289551 + }, + { + "epoch": 4.073028564453125e-05, + "model_forward_time": 0.025119781494140625, + "step": 26693 + }, + { + "epoch": 4.073028564453125e-05, + "step": 26693, + "training_step_time": 0.10536932945251465 + }, + { + "epoch": 4.07318115234375e-05, + "model_forward_time": 0.02521491050720215, + "step": 26694 + }, + { + "epoch": 4.07318115234375e-05, + "step": 26694, + "training_step_time": 0.1072688102722168 + }, + { + "epoch": 4.073333740234375e-05, + "model_forward_time": 0.025226593017578125, + "step": 26695 + }, + { + "epoch": 4.073333740234375e-05, + "step": 26695, + "training_step_time": 0.10624980926513672 + }, + { + "epoch": 4.073486328125e-05, + "model_forward_time": 0.02539682388305664, + "step": 26696 + }, + { + "epoch": 4.073486328125e-05, + "step": 26696, + "training_step_time": 0.10838603973388672 + }, + { + "epoch": 4.073638916015625e-05, + "model_forward_time": 0.0249021053314209, + "step": 26697 + }, + { + "epoch": 4.073638916015625e-05, + "step": 26697, + "training_step_time": 0.11003398895263672 + }, + { + "epoch": 4.07379150390625e-05, + "model_forward_time": 0.025248050689697266, + "step": 26698 + }, + { + "epoch": 4.07379150390625e-05, + "step": 26698, + "training_step_time": 0.10663175582885742 + }, + { + "epoch": 4.073944091796875e-05, + "model_forward_time": 0.02561020851135254, + "step": 26699 + }, + { + "epoch": 4.073944091796875e-05, + "step": 26699, + "training_step_time": 0.10784435272216797 + }, + { + "epoch": 4.0740966796875e-05, + "grad_norm": 0.24510358273983002, + "learning_rate": 3.271776770026963e-06, + "loss": 0.0108, + "step": 26700 + }, + { + "epoch": 4.0740966796875e-05, + "model_forward_time": 0.025061368942260742, + "step": 26700 + }, + { + "epoch": 4.0740966796875e-05, + "step": 26700, + "training_step_time": 0.10606741905212402 + }, + { + "epoch": 4.074249267578125e-05, + "model_forward_time": 0.025350332260131836, + "step": 26701 + }, + { + "epoch": 4.074249267578125e-05, + "step": 26701, + "training_step_time": 0.10470747947692871 + }, + { + "epoch": 4.07440185546875e-05, + "model_forward_time": 0.02533864974975586, + "step": 26702 + }, + { + "epoch": 4.07440185546875e-05, + "step": 26702, + "training_step_time": 0.11341714859008789 + }, + { + "epoch": 4.074554443359375e-05, + "model_forward_time": 0.025208711624145508, + "step": 26703 + }, + { + "epoch": 4.074554443359375e-05, + "step": 26703, + "training_step_time": 0.1367814540863037 + }, + { + "epoch": 4.07470703125e-05, + "model_forward_time": 0.025407075881958008, + "step": 26704 + }, + { + "epoch": 4.07470703125e-05, + "step": 26704, + "training_step_time": 0.11318778991699219 + }, + { + "epoch": 4.074859619140625e-05, + "model_forward_time": 0.02510857582092285, + "step": 26705 + }, + { + "epoch": 4.074859619140625e-05, + "step": 26705, + "training_step_time": 0.10726761817932129 + }, + { + "epoch": 4.07501220703125e-05, + "model_forward_time": 0.026613235473632812, + "step": 26706 + }, + { + "epoch": 4.07501220703125e-05, + "step": 26706, + "training_step_time": 0.11190485954284668 + }, + { + "epoch": 4.075164794921875e-05, + "model_forward_time": 0.02526402473449707, + "step": 26707 + }, + { + "epoch": 4.075164794921875e-05, + "step": 26707, + "training_step_time": 0.11483931541442871 + }, + { + "epoch": 4.0753173828125e-05, + "model_forward_time": 0.025832414627075195, + "step": 26708 + }, + { + "epoch": 4.0753173828125e-05, + "step": 26708, + "training_step_time": 0.1880347728729248 + }, + { + "epoch": 4.075469970703125e-05, + "model_forward_time": 0.024509668350219727, + "step": 26709 + }, + { + "epoch": 4.075469970703125e-05, + "step": 26709, + "training_step_time": 0.10419464111328125 + }, + { + "epoch": 4.07562255859375e-05, + "grad_norm": 0.22987960278987885, + "learning_rate": 3.2521953506902237e-06, + "loss": 0.0057, + "step": 26710 + }, + { + "epoch": 4.07562255859375e-05, + "model_forward_time": 0.02458977699279785, + "step": 26710 + }, + { + "epoch": 4.07562255859375e-05, + "step": 26710, + "training_step_time": 0.10155248641967773 + }, + { + "epoch": 4.075775146484375e-05, + "model_forward_time": 0.02584075927734375, + "step": 26711 + }, + { + "epoch": 4.075775146484375e-05, + "step": 26711, + "training_step_time": 0.10361647605895996 + }, + { + "epoch": 4.075927734375e-05, + "model_forward_time": 0.025406599044799805, + "step": 26712 + }, + { + "epoch": 4.075927734375e-05, + "step": 26712, + "training_step_time": 0.10613656044006348 + }, + { + "epoch": 4.076080322265625e-05, + "model_forward_time": 0.025420188903808594, + "step": 26713 + }, + { + "epoch": 4.076080322265625e-05, + "step": 26713, + "training_step_time": 0.10767412185668945 + }, + { + "epoch": 4.07623291015625e-05, + "model_forward_time": 0.025574922561645508, + "step": 26714 + }, + { + "epoch": 4.07623291015625e-05, + "step": 26714, + "training_step_time": 0.180009126663208 + }, + { + "epoch": 4.076385498046875e-05, + "model_forward_time": 0.024447202682495117, + "step": 26715 + }, + { + "epoch": 4.076385498046875e-05, + "step": 26715, + "training_step_time": 0.19122052192687988 + }, + { + "epoch": 4.0765380859375e-05, + "model_forward_time": 0.024560928344726562, + "step": 26716 + }, + { + "epoch": 4.0765380859375e-05, + "step": 26716, + "training_step_time": 0.19205188751220703 + }, + { + "epoch": 4.076690673828125e-05, + "model_forward_time": 0.024631977081298828, + "step": 26717 + }, + { + "epoch": 4.076690673828125e-05, + "step": 26717, + "training_step_time": 0.19199252128601074 + }, + { + "epoch": 4.07684326171875e-05, + "model_forward_time": 0.024318695068359375, + "step": 26718 + }, + { + "epoch": 4.07684326171875e-05, + "step": 26718, + "training_step_time": 0.1804361343383789 + }, + { + "epoch": 4.076995849609375e-05, + "model_forward_time": 0.027651071548461914, + "step": 26719 + }, + { + "epoch": 4.076995849609375e-05, + "step": 26719, + "training_step_time": 0.15955328941345215 + }, + { + "epoch": 4.0771484375e-05, + "grad_norm": 0.08157902956008911, + "learning_rate": 3.2326707343441566e-06, + "loss": 0.0024, + "step": 26720 + }, + { + "epoch": 4.0771484375e-05, + "model_forward_time": 0.024436235427856445, + "step": 26720 + }, + { + "epoch": 4.0771484375e-05, + "step": 26720, + "training_step_time": 0.14250731468200684 + }, + { + "epoch": 4.077301025390625e-05, + "model_forward_time": 0.024405479431152344, + "step": 26721 + }, + { + "epoch": 4.077301025390625e-05, + "step": 26721, + "training_step_time": 0.14423108100891113 + }, + { + "epoch": 4.07745361328125e-05, + "model_forward_time": 0.02418994903564453, + "step": 26722 + }, + { + "epoch": 4.07745361328125e-05, + "step": 26722, + "training_step_time": 0.16389107704162598 + }, + { + "epoch": 4.077606201171875e-05, + "model_forward_time": 0.025127172470092773, + "step": 26723 + }, + { + "epoch": 4.077606201171875e-05, + "step": 26723, + "training_step_time": 0.11426854133605957 + }, + { + "epoch": 4.0777587890625e-05, + "model_forward_time": 0.02463507652282715, + "step": 26724 + }, + { + "epoch": 4.0777587890625e-05, + "step": 26724, + "training_step_time": 0.141495943069458 + }, + { + "epoch": 4.077911376953125e-05, + "model_forward_time": 0.026463031768798828, + "step": 26725 + }, + { + "epoch": 4.077911376953125e-05, + "step": 26725, + "training_step_time": 0.15899109840393066 + }, + { + "epoch": 4.07806396484375e-05, + "model_forward_time": 0.024477720260620117, + "step": 26726 + }, + { + "epoch": 4.07806396484375e-05, + "step": 26726, + "training_step_time": 0.1990516185760498 + }, + { + "epoch": 4.078216552734375e-05, + "model_forward_time": 0.02467632293701172, + "step": 26727 + }, + { + "epoch": 4.078216552734375e-05, + "step": 26727, + "training_step_time": 0.15700173377990723 + }, + { + "epoch": 4.078369140625e-05, + "model_forward_time": 0.024187088012695312, + "step": 26728 + }, + { + "epoch": 4.078369140625e-05, + "step": 26728, + "training_step_time": 0.14815807342529297 + }, + { + "epoch": 4.078521728515625e-05, + "model_forward_time": 0.02472543716430664, + "step": 26729 + }, + { + "epoch": 4.078521728515625e-05, + "step": 26729, + "training_step_time": 0.13523554801940918 + }, + { + "epoch": 4.07867431640625e-05, + "grad_norm": 0.05186415836215019, + "learning_rate": 3.213202944713023e-06, + "loss": 0.0028, + "step": 26730 + }, + { + "epoch": 4.07867431640625e-05, + "model_forward_time": 0.024548768997192383, + "step": 26730 + }, + { + "epoch": 4.07867431640625e-05, + "step": 26730, + "training_step_time": 0.12346673011779785 + }, + { + "epoch": 4.078826904296875e-05, + "model_forward_time": 0.025125503540039062, + "step": 26731 + }, + { + "epoch": 4.078826904296875e-05, + "step": 26731, + "training_step_time": 0.10602045059204102 + }, + { + "epoch": 4.0789794921875e-05, + "model_forward_time": 0.025434494018554688, + "step": 26732 + }, + { + "epoch": 4.0789794921875e-05, + "step": 26732, + "training_step_time": 0.10860824584960938 + }, + { + "epoch": 4.079132080078125e-05, + "model_forward_time": 0.024926185607910156, + "step": 26733 + }, + { + "epoch": 4.079132080078125e-05, + "step": 26733, + "training_step_time": 0.21651959419250488 + }, + { + "epoch": 4.07928466796875e-05, + "model_forward_time": 0.023987531661987305, + "step": 26734 + }, + { + "epoch": 4.07928466796875e-05, + "step": 26734, + "training_step_time": 0.19706368446350098 + }, + { + "epoch": 4.079437255859375e-05, + "model_forward_time": 0.02391672134399414, + "step": 26735 + }, + { + "epoch": 4.079437255859375e-05, + "step": 26735, + "training_step_time": 0.1784071922302246 + }, + { + "epoch": 4.07958984375e-05, + "model_forward_time": 0.024206876754760742, + "step": 26736 + }, + { + "epoch": 4.07958984375e-05, + "step": 26736, + "training_step_time": 0.17437005043029785 + }, + { + "epoch": 4.079742431640625e-05, + "model_forward_time": 0.025784730911254883, + "step": 26737 + }, + { + "epoch": 4.079742431640625e-05, + "step": 26737, + "training_step_time": 0.15831613540649414 + }, + { + "epoch": 4.07989501953125e-05, + "model_forward_time": 0.023953676223754883, + "step": 26738 + }, + { + "epoch": 4.07989501953125e-05, + "step": 26738, + "training_step_time": 0.14695072174072266 + }, + { + "epoch": 4.080047607421875e-05, + "model_forward_time": 0.023875951766967773, + "step": 26739 + }, + { + "epoch": 4.080047607421875e-05, + "step": 26739, + "training_step_time": 0.131239652633667 + }, + { + "epoch": 4.0802001953125e-05, + "grad_norm": 0.07744824141263962, + "learning_rate": 3.193792005452018e-06, + "loss": 0.0041, + "step": 26740 + }, + { + "epoch": 4.0802001953125e-05, + "model_forward_time": 0.02471017837524414, + "step": 26740 + }, + { + "epoch": 4.0802001953125e-05, + "step": 26740, + "training_step_time": 0.12274861335754395 + }, + { + "epoch": 4.080352783203125e-05, + "model_forward_time": 0.02469348907470703, + "step": 26741 + }, + { + "epoch": 4.080352783203125e-05, + "step": 26741, + "training_step_time": 0.13964629173278809 + }, + { + "epoch": 4.08050537109375e-05, + "model_forward_time": 0.024678707122802734, + "step": 26742 + }, + { + "epoch": 4.08050537109375e-05, + "step": 26742, + "training_step_time": 0.13474655151367188 + }, + { + "epoch": 4.080657958984375e-05, + "model_forward_time": 0.02449202537536621, + "step": 26743 + }, + { + "epoch": 4.080657958984375e-05, + "step": 26743, + "training_step_time": 0.11082839965820312 + }, + { + "epoch": 4.080810546875e-05, + "model_forward_time": 0.025324583053588867, + "step": 26744 + }, + { + "epoch": 4.080810546875e-05, + "step": 26744, + "training_step_time": 0.10487627983093262 + }, + { + "epoch": 4.080963134765625e-05, + "model_forward_time": 0.02512812614440918, + "step": 26745 + }, + { + "epoch": 4.080963134765625e-05, + "step": 26745, + "training_step_time": 0.11162781715393066 + }, + { + "epoch": 4.08111572265625e-05, + "model_forward_time": 0.025099515914916992, + "step": 26746 + }, + { + "epoch": 4.08111572265625e-05, + "step": 26746, + "training_step_time": 0.10545039176940918 + }, + { + "epoch": 4.081268310546875e-05, + "model_forward_time": 0.025190353393554688, + "step": 26747 + }, + { + "epoch": 4.081268310546875e-05, + "step": 26747, + "training_step_time": 0.19527888298034668 + }, + { + "epoch": 4.0814208984375e-05, + "model_forward_time": 0.024199962615966797, + "step": 26748 + }, + { + "epoch": 4.0814208984375e-05, + "step": 26748, + "training_step_time": 0.10188746452331543 + }, + { + "epoch": 4.081573486328125e-05, + "model_forward_time": 0.024327754974365234, + "step": 26749 + }, + { + "epoch": 4.081573486328125e-05, + "step": 26749, + "training_step_time": 0.10188412666320801 + }, + { + "epoch": 4.08172607421875e-05, + "grad_norm": 0.14027544856071472, + "learning_rate": 3.1744379401472677e-06, + "loss": 0.0084, + "step": 26750 + }, + { + "epoch": 4.08172607421875e-05, + "model_forward_time": 0.025066137313842773, + "step": 26750 + }, + { + "epoch": 4.08172607421875e-05, + "step": 26750, + "training_step_time": 0.10620760917663574 + }, + { + "epoch": 4.081878662109375e-05, + "model_forward_time": 0.025079727172851562, + "step": 26751 + }, + { + "epoch": 4.081878662109375e-05, + "step": 26751, + "training_step_time": 0.11285281181335449 + }, + { + "epoch": 4.08203125e-05, + "model_forward_time": 0.025304079055786133, + "step": 26752 + }, + { + "epoch": 4.08203125e-05, + "step": 26752, + "training_step_time": 0.10619235038757324 + }, + { + "epoch": 4.082183837890625e-05, + "model_forward_time": 0.024847984313964844, + "step": 26753 + }, + { + "epoch": 4.082183837890625e-05, + "step": 26753, + "training_step_time": 0.10709619522094727 + }, + { + "epoch": 4.08233642578125e-05, + "model_forward_time": 0.02524399757385254, + "step": 26754 + }, + { + "epoch": 4.08233642578125e-05, + "step": 26754, + "training_step_time": 0.1036221981048584 + }, + { + "epoch": 4.082489013671875e-05, + "model_forward_time": 0.025450468063354492, + "step": 26755 + }, + { + "epoch": 4.082489013671875e-05, + "step": 26755, + "training_step_time": 0.1078188419342041 + }, + { + "epoch": 4.0826416015625e-05, + "model_forward_time": 0.025645732879638672, + "step": 26756 + }, + { + "epoch": 4.0826416015625e-05, + "step": 26756, + "training_step_time": 0.10690975189208984 + }, + { + "epoch": 4.082794189453125e-05, + "model_forward_time": 0.02541065216064453, + "step": 26757 + }, + { + "epoch": 4.082794189453125e-05, + "step": 26757, + "training_step_time": 0.17798852920532227 + }, + { + "epoch": 4.08294677734375e-05, + "model_forward_time": 0.02449488639831543, + "step": 26758 + }, + { + "epoch": 4.08294677734375e-05, + "step": 26758, + "training_step_time": 0.18977570533752441 + }, + { + "epoch": 4.083099365234375e-05, + "model_forward_time": 0.026149272918701172, + "step": 26759 + }, + { + "epoch": 4.083099365234375e-05, + "step": 26759, + "training_step_time": 0.18576788902282715 + }, + { + "epoch": 4.083251953125e-05, + "grad_norm": 0.10226710885763168, + "learning_rate": 3.155140772315773e-06, + "loss": 0.0043, + "step": 26760 + }, + { + "epoch": 4.083251953125e-05, + "model_forward_time": 0.024295806884765625, + "step": 26760 + }, + { + "epoch": 4.083251953125e-05, + "step": 26760, + "training_step_time": 0.17508864402770996 + }, + { + "epoch": 4.083404541015625e-05, + "model_forward_time": 0.024616003036499023, + "step": 26761 + }, + { + "epoch": 4.083404541015625e-05, + "step": 26761, + "training_step_time": 0.17728924751281738 + }, + { + "epoch": 4.08355712890625e-05, + "model_forward_time": 0.02429938316345215, + "step": 26762 + }, + { + "epoch": 4.08355712890625e-05, + "step": 26762, + "training_step_time": 0.1758437156677246 + }, + { + "epoch": 4.083709716796875e-05, + "model_forward_time": 0.02487659454345703, + "step": 26763 + }, + { + "epoch": 4.083709716796875e-05, + "step": 26763, + "training_step_time": 0.10165286064147949 + }, + { + "epoch": 4.0838623046875e-05, + "model_forward_time": 0.027752161026000977, + "step": 26764 + }, + { + "epoch": 4.0838623046875e-05, + "step": 26764, + "training_step_time": 0.14945292472839355 + }, + { + "epoch": 4.084014892578125e-05, + "model_forward_time": 0.024996519088745117, + "step": 26765 + }, + { + "epoch": 4.084014892578125e-05, + "step": 26765, + "training_step_time": 0.19479680061340332 + }, + { + "epoch": 4.08416748046875e-05, + "model_forward_time": 0.02440166473388672, + "step": 26766 + }, + { + "epoch": 4.08416748046875e-05, + "step": 26766, + "training_step_time": 0.206467866897583 + }, + { + "epoch": 4.084320068359375e-05, + "model_forward_time": 0.02463555335998535, + "step": 26767 + }, + { + "epoch": 4.084320068359375e-05, + "step": 26767, + "training_step_time": 0.1618201732635498 + }, + { + "epoch": 4.08447265625e-05, + "model_forward_time": 0.024322032928466797, + "step": 26768 + }, + { + "epoch": 4.08447265625e-05, + "step": 26768, + "training_step_time": 0.158919095993042 + }, + { + "epoch": 4.084625244140625e-05, + "model_forward_time": 0.024488449096679688, + "step": 26769 + }, + { + "epoch": 4.084625244140625e-05, + "step": 26769, + "training_step_time": 0.11172962188720703 + }, + { + "epoch": 4.08477783203125e-05, + "grad_norm": 0.09542731940746307, + "learning_rate": 3.1359005254054273e-06, + "loss": 0.0027, + "step": 26770 + }, + { + "epoch": 4.08477783203125e-05, + "model_forward_time": 0.024994373321533203, + "step": 26770 + }, + { + "epoch": 4.08477783203125e-05, + "step": 26770, + "training_step_time": 0.17457294464111328 + }, + { + "epoch": 4.084930419921875e-05, + "model_forward_time": 0.02418994903564453, + "step": 26771 + }, + { + "epoch": 4.084930419921875e-05, + "step": 26771, + "training_step_time": 0.13950490951538086 + }, + { + "epoch": 4.0850830078125e-05, + "model_forward_time": 0.024289369583129883, + "step": 26772 + }, + { + "epoch": 4.0850830078125e-05, + "step": 26772, + "training_step_time": 0.11224365234375 + }, + { + "epoch": 4.085235595703125e-05, + "model_forward_time": 0.025055408477783203, + "step": 26773 + }, + { + "epoch": 4.085235595703125e-05, + "step": 26773, + "training_step_time": 0.10435986518859863 + }, + { + "epoch": 4.08538818359375e-05, + "model_forward_time": 0.02481532096862793, + "step": 26774 + }, + { + "epoch": 4.08538818359375e-05, + "step": 26774, + "training_step_time": 0.10432195663452148 + }, + { + "epoch": 4.085540771484375e-05, + "model_forward_time": 0.02518939971923828, + "step": 26775 + }, + { + "epoch": 4.085540771484375e-05, + "step": 26775, + "training_step_time": 0.10755133628845215 + }, + { + "epoch": 4.085693359375e-05, + "model_forward_time": 0.02528667449951172, + "step": 26776 + }, + { + "epoch": 4.085693359375e-05, + "step": 26776, + "training_step_time": 0.10510921478271484 + }, + { + "epoch": 4.085845947265625e-05, + "model_forward_time": 0.025311708450317383, + "step": 26777 + }, + { + "epoch": 4.085845947265625e-05, + "step": 26777, + "training_step_time": 0.10868144035339355 + }, + { + "epoch": 4.08599853515625e-05, + "model_forward_time": 0.025835514068603516, + "step": 26778 + }, + { + "epoch": 4.08599853515625e-05, + "step": 26778, + "training_step_time": 0.10531020164489746 + }, + { + "epoch": 4.086151123046875e-05, + "model_forward_time": 0.025369882583618164, + "step": 26779 + }, + { + "epoch": 4.086151123046875e-05, + "step": 26779, + "training_step_time": 0.1085515022277832 + }, + { + "epoch": 4.0863037109375e-05, + "grad_norm": 0.09529156982898712, + "learning_rate": 3.1167172227949347e-06, + "loss": 0.0045, + "step": 26780 + }, + { + "epoch": 4.0863037109375e-05, + "model_forward_time": 0.025146007537841797, + "step": 26780 + }, + { + "epoch": 4.0863037109375e-05, + "step": 26780, + "training_step_time": 0.10555911064147949 + }, + { + "epoch": 4.086456298828125e-05, + "model_forward_time": 0.02534937858581543, + "step": 26781 + }, + { + "epoch": 4.086456298828125e-05, + "step": 26781, + "training_step_time": 0.10546731948852539 + }, + { + "epoch": 4.08660888671875e-05, + "model_forward_time": 0.025066614151000977, + "step": 26782 + }, + { + "epoch": 4.08660888671875e-05, + "step": 26782, + "training_step_time": 0.10415339469909668 + }, + { + "epoch": 4.086761474609375e-05, + "model_forward_time": 0.025035619735717773, + "step": 26783 + }, + { + "epoch": 4.086761474609375e-05, + "step": 26783, + "training_step_time": 0.10475659370422363 + }, + { + "epoch": 4.0869140625e-05, + "model_forward_time": 0.025317668914794922, + "step": 26784 + }, + { + "epoch": 4.0869140625e-05, + "step": 26784, + "training_step_time": 0.15226459503173828 + }, + { + "epoch": 4.087066650390625e-05, + "model_forward_time": 0.025641441345214844, + "step": 26785 + }, + { + "epoch": 4.087066650390625e-05, + "step": 26785, + "training_step_time": 0.11969327926635742 + }, + { + "epoch": 4.08721923828125e-05, + "model_forward_time": 0.02508997917175293, + "step": 26786 + }, + { + "epoch": 4.08721923828125e-05, + "step": 26786, + "training_step_time": 0.17935442924499512 + }, + { + "epoch": 4.087371826171875e-05, + "model_forward_time": 0.02457404136657715, + "step": 26787 + }, + { + "epoch": 4.087371826171875e-05, + "step": 26787, + "training_step_time": 0.17623567581176758 + }, + { + "epoch": 4.0875244140625e-05, + "model_forward_time": 0.02387523651123047, + "step": 26788 + }, + { + "epoch": 4.0875244140625e-05, + "step": 26788, + "training_step_time": 0.16893410682678223 + }, + { + "epoch": 4.087677001953125e-05, + "model_forward_time": 0.024147748947143555, + "step": 26789 + }, + { + "epoch": 4.087677001953125e-05, + "step": 26789, + "training_step_time": 0.11099481582641602 + }, + { + "epoch": 4.08782958984375e-05, + "grad_norm": 0.1236596554517746, + "learning_rate": 3.0975908877938277e-06, + "loss": 0.0068, + "step": 26790 + }, + { + "epoch": 4.08782958984375e-05, + "model_forward_time": 0.024781465530395508, + "step": 26790 + }, + { + "epoch": 4.08782958984375e-05, + "step": 26790, + "training_step_time": 0.10897636413574219 + }, + { + "epoch": 4.087982177734375e-05, + "model_forward_time": 0.025019168853759766, + "step": 26791 + }, + { + "epoch": 4.087982177734375e-05, + "step": 26791, + "training_step_time": 0.11070489883422852 + }, + { + "epoch": 4.088134765625e-05, + "model_forward_time": 0.0256502628326416, + "step": 26792 + }, + { + "epoch": 4.088134765625e-05, + "step": 26792, + "training_step_time": 0.11133837699890137 + }, + { + "epoch": 4.088287353515625e-05, + "model_forward_time": 0.02516770362854004, + "step": 26793 + }, + { + "epoch": 4.088287353515625e-05, + "step": 26793, + "training_step_time": 0.10945343971252441 + }, + { + "epoch": 4.08843994140625e-05, + "model_forward_time": 0.025018692016601562, + "step": 26794 + }, + { + "epoch": 4.08843994140625e-05, + "step": 26794, + "training_step_time": 0.1097710132598877 + }, + { + "epoch": 4.088592529296875e-05, + "model_forward_time": 0.02643418312072754, + "step": 26795 + }, + { + "epoch": 4.088592529296875e-05, + "step": 26795, + "training_step_time": 0.1076805591583252 + }, + { + "epoch": 4.0887451171875e-05, + "model_forward_time": 0.025021076202392578, + "step": 26796 + }, + { + "epoch": 4.0887451171875e-05, + "step": 26796, + "training_step_time": 0.10661625862121582 + }, + { + "epoch": 4.088897705078125e-05, + "model_forward_time": 0.024930477142333984, + "step": 26797 + }, + { + "epoch": 4.088897705078125e-05, + "step": 26797, + "training_step_time": 0.10617876052856445 + }, + { + "epoch": 4.08905029296875e-05, + "model_forward_time": 0.025120973587036133, + "step": 26798 + }, + { + "epoch": 4.08905029296875e-05, + "step": 26798, + "training_step_time": 0.1139533519744873 + }, + { + "epoch": 4.089202880859375e-05, + "model_forward_time": 0.025290727615356445, + "step": 26799 + }, + { + "epoch": 4.089202880859375e-05, + "step": 26799, + "training_step_time": 0.10721564292907715 + }, + { + "epoch": 4.08935546875e-05, + "grad_norm": 0.07082542032003403, + "learning_rate": 3.078521543642399e-06, + "loss": 0.004, + "step": 26800 + }, + { + "epoch": 4.08935546875e-05, + "model_forward_time": 0.02545785903930664, + "step": 26800 + }, + { + "epoch": 4.08935546875e-05, + "step": 26800, + "training_step_time": 0.10879826545715332 + }, + { + "epoch": 4.089508056640625e-05, + "model_forward_time": 0.025609493255615234, + "step": 26801 + }, + { + "epoch": 4.089508056640625e-05, + "step": 26801, + "training_step_time": 0.10764265060424805 + }, + { + "epoch": 4.08966064453125e-05, + "model_forward_time": 0.025213003158569336, + "step": 26802 + }, + { + "epoch": 4.08966064453125e-05, + "step": 26802, + "training_step_time": 0.10943818092346191 + }, + { + "epoch": 4.089813232421875e-05, + "model_forward_time": 0.025181293487548828, + "step": 26803 + }, + { + "epoch": 4.089813232421875e-05, + "step": 26803, + "training_step_time": 0.10697007179260254 + }, + { + "epoch": 4.0899658203125e-05, + "model_forward_time": 0.025313138961791992, + "step": 26804 + }, + { + "epoch": 4.0899658203125e-05, + "step": 26804, + "training_step_time": 0.10487627983093262 + }, + { + "epoch": 4.090118408203125e-05, + "model_forward_time": 0.025311946868896484, + "step": 26805 + }, + { + "epoch": 4.090118408203125e-05, + "step": 26805, + "training_step_time": 0.10634446144104004 + }, + { + "epoch": 4.09027099609375e-05, + "model_forward_time": 0.02514195442199707, + "step": 26806 + }, + { + "epoch": 4.09027099609375e-05, + "step": 26806, + "training_step_time": 0.10591602325439453 + }, + { + "epoch": 4.090423583984375e-05, + "model_forward_time": 0.024975299835205078, + "step": 26807 + }, + { + "epoch": 4.090423583984375e-05, + "step": 26807, + "training_step_time": 0.1493215560913086 + }, + { + "epoch": 4.090576171875e-05, + "model_forward_time": 0.02936553955078125, + "step": 26808 + }, + { + "epoch": 4.090576171875e-05, + "step": 26808, + "training_step_time": 0.10948824882507324 + }, + { + "epoch": 4.090728759765625e-05, + "model_forward_time": 0.02452254295349121, + "step": 26809 + }, + { + "epoch": 4.090728759765625e-05, + "step": 26809, + "training_step_time": 0.14825439453125 + }, + { + "epoch": 4.09088134765625e-05, + "grad_norm": 0.10529822111129761, + "learning_rate": 3.059509213511702e-06, + "loss": 0.0035, + "step": 26810 + }, + { + "epoch": 4.09088134765625e-05, + "model_forward_time": 0.024562597274780273, + "step": 26810 + }, + { + "epoch": 4.09088134765625e-05, + "step": 26810, + "training_step_time": 0.150299072265625 + }, + { + "epoch": 4.091033935546875e-05, + "model_forward_time": 0.024527311325073242, + "step": 26811 + }, + { + "epoch": 4.091033935546875e-05, + "step": 26811, + "training_step_time": 0.18941593170166016 + }, + { + "epoch": 4.0911865234375e-05, + "model_forward_time": 0.024666547775268555, + "step": 26812 + }, + { + "epoch": 4.0911865234375e-05, + "step": 26812, + "training_step_time": 0.15288901329040527 + }, + { + "epoch": 4.091339111328125e-05, + "model_forward_time": 0.02435779571533203, + "step": 26813 + }, + { + "epoch": 4.091339111328125e-05, + "step": 26813, + "training_step_time": 0.1166837215423584 + }, + { + "epoch": 4.09149169921875e-05, + "model_forward_time": 0.024791479110717773, + "step": 26814 + }, + { + "epoch": 4.09149169921875e-05, + "step": 26814, + "training_step_time": 0.1302340030670166 + }, + { + "epoch": 4.091644287109375e-05, + "model_forward_time": 0.025376081466674805, + "step": 26815 + }, + { + "epoch": 4.091644287109375e-05, + "step": 26815, + "training_step_time": 0.10962843894958496 + }, + { + "epoch": 4.091796875e-05, + "model_forward_time": 0.02527165412902832, + "step": 26816 + }, + { + "epoch": 4.091796875e-05, + "step": 26816, + "training_step_time": 0.1553654670715332 + }, + { + "epoch": 4.091949462890625e-05, + "model_forward_time": 0.0246429443359375, + "step": 26817 + }, + { + "epoch": 4.091949462890625e-05, + "step": 26817, + "training_step_time": 0.14779233932495117 + }, + { + "epoch": 4.09210205078125e-05, + "model_forward_time": 0.024532318115234375, + "step": 26818 + }, + { + "epoch": 4.09210205078125e-05, + "step": 26818, + "training_step_time": 0.1265120506286621 + }, + { + "epoch": 4.092254638671875e-05, + "model_forward_time": 0.023479461669921875, + "step": 26819 + }, + { + "epoch": 4.092254638671875e-05, + "step": 26819, + "training_step_time": 0.12608742713928223 + }, + { + "epoch": 4.0924072265625e-05, + "grad_norm": 0.12813250720500946, + "learning_rate": 3.040553920503503e-06, + "loss": 0.0037, + "step": 26820 + }, + { + "epoch": 4.0924072265625e-05, + "model_forward_time": 0.02372288703918457, + "step": 26820 + }, + { + "epoch": 4.0924072265625e-05, + "step": 26820, + "training_step_time": 0.12379789352416992 + }, + { + "epoch": 4.092559814453125e-05, + "model_forward_time": 0.023946046829223633, + "step": 26821 + }, + { + "epoch": 4.092559814453125e-05, + "step": 26821, + "training_step_time": 0.13062000274658203 + }, + { + "epoch": 4.09271240234375e-05, + "model_forward_time": 0.024103641510009766, + "step": 26822 + }, + { + "epoch": 4.09271240234375e-05, + "step": 26822, + "training_step_time": 0.12062263488769531 + }, + { + "epoch": 4.092864990234375e-05, + "model_forward_time": 0.023974180221557617, + "step": 26823 + }, + { + "epoch": 4.092864990234375e-05, + "step": 26823, + "training_step_time": 0.11594200134277344 + }, + { + "epoch": 4.093017578125e-05, + "model_forward_time": 0.024115800857543945, + "step": 26824 + }, + { + "epoch": 4.093017578125e-05, + "step": 26824, + "training_step_time": 0.11386752128601074 + }, + { + "epoch": 4.093170166015625e-05, + "model_forward_time": 0.02384805679321289, + "step": 26825 + }, + { + "epoch": 4.093170166015625e-05, + "step": 26825, + "training_step_time": 0.11443924903869629 + }, + { + "epoch": 4.09332275390625e-05, + "model_forward_time": 0.023669958114624023, + "step": 26826 + }, + { + "epoch": 4.09332275390625e-05, + "step": 26826, + "training_step_time": 0.11028933525085449 + }, + { + "epoch": 4.093475341796875e-05, + "model_forward_time": 0.024934053421020508, + "step": 26827 + }, + { + "epoch": 4.093475341796875e-05, + "step": 26827, + "training_step_time": 0.1070094108581543 + }, + { + "epoch": 4.0936279296875e-05, + "model_forward_time": 0.025259733200073242, + "step": 26828 + }, + { + "epoch": 4.0936279296875e-05, + "step": 26828, + "training_step_time": 0.13768768310546875 + }, + { + "epoch": 4.093780517578125e-05, + "model_forward_time": 0.025411367416381836, + "step": 26829 + }, + { + "epoch": 4.093780517578125e-05, + "step": 26829, + "training_step_time": 0.1508162021636963 + }, + { + "epoch": 4.09393310546875e-05, + "grad_norm": 0.0825091302394867, + "learning_rate": 3.021655687650282e-06, + "loss": 0.003, + "step": 26830 + }, + { + "epoch": 4.09393310546875e-05, + "model_forward_time": 0.02431035041809082, + "step": 26830 + }, + { + "epoch": 4.09393310546875e-05, + "step": 26830, + "training_step_time": 0.14912962913513184 + }, + { + "epoch": 4.094085693359375e-05, + "model_forward_time": 0.024685382843017578, + "step": 26831 + }, + { + "epoch": 4.094085693359375e-05, + "step": 26831, + "training_step_time": 0.17029905319213867 + }, + { + "epoch": 4.09423828125e-05, + "model_forward_time": 0.0244143009185791, + "step": 26832 + }, + { + "epoch": 4.09423828125e-05, + "step": 26832, + "training_step_time": 0.10464024543762207 + }, + { + "epoch": 4.094390869140625e-05, + "model_forward_time": 0.024786949157714844, + "step": 26833 + }, + { + "epoch": 4.094390869140625e-05, + "step": 26833, + "training_step_time": 0.1954343318939209 + }, + { + "epoch": 4.09454345703125e-05, + "model_forward_time": 0.02444767951965332, + "step": 26834 + }, + { + "epoch": 4.09454345703125e-05, + "step": 26834, + "training_step_time": 0.103271484375 + }, + { + "epoch": 4.094696044921875e-05, + "model_forward_time": 0.02415156364440918, + "step": 26835 + }, + { + "epoch": 4.094696044921875e-05, + "step": 26835, + "training_step_time": 0.10453104972839355 + }, + { + "epoch": 4.0948486328125e-05, + "model_forward_time": 0.025792598724365234, + "step": 26836 + }, + { + "epoch": 4.0948486328125e-05, + "step": 26836, + "training_step_time": 0.10685515403747559 + }, + { + "epoch": 4.095001220703125e-05, + "model_forward_time": 0.025383710861206055, + "step": 26837 + }, + { + "epoch": 4.095001220703125e-05, + "step": 26837, + "training_step_time": 0.10424304008483887 + }, + { + "epoch": 4.09515380859375e-05, + "model_forward_time": 0.025351524353027344, + "step": 26838 + }, + { + "epoch": 4.09515380859375e-05, + "step": 26838, + "training_step_time": 0.10774588584899902 + }, + { + "epoch": 4.095306396484375e-05, + "model_forward_time": 0.024941205978393555, + "step": 26839 + }, + { + "epoch": 4.095306396484375e-05, + "step": 26839, + "training_step_time": 0.10336971282958984 + }, + { + "epoch": 4.095458984375e-05, + "grad_norm": 0.24770134687423706, + "learning_rate": 3.0028145379151716e-06, + "loss": 0.0088, + "step": 26840 + }, + { + "epoch": 4.095458984375e-05, + "model_forward_time": 0.02497553825378418, + "step": 26840 + }, + { + "epoch": 4.095458984375e-05, + "step": 26840, + "training_step_time": 0.10447144508361816 + }, + { + "epoch": 4.095611572265625e-05, + "model_forward_time": 0.02541065216064453, + "step": 26841 + }, + { + "epoch": 4.095611572265625e-05, + "step": 26841, + "training_step_time": 0.10454964637756348 + }, + { + "epoch": 4.09576416015625e-05, + "model_forward_time": 0.02534174919128418, + "step": 26842 + }, + { + "epoch": 4.09576416015625e-05, + "step": 26842, + "training_step_time": 0.10695767402648926 + }, + { + "epoch": 4.095916748046875e-05, + "model_forward_time": 0.02536487579345703, + "step": 26843 + }, + { + "epoch": 4.095916748046875e-05, + "step": 26843, + "training_step_time": 0.1092684268951416 + }, + { + "epoch": 4.0960693359375e-05, + "model_forward_time": 0.026121854782104492, + "step": 26844 + }, + { + "epoch": 4.0960693359375e-05, + "step": 26844, + "training_step_time": 0.10672616958618164 + }, + { + "epoch": 4.096221923828125e-05, + "model_forward_time": 0.02857661247253418, + "step": 26845 + }, + { + "epoch": 4.096221923828125e-05, + "step": 26845, + "training_step_time": 0.10812854766845703 + }, + { + "epoch": 4.09637451171875e-05, + "model_forward_time": 0.02514338493347168, + "step": 26846 + }, + { + "epoch": 4.09637451171875e-05, + "step": 26846, + "training_step_time": 0.10379528999328613 + }, + { + "epoch": 4.096527099609375e-05, + "model_forward_time": 0.02532196044921875, + "step": 26847 + }, + { + "epoch": 4.096527099609375e-05, + "step": 26847, + "training_step_time": 0.1054384708404541 + }, + { + "epoch": 4.0966796875e-05, + "model_forward_time": 0.025258779525756836, + "step": 26848 + }, + { + "epoch": 4.0966796875e-05, + "step": 26848, + "training_step_time": 0.10576868057250977 + }, + { + "epoch": 4.096832275390625e-05, + "model_forward_time": 0.024927139282226562, + "step": 26849 + }, + { + "epoch": 4.096832275390625e-05, + "step": 26849, + "training_step_time": 0.10403323173522949 + }, + { + "epoch": 4.09698486328125e-05, + "grad_norm": 0.301150918006897, + "learning_rate": 2.9840304941919415e-06, + "loss": 0.0056, + "step": 26850 + }, + { + "epoch": 4.09698486328125e-05, + "model_forward_time": 0.024908781051635742, + "step": 26850 + }, + { + "epoch": 4.09698486328125e-05, + "step": 26850, + "training_step_time": 0.10323572158813477 + }, + { + "epoch": 4.097137451171875e-05, + "model_forward_time": 0.02469468116760254, + "step": 26851 + }, + { + "epoch": 4.097137451171875e-05, + "step": 26851, + "training_step_time": 0.1034543514251709 + }, + { + "epoch": 4.0972900390625e-05, + "model_forward_time": 0.0252072811126709, + "step": 26852 + }, + { + "epoch": 4.0972900390625e-05, + "step": 26852, + "training_step_time": 0.14432215690612793 + }, + { + "epoch": 4.097442626953125e-05, + "model_forward_time": 0.02497243881225586, + "step": 26853 + }, + { + "epoch": 4.097442626953125e-05, + "step": 26853, + "training_step_time": 0.10369205474853516 + }, + { + "epoch": 4.09759521484375e-05, + "model_forward_time": 0.024309158325195312, + "step": 26854 + }, + { + "epoch": 4.09759521484375e-05, + "step": 26854, + "training_step_time": 0.18585491180419922 + }, + { + "epoch": 4.097747802734375e-05, + "model_forward_time": 0.02435755729675293, + "step": 26855 + }, + { + "epoch": 4.097747802734375e-05, + "step": 26855, + "training_step_time": 0.13795256614685059 + }, + { + "epoch": 4.097900390625e-05, + "model_forward_time": 0.02453303337097168, + "step": 26856 + }, + { + "epoch": 4.097900390625e-05, + "step": 26856, + "training_step_time": 0.11211752891540527 + }, + { + "epoch": 4.098052978515625e-05, + "model_forward_time": 0.025313377380371094, + "step": 26857 + }, + { + "epoch": 4.098052978515625e-05, + "step": 26857, + "training_step_time": 0.22170019149780273 + }, + { + "epoch": 4.09820556640625e-05, + "model_forward_time": 0.0244598388671875, + "step": 26858 + }, + { + "epoch": 4.09820556640625e-05, + "step": 26858, + "training_step_time": 0.11779308319091797 + }, + { + "epoch": 4.098358154296875e-05, + "model_forward_time": 0.024547100067138672, + "step": 26859 + }, + { + "epoch": 4.098358154296875e-05, + "step": 26859, + "training_step_time": 0.12204194068908691 + }, + { + "epoch": 4.0985107421875e-05, + "grad_norm": 0.07965141534805298, + "learning_rate": 2.965303579304973e-06, + "loss": 0.0042, + "step": 26860 + }, + { + "epoch": 4.0985107421875e-05, + "model_forward_time": 0.024420976638793945, + "step": 26860 + }, + { + "epoch": 4.0985107421875e-05, + "step": 26860, + "training_step_time": 0.16338467597961426 + }, + { + "epoch": 4.098663330078125e-05, + "model_forward_time": 0.024096012115478516, + "step": 26861 + }, + { + "epoch": 4.098663330078125e-05, + "step": 26861, + "training_step_time": 0.2120952606201172 + }, + { + "epoch": 4.09881591796875e-05, + "model_forward_time": 0.024346590042114258, + "step": 26862 + }, + { + "epoch": 4.09881591796875e-05, + "step": 26862, + "training_step_time": 0.11568140983581543 + }, + { + "epoch": 4.098968505859375e-05, + "model_forward_time": 0.02457737922668457, + "step": 26863 + }, + { + "epoch": 4.098968505859375e-05, + "step": 26863, + "training_step_time": 0.10689544677734375 + }, + { + "epoch": 4.09912109375e-05, + "model_forward_time": 0.02512669563293457, + "step": 26864 + }, + { + "epoch": 4.09912109375e-05, + "step": 26864, + "training_step_time": 0.11184382438659668 + }, + { + "epoch": 4.099273681640625e-05, + "model_forward_time": 0.024976253509521484, + "step": 26865 + }, + { + "epoch": 4.099273681640625e-05, + "step": 26865, + "training_step_time": 0.10662627220153809 + }, + { + "epoch": 4.09942626953125e-05, + "model_forward_time": 0.025468826293945312, + "step": 26866 + }, + { + "epoch": 4.09942626953125e-05, + "step": 26866, + "training_step_time": 0.10573887825012207 + }, + { + "epoch": 4.099578857421875e-05, + "model_forward_time": 0.024996042251586914, + "step": 26867 + }, + { + "epoch": 4.099578857421875e-05, + "step": 26867, + "training_step_time": 0.10895824432373047 + }, + { + "epoch": 4.0997314453125e-05, + "model_forward_time": 0.025645017623901367, + "step": 26868 + }, + { + "epoch": 4.0997314453125e-05, + "step": 26868, + "training_step_time": 0.10598373413085938 + }, + { + "epoch": 4.099884033203125e-05, + "model_forward_time": 0.025200366973876953, + "step": 26869 + }, + { + "epoch": 4.099884033203125e-05, + "step": 26869, + "training_step_time": 0.10590648651123047 + }, + { + "epoch": 4.10003662109375e-05, + "grad_norm": 0.1878916472196579, + "learning_rate": 2.946633816009242e-06, + "loss": 0.0036, + "step": 26870 + }, + { + "epoch": 4.10003662109375e-05, + "model_forward_time": 0.02509593963623047, + "step": 26870 + }, + { + "epoch": 4.10003662109375e-05, + "step": 26870, + "training_step_time": 0.1047048568725586 + }, + { + "epoch": 4.100189208984375e-05, + "model_forward_time": 0.0250699520111084, + "step": 26871 + }, + { + "epoch": 4.100189208984375e-05, + "step": 26871, + "training_step_time": 0.10566830635070801 + }, + { + "epoch": 4.100341796875e-05, + "model_forward_time": 0.025211334228515625, + "step": 26872 + }, + { + "epoch": 4.100341796875e-05, + "step": 26872, + "training_step_time": 0.10526847839355469 + }, + { + "epoch": 4.100494384765625e-05, + "model_forward_time": 0.024981021881103516, + "step": 26873 + }, + { + "epoch": 4.100494384765625e-05, + "step": 26873, + "training_step_time": 0.1922311782836914 + }, + { + "epoch": 4.10064697265625e-05, + "model_forward_time": 0.024867534637451172, + "step": 26874 + }, + { + "epoch": 4.10064697265625e-05, + "step": 26874, + "training_step_time": 0.11589479446411133 + }, + { + "epoch": 4.100799560546875e-05, + "model_forward_time": 0.024444103240966797, + "step": 26875 + }, + { + "epoch": 4.100799560546875e-05, + "step": 26875, + "training_step_time": 0.10891413688659668 + }, + { + "epoch": 4.1009521484375e-05, + "model_forward_time": 0.025258541107177734, + "step": 26876 + }, + { + "epoch": 4.1009521484375e-05, + "step": 26876, + "training_step_time": 0.1143946647644043 + }, + { + "epoch": 4.101104736328125e-05, + "model_forward_time": 0.025269746780395508, + "step": 26877 + }, + { + "epoch": 4.101104736328125e-05, + "step": 26877, + "training_step_time": 0.1162424087524414 + }, + { + "epoch": 4.10125732421875e-05, + "model_forward_time": 0.026995182037353516, + "step": 26878 + }, + { + "epoch": 4.10125732421875e-05, + "step": 26878, + "training_step_time": 0.1124117374420166 + }, + { + "epoch": 4.101409912109375e-05, + "model_forward_time": 0.024773597717285156, + "step": 26879 + }, + { + "epoch": 4.101409912109375e-05, + "step": 26879, + "training_step_time": 0.18625998497009277 + }, + { + "epoch": 4.1015625e-05, + "grad_norm": 0.2622108459472656, + "learning_rate": 2.928021226990263e-06, + "loss": 0.0053, + "step": 26880 + }, + { + "epoch": 4.1015625e-05, + "model_forward_time": 0.024438142776489258, + "step": 26880 + }, + { + "epoch": 4.1015625e-05, + "step": 26880, + "training_step_time": 0.10392928123474121 + }, + { + "epoch": 4.101715087890625e-05, + "model_forward_time": 0.02428913116455078, + "step": 26881 + }, + { + "epoch": 4.101715087890625e-05, + "step": 26881, + "training_step_time": 0.10577392578125 + }, + { + "epoch": 4.10186767578125e-05, + "model_forward_time": 0.024899005889892578, + "step": 26882 + }, + { + "epoch": 4.10186767578125e-05, + "step": 26882, + "training_step_time": 0.1053929328918457 + }, + { + "epoch": 4.102020263671875e-05, + "model_forward_time": 0.02494668960571289, + "step": 26883 + }, + { + "epoch": 4.102020263671875e-05, + "step": 26883, + "training_step_time": 0.1086430549621582 + }, + { + "epoch": 4.1021728515625e-05, + "model_forward_time": 0.025395870208740234, + "step": 26884 + }, + { + "epoch": 4.1021728515625e-05, + "step": 26884, + "training_step_time": 0.10634088516235352 + }, + { + "epoch": 4.102325439453125e-05, + "model_forward_time": 0.025565624237060547, + "step": 26885 + }, + { + "epoch": 4.102325439453125e-05, + "step": 26885, + "training_step_time": 0.10760998725891113 + }, + { + "epoch": 4.10247802734375e-05, + "model_forward_time": 0.025504589080810547, + "step": 26886 + }, + { + "epoch": 4.10247802734375e-05, + "step": 26886, + "training_step_time": 0.10422182083129883 + }, + { + "epoch": 4.102630615234375e-05, + "model_forward_time": 0.02504587173461914, + "step": 26887 + }, + { + "epoch": 4.102630615234375e-05, + "step": 26887, + "training_step_time": 0.6076052188873291 + }, + { + "epoch": 4.102783203125e-05, + "model_forward_time": 0.023173809051513672, + "step": 26888 + }, + { + "epoch": 4.102783203125e-05, + "step": 26888, + "training_step_time": 0.09851264953613281 + }, + { + "epoch": 4.102935791015625e-05, + "model_forward_time": 0.024643898010253906, + "step": 26889 + }, + { + "epoch": 4.102935791015625e-05, + "step": 26889, + "training_step_time": 0.10322785377502441 + }, + { + "epoch": 4.10308837890625e-05, + "grad_norm": 0.08564291894435883, + "learning_rate": 2.9094658348640945e-06, + "loss": 0.005, + "step": 26890 + }, + { + "epoch": 4.10308837890625e-05, + "model_forward_time": 0.025710105895996094, + "step": 26890 + }, + { + "epoch": 4.10308837890625e-05, + "step": 26890, + "training_step_time": 0.10444426536560059 + }, + { + "epoch": 4.103240966796875e-05, + "model_forward_time": 0.02441883087158203, + "step": 26891 + }, + { + "epoch": 4.103240966796875e-05, + "step": 26891, + "training_step_time": 0.11056947708129883 + }, + { + "epoch": 4.1033935546875e-05, + "model_forward_time": 0.025146007537841797, + "step": 26892 + }, + { + "epoch": 4.1033935546875e-05, + "step": 26892, + "training_step_time": 0.1063385009765625 + }, + { + "epoch": 4.103546142578125e-05, + "model_forward_time": 0.02554774284362793, + "step": 26893 + }, + { + "epoch": 4.103546142578125e-05, + "step": 26893, + "training_step_time": 0.10592961311340332 + }, + { + "epoch": 4.10369873046875e-05, + "model_forward_time": 0.026050567626953125, + "step": 26894 + }, + { + "epoch": 4.10369873046875e-05, + "step": 26894, + "training_step_time": 0.18458032608032227 + }, + { + "epoch": 4.103851318359375e-05, + "model_forward_time": 0.025455236434936523, + "step": 26895 + }, + { + "epoch": 4.103851318359375e-05, + "step": 26895, + "training_step_time": 0.11171698570251465 + }, + { + "epoch": 4.10400390625e-05, + "model_forward_time": 0.024263381958007812, + "step": 26896 + }, + { + "epoch": 4.10400390625e-05, + "step": 26896, + "training_step_time": 0.20047402381896973 + }, + { + "epoch": 4.104156494140625e-05, + "model_forward_time": 0.024469375610351562, + "step": 26897 + }, + { + "epoch": 4.104156494140625e-05, + "step": 26897, + "training_step_time": 0.14942646026611328 + }, + { + "epoch": 4.10430908203125e-05, + "model_forward_time": 0.02429485321044922, + "step": 26898 + }, + { + "epoch": 4.10430908203125e-05, + "step": 26898, + "training_step_time": 0.2046360969543457 + }, + { + "epoch": 4.104461669921875e-05, + "model_forward_time": 0.02417445182800293, + "step": 26899 + }, + { + "epoch": 4.104461669921875e-05, + "step": 26899, + "training_step_time": 0.19893884658813477 + }, + { + "epoch": 4.1046142578125e-05, + "grad_norm": 0.12696179747581482, + "learning_rate": 2.890967662177285e-06, + "loss": 0.0037, + "step": 26900 + }, + { + "epoch": 4.1046142578125e-05, + "model_forward_time": 0.024502992630004883, + "step": 26900 + }, + { + "epoch": 4.1046142578125e-05, + "step": 26900, + "training_step_time": 0.15789294242858887 + }, + { + "epoch": 4.104766845703125e-05, + "model_forward_time": 0.024645090103149414, + "step": 26901 + }, + { + "epoch": 4.104766845703125e-05, + "step": 26901, + "training_step_time": 0.15793752670288086 + }, + { + "epoch": 4.10491943359375e-05, + "model_forward_time": 0.024790287017822266, + "step": 26902 + }, + { + "epoch": 4.10491943359375e-05, + "step": 26902, + "training_step_time": 0.2149806022644043 + }, + { + "epoch": 4.105072021484375e-05, + "model_forward_time": 0.024631977081298828, + "step": 26903 + }, + { + "epoch": 4.105072021484375e-05, + "step": 26903, + "training_step_time": 0.11391448974609375 + }, + { + "epoch": 4.105224609375e-05, + "model_forward_time": 0.024395227432250977, + "step": 26904 + }, + { + "epoch": 4.105224609375e-05, + "step": 26904, + "training_step_time": 0.105194091796875 + }, + { + "epoch": 4.105377197265625e-05, + "model_forward_time": 0.02494502067565918, + "step": 26905 + }, + { + "epoch": 4.105377197265625e-05, + "step": 26905, + "training_step_time": 0.1076509952545166 + }, + { + "epoch": 4.10552978515625e-05, + "model_forward_time": 0.02440953254699707, + "step": 26906 + }, + { + "epoch": 4.10552978515625e-05, + "step": 26906, + "training_step_time": 0.1060950756072998 + }, + { + "epoch": 4.105682373046875e-05, + "model_forward_time": 0.023839712142944336, + "step": 26907 + }, + { + "epoch": 4.105682373046875e-05, + "step": 26907, + "training_step_time": 0.10770535469055176 + }, + { + "epoch": 4.1058349609375e-05, + "model_forward_time": 0.025078773498535156, + "step": 26908 + }, + { + "epoch": 4.1058349609375e-05, + "step": 26908, + "training_step_time": 0.10463571548461914 + }, + { + "epoch": 4.105987548828125e-05, + "model_forward_time": 0.024875402450561523, + "step": 26909 + }, + { + "epoch": 4.105987548828125e-05, + "step": 26909, + "training_step_time": 0.1064152717590332 + }, + { + "epoch": 4.10614013671875e-05, + "grad_norm": 0.039222050458192825, + "learning_rate": 2.8725267314068495e-06, + "loss": 0.0072, + "step": 26910 + }, + { + "epoch": 4.10614013671875e-05, + "model_forward_time": 0.025716781616210938, + "step": 26910 + }, + { + "epoch": 4.10614013671875e-05, + "step": 26910, + "training_step_time": 0.10561347007751465 + }, + { + "epoch": 4.106292724609375e-05, + "model_forward_time": 0.02525949478149414, + "step": 26911 + }, + { + "epoch": 4.106292724609375e-05, + "step": 26911, + "training_step_time": 0.10338473320007324 + }, + { + "epoch": 4.1064453125e-05, + "model_forward_time": 0.02485823631286621, + "step": 26912 + }, + { + "epoch": 4.1064453125e-05, + "step": 26912, + "training_step_time": 0.10731053352355957 + }, + { + "epoch": 4.106597900390625e-05, + "model_forward_time": 0.0251772403717041, + "step": 26913 + }, + { + "epoch": 4.106597900390625e-05, + "step": 26913, + "training_step_time": 0.10478639602661133 + }, + { + "epoch": 4.10675048828125e-05, + "model_forward_time": 0.02533698081970215, + "step": 26914 + }, + { + "epoch": 4.10675048828125e-05, + "step": 26914, + "training_step_time": 0.19304704666137695 + }, + { + "epoch": 4.106903076171875e-05, + "model_forward_time": 0.02449202537536621, + "step": 26915 + }, + { + "epoch": 4.106903076171875e-05, + "step": 26915, + "training_step_time": 0.14485764503479004 + }, + { + "epoch": 4.1070556640625e-05, + "model_forward_time": 0.024268388748168945, + "step": 26916 + }, + { + "epoch": 4.1070556640625e-05, + "step": 26916, + "training_step_time": 0.10529422760009766 + }, + { + "epoch": 4.107208251953125e-05, + "model_forward_time": 0.0252840518951416, + "step": 26917 + }, + { + "epoch": 4.107208251953125e-05, + "step": 26917, + "training_step_time": 0.10695004463195801 + }, + { + "epoch": 4.10736083984375e-05, + "model_forward_time": 0.025008440017700195, + "step": 26918 + }, + { + "epoch": 4.10736083984375e-05, + "step": 26918, + "training_step_time": 0.1177053451538086 + }, + { + "epoch": 4.107513427734375e-05, + "model_forward_time": 0.025182723999023438, + "step": 26919 + }, + { + "epoch": 4.107513427734375e-05, + "step": 26919, + "training_step_time": 0.10768604278564453 + }, + { + "epoch": 4.107666015625e-05, + "grad_norm": 0.31678056716918945, + "learning_rate": 2.854143064960274e-06, + "loss": 0.0069, + "step": 26920 + }, + { + "epoch": 4.107666015625e-05, + "model_forward_time": 0.025304794311523438, + "step": 26920 + }, + { + "epoch": 4.107666015625e-05, + "step": 26920, + "training_step_time": 0.18750691413879395 + }, + { + "epoch": 4.107818603515625e-05, + "model_forward_time": 0.02489495277404785, + "step": 26921 + }, + { + "epoch": 4.107818603515625e-05, + "step": 26921, + "training_step_time": 0.10476827621459961 + }, + { + "epoch": 4.10797119140625e-05, + "model_forward_time": 0.024906635284423828, + "step": 26922 + }, + { + "epoch": 4.10797119140625e-05, + "step": 26922, + "training_step_time": 0.10395693778991699 + }, + { + "epoch": 4.108123779296875e-05, + "model_forward_time": 0.02497720718383789, + "step": 26923 + }, + { + "epoch": 4.108123779296875e-05, + "step": 26923, + "training_step_time": 0.10654187202453613 + }, + { + "epoch": 4.1082763671875e-05, + "model_forward_time": 0.025064945220947266, + "step": 26924 + }, + { + "epoch": 4.1082763671875e-05, + "step": 26924, + "training_step_time": 0.10451483726501465 + }, + { + "epoch": 4.108428955078125e-05, + "model_forward_time": 0.02535533905029297, + "step": 26925 + }, + { + "epoch": 4.108428955078125e-05, + "step": 26925, + "training_step_time": 0.10527586936950684 + }, + { + "epoch": 4.10858154296875e-05, + "model_forward_time": 0.02514171600341797, + "step": 26926 + }, + { + "epoch": 4.10858154296875e-05, + "step": 26926, + "training_step_time": 0.10570073127746582 + }, + { + "epoch": 4.108734130859375e-05, + "model_forward_time": 0.025611162185668945, + "step": 26927 + }, + { + "epoch": 4.108734130859375e-05, + "step": 26927, + "training_step_time": 0.10526347160339355 + }, + { + "epoch": 4.10888671875e-05, + "model_forward_time": 0.025429487228393555, + "step": 26928 + }, + { + "epoch": 4.10888671875e-05, + "step": 26928, + "training_step_time": 0.10625648498535156 + }, + { + "epoch": 4.109039306640625e-05, + "model_forward_time": 0.025321006774902344, + "step": 26929 + }, + { + "epoch": 4.109039306640625e-05, + "step": 26929, + "training_step_time": 0.10836553573608398 + }, + { + "epoch": 4.10919189453125e-05, + "grad_norm": 0.11299989372491837, + "learning_rate": 2.8358166851754297e-06, + "loss": 0.0039, + "step": 26930 + }, + { + "epoch": 4.10919189453125e-05, + "model_forward_time": 0.02521347999572754, + "step": 26930 + }, + { + "epoch": 4.10919189453125e-05, + "step": 26930, + "training_step_time": 0.10728716850280762 + }, + { + "epoch": 4.109344482421875e-05, + "model_forward_time": 0.025056123733520508, + "step": 26931 + }, + { + "epoch": 4.109344482421875e-05, + "step": 26931, + "training_step_time": 0.10994529724121094 + }, + { + "epoch": 4.1094970703125e-05, + "model_forward_time": 0.02533578872680664, + "step": 26932 + }, + { + "epoch": 4.1094970703125e-05, + "step": 26932, + "training_step_time": 0.10656118392944336 + }, + { + "epoch": 4.109649658203125e-05, + "model_forward_time": 0.025205373764038086, + "step": 26933 + }, + { + "epoch": 4.109649658203125e-05, + "step": 26933, + "training_step_time": 0.10460543632507324 + }, + { + "epoch": 4.10980224609375e-05, + "model_forward_time": 0.025433063507080078, + "step": 26934 + }, + { + "epoch": 4.10980224609375e-05, + "step": 26934, + "training_step_time": 0.1058189868927002 + }, + { + "epoch": 4.109954833984375e-05, + "model_forward_time": 0.02487468719482422, + "step": 26935 + }, + { + "epoch": 4.109954833984375e-05, + "step": 26935, + "training_step_time": 0.10622310638427734 + }, + { + "epoch": 4.110107421875e-05, + "model_forward_time": 0.025209665298461914, + "step": 26936 + }, + { + "epoch": 4.110107421875e-05, + "step": 26936, + "training_step_time": 0.10853171348571777 + }, + { + "epoch": 4.110260009765625e-05, + "model_forward_time": 0.025269031524658203, + "step": 26937 + }, + { + "epoch": 4.110260009765625e-05, + "step": 26937, + "training_step_time": 0.10754728317260742 + }, + { + "epoch": 4.11041259765625e-05, + "model_forward_time": 0.024928569793701172, + "step": 26938 + }, + { + "epoch": 4.11041259765625e-05, + "step": 26938, + "training_step_time": 0.10909342765808105 + }, + { + "epoch": 4.110565185546875e-05, + "model_forward_time": 0.025156259536743164, + "step": 26939 + }, + { + "epoch": 4.110565185546875e-05, + "step": 26939, + "training_step_time": 0.197983980178833 + }, + { + "epoch": 4.1107177734375e-05, + "grad_norm": 0.32797518372535706, + "learning_rate": 2.817547614320615e-06, + "loss": 0.0135, + "step": 26940 + }, + { + "epoch": 4.1107177734375e-05, + "model_forward_time": 0.02484917640686035, + "step": 26940 + }, + { + "epoch": 4.1107177734375e-05, + "step": 26940, + "training_step_time": 0.10427689552307129 + }, + { + "epoch": 4.110870361328125e-05, + "model_forward_time": 0.024303674697875977, + "step": 26941 + }, + { + "epoch": 4.110870361328125e-05, + "step": 26941, + "training_step_time": 0.2096545696258545 + }, + { + "epoch": 4.11102294921875e-05, + "model_forward_time": 0.024344205856323242, + "step": 26942 + }, + { + "epoch": 4.11102294921875e-05, + "step": 26942, + "training_step_time": 0.12908577919006348 + }, + { + "epoch": 4.111175537109375e-05, + "model_forward_time": 0.024187564849853516, + "step": 26943 + }, + { + "epoch": 4.111175537109375e-05, + "step": 26943, + "training_step_time": 0.20830965042114258 + }, + { + "epoch": 4.111328125e-05, + "model_forward_time": 0.027543067932128906, + "step": 26944 + }, + { + "epoch": 4.111328125e-05, + "step": 26944, + "training_step_time": 0.18845725059509277 + }, + { + "epoch": 4.111480712890625e-05, + "model_forward_time": 0.024318695068359375, + "step": 26945 + }, + { + "epoch": 4.111480712890625e-05, + "step": 26945, + "training_step_time": 0.1826949119567871 + }, + { + "epoch": 4.11163330078125e-05, + "model_forward_time": 0.024022579193115234, + "step": 26946 + }, + { + "epoch": 4.11163330078125e-05, + "step": 26946, + "training_step_time": 0.15043330192565918 + }, + { + "epoch": 4.111785888671875e-05, + "model_forward_time": 0.024137020111083984, + "step": 26947 + }, + { + "epoch": 4.111785888671875e-05, + "step": 26947, + "training_step_time": 0.1702404022216797 + }, + { + "epoch": 4.1119384765625e-05, + "model_forward_time": 0.024047374725341797, + "step": 26948 + }, + { + "epoch": 4.1119384765625e-05, + "step": 26948, + "training_step_time": 0.16971158981323242 + }, + { + "epoch": 4.112091064453125e-05, + "model_forward_time": 0.024654626846313477, + "step": 26949 + }, + { + "epoch": 4.112091064453125e-05, + "step": 26949, + "training_step_time": 0.10151839256286621 + }, + { + "epoch": 4.11224365234375e-05, + "grad_norm": 0.06291752308607101, + "learning_rate": 2.7993358745944608e-06, + "loss": 0.0071, + "step": 26950 + }, + { + "epoch": 4.11224365234375e-05, + "model_forward_time": 0.024881362915039062, + "step": 26950 + }, + { + "epoch": 4.11224365234375e-05, + "step": 26950, + "training_step_time": 0.10083627700805664 + }, + { + "epoch": 4.112396240234375e-05, + "model_forward_time": 0.025052309036254883, + "step": 26951 + }, + { + "epoch": 4.112396240234375e-05, + "step": 26951, + "training_step_time": 0.10433268547058105 + }, + { + "epoch": 4.112548828125e-05, + "model_forward_time": 0.025424718856811523, + "step": 26952 + }, + { + "epoch": 4.112548828125e-05, + "step": 26952, + "training_step_time": 0.10338091850280762 + }, + { + "epoch": 4.112701416015625e-05, + "model_forward_time": 0.02649664878845215, + "step": 26953 + }, + { + "epoch": 4.112701416015625e-05, + "step": 26953, + "training_step_time": 0.10530352592468262 + }, + { + "epoch": 4.11285400390625e-05, + "model_forward_time": 0.02490830421447754, + "step": 26954 + }, + { + "epoch": 4.11285400390625e-05, + "step": 26954, + "training_step_time": 0.10238313674926758 + }, + { + "epoch": 4.113006591796875e-05, + "model_forward_time": 0.024704456329345703, + "step": 26955 + }, + { + "epoch": 4.113006591796875e-05, + "step": 26955, + "training_step_time": 0.10378575325012207 + }, + { + "epoch": 4.1131591796875e-05, + "model_forward_time": 0.02493453025817871, + "step": 26956 + }, + { + "epoch": 4.1131591796875e-05, + "step": 26956, + "training_step_time": 0.10528993606567383 + }, + { + "epoch": 4.113311767578125e-05, + "model_forward_time": 0.0252382755279541, + "step": 26957 + }, + { + "epoch": 4.113311767578125e-05, + "step": 26957, + "training_step_time": 0.10595273971557617 + }, + { + "epoch": 4.11346435546875e-05, + "model_forward_time": 0.02518177032470703, + "step": 26958 + }, + { + "epoch": 4.11346435546875e-05, + "step": 26958, + "training_step_time": 0.10848474502563477 + }, + { + "epoch": 4.113616943359375e-05, + "model_forward_time": 0.025376081466674805, + "step": 26959 + }, + { + "epoch": 4.113616943359375e-05, + "step": 26959, + "training_step_time": 0.10443806648254395 + }, + { + "epoch": 4.11376953125e-05, + "grad_norm": 0.04742514714598656, + "learning_rate": 2.7811814881259503e-06, + "loss": 0.0037, + "step": 26960 + }, + { + "epoch": 4.11376953125e-05, + "model_forward_time": 0.024628639221191406, + "step": 26960 + }, + { + "epoch": 4.11376953125e-05, + "step": 26960, + "training_step_time": 0.13226556777954102 + }, + { + "epoch": 4.113922119140625e-05, + "model_forward_time": 0.024543046951293945, + "step": 26961 + }, + { + "epoch": 4.113922119140625e-05, + "step": 26961, + "training_step_time": 0.14272332191467285 + }, + { + "epoch": 4.11407470703125e-05, + "model_forward_time": 0.024466276168823242, + "step": 26962 + }, + { + "epoch": 4.11407470703125e-05, + "step": 26962, + "training_step_time": 0.1117103099822998 + }, + { + "epoch": 4.114227294921875e-05, + "model_forward_time": 0.02486729621887207, + "step": 26963 + }, + { + "epoch": 4.114227294921875e-05, + "step": 26963, + "training_step_time": 0.10391831398010254 + }, + { + "epoch": 4.1143798828125e-05, + "model_forward_time": 0.024837255477905273, + "step": 26964 + }, + { + "epoch": 4.1143798828125e-05, + "step": 26964, + "training_step_time": 0.1153707504272461 + }, + { + "epoch": 4.114532470703125e-05, + "model_forward_time": 0.024841785430908203, + "step": 26965 + }, + { + "epoch": 4.114532470703125e-05, + "step": 26965, + "training_step_time": 0.18411588668823242 + }, + { + "epoch": 4.11468505859375e-05, + "model_forward_time": 0.025398969650268555, + "step": 26966 + }, + { + "epoch": 4.11468505859375e-05, + "step": 26966, + "training_step_time": 0.20819902420043945 + }, + { + "epoch": 4.114837646484375e-05, + "model_forward_time": 0.02320408821105957, + "step": 26967 + }, + { + "epoch": 4.114837646484375e-05, + "step": 26967, + "training_step_time": 0.2066655158996582 + }, + { + "epoch": 4.114990234375e-05, + "model_forward_time": 0.02342510223388672, + "step": 26968 + }, + { + "epoch": 4.114990234375e-05, + "step": 26968, + "training_step_time": 0.19957256317138672 + }, + { + "epoch": 4.115142822265625e-05, + "model_forward_time": 0.023195266723632812, + "step": 26969 + }, + { + "epoch": 4.115142822265625e-05, + "step": 26969, + "training_step_time": 0.1895458698272705 + }, + { + "epoch": 4.11529541015625e-05, + "grad_norm": 0.047411367297172546, + "learning_rate": 2.7630844769743757e-06, + "loss": 0.0075, + "step": 26970 + }, + { + "epoch": 4.11529541015625e-05, + "model_forward_time": 0.02340531349182129, + "step": 26970 + }, + { + "epoch": 4.11529541015625e-05, + "step": 26970, + "training_step_time": 0.18276381492614746 + }, + { + "epoch": 4.115447998046875e-05, + "model_forward_time": 0.024413108825683594, + "step": 26971 + }, + { + "epoch": 4.115447998046875e-05, + "step": 26971, + "training_step_time": 0.1675243377685547 + }, + { + "epoch": 4.1156005859375e-05, + "model_forward_time": 0.023451805114746094, + "step": 26972 + }, + { + "epoch": 4.1156005859375e-05, + "step": 26972, + "training_step_time": 0.14309310913085938 + }, + { + "epoch": 4.115753173828125e-05, + "model_forward_time": 0.024585723876953125, + "step": 26973 + }, + { + "epoch": 4.115753173828125e-05, + "step": 26973, + "training_step_time": 0.14286398887634277 + }, + { + "epoch": 4.11590576171875e-05, + "model_forward_time": 0.024394989013671875, + "step": 26974 + }, + { + "epoch": 4.11590576171875e-05, + "step": 26974, + "training_step_time": 0.13617897033691406 + }, + { + "epoch": 4.116058349609375e-05, + "model_forward_time": 0.024253129959106445, + "step": 26975 + }, + { + "epoch": 4.116058349609375e-05, + "step": 26975, + "training_step_time": 0.1284773349761963 + }, + { + "epoch": 4.1162109375e-05, + "model_forward_time": 0.02426624298095703, + "step": 26976 + }, + { + "epoch": 4.1162109375e-05, + "step": 26976, + "training_step_time": 0.10826921463012695 + }, + { + "epoch": 4.116363525390625e-05, + "model_forward_time": 0.02511119842529297, + "step": 26977 + }, + { + "epoch": 4.116363525390625e-05, + "step": 26977, + "training_step_time": 0.11243176460266113 + }, + { + "epoch": 4.11651611328125e-05, + "model_forward_time": 0.02518939971923828, + "step": 26978 + }, + { + "epoch": 4.11651611328125e-05, + "step": 26978, + "training_step_time": 0.10252261161804199 + }, + { + "epoch": 4.116668701171875e-05, + "model_forward_time": 0.024952411651611328, + "step": 26979 + }, + { + "epoch": 4.116668701171875e-05, + "step": 26979, + "training_step_time": 0.10800957679748535 + }, + { + "epoch": 4.1168212890625e-05, + "grad_norm": 0.042207036167383194, + "learning_rate": 2.7450448631293036e-06, + "loss": 0.0021, + "step": 26980 + }, + { + "epoch": 4.1168212890625e-05, + "model_forward_time": 0.025092601776123047, + "step": 26980 + }, + { + "epoch": 4.1168212890625e-05, + "step": 26980, + "training_step_time": 0.13275837898254395 + }, + { + "epoch": 4.116973876953125e-05, + "model_forward_time": 0.025404930114746094, + "step": 26981 + }, + { + "epoch": 4.116973876953125e-05, + "step": 26981, + "training_step_time": 0.1043252944946289 + }, + { + "epoch": 4.11712646484375e-05, + "model_forward_time": 0.024759292602539062, + "step": 26982 + }, + { + "epoch": 4.11712646484375e-05, + "step": 26982, + "training_step_time": 0.14568853378295898 + }, + { + "epoch": 4.117279052734375e-05, + "model_forward_time": 0.024811983108520508, + "step": 26983 + }, + { + "epoch": 4.117279052734375e-05, + "step": 26983, + "training_step_time": 0.16151905059814453 + }, + { + "epoch": 4.117431640625e-05, + "model_forward_time": 0.02485489845275879, + "step": 26984 + }, + { + "epoch": 4.117431640625e-05, + "step": 26984, + "training_step_time": 0.11294341087341309 + }, + { + "epoch": 4.117584228515625e-05, + "model_forward_time": 0.027725934982299805, + "step": 26985 + }, + { + "epoch": 4.117584228515625e-05, + "step": 26985, + "training_step_time": 0.18981504440307617 + }, + { + "epoch": 4.11773681640625e-05, + "model_forward_time": 0.024152278900146484, + "step": 26986 + }, + { + "epoch": 4.11773681640625e-05, + "step": 26986, + "training_step_time": 0.13902640342712402 + }, + { + "epoch": 4.117889404296875e-05, + "model_forward_time": 0.02409815788269043, + "step": 26987 + }, + { + "epoch": 4.117889404296875e-05, + "step": 26987, + "training_step_time": 0.10868501663208008 + }, + { + "epoch": 4.1180419921875e-05, + "model_forward_time": 0.0255281925201416, + "step": 26988 + }, + { + "epoch": 4.1180419921875e-05, + "step": 26988, + "training_step_time": 0.15943145751953125 + }, + { + "epoch": 4.118194580078125e-05, + "model_forward_time": 0.024147510528564453, + "step": 26989 + }, + { + "epoch": 4.118194580078125e-05, + "step": 26989, + "training_step_time": 0.21471047401428223 + }, + { + "epoch": 4.11834716796875e-05, + "grad_norm": 0.07503164559602737, + "learning_rate": 2.7270626685105828e-06, + "loss": 0.005, + "step": 26990 + }, + { + "epoch": 4.11834716796875e-05, + "model_forward_time": 0.02443552017211914, + "step": 26990 + }, + { + "epoch": 4.11834716796875e-05, + "step": 26990, + "training_step_time": 0.10893750190734863 + }, + { + "epoch": 4.118499755859375e-05, + "model_forward_time": 0.024914026260375977, + "step": 26991 + }, + { + "epoch": 4.118499755859375e-05, + "step": 26991, + "training_step_time": 0.10958528518676758 + }, + { + "epoch": 4.11865234375e-05, + "model_forward_time": 0.024821758270263672, + "step": 26992 + }, + { + "epoch": 4.11865234375e-05, + "step": 26992, + "training_step_time": 0.11252021789550781 + }, + { + "epoch": 4.118804931640625e-05, + "model_forward_time": 0.025542497634887695, + "step": 26993 + }, + { + "epoch": 4.118804931640625e-05, + "step": 26993, + "training_step_time": 0.11174678802490234 + }, + { + "epoch": 4.11895751953125e-05, + "model_forward_time": 0.024907350540161133, + "step": 26994 + }, + { + "epoch": 4.11895751953125e-05, + "step": 26994, + "training_step_time": 0.10571861267089844 + }, + { + "epoch": 4.119110107421875e-05, + "model_forward_time": 0.025124788284301758, + "step": 26995 + }, + { + "epoch": 4.119110107421875e-05, + "step": 26995, + "training_step_time": 0.1074991226196289 + }, + { + "epoch": 4.1192626953125e-05, + "model_forward_time": 0.02523207664489746, + "step": 26996 + }, + { + "epoch": 4.1192626953125e-05, + "step": 26996, + "training_step_time": 0.10913276672363281 + }, + { + "epoch": 4.119415283203125e-05, + "model_forward_time": 0.025465965270996094, + "step": 26997 + }, + { + "epoch": 4.119415283203125e-05, + "step": 26997, + "training_step_time": 0.1304309368133545 + }, + { + "epoch": 4.11956787109375e-05, + "model_forward_time": 0.0254361629486084, + "step": 26998 + }, + { + "epoch": 4.11956787109375e-05, + "step": 26998, + "training_step_time": 0.12624001502990723 + }, + { + "epoch": 4.119720458984375e-05, + "model_forward_time": 0.024965763092041016, + "step": 26999 + }, + { + "epoch": 4.119720458984375e-05, + "step": 26999, + "training_step_time": 0.12414026260375977 + }, + { + "epoch": 4.119873046875e-05, + "grad_norm": 0.09099958837032318, + "learning_rate": 2.7091379149682685e-06, + "loss": 0.0028, + "step": 27000 + }, + { + "epoch": 4.119873046875e-05, + "model_forward_time": 0.023960351943969727, + "step": 27000 + }, + { + "epoch": 4.119873046875e-05, + "step": 27000, + "training_step_time": 0.09961485862731934 + }, + { + "epoch": 4.120025634765625e-05, + "model_forward_time": 0.027181148529052734, + "step": 27001 + }, + { + "epoch": 4.120025634765625e-05, + "step": 27001, + "training_step_time": 0.10092806816101074 + }, + { + "epoch": 4.12017822265625e-05, + "model_forward_time": 0.024754762649536133, + "step": 27002 + }, + { + "epoch": 4.12017822265625e-05, + "step": 27002, + "training_step_time": 0.16902947425842285 + }, + { + "epoch": 4.120330810546875e-05, + "model_forward_time": 0.025109529495239258, + "step": 27003 + }, + { + "epoch": 4.120330810546875e-05, + "step": 27003, + "training_step_time": 0.1345357894897461 + }, + { + "epoch": 4.1204833984375e-05, + "model_forward_time": 0.024495601654052734, + "step": 27004 + }, + { + "epoch": 4.1204833984375e-05, + "step": 27004, + "training_step_time": 0.10534286499023438 + }, + { + "epoch": 4.120635986328125e-05, + "model_forward_time": 0.025167226791381836, + "step": 27005 + }, + { + "epoch": 4.120635986328125e-05, + "step": 27005, + "training_step_time": 0.11098265647888184 + }, + { + "epoch": 4.12078857421875e-05, + "model_forward_time": 0.025261878967285156, + "step": 27006 + }, + { + "epoch": 4.12078857421875e-05, + "step": 27006, + "training_step_time": 0.11241674423217773 + }, + { + "epoch": 4.120941162109375e-05, + "model_forward_time": 0.02484750747680664, + "step": 27007 + }, + { + "epoch": 4.120941162109375e-05, + "step": 27007, + "training_step_time": 0.11015057563781738 + }, + { + "epoch": 4.12109375e-05, + "model_forward_time": 0.025192975997924805, + "step": 27008 + }, + { + "epoch": 4.12109375e-05, + "step": 27008, + "training_step_time": 0.19356870651245117 + }, + { + "epoch": 4.121246337890625e-05, + "model_forward_time": 0.02445363998413086, + "step": 27009 + }, + { + "epoch": 4.121246337890625e-05, + "step": 27009, + "training_step_time": 0.11181640625 + }, + { + "epoch": 4.12139892578125e-05, + "grad_norm": 0.06130315735936165, + "learning_rate": 2.691270624282621e-06, + "loss": 0.0021, + "step": 27010 + }, + { + "epoch": 4.12139892578125e-05, + "model_forward_time": 0.023091554641723633, + "step": 27010 + }, + { + "epoch": 4.12139892578125e-05, + "step": 27010, + "training_step_time": 0.10469198226928711 + }, + { + "epoch": 4.121551513671875e-05, + "model_forward_time": 0.02430129051208496, + "step": 27011 + }, + { + "epoch": 4.121551513671875e-05, + "step": 27011, + "training_step_time": 0.11219048500061035 + }, + { + "epoch": 4.1217041015625e-05, + "model_forward_time": 0.025029659271240234, + "step": 27012 + }, + { + "epoch": 4.1217041015625e-05, + "step": 27012, + "training_step_time": 0.10821890830993652 + }, + { + "epoch": 4.121856689453125e-05, + "model_forward_time": 0.024878978729248047, + "step": 27013 + }, + { + "epoch": 4.121856689453125e-05, + "step": 27013, + "training_step_time": 0.10521078109741211 + }, + { + "epoch": 4.12200927734375e-05, + "model_forward_time": 0.025193452835083008, + "step": 27014 + }, + { + "epoch": 4.12200927734375e-05, + "step": 27014, + "training_step_time": 0.10845804214477539 + }, + { + "epoch": 4.122161865234375e-05, + "model_forward_time": 0.02523493766784668, + "step": 27015 + }, + { + "epoch": 4.122161865234375e-05, + "step": 27015, + "training_step_time": 0.1036832332611084 + }, + { + "epoch": 4.122314453125e-05, + "model_forward_time": 0.025578022003173828, + "step": 27016 + }, + { + "epoch": 4.122314453125e-05, + "step": 27016, + "training_step_time": 0.10643219947814941 + }, + { + "epoch": 4.122467041015625e-05, + "model_forward_time": 0.025878429412841797, + "step": 27017 + }, + { + "epoch": 4.122467041015625e-05, + "step": 27017, + "training_step_time": 0.11125588417053223 + }, + { + "epoch": 4.12261962890625e-05, + "model_forward_time": 0.025020122528076172, + "step": 27018 + }, + { + "epoch": 4.12261962890625e-05, + "step": 27018, + "training_step_time": 0.11412763595581055 + }, + { + "epoch": 4.122772216796875e-05, + "model_forward_time": 0.0253450870513916, + "step": 27019 + }, + { + "epoch": 4.122772216796875e-05, + "step": 27019, + "training_step_time": 0.11370062828063965 + }, + { + "epoch": 4.1229248046875e-05, + "grad_norm": 0.08990398049354553, + "learning_rate": 2.6734608181640917e-06, + "loss": 0.0044, + "step": 27020 + }, + { + "epoch": 4.1229248046875e-05, + "model_forward_time": 0.024729013442993164, + "step": 27020 + }, + { + "epoch": 4.1229248046875e-05, + "step": 27020, + "training_step_time": 0.11831951141357422 + }, + { + "epoch": 4.123077392578125e-05, + "model_forward_time": 0.024185895919799805, + "step": 27021 + }, + { + "epoch": 4.123077392578125e-05, + "step": 27021, + "training_step_time": 0.11263680458068848 + }, + { + "epoch": 4.12322998046875e-05, + "model_forward_time": 0.025237560272216797, + "step": 27022 + }, + { + "epoch": 4.12322998046875e-05, + "step": 27022, + "training_step_time": 0.11316514015197754 + }, + { + "epoch": 4.123382568359375e-05, + "model_forward_time": 0.025025367736816406, + "step": 27023 + }, + { + "epoch": 4.123382568359375e-05, + "step": 27023, + "training_step_time": 0.1130366325378418 + }, + { + "epoch": 4.12353515625e-05, + "model_forward_time": 0.02508997917175293, + "step": 27024 + }, + { + "epoch": 4.12353515625e-05, + "step": 27024, + "training_step_time": 0.11358356475830078 + }, + { + "epoch": 4.123687744140625e-05, + "model_forward_time": 0.024950504302978516, + "step": 27025 + }, + { + "epoch": 4.123687744140625e-05, + "step": 27025, + "training_step_time": 0.10752201080322266 + }, + { + "epoch": 4.12384033203125e-05, + "model_forward_time": 0.02515888214111328, + "step": 27026 + }, + { + "epoch": 4.12384033203125e-05, + "step": 27026, + "training_step_time": 0.10950589179992676 + }, + { + "epoch": 4.123992919921875e-05, + "model_forward_time": 0.025304079055786133, + "step": 27027 + }, + { + "epoch": 4.123992919921875e-05, + "step": 27027, + "training_step_time": 0.1083211898803711 + }, + { + "epoch": 4.1241455078125e-05, + "model_forward_time": 0.02550959587097168, + "step": 27028 + }, + { + "epoch": 4.1241455078125e-05, + "step": 27028, + "training_step_time": 0.1993846893310547 + }, + { + "epoch": 4.124298095703125e-05, + "model_forward_time": 0.02439570426940918, + "step": 27029 + }, + { + "epoch": 4.124298095703125e-05, + "step": 27029, + "training_step_time": 0.10496091842651367 + }, + { + "epoch": 4.12445068359375e-05, + "grad_norm": 0.04527433216571808, + "learning_rate": 2.6557085182532582e-06, + "loss": 0.0022, + "step": 27030 + }, + { + "epoch": 4.12445068359375e-05, + "model_forward_time": 0.02434563636779785, + "step": 27030 + }, + { + "epoch": 4.12445068359375e-05, + "step": 27030, + "training_step_time": 0.2103407382965088 + }, + { + "epoch": 4.124603271484375e-05, + "model_forward_time": 0.024880170822143555, + "step": 27031 + }, + { + "epoch": 4.124603271484375e-05, + "step": 27031, + "training_step_time": 0.15329790115356445 + }, + { + "epoch": 4.124755859375e-05, + "model_forward_time": 0.02429986000061035, + "step": 27032 + }, + { + "epoch": 4.124755859375e-05, + "step": 27032, + "training_step_time": 0.192047119140625 + }, + { + "epoch": 4.124908447265625e-05, + "model_forward_time": 0.02413153648376465, + "step": 27033 + }, + { + "epoch": 4.124908447265625e-05, + "step": 27033, + "training_step_time": 0.19038677215576172 + }, + { + "epoch": 4.12506103515625e-05, + "model_forward_time": 0.02465510368347168, + "step": 27034 + }, + { + "epoch": 4.12506103515625e-05, + "step": 27034, + "training_step_time": 0.14209556579589844 + }, + { + "epoch": 4.125213623046875e-05, + "model_forward_time": 0.025075435638427734, + "step": 27035 + }, + { + "epoch": 4.125213623046875e-05, + "step": 27035, + "training_step_time": 0.21680068969726562 + }, + { + "epoch": 4.1253662109375e-05, + "model_forward_time": 0.024895429611206055, + "step": 27036 + }, + { + "epoch": 4.1253662109375e-05, + "step": 27036, + "training_step_time": 0.11236238479614258 + }, + { + "epoch": 4.125518798828125e-05, + "model_forward_time": 0.02442455291748047, + "step": 27037 + }, + { + "epoch": 4.125518798828125e-05, + "step": 27037, + "training_step_time": 0.10294747352600098 + }, + { + "epoch": 4.12567138671875e-05, + "model_forward_time": 0.02545332908630371, + "step": 27038 + }, + { + "epoch": 4.12567138671875e-05, + "step": 27038, + "training_step_time": 0.10767889022827148 + }, + { + "epoch": 4.125823974609375e-05, + "model_forward_time": 0.025390625, + "step": 27039 + }, + { + "epoch": 4.125823974609375e-05, + "step": 27039, + "training_step_time": 0.10913991928100586 + }, + { + "epoch": 4.1259765625e-05, + "grad_norm": 0.03140696883201599, + "learning_rate": 2.63801374612086e-06, + "loss": 0.0104, + "step": 27040 + }, + { + "epoch": 4.1259765625e-05, + "model_forward_time": 0.023951292037963867, + "step": 27040 + }, + { + "epoch": 4.1259765625e-05, + "step": 27040, + "training_step_time": 0.10563325881958008 + }, + { + "epoch": 4.126129150390625e-05, + "model_forward_time": 0.024246931076049805, + "step": 27041 + }, + { + "epoch": 4.126129150390625e-05, + "step": 27041, + "training_step_time": 0.10909843444824219 + }, + { + "epoch": 4.12628173828125e-05, + "model_forward_time": 0.02557539939880371, + "step": 27042 + }, + { + "epoch": 4.12628173828125e-05, + "step": 27042, + "training_step_time": 0.1052253246307373 + }, + { + "epoch": 4.126434326171875e-05, + "model_forward_time": 0.025291919708251953, + "step": 27043 + }, + { + "epoch": 4.126434326171875e-05, + "step": 27043, + "training_step_time": 0.10856151580810547 + }, + { + "epoch": 4.1265869140625e-05, + "model_forward_time": 0.02512812614440918, + "step": 27044 + }, + { + "epoch": 4.1265869140625e-05, + "step": 27044, + "training_step_time": 0.1062319278717041 + }, + { + "epoch": 4.126739501953125e-05, + "model_forward_time": 0.025133371353149414, + "step": 27045 + }, + { + "epoch": 4.126739501953125e-05, + "step": 27045, + "training_step_time": 0.10752463340759277 + }, + { + "epoch": 4.12689208984375e-05, + "model_forward_time": 0.02543187141418457, + "step": 27046 + }, + { + "epoch": 4.12689208984375e-05, + "step": 27046, + "training_step_time": 0.10828781127929688 + }, + { + "epoch": 4.127044677734375e-05, + "model_forward_time": 0.024854421615600586, + "step": 27047 + }, + { + "epoch": 4.127044677734375e-05, + "step": 27047, + "training_step_time": 0.11273741722106934 + }, + { + "epoch": 4.127197265625e-05, + "model_forward_time": 0.026129722595214844, + "step": 27048 + }, + { + "epoch": 4.127197265625e-05, + "step": 27048, + "training_step_time": 0.10776948928833008 + }, + { + "epoch": 4.127349853515625e-05, + "model_forward_time": 0.025411367416381836, + "step": 27049 + }, + { + "epoch": 4.127349853515625e-05, + "step": 27049, + "training_step_time": 0.12827563285827637 + }, + { + "epoch": 4.12750244140625e-05, + "grad_norm": 0.21848095953464508, + "learning_rate": 2.620376523267698e-06, + "loss": 0.0028, + "step": 27050 + }, + { + "epoch": 4.12750244140625e-05, + "model_forward_time": 0.02498793601989746, + "step": 27050 + }, + { + "epoch": 4.12750244140625e-05, + "step": 27050, + "training_step_time": 0.11032748222351074 + }, + { + "epoch": 4.127655029296875e-05, + "model_forward_time": 0.025449275970458984, + "step": 27051 + }, + { + "epoch": 4.127655029296875e-05, + "step": 27051, + "training_step_time": 0.10655903816223145 + }, + { + "epoch": 4.1278076171875e-05, + "model_forward_time": 0.02609395980834961, + "step": 27052 + }, + { + "epoch": 4.1278076171875e-05, + "step": 27052, + "training_step_time": 0.11070036888122559 + }, + { + "epoch": 4.127960205078125e-05, + "model_forward_time": 0.025506019592285156, + "step": 27053 + }, + { + "epoch": 4.127960205078125e-05, + "step": 27053, + "training_step_time": 0.11590790748596191 + }, + { + "epoch": 4.12811279296875e-05, + "model_forward_time": 0.02569437026977539, + "step": 27054 + }, + { + "epoch": 4.12811279296875e-05, + "step": 27054, + "training_step_time": 0.2042102813720703 + }, + { + "epoch": 4.128265380859375e-05, + "model_forward_time": 0.02479243278503418, + "step": 27055 + }, + { + "epoch": 4.128265380859375e-05, + "step": 27055, + "training_step_time": 0.11034941673278809 + }, + { + "epoch": 4.12841796875e-05, + "model_forward_time": 0.023580312728881836, + "step": 27056 + }, + { + "epoch": 4.12841796875e-05, + "step": 27056, + "training_step_time": 0.10724830627441406 + }, + { + "epoch": 4.128570556640625e-05, + "model_forward_time": 0.025020360946655273, + "step": 27057 + }, + { + "epoch": 4.128570556640625e-05, + "step": 27057, + "training_step_time": 0.10899829864501953 + }, + { + "epoch": 4.12872314453125e-05, + "model_forward_time": 0.025716066360473633, + "step": 27058 + }, + { + "epoch": 4.12872314453125e-05, + "step": 27058, + "training_step_time": 0.11025333404541016 + }, + { + "epoch": 4.128875732421875e-05, + "model_forward_time": 0.025380849838256836, + "step": 27059 + }, + { + "epoch": 4.128875732421875e-05, + "step": 27059, + "training_step_time": 0.1088571548461914 + }, + { + "epoch": 4.1290283203125e-05, + "grad_norm": 0.04789227619767189, + "learning_rate": 2.602796871124663e-06, + "loss": 0.0025, + "step": 27060 + }, + { + "epoch": 4.1290283203125e-05, + "model_forward_time": 0.02533721923828125, + "step": 27060 + }, + { + "epoch": 4.1290283203125e-05, + "step": 27060, + "training_step_time": 0.11033177375793457 + }, + { + "epoch": 4.129180908203125e-05, + "model_forward_time": 0.025603771209716797, + "step": 27061 + }, + { + "epoch": 4.129180908203125e-05, + "step": 27061, + "training_step_time": 0.10684037208557129 + }, + { + "epoch": 4.12933349609375e-05, + "model_forward_time": 0.02563190460205078, + "step": 27062 + }, + { + "epoch": 4.12933349609375e-05, + "step": 27062, + "training_step_time": 0.1090090274810791 + }, + { + "epoch": 4.129486083984375e-05, + "model_forward_time": 0.02566671371459961, + "step": 27063 + }, + { + "epoch": 4.129486083984375e-05, + "step": 27063, + "training_step_time": 0.1087045669555664 + }, + { + "epoch": 4.129638671875e-05, + "model_forward_time": 0.02546858787536621, + "step": 27064 + }, + { + "epoch": 4.129638671875e-05, + "step": 27064, + "training_step_time": 0.10641646385192871 + }, + { + "epoch": 4.129791259765625e-05, + "model_forward_time": 0.02562999725341797, + "step": 27065 + }, + { + "epoch": 4.129791259765625e-05, + "step": 27065, + "training_step_time": 0.10577392578125 + }, + { + "epoch": 4.12994384765625e-05, + "model_forward_time": 0.025210857391357422, + "step": 27066 + }, + { + "epoch": 4.12994384765625e-05, + "step": 27066, + "training_step_time": 0.10653424263000488 + }, + { + "epoch": 4.130096435546875e-05, + "model_forward_time": 0.024494647979736328, + "step": 27067 + }, + { + "epoch": 4.130096435546875e-05, + "step": 27067, + "training_step_time": 0.10486197471618652 + }, + { + "epoch": 4.1302490234375e-05, + "model_forward_time": 0.025325298309326172, + "step": 27068 + }, + { + "epoch": 4.1302490234375e-05, + "step": 27068, + "training_step_time": 0.10727667808532715 + }, + { + "epoch": 4.130401611328125e-05, + "model_forward_time": 0.02538895606994629, + "step": 27069 + }, + { + "epoch": 4.130401611328125e-05, + "step": 27069, + "training_step_time": 0.11631155014038086 + }, + { + "epoch": 4.13055419921875e-05, + "grad_norm": 0.05423854663968086, + "learning_rate": 2.5852748110527014e-06, + "loss": 0.0037, + "step": 27070 + }, + { + "epoch": 4.13055419921875e-05, + "model_forward_time": 0.025330543518066406, + "step": 27070 + }, + { + "epoch": 4.13055419921875e-05, + "step": 27070, + "training_step_time": 0.10634231567382812 + }, + { + "epoch": 4.130706787109375e-05, + "model_forward_time": 0.02494502067565918, + "step": 27071 + }, + { + "epoch": 4.130706787109375e-05, + "step": 27071, + "training_step_time": 0.10900282859802246 + }, + { + "epoch": 4.130859375e-05, + "model_forward_time": 0.025794267654418945, + "step": 27072 + }, + { + "epoch": 4.130859375e-05, + "step": 27072, + "training_step_time": 0.10683178901672363 + }, + { + "epoch": 4.131011962890625e-05, + "model_forward_time": 0.025348901748657227, + "step": 27073 + }, + { + "epoch": 4.131011962890625e-05, + "step": 27073, + "training_step_time": 0.10675883293151855 + }, + { + "epoch": 4.13116455078125e-05, + "model_forward_time": 0.025124788284301758, + "step": 27074 + }, + { + "epoch": 4.13116455078125e-05, + "step": 27074, + "training_step_time": 0.10666322708129883 + }, + { + "epoch": 4.131317138671875e-05, + "model_forward_time": 0.02539372444152832, + "step": 27075 + }, + { + "epoch": 4.131317138671875e-05, + "step": 27075, + "training_step_time": 0.10860610008239746 + }, + { + "epoch": 4.1314697265625e-05, + "model_forward_time": 0.024425506591796875, + "step": 27076 + }, + { + "epoch": 4.1314697265625e-05, + "step": 27076, + "training_step_time": 0.14626669883728027 + }, + { + "epoch": 4.131622314453125e-05, + "model_forward_time": 0.025066137313842773, + "step": 27077 + }, + { + "epoch": 4.131622314453125e-05, + "step": 27077, + "training_step_time": 0.16222572326660156 + }, + { + "epoch": 4.13177490234375e-05, + "model_forward_time": 0.02506422996520996, + "step": 27078 + }, + { + "epoch": 4.13177490234375e-05, + "step": 27078, + "training_step_time": 0.18921208381652832 + }, + { + "epoch": 4.131927490234375e-05, + "model_forward_time": 0.024865150451660156, + "step": 27079 + }, + { + "epoch": 4.131927490234375e-05, + "step": 27079, + "training_step_time": 0.14633464813232422 + }, + { + "epoch": 4.132080078125e-05, + "grad_norm": 0.10550269484519958, + "learning_rate": 2.567810364342765e-06, + "loss": 0.0053, + "step": 27080 + }, + { + "epoch": 4.132080078125e-05, + "model_forward_time": 0.024815797805786133, + "step": 27080 + }, + { + "epoch": 4.132080078125e-05, + "step": 27080, + "training_step_time": 0.10638809204101562 + }, + { + "epoch": 4.132232666015625e-05, + "model_forward_time": 0.024890661239624023, + "step": 27081 + }, + { + "epoch": 4.132232666015625e-05, + "step": 27081, + "training_step_time": 0.16589999198913574 + }, + { + "epoch": 4.13238525390625e-05, + "model_forward_time": 0.02510523796081543, + "step": 27082 + }, + { + "epoch": 4.13238525390625e-05, + "step": 27082, + "training_step_time": 0.21301770210266113 + }, + { + "epoch": 4.132537841796875e-05, + "model_forward_time": 0.024335861206054688, + "step": 27083 + }, + { + "epoch": 4.132537841796875e-05, + "step": 27083, + "training_step_time": 0.10935139656066895 + }, + { + "epoch": 4.1326904296875e-05, + "model_forward_time": 0.02464127540588379, + "step": 27084 + }, + { + "epoch": 4.1326904296875e-05, + "step": 27084, + "training_step_time": 0.1063542366027832 + }, + { + "epoch": 4.132843017578125e-05, + "model_forward_time": 0.028451919555664062, + "step": 27085 + }, + { + "epoch": 4.132843017578125e-05, + "step": 27085, + "training_step_time": 0.10713887214660645 + }, + { + "epoch": 4.13299560546875e-05, + "model_forward_time": 0.025197505950927734, + "step": 27086 + }, + { + "epoch": 4.13299560546875e-05, + "step": 27086, + "training_step_time": 0.10317111015319824 + }, + { + "epoch": 4.133148193359375e-05, + "model_forward_time": 0.024071455001831055, + "step": 27087 + }, + { + "epoch": 4.133148193359375e-05, + "step": 27087, + "training_step_time": 0.10592985153198242 + }, + { + "epoch": 4.13330078125e-05, + "model_forward_time": 0.024669647216796875, + "step": 27088 + }, + { + "epoch": 4.13330078125e-05, + "step": 27088, + "training_step_time": 0.10637211799621582 + }, + { + "epoch": 4.133453369140625e-05, + "model_forward_time": 0.025848865509033203, + "step": 27089 + }, + { + "epoch": 4.133453369140625e-05, + "step": 27089, + "training_step_time": 0.10802316665649414 + }, + { + "epoch": 4.13360595703125e-05, + "grad_norm": 0.0726233571767807, + "learning_rate": 2.5504035522157854e-06, + "loss": 0.0037, + "step": 27090 + }, + { + "epoch": 4.13360595703125e-05, + "model_forward_time": 0.025147199630737305, + "step": 27090 + }, + { + "epoch": 4.13360595703125e-05, + "step": 27090, + "training_step_time": 0.10675644874572754 + }, + { + "epoch": 4.133758544921875e-05, + "model_forward_time": 0.02559065818786621, + "step": 27091 + }, + { + "epoch": 4.133758544921875e-05, + "step": 27091, + "training_step_time": 0.1077423095703125 + }, + { + "epoch": 4.1339111328125e-05, + "model_forward_time": 0.025440454483032227, + "step": 27092 + }, + { + "epoch": 4.1339111328125e-05, + "step": 27092, + "training_step_time": 0.11034727096557617 + }, + { + "epoch": 4.134063720703125e-05, + "model_forward_time": 0.025664806365966797, + "step": 27093 + }, + { + "epoch": 4.134063720703125e-05, + "step": 27093, + "training_step_time": 0.1103217601776123 + }, + { + "epoch": 4.13421630859375e-05, + "model_forward_time": 0.025201082229614258, + "step": 27094 + }, + { + "epoch": 4.13421630859375e-05, + "step": 27094, + "training_step_time": 0.20853710174560547 + }, + { + "epoch": 4.134368896484375e-05, + "model_forward_time": 0.024445533752441406, + "step": 27095 + }, + { + "epoch": 4.134368896484375e-05, + "step": 27095, + "training_step_time": 0.1099843978881836 + }, + { + "epoch": 4.134521484375e-05, + "model_forward_time": 0.0247652530670166, + "step": 27096 + }, + { + "epoch": 4.134521484375e-05, + "step": 27096, + "training_step_time": 0.10718774795532227 + }, + { + "epoch": 4.134674072265625e-05, + "model_forward_time": 0.025397062301635742, + "step": 27097 + }, + { + "epoch": 4.134674072265625e-05, + "step": 27097, + "training_step_time": 0.10552573204040527 + }, + { + "epoch": 4.13482666015625e-05, + "model_forward_time": 0.0255434513092041, + "step": 27098 + }, + { + "epoch": 4.13482666015625e-05, + "step": 27098, + "training_step_time": 0.13417363166809082 + }, + { + "epoch": 4.134979248046875e-05, + "model_forward_time": 0.02527141571044922, + "step": 27099 + }, + { + "epoch": 4.134979248046875e-05, + "step": 27099, + "training_step_time": 0.10861468315124512 + }, + { + "epoch": 4.1351318359375e-05, + "grad_norm": 0.08323527127504349, + "learning_rate": 2.533054395822704e-06, + "loss": 0.0026, + "step": 27100 + }, + { + "epoch": 4.1351318359375e-05, + "model_forward_time": 0.02525043487548828, + "step": 27100 + }, + { + "epoch": 4.1351318359375e-05, + "step": 27100, + "training_step_time": 0.19332408905029297 + }, + { + "epoch": 4.135284423828125e-05, + "model_forward_time": 0.024570465087890625, + "step": 27101 + }, + { + "epoch": 4.135284423828125e-05, + "step": 27101, + "training_step_time": 0.1037447452545166 + }, + { + "epoch": 4.13543701171875e-05, + "model_forward_time": 0.024534940719604492, + "step": 27102 + }, + { + "epoch": 4.13543701171875e-05, + "step": 27102, + "training_step_time": 0.10181570053100586 + }, + { + "epoch": 4.135589599609375e-05, + "model_forward_time": 0.025430679321289062, + "step": 27103 + }, + { + "epoch": 4.135589599609375e-05, + "step": 27103, + "training_step_time": 0.10852432250976562 + }, + { + "epoch": 4.1357421875e-05, + "model_forward_time": 0.025399208068847656, + "step": 27104 + }, + { + "epoch": 4.1357421875e-05, + "step": 27104, + "training_step_time": 0.10819149017333984 + }, + { + "epoch": 4.135894775390625e-05, + "model_forward_time": 0.024935007095336914, + "step": 27105 + }, + { + "epoch": 4.135894775390625e-05, + "step": 27105, + "training_step_time": 0.10955429077148438 + }, + { + "epoch": 4.13604736328125e-05, + "model_forward_time": 0.02556753158569336, + "step": 27106 + }, + { + "epoch": 4.13604736328125e-05, + "step": 27106, + "training_step_time": 0.11037516593933105 + }, + { + "epoch": 4.136199951171875e-05, + "model_forward_time": 0.02503371238708496, + "step": 27107 + }, + { + "epoch": 4.136199951171875e-05, + "step": 27107, + "training_step_time": 0.10656452178955078 + }, + { + "epoch": 4.1363525390625e-05, + "model_forward_time": 0.025246143341064453, + "step": 27108 + }, + { + "epoch": 4.1363525390625e-05, + "step": 27108, + "training_step_time": 0.10650897026062012 + }, + { + "epoch": 4.136505126953125e-05, + "model_forward_time": 0.025477886199951172, + "step": 27109 + }, + { + "epoch": 4.136505126953125e-05, + "step": 27109, + "training_step_time": 0.1107480525970459 + }, + { + "epoch": 4.13665771484375e-05, + "grad_norm": 0.46716248989105225, + "learning_rate": 2.515762916244374e-06, + "loss": 0.0055, + "step": 27110 + }, + { + "epoch": 4.13665771484375e-05, + "model_forward_time": 0.02565932273864746, + "step": 27110 + }, + { + "epoch": 4.13665771484375e-05, + "step": 27110, + "training_step_time": 0.10685276985168457 + }, + { + "epoch": 4.136810302734375e-05, + "model_forward_time": 0.025774717330932617, + "step": 27111 + }, + { + "epoch": 4.136810302734375e-05, + "step": 27111, + "training_step_time": 0.10646486282348633 + }, + { + "epoch": 4.136962890625e-05, + "model_forward_time": 0.02550363540649414, + "step": 27112 + }, + { + "epoch": 4.136962890625e-05, + "step": 27112, + "training_step_time": 0.10633063316345215 + }, + { + "epoch": 4.137115478515625e-05, + "model_forward_time": 0.02534937858581543, + "step": 27113 + }, + { + "epoch": 4.137115478515625e-05, + "step": 27113, + "training_step_time": 0.11271476745605469 + }, + { + "epoch": 4.13726806640625e-05, + "model_forward_time": 0.02518630027770996, + "step": 27114 + }, + { + "epoch": 4.13726806640625e-05, + "step": 27114, + "training_step_time": 0.10434269905090332 + }, + { + "epoch": 4.137420654296875e-05, + "model_forward_time": 0.02531719207763672, + "step": 27115 + }, + { + "epoch": 4.137420654296875e-05, + "step": 27115, + "training_step_time": 0.10865044593811035 + }, + { + "epoch": 4.1375732421875e-05, + "model_forward_time": 0.025368213653564453, + "step": 27116 + }, + { + "epoch": 4.1375732421875e-05, + "step": 27116, + "training_step_time": 0.10389161109924316 + }, + { + "epoch": 4.137725830078125e-05, + "model_forward_time": 0.024411916732788086, + "step": 27117 + }, + { + "epoch": 4.137725830078125e-05, + "step": 27117, + "training_step_time": 0.10581660270690918 + }, + { + "epoch": 4.13787841796875e-05, + "model_forward_time": 0.02437734603881836, + "step": 27118 + }, + { + "epoch": 4.13787841796875e-05, + "step": 27118, + "training_step_time": 0.10658526420593262 + }, + { + "epoch": 4.138031005859375e-05, + "model_forward_time": 0.02540874481201172, + "step": 27119 + }, + { + "epoch": 4.138031005859375e-05, + "step": 27119, + "training_step_time": 0.11419677734375 + }, + { + "epoch": 4.13818359375e-05, + "grad_norm": 0.1938505321741104, + "learning_rate": 2.4985291344915674e-06, + "loss": 0.0045, + "step": 27120 + }, + { + "epoch": 4.13818359375e-05, + "model_forward_time": 0.02473926544189453, + "step": 27120 + }, + { + "epoch": 4.13818359375e-05, + "step": 27120, + "training_step_time": 0.12047886848449707 + }, + { + "epoch": 4.138336181640625e-05, + "model_forward_time": 0.02515578269958496, + "step": 27121 + }, + { + "epoch": 4.138336181640625e-05, + "step": 27121, + "training_step_time": 0.10773468017578125 + }, + { + "epoch": 4.13848876953125e-05, + "model_forward_time": 0.02457284927368164, + "step": 27122 + }, + { + "epoch": 4.13848876953125e-05, + "step": 27122, + "training_step_time": 0.14998173713684082 + }, + { + "epoch": 4.138641357421875e-05, + "model_forward_time": 0.025141239166259766, + "step": 27123 + }, + { + "epoch": 4.138641357421875e-05, + "step": 27123, + "training_step_time": 0.16455912590026855 + }, + { + "epoch": 4.1387939453125e-05, + "model_forward_time": 0.024574995040893555, + "step": 27124 + }, + { + "epoch": 4.1387939453125e-05, + "step": 27124, + "training_step_time": 0.1673569679260254 + }, + { + "epoch": 4.138946533203125e-05, + "model_forward_time": 0.024762868881225586, + "step": 27125 + }, + { + "epoch": 4.138946533203125e-05, + "step": 27125, + "training_step_time": 0.15564870834350586 + }, + { + "epoch": 4.13909912109375e-05, + "model_forward_time": 0.024801015853881836, + "step": 27126 + }, + { + "epoch": 4.13909912109375e-05, + "step": 27126, + "training_step_time": 0.11758065223693848 + }, + { + "epoch": 4.139251708984375e-05, + "model_forward_time": 0.024770021438598633, + "step": 27127 + }, + { + "epoch": 4.139251708984375e-05, + "step": 27127, + "training_step_time": 0.14072299003601074 + }, + { + "epoch": 4.139404296875e-05, + "model_forward_time": 0.024611711502075195, + "step": 27128 + }, + { + "epoch": 4.139404296875e-05, + "step": 27128, + "training_step_time": 0.15895557403564453 + }, + { + "epoch": 4.139556884765625e-05, + "model_forward_time": 0.024137258529663086, + "step": 27129 + }, + { + "epoch": 4.139556884765625e-05, + "step": 27129, + "training_step_time": 0.1314702033996582 + }, + { + "epoch": 4.13970947265625e-05, + "grad_norm": 0.09073584526777267, + "learning_rate": 2.481353071504966e-06, + "loss": 0.0024, + "step": 27130 + }, + { + "epoch": 4.13970947265625e-05, + "model_forward_time": 0.0239260196685791, + "step": 27130 + }, + { + "epoch": 4.13970947265625e-05, + "step": 27130, + "training_step_time": 0.10956525802612305 + }, + { + "epoch": 4.139862060546875e-05, + "model_forward_time": 0.025166749954223633, + "step": 27131 + }, + { + "epoch": 4.139862060546875e-05, + "step": 27131, + "training_step_time": 0.11165142059326172 + }, + { + "epoch": 4.1400146484375e-05, + "model_forward_time": 0.025182008743286133, + "step": 27132 + }, + { + "epoch": 4.1400146484375e-05, + "step": 27132, + "training_step_time": 0.1120445728302002 + }, + { + "epoch": 4.140167236328125e-05, + "model_forward_time": 0.024922847747802734, + "step": 27133 + }, + { + "epoch": 4.140167236328125e-05, + "step": 27133, + "training_step_time": 0.11076855659484863 + }, + { + "epoch": 4.14031982421875e-05, + "model_forward_time": 0.02542710304260254, + "step": 27134 + }, + { + "epoch": 4.14031982421875e-05, + "step": 27134, + "training_step_time": 0.11251330375671387 + }, + { + "epoch": 4.140472412109375e-05, + "model_forward_time": 0.02524423599243164, + "step": 27135 + }, + { + "epoch": 4.140472412109375e-05, + "step": 27135, + "training_step_time": 0.11557984352111816 + }, + { + "epoch": 4.140625e-05, + "model_forward_time": 0.025435209274291992, + "step": 27136 + }, + { + "epoch": 4.140625e-05, + "step": 27136, + "training_step_time": 0.12040162086486816 + }, + { + "epoch": 4.140777587890625e-05, + "model_forward_time": 0.025095224380493164, + "step": 27137 + }, + { + "epoch": 4.140777587890625e-05, + "step": 27137, + "training_step_time": 0.11200666427612305 + }, + { + "epoch": 4.14093017578125e-05, + "model_forward_time": 0.02540445327758789, + "step": 27138 + }, + { + "epoch": 4.14093017578125e-05, + "step": 27138, + "training_step_time": 0.11178112030029297 + }, + { + "epoch": 4.141082763671875e-05, + "model_forward_time": 0.025470495223999023, + "step": 27139 + }, + { + "epoch": 4.141082763671875e-05, + "step": 27139, + "training_step_time": 0.11330199241638184 + }, + { + "epoch": 4.1412353515625e-05, + "grad_norm": 0.11247889697551727, + "learning_rate": 2.4642347481550865e-06, + "loss": 0.0084, + "step": 27140 + }, + { + "epoch": 4.1412353515625e-05, + "model_forward_time": 0.025043487548828125, + "step": 27140 + }, + { + "epoch": 4.1412353515625e-05, + "step": 27140, + "training_step_time": 0.1107022762298584 + }, + { + "epoch": 4.141387939453125e-05, + "model_forward_time": 0.0256500244140625, + "step": 27141 + }, + { + "epoch": 4.141387939453125e-05, + "step": 27141, + "training_step_time": 0.1199500560760498 + }, + { + "epoch": 4.14154052734375e-05, + "model_forward_time": 0.02564835548400879, + "step": 27142 + }, + { + "epoch": 4.14154052734375e-05, + "step": 27142, + "training_step_time": 0.13262557983398438 + }, + { + "epoch": 4.141693115234375e-05, + "model_forward_time": 0.025178909301757812, + "step": 27143 + }, + { + "epoch": 4.141693115234375e-05, + "step": 27143, + "training_step_time": 0.10856127738952637 + }, + { + "epoch": 4.141845703125e-05, + "model_forward_time": 0.02529168128967285, + "step": 27144 + }, + { + "epoch": 4.141845703125e-05, + "step": 27144, + "training_step_time": 0.12717461585998535 + }, + { + "epoch": 4.141998291015625e-05, + "model_forward_time": 0.02571582794189453, + "step": 27145 + }, + { + "epoch": 4.141998291015625e-05, + "step": 27145, + "training_step_time": 0.11343050003051758 + }, + { + "epoch": 4.14215087890625e-05, + "model_forward_time": 0.025376081466674805, + "step": 27146 + }, + { + "epoch": 4.14215087890625e-05, + "step": 27146, + "training_step_time": 0.11295914649963379 + }, + { + "epoch": 4.142303466796875e-05, + "model_forward_time": 0.026096582412719727, + "step": 27147 + }, + { + "epoch": 4.142303466796875e-05, + "step": 27147, + "training_step_time": 0.1576695442199707 + }, + { + "epoch": 4.1424560546875e-05, + "model_forward_time": 0.02475738525390625, + "step": 27148 + }, + { + "epoch": 4.1424560546875e-05, + "step": 27148, + "training_step_time": 0.10870170593261719 + }, + { + "epoch": 4.142608642578125e-05, + "model_forward_time": 0.02508068084716797, + "step": 27149 + }, + { + "epoch": 4.142608642578125e-05, + "step": 27149, + "training_step_time": 0.10722684860229492 + }, + { + "epoch": 4.14276123046875e-05, + "grad_norm": 0.09447456896305084, + "learning_rate": 2.4471741852423237e-06, + "loss": 0.0036, + "step": 27150 + }, + { + "epoch": 4.14276123046875e-05, + "model_forward_time": 0.025452613830566406, + "step": 27150 + }, + { + "epoch": 4.14276123046875e-05, + "step": 27150, + "training_step_time": 0.10950160026550293 + }, + { + "epoch": 4.142913818359375e-05, + "model_forward_time": 0.02527308464050293, + "step": 27151 + }, + { + "epoch": 4.142913818359375e-05, + "step": 27151, + "training_step_time": 0.11713886260986328 + }, + { + "epoch": 4.14306640625e-05, + "model_forward_time": 0.025470495223999023, + "step": 27152 + }, + { + "epoch": 4.14306640625e-05, + "step": 27152, + "training_step_time": 0.11479997634887695 + }, + { + "epoch": 4.143218994140625e-05, + "model_forward_time": 0.025516986846923828, + "step": 27153 + }, + { + "epoch": 4.143218994140625e-05, + "step": 27153, + "training_step_time": 0.10962080955505371 + }, + { + "epoch": 4.14337158203125e-05, + "model_forward_time": 0.025424480438232422, + "step": 27154 + }, + { + "epoch": 4.14337158203125e-05, + "step": 27154, + "training_step_time": 0.11248993873596191 + }, + { + "epoch": 4.143524169921875e-05, + "model_forward_time": 0.025280237197875977, + "step": 27155 + }, + { + "epoch": 4.143524169921875e-05, + "step": 27155, + "training_step_time": 0.10719728469848633 + }, + { + "epoch": 4.1436767578125e-05, + "model_forward_time": 0.025478363037109375, + "step": 27156 + }, + { + "epoch": 4.1436767578125e-05, + "step": 27156, + "training_step_time": 0.1108705997467041 + }, + { + "epoch": 4.143829345703125e-05, + "model_forward_time": 0.025449275970458984, + "step": 27157 + }, + { + "epoch": 4.143829345703125e-05, + "step": 27157, + "training_step_time": 0.10879993438720703 + }, + { + "epoch": 4.14398193359375e-05, + "model_forward_time": 0.025259971618652344, + "step": 27158 + }, + { + "epoch": 4.14398193359375e-05, + "step": 27158, + "training_step_time": 0.11244678497314453 + }, + { + "epoch": 4.144134521484375e-05, + "model_forward_time": 0.025121450424194336, + "step": 27159 + }, + { + "epoch": 4.144134521484375e-05, + "step": 27159, + "training_step_time": 0.10891532897949219 + }, + { + "epoch": 4.144287109375e-05, + "grad_norm": 0.040682245045900345, + "learning_rate": 2.430171403496867e-06, + "loss": 0.0035, + "step": 27160 + }, + { + "epoch": 4.144287109375e-05, + "model_forward_time": 0.02549004554748535, + "step": 27160 + }, + { + "epoch": 4.144287109375e-05, + "step": 27160, + "training_step_time": 0.11022377014160156 + }, + { + "epoch": 4.144439697265625e-05, + "model_forward_time": 0.02590012550354004, + "step": 27161 + }, + { + "epoch": 4.144439697265625e-05, + "step": 27161, + "training_step_time": 0.10942292213439941 + }, + { + "epoch": 4.14459228515625e-05, + "model_forward_time": 0.025229215621948242, + "step": 27162 + }, + { + "epoch": 4.14459228515625e-05, + "step": 27162, + "training_step_time": 0.10913228988647461 + }, + { + "epoch": 4.144744873046875e-05, + "model_forward_time": 0.025436878204345703, + "step": 27163 + }, + { + "epoch": 4.144744873046875e-05, + "step": 27163, + "training_step_time": 0.1092681884765625 + }, + { + "epoch": 4.1448974609375e-05, + "model_forward_time": 0.025141477584838867, + "step": 27164 + }, + { + "epoch": 4.1448974609375e-05, + "step": 27164, + "training_step_time": 0.10697197914123535 + }, + { + "epoch": 4.145050048828125e-05, + "model_forward_time": 0.025154590606689453, + "step": 27165 + }, + { + "epoch": 4.145050048828125e-05, + "step": 27165, + "training_step_time": 0.11335539817810059 + }, + { + "epoch": 4.14520263671875e-05, + "model_forward_time": 0.025440454483032227, + "step": 27166 + }, + { + "epoch": 4.14520263671875e-05, + "step": 27166, + "training_step_time": 0.11067032814025879 + }, + { + "epoch": 4.145355224609375e-05, + "model_forward_time": 0.025561809539794922, + "step": 27167 + }, + { + "epoch": 4.145355224609375e-05, + "step": 27167, + "training_step_time": 0.10559821128845215 + }, + { + "epoch": 4.1455078125e-05, + "model_forward_time": 0.026412487030029297, + "step": 27168 + }, + { + "epoch": 4.1455078125e-05, + "step": 27168, + "training_step_time": 0.10989737510681152 + }, + { + "epoch": 4.145660400390625e-05, + "model_forward_time": 0.025352954864501953, + "step": 27169 + }, + { + "epoch": 4.145660400390625e-05, + "step": 27169, + "training_step_time": 0.18042898178100586 + }, + { + "epoch": 4.14581298828125e-05, + "grad_norm": 0.06102828308939934, + "learning_rate": 2.413226423578696e-06, + "loss": 0.0066, + "step": 27170 + }, + { + "epoch": 4.14581298828125e-05, + "model_forward_time": 0.024439096450805664, + "step": 27170 + }, + { + "epoch": 4.14581298828125e-05, + "step": 27170, + "training_step_time": 0.13613390922546387 + }, + { + "epoch": 4.145965576171875e-05, + "model_forward_time": 0.024977684020996094, + "step": 27171 + }, + { + "epoch": 4.145965576171875e-05, + "step": 27171, + "training_step_time": 0.17462849617004395 + }, + { + "epoch": 4.1461181640625e-05, + "model_forward_time": 0.024787187576293945, + "step": 27172 + }, + { + "epoch": 4.1461181640625e-05, + "step": 27172, + "training_step_time": 0.21291637420654297 + }, + { + "epoch": 4.146270751953125e-05, + "model_forward_time": 0.024611473083496094, + "step": 27173 + }, + { + "epoch": 4.146270751953125e-05, + "step": 27173, + "training_step_time": 0.15426921844482422 + }, + { + "epoch": 4.14642333984375e-05, + "model_forward_time": 0.024663209915161133, + "step": 27174 + }, + { + "epoch": 4.14642333984375e-05, + "step": 27174, + "training_step_time": 0.14537358283996582 + }, + { + "epoch": 4.146575927734375e-05, + "model_forward_time": 0.024399518966674805, + "step": 27175 + }, + { + "epoch": 4.146575927734375e-05, + "step": 27175, + "training_step_time": 0.2080399990081787 + }, + { + "epoch": 4.146728515625e-05, + "model_forward_time": 0.024671554565429688, + "step": 27176 + }, + { + "epoch": 4.146728515625e-05, + "step": 27176, + "training_step_time": 0.13054990768432617 + }, + { + "epoch": 4.146881103515625e-05, + "model_forward_time": 0.02411341667175293, + "step": 27177 + }, + { + "epoch": 4.146881103515625e-05, + "step": 27177, + "training_step_time": 0.11742734909057617 + }, + { + "epoch": 4.14703369140625e-05, + "model_forward_time": 0.0253298282623291, + "step": 27178 + }, + { + "epoch": 4.14703369140625e-05, + "step": 27178, + "training_step_time": 0.11133432388305664 + }, + { + "epoch": 4.147186279296875e-05, + "model_forward_time": 0.02551555633544922, + "step": 27179 + }, + { + "epoch": 4.147186279296875e-05, + "step": 27179, + "training_step_time": 0.11390447616577148 + }, + { + "epoch": 4.1473388671875e-05, + "grad_norm": 0.3329187333583832, + "learning_rate": 2.3963392660775575e-06, + "loss": 0.0086, + "step": 27180 + }, + { + "epoch": 4.1473388671875e-05, + "model_forward_time": 0.024847984313964844, + "step": 27180 + }, + { + "epoch": 4.1473388671875e-05, + "step": 27180, + "training_step_time": 0.10757017135620117 + }, + { + "epoch": 4.147491455078125e-05, + "model_forward_time": 0.02534008026123047, + "step": 27181 + }, + { + "epoch": 4.147491455078125e-05, + "step": 27181, + "training_step_time": 0.11074352264404297 + }, + { + "epoch": 4.14764404296875e-05, + "model_forward_time": 0.02534937858581543, + "step": 27182 + }, + { + "epoch": 4.14764404296875e-05, + "step": 27182, + "training_step_time": 0.10532093048095703 + }, + { + "epoch": 4.147796630859375e-05, + "model_forward_time": 0.025316715240478516, + "step": 27183 + }, + { + "epoch": 4.147796630859375e-05, + "step": 27183, + "training_step_time": 0.10759496688842773 + }, + { + "epoch": 4.14794921875e-05, + "model_forward_time": 0.025157690048217773, + "step": 27184 + }, + { + "epoch": 4.14794921875e-05, + "step": 27184, + "training_step_time": 0.10499048233032227 + }, + { + "epoch": 4.148101806640625e-05, + "model_forward_time": 0.025734663009643555, + "step": 27185 + }, + { + "epoch": 4.148101806640625e-05, + "step": 27185, + "training_step_time": 0.10912466049194336 + }, + { + "epoch": 4.14825439453125e-05, + "model_forward_time": 0.027640342712402344, + "step": 27186 + }, + { + "epoch": 4.14825439453125e-05, + "step": 27186, + "training_step_time": 0.10944414138793945 + }, + { + "epoch": 4.148406982421875e-05, + "model_forward_time": 0.025097370147705078, + "step": 27187 + }, + { + "epoch": 4.148406982421875e-05, + "step": 27187, + "training_step_time": 0.13417506217956543 + }, + { + "epoch": 4.1485595703125e-05, + "model_forward_time": 0.02521038055419922, + "step": 27188 + }, + { + "epoch": 4.1485595703125e-05, + "step": 27188, + "training_step_time": 0.11906552314758301 + }, + { + "epoch": 4.148712158203125e-05, + "model_forward_time": 0.025262832641601562, + "step": 27189 + }, + { + "epoch": 4.148712158203125e-05, + "step": 27189, + "training_step_time": 0.11391377449035645 + }, + { + "epoch": 4.14886474609375e-05, + "grad_norm": 0.11542549729347229, + "learning_rate": 2.379509951512937e-06, + "loss": 0.0057, + "step": 27190 + }, + { + "epoch": 4.14886474609375e-05, + "model_forward_time": 0.02521228790283203, + "step": 27190 + }, + { + "epoch": 4.14886474609375e-05, + "step": 27190, + "training_step_time": 0.10749006271362305 + }, + { + "epoch": 4.149017333984375e-05, + "model_forward_time": 0.025402545928955078, + "step": 27191 + }, + { + "epoch": 4.149017333984375e-05, + "step": 27191, + "training_step_time": 0.11640119552612305 + }, + { + "epoch": 4.149169921875e-05, + "model_forward_time": 0.025147438049316406, + "step": 27192 + }, + { + "epoch": 4.149169921875e-05, + "step": 27192, + "training_step_time": 0.10838079452514648 + }, + { + "epoch": 4.149322509765625e-05, + "model_forward_time": 0.025116443634033203, + "step": 27193 + }, + { + "epoch": 4.149322509765625e-05, + "step": 27193, + "training_step_time": 0.13714265823364258 + }, + { + "epoch": 4.14947509765625e-05, + "model_forward_time": 0.025503158569335938, + "step": 27194 + }, + { + "epoch": 4.14947509765625e-05, + "step": 27194, + "training_step_time": 0.10992598533630371 + }, + { + "epoch": 4.149627685546875e-05, + "model_forward_time": 0.02547144889831543, + "step": 27195 + }, + { + "epoch": 4.149627685546875e-05, + "step": 27195, + "training_step_time": 0.1053018569946289 + }, + { + "epoch": 4.1497802734375e-05, + "model_forward_time": 0.0257718563079834, + "step": 27196 + }, + { + "epoch": 4.1497802734375e-05, + "step": 27196, + "training_step_time": 0.10913300514221191 + }, + { + "epoch": 4.149932861328125e-05, + "model_forward_time": 0.025367021560668945, + "step": 27197 + }, + { + "epoch": 4.149932861328125e-05, + "step": 27197, + "training_step_time": 0.10893106460571289 + }, + { + "epoch": 4.15008544921875e-05, + "model_forward_time": 0.025524139404296875, + "step": 27198 + }, + { + "epoch": 4.15008544921875e-05, + "step": 27198, + "training_step_time": 0.1048281192779541 + }, + { + "epoch": 4.150238037109375e-05, + "model_forward_time": 0.02491593360900879, + "step": 27199 + }, + { + "epoch": 4.150238037109375e-05, + "step": 27199, + "training_step_time": 0.10783505439758301 + }, + { + "epoch": 4.150390625e-05, + "grad_norm": 0.19436855614185333, + "learning_rate": 2.362738500334055e-06, + "loss": 0.0118, + "step": 27200 + }, + { + "epoch": 4.150390625e-05, + "model_forward_time": 0.024727344512939453, + "step": 27200 + }, + { + "epoch": 4.150390625e-05, + "step": 27200, + "training_step_time": 0.10413098335266113 + }, + { + "epoch": 4.150543212890625e-05, + "model_forward_time": 0.025806427001953125, + "step": 27201 + }, + { + "epoch": 4.150543212890625e-05, + "step": 27201, + "training_step_time": 0.10806012153625488 + }, + { + "epoch": 4.15069580078125e-05, + "model_forward_time": 0.025191783905029297, + "step": 27202 + }, + { + "epoch": 4.15069580078125e-05, + "step": 27202, + "training_step_time": 0.10746145248413086 + }, + { + "epoch": 4.150848388671875e-05, + "model_forward_time": 0.025487661361694336, + "step": 27203 + }, + { + "epoch": 4.150848388671875e-05, + "step": 27203, + "training_step_time": 0.1034233570098877 + }, + { + "epoch": 4.1510009765625e-05, + "model_forward_time": 0.0242464542388916, + "step": 27204 + }, + { + "epoch": 4.1510009765625e-05, + "step": 27204, + "training_step_time": 0.11036014556884766 + }, + { + "epoch": 4.151153564453125e-05, + "model_forward_time": 0.02443408966064453, + "step": 27205 + }, + { + "epoch": 4.151153564453125e-05, + "step": 27205, + "training_step_time": 0.10556626319885254 + }, + { + "epoch": 4.15130615234375e-05, + "model_forward_time": 0.024636268615722656, + "step": 27206 + }, + { + "epoch": 4.15130615234375e-05, + "step": 27206, + "training_step_time": 0.13117241859436035 + }, + { + "epoch": 4.151458740234375e-05, + "model_forward_time": 0.02502274513244629, + "step": 27207 + }, + { + "epoch": 4.151458740234375e-05, + "step": 27207, + "training_step_time": 0.15232491493225098 + }, + { + "epoch": 4.151611328125e-05, + "model_forward_time": 0.024282455444335938, + "step": 27208 + }, + { + "epoch": 4.151611328125e-05, + "step": 27208, + "training_step_time": 0.12797212600708008 + }, + { + "epoch": 4.151763916015625e-05, + "model_forward_time": 0.024542808532714844, + "step": 27209 + }, + { + "epoch": 4.151763916015625e-05, + "step": 27209, + "training_step_time": 0.12417173385620117 + }, + { + "epoch": 4.15191650390625e-05, + "grad_norm": 0.05312748998403549, + "learning_rate": 2.3460249329197824e-06, + "loss": 0.0032, + "step": 27210 + }, + { + "epoch": 4.15191650390625e-05, + "model_forward_time": 0.02474665641784668, + "step": 27210 + }, + { + "epoch": 4.15191650390625e-05, + "step": 27210, + "training_step_time": 0.1182100772857666 + }, + { + "epoch": 4.152069091796875e-05, + "model_forward_time": 0.025084495544433594, + "step": 27211 + }, + { + "epoch": 4.152069091796875e-05, + "step": 27211, + "training_step_time": 0.11852192878723145 + }, + { + "epoch": 4.1522216796875e-05, + "model_forward_time": 0.0252072811126709, + "step": 27212 + }, + { + "epoch": 4.1522216796875e-05, + "step": 27212, + "training_step_time": 0.1143496036529541 + }, + { + "epoch": 4.152374267578125e-05, + "model_forward_time": 0.025771379470825195, + "step": 27213 + }, + { + "epoch": 4.152374267578125e-05, + "step": 27213, + "training_step_time": 0.11190104484558105 + }, + { + "epoch": 4.15252685546875e-05, + "model_forward_time": 0.024744510650634766, + "step": 27214 + }, + { + "epoch": 4.15252685546875e-05, + "step": 27214, + "training_step_time": 0.1457371711730957 + }, + { + "epoch": 4.152679443359375e-05, + "model_forward_time": 0.02526402473449707, + "step": 27215 + }, + { + "epoch": 4.152679443359375e-05, + "step": 27215, + "training_step_time": 0.15436482429504395 + }, + { + "epoch": 4.15283203125e-05, + "model_forward_time": 0.02505207061767578, + "step": 27216 + }, + { + "epoch": 4.15283203125e-05, + "step": 27216, + "training_step_time": 0.21744155883789062 + }, + { + "epoch": 4.152984619140625e-05, + "model_forward_time": 0.0249631404876709, + "step": 27217 + }, + { + "epoch": 4.152984619140625e-05, + "step": 27217, + "training_step_time": 0.19009661674499512 + }, + { + "epoch": 4.15313720703125e-05, + "model_forward_time": 0.0244290828704834, + "step": 27218 + }, + { + "epoch": 4.15313720703125e-05, + "step": 27218, + "training_step_time": 0.14074063301086426 + }, + { + "epoch": 4.153289794921875e-05, + "model_forward_time": 0.024813413619995117, + "step": 27219 + }, + { + "epoch": 4.153289794921875e-05, + "step": 27219, + "training_step_time": 0.14382076263427734 + }, + { + "epoch": 4.1534423828125e-05, + "grad_norm": 0.08608946949243546, + "learning_rate": 2.3293692695787017e-06, + "loss": 0.003, + "step": 27220 + }, + { + "epoch": 4.1534423828125e-05, + "model_forward_time": 0.024718284606933594, + "step": 27220 + }, + { + "epoch": 4.1534423828125e-05, + "step": 27220, + "training_step_time": 0.21212434768676758 + }, + { + "epoch": 4.153594970703125e-05, + "model_forward_time": 0.024696826934814453, + "step": 27221 + }, + { + "epoch": 4.153594970703125e-05, + "step": 27221, + "training_step_time": 0.11261200904846191 + }, + { + "epoch": 4.15374755859375e-05, + "model_forward_time": 0.024791240692138672, + "step": 27222 + }, + { + "epoch": 4.15374755859375e-05, + "step": 27222, + "training_step_time": 0.10515213012695312 + }, + { + "epoch": 4.153900146484375e-05, + "model_forward_time": 0.02500438690185547, + "step": 27223 + }, + { + "epoch": 4.153900146484375e-05, + "step": 27223, + "training_step_time": 0.10395288467407227 + }, + { + "epoch": 4.154052734375e-05, + "model_forward_time": 0.025331974029541016, + "step": 27224 + }, + { + "epoch": 4.154052734375e-05, + "step": 27224, + "training_step_time": 0.10754251480102539 + }, + { + "epoch": 4.154205322265625e-05, + "model_forward_time": 0.025434017181396484, + "step": 27225 + }, + { + "epoch": 4.154205322265625e-05, + "step": 27225, + "training_step_time": 0.10514974594116211 + }, + { + "epoch": 4.15435791015625e-05, + "model_forward_time": 0.02901768684387207, + "step": 27226 + }, + { + "epoch": 4.15435791015625e-05, + "step": 27226, + "training_step_time": 0.10860252380371094 + }, + { + "epoch": 4.154510498046875e-05, + "model_forward_time": 0.025676965713500977, + "step": 27227 + }, + { + "epoch": 4.154510498046875e-05, + "step": 27227, + "training_step_time": 0.10698628425598145 + }, + { + "epoch": 4.1546630859375e-05, + "model_forward_time": 0.02530837059020996, + "step": 27228 + }, + { + "epoch": 4.1546630859375e-05, + "step": 27228, + "training_step_time": 0.10467171669006348 + }, + { + "epoch": 4.154815673828125e-05, + "model_forward_time": 0.024173259735107422, + "step": 27229 + }, + { + "epoch": 4.154815673828125e-05, + "step": 27229, + "training_step_time": 0.10990262031555176 + }, + { + "epoch": 4.15496826171875e-05, + "grad_norm": 0.05302784964442253, + "learning_rate": 2.3127715305490073e-06, + "loss": 0.0043, + "step": 27230 + }, + { + "epoch": 4.15496826171875e-05, + "model_forward_time": 0.02562117576599121, + "step": 27230 + }, + { + "epoch": 4.15496826171875e-05, + "step": 27230, + "training_step_time": 0.10649919509887695 + }, + { + "epoch": 4.155120849609375e-05, + "model_forward_time": 0.025318622589111328, + "step": 27231 + }, + { + "epoch": 4.155120849609375e-05, + "step": 27231, + "training_step_time": 0.1703331470489502 + }, + { + "epoch": 4.1552734375e-05, + "model_forward_time": 0.024808645248413086, + "step": 27232 + }, + { + "epoch": 4.1552734375e-05, + "step": 27232, + "training_step_time": 0.17896413803100586 + }, + { + "epoch": 4.155426025390625e-05, + "model_forward_time": 0.02453899383544922, + "step": 27233 + }, + { + "epoch": 4.155426025390625e-05, + "step": 27233, + "training_step_time": 0.18662500381469727 + }, + { + "epoch": 4.15557861328125e-05, + "model_forward_time": 0.02443218231201172, + "step": 27234 + }, + { + "epoch": 4.15557861328125e-05, + "step": 27234, + "training_step_time": 0.16045784950256348 + }, + { + "epoch": 4.155731201171875e-05, + "model_forward_time": 0.02357935905456543, + "step": 27235 + }, + { + "epoch": 4.155731201171875e-05, + "step": 27235, + "training_step_time": 0.1817488670349121 + }, + { + "epoch": 4.1558837890625e-05, + "model_forward_time": 0.02414107322692871, + "step": 27236 + }, + { + "epoch": 4.1558837890625e-05, + "step": 27236, + "training_step_time": 0.10860633850097656 + }, + { + "epoch": 4.156036376953125e-05, + "model_forward_time": 0.024489641189575195, + "step": 27237 + }, + { + "epoch": 4.156036376953125e-05, + "step": 27237, + "training_step_time": 0.11304378509521484 + }, + { + "epoch": 4.15618896484375e-05, + "model_forward_time": 0.02534627914428711, + "step": 27238 + }, + { + "epoch": 4.15618896484375e-05, + "step": 27238, + "training_step_time": 0.10930371284484863 + }, + { + "epoch": 4.156341552734375e-05, + "model_forward_time": 0.02501201629638672, + "step": 27239 + }, + { + "epoch": 4.156341552734375e-05, + "step": 27239, + "training_step_time": 0.10605883598327637 + }, + { + "epoch": 4.156494140625e-05, + "grad_norm": 0.042341746389865875, + "learning_rate": 2.296231735998511e-06, + "loss": 0.0028, + "step": 27240 + }, + { + "epoch": 4.156494140625e-05, + "model_forward_time": 0.025308847427368164, + "step": 27240 + }, + { + "epoch": 4.156494140625e-05, + "step": 27240, + "training_step_time": 0.10765624046325684 + }, + { + "epoch": 4.156646728515625e-05, + "model_forward_time": 0.02490544319152832, + "step": 27241 + }, + { + "epoch": 4.156646728515625e-05, + "step": 27241, + "training_step_time": 0.10683536529541016 + }, + { + "epoch": 4.15679931640625e-05, + "model_forward_time": 0.025323867797851562, + "step": 27242 + }, + { + "epoch": 4.15679931640625e-05, + "step": 27242, + "training_step_time": 0.11064696311950684 + }, + { + "epoch": 4.156951904296875e-05, + "model_forward_time": 0.02522444725036621, + "step": 27243 + }, + { + "epoch": 4.156951904296875e-05, + "step": 27243, + "training_step_time": 0.10591602325439453 + }, + { + "epoch": 4.1571044921875e-05, + "model_forward_time": 0.02507162094116211, + "step": 27244 + }, + { + "epoch": 4.1571044921875e-05, + "step": 27244, + "training_step_time": 0.10682129859924316 + }, + { + "epoch": 4.157257080078125e-05, + "model_forward_time": 0.025344133377075195, + "step": 27245 + }, + { + "epoch": 4.157257080078125e-05, + "step": 27245, + "training_step_time": 0.10580635070800781 + }, + { + "epoch": 4.15740966796875e-05, + "model_forward_time": 0.02527761459350586, + "step": 27246 + }, + { + "epoch": 4.15740966796875e-05, + "step": 27246, + "training_step_time": 0.10463190078735352 + }, + { + "epoch": 4.157562255859375e-05, + "model_forward_time": 0.024954795837402344, + "step": 27247 + }, + { + "epoch": 4.157562255859375e-05, + "step": 27247, + "training_step_time": 0.10550522804260254 + }, + { + "epoch": 4.15771484375e-05, + "model_forward_time": 0.025226831436157227, + "step": 27248 + }, + { + "epoch": 4.15771484375e-05, + "step": 27248, + "training_step_time": 0.10754179954528809 + }, + { + "epoch": 4.157867431640625e-05, + "model_forward_time": 0.025364398956298828, + "step": 27249 + }, + { + "epoch": 4.157867431640625e-05, + "step": 27249, + "training_step_time": 0.10554623603820801 + }, + { + "epoch": 4.15802001953125e-05, + "grad_norm": 0.19960153102874756, + "learning_rate": 2.2797499060246253e-06, + "loss": 0.0147, + "step": 27250 + }, + { + "epoch": 4.15802001953125e-05, + "model_forward_time": 0.024859189987182617, + "step": 27250 + }, + { + "epoch": 4.15802001953125e-05, + "step": 27250, + "training_step_time": 0.1105506420135498 + }, + { + "epoch": 4.158172607421875e-05, + "model_forward_time": 0.024476289749145508, + "step": 27251 + }, + { + "epoch": 4.158172607421875e-05, + "step": 27251, + "training_step_time": 0.1045844554901123 + }, + { + "epoch": 4.1583251953125e-05, + "model_forward_time": 0.02500009536743164, + "step": 27252 + }, + { + "epoch": 4.1583251953125e-05, + "step": 27252, + "training_step_time": 0.10751843452453613 + }, + { + "epoch": 4.158477783203125e-05, + "model_forward_time": 0.025313615798950195, + "step": 27253 + }, + { + "epoch": 4.158477783203125e-05, + "step": 27253, + "training_step_time": 0.1093604564666748 + }, + { + "epoch": 4.15863037109375e-05, + "model_forward_time": 0.02519989013671875, + "step": 27254 + }, + { + "epoch": 4.15863037109375e-05, + "step": 27254, + "training_step_time": 0.10562944412231445 + }, + { + "epoch": 4.158782958984375e-05, + "model_forward_time": 0.025484800338745117, + "step": 27255 + }, + { + "epoch": 4.158782958984375e-05, + "step": 27255, + "training_step_time": 0.10617184638977051 + }, + { + "epoch": 4.158935546875e-05, + "model_forward_time": 0.02481842041015625, + "step": 27256 + }, + { + "epoch": 4.158935546875e-05, + "step": 27256, + "training_step_time": 0.10778450965881348 + }, + { + "epoch": 4.159088134765625e-05, + "model_forward_time": 0.02503180503845215, + "step": 27257 + }, + { + "epoch": 4.159088134765625e-05, + "step": 27257, + "training_step_time": 0.11006975173950195 + }, + { + "epoch": 4.15924072265625e-05, + "model_forward_time": 0.026149988174438477, + "step": 27258 + }, + { + "epoch": 4.15924072265625e-05, + "step": 27258, + "training_step_time": 0.10747838020324707 + }, + { + "epoch": 4.159393310546875e-05, + "model_forward_time": 0.025532245635986328, + "step": 27259 + }, + { + "epoch": 4.159393310546875e-05, + "step": 27259, + "training_step_time": 0.14352846145629883 + }, + { + "epoch": 4.1595458984375e-05, + "grad_norm": 0.09094958007335663, + "learning_rate": 2.263326060654336e-06, + "loss": 0.0025, + "step": 27260 + }, + { + "epoch": 4.1595458984375e-05, + "model_forward_time": 0.024706125259399414, + "step": 27260 + }, + { + "epoch": 4.1595458984375e-05, + "step": 27260, + "training_step_time": 0.15816569328308105 + }, + { + "epoch": 4.159698486328125e-05, + "model_forward_time": 0.024568796157836914, + "step": 27261 + }, + { + "epoch": 4.159698486328125e-05, + "step": 27261, + "training_step_time": 0.15113282203674316 + }, + { + "epoch": 4.15985107421875e-05, + "model_forward_time": 0.024058818817138672, + "step": 27262 + }, + { + "epoch": 4.15985107421875e-05, + "step": 27262, + "training_step_time": 0.1407921314239502 + }, + { + "epoch": 4.160003662109375e-05, + "model_forward_time": 0.02455878257751465, + "step": 27263 + }, + { + "epoch": 4.160003662109375e-05, + "step": 27263, + "training_step_time": 0.15768003463745117 + }, + { + "epoch": 4.16015625e-05, + "model_forward_time": 0.0253140926361084, + "step": 27264 + }, + { + "epoch": 4.16015625e-05, + "step": 27264, + "training_step_time": 0.15874528884887695 + }, + { + "epoch": 4.160308837890625e-05, + "model_forward_time": 0.024596452713012695, + "step": 27265 + }, + { + "epoch": 4.160308837890625e-05, + "step": 27265, + "training_step_time": 0.16185450553894043 + }, + { + "epoch": 4.16046142578125e-05, + "model_forward_time": 0.02475285530090332, + "step": 27266 + }, + { + "epoch": 4.16046142578125e-05, + "step": 27266, + "training_step_time": 0.11216068267822266 + }, + { + "epoch": 4.160614013671875e-05, + "model_forward_time": 0.02438640594482422, + "step": 27267 + }, + { + "epoch": 4.160614013671875e-05, + "step": 27267, + "training_step_time": 0.10574841499328613 + }, + { + "epoch": 4.1607666015625e-05, + "model_forward_time": 0.025110483169555664, + "step": 27268 + }, + { + "epoch": 4.1607666015625e-05, + "step": 27268, + "training_step_time": 0.10468578338623047 + }, + { + "epoch": 4.160919189453125e-05, + "model_forward_time": 0.025249958038330078, + "step": 27269 + }, + { + "epoch": 4.160919189453125e-05, + "step": 27269, + "training_step_time": 0.10563182830810547 + }, + { + "epoch": 4.16107177734375e-05, + "grad_norm": 0.05373050272464752, + "learning_rate": 2.2469602198441573e-06, + "loss": 0.0017, + "step": 27270 + }, + { + "epoch": 4.16107177734375e-05, + "model_forward_time": 0.025408506393432617, + "step": 27270 + }, + { + "epoch": 4.16107177734375e-05, + "step": 27270, + "training_step_time": 0.10768294334411621 + }, + { + "epoch": 4.161224365234375e-05, + "model_forward_time": 0.02541208267211914, + "step": 27271 + }, + { + "epoch": 4.161224365234375e-05, + "step": 27271, + "training_step_time": 0.10850858688354492 + }, + { + "epoch": 4.161376953125e-05, + "model_forward_time": 0.025004863739013672, + "step": 27272 + }, + { + "epoch": 4.161376953125e-05, + "step": 27272, + "training_step_time": 0.1045832633972168 + }, + { + "epoch": 4.161529541015625e-05, + "model_forward_time": 0.02532649040222168, + "step": 27273 + }, + { + "epoch": 4.161529541015625e-05, + "step": 27273, + "training_step_time": 0.13447165489196777 + }, + { + "epoch": 4.16168212890625e-05, + "model_forward_time": 0.02423405647277832, + "step": 27274 + }, + { + "epoch": 4.16168212890625e-05, + "step": 27274, + "training_step_time": 0.17017817497253418 + }, + { + "epoch": 4.161834716796875e-05, + "model_forward_time": 0.023329734802246094, + "step": 27275 + }, + { + "epoch": 4.161834716796875e-05, + "step": 27275, + "training_step_time": 0.16400837898254395 + }, + { + "epoch": 4.1619873046875e-05, + "model_forward_time": 0.02439594268798828, + "step": 27276 + }, + { + "epoch": 4.1619873046875e-05, + "step": 27276, + "training_step_time": 0.1668076515197754 + }, + { + "epoch": 4.162139892578125e-05, + "model_forward_time": 0.024407148361206055, + "step": 27277 + }, + { + "epoch": 4.162139892578125e-05, + "step": 27277, + "training_step_time": 0.16244220733642578 + }, + { + "epoch": 4.16229248046875e-05, + "model_forward_time": 0.024383544921875, + "step": 27278 + }, + { + "epoch": 4.16229248046875e-05, + "step": 27278, + "training_step_time": 0.16250157356262207 + }, + { + "epoch": 4.162445068359375e-05, + "model_forward_time": 0.023990869522094727, + "step": 27279 + }, + { + "epoch": 4.162445068359375e-05, + "step": 27279, + "training_step_time": 0.1788322925567627 + }, + { + "epoch": 4.16259765625e-05, + "grad_norm": 0.13063377141952515, + "learning_rate": 2.230652403480127e-06, + "loss": 0.0037, + "step": 27280 + }, + { + "epoch": 4.16259765625e-05, + "model_forward_time": 0.024212121963500977, + "step": 27280 + }, + { + "epoch": 4.16259765625e-05, + "step": 27280, + "training_step_time": 0.1295943260192871 + }, + { + "epoch": 4.162750244140625e-05, + "model_forward_time": 0.02401900291442871, + "step": 27281 + }, + { + "epoch": 4.162750244140625e-05, + "step": 27281, + "training_step_time": 0.1951618194580078 + }, + { + "epoch": 4.16290283203125e-05, + "model_forward_time": 0.024231433868408203, + "step": 27282 + }, + { + "epoch": 4.16290283203125e-05, + "step": 27282, + "training_step_time": 0.11963868141174316 + }, + { + "epoch": 4.163055419921875e-05, + "model_forward_time": 0.024352312088012695, + "step": 27283 + }, + { + "epoch": 4.163055419921875e-05, + "step": 27283, + "training_step_time": 0.116668701171875 + }, + { + "epoch": 4.1632080078125e-05, + "model_forward_time": 0.02626776695251465, + "step": 27284 + }, + { + "epoch": 4.1632080078125e-05, + "step": 27284, + "training_step_time": 0.11092948913574219 + }, + { + "epoch": 4.163360595703125e-05, + "model_forward_time": 0.02512645721435547, + "step": 27285 + }, + { + "epoch": 4.163360595703125e-05, + "step": 27285, + "training_step_time": 0.10993194580078125 + }, + { + "epoch": 4.16351318359375e-05, + "model_forward_time": 0.025506973266601562, + "step": 27286 + }, + { + "epoch": 4.16351318359375e-05, + "step": 27286, + "training_step_time": 0.11314678192138672 + }, + { + "epoch": 4.163665771484375e-05, + "model_forward_time": 0.02520132064819336, + "step": 27287 + }, + { + "epoch": 4.163665771484375e-05, + "step": 27287, + "training_step_time": 0.10960865020751953 + }, + { + "epoch": 4.163818359375e-05, + "model_forward_time": 0.024880170822143555, + "step": 27288 + }, + { + "epoch": 4.163818359375e-05, + "step": 27288, + "training_step_time": 0.11181998252868652 + }, + { + "epoch": 4.163970947265625e-05, + "model_forward_time": 0.02557969093322754, + "step": 27289 + }, + { + "epoch": 4.163970947265625e-05, + "step": 27289, + "training_step_time": 0.10891604423522949 + }, + { + "epoch": 4.16412353515625e-05, + "grad_norm": 0.12735968828201294, + "learning_rate": 2.214402631377782e-06, + "loss": 0.0024, + "step": 27290 + }, + { + "epoch": 4.16412353515625e-05, + "model_forward_time": 0.028454065322875977, + "step": 27290 + }, + { + "epoch": 4.16412353515625e-05, + "step": 27290, + "training_step_time": 0.11230134963989258 + }, + { + "epoch": 4.164276123046875e-05, + "model_forward_time": 0.025388240814208984, + "step": 27291 + }, + { + "epoch": 4.164276123046875e-05, + "step": 27291, + "training_step_time": 0.10927891731262207 + }, + { + "epoch": 4.1644287109375e-05, + "model_forward_time": 0.026097774505615234, + "step": 27292 + }, + { + "epoch": 4.1644287109375e-05, + "step": 27292, + "training_step_time": 0.11127090454101562 + }, + { + "epoch": 4.164581298828125e-05, + "model_forward_time": 0.025423765182495117, + "step": 27293 + }, + { + "epoch": 4.164581298828125e-05, + "step": 27293, + "training_step_time": 0.11099672317504883 + }, + { + "epoch": 4.16473388671875e-05, + "model_forward_time": 0.025485515594482422, + "step": 27294 + }, + { + "epoch": 4.16473388671875e-05, + "step": 27294, + "training_step_time": 0.1084287166595459 + }, + { + "epoch": 4.164886474609375e-05, + "model_forward_time": 0.025081634521484375, + "step": 27295 + }, + { + "epoch": 4.164886474609375e-05, + "step": 27295, + "training_step_time": 0.10566353797912598 + }, + { + "epoch": 4.1650390625e-05, + "model_forward_time": 0.025013208389282227, + "step": 27296 + }, + { + "epoch": 4.1650390625e-05, + "step": 27296, + "training_step_time": 0.10587263107299805 + }, + { + "epoch": 4.165191650390625e-05, + "model_forward_time": 0.025463581085205078, + "step": 27297 + }, + { + "epoch": 4.165191650390625e-05, + "step": 27297, + "training_step_time": 0.10672402381896973 + }, + { + "epoch": 4.16534423828125e-05, + "model_forward_time": 0.02516484260559082, + "step": 27298 + }, + { + "epoch": 4.16534423828125e-05, + "step": 27298, + "training_step_time": 0.10550856590270996 + }, + { + "epoch": 4.165496826171875e-05, + "model_forward_time": 0.025278329849243164, + "step": 27299 + }, + { + "epoch": 4.165496826171875e-05, + "step": 27299, + "training_step_time": 0.10809063911437988 + }, + { + "epoch": 4.1656494140625e-05, + "grad_norm": 0.05588368698954582, + "learning_rate": 2.1982109232821178e-06, + "loss": 0.0022, + "step": 27300 + }, + { + "epoch": 4.1656494140625e-05, + "model_forward_time": 0.025710344314575195, + "step": 27300 + }, + { + "epoch": 4.1656494140625e-05, + "step": 27300, + "training_step_time": 0.10536336898803711 + }, + { + "epoch": 4.165802001953125e-05, + "model_forward_time": 0.027010679244995117, + "step": 27301 + }, + { + "epoch": 4.165802001953125e-05, + "step": 27301, + "training_step_time": 0.10754013061523438 + }, + { + "epoch": 4.16595458984375e-05, + "model_forward_time": 0.02555370330810547, + "step": 27302 + }, + { + "epoch": 4.16595458984375e-05, + "step": 27302, + "training_step_time": 0.10669493675231934 + }, + { + "epoch": 4.166107177734375e-05, + "model_forward_time": 0.025969266891479492, + "step": 27303 + }, + { + "epoch": 4.166107177734375e-05, + "step": 27303, + "training_step_time": 0.10797119140625 + }, + { + "epoch": 4.166259765625e-05, + "model_forward_time": 0.02422642707824707, + "step": 27304 + }, + { + "epoch": 4.166259765625e-05, + "step": 27304, + "training_step_time": 0.14526081085205078 + }, + { + "epoch": 4.166412353515625e-05, + "model_forward_time": 0.024912118911743164, + "step": 27305 + }, + { + "epoch": 4.166412353515625e-05, + "step": 27305, + "training_step_time": 0.16699457168579102 + }, + { + "epoch": 4.16656494140625e-05, + "model_forward_time": 0.02438950538635254, + "step": 27306 + }, + { + "epoch": 4.16656494140625e-05, + "step": 27306, + "training_step_time": 0.20656299591064453 + }, + { + "epoch": 4.166717529296875e-05, + "model_forward_time": 0.024440526962280273, + "step": 27307 + }, + { + "epoch": 4.166717529296875e-05, + "step": 27307, + "training_step_time": 0.15294170379638672 + }, + { + "epoch": 4.1668701171875e-05, + "model_forward_time": 0.024743080139160156, + "step": 27308 + }, + { + "epoch": 4.1668701171875e-05, + "step": 27308, + "training_step_time": 0.18179035186767578 + }, + { + "epoch": 4.167022705078125e-05, + "model_forward_time": 0.024389982223510742, + "step": 27309 + }, + { + "epoch": 4.167022705078125e-05, + "step": 27309, + "training_step_time": 0.1519324779510498 + }, + { + "epoch": 4.16717529296875e-05, + "grad_norm": 0.16500595211982727, + "learning_rate": 2.1820772988676076e-06, + "loss": 0.0054, + "step": 27310 + }, + { + "epoch": 4.16717529296875e-05, + "model_forward_time": 0.024807214736938477, + "step": 27310 + }, + { + "epoch": 4.16717529296875e-05, + "step": 27310, + "training_step_time": 0.1231386661529541 + }, + { + "epoch": 4.167327880859375e-05, + "model_forward_time": 0.027429580688476562, + "step": 27311 + }, + { + "epoch": 4.167327880859375e-05, + "step": 27311, + "training_step_time": 0.10866975784301758 + }, + { + "epoch": 4.16748046875e-05, + "model_forward_time": 0.025394201278686523, + "step": 27312 + }, + { + "epoch": 4.16748046875e-05, + "step": 27312, + "training_step_time": 0.10531187057495117 + }, + { + "epoch": 4.167633056640625e-05, + "model_forward_time": 0.025202035903930664, + "step": 27313 + }, + { + "epoch": 4.167633056640625e-05, + "step": 27313, + "training_step_time": 0.10666823387145996 + }, + { + "epoch": 4.16778564453125e-05, + "model_forward_time": 0.02541041374206543, + "step": 27314 + }, + { + "epoch": 4.16778564453125e-05, + "step": 27314, + "training_step_time": 0.10874700546264648 + }, + { + "epoch": 4.167938232421875e-05, + "model_forward_time": 0.025158405303955078, + "step": 27315 + }, + { + "epoch": 4.167938232421875e-05, + "step": 27315, + "training_step_time": 0.10769200325012207 + }, + { + "epoch": 4.1680908203125e-05, + "model_forward_time": 0.026424407958984375, + "step": 27316 + }, + { + "epoch": 4.1680908203125e-05, + "step": 27316, + "training_step_time": 0.10939788818359375 + }, + { + "epoch": 4.168243408203125e-05, + "model_forward_time": 0.024980545043945312, + "step": 27317 + }, + { + "epoch": 4.168243408203125e-05, + "step": 27317, + "training_step_time": 0.10655760765075684 + }, + { + "epoch": 4.16839599609375e-05, + "model_forward_time": 0.02508687973022461, + "step": 27318 + }, + { + "epoch": 4.16839599609375e-05, + "step": 27318, + "training_step_time": 0.10556173324584961 + }, + { + "epoch": 4.168548583984375e-05, + "model_forward_time": 0.025537490844726562, + "step": 27319 + }, + { + "epoch": 4.168548583984375e-05, + "step": 27319, + "training_step_time": 0.10593318939208984 + }, + { + "epoch": 4.168701171875e-05, + "grad_norm": 0.06887489557266235, + "learning_rate": 2.1660017777381135e-06, + "loss": 0.0041, + "step": 27320 + }, + { + "epoch": 4.168701171875e-05, + "model_forward_time": 0.025054454803466797, + "step": 27320 + }, + { + "epoch": 4.168701171875e-05, + "step": 27320, + "training_step_time": 0.10872149467468262 + }, + { + "epoch": 4.168853759765625e-05, + "model_forward_time": 0.024283409118652344, + "step": 27321 + }, + { + "epoch": 4.168853759765625e-05, + "step": 27321, + "training_step_time": 0.10677909851074219 + }, + { + "epoch": 4.16900634765625e-05, + "model_forward_time": 0.02422189712524414, + "step": 27322 + }, + { + "epoch": 4.16900634765625e-05, + "step": 27322, + "training_step_time": 0.10958290100097656 + }, + { + "epoch": 4.169158935546875e-05, + "model_forward_time": 0.02456951141357422, + "step": 27323 + }, + { + "epoch": 4.169158935546875e-05, + "step": 27323, + "training_step_time": 0.16224956512451172 + }, + { + "epoch": 4.1693115234375e-05, + "model_forward_time": 0.024704456329345703, + "step": 27324 + }, + { + "epoch": 4.1693115234375e-05, + "step": 27324, + "training_step_time": 0.15429282188415527 + }, + { + "epoch": 4.169464111328125e-05, + "model_forward_time": 0.024566173553466797, + "step": 27325 + }, + { + "epoch": 4.169464111328125e-05, + "step": 27325, + "training_step_time": 0.11595964431762695 + }, + { + "epoch": 4.16961669921875e-05, + "model_forward_time": 0.024733304977416992, + "step": 27326 + }, + { + "epoch": 4.16961669921875e-05, + "step": 27326, + "training_step_time": 0.1186983585357666 + }, + { + "epoch": 4.169769287109375e-05, + "model_forward_time": 0.025291919708251953, + "step": 27327 + }, + { + "epoch": 4.169769287109375e-05, + "step": 27327, + "training_step_time": 0.11393404006958008 + }, + { + "epoch": 4.169921875e-05, + "model_forward_time": 0.02526998519897461, + "step": 27328 + }, + { + "epoch": 4.169921875e-05, + "step": 27328, + "training_step_time": 0.11274313926696777 + }, + { + "epoch": 4.170074462890625e-05, + "model_forward_time": 0.024403095245361328, + "step": 27329 + }, + { + "epoch": 4.170074462890625e-05, + "step": 27329, + "training_step_time": 0.18658804893493652 + }, + { + "epoch": 4.17022705078125e-05, + "grad_norm": 0.07587850093841553, + "learning_rate": 2.149984379426906e-06, + "loss": 0.0019, + "step": 27330 + }, + { + "epoch": 4.17022705078125e-05, + "model_forward_time": 0.024632692337036133, + "step": 27330 + }, + { + "epoch": 4.17022705078125e-05, + "step": 27330, + "training_step_time": 0.10962820053100586 + }, + { + "epoch": 4.170379638671875e-05, + "model_forward_time": 0.024538278579711914, + "step": 27331 + }, + { + "epoch": 4.170379638671875e-05, + "step": 27331, + "training_step_time": 0.11240458488464355 + }, + { + "epoch": 4.1705322265625e-05, + "model_forward_time": 0.025141000747680664, + "step": 27332 + }, + { + "epoch": 4.1705322265625e-05, + "step": 27332, + "training_step_time": 0.10818266868591309 + }, + { + "epoch": 4.170684814453125e-05, + "model_forward_time": 0.025443077087402344, + "step": 27333 + }, + { + "epoch": 4.170684814453125e-05, + "step": 27333, + "training_step_time": 0.10554313659667969 + }, + { + "epoch": 4.17083740234375e-05, + "model_forward_time": 0.025351524353027344, + "step": 27334 + }, + { + "epoch": 4.17083740234375e-05, + "step": 27334, + "training_step_time": 0.10863184928894043 + }, + { + "epoch": 4.170989990234375e-05, + "model_forward_time": 0.02562403678894043, + "step": 27335 + }, + { + "epoch": 4.170989990234375e-05, + "step": 27335, + "training_step_time": 0.11043453216552734 + }, + { + "epoch": 4.171142578125e-05, + "model_forward_time": 0.025097131729125977, + "step": 27336 + }, + { + "epoch": 4.171142578125e-05, + "step": 27336, + "training_step_time": 0.10755085945129395 + }, + { + "epoch": 4.171295166015625e-05, + "model_forward_time": 0.025231361389160156, + "step": 27337 + }, + { + "epoch": 4.171295166015625e-05, + "step": 27337, + "training_step_time": 0.11263108253479004 + }, + { + "epoch": 4.17144775390625e-05, + "model_forward_time": 0.025311708450317383, + "step": 27338 + }, + { + "epoch": 4.17144775390625e-05, + "step": 27338, + "training_step_time": 0.10808801651000977 + }, + { + "epoch": 4.171600341796875e-05, + "model_forward_time": 0.0250396728515625, + "step": 27339 + }, + { + "epoch": 4.171600341796875e-05, + "step": 27339, + "training_step_time": 0.1073613166809082 + }, + { + "epoch": 4.1717529296875e-05, + "grad_norm": 0.05957550182938576, + "learning_rate": 2.134025123396638e-06, + "loss": 0.0037, + "step": 27340 + }, + { + "epoch": 4.1717529296875e-05, + "model_forward_time": 0.025179147720336914, + "step": 27340 + }, + { + "epoch": 4.1717529296875e-05, + "step": 27340, + "training_step_time": 0.10913562774658203 + }, + { + "epoch": 4.171905517578125e-05, + "model_forward_time": 0.02502608299255371, + "step": 27341 + }, + { + "epoch": 4.171905517578125e-05, + "step": 27341, + "training_step_time": 0.10604643821716309 + }, + { + "epoch": 4.17205810546875e-05, + "model_forward_time": 0.025432348251342773, + "step": 27342 + }, + { + "epoch": 4.17205810546875e-05, + "step": 27342, + "training_step_time": 0.1089167594909668 + }, + { + "epoch": 4.172210693359375e-05, + "model_forward_time": 0.02519512176513672, + "step": 27343 + }, + { + "epoch": 4.172210693359375e-05, + "step": 27343, + "training_step_time": 0.10561847686767578 + }, + { + "epoch": 4.17236328125e-05, + "model_forward_time": 0.025223731994628906, + "step": 27344 + }, + { + "epoch": 4.17236328125e-05, + "step": 27344, + "training_step_time": 0.10542917251586914 + }, + { + "epoch": 4.172515869140625e-05, + "model_forward_time": 0.025231599807739258, + "step": 27345 + }, + { + "epoch": 4.172515869140625e-05, + "step": 27345, + "training_step_time": 0.10626411437988281 + }, + { + "epoch": 4.17266845703125e-05, + "model_forward_time": 0.025161266326904297, + "step": 27346 + }, + { + "epoch": 4.17266845703125e-05, + "step": 27346, + "training_step_time": 0.10964298248291016 + }, + { + "epoch": 4.172821044921875e-05, + "model_forward_time": 0.025211572647094727, + "step": 27347 + }, + { + "epoch": 4.172821044921875e-05, + "step": 27347, + "training_step_time": 0.1046903133392334 + }, + { + "epoch": 4.1729736328125e-05, + "model_forward_time": 0.02510547637939453, + "step": 27348 + }, + { + "epoch": 4.1729736328125e-05, + "step": 27348, + "training_step_time": 0.10929656028747559 + }, + { + "epoch": 4.173126220703125e-05, + "model_forward_time": 0.025201797485351562, + "step": 27349 + }, + { + "epoch": 4.173126220703125e-05, + "step": 27349, + "training_step_time": 0.10480785369873047 + }, + { + "epoch": 4.17327880859375e-05, + "grad_norm": 0.05198904499411583, + "learning_rate": 2.118124029039309e-06, + "loss": 0.0075, + "step": 27350 + }, + { + "epoch": 4.17327880859375e-05, + "model_forward_time": 0.025602340698242188, + "step": 27350 + }, + { + "epoch": 4.17327880859375e-05, + "step": 27350, + "training_step_time": 0.15978789329528809 + }, + { + "epoch": 4.173431396484375e-05, + "model_forward_time": 0.024624347686767578, + "step": 27351 + }, + { + "epoch": 4.173431396484375e-05, + "step": 27351, + "training_step_time": 0.16864562034606934 + }, + { + "epoch": 4.173583984375e-05, + "model_forward_time": 0.024357080459594727, + "step": 27352 + }, + { + "epoch": 4.173583984375e-05, + "step": 27352, + "training_step_time": 0.18687701225280762 + }, + { + "epoch": 4.173736572265625e-05, + "model_forward_time": 0.024384737014770508, + "step": 27353 + }, + { + "epoch": 4.173736572265625e-05, + "step": 27353, + "training_step_time": 0.16687655448913574 + }, + { + "epoch": 4.17388916015625e-05, + "model_forward_time": 0.024014949798583984, + "step": 27354 + }, + { + "epoch": 4.17388916015625e-05, + "step": 27354, + "training_step_time": 0.19188261032104492 + }, + { + "epoch": 4.174041748046875e-05, + "model_forward_time": 0.024992942810058594, + "step": 27355 + }, + { + "epoch": 4.174041748046875e-05, + "step": 27355, + "training_step_time": 0.13675403594970703 + }, + { + "epoch": 4.1741943359375e-05, + "model_forward_time": 0.024609088897705078, + "step": 27356 + }, + { + "epoch": 4.1741943359375e-05, + "step": 27356, + "training_step_time": 0.21551728248596191 + }, + { + "epoch": 4.174346923828125e-05, + "model_forward_time": 0.024300813674926758, + "step": 27357 + }, + { + "epoch": 4.174346923828125e-05, + "step": 27357, + "training_step_time": 0.11113977432250977 + }, + { + "epoch": 4.17449951171875e-05, + "model_forward_time": 0.023906230926513672, + "step": 27358 + }, + { + "epoch": 4.17449951171875e-05, + "step": 27358, + "training_step_time": 0.10847210884094238 + }, + { + "epoch": 4.174652099609375e-05, + "model_forward_time": 0.025197505950927734, + "step": 27359 + }, + { + "epoch": 4.174652099609375e-05, + "step": 27359, + "training_step_time": 0.10991692543029785 + }, + { + "epoch": 4.1748046875e-05, + "grad_norm": 0.07137750834226608, + "learning_rate": 2.102281115676258e-06, + "loss": 0.0069, + "step": 27360 + }, + { + "epoch": 4.1748046875e-05, + "model_forward_time": 0.024916648864746094, + "step": 27360 + }, + { + "epoch": 4.1748046875e-05, + "step": 27360, + "training_step_time": 0.10718750953674316 + }, + { + "epoch": 4.174957275390625e-05, + "model_forward_time": 0.025011539459228516, + "step": 27361 + }, + { + "epoch": 4.174957275390625e-05, + "step": 27361, + "training_step_time": 0.11072492599487305 + }, + { + "epoch": 4.17510986328125e-05, + "model_forward_time": 0.025212764739990234, + "step": 27362 + }, + { + "epoch": 4.17510986328125e-05, + "step": 27362, + "training_step_time": 0.10950970649719238 + }, + { + "epoch": 4.175262451171875e-05, + "model_forward_time": 0.02538776397705078, + "step": 27363 + }, + { + "epoch": 4.175262451171875e-05, + "step": 27363, + "training_step_time": 0.10811376571655273 + }, + { + "epoch": 4.1754150390625e-05, + "model_forward_time": 0.025346994400024414, + "step": 27364 + }, + { + "epoch": 4.1754150390625e-05, + "step": 27364, + "training_step_time": 0.1065821647644043 + }, + { + "epoch": 4.175567626953125e-05, + "model_forward_time": 0.02498650550842285, + "step": 27365 + }, + { + "epoch": 4.175567626953125e-05, + "step": 27365, + "training_step_time": 0.10657715797424316 + }, + { + "epoch": 4.17572021484375e-05, + "model_forward_time": 0.025121688842773438, + "step": 27366 + }, + { + "epoch": 4.17572021484375e-05, + "step": 27366, + "training_step_time": 0.10641741752624512 + }, + { + "epoch": 4.175872802734375e-05, + "model_forward_time": 0.025173425674438477, + "step": 27367 + }, + { + "epoch": 4.175872802734375e-05, + "step": 27367, + "training_step_time": 0.10604643821716309 + }, + { + "epoch": 4.176025390625e-05, + "model_forward_time": 0.025542497634887695, + "step": 27368 + }, + { + "epoch": 4.176025390625e-05, + "step": 27368, + "training_step_time": 0.10823392868041992 + }, + { + "epoch": 4.176177978515625e-05, + "model_forward_time": 0.02549266815185547, + "step": 27369 + }, + { + "epoch": 4.176177978515625e-05, + "step": 27369, + "training_step_time": 0.1940605640411377 + }, + { + "epoch": 4.17633056640625e-05, + "grad_norm": 0.05839123576879501, + "learning_rate": 2.0864964025581135e-06, + "loss": 0.0041, + "step": 27370 + }, + { + "epoch": 4.17633056640625e-05, + "model_forward_time": 0.02423882484436035, + "step": 27370 + }, + { + "epoch": 4.17633056640625e-05, + "step": 27370, + "training_step_time": 0.1367037296295166 + }, + { + "epoch": 4.176483154296875e-05, + "model_forward_time": 0.02413344383239746, + "step": 27371 + }, + { + "epoch": 4.176483154296875e-05, + "step": 27371, + "training_step_time": 0.10798144340515137 + }, + { + "epoch": 4.1766357421875e-05, + "model_forward_time": 0.025147676467895508, + "step": 27372 + }, + { + "epoch": 4.1766357421875e-05, + "step": 27372, + "training_step_time": 0.12361860275268555 + }, + { + "epoch": 4.176788330078125e-05, + "model_forward_time": 0.02529311180114746, + "step": 27373 + }, + { + "epoch": 4.176788330078125e-05, + "step": 27373, + "training_step_time": 0.1168520450592041 + }, + { + "epoch": 4.17694091796875e-05, + "model_forward_time": 0.02512192726135254, + "step": 27374 + }, + { + "epoch": 4.17694091796875e-05, + "step": 27374, + "training_step_time": 0.10658550262451172 + }, + { + "epoch": 4.177093505859375e-05, + "model_forward_time": 0.02530956268310547, + "step": 27375 + }, + { + "epoch": 4.177093505859375e-05, + "step": 27375, + "training_step_time": 0.18764686584472656 + }, + { + "epoch": 4.17724609375e-05, + "model_forward_time": 0.02440500259399414, + "step": 27376 + }, + { + "epoch": 4.17724609375e-05, + "step": 27376, + "training_step_time": 0.10719728469848633 + }, + { + "epoch": 4.177398681640625e-05, + "model_forward_time": 0.024349451065063477, + "step": 27377 + }, + { + "epoch": 4.177398681640625e-05, + "step": 27377, + "training_step_time": 0.10791373252868652 + }, + { + "epoch": 4.17755126953125e-05, + "model_forward_time": 0.025031566619873047, + "step": 27378 + }, + { + "epoch": 4.17755126953125e-05, + "step": 27378, + "training_step_time": 0.12014293670654297 + }, + { + "epoch": 4.177703857421875e-05, + "model_forward_time": 0.02491593360900879, + "step": 27379 + }, + { + "epoch": 4.177703857421875e-05, + "step": 27379, + "training_step_time": 0.10888314247131348 + }, + { + "epoch": 4.1778564453125e-05, + "grad_norm": 0.06713982671499252, + "learning_rate": 2.0707699088647836e-06, + "loss": 0.004, + "step": 27380 + }, + { + "epoch": 4.1778564453125e-05, + "model_forward_time": 0.025205135345458984, + "step": 27380 + }, + { + "epoch": 4.1778564453125e-05, + "step": 27380, + "training_step_time": 0.10607433319091797 + }, + { + "epoch": 4.178009033203125e-05, + "model_forward_time": 0.025559425354003906, + "step": 27381 + }, + { + "epoch": 4.178009033203125e-05, + "step": 27381, + "training_step_time": 0.10652899742126465 + }, + { + "epoch": 4.17816162109375e-05, + "model_forward_time": 0.02518486976623535, + "step": 27382 + }, + { + "epoch": 4.17816162109375e-05, + "step": 27382, + "training_step_time": 0.10806679725646973 + }, + { + "epoch": 4.178314208984375e-05, + "model_forward_time": 0.025487422943115234, + "step": 27383 + }, + { + "epoch": 4.178314208984375e-05, + "step": 27383, + "training_step_time": 0.10729289054870605 + }, + { + "epoch": 4.178466796875e-05, + "model_forward_time": 0.025025129318237305, + "step": 27384 + }, + { + "epoch": 4.178466796875e-05, + "step": 27384, + "training_step_time": 0.10611486434936523 + }, + { + "epoch": 4.178619384765625e-05, + "model_forward_time": 0.025339365005493164, + "step": 27385 + }, + { + "epoch": 4.178619384765625e-05, + "step": 27385, + "training_step_time": 0.10610365867614746 + }, + { + "epoch": 4.17877197265625e-05, + "model_forward_time": 0.02490401268005371, + "step": 27386 + }, + { + "epoch": 4.17877197265625e-05, + "step": 27386, + "training_step_time": 0.10580229759216309 + }, + { + "epoch": 4.178924560546875e-05, + "model_forward_time": 0.02511429786682129, + "step": 27387 + }, + { + "epoch": 4.178924560546875e-05, + "step": 27387, + "training_step_time": 0.1052405834197998 + }, + { + "epoch": 4.1790771484375e-05, + "model_forward_time": 0.02518939971923828, + "step": 27388 + }, + { + "epoch": 4.1790771484375e-05, + "step": 27388, + "training_step_time": 0.11149716377258301 + }, + { + "epoch": 4.179229736328125e-05, + "model_forward_time": 0.02562713623046875, + "step": 27389 + }, + { + "epoch": 4.179229736328125e-05, + "step": 27389, + "training_step_time": 0.1062936782836914 + }, + { + "epoch": 4.17938232421875e-05, + "grad_norm": 0.09553220868110657, + "learning_rate": 2.0551016537054493e-06, + "loss": 0.0036, + "step": 27390 + }, + { + "epoch": 4.17938232421875e-05, + "model_forward_time": 0.025326013565063477, + "step": 27390 + }, + { + "epoch": 4.17938232421875e-05, + "step": 27390, + "training_step_time": 0.10575199127197266 + }, + { + "epoch": 4.179534912109375e-05, + "model_forward_time": 0.025343656539916992, + "step": 27391 + }, + { + "epoch": 4.179534912109375e-05, + "step": 27391, + "training_step_time": 0.10982632637023926 + }, + { + "epoch": 4.1796875e-05, + "model_forward_time": 0.02521800994873047, + "step": 27392 + }, + { + "epoch": 4.1796875e-05, + "step": 27392, + "training_step_time": 0.109375 + }, + { + "epoch": 4.179840087890625e-05, + "model_forward_time": 0.025253772735595703, + "step": 27393 + }, + { + "epoch": 4.179840087890625e-05, + "step": 27393, + "training_step_time": 0.10704207420349121 + }, + { + "epoch": 4.17999267578125e-05, + "model_forward_time": 0.025048255920410156, + "step": 27394 + }, + { + "epoch": 4.17999267578125e-05, + "step": 27394, + "training_step_time": 0.1058192253112793 + }, + { + "epoch": 4.180145263671875e-05, + "model_forward_time": 0.025005578994750977, + "step": 27395 + }, + { + "epoch": 4.180145263671875e-05, + "step": 27395, + "training_step_time": 0.10320258140563965 + }, + { + "epoch": 4.1802978515625e-05, + "model_forward_time": 0.024826526641845703, + "step": 27396 + }, + { + "epoch": 4.1802978515625e-05, + "step": 27396, + "training_step_time": 0.16946101188659668 + }, + { + "epoch": 4.180450439453125e-05, + "model_forward_time": 0.024760723114013672, + "step": 27397 + }, + { + "epoch": 4.180450439453125e-05, + "step": 27397, + "training_step_time": 0.18193459510803223 + }, + { + "epoch": 4.18060302734375e-05, + "model_forward_time": 0.02702498435974121, + "step": 27398 + }, + { + "epoch": 4.18060302734375e-05, + "step": 27398, + "training_step_time": 0.19617080688476562 + }, + { + "epoch": 4.180755615234375e-05, + "model_forward_time": 0.025606870651245117, + "step": 27399 + }, + { + "epoch": 4.180755615234375e-05, + "step": 27399, + "training_step_time": 0.17314505577087402 + }, + { + "epoch": 4.180908203125e-05, + "grad_norm": 0.05021870881319046, + "learning_rate": 2.0394916561185083e-06, + "loss": 0.0022, + "step": 27400 + }, + { + "epoch": 4.180908203125e-05, + "model_forward_time": 0.025249719619750977, + "step": 27400 + }, + { + "epoch": 4.180908203125e-05, + "step": 27400, + "training_step_time": 0.17282986640930176 + }, + { + "epoch": 4.181060791015625e-05, + "model_forward_time": 0.024559736251831055, + "step": 27401 + }, + { + "epoch": 4.181060791015625e-05, + "step": 27401, + "training_step_time": 0.11244368553161621 + }, + { + "epoch": 4.18121337890625e-05, + "model_forward_time": 0.024523019790649414, + "step": 27402 + }, + { + "epoch": 4.18121337890625e-05, + "step": 27402, + "training_step_time": 0.10839724540710449 + }, + { + "epoch": 4.181365966796875e-05, + "model_forward_time": 0.025531768798828125, + "step": 27403 + }, + { + "epoch": 4.181365966796875e-05, + "step": 27403, + "training_step_time": 0.11739587783813477 + }, + { + "epoch": 4.1815185546875e-05, + "model_forward_time": 0.025006532669067383, + "step": 27404 + }, + { + "epoch": 4.1815185546875e-05, + "step": 27404, + "training_step_time": 0.11202168464660645 + }, + { + "epoch": 4.181671142578125e-05, + "model_forward_time": 0.02541518211364746, + "step": 27405 + }, + { + "epoch": 4.181671142578125e-05, + "step": 27405, + "training_step_time": 0.10808777809143066 + }, + { + "epoch": 4.18182373046875e-05, + "model_forward_time": 0.024933338165283203, + "step": 27406 + }, + { + "epoch": 4.18182373046875e-05, + "step": 27406, + "training_step_time": 0.11101841926574707 + }, + { + "epoch": 4.181976318359375e-05, + "model_forward_time": 0.025141000747680664, + "step": 27407 + }, + { + "epoch": 4.181976318359375e-05, + "step": 27407, + "training_step_time": 0.10969662666320801 + }, + { + "epoch": 4.18212890625e-05, + "model_forward_time": 0.025233745574951172, + "step": 27408 + }, + { + "epoch": 4.18212890625e-05, + "step": 27408, + "training_step_time": 0.10629940032958984 + }, + { + "epoch": 4.182281494140625e-05, + "model_forward_time": 0.02537059783935547, + "step": 27409 + }, + { + "epoch": 4.182281494140625e-05, + "step": 27409, + "training_step_time": 0.10664510726928711 + }, + { + "epoch": 4.18243408203125e-05, + "grad_norm": 0.03802133724093437, + "learning_rate": 2.0239399350715895e-06, + "loss": 0.0065, + "step": 27410 + }, + { + "epoch": 4.18243408203125e-05, + "model_forward_time": 0.025231361389160156, + "step": 27410 + }, + { + "epoch": 4.18243408203125e-05, + "step": 27410, + "training_step_time": 0.10942363739013672 + }, + { + "epoch": 4.182586669921875e-05, + "model_forward_time": 0.023288249969482422, + "step": 27411 + }, + { + "epoch": 4.182586669921875e-05, + "step": 27411, + "training_step_time": 0.1096792221069336 + }, + { + "epoch": 4.1827392578125e-05, + "model_forward_time": 0.024753093719482422, + "step": 27412 + }, + { + "epoch": 4.1827392578125e-05, + "step": 27412, + "training_step_time": 0.11046385765075684 + }, + { + "epoch": 4.182891845703125e-05, + "model_forward_time": 0.025518417358398438, + "step": 27413 + }, + { + "epoch": 4.182891845703125e-05, + "step": 27413, + "training_step_time": 0.10704827308654785 + }, + { + "epoch": 4.18304443359375e-05, + "model_forward_time": 0.02505803108215332, + "step": 27414 + }, + { + "epoch": 4.18304443359375e-05, + "step": 27414, + "training_step_time": 0.1084287166595459 + }, + { + "epoch": 4.183197021484375e-05, + "model_forward_time": 0.025177955627441406, + "step": 27415 + }, + { + "epoch": 4.183197021484375e-05, + "step": 27415, + "training_step_time": 0.1071314811706543 + }, + { + "epoch": 4.183349609375e-05, + "model_forward_time": 0.02544236183166504, + "step": 27416 + }, + { + "epoch": 4.183349609375e-05, + "step": 27416, + "training_step_time": 0.10797572135925293 + }, + { + "epoch": 4.183502197265625e-05, + "model_forward_time": 0.025148391723632812, + "step": 27417 + }, + { + "epoch": 4.183502197265625e-05, + "step": 27417, + "training_step_time": 0.13752055168151855 + }, + { + "epoch": 4.18365478515625e-05, + "model_forward_time": 0.025104999542236328, + "step": 27418 + }, + { + "epoch": 4.18365478515625e-05, + "step": 27418, + "training_step_time": 0.10954475402832031 + }, + { + "epoch": 4.183807373046875e-05, + "model_forward_time": 0.02528071403503418, + "step": 27419 + }, + { + "epoch": 4.183807373046875e-05, + "step": 27419, + "training_step_time": 0.10897469520568848 + }, + { + "epoch": 4.1839599609375e-05, + "grad_norm": 0.07062240689992905, + "learning_rate": 2.008446509461498e-06, + "loss": 0.0035, + "step": 27420 + }, + { + "epoch": 4.1839599609375e-05, + "model_forward_time": 0.02506232261657715, + "step": 27420 + }, + { + "epoch": 4.1839599609375e-05, + "step": 27420, + "training_step_time": 0.10736250877380371 + }, + { + "epoch": 4.184112548828125e-05, + "model_forward_time": 0.02533888816833496, + "step": 27421 + }, + { + "epoch": 4.184112548828125e-05, + "step": 27421, + "training_step_time": 0.15326237678527832 + }, + { + "epoch": 4.18426513671875e-05, + "model_forward_time": 0.025276660919189453, + "step": 27422 + }, + { + "epoch": 4.18426513671875e-05, + "step": 27422, + "training_step_time": 0.15224552154541016 + }, + { + "epoch": 4.184417724609375e-05, + "model_forward_time": 0.024387836456298828, + "step": 27423 + }, + { + "epoch": 4.184417724609375e-05, + "step": 27423, + "training_step_time": 0.1059575080871582 + }, + { + "epoch": 4.1845703125e-05, + "model_forward_time": 0.024991989135742188, + "step": 27424 + }, + { + "epoch": 4.1845703125e-05, + "step": 27424, + "training_step_time": 0.10260868072509766 + }, + { + "epoch": 4.184722900390625e-05, + "model_forward_time": 0.025104045867919922, + "step": 27425 + }, + { + "epoch": 4.184722900390625e-05, + "step": 27425, + "training_step_time": 0.10508489608764648 + }, + { + "epoch": 4.18487548828125e-05, + "model_forward_time": 0.025986433029174805, + "step": 27426 + }, + { + "epoch": 4.18487548828125e-05, + "step": 27426, + "training_step_time": 0.10611343383789062 + }, + { + "epoch": 4.185028076171875e-05, + "model_forward_time": 0.024981975555419922, + "step": 27427 + }, + { + "epoch": 4.185028076171875e-05, + "step": 27427, + "training_step_time": 0.11332058906555176 + }, + { + "epoch": 4.1851806640625e-05, + "model_forward_time": 0.025382518768310547, + "step": 27428 + }, + { + "epoch": 4.1851806640625e-05, + "step": 27428, + "training_step_time": 0.11293601989746094 + }, + { + "epoch": 4.185333251953125e-05, + "model_forward_time": 0.0257418155670166, + "step": 27429 + }, + { + "epoch": 4.185333251953125e-05, + "step": 27429, + "training_step_time": 0.11393260955810547 + }, + { + "epoch": 4.18548583984375e-05, + "grad_norm": 0.16383881866931915, + "learning_rate": 1.9930113981142028e-06, + "loss": 0.0054, + "step": 27430 + }, + { + "epoch": 4.18548583984375e-05, + "model_forward_time": 0.025405168533325195, + "step": 27430 + }, + { + "epoch": 4.18548583984375e-05, + "step": 27430, + "training_step_time": 0.11467337608337402 + }, + { + "epoch": 4.185638427734375e-05, + "model_forward_time": 0.024990558624267578, + "step": 27431 + }, + { + "epoch": 4.185638427734375e-05, + "step": 27431, + "training_step_time": 0.1144256591796875 + }, + { + "epoch": 4.185791015625e-05, + "model_forward_time": 0.0258023738861084, + "step": 27432 + }, + { + "epoch": 4.185791015625e-05, + "step": 27432, + "training_step_time": 0.11575436592102051 + }, + { + "epoch": 4.185943603515625e-05, + "model_forward_time": 0.02470874786376953, + "step": 27433 + }, + { + "epoch": 4.185943603515625e-05, + "step": 27433, + "training_step_time": 0.11305570602416992 + }, + { + "epoch": 4.18609619140625e-05, + "model_forward_time": 0.025424718856811523, + "step": 27434 + }, + { + "epoch": 4.18609619140625e-05, + "step": 27434, + "training_step_time": 0.11548686027526855 + }, + { + "epoch": 4.186248779296875e-05, + "model_forward_time": 0.02555561065673828, + "step": 27435 + }, + { + "epoch": 4.186248779296875e-05, + "step": 27435, + "training_step_time": 0.11454463005065918 + }, + { + "epoch": 4.1864013671875e-05, + "model_forward_time": 0.027447223663330078, + "step": 27436 + }, + { + "epoch": 4.1864013671875e-05, + "step": 27436, + "training_step_time": 0.1119835376739502 + }, + { + "epoch": 4.186553955078125e-05, + "model_forward_time": 0.025762319564819336, + "step": 27437 + }, + { + "epoch": 4.186553955078125e-05, + "step": 27437, + "training_step_time": 0.10747933387756348 + }, + { + "epoch": 4.18670654296875e-05, + "model_forward_time": 0.02572035789489746, + "step": 27438 + }, + { + "epoch": 4.18670654296875e-05, + "step": 27438, + "training_step_time": 0.11485743522644043 + }, + { + "epoch": 4.186859130859375e-05, + "model_forward_time": 0.025011539459228516, + "step": 27439 + }, + { + "epoch": 4.186859130859375e-05, + "step": 27439, + "training_step_time": 0.1107323169708252 + }, + { + "epoch": 4.18701171875e-05, + "grad_norm": 0.3348250389099121, + "learning_rate": 1.9776346197848296e-06, + "loss": 0.0097, + "step": 27440 + }, + { + "epoch": 4.18701171875e-05, + "model_forward_time": 0.02513599395751953, + "step": 27440 + }, + { + "epoch": 4.18701171875e-05, + "step": 27440, + "training_step_time": 0.10911083221435547 + }, + { + "epoch": 4.187164306640625e-05, + "model_forward_time": 0.025310754776000977, + "step": 27441 + }, + { + "epoch": 4.187164306640625e-05, + "step": 27441, + "training_step_time": 0.10758543014526367 + }, + { + "epoch": 4.18731689453125e-05, + "model_forward_time": 0.026711702346801758, + "step": 27442 + }, + { + "epoch": 4.18731689453125e-05, + "step": 27442, + "training_step_time": 0.10889315605163574 + }, + { + "epoch": 4.187469482421875e-05, + "model_forward_time": 0.024873733520507812, + "step": 27443 + }, + { + "epoch": 4.187469482421875e-05, + "step": 27443, + "training_step_time": 0.10815167427062988 + }, + { + "epoch": 4.1876220703125e-05, + "model_forward_time": 0.025715112686157227, + "step": 27444 + }, + { + "epoch": 4.1876220703125e-05, + "step": 27444, + "training_step_time": 0.13560724258422852 + }, + { + "epoch": 4.187774658203125e-05, + "model_forward_time": 0.025656938552856445, + "step": 27445 + }, + { + "epoch": 4.187774658203125e-05, + "step": 27445, + "training_step_time": 0.1906285285949707 + }, + { + "epoch": 4.18792724609375e-05, + "model_forward_time": 0.024431943893432617, + "step": 27446 + }, + { + "epoch": 4.18792724609375e-05, + "step": 27446, + "training_step_time": 0.16294407844543457 + }, + { + "epoch": 4.188079833984375e-05, + "model_forward_time": 0.02448582649230957, + "step": 27447 + }, + { + "epoch": 4.188079833984375e-05, + "step": 27447, + "training_step_time": 0.17828917503356934 + }, + { + "epoch": 4.188232421875e-05, + "model_forward_time": 0.025101423263549805, + "step": 27448 + }, + { + "epoch": 4.188232421875e-05, + "step": 27448, + "training_step_time": 0.18698430061340332 + }, + { + "epoch": 4.188385009765625e-05, + "model_forward_time": 0.02508711814880371, + "step": 27449 + }, + { + "epoch": 4.188385009765625e-05, + "step": 27449, + "training_step_time": 0.11213231086730957 + }, + { + "epoch": 4.18853759765625e-05, + "grad_norm": 0.057641200721263885, + "learning_rate": 1.962316193157593e-06, + "loss": 0.0047, + "step": 27450 + }, + { + "epoch": 4.18853759765625e-05, + "model_forward_time": 0.02467203140258789, + "step": 27450 + }, + { + "epoch": 4.18853759765625e-05, + "step": 27450, + "training_step_time": 0.11140108108520508 + }, + { + "epoch": 4.188690185546875e-05, + "model_forward_time": 0.027614831924438477, + "step": 27451 + }, + { + "epoch": 4.188690185546875e-05, + "step": 27451, + "training_step_time": 0.1136178970336914 + }, + { + "epoch": 4.1888427734375e-05, + "model_forward_time": 0.025501012802124023, + "step": 27452 + }, + { + "epoch": 4.1888427734375e-05, + "step": 27452, + "training_step_time": 0.10609841346740723 + }, + { + "epoch": 4.188995361328125e-05, + "model_forward_time": 0.02535104751586914, + "step": 27453 + }, + { + "epoch": 4.188995361328125e-05, + "step": 27453, + "training_step_time": 0.10734415054321289 + }, + { + "epoch": 4.18914794921875e-05, + "model_forward_time": 0.025449037551879883, + "step": 27454 + }, + { + "epoch": 4.18914794921875e-05, + "step": 27454, + "training_step_time": 0.10927295684814453 + }, + { + "epoch": 4.189300537109375e-05, + "model_forward_time": 0.02522587776184082, + "step": 27455 + }, + { + "epoch": 4.189300537109375e-05, + "step": 27455, + "training_step_time": 0.10616087913513184 + }, + { + "epoch": 4.189453125e-05, + "model_forward_time": 0.026230335235595703, + "step": 27456 + }, + { + "epoch": 4.189453125e-05, + "step": 27456, + "training_step_time": 0.10807371139526367 + }, + { + "epoch": 4.189605712890625e-05, + "model_forward_time": 0.025315523147583008, + "step": 27457 + }, + { + "epoch": 4.189605712890625e-05, + "step": 27457, + "training_step_time": 0.10823178291320801 + }, + { + "epoch": 4.18975830078125e-05, + "model_forward_time": 0.02517104148864746, + "step": 27458 + }, + { + "epoch": 4.18975830078125e-05, + "step": 27458, + "training_step_time": 0.1043243408203125 + }, + { + "epoch": 4.189910888671875e-05, + "model_forward_time": 0.025259733200073242, + "step": 27459 + }, + { + "epoch": 4.189910888671875e-05, + "step": 27459, + "training_step_time": 0.10832905769348145 + }, + { + "epoch": 4.1900634765625e-05, + "grad_norm": 0.04290970042347908, + "learning_rate": 1.9470561368458485e-06, + "loss": 0.0041, + "step": 27460 + }, + { + "epoch": 4.1900634765625e-05, + "model_forward_time": 0.02535104751586914, + "step": 27460 + }, + { + "epoch": 4.1900634765625e-05, + "step": 27460, + "training_step_time": 0.10336017608642578 + }, + { + "epoch": 4.190216064453125e-05, + "model_forward_time": 0.025298357009887695, + "step": 27461 + }, + { + "epoch": 4.190216064453125e-05, + "step": 27461, + "training_step_time": 0.11078977584838867 + }, + { + "epoch": 4.19036865234375e-05, + "model_forward_time": 0.025175809860229492, + "step": 27462 + }, + { + "epoch": 4.19036865234375e-05, + "step": 27462, + "training_step_time": 0.10450291633605957 + }, + { + "epoch": 4.190521240234375e-05, + "model_forward_time": 0.02521967887878418, + "step": 27463 + }, + { + "epoch": 4.190521240234375e-05, + "step": 27463, + "training_step_time": 0.16716885566711426 + }, + { + "epoch": 4.190673828125e-05, + "model_forward_time": 0.024560928344726562, + "step": 27464 + }, + { + "epoch": 4.190673828125e-05, + "step": 27464, + "training_step_time": 0.14967584609985352 + }, + { + "epoch": 4.190826416015625e-05, + "model_forward_time": 0.024634122848510742, + "step": 27465 + }, + { + "epoch": 4.190826416015625e-05, + "step": 27465, + "training_step_time": 0.10407304763793945 + }, + { + "epoch": 4.19097900390625e-05, + "model_forward_time": 0.025407075881958008, + "step": 27466 + }, + { + "epoch": 4.19097900390625e-05, + "step": 27466, + "training_step_time": 0.10511398315429688 + }, + { + "epoch": 4.191131591796875e-05, + "model_forward_time": 0.02519512176513672, + "step": 27467 + }, + { + "epoch": 4.191131591796875e-05, + "step": 27467, + "training_step_time": 0.10986018180847168 + }, + { + "epoch": 4.1912841796875e-05, + "model_forward_time": 0.025296926498413086, + "step": 27468 + }, + { + "epoch": 4.1912841796875e-05, + "step": 27468, + "training_step_time": 0.10811519622802734 + }, + { + "epoch": 4.191436767578125e-05, + "model_forward_time": 0.02529120445251465, + "step": 27469 + }, + { + "epoch": 4.191436767578125e-05, + "step": 27469, + "training_step_time": 0.20360732078552246 + }, + { + "epoch": 4.19158935546875e-05, + "grad_norm": 0.22710780799388885, + "learning_rate": 1.9318544693919916e-06, + "loss": 0.0119, + "step": 27470 + }, + { + "epoch": 4.19158935546875e-05, + "model_forward_time": 0.024785995483398438, + "step": 27470 + }, + { + "epoch": 4.19158935546875e-05, + "step": 27470, + "training_step_time": 0.10249567031860352 + }, + { + "epoch": 4.191741943359375e-05, + "model_forward_time": 0.025137662887573242, + "step": 27471 + }, + { + "epoch": 4.191741943359375e-05, + "step": 27471, + "training_step_time": 0.10365486145019531 + }, + { + "epoch": 4.19189453125e-05, + "model_forward_time": 0.025381088256835938, + "step": 27472 + }, + { + "epoch": 4.19189453125e-05, + "step": 27472, + "training_step_time": 0.10603547096252441 + }, + { + "epoch": 4.192047119140625e-05, + "model_forward_time": 0.025968313217163086, + "step": 27473 + }, + { + "epoch": 4.192047119140625e-05, + "step": 27473, + "training_step_time": 0.11260771751403809 + }, + { + "epoch": 4.19219970703125e-05, + "model_forward_time": 0.02595353126525879, + "step": 27474 + }, + { + "epoch": 4.19219970703125e-05, + "step": 27474, + "training_step_time": 0.12001347541809082 + }, + { + "epoch": 4.192352294921875e-05, + "model_forward_time": 0.025643348693847656, + "step": 27475 + }, + { + "epoch": 4.192352294921875e-05, + "step": 27475, + "training_step_time": 0.11197257041931152 + }, + { + "epoch": 4.1925048828125e-05, + "model_forward_time": 0.025481224060058594, + "step": 27476 + }, + { + "epoch": 4.1925048828125e-05, + "step": 27476, + "training_step_time": 0.11362981796264648 + }, + { + "epoch": 4.192657470703125e-05, + "model_forward_time": 0.025565147399902344, + "step": 27477 + }, + { + "epoch": 4.192657470703125e-05, + "step": 27477, + "training_step_time": 0.11202287673950195 + }, + { + "epoch": 4.19281005859375e-05, + "model_forward_time": 0.025464773178100586, + "step": 27478 + }, + { + "epoch": 4.19281005859375e-05, + "step": 27478, + "training_step_time": 0.11535215377807617 + }, + { + "epoch": 4.192962646484375e-05, + "model_forward_time": 0.02550029754638672, + "step": 27479 + }, + { + "epoch": 4.192962646484375e-05, + "step": 27479, + "training_step_time": 0.1105949878692627 + }, + { + "epoch": 4.193115234375e-05, + "grad_norm": 0.31672078371047974, + "learning_rate": 1.91671120926748e-06, + "loss": 0.0068, + "step": 27480 + }, + { + "epoch": 4.193115234375e-05, + "model_forward_time": 0.025203227996826172, + "step": 27480 + }, + { + "epoch": 4.193115234375e-05, + "step": 27480, + "training_step_time": 0.11097550392150879 + }, + { + "epoch": 4.193267822265625e-05, + "model_forward_time": 0.025226116180419922, + "step": 27481 + }, + { + "epoch": 4.193267822265625e-05, + "step": 27481, + "training_step_time": 0.12115907669067383 + }, + { + "epoch": 4.19342041015625e-05, + "model_forward_time": 0.025313377380371094, + "step": 27482 + }, + { + "epoch": 4.19342041015625e-05, + "step": 27482, + "training_step_time": 0.11470460891723633 + }, + { + "epoch": 4.193572998046875e-05, + "model_forward_time": 0.02600836753845215, + "step": 27483 + }, + { + "epoch": 4.193572998046875e-05, + "step": 27483, + "training_step_time": 0.10695433616638184 + }, + { + "epoch": 4.1937255859375e-05, + "model_forward_time": 0.025343656539916992, + "step": 27484 + }, + { + "epoch": 4.1937255859375e-05, + "step": 27484, + "training_step_time": 0.10799527168273926 + }, + { + "epoch": 4.193878173828125e-05, + "model_forward_time": 0.025595426559448242, + "step": 27485 + }, + { + "epoch": 4.193878173828125e-05, + "step": 27485, + "training_step_time": 0.10973119735717773 + }, + { + "epoch": 4.19403076171875e-05, + "model_forward_time": 0.025278091430664062, + "step": 27486 + }, + { + "epoch": 4.19403076171875e-05, + "step": 27486, + "training_step_time": 0.11096644401550293 + }, + { + "epoch": 4.194183349609375e-05, + "model_forward_time": 0.026182174682617188, + "step": 27487 + }, + { + "epoch": 4.194183349609375e-05, + "step": 27487, + "training_step_time": 0.10775089263916016 + }, + { + "epoch": 4.1943359375e-05, + "model_forward_time": 0.025049448013305664, + "step": 27488 + }, + { + "epoch": 4.1943359375e-05, + "step": 27488, + "training_step_time": 0.10559749603271484 + }, + { + "epoch": 4.194488525390625e-05, + "model_forward_time": 0.025813817977905273, + "step": 27489 + }, + { + "epoch": 4.194488525390625e-05, + "step": 27489, + "training_step_time": 0.10961151123046875 + }, + { + "epoch": 4.19464111328125e-05, + "grad_norm": 0.1764764040708542, + "learning_rate": 1.9016263748728114e-06, + "loss": 0.0052, + "step": 27490 + }, + { + "epoch": 4.19464111328125e-05, + "model_forward_time": 0.025191545486450195, + "step": 27490 + }, + { + "epoch": 4.19464111328125e-05, + "step": 27490, + "training_step_time": 0.15883755683898926 + }, + { + "epoch": 4.194793701171875e-05, + "model_forward_time": 0.02490973472595215, + "step": 27491 + }, + { + "epoch": 4.194793701171875e-05, + "step": 27491, + "training_step_time": 0.1653451919555664 + }, + { + "epoch": 4.1949462890625e-05, + "model_forward_time": 0.024480342864990234, + "step": 27492 + }, + { + "epoch": 4.1949462890625e-05, + "step": 27492, + "training_step_time": 0.224379301071167 + }, + { + "epoch": 4.195098876953125e-05, + "model_forward_time": 0.024535655975341797, + "step": 27493 + }, + { + "epoch": 4.195098876953125e-05, + "step": 27493, + "training_step_time": 0.21618080139160156 + }, + { + "epoch": 4.19525146484375e-05, + "model_forward_time": 0.024358034133911133, + "step": 27494 + }, + { + "epoch": 4.19525146484375e-05, + "step": 27494, + "training_step_time": 0.14006328582763672 + }, + { + "epoch": 4.195404052734375e-05, + "model_forward_time": 0.024471282958984375, + "step": 27495 + }, + { + "epoch": 4.195404052734375e-05, + "step": 27495, + "training_step_time": 0.1847212314605713 + }, + { + "epoch": 4.195556640625e-05, + "model_forward_time": 0.02492213249206543, + "step": 27496 + }, + { + "epoch": 4.195556640625e-05, + "step": 27496, + "training_step_time": 0.10326552391052246 + }, + { + "epoch": 4.195709228515625e-05, + "model_forward_time": 0.024483680725097656, + "step": 27497 + }, + { + "epoch": 4.195709228515625e-05, + "step": 27497, + "training_step_time": 0.10661172866821289 + }, + { + "epoch": 4.19586181640625e-05, + "model_forward_time": 0.025578737258911133, + "step": 27498 + }, + { + "epoch": 4.19586181640625e-05, + "step": 27498, + "training_step_time": 0.1071021556854248 + }, + { + "epoch": 4.196014404296875e-05, + "model_forward_time": 0.025483369827270508, + "step": 27499 + }, + { + "epoch": 4.196014404296875e-05, + "step": 27499, + "training_step_time": 0.10799717903137207 + }, + { + "epoch": 4.1961669921875e-05, + "grad_norm": 0.31481289863586426, + "learning_rate": 1.8865999845374793e-06, + "loss": 0.0124, + "step": 27500 + }, + { + "epoch": 4.1961669921875e-05, + "model_forward_time": 0.025333404541015625, + "step": 27500 + }, + { + "epoch": 4.1961669921875e-05, + "step": 27500, + "training_step_time": 0.10700631141662598 + }, + { + "epoch": 4.196319580078125e-05, + "model_forward_time": 0.025399208068847656, + "step": 27501 + }, + { + "epoch": 4.196319580078125e-05, + "step": 27501, + "training_step_time": 0.10657262802124023 + }, + { + "epoch": 4.19647216796875e-05, + "model_forward_time": 0.025539159774780273, + "step": 27502 + }, + { + "epoch": 4.19647216796875e-05, + "step": 27502, + "training_step_time": 0.10805869102478027 + }, + { + "epoch": 4.196624755859375e-05, + "model_forward_time": 0.0250852108001709, + "step": 27503 + }, + { + "epoch": 4.196624755859375e-05, + "step": 27503, + "training_step_time": 0.11168670654296875 + }, + { + "epoch": 4.19677734375e-05, + "model_forward_time": 0.02507758140563965, + "step": 27504 + }, + { + "epoch": 4.19677734375e-05, + "step": 27504, + "training_step_time": 0.11144471168518066 + }, + { + "epoch": 4.196929931640625e-05, + "model_forward_time": 0.025075674057006836, + "step": 27505 + }, + { + "epoch": 4.196929931640625e-05, + "step": 27505, + "training_step_time": 0.11420679092407227 + }, + { + "epoch": 4.19708251953125e-05, + "model_forward_time": 0.025195598602294922, + "step": 27506 + }, + { + "epoch": 4.19708251953125e-05, + "step": 27506, + "training_step_time": 0.15414690971374512 + }, + { + "epoch": 4.197235107421875e-05, + "model_forward_time": 0.02484416961669922, + "step": 27507 + }, + { + "epoch": 4.197235107421875e-05, + "step": 27507, + "training_step_time": 0.15661048889160156 + }, + { + "epoch": 4.1973876953125e-05, + "model_forward_time": 0.024560213088989258, + "step": 27508 + }, + { + "epoch": 4.1973876953125e-05, + "step": 27508, + "training_step_time": 0.1725625991821289 + }, + { + "epoch": 4.197540283203125e-05, + "model_forward_time": 0.024504899978637695, + "step": 27509 + }, + { + "epoch": 4.197540283203125e-05, + "step": 27509, + "training_step_time": 0.14648652076721191 + }, + { + "epoch": 4.19769287109375e-05, + "grad_norm": 0.310109406709671, + "learning_rate": 1.8716320565199618e-06, + "loss": 0.008, + "step": 27510 + }, + { + "epoch": 4.19769287109375e-05, + "model_forward_time": 0.024860382080078125, + "step": 27510 + }, + { + "epoch": 4.19769287109375e-05, + "step": 27510, + "training_step_time": 0.20289945602416992 + }, + { + "epoch": 4.197845458984375e-05, + "model_forward_time": 0.024822235107421875, + "step": 27511 + }, + { + "epoch": 4.197845458984375e-05, + "step": 27511, + "training_step_time": 0.12235832214355469 + }, + { + "epoch": 4.197998046875e-05, + "model_forward_time": 0.02441883087158203, + "step": 27512 + }, + { + "epoch": 4.197998046875e-05, + "step": 27512, + "training_step_time": 0.18161416053771973 + }, + { + "epoch": 4.198150634765625e-05, + "model_forward_time": 0.02444601058959961, + "step": 27513 + }, + { + "epoch": 4.198150634765625e-05, + "step": 27513, + "training_step_time": 0.11646628379821777 + }, + { + "epoch": 4.19830322265625e-05, + "model_forward_time": 0.025057315826416016, + "step": 27514 + }, + { + "epoch": 4.19830322265625e-05, + "step": 27514, + "training_step_time": 0.1098940372467041 + }, + { + "epoch": 4.198455810546875e-05, + "model_forward_time": 0.025715351104736328, + "step": 27515 + }, + { + "epoch": 4.198455810546875e-05, + "step": 27515, + "training_step_time": 0.11123085021972656 + }, + { + "epoch": 4.1986083984375e-05, + "model_forward_time": 0.025283336639404297, + "step": 27516 + }, + { + "epoch": 4.1986083984375e-05, + "step": 27516, + "training_step_time": 0.11224913597106934 + }, + { + "epoch": 4.198760986328125e-05, + "model_forward_time": 0.02502131462097168, + "step": 27517 + }, + { + "epoch": 4.198760986328125e-05, + "step": 27517, + "training_step_time": 0.10814046859741211 + }, + { + "epoch": 4.19891357421875e-05, + "model_forward_time": 0.025496482849121094, + "step": 27518 + }, + { + "epoch": 4.19891357421875e-05, + "step": 27518, + "training_step_time": 0.10894203186035156 + }, + { + "epoch": 4.199066162109375e-05, + "model_forward_time": 0.025363922119140625, + "step": 27519 + }, + { + "epoch": 4.199066162109375e-05, + "step": 27519, + "training_step_time": 0.10723090171813965 + }, + { + "epoch": 4.19921875e-05, + "grad_norm": 0.09316647052764893, + "learning_rate": 1.856722609007705e-06, + "loss": 0.0094, + "step": 27520 + }, + { + "epoch": 4.19921875e-05, + "model_forward_time": 0.02691817283630371, + "step": 27520 + }, + { + "epoch": 4.19921875e-05, + "step": 27520, + "training_step_time": 0.1132967472076416 + }, + { + "epoch": 4.199371337890625e-05, + "model_forward_time": 0.026094675064086914, + "step": 27521 + }, + { + "epoch": 4.199371337890625e-05, + "step": 27521, + "training_step_time": 0.10701322555541992 + }, + { + "epoch": 4.19952392578125e-05, + "model_forward_time": 0.025216102600097656, + "step": 27522 + }, + { + "epoch": 4.19952392578125e-05, + "step": 27522, + "training_step_time": 0.10623741149902344 + }, + { + "epoch": 4.199676513671875e-05, + "model_forward_time": 0.02497076988220215, + "step": 27523 + }, + { + "epoch": 4.199676513671875e-05, + "step": 27523, + "training_step_time": 0.10799932479858398 + }, + { + "epoch": 4.1998291015625e-05, + "model_forward_time": 0.025278091430664062, + "step": 27524 + }, + { + "epoch": 4.1998291015625e-05, + "step": 27524, + "training_step_time": 0.10667729377746582 + }, + { + "epoch": 4.199981689453125e-05, + "model_forward_time": 0.02546858787536621, + "step": 27525 + }, + { + "epoch": 4.199981689453125e-05, + "step": 27525, + "training_step_time": 0.10734081268310547 + }, + { + "epoch": 4.20013427734375e-05, + "model_forward_time": 0.025707483291625977, + "step": 27526 + }, + { + "epoch": 4.20013427734375e-05, + "step": 27526, + "training_step_time": 0.10908102989196777 + }, + { + "epoch": 4.200286865234375e-05, + "model_forward_time": 0.025506973266601562, + "step": 27527 + }, + { + "epoch": 4.200286865234375e-05, + "step": 27527, + "training_step_time": 0.10759925842285156 + }, + { + "epoch": 4.200439453125e-05, + "model_forward_time": 0.025616168975830078, + "step": 27528 + }, + { + "epoch": 4.200439453125e-05, + "step": 27528, + "training_step_time": 0.10637927055358887 + }, + { + "epoch": 4.200592041015625e-05, + "model_forward_time": 0.02531290054321289, + "step": 27529 + }, + { + "epoch": 4.200592041015625e-05, + "step": 27529, + "training_step_time": 0.10735034942626953 + }, + { + "epoch": 4.20074462890625e-05, + "grad_norm": 0.0915100947022438, + "learning_rate": 1.841871660117095e-06, + "loss": 0.0072, + "step": 27530 + }, + { + "epoch": 4.20074462890625e-05, + "model_forward_time": 0.025414705276489258, + "step": 27530 + }, + { + "epoch": 4.20074462890625e-05, + "step": 27530, + "training_step_time": 0.11011242866516113 + }, + { + "epoch": 4.200897216796875e-05, + "model_forward_time": 0.025435447692871094, + "step": 27531 + }, + { + "epoch": 4.200897216796875e-05, + "step": 27531, + "training_step_time": 0.10951375961303711 + }, + { + "epoch": 4.2010498046875e-05, + "model_forward_time": 0.025510787963867188, + "step": 27532 + }, + { + "epoch": 4.2010498046875e-05, + "step": 27532, + "training_step_time": 0.10940361022949219 + }, + { + "epoch": 4.201202392578125e-05, + "model_forward_time": 0.025747060775756836, + "step": 27533 + }, + { + "epoch": 4.201202392578125e-05, + "step": 27533, + "training_step_time": 0.10470867156982422 + }, + { + "epoch": 4.20135498046875e-05, + "model_forward_time": 0.024981260299682617, + "step": 27534 + }, + { + "epoch": 4.20135498046875e-05, + "step": 27534, + "training_step_time": 0.1567833423614502 + }, + { + "epoch": 4.201507568359375e-05, + "model_forward_time": 0.02532672882080078, + "step": 27535 + }, + { + "epoch": 4.201507568359375e-05, + "step": 27535, + "training_step_time": 0.17128252983093262 + }, + { + "epoch": 4.20166015625e-05, + "model_forward_time": 0.025480985641479492, + "step": 27536 + }, + { + "epoch": 4.20166015625e-05, + "step": 27536, + "training_step_time": 0.19112920761108398 + }, + { + "epoch": 4.201812744140625e-05, + "model_forward_time": 0.024856090545654297, + "step": 27537 + }, + { + "epoch": 4.201812744140625e-05, + "step": 27537, + "training_step_time": 0.17227768898010254 + }, + { + "epoch": 4.20196533203125e-05, + "model_forward_time": 0.024065256118774414, + "step": 27538 + }, + { + "epoch": 4.20196533203125e-05, + "step": 27538, + "training_step_time": 0.20054841041564941 + }, + { + "epoch": 4.202117919921875e-05, + "model_forward_time": 0.02421402931213379, + "step": 27539 + }, + { + "epoch": 4.202117919921875e-05, + "step": 27539, + "training_step_time": 0.13978862762451172 + }, + { + "epoch": 4.2022705078125e-05, + "grad_norm": 0.07453785836696625, + "learning_rate": 1.8270792278934302e-06, + "loss": 0.0041, + "step": 27540 + }, + { + "epoch": 4.2022705078125e-05, + "model_forward_time": 0.02553701400756836, + "step": 27540 + }, + { + "epoch": 4.2022705078125e-05, + "step": 27540, + "training_step_time": 0.20325040817260742 + }, + { + "epoch": 4.202423095703125e-05, + "model_forward_time": 0.024491548538208008, + "step": 27541 + }, + { + "epoch": 4.202423095703125e-05, + "step": 27541, + "training_step_time": 0.1246337890625 + }, + { + "epoch": 4.20257568359375e-05, + "model_forward_time": 0.024100542068481445, + "step": 27542 + }, + { + "epoch": 4.20257568359375e-05, + "step": 27542, + "training_step_time": 0.10514497756958008 + }, + { + "epoch": 4.202728271484375e-05, + "model_forward_time": 0.025437116622924805, + "step": 27543 + }, + { + "epoch": 4.202728271484375e-05, + "step": 27543, + "training_step_time": 0.10455775260925293 + }, + { + "epoch": 4.202880859375e-05, + "model_forward_time": 0.02534770965576172, + "step": 27544 + }, + { + "epoch": 4.202880859375e-05, + "step": 27544, + "training_step_time": 0.1079859733581543 + }, + { + "epoch": 4.203033447265625e-05, + "model_forward_time": 0.025345563888549805, + "step": 27545 + }, + { + "epoch": 4.203033447265625e-05, + "step": 27545, + "training_step_time": 0.11057829856872559 + }, + { + "epoch": 4.20318603515625e-05, + "model_forward_time": 0.02565622329711914, + "step": 27546 + }, + { + "epoch": 4.20318603515625e-05, + "step": 27546, + "training_step_time": 0.10788464546203613 + }, + { + "epoch": 4.203338623046875e-05, + "model_forward_time": 0.025149822235107422, + "step": 27547 + }, + { + "epoch": 4.203338623046875e-05, + "step": 27547, + "training_step_time": 0.10787343978881836 + }, + { + "epoch": 4.2034912109375e-05, + "model_forward_time": 0.025337696075439453, + "step": 27548 + }, + { + "epoch": 4.2034912109375e-05, + "step": 27548, + "training_step_time": 0.1053626537322998 + }, + { + "epoch": 4.203643798828125e-05, + "model_forward_time": 0.025126218795776367, + "step": 27549 + }, + { + "epoch": 4.203643798828125e-05, + "step": 27549, + "training_step_time": 0.1347651481628418 + }, + { + "epoch": 4.20379638671875e-05, + "grad_norm": 0.10368197411298752, + "learning_rate": 1.812345330310916e-06, + "loss": 0.0084, + "step": 27550 + }, + { + "epoch": 4.20379638671875e-05, + "model_forward_time": 0.025801658630371094, + "step": 27550 + }, + { + "epoch": 4.20379638671875e-05, + "step": 27550, + "training_step_time": 0.16543054580688477 + }, + { + "epoch": 4.203948974609375e-05, + "model_forward_time": 0.023992300033569336, + "step": 27551 + }, + { + "epoch": 4.203948974609375e-05, + "step": 27551, + "training_step_time": 0.1585555076599121 + }, + { + "epoch": 4.2041015625e-05, + "model_forward_time": 0.024158239364624023, + "step": 27552 + }, + { + "epoch": 4.2041015625e-05, + "step": 27552, + "training_step_time": 0.1614081859588623 + }, + { + "epoch": 4.204254150390625e-05, + "model_forward_time": 0.024393796920776367, + "step": 27553 + }, + { + "epoch": 4.204254150390625e-05, + "step": 27553, + "training_step_time": 0.1886730194091797 + }, + { + "epoch": 4.20440673828125e-05, + "model_forward_time": 0.025038719177246094, + "step": 27554 + }, + { + "epoch": 4.20440673828125e-05, + "step": 27554, + "training_step_time": 0.13743019104003906 + }, + { + "epoch": 4.204559326171875e-05, + "model_forward_time": 0.024355411529541016, + "step": 27555 + }, + { + "epoch": 4.204559326171875e-05, + "step": 27555, + "training_step_time": 0.18871212005615234 + }, + { + "epoch": 4.2047119140625e-05, + "model_forward_time": 0.024212360382080078, + "step": 27556 + }, + { + "epoch": 4.2047119140625e-05, + "step": 27556, + "training_step_time": 0.11986827850341797 + }, + { + "epoch": 4.204864501953125e-05, + "model_forward_time": 0.024112224578857422, + "step": 27557 + }, + { + "epoch": 4.204864501953125e-05, + "step": 27557, + "training_step_time": 0.11790919303894043 + }, + { + "epoch": 4.20501708984375e-05, + "model_forward_time": 0.0249936580657959, + "step": 27558 + }, + { + "epoch": 4.20501708984375e-05, + "step": 27558, + "training_step_time": 0.11476397514343262 + }, + { + "epoch": 4.205169677734375e-05, + "model_forward_time": 0.025356531143188477, + "step": 27559 + }, + { + "epoch": 4.205169677734375e-05, + "step": 27559, + "training_step_time": 0.11282587051391602 + }, + { + "epoch": 4.205322265625e-05, + "grad_norm": 0.07172807306051254, + "learning_rate": 1.7976699852726153e-06, + "loss": 0.0026, + "step": 27560 + }, + { + "epoch": 4.205322265625e-05, + "model_forward_time": 0.0253908634185791, + "step": 27560 + }, + { + "epoch": 4.205322265625e-05, + "step": 27560, + "training_step_time": 0.11072373390197754 + }, + { + "epoch": 4.205474853515625e-05, + "model_forward_time": 0.025568246841430664, + "step": 27561 + }, + { + "epoch": 4.205474853515625e-05, + "step": 27561, + "training_step_time": 0.11153697967529297 + }, + { + "epoch": 4.20562744140625e-05, + "model_forward_time": 0.024016618728637695, + "step": 27562 + }, + { + "epoch": 4.20562744140625e-05, + "step": 27562, + "training_step_time": 0.10944962501525879 + }, + { + "epoch": 4.205780029296875e-05, + "model_forward_time": 0.02501535415649414, + "step": 27563 + }, + { + "epoch": 4.205780029296875e-05, + "step": 27563, + "training_step_time": 0.10572671890258789 + }, + { + "epoch": 4.2059326171875e-05, + "model_forward_time": 0.0252838134765625, + "step": 27564 + }, + { + "epoch": 4.2059326171875e-05, + "step": 27564, + "training_step_time": 0.10776209831237793 + }, + { + "epoch": 4.206085205078125e-05, + "model_forward_time": 0.02505016326904297, + "step": 27565 + }, + { + "epoch": 4.206085205078125e-05, + "step": 27565, + "training_step_time": 0.10538935661315918 + }, + { + "epoch": 4.20623779296875e-05, + "model_forward_time": 0.026669740676879883, + "step": 27566 + }, + { + "epoch": 4.20623779296875e-05, + "step": 27566, + "training_step_time": 0.10636568069458008 + }, + { + "epoch": 4.206390380859375e-05, + "model_forward_time": 0.025302410125732422, + "step": 27567 + }, + { + "epoch": 4.206390380859375e-05, + "step": 27567, + "training_step_time": 0.10702681541442871 + }, + { + "epoch": 4.20654296875e-05, + "model_forward_time": 0.024237632751464844, + "step": 27568 + }, + { + "epoch": 4.20654296875e-05, + "step": 27568, + "training_step_time": 0.10477256774902344 + }, + { + "epoch": 4.206695556640625e-05, + "model_forward_time": 0.024177074432373047, + "step": 27569 + }, + { + "epoch": 4.206695556640625e-05, + "step": 27569, + "training_step_time": 0.11072707176208496 + }, + { + "epoch": 4.20684814453125e-05, + "grad_norm": 0.20210319757461548, + "learning_rate": 1.7830532106104747e-06, + "loss": 0.0048, + "step": 27570 + }, + { + "epoch": 4.20684814453125e-05, + "model_forward_time": 0.024779558181762695, + "step": 27570 + }, + { + "epoch": 4.20684814453125e-05, + "step": 27570, + "training_step_time": 0.10695195198059082 + }, + { + "epoch": 4.207000732421875e-05, + "model_forward_time": 0.025423526763916016, + "step": 27571 + }, + { + "epoch": 4.207000732421875e-05, + "step": 27571, + "training_step_time": 0.1106729507446289 + }, + { + "epoch": 4.2071533203125e-05, + "model_forward_time": 0.02490377426147461, + "step": 27572 + }, + { + "epoch": 4.2071533203125e-05, + "step": 27572, + "training_step_time": 0.10621452331542969 + }, + { + "epoch": 4.207305908203125e-05, + "model_forward_time": 0.02536153793334961, + "step": 27573 + }, + { + "epoch": 4.207305908203125e-05, + "step": 27573, + "training_step_time": 0.10739302635192871 + }, + { + "epoch": 4.20745849609375e-05, + "model_forward_time": 0.024981975555419922, + "step": 27574 + }, + { + "epoch": 4.20745849609375e-05, + "step": 27574, + "training_step_time": 0.1068265438079834 + }, + { + "epoch": 4.207611083984375e-05, + "model_forward_time": 0.0252683162689209, + "step": 27575 + }, + { + "epoch": 4.207611083984375e-05, + "step": 27575, + "training_step_time": 0.11684250831604004 + }, + { + "epoch": 4.207763671875e-05, + "model_forward_time": 0.025206804275512695, + "step": 27576 + }, + { + "epoch": 4.207763671875e-05, + "step": 27576, + "training_step_time": 0.1087493896484375 + }, + { + "epoch": 4.207916259765625e-05, + "model_forward_time": 0.024904251098632812, + "step": 27577 + }, + { + "epoch": 4.207916259765625e-05, + "step": 27577, + "training_step_time": 0.10351276397705078 + }, + { + "epoch": 4.20806884765625e-05, + "model_forward_time": 0.024743318557739258, + "step": 27578 + }, + { + "epoch": 4.20806884765625e-05, + "step": 27578, + "training_step_time": 0.15907716751098633 + }, + { + "epoch": 4.208221435546875e-05, + "model_forward_time": 0.024530887603759766, + "step": 27579 + }, + { + "epoch": 4.208221435546875e-05, + "step": 27579, + "training_step_time": 0.1522693634033203 + }, + { + "epoch": 4.2083740234375e-05, + "grad_norm": 0.034953873604536057, + "learning_rate": 1.7684950240852372e-06, + "loss": 0.0036, + "step": 27580 + }, + { + "epoch": 4.2083740234375e-05, + "model_forward_time": 0.024492740631103516, + "step": 27580 + }, + { + "epoch": 4.2083740234375e-05, + "step": 27580, + "training_step_time": 0.1039276123046875 + }, + { + "epoch": 4.208526611328125e-05, + "model_forward_time": 0.024318695068359375, + "step": 27581 + }, + { + "epoch": 4.208526611328125e-05, + "step": 27581, + "training_step_time": 0.13969826698303223 + }, + { + "epoch": 4.20867919921875e-05, + "model_forward_time": 0.025610923767089844, + "step": 27582 + }, + { + "epoch": 4.20867919921875e-05, + "step": 27582, + "training_step_time": 0.2103722095489502 + }, + { + "epoch": 4.208831787109375e-05, + "model_forward_time": 0.024657726287841797, + "step": 27583 + }, + { + "epoch": 4.208831787109375e-05, + "step": 27583, + "training_step_time": 0.1298537254333496 + }, + { + "epoch": 4.208984375e-05, + "model_forward_time": 0.024837017059326172, + "step": 27584 + }, + { + "epoch": 4.208984375e-05, + "step": 27584, + "training_step_time": 0.146087646484375 + }, + { + "epoch": 4.209136962890625e-05, + "model_forward_time": 0.02493429183959961, + "step": 27585 + }, + { + "epoch": 4.209136962890625e-05, + "step": 27585, + "training_step_time": 0.1825244426727295 + }, + { + "epoch": 4.20928955078125e-05, + "model_forward_time": 0.024359941482543945, + "step": 27586 + }, + { + "epoch": 4.20928955078125e-05, + "step": 27586, + "training_step_time": 0.10082507133483887 + }, + { + "epoch": 4.209442138671875e-05, + "model_forward_time": 0.02447199821472168, + "step": 27587 + }, + { + "epoch": 4.209442138671875e-05, + "step": 27587, + "training_step_time": 0.10001969337463379 + }, + { + "epoch": 4.2095947265625e-05, + "model_forward_time": 0.024995088577270508, + "step": 27588 + }, + { + "epoch": 4.2095947265625e-05, + "step": 27588, + "training_step_time": 0.10413527488708496 + }, + { + "epoch": 4.209747314453125e-05, + "model_forward_time": 0.02521657943725586, + "step": 27589 + }, + { + "epoch": 4.209747314453125e-05, + "step": 27589, + "training_step_time": 0.10712146759033203 + }, + { + "epoch": 4.20989990234375e-05, + "grad_norm": 0.10998217761516571, + "learning_rate": 1.7539954433864858e-06, + "loss": 0.0034, + "step": 27590 + }, + { + "epoch": 4.20989990234375e-05, + "model_forward_time": 0.025073528289794922, + "step": 27590 + }, + { + "epoch": 4.20989990234375e-05, + "step": 27590, + "training_step_time": 0.10562396049499512 + }, + { + "epoch": 4.210052490234375e-05, + "model_forward_time": 0.02523946762084961, + "step": 27591 + }, + { + "epoch": 4.210052490234375e-05, + "step": 27591, + "training_step_time": 0.18592286109924316 + }, + { + "epoch": 4.210205078125e-05, + "model_forward_time": 0.024280071258544922, + "step": 27592 + }, + { + "epoch": 4.210205078125e-05, + "step": 27592, + "training_step_time": 0.16002988815307617 + }, + { + "epoch": 4.210357666015625e-05, + "model_forward_time": 0.02422499656677246, + "step": 27593 + }, + { + "epoch": 4.210357666015625e-05, + "step": 27593, + "training_step_time": 0.13994431495666504 + }, + { + "epoch": 4.21051025390625e-05, + "model_forward_time": 0.024435043334960938, + "step": 27594 + }, + { + "epoch": 4.21051025390625e-05, + "step": 27594, + "training_step_time": 0.1456737518310547 + }, + { + "epoch": 4.210662841796875e-05, + "model_forward_time": 0.024753093719482422, + "step": 27595 + }, + { + "epoch": 4.210662841796875e-05, + "step": 27595, + "training_step_time": 0.13695359230041504 + }, + { + "epoch": 4.2108154296875e-05, + "model_forward_time": 0.024649381637573242, + "step": 27596 + }, + { + "epoch": 4.2108154296875e-05, + "step": 27596, + "training_step_time": 0.2193615436553955 + }, + { + "epoch": 4.210968017578125e-05, + "model_forward_time": 0.025158166885375977, + "step": 27597 + }, + { + "epoch": 4.210968017578125e-05, + "step": 27597, + "training_step_time": 0.1297159194946289 + }, + { + "epoch": 4.21112060546875e-05, + "model_forward_time": 0.024258136749267578, + "step": 27598 + }, + { + "epoch": 4.21112060546875e-05, + "step": 27598, + "training_step_time": 0.19692397117614746 + }, + { + "epoch": 4.211273193359375e-05, + "model_forward_time": 0.024234771728515625, + "step": 27599 + }, + { + "epoch": 4.211273193359375e-05, + "step": 27599, + "training_step_time": 0.12053132057189941 + }, + { + "epoch": 4.21142578125e-05, + "grad_norm": 0.19780333340168, + "learning_rate": 1.7395544861325718e-06, + "loss": 0.0062, + "step": 27600 + }, + { + "epoch": 4.21142578125e-05, + "model_forward_time": 0.02364826202392578, + "step": 27600 + }, + { + "epoch": 4.21142578125e-05, + "step": 27600, + "training_step_time": 0.18732953071594238 + }, + { + "epoch": 4.211578369140625e-05, + "model_forward_time": 0.02459263801574707, + "step": 27601 + }, + { + "epoch": 4.211578369140625e-05, + "step": 27601, + "training_step_time": 0.11251521110534668 + }, + { + "epoch": 4.21173095703125e-05, + "model_forward_time": 0.0247650146484375, + "step": 27602 + }, + { + "epoch": 4.21173095703125e-05, + "step": 27602, + "training_step_time": 0.11029386520385742 + }, + { + "epoch": 4.211883544921875e-05, + "model_forward_time": 0.025539398193359375, + "step": 27603 + }, + { + "epoch": 4.211883544921875e-05, + "step": 27603, + "training_step_time": 0.1074066162109375 + }, + { + "epoch": 4.2120361328125e-05, + "model_forward_time": 0.0255584716796875, + "step": 27604 + }, + { + "epoch": 4.2120361328125e-05, + "step": 27604, + "training_step_time": 0.10654091835021973 + }, + { + "epoch": 4.212188720703125e-05, + "model_forward_time": 0.0254976749420166, + "step": 27605 + }, + { + "epoch": 4.212188720703125e-05, + "step": 27605, + "training_step_time": 0.11065459251403809 + }, + { + "epoch": 4.21234130859375e-05, + "model_forward_time": 0.025205373764038086, + "step": 27606 + }, + { + "epoch": 4.21234130859375e-05, + "step": 27606, + "training_step_time": 0.1060938835144043 + }, + { + "epoch": 4.212493896484375e-05, + "model_forward_time": 0.0254824161529541, + "step": 27607 + }, + { + "epoch": 4.212493896484375e-05, + "step": 27607, + "training_step_time": 0.10522627830505371 + }, + { + "epoch": 4.212646484375e-05, + "model_forward_time": 0.025151968002319336, + "step": 27608 + }, + { + "epoch": 4.212646484375e-05, + "step": 27608, + "training_step_time": 0.10329437255859375 + }, + { + "epoch": 4.212799072265625e-05, + "model_forward_time": 0.025222301483154297, + "step": 27609 + }, + { + "epoch": 4.212799072265625e-05, + "step": 27609, + "training_step_time": 0.1039586067199707 + }, + { + "epoch": 4.21295166015625e-05, + "grad_norm": 0.15223300457000732, + "learning_rate": 1.7251721698706147e-06, + "loss": 0.0088, + "step": 27610 + }, + { + "epoch": 4.21295166015625e-05, + "model_forward_time": 0.025224924087524414, + "step": 27610 + }, + { + "epoch": 4.21295166015625e-05, + "step": 27610, + "training_step_time": 0.11183428764343262 + }, + { + "epoch": 4.213104248046875e-05, + "model_forward_time": 0.024543285369873047, + "step": 27611 + }, + { + "epoch": 4.213104248046875e-05, + "step": 27611, + "training_step_time": 0.10614728927612305 + }, + { + "epoch": 4.2132568359375e-05, + "model_forward_time": 0.025307178497314453, + "step": 27612 + }, + { + "epoch": 4.2132568359375e-05, + "step": 27612, + "training_step_time": 0.10941624641418457 + }, + { + "epoch": 4.213409423828125e-05, + "model_forward_time": 0.025407791137695312, + "step": 27613 + }, + { + "epoch": 4.213409423828125e-05, + "step": 27613, + "training_step_time": 0.10332059860229492 + }, + { + "epoch": 4.21356201171875e-05, + "model_forward_time": 0.02514338493347168, + "step": 27614 + }, + { + "epoch": 4.21356201171875e-05, + "step": 27614, + "training_step_time": 0.10655832290649414 + }, + { + "epoch": 4.213714599609375e-05, + "model_forward_time": 0.025196313858032227, + "step": 27615 + }, + { + "epoch": 4.213714599609375e-05, + "step": 27615, + "training_step_time": 0.10486698150634766 + }, + { + "epoch": 4.2138671875e-05, + "model_forward_time": 0.02527475357055664, + "step": 27616 + }, + { + "epoch": 4.2138671875e-05, + "step": 27616, + "training_step_time": 0.10414671897888184 + }, + { + "epoch": 4.214019775390625e-05, + "model_forward_time": 0.025246381759643555, + "step": 27617 + }, + { + "epoch": 4.214019775390625e-05, + "step": 27617, + "training_step_time": 0.10378575325012207 + }, + { + "epoch": 4.21417236328125e-05, + "model_forward_time": 0.02530503273010254, + "step": 27618 + }, + { + "epoch": 4.21417236328125e-05, + "step": 27618, + "training_step_time": 0.10435795783996582 + }, + { + "epoch": 4.214324951171875e-05, + "model_forward_time": 0.025318622589111328, + "step": 27619 + }, + { + "epoch": 4.214324951171875e-05, + "step": 27619, + "training_step_time": 0.10833263397216797 + }, + { + "epoch": 4.2144775390625e-05, + "grad_norm": 0.061246681958436966, + "learning_rate": 1.7108485120764905e-06, + "loss": 0.0043, + "step": 27620 + }, + { + "epoch": 4.2144775390625e-05, + "model_forward_time": 0.024988889694213867, + "step": 27620 + }, + { + "epoch": 4.2144775390625e-05, + "step": 27620, + "training_step_time": 0.10388636589050293 + }, + { + "epoch": 4.214630126953125e-05, + "model_forward_time": 0.025701284408569336, + "step": 27621 + }, + { + "epoch": 4.214630126953125e-05, + "step": 27621, + "training_step_time": 0.10527634620666504 + }, + { + "epoch": 4.21478271484375e-05, + "model_forward_time": 0.024407148361206055, + "step": 27622 + }, + { + "epoch": 4.21478271484375e-05, + "step": 27622, + "training_step_time": 0.14646148681640625 + }, + { + "epoch": 4.214935302734375e-05, + "model_forward_time": 0.024723291397094727, + "step": 27623 + }, + { + "epoch": 4.214935302734375e-05, + "step": 27623, + "training_step_time": 0.15848016738891602 + }, + { + "epoch": 4.215087890625e-05, + "model_forward_time": 0.024689912796020508, + "step": 27624 + }, + { + "epoch": 4.215087890625e-05, + "step": 27624, + "training_step_time": 0.21657061576843262 + }, + { + "epoch": 4.215240478515625e-05, + "model_forward_time": 0.024332046508789062, + "step": 27625 + }, + { + "epoch": 4.215240478515625e-05, + "step": 27625, + "training_step_time": 0.15656328201293945 + }, + { + "epoch": 4.21539306640625e-05, + "model_forward_time": 0.024501800537109375, + "step": 27626 + }, + { + "epoch": 4.21539306640625e-05, + "step": 27626, + "training_step_time": 0.14226675033569336 + }, + { + "epoch": 4.215545654296875e-05, + "model_forward_time": 0.023906230926513672, + "step": 27627 + }, + { + "epoch": 4.215545654296875e-05, + "step": 27627, + "training_step_time": 0.12164425849914551 + }, + { + "epoch": 4.2156982421875e-05, + "model_forward_time": 0.024671077728271484, + "step": 27628 + }, + { + "epoch": 4.2156982421875e-05, + "step": 27628, + "training_step_time": 0.22577667236328125 + }, + { + "epoch": 4.215850830078125e-05, + "model_forward_time": 0.02451014518737793, + "step": 27629 + }, + { + "epoch": 4.215850830078125e-05, + "step": 27629, + "training_step_time": 0.11810612678527832 + }, + { + "epoch": 4.21600341796875e-05, + "grad_norm": 0.04241884499788284, + "learning_rate": 1.696583530154794e-06, + "loss": 0.0036, + "step": 27630 + }, + { + "epoch": 4.21600341796875e-05, + "model_forward_time": 0.024547576904296875, + "step": 27630 + }, + { + "epoch": 4.21600341796875e-05, + "step": 27630, + "training_step_time": 0.13604140281677246 + }, + { + "epoch": 4.216156005859375e-05, + "model_forward_time": 0.02526998519897461, + "step": 27631 + }, + { + "epoch": 4.216156005859375e-05, + "step": 27631, + "training_step_time": 0.14240646362304688 + }, + { + "epoch": 4.21630859375e-05, + "model_forward_time": 0.024836063385009766, + "step": 27632 + }, + { + "epoch": 4.21630859375e-05, + "step": 27632, + "training_step_time": 0.13510608673095703 + }, + { + "epoch": 4.216461181640625e-05, + "model_forward_time": 0.02458477020263672, + "step": 27633 + }, + { + "epoch": 4.216461181640625e-05, + "step": 27633, + "training_step_time": 0.12606143951416016 + }, + { + "epoch": 4.21661376953125e-05, + "model_forward_time": 0.02495551109313965, + "step": 27634 + }, + { + "epoch": 4.21661376953125e-05, + "step": 27634, + "training_step_time": 0.12200784683227539 + }, + { + "epoch": 4.216766357421875e-05, + "model_forward_time": 0.025063037872314453, + "step": 27635 + }, + { + "epoch": 4.216766357421875e-05, + "step": 27635, + "training_step_time": 0.11873269081115723 + }, + { + "epoch": 4.2169189453125e-05, + "model_forward_time": 0.024874210357666016, + "step": 27636 + }, + { + "epoch": 4.2169189453125e-05, + "step": 27636, + "training_step_time": 0.11419558525085449 + }, + { + "epoch": 4.217071533203125e-05, + "model_forward_time": 0.025151968002319336, + "step": 27637 + }, + { + "epoch": 4.217071533203125e-05, + "step": 27637, + "training_step_time": 0.11036872863769531 + }, + { + "epoch": 4.21722412109375e-05, + "model_forward_time": 0.02498340606689453, + "step": 27638 + }, + { + "epoch": 4.21722412109375e-05, + "step": 27638, + "training_step_time": 0.10825681686401367 + }, + { + "epoch": 4.217376708984375e-05, + "model_forward_time": 0.025016307830810547, + "step": 27639 + }, + { + "epoch": 4.217376708984375e-05, + "step": 27639, + "training_step_time": 0.20430684089660645 + }, + { + "epoch": 4.217529296875e-05, + "grad_norm": 0.06447423994541168, + "learning_rate": 1.682377241438826e-06, + "loss": 0.0035, + "step": 27640 + }, + { + "epoch": 4.217529296875e-05, + "model_forward_time": 0.02485203742980957, + "step": 27640 + }, + { + "epoch": 4.217529296875e-05, + "step": 27640, + "training_step_time": 0.12594294548034668 + }, + { + "epoch": 4.217681884765625e-05, + "model_forward_time": 0.023974180221557617, + "step": 27641 + }, + { + "epoch": 4.217681884765625e-05, + "step": 27641, + "training_step_time": 0.1966104507446289 + }, + { + "epoch": 4.21783447265625e-05, + "model_forward_time": 0.024400711059570312, + "step": 27642 + }, + { + "epoch": 4.21783447265625e-05, + "step": 27642, + "training_step_time": 0.12387800216674805 + }, + { + "epoch": 4.217987060546875e-05, + "model_forward_time": 0.02424454689025879, + "step": 27643 + }, + { + "epoch": 4.217987060546875e-05, + "step": 27643, + "training_step_time": 0.10306358337402344 + }, + { + "epoch": 4.2181396484375e-05, + "model_forward_time": 0.02456498146057129, + "step": 27644 + }, + { + "epoch": 4.2181396484375e-05, + "step": 27644, + "training_step_time": 0.1922774314880371 + }, + { + "epoch": 4.218292236328125e-05, + "model_forward_time": 0.024243831634521484, + "step": 27645 + }, + { + "epoch": 4.218292236328125e-05, + "step": 27645, + "training_step_time": 0.10531926155090332 + }, + { + "epoch": 4.21844482421875e-05, + "model_forward_time": 0.023311376571655273, + "step": 27646 + }, + { + "epoch": 4.21844482421875e-05, + "step": 27646, + "training_step_time": 0.10196924209594727 + }, + { + "epoch": 4.218597412109375e-05, + "model_forward_time": 0.02417778968811035, + "step": 27647 + }, + { + "epoch": 4.218597412109375e-05, + "step": 27647, + "training_step_time": 0.10902690887451172 + }, + { + "epoch": 4.21875e-05, + "model_forward_time": 0.025592327117919922, + "step": 27648 + }, + { + "epoch": 4.21875e-05, + "step": 27648, + "training_step_time": 0.10908293724060059 + }, + { + "epoch": 4.218902587890625e-05, + "model_forward_time": 0.024844646453857422, + "step": 27649 + }, + { + "epoch": 4.218902587890625e-05, + "step": 27649, + "training_step_time": 0.10633730888366699 + }, + { + "epoch": 4.21905517578125e-05, + "grad_norm": 0.0660465881228447, + "learning_rate": 1.6682296631905626e-06, + "loss": 0.0102, + "step": 27650 + }, + { + "epoch": 4.21905517578125e-05, + "model_forward_time": 0.025337696075439453, + "step": 27650 + }, + { + "epoch": 4.21905517578125e-05, + "step": 27650, + "training_step_time": 0.10471677780151367 + }, + { + "epoch": 4.219207763671875e-05, + "model_forward_time": 0.025357961654663086, + "step": 27651 + }, + { + "epoch": 4.219207763671875e-05, + "step": 27651, + "training_step_time": 0.10409045219421387 + }, + { + "epoch": 4.2193603515625e-05, + "model_forward_time": 0.024956226348876953, + "step": 27652 + }, + { + "epoch": 4.2193603515625e-05, + "step": 27652, + "training_step_time": 0.10526013374328613 + }, + { + "epoch": 4.219512939453125e-05, + "model_forward_time": 0.026139497756958008, + "step": 27653 + }, + { + "epoch": 4.219512939453125e-05, + "step": 27653, + "training_step_time": 0.10611605644226074 + }, + { + "epoch": 4.21966552734375e-05, + "model_forward_time": 0.025371789932250977, + "step": 27654 + }, + { + "epoch": 4.21966552734375e-05, + "step": 27654, + "training_step_time": 0.10560846328735352 + }, + { + "epoch": 4.219818115234375e-05, + "model_forward_time": 0.02519536018371582, + "step": 27655 + }, + { + "epoch": 4.219818115234375e-05, + "step": 27655, + "training_step_time": 0.10461139678955078 + }, + { + "epoch": 4.219970703125e-05, + "model_forward_time": 0.025520801544189453, + "step": 27656 + }, + { + "epoch": 4.219970703125e-05, + "step": 27656, + "training_step_time": 0.1064450740814209 + }, + { + "epoch": 4.220123291015625e-05, + "model_forward_time": 0.025105953216552734, + "step": 27657 + }, + { + "epoch": 4.220123291015625e-05, + "step": 27657, + "training_step_time": 0.10495805740356445 + }, + { + "epoch": 4.22027587890625e-05, + "model_forward_time": 0.025074481964111328, + "step": 27658 + }, + { + "epoch": 4.22027587890625e-05, + "step": 27658, + "training_step_time": 0.10374188423156738 + }, + { + "epoch": 4.220428466796875e-05, + "model_forward_time": 0.025078296661376953, + "step": 27659 + }, + { + "epoch": 4.220428466796875e-05, + "step": 27659, + "training_step_time": 0.10534286499023438 + }, + { + "epoch": 4.2205810546875e-05, + "grad_norm": 0.06475159525871277, + "learning_rate": 1.6541408126006463e-06, + "loss": 0.01, + "step": 27660 + }, + { + "epoch": 4.2205810546875e-05, + "model_forward_time": 0.025194644927978516, + "step": 27660 + }, + { + "epoch": 4.2205810546875e-05, + "step": 27660, + "training_step_time": 0.10530352592468262 + }, + { + "epoch": 4.220733642578125e-05, + "model_forward_time": 0.02490687370300293, + "step": 27661 + }, + { + "epoch": 4.220733642578125e-05, + "step": 27661, + "training_step_time": 0.10345339775085449 + }, + { + "epoch": 4.22088623046875e-05, + "model_forward_time": 0.025268077850341797, + "step": 27662 + }, + { + "epoch": 4.22088623046875e-05, + "step": 27662, + "training_step_time": 0.10631561279296875 + }, + { + "epoch": 4.221038818359375e-05, + "model_forward_time": 0.025289058685302734, + "step": 27663 + }, + { + "epoch": 4.221038818359375e-05, + "step": 27663, + "training_step_time": 0.12271618843078613 + }, + { + "epoch": 4.22119140625e-05, + "model_forward_time": 0.025401592254638672, + "step": 27664 + }, + { + "epoch": 4.22119140625e-05, + "step": 27664, + "training_step_time": 0.12385916709899902 + }, + { + "epoch": 4.221343994140625e-05, + "model_forward_time": 0.024962425231933594, + "step": 27665 + }, + { + "epoch": 4.221343994140625e-05, + "step": 27665, + "training_step_time": 0.11117696762084961 + }, + { + "epoch": 4.22149658203125e-05, + "model_forward_time": 0.0244596004486084, + "step": 27666 + }, + { + "epoch": 4.22149658203125e-05, + "step": 27666, + "training_step_time": 0.13835668563842773 + }, + { + "epoch": 4.221649169921875e-05, + "model_forward_time": 0.02455759048461914, + "step": 27667 + }, + { + "epoch": 4.221649169921875e-05, + "step": 27667, + "training_step_time": 0.16215872764587402 + }, + { + "epoch": 4.2218017578125e-05, + "model_forward_time": 0.0248262882232666, + "step": 27668 + }, + { + "epoch": 4.2218017578125e-05, + "step": 27668, + "training_step_time": 0.20669078826904297 + }, + { + "epoch": 4.221954345703125e-05, + "model_forward_time": 0.024550437927246094, + "step": 27669 + }, + { + "epoch": 4.221954345703125e-05, + "step": 27669, + "training_step_time": 0.16620492935180664 + }, + { + "epoch": 4.22210693359375e-05, + "grad_norm": 0.07270149886608124, + "learning_rate": 1.6401107067883559e-06, + "loss": 0.0084, + "step": 27670 + }, + { + "epoch": 4.22210693359375e-05, + "model_forward_time": 0.0258176326751709, + "step": 27670 + }, + { + "epoch": 4.22210693359375e-05, + "step": 27670, + "training_step_time": 0.1777806282043457 + }, + { + "epoch": 4.222259521484375e-05, + "model_forward_time": 0.02454853057861328, + "step": 27671 + }, + { + "epoch": 4.222259521484375e-05, + "step": 27671, + "training_step_time": 0.13738679885864258 + }, + { + "epoch": 4.222412109375e-05, + "model_forward_time": 0.024824857711791992, + "step": 27672 + }, + { + "epoch": 4.222412109375e-05, + "step": 27672, + "training_step_time": 0.18199920654296875 + }, + { + "epoch": 4.222564697265625e-05, + "model_forward_time": 0.024954795837402344, + "step": 27673 + }, + { + "epoch": 4.222564697265625e-05, + "step": 27673, + "training_step_time": 0.15314388275146484 + }, + { + "epoch": 4.22271728515625e-05, + "model_forward_time": 0.02451944351196289, + "step": 27674 + }, + { + "epoch": 4.22271728515625e-05, + "step": 27674, + "training_step_time": 0.10490560531616211 + }, + { + "epoch": 4.222869873046875e-05, + "model_forward_time": 0.024959564208984375, + "step": 27675 + }, + { + "epoch": 4.222869873046875e-05, + "step": 27675, + "training_step_time": 0.10483050346374512 + }, + { + "epoch": 4.2230224609375e-05, + "model_forward_time": 0.02555108070373535, + "step": 27676 + }, + { + "epoch": 4.2230224609375e-05, + "step": 27676, + "training_step_time": 0.10722804069519043 + }, + { + "epoch": 4.223175048828125e-05, + "model_forward_time": 0.025754451751708984, + "step": 27677 + }, + { + "epoch": 4.223175048828125e-05, + "step": 27677, + "training_step_time": 0.10642719268798828 + }, + { + "epoch": 4.22332763671875e-05, + "model_forward_time": 0.025213956832885742, + "step": 27678 + }, + { + "epoch": 4.22332763671875e-05, + "step": 27678, + "training_step_time": 0.10613417625427246 + }, + { + "epoch": 4.223480224609375e-05, + "model_forward_time": 0.02508401870727539, + "step": 27679 + }, + { + "epoch": 4.223480224609375e-05, + "step": 27679, + "training_step_time": 0.11035823822021484 + }, + { + "epoch": 4.2236328125e-05, + "grad_norm": 0.1756133884191513, + "learning_rate": 1.626139362801604e-06, + "loss": 0.0056, + "step": 27680 + }, + { + "epoch": 4.2236328125e-05, + "model_forward_time": 0.025370359420776367, + "step": 27680 + }, + { + "epoch": 4.2236328125e-05, + "step": 27680, + "training_step_time": 0.10494422912597656 + }, + { + "epoch": 4.223785400390625e-05, + "model_forward_time": 0.0251157283782959, + "step": 27681 + }, + { + "epoch": 4.223785400390625e-05, + "step": 27681, + "training_step_time": 0.1048576831817627 + }, + { + "epoch": 4.22393798828125e-05, + "model_forward_time": 0.02539992332458496, + "step": 27682 + }, + { + "epoch": 4.22393798828125e-05, + "step": 27682, + "training_step_time": 0.10538291931152344 + }, + { + "epoch": 4.224090576171875e-05, + "model_forward_time": 0.026517629623413086, + "step": 27683 + }, + { + "epoch": 4.224090576171875e-05, + "step": 27683, + "training_step_time": 0.10849261283874512 + }, + { + "epoch": 4.2242431640625e-05, + "model_forward_time": 0.025059223175048828, + "step": 27684 + }, + { + "epoch": 4.2242431640625e-05, + "step": 27684, + "training_step_time": 0.19494891166687012 + }, + { + "epoch": 4.224395751953125e-05, + "model_forward_time": 0.02438521385192871, + "step": 27685 + }, + { + "epoch": 4.224395751953125e-05, + "step": 27685, + "training_step_time": 0.1343832015991211 + }, + { + "epoch": 4.22454833984375e-05, + "model_forward_time": 0.02482318878173828, + "step": 27686 + }, + { + "epoch": 4.22454833984375e-05, + "step": 27686, + "training_step_time": 0.10733890533447266 + }, + { + "epoch": 4.224700927734375e-05, + "model_forward_time": 0.025352001190185547, + "step": 27687 + }, + { + "epoch": 4.224700927734375e-05, + "step": 27687, + "training_step_time": 0.11685633659362793 + }, + { + "epoch": 4.224853515625e-05, + "model_forward_time": 0.02523946762084961, + "step": 27688 + }, + { + "epoch": 4.224853515625e-05, + "step": 27688, + "training_step_time": 0.10916829109191895 + }, + { + "epoch": 4.225006103515625e-05, + "model_forward_time": 0.025341272354125977, + "step": 27689 + }, + { + "epoch": 4.225006103515625e-05, + "step": 27689, + "training_step_time": 0.10696029663085938 + }, + { + "epoch": 4.22515869140625e-05, + "grad_norm": 0.04706356301903725, + "learning_rate": 1.6122267976168781e-06, + "loss": 0.0028, + "step": 27690 + }, + { + "epoch": 4.22515869140625e-05, + "model_forward_time": 0.025202274322509766, + "step": 27690 + }, + { + "epoch": 4.22515869140625e-05, + "step": 27690, + "training_step_time": 0.2031550407409668 + }, + { + "epoch": 4.225311279296875e-05, + "model_forward_time": 0.02489185333251953, + "step": 27691 + }, + { + "epoch": 4.225311279296875e-05, + "step": 27691, + "training_step_time": 0.10516738891601562 + }, + { + "epoch": 4.2254638671875e-05, + "model_forward_time": 0.024877309799194336, + "step": 27692 + }, + { + "epoch": 4.2254638671875e-05, + "step": 27692, + "training_step_time": 0.11028456687927246 + }, + { + "epoch": 4.225616455078125e-05, + "model_forward_time": 0.026930570602416992, + "step": 27693 + }, + { + "epoch": 4.225616455078125e-05, + "step": 27693, + "training_step_time": 0.11659812927246094 + }, + { + "epoch": 4.22576904296875e-05, + "model_forward_time": 0.02515101432800293, + "step": 27694 + }, + { + "epoch": 4.22576904296875e-05, + "step": 27694, + "training_step_time": 0.10611438751220703 + }, + { + "epoch": 4.225921630859375e-05, + "model_forward_time": 0.025331974029541016, + "step": 27695 + }, + { + "epoch": 4.225921630859375e-05, + "step": 27695, + "training_step_time": 0.1058206558227539 + }, + { + "epoch": 4.22607421875e-05, + "model_forward_time": 0.025413990020751953, + "step": 27696 + }, + { + "epoch": 4.22607421875e-05, + "step": 27696, + "training_step_time": 0.10953187942504883 + }, + { + "epoch": 4.226226806640625e-05, + "model_forward_time": 0.02500462532043457, + "step": 27697 + }, + { + "epoch": 4.226226806640625e-05, + "step": 27697, + "training_step_time": 0.10342764854431152 + }, + { + "epoch": 4.22637939453125e-05, + "model_forward_time": 0.02593207359313965, + "step": 27698 + }, + { + "epoch": 4.22637939453125e-05, + "step": 27698, + "training_step_time": 0.10918784141540527 + }, + { + "epoch": 4.226531982421875e-05, + "model_forward_time": 0.024413347244262695, + "step": 27699 + }, + { + "epoch": 4.226531982421875e-05, + "step": 27699, + "training_step_time": 0.10820317268371582 + }, + { + "epoch": 4.2266845703125e-05, + "grad_norm": 0.08612319827079773, + "learning_rate": 1.5983730281392662e-06, + "loss": 0.0047, + "step": 27700 + }, + { + "epoch": 4.2266845703125e-05, + "model_forward_time": 0.02556633949279785, + "step": 27700 + }, + { + "epoch": 4.2266845703125e-05, + "step": 27700, + "training_step_time": 0.11347150802612305 + }, + { + "epoch": 4.226837158203125e-05, + "model_forward_time": 0.02498316764831543, + "step": 27701 + }, + { + "epoch": 4.226837158203125e-05, + "step": 27701, + "training_step_time": 0.10439515113830566 + }, + { + "epoch": 4.22698974609375e-05, + "model_forward_time": 0.024886131286621094, + "step": 27702 + }, + { + "epoch": 4.22698974609375e-05, + "step": 27702, + "training_step_time": 0.10900568962097168 + }, + { + "epoch": 4.227142333984375e-05, + "model_forward_time": 0.025036096572875977, + "step": 27703 + }, + { + "epoch": 4.227142333984375e-05, + "step": 27703, + "training_step_time": 0.10880541801452637 + }, + { + "epoch": 4.227294921875e-05, + "model_forward_time": 0.02498912811279297, + "step": 27704 + }, + { + "epoch": 4.227294921875e-05, + "step": 27704, + "training_step_time": 0.10824990272521973 + }, + { + "epoch": 4.227447509765625e-05, + "model_forward_time": 0.025271177291870117, + "step": 27705 + }, + { + "epoch": 4.227447509765625e-05, + "step": 27705, + "training_step_time": 0.10960817337036133 + }, + { + "epoch": 4.22760009765625e-05, + "model_forward_time": 0.025146007537841797, + "step": 27706 + }, + { + "epoch": 4.22760009765625e-05, + "step": 27706, + "training_step_time": 0.10738110542297363 + }, + { + "epoch": 4.227752685546875e-05, + "model_forward_time": 0.025606870651245117, + "step": 27707 + }, + { + "epoch": 4.227752685546875e-05, + "step": 27707, + "training_step_time": 0.10852336883544922 + }, + { + "epoch": 4.2279052734375e-05, + "model_forward_time": 0.02523183822631836, + "step": 27708 + }, + { + "epoch": 4.2279052734375e-05, + "step": 27708, + "training_step_time": 0.10644960403442383 + }, + { + "epoch": 4.228057861328125e-05, + "model_forward_time": 0.02565455436706543, + "step": 27709 + }, + { + "epoch": 4.228057861328125e-05, + "step": 27709, + "training_step_time": 0.1053462028503418 + }, + { + "epoch": 4.22821044921875e-05, + "grad_norm": 0.12188015878200531, + "learning_rate": 1.5845780712023973e-06, + "loss": 0.0025, + "step": 27710 + }, + { + "epoch": 4.22821044921875e-05, + "model_forward_time": 0.025073528289794922, + "step": 27710 + }, + { + "epoch": 4.22821044921875e-05, + "step": 27710, + "training_step_time": 0.10300779342651367 + }, + { + "epoch": 4.228363037109375e-05, + "model_forward_time": 0.024135351181030273, + "step": 27711 + }, + { + "epoch": 4.228363037109375e-05, + "step": 27711, + "training_step_time": 0.14708924293518066 + }, + { + "epoch": 4.228515625e-05, + "model_forward_time": 0.024503231048583984, + "step": 27712 + }, + { + "epoch": 4.228515625e-05, + "step": 27712, + "training_step_time": 0.15679025650024414 + }, + { + "epoch": 4.228668212890625e-05, + "model_forward_time": 0.024532318115234375, + "step": 27713 + }, + { + "epoch": 4.228668212890625e-05, + "step": 27713, + "training_step_time": 0.17716169357299805 + }, + { + "epoch": 4.22882080078125e-05, + "model_forward_time": 0.0243985652923584, + "step": 27714 + }, + { + "epoch": 4.22882080078125e-05, + "step": 27714, + "training_step_time": 0.16577982902526855 + }, + { + "epoch": 4.228973388671875e-05, + "model_forward_time": 0.024974584579467773, + "step": 27715 + }, + { + "epoch": 4.228973388671875e-05, + "step": 27715, + "training_step_time": 0.18579697608947754 + }, + { + "epoch": 4.2291259765625e-05, + "model_forward_time": 0.023831605911254883, + "step": 27716 + }, + { + "epoch": 4.2291259765625e-05, + "step": 27716, + "training_step_time": 0.1444697380065918 + }, + { + "epoch": 4.229278564453125e-05, + "model_forward_time": 0.023415327072143555, + "step": 27717 + }, + { + "epoch": 4.229278564453125e-05, + "step": 27717, + "training_step_time": 0.5963430404663086 + }, + { + "epoch": 4.22943115234375e-05, + "model_forward_time": 0.021768808364868164, + "step": 27718 + }, + { + "epoch": 4.22943115234375e-05, + "step": 27718, + "training_step_time": 0.10294556617736816 + }, + { + "epoch": 4.229583740234375e-05, + "model_forward_time": 0.023662805557250977, + "step": 27719 + }, + { + "epoch": 4.229583740234375e-05, + "step": 27719, + "training_step_time": 0.10234904289245605 + }, + { + "epoch": 4.229736328125e-05, + "grad_norm": 0.025142701342701912, + "learning_rate": 1.5708419435684462e-06, + "loss": 0.003, + "step": 27720 + }, + { + "epoch": 4.229736328125e-05, + "model_forward_time": 0.02426767349243164, + "step": 27720 + }, + { + "epoch": 4.229736328125e-05, + "step": 27720, + "training_step_time": 0.10664916038513184 + }, + { + "epoch": 4.229888916015625e-05, + "model_forward_time": 0.024670124053955078, + "step": 27721 + }, + { + "epoch": 4.229888916015625e-05, + "step": 27721, + "training_step_time": 0.10760378837585449 + }, + { + "epoch": 4.23004150390625e-05, + "model_forward_time": 0.02463984489440918, + "step": 27722 + }, + { + "epoch": 4.23004150390625e-05, + "step": 27722, + "training_step_time": 0.11524438858032227 + }, + { + "epoch": 4.230194091796875e-05, + "model_forward_time": 0.02450728416442871, + "step": 27723 + }, + { + "epoch": 4.230194091796875e-05, + "step": 27723, + "training_step_time": 0.10560369491577148 + }, + { + "epoch": 4.2303466796875e-05, + "model_forward_time": 0.024857282638549805, + "step": 27724 + }, + { + "epoch": 4.2303466796875e-05, + "step": 27724, + "training_step_time": 0.10944819450378418 + }, + { + "epoch": 4.230499267578125e-05, + "model_forward_time": 0.02480030059814453, + "step": 27725 + }, + { + "epoch": 4.230499267578125e-05, + "step": 27725, + "training_step_time": 0.10605478286743164 + }, + { + "epoch": 4.23065185546875e-05, + "model_forward_time": 0.025127410888671875, + "step": 27726 + }, + { + "epoch": 4.23065185546875e-05, + "step": 27726, + "training_step_time": 0.10770082473754883 + }, + { + "epoch": 4.230804443359375e-05, + "model_forward_time": 0.027119159698486328, + "step": 27727 + }, + { + "epoch": 4.230804443359375e-05, + "step": 27727, + "training_step_time": 0.13924479484558105 + }, + { + "epoch": 4.23095703125e-05, + "model_forward_time": 0.024493694305419922, + "step": 27728 + }, + { + "epoch": 4.23095703125e-05, + "step": 27728, + "training_step_time": 0.14240503311157227 + }, + { + "epoch": 4.231109619140625e-05, + "model_forward_time": 0.023777008056640625, + "step": 27729 + }, + { + "epoch": 4.231109619140625e-05, + "step": 27729, + "training_step_time": 0.10647845268249512 + }, + { + "epoch": 4.23126220703125e-05, + "grad_norm": 0.10114025324583054, + "learning_rate": 1.5571646619281066e-06, + "loss": 0.0027, + "step": 27730 + }, + { + "epoch": 4.23126220703125e-05, + "model_forward_time": 0.024282455444335938, + "step": 27730 + }, + { + "epoch": 4.23126220703125e-05, + "step": 27730, + "training_step_time": 0.1061711311340332 + }, + { + "epoch": 4.231414794921875e-05, + "model_forward_time": 0.024338483810424805, + "step": 27731 + }, + { + "epoch": 4.231414794921875e-05, + "step": 27731, + "training_step_time": 0.11275649070739746 + }, + { + "epoch": 4.2315673828125e-05, + "model_forward_time": 0.025748014450073242, + "step": 27732 + }, + { + "epoch": 4.2315673828125e-05, + "step": 27732, + "training_step_time": 0.11031794548034668 + }, + { + "epoch": 4.231719970703125e-05, + "model_forward_time": 0.025377273559570312, + "step": 27733 + }, + { + "epoch": 4.231719970703125e-05, + "step": 27733, + "training_step_time": 0.18670272827148438 + }, + { + "epoch": 4.23187255859375e-05, + "model_forward_time": 0.02460026741027832, + "step": 27734 + }, + { + "epoch": 4.23187255859375e-05, + "step": 27734, + "training_step_time": 0.10669660568237305 + }, + { + "epoch": 4.232025146484375e-05, + "model_forward_time": 0.02521491050720215, + "step": 27735 + }, + { + "epoch": 4.232025146484375e-05, + "step": 27735, + "training_step_time": 0.1014094352722168 + }, + { + "epoch": 4.232177734375e-05, + "model_forward_time": 0.025114774703979492, + "step": 27736 + }, + { + "epoch": 4.232177734375e-05, + "step": 27736, + "training_step_time": 0.10554051399230957 + }, + { + "epoch": 4.232330322265625e-05, + "model_forward_time": 0.024494409561157227, + "step": 27737 + }, + { + "epoch": 4.232330322265625e-05, + "step": 27737, + "training_step_time": 0.10537910461425781 + }, + { + "epoch": 4.23248291015625e-05, + "model_forward_time": 0.024053096771240234, + "step": 27738 + }, + { + "epoch": 4.23248291015625e-05, + "step": 27738, + "training_step_time": 0.10490655899047852 + }, + { + "epoch": 4.232635498046875e-05, + "model_forward_time": 0.02542257308959961, + "step": 27739 + }, + { + "epoch": 4.232635498046875e-05, + "step": 27739, + "training_step_time": 0.16970491409301758 + }, + { + "epoch": 4.2327880859375e-05, + "grad_norm": 0.03618144243955612, + "learning_rate": 1.5435462429005675e-06, + "loss": 0.0036, + "step": 27740 + }, + { + "epoch": 4.2327880859375e-05, + "model_forward_time": 0.02414989471435547, + "step": 27740 + }, + { + "epoch": 4.2327880859375e-05, + "step": 27740, + "training_step_time": 0.17909622192382812 + }, + { + "epoch": 4.232940673828125e-05, + "model_forward_time": 0.024317502975463867, + "step": 27741 + }, + { + "epoch": 4.232940673828125e-05, + "step": 27741, + "training_step_time": 0.16564083099365234 + }, + { + "epoch": 4.23309326171875e-05, + "model_forward_time": 0.024778127670288086, + "step": 27742 + }, + { + "epoch": 4.23309326171875e-05, + "step": 27742, + "training_step_time": 0.15216350555419922 + }, + { + "epoch": 4.233245849609375e-05, + "model_forward_time": 0.024600744247436523, + "step": 27743 + }, + { + "epoch": 4.233245849609375e-05, + "step": 27743, + "training_step_time": 0.1533827781677246 + }, + { + "epoch": 4.2333984375e-05, + "model_forward_time": 0.02443981170654297, + "step": 27744 + }, + { + "epoch": 4.2333984375e-05, + "step": 27744, + "training_step_time": 0.1368401050567627 + }, + { + "epoch": 4.233551025390625e-05, + "model_forward_time": 0.025228023529052734, + "step": 27745 + }, + { + "epoch": 4.233551025390625e-05, + "step": 27745, + "training_step_time": 0.131392240524292 + }, + { + "epoch": 4.23370361328125e-05, + "model_forward_time": 0.02487659454345703, + "step": 27746 + }, + { + "epoch": 4.23370361328125e-05, + "step": 27746, + "training_step_time": 0.1256556510925293 + }, + { + "epoch": 4.233856201171875e-05, + "model_forward_time": 0.025441646575927734, + "step": 27747 + }, + { + "epoch": 4.233856201171875e-05, + "step": 27747, + "training_step_time": 0.12474560737609863 + }, + { + "epoch": 4.2340087890625e-05, + "model_forward_time": 0.02576446533203125, + "step": 27748 + }, + { + "epoch": 4.2340087890625e-05, + "step": 27748, + "training_step_time": 0.11660218238830566 + }, + { + "epoch": 4.234161376953125e-05, + "model_forward_time": 0.02531886100769043, + "step": 27749 + }, + { + "epoch": 4.234161376953125e-05, + "step": 27749, + "training_step_time": 0.11276483535766602 + }, + { + "epoch": 4.23431396484375e-05, + "grad_norm": 0.042110636830329895, + "learning_rate": 1.5299867030334814e-06, + "loss": 0.0032, + "step": 27750 + }, + { + "epoch": 4.23431396484375e-05, + "model_forward_time": 0.025428056716918945, + "step": 27750 + }, + { + "epoch": 4.23431396484375e-05, + "step": 27750, + "training_step_time": 0.11369776725769043 + }, + { + "epoch": 4.234466552734375e-05, + "model_forward_time": 0.02568507194519043, + "step": 27751 + }, + { + "epoch": 4.234466552734375e-05, + "step": 27751, + "training_step_time": 0.10561418533325195 + }, + { + "epoch": 4.234619140625e-05, + "model_forward_time": 0.02510523796081543, + "step": 27752 + }, + { + "epoch": 4.234619140625e-05, + "step": 27752, + "training_step_time": 0.14813947677612305 + }, + { + "epoch": 4.234771728515625e-05, + "model_forward_time": 0.025078773498535156, + "step": 27753 + }, + { + "epoch": 4.234771728515625e-05, + "step": 27753, + "training_step_time": 0.1586003303527832 + }, + { + "epoch": 4.23492431640625e-05, + "model_forward_time": 0.024753093719482422, + "step": 27754 + }, + { + "epoch": 4.23492431640625e-05, + "step": 27754, + "training_step_time": 0.1386735439300537 + }, + { + "epoch": 4.235076904296875e-05, + "model_forward_time": 0.02470088005065918, + "step": 27755 + }, + { + "epoch": 4.235076904296875e-05, + "step": 27755, + "training_step_time": 0.13225913047790527 + }, + { + "epoch": 4.2352294921875e-05, + "model_forward_time": 0.025704383850097656, + "step": 27756 + }, + { + "epoch": 4.2352294921875e-05, + "step": 27756, + "training_step_time": 0.17214250564575195 + }, + { + "epoch": 4.235382080078125e-05, + "model_forward_time": 0.025185346603393555, + "step": 27757 + }, + { + "epoch": 4.235382080078125e-05, + "step": 27757, + "training_step_time": 0.17632579803466797 + }, + { + "epoch": 4.23553466796875e-05, + "model_forward_time": 0.02440667152404785, + "step": 27758 + }, + { + "epoch": 4.23553466796875e-05, + "step": 27758, + "training_step_time": 0.1441481113433838 + }, + { + "epoch": 4.235687255859375e-05, + "model_forward_time": 0.02455306053161621, + "step": 27759 + }, + { + "epoch": 4.235687255859375e-05, + "step": 27759, + "training_step_time": 0.1085672378540039 + }, + { + "epoch": 4.23583984375e-05, + "grad_norm": 0.04515612870454788, + "learning_rate": 1.516486058802974e-06, + "loss": 0.0027, + "step": 27760 + }, + { + "epoch": 4.23583984375e-05, + "model_forward_time": 0.024704933166503906, + "step": 27760 + }, + { + "epoch": 4.23583984375e-05, + "step": 27760, + "training_step_time": 0.11039590835571289 + }, + { + "epoch": 4.235992431640625e-05, + "model_forward_time": 0.025605201721191406, + "step": 27761 + }, + { + "epoch": 4.235992431640625e-05, + "step": 27761, + "training_step_time": 0.10695195198059082 + }, + { + "epoch": 4.23614501953125e-05, + "model_forward_time": 0.025614500045776367, + "step": 27762 + }, + { + "epoch": 4.23614501953125e-05, + "step": 27762, + "training_step_time": 0.10595226287841797 + }, + { + "epoch": 4.236297607421875e-05, + "model_forward_time": 0.025545358657836914, + "step": 27763 + }, + { + "epoch": 4.236297607421875e-05, + "step": 27763, + "training_step_time": 0.11027359962463379 + }, + { + "epoch": 4.2364501953125e-05, + "model_forward_time": 0.025343656539916992, + "step": 27764 + }, + { + "epoch": 4.2364501953125e-05, + "step": 27764, + "training_step_time": 0.10791611671447754 + }, + { + "epoch": 4.236602783203125e-05, + "model_forward_time": 0.025295019149780273, + "step": 27765 + }, + { + "epoch": 4.236602783203125e-05, + "step": 27765, + "training_step_time": 0.10526633262634277 + }, + { + "epoch": 4.23675537109375e-05, + "model_forward_time": 0.025359630584716797, + "step": 27766 + }, + { + "epoch": 4.23675537109375e-05, + "step": 27766, + "training_step_time": 0.10560274124145508 + }, + { + "epoch": 4.236907958984375e-05, + "model_forward_time": 0.025148630142211914, + "step": 27767 + }, + { + "epoch": 4.236907958984375e-05, + "step": 27767, + "training_step_time": 0.1112675666809082 + }, + { + "epoch": 4.237060546875e-05, + "model_forward_time": 0.02546858787536621, + "step": 27768 + }, + { + "epoch": 4.237060546875e-05, + "step": 27768, + "training_step_time": 0.10685563087463379 + }, + { + "epoch": 4.237213134765625e-05, + "model_forward_time": 0.02554941177368164, + "step": 27769 + }, + { + "epoch": 4.237213134765625e-05, + "step": 27769, + "training_step_time": 0.10724282264709473 + }, + { + "epoch": 4.23736572265625e-05, + "grad_norm": 0.09268509596586227, + "learning_rate": 1.5030443266136118e-06, + "loss": 0.0118, + "step": 27770 + }, + { + "epoch": 4.23736572265625e-05, + "model_forward_time": 0.025165081024169922, + "step": 27770 + }, + { + "epoch": 4.23736572265625e-05, + "step": 27770, + "training_step_time": 0.10873174667358398 + }, + { + "epoch": 4.237518310546875e-05, + "model_forward_time": 0.026018142700195312, + "step": 27771 + }, + { + "epoch": 4.237518310546875e-05, + "step": 27771, + "training_step_time": 0.10544133186340332 + }, + { + "epoch": 4.2376708984375e-05, + "model_forward_time": 0.025185585021972656, + "step": 27772 + }, + { + "epoch": 4.2376708984375e-05, + "step": 27772, + "training_step_time": 0.12476205825805664 + }, + { + "epoch": 4.237823486328125e-05, + "model_forward_time": 0.025534391403198242, + "step": 27773 + }, + { + "epoch": 4.237823486328125e-05, + "step": 27773, + "training_step_time": 0.14030790328979492 + }, + { + "epoch": 4.23797607421875e-05, + "model_forward_time": 0.025883197784423828, + "step": 27774 + }, + { + "epoch": 4.23797607421875e-05, + "step": 27774, + "training_step_time": 0.10585474967956543 + }, + { + "epoch": 4.238128662109375e-05, + "model_forward_time": 0.025811433792114258, + "step": 27775 + }, + { + "epoch": 4.238128662109375e-05, + "step": 27775, + "training_step_time": 0.10695052146911621 + }, + { + "epoch": 4.23828125e-05, + "model_forward_time": 0.024994373321533203, + "step": 27776 + }, + { + "epoch": 4.23828125e-05, + "step": 27776, + "training_step_time": 0.1168055534362793 + }, + { + "epoch": 4.238433837890625e-05, + "model_forward_time": 0.025405406951904297, + "step": 27777 + }, + { + "epoch": 4.238433837890625e-05, + "step": 27777, + "training_step_time": 0.10906100273132324 + }, + { + "epoch": 4.23858642578125e-05, + "model_forward_time": 0.02455282211303711, + "step": 27778 + }, + { + "epoch": 4.23858642578125e-05, + "step": 27778, + "training_step_time": 0.19130706787109375 + }, + { + "epoch": 4.238739013671875e-05, + "model_forward_time": 0.02480626106262207, + "step": 27779 + }, + { + "epoch": 4.238739013671875e-05, + "step": 27779, + "training_step_time": 0.1045677661895752 + }, + { + "epoch": 4.2388916015625e-05, + "grad_norm": 0.05210455507040024, + "learning_rate": 1.4896615227983468e-06, + "loss": 0.0039, + "step": 27780 + }, + { + "epoch": 4.2388916015625e-05, + "model_forward_time": 0.0239102840423584, + "step": 27780 + }, + { + "epoch": 4.2388916015625e-05, + "step": 27780, + "training_step_time": 0.10227751731872559 + }, + { + "epoch": 4.239044189453125e-05, + "model_forward_time": 0.02455878257751465, + "step": 27781 + }, + { + "epoch": 4.239044189453125e-05, + "step": 27781, + "training_step_time": 0.10583376884460449 + }, + { + "epoch": 4.23919677734375e-05, + "model_forward_time": 0.025266647338867188, + "step": 27782 + }, + { + "epoch": 4.23919677734375e-05, + "step": 27782, + "training_step_time": 0.1070241928100586 + }, + { + "epoch": 4.239349365234375e-05, + "model_forward_time": 0.025438308715820312, + "step": 27783 + }, + { + "epoch": 4.239349365234375e-05, + "step": 27783, + "training_step_time": 0.10763788223266602 + }, + { + "epoch": 4.239501953125e-05, + "model_forward_time": 0.024671077728271484, + "step": 27784 + }, + { + "epoch": 4.239501953125e-05, + "step": 27784, + "training_step_time": 0.10678482055664062 + }, + { + "epoch": 4.239654541015625e-05, + "model_forward_time": 0.025286197662353516, + "step": 27785 + }, + { + "epoch": 4.239654541015625e-05, + "step": 27785, + "training_step_time": 0.10775494575500488 + }, + { + "epoch": 4.23980712890625e-05, + "model_forward_time": 0.025350093841552734, + "step": 27786 + }, + { + "epoch": 4.23980712890625e-05, + "step": 27786, + "training_step_time": 0.11060523986816406 + }, + { + "epoch": 4.239959716796875e-05, + "model_forward_time": 0.02554488182067871, + "step": 27787 + }, + { + "epoch": 4.239959716796875e-05, + "step": 27787, + "training_step_time": 0.10510087013244629 + }, + { + "epoch": 4.2401123046875e-05, + "model_forward_time": 0.02471160888671875, + "step": 27788 + }, + { + "epoch": 4.2401123046875e-05, + "step": 27788, + "training_step_time": 0.10821223258972168 + }, + { + "epoch": 4.240264892578125e-05, + "model_forward_time": 0.025285720825195312, + "step": 27789 + }, + { + "epoch": 4.240264892578125e-05, + "step": 27789, + "training_step_time": 0.10730099678039551 + }, + { + "epoch": 4.24041748046875e-05, + "grad_norm": 0.1868578940629959, + "learning_rate": 1.4763376636185599e-06, + "loss": 0.0055, + "step": 27790 + }, + { + "epoch": 4.24041748046875e-05, + "model_forward_time": 0.025423526763916016, + "step": 27790 + }, + { + "epoch": 4.24041748046875e-05, + "step": 27790, + "training_step_time": 0.10583901405334473 + }, + { + "epoch": 4.240570068359375e-05, + "model_forward_time": 0.025251150131225586, + "step": 27791 + }, + { + "epoch": 4.240570068359375e-05, + "step": 27791, + "training_step_time": 0.10694384574890137 + }, + { + "epoch": 4.24072265625e-05, + "model_forward_time": 0.025606870651245117, + "step": 27792 + }, + { + "epoch": 4.24072265625e-05, + "step": 27792, + "training_step_time": 0.10620403289794922 + }, + { + "epoch": 4.240875244140625e-05, + "model_forward_time": 0.02562427520751953, + "step": 27793 + }, + { + "epoch": 4.240875244140625e-05, + "step": 27793, + "training_step_time": 0.11023759841918945 + }, + { + "epoch": 4.24102783203125e-05, + "model_forward_time": 0.025459766387939453, + "step": 27794 + }, + { + "epoch": 4.24102783203125e-05, + "step": 27794, + "training_step_time": 0.10524201393127441 + }, + { + "epoch": 4.241180419921875e-05, + "model_forward_time": 0.025812387466430664, + "step": 27795 + }, + { + "epoch": 4.241180419921875e-05, + "step": 27795, + "training_step_time": 0.10813498497009277 + }, + { + "epoch": 4.2413330078125e-05, + "model_forward_time": 0.025298118591308594, + "step": 27796 + }, + { + "epoch": 4.2413330078125e-05, + "step": 27796, + "training_step_time": 0.10708022117614746 + }, + { + "epoch": 4.241485595703125e-05, + "model_forward_time": 0.025615692138671875, + "step": 27797 + }, + { + "epoch": 4.241485595703125e-05, + "step": 27797, + "training_step_time": 0.10777115821838379 + }, + { + "epoch": 4.24163818359375e-05, + "model_forward_time": 0.02490401268005371, + "step": 27798 + }, + { + "epoch": 4.24163818359375e-05, + "step": 27798, + "training_step_time": 0.1049349308013916 + }, + { + "epoch": 4.241790771484375e-05, + "model_forward_time": 0.024810791015625, + "step": 27799 + }, + { + "epoch": 4.241790771484375e-05, + "step": 27799, + "training_step_time": 0.149885892868042 + }, + { + "epoch": 4.241943359375e-05, + "grad_norm": 0.07447796314954758, + "learning_rate": 1.463072765264001e-06, + "loss": 0.0049, + "step": 27800 + }, + { + "epoch": 4.241943359375e-05, + "model_forward_time": 0.02437424659729004, + "step": 27800 + }, + { + "epoch": 4.241943359375e-05, + "step": 27800, + "training_step_time": 0.15484380722045898 + }, + { + "epoch": 4.242095947265625e-05, + "model_forward_time": 0.024450302124023438, + "step": 27801 + }, + { + "epoch": 4.242095947265625e-05, + "step": 27801, + "training_step_time": 0.13563823699951172 + }, + { + "epoch": 4.24224853515625e-05, + "model_forward_time": 0.02437305450439453, + "step": 27802 + }, + { + "epoch": 4.24224853515625e-05, + "step": 27802, + "training_step_time": 0.21140241622924805 + }, + { + "epoch": 4.242401123046875e-05, + "model_forward_time": 0.02472543716430664, + "step": 27803 + }, + { + "epoch": 4.242401123046875e-05, + "step": 27803, + "training_step_time": 0.12787652015686035 + }, + { + "epoch": 4.2425537109375e-05, + "model_forward_time": 0.02444911003112793, + "step": 27804 + }, + { + "epoch": 4.2425537109375e-05, + "step": 27804, + "training_step_time": 0.21919870376586914 + }, + { + "epoch": 4.242706298828125e-05, + "model_forward_time": 0.024511337280273438, + "step": 27805 + }, + { + "epoch": 4.242706298828125e-05, + "step": 27805, + "training_step_time": 0.17590641975402832 + }, + { + "epoch": 4.24285888671875e-05, + "model_forward_time": 0.02406144142150879, + "step": 27806 + }, + { + "epoch": 4.24285888671875e-05, + "step": 27806, + "training_step_time": 0.19149518013000488 + }, + { + "epoch": 4.243011474609375e-05, + "model_forward_time": 0.02446889877319336, + "step": 27807 + }, + { + "epoch": 4.243011474609375e-05, + "step": 27807, + "training_step_time": 0.10854840278625488 + }, + { + "epoch": 4.2431640625e-05, + "model_forward_time": 0.025229692459106445, + "step": 27808 + }, + { + "epoch": 4.2431640625e-05, + "step": 27808, + "training_step_time": 0.10677576065063477 + }, + { + "epoch": 4.243316650390625e-05, + "model_forward_time": 0.0252227783203125, + "step": 27809 + }, + { + "epoch": 4.243316650390625e-05, + "step": 27809, + "training_step_time": 0.10595273971557617 + }, + { + "epoch": 4.24346923828125e-05, + "grad_norm": 0.42719563841819763, + "learning_rate": 1.4498668438527597e-06, + "loss": 0.0082, + "step": 27810 + }, + { + "epoch": 4.24346923828125e-05, + "model_forward_time": 0.02503657341003418, + "step": 27810 + }, + { + "epoch": 4.24346923828125e-05, + "step": 27810, + "training_step_time": 0.1055910587310791 + }, + { + "epoch": 4.243621826171875e-05, + "model_forward_time": 0.02529597282409668, + "step": 27811 + }, + { + "epoch": 4.243621826171875e-05, + "step": 27811, + "training_step_time": 0.10571026802062988 + }, + { + "epoch": 4.2437744140625e-05, + "model_forward_time": 0.02472662925720215, + "step": 27812 + }, + { + "epoch": 4.2437744140625e-05, + "step": 27812, + "training_step_time": 0.10520124435424805 + }, + { + "epoch": 4.243927001953125e-05, + "model_forward_time": 0.02493000030517578, + "step": 27813 + }, + { + "epoch": 4.243927001953125e-05, + "step": 27813, + "training_step_time": 0.10967206954956055 + }, + { + "epoch": 4.24407958984375e-05, + "model_forward_time": 0.025262832641601562, + "step": 27814 + }, + { + "epoch": 4.24407958984375e-05, + "step": 27814, + "training_step_time": 0.1060795783996582 + }, + { + "epoch": 4.244232177734375e-05, + "model_forward_time": 0.025254011154174805, + "step": 27815 + }, + { + "epoch": 4.244232177734375e-05, + "step": 27815, + "training_step_time": 0.10849165916442871 + }, + { + "epoch": 4.244384765625e-05, + "model_forward_time": 0.025343894958496094, + "step": 27816 + }, + { + "epoch": 4.244384765625e-05, + "step": 27816, + "training_step_time": 0.10576343536376953 + }, + { + "epoch": 4.244537353515625e-05, + "model_forward_time": 0.025491714477539062, + "step": 27817 + }, + { + "epoch": 4.244537353515625e-05, + "step": 27817, + "training_step_time": 0.10484504699707031 + }, + { + "epoch": 4.24468994140625e-05, + "model_forward_time": 0.02534174919128418, + "step": 27818 + }, + { + "epoch": 4.24468994140625e-05, + "step": 27818, + "training_step_time": 0.12265968322753906 + }, + { + "epoch": 4.244842529296875e-05, + "model_forward_time": 0.025585412979125977, + "step": 27819 + }, + { + "epoch": 4.244842529296875e-05, + "step": 27819, + "training_step_time": 0.13701558113098145 + }, + { + "epoch": 4.2449951171875e-05, + "grad_norm": 0.029894618317484856, + "learning_rate": 1.4367199154312783e-06, + "loss": 0.0025, + "step": 27820 + }, + { + "epoch": 4.2449951171875e-05, + "model_forward_time": 0.027461528778076172, + "step": 27820 + }, + { + "epoch": 4.2449951171875e-05, + "step": 27820, + "training_step_time": 0.10947537422180176 + }, + { + "epoch": 4.245147705078125e-05, + "model_forward_time": 0.02530980110168457, + "step": 27821 + }, + { + "epoch": 4.245147705078125e-05, + "step": 27821, + "training_step_time": 0.10671806335449219 + }, + { + "epoch": 4.24530029296875e-05, + "model_forward_time": 0.02665114402770996, + "step": 27822 + }, + { + "epoch": 4.24530029296875e-05, + "step": 27822, + "training_step_time": 0.10999488830566406 + }, + { + "epoch": 4.245452880859375e-05, + "model_forward_time": 0.02570629119873047, + "step": 27823 + }, + { + "epoch": 4.245452880859375e-05, + "step": 27823, + "training_step_time": 0.1114661693572998 + }, + { + "epoch": 4.24560546875e-05, + "model_forward_time": 0.025671720504760742, + "step": 27824 + }, + { + "epoch": 4.24560546875e-05, + "step": 27824, + "training_step_time": 0.20056700706481934 + }, + { + "epoch": 4.245758056640625e-05, + "model_forward_time": 0.024576902389526367, + "step": 27825 + }, + { + "epoch": 4.245758056640625e-05, + "step": 27825, + "training_step_time": 0.10237884521484375 + }, + { + "epoch": 4.24591064453125e-05, + "model_forward_time": 0.024979352951049805, + "step": 27826 + }, + { + "epoch": 4.24591064453125e-05, + "step": 27826, + "training_step_time": 0.10492539405822754 + }, + { + "epoch": 4.246063232421875e-05, + "model_forward_time": 0.024428367614746094, + "step": 27827 + }, + { + "epoch": 4.246063232421875e-05, + "step": 27827, + "training_step_time": 0.10420584678649902 + }, + { + "epoch": 4.2462158203125e-05, + "model_forward_time": 0.0242002010345459, + "step": 27828 + }, + { + "epoch": 4.2462158203125e-05, + "step": 27828, + "training_step_time": 0.10303044319152832 + }, + { + "epoch": 4.246368408203125e-05, + "model_forward_time": 0.024187803268432617, + "step": 27829 + }, + { + "epoch": 4.246368408203125e-05, + "step": 27829, + "training_step_time": 0.10311603546142578 + }, + { + "epoch": 4.24652099609375e-05, + "grad_norm": 0.14968939125537872, + "learning_rate": 1.4236319959743227e-06, + "loss": 0.0038, + "step": 27830 + }, + { + "epoch": 4.24652099609375e-05, + "model_forward_time": 0.02434086799621582, + "step": 27830 + }, + { + "epoch": 4.24652099609375e-05, + "step": 27830, + "training_step_time": 0.10735964775085449 + }, + { + "epoch": 4.246673583984375e-05, + "model_forward_time": 0.025661945343017578, + "step": 27831 + }, + { + "epoch": 4.246673583984375e-05, + "step": 27831, + "training_step_time": 0.11038899421691895 + }, + { + "epoch": 4.246826171875e-05, + "model_forward_time": 0.025493860244750977, + "step": 27832 + }, + { + "epoch": 4.246826171875e-05, + "step": 27832, + "training_step_time": 0.10648226737976074 + }, + { + "epoch": 4.246978759765625e-05, + "model_forward_time": 0.025682449340820312, + "step": 27833 + }, + { + "epoch": 4.246978759765625e-05, + "step": 27833, + "training_step_time": 0.10470938682556152 + }, + { + "epoch": 4.24713134765625e-05, + "model_forward_time": 0.025580406188964844, + "step": 27834 + }, + { + "epoch": 4.24713134765625e-05, + "step": 27834, + "training_step_time": 0.10486507415771484 + }, + { + "epoch": 4.247283935546875e-05, + "model_forward_time": 0.025501012802124023, + "step": 27835 + }, + { + "epoch": 4.247283935546875e-05, + "step": 27835, + "training_step_time": 0.1041867733001709 + }, + { + "epoch": 4.2474365234375e-05, + "model_forward_time": 0.0249173641204834, + "step": 27836 + }, + { + "epoch": 4.2474365234375e-05, + "step": 27836, + "training_step_time": 0.10445499420166016 + }, + { + "epoch": 4.247589111328125e-05, + "model_forward_time": 0.024964332580566406, + "step": 27837 + }, + { + "epoch": 4.247589111328125e-05, + "step": 27837, + "training_step_time": 0.11200428009033203 + }, + { + "epoch": 4.24774169921875e-05, + "model_forward_time": 0.025246143341064453, + "step": 27838 + }, + { + "epoch": 4.24774169921875e-05, + "step": 27838, + "training_step_time": 0.11360526084899902 + }, + { + "epoch": 4.247894287109375e-05, + "model_forward_time": 0.025316953659057617, + "step": 27839 + }, + { + "epoch": 4.247894287109375e-05, + "step": 27839, + "training_step_time": 0.10687971115112305 + }, + { + "epoch": 4.248046875e-05, + "grad_norm": 0.0546284094452858, + "learning_rate": 1.4106031013849496e-06, + "loss": 0.0061, + "step": 27840 + }, + { + "epoch": 4.248046875e-05, + "model_forward_time": 0.02528524398803711, + "step": 27840 + }, + { + "epoch": 4.248046875e-05, + "step": 27840, + "training_step_time": 0.10661196708679199 + }, + { + "epoch": 4.248199462890625e-05, + "model_forward_time": 0.025310277938842773, + "step": 27841 + }, + { + "epoch": 4.248199462890625e-05, + "step": 27841, + "training_step_time": 0.10816431045532227 + }, + { + "epoch": 4.24835205078125e-05, + "model_forward_time": 0.025682926177978516, + "step": 27842 + }, + { + "epoch": 4.24835205078125e-05, + "step": 27842, + "training_step_time": 0.10800051689147949 + }, + { + "epoch": 4.248504638671875e-05, + "model_forward_time": 0.024756193161010742, + "step": 27843 + }, + { + "epoch": 4.248504638671875e-05, + "step": 27843, + "training_step_time": 0.11150908470153809 + }, + { + "epoch": 4.2486572265625e-05, + "model_forward_time": 0.025356531143188477, + "step": 27844 + }, + { + "epoch": 4.2486572265625e-05, + "step": 27844, + "training_step_time": 0.10523748397827148 + }, + { + "epoch": 4.248809814453125e-05, + "model_forward_time": 0.024932861328125, + "step": 27845 + }, + { + "epoch": 4.248809814453125e-05, + "step": 27845, + "training_step_time": 0.12113285064697266 + }, + { + "epoch": 4.24896240234375e-05, + "model_forward_time": 0.025599241256713867, + "step": 27846 + }, + { + "epoch": 4.24896240234375e-05, + "step": 27846, + "training_step_time": 0.11135435104370117 + }, + { + "epoch": 4.249114990234375e-05, + "model_forward_time": 0.025439977645874023, + "step": 27847 + }, + { + "epoch": 4.249114990234375e-05, + "step": 27847, + "training_step_time": 0.22305846214294434 + }, + { + "epoch": 4.249267578125e-05, + "model_forward_time": 0.024872303009033203, + "step": 27848 + }, + { + "epoch": 4.249267578125e-05, + "step": 27848, + "training_step_time": 0.14758968353271484 + }, + { + "epoch": 4.249420166015625e-05, + "model_forward_time": 0.02462029457092285, + "step": 27849 + }, + { + "epoch": 4.249420166015625e-05, + "step": 27849, + "training_step_time": 0.1842501163482666 + }, + { + "epoch": 4.24957275390625e-05, + "grad_norm": 0.03513207659125328, + "learning_rate": 1.3976332474944843e-06, + "loss": 0.0034, + "step": 27850 + }, + { + "epoch": 4.24957275390625e-05, + "model_forward_time": 0.025066375732421875, + "step": 27850 + }, + { + "epoch": 4.24957275390625e-05, + "step": 27850, + "training_step_time": 0.16826081275939941 + }, + { + "epoch": 4.249725341796875e-05, + "model_forward_time": 0.024399995803833008, + "step": 27851 + }, + { + "epoch": 4.249725341796875e-05, + "step": 27851, + "training_step_time": 0.17161297798156738 + }, + { + "epoch": 4.2498779296875e-05, + "model_forward_time": 0.02479386329650879, + "step": 27852 + }, + { + "epoch": 4.2498779296875e-05, + "step": 27852, + "training_step_time": 0.1333465576171875 + }, + { + "epoch": 4.250030517578125e-05, + "model_forward_time": 0.02429342269897461, + "step": 27853 + }, + { + "epoch": 4.250030517578125e-05, + "step": 27853, + "training_step_time": 0.11715388298034668 + }, + { + "epoch": 4.25018310546875e-05, + "model_forward_time": 0.025049209594726562, + "step": 27854 + }, + { + "epoch": 4.25018310546875e-05, + "step": 27854, + "training_step_time": 0.11929464340209961 + }, + { + "epoch": 4.250335693359375e-05, + "model_forward_time": 0.025506973266601562, + "step": 27855 + }, + { + "epoch": 4.250335693359375e-05, + "step": 27855, + "training_step_time": 0.11747884750366211 + }, + { + "epoch": 4.25048828125e-05, + "model_forward_time": 0.025383710861206055, + "step": 27856 + }, + { + "epoch": 4.25048828125e-05, + "step": 27856, + "training_step_time": 0.11568307876586914 + }, + { + "epoch": 4.250640869140625e-05, + "model_forward_time": 0.025324344635009766, + "step": 27857 + }, + { + "epoch": 4.250640869140625e-05, + "step": 27857, + "training_step_time": 0.11356496810913086 + }, + { + "epoch": 4.25079345703125e-05, + "model_forward_time": 0.02538132667541504, + "step": 27858 + }, + { + "epoch": 4.25079345703125e-05, + "step": 27858, + "training_step_time": 0.10979771614074707 + }, + { + "epoch": 4.250946044921875e-05, + "model_forward_time": 0.02555370330810547, + "step": 27859 + }, + { + "epoch": 4.250946044921875e-05, + "step": 27859, + "training_step_time": 0.10915350914001465 + }, + { + "epoch": 4.2510986328125e-05, + "grad_norm": 0.05167609825730324, + "learning_rate": 1.3847224500625256e-06, + "loss": 0.0039, + "step": 27860 + }, + { + "epoch": 4.2510986328125e-05, + "model_forward_time": 0.024819612503051758, + "step": 27860 + }, + { + "epoch": 4.2510986328125e-05, + "step": 27860, + "training_step_time": 0.1078941822052002 + }, + { + "epoch": 4.251251220703125e-05, + "model_forward_time": 0.025228261947631836, + "step": 27861 + }, + { + "epoch": 4.251251220703125e-05, + "step": 27861, + "training_step_time": 0.10863733291625977 + }, + { + "epoch": 4.25140380859375e-05, + "model_forward_time": 0.025562047958374023, + "step": 27862 + }, + { + "epoch": 4.25140380859375e-05, + "step": 27862, + "training_step_time": 0.10815024375915527 + }, + { + "epoch": 4.251556396484375e-05, + "model_forward_time": 0.0254671573638916, + "step": 27863 + }, + { + "epoch": 4.251556396484375e-05, + "step": 27863, + "training_step_time": 0.10552644729614258 + }, + { + "epoch": 4.251708984375e-05, + "model_forward_time": 0.025632143020629883, + "step": 27864 + }, + { + "epoch": 4.251708984375e-05, + "step": 27864, + "training_step_time": 0.1062624454498291 + }, + { + "epoch": 4.251861572265625e-05, + "model_forward_time": 0.02562689781188965, + "step": 27865 + }, + { + "epoch": 4.251861572265625e-05, + "step": 27865, + "training_step_time": 0.10991859436035156 + }, + { + "epoch": 4.25201416015625e-05, + "model_forward_time": 0.02547287940979004, + "step": 27866 + }, + { + "epoch": 4.25201416015625e-05, + "step": 27866, + "training_step_time": 0.13116097450256348 + }, + { + "epoch": 4.252166748046875e-05, + "model_forward_time": 0.025582313537597656, + "step": 27867 + }, + { + "epoch": 4.252166748046875e-05, + "step": 27867, + "training_step_time": 0.1120157241821289 + }, + { + "epoch": 4.2523193359375e-05, + "model_forward_time": 0.02555060386657715, + "step": 27868 + }, + { + "epoch": 4.2523193359375e-05, + "step": 27868, + "training_step_time": 0.1085810661315918 + }, + { + "epoch": 4.252471923828125e-05, + "model_forward_time": 0.025282621383666992, + "step": 27869 + }, + { + "epoch": 4.252471923828125e-05, + "step": 27869, + "training_step_time": 0.11660623550415039 + }, + { + "epoch": 4.25262451171875e-05, + "grad_norm": 0.03676657751202583, + "learning_rate": 1.3718707247769135e-06, + "loss": 0.0023, + "step": 27870 + }, + { + "epoch": 4.25262451171875e-05, + "model_forward_time": 0.02561163902282715, + "step": 27870 + }, + { + "epoch": 4.25262451171875e-05, + "step": 27870, + "training_step_time": 0.18759512901306152 + }, + { + "epoch": 4.252777099609375e-05, + "model_forward_time": 0.02464604377746582, + "step": 27871 + }, + { + "epoch": 4.252777099609375e-05, + "step": 27871, + "training_step_time": 0.11059355735778809 + }, + { + "epoch": 4.2529296875e-05, + "model_forward_time": 0.024799108505249023, + "step": 27872 + }, + { + "epoch": 4.2529296875e-05, + "step": 27872, + "training_step_time": 0.10209178924560547 + }, + { + "epoch": 4.253082275390625e-05, + "model_forward_time": 0.025667428970336914, + "step": 27873 + }, + { + "epoch": 4.253082275390625e-05, + "step": 27873, + "training_step_time": 0.10463213920593262 + }, + { + "epoch": 4.25323486328125e-05, + "model_forward_time": 0.027620315551757812, + "step": 27874 + }, + { + "epoch": 4.25323486328125e-05, + "step": 27874, + "training_step_time": 0.10800051689147949 + }, + { + "epoch": 4.253387451171875e-05, + "model_forward_time": 0.026723861694335938, + "step": 27875 + }, + { + "epoch": 4.253387451171875e-05, + "step": 27875, + "training_step_time": 0.11061406135559082 + }, + { + "epoch": 4.2535400390625e-05, + "model_forward_time": 0.026164531707763672, + "step": 27876 + }, + { + "epoch": 4.2535400390625e-05, + "step": 27876, + "training_step_time": 0.10574626922607422 + }, + { + "epoch": 4.253692626953125e-05, + "model_forward_time": 0.025716781616210938, + "step": 27877 + }, + { + "epoch": 4.253692626953125e-05, + "step": 27877, + "training_step_time": 0.1097412109375 + }, + { + "epoch": 4.25384521484375e-05, + "model_forward_time": 0.02550649642944336, + "step": 27878 + }, + { + "epoch": 4.25384521484375e-05, + "step": 27878, + "training_step_time": 0.10526251792907715 + }, + { + "epoch": 4.253997802734375e-05, + "model_forward_time": 0.02563762664794922, + "step": 27879 + }, + { + "epoch": 4.253997802734375e-05, + "step": 27879, + "training_step_time": 0.10612869262695312 + }, + { + "epoch": 4.254150390625e-05, + "grad_norm": 0.31312671303749084, + "learning_rate": 1.3590780872536958e-06, + "loss": 0.0131, + "step": 27880 + }, + { + "epoch": 4.254150390625e-05, + "model_forward_time": 0.025370121002197266, + "step": 27880 + }, + { + "epoch": 4.254150390625e-05, + "step": 27880, + "training_step_time": 0.10602498054504395 + }, + { + "epoch": 4.254302978515625e-05, + "model_forward_time": 0.02553725242614746, + "step": 27881 + }, + { + "epoch": 4.254302978515625e-05, + "step": 27881, + "training_step_time": 0.10577726364135742 + }, + { + "epoch": 4.25445556640625e-05, + "model_forward_time": 0.025724411010742188, + "step": 27882 + }, + { + "epoch": 4.25445556640625e-05, + "step": 27882, + "training_step_time": 0.17434048652648926 + }, + { + "epoch": 4.254608154296875e-05, + "model_forward_time": 0.02409648895263672, + "step": 27883 + }, + { + "epoch": 4.254608154296875e-05, + "step": 27883, + "training_step_time": 0.19861888885498047 + }, + { + "epoch": 4.2547607421875e-05, + "model_forward_time": 0.02398371696472168, + "step": 27884 + }, + { + "epoch": 4.2547607421875e-05, + "step": 27884, + "training_step_time": 0.18986248970031738 + }, + { + "epoch": 4.254913330078125e-05, + "model_forward_time": 0.02332448959350586, + "step": 27885 + }, + { + "epoch": 4.254913330078125e-05, + "step": 27885, + "training_step_time": 0.17373037338256836 + }, + { + "epoch": 4.25506591796875e-05, + "model_forward_time": 0.02451634407043457, + "step": 27886 + }, + { + "epoch": 4.25506591796875e-05, + "step": 27886, + "training_step_time": 0.16798830032348633 + }, + { + "epoch": 4.255218505859375e-05, + "model_forward_time": 0.023904085159301758, + "step": 27887 + }, + { + "epoch": 4.255218505859375e-05, + "step": 27887, + "training_step_time": 0.11904740333557129 + }, + { + "epoch": 4.25537109375e-05, + "model_forward_time": 0.025408029556274414, + "step": 27888 + }, + { + "epoch": 4.25537109375e-05, + "step": 27888, + "training_step_time": 0.10461926460266113 + }, + { + "epoch": 4.255523681640625e-05, + "model_forward_time": 0.024646997451782227, + "step": 27889 + }, + { + "epoch": 4.255523681640625e-05, + "step": 27889, + "training_step_time": 0.14524006843566895 + }, + { + "epoch": 4.25567626953125e-05, + "grad_norm": 0.07141478359699249, + "learning_rate": 1.3463445530371488e-06, + "loss": 0.0052, + "step": 27890 + }, + { + "epoch": 4.25567626953125e-05, + "model_forward_time": 0.026252269744873047, + "step": 27890 + }, + { + "epoch": 4.25567626953125e-05, + "step": 27890, + "training_step_time": 0.19132685661315918 + }, + { + "epoch": 4.255828857421875e-05, + "model_forward_time": 0.024653911590576172, + "step": 27891 + }, + { + "epoch": 4.255828857421875e-05, + "step": 27891, + "training_step_time": 0.12134814262390137 + }, + { + "epoch": 4.2559814453125e-05, + "model_forward_time": 0.024671077728271484, + "step": 27892 + }, + { + "epoch": 4.2559814453125e-05, + "step": 27892, + "training_step_time": 0.13105273246765137 + }, + { + "epoch": 4.256134033203125e-05, + "model_forward_time": 0.025290250778198242, + "step": 27893 + }, + { + "epoch": 4.256134033203125e-05, + "step": 27893, + "training_step_time": 0.1995391845703125 + }, + { + "epoch": 4.25628662109375e-05, + "model_forward_time": 0.024758577346801758, + "step": 27894 + }, + { + "epoch": 4.25628662109375e-05, + "step": 27894, + "training_step_time": 0.17871856689453125 + }, + { + "epoch": 4.256439208984375e-05, + "model_forward_time": 0.024981021881103516, + "step": 27895 + }, + { + "epoch": 4.256439208984375e-05, + "step": 27895, + "training_step_time": 0.10334467887878418 + }, + { + "epoch": 4.256591796875e-05, + "model_forward_time": 0.02443218231201172, + "step": 27896 + }, + { + "epoch": 4.256591796875e-05, + "step": 27896, + "training_step_time": 0.12168192863464355 + }, + { + "epoch": 4.256744384765625e-05, + "model_forward_time": 0.025715112686157227, + "step": 27897 + }, + { + "epoch": 4.256744384765625e-05, + "step": 27897, + "training_step_time": 0.10761451721191406 + }, + { + "epoch": 4.25689697265625e-05, + "model_forward_time": 0.025214672088623047, + "step": 27898 + }, + { + "epoch": 4.25689697265625e-05, + "step": 27898, + "training_step_time": 0.11822724342346191 + }, + { + "epoch": 4.257049560546875e-05, + "model_forward_time": 0.025624752044677734, + "step": 27899 + }, + { + "epoch": 4.257049560546875e-05, + "step": 27899, + "training_step_time": 0.10529327392578125 + }, + { + "epoch": 4.2572021484375e-05, + "grad_norm": 0.05580778047442436, + "learning_rate": 1.333670137599713e-06, + "loss": 0.0045, + "step": 27900 + }, + { + "epoch": 4.2572021484375e-05, + "model_forward_time": 0.025719165802001953, + "step": 27900 + }, + { + "epoch": 4.2572021484375e-05, + "step": 27900, + "training_step_time": 0.10622739791870117 + }, + { + "epoch": 4.257354736328125e-05, + "model_forward_time": 0.025370359420776367, + "step": 27901 + }, + { + "epoch": 4.257354736328125e-05, + "step": 27901, + "training_step_time": 0.10665321350097656 + }, + { + "epoch": 4.25750732421875e-05, + "model_forward_time": 0.02575397491455078, + "step": 27902 + }, + { + "epoch": 4.25750732421875e-05, + "step": 27902, + "training_step_time": 0.10714101791381836 + }, + { + "epoch": 4.257659912109375e-05, + "model_forward_time": 0.025780916213989258, + "step": 27903 + }, + { + "epoch": 4.257659912109375e-05, + "step": 27903, + "training_step_time": 0.10862016677856445 + }, + { + "epoch": 4.2578125e-05, + "model_forward_time": 0.025417566299438477, + "step": 27904 + }, + { + "epoch": 4.2578125e-05, + "step": 27904, + "training_step_time": 0.1513967514038086 + }, + { + "epoch": 4.257965087890625e-05, + "model_forward_time": 0.025171279907226562, + "step": 27905 + }, + { + "epoch": 4.257965087890625e-05, + "step": 27905, + "training_step_time": 0.17249774932861328 + }, + { + "epoch": 4.25811767578125e-05, + "model_forward_time": 0.02461719512939453, + "step": 27906 + }, + { + "epoch": 4.25811767578125e-05, + "step": 27906, + "training_step_time": 0.17644524574279785 + }, + { + "epoch": 4.258270263671875e-05, + "model_forward_time": 0.02466750144958496, + "step": 27907 + }, + { + "epoch": 4.258270263671875e-05, + "step": 27907, + "training_step_time": 0.1753687858581543 + }, + { + "epoch": 4.2584228515625e-05, + "model_forward_time": 0.024820327758789062, + "step": 27908 + }, + { + "epoch": 4.2584228515625e-05, + "step": 27908, + "training_step_time": 0.14687514305114746 + }, + { + "epoch": 4.258575439453125e-05, + "model_forward_time": 0.024682998657226562, + "step": 27909 + }, + { + "epoch": 4.258575439453125e-05, + "step": 27909, + "training_step_time": 0.2015371322631836 + }, + { + "epoch": 4.25872802734375e-05, + "grad_norm": 0.06222119927406311, + "learning_rate": 1.3210548563419856e-06, + "loss": 0.0041, + "step": 27910 + }, + { + "epoch": 4.25872802734375e-05, + "model_forward_time": 0.02773594856262207, + "step": 27910 + }, + { + "epoch": 4.25872802734375e-05, + "step": 27910, + "training_step_time": 0.1265099048614502 + }, + { + "epoch": 4.258880615234375e-05, + "model_forward_time": 0.02469491958618164, + "step": 27911 + }, + { + "epoch": 4.258880615234375e-05, + "step": 27911, + "training_step_time": 0.19068121910095215 + }, + { + "epoch": 4.259033203125e-05, + "model_forward_time": 0.024985313415527344, + "step": 27912 + }, + { + "epoch": 4.259033203125e-05, + "step": 27912, + "training_step_time": 0.11831974983215332 + }, + { + "epoch": 4.259185791015625e-05, + "model_forward_time": 0.024536848068237305, + "step": 27913 + }, + { + "epoch": 4.259185791015625e-05, + "step": 27913, + "training_step_time": 0.11262941360473633 + }, + { + "epoch": 4.25933837890625e-05, + "model_forward_time": 0.025439739227294922, + "step": 27914 + }, + { + "epoch": 4.25933837890625e-05, + "step": 27914, + "training_step_time": 0.11113357543945312 + }, + { + "epoch": 4.259490966796875e-05, + "model_forward_time": 0.025494813919067383, + "step": 27915 + }, + { + "epoch": 4.259490966796875e-05, + "step": 27915, + "training_step_time": 0.10785484313964844 + }, + { + "epoch": 4.2596435546875e-05, + "model_forward_time": 0.02586817741394043, + "step": 27916 + }, + { + "epoch": 4.2596435546875e-05, + "step": 27916, + "training_step_time": 0.10970187187194824 + }, + { + "epoch": 4.259796142578125e-05, + "model_forward_time": 0.025429725646972656, + "step": 27917 + }, + { + "epoch": 4.259796142578125e-05, + "step": 27917, + "training_step_time": 0.11130952835083008 + }, + { + "epoch": 4.25994873046875e-05, + "model_forward_time": 0.025813579559326172, + "step": 27918 + }, + { + "epoch": 4.25994873046875e-05, + "step": 27918, + "training_step_time": 0.10906457901000977 + }, + { + "epoch": 4.260101318359375e-05, + "model_forward_time": 0.025560379028320312, + "step": 27919 + }, + { + "epoch": 4.260101318359375e-05, + "step": 27919, + "training_step_time": 0.10883951187133789 + }, + { + "epoch": 4.26025390625e-05, + "grad_norm": 0.17487028241157532, + "learning_rate": 1.3084987245927383e-06, + "loss": 0.0042, + "step": 27920 + }, + { + "epoch": 4.26025390625e-05, + "model_forward_time": 0.025545597076416016, + "step": 27920 + }, + { + "epoch": 4.26025390625e-05, + "step": 27920, + "training_step_time": 0.10739731788635254 + }, + { + "epoch": 4.260406494140625e-05, + "model_forward_time": 0.025815486907958984, + "step": 27921 + }, + { + "epoch": 4.260406494140625e-05, + "step": 27921, + "training_step_time": 0.10610008239746094 + }, + { + "epoch": 4.26055908203125e-05, + "model_forward_time": 0.025365114212036133, + "step": 27922 + }, + { + "epoch": 4.26055908203125e-05, + "step": 27922, + "training_step_time": 0.1053469181060791 + }, + { + "epoch": 4.260711669921875e-05, + "model_forward_time": 0.025623559951782227, + "step": 27923 + }, + { + "epoch": 4.260711669921875e-05, + "step": 27923, + "training_step_time": 0.10929369926452637 + }, + { + "epoch": 4.2608642578125e-05, + "model_forward_time": 0.02564239501953125, + "step": 27924 + }, + { + "epoch": 4.2608642578125e-05, + "step": 27924, + "training_step_time": 0.10677647590637207 + }, + { + "epoch": 4.261016845703125e-05, + "model_forward_time": 0.025307655334472656, + "step": 27925 + }, + { + "epoch": 4.261016845703125e-05, + "step": 27925, + "training_step_time": 0.10831093788146973 + }, + { + "epoch": 4.26116943359375e-05, + "model_forward_time": 0.026432514190673828, + "step": 27926 + }, + { + "epoch": 4.26116943359375e-05, + "step": 27926, + "training_step_time": 0.10590600967407227 + }, + { + "epoch": 4.261322021484375e-05, + "model_forward_time": 0.025585174560546875, + "step": 27927 + }, + { + "epoch": 4.261322021484375e-05, + "step": 27927, + "training_step_time": 0.10826683044433594 + }, + { + "epoch": 4.261474609375e-05, + "model_forward_time": 0.02531886100769043, + "step": 27928 + }, + { + "epoch": 4.261474609375e-05, + "step": 27928, + "training_step_time": 0.10998320579528809 + }, + { + "epoch": 4.261627197265625e-05, + "model_forward_time": 0.025435686111450195, + "step": 27929 + }, + { + "epoch": 4.261627197265625e-05, + "step": 27929, + "training_step_time": 0.10559892654418945 + }, + { + "epoch": 4.26177978515625e-05, + "grad_norm": 0.09832992404699326, + "learning_rate": 1.2960017576088446e-06, + "loss": 0.005, + "step": 27930 + }, + { + "epoch": 4.26177978515625e-05, + "model_forward_time": 0.02521824836730957, + "step": 27930 + }, + { + "epoch": 4.26177978515625e-05, + "step": 27930, + "training_step_time": 0.10478949546813965 + }, + { + "epoch": 4.261932373046875e-05, + "model_forward_time": 0.02548670768737793, + "step": 27931 + }, + { + "epoch": 4.261932373046875e-05, + "step": 27931, + "training_step_time": 0.10495376586914062 + }, + { + "epoch": 4.2620849609375e-05, + "model_forward_time": 0.025649070739746094, + "step": 27932 + }, + { + "epoch": 4.2620849609375e-05, + "step": 27932, + "training_step_time": 0.10551261901855469 + }, + { + "epoch": 4.262237548828125e-05, + "model_forward_time": 0.025668621063232422, + "step": 27933 + }, + { + "epoch": 4.262237548828125e-05, + "step": 27933, + "training_step_time": 0.10544824600219727 + }, + { + "epoch": 4.26239013671875e-05, + "model_forward_time": 0.02572345733642578, + "step": 27934 + }, + { + "epoch": 4.26239013671875e-05, + "step": 27934, + "training_step_time": 0.14290952682495117 + }, + { + "epoch": 4.262542724609375e-05, + "model_forward_time": 0.025204896926879883, + "step": 27935 + }, + { + "epoch": 4.262542724609375e-05, + "step": 27935, + "training_step_time": 0.16504144668579102 + }, + { + "epoch": 4.2626953125e-05, + "model_forward_time": 0.02519059181213379, + "step": 27936 + }, + { + "epoch": 4.2626953125e-05, + "step": 27936, + "training_step_time": 0.20573830604553223 + }, + { + "epoch": 4.262847900390625e-05, + "model_forward_time": 0.025143146514892578, + "step": 27937 + }, + { + "epoch": 4.262847900390625e-05, + "step": 27937, + "training_step_time": 0.13631677627563477 + }, + { + "epoch": 4.26300048828125e-05, + "model_forward_time": 0.02484869956970215, + "step": 27938 + }, + { + "epoch": 4.26300048828125e-05, + "step": 27938, + "training_step_time": 0.18605422973632812 + }, + { + "epoch": 4.263153076171875e-05, + "model_forward_time": 0.024816274642944336, + "step": 27939 + }, + { + "epoch": 4.263153076171875e-05, + "step": 27939, + "training_step_time": 0.17425918579101562 + }, + { + "epoch": 4.2633056640625e-05, + "grad_norm": 0.0365869477391243, + "learning_rate": 1.2835639705753078e-06, + "loss": 0.0049, + "step": 27940 + }, + { + "epoch": 4.2633056640625e-05, + "model_forward_time": 0.025331735610961914, + "step": 27940 + }, + { + "epoch": 4.2633056640625e-05, + "step": 27940, + "training_step_time": 0.19548249244689941 + }, + { + "epoch": 4.263458251953125e-05, + "model_forward_time": 0.0250396728515625, + "step": 27941 + }, + { + "epoch": 4.263458251953125e-05, + "step": 27941, + "training_step_time": 0.11956048011779785 + }, + { + "epoch": 4.26361083984375e-05, + "model_forward_time": 0.025290250778198242, + "step": 27942 + }, + { + "epoch": 4.26361083984375e-05, + "step": 27942, + "training_step_time": 0.11834287643432617 + }, + { + "epoch": 4.263763427734375e-05, + "model_forward_time": 0.02579975128173828, + "step": 27943 + }, + { + "epoch": 4.263763427734375e-05, + "step": 27943, + "training_step_time": 0.1056208610534668 + }, + { + "epoch": 4.263916015625e-05, + "model_forward_time": 0.02572035789489746, + "step": 27944 + }, + { + "epoch": 4.263916015625e-05, + "step": 27944, + "training_step_time": 0.10506153106689453 + }, + { + "epoch": 4.264068603515625e-05, + "model_forward_time": 0.025712013244628906, + "step": 27945 + }, + { + "epoch": 4.264068603515625e-05, + "step": 27945, + "training_step_time": 0.1472768783569336 + }, + { + "epoch": 4.26422119140625e-05, + "model_forward_time": 0.02608776092529297, + "step": 27946 + }, + { + "epoch": 4.26422119140625e-05, + "step": 27946, + "training_step_time": 0.15186285972595215 + }, + { + "epoch": 4.264373779296875e-05, + "model_forward_time": 0.024965286254882812, + "step": 27947 + }, + { + "epoch": 4.264373779296875e-05, + "step": 27947, + "training_step_time": 0.14381670951843262 + }, + { + "epoch": 4.2645263671875e-05, + "model_forward_time": 0.024871826171875, + "step": 27948 + }, + { + "epoch": 4.2645263671875e-05, + "step": 27948, + "training_step_time": 0.12871646881103516 + }, + { + "epoch": 4.264678955078125e-05, + "model_forward_time": 0.025005578994750977, + "step": 27949 + }, + { + "epoch": 4.264678955078125e-05, + "step": 27949, + "training_step_time": 0.1214454174041748 + }, + { + "epoch": 4.26483154296875e-05, + "grad_norm": 0.2922270894050598, + "learning_rate": 1.2711853786052109e-06, + "loss": 0.0056, + "step": 27950 + }, + { + "epoch": 4.26483154296875e-05, + "model_forward_time": 0.025091171264648438, + "step": 27950 + }, + { + "epoch": 4.26483154296875e-05, + "step": 27950, + "training_step_time": 0.11933326721191406 + }, + { + "epoch": 4.264984130859375e-05, + "model_forward_time": 0.02534317970275879, + "step": 27951 + }, + { + "epoch": 4.264984130859375e-05, + "step": 27951, + "training_step_time": 0.13525056838989258 + }, + { + "epoch": 4.26513671875e-05, + "model_forward_time": 0.025454282760620117, + "step": 27952 + }, + { + "epoch": 4.26513671875e-05, + "step": 27952, + "training_step_time": 0.14168810844421387 + }, + { + "epoch": 4.265289306640625e-05, + "model_forward_time": 0.02507495880126953, + "step": 27953 + }, + { + "epoch": 4.265289306640625e-05, + "step": 27953, + "training_step_time": 0.11379218101501465 + }, + { + "epoch": 4.26544189453125e-05, + "model_forward_time": 0.025511741638183594, + "step": 27954 + }, + { + "epoch": 4.26544189453125e-05, + "step": 27954, + "training_step_time": 0.10819888114929199 + }, + { + "epoch": 4.265594482421875e-05, + "model_forward_time": 0.0258028507232666, + "step": 27955 + }, + { + "epoch": 4.265594482421875e-05, + "step": 27955, + "training_step_time": 0.11082243919372559 + }, + { + "epoch": 4.2657470703125e-05, + "model_forward_time": 0.025349855422973633, + "step": 27956 + }, + { + "epoch": 4.2657470703125e-05, + "step": 27956, + "training_step_time": 0.17798066139221191 + }, + { + "epoch": 4.265899658203125e-05, + "model_forward_time": 0.02451801300048828, + "step": 27957 + }, + { + "epoch": 4.265899658203125e-05, + "step": 27957, + "training_step_time": 0.11390304565429688 + }, + { + "epoch": 4.26605224609375e-05, + "model_forward_time": 0.025112152099609375, + "step": 27958 + }, + { + "epoch": 4.26605224609375e-05, + "step": 27958, + "training_step_time": 0.10487127304077148 + }, + { + "epoch": 4.266204833984375e-05, + "model_forward_time": 0.025587797164916992, + "step": 27959 + }, + { + "epoch": 4.266204833984375e-05, + "step": 27959, + "training_step_time": 0.10451459884643555 + }, + { + "epoch": 4.266357421875e-05, + "grad_norm": 0.05221688374876976, + "learning_rate": 1.2588659967397e-06, + "loss": 0.0114, + "step": 27960 + }, + { + "epoch": 4.266357421875e-05, + "model_forward_time": 0.025776147842407227, + "step": 27960 + }, + { + "epoch": 4.266357421875e-05, + "step": 27960, + "training_step_time": 0.10680270195007324 + }, + { + "epoch": 4.266510009765625e-05, + "model_forward_time": 0.02565145492553711, + "step": 27961 + }, + { + "epoch": 4.266510009765625e-05, + "step": 27961, + "training_step_time": 0.10778188705444336 + }, + { + "epoch": 4.26666259765625e-05, + "model_forward_time": 0.025341033935546875, + "step": 27962 + }, + { + "epoch": 4.26666259765625e-05, + "step": 27962, + "training_step_time": 0.10608410835266113 + }, + { + "epoch": 4.266815185546875e-05, + "model_forward_time": 0.025365829467773438, + "step": 27963 + }, + { + "epoch": 4.266815185546875e-05, + "step": 27963, + "training_step_time": 0.10579872131347656 + }, + { + "epoch": 4.2669677734375e-05, + "model_forward_time": 0.025606393814086914, + "step": 27964 + }, + { + "epoch": 4.2669677734375e-05, + "step": 27964, + "training_step_time": 0.10605478286743164 + }, + { + "epoch": 4.267120361328125e-05, + "model_forward_time": 0.02568984031677246, + "step": 27965 + }, + { + "epoch": 4.267120361328125e-05, + "step": 27965, + "training_step_time": 0.10603857040405273 + }, + { + "epoch": 4.26727294921875e-05, + "model_forward_time": 0.025866270065307617, + "step": 27966 + }, + { + "epoch": 4.26727294921875e-05, + "step": 27966, + "training_step_time": 0.1074059009552002 + }, + { + "epoch": 4.267425537109375e-05, + "model_forward_time": 0.0254061222076416, + "step": 27967 + }, + { + "epoch": 4.267425537109375e-05, + "step": 27967, + "training_step_time": 0.10718178749084473 + }, + { + "epoch": 4.267578125e-05, + "model_forward_time": 0.025082111358642578, + "step": 27968 + }, + { + "epoch": 4.267578125e-05, + "step": 27968, + "training_step_time": 0.10847663879394531 + }, + { + "epoch": 4.267730712890625e-05, + "model_forward_time": 0.025501012802124023, + "step": 27969 + }, + { + "epoch": 4.267730712890625e-05, + "step": 27969, + "training_step_time": 0.10683178901672363 + }, + { + "epoch": 4.26788330078125e-05, + "grad_norm": 0.06859178096055984, + "learning_rate": 1.2466058399479952e-06, + "loss": 0.0029, + "step": 27970 + }, + { + "epoch": 4.26788330078125e-05, + "model_forward_time": 0.025496959686279297, + "step": 27970 + }, + { + "epoch": 4.26788330078125e-05, + "step": 27970, + "training_step_time": 0.11376738548278809 + }, + { + "epoch": 4.268035888671875e-05, + "model_forward_time": 0.025566577911376953, + "step": 27971 + }, + { + "epoch": 4.268035888671875e-05, + "step": 27971, + "training_step_time": 0.10528016090393066 + }, + { + "epoch": 4.2681884765625e-05, + "model_forward_time": 0.02509140968322754, + "step": 27972 + }, + { + "epoch": 4.2681884765625e-05, + "step": 27972, + "training_step_time": 0.10582137107849121 + }, + { + "epoch": 4.268341064453125e-05, + "model_forward_time": 0.025210857391357422, + "step": 27973 + }, + { + "epoch": 4.268341064453125e-05, + "step": 27973, + "training_step_time": 0.14858794212341309 + }, + { + "epoch": 4.26849365234375e-05, + "model_forward_time": 0.025287151336669922, + "step": 27974 + }, + { + "epoch": 4.26849365234375e-05, + "step": 27974, + "training_step_time": 0.17290425300598145 + }, + { + "epoch": 4.268646240234375e-05, + "model_forward_time": 0.024263858795166016, + "step": 27975 + }, + { + "epoch": 4.268646240234375e-05, + "step": 27975, + "training_step_time": 0.1626110076904297 + }, + { + "epoch": 4.268798828125e-05, + "model_forward_time": 0.024059295654296875, + "step": 27976 + }, + { + "epoch": 4.268798828125e-05, + "step": 27976, + "training_step_time": 0.1560075283050537 + }, + { + "epoch": 4.268951416015625e-05, + "model_forward_time": 0.02505350112915039, + "step": 27977 + }, + { + "epoch": 4.268951416015625e-05, + "step": 27977, + "training_step_time": 0.11144375801086426 + }, + { + "epoch": 4.26910400390625e-05, + "model_forward_time": 0.024424076080322266, + "step": 27978 + }, + { + "epoch": 4.26910400390625e-05, + "step": 27978, + "training_step_time": 0.14347386360168457 + }, + { + "epoch": 4.269256591796875e-05, + "model_forward_time": 0.024840116500854492, + "step": 27979 + }, + { + "epoch": 4.269256591796875e-05, + "step": 27979, + "training_step_time": 0.1513216495513916 + }, + { + "epoch": 4.2694091796875e-05, + "grad_norm": 0.039643775671720505, + "learning_rate": 1.2344049231273302e-06, + "loss": 0.0021, + "step": 27980 + }, + { + "epoch": 4.2694091796875e-05, + "model_forward_time": 0.024786710739135742, + "step": 27980 + }, + { + "epoch": 4.2694091796875e-05, + "step": 27980, + "training_step_time": 0.16834235191345215 + }, + { + "epoch": 4.269561767578125e-05, + "model_forward_time": 0.024988889694213867, + "step": 27981 + }, + { + "epoch": 4.269561767578125e-05, + "step": 27981, + "training_step_time": 0.16028285026550293 + }, + { + "epoch": 4.26971435546875e-05, + "model_forward_time": 0.02411818504333496, + "step": 27982 + }, + { + "epoch": 4.26971435546875e-05, + "step": 27982, + "training_step_time": 0.16425490379333496 + }, + { + "epoch": 4.269866943359375e-05, + "model_forward_time": 0.025319576263427734, + "step": 27983 + }, + { + "epoch": 4.269866943359375e-05, + "step": 27983, + "training_step_time": 0.10875105857849121 + }, + { + "epoch": 4.27001953125e-05, + "model_forward_time": 0.024596452713012695, + "step": 27984 + }, + { + "epoch": 4.27001953125e-05, + "step": 27984, + "training_step_time": 0.1310865879058838 + }, + { + "epoch": 4.270172119140625e-05, + "model_forward_time": 0.02504444122314453, + "step": 27985 + }, + { + "epoch": 4.270172119140625e-05, + "step": 27985, + "training_step_time": 0.14220118522644043 + }, + { + "epoch": 4.27032470703125e-05, + "model_forward_time": 0.024613142013549805, + "step": 27986 + }, + { + "epoch": 4.27032470703125e-05, + "step": 27986, + "training_step_time": 0.1565239429473877 + }, + { + "epoch": 4.270477294921875e-05, + "model_forward_time": 0.02362799644470215, + "step": 27987 + }, + { + "epoch": 4.270477294921875e-05, + "step": 27987, + "training_step_time": 0.1807713508605957 + }, + { + "epoch": 4.2706298828125e-05, + "model_forward_time": 0.02415013313293457, + "step": 27988 + }, + { + "epoch": 4.2706298828125e-05, + "step": 27988, + "training_step_time": 0.1542208194732666 + }, + { + "epoch": 4.270782470703125e-05, + "model_forward_time": 0.024315834045410156, + "step": 27989 + }, + { + "epoch": 4.270782470703125e-05, + "step": 27989, + "training_step_time": 0.13875961303710938 + }, + { + "epoch": 4.27093505859375e-05, + "grad_norm": 0.05708553269505501, + "learning_rate": 1.222263261102985e-06, + "loss": 0.004, + "step": 27990 + }, + { + "epoch": 4.27093505859375e-05, + "model_forward_time": 0.02445673942565918, + "step": 27990 + }, + { + "epoch": 4.27093505859375e-05, + "step": 27990, + "training_step_time": 0.12975358963012695 + }, + { + "epoch": 4.271087646484375e-05, + "model_forward_time": 0.024536848068237305, + "step": 27991 + }, + { + "epoch": 4.271087646484375e-05, + "step": 27991, + "training_step_time": 0.13229155540466309 + }, + { + "epoch": 4.271240234375e-05, + "model_forward_time": 0.02481389045715332, + "step": 27992 + }, + { + "epoch": 4.271240234375e-05, + "step": 27992, + "training_step_time": 0.1214592456817627 + }, + { + "epoch": 4.271392822265625e-05, + "model_forward_time": 0.025017738342285156, + "step": 27993 + }, + { + "epoch": 4.271392822265625e-05, + "step": 27993, + "training_step_time": 0.11903858184814453 + }, + { + "epoch": 4.27154541015625e-05, + "model_forward_time": 0.025713443756103516, + "step": 27994 + }, + { + "epoch": 4.27154541015625e-05, + "step": 27994, + "training_step_time": 0.2052774429321289 + }, + { + "epoch": 4.271697998046875e-05, + "model_forward_time": 0.02530646324157715, + "step": 27995 + }, + { + "epoch": 4.271697998046875e-05, + "step": 27995, + "training_step_time": 0.1184077262878418 + }, + { + "epoch": 4.2718505859375e-05, + "model_forward_time": 0.02584528923034668, + "step": 27996 + }, + { + "epoch": 4.2718505859375e-05, + "step": 27996, + "training_step_time": 0.10634112358093262 + }, + { + "epoch": 4.272003173828125e-05, + "model_forward_time": 0.026337623596191406, + "step": 27997 + }, + { + "epoch": 4.272003173828125e-05, + "step": 27997, + "training_step_time": 0.11828494071960449 + }, + { + "epoch": 4.27215576171875e-05, + "model_forward_time": 0.025213241577148438, + "step": 27998 + }, + { + "epoch": 4.27215576171875e-05, + "step": 27998, + "training_step_time": 0.11823081970214844 + }, + { + "epoch": 4.272308349609375e-05, + "model_forward_time": 0.025218486785888672, + "step": 27999 + }, + { + "epoch": 4.272308349609375e-05, + "step": 27999, + "training_step_time": 0.1120145320892334 + }, + { + "epoch": 4.2724609375e-05, + "grad_norm": 0.06876952946186066, + "learning_rate": 1.210180868628219e-06, + "loss": 0.0072, + "step": 28000 + }, + { + "epoch": 4.2724609375e-05, + "model_forward_time": 0.025133609771728516, + "step": 28000 + }, + { + "epoch": 4.2724609375e-05, + "step": 28000, + "training_step_time": 0.10908770561218262 + }, + { + "epoch": 4.272613525390625e-05, + "model_forward_time": 0.022487640380859375, + "step": 28001 + }, + { + "epoch": 4.272613525390625e-05, + "step": 28001, + "training_step_time": 0.15211749076843262 + }, + { + "epoch": 4.27276611328125e-05, + "model_forward_time": 0.024354219436645508, + "step": 28002 + }, + { + "epoch": 4.27276611328125e-05, + "step": 28002, + "training_step_time": 0.10347366333007812 + }, + { + "epoch": 4.272918701171875e-05, + "model_forward_time": 0.025212526321411133, + "step": 28003 + }, + { + "epoch": 4.272918701171875e-05, + "step": 28003, + "training_step_time": 0.11535882949829102 + }, + { + "epoch": 4.2730712890625e-05, + "model_forward_time": 0.025705575942993164, + "step": 28004 + }, + { + "epoch": 4.2730712890625e-05, + "step": 28004, + "training_step_time": 0.10598611831665039 + }, + { + "epoch": 4.273223876953125e-05, + "model_forward_time": 0.025698423385620117, + "step": 28005 + }, + { + "epoch": 4.273223876953125e-05, + "step": 28005, + "training_step_time": 0.10723996162414551 + }, + { + "epoch": 4.27337646484375e-05, + "model_forward_time": 0.025939464569091797, + "step": 28006 + }, + { + "epoch": 4.27337646484375e-05, + "step": 28006, + "training_step_time": 0.10460209846496582 + }, + { + "epoch": 4.273529052734375e-05, + "model_forward_time": 0.025622129440307617, + "step": 28007 + }, + { + "epoch": 4.273529052734375e-05, + "step": 28007, + "training_step_time": 0.11146187782287598 + }, + { + "epoch": 4.273681640625e-05, + "model_forward_time": 0.02557206153869629, + "step": 28008 + }, + { + "epoch": 4.273681640625e-05, + "step": 28008, + "training_step_time": 0.10476064682006836 + }, + { + "epoch": 4.273834228515625e-05, + "model_forward_time": 0.025312185287475586, + "step": 28009 + }, + { + "epoch": 4.273834228515625e-05, + "step": 28009, + "training_step_time": 0.10474443435668945 + }, + { + "epoch": 4.27398681640625e-05, + "grad_norm": 0.04854021221399307, + "learning_rate": 1.1981577603842776e-06, + "loss": 0.0023, + "step": 28010 + }, + { + "epoch": 4.27398681640625e-05, + "model_forward_time": 0.025357484817504883, + "step": 28010 + }, + { + "epoch": 4.27398681640625e-05, + "step": 28010, + "training_step_time": 0.10552358627319336 + }, + { + "epoch": 4.274139404296875e-05, + "model_forward_time": 0.02460336685180664, + "step": 28011 + }, + { + "epoch": 4.274139404296875e-05, + "step": 28011, + "training_step_time": 0.10281848907470703 + }, + { + "epoch": 4.2742919921875e-05, + "model_forward_time": 0.025520801544189453, + "step": 28012 + }, + { + "epoch": 4.2742919921875e-05, + "step": 28012, + "training_step_time": 0.10708379745483398 + }, + { + "epoch": 4.274444580078125e-05, + "model_forward_time": 0.025524616241455078, + "step": 28013 + }, + { + "epoch": 4.274444580078125e-05, + "step": 28013, + "training_step_time": 0.1055593490600586 + }, + { + "epoch": 4.27459716796875e-05, + "model_forward_time": 0.025459766387939453, + "step": 28014 + }, + { + "epoch": 4.27459716796875e-05, + "step": 28014, + "training_step_time": 0.10937333106994629 + }, + { + "epoch": 4.274749755859375e-05, + "model_forward_time": 0.025835514068603516, + "step": 28015 + }, + { + "epoch": 4.274749755859375e-05, + "step": 28015, + "training_step_time": 0.1066441535949707 + }, + { + "epoch": 4.27490234375e-05, + "model_forward_time": 0.02529287338256836, + "step": 28016 + }, + { + "epoch": 4.27490234375e-05, + "step": 28016, + "training_step_time": 0.10380792617797852 + }, + { + "epoch": 4.275054931640625e-05, + "model_forward_time": 0.026292085647583008, + "step": 28017 + }, + { + "epoch": 4.275054931640625e-05, + "step": 28017, + "training_step_time": 0.10513162612915039 + }, + { + "epoch": 4.27520751953125e-05, + "model_forward_time": 0.025243520736694336, + "step": 28018 + }, + { + "epoch": 4.27520751953125e-05, + "step": 28018, + "training_step_time": 0.10362100601196289 + }, + { + "epoch": 4.275360107421875e-05, + "model_forward_time": 0.02588486671447754, + "step": 28019 + }, + { + "epoch": 4.275360107421875e-05, + "step": 28019, + "training_step_time": 0.1051938533782959 + }, + { + "epoch": 4.2755126953125e-05, + "grad_norm": 0.44585156440734863, + "learning_rate": 1.1861939509803687e-06, + "loss": 0.0068, + "step": 28020 + }, + { + "epoch": 4.2755126953125e-05, + "model_forward_time": 0.025751113891601562, + "step": 28020 + }, + { + "epoch": 4.2755126953125e-05, + "step": 28020, + "training_step_time": 0.10657715797424316 + }, + { + "epoch": 4.275665283203125e-05, + "model_forward_time": 0.025300025939941406, + "step": 28021 + }, + { + "epoch": 4.275665283203125e-05, + "step": 28021, + "training_step_time": 0.10511255264282227 + }, + { + "epoch": 4.27581787109375e-05, + "model_forward_time": 0.025283098220825195, + "step": 28022 + }, + { + "epoch": 4.27581787109375e-05, + "step": 28022, + "training_step_time": 0.11232757568359375 + }, + { + "epoch": 4.275970458984375e-05, + "model_forward_time": 0.025183439254760742, + "step": 28023 + }, + { + "epoch": 4.275970458984375e-05, + "step": 28023, + "training_step_time": 0.1087639331817627 + }, + { + "epoch": 4.276123046875e-05, + "model_forward_time": 0.02609395980834961, + "step": 28024 + }, + { + "epoch": 4.276123046875e-05, + "step": 28024, + "training_step_time": 0.11055231094360352 + }, + { + "epoch": 4.276275634765625e-05, + "model_forward_time": 0.024733543395996094, + "step": 28025 + }, + { + "epoch": 4.276275634765625e-05, + "step": 28025, + "training_step_time": 0.14400339126586914 + }, + { + "epoch": 4.27642822265625e-05, + "model_forward_time": 0.025200366973876953, + "step": 28026 + }, + { + "epoch": 4.27642822265625e-05, + "step": 28026, + "training_step_time": 0.1917896270751953 + }, + { + "epoch": 4.276580810546875e-05, + "model_forward_time": 0.025035381317138672, + "step": 28027 + }, + { + "epoch": 4.276580810546875e-05, + "step": 28027, + "training_step_time": 0.21288537979125977 + }, + { + "epoch": 4.2767333984375e-05, + "model_forward_time": 0.024612903594970703, + "step": 28028 + }, + { + "epoch": 4.2767333984375e-05, + "step": 28028, + "training_step_time": 0.18929052352905273 + }, + { + "epoch": 4.276885986328125e-05, + "model_forward_time": 0.026010751724243164, + "step": 28029 + }, + { + "epoch": 4.276885986328125e-05, + "step": 28029, + "training_step_time": 0.10692119598388672 + }, + { + "epoch": 4.27703857421875e-05, + "grad_norm": 0.15105819702148438, + "learning_rate": 1.1742894549536477e-06, + "loss": 0.0041, + "step": 28030 + }, + { + "epoch": 4.27703857421875e-05, + "model_forward_time": 0.024358034133911133, + "step": 28030 + }, + { + "epoch": 4.27703857421875e-05, + "step": 28030, + "training_step_time": 0.17907190322875977 + }, + { + "epoch": 4.277191162109375e-05, + "model_forward_time": 0.024917125701904297, + "step": 28031 + }, + { + "epoch": 4.277191162109375e-05, + "step": 28031, + "training_step_time": 0.12036561965942383 + }, + { + "epoch": 4.27734375e-05, + "model_forward_time": 0.025186538696289062, + "step": 28032 + }, + { + "epoch": 4.27734375e-05, + "step": 28032, + "training_step_time": 0.10973906517028809 + }, + { + "epoch": 4.277496337890625e-05, + "model_forward_time": 0.025532007217407227, + "step": 28033 + }, + { + "epoch": 4.277496337890625e-05, + "step": 28033, + "training_step_time": 0.12372922897338867 + }, + { + "epoch": 4.27764892578125e-05, + "model_forward_time": 0.025794506072998047, + "step": 28034 + }, + { + "epoch": 4.27764892578125e-05, + "step": 28034, + "training_step_time": 0.14110684394836426 + }, + { + "epoch": 4.277801513671875e-05, + "model_forward_time": 0.024763822555541992, + "step": 28035 + }, + { + "epoch": 4.277801513671875e-05, + "step": 28035, + "training_step_time": 0.11424756050109863 + }, + { + "epoch": 4.2779541015625e-05, + "model_forward_time": 0.02552652359008789, + "step": 28036 + }, + { + "epoch": 4.2779541015625e-05, + "step": 28036, + "training_step_time": 0.10770750045776367 + }, + { + "epoch": 4.278106689453125e-05, + "model_forward_time": 0.025593042373657227, + "step": 28037 + }, + { + "epoch": 4.278106689453125e-05, + "step": 28037, + "training_step_time": 0.13204526901245117 + }, + { + "epoch": 4.27825927734375e-05, + "model_forward_time": 0.026675939559936523, + "step": 28038 + }, + { + "epoch": 4.27825927734375e-05, + "step": 28038, + "training_step_time": 0.13980460166931152 + }, + { + "epoch": 4.278411865234375e-05, + "model_forward_time": 0.025687694549560547, + "step": 28039 + }, + { + "epoch": 4.278411865234375e-05, + "step": 28039, + "training_step_time": 0.12833142280578613 + }, + { + "epoch": 4.278564453125e-05, + "grad_norm": 0.07757231593132019, + "learning_rate": 1.16244428676921e-06, + "loss": 0.0047, + "step": 28040 + }, + { + "epoch": 4.278564453125e-05, + "model_forward_time": 0.024254322052001953, + "step": 28040 + }, + { + "epoch": 4.278564453125e-05, + "step": 28040, + "training_step_time": 0.12408614158630371 + }, + { + "epoch": 4.278717041015625e-05, + "model_forward_time": 0.024830102920532227, + "step": 28041 + }, + { + "epoch": 4.278717041015625e-05, + "step": 28041, + "training_step_time": 0.12283539772033691 + }, + { + "epoch": 4.27886962890625e-05, + "model_forward_time": 0.0246274471282959, + "step": 28042 + }, + { + "epoch": 4.27886962890625e-05, + "step": 28042, + "training_step_time": 0.12130618095397949 + }, + { + "epoch": 4.279022216796875e-05, + "model_forward_time": 0.024924755096435547, + "step": 28043 + }, + { + "epoch": 4.279022216796875e-05, + "step": 28043, + "training_step_time": 0.17862915992736816 + }, + { + "epoch": 4.2791748046875e-05, + "model_forward_time": 0.025013208389282227, + "step": 28044 + }, + { + "epoch": 4.2791748046875e-05, + "step": 28044, + "training_step_time": 0.14233994483947754 + }, + { + "epoch": 4.279327392578125e-05, + "model_forward_time": 0.02472710609436035, + "step": 28045 + }, + { + "epoch": 4.279327392578125e-05, + "step": 28045, + "training_step_time": 0.10658884048461914 + }, + { + "epoch": 4.27947998046875e-05, + "model_forward_time": 0.025505781173706055, + "step": 28046 + }, + { + "epoch": 4.27947998046875e-05, + "step": 28046, + "training_step_time": 0.10288357734680176 + }, + { + "epoch": 4.279632568359375e-05, + "model_forward_time": 0.025631427764892578, + "step": 28047 + }, + { + "epoch": 4.279632568359375e-05, + "step": 28047, + "training_step_time": 0.11542534828186035 + }, + { + "epoch": 4.27978515625e-05, + "model_forward_time": 0.02580428123474121, + "step": 28048 + }, + { + "epoch": 4.27978515625e-05, + "step": 28048, + "training_step_time": 0.1045529842376709 + }, + { + "epoch": 4.279937744140625e-05, + "model_forward_time": 0.025458574295043945, + "step": 28049 + }, + { + "epoch": 4.279937744140625e-05, + "step": 28049, + "training_step_time": 0.19089555740356445 + }, + { + "epoch": 4.28009033203125e-05, + "grad_norm": 0.024919060990214348, + "learning_rate": 1.1506584608200367e-06, + "loss": 0.0035, + "step": 28050 + }, + { + "epoch": 4.28009033203125e-05, + "model_forward_time": 0.024945735931396484, + "step": 28050 + }, + { + "epoch": 4.28009033203125e-05, + "step": 28050, + "training_step_time": 0.10418534278869629 + }, + { + "epoch": 4.280242919921875e-05, + "model_forward_time": 0.025121212005615234, + "step": 28051 + }, + { + "epoch": 4.280242919921875e-05, + "step": 28051, + "training_step_time": 0.10232686996459961 + }, + { + "epoch": 4.2803955078125e-05, + "model_forward_time": 0.02559828758239746, + "step": 28052 + }, + { + "epoch": 4.2803955078125e-05, + "step": 28052, + "training_step_time": 0.1072854995727539 + }, + { + "epoch": 4.280548095703125e-05, + "model_forward_time": 0.02575230598449707, + "step": 28053 + }, + { + "epoch": 4.280548095703125e-05, + "step": 28053, + "training_step_time": 0.10650467872619629 + }, + { + "epoch": 4.28070068359375e-05, + "model_forward_time": 0.0249788761138916, + "step": 28054 + }, + { + "epoch": 4.28070068359375e-05, + "step": 28054, + "training_step_time": 0.10440659523010254 + }, + { + "epoch": 4.280853271484375e-05, + "model_forward_time": 0.025271892547607422, + "step": 28055 + }, + { + "epoch": 4.280853271484375e-05, + "step": 28055, + "training_step_time": 0.13753819465637207 + }, + { + "epoch": 4.281005859375e-05, + "model_forward_time": 0.025707721710205078, + "step": 28056 + }, + { + "epoch": 4.281005859375e-05, + "step": 28056, + "training_step_time": 0.13420724868774414 + }, + { + "epoch": 4.281158447265625e-05, + "model_forward_time": 0.02409505844116211, + "step": 28057 + }, + { + "epoch": 4.281158447265625e-05, + "step": 28057, + "training_step_time": 0.12948894500732422 + }, + { + "epoch": 4.28131103515625e-05, + "model_forward_time": 0.024275779724121094, + "step": 28058 + }, + { + "epoch": 4.28131103515625e-05, + "step": 28058, + "training_step_time": 0.13346385955810547 + }, + { + "epoch": 4.281463623046875e-05, + "model_forward_time": 0.02422165870666504, + "step": 28059 + }, + { + "epoch": 4.281463623046875e-05, + "step": 28059, + "training_step_time": 0.11788535118103027 + }, + { + "epoch": 4.2816162109375e-05, + "grad_norm": 0.04625081270933151, + "learning_rate": 1.138931991427028e-06, + "loss": 0.0027, + "step": 28060 + }, + { + "epoch": 4.2816162109375e-05, + "model_forward_time": 0.024268388748168945, + "step": 28060 + }, + { + "epoch": 4.2816162109375e-05, + "step": 28060, + "training_step_time": 0.11838340759277344 + }, + { + "epoch": 4.281768798828125e-05, + "model_forward_time": 0.025130271911621094, + "step": 28061 + }, + { + "epoch": 4.281768798828125e-05, + "step": 28061, + "training_step_time": 0.11795234680175781 + }, + { + "epoch": 4.28192138671875e-05, + "model_forward_time": 0.024408578872680664, + "step": 28062 + }, + { + "epoch": 4.28192138671875e-05, + "step": 28062, + "training_step_time": 0.11512994766235352 + }, + { + "epoch": 4.282073974609375e-05, + "model_forward_time": 0.027941226959228516, + "step": 28063 + }, + { + "epoch": 4.282073974609375e-05, + "step": 28063, + "training_step_time": 0.1131432056427002 + }, + { + "epoch": 4.2822265625e-05, + "model_forward_time": 0.02573227882385254, + "step": 28064 + }, + { + "epoch": 4.2822265625e-05, + "step": 28064, + "training_step_time": 0.11481738090515137 + }, + { + "epoch": 4.282379150390625e-05, + "model_forward_time": 0.02604365348815918, + "step": 28065 + }, + { + "epoch": 4.282379150390625e-05, + "step": 28065, + "training_step_time": 0.11078023910522461 + }, + { + "epoch": 4.28253173828125e-05, + "model_forward_time": 0.02553105354309082, + "step": 28066 + }, + { + "epoch": 4.28253173828125e-05, + "step": 28066, + "training_step_time": 0.1105649471282959 + }, + { + "epoch": 4.282684326171875e-05, + "model_forward_time": 0.02533578872680664, + "step": 28067 + }, + { + "epoch": 4.282684326171875e-05, + "step": 28067, + "training_step_time": 0.10690593719482422 + }, + { + "epoch": 4.2828369140625e-05, + "model_forward_time": 0.026012420654296875, + "step": 28068 + }, + { + "epoch": 4.2828369140625e-05, + "step": 28068, + "training_step_time": 0.15660619735717773 + }, + { + "epoch": 4.282989501953125e-05, + "model_forward_time": 0.02501654624938965, + "step": 28069 + }, + { + "epoch": 4.282989501953125e-05, + "step": 28069, + "training_step_time": 0.11046075820922852 + }, + { + "epoch": 4.28314208984375e-05, + "grad_norm": 0.07085675746202469, + "learning_rate": 1.1272648928389473e-06, + "loss": 0.0028, + "step": 28070 + }, + { + "epoch": 4.28314208984375e-05, + "model_forward_time": 0.025017499923706055, + "step": 28070 + }, + { + "epoch": 4.28314208984375e-05, + "step": 28070, + "training_step_time": 0.15639376640319824 + }, + { + "epoch": 4.283294677734375e-05, + "model_forward_time": 0.025296926498413086, + "step": 28071 + }, + { + "epoch": 4.283294677734375e-05, + "step": 28071, + "training_step_time": 0.13969755172729492 + }, + { + "epoch": 4.283447265625e-05, + "model_forward_time": 0.024966001510620117, + "step": 28072 + }, + { + "epoch": 4.283447265625e-05, + "step": 28072, + "training_step_time": 0.20888018608093262 + }, + { + "epoch": 4.283599853515625e-05, + "model_forward_time": 0.0251462459564209, + "step": 28073 + }, + { + "epoch": 4.283599853515625e-05, + "step": 28073, + "training_step_time": 0.10761117935180664 + }, + { + "epoch": 4.28375244140625e-05, + "model_forward_time": 0.025363683700561523, + "step": 28074 + }, + { + "epoch": 4.28375244140625e-05, + "step": 28074, + "training_step_time": 0.10629796981811523 + }, + { + "epoch": 4.283905029296875e-05, + "model_forward_time": 0.025469303131103516, + "step": 28075 + }, + { + "epoch": 4.283905029296875e-05, + "step": 28075, + "training_step_time": 0.2293379306793213 + }, + { + "epoch": 4.2840576171875e-05, + "model_forward_time": 0.024816036224365234, + "step": 28076 + }, + { + "epoch": 4.2840576171875e-05, + "step": 28076, + "training_step_time": 0.21250391006469727 + }, + { + "epoch": 4.284210205078125e-05, + "model_forward_time": 0.024728775024414062, + "step": 28077 + }, + { + "epoch": 4.284210205078125e-05, + "step": 28077, + "training_step_time": 0.17592072486877441 + }, + { + "epoch": 4.28436279296875e-05, + "model_forward_time": 0.02487468719482422, + "step": 28078 + }, + { + "epoch": 4.28436279296875e-05, + "step": 28078, + "training_step_time": 0.19633102416992188 + }, + { + "epoch": 4.284515380859375e-05, + "model_forward_time": 0.024659395217895508, + "step": 28079 + }, + { + "epoch": 4.284515380859375e-05, + "step": 28079, + "training_step_time": 0.14942717552185059 + }, + { + "epoch": 4.28466796875e-05, + "grad_norm": 0.04600098729133606, + "learning_rate": 1.1156571792324211e-06, + "loss": 0.003, + "step": 28080 + }, + { + "epoch": 4.28466796875e-05, + "model_forward_time": 0.023808956146240234, + "step": 28080 + }, + { + "epoch": 4.28466796875e-05, + "step": 28080, + "training_step_time": 0.12925004959106445 + }, + { + "epoch": 4.284820556640625e-05, + "model_forward_time": 0.02474522590637207, + "step": 28081 + }, + { + "epoch": 4.284820556640625e-05, + "step": 28081, + "training_step_time": 0.1114661693572998 + }, + { + "epoch": 4.28497314453125e-05, + "model_forward_time": 0.025533199310302734, + "step": 28082 + }, + { + "epoch": 4.28497314453125e-05, + "step": 28082, + "training_step_time": 0.10246682167053223 + }, + { + "epoch": 4.285125732421875e-05, + "model_forward_time": 0.025798559188842773, + "step": 28083 + }, + { + "epoch": 4.285125732421875e-05, + "step": 28083, + "training_step_time": 0.10435366630554199 + }, + { + "epoch": 4.2852783203125e-05, + "model_forward_time": 0.025624990463256836, + "step": 28084 + }, + { + "epoch": 4.2852783203125e-05, + "step": 28084, + "training_step_time": 0.103851318359375 + }, + { + "epoch": 4.285430908203125e-05, + "model_forward_time": 0.02579045295715332, + "step": 28085 + }, + { + "epoch": 4.285430908203125e-05, + "step": 28085, + "training_step_time": 0.10672783851623535 + }, + { + "epoch": 4.28558349609375e-05, + "model_forward_time": 0.025568008422851562, + "step": 28086 + }, + { + "epoch": 4.28558349609375e-05, + "step": 28086, + "training_step_time": 0.15857982635498047 + }, + { + "epoch": 4.285736083984375e-05, + "model_forward_time": 0.024938106536865234, + "step": 28087 + }, + { + "epoch": 4.285736083984375e-05, + "step": 28087, + "training_step_time": 0.16930484771728516 + }, + { + "epoch": 4.285888671875e-05, + "model_forward_time": 0.024765491485595703, + "step": 28088 + }, + { + "epoch": 4.285888671875e-05, + "step": 28088, + "training_step_time": 0.11368107795715332 + }, + { + "epoch": 4.286041259765625e-05, + "model_forward_time": 0.024991512298583984, + "step": 28089 + }, + { + "epoch": 4.286041259765625e-05, + "step": 28089, + "training_step_time": 0.10468292236328125 + }, + { + "epoch": 4.28619384765625e-05, + "grad_norm": 0.118675097823143, + "learning_rate": 1.1041088647119114e-06, + "loss": 0.0046, + "step": 28090 + }, + { + "epoch": 4.28619384765625e-05, + "model_forward_time": 0.02599048614501953, + "step": 28090 + }, + { + "epoch": 4.28619384765625e-05, + "step": 28090, + "training_step_time": 0.1128849983215332 + }, + { + "epoch": 4.286346435546875e-05, + "model_forward_time": 0.02588963508605957, + "step": 28091 + }, + { + "epoch": 4.286346435546875e-05, + "step": 28091, + "training_step_time": 0.10625219345092773 + }, + { + "epoch": 4.2864990234375e-05, + "model_forward_time": 0.025443077087402344, + "step": 28092 + }, + { + "epoch": 4.2864990234375e-05, + "step": 28092, + "training_step_time": 0.19758391380310059 + }, + { + "epoch": 4.286651611328125e-05, + "model_forward_time": 0.02498316764831543, + "step": 28093 + }, + { + "epoch": 4.286651611328125e-05, + "step": 28093, + "training_step_time": 0.10712242126464844 + }, + { + "epoch": 4.28680419921875e-05, + "model_forward_time": 0.02486586570739746, + "step": 28094 + }, + { + "epoch": 4.28680419921875e-05, + "step": 28094, + "training_step_time": 0.10593652725219727 + }, + { + "epoch": 4.286956787109375e-05, + "model_forward_time": 0.025663375854492188, + "step": 28095 + }, + { + "epoch": 4.286956787109375e-05, + "step": 28095, + "training_step_time": 0.1056520938873291 + }, + { + "epoch": 4.287109375e-05, + "model_forward_time": 0.025650739669799805, + "step": 28096 + }, + { + "epoch": 4.287109375e-05, + "step": 28096, + "training_step_time": 0.10642790794372559 + }, + { + "epoch": 4.287261962890625e-05, + "model_forward_time": 0.02581167221069336, + "step": 28097 + }, + { + "epoch": 4.287261962890625e-05, + "step": 28097, + "training_step_time": 0.10577678680419922 + }, + { + "epoch": 4.28741455078125e-05, + "model_forward_time": 0.02585744857788086, + "step": 28098 + }, + { + "epoch": 4.28741455078125e-05, + "step": 28098, + "training_step_time": 0.10816359519958496 + }, + { + "epoch": 4.287567138671875e-05, + "model_forward_time": 0.025646209716796875, + "step": 28099 + }, + { + "epoch": 4.287567138671875e-05, + "step": 28099, + "training_step_time": 0.11125969886779785 + }, + { + "epoch": 4.2877197265625e-05, + "grad_norm": 0.02818923443555832, + "learning_rate": 1.0926199633097157e-06, + "loss": 0.0042, + "step": 28100 + }, + { + "epoch": 4.2877197265625e-05, + "model_forward_time": 0.025690317153930664, + "step": 28100 + }, + { + "epoch": 4.2877197265625e-05, + "step": 28100, + "training_step_time": 0.11008667945861816 + }, + { + "epoch": 4.287872314453125e-05, + "model_forward_time": 0.029033184051513672, + "step": 28101 + }, + { + "epoch": 4.287872314453125e-05, + "step": 28101, + "training_step_time": 0.1139678955078125 + }, + { + "epoch": 4.28802490234375e-05, + "model_forward_time": 0.025434494018554688, + "step": 28102 + }, + { + "epoch": 4.28802490234375e-05, + "step": 28102, + "training_step_time": 0.11111235618591309 + }, + { + "epoch": 4.288177490234375e-05, + "model_forward_time": 0.025571823120117188, + "step": 28103 + }, + { + "epoch": 4.288177490234375e-05, + "step": 28103, + "training_step_time": 0.10701942443847656 + }, + { + "epoch": 4.288330078125e-05, + "model_forward_time": 0.025650978088378906, + "step": 28104 + }, + { + "epoch": 4.288330078125e-05, + "step": 28104, + "training_step_time": 0.10683321952819824 + }, + { + "epoch": 4.288482666015625e-05, + "model_forward_time": 0.025441408157348633, + "step": 28105 + }, + { + "epoch": 4.288482666015625e-05, + "step": 28105, + "training_step_time": 0.10703063011169434 + }, + { + "epoch": 4.28863525390625e-05, + "model_forward_time": 0.025362491607666016, + "step": 28106 + }, + { + "epoch": 4.28863525390625e-05, + "step": 28106, + "training_step_time": 0.10633420944213867 + }, + { + "epoch": 4.288787841796875e-05, + "model_forward_time": 0.025365352630615234, + "step": 28107 + }, + { + "epoch": 4.288787841796875e-05, + "step": 28107, + "training_step_time": 0.10951447486877441 + }, + { + "epoch": 4.2889404296875e-05, + "model_forward_time": 0.02532029151916504, + "step": 28108 + }, + { + "epoch": 4.2889404296875e-05, + "step": 28108, + "training_step_time": 0.10729670524597168 + }, + { + "epoch": 4.289093017578125e-05, + "model_forward_time": 0.024563312530517578, + "step": 28109 + }, + { + "epoch": 4.289093017578125e-05, + "step": 28109, + "training_step_time": 0.10837173461914062 + }, + { + "epoch": 4.28924560546875e-05, + "grad_norm": 0.05564859136939049, + "learning_rate": 1.0811904889859336e-06, + "loss": 0.0046, + "step": 28110 + }, + { + "epoch": 4.28924560546875e-05, + "model_forward_time": 0.02592325210571289, + "step": 28110 + }, + { + "epoch": 4.28924560546875e-05, + "step": 28110, + "training_step_time": 0.10683822631835938 + }, + { + "epoch": 4.289398193359375e-05, + "model_forward_time": 0.025578975677490234, + "step": 28111 + }, + { + "epoch": 4.289398193359375e-05, + "step": 28111, + "training_step_time": 0.10595178604125977 + }, + { + "epoch": 4.28955078125e-05, + "model_forward_time": 0.025487422943115234, + "step": 28112 + }, + { + "epoch": 4.28955078125e-05, + "step": 28112, + "training_step_time": 0.10527920722961426 + }, + { + "epoch": 4.289703369140625e-05, + "model_forward_time": 0.024895906448364258, + "step": 28113 + }, + { + "epoch": 4.289703369140625e-05, + "step": 28113, + "training_step_time": 0.1571955680847168 + }, + { + "epoch": 4.28985595703125e-05, + "model_forward_time": 0.024834156036376953, + "step": 28114 + }, + { + "epoch": 4.28985595703125e-05, + "step": 28114, + "training_step_time": 0.16796636581420898 + }, + { + "epoch": 4.290008544921875e-05, + "model_forward_time": 0.024730443954467773, + "step": 28115 + }, + { + "epoch": 4.290008544921875e-05, + "step": 28115, + "training_step_time": 0.16473150253295898 + }, + { + "epoch": 4.2901611328125e-05, + "model_forward_time": 0.025068283081054688, + "step": 28116 + }, + { + "epoch": 4.2901611328125e-05, + "step": 28116, + "training_step_time": 0.11041641235351562 + }, + { + "epoch": 4.290313720703125e-05, + "model_forward_time": 0.02454686164855957, + "step": 28117 + }, + { + "epoch": 4.290313720703125e-05, + "step": 28117, + "training_step_time": 0.13852810859680176 + }, + { + "epoch": 4.29046630859375e-05, + "model_forward_time": 0.025707244873046875, + "step": 28118 + }, + { + "epoch": 4.29046630859375e-05, + "step": 28118, + "training_step_time": 0.10743856430053711 + }, + { + "epoch": 4.290618896484375e-05, + "model_forward_time": 0.025455951690673828, + "step": 28119 + }, + { + "epoch": 4.290618896484375e-05, + "step": 28119, + "training_step_time": 0.10467886924743652 + }, + { + "epoch": 4.290771484375e-05, + "grad_norm": 0.15088430047035217, + "learning_rate": 1.0698204556284452e-06, + "loss": 0.0053, + "step": 28120 + }, + { + "epoch": 4.290771484375e-05, + "model_forward_time": 0.024934768676757812, + "step": 28120 + }, + { + "epoch": 4.290771484375e-05, + "step": 28120, + "training_step_time": 0.20907306671142578 + }, + { + "epoch": 4.290924072265625e-05, + "model_forward_time": 0.02465057373046875, + "step": 28121 + }, + { + "epoch": 4.290924072265625e-05, + "step": 28121, + "training_step_time": 0.22231125831604004 + }, + { + "epoch": 4.29107666015625e-05, + "model_forward_time": 0.02471923828125, + "step": 28122 + }, + { + "epoch": 4.29107666015625e-05, + "step": 28122, + "training_step_time": 0.11757469177246094 + }, + { + "epoch": 4.291229248046875e-05, + "model_forward_time": 0.024435043334960938, + "step": 28123 + }, + { + "epoch": 4.291229248046875e-05, + "step": 28123, + "training_step_time": 0.12290835380554199 + }, + { + "epoch": 4.2913818359375e-05, + "model_forward_time": 0.025502681732177734, + "step": 28124 + }, + { + "epoch": 4.2913818359375e-05, + "step": 28124, + "training_step_time": 0.1162412166595459 + }, + { + "epoch": 4.291534423828125e-05, + "model_forward_time": 0.025265216827392578, + "step": 28125 + }, + { + "epoch": 4.291534423828125e-05, + "step": 28125, + "training_step_time": 0.11341333389282227 + }, + { + "epoch": 4.29168701171875e-05, + "model_forward_time": 0.025426149368286133, + "step": 28126 + }, + { + "epoch": 4.29168701171875e-05, + "step": 28126, + "training_step_time": 0.14142131805419922 + }, + { + "epoch": 4.291839599609375e-05, + "model_forward_time": 0.025430917739868164, + "step": 28127 + }, + { + "epoch": 4.291839599609375e-05, + "step": 28127, + "training_step_time": 0.103271484375 + }, + { + "epoch": 4.2919921875e-05, + "model_forward_time": 0.025682687759399414, + "step": 28128 + }, + { + "epoch": 4.2919921875e-05, + "step": 28128, + "training_step_time": 0.10607409477233887 + }, + { + "epoch": 4.292144775390625e-05, + "model_forward_time": 0.0254056453704834, + "step": 28129 + }, + { + "epoch": 4.292144775390625e-05, + "step": 28129, + "training_step_time": 0.10705685615539551 + }, + { + "epoch": 4.29229736328125e-05, + "grad_norm": 0.03304993733763695, + "learning_rate": 1.0585098770529157e-06, + "loss": 0.003, + "step": 28130 + }, + { + "epoch": 4.29229736328125e-05, + "model_forward_time": 0.02542901039123535, + "step": 28130 + }, + { + "epoch": 4.29229736328125e-05, + "step": 28130, + "training_step_time": 0.10944628715515137 + }, + { + "epoch": 4.292449951171875e-05, + "model_forward_time": 0.02570486068725586, + "step": 28131 + }, + { + "epoch": 4.292449951171875e-05, + "step": 28131, + "training_step_time": 0.11533522605895996 + }, + { + "epoch": 4.2926025390625e-05, + "model_forward_time": 0.025487661361694336, + "step": 28132 + }, + { + "epoch": 4.2926025390625e-05, + "step": 28132, + "training_step_time": 0.1496903896331787 + }, + { + "epoch": 4.292755126953125e-05, + "model_forward_time": 0.025234699249267578, + "step": 28133 + }, + { + "epoch": 4.292755126953125e-05, + "step": 28133, + "training_step_time": 0.14674067497253418 + }, + { + "epoch": 4.29290771484375e-05, + "model_forward_time": 0.024352073669433594, + "step": 28134 + }, + { + "epoch": 4.29290771484375e-05, + "step": 28134, + "training_step_time": 0.1492311954498291 + }, + { + "epoch": 4.293060302734375e-05, + "model_forward_time": 0.025493860244750977, + "step": 28135 + }, + { + "epoch": 4.293060302734375e-05, + "step": 28135, + "training_step_time": 0.17179393768310547 + }, + { + "epoch": 4.293212890625e-05, + "model_forward_time": 0.025066375732421875, + "step": 28136 + }, + { + "epoch": 4.293212890625e-05, + "step": 28136, + "training_step_time": 0.11461687088012695 + }, + { + "epoch": 4.293365478515625e-05, + "model_forward_time": 0.023778200149536133, + "step": 28137 + }, + { + "epoch": 4.293365478515625e-05, + "step": 28137, + "training_step_time": 0.18233203887939453 + }, + { + "epoch": 4.29351806640625e-05, + "model_forward_time": 0.024322032928466797, + "step": 28138 + }, + { + "epoch": 4.29351806640625e-05, + "step": 28138, + "training_step_time": 0.10688185691833496 + }, + { + "epoch": 4.293670654296875e-05, + "model_forward_time": 0.024742841720581055, + "step": 28139 + }, + { + "epoch": 4.293670654296875e-05, + "step": 28139, + "training_step_time": 0.10454940795898438 + }, + { + "epoch": 4.2938232421875e-05, + "grad_norm": 0.17801275849342346, + "learning_rate": 1.0472587670027678e-06, + "loss": 0.0033, + "step": 28140 + }, + { + "epoch": 4.2938232421875e-05, + "model_forward_time": 0.025681257247924805, + "step": 28140 + }, + { + "epoch": 4.2938232421875e-05, + "step": 28140, + "training_step_time": 0.11020898818969727 + }, + { + "epoch": 4.293975830078125e-05, + "model_forward_time": 0.02542281150817871, + "step": 28141 + }, + { + "epoch": 4.293975830078125e-05, + "step": 28141, + "training_step_time": 0.10460519790649414 + }, + { + "epoch": 4.29412841796875e-05, + "model_forward_time": 0.0254056453704834, + "step": 28142 + }, + { + "epoch": 4.29412841796875e-05, + "step": 28142, + "training_step_time": 0.10540962219238281 + }, + { + "epoch": 4.294281005859375e-05, + "model_forward_time": 0.025315523147583008, + "step": 28143 + }, + { + "epoch": 4.294281005859375e-05, + "step": 28143, + "training_step_time": 0.10575366020202637 + }, + { + "epoch": 4.29443359375e-05, + "model_forward_time": 0.02530503273010254, + "step": 28144 + }, + { + "epoch": 4.29443359375e-05, + "step": 28144, + "training_step_time": 0.10960984230041504 + }, + { + "epoch": 4.294586181640625e-05, + "model_forward_time": 0.02729511260986328, + "step": 28145 + }, + { + "epoch": 4.294586181640625e-05, + "step": 28145, + "training_step_time": 0.10846996307373047 + }, + { + "epoch": 4.29473876953125e-05, + "model_forward_time": 0.025135278701782227, + "step": 28146 + }, + { + "epoch": 4.29473876953125e-05, + "step": 28146, + "training_step_time": 0.10770273208618164 + }, + { + "epoch": 4.294891357421875e-05, + "model_forward_time": 0.02505779266357422, + "step": 28147 + }, + { + "epoch": 4.294891357421875e-05, + "step": 28147, + "training_step_time": 0.10571455955505371 + }, + { + "epoch": 4.2950439453125e-05, + "model_forward_time": 0.025591373443603516, + "step": 28148 + }, + { + "epoch": 4.2950439453125e-05, + "step": 28148, + "training_step_time": 0.10745716094970703 + }, + { + "epoch": 4.295196533203125e-05, + "model_forward_time": 0.02503657341003418, + "step": 28149 + }, + { + "epoch": 4.295196533203125e-05, + "step": 28149, + "training_step_time": 0.10613107681274414 + }, + { + "epoch": 4.29534912109375e-05, + "grad_norm": 0.29600000381469727, + "learning_rate": 1.0360671391491606e-06, + "loss": 0.0131, + "step": 28150 + }, + { + "epoch": 4.29534912109375e-05, + "model_forward_time": 0.02520442008972168, + "step": 28150 + }, + { + "epoch": 4.29534912109375e-05, + "step": 28150, + "training_step_time": 0.10464811325073242 + }, + { + "epoch": 4.295501708984375e-05, + "model_forward_time": 0.0256650447845459, + "step": 28151 + }, + { + "epoch": 4.295501708984375e-05, + "step": 28151, + "training_step_time": 0.10714006423950195 + }, + { + "epoch": 4.295654296875e-05, + "model_forward_time": 0.02523517608642578, + "step": 28152 + }, + { + "epoch": 4.295654296875e-05, + "step": 28152, + "training_step_time": 0.10495471954345703 + }, + { + "epoch": 4.295806884765625e-05, + "model_forward_time": 0.02512502670288086, + "step": 28153 + }, + { + "epoch": 4.295806884765625e-05, + "step": 28153, + "training_step_time": 0.10732030868530273 + }, + { + "epoch": 4.29595947265625e-05, + "model_forward_time": 0.025397300720214844, + "step": 28154 + }, + { + "epoch": 4.29595947265625e-05, + "step": 28154, + "training_step_time": 0.1050872802734375 + }, + { + "epoch": 4.296112060546875e-05, + "model_forward_time": 0.025055646896362305, + "step": 28155 + }, + { + "epoch": 4.296112060546875e-05, + "step": 28155, + "training_step_time": 0.10627460479736328 + }, + { + "epoch": 4.2962646484375e-05, + "model_forward_time": 0.0256044864654541, + "step": 28156 + }, + { + "epoch": 4.2962646484375e-05, + "step": 28156, + "training_step_time": 0.10723519325256348 + }, + { + "epoch": 4.296417236328125e-05, + "model_forward_time": 0.024872779846191406, + "step": 28157 + }, + { + "epoch": 4.296417236328125e-05, + "step": 28157, + "training_step_time": 0.10510730743408203 + }, + { + "epoch": 4.29656982421875e-05, + "model_forward_time": 0.025111913681030273, + "step": 28158 + }, + { + "epoch": 4.29656982421875e-05, + "step": 28158, + "training_step_time": 0.10512185096740723 + }, + { + "epoch": 4.296722412109375e-05, + "model_forward_time": 0.0239865779876709, + "step": 28159 + }, + { + "epoch": 4.296722412109375e-05, + "step": 28159, + "training_step_time": 0.14714455604553223 + }, + { + "epoch": 4.296875e-05, + "grad_norm": 0.04726273566484451, + "learning_rate": 1.0249350070909768e-06, + "loss": 0.007, + "step": 28160 + }, + { + "epoch": 4.296875e-05, + "model_forward_time": 0.024842262268066406, + "step": 28160 + }, + { + "epoch": 4.296875e-05, + "step": 28160, + "training_step_time": 0.20943999290466309 + }, + { + "epoch": 4.297027587890625e-05, + "model_forward_time": 0.024492263793945312, + "step": 28161 + }, + { + "epoch": 4.297027587890625e-05, + "step": 28161, + "training_step_time": 0.10382938385009766 + }, + { + "epoch": 4.29718017578125e-05, + "model_forward_time": 0.024725675582885742, + "step": 28162 + }, + { + "epoch": 4.29718017578125e-05, + "step": 28162, + "training_step_time": 0.17518854141235352 + }, + { + "epoch": 4.297332763671875e-05, + "model_forward_time": 0.024048566818237305, + "step": 28163 + }, + { + "epoch": 4.297332763671875e-05, + "step": 28163, + "training_step_time": 0.15068626403808594 + }, + { + "epoch": 4.2974853515625e-05, + "model_forward_time": 0.024222135543823242, + "step": 28164 + }, + { + "epoch": 4.2974853515625e-05, + "step": 28164, + "training_step_time": 0.10298681259155273 + }, + { + "epoch": 4.297637939453125e-05, + "model_forward_time": 0.02491021156311035, + "step": 28165 + }, + { + "epoch": 4.297637939453125e-05, + "step": 28165, + "training_step_time": 0.10348963737487793 + }, + { + "epoch": 4.29779052734375e-05, + "model_forward_time": 0.02529120445251465, + "step": 28166 + }, + { + "epoch": 4.29779052734375e-05, + "step": 28166, + "training_step_time": 0.1744542121887207 + }, + { + "epoch": 4.297943115234375e-05, + "model_forward_time": 0.024445295333862305, + "step": 28167 + }, + { + "epoch": 4.297943115234375e-05, + "step": 28167, + "training_step_time": 0.11167669296264648 + }, + { + "epoch": 4.298095703125e-05, + "model_forward_time": 0.02465510368347168, + "step": 28168 + }, + { + "epoch": 4.298095703125e-05, + "step": 28168, + "training_step_time": 0.1209871768951416 + }, + { + "epoch": 4.298248291015625e-05, + "model_forward_time": 0.025136470794677734, + "step": 28169 + }, + { + "epoch": 4.298248291015625e-05, + "step": 28169, + "training_step_time": 0.1324923038482666 + }, + { + "epoch": 4.29840087890625e-05, + "grad_norm": 0.03980742767453194, + "learning_rate": 1.0138623843548078e-06, + "loss": 0.0037, + "step": 28170 + }, + { + "epoch": 4.29840087890625e-05, + "model_forward_time": 0.02494025230407715, + "step": 28170 + }, + { + "epoch": 4.29840087890625e-05, + "step": 28170, + "training_step_time": 0.11650586128234863 + }, + { + "epoch": 4.298553466796875e-05, + "model_forward_time": 0.02489328384399414, + "step": 28171 + }, + { + "epoch": 4.298553466796875e-05, + "step": 28171, + "training_step_time": 0.11912274360656738 + }, + { + "epoch": 4.2987060546875e-05, + "model_forward_time": 0.026041030883789062, + "step": 28172 + }, + { + "epoch": 4.2987060546875e-05, + "step": 28172, + "training_step_time": 0.11481356620788574 + }, + { + "epoch": 4.298858642578125e-05, + "model_forward_time": 0.02515125274658203, + "step": 28173 + }, + { + "epoch": 4.298858642578125e-05, + "step": 28173, + "training_step_time": 0.14807891845703125 + }, + { + "epoch": 4.29901123046875e-05, + "model_forward_time": 0.024869203567504883, + "step": 28174 + }, + { + "epoch": 4.29901123046875e-05, + "step": 28174, + "training_step_time": 0.10747885704040527 + }, + { + "epoch": 4.299163818359375e-05, + "model_forward_time": 0.02606797218322754, + "step": 28175 + }, + { + "epoch": 4.299163818359375e-05, + "step": 28175, + "training_step_time": 0.10799264907836914 + }, + { + "epoch": 4.29931640625e-05, + "model_forward_time": 0.025571107864379883, + "step": 28176 + }, + { + "epoch": 4.29931640625e-05, + "step": 28176, + "training_step_time": 0.1104433536529541 + }, + { + "epoch": 4.299468994140625e-05, + "model_forward_time": 0.02513432502746582, + "step": 28177 + }, + { + "epoch": 4.299468994140625e-05, + "step": 28177, + "training_step_time": 0.10561108589172363 + }, + { + "epoch": 4.29962158203125e-05, + "model_forward_time": 0.02480626106262207, + "step": 28178 + }, + { + "epoch": 4.29962158203125e-05, + "step": 28178, + "training_step_time": 0.1166372299194336 + }, + { + "epoch": 4.299774169921875e-05, + "model_forward_time": 0.024896860122680664, + "step": 28179 + }, + { + "epoch": 4.299774169921875e-05, + "step": 28179, + "training_step_time": 0.13382267951965332 + }, + { + "epoch": 4.2999267578125e-05, + "grad_norm": 0.05883041396737099, + "learning_rate": 1.00284928439493e-06, + "loss": 0.0093, + "step": 28180 + }, + { + "epoch": 4.2999267578125e-05, + "model_forward_time": 0.025124788284301758, + "step": 28180 + }, + { + "epoch": 4.2999267578125e-05, + "step": 28180, + "training_step_time": 0.1123046875 + }, + { + "epoch": 4.300079345703125e-05, + "model_forward_time": 0.024877548217773438, + "step": 28181 + }, + { + "epoch": 4.300079345703125e-05, + "step": 28181, + "training_step_time": 0.10957503318786621 + }, + { + "epoch": 4.30023193359375e-05, + "model_forward_time": 0.02510380744934082, + "step": 28182 + }, + { + "epoch": 4.30023193359375e-05, + "step": 28182, + "training_step_time": 0.11555647850036621 + }, + { + "epoch": 4.300384521484375e-05, + "model_forward_time": 0.024840831756591797, + "step": 28183 + }, + { + "epoch": 4.300384521484375e-05, + "step": 28183, + "training_step_time": 0.10877656936645508 + }, + { + "epoch": 4.300537109375e-05, + "model_forward_time": 0.024894237518310547, + "step": 28184 + }, + { + "epoch": 4.300537109375e-05, + "step": 28184, + "training_step_time": 0.19675993919372559 + }, + { + "epoch": 4.300689697265625e-05, + "model_forward_time": 0.024432659149169922, + "step": 28185 + }, + { + "epoch": 4.300689697265625e-05, + "step": 28185, + "training_step_time": 0.12584710121154785 + }, + { + "epoch": 4.30084228515625e-05, + "model_forward_time": 0.02417445182800293, + "step": 28186 + }, + { + "epoch": 4.30084228515625e-05, + "step": 28186, + "training_step_time": 0.12694740295410156 + }, + { + "epoch": 4.300994873046875e-05, + "model_forward_time": 0.02432084083557129, + "step": 28187 + }, + { + "epoch": 4.300994873046875e-05, + "step": 28187, + "training_step_time": 0.12506532669067383 + }, + { + "epoch": 4.3011474609375e-05, + "model_forward_time": 0.024622678756713867, + "step": 28188 + }, + { + "epoch": 4.3011474609375e-05, + "step": 28188, + "training_step_time": 0.12342071533203125 + }, + { + "epoch": 4.301300048828125e-05, + "model_forward_time": 0.025003671646118164, + "step": 28189 + }, + { + "epoch": 4.301300048828125e-05, + "step": 28189, + "training_step_time": 0.11469721794128418 + }, + { + "epoch": 4.30145263671875e-05, + "grad_norm": 0.18981486558914185, + "learning_rate": 9.918957205933e-07, + "loss": 0.0166, + "step": 28190 + }, + { + "epoch": 4.30145263671875e-05, + "model_forward_time": 0.025143861770629883, + "step": 28190 + }, + { + "epoch": 4.30145263671875e-05, + "step": 28190, + "training_step_time": 0.11365652084350586 + }, + { + "epoch": 4.301605224609375e-05, + "model_forward_time": 0.025329113006591797, + "step": 28191 + }, + { + "epoch": 4.301605224609375e-05, + "step": 28191, + "training_step_time": 0.11486983299255371 + }, + { + "epoch": 4.3017578125e-05, + "model_forward_time": 0.025015830993652344, + "step": 28192 + }, + { + "epoch": 4.3017578125e-05, + "step": 28192, + "training_step_time": 0.10884952545166016 + }, + { + "epoch": 4.301910400390625e-05, + "model_forward_time": 0.025074005126953125, + "step": 28193 + }, + { + "epoch": 4.301910400390625e-05, + "step": 28193, + "training_step_time": 0.11087918281555176 + }, + { + "epoch": 4.30206298828125e-05, + "model_forward_time": 0.024862051010131836, + "step": 28194 + }, + { + "epoch": 4.30206298828125e-05, + "step": 28194, + "training_step_time": 0.10720634460449219 + }, + { + "epoch": 4.302215576171875e-05, + "model_forward_time": 0.024864912033081055, + "step": 28195 + }, + { + "epoch": 4.302215576171875e-05, + "step": 28195, + "training_step_time": 0.10701656341552734 + }, + { + "epoch": 4.3023681640625e-05, + "model_forward_time": 0.024621248245239258, + "step": 28196 + }, + { + "epoch": 4.3023681640625e-05, + "step": 28196, + "training_step_time": 0.11017298698425293 + }, + { + "epoch": 4.302520751953125e-05, + "model_forward_time": 0.02510833740234375, + "step": 28197 + }, + { + "epoch": 4.302520751953125e-05, + "step": 28197, + "training_step_time": 0.10614943504333496 + }, + { + "epoch": 4.30267333984375e-05, + "model_forward_time": 0.02577495574951172, + "step": 28198 + }, + { + "epoch": 4.30267333984375e-05, + "step": 28198, + "training_step_time": 0.1085202693939209 + }, + { + "epoch": 4.302825927734375e-05, + "model_forward_time": 0.02536773681640625, + "step": 28199 + }, + { + "epoch": 4.302825927734375e-05, + "step": 28199, + "training_step_time": 0.10648727416992188 + }, + { + "epoch": 4.302978515625e-05, + "grad_norm": 0.1806434690952301, + "learning_rate": 9.810017062595322e-07, + "loss": 0.0063, + "step": 28200 + }, + { + "epoch": 4.302978515625e-05, + "model_forward_time": 0.024773597717285156, + "step": 28200 + }, + { + "epoch": 4.302978515625e-05, + "step": 28200, + "training_step_time": 0.11128950119018555 + }, + { + "epoch": 4.303131103515625e-05, + "model_forward_time": 0.024989843368530273, + "step": 28201 + }, + { + "epoch": 4.303131103515625e-05, + "step": 28201, + "training_step_time": 0.10437560081481934 + }, + { + "epoch": 4.30328369140625e-05, + "model_forward_time": 0.025533199310302734, + "step": 28202 + }, + { + "epoch": 4.30328369140625e-05, + "step": 28202, + "training_step_time": 0.1064298152923584 + }, + { + "epoch": 4.303436279296875e-05, + "model_forward_time": 0.025118350982666016, + "step": 28203 + }, + { + "epoch": 4.303436279296875e-05, + "step": 28203, + "training_step_time": 0.10435366630554199 + }, + { + "epoch": 4.3035888671875e-05, + "model_forward_time": 0.02518463134765625, + "step": 28204 + }, + { + "epoch": 4.3035888671875e-05, + "step": 28204, + "training_step_time": 0.1034393310546875 + }, + { + "epoch": 4.303741455078125e-05, + "model_forward_time": 0.024702072143554688, + "step": 28205 + }, + { + "epoch": 4.303741455078125e-05, + "step": 28205, + "training_step_time": 0.18328022956848145 + }, + { + "epoch": 4.30389404296875e-05, + "model_forward_time": 0.02493882179260254, + "step": 28206 + }, + { + "epoch": 4.30389404296875e-05, + "step": 28206, + "training_step_time": 0.1294238567352295 + }, + { + "epoch": 4.304046630859375e-05, + "model_forward_time": 0.024860382080078125, + "step": 28207 + }, + { + "epoch": 4.304046630859375e-05, + "step": 28207, + "training_step_time": 0.15462756156921387 + }, + { + "epoch": 4.30419921875e-05, + "model_forward_time": 0.024163246154785156, + "step": 28208 + }, + { + "epoch": 4.30419921875e-05, + "step": 28208, + "training_step_time": 0.21624112129211426 + }, + { + "epoch": 4.304351806640625e-05, + "model_forward_time": 0.02478814125061035, + "step": 28209 + }, + { + "epoch": 4.304351806640625e-05, + "step": 28209, + "training_step_time": 0.11119961738586426 + }, + { + "epoch": 4.30450439453125e-05, + "grad_norm": 0.05699380114674568, + "learning_rate": 9.701672546308827e-07, + "loss": 0.0049, + "step": 28210 + }, + { + "epoch": 4.30450439453125e-05, + "model_forward_time": 0.02462482452392578, + "step": 28210 + }, + { + "epoch": 4.30450439453125e-05, + "step": 28210, + "training_step_time": 0.10605740547180176 + }, + { + "epoch": 4.304656982421875e-05, + "model_forward_time": 0.02567124366760254, + "step": 28211 + }, + { + "epoch": 4.304656982421875e-05, + "step": 28211, + "training_step_time": 0.10669803619384766 + }, + { + "epoch": 4.3048095703125e-05, + "model_forward_time": 0.026340484619140625, + "step": 28212 + }, + { + "epoch": 4.3048095703125e-05, + "step": 28212, + "training_step_time": 0.10974860191345215 + }, + { + "epoch": 4.304962158203125e-05, + "model_forward_time": 0.025995969772338867, + "step": 28213 + }, + { + "epoch": 4.304962158203125e-05, + "step": 28213, + "training_step_time": 0.12052607536315918 + }, + { + "epoch": 4.30511474609375e-05, + "model_forward_time": 0.027274370193481445, + "step": 28214 + }, + { + "epoch": 4.30511474609375e-05, + "step": 28214, + "training_step_time": 0.22322678565979004 + }, + { + "epoch": 4.305267333984375e-05, + "model_forward_time": 0.024499177932739258, + "step": 28215 + }, + { + "epoch": 4.305267333984375e-05, + "step": 28215, + "training_step_time": 0.11465907096862793 + }, + { + "epoch": 4.305419921875e-05, + "model_forward_time": 0.02574634552001953, + "step": 28216 + }, + { + "epoch": 4.305419921875e-05, + "step": 28216, + "training_step_time": 0.11844086647033691 + }, + { + "epoch": 4.305572509765625e-05, + "model_forward_time": 0.025334835052490234, + "step": 28217 + }, + { + "epoch": 4.305572509765625e-05, + "step": 28217, + "training_step_time": 0.16774702072143555 + }, + { + "epoch": 4.30572509765625e-05, + "model_forward_time": 0.024841785430908203, + "step": 28218 + }, + { + "epoch": 4.30572509765625e-05, + "step": 28218, + "training_step_time": 0.17049193382263184 + }, + { + "epoch": 4.305877685546875e-05, + "model_forward_time": 0.024930477142333984, + "step": 28219 + }, + { + "epoch": 4.305877685546875e-05, + "step": 28219, + "training_step_time": 0.10560393333435059 + }, + { + "epoch": 4.3060302734375e-05, + "grad_norm": 0.07996619492769241, + "learning_rate": 9.593923788722315e-07, + "loss": 0.0042, + "step": 28220 + }, + { + "epoch": 4.3060302734375e-05, + "model_forward_time": 0.024774551391601562, + "step": 28220 + }, + { + "epoch": 4.3060302734375e-05, + "step": 28220, + "training_step_time": 0.10705971717834473 + }, + { + "epoch": 4.306182861328125e-05, + "model_forward_time": 0.024884462356567383, + "step": 28221 + }, + { + "epoch": 4.306182861328125e-05, + "step": 28221, + "training_step_time": 0.10380029678344727 + }, + { + "epoch": 4.30633544921875e-05, + "model_forward_time": 0.025249481201171875, + "step": 28222 + }, + { + "epoch": 4.30633544921875e-05, + "step": 28222, + "training_step_time": 0.18227505683898926 + }, + { + "epoch": 4.306488037109375e-05, + "model_forward_time": 0.024384498596191406, + "step": 28223 + }, + { + "epoch": 4.306488037109375e-05, + "step": 28223, + "training_step_time": 0.14304304122924805 + }, + { + "epoch": 4.306640625e-05, + "model_forward_time": 0.024037599563598633, + "step": 28224 + }, + { + "epoch": 4.306640625e-05, + "step": 28224, + "training_step_time": 0.10934042930603027 + }, + { + "epoch": 4.306793212890625e-05, + "model_forward_time": 0.024297475814819336, + "step": 28225 + }, + { + "epoch": 4.306793212890625e-05, + "step": 28225, + "training_step_time": 0.10251498222351074 + }, + { + "epoch": 4.30694580078125e-05, + "model_forward_time": 0.024562835693359375, + "step": 28226 + }, + { + "epoch": 4.30694580078125e-05, + "step": 28226, + "training_step_time": 0.12615728378295898 + }, + { + "epoch": 4.307098388671875e-05, + "model_forward_time": 0.025257110595703125, + "step": 28227 + }, + { + "epoch": 4.307098388671875e-05, + "step": 28227, + "training_step_time": 0.155104398727417 + }, + { + "epoch": 4.3072509765625e-05, + "model_forward_time": 0.024361133575439453, + "step": 28228 + }, + { + "epoch": 4.3072509765625e-05, + "step": 28228, + "training_step_time": 0.15401816368103027 + }, + { + "epoch": 4.307403564453125e-05, + "model_forward_time": 0.024705886840820312, + "step": 28229 + }, + { + "epoch": 4.307403564453125e-05, + "step": 28229, + "training_step_time": 0.13148045539855957 + }, + { + "epoch": 4.30755615234375e-05, + "grad_norm": 0.17489050328731537, + "learning_rate": 9.486770920760668e-07, + "loss": 0.0044, + "step": 28230 + }, + { + "epoch": 4.30755615234375e-05, + "model_forward_time": 0.02397632598876953, + "step": 28230 + }, + { + "epoch": 4.30755615234375e-05, + "step": 28230, + "training_step_time": 0.1252427101135254 + }, + { + "epoch": 4.307708740234375e-05, + "model_forward_time": 0.024882793426513672, + "step": 28231 + }, + { + "epoch": 4.307708740234375e-05, + "step": 28231, + "training_step_time": 0.1289370059967041 + }, + { + "epoch": 4.307861328125e-05, + "model_forward_time": 0.02490520477294922, + "step": 28232 + }, + { + "epoch": 4.307861328125e-05, + "step": 28232, + "training_step_time": 0.1280078887939453 + }, + { + "epoch": 4.308013916015625e-05, + "model_forward_time": 0.024621009826660156, + "step": 28233 + }, + { + "epoch": 4.308013916015625e-05, + "step": 28233, + "training_step_time": 0.1309969425201416 + }, + { + "epoch": 4.30816650390625e-05, + "model_forward_time": 0.024280071258544922, + "step": 28234 + }, + { + "epoch": 4.30816650390625e-05, + "step": 28234, + "training_step_time": 0.12112188339233398 + }, + { + "epoch": 4.308319091796875e-05, + "model_forward_time": 0.02489781379699707, + "step": 28235 + }, + { + "epoch": 4.308319091796875e-05, + "step": 28235, + "training_step_time": 0.11665225028991699 + }, + { + "epoch": 4.3084716796875e-05, + "model_forward_time": 0.02522134780883789, + "step": 28236 + }, + { + "epoch": 4.3084716796875e-05, + "step": 28236, + "training_step_time": 0.11767888069152832 + }, + { + "epoch": 4.308624267578125e-05, + "model_forward_time": 0.025439739227294922, + "step": 28237 + }, + { + "epoch": 4.308624267578125e-05, + "step": 28237, + "training_step_time": 0.1096193790435791 + }, + { + "epoch": 4.30877685546875e-05, + "model_forward_time": 0.02503657341003418, + "step": 28238 + }, + { + "epoch": 4.30877685546875e-05, + "step": 28238, + "training_step_time": 0.11063909530639648 + }, + { + "epoch": 4.308929443359375e-05, + "model_forward_time": 0.0253903865814209, + "step": 28239 + }, + { + "epoch": 4.308929443359375e-05, + "step": 28239, + "training_step_time": 0.10936403274536133 + }, + { + "epoch": 4.30908203125e-05, + "grad_norm": 0.04891812801361084, + "learning_rate": 9.380214072624682e-07, + "loss": 0.0025, + "step": 28240 + }, + { + "epoch": 4.30908203125e-05, + "model_forward_time": 0.025256633758544922, + "step": 28240 + }, + { + "epoch": 4.30908203125e-05, + "step": 28240, + "training_step_time": 0.1112825870513916 + }, + { + "epoch": 4.309234619140625e-05, + "model_forward_time": 0.025159597396850586, + "step": 28241 + }, + { + "epoch": 4.309234619140625e-05, + "step": 28241, + "training_step_time": 0.11025381088256836 + }, + { + "epoch": 4.30938720703125e-05, + "model_forward_time": 0.024993419647216797, + "step": 28242 + }, + { + "epoch": 4.30938720703125e-05, + "step": 28242, + "training_step_time": 0.10692548751831055 + }, + { + "epoch": 4.309539794921875e-05, + "model_forward_time": 0.02507495880126953, + "step": 28243 + }, + { + "epoch": 4.309539794921875e-05, + "step": 28243, + "training_step_time": 0.10691332817077637 + }, + { + "epoch": 4.3096923828125e-05, + "model_forward_time": 0.025977611541748047, + "step": 28244 + }, + { + "epoch": 4.3096923828125e-05, + "step": 28244, + "training_step_time": 0.10986566543579102 + }, + { + "epoch": 4.309844970703125e-05, + "model_forward_time": 0.024999618530273438, + "step": 28245 + }, + { + "epoch": 4.309844970703125e-05, + "step": 28245, + "training_step_time": 0.1065213680267334 + }, + { + "epoch": 4.30999755859375e-05, + "model_forward_time": 0.02481532096862793, + "step": 28246 + }, + { + "epoch": 4.30999755859375e-05, + "step": 28246, + "training_step_time": 0.10678696632385254 + }, + { + "epoch": 4.310150146484375e-05, + "model_forward_time": 0.024996519088745117, + "step": 28247 + }, + { + "epoch": 4.310150146484375e-05, + "step": 28247, + "training_step_time": 0.10556387901306152 + }, + { + "epoch": 4.310302734375e-05, + "model_forward_time": 0.02518773078918457, + "step": 28248 + }, + { + "epoch": 4.310302734375e-05, + "step": 28248, + "training_step_time": 0.10475993156433105 + }, + { + "epoch": 4.310455322265625e-05, + "model_forward_time": 0.02476358413696289, + "step": 28249 + }, + { + "epoch": 4.310455322265625e-05, + "step": 28249, + "training_step_time": 0.16903138160705566 + }, + { + "epoch": 4.31060791015625e-05, + "grad_norm": 0.34499987959861755, + "learning_rate": 9.274253373791064e-07, + "loss": 0.0093, + "step": 28250 + }, + { + "epoch": 4.31060791015625e-05, + "model_forward_time": 0.024405956268310547, + "step": 28250 + }, + { + "epoch": 4.31060791015625e-05, + "step": 28250, + "training_step_time": 0.1441822052001953 + }, + { + "epoch": 4.310760498046875e-05, + "model_forward_time": 0.02486705780029297, + "step": 28251 + }, + { + "epoch": 4.310760498046875e-05, + "step": 28251, + "training_step_time": 0.14620566368103027 + }, + { + "epoch": 4.3109130859375e-05, + "model_forward_time": 0.02442789077758789, + "step": 28252 + }, + { + "epoch": 4.3109130859375e-05, + "step": 28252, + "training_step_time": 0.21453332901000977 + }, + { + "epoch": 4.311065673828125e-05, + "model_forward_time": 0.02476644515991211, + "step": 28253 + }, + { + "epoch": 4.311065673828125e-05, + "step": 28253, + "training_step_time": 0.11744499206542969 + }, + { + "epoch": 4.31121826171875e-05, + "model_forward_time": 0.024611949920654297, + "step": 28254 + }, + { + "epoch": 4.31121826171875e-05, + "step": 28254, + "training_step_time": 0.10311126708984375 + }, + { + "epoch": 4.311370849609375e-05, + "model_forward_time": 0.02522110939025879, + "step": 28255 + }, + { + "epoch": 4.311370849609375e-05, + "step": 28255, + "training_step_time": 0.10689926147460938 + }, + { + "epoch": 4.3115234375e-05, + "model_forward_time": 0.02557969093322754, + "step": 28256 + }, + { + "epoch": 4.3115234375e-05, + "step": 28256, + "training_step_time": 0.10348844528198242 + }, + { + "epoch": 4.311676025390625e-05, + "model_forward_time": 0.02521347999572754, + "step": 28257 + }, + { + "epoch": 4.311676025390625e-05, + "step": 28257, + "training_step_time": 0.1448228359222412 + }, + { + "epoch": 4.31182861328125e-05, + "model_forward_time": 0.025263547897338867, + "step": 28258 + }, + { + "epoch": 4.31182861328125e-05, + "step": 28258, + "training_step_time": 0.2192821502685547 + }, + { + "epoch": 4.311981201171875e-05, + "model_forward_time": 0.02473282814025879, + "step": 28259 + }, + { + "epoch": 4.311981201171875e-05, + "step": 28259, + "training_step_time": 0.17320942878723145 + }, + { + "epoch": 4.3121337890625e-05, + "grad_norm": 0.11382688581943512, + "learning_rate": 9.168888953011989e-07, + "loss": 0.006, + "step": 28260 + }, + { + "epoch": 4.3121337890625e-05, + "model_forward_time": 0.024869441986083984, + "step": 28260 + }, + { + "epoch": 4.3121337890625e-05, + "step": 28260, + "training_step_time": 0.14842438697814941 + }, + { + "epoch": 4.312286376953125e-05, + "model_forward_time": 0.02479243278503418, + "step": 28261 + }, + { + "epoch": 4.312286376953125e-05, + "step": 28261, + "training_step_time": 0.11565065383911133 + }, + { + "epoch": 4.31243896484375e-05, + "model_forward_time": 0.026392698287963867, + "step": 28262 + }, + { + "epoch": 4.31243896484375e-05, + "step": 28262, + "training_step_time": 0.14272475242614746 + }, + { + "epoch": 4.312591552734375e-05, + "model_forward_time": 0.025150537490844727, + "step": 28263 + }, + { + "epoch": 4.312591552734375e-05, + "step": 28263, + "training_step_time": 0.10574936866760254 + }, + { + "epoch": 4.312744140625e-05, + "model_forward_time": 0.025140047073364258, + "step": 28264 + }, + { + "epoch": 4.312744140625e-05, + "step": 28264, + "training_step_time": 0.10417962074279785 + }, + { + "epoch": 4.312896728515625e-05, + "model_forward_time": 0.02566385269165039, + "step": 28265 + }, + { + "epoch": 4.312896728515625e-05, + "step": 28265, + "training_step_time": 0.1063389778137207 + }, + { + "epoch": 4.31304931640625e-05, + "model_forward_time": 0.02505803108215332, + "step": 28266 + }, + { + "epoch": 4.31304931640625e-05, + "step": 28266, + "training_step_time": 0.16218280792236328 + }, + { + "epoch": 4.313201904296875e-05, + "model_forward_time": 0.024709701538085938, + "step": 28267 + }, + { + "epoch": 4.313201904296875e-05, + "step": 28267, + "training_step_time": 0.13949322700500488 + }, + { + "epoch": 4.3133544921875e-05, + "model_forward_time": 0.02446126937866211, + "step": 28268 + }, + { + "epoch": 4.3133544921875e-05, + "step": 28268, + "training_step_time": 0.10626792907714844 + }, + { + "epoch": 4.313507080078125e-05, + "model_forward_time": 0.025397777557373047, + "step": 28269 + }, + { + "epoch": 4.313507080078125e-05, + "step": 28269, + "training_step_time": 0.11220073699951172 + }, + { + "epoch": 4.31365966796875e-05, + "grad_norm": 0.08717884123325348, + "learning_rate": 9.064120938315213e-07, + "loss": 0.0023, + "step": 28270 + }, + { + "epoch": 4.31365966796875e-05, + "model_forward_time": 0.0251922607421875, + "step": 28270 + }, + { + "epoch": 4.31365966796875e-05, + "step": 28270, + "training_step_time": 0.1097097396850586 + }, + { + "epoch": 4.313812255859375e-05, + "model_forward_time": 0.02539539337158203, + "step": 28271 + }, + { + "epoch": 4.313812255859375e-05, + "step": 28271, + "training_step_time": 0.10501718521118164 + }, + { + "epoch": 4.31396484375e-05, + "model_forward_time": 0.02543354034423828, + "step": 28272 + }, + { + "epoch": 4.31396484375e-05, + "step": 28272, + "training_step_time": 0.19508075714111328 + }, + { + "epoch": 4.314117431640625e-05, + "model_forward_time": 0.024466753005981445, + "step": 28273 + }, + { + "epoch": 4.314117431640625e-05, + "step": 28273, + "training_step_time": 0.1120765209197998 + }, + { + "epoch": 4.31427001953125e-05, + "model_forward_time": 0.024715900421142578, + "step": 28274 + }, + { + "epoch": 4.31427001953125e-05, + "step": 28274, + "training_step_time": 0.12807583808898926 + }, + { + "epoch": 4.314422607421875e-05, + "model_forward_time": 0.02486252784729004, + "step": 28275 + }, + { + "epoch": 4.314422607421875e-05, + "step": 28275, + "training_step_time": 0.12356305122375488 + }, + { + "epoch": 4.3145751953125e-05, + "model_forward_time": 0.02499985694885254, + "step": 28276 + }, + { + "epoch": 4.3145751953125e-05, + "step": 28276, + "training_step_time": 0.12183666229248047 + }, + { + "epoch": 4.314727783203125e-05, + "model_forward_time": 0.025493144989013672, + "step": 28277 + }, + { + "epoch": 4.314727783203125e-05, + "step": 28277, + "training_step_time": 0.12496089935302734 + }, + { + "epoch": 4.31488037109375e-05, + "model_forward_time": 0.025468826293945312, + "step": 28278 + }, + { + "epoch": 4.31488037109375e-05, + "step": 28278, + "training_step_time": 0.11671662330627441 + }, + { + "epoch": 4.315032958984375e-05, + "model_forward_time": 0.024949073791503906, + "step": 28279 + }, + { + "epoch": 4.315032958984375e-05, + "step": 28279, + "training_step_time": 0.1119842529296875 + }, + { + "epoch": 4.315185546875e-05, + "grad_norm": 0.04625631868839264, + "learning_rate": 8.959949457003736e-07, + "loss": 0.0054, + "step": 28280 + }, + { + "epoch": 4.315185546875e-05, + "model_forward_time": 0.025320768356323242, + "step": 28280 + }, + { + "epoch": 4.315185546875e-05, + "step": 28280, + "training_step_time": 0.11227297782897949 + }, + { + "epoch": 4.315338134765625e-05, + "model_forward_time": 0.025124073028564453, + "step": 28281 + }, + { + "epoch": 4.315338134765625e-05, + "step": 28281, + "training_step_time": 0.10923576354980469 + }, + { + "epoch": 4.31549072265625e-05, + "model_forward_time": 0.02552032470703125, + "step": 28282 + }, + { + "epoch": 4.31549072265625e-05, + "step": 28282, + "training_step_time": 0.1077263355255127 + }, + { + "epoch": 4.315643310546875e-05, + "model_forward_time": 0.02720165252685547, + "step": 28283 + }, + { + "epoch": 4.315643310546875e-05, + "step": 28283, + "training_step_time": 0.10877156257629395 + }, + { + "epoch": 4.3157958984375e-05, + "model_forward_time": 0.025402545928955078, + "step": 28284 + }, + { + "epoch": 4.3157958984375e-05, + "step": 28284, + "training_step_time": 0.11357951164245605 + }, + { + "epoch": 4.315948486328125e-05, + "model_forward_time": 0.027219772338867188, + "step": 28285 + }, + { + "epoch": 4.315948486328125e-05, + "step": 28285, + "training_step_time": 0.10899949073791504 + }, + { + "epoch": 4.31610107421875e-05, + "model_forward_time": 0.025385618209838867, + "step": 28286 + }, + { + "epoch": 4.31610107421875e-05, + "step": 28286, + "training_step_time": 0.11057853698730469 + }, + { + "epoch": 4.316253662109375e-05, + "model_forward_time": 0.025757551193237305, + "step": 28287 + }, + { + "epoch": 4.316253662109375e-05, + "step": 28287, + "training_step_time": 0.1057596206665039 + }, + { + "epoch": 4.31640625e-05, + "model_forward_time": 0.02553415298461914, + "step": 28288 + }, + { + "epoch": 4.31640625e-05, + "step": 28288, + "training_step_time": 0.10589098930358887 + }, + { + "epoch": 4.316558837890625e-05, + "model_forward_time": 0.02532505989074707, + "step": 28289 + }, + { + "epoch": 4.316558837890625e-05, + "step": 28289, + "training_step_time": 0.10649704933166504 + }, + { + "epoch": 4.31671142578125e-05, + "grad_norm": 0.05212622508406639, + "learning_rate": 8.856374635655695e-07, + "loss": 0.0025, + "step": 28290 + }, + { + "epoch": 4.31671142578125e-05, + "model_forward_time": 0.025525331497192383, + "step": 28290 + }, + { + "epoch": 4.31671142578125e-05, + "step": 28290, + "training_step_time": 0.10468745231628418 + }, + { + "epoch": 4.316864013671875e-05, + "model_forward_time": 0.025528907775878906, + "step": 28291 + }, + { + "epoch": 4.316864013671875e-05, + "step": 28291, + "training_step_time": 0.11216282844543457 + }, + { + "epoch": 4.3170166015625e-05, + "model_forward_time": 0.025467634201049805, + "step": 28292 + }, + { + "epoch": 4.3170166015625e-05, + "step": 28292, + "training_step_time": 0.10926532745361328 + }, + { + "epoch": 4.317169189453125e-05, + "model_forward_time": 0.02861166000366211, + "step": 28293 + }, + { + "epoch": 4.317169189453125e-05, + "step": 28293, + "training_step_time": 0.16585516929626465 + }, + { + "epoch": 4.31732177734375e-05, + "model_forward_time": 0.024658679962158203, + "step": 28294 + }, + { + "epoch": 4.31732177734375e-05, + "step": 28294, + "training_step_time": 0.16858959197998047 + }, + { + "epoch": 4.317474365234375e-05, + "model_forward_time": 0.024713516235351562, + "step": 28295 + }, + { + "epoch": 4.317474365234375e-05, + "step": 28295, + "training_step_time": 0.18735861778259277 + }, + { + "epoch": 4.317626953125e-05, + "model_forward_time": 0.026334524154663086, + "step": 28296 + }, + { + "epoch": 4.317626953125e-05, + "step": 28296, + "training_step_time": 0.1381378173828125 + }, + { + "epoch": 4.317779541015625e-05, + "model_forward_time": 0.024439096450805664, + "step": 28297 + }, + { + "epoch": 4.317779541015625e-05, + "step": 28297, + "training_step_time": 0.19620060920715332 + }, + { + "epoch": 4.31793212890625e-05, + "model_forward_time": 0.024636268615722656, + "step": 28298 + }, + { + "epoch": 4.31793212890625e-05, + "step": 28298, + "training_step_time": 0.18410444259643555 + }, + { + "epoch": 4.318084716796875e-05, + "model_forward_time": 0.024605274200439453, + "step": 28299 + }, + { + "epoch": 4.318084716796875e-05, + "step": 28299, + "training_step_time": 0.10127019882202148 + }, + { + "epoch": 4.3182373046875e-05, + "grad_norm": 0.0862986296415329, + "learning_rate": 8.753396600124253e-07, + "loss": 0.003, + "step": 28300 + }, + { + "epoch": 4.3182373046875e-05, + "model_forward_time": 0.02459716796875, + "step": 28300 + }, + { + "epoch": 4.3182373046875e-05, + "step": 28300, + "training_step_time": 0.10310173034667969 + }, + { + "epoch": 4.318389892578125e-05, + "model_forward_time": 0.025064706802368164, + "step": 28301 + }, + { + "epoch": 4.318389892578125e-05, + "step": 28301, + "training_step_time": 0.1896193027496338 + }, + { + "epoch": 4.31854248046875e-05, + "model_forward_time": 0.02478647232055664, + "step": 28302 + }, + { + "epoch": 4.31854248046875e-05, + "step": 28302, + "training_step_time": 0.2170579433441162 + }, + { + "epoch": 4.318695068359375e-05, + "model_forward_time": 0.024701833724975586, + "step": 28303 + }, + { + "epoch": 4.318695068359375e-05, + "step": 28303, + "training_step_time": 0.1712322235107422 + }, + { + "epoch": 4.31884765625e-05, + "model_forward_time": 0.023927927017211914, + "step": 28304 + }, + { + "epoch": 4.31884765625e-05, + "step": 28304, + "training_step_time": 0.14850974082946777 + }, + { + "epoch": 4.319000244140625e-05, + "model_forward_time": 0.024541139602661133, + "step": 28305 + }, + { + "epoch": 4.319000244140625e-05, + "step": 28305, + "training_step_time": 0.13044452667236328 + }, + { + "epoch": 4.31915283203125e-05, + "model_forward_time": 0.024962663650512695, + "step": 28306 + }, + { + "epoch": 4.31915283203125e-05, + "step": 28306, + "training_step_time": 0.17561054229736328 + }, + { + "epoch": 4.319305419921875e-05, + "model_forward_time": 0.024559974670410156, + "step": 28307 + }, + { + "epoch": 4.319305419921875e-05, + "step": 28307, + "training_step_time": 0.1543276309967041 + }, + { + "epoch": 4.3194580078125e-05, + "model_forward_time": 0.024391889572143555, + "step": 28308 + }, + { + "epoch": 4.3194580078125e-05, + "step": 28308, + "training_step_time": 0.14554142951965332 + }, + { + "epoch": 4.319610595703125e-05, + "model_forward_time": 0.02467036247253418, + "step": 28309 + }, + { + "epoch": 4.319610595703125e-05, + "step": 28309, + "training_step_time": 0.20698904991149902 + }, + { + "epoch": 4.31976318359375e-05, + "grad_norm": 0.06878754496574402, + "learning_rate": 8.651015475537538e-07, + "loss": 0.003, + "step": 28310 + }, + { + "epoch": 4.31976318359375e-05, + "model_forward_time": 0.024463176727294922, + "step": 28310 + }, + { + "epoch": 4.31976318359375e-05, + "step": 28310, + "training_step_time": 0.1341533660888672 + }, + { + "epoch": 4.319915771484375e-05, + "model_forward_time": 0.02340841293334961, + "step": 28311 + }, + { + "epoch": 4.319915771484375e-05, + "step": 28311, + "training_step_time": 0.19932293891906738 + }, + { + "epoch": 4.320068359375e-05, + "model_forward_time": 0.02461385726928711, + "step": 28312 + }, + { + "epoch": 4.320068359375e-05, + "step": 28312, + "training_step_time": 0.11487221717834473 + }, + { + "epoch": 4.320220947265625e-05, + "model_forward_time": 0.024092435836791992, + "step": 28313 + }, + { + "epoch": 4.320220947265625e-05, + "step": 28313, + "training_step_time": 0.18918967247009277 + }, + { + "epoch": 4.32037353515625e-05, + "model_forward_time": 0.024534940719604492, + "step": 28314 + }, + { + "epoch": 4.32037353515625e-05, + "step": 28314, + "training_step_time": 0.10866928100585938 + }, + { + "epoch": 4.320526123046875e-05, + "model_forward_time": 0.024505615234375, + "step": 28315 + }, + { + "epoch": 4.320526123046875e-05, + "step": 28315, + "training_step_time": 0.11437535285949707 + }, + { + "epoch": 4.3206787109375e-05, + "model_forward_time": 0.02554154396057129, + "step": 28316 + }, + { + "epoch": 4.3206787109375e-05, + "step": 28316, + "training_step_time": 0.10933947563171387 + }, + { + "epoch": 4.320831298828125e-05, + "model_forward_time": 0.025414466857910156, + "step": 28317 + }, + { + "epoch": 4.320831298828125e-05, + "step": 28317, + "training_step_time": 0.109588623046875 + }, + { + "epoch": 4.32098388671875e-05, + "model_forward_time": 0.025625944137573242, + "step": 28318 + }, + { + "epoch": 4.32098388671875e-05, + "step": 28318, + "training_step_time": 0.10823202133178711 + }, + { + "epoch": 4.321136474609375e-05, + "model_forward_time": 0.02571582794189453, + "step": 28319 + }, + { + "epoch": 4.321136474609375e-05, + "step": 28319, + "training_step_time": 0.10779619216918945 + }, + { + "epoch": 4.3212890625e-05, + "grad_norm": 0.04035257175564766, + "learning_rate": 8.549231386298151e-07, + "loss": 0.0038, + "step": 28320 + }, + { + "epoch": 4.3212890625e-05, + "model_forward_time": 0.025146484375, + "step": 28320 + }, + { + "epoch": 4.3212890625e-05, + "step": 28320, + "training_step_time": 0.10576033592224121 + }, + { + "epoch": 4.321441650390625e-05, + "model_forward_time": 0.02530074119567871, + "step": 28321 + }, + { + "epoch": 4.321441650390625e-05, + "step": 28321, + "training_step_time": 0.10698771476745605 + }, + { + "epoch": 4.32159423828125e-05, + "model_forward_time": 0.025170326232910156, + "step": 28322 + }, + { + "epoch": 4.32159423828125e-05, + "step": 28322, + "training_step_time": 0.10840225219726562 + }, + { + "epoch": 4.321746826171875e-05, + "model_forward_time": 0.025565385818481445, + "step": 28323 + }, + { + "epoch": 4.321746826171875e-05, + "step": 28323, + "training_step_time": 0.10559844970703125 + }, + { + "epoch": 4.3218994140625e-05, + "model_forward_time": 0.02537393569946289, + "step": 28324 + }, + { + "epoch": 4.3218994140625e-05, + "step": 28324, + "training_step_time": 0.10785841941833496 + }, + { + "epoch": 4.322052001953125e-05, + "model_forward_time": 0.025562047958374023, + "step": 28325 + }, + { + "epoch": 4.322052001953125e-05, + "step": 28325, + "training_step_time": 0.10639643669128418 + }, + { + "epoch": 4.32220458984375e-05, + "model_forward_time": 0.027622222900390625, + "step": 28326 + }, + { + "epoch": 4.32220458984375e-05, + "step": 28326, + "training_step_time": 0.10772156715393066 + }, + { + "epoch": 4.322357177734375e-05, + "model_forward_time": 0.025440454483032227, + "step": 28327 + }, + { + "epoch": 4.322357177734375e-05, + "step": 28327, + "training_step_time": 0.10530447959899902 + }, + { + "epoch": 4.322509765625e-05, + "model_forward_time": 0.02528071403503418, + "step": 28328 + }, + { + "epoch": 4.322509765625e-05, + "step": 28328, + "training_step_time": 0.1056208610534668 + }, + { + "epoch": 4.322662353515625e-05, + "model_forward_time": 0.02536630630493164, + "step": 28329 + }, + { + "epoch": 4.322662353515625e-05, + "step": 28329, + "training_step_time": 0.10483694076538086 + }, + { + "epoch": 4.32281494140625e-05, + "grad_norm": 0.042170602828264236, + "learning_rate": 8.448044456083493e-07, + "loss": 0.0049, + "step": 28330 + }, + { + "epoch": 4.32281494140625e-05, + "model_forward_time": 0.025777339935302734, + "step": 28330 + }, + { + "epoch": 4.32281494140625e-05, + "step": 28330, + "training_step_time": 0.10893654823303223 + }, + { + "epoch": 4.322967529296875e-05, + "model_forward_time": 0.02574014663696289, + "step": 28331 + }, + { + "epoch": 4.322967529296875e-05, + "step": 28331, + "training_step_time": 0.10791540145874023 + }, + { + "epoch": 4.3231201171875e-05, + "model_forward_time": 0.027769088745117188, + "step": 28332 + }, + { + "epoch": 4.3231201171875e-05, + "step": 28332, + "training_step_time": 0.10796689987182617 + }, + { + "epoch": 4.323272705078125e-05, + "model_forward_time": 0.025609254837036133, + "step": 28333 + }, + { + "epoch": 4.323272705078125e-05, + "step": 28333, + "training_step_time": 0.10552978515625 + }, + { + "epoch": 4.32342529296875e-05, + "model_forward_time": 0.02552652359008789, + "step": 28334 + }, + { + "epoch": 4.32342529296875e-05, + "step": 28334, + "training_step_time": 0.10676908493041992 + }, + { + "epoch": 4.323577880859375e-05, + "model_forward_time": 0.02567267417907715, + "step": 28335 + }, + { + "epoch": 4.323577880859375e-05, + "step": 28335, + "training_step_time": 0.10521364212036133 + }, + { + "epoch": 4.32373046875e-05, + "model_forward_time": 0.026295185089111328, + "step": 28336 + }, + { + "epoch": 4.32373046875e-05, + "step": 28336, + "training_step_time": 0.15050673484802246 + }, + { + "epoch": 4.323883056640625e-05, + "model_forward_time": 0.025277376174926758, + "step": 28337 + }, + { + "epoch": 4.323883056640625e-05, + "step": 28337, + "training_step_time": 0.1932518482208252 + }, + { + "epoch": 4.32403564453125e-05, + "model_forward_time": 0.024654150009155273, + "step": 28338 + }, + { + "epoch": 4.32403564453125e-05, + "step": 28338, + "training_step_time": 0.21532201766967773 + }, + { + "epoch": 4.324188232421875e-05, + "model_forward_time": 0.024502992630004883, + "step": 28339 + }, + { + "epoch": 4.324188232421875e-05, + "step": 28339, + "training_step_time": 0.16487741470336914 + }, + { + "epoch": 4.3243408203125e-05, + "grad_norm": 0.0799122229218483, + "learning_rate": 8.347454807845045e-07, + "loss": 0.0043, + "step": 28340 + }, + { + "epoch": 4.3243408203125e-05, + "model_forward_time": 0.02477741241455078, + "step": 28340 + }, + { + "epoch": 4.3243408203125e-05, + "step": 28340, + "training_step_time": 0.10457611083984375 + }, + { + "epoch": 4.324493408203125e-05, + "model_forward_time": 0.025510311126708984, + "step": 28341 + }, + { + "epoch": 4.324493408203125e-05, + "step": 28341, + "training_step_time": 0.10272932052612305 + }, + { + "epoch": 4.32464599609375e-05, + "model_forward_time": 0.02563953399658203, + "step": 28342 + }, + { + "epoch": 4.32464599609375e-05, + "step": 28342, + "training_step_time": 0.1034855842590332 + }, + { + "epoch": 4.324798583984375e-05, + "model_forward_time": 0.025463104248046875, + "step": 28343 + }, + { + "epoch": 4.324798583984375e-05, + "step": 28343, + "training_step_time": 0.10534262657165527 + }, + { + "epoch": 4.324951171875e-05, + "model_forward_time": 0.025864839553833008, + "step": 28344 + }, + { + "epoch": 4.324951171875e-05, + "step": 28344, + "training_step_time": 0.17320752143859863 + }, + { + "epoch": 4.325103759765625e-05, + "model_forward_time": 0.024507999420166016, + "step": 28345 + }, + { + "epoch": 4.325103759765625e-05, + "step": 28345, + "training_step_time": 0.22058534622192383 + }, + { + "epoch": 4.32525634765625e-05, + "model_forward_time": 0.024581193923950195, + "step": 28346 + }, + { + "epoch": 4.32525634765625e-05, + "step": 28346, + "training_step_time": 0.2081892490386963 + }, + { + "epoch": 4.325408935546875e-05, + "model_forward_time": 0.024051189422607422, + "step": 28347 + }, + { + "epoch": 4.325408935546875e-05, + "step": 28347, + "training_step_time": 0.11482667922973633 + }, + { + "epoch": 4.3255615234375e-05, + "model_forward_time": 0.024718284606933594, + "step": 28348 + }, + { + "epoch": 4.3255615234375e-05, + "step": 28348, + "training_step_time": 0.12096476554870605 + }, + { + "epoch": 4.325714111328125e-05, + "model_forward_time": 0.025110721588134766, + "step": 28349 + }, + { + "epoch": 4.325714111328125e-05, + "step": 28349, + "training_step_time": 0.13621950149536133 + }, + { + "epoch": 4.32586669921875e-05, + "grad_norm": 0.05207629129290581, + "learning_rate": 8.247462563808817e-07, + "loss": 0.0028, + "step": 28350 + }, + { + "epoch": 4.32586669921875e-05, + "model_forward_time": 0.02513432502746582, + "step": 28350 + }, + { + "epoch": 4.32586669921875e-05, + "step": 28350, + "training_step_time": 0.10732269287109375 + }, + { + "epoch": 4.326019287109375e-05, + "model_forward_time": 0.025487422943115234, + "step": 28351 + }, + { + "epoch": 4.326019287109375e-05, + "step": 28351, + "training_step_time": 0.10588431358337402 + }, + { + "epoch": 4.326171875e-05, + "model_forward_time": 0.025292396545410156, + "step": 28352 + }, + { + "epoch": 4.326171875e-05, + "step": 28352, + "training_step_time": 0.10665154457092285 + }, + { + "epoch": 4.326324462890625e-05, + "model_forward_time": 0.025133371353149414, + "step": 28353 + }, + { + "epoch": 4.326324462890625e-05, + "step": 28353, + "training_step_time": 0.12339425086975098 + }, + { + "epoch": 4.32647705078125e-05, + "model_forward_time": 0.02559828758239746, + "step": 28354 + }, + { + "epoch": 4.32647705078125e-05, + "step": 28354, + "training_step_time": 0.14159178733825684 + }, + { + "epoch": 4.326629638671875e-05, + "model_forward_time": 0.024936676025390625, + "step": 28355 + }, + { + "epoch": 4.326629638671875e-05, + "step": 28355, + "training_step_time": 0.10797977447509766 + }, + { + "epoch": 4.3267822265625e-05, + "model_forward_time": 0.02536797523498535, + "step": 28356 + }, + { + "epoch": 4.3267822265625e-05, + "step": 28356, + "training_step_time": 0.10581254959106445 + }, + { + "epoch": 4.326934814453125e-05, + "model_forward_time": 0.02582550048828125, + "step": 28357 + }, + { + "epoch": 4.326934814453125e-05, + "step": 28357, + "training_step_time": 0.11330509185791016 + }, + { + "epoch": 4.32708740234375e-05, + "model_forward_time": 0.025530576705932617, + "step": 28358 + }, + { + "epoch": 4.32708740234375e-05, + "step": 28358, + "training_step_time": 0.10529446601867676 + }, + { + "epoch": 4.327239990234375e-05, + "model_forward_time": 0.024924516677856445, + "step": 28359 + }, + { + "epoch": 4.327239990234375e-05, + "step": 28359, + "training_step_time": 0.20937108993530273 + }, + { + "epoch": 4.327392578125e-05, + "grad_norm": 0.029038051143288612, + "learning_rate": 8.148067845474838e-07, + "loss": 0.0026, + "step": 28360 + }, + { + "epoch": 4.327392578125e-05, + "model_forward_time": 0.024399757385253906, + "step": 28360 + }, + { + "epoch": 4.327392578125e-05, + "step": 28360, + "training_step_time": 0.10466861724853516 + }, + { + "epoch": 4.327545166015625e-05, + "model_forward_time": 0.024623870849609375, + "step": 28361 + }, + { + "epoch": 4.327545166015625e-05, + "step": 28361, + "training_step_time": 0.10635733604431152 + }, + { + "epoch": 4.32769775390625e-05, + "model_forward_time": 0.027372121810913086, + "step": 28362 + }, + { + "epoch": 4.32769775390625e-05, + "step": 28362, + "training_step_time": 0.10723304748535156 + }, + { + "epoch": 4.327850341796875e-05, + "model_forward_time": 0.02536940574645996, + "step": 28363 + }, + { + "epoch": 4.327850341796875e-05, + "step": 28363, + "training_step_time": 0.10953235626220703 + }, + { + "epoch": 4.3280029296875e-05, + "model_forward_time": 0.02526545524597168, + "step": 28364 + }, + { + "epoch": 4.3280029296875e-05, + "step": 28364, + "training_step_time": 0.10798263549804688 + }, + { + "epoch": 4.328155517578125e-05, + "model_forward_time": 0.024599790573120117, + "step": 28365 + }, + { + "epoch": 4.328155517578125e-05, + "step": 28365, + "training_step_time": 0.10374927520751953 + }, + { + "epoch": 4.32830810546875e-05, + "model_forward_time": 0.025591373443603516, + "step": 28366 + }, + { + "epoch": 4.32830810546875e-05, + "step": 28366, + "training_step_time": 0.1036214828491211 + }, + { + "epoch": 4.328460693359375e-05, + "model_forward_time": 0.025064706802368164, + "step": 28367 + }, + { + "epoch": 4.328460693359375e-05, + "step": 28367, + "training_step_time": 0.1049656867980957 + }, + { + "epoch": 4.32861328125e-05, + "model_forward_time": 0.025427579879760742, + "step": 28368 + }, + { + "epoch": 4.32861328125e-05, + "step": 28368, + "training_step_time": 0.10957956314086914 + }, + { + "epoch": 4.328765869140625e-05, + "model_forward_time": 0.025300264358520508, + "step": 28369 + }, + { + "epoch": 4.328765869140625e-05, + "step": 28369, + "training_step_time": 0.10419106483459473 + }, + { + "epoch": 4.32891845703125e-05, + "grad_norm": 0.0704699456691742, + "learning_rate": 8.049270773617057e-07, + "loss": 0.0056, + "step": 28370 + }, + { + "epoch": 4.32891845703125e-05, + "model_forward_time": 0.02546095848083496, + "step": 28370 + }, + { + "epoch": 4.32891845703125e-05, + "step": 28370, + "training_step_time": 0.10808873176574707 + }, + { + "epoch": 4.329071044921875e-05, + "model_forward_time": 0.02561163902282715, + "step": 28371 + }, + { + "epoch": 4.329071044921875e-05, + "step": 28371, + "training_step_time": 0.10383391380310059 + }, + { + "epoch": 4.3292236328125e-05, + "model_forward_time": 0.02538585662841797, + "step": 28372 + }, + { + "epoch": 4.3292236328125e-05, + "step": 28372, + "training_step_time": 0.10588693618774414 + }, + { + "epoch": 4.329376220703125e-05, + "model_forward_time": 0.02519965171813965, + "step": 28373 + }, + { + "epoch": 4.329376220703125e-05, + "step": 28373, + "training_step_time": 0.10465693473815918 + }, + { + "epoch": 4.32952880859375e-05, + "model_forward_time": 0.025647878646850586, + "step": 28374 + }, + { + "epoch": 4.32952880859375e-05, + "step": 28374, + "training_step_time": 0.10506415367126465 + }, + { + "epoch": 4.329681396484375e-05, + "model_forward_time": 0.025729894638061523, + "step": 28375 + }, + { + "epoch": 4.329681396484375e-05, + "step": 28375, + "training_step_time": 0.10447239875793457 + }, + { + "epoch": 4.329833984375e-05, + "model_forward_time": 0.02562689781188965, + "step": 28376 + }, + { + "epoch": 4.329833984375e-05, + "step": 28376, + "training_step_time": 0.10456967353820801 + }, + { + "epoch": 4.329986572265625e-05, + "model_forward_time": 0.025527000427246094, + "step": 28377 + }, + { + "epoch": 4.329986572265625e-05, + "step": 28377, + "training_step_time": 0.1053462028503418 + }, + { + "epoch": 4.33013916015625e-05, + "model_forward_time": 0.026200532913208008, + "step": 28378 + }, + { + "epoch": 4.33013916015625e-05, + "step": 28378, + "training_step_time": 0.10837292671203613 + }, + { + "epoch": 4.330291748046875e-05, + "model_forward_time": 0.02554607391357422, + "step": 28379 + }, + { + "epoch": 4.330291748046875e-05, + "step": 28379, + "training_step_time": 0.11129450798034668 + }, + { + "epoch": 4.3304443359375e-05, + "grad_norm": 0.04326881840825081, + "learning_rate": 7.951071468283167e-07, + "loss": 0.0044, + "step": 28380 + }, + { + "epoch": 4.3304443359375e-05, + "model_forward_time": 0.024960041046142578, + "step": 28380 + }, + { + "epoch": 4.3304443359375e-05, + "step": 28380, + "training_step_time": 0.17937064170837402 + }, + { + "epoch": 4.330596923828125e-05, + "model_forward_time": 0.02476811408996582, + "step": 28381 + }, + { + "epoch": 4.330596923828125e-05, + "step": 28381, + "training_step_time": 0.20872068405151367 + }, + { + "epoch": 4.33074951171875e-05, + "model_forward_time": 0.02447199821472168, + "step": 28382 + }, + { + "epoch": 4.33074951171875e-05, + "step": 28382, + "training_step_time": 0.16397500038146973 + }, + { + "epoch": 4.330902099609375e-05, + "model_forward_time": 0.024724960327148438, + "step": 28383 + }, + { + "epoch": 4.330902099609375e-05, + "step": 28383, + "training_step_time": 0.16937494277954102 + }, + { + "epoch": 4.3310546875e-05, + "model_forward_time": 0.02457714080810547, + "step": 28384 + }, + { + "epoch": 4.3310546875e-05, + "step": 28384, + "training_step_time": 0.2020871639251709 + }, + { + "epoch": 4.331207275390625e-05, + "model_forward_time": 0.025063514709472656, + "step": 28385 + }, + { + "epoch": 4.331207275390625e-05, + "step": 28385, + "training_step_time": 0.14789819717407227 + }, + { + "epoch": 4.33135986328125e-05, + "model_forward_time": 0.024549484252929688, + "step": 28386 + }, + { + "epoch": 4.33135986328125e-05, + "step": 28386, + "training_step_time": 0.13049674034118652 + }, + { + "epoch": 4.331512451171875e-05, + "model_forward_time": 0.0248873233795166, + "step": 28387 + }, + { + "epoch": 4.331512451171875e-05, + "step": 28387, + "training_step_time": 0.12593984603881836 + }, + { + "epoch": 4.3316650390625e-05, + "model_forward_time": 0.02491283416748047, + "step": 28388 + }, + { + "epoch": 4.3316650390625e-05, + "step": 28388, + "training_step_time": 0.12424492835998535 + }, + { + "epoch": 4.331817626953125e-05, + "model_forward_time": 0.02517104148864746, + "step": 28389 + }, + { + "epoch": 4.331817626953125e-05, + "step": 28389, + "training_step_time": 0.13075041770935059 + }, + { + "epoch": 4.33197021484375e-05, + "grad_norm": 0.032751381397247314, + "learning_rate": 7.853470048794664e-07, + "loss": 0.0081, + "step": 28390 + }, + { + "epoch": 4.33197021484375e-05, + "model_forward_time": 0.02523064613342285, + "step": 28390 + }, + { + "epoch": 4.33197021484375e-05, + "step": 28390, + "training_step_time": 0.20630574226379395 + }, + { + "epoch": 4.332122802734375e-05, + "model_forward_time": 0.023665904998779297, + "step": 28391 + }, + { + "epoch": 4.332122802734375e-05, + "step": 28391, + "training_step_time": 0.11001729965209961 + }, + { + "epoch": 4.332275390625e-05, + "model_forward_time": 0.02480292320251465, + "step": 28392 + }, + { + "epoch": 4.332275390625e-05, + "step": 28392, + "training_step_time": 0.10884547233581543 + }, + { + "epoch": 4.332427978515625e-05, + "model_forward_time": 0.02496480941772461, + "step": 28393 + }, + { + "epoch": 4.332427978515625e-05, + "step": 28393, + "training_step_time": 0.11189484596252441 + }, + { + "epoch": 4.33258056640625e-05, + "model_forward_time": 0.025141239166259766, + "step": 28394 + }, + { + "epoch": 4.33258056640625e-05, + "step": 28394, + "training_step_time": 0.10819745063781738 + }, + { + "epoch": 4.332733154296875e-05, + "model_forward_time": 0.02517986297607422, + "step": 28395 + }, + { + "epoch": 4.332733154296875e-05, + "step": 28395, + "training_step_time": 0.13584160804748535 + }, + { + "epoch": 4.3328857421875e-05, + "model_forward_time": 0.025691747665405273, + "step": 28396 + }, + { + "epoch": 4.3328857421875e-05, + "step": 28396, + "training_step_time": 0.10487222671508789 + }, + { + "epoch": 4.333038330078125e-05, + "model_forward_time": 0.0257570743560791, + "step": 28397 + }, + { + "epoch": 4.333038330078125e-05, + "step": 28397, + "training_step_time": 0.10948920249938965 + }, + { + "epoch": 4.33319091796875e-05, + "model_forward_time": 0.02497076988220215, + "step": 28398 + }, + { + "epoch": 4.33319091796875e-05, + "step": 28398, + "training_step_time": 0.10701894760131836 + }, + { + "epoch": 4.333343505859375e-05, + "model_forward_time": 0.02463364601135254, + "step": 28399 + }, + { + "epoch": 4.333343505859375e-05, + "step": 28399, + "training_step_time": 0.12427473068237305 + }, + { + "epoch": 4.33349609375e-05, + "grad_norm": 0.0698418915271759, + "learning_rate": 7.756466633746407e-07, + "loss": 0.0028, + "step": 28400 + }, + { + "epoch": 4.33349609375e-05, + "model_forward_time": 0.024944305419921875, + "step": 28400 + }, + { + "epoch": 4.33349609375e-05, + "step": 28400, + "training_step_time": 0.12619304656982422 + }, + { + "epoch": 4.333648681640625e-05, + "model_forward_time": 0.02522134780883789, + "step": 28401 + }, + { + "epoch": 4.333648681640625e-05, + "step": 28401, + "training_step_time": 0.1080789566040039 + }, + { + "epoch": 4.33380126953125e-05, + "model_forward_time": 0.0251920223236084, + "step": 28402 + }, + { + "epoch": 4.33380126953125e-05, + "step": 28402, + "training_step_time": 0.10990762710571289 + }, + { + "epoch": 4.333953857421875e-05, + "model_forward_time": 0.025344133377075195, + "step": 28403 + }, + { + "epoch": 4.333953857421875e-05, + "step": 28403, + "training_step_time": 0.11408782005310059 + }, + { + "epoch": 4.3341064453125e-05, + "model_forward_time": 0.02684807777404785, + "step": 28404 + }, + { + "epoch": 4.3341064453125e-05, + "step": 28404, + "training_step_time": 0.10718750953674316 + }, + { + "epoch": 4.334259033203125e-05, + "model_forward_time": 0.025441884994506836, + "step": 28405 + }, + { + "epoch": 4.334259033203125e-05, + "step": 28405, + "training_step_time": 0.19615864753723145 + }, + { + "epoch": 4.33441162109375e-05, + "model_forward_time": 0.024446487426757812, + "step": 28406 + }, + { + "epoch": 4.33441162109375e-05, + "step": 28406, + "training_step_time": 0.10939240455627441 + }, + { + "epoch": 4.334564208984375e-05, + "model_forward_time": 0.026717185974121094, + "step": 28407 + }, + { + "epoch": 4.334564208984375e-05, + "step": 28407, + "training_step_time": 0.10455703735351562 + }, + { + "epoch": 4.334716796875e-05, + "model_forward_time": 0.02548360824584961, + "step": 28408 + }, + { + "epoch": 4.334716796875e-05, + "step": 28408, + "training_step_time": 0.10851883888244629 + }, + { + "epoch": 4.334869384765625e-05, + "model_forward_time": 0.025321006774902344, + "step": 28409 + }, + { + "epoch": 4.334869384765625e-05, + "step": 28409, + "training_step_time": 0.10486459732055664 + }, + { + "epoch": 4.33502197265625e-05, + "grad_norm": 0.5670133233070374, + "learning_rate": 7.66006134100672e-07, + "loss": 0.0056, + "step": 28410 + }, + { + "epoch": 4.33502197265625e-05, + "model_forward_time": 0.025903940200805664, + "step": 28410 + }, + { + "epoch": 4.33502197265625e-05, + "step": 28410, + "training_step_time": 0.10527443885803223 + }, + { + "epoch": 4.335174560546875e-05, + "model_forward_time": 0.025406837463378906, + "step": 28411 + }, + { + "epoch": 4.335174560546875e-05, + "step": 28411, + "training_step_time": 0.10618281364440918 + }, + { + "epoch": 4.3353271484375e-05, + "model_forward_time": 0.025653839111328125, + "step": 28412 + }, + { + "epoch": 4.3353271484375e-05, + "step": 28412, + "training_step_time": 0.10473847389221191 + }, + { + "epoch": 4.335479736328125e-05, + "model_forward_time": 0.025304794311523438, + "step": 28413 + }, + { + "epoch": 4.335479736328125e-05, + "step": 28413, + "training_step_time": 0.10793423652648926 + }, + { + "epoch": 4.33563232421875e-05, + "model_forward_time": 0.026701688766479492, + "step": 28414 + }, + { + "epoch": 4.33563232421875e-05, + "step": 28414, + "training_step_time": 0.10609793663024902 + }, + { + "epoch": 4.335784912109375e-05, + "model_forward_time": 0.025300979614257812, + "step": 28415 + }, + { + "epoch": 4.335784912109375e-05, + "step": 28415, + "training_step_time": 0.10976386070251465 + }, + { + "epoch": 4.3359375e-05, + "model_forward_time": 0.025431394577026367, + "step": 28416 + }, + { + "epoch": 4.3359375e-05, + "step": 28416, + "training_step_time": 0.10615658760070801 + }, + { + "epoch": 4.336090087890625e-05, + "model_forward_time": 0.025214672088623047, + "step": 28417 + }, + { + "epoch": 4.336090087890625e-05, + "step": 28417, + "training_step_time": 0.10617351531982422 + }, + { + "epoch": 4.33624267578125e-05, + "model_forward_time": 0.025531530380249023, + "step": 28418 + }, + { + "epoch": 4.33624267578125e-05, + "step": 28418, + "training_step_time": 0.10517525672912598 + }, + { + "epoch": 4.336395263671875e-05, + "model_forward_time": 0.025285959243774414, + "step": 28419 + }, + { + "epoch": 4.336395263671875e-05, + "step": 28419, + "training_step_time": 0.10478591918945312 + }, + { + "epoch": 4.3365478515625e-05, + "grad_norm": 0.13137125968933105, + "learning_rate": 7.564254287717176e-07, + "loss": 0.008, + "step": 28420 + }, + { + "epoch": 4.3365478515625e-05, + "model_forward_time": 0.02513718605041504, + "step": 28420 + }, + { + "epoch": 4.3365478515625e-05, + "step": 28420, + "training_step_time": 0.10477781295776367 + }, + { + "epoch": 4.336700439453125e-05, + "model_forward_time": 0.02508544921875, + "step": 28421 + }, + { + "epoch": 4.336700439453125e-05, + "step": 28421, + "training_step_time": 0.10487127304077148 + }, + { + "epoch": 4.33685302734375e-05, + "model_forward_time": 0.026108980178833008, + "step": 28422 + }, + { + "epoch": 4.33685302734375e-05, + "step": 28422, + "training_step_time": 0.10954022407531738 + }, + { + "epoch": 4.337005615234375e-05, + "model_forward_time": 0.025206804275512695, + "step": 28423 + }, + { + "epoch": 4.337005615234375e-05, + "step": 28423, + "training_step_time": 0.10972356796264648 + }, + { + "epoch": 4.337158203125e-05, + "model_forward_time": 0.028063535690307617, + "step": 28424 + }, + { + "epoch": 4.337158203125e-05, + "step": 28424, + "training_step_time": 0.11254763603210449 + }, + { + "epoch": 4.337310791015625e-05, + "model_forward_time": 0.025130271911621094, + "step": 28425 + }, + { + "epoch": 4.337310791015625e-05, + "step": 28425, + "training_step_time": 0.10553693771362305 + }, + { + "epoch": 4.33746337890625e-05, + "model_forward_time": 0.025455713272094727, + "step": 28426 + }, + { + "epoch": 4.33746337890625e-05, + "step": 28426, + "training_step_time": 0.1358485221862793 + }, + { + "epoch": 4.337615966796875e-05, + "model_forward_time": 0.025660276412963867, + "step": 28427 + }, + { + "epoch": 4.337615966796875e-05, + "step": 28427, + "training_step_time": 0.11090707778930664 + }, + { + "epoch": 4.3377685546875e-05, + "model_forward_time": 0.02532219886779785, + "step": 28428 + }, + { + "epoch": 4.3377685546875e-05, + "step": 28428, + "training_step_time": 0.10703229904174805 + }, + { + "epoch": 4.337921142578125e-05, + "model_forward_time": 0.025213956832885742, + "step": 28429 + }, + { + "epoch": 4.337921142578125e-05, + "step": 28429, + "training_step_time": 0.11951375007629395 + }, + { + "epoch": 4.33807373046875e-05, + "grad_norm": 0.20636147260665894, + "learning_rate": 7.469045590292323e-07, + "loss": 0.0043, + "step": 28430 + }, + { + "epoch": 4.33807373046875e-05, + "model_forward_time": 0.025378942489624023, + "step": 28430 + }, + { + "epoch": 4.33807373046875e-05, + "step": 28430, + "training_step_time": 0.11332273483276367 + }, + { + "epoch": 4.338226318359375e-05, + "model_forward_time": 0.02594447135925293, + "step": 28431 + }, + { + "epoch": 4.338226318359375e-05, + "step": 28431, + "training_step_time": 0.12037944793701172 + }, + { + "epoch": 4.33837890625e-05, + "model_forward_time": 0.027541637420654297, + "step": 28432 + }, + { + "epoch": 4.33837890625e-05, + "step": 28432, + "training_step_time": 0.16683673858642578 + }, + { + "epoch": 4.338531494140625e-05, + "model_forward_time": 0.024903297424316406, + "step": 28433 + }, + { + "epoch": 4.338531494140625e-05, + "step": 28433, + "training_step_time": 0.1829085350036621 + }, + { + "epoch": 4.33868408203125e-05, + "model_forward_time": 0.02461409568786621, + "step": 28434 + }, + { + "epoch": 4.33868408203125e-05, + "step": 28434, + "training_step_time": 0.10988569259643555 + }, + { + "epoch": 4.338836669921875e-05, + "model_forward_time": 0.024920940399169922, + "step": 28435 + }, + { + "epoch": 4.338836669921875e-05, + "step": 28435, + "training_step_time": 0.11377382278442383 + }, + { + "epoch": 4.3389892578125e-05, + "model_forward_time": 0.02543783187866211, + "step": 28436 + }, + { + "epoch": 4.3389892578125e-05, + "step": 28436, + "training_step_time": 0.11171746253967285 + }, + { + "epoch": 4.339141845703125e-05, + "model_forward_time": 0.02561497688293457, + "step": 28437 + }, + { + "epoch": 4.339141845703125e-05, + "step": 28437, + "training_step_time": 0.1553177833557129 + }, + { + "epoch": 4.33929443359375e-05, + "model_forward_time": 0.024953842163085938, + "step": 28438 + }, + { + "epoch": 4.33929443359375e-05, + "step": 28438, + "training_step_time": 0.10993361473083496 + }, + { + "epoch": 4.339447021484375e-05, + "model_forward_time": 0.025770187377929688, + "step": 28439 + }, + { + "epoch": 4.339447021484375e-05, + "step": 28439, + "training_step_time": 0.12657999992370605 + }, + { + "epoch": 4.339599609375e-05, + "grad_norm": 0.04000052809715271, + "learning_rate": 7.374435364419674e-07, + "loss": 0.0032, + "step": 28440 + }, + { + "epoch": 4.339599609375e-05, + "model_forward_time": 0.025289058685302734, + "step": 28440 + }, + { + "epoch": 4.339599609375e-05, + "step": 28440, + "training_step_time": 0.1298682689666748 + }, + { + "epoch": 4.339752197265625e-05, + "model_forward_time": 0.024939537048339844, + "step": 28441 + }, + { + "epoch": 4.339752197265625e-05, + "step": 28441, + "training_step_time": 0.12552380561828613 + }, + { + "epoch": 4.33990478515625e-05, + "model_forward_time": 0.02550053596496582, + "step": 28442 + }, + { + "epoch": 4.33990478515625e-05, + "step": 28442, + "training_step_time": 0.11357283592224121 + }, + { + "epoch": 4.340057373046875e-05, + "model_forward_time": 0.025386571884155273, + "step": 28443 + }, + { + "epoch": 4.340057373046875e-05, + "step": 28443, + "training_step_time": 0.11450409889221191 + }, + { + "epoch": 4.3402099609375e-05, + "model_forward_time": 0.025455713272094727, + "step": 28444 + }, + { + "epoch": 4.3402099609375e-05, + "step": 28444, + "training_step_time": 0.14101576805114746 + }, + { + "epoch": 4.340362548828125e-05, + "model_forward_time": 0.024930477142333984, + "step": 28445 + }, + { + "epoch": 4.340362548828125e-05, + "step": 28445, + "training_step_time": 0.10496807098388672 + }, + { + "epoch": 4.34051513671875e-05, + "model_forward_time": 0.025425195693969727, + "step": 28446 + }, + { + "epoch": 4.34051513671875e-05, + "step": 28446, + "training_step_time": 0.10696625709533691 + }, + { + "epoch": 4.340667724609375e-05, + "model_forward_time": 0.025606155395507812, + "step": 28447 + }, + { + "epoch": 4.340667724609375e-05, + "step": 28447, + "training_step_time": 0.11908602714538574 + }, + { + "epoch": 4.3408203125e-05, + "model_forward_time": 0.025002241134643555, + "step": 28448 + }, + { + "epoch": 4.3408203125e-05, + "step": 28448, + "training_step_time": 0.13784241676330566 + }, + { + "epoch": 4.340972900390625e-05, + "model_forward_time": 0.025092601776123047, + "step": 28449 + }, + { + "epoch": 4.340972900390625e-05, + "step": 28449, + "training_step_time": 0.10826444625854492 + }, + { + "epoch": 4.34112548828125e-05, + "grad_norm": 0.029679667204618454, + "learning_rate": 7.280423725059604e-07, + "loss": 0.0034, + "step": 28450 + }, + { + "epoch": 4.34112548828125e-05, + "model_forward_time": 0.02518630027770996, + "step": 28450 + }, + { + "epoch": 4.34112548828125e-05, + "step": 28450, + "training_step_time": 0.10819125175476074 + }, + { + "epoch": 4.341278076171875e-05, + "model_forward_time": 0.025093555450439453, + "step": 28451 + }, + { + "epoch": 4.341278076171875e-05, + "step": 28451, + "training_step_time": 0.11107993125915527 + }, + { + "epoch": 4.3414306640625e-05, + "model_forward_time": 0.02514958381652832, + "step": 28452 + }, + { + "epoch": 4.3414306640625e-05, + "step": 28452, + "training_step_time": 0.10954093933105469 + }, + { + "epoch": 4.341583251953125e-05, + "model_forward_time": 0.025235652923583984, + "step": 28453 + }, + { + "epoch": 4.341583251953125e-05, + "step": 28453, + "training_step_time": 0.19406819343566895 + }, + { + "epoch": 4.34173583984375e-05, + "model_forward_time": 0.028592348098754883, + "step": 28454 + }, + { + "epoch": 4.34173583984375e-05, + "step": 28454, + "training_step_time": 0.10696721076965332 + }, + { + "epoch": 4.341888427734375e-05, + "model_forward_time": 0.024432897567749023, + "step": 28455 + }, + { + "epoch": 4.341888427734375e-05, + "step": 28455, + "training_step_time": 0.10461091995239258 + }, + { + "epoch": 4.342041015625e-05, + "model_forward_time": 0.025321483612060547, + "step": 28456 + }, + { + "epoch": 4.342041015625e-05, + "step": 28456, + "training_step_time": 0.10473322868347168 + }, + { + "epoch": 4.342193603515625e-05, + "model_forward_time": 0.024932146072387695, + "step": 28457 + }, + { + "epoch": 4.342193603515625e-05, + "step": 28457, + "training_step_time": 0.1053776741027832 + }, + { + "epoch": 4.34234619140625e-05, + "model_forward_time": 0.025384902954101562, + "step": 28458 + }, + { + "epoch": 4.34234619140625e-05, + "step": 28458, + "training_step_time": 0.11069917678833008 + }, + { + "epoch": 4.342498779296875e-05, + "model_forward_time": 0.026886940002441406, + "step": 28459 + }, + { + "epoch": 4.342498779296875e-05, + "step": 28459, + "training_step_time": 0.12270545959472656 + }, + { + "epoch": 4.3426513671875e-05, + "grad_norm": 0.07157254219055176, + "learning_rate": 7.187010786445181e-07, + "loss": 0.0056, + "step": 28460 + }, + { + "epoch": 4.3426513671875e-05, + "model_forward_time": 0.02575206756591797, + "step": 28460 + }, + { + "epoch": 4.3426513671875e-05, + "step": 28460, + "training_step_time": 0.12765192985534668 + }, + { + "epoch": 4.342803955078125e-05, + "model_forward_time": 0.024914979934692383, + "step": 28461 + }, + { + "epoch": 4.342803955078125e-05, + "step": 28461, + "training_step_time": 0.12010741233825684 + }, + { + "epoch": 4.34295654296875e-05, + "model_forward_time": 0.026850461959838867, + "step": 28462 + }, + { + "epoch": 4.34295654296875e-05, + "step": 28462, + "training_step_time": 0.12210941314697266 + }, + { + "epoch": 4.343109130859375e-05, + "model_forward_time": 0.025312185287475586, + "step": 28463 + }, + { + "epoch": 4.343109130859375e-05, + "step": 28463, + "training_step_time": 0.12207746505737305 + }, + { + "epoch": 4.34326171875e-05, + "model_forward_time": 0.024826765060424805, + "step": 28464 + }, + { + "epoch": 4.34326171875e-05, + "step": 28464, + "training_step_time": 0.11501002311706543 + }, + { + "epoch": 4.343414306640625e-05, + "model_forward_time": 0.02542424201965332, + "step": 28465 + }, + { + "epoch": 4.343414306640625e-05, + "step": 28465, + "training_step_time": 0.11893677711486816 + }, + { + "epoch": 4.34356689453125e-05, + "model_forward_time": 0.025875091552734375, + "step": 28466 + }, + { + "epoch": 4.34356689453125e-05, + "step": 28466, + "training_step_time": 0.10900759696960449 + }, + { + "epoch": 4.343719482421875e-05, + "model_forward_time": 0.025378942489624023, + "step": 28467 + }, + { + "epoch": 4.343719482421875e-05, + "step": 28467, + "training_step_time": 0.11067676544189453 + }, + { + "epoch": 4.3438720703125e-05, + "model_forward_time": 0.025061845779418945, + "step": 28468 + }, + { + "epoch": 4.3438720703125e-05, + "step": 28468, + "training_step_time": 0.1082911491394043 + }, + { + "epoch": 4.344024658203125e-05, + "model_forward_time": 0.02568197250366211, + "step": 28469 + }, + { + "epoch": 4.344024658203125e-05, + "step": 28469, + "training_step_time": 0.10946369171142578 + }, + { + "epoch": 4.34417724609375e-05, + "grad_norm": 0.09917882084846497, + "learning_rate": 7.094196662081831e-07, + "loss": 0.0037, + "step": 28470 + }, + { + "epoch": 4.34417724609375e-05, + "model_forward_time": 0.025172948837280273, + "step": 28470 + }, + { + "epoch": 4.34417724609375e-05, + "step": 28470, + "training_step_time": 0.1076650619506836 + }, + { + "epoch": 4.344329833984375e-05, + "model_forward_time": 0.025418519973754883, + "step": 28471 + }, + { + "epoch": 4.344329833984375e-05, + "step": 28471, + "training_step_time": 0.10642600059509277 + }, + { + "epoch": 4.344482421875e-05, + "model_forward_time": 0.025420427322387695, + "step": 28472 + }, + { + "epoch": 4.344482421875e-05, + "step": 28472, + "training_step_time": 0.10732269287109375 + }, + { + "epoch": 4.344635009765625e-05, + "model_forward_time": 0.025669097900390625, + "step": 28473 + }, + { + "epoch": 4.344635009765625e-05, + "step": 28473, + "training_step_time": 0.16993093490600586 + }, + { + "epoch": 4.34478759765625e-05, + "model_forward_time": 0.025479793548583984, + "step": 28474 + }, + { + "epoch": 4.34478759765625e-05, + "step": 28474, + "training_step_time": 0.10505270957946777 + }, + { + "epoch": 4.344940185546875e-05, + "model_forward_time": 0.02503490447998047, + "step": 28475 + }, + { + "epoch": 4.344940185546875e-05, + "step": 28475, + "training_step_time": 0.17090129852294922 + }, + { + "epoch": 4.3450927734375e-05, + "model_forward_time": 0.024590253829956055, + "step": 28476 + }, + { + "epoch": 4.3450927734375e-05, + "step": 28476, + "training_step_time": 0.16178441047668457 + }, + { + "epoch": 4.345245361328125e-05, + "model_forward_time": 0.024831295013427734, + "step": 28477 + }, + { + "epoch": 4.345245361328125e-05, + "step": 28477, + "training_step_time": 0.14457058906555176 + }, + { + "epoch": 4.34539794921875e-05, + "model_forward_time": 0.02495551109313965, + "step": 28478 + }, + { + "epoch": 4.34539794921875e-05, + "step": 28478, + "training_step_time": 0.1083688735961914 + }, + { + "epoch": 4.345550537109375e-05, + "model_forward_time": 0.025355100631713867, + "step": 28479 + }, + { + "epoch": 4.345550537109375e-05, + "step": 28479, + "training_step_time": 0.19072651863098145 + }, + { + "epoch": 4.345703125e-05, + "grad_norm": 0.3075224459171295, + "learning_rate": 7.001981464747565e-07, + "loss": 0.0089, + "step": 28480 + }, + { + "epoch": 4.345703125e-05, + "model_forward_time": 0.02473592758178711, + "step": 28480 + }, + { + "epoch": 4.345703125e-05, + "step": 28480, + "training_step_time": 0.10347795486450195 + }, + { + "epoch": 4.345855712890625e-05, + "model_forward_time": 0.024626731872558594, + "step": 28481 + }, + { + "epoch": 4.345855712890625e-05, + "step": 28481, + "training_step_time": 0.10383129119873047 + }, + { + "epoch": 4.34600830078125e-05, + "model_forward_time": 0.025348663330078125, + "step": 28482 + }, + { + "epoch": 4.34600830078125e-05, + "step": 28482, + "training_step_time": 0.10932469367980957 + }, + { + "epoch": 4.346160888671875e-05, + "model_forward_time": 0.025280237197875977, + "step": 28483 + }, + { + "epoch": 4.346160888671875e-05, + "step": 28483, + "training_step_time": 0.10519838333129883 + }, + { + "epoch": 4.3463134765625e-05, + "model_forward_time": 0.025243759155273438, + "step": 28484 + }, + { + "epoch": 4.3463134765625e-05, + "step": 28484, + "training_step_time": 0.17499041557312012 + }, + { + "epoch": 4.346466064453125e-05, + "model_forward_time": 0.02454066276550293, + "step": 28485 + }, + { + "epoch": 4.346466064453125e-05, + "step": 28485, + "training_step_time": 0.22075915336608887 + }, + { + "epoch": 4.34661865234375e-05, + "model_forward_time": 0.0244905948638916, + "step": 28486 + }, + { + "epoch": 4.34661865234375e-05, + "step": 28486, + "training_step_time": 0.10803580284118652 + }, + { + "epoch": 4.346771240234375e-05, + "model_forward_time": 0.024473905563354492, + "step": 28487 + }, + { + "epoch": 4.346771240234375e-05, + "step": 28487, + "training_step_time": 0.11919045448303223 + }, + { + "epoch": 4.346923828125e-05, + "model_forward_time": 0.025184154510498047, + "step": 28488 + }, + { + "epoch": 4.346923828125e-05, + "step": 28488, + "training_step_time": 0.11977267265319824 + }, + { + "epoch": 4.347076416015625e-05, + "model_forward_time": 0.025565624237060547, + "step": 28489 + }, + { + "epoch": 4.347076416015625e-05, + "step": 28489, + "training_step_time": 0.10879993438720703 + }, + { + "epoch": 4.34722900390625e-05, + "grad_norm": 0.05301102250814438, + "learning_rate": 6.910365306492416e-07, + "loss": 0.0019, + "step": 28490 + }, + { + "epoch": 4.34722900390625e-05, + "model_forward_time": 0.026027202606201172, + "step": 28490 + }, + { + "epoch": 4.34722900390625e-05, + "step": 28490, + "training_step_time": 0.17548179626464844 + }, + { + "epoch": 4.347381591796875e-05, + "model_forward_time": 0.024810075759887695, + "step": 28491 + }, + { + "epoch": 4.347381591796875e-05, + "step": 28491, + "training_step_time": 0.17010188102722168 + }, + { + "epoch": 4.3475341796875e-05, + "model_forward_time": 0.02449965476989746, + "step": 28492 + }, + { + "epoch": 4.3475341796875e-05, + "step": 28492, + "training_step_time": 0.20152664184570312 + }, + { + "epoch": 4.347686767578125e-05, + "model_forward_time": 0.024408578872680664, + "step": 28493 + }, + { + "epoch": 4.347686767578125e-05, + "step": 28493, + "training_step_time": 0.1467597484588623 + }, + { + "epoch": 4.34783935546875e-05, + "model_forward_time": 0.024835824966430664, + "step": 28494 + }, + { + "epoch": 4.34783935546875e-05, + "step": 28494, + "training_step_time": 0.1776142120361328 + }, + { + "epoch": 4.347991943359375e-05, + "model_forward_time": 0.02476811408996582, + "step": 28495 + }, + { + "epoch": 4.347991943359375e-05, + "step": 28495, + "training_step_time": 0.10346817970275879 + }, + { + "epoch": 4.34814453125e-05, + "model_forward_time": 0.024451255798339844, + "step": 28496 + }, + { + "epoch": 4.34814453125e-05, + "step": 28496, + "training_step_time": 0.18772602081298828 + }, + { + "epoch": 4.348297119140625e-05, + "model_forward_time": 0.024767160415649414, + "step": 28497 + }, + { + "epoch": 4.348297119140625e-05, + "step": 28497, + "training_step_time": 0.10262608528137207 + }, + { + "epoch": 4.34844970703125e-05, + "model_forward_time": 0.024786710739135742, + "step": 28498 + }, + { + "epoch": 4.34844970703125e-05, + "step": 28498, + "training_step_time": 0.10196542739868164 + }, + { + "epoch": 4.348602294921875e-05, + "model_forward_time": 0.025360822677612305, + "step": 28499 + }, + { + "epoch": 4.348602294921875e-05, + "step": 28499, + "training_step_time": 0.10585379600524902 + }, + { + "epoch": 4.3487548828125e-05, + "grad_norm": 0.04340476915240288, + "learning_rate": 6.819348298638839e-07, + "loss": 0.0051, + "step": 28500 + }, + { + "epoch": 4.3487548828125e-05, + "model_forward_time": 0.024919748306274414, + "step": 28500 + }, + { + "epoch": 4.3487548828125e-05, + "step": 28500, + "training_step_time": 0.10794901847839355 + }, + { + "epoch": 4.348907470703125e-05, + "model_forward_time": 0.025435924530029297, + "step": 28501 + }, + { + "epoch": 4.348907470703125e-05, + "step": 28501, + "training_step_time": 0.1041259765625 + }, + { + "epoch": 4.34906005859375e-05, + "model_forward_time": 0.02566981315612793, + "step": 28502 + }, + { + "epoch": 4.34906005859375e-05, + "step": 28502, + "training_step_time": 0.10534191131591797 + }, + { + "epoch": 4.349212646484375e-05, + "model_forward_time": 0.025554656982421875, + "step": 28503 + }, + { + "epoch": 4.349212646484375e-05, + "step": 28503, + "training_step_time": 0.11136603355407715 + }, + { + "epoch": 4.349365234375e-05, + "model_forward_time": 0.025473356246948242, + "step": 28504 + }, + { + "epoch": 4.349365234375e-05, + "step": 28504, + "training_step_time": 0.10604190826416016 + }, + { + "epoch": 4.349517822265625e-05, + "model_forward_time": 0.027715682983398438, + "step": 28505 + }, + { + "epoch": 4.349517822265625e-05, + "step": 28505, + "training_step_time": 0.10896015167236328 + }, + { + "epoch": 4.34967041015625e-05, + "model_forward_time": 0.025409221649169922, + "step": 28506 + }, + { + "epoch": 4.34967041015625e-05, + "step": 28506, + "training_step_time": 0.1063232421875 + }, + { + "epoch": 4.349822998046875e-05, + "model_forward_time": 0.025758743286132812, + "step": 28507 + }, + { + "epoch": 4.349822998046875e-05, + "step": 28507, + "training_step_time": 0.10753798484802246 + }, + { + "epoch": 4.3499755859375e-05, + "model_forward_time": 0.025473356246948242, + "step": 28508 + }, + { + "epoch": 4.3499755859375e-05, + "step": 28508, + "training_step_time": 0.11147928237915039 + }, + { + "epoch": 4.350128173828125e-05, + "model_forward_time": 0.025168418884277344, + "step": 28509 + }, + { + "epoch": 4.350128173828125e-05, + "step": 28509, + "training_step_time": 0.11475610733032227 + }, + { + "epoch": 4.35028076171875e-05, + "grad_norm": 0.3876003324985504, + "learning_rate": 6.728930551780865e-07, + "loss": 0.004, + "step": 28510 + }, + { + "epoch": 4.35028076171875e-05, + "model_forward_time": 0.02562117576599121, + "step": 28510 + }, + { + "epoch": 4.35028076171875e-05, + "step": 28510, + "training_step_time": 0.10536026954650879 + }, + { + "epoch": 4.350433349609375e-05, + "model_forward_time": 0.025583982467651367, + "step": 28511 + }, + { + "epoch": 4.350433349609375e-05, + "step": 28511, + "training_step_time": 0.10833311080932617 + }, + { + "epoch": 4.3505859375e-05, + "model_forward_time": 0.026442527770996094, + "step": 28512 + }, + { + "epoch": 4.3505859375e-05, + "step": 28512, + "training_step_time": 0.10732173919677734 + }, + { + "epoch": 4.350738525390625e-05, + "model_forward_time": 0.025744199752807617, + "step": 28513 + }, + { + "epoch": 4.350738525390625e-05, + "step": 28513, + "training_step_time": 0.11157751083374023 + }, + { + "epoch": 4.35089111328125e-05, + "model_forward_time": 0.025368452072143555, + "step": 28514 + }, + { + "epoch": 4.35089111328125e-05, + "step": 28514, + "training_step_time": 0.10731792449951172 + }, + { + "epoch": 4.351043701171875e-05, + "model_forward_time": 0.0254819393157959, + "step": 28515 + }, + { + "epoch": 4.351043701171875e-05, + "step": 28515, + "training_step_time": 0.10789299011230469 + }, + { + "epoch": 4.3511962890625e-05, + "model_forward_time": 0.02768397331237793, + "step": 28516 + }, + { + "epoch": 4.3511962890625e-05, + "step": 28516, + "training_step_time": 0.11053299903869629 + }, + { + "epoch": 4.351348876953125e-05, + "model_forward_time": 0.027440547943115234, + "step": 28517 + }, + { + "epoch": 4.351348876953125e-05, + "step": 28517, + "training_step_time": 0.17016959190368652 + }, + { + "epoch": 4.35150146484375e-05, + "model_forward_time": 0.026053905487060547, + "step": 28518 + }, + { + "epoch": 4.35150146484375e-05, + "step": 28518, + "training_step_time": 0.10873699188232422 + }, + { + "epoch": 4.351654052734375e-05, + "model_forward_time": 0.025048255920410156, + "step": 28519 + }, + { + "epoch": 4.351654052734375e-05, + "step": 28519, + "training_step_time": 0.17573928833007812 + }, + { + "epoch": 4.351806640625e-05, + "grad_norm": 0.04569575935602188, + "learning_rate": 6.639112175784778e-07, + "loss": 0.0078, + "step": 28520 + }, + { + "epoch": 4.351806640625e-05, + "model_forward_time": 0.026684999465942383, + "step": 28520 + }, + { + "epoch": 4.351806640625e-05, + "step": 28520, + "training_step_time": 0.18541431427001953 + }, + { + "epoch": 4.351959228515625e-05, + "model_forward_time": 0.024338960647583008, + "step": 28521 + }, + { + "epoch": 4.351959228515625e-05, + "step": 28521, + "training_step_time": 0.18069696426391602 + }, + { + "epoch": 4.35211181640625e-05, + "model_forward_time": 0.02463078498840332, + "step": 28522 + }, + { + "epoch": 4.35211181640625e-05, + "step": 28522, + "training_step_time": 0.1444244384765625 + }, + { + "epoch": 4.352264404296875e-05, + "model_forward_time": 0.0252532958984375, + "step": 28523 + }, + { + "epoch": 4.352264404296875e-05, + "step": 28523, + "training_step_time": 0.10292387008666992 + }, + { + "epoch": 4.3524169921875e-05, + "model_forward_time": 0.025261878967285156, + "step": 28524 + }, + { + "epoch": 4.3524169921875e-05, + "step": 28524, + "training_step_time": 0.10213589668273926 + }, + { + "epoch": 4.352569580078125e-05, + "model_forward_time": 0.025493383407592773, + "step": 28525 + }, + { + "epoch": 4.352569580078125e-05, + "step": 28525, + "training_step_time": 0.10588884353637695 + }, + { + "epoch": 4.35272216796875e-05, + "model_forward_time": 0.025641918182373047, + "step": 28526 + }, + { + "epoch": 4.35272216796875e-05, + "step": 28526, + "training_step_time": 0.11014914512634277 + }, + { + "epoch": 4.352874755859375e-05, + "model_forward_time": 0.024883747100830078, + "step": 28527 + }, + { + "epoch": 4.352874755859375e-05, + "step": 28527, + "training_step_time": 0.11501193046569824 + }, + { + "epoch": 4.35302734375e-05, + "model_forward_time": 0.025816679000854492, + "step": 28528 + }, + { + "epoch": 4.35302734375e-05, + "step": 28528, + "training_step_time": 0.2022254467010498 + }, + { + "epoch": 4.353179931640625e-05, + "model_forward_time": 0.024451017379760742, + "step": 28529 + }, + { + "epoch": 4.353179931640625e-05, + "step": 28529, + "training_step_time": 0.21092963218688965 + }, + { + "epoch": 4.35333251953125e-05, + "grad_norm": 0.15558089315891266, + "learning_rate": 6.549893279788277e-07, + "loss": 0.0054, + "step": 28530 + }, + { + "epoch": 4.35333251953125e-05, + "model_forward_time": 0.025069713592529297, + "step": 28530 + }, + { + "epoch": 4.35333251953125e-05, + "step": 28530, + "training_step_time": 0.1679999828338623 + }, + { + "epoch": 4.353485107421875e-05, + "model_forward_time": 0.024863243103027344, + "step": 28531 + }, + { + "epoch": 4.353485107421875e-05, + "step": 28531, + "training_step_time": 0.15567612648010254 + }, + { + "epoch": 4.3536376953125e-05, + "model_forward_time": 0.025053739547729492, + "step": 28532 + }, + { + "epoch": 4.3536376953125e-05, + "step": 28532, + "training_step_time": 0.11257410049438477 + }, + { + "epoch": 4.353790283203125e-05, + "model_forward_time": 0.026127338409423828, + "step": 28533 + }, + { + "epoch": 4.353790283203125e-05, + "step": 28533, + "training_step_time": 0.1369321346282959 + }, + { + "epoch": 4.35394287109375e-05, + "model_forward_time": 0.025477886199951172, + "step": 28534 + }, + { + "epoch": 4.35394287109375e-05, + "step": 28534, + "training_step_time": 0.1584947109222412 + }, + { + "epoch": 4.354095458984375e-05, + "model_forward_time": 0.025111913681030273, + "step": 28535 + }, + { + "epoch": 4.354095458984375e-05, + "step": 28535, + "training_step_time": 0.17205452919006348 + }, + { + "epoch": 4.354248046875e-05, + "model_forward_time": 0.024799346923828125, + "step": 28536 + }, + { + "epoch": 4.354248046875e-05, + "step": 28536, + "training_step_time": 0.11008405685424805 + }, + { + "epoch": 4.354400634765625e-05, + "model_forward_time": 0.025413036346435547, + "step": 28537 + }, + { + "epoch": 4.354400634765625e-05, + "step": 28537, + "training_step_time": 0.1068882942199707 + }, + { + "epoch": 4.35455322265625e-05, + "model_forward_time": 0.026021957397460938, + "step": 28538 + }, + { + "epoch": 4.35455322265625e-05, + "step": 28538, + "training_step_time": 0.12607479095458984 + }, + { + "epoch": 4.354705810546875e-05, + "model_forward_time": 0.025748252868652344, + "step": 28539 + }, + { + "epoch": 4.354705810546875e-05, + "step": 28539, + "training_step_time": 0.10910534858703613 + }, + { + "epoch": 4.3548583984375e-05, + "grad_norm": 0.048946134746074677, + "learning_rate": 6.461273972200755e-07, + "loss": 0.0032, + "step": 28540 + }, + { + "epoch": 4.3548583984375e-05, + "model_forward_time": 0.026276826858520508, + "step": 28540 + }, + { + "epoch": 4.3548583984375e-05, + "step": 28540, + "training_step_time": 0.1281576156616211 + }, + { + "epoch": 4.355010986328125e-05, + "model_forward_time": 0.02550482749938965, + "step": 28541 + }, + { + "epoch": 4.355010986328125e-05, + "step": 28541, + "training_step_time": 0.12551021575927734 + }, + { + "epoch": 4.35516357421875e-05, + "model_forward_time": 0.025445222854614258, + "step": 28542 + }, + { + "epoch": 4.35516357421875e-05, + "step": 28542, + "training_step_time": 0.10669088363647461 + }, + { + "epoch": 4.355316162109375e-05, + "model_forward_time": 0.02532196044921875, + "step": 28543 + }, + { + "epoch": 4.355316162109375e-05, + "step": 28543, + "training_step_time": 0.10470199584960938 + }, + { + "epoch": 4.35546875e-05, + "model_forward_time": 0.025837182998657227, + "step": 28544 + }, + { + "epoch": 4.35546875e-05, + "step": 28544, + "training_step_time": 0.10712647438049316 + }, + { + "epoch": 4.355621337890625e-05, + "model_forward_time": 0.025485515594482422, + "step": 28545 + }, + { + "epoch": 4.355621337890625e-05, + "step": 28545, + "training_step_time": 0.1055910587310791 + }, + { + "epoch": 4.35577392578125e-05, + "model_forward_time": 0.025990724563598633, + "step": 28546 + }, + { + "epoch": 4.35577392578125e-05, + "step": 28546, + "training_step_time": 0.10730719566345215 + }, + { + "epoch": 4.355926513671875e-05, + "model_forward_time": 0.025641441345214844, + "step": 28547 + }, + { + "epoch": 4.355926513671875e-05, + "step": 28547, + "training_step_time": 0.10591316223144531 + }, + { + "epoch": 4.3560791015625e-05, + "model_forward_time": 0.025481224060058594, + "step": 28548 + }, + { + "epoch": 4.3560791015625e-05, + "step": 28548, + "training_step_time": 0.11147642135620117 + }, + { + "epoch": 4.356231689453125e-05, + "model_forward_time": 0.025760412216186523, + "step": 28549 + }, + { + "epoch": 4.356231689453125e-05, + "step": 28549, + "training_step_time": 0.10542678833007812 + }, + { + "epoch": 4.35638427734375e-05, + "grad_norm": 0.12368736416101456, + "learning_rate": 6.373254360703018e-07, + "loss": 0.0075, + "step": 28550 + }, + { + "epoch": 4.35638427734375e-05, + "model_forward_time": 0.02554178237915039, + "step": 28550 + }, + { + "epoch": 4.35638427734375e-05, + "step": 28550, + "training_step_time": 0.1055300235748291 + }, + { + "epoch": 4.356536865234375e-05, + "model_forward_time": 0.025308847427368164, + "step": 28551 + }, + { + "epoch": 4.356536865234375e-05, + "step": 28551, + "training_step_time": 0.10625481605529785 + }, + { + "epoch": 4.356689453125e-05, + "model_forward_time": 0.02552199363708496, + "step": 28552 + }, + { + "epoch": 4.356689453125e-05, + "step": 28552, + "training_step_time": 0.10514521598815918 + }, + { + "epoch": 4.356842041015625e-05, + "model_forward_time": 0.025453805923461914, + "step": 28553 + }, + { + "epoch": 4.356842041015625e-05, + "step": 28553, + "training_step_time": 0.10446619987487793 + }, + { + "epoch": 4.35699462890625e-05, + "model_forward_time": 0.02574896812438965, + "step": 28554 + }, + { + "epoch": 4.35699462890625e-05, + "step": 28554, + "training_step_time": 0.10512971878051758 + }, + { + "epoch": 4.357147216796875e-05, + "model_forward_time": 0.025708436965942383, + "step": 28555 + }, + { + "epoch": 4.357147216796875e-05, + "step": 28555, + "training_step_time": 0.10616421699523926 + }, + { + "epoch": 4.3572998046875e-05, + "model_forward_time": 0.02606034278869629, + "step": 28556 + }, + { + "epoch": 4.3572998046875e-05, + "step": 28556, + "training_step_time": 0.11003375053405762 + }, + { + "epoch": 4.357452392578125e-05, + "model_forward_time": 0.026695966720581055, + "step": 28557 + }, + { + "epoch": 4.357452392578125e-05, + "step": 28557, + "training_step_time": 0.10884666442871094 + }, + { + "epoch": 4.35760498046875e-05, + "model_forward_time": 0.025632619857788086, + "step": 28558 + }, + { + "epoch": 4.35760498046875e-05, + "step": 28558, + "training_step_time": 0.10709595680236816 + }, + { + "epoch": 4.357757568359375e-05, + "model_forward_time": 0.025560617446899414, + "step": 28559 + }, + { + "epoch": 4.357757568359375e-05, + "step": 28559, + "training_step_time": 0.10614800453186035 + }, + { + "epoch": 4.35791015625e-05, + "grad_norm": 0.0649910569190979, + "learning_rate": 6.285834552247128e-07, + "loss": 0.006, + "step": 28560 + }, + { + "epoch": 4.35791015625e-05, + "model_forward_time": 0.02550482749938965, + "step": 28560 + }, + { + "epoch": 4.35791015625e-05, + "step": 28560, + "training_step_time": 0.10710692405700684 + }, + { + "epoch": 4.358062744140625e-05, + "model_forward_time": 0.026319503784179688, + "step": 28561 + }, + { + "epoch": 4.358062744140625e-05, + "step": 28561, + "training_step_time": 0.1077280044555664 + }, + { + "epoch": 4.35821533203125e-05, + "model_forward_time": 0.025849103927612305, + "step": 28562 + }, + { + "epoch": 4.35821533203125e-05, + "step": 28562, + "training_step_time": 0.13392400741577148 + }, + { + "epoch": 4.358367919921875e-05, + "model_forward_time": 0.02629566192626953, + "step": 28563 + }, + { + "epoch": 4.358367919921875e-05, + "step": 28563, + "training_step_time": 0.10712862014770508 + }, + { + "epoch": 4.3585205078125e-05, + "model_forward_time": 0.025490283966064453, + "step": 28564 + }, + { + "epoch": 4.3585205078125e-05, + "step": 28564, + "training_step_time": 0.14705657958984375 + }, + { + "epoch": 4.358673095703125e-05, + "model_forward_time": 0.02550506591796875, + "step": 28565 + }, + { + "epoch": 4.358673095703125e-05, + "step": 28565, + "training_step_time": 0.1252429485321045 + }, + { + "epoch": 4.35882568359375e-05, + "model_forward_time": 0.025360584259033203, + "step": 28566 + }, + { + "epoch": 4.35882568359375e-05, + "step": 28566, + "training_step_time": 0.2146608829498291 + }, + { + "epoch": 4.358978271484375e-05, + "model_forward_time": 0.024610280990600586, + "step": 28567 + }, + { + "epoch": 4.358978271484375e-05, + "step": 28567, + "training_step_time": 0.12557053565979004 + }, + { + "epoch": 4.359130859375e-05, + "model_forward_time": 0.025031566619873047, + "step": 28568 + }, + { + "epoch": 4.359130859375e-05, + "step": 28568, + "training_step_time": 0.12017822265625 + }, + { + "epoch": 4.359283447265625e-05, + "model_forward_time": 0.025310754776000977, + "step": 28569 + }, + { + "epoch": 4.359283447265625e-05, + "step": 28569, + "training_step_time": 0.1103971004486084 + }, + { + "epoch": 4.35943603515625e-05, + "grad_norm": 0.0443444661796093, + "learning_rate": 6.1990146530565e-07, + "loss": 0.0124, + "step": 28570 + }, + { + "epoch": 4.35943603515625e-05, + "model_forward_time": 0.024916410446166992, + "step": 28570 + }, + { + "epoch": 4.35943603515625e-05, + "step": 28570, + "training_step_time": 0.10602307319641113 + }, + { + "epoch": 4.359588623046875e-05, + "model_forward_time": 0.025303125381469727, + "step": 28571 + }, + { + "epoch": 4.359588623046875e-05, + "step": 28571, + "training_step_time": 0.1060328483581543 + }, + { + "epoch": 4.3597412109375e-05, + "model_forward_time": 0.025837421417236328, + "step": 28572 + }, + { + "epoch": 4.3597412109375e-05, + "step": 28572, + "training_step_time": 0.10612988471984863 + }, + { + "epoch": 4.359893798828125e-05, + "model_forward_time": 0.025573253631591797, + "step": 28573 + }, + { + "epoch": 4.359893798828125e-05, + "step": 28573, + "training_step_time": 0.10785531997680664 + }, + { + "epoch": 4.36004638671875e-05, + "model_forward_time": 0.025715112686157227, + "step": 28574 + }, + { + "epoch": 4.36004638671875e-05, + "step": 28574, + "training_step_time": 0.16468501091003418 + }, + { + "epoch": 4.360198974609375e-05, + "model_forward_time": 0.02467513084411621, + "step": 28575 + }, + { + "epoch": 4.360198974609375e-05, + "step": 28575, + "training_step_time": 0.10724735260009766 + }, + { + "epoch": 4.3603515625e-05, + "model_forward_time": 0.025193214416503906, + "step": 28576 + }, + { + "epoch": 4.3603515625e-05, + "step": 28576, + "training_step_time": 0.11670398712158203 + }, + { + "epoch": 4.360504150390625e-05, + "model_forward_time": 0.025644779205322266, + "step": 28577 + }, + { + "epoch": 4.360504150390625e-05, + "step": 28577, + "training_step_time": 0.10608243942260742 + }, + { + "epoch": 4.36065673828125e-05, + "model_forward_time": 0.025310516357421875, + "step": 28578 + }, + { + "epoch": 4.36065673828125e-05, + "step": 28578, + "training_step_time": 0.11088323593139648 + }, + { + "epoch": 4.360809326171875e-05, + "model_forward_time": 0.025697946548461914, + "step": 28579 + }, + { + "epoch": 4.360809326171875e-05, + "step": 28579, + "training_step_time": 0.12023282051086426 + }, + { + "epoch": 4.3609619140625e-05, + "grad_norm": 0.05735481157898903, + "learning_rate": 6.11279476862553e-07, + "loss": 0.0071, + "step": 28580 + }, + { + "epoch": 4.3609619140625e-05, + "model_forward_time": 0.025631189346313477, + "step": 28580 + }, + { + "epoch": 4.3609619140625e-05, + "step": 28580, + "training_step_time": 0.10937190055847168 + }, + { + "epoch": 4.361114501953125e-05, + "model_forward_time": 0.025699138641357422, + "step": 28581 + }, + { + "epoch": 4.361114501953125e-05, + "step": 28581, + "training_step_time": 0.14316773414611816 + }, + { + "epoch": 4.36126708984375e-05, + "model_forward_time": 0.026482105255126953, + "step": 28582 + }, + { + "epoch": 4.36126708984375e-05, + "step": 28582, + "training_step_time": 0.10617661476135254 + }, + { + "epoch": 4.361419677734375e-05, + "model_forward_time": 0.02537083625793457, + "step": 28583 + }, + { + "epoch": 4.361419677734375e-05, + "step": 28583, + "training_step_time": 0.16034579277038574 + }, + { + "epoch": 4.361572265625e-05, + "model_forward_time": 0.024753332138061523, + "step": 28584 + }, + { + "epoch": 4.361572265625e-05, + "step": 28584, + "training_step_time": 0.10776662826538086 + }, + { + "epoch": 4.361724853515625e-05, + "model_forward_time": 0.024502038955688477, + "step": 28585 + }, + { + "epoch": 4.361724853515625e-05, + "step": 28585, + "training_step_time": 0.10992574691772461 + }, + { + "epoch": 4.36187744140625e-05, + "model_forward_time": 0.02548360824584961, + "step": 28586 + }, + { + "epoch": 4.36187744140625e-05, + "step": 28586, + "training_step_time": 0.10399746894836426 + }, + { + "epoch": 4.362030029296875e-05, + "model_forward_time": 0.025161266326904297, + "step": 28587 + }, + { + "epoch": 4.362030029296875e-05, + "step": 28587, + "training_step_time": 0.1152961254119873 + }, + { + "epoch": 4.3621826171875e-05, + "model_forward_time": 0.025491714477539062, + "step": 28588 + }, + { + "epoch": 4.3621826171875e-05, + "step": 28588, + "training_step_time": 0.12170577049255371 + }, + { + "epoch": 4.362335205078125e-05, + "model_forward_time": 0.025161266326904297, + "step": 28589 + }, + { + "epoch": 4.362335205078125e-05, + "step": 28589, + "training_step_time": 0.13399744033813477 + }, + { + "epoch": 4.36248779296875e-05, + "grad_norm": 0.04751794412732124, + "learning_rate": 6.027175003719354e-07, + "loss": 0.0042, + "step": 28590 + }, + { + "epoch": 4.36248779296875e-05, + "model_forward_time": 0.025127172470092773, + "step": 28590 + }, + { + "epoch": 4.36248779296875e-05, + "step": 28590, + "training_step_time": 0.12870049476623535 + }, + { + "epoch": 4.362640380859375e-05, + "model_forward_time": 0.024999141693115234, + "step": 28591 + }, + { + "epoch": 4.362640380859375e-05, + "step": 28591, + "training_step_time": 0.1277327537536621 + }, + { + "epoch": 4.36279296875e-05, + "model_forward_time": 0.024950504302978516, + "step": 28592 + }, + { + "epoch": 4.36279296875e-05, + "step": 28592, + "training_step_time": 0.12285947799682617 + }, + { + "epoch": 4.362945556640625e-05, + "model_forward_time": 0.02522587776184082, + "step": 28593 + }, + { + "epoch": 4.362945556640625e-05, + "step": 28593, + "training_step_time": 0.12109613418579102 + }, + { + "epoch": 4.36309814453125e-05, + "model_forward_time": 0.02544379234313965, + "step": 28594 + }, + { + "epoch": 4.36309814453125e-05, + "step": 28594, + "training_step_time": 0.11397051811218262 + }, + { + "epoch": 4.363250732421875e-05, + "model_forward_time": 0.026660442352294922, + "step": 28595 + }, + { + "epoch": 4.363250732421875e-05, + "step": 28595, + "training_step_time": 0.11322784423828125 + }, + { + "epoch": 4.3634033203125e-05, + "model_forward_time": 0.02526545524597168, + "step": 28596 + }, + { + "epoch": 4.3634033203125e-05, + "step": 28596, + "training_step_time": 0.11051225662231445 + }, + { + "epoch": 4.363555908203125e-05, + "model_forward_time": 0.025479555130004883, + "step": 28597 + }, + { + "epoch": 4.363555908203125e-05, + "step": 28597, + "training_step_time": 0.11149311065673828 + }, + { + "epoch": 4.36370849609375e-05, + "model_forward_time": 0.025597333908081055, + "step": 28598 + }, + { + "epoch": 4.36370849609375e-05, + "step": 28598, + "training_step_time": 0.10731387138366699 + }, + { + "epoch": 4.363861083984375e-05, + "model_forward_time": 0.025788545608520508, + "step": 28599 + }, + { + "epoch": 4.363861083984375e-05, + "step": 28599, + "training_step_time": 0.10622906684875488 + }, + { + "epoch": 4.364013671875e-05, + "grad_norm": 0.04318931698799133, + "learning_rate": 5.9421554623742e-07, + "loss": 0.0024, + "step": 28600 + }, + { + "epoch": 4.364013671875e-05, + "model_forward_time": 0.024780750274658203, + "step": 28600 + }, + { + "epoch": 4.364013671875e-05, + "step": 28600, + "training_step_time": 0.10535550117492676 + }, + { + "epoch": 4.364166259765625e-05, + "model_forward_time": 0.025745153427124023, + "step": 28601 + }, + { + "epoch": 4.364166259765625e-05, + "step": 28601, + "training_step_time": 0.10775208473205566 + }, + { + "epoch": 4.36431884765625e-05, + "model_forward_time": 0.025554656982421875, + "step": 28602 + }, + { + "epoch": 4.36431884765625e-05, + "step": 28602, + "training_step_time": 0.10738229751586914 + }, + { + "epoch": 4.364471435546875e-05, + "model_forward_time": 0.02525496482849121, + "step": 28603 + }, + { + "epoch": 4.364471435546875e-05, + "step": 28603, + "training_step_time": 0.10945391654968262 + }, + { + "epoch": 4.3646240234375e-05, + "model_forward_time": 0.025452375411987305, + "step": 28604 + }, + { + "epoch": 4.3646240234375e-05, + "step": 28604, + "training_step_time": 0.10456585884094238 + }, + { + "epoch": 4.364776611328125e-05, + "model_forward_time": 0.024976253509521484, + "step": 28605 + }, + { + "epoch": 4.364776611328125e-05, + "step": 28605, + "training_step_time": 0.10734701156616211 + }, + { + "epoch": 4.36492919921875e-05, + "model_forward_time": 0.02535700798034668, + "step": 28606 + }, + { + "epoch": 4.36492919921875e-05, + "step": 28606, + "training_step_time": 0.10553812980651855 + }, + { + "epoch": 4.365081787109375e-05, + "model_forward_time": 0.025526046752929688, + "step": 28607 + }, + { + "epoch": 4.365081787109375e-05, + "step": 28607, + "training_step_time": 0.10612058639526367 + }, + { + "epoch": 4.365234375e-05, + "model_forward_time": 0.025731325149536133, + "step": 28608 + }, + { + "epoch": 4.365234375e-05, + "step": 28608, + "training_step_time": 0.10743904113769531 + }, + { + "epoch": 4.365386962890625e-05, + "model_forward_time": 0.025513648986816406, + "step": 28609 + }, + { + "epoch": 4.365386962890625e-05, + "step": 28609, + "training_step_time": 0.1964414119720459 + }, + { + "epoch": 4.36553955078125e-05, + "grad_norm": 0.07380035519599915, + "learning_rate": 5.857736247896706e-07, + "loss": 0.0121, + "step": 28610 + }, + { + "epoch": 4.36553955078125e-05, + "model_forward_time": 0.024281024932861328, + "step": 28610 + }, + { + "epoch": 4.36553955078125e-05, + "step": 28610, + "training_step_time": 0.10724282264709473 + }, + { + "epoch": 4.365692138671875e-05, + "model_forward_time": 0.024554729461669922, + "step": 28611 + }, + { + "epoch": 4.365692138671875e-05, + "step": 28611, + "training_step_time": 0.14631390571594238 + }, + { + "epoch": 4.3658447265625e-05, + "model_forward_time": 0.025051116943359375, + "step": 28612 + }, + { + "epoch": 4.3658447265625e-05, + "step": 28612, + "training_step_time": 0.11396408081054688 + }, + { + "epoch": 4.365997314453125e-05, + "model_forward_time": 0.02541327476501465, + "step": 28613 + }, + { + "epoch": 4.365997314453125e-05, + "step": 28613, + "training_step_time": 0.19150447845458984 + }, + { + "epoch": 4.36614990234375e-05, + "model_forward_time": 0.024627685546875, + "step": 28614 + }, + { + "epoch": 4.36614990234375e-05, + "step": 28614, + "training_step_time": 0.1318202018737793 + }, + { + "epoch": 4.366302490234375e-05, + "model_forward_time": 0.02457594871520996, + "step": 28615 + }, + { + "epoch": 4.366302490234375e-05, + "step": 28615, + "training_step_time": 0.11049270629882812 + }, + { + "epoch": 4.366455078125e-05, + "model_forward_time": 0.025348186492919922, + "step": 28616 + }, + { + "epoch": 4.366455078125e-05, + "step": 28616, + "training_step_time": 0.10710954666137695 + }, + { + "epoch": 4.366607666015625e-05, + "model_forward_time": 0.0253903865814209, + "step": 28617 + }, + { + "epoch": 4.366607666015625e-05, + "step": 28617, + "training_step_time": 0.10844111442565918 + }, + { + "epoch": 4.36676025390625e-05, + "model_forward_time": 0.025473356246948242, + "step": 28618 + }, + { + "epoch": 4.36676025390625e-05, + "step": 28618, + "training_step_time": 0.10583710670471191 + }, + { + "epoch": 4.366912841796875e-05, + "model_forward_time": 0.025549650192260742, + "step": 28619 + }, + { + "epoch": 4.366912841796875e-05, + "step": 28619, + "training_step_time": 0.1052405834197998 + }, + { + "epoch": 4.3670654296875e-05, + "grad_norm": 0.05241416022181511, + "learning_rate": 5.773917462864264e-07, + "loss": 0.0082, + "step": 28620 + }, + { + "epoch": 4.3670654296875e-05, + "model_forward_time": 0.025548219680786133, + "step": 28620 + }, + { + "epoch": 4.3670654296875e-05, + "step": 28620, + "training_step_time": 0.10493206977844238 + }, + { + "epoch": 4.367218017578125e-05, + "model_forward_time": 0.025765419006347656, + "step": 28621 + }, + { + "epoch": 4.367218017578125e-05, + "step": 28621, + "training_step_time": 0.2019200325012207 + }, + { + "epoch": 4.36737060546875e-05, + "model_forward_time": 0.024622201919555664, + "step": 28622 + }, + { + "epoch": 4.36737060546875e-05, + "step": 28622, + "training_step_time": 0.18738198280334473 + }, + { + "epoch": 4.367523193359375e-05, + "model_forward_time": 0.024585485458374023, + "step": 28623 + }, + { + "epoch": 4.367523193359375e-05, + "step": 28623, + "training_step_time": 0.14621329307556152 + }, + { + "epoch": 4.36767578125e-05, + "model_forward_time": 0.02507328987121582, + "step": 28624 + }, + { + "epoch": 4.36767578125e-05, + "step": 28624, + "training_step_time": 0.12362337112426758 + }, + { + "epoch": 4.367828369140625e-05, + "model_forward_time": 0.024922847747802734, + "step": 28625 + }, + { + "epoch": 4.367828369140625e-05, + "step": 28625, + "training_step_time": 0.11743831634521484 + }, + { + "epoch": 4.36798095703125e-05, + "model_forward_time": 0.02505946159362793, + "step": 28626 + }, + { + "epoch": 4.36798095703125e-05, + "step": 28626, + "training_step_time": 0.11562252044677734 + }, + { + "epoch": 4.368133544921875e-05, + "model_forward_time": 0.02549123764038086, + "step": 28627 + }, + { + "epoch": 4.368133544921875e-05, + "step": 28627, + "training_step_time": 0.12139701843261719 + }, + { + "epoch": 4.3682861328125e-05, + "model_forward_time": 0.02548503875732422, + "step": 28628 + }, + { + "epoch": 4.3682861328125e-05, + "step": 28628, + "training_step_time": 0.10976624488830566 + }, + { + "epoch": 4.368438720703125e-05, + "model_forward_time": 0.025452136993408203, + "step": 28629 + }, + { + "epoch": 4.368438720703125e-05, + "step": 28629, + "training_step_time": 0.11101984977722168 + }, + { + "epoch": 4.36859130859375e-05, + "grad_norm": 0.02089017629623413, + "learning_rate": 5.690699209124573e-07, + "loss": 0.0033, + "step": 28630 + }, + { + "epoch": 4.36859130859375e-05, + "model_forward_time": 0.024380207061767578, + "step": 28630 + }, + { + "epoch": 4.36859130859375e-05, + "step": 28630, + "training_step_time": 0.11689567565917969 + }, + { + "epoch": 4.368743896484375e-05, + "model_forward_time": 0.025591611862182617, + "step": 28631 + }, + { + "epoch": 4.368743896484375e-05, + "step": 28631, + "training_step_time": 0.11536622047424316 + }, + { + "epoch": 4.368896484375e-05, + "model_forward_time": 0.025465965270996094, + "step": 28632 + }, + { + "epoch": 4.368896484375e-05, + "step": 28632, + "training_step_time": 0.1165170669555664 + }, + { + "epoch": 4.369049072265625e-05, + "model_forward_time": 0.025345563888549805, + "step": 28633 + }, + { + "epoch": 4.369049072265625e-05, + "step": 28633, + "training_step_time": 0.12158536911010742 + }, + { + "epoch": 4.36920166015625e-05, + "model_forward_time": 0.025699853897094727, + "step": 28634 + }, + { + "epoch": 4.36920166015625e-05, + "step": 28634, + "training_step_time": 0.11202764511108398 + }, + { + "epoch": 4.369354248046875e-05, + "model_forward_time": 0.02570509910583496, + "step": 28635 + }, + { + "epoch": 4.369354248046875e-05, + "step": 28635, + "training_step_time": 0.10531783103942871 + }, + { + "epoch": 4.3695068359375e-05, + "model_forward_time": 0.02556300163269043, + "step": 28636 + }, + { + "epoch": 4.3695068359375e-05, + "step": 28636, + "training_step_time": 0.10564160346984863 + }, + { + "epoch": 4.369659423828125e-05, + "model_forward_time": 0.02572011947631836, + "step": 28637 + }, + { + "epoch": 4.369659423828125e-05, + "step": 28637, + "training_step_time": 0.10660719871520996 + }, + { + "epoch": 4.36981201171875e-05, + "model_forward_time": 0.02571702003479004, + "step": 28638 + }, + { + "epoch": 4.36981201171875e-05, + "step": 28638, + "training_step_time": 0.10975003242492676 + }, + { + "epoch": 4.369964599609375e-05, + "model_forward_time": 0.02564716339111328, + "step": 28639 + }, + { + "epoch": 4.369964599609375e-05, + "step": 28639, + "training_step_time": 0.10535955429077148 + }, + { + "epoch": 4.3701171875e-05, + "grad_norm": 0.02224591188132763, + "learning_rate": 5.608081587795688e-07, + "loss": 0.0041, + "step": 28640 + }, + { + "epoch": 4.3701171875e-05, + "model_forward_time": 0.02522587776184082, + "step": 28640 + }, + { + "epoch": 4.3701171875e-05, + "step": 28640, + "training_step_time": 0.10827207565307617 + }, + { + "epoch": 4.370269775390625e-05, + "model_forward_time": 0.02577948570251465, + "step": 28641 + }, + { + "epoch": 4.370269775390625e-05, + "step": 28641, + "training_step_time": 0.10598969459533691 + }, + { + "epoch": 4.37042236328125e-05, + "model_forward_time": 0.025465965270996094, + "step": 28642 + }, + { + "epoch": 4.37042236328125e-05, + "step": 28642, + "training_step_time": 0.1079413890838623 + }, + { + "epoch": 4.370574951171875e-05, + "model_forward_time": 0.02557229995727539, + "step": 28643 + }, + { + "epoch": 4.370574951171875e-05, + "step": 28643, + "training_step_time": 0.10524821281433105 + }, + { + "epoch": 4.3707275390625e-05, + "model_forward_time": 0.025959253311157227, + "step": 28644 + }, + { + "epoch": 4.3707275390625e-05, + "step": 28644, + "training_step_time": 0.10634231567382812 + }, + { + "epoch": 4.370880126953125e-05, + "model_forward_time": 0.02557373046875, + "step": 28645 + }, + { + "epoch": 4.370880126953125e-05, + "step": 28645, + "training_step_time": 0.10688996315002441 + }, + { + "epoch": 4.37103271484375e-05, + "model_forward_time": 0.025282859802246094, + "step": 28646 + }, + { + "epoch": 4.37103271484375e-05, + "step": 28646, + "training_step_time": 0.10584163665771484 + }, + { + "epoch": 4.371185302734375e-05, + "model_forward_time": 0.02537059783935547, + "step": 28647 + }, + { + "epoch": 4.371185302734375e-05, + "step": 28647, + "training_step_time": 0.11057186126708984 + }, + { + "epoch": 4.371337890625e-05, + "model_forward_time": 0.025252103805541992, + "step": 28648 + }, + { + "epoch": 4.371337890625e-05, + "step": 28648, + "training_step_time": 0.11389875411987305 + }, + { + "epoch": 4.371490478515625e-05, + "model_forward_time": 0.02669548988342285, + "step": 28649 + }, + { + "epoch": 4.371490478515625e-05, + "step": 28649, + "training_step_time": 0.10694527626037598 + }, + { + "epoch": 4.37164306640625e-05, + "grad_norm": 0.031245356425642967, + "learning_rate": 5.526064699265753e-07, + "loss": 0.0035, + "step": 28650 + }, + { + "epoch": 4.37164306640625e-05, + "model_forward_time": 0.027602434158325195, + "step": 28650 + }, + { + "epoch": 4.37164306640625e-05, + "step": 28650, + "training_step_time": 0.10717177391052246 + }, + { + "epoch": 4.371795654296875e-05, + "model_forward_time": 0.02560281753540039, + "step": 28651 + }, + { + "epoch": 4.371795654296875e-05, + "step": 28651, + "training_step_time": 0.1047677993774414 + }, + { + "epoch": 4.3719482421875e-05, + "model_forward_time": 0.025110244750976562, + "step": 28652 + }, + { + "epoch": 4.3719482421875e-05, + "step": 28652, + "training_step_time": 0.10331916809082031 + }, + { + "epoch": 4.372100830078125e-05, + "model_forward_time": 0.025554418563842773, + "step": 28653 + }, + { + "epoch": 4.372100830078125e-05, + "step": 28653, + "training_step_time": 0.10694169998168945 + }, + { + "epoch": 4.37225341796875e-05, + "model_forward_time": 0.026206493377685547, + "step": 28654 + }, + { + "epoch": 4.37225341796875e-05, + "step": 28654, + "training_step_time": 0.10876035690307617 + }, + { + "epoch": 4.372406005859375e-05, + "model_forward_time": 0.02534341812133789, + "step": 28655 + }, + { + "epoch": 4.372406005859375e-05, + "step": 28655, + "training_step_time": 0.19789481163024902 + }, + { + "epoch": 4.37255859375e-05, + "model_forward_time": 0.024993419647216797, + "step": 28656 + }, + { + "epoch": 4.37255859375e-05, + "step": 28656, + "training_step_time": 0.11295914649963379 + }, + { + "epoch": 4.372711181640625e-05, + "model_forward_time": 0.025389909744262695, + "step": 28657 + }, + { + "epoch": 4.372711181640625e-05, + "step": 28657, + "training_step_time": 0.10670018196105957 + }, + { + "epoch": 4.37286376953125e-05, + "model_forward_time": 0.024616479873657227, + "step": 28658 + }, + { + "epoch": 4.37286376953125e-05, + "step": 28658, + "training_step_time": 0.14543581008911133 + }, + { + "epoch": 4.373016357421875e-05, + "model_forward_time": 0.024707317352294922, + "step": 28659 + }, + { + "epoch": 4.373016357421875e-05, + "step": 28659, + "training_step_time": 0.16424298286437988 + }, + { + "epoch": 4.3731689453125e-05, + "grad_norm": 0.22222883999347687, + "learning_rate": 5.444648643193051e-07, + "loss": 0.0053, + "step": 28660 + }, + { + "epoch": 4.3731689453125e-05, + "model_forward_time": 0.02474188804626465, + "step": 28660 + }, + { + "epoch": 4.3731689453125e-05, + "step": 28660, + "training_step_time": 0.10510492324829102 + }, + { + "epoch": 4.373321533203125e-05, + "model_forward_time": 0.024711132049560547, + "step": 28661 + }, + { + "epoch": 4.373321533203125e-05, + "step": 28661, + "training_step_time": 0.12618732452392578 + }, + { + "epoch": 4.37347412109375e-05, + "model_forward_time": 0.025019407272338867, + "step": 28662 + }, + { + "epoch": 4.37347412109375e-05, + "step": 28662, + "training_step_time": 0.19892597198486328 + }, + { + "epoch": 4.373626708984375e-05, + "model_forward_time": 0.024721860885620117, + "step": 28663 + }, + { + "epoch": 4.373626708984375e-05, + "step": 28663, + "training_step_time": 0.10156416893005371 + }, + { + "epoch": 4.373779296875e-05, + "model_forward_time": 0.02506566047668457, + "step": 28664 + }, + { + "epoch": 4.373779296875e-05, + "step": 28664, + "training_step_time": 0.10683441162109375 + }, + { + "epoch": 4.373931884765625e-05, + "model_forward_time": 0.025484323501586914, + "step": 28665 + }, + { + "epoch": 4.373931884765625e-05, + "step": 28665, + "training_step_time": 0.10516858100891113 + }, + { + "epoch": 4.37408447265625e-05, + "model_forward_time": 0.025399208068847656, + "step": 28666 + }, + { + "epoch": 4.37408447265625e-05, + "step": 28666, + "training_step_time": 0.10379433631896973 + }, + { + "epoch": 4.374237060546875e-05, + "model_forward_time": 0.025669574737548828, + "step": 28667 + }, + { + "epoch": 4.374237060546875e-05, + "step": 28667, + "training_step_time": 0.10510706901550293 + }, + { + "epoch": 4.3743896484375e-05, + "model_forward_time": 0.02569293975830078, + "step": 28668 + }, + { + "epoch": 4.3743896484375e-05, + "step": 28668, + "training_step_time": 0.10596442222595215 + }, + { + "epoch": 4.374542236328125e-05, + "model_forward_time": 0.025362730026245117, + "step": 28669 + }, + { + "epoch": 4.374542236328125e-05, + "step": 28669, + "training_step_time": 0.16637587547302246 + }, + { + "epoch": 4.37469482421875e-05, + "grad_norm": 0.030979402363300323, + "learning_rate": 5.363833518505834e-07, + "loss": 0.0027, + "step": 28670 + }, + { + "epoch": 4.37469482421875e-05, + "model_forward_time": 0.024903297424316406, + "step": 28670 + }, + { + "epoch": 4.37469482421875e-05, + "step": 28670, + "training_step_time": 0.20902800559997559 + }, + { + "epoch": 4.374847412109375e-05, + "model_forward_time": 0.0249326229095459, + "step": 28671 + }, + { + "epoch": 4.374847412109375e-05, + "step": 28671, + "training_step_time": 0.10184240341186523 + }, + { + "epoch": 4.375e-05, + "model_forward_time": 0.024610519409179688, + "step": 28672 + }, + { + "epoch": 4.375e-05, + "step": 28672, + "training_step_time": 0.11134147644042969 + }, + { + "epoch": 4.375152587890625e-05, + "model_forward_time": 0.026660680770874023, + "step": 28673 + }, + { + "epoch": 4.375152587890625e-05, + "step": 28673, + "training_step_time": 0.13817405700683594 + }, + { + "epoch": 4.37530517578125e-05, + "model_forward_time": 0.02506566047668457, + "step": 28674 + }, + { + "epoch": 4.37530517578125e-05, + "step": 28674, + "training_step_time": 0.13202238082885742 + }, + { + "epoch": 4.375457763671875e-05, + "model_forward_time": 0.02370929718017578, + "step": 28675 + }, + { + "epoch": 4.375457763671875e-05, + "step": 28675, + "training_step_time": 0.19225335121154785 + }, + { + "epoch": 4.3756103515625e-05, + "model_forward_time": 0.025133132934570312, + "step": 28676 + }, + { + "epoch": 4.3756103515625e-05, + "step": 28676, + "training_step_time": 0.15506219863891602 + }, + { + "epoch": 4.375762939453125e-05, + "model_forward_time": 0.024753808975219727, + "step": 28677 + }, + { + "epoch": 4.375762939453125e-05, + "step": 28677, + "training_step_time": 0.19497227668762207 + }, + { + "epoch": 4.37591552734375e-05, + "model_forward_time": 0.024383068084716797, + "step": 28678 + }, + { + "epoch": 4.37591552734375e-05, + "step": 28678, + "training_step_time": 0.12999725341796875 + }, + { + "epoch": 4.376068115234375e-05, + "model_forward_time": 0.024532794952392578, + "step": 28679 + }, + { + "epoch": 4.376068115234375e-05, + "step": 28679, + "training_step_time": 0.18098139762878418 + }, + { + "epoch": 4.376220703125e-05, + "grad_norm": 0.05937690660357475, + "learning_rate": 5.283619423401998e-07, + "loss": 0.0047, + "step": 28680 + }, + { + "epoch": 4.376220703125e-05, + "model_forward_time": 0.024864673614501953, + "step": 28680 + }, + { + "epoch": 4.376220703125e-05, + "step": 28680, + "training_step_time": 0.11048579216003418 + }, + { + "epoch": 4.376373291015625e-05, + "model_forward_time": 0.024738550186157227, + "step": 28681 + }, + { + "epoch": 4.376373291015625e-05, + "step": 28681, + "training_step_time": 0.11281442642211914 + }, + { + "epoch": 4.37652587890625e-05, + "model_forward_time": 0.02571272850036621, + "step": 28682 + }, + { + "epoch": 4.37652587890625e-05, + "step": 28682, + "training_step_time": 0.11158180236816406 + }, + { + "epoch": 4.376678466796875e-05, + "model_forward_time": 0.025527000427246094, + "step": 28683 + }, + { + "epoch": 4.376678466796875e-05, + "step": 28683, + "training_step_time": 0.10747480392456055 + }, + { + "epoch": 4.3768310546875e-05, + "model_forward_time": 0.025927305221557617, + "step": 28684 + }, + { + "epoch": 4.3768310546875e-05, + "step": 28684, + "training_step_time": 0.11036086082458496 + }, + { + "epoch": 4.376983642578125e-05, + "model_forward_time": 0.025383472442626953, + "step": 28685 + }, + { + "epoch": 4.376983642578125e-05, + "step": 28685, + "training_step_time": 0.1073920726776123 + }, + { + "epoch": 4.37713623046875e-05, + "model_forward_time": 0.025612831115722656, + "step": 28686 + }, + { + "epoch": 4.37713623046875e-05, + "step": 28686, + "training_step_time": 0.10630631446838379 + }, + { + "epoch": 4.377288818359375e-05, + "model_forward_time": 0.02557659149169922, + "step": 28687 + }, + { + "epoch": 4.377288818359375e-05, + "step": 28687, + "training_step_time": 0.10571908950805664 + }, + { + "epoch": 4.37744140625e-05, + "model_forward_time": 0.025554656982421875, + "step": 28688 + }, + { + "epoch": 4.37744140625e-05, + "step": 28688, + "training_step_time": 0.1061251163482666 + }, + { + "epoch": 4.377593994140625e-05, + "model_forward_time": 0.02557206153869629, + "step": 28689 + }, + { + "epoch": 4.377593994140625e-05, + "step": 28689, + "training_step_time": 0.10648465156555176 + }, + { + "epoch": 4.37774658203125e-05, + "grad_norm": 0.04048529267311096, + "learning_rate": 5.204006455349297e-07, + "loss": 0.0048, + "step": 28690 + }, + { + "epoch": 4.37774658203125e-05, + "model_forward_time": 0.025365591049194336, + "step": 28690 + }, + { + "epoch": 4.37774658203125e-05, + "step": 28690, + "training_step_time": 0.10643601417541504 + }, + { + "epoch": 4.377899169921875e-05, + "model_forward_time": 0.025557756423950195, + "step": 28691 + }, + { + "epoch": 4.377899169921875e-05, + "step": 28691, + "training_step_time": 0.10906744003295898 + }, + { + "epoch": 4.3780517578125e-05, + "model_forward_time": 0.02510547637939453, + "step": 28692 + }, + { + "epoch": 4.3780517578125e-05, + "step": 28692, + "training_step_time": 0.10975503921508789 + }, + { + "epoch": 4.378204345703125e-05, + "model_forward_time": 0.025404930114746094, + "step": 28693 + }, + { + "epoch": 4.378204345703125e-05, + "step": 28693, + "training_step_time": 0.10972857475280762 + }, + { + "epoch": 4.37835693359375e-05, + "model_forward_time": 0.02550053596496582, + "step": 28694 + }, + { + "epoch": 4.37835693359375e-05, + "step": 28694, + "training_step_time": 0.10531830787658691 + }, + { + "epoch": 4.378509521484375e-05, + "model_forward_time": 0.02547740936279297, + "step": 28695 + }, + { + "epoch": 4.378509521484375e-05, + "step": 28695, + "training_step_time": 0.105316162109375 + }, + { + "epoch": 4.378662109375e-05, + "model_forward_time": 0.0255124568939209, + "step": 28696 + }, + { + "epoch": 4.378662109375e-05, + "step": 28696, + "training_step_time": 0.10662174224853516 + }, + { + "epoch": 4.378814697265625e-05, + "model_forward_time": 0.02524733543395996, + "step": 28697 + }, + { + "epoch": 4.378814697265625e-05, + "step": 28697, + "training_step_time": 0.10446643829345703 + }, + { + "epoch": 4.37896728515625e-05, + "model_forward_time": 0.025721073150634766, + "step": 28698 + }, + { + "epoch": 4.37896728515625e-05, + "step": 28698, + "training_step_time": 0.10605764389038086 + }, + { + "epoch": 4.379119873046875e-05, + "model_forward_time": 0.025681018829345703, + "step": 28699 + }, + { + "epoch": 4.379119873046875e-05, + "step": 28699, + "training_step_time": 0.19356274604797363 + }, + { + "epoch": 4.3792724609375e-05, + "grad_norm": 0.05778107792139053, + "learning_rate": 5.124994711084963e-07, + "loss": 0.0065, + "step": 28700 + }, + { + "epoch": 4.3792724609375e-05, + "model_forward_time": 0.025851011276245117, + "step": 28700 + }, + { + "epoch": 4.3792724609375e-05, + "step": 28700, + "training_step_time": 0.10588860511779785 + }, + { + "epoch": 4.379425048828125e-05, + "model_forward_time": 0.0242769718170166, + "step": 28701 + }, + { + "epoch": 4.379425048828125e-05, + "step": 28701, + "training_step_time": 0.12964534759521484 + }, + { + "epoch": 4.37957763671875e-05, + "model_forward_time": 0.02562689781188965, + "step": 28702 + }, + { + "epoch": 4.37957763671875e-05, + "step": 28702, + "training_step_time": 0.12981057167053223 + }, + { + "epoch": 4.379730224609375e-05, + "model_forward_time": 0.024976015090942383, + "step": 28703 + }, + { + "epoch": 4.379730224609375e-05, + "step": 28703, + "training_step_time": 0.21854662895202637 + }, + { + "epoch": 4.3798828125e-05, + "model_forward_time": 0.025269269943237305, + "step": 28704 + }, + { + "epoch": 4.3798828125e-05, + "step": 28704, + "training_step_time": 0.18779921531677246 + }, + { + "epoch": 4.380035400390625e-05, + "model_forward_time": 0.0251920223236084, + "step": 28705 + }, + { + "epoch": 4.380035400390625e-05, + "step": 28705, + "training_step_time": 0.12252402305603027 + }, + { + "epoch": 4.38018798828125e-05, + "model_forward_time": 0.024760961532592773, + "step": 28706 + }, + { + "epoch": 4.38018798828125e-05, + "step": 28706, + "training_step_time": 0.19674134254455566 + }, + { + "epoch": 4.380340576171875e-05, + "model_forward_time": 0.025147438049316406, + "step": 28707 + }, + { + "epoch": 4.380340576171875e-05, + "step": 28707, + "training_step_time": 0.10844016075134277 + }, + { + "epoch": 4.3804931640625e-05, + "model_forward_time": 0.024641036987304688, + "step": 28708 + }, + { + "epoch": 4.3804931640625e-05, + "step": 28708, + "training_step_time": 0.10479879379272461 + }, + { + "epoch": 4.380645751953125e-05, + "model_forward_time": 0.025074481964111328, + "step": 28709 + }, + { + "epoch": 4.380645751953125e-05, + "step": 28709, + "training_step_time": 0.10576105117797852 + }, + { + "epoch": 4.38079833984375e-05, + "grad_norm": 0.03397779166698456, + "learning_rate": 5.046584286615697e-07, + "loss": 0.003, + "step": 28710 + }, + { + "epoch": 4.38079833984375e-05, + "model_forward_time": 0.025345563888549805, + "step": 28710 + }, + { + "epoch": 4.38079833984375e-05, + "step": 28710, + "training_step_time": 0.10462260246276855 + }, + { + "epoch": 4.380950927734375e-05, + "model_forward_time": 0.025316715240478516, + "step": 28711 + }, + { + "epoch": 4.380950927734375e-05, + "step": 28711, + "training_step_time": 0.10436415672302246 + }, + { + "epoch": 4.381103515625e-05, + "model_forward_time": 0.02571725845336914, + "step": 28712 + }, + { + "epoch": 4.381103515625e-05, + "step": 28712, + "training_step_time": 0.10566592216491699 + }, + { + "epoch": 4.381256103515625e-05, + "model_forward_time": 0.025564908981323242, + "step": 28713 + }, + { + "epoch": 4.381256103515625e-05, + "step": 28713, + "training_step_time": 0.15004801750183105 + }, + { + "epoch": 4.38140869140625e-05, + "model_forward_time": 0.025386571884155273, + "step": 28714 + }, + { + "epoch": 4.38140869140625e-05, + "step": 28714, + "training_step_time": 0.10986542701721191 + }, + { + "epoch": 4.381561279296875e-05, + "model_forward_time": 0.025165319442749023, + "step": 28715 + }, + { + "epoch": 4.381561279296875e-05, + "step": 28715, + "training_step_time": 0.11510515213012695 + }, + { + "epoch": 4.3817138671875e-05, + "model_forward_time": 0.025249242782592773, + "step": 28716 + }, + { + "epoch": 4.3817138671875e-05, + "step": 28716, + "training_step_time": 0.13802194595336914 + }, + { + "epoch": 4.381866455078125e-05, + "model_forward_time": 0.02555084228515625, + "step": 28717 + }, + { + "epoch": 4.381866455078125e-05, + "step": 28717, + "training_step_time": 0.13551115989685059 + }, + { + "epoch": 4.38201904296875e-05, + "model_forward_time": 0.02500295639038086, + "step": 28718 + }, + { + "epoch": 4.38201904296875e-05, + "step": 28718, + "training_step_time": 0.1396193504333496 + }, + { + "epoch": 4.382171630859375e-05, + "model_forward_time": 0.0247800350189209, + "step": 28719 + }, + { + "epoch": 4.382171630859375e-05, + "step": 28719, + "training_step_time": 0.16705060005187988 + }, + { + "epoch": 4.38232421875e-05, + "grad_norm": 0.03233213350176811, + "learning_rate": 4.968775277217563e-07, + "loss": 0.0067, + "step": 28720 + }, + { + "epoch": 4.38232421875e-05, + "model_forward_time": 0.024508953094482422, + "step": 28720 + }, + { + "epoch": 4.38232421875e-05, + "step": 28720, + "training_step_time": 0.2248837947845459 + }, + { + "epoch": 4.382476806640625e-05, + "model_forward_time": 0.025029897689819336, + "step": 28721 + }, + { + "epoch": 4.382476806640625e-05, + "step": 28721, + "training_step_time": 0.11561918258666992 + }, + { + "epoch": 4.38262939453125e-05, + "model_forward_time": 0.02369213104248047, + "step": 28722 + }, + { + "epoch": 4.38262939453125e-05, + "step": 28722, + "training_step_time": 0.11866092681884766 + }, + { + "epoch": 4.382781982421875e-05, + "model_forward_time": 0.025710105895996094, + "step": 28723 + }, + { + "epoch": 4.382781982421875e-05, + "step": 28723, + "training_step_time": 0.1869335174560547 + }, + { + "epoch": 4.3829345703125e-05, + "model_forward_time": 0.02476358413696289, + "step": 28724 + }, + { + "epoch": 4.3829345703125e-05, + "step": 28724, + "training_step_time": 0.10866451263427734 + }, + { + "epoch": 4.383087158203125e-05, + "model_forward_time": 0.0243074893951416, + "step": 28725 + }, + { + "epoch": 4.383087158203125e-05, + "step": 28725, + "training_step_time": 0.1094825267791748 + }, + { + "epoch": 4.38323974609375e-05, + "model_forward_time": 0.025636911392211914, + "step": 28726 + }, + { + "epoch": 4.38323974609375e-05, + "step": 28726, + "training_step_time": 0.1097109317779541 + }, + { + "epoch": 4.383392333984375e-05, + "model_forward_time": 0.025471925735473633, + "step": 28727 + }, + { + "epoch": 4.383392333984375e-05, + "step": 28727, + "training_step_time": 0.10805296897888184 + }, + { + "epoch": 4.383544921875e-05, + "model_forward_time": 0.025635480880737305, + "step": 28728 + }, + { + "epoch": 4.383544921875e-05, + "step": 28728, + "training_step_time": 0.10610580444335938 + }, + { + "epoch": 4.383697509765625e-05, + "model_forward_time": 0.026263952255249023, + "step": 28729 + }, + { + "epoch": 4.383697509765625e-05, + "step": 28729, + "training_step_time": 0.10891938209533691 + }, + { + "epoch": 4.38385009765625e-05, + "grad_norm": 0.04797426238656044, + "learning_rate": 4.891567777435879e-07, + "loss": 0.0149, + "step": 28730 + }, + { + "epoch": 4.38385009765625e-05, + "model_forward_time": 0.024899005889892578, + "step": 28730 + }, + { + "epoch": 4.38385009765625e-05, + "step": 28730, + "training_step_time": 0.10553216934204102 + }, + { + "epoch": 4.384002685546875e-05, + "model_forward_time": 0.025508403778076172, + "step": 28731 + }, + { + "epoch": 4.384002685546875e-05, + "step": 28731, + "training_step_time": 0.10512709617614746 + }, + { + "epoch": 4.3841552734375e-05, + "model_forward_time": 0.025115251541137695, + "step": 28732 + }, + { + "epoch": 4.3841552734375e-05, + "step": 28732, + "training_step_time": 0.10852861404418945 + }, + { + "epoch": 4.384307861328125e-05, + "model_forward_time": 0.02540302276611328, + "step": 28733 + }, + { + "epoch": 4.384307861328125e-05, + "step": 28733, + "training_step_time": 0.106414794921875 + }, + { + "epoch": 4.38446044921875e-05, + "model_forward_time": 0.02573537826538086, + "step": 28734 + }, + { + "epoch": 4.38446044921875e-05, + "step": 28734, + "training_step_time": 0.10681629180908203 + }, + { + "epoch": 4.384613037109375e-05, + "model_forward_time": 0.02525162696838379, + "step": 28735 + }, + { + "epoch": 4.384613037109375e-05, + "step": 28735, + "training_step_time": 0.10460925102233887 + }, + { + "epoch": 4.384765625e-05, + "model_forward_time": 0.025490522384643555, + "step": 28736 + }, + { + "epoch": 4.384765625e-05, + "step": 28736, + "training_step_time": 0.10910987854003906 + }, + { + "epoch": 4.384918212890625e-05, + "model_forward_time": 0.025560379028320312, + "step": 28737 + }, + { + "epoch": 4.384918212890625e-05, + "step": 28737, + "training_step_time": 0.1045072078704834 + }, + { + "epoch": 4.38507080078125e-05, + "model_forward_time": 0.025290489196777344, + "step": 28738 + }, + { + "epoch": 4.38507080078125e-05, + "step": 28738, + "training_step_time": 0.10716915130615234 + }, + { + "epoch": 4.385223388671875e-05, + "model_forward_time": 0.02543783187866211, + "step": 28739 + }, + { + "epoch": 4.385223388671875e-05, + "step": 28739, + "training_step_time": 0.10564160346984863 + }, + { + "epoch": 4.3853759765625e-05, + "grad_norm": 0.027422254905104637, + "learning_rate": 4.814961881085045e-07, + "loss": 0.0023, + "step": 28740 + }, + { + "epoch": 4.3853759765625e-05, + "model_forward_time": 0.024868011474609375, + "step": 28740 + }, + { + "epoch": 4.3853759765625e-05, + "step": 28740, + "training_step_time": 0.10418844223022461 + }, + { + "epoch": 4.385528564453125e-05, + "model_forward_time": 0.02553868293762207, + "step": 28741 + }, + { + "epoch": 4.385528564453125e-05, + "step": 28741, + "training_step_time": 0.106231689453125 + }, + { + "epoch": 4.38568115234375e-05, + "model_forward_time": 0.025843143463134766, + "step": 28742 + }, + { + "epoch": 4.38568115234375e-05, + "step": 28742, + "training_step_time": 0.10389828681945801 + }, + { + "epoch": 4.385833740234375e-05, + "model_forward_time": 0.02524733543395996, + "step": 28743 + }, + { + "epoch": 4.385833740234375e-05, + "step": 28743, + "training_step_time": 0.10532879829406738 + }, + { + "epoch": 4.385986328125e-05, + "model_forward_time": 0.025456905364990234, + "step": 28744 + }, + { + "epoch": 4.385986328125e-05, + "step": 28744, + "training_step_time": 0.1393134593963623 + }, + { + "epoch": 4.386138916015625e-05, + "model_forward_time": 0.02593255043029785, + "step": 28745 + }, + { + "epoch": 4.386138916015625e-05, + "step": 28745, + "training_step_time": 0.1181640625 + }, + { + "epoch": 4.38629150390625e-05, + "model_forward_time": 0.025081396102905273, + "step": 28746 + }, + { + "epoch": 4.38629150390625e-05, + "step": 28746, + "training_step_time": 0.12436532974243164 + }, + { + "epoch": 4.386444091796875e-05, + "model_forward_time": 0.02532172203063965, + "step": 28747 + }, + { + "epoch": 4.386444091796875e-05, + "step": 28747, + "training_step_time": 0.10606884956359863 + }, + { + "epoch": 4.3865966796875e-05, + "model_forward_time": 0.0247647762298584, + "step": 28748 + }, + { + "epoch": 4.3865966796875e-05, + "step": 28748, + "training_step_time": 0.2152705192565918 + }, + { + "epoch": 4.386749267578125e-05, + "model_forward_time": 0.024791717529296875, + "step": 28749 + }, + { + "epoch": 4.386749267578125e-05, + "step": 28749, + "training_step_time": 0.11233186721801758 + }, + { + "epoch": 4.38690185546875e-05, + "grad_norm": 0.050352275371551514, + "learning_rate": 4.738957681248379e-07, + "loss": 0.0043, + "step": 28750 + }, + { + "epoch": 4.38690185546875e-05, + "model_forward_time": 0.02482318878173828, + "step": 28750 + }, + { + "epoch": 4.38690185546875e-05, + "step": 28750, + "training_step_time": 0.21964049339294434 + }, + { + "epoch": 4.387054443359375e-05, + "model_forward_time": 0.024962186813354492, + "step": 28751 + }, + { + "epoch": 4.387054443359375e-05, + "step": 28751, + "training_step_time": 0.1973869800567627 + }, + { + "epoch": 4.38720703125e-05, + "model_forward_time": 0.025477170944213867, + "step": 28752 + }, + { + "epoch": 4.38720703125e-05, + "step": 28752, + "training_step_time": 0.10285401344299316 + }, + { + "epoch": 4.387359619140625e-05, + "model_forward_time": 0.025172710418701172, + "step": 28753 + }, + { + "epoch": 4.387359619140625e-05, + "step": 28753, + "training_step_time": 0.1022496223449707 + }, + { + "epoch": 4.38751220703125e-05, + "model_forward_time": 0.025309085845947266, + "step": 28754 + }, + { + "epoch": 4.38751220703125e-05, + "step": 28754, + "training_step_time": 0.1055145263671875 + }, + { + "epoch": 4.387664794921875e-05, + "model_forward_time": 0.025611162185668945, + "step": 28755 + }, + { + "epoch": 4.387664794921875e-05, + "step": 28755, + "training_step_time": 0.10721397399902344 + }, + { + "epoch": 4.3878173828125e-05, + "model_forward_time": 0.024740934371948242, + "step": 28756 + }, + { + "epoch": 4.3878173828125e-05, + "step": 28756, + "training_step_time": 0.10711526870727539 + }, + { + "epoch": 4.387969970703125e-05, + "model_forward_time": 0.024724483489990234, + "step": 28757 + }, + { + "epoch": 4.387969970703125e-05, + "step": 28757, + "training_step_time": 0.11022591590881348 + }, + { + "epoch": 4.38812255859375e-05, + "model_forward_time": 0.0256197452545166, + "step": 28758 + }, + { + "epoch": 4.38812255859375e-05, + "step": 28758, + "training_step_time": 0.11146116256713867 + }, + { + "epoch": 4.388275146484375e-05, + "model_forward_time": 0.02564549446105957, + "step": 28759 + }, + { + "epoch": 4.388275146484375e-05, + "step": 28759, + "training_step_time": 0.15193724632263184 + }, + { + "epoch": 4.388427734375e-05, + "grad_norm": 0.07244842499494553, + "learning_rate": 4.6635552702782305e-07, + "loss": 0.0101, + "step": 28760 + }, + { + "epoch": 4.388427734375e-05, + "model_forward_time": 0.025867700576782227, + "step": 28760 + }, + { + "epoch": 4.388427734375e-05, + "step": 28760, + "training_step_time": 0.23668694496154785 + }, + { + "epoch": 4.388580322265625e-05, + "model_forward_time": 0.024877548217773438, + "step": 28761 + }, + { + "epoch": 4.388580322265625e-05, + "step": 28761, + "training_step_time": 0.22859525680541992 + }, + { + "epoch": 4.38873291015625e-05, + "model_forward_time": 0.024869441986083984, + "step": 28762 + }, + { + "epoch": 4.38873291015625e-05, + "step": 28762, + "training_step_time": 0.17171335220336914 + }, + { + "epoch": 4.388885498046875e-05, + "model_forward_time": 0.024939298629760742, + "step": 28763 + }, + { + "epoch": 4.388885498046875e-05, + "step": 28763, + "training_step_time": 0.19254827499389648 + }, + { + "epoch": 4.3890380859375e-05, + "model_forward_time": 0.025153398513793945, + "step": 28764 + }, + { + "epoch": 4.3890380859375e-05, + "step": 28764, + "training_step_time": 0.1391303539276123 + }, + { + "epoch": 4.389190673828125e-05, + "model_forward_time": 0.02534031867980957, + "step": 28765 + }, + { + "epoch": 4.389190673828125e-05, + "step": 28765, + "training_step_time": 0.18651866912841797 + }, + { + "epoch": 4.38934326171875e-05, + "model_forward_time": 0.024590015411376953, + "step": 28766 + }, + { + "epoch": 4.38934326171875e-05, + "step": 28766, + "training_step_time": 0.12140154838562012 + }, + { + "epoch": 4.389495849609375e-05, + "model_forward_time": 0.024501562118530273, + "step": 28767 + }, + { + "epoch": 4.389495849609375e-05, + "step": 28767, + "training_step_time": 0.17989182472229004 + }, + { + "epoch": 4.3896484375e-05, + "model_forward_time": 0.024941444396972656, + "step": 28768 + }, + { + "epoch": 4.3896484375e-05, + "step": 28768, + "training_step_time": 0.11324524879455566 + }, + { + "epoch": 4.389801025390625e-05, + "model_forward_time": 0.024997472763061523, + "step": 28769 + }, + { + "epoch": 4.389801025390625e-05, + "step": 28769, + "training_step_time": 0.10687685012817383 + }, + { + "epoch": 4.38995361328125e-05, + "grad_norm": 0.22183045744895935, + "learning_rate": 4.5887547397955864e-07, + "loss": 0.0031, + "step": 28770 + }, + { + "epoch": 4.38995361328125e-05, + "model_forward_time": 0.02875542640686035, + "step": 28770 + }, + { + "epoch": 4.38995361328125e-05, + "step": 28770, + "training_step_time": 0.10820436477661133 + }, + { + "epoch": 4.390106201171875e-05, + "model_forward_time": 0.025441646575927734, + "step": 28771 + }, + { + "epoch": 4.390106201171875e-05, + "step": 28771, + "training_step_time": 0.10809493064880371 + }, + { + "epoch": 4.3902587890625e-05, + "model_forward_time": 0.025870800018310547, + "step": 28772 + }, + { + "epoch": 4.3902587890625e-05, + "step": 28772, + "training_step_time": 0.10555553436279297 + }, + { + "epoch": 4.390411376953125e-05, + "model_forward_time": 0.02575850486755371, + "step": 28773 + }, + { + "epoch": 4.390411376953125e-05, + "step": 28773, + "training_step_time": 0.10755634307861328 + }, + { + "epoch": 4.39056396484375e-05, + "model_forward_time": 0.025369644165039062, + "step": 28774 + }, + { + "epoch": 4.39056396484375e-05, + "step": 28774, + "training_step_time": 0.10852360725402832 + }, + { + "epoch": 4.390716552734375e-05, + "model_forward_time": 0.025286436080932617, + "step": 28775 + }, + { + "epoch": 4.390716552734375e-05, + "step": 28775, + "training_step_time": 0.10610795021057129 + }, + { + "epoch": 4.390869140625e-05, + "model_forward_time": 0.025269746780395508, + "step": 28776 + }, + { + "epoch": 4.390869140625e-05, + "step": 28776, + "training_step_time": 0.1108560562133789 + }, + { + "epoch": 4.391021728515625e-05, + "model_forward_time": 0.02545475959777832, + "step": 28777 + }, + { + "epoch": 4.391021728515625e-05, + "step": 28777, + "training_step_time": 0.10515713691711426 + }, + { + "epoch": 4.39117431640625e-05, + "model_forward_time": 0.02550196647644043, + "step": 28778 + }, + { + "epoch": 4.39117431640625e-05, + "step": 28778, + "training_step_time": 0.11182999610900879 + }, + { + "epoch": 4.391326904296875e-05, + "model_forward_time": 0.025603294372558594, + "step": 28779 + }, + { + "epoch": 4.391326904296875e-05, + "step": 28779, + "training_step_time": 0.10822176933288574 + }, + { + "epoch": 4.3914794921875e-05, + "grad_norm": 0.082331083714962, + "learning_rate": 4.514556180690188e-07, + "loss": 0.0046, + "step": 28780 + }, + { + "epoch": 4.3914794921875e-05, + "model_forward_time": 0.025365352630615234, + "step": 28780 + }, + { + "epoch": 4.3914794921875e-05, + "step": 28780, + "training_step_time": 0.10804080963134766 + }, + { + "epoch": 4.391632080078125e-05, + "model_forward_time": 0.0254666805267334, + "step": 28781 + }, + { + "epoch": 4.391632080078125e-05, + "step": 28781, + "training_step_time": 0.10747027397155762 + }, + { + "epoch": 4.39178466796875e-05, + "model_forward_time": 0.025144100189208984, + "step": 28782 + }, + { + "epoch": 4.39178466796875e-05, + "step": 28782, + "training_step_time": 0.10770010948181152 + }, + { + "epoch": 4.391937255859375e-05, + "model_forward_time": 0.02653980255126953, + "step": 28783 + }, + { + "epoch": 4.391937255859375e-05, + "step": 28783, + "training_step_time": 0.10976195335388184 + }, + { + "epoch": 4.39208984375e-05, + "model_forward_time": 0.025371789932250977, + "step": 28784 + }, + { + "epoch": 4.39208984375e-05, + "step": 28784, + "training_step_time": 0.1084136962890625 + }, + { + "epoch": 4.392242431640625e-05, + "model_forward_time": 0.025032520294189453, + "step": 28785 + }, + { + "epoch": 4.392242431640625e-05, + "step": 28785, + "training_step_time": 0.10915040969848633 + }, + { + "epoch": 4.39239501953125e-05, + "model_forward_time": 0.02529764175415039, + "step": 28786 + }, + { + "epoch": 4.39239501953125e-05, + "step": 28786, + "training_step_time": 0.21702051162719727 + }, + { + "epoch": 4.392547607421875e-05, + "model_forward_time": 0.024346113204956055, + "step": 28787 + }, + { + "epoch": 4.392547607421875e-05, + "step": 28787, + "training_step_time": 0.11910724639892578 + }, + { + "epoch": 4.3927001953125e-05, + "model_forward_time": 0.024736881256103516, + "step": 28788 + }, + { + "epoch": 4.3927001953125e-05, + "step": 28788, + "training_step_time": 0.12718558311462402 + }, + { + "epoch": 4.392852783203125e-05, + "model_forward_time": 0.02529144287109375, + "step": 28789 + }, + { + "epoch": 4.392852783203125e-05, + "step": 28789, + "training_step_time": 0.10621905326843262 + }, + { + "epoch": 4.39300537109375e-05, + "grad_norm": 0.02719235047698021, + "learning_rate": 4.440959683120194e-07, + "loss": 0.0064, + "step": 28790 + }, + { + "epoch": 4.39300537109375e-05, + "model_forward_time": 0.025585174560546875, + "step": 28790 + }, + { + "epoch": 4.39300537109375e-05, + "step": 28790, + "training_step_time": 0.16669607162475586 + }, + { + "epoch": 4.393157958984375e-05, + "model_forward_time": 0.02438807487487793, + "step": 28791 + }, + { + "epoch": 4.393157958984375e-05, + "step": 28791, + "training_step_time": 0.14309453964233398 + }, + { + "epoch": 4.393310546875e-05, + "model_forward_time": 0.025151968002319336, + "step": 28792 + }, + { + "epoch": 4.393310546875e-05, + "step": 28792, + "training_step_time": 0.11692285537719727 + }, + { + "epoch": 4.393463134765625e-05, + "model_forward_time": 0.025306224822998047, + "step": 28793 + }, + { + "epoch": 4.393463134765625e-05, + "step": 28793, + "training_step_time": 0.12682294845581055 + }, + { + "epoch": 4.39361572265625e-05, + "model_forward_time": 0.025196313858032227, + "step": 28794 + }, + { + "epoch": 4.39361572265625e-05, + "step": 28794, + "training_step_time": 0.19578099250793457 + }, + { + "epoch": 4.393768310546875e-05, + "model_forward_time": 0.025259733200073242, + "step": 28795 + }, + { + "epoch": 4.393768310546875e-05, + "step": 28795, + "training_step_time": 0.10339617729187012 + }, + { + "epoch": 4.3939208984375e-05, + "model_forward_time": 0.024817466735839844, + "step": 28796 + }, + { + "epoch": 4.3939208984375e-05, + "step": 28796, + "training_step_time": 0.10300493240356445 + }, + { + "epoch": 4.394073486328125e-05, + "model_forward_time": 0.02497243881225586, + "step": 28797 + }, + { + "epoch": 4.394073486328125e-05, + "step": 28797, + "training_step_time": 0.10687494277954102 + }, + { + "epoch": 4.39422607421875e-05, + "model_forward_time": 0.02507615089416504, + "step": 28798 + }, + { + "epoch": 4.39422607421875e-05, + "step": 28798, + "training_step_time": 0.10734963417053223 + }, + { + "epoch": 4.394378662109375e-05, + "model_forward_time": 0.02510976791381836, + "step": 28799 + }, + { + "epoch": 4.394378662109375e-05, + "step": 28799, + "training_step_time": 0.10620403289794922 + }, + { + "epoch": 4.39453125e-05, + "grad_norm": 0.049385394901037216, + "learning_rate": 4.367965336512403e-07, + "loss": 0.0031, + "step": 28800 + }, + { + "epoch": 4.39453125e-05, + "model_forward_time": 0.024399280548095703, + "step": 28800 + }, + { + "epoch": 4.39453125e-05, + "step": 28800, + "training_step_time": 0.11004233360290527 + }, + { + "epoch": 4.394683837890625e-05, + "model_forward_time": 0.025557756423950195, + "step": 28801 + }, + { + "epoch": 4.394683837890625e-05, + "step": 28801, + "training_step_time": 0.10661673545837402 + }, + { + "epoch": 4.39483642578125e-05, + "model_forward_time": 0.02512359619140625, + "step": 28802 + }, + { + "epoch": 4.39483642578125e-05, + "step": 28802, + "training_step_time": 0.20676732063293457 + }, + { + "epoch": 4.394989013671875e-05, + "model_forward_time": 0.024614334106445312, + "step": 28803 + }, + { + "epoch": 4.394989013671875e-05, + "step": 28803, + "training_step_time": 0.2124791145324707 + }, + { + "epoch": 4.3951416015625e-05, + "model_forward_time": 0.024322032928466797, + "step": 28804 + }, + { + "epoch": 4.3951416015625e-05, + "step": 28804, + "training_step_time": 0.17612934112548828 + }, + { + "epoch": 4.395294189453125e-05, + "model_forward_time": 0.024463653564453125, + "step": 28805 + }, + { + "epoch": 4.395294189453125e-05, + "step": 28805, + "training_step_time": 0.1540968418121338 + }, + { + "epoch": 4.39544677734375e-05, + "model_forward_time": 0.02434706687927246, + "step": 28806 + }, + { + "epoch": 4.39544677734375e-05, + "step": 28806, + "training_step_time": 0.1662280559539795 + }, + { + "epoch": 4.395599365234375e-05, + "model_forward_time": 0.024532556533813477, + "step": 28807 + }, + { + "epoch": 4.395599365234375e-05, + "step": 28807, + "training_step_time": 0.13926935195922852 + }, + { + "epoch": 4.395751953125e-05, + "model_forward_time": 0.024302244186401367, + "step": 28808 + }, + { + "epoch": 4.395751953125e-05, + "step": 28808, + "training_step_time": 0.10504961013793945 + }, + { + "epoch": 4.395904541015625e-05, + "model_forward_time": 0.02523946762084961, + "step": 28809 + }, + { + "epoch": 4.395904541015625e-05, + "step": 28809, + "training_step_time": 0.10904264450073242 + }, + { + "epoch": 4.39605712890625e-05, + "grad_norm": 0.024276915937662125, + "learning_rate": 4.2955732295617554e-07, + "loss": 0.0022, + "step": 28810 + }, + { + "epoch": 4.39605712890625e-05, + "model_forward_time": 0.02545452117919922, + "step": 28810 + }, + { + "epoch": 4.39605712890625e-05, + "step": 28810, + "training_step_time": 0.10454940795898438 + }, + { + "epoch": 4.396209716796875e-05, + "model_forward_time": 0.025461673736572266, + "step": 28811 + }, + { + "epoch": 4.396209716796875e-05, + "step": 28811, + "training_step_time": 0.10764288902282715 + }, + { + "epoch": 4.3963623046875e-05, + "model_forward_time": 0.025270462036132812, + "step": 28812 + }, + { + "epoch": 4.3963623046875e-05, + "step": 28812, + "training_step_time": 0.19205927848815918 + }, + { + "epoch": 4.396514892578125e-05, + "model_forward_time": 0.024467945098876953, + "step": 28813 + }, + { + "epoch": 4.396514892578125e-05, + "step": 28813, + "training_step_time": 0.10696792602539062 + }, + { + "epoch": 4.39666748046875e-05, + "model_forward_time": 0.02460503578186035, + "step": 28814 + }, + { + "epoch": 4.39666748046875e-05, + "step": 28814, + "training_step_time": 0.10497260093688965 + }, + { + "epoch": 4.396820068359375e-05, + "model_forward_time": 0.025258779525756836, + "step": 28815 + }, + { + "epoch": 4.396820068359375e-05, + "step": 28815, + "training_step_time": 0.10583806037902832 + }, + { + "epoch": 4.39697265625e-05, + "model_forward_time": 0.025498151779174805, + "step": 28816 + }, + { + "epoch": 4.39697265625e-05, + "step": 28816, + "training_step_time": 0.10678863525390625 + }, + { + "epoch": 4.397125244140625e-05, + "model_forward_time": 0.025346994400024414, + "step": 28817 + }, + { + "epoch": 4.397125244140625e-05, + "step": 28817, + "training_step_time": 0.10732054710388184 + }, + { + "epoch": 4.39727783203125e-05, + "model_forward_time": 0.025068998336791992, + "step": 28818 + }, + { + "epoch": 4.39727783203125e-05, + "step": 28818, + "training_step_time": 0.10530805587768555 + }, + { + "epoch": 4.397430419921875e-05, + "model_forward_time": 0.025461912155151367, + "step": 28819 + }, + { + "epoch": 4.397430419921875e-05, + "step": 28819, + "training_step_time": 0.10402679443359375 + }, + { + "epoch": 4.3975830078125e-05, + "grad_norm": 0.025169670581817627, + "learning_rate": 4.2237834502314997e-07, + "loss": 0.0038, + "step": 28820 + }, + { + "epoch": 4.3975830078125e-05, + "model_forward_time": 0.025188207626342773, + "step": 28820 + }, + { + "epoch": 4.3975830078125e-05, + "step": 28820, + "training_step_time": 0.10598039627075195 + }, + { + "epoch": 4.397735595703125e-05, + "model_forward_time": 0.025551795959472656, + "step": 28821 + }, + { + "epoch": 4.397735595703125e-05, + "step": 28821, + "training_step_time": 0.10508370399475098 + }, + { + "epoch": 4.39788818359375e-05, + "model_forward_time": 0.02833247184753418, + "step": 28822 + }, + { + "epoch": 4.39788818359375e-05, + "step": 28822, + "training_step_time": 0.10770201683044434 + }, + { + "epoch": 4.398040771484375e-05, + "model_forward_time": 0.024962186813354492, + "step": 28823 + }, + { + "epoch": 4.398040771484375e-05, + "step": 28823, + "training_step_time": 0.10485339164733887 + }, + { + "epoch": 4.398193359375e-05, + "model_forward_time": 0.02578258514404297, + "step": 28824 + }, + { + "epoch": 4.398193359375e-05, + "step": 28824, + "training_step_time": 0.10573816299438477 + }, + { + "epoch": 4.398345947265625e-05, + "model_forward_time": 0.024971485137939453, + "step": 28825 + }, + { + "epoch": 4.398345947265625e-05, + "step": 28825, + "training_step_time": 0.10591983795166016 + }, + { + "epoch": 4.39849853515625e-05, + "model_forward_time": 0.025452136993408203, + "step": 28826 + }, + { + "epoch": 4.39849853515625e-05, + "step": 28826, + "training_step_time": 0.10672640800476074 + }, + { + "epoch": 4.398651123046875e-05, + "model_forward_time": 0.025547027587890625, + "step": 28827 + }, + { + "epoch": 4.398651123046875e-05, + "step": 28827, + "training_step_time": 0.10734677314758301 + }, + { + "epoch": 4.3988037109375e-05, + "model_forward_time": 0.025259971618652344, + "step": 28828 + }, + { + "epoch": 4.3988037109375e-05, + "step": 28828, + "training_step_time": 0.11012053489685059 + }, + { + "epoch": 4.398956298828125e-05, + "model_forward_time": 0.025006532669067383, + "step": 28829 + }, + { + "epoch": 4.398956298828125e-05, + "step": 28829, + "training_step_time": 0.13423705101013184 + }, + { + "epoch": 4.39910888671875e-05, + "grad_norm": 0.09788142144680023, + "learning_rate": 4.1525960857530243e-07, + "loss": 0.0026, + "step": 28830 + }, + { + "epoch": 4.39910888671875e-05, + "model_forward_time": 0.024151086807250977, + "step": 28830 + }, + { + "epoch": 4.39910888671875e-05, + "step": 28830, + "training_step_time": 0.18470382690429688 + }, + { + "epoch": 4.399261474609375e-05, + "model_forward_time": 0.02412700653076172, + "step": 28831 + }, + { + "epoch": 4.399261474609375e-05, + "step": 28831, + "training_step_time": 0.20001959800720215 + }, + { + "epoch": 4.3994140625e-05, + "model_forward_time": 0.02448868751525879, + "step": 28832 + }, + { + "epoch": 4.3994140625e-05, + "step": 28832, + "training_step_time": 0.172684907913208 + }, + { + "epoch": 4.399566650390625e-05, + "model_forward_time": 0.025416135787963867, + "step": 28833 + }, + { + "epoch": 4.399566650390625e-05, + "step": 28833, + "training_step_time": 0.11399722099304199 + }, + { + "epoch": 4.39971923828125e-05, + "model_forward_time": 0.024176359176635742, + "step": 28834 + }, + { + "epoch": 4.39971923828125e-05, + "step": 28834, + "training_step_time": 0.17377471923828125 + }, + { + "epoch": 4.399871826171875e-05, + "model_forward_time": 0.02455759048461914, + "step": 28835 + }, + { + "epoch": 4.399871826171875e-05, + "step": 28835, + "training_step_time": 0.1382441520690918 + }, + { + "epoch": 4.4000244140625e-05, + "model_forward_time": 0.02526545524597168, + "step": 28836 + }, + { + "epoch": 4.4000244140625e-05, + "step": 28836, + "training_step_time": 0.12614202499389648 + }, + { + "epoch": 4.400177001953125e-05, + "model_forward_time": 0.02462315559387207, + "step": 28837 + }, + { + "epoch": 4.400177001953125e-05, + "step": 28837, + "training_step_time": 0.1191110610961914 + }, + { + "epoch": 4.40032958984375e-05, + "model_forward_time": 0.02477717399597168, + "step": 28838 + }, + { + "epoch": 4.40032958984375e-05, + "step": 28838, + "training_step_time": 0.18198323249816895 + }, + { + "epoch": 4.400482177734375e-05, + "model_forward_time": 0.02429366111755371, + "step": 28839 + }, + { + "epoch": 4.400482177734375e-05, + "step": 28839, + "training_step_time": 0.10352921485900879 + }, + { + "epoch": 4.400634765625e-05, + "grad_norm": 0.13595394790172577, + "learning_rate": 4.082011222625637e-07, + "loss": 0.0024, + "step": 28840 + }, + { + "epoch": 4.400634765625e-05, + "model_forward_time": 0.024634838104248047, + "step": 28840 + }, + { + "epoch": 4.400634765625e-05, + "step": 28840, + "training_step_time": 0.1066291332244873 + }, + { + "epoch": 4.400787353515625e-05, + "model_forward_time": 0.025807619094848633, + "step": 28841 + }, + { + "epoch": 4.400787353515625e-05, + "step": 28841, + "training_step_time": 0.10807108879089355 + }, + { + "epoch": 4.40093994140625e-05, + "model_forward_time": 0.0251462459564209, + "step": 28842 + }, + { + "epoch": 4.40093994140625e-05, + "step": 28842, + "training_step_time": 0.10721302032470703 + }, + { + "epoch": 4.401092529296875e-05, + "model_forward_time": 0.026604413986206055, + "step": 28843 + }, + { + "epoch": 4.401092529296875e-05, + "step": 28843, + "training_step_time": 0.1101830005645752 + }, + { + "epoch": 4.4012451171875e-05, + "model_forward_time": 0.024954795837402344, + "step": 28844 + }, + { + "epoch": 4.4012451171875e-05, + "step": 28844, + "training_step_time": 0.10507941246032715 + }, + { + "epoch": 4.401397705078125e-05, + "model_forward_time": 0.025441884994506836, + "step": 28845 + }, + { + "epoch": 4.401397705078125e-05, + "step": 28845, + "training_step_time": 0.10663938522338867 + }, + { + "epoch": 4.40155029296875e-05, + "model_forward_time": 0.02576613426208496, + "step": 28846 + }, + { + "epoch": 4.40155029296875e-05, + "step": 28846, + "training_step_time": 0.11036968231201172 + }, + { + "epoch": 4.401702880859375e-05, + "model_forward_time": 0.02521491050720215, + "step": 28847 + }, + { + "epoch": 4.401702880859375e-05, + "step": 28847, + "training_step_time": 0.11086630821228027 + }, + { + "epoch": 4.40185546875e-05, + "model_forward_time": 0.02510547637939453, + "step": 28848 + }, + { + "epoch": 4.40185546875e-05, + "step": 28848, + "training_step_time": 0.2214219570159912 + }, + { + "epoch": 4.402008056640625e-05, + "model_forward_time": 0.024599075317382812, + "step": 28849 + }, + { + "epoch": 4.402008056640625e-05, + "step": 28849, + "training_step_time": 0.16921019554138184 + }, + { + "epoch": 4.40216064453125e-05, + "grad_norm": 0.513080358505249, + "learning_rate": 4.012028946616675e-07, + "loss": 0.0072, + "step": 28850 + }, + { + "epoch": 4.40216064453125e-05, + "model_forward_time": 0.025159358978271484, + "step": 28850 + }, + { + "epoch": 4.40216064453125e-05, + "step": 28850, + "training_step_time": 0.15341901779174805 + }, + { + "epoch": 4.402313232421875e-05, + "model_forward_time": 0.02451300621032715, + "step": 28851 + }, + { + "epoch": 4.402313232421875e-05, + "step": 28851, + "training_step_time": 0.10731053352355957 + }, + { + "epoch": 4.4024658203125e-05, + "model_forward_time": 0.02467823028564453, + "step": 28852 + }, + { + "epoch": 4.4024658203125e-05, + "step": 28852, + "training_step_time": 0.12532258033752441 + }, + { + "epoch": 4.402618408203125e-05, + "model_forward_time": 0.025277137756347656, + "step": 28853 + }, + { + "epoch": 4.402618408203125e-05, + "step": 28853, + "training_step_time": 0.10787487030029297 + }, + { + "epoch": 4.40277099609375e-05, + "model_forward_time": 0.025817155838012695, + "step": 28854 + }, + { + "epoch": 4.40277099609375e-05, + "step": 28854, + "training_step_time": 0.10804390907287598 + }, + { + "epoch": 4.402923583984375e-05, + "model_forward_time": 0.024899721145629883, + "step": 28855 + }, + { + "epoch": 4.402923583984375e-05, + "step": 28855, + "training_step_time": 0.11023569107055664 + }, + { + "epoch": 4.403076171875e-05, + "model_forward_time": 0.025223493576049805, + "step": 28856 + }, + { + "epoch": 4.403076171875e-05, + "step": 28856, + "training_step_time": 0.10880446434020996 + }, + { + "epoch": 4.403228759765625e-05, + "model_forward_time": 0.025037527084350586, + "step": 28857 + }, + { + "epoch": 4.403228759765625e-05, + "step": 28857, + "training_step_time": 0.20793533325195312 + }, + { + "epoch": 4.40338134765625e-05, + "model_forward_time": 0.024899721145629883, + "step": 28858 + }, + { + "epoch": 4.40338134765625e-05, + "step": 28858, + "training_step_time": 0.1049356460571289 + }, + { + "epoch": 4.403533935546875e-05, + "model_forward_time": 0.026018857955932617, + "step": 28859 + }, + { + "epoch": 4.403533935546875e-05, + "step": 28859, + "training_step_time": 0.11279058456420898 + }, + { + "epoch": 4.4036865234375e-05, + "grad_norm": 0.05081988126039505, + "learning_rate": 3.9426493427611177e-07, + "loss": 0.0045, + "step": 28860 + }, + { + "epoch": 4.4036865234375e-05, + "model_forward_time": 0.02529621124267578, + "step": 28860 + }, + { + "epoch": 4.4036865234375e-05, + "step": 28860, + "training_step_time": 0.10503101348876953 + }, + { + "epoch": 4.403839111328125e-05, + "model_forward_time": 0.025681257247924805, + "step": 28861 + }, + { + "epoch": 4.403839111328125e-05, + "step": 28861, + "training_step_time": 0.10536026954650879 + }, + { + "epoch": 4.40399169921875e-05, + "model_forward_time": 0.02542591094970703, + "step": 28862 + }, + { + "epoch": 4.40399169921875e-05, + "step": 28862, + "training_step_time": 0.10457992553710938 + }, + { + "epoch": 4.404144287109375e-05, + "model_forward_time": 0.025218725204467773, + "step": 28863 + }, + { + "epoch": 4.404144287109375e-05, + "step": 28863, + "training_step_time": 0.12376117706298828 + }, + { + "epoch": 4.404296875e-05, + "model_forward_time": 0.025081634521484375, + "step": 28864 + }, + { + "epoch": 4.404296875e-05, + "step": 28864, + "training_step_time": 0.1279911994934082 + }, + { + "epoch": 4.404449462890625e-05, + "model_forward_time": 0.024982213973999023, + "step": 28865 + }, + { + "epoch": 4.404449462890625e-05, + "step": 28865, + "training_step_time": 0.12434577941894531 + }, + { + "epoch": 4.40460205078125e-05, + "model_forward_time": 0.02506852149963379, + "step": 28866 + }, + { + "epoch": 4.40460205078125e-05, + "step": 28866, + "training_step_time": 0.12430357933044434 + }, + { + "epoch": 4.404754638671875e-05, + "model_forward_time": 0.025437116622924805, + "step": 28867 + }, + { + "epoch": 4.404754638671875e-05, + "step": 28867, + "training_step_time": 0.12047362327575684 + }, + { + "epoch": 4.4049072265625e-05, + "model_forward_time": 0.025087356567382812, + "step": 28868 + }, + { + "epoch": 4.4049072265625e-05, + "step": 28868, + "training_step_time": 0.11482882499694824 + }, + { + "epoch": 4.405059814453125e-05, + "model_forward_time": 0.02448296546936035, + "step": 28869 + }, + { + "epoch": 4.405059814453125e-05, + "step": 28869, + "training_step_time": 0.11351704597473145 + }, + { + "epoch": 4.40521240234375e-05, + "grad_norm": 0.05161585658788681, + "learning_rate": 3.873872495361697e-07, + "loss": 0.0046, + "step": 28870 + }, + { + "epoch": 4.40521240234375e-05, + "model_forward_time": 0.024175167083740234, + "step": 28870 + }, + { + "epoch": 4.40521240234375e-05, + "step": 28870, + "training_step_time": 0.11364078521728516 + }, + { + "epoch": 4.405364990234375e-05, + "model_forward_time": 0.026335477828979492, + "step": 28871 + }, + { + "epoch": 4.405364990234375e-05, + "step": 28871, + "training_step_time": 0.11411714553833008 + }, + { + "epoch": 4.405517578125e-05, + "model_forward_time": 0.024860620498657227, + "step": 28872 + }, + { + "epoch": 4.405517578125e-05, + "step": 28872, + "training_step_time": 0.10787200927734375 + }, + { + "epoch": 4.405670166015625e-05, + "model_forward_time": 0.02548360824584961, + "step": 28873 + }, + { + "epoch": 4.405670166015625e-05, + "step": 28873, + "training_step_time": 0.11005330085754395 + }, + { + "epoch": 4.40582275390625e-05, + "model_forward_time": 0.025074005126953125, + "step": 28874 + }, + { + "epoch": 4.40582275390625e-05, + "step": 28874, + "training_step_time": 0.10732698440551758 + }, + { + "epoch": 4.405975341796875e-05, + "model_forward_time": 0.02527165412902832, + "step": 28875 + }, + { + "epoch": 4.405975341796875e-05, + "step": 28875, + "training_step_time": 0.19524073600769043 + }, + { + "epoch": 4.4061279296875e-05, + "model_forward_time": 0.024552583694458008, + "step": 28876 + }, + { + "epoch": 4.4061279296875e-05, + "step": 28876, + "training_step_time": 0.1189119815826416 + }, + { + "epoch": 4.406280517578125e-05, + "model_forward_time": 0.024407386779785156, + "step": 28877 + }, + { + "epoch": 4.406280517578125e-05, + "step": 28877, + "training_step_time": 0.12789463996887207 + }, + { + "epoch": 4.40643310546875e-05, + "model_forward_time": 0.02468085289001465, + "step": 28878 + }, + { + "epoch": 4.40643310546875e-05, + "step": 28878, + "training_step_time": 0.10458493232727051 + }, + { + "epoch": 4.406585693359375e-05, + "model_forward_time": 0.02455282211303711, + "step": 28879 + }, + { + "epoch": 4.406585693359375e-05, + "step": 28879, + "training_step_time": 0.18645238876342773 + }, + { + "epoch": 4.40673828125e-05, + "grad_norm": 0.04541734606027603, + "learning_rate": 3.805698487988951e-07, + "loss": 0.0099, + "step": 28880 + }, + { + "epoch": 4.40673828125e-05, + "model_forward_time": 0.02471613883972168, + "step": 28880 + }, + { + "epoch": 4.40673828125e-05, + "step": 28880, + "training_step_time": 0.11664962768554688 + }, + { + "epoch": 4.406890869140625e-05, + "model_forward_time": 0.024853229522705078, + "step": 28881 + }, + { + "epoch": 4.406890869140625e-05, + "step": 28881, + "training_step_time": 0.11352849006652832 + }, + { + "epoch": 4.40704345703125e-05, + "model_forward_time": 0.02517557144165039, + "step": 28882 + }, + { + "epoch": 4.40704345703125e-05, + "step": 28882, + "training_step_time": 0.12951254844665527 + }, + { + "epoch": 4.407196044921875e-05, + "model_forward_time": 0.0255124568939209, + "step": 28883 + }, + { + "epoch": 4.407196044921875e-05, + "step": 28883, + "training_step_time": 0.1979384422302246 + }, + { + "epoch": 4.4073486328125e-05, + "model_forward_time": 0.02451944351196289, + "step": 28884 + }, + { + "epoch": 4.4073486328125e-05, + "step": 28884, + "training_step_time": 0.10195326805114746 + }, + { + "epoch": 4.407501220703125e-05, + "model_forward_time": 0.024811506271362305, + "step": 28885 + }, + { + "epoch": 4.407501220703125e-05, + "step": 28885, + "training_step_time": 0.10289311408996582 + }, + { + "epoch": 4.40765380859375e-05, + "model_forward_time": 0.025223255157470703, + "step": 28886 + }, + { + "epoch": 4.40765380859375e-05, + "step": 28886, + "training_step_time": 0.10558891296386719 + }, + { + "epoch": 4.407806396484375e-05, + "model_forward_time": 0.029429912567138672, + "step": 28887 + }, + { + "epoch": 4.407806396484375e-05, + "step": 28887, + "training_step_time": 0.11038517951965332 + }, + { + "epoch": 4.407958984375e-05, + "model_forward_time": 0.02462029457092285, + "step": 28888 + }, + { + "epoch": 4.407958984375e-05, + "step": 28888, + "training_step_time": 0.10375475883483887 + }, + { + "epoch": 4.408111572265625e-05, + "model_forward_time": 0.024369239807128906, + "step": 28889 + }, + { + "epoch": 4.408111572265625e-05, + "step": 28889, + "training_step_time": 0.1042327880859375 + }, + { + "epoch": 4.40826416015625e-05, + "grad_norm": 0.06463886052370071, + "learning_rate": 3.738127403480507e-07, + "loss": 0.0026, + "step": 28890 + }, + { + "epoch": 4.40826416015625e-05, + "model_forward_time": 0.024164199829101562, + "step": 28890 + }, + { + "epoch": 4.40826416015625e-05, + "step": 28890, + "training_step_time": 0.10666346549987793 + }, + { + "epoch": 4.408416748046875e-05, + "model_forward_time": 0.02506232261657715, + "step": 28891 + }, + { + "epoch": 4.408416748046875e-05, + "step": 28891, + "training_step_time": 0.22899746894836426 + }, + { + "epoch": 4.4085693359375e-05, + "model_forward_time": 0.02410149574279785, + "step": 28892 + }, + { + "epoch": 4.4085693359375e-05, + "step": 28892, + "training_step_time": 0.20785951614379883 + }, + { + "epoch": 4.408721923828125e-05, + "model_forward_time": 0.024324893951416016, + "step": 28893 + }, + { + "epoch": 4.408721923828125e-05, + "step": 28893, + "training_step_time": 0.10620307922363281 + }, + { + "epoch": 4.40887451171875e-05, + "model_forward_time": 0.02455306053161621, + "step": 28894 + }, + { + "epoch": 4.40887451171875e-05, + "step": 28894, + "training_step_time": 0.10653328895568848 + }, + { + "epoch": 4.409027099609375e-05, + "model_forward_time": 0.02509617805480957, + "step": 28895 + }, + { + "epoch": 4.409027099609375e-05, + "step": 28895, + "training_step_time": 0.1209714412689209 + }, + { + "epoch": 4.4091796875e-05, + "model_forward_time": 0.025086402893066406, + "step": 28896 + }, + { + "epoch": 4.4091796875e-05, + "step": 28896, + "training_step_time": 0.10881519317626953 + }, + { + "epoch": 4.409332275390625e-05, + "model_forward_time": 0.025372743606567383, + "step": 28897 + }, + { + "epoch": 4.409332275390625e-05, + "step": 28897, + "training_step_time": 0.142564058303833 + }, + { + "epoch": 4.40948486328125e-05, + "model_forward_time": 0.024983882904052734, + "step": 28898 + }, + { + "epoch": 4.40948486328125e-05, + "step": 28898, + "training_step_time": 0.19273090362548828 + }, + { + "epoch": 4.409637451171875e-05, + "model_forward_time": 0.024141788482666016, + "step": 28899 + }, + { + "epoch": 4.409637451171875e-05, + "step": 28899, + "training_step_time": 0.1181938648223877 + }, + { + "epoch": 4.4097900390625e-05, + "grad_norm": 0.04314415156841278, + "learning_rate": 3.6711593239417973e-07, + "loss": 0.004, + "step": 28900 + }, + { + "epoch": 4.4097900390625e-05, + "model_forward_time": 0.025745630264282227, + "step": 28900 + }, + { + "epoch": 4.4097900390625e-05, + "step": 28900, + "training_step_time": 0.10544061660766602 + }, + { + "epoch": 4.409942626953125e-05, + "model_forward_time": 0.028963327407836914, + "step": 28901 + }, + { + "epoch": 4.409942626953125e-05, + "step": 28901, + "training_step_time": 0.190568208694458 + }, + { + "epoch": 4.41009521484375e-05, + "model_forward_time": 0.0243074893951416, + "step": 28902 + }, + { + "epoch": 4.41009521484375e-05, + "step": 28902, + "training_step_time": 0.10541963577270508 + }, + { + "epoch": 4.410247802734375e-05, + "model_forward_time": 0.024118661880493164, + "step": 28903 + }, + { + "epoch": 4.410247802734375e-05, + "step": 28903, + "training_step_time": 0.10113000869750977 + }, + { + "epoch": 4.410400390625e-05, + "model_forward_time": 0.02498483657836914, + "step": 28904 + }, + { + "epoch": 4.410400390625e-05, + "step": 28904, + "training_step_time": 0.10480904579162598 + }, + { + "epoch": 4.410552978515625e-05, + "model_forward_time": 0.024924039840698242, + "step": 28905 + }, + { + "epoch": 4.410552978515625e-05, + "step": 28905, + "training_step_time": 0.10590624809265137 + }, + { + "epoch": 4.41070556640625e-05, + "model_forward_time": 0.025211095809936523, + "step": 28906 + }, + { + "epoch": 4.41070556640625e-05, + "step": 28906, + "training_step_time": 0.10602545738220215 + }, + { + "epoch": 4.410858154296875e-05, + "model_forward_time": 0.024839401245117188, + "step": 28907 + }, + { + "epoch": 4.410858154296875e-05, + "step": 28907, + "training_step_time": 0.10840249061584473 + }, + { + "epoch": 4.4110107421875e-05, + "model_forward_time": 0.02520442008972168, + "step": 28908 + }, + { + "epoch": 4.4110107421875e-05, + "step": 28908, + "training_step_time": 0.10515737533569336 + }, + { + "epoch": 4.411163330078125e-05, + "model_forward_time": 0.028593778610229492, + "step": 28909 + }, + { + "epoch": 4.411163330078125e-05, + "step": 28909, + "training_step_time": 0.10853242874145508 + }, + { + "epoch": 4.41131591796875e-05, + "grad_norm": 0.0470416285097599, + "learning_rate": 3.604794330745176e-07, + "loss": 0.004, + "step": 28910 + }, + { + "epoch": 4.41131591796875e-05, + "model_forward_time": 0.0250546932220459, + "step": 28910 + }, + { + "epoch": 4.41131591796875e-05, + "step": 28910, + "training_step_time": 0.1045522689819336 + }, + { + "epoch": 4.411468505859375e-05, + "model_forward_time": 0.025251150131225586, + "step": 28911 + }, + { + "epoch": 4.411468505859375e-05, + "step": 28911, + "training_step_time": 0.10526728630065918 + }, + { + "epoch": 4.41162109375e-05, + "model_forward_time": 0.02504134178161621, + "step": 28912 + }, + { + "epoch": 4.41162109375e-05, + "step": 28912, + "training_step_time": 0.10443687438964844 + }, + { + "epoch": 4.411773681640625e-05, + "model_forward_time": 0.025326251983642578, + "step": 28913 + }, + { + "epoch": 4.411773681640625e-05, + "step": 28913, + "training_step_time": 0.10687613487243652 + }, + { + "epoch": 4.41192626953125e-05, + "model_forward_time": 0.025009870529174805, + "step": 28914 + }, + { + "epoch": 4.41192626953125e-05, + "step": 28914, + "training_step_time": 0.10468673706054688 + }, + { + "epoch": 4.412078857421875e-05, + "model_forward_time": 0.0255126953125, + "step": 28915 + }, + { + "epoch": 4.412078857421875e-05, + "step": 28915, + "training_step_time": 0.10990715026855469 + }, + { + "epoch": 4.4122314453125e-05, + "model_forward_time": 0.024985551834106445, + "step": 28916 + }, + { + "epoch": 4.4122314453125e-05, + "step": 28916, + "training_step_time": 0.1049659252166748 + }, + { + "epoch": 4.412384033203125e-05, + "model_forward_time": 0.02509021759033203, + "step": 28917 + }, + { + "epoch": 4.412384033203125e-05, + "step": 28917, + "training_step_time": 0.10686898231506348 + }, + { + "epoch": 4.41253662109375e-05, + "model_forward_time": 0.025774717330932617, + "step": 28918 + }, + { + "epoch": 4.41253662109375e-05, + "step": 28918, + "training_step_time": 0.10689449310302734 + }, + { + "epoch": 4.412689208984375e-05, + "model_forward_time": 0.025504589080810547, + "step": 28919 + }, + { + "epoch": 4.412689208984375e-05, + "step": 28919, + "training_step_time": 0.10934948921203613 + }, + { + "epoch": 4.412841796875e-05, + "grad_norm": 0.04599743336439133, + "learning_rate": 3.5390325045304706e-07, + "loss": 0.0025, + "step": 28920 + }, + { + "epoch": 4.412841796875e-05, + "model_forward_time": 0.025136709213256836, + "step": 28920 + }, + { + "epoch": 4.412841796875e-05, + "step": 28920, + "training_step_time": 0.10533905029296875 + }, + { + "epoch": 4.412994384765625e-05, + "model_forward_time": 0.02535247802734375, + "step": 28921 + }, + { + "epoch": 4.412994384765625e-05, + "step": 28921, + "training_step_time": 0.10580635070800781 + }, + { + "epoch": 4.41314697265625e-05, + "model_forward_time": 0.025734424591064453, + "step": 28922 + }, + { + "epoch": 4.41314697265625e-05, + "step": 28922, + "training_step_time": 0.139634370803833 + }, + { + "epoch": 4.413299560546875e-05, + "model_forward_time": 0.025485992431640625, + "step": 28923 + }, + { + "epoch": 4.413299560546875e-05, + "step": 28923, + "training_step_time": 0.12592744827270508 + }, + { + "epoch": 4.4134521484375e-05, + "model_forward_time": 0.025511741638183594, + "step": 28924 + }, + { + "epoch": 4.4134521484375e-05, + "step": 28924, + "training_step_time": 0.10522317886352539 + }, + { + "epoch": 4.413604736328125e-05, + "model_forward_time": 0.025289535522460938, + "step": 28925 + }, + { + "epoch": 4.413604736328125e-05, + "step": 28925, + "training_step_time": 0.1590440273284912 + }, + { + "epoch": 4.41375732421875e-05, + "model_forward_time": 0.02474808692932129, + "step": 28926 + }, + { + "epoch": 4.41375732421875e-05, + "step": 28926, + "training_step_time": 0.15165472030639648 + }, + { + "epoch": 4.413909912109375e-05, + "model_forward_time": 0.02467823028564453, + "step": 28927 + }, + { + "epoch": 4.413909912109375e-05, + "step": 28927, + "training_step_time": 0.10590267181396484 + }, + { + "epoch": 4.4140625e-05, + "model_forward_time": 0.0252838134765625, + "step": 28928 + }, + { + "epoch": 4.4140625e-05, + "step": 28928, + "training_step_time": 0.12900424003601074 + }, + { + "epoch": 4.414215087890625e-05, + "model_forward_time": 0.027978897094726562, + "step": 28929 + }, + { + "epoch": 4.414215087890625e-05, + "step": 28929, + "training_step_time": 0.165266752243042 + }, + { + "epoch": 4.41436767578125e-05, + "grad_norm": 0.041539549827575684, + "learning_rate": 3.4738739252045405e-07, + "loss": 0.0026, + "step": 28930 + }, + { + "epoch": 4.41436767578125e-05, + "model_forward_time": 0.024912357330322266, + "step": 28930 + }, + { + "epoch": 4.41436767578125e-05, + "step": 28930, + "training_step_time": 0.10999226570129395 + }, + { + "epoch": 4.414520263671875e-05, + "model_forward_time": 0.0251007080078125, + "step": 28931 + }, + { + "epoch": 4.414520263671875e-05, + "step": 28931, + "training_step_time": 0.10576152801513672 + }, + { + "epoch": 4.4146728515625e-05, + "model_forward_time": 0.02565479278564453, + "step": 28932 + }, + { + "epoch": 4.4146728515625e-05, + "step": 28932, + "training_step_time": 0.11045002937316895 + }, + { + "epoch": 4.414825439453125e-05, + "model_forward_time": 0.025359630584716797, + "step": 28933 + }, + { + "epoch": 4.414825439453125e-05, + "step": 28933, + "training_step_time": 0.10691070556640625 + }, + { + "epoch": 4.41497802734375e-05, + "model_forward_time": 0.02516770362854004, + "step": 28934 + }, + { + "epoch": 4.41497802734375e-05, + "step": 28934, + "training_step_time": 0.10630297660827637 + }, + { + "epoch": 4.415130615234375e-05, + "model_forward_time": 0.024569272994995117, + "step": 28935 + }, + { + "epoch": 4.415130615234375e-05, + "step": 28935, + "training_step_time": 0.10555052757263184 + }, + { + "epoch": 4.415283203125e-05, + "model_forward_time": 0.0256807804107666, + "step": 28936 + }, + { + "epoch": 4.415283203125e-05, + "step": 28936, + "training_step_time": 0.10944628715515137 + }, + { + "epoch": 4.415435791015625e-05, + "model_forward_time": 0.02529597282409668, + "step": 28937 + }, + { + "epoch": 4.415435791015625e-05, + "step": 28937, + "training_step_time": 0.1071007251739502 + }, + { + "epoch": 4.41558837890625e-05, + "model_forward_time": 0.026287078857421875, + "step": 28938 + }, + { + "epoch": 4.41558837890625e-05, + "step": 28938, + "training_step_time": 0.16593360900878906 + }, + { + "epoch": 4.415740966796875e-05, + "model_forward_time": 0.02499699592590332, + "step": 28939 + }, + { + "epoch": 4.415740966796875e-05, + "step": 28939, + "training_step_time": 0.2112419605255127 + }, + { + "epoch": 4.4158935546875e-05, + "grad_norm": 0.22400256991386414, + "learning_rate": 3.4093186719411085e-07, + "loss": 0.0055, + "step": 28940 + }, + { + "epoch": 4.4158935546875e-05, + "model_forward_time": 0.024495363235473633, + "step": 28940 + }, + { + "epoch": 4.4158935546875e-05, + "step": 28940, + "training_step_time": 0.20941877365112305 + }, + { + "epoch": 4.416046142578125e-05, + "model_forward_time": 0.024332046508789062, + "step": 28941 + }, + { + "epoch": 4.416046142578125e-05, + "step": 28941, + "training_step_time": 0.1098325252532959 + }, + { + "epoch": 4.41619873046875e-05, + "model_forward_time": 0.024298906326293945, + "step": 28942 + }, + { + "epoch": 4.41619873046875e-05, + "step": 28942, + "training_step_time": 0.11886477470397949 + }, + { + "epoch": 4.416351318359375e-05, + "model_forward_time": 0.025374174118041992, + "step": 28943 + }, + { + "epoch": 4.416351318359375e-05, + "step": 28943, + "training_step_time": 0.13247227668762207 + }, + { + "epoch": 4.41650390625e-05, + "model_forward_time": 0.025155305862426758, + "step": 28944 + }, + { + "epoch": 4.41650390625e-05, + "step": 28944, + "training_step_time": 0.10594677925109863 + }, + { + "epoch": 4.416656494140625e-05, + "model_forward_time": 0.02543807029724121, + "step": 28945 + }, + { + "epoch": 4.416656494140625e-05, + "step": 28945, + "training_step_time": 0.11469078063964844 + }, + { + "epoch": 4.41680908203125e-05, + "model_forward_time": 0.025419950485229492, + "step": 28946 + }, + { + "epoch": 4.41680908203125e-05, + "step": 28946, + "training_step_time": 0.10903024673461914 + }, + { + "epoch": 4.416961669921875e-05, + "model_forward_time": 0.02522730827331543, + "step": 28947 + }, + { + "epoch": 4.416961669921875e-05, + "step": 28947, + "training_step_time": 0.10511589050292969 + }, + { + "epoch": 4.4171142578125e-05, + "model_forward_time": 0.025255203247070312, + "step": 28948 + }, + { + "epoch": 4.4171142578125e-05, + "step": 28948, + "training_step_time": 0.1908271312713623 + }, + { + "epoch": 4.417266845703125e-05, + "model_forward_time": 0.024558067321777344, + "step": 28949 + }, + { + "epoch": 4.417266845703125e-05, + "step": 28949, + "training_step_time": 0.10468149185180664 + }, + { + "epoch": 4.41741943359375e-05, + "grad_norm": 0.02406979538500309, + "learning_rate": 3.3453668231809286e-07, + "loss": 0.0061, + "step": 28950 + }, + { + "epoch": 4.41741943359375e-05, + "model_forward_time": 0.02489471435546875, + "step": 28950 + }, + { + "epoch": 4.41741943359375e-05, + "step": 28950, + "training_step_time": 0.1136469841003418 + }, + { + "epoch": 4.417572021484375e-05, + "model_forward_time": 0.025480031967163086, + "step": 28951 + }, + { + "epoch": 4.417572021484375e-05, + "step": 28951, + "training_step_time": 0.10869717597961426 + }, + { + "epoch": 4.417724609375e-05, + "model_forward_time": 0.02547454833984375, + "step": 28952 + }, + { + "epoch": 4.417724609375e-05, + "step": 28952, + "training_step_time": 0.10746073722839355 + }, + { + "epoch": 4.417877197265625e-05, + "model_forward_time": 0.02521228790283203, + "step": 28953 + }, + { + "epoch": 4.417877197265625e-05, + "step": 28953, + "training_step_time": 0.10619950294494629 + }, + { + "epoch": 4.41802978515625e-05, + "model_forward_time": 0.02527165412902832, + "step": 28954 + }, + { + "epoch": 4.41802978515625e-05, + "step": 28954, + "training_step_time": 0.10786700248718262 + }, + { + "epoch": 4.418182373046875e-05, + "model_forward_time": 0.02542400360107422, + "step": 28955 + }, + { + "epoch": 4.418182373046875e-05, + "step": 28955, + "training_step_time": 0.10452723503112793 + }, + { + "epoch": 4.4183349609375e-05, + "model_forward_time": 0.025342226028442383, + "step": 28956 + }, + { + "epoch": 4.4183349609375e-05, + "step": 28956, + "training_step_time": 0.10795402526855469 + }, + { + "epoch": 4.418487548828125e-05, + "model_forward_time": 0.025736093521118164, + "step": 28957 + }, + { + "epoch": 4.418487548828125e-05, + "step": 28957, + "training_step_time": 0.10680675506591797 + }, + { + "epoch": 4.41864013671875e-05, + "model_forward_time": 0.025098562240600586, + "step": 28958 + }, + { + "epoch": 4.41864013671875e-05, + "step": 28958, + "training_step_time": 0.10611391067504883 + }, + { + "epoch": 4.418792724609375e-05, + "model_forward_time": 0.02525615692138672, + "step": 28959 + }, + { + "epoch": 4.418792724609375e-05, + "step": 28959, + "training_step_time": 0.1047670841217041 + }, + { + "epoch": 4.4189453125e-05, + "grad_norm": 0.036531899124383926, + "learning_rate": 3.2820184566315084e-07, + "loss": 0.0049, + "step": 28960 + }, + { + "epoch": 4.4189453125e-05, + "model_forward_time": 0.025341272354125977, + "step": 28960 + }, + { + "epoch": 4.4189453125e-05, + "step": 28960, + "training_step_time": 0.10561847686767578 + }, + { + "epoch": 4.419097900390625e-05, + "model_forward_time": 0.025133132934570312, + "step": 28961 + }, + { + "epoch": 4.419097900390625e-05, + "step": 28961, + "training_step_time": 0.10742950439453125 + }, + { + "epoch": 4.41925048828125e-05, + "model_forward_time": 0.025563478469848633, + "step": 28962 + }, + { + "epoch": 4.41925048828125e-05, + "step": 28962, + "training_step_time": 0.13123226165771484 + }, + { + "epoch": 4.419403076171875e-05, + "model_forward_time": 0.02568817138671875, + "step": 28963 + }, + { + "epoch": 4.419403076171875e-05, + "step": 28963, + "training_step_time": 0.12539291381835938 + }, + { + "epoch": 4.4195556640625e-05, + "model_forward_time": 0.024170637130737305, + "step": 28964 + }, + { + "epoch": 4.4195556640625e-05, + "step": 28964, + "training_step_time": 0.13669657707214355 + }, + { + "epoch": 4.419708251953125e-05, + "model_forward_time": 0.023932933807373047, + "step": 28965 + }, + { + "epoch": 4.419708251953125e-05, + "step": 28965, + "training_step_time": 0.1270308494567871 + }, + { + "epoch": 4.41986083984375e-05, + "model_forward_time": 0.023763179779052734, + "step": 28966 + }, + { + "epoch": 4.41986083984375e-05, + "step": 28966, + "training_step_time": 0.11907172203063965 + }, + { + "epoch": 4.420013427734375e-05, + "model_forward_time": 0.024312734603881836, + "step": 28967 + }, + { + "epoch": 4.420013427734375e-05, + "step": 28967, + "training_step_time": 0.13876867294311523 + }, + { + "epoch": 4.420166015625e-05, + "model_forward_time": 0.02509284019470215, + "step": 28968 + }, + { + "epoch": 4.420166015625e-05, + "step": 28968, + "training_step_time": 0.1332089900970459 + }, + { + "epoch": 4.420318603515625e-05, + "model_forward_time": 0.02444624900817871, + "step": 28969 + }, + { + "epoch": 4.420318603515625e-05, + "step": 28969, + "training_step_time": 0.10707497596740723 + }, + { + "epoch": 4.42047119140625e-05, + "grad_norm": 0.03241880238056183, + "learning_rate": 3.219273649267163e-07, + "loss": 0.0043, + "step": 28970 + }, + { + "epoch": 4.42047119140625e-05, + "model_forward_time": 0.02509760856628418, + "step": 28970 + }, + { + "epoch": 4.42047119140625e-05, + "step": 28970, + "training_step_time": 0.14246392250061035 + }, + { + "epoch": 4.420623779296875e-05, + "model_forward_time": 0.025603294372558594, + "step": 28971 + }, + { + "epoch": 4.420623779296875e-05, + "step": 28971, + "training_step_time": 0.15750336647033691 + }, + { + "epoch": 4.4207763671875e-05, + "model_forward_time": 0.025864839553833008, + "step": 28972 + }, + { + "epoch": 4.4207763671875e-05, + "step": 28972, + "training_step_time": 0.10809588432312012 + }, + { + "epoch": 4.420928955078125e-05, + "model_forward_time": 0.02497100830078125, + "step": 28973 + }, + { + "epoch": 4.420928955078125e-05, + "step": 28973, + "training_step_time": 0.12720489501953125 + }, + { + "epoch": 4.42108154296875e-05, + "model_forward_time": 0.02567887306213379, + "step": 28974 + }, + { + "epoch": 4.42108154296875e-05, + "step": 28974, + "training_step_time": 0.17195773124694824 + }, + { + "epoch": 4.421234130859375e-05, + "model_forward_time": 0.024714231491088867, + "step": 28975 + }, + { + "epoch": 4.421234130859375e-05, + "step": 28975, + "training_step_time": 0.10414695739746094 + }, + { + "epoch": 4.42138671875e-05, + "model_forward_time": 0.024482250213623047, + "step": 28976 + }, + { + "epoch": 4.42138671875e-05, + "step": 28976, + "training_step_time": 0.10502767562866211 + }, + { + "epoch": 4.421539306640625e-05, + "model_forward_time": 0.025331735610961914, + "step": 28977 + }, + { + "epoch": 4.421539306640625e-05, + "step": 28977, + "training_step_time": 0.10675430297851562 + }, + { + "epoch": 4.42169189453125e-05, + "model_forward_time": 0.024800539016723633, + "step": 28978 + }, + { + "epoch": 4.42169189453125e-05, + "step": 28978, + "training_step_time": 0.1061248779296875 + }, + { + "epoch": 4.421844482421875e-05, + "model_forward_time": 0.02580571174621582, + "step": 28979 + }, + { + "epoch": 4.421844482421875e-05, + "step": 28979, + "training_step_time": 0.10585165023803711 + }, + { + "epoch": 4.4219970703125e-05, + "grad_norm": 0.03276151791214943, + "learning_rate": 3.157132477328628e-07, + "loss": 0.0028, + "step": 28980 + }, + { + "epoch": 4.4219970703125e-05, + "model_forward_time": 0.025708675384521484, + "step": 28980 + }, + { + "epoch": 4.4219970703125e-05, + "step": 28980, + "training_step_time": 0.10717535018920898 + }, + { + "epoch": 4.422149658203125e-05, + "model_forward_time": 0.025322437286376953, + "step": 28981 + }, + { + "epoch": 4.422149658203125e-05, + "step": 28981, + "training_step_time": 0.10813021659851074 + }, + { + "epoch": 4.42230224609375e-05, + "model_forward_time": 0.025374650955200195, + "step": 28982 + }, + { + "epoch": 4.42230224609375e-05, + "step": 28982, + "training_step_time": 0.10534453392028809 + }, + { + "epoch": 4.422454833984375e-05, + "model_forward_time": 0.025620222091674805, + "step": 28983 + }, + { + "epoch": 4.422454833984375e-05, + "step": 28983, + "training_step_time": 0.171766996383667 + }, + { + "epoch": 4.422607421875e-05, + "model_forward_time": 0.02486705780029297, + "step": 28984 + }, + { + "epoch": 4.422607421875e-05, + "step": 28984, + "training_step_time": 0.12384343147277832 + }, + { + "epoch": 4.422760009765625e-05, + "model_forward_time": 0.025088071823120117, + "step": 28985 + }, + { + "epoch": 4.422760009765625e-05, + "step": 28985, + "training_step_time": 0.11200428009033203 + }, + { + "epoch": 4.42291259765625e-05, + "model_forward_time": 0.025839567184448242, + "step": 28986 + }, + { + "epoch": 4.42291259765625e-05, + "step": 28986, + "training_step_time": 0.12949275970458984 + }, + { + "epoch": 4.423065185546875e-05, + "model_forward_time": 0.025454282760620117, + "step": 28987 + }, + { + "epoch": 4.423065185546875e-05, + "step": 28987, + "training_step_time": 0.12455344200134277 + }, + { + "epoch": 4.4232177734375e-05, + "model_forward_time": 0.025292396545410156, + "step": 28988 + }, + { + "epoch": 4.4232177734375e-05, + "step": 28988, + "training_step_time": 0.19358348846435547 + }, + { + "epoch": 4.423370361328125e-05, + "model_forward_time": 0.024565458297729492, + "step": 28989 + }, + { + "epoch": 4.423370361328125e-05, + "step": 28989, + "training_step_time": 0.1805591583251953 + }, + { + "epoch": 4.42352294921875e-05, + "grad_norm": 0.034987274557352066, + "learning_rate": 3.095595016323394e-07, + "loss": 0.003, + "step": 28990 + }, + { + "epoch": 4.42352294921875e-05, + "model_forward_time": 0.024019241333007812, + "step": 28990 + }, + { + "epoch": 4.42352294921875e-05, + "step": 28990, + "training_step_time": 0.1577620506286621 + }, + { + "epoch": 4.423675537109375e-05, + "model_forward_time": 0.023499011993408203, + "step": 28991 + }, + { + "epoch": 4.423675537109375e-05, + "step": 28991, + "training_step_time": 0.12829184532165527 + }, + { + "epoch": 4.423828125e-05, + "model_forward_time": 0.024863243103027344, + "step": 28992 + }, + { + "epoch": 4.423828125e-05, + "step": 28992, + "training_step_time": 0.10584640502929688 + }, + { + "epoch": 4.423980712890625e-05, + "model_forward_time": 0.025820255279541016, + "step": 28993 + }, + { + "epoch": 4.423980712890625e-05, + "step": 28993, + "training_step_time": 0.1330702304840088 + }, + { + "epoch": 4.42413330078125e-05, + "model_forward_time": 0.025403738021850586, + "step": 28994 + }, + { + "epoch": 4.42413330078125e-05, + "step": 28994, + "training_step_time": 0.1217191219329834 + }, + { + "epoch": 4.424285888671875e-05, + "model_forward_time": 0.02496170997619629, + "step": 28995 + }, + { + "epoch": 4.424285888671875e-05, + "step": 28995, + "training_step_time": 0.10331368446350098 + }, + { + "epoch": 4.4244384765625e-05, + "model_forward_time": 0.025321245193481445, + "step": 28996 + }, + { + "epoch": 4.4244384765625e-05, + "step": 28996, + "training_step_time": 0.10410070419311523 + }, + { + "epoch": 4.424591064453125e-05, + "model_forward_time": 0.0251920223236084, + "step": 28997 + }, + { + "epoch": 4.424591064453125e-05, + "step": 28997, + "training_step_time": 0.10428667068481445 + }, + { + "epoch": 4.42474365234375e-05, + "model_forward_time": 0.0249941349029541, + "step": 28998 + }, + { + "epoch": 4.42474365234375e-05, + "step": 28998, + "training_step_time": 0.10760021209716797 + }, + { + "epoch": 4.424896240234375e-05, + "model_forward_time": 0.025174856185913086, + "step": 28999 + }, + { + "epoch": 4.424896240234375e-05, + "step": 28999, + "training_step_time": 0.10345959663391113 + }, + { + "epoch": 4.425048828125e-05, + "grad_norm": 0.10391082614660263, + "learning_rate": 3.034661341025258e-07, + "loss": 0.0037, + "step": 29000 + }, + { + "epoch": 4.425048828125e-05, + "model_forward_time": 0.024109601974487305, + "step": 29000 + }, + { + "epoch": 4.425048828125e-05, + "step": 29000, + "training_step_time": 0.09999608993530273 + }, + { + "epoch": 4.425201416015625e-05, + "model_forward_time": 0.023409366607666016, + "step": 29001 + }, + { + "epoch": 4.425201416015625e-05, + "step": 29001, + "training_step_time": 0.10257744789123535 + }, + { + "epoch": 4.42535400390625e-05, + "model_forward_time": 0.02407670021057129, + "step": 29002 + }, + { + "epoch": 4.42535400390625e-05, + "step": 29002, + "training_step_time": 0.10204052925109863 + }, + { + "epoch": 4.425506591796875e-05, + "model_forward_time": 0.025092124938964844, + "step": 29003 + }, + { + "epoch": 4.425506591796875e-05, + "step": 29003, + "training_step_time": 0.10475707054138184 + }, + { + "epoch": 4.4256591796875e-05, + "model_forward_time": 0.024931907653808594, + "step": 29004 + }, + { + "epoch": 4.4256591796875e-05, + "step": 29004, + "training_step_time": 0.10244345664978027 + }, + { + "epoch": 4.425811767578125e-05, + "model_forward_time": 0.025270700454711914, + "step": 29005 + }, + { + "epoch": 4.425811767578125e-05, + "step": 29005, + "training_step_time": 0.10306048393249512 + }, + { + "epoch": 4.42596435546875e-05, + "model_forward_time": 0.024897336959838867, + "step": 29006 + }, + { + "epoch": 4.42596435546875e-05, + "step": 29006, + "training_step_time": 0.1074533462524414 + }, + { + "epoch": 4.426116943359375e-05, + "model_forward_time": 0.024905920028686523, + "step": 29007 + }, + { + "epoch": 4.426116943359375e-05, + "step": 29007, + "training_step_time": 0.10587167739868164 + }, + { + "epoch": 4.42626953125e-05, + "model_forward_time": 0.02533888816833496, + "step": 29008 + }, + { + "epoch": 4.42626953125e-05, + "step": 29008, + "training_step_time": 0.1386117935180664 + }, + { + "epoch": 4.426422119140625e-05, + "model_forward_time": 0.02637505531311035, + "step": 29009 + }, + { + "epoch": 4.426422119140625e-05, + "step": 29009, + "training_step_time": 0.1584486961364746 + }, + { + "epoch": 4.42657470703125e-05, + "grad_norm": 0.04666359722614288, + "learning_rate": 2.9743315254743833e-07, + "loss": 0.0032, + "step": 29010 + }, + { + "epoch": 4.42657470703125e-05, + "model_forward_time": 0.02333974838256836, + "step": 29010 + }, + { + "epoch": 4.42657470703125e-05, + "step": 29010, + "training_step_time": 0.14909768104553223 + }, + { + "epoch": 4.426727294921875e-05, + "model_forward_time": 0.02363729476928711, + "step": 29011 + }, + { + "epoch": 4.426727294921875e-05, + "step": 29011, + "training_step_time": 0.13351821899414062 + }, + { + "epoch": 4.4268798828125e-05, + "model_forward_time": 0.023451805114746094, + "step": 29012 + }, + { + "epoch": 4.4268798828125e-05, + "step": 29012, + "training_step_time": 0.10564279556274414 + }, + { + "epoch": 4.427032470703125e-05, + "model_forward_time": 0.024298906326293945, + "step": 29013 + }, + { + "epoch": 4.427032470703125e-05, + "step": 29013, + "training_step_time": 0.14726567268371582 + }, + { + "epoch": 4.42718505859375e-05, + "model_forward_time": 0.024817943572998047, + "step": 29014 + }, + { + "epoch": 4.42718505859375e-05, + "step": 29014, + "training_step_time": 0.1225738525390625 + }, + { + "epoch": 4.427337646484375e-05, + "model_forward_time": 0.024453163146972656, + "step": 29015 + }, + { + "epoch": 4.427337646484375e-05, + "step": 29015, + "training_step_time": 0.12597870826721191 + }, + { + "epoch": 4.427490234375e-05, + "model_forward_time": 0.025204181671142578, + "step": 29016 + }, + { + "epoch": 4.427490234375e-05, + "step": 29016, + "training_step_time": 0.10244345664978027 + }, + { + "epoch": 4.427642822265625e-05, + "model_forward_time": 0.02431488037109375, + "step": 29017 + }, + { + "epoch": 4.427642822265625e-05, + "step": 29017, + "training_step_time": 0.20625066757202148 + }, + { + "epoch": 4.42779541015625e-05, + "model_forward_time": 0.023919343948364258, + "step": 29018 + }, + { + "epoch": 4.42779541015625e-05, + "step": 29018, + "training_step_time": 0.14628887176513672 + }, + { + "epoch": 4.427947998046875e-05, + "model_forward_time": 0.02465653419494629, + "step": 29019 + }, + { + "epoch": 4.427947998046875e-05, + "step": 29019, + "training_step_time": 0.11667037010192871 + }, + { + "epoch": 4.4281005859375e-05, + "grad_norm": 0.05145147815346718, + "learning_rate": 2.9146056429771305e-07, + "loss": 0.0051, + "step": 29020 + }, + { + "epoch": 4.4281005859375e-05, + "model_forward_time": 0.02472853660583496, + "step": 29020 + }, + { + "epoch": 4.4281005859375e-05, + "step": 29020, + "training_step_time": 0.10970616340637207 + }, + { + "epoch": 4.428253173828125e-05, + "model_forward_time": 0.025367021560668945, + "step": 29021 + }, + { + "epoch": 4.428253173828125e-05, + "step": 29021, + "training_step_time": 0.12661290168762207 + }, + { + "epoch": 4.42840576171875e-05, + "model_forward_time": 0.024908065795898438, + "step": 29022 + }, + { + "epoch": 4.42840576171875e-05, + "step": 29022, + "training_step_time": 0.1276712417602539 + }, + { + "epoch": 4.428558349609375e-05, + "model_forward_time": 0.025496721267700195, + "step": 29023 + }, + { + "epoch": 4.428558349609375e-05, + "step": 29023, + "training_step_time": 0.10353803634643555 + }, + { + "epoch": 4.4287109375e-05, + "model_forward_time": 0.025337934494018555, + "step": 29024 + }, + { + "epoch": 4.4287109375e-05, + "step": 29024, + "training_step_time": 0.10413122177124023 + }, + { + "epoch": 4.428863525390625e-05, + "model_forward_time": 0.02543926239013672, + "step": 29025 + }, + { + "epoch": 4.428863525390625e-05, + "step": 29025, + "training_step_time": 0.1066279411315918 + }, + { + "epoch": 4.42901611328125e-05, + "model_forward_time": 0.02537250518798828, + "step": 29026 + }, + { + "epoch": 4.42901611328125e-05, + "step": 29026, + "training_step_time": 0.10367727279663086 + }, + { + "epoch": 4.429168701171875e-05, + "model_forward_time": 0.02523183822631836, + "step": 29027 + }, + { + "epoch": 4.429168701171875e-05, + "step": 29027, + "training_step_time": 0.10351181030273438 + }, + { + "epoch": 4.4293212890625e-05, + "model_forward_time": 0.025522947311401367, + "step": 29028 + }, + { + "epoch": 4.4293212890625e-05, + "step": 29028, + "training_step_time": 0.1075747013092041 + }, + { + "epoch": 4.429473876953125e-05, + "model_forward_time": 0.025315284729003906, + "step": 29029 + }, + { + "epoch": 4.429473876953125e-05, + "step": 29029, + "training_step_time": 0.1068105697631836 + }, + { + "epoch": 4.42962646484375e-05, + "grad_norm": 0.0309439804404974, + "learning_rate": 2.85548376610606e-07, + "loss": 0.0053, + "step": 29030 + }, + { + "epoch": 4.42962646484375e-05, + "model_forward_time": 0.02505350112915039, + "step": 29030 + }, + { + "epoch": 4.42962646484375e-05, + "step": 29030, + "training_step_time": 0.10500693321228027 + }, + { + "epoch": 4.429779052734375e-05, + "model_forward_time": 0.025599241256713867, + "step": 29031 + }, + { + "epoch": 4.429779052734375e-05, + "step": 29031, + "training_step_time": 0.10443305969238281 + }, + { + "epoch": 4.429931640625e-05, + "model_forward_time": 0.025168895721435547, + "step": 29032 + }, + { + "epoch": 4.429931640625e-05, + "step": 29032, + "training_step_time": 0.12389183044433594 + }, + { + "epoch": 4.430084228515625e-05, + "model_forward_time": 0.02502894401550293, + "step": 29033 + }, + { + "epoch": 4.430084228515625e-05, + "step": 29033, + "training_step_time": 0.11312389373779297 + }, + { + "epoch": 4.43023681640625e-05, + "model_forward_time": 0.025398731231689453, + "step": 29034 + }, + { + "epoch": 4.43023681640625e-05, + "step": 29034, + "training_step_time": 0.12799954414367676 + }, + { + "epoch": 4.430389404296875e-05, + "model_forward_time": 0.025562763214111328, + "step": 29035 + }, + { + "epoch": 4.430389404296875e-05, + "step": 29035, + "training_step_time": 0.12442755699157715 + }, + { + "epoch": 4.4305419921875e-05, + "model_forward_time": 0.025344371795654297, + "step": 29036 + }, + { + "epoch": 4.4305419921875e-05, + "step": 29036, + "training_step_time": 0.20122218132019043 + }, + { + "epoch": 4.430694580078125e-05, + "model_forward_time": 0.024864673614501953, + "step": 29037 + }, + { + "epoch": 4.430694580078125e-05, + "step": 29037, + "training_step_time": 0.18426012992858887 + }, + { + "epoch": 4.43084716796875e-05, + "model_forward_time": 0.024628400802612305, + "step": 29038 + }, + { + "epoch": 4.43084716796875e-05, + "step": 29038, + "training_step_time": 0.10523366928100586 + }, + { + "epoch": 4.430999755859375e-05, + "model_forward_time": 0.02457404136657715, + "step": 29039 + }, + { + "epoch": 4.430999755859375e-05, + "step": 29039, + "training_step_time": 0.12404322624206543 + }, + { + "epoch": 4.43115234375e-05, + "grad_norm": 0.22773705422878265, + "learning_rate": 2.796965966699927e-07, + "loss": 0.0053, + "step": 29040 + }, + { + "epoch": 4.43115234375e-05, + "model_forward_time": 0.025617599487304688, + "step": 29040 + }, + { + "epoch": 4.43115234375e-05, + "step": 29040, + "training_step_time": 0.22382831573486328 + }, + { + "epoch": 4.431304931640625e-05, + "model_forward_time": 0.02447342872619629, + "step": 29041 + }, + { + "epoch": 4.431304931640625e-05, + "step": 29041, + "training_step_time": 0.10941147804260254 + }, + { + "epoch": 4.43145751953125e-05, + "model_forward_time": 0.024534940719604492, + "step": 29042 + }, + { + "epoch": 4.43145751953125e-05, + "step": 29042, + "training_step_time": 0.10349750518798828 + }, + { + "epoch": 4.431610107421875e-05, + "model_forward_time": 0.025150299072265625, + "step": 29043 + }, + { + "epoch": 4.431610107421875e-05, + "step": 29043, + "training_step_time": 0.10471701622009277 + }, + { + "epoch": 4.4317626953125e-05, + "model_forward_time": 0.024787187576293945, + "step": 29044 + }, + { + "epoch": 4.4317626953125e-05, + "step": 29044, + "training_step_time": 0.10744404792785645 + }, + { + "epoch": 4.431915283203125e-05, + "model_forward_time": 0.025259733200073242, + "step": 29045 + }, + { + "epoch": 4.431915283203125e-05, + "step": 29045, + "training_step_time": 0.11159062385559082 + }, + { + "epoch": 4.43206787109375e-05, + "model_forward_time": 0.02506279945373535, + "step": 29046 + }, + { + "epoch": 4.43206787109375e-05, + "step": 29046, + "training_step_time": 0.1042473316192627 + }, + { + "epoch": 4.432220458984375e-05, + "model_forward_time": 0.025090456008911133, + "step": 29047 + }, + { + "epoch": 4.432220458984375e-05, + "step": 29047, + "training_step_time": 0.10808777809143066 + }, + { + "epoch": 4.432373046875e-05, + "model_forward_time": 0.025212764739990234, + "step": 29048 + }, + { + "epoch": 4.432373046875e-05, + "step": 29048, + "training_step_time": 0.12201237678527832 + }, + { + "epoch": 4.432525634765625e-05, + "model_forward_time": 0.02524542808532715, + "step": 29049 + }, + { + "epoch": 4.432525634765625e-05, + "step": 29049, + "training_step_time": 0.12130022048950195 + }, + { + "epoch": 4.43267822265625e-05, + "grad_norm": 0.023993385955691338, + "learning_rate": 2.7390523158633554e-07, + "loss": 0.0039, + "step": 29050 + }, + { + "epoch": 4.43267822265625e-05, + "model_forward_time": 0.02635478973388672, + "step": 29050 + }, + { + "epoch": 4.43267822265625e-05, + "step": 29050, + "training_step_time": 0.11919927597045898 + }, + { + "epoch": 4.432830810546875e-05, + "model_forward_time": 0.025228261947631836, + "step": 29051 + }, + { + "epoch": 4.432830810546875e-05, + "step": 29051, + "training_step_time": 0.12278008460998535 + }, + { + "epoch": 4.4329833984375e-05, + "model_forward_time": 0.025281429290771484, + "step": 29052 + }, + { + "epoch": 4.4329833984375e-05, + "step": 29052, + "training_step_time": 0.1181035041809082 + }, + { + "epoch": 4.433135986328125e-05, + "model_forward_time": 0.025684118270874023, + "step": 29053 + }, + { + "epoch": 4.433135986328125e-05, + "step": 29053, + "training_step_time": 0.11938238143920898 + }, + { + "epoch": 4.43328857421875e-05, + "model_forward_time": 0.026683568954467773, + "step": 29054 + }, + { + "epoch": 4.43328857421875e-05, + "step": 29054, + "training_step_time": 0.11470770835876465 + }, + { + "epoch": 4.433441162109375e-05, + "model_forward_time": 0.025034427642822266, + "step": 29055 + }, + { + "epoch": 4.433441162109375e-05, + "step": 29055, + "training_step_time": 0.10893440246582031 + }, + { + "epoch": 4.43359375e-05, + "model_forward_time": 0.025298118591308594, + "step": 29056 + }, + { + "epoch": 4.43359375e-05, + "step": 29056, + "training_step_time": 0.11025643348693848 + }, + { + "epoch": 4.433746337890625e-05, + "model_forward_time": 0.025495290756225586, + "step": 29057 + }, + { + "epoch": 4.433746337890625e-05, + "step": 29057, + "training_step_time": 0.10738062858581543 + }, + { + "epoch": 4.43389892578125e-05, + "model_forward_time": 0.02548050880432129, + "step": 29058 + }, + { + "epoch": 4.43389892578125e-05, + "step": 29058, + "training_step_time": 0.18467330932617188 + }, + { + "epoch": 4.434051513671875e-05, + "model_forward_time": 0.02475261688232422, + "step": 29059 + }, + { + "epoch": 4.434051513671875e-05, + "step": 29059, + "training_step_time": 0.11485576629638672 + }, + { + "epoch": 4.4342041015625e-05, + "grad_norm": 0.3404434323310852, + "learning_rate": 2.6817428839668315e-07, + "loss": 0.0048, + "step": 29060 + }, + { + "epoch": 4.4342041015625e-05, + "model_forward_time": 0.024493932723999023, + "step": 29060 + }, + { + "epoch": 4.4342041015625e-05, + "step": 29060, + "training_step_time": 0.12158823013305664 + }, + { + "epoch": 4.434356689453125e-05, + "model_forward_time": 0.025066614151000977, + "step": 29061 + }, + { + "epoch": 4.434356689453125e-05, + "step": 29061, + "training_step_time": 0.1555635929107666 + }, + { + "epoch": 4.43450927734375e-05, + "model_forward_time": 0.024604320526123047, + "step": 29062 + }, + { + "epoch": 4.43450927734375e-05, + "step": 29062, + "training_step_time": 0.1147608757019043 + }, + { + "epoch": 4.434661865234375e-05, + "model_forward_time": 0.02434372901916504, + "step": 29063 + }, + { + "epoch": 4.434661865234375e-05, + "step": 29063, + "training_step_time": 0.1413893699645996 + }, + { + "epoch": 4.434814453125e-05, + "model_forward_time": 0.024782180786132812, + "step": 29064 + }, + { + "epoch": 4.434814453125e-05, + "step": 29064, + "training_step_time": 0.1541309356689453 + }, + { + "epoch": 4.434967041015625e-05, + "model_forward_time": 0.02425861358642578, + "step": 29065 + }, + { + "epoch": 4.434967041015625e-05, + "step": 29065, + "training_step_time": 0.14547419548034668 + }, + { + "epoch": 4.43511962890625e-05, + "model_forward_time": 0.024445533752441406, + "step": 29066 + }, + { + "epoch": 4.43511962890625e-05, + "step": 29066, + "training_step_time": 0.13780736923217773 + }, + { + "epoch": 4.435272216796875e-05, + "model_forward_time": 0.024811267852783203, + "step": 29067 + }, + { + "epoch": 4.435272216796875e-05, + "step": 29067, + "training_step_time": 0.1019437313079834 + }, + { + "epoch": 4.4354248046875e-05, + "model_forward_time": 0.02548670768737793, + "step": 29068 + }, + { + "epoch": 4.4354248046875e-05, + "step": 29068, + "training_step_time": 0.11330342292785645 + }, + { + "epoch": 4.435577392578125e-05, + "model_forward_time": 0.02496194839477539, + "step": 29069 + }, + { + "epoch": 4.435577392578125e-05, + "step": 29069, + "training_step_time": 0.10464215278625488 + }, + { + "epoch": 4.43572998046875e-05, + "grad_norm": 0.08030667901039124, + "learning_rate": 2.625037740646763e-07, + "loss": 0.0042, + "step": 29070 + }, + { + "epoch": 4.43572998046875e-05, + "model_forward_time": 0.02514338493347168, + "step": 29070 + }, + { + "epoch": 4.43572998046875e-05, + "step": 29070, + "training_step_time": 0.10356783866882324 + }, + { + "epoch": 4.435882568359375e-05, + "model_forward_time": 0.025549650192260742, + "step": 29071 + }, + { + "epoch": 4.435882568359375e-05, + "step": 29071, + "training_step_time": 0.10477137565612793 + }, + { + "epoch": 4.43603515625e-05, + "model_forward_time": 0.0253448486328125, + "step": 29072 + }, + { + "epoch": 4.43603515625e-05, + "step": 29072, + "training_step_time": 0.10596418380737305 + }, + { + "epoch": 4.436187744140625e-05, + "model_forward_time": 0.025366783142089844, + "step": 29073 + }, + { + "epoch": 4.436187744140625e-05, + "step": 29073, + "training_step_time": 0.10829806327819824 + }, + { + "epoch": 4.43634033203125e-05, + "model_forward_time": 0.024848222732543945, + "step": 29074 + }, + { + "epoch": 4.43634033203125e-05, + "step": 29074, + "training_step_time": 0.18905878067016602 + }, + { + "epoch": 4.436492919921875e-05, + "model_forward_time": 0.024756431579589844, + "step": 29075 + }, + { + "epoch": 4.436492919921875e-05, + "step": 29075, + "training_step_time": 0.20671701431274414 + }, + { + "epoch": 4.4366455078125e-05, + "model_forward_time": 0.02454662322998047, + "step": 29076 + }, + { + "epoch": 4.4366455078125e-05, + "step": 29076, + "training_step_time": 0.21623730659484863 + }, + { + "epoch": 4.436798095703125e-05, + "model_forward_time": 0.0242459774017334, + "step": 29077 + }, + { + "epoch": 4.436798095703125e-05, + "step": 29077, + "training_step_time": 0.21062755584716797 + }, + { + "epoch": 4.43695068359375e-05, + "model_forward_time": 0.024140357971191406, + "step": 29078 + }, + { + "epoch": 4.43695068359375e-05, + "step": 29078, + "training_step_time": 0.20864343643188477 + }, + { + "epoch": 4.437103271484375e-05, + "model_forward_time": 0.024688243865966797, + "step": 29079 + }, + { + "epoch": 4.437103271484375e-05, + "step": 29079, + "training_step_time": 0.176499605178833 + }, + { + "epoch": 4.437255859375e-05, + "grad_norm": 0.05033090338110924, + "learning_rate": 2.568936954805201e-07, + "loss": 0.0042, + "step": 29080 + }, + { + "epoch": 4.437255859375e-05, + "model_forward_time": 0.024112462997436523, + "step": 29080 + }, + { + "epoch": 4.437255859375e-05, + "step": 29080, + "training_step_time": 0.16588997840881348 + }, + { + "epoch": 4.437408447265625e-05, + "model_forward_time": 0.02400946617126465, + "step": 29081 + }, + { + "epoch": 4.437408447265625e-05, + "step": 29081, + "training_step_time": 0.17953181266784668 + }, + { + "epoch": 4.43756103515625e-05, + "model_forward_time": 0.02435922622680664, + "step": 29082 + }, + { + "epoch": 4.43756103515625e-05, + "step": 29082, + "training_step_time": 0.13510823249816895 + }, + { + "epoch": 4.437713623046875e-05, + "model_forward_time": 0.024512529373168945, + "step": 29083 + }, + { + "epoch": 4.437713623046875e-05, + "step": 29083, + "training_step_time": 0.11664414405822754 + }, + { + "epoch": 4.4378662109375e-05, + "model_forward_time": 0.024792194366455078, + "step": 29084 + }, + { + "epoch": 4.4378662109375e-05, + "step": 29084, + "training_step_time": 0.10439538955688477 + }, + { + "epoch": 4.438018798828125e-05, + "model_forward_time": 0.024926185607910156, + "step": 29085 + }, + { + "epoch": 4.438018798828125e-05, + "step": 29085, + "training_step_time": 0.10226249694824219 + }, + { + "epoch": 4.43817138671875e-05, + "model_forward_time": 0.02522420883178711, + "step": 29086 + }, + { + "epoch": 4.43817138671875e-05, + "step": 29086, + "training_step_time": 0.10770225524902344 + }, + { + "epoch": 4.438323974609375e-05, + "model_forward_time": 0.025551557540893555, + "step": 29087 + }, + { + "epoch": 4.438323974609375e-05, + "step": 29087, + "training_step_time": 0.10296130180358887 + }, + { + "epoch": 4.4384765625e-05, + "model_forward_time": 0.0253450870513916, + "step": 29088 + }, + { + "epoch": 4.4384765625e-05, + "step": 29088, + "training_step_time": 0.10489773750305176 + }, + { + "epoch": 4.438629150390625e-05, + "model_forward_time": 0.02557659149169922, + "step": 29089 + }, + { + "epoch": 4.438629150390625e-05, + "step": 29089, + "training_step_time": 0.10745716094970703 + }, + { + "epoch": 4.43878173828125e-05, + "grad_norm": 0.03882180526852608, + "learning_rate": 2.51344059460995e-07, + "loss": 0.0029, + "step": 29090 + }, + { + "epoch": 4.43878173828125e-05, + "model_forward_time": 0.025041580200195312, + "step": 29090 + }, + { + "epoch": 4.43878173828125e-05, + "step": 29090, + "training_step_time": 0.1091463565826416 + }, + { + "epoch": 4.438934326171875e-05, + "model_forward_time": 0.025408029556274414, + "step": 29091 + }, + { + "epoch": 4.438934326171875e-05, + "step": 29091, + "training_step_time": 0.10446882247924805 + }, + { + "epoch": 4.4390869140625e-05, + "model_forward_time": 0.02408742904663086, + "step": 29092 + }, + { + "epoch": 4.4390869140625e-05, + "step": 29092, + "training_step_time": 0.12284255027770996 + }, + { + "epoch": 4.439239501953125e-05, + "model_forward_time": 0.02529764175415039, + "step": 29093 + }, + { + "epoch": 4.439239501953125e-05, + "step": 29093, + "training_step_time": 0.13942790031433105 + }, + { + "epoch": 4.43939208984375e-05, + "model_forward_time": 0.02500295639038086, + "step": 29094 + }, + { + "epoch": 4.43939208984375e-05, + "step": 29094, + "training_step_time": 0.14726018905639648 + }, + { + "epoch": 4.439544677734375e-05, + "model_forward_time": 0.024327993392944336, + "step": 29095 + }, + { + "epoch": 4.439544677734375e-05, + "step": 29095, + "training_step_time": 0.13877534866333008 + }, + { + "epoch": 4.439697265625e-05, + "model_forward_time": 0.024378538131713867, + "step": 29096 + }, + { + "epoch": 4.439697265625e-05, + "step": 29096, + "training_step_time": 0.13673067092895508 + }, + { + "epoch": 4.439849853515625e-05, + "model_forward_time": 0.024726390838623047, + "step": 29097 + }, + { + "epoch": 4.439849853515625e-05, + "step": 29097, + "training_step_time": 0.12851285934448242 + }, + { + "epoch": 4.44000244140625e-05, + "model_forward_time": 0.024462223052978516, + "step": 29098 + }, + { + "epoch": 4.44000244140625e-05, + "step": 29098, + "training_step_time": 0.12666034698486328 + }, + { + "epoch": 4.440155029296875e-05, + "model_forward_time": 0.024619340896606445, + "step": 29099 + }, + { + "epoch": 4.440155029296875e-05, + "step": 29099, + "training_step_time": 0.22264647483825684 + }, + { + "epoch": 4.4403076171875e-05, + "grad_norm": 0.08160519599914551, + "learning_rate": 2.458548727494292e-07, + "loss": 0.0076, + "step": 29100 + }, + { + "epoch": 4.4403076171875e-05, + "model_forward_time": 0.02480006217956543, + "step": 29100 + }, + { + "epoch": 4.4403076171875e-05, + "step": 29100, + "training_step_time": 0.1168978214263916 + }, + { + "epoch": 4.440460205078125e-05, + "model_forward_time": 0.025147676467895508, + "step": 29101 + }, + { + "epoch": 4.440460205078125e-05, + "step": 29101, + "training_step_time": 0.12009835243225098 + }, + { + "epoch": 4.44061279296875e-05, + "model_forward_time": 0.025388240814208984, + "step": 29102 + }, + { + "epoch": 4.44061279296875e-05, + "step": 29102, + "training_step_time": 0.1540079116821289 + }, + { + "epoch": 4.440765380859375e-05, + "model_forward_time": 0.025020122528076172, + "step": 29103 + }, + { + "epoch": 4.440765380859375e-05, + "step": 29103, + "training_step_time": 0.21747255325317383 + }, + { + "epoch": 4.44091796875e-05, + "model_forward_time": 0.024644136428833008, + "step": 29104 + }, + { + "epoch": 4.44091796875e-05, + "step": 29104, + "training_step_time": 0.17318153381347656 + }, + { + "epoch": 4.441070556640625e-05, + "model_forward_time": 0.023979902267456055, + "step": 29105 + }, + { + "epoch": 4.441070556640625e-05, + "step": 29105, + "training_step_time": 0.16699957847595215 + }, + { + "epoch": 4.44122314453125e-05, + "model_forward_time": 0.024883508682250977, + "step": 29106 + }, + { + "epoch": 4.44122314453125e-05, + "step": 29106, + "training_step_time": 0.1152486801147461 + }, + { + "epoch": 4.441375732421875e-05, + "model_forward_time": 0.02495098114013672, + "step": 29107 + }, + { + "epoch": 4.441375732421875e-05, + "step": 29107, + "training_step_time": 0.12934494018554688 + }, + { + "epoch": 4.4415283203125e-05, + "model_forward_time": 0.024727344512939453, + "step": 29108 + }, + { + "epoch": 4.4415283203125e-05, + "step": 29108, + "training_step_time": 0.19962835311889648 + }, + { + "epoch": 4.441680908203125e-05, + "model_forward_time": 0.02460193634033203, + "step": 29109 + }, + { + "epoch": 4.441680908203125e-05, + "step": 29109, + "training_step_time": 0.10294318199157715 + }, + { + "epoch": 4.44183349609375e-05, + "grad_norm": 0.04695097729563713, + "learning_rate": 2.404261420157039e-07, + "loss": 0.0026, + "step": 29110 + }, + { + "epoch": 4.44183349609375e-05, + "model_forward_time": 0.02520442008972168, + "step": 29110 + }, + { + "epoch": 4.44183349609375e-05, + "step": 29110, + "training_step_time": 0.10407471656799316 + }, + { + "epoch": 4.441986083984375e-05, + "model_forward_time": 0.025218963623046875, + "step": 29111 + }, + { + "epoch": 4.441986083984375e-05, + "step": 29111, + "training_step_time": 0.10590052604675293 + }, + { + "epoch": 4.442138671875e-05, + "model_forward_time": 0.02543020248413086, + "step": 29112 + }, + { + "epoch": 4.442138671875e-05, + "step": 29112, + "training_step_time": 0.10853838920593262 + }, + { + "epoch": 4.442291259765625e-05, + "model_forward_time": 0.024783849716186523, + "step": 29113 + }, + { + "epoch": 4.442291259765625e-05, + "step": 29113, + "training_step_time": 0.10422873497009277 + }, + { + "epoch": 4.44244384765625e-05, + "model_forward_time": 0.025430917739868164, + "step": 29114 + }, + { + "epoch": 4.44244384765625e-05, + "step": 29114, + "training_step_time": 0.11363792419433594 + }, + { + "epoch": 4.442596435546875e-05, + "model_forward_time": 0.025423765182495117, + "step": 29115 + }, + { + "epoch": 4.442596435546875e-05, + "step": 29115, + "training_step_time": 0.10458850860595703 + }, + { + "epoch": 4.4427490234375e-05, + "model_forward_time": 0.025220394134521484, + "step": 29116 + }, + { + "epoch": 4.4427490234375e-05, + "step": 29116, + "training_step_time": 0.10871195793151855 + }, + { + "epoch": 4.442901611328125e-05, + "model_forward_time": 0.025431394577026367, + "step": 29117 + }, + { + "epoch": 4.442901611328125e-05, + "step": 29117, + "training_step_time": 0.10499715805053711 + }, + { + "epoch": 4.44305419921875e-05, + "model_forward_time": 0.02538156509399414, + "step": 29118 + }, + { + "epoch": 4.44305419921875e-05, + "step": 29118, + "training_step_time": 0.21206164360046387 + }, + { + "epoch": 4.443206787109375e-05, + "model_forward_time": 0.02494192123413086, + "step": 29119 + }, + { + "epoch": 4.443206787109375e-05, + "step": 29119, + "training_step_time": 0.22455859184265137 + }, + { + "epoch": 4.443359375e-05, + "grad_norm": 0.07692018151283264, + "learning_rate": 2.3505787385623702e-07, + "loss": 0.0041, + "step": 29120 + }, + { + "epoch": 4.443359375e-05, + "model_forward_time": 0.024222373962402344, + "step": 29120 + }, + { + "epoch": 4.443359375e-05, + "step": 29120, + "training_step_time": 0.19524073600769043 + }, + { + "epoch": 4.443511962890625e-05, + "model_forward_time": 0.024816036224365234, + "step": 29121 + }, + { + "epoch": 4.443511962890625e-05, + "step": 29121, + "training_step_time": 0.15548396110534668 + }, + { + "epoch": 4.44366455078125e-05, + "model_forward_time": 0.02508068084716797, + "step": 29122 + }, + { + "epoch": 4.44366455078125e-05, + "step": 29122, + "training_step_time": 0.1475663185119629 + }, + { + "epoch": 4.443817138671875e-05, + "model_forward_time": 0.024451255798339844, + "step": 29123 + }, + { + "epoch": 4.443817138671875e-05, + "step": 29123, + "training_step_time": 0.11400413513183594 + }, + { + "epoch": 4.4439697265625e-05, + "model_forward_time": 0.024725675582885742, + "step": 29124 + }, + { + "epoch": 4.4439697265625e-05, + "step": 29124, + "training_step_time": 0.10689258575439453 + }, + { + "epoch": 4.444122314453125e-05, + "model_forward_time": 0.02590656280517578, + "step": 29125 + }, + { + "epoch": 4.444122314453125e-05, + "step": 29125, + "training_step_time": 0.13040876388549805 + }, + { + "epoch": 4.44427490234375e-05, + "model_forward_time": 0.02533555030822754, + "step": 29126 + }, + { + "epoch": 4.44427490234375e-05, + "step": 29126, + "training_step_time": 0.11995410919189453 + }, + { + "epoch": 4.444427490234375e-05, + "model_forward_time": 0.0250394344329834, + "step": 29127 + }, + { + "epoch": 4.444427490234375e-05, + "step": 29127, + "training_step_time": 0.1077110767364502 + }, + { + "epoch": 4.444580078125e-05, + "model_forward_time": 0.025464296340942383, + "step": 29128 + }, + { + "epoch": 4.444580078125e-05, + "step": 29128, + "training_step_time": 0.10610413551330566 + }, + { + "epoch": 4.444732666015625e-05, + "model_forward_time": 0.02468419075012207, + "step": 29129 + }, + { + "epoch": 4.444732666015625e-05, + "step": 29129, + "training_step_time": 0.10801529884338379 + }, + { + "epoch": 4.44488525390625e-05, + "grad_norm": 0.04510524496436119, + "learning_rate": 2.2975007479397738e-07, + "loss": 0.0021, + "step": 29130 + }, + { + "epoch": 4.44488525390625e-05, + "model_forward_time": 0.025274276733398438, + "step": 29130 + }, + { + "epoch": 4.44488525390625e-05, + "step": 29130, + "training_step_time": 0.11323881149291992 + }, + { + "epoch": 4.445037841796875e-05, + "model_forward_time": 0.025043725967407227, + "step": 29131 + }, + { + "epoch": 4.445037841796875e-05, + "step": 29131, + "training_step_time": 0.10579228401184082 + }, + { + "epoch": 4.4451904296875e-05, + "model_forward_time": 0.025148868560791016, + "step": 29132 + }, + { + "epoch": 4.4451904296875e-05, + "step": 29132, + "training_step_time": 0.10499954223632812 + }, + { + "epoch": 4.445343017578125e-05, + "model_forward_time": 0.02529120445251465, + "step": 29133 + }, + { + "epoch": 4.445343017578125e-05, + "step": 29133, + "training_step_time": 0.10580039024353027 + }, + { + "epoch": 4.44549560546875e-05, + "model_forward_time": 0.025068044662475586, + "step": 29134 + }, + { + "epoch": 4.44549560546875e-05, + "step": 29134, + "training_step_time": 0.1089785099029541 + }, + { + "epoch": 4.445648193359375e-05, + "model_forward_time": 0.0252530574798584, + "step": 29135 + }, + { + "epoch": 4.445648193359375e-05, + "step": 29135, + "training_step_time": 0.10535717010498047 + }, + { + "epoch": 4.44580078125e-05, + "model_forward_time": 0.02528071403503418, + "step": 29136 + }, + { + "epoch": 4.44580078125e-05, + "step": 29136, + "training_step_time": 0.11004066467285156 + }, + { + "epoch": 4.445953369140625e-05, + "model_forward_time": 0.025387048721313477, + "step": 29137 + }, + { + "epoch": 4.445953369140625e-05, + "step": 29137, + "training_step_time": 0.10559248924255371 + }, + { + "epoch": 4.44610595703125e-05, + "model_forward_time": 0.02553248405456543, + "step": 29138 + }, + { + "epoch": 4.44610595703125e-05, + "step": 29138, + "training_step_time": 0.107025146484375 + }, + { + "epoch": 4.446258544921875e-05, + "model_forward_time": 0.025373458862304688, + "step": 29139 + }, + { + "epoch": 4.446258544921875e-05, + "step": 29139, + "training_step_time": 0.10643148422241211 + }, + { + "epoch": 4.4464111328125e-05, + "grad_norm": 0.07037770748138428, + "learning_rate": 2.2450275127841036e-07, + "loss": 0.0036, + "step": 29140 + }, + { + "epoch": 4.4464111328125e-05, + "model_forward_time": 0.025453567504882812, + "step": 29140 + }, + { + "epoch": 4.4464111328125e-05, + "step": 29140, + "training_step_time": 0.10630440711975098 + }, + { + "epoch": 4.446563720703125e-05, + "model_forward_time": 0.025014877319335938, + "step": 29141 + }, + { + "epoch": 4.446563720703125e-05, + "step": 29141, + "training_step_time": 0.10526394844055176 + }, + { + "epoch": 4.44671630859375e-05, + "model_forward_time": 0.024930953979492188, + "step": 29142 + }, + { + "epoch": 4.44671630859375e-05, + "step": 29142, + "training_step_time": 0.10628724098205566 + }, + { + "epoch": 4.446868896484375e-05, + "model_forward_time": 0.02547430992126465, + "step": 29143 + }, + { + "epoch": 4.446868896484375e-05, + "step": 29143, + "training_step_time": 0.1433579921722412 + }, + { + "epoch": 4.447021484375e-05, + "model_forward_time": 0.025321245193481445, + "step": 29144 + }, + { + "epoch": 4.447021484375e-05, + "step": 29144, + "training_step_time": 0.11835265159606934 + }, + { + "epoch": 4.447174072265625e-05, + "model_forward_time": 0.02506279945373535, + "step": 29145 + }, + { + "epoch": 4.447174072265625e-05, + "step": 29145, + "training_step_time": 0.12581515312194824 + }, + { + "epoch": 4.44732666015625e-05, + "model_forward_time": 0.025551557540893555, + "step": 29146 + }, + { + "epoch": 4.44732666015625e-05, + "step": 29146, + "training_step_time": 0.15565061569213867 + }, + { + "epoch": 4.447479248046875e-05, + "model_forward_time": 0.02461409568786621, + "step": 29147 + }, + { + "epoch": 4.447479248046875e-05, + "step": 29147, + "training_step_time": 0.21603035926818848 + }, + { + "epoch": 4.4476318359375e-05, + "model_forward_time": 0.02523946762084961, + "step": 29148 + }, + { + "epoch": 4.4476318359375e-05, + "step": 29148, + "training_step_time": 0.11180758476257324 + }, + { + "epoch": 4.447784423828125e-05, + "model_forward_time": 0.024934768676757812, + "step": 29149 + }, + { + "epoch": 4.447784423828125e-05, + "step": 29149, + "training_step_time": 0.1334092617034912 + }, + { + "epoch": 4.44793701171875e-05, + "grad_norm": 0.04141829535365105, + "learning_rate": 2.1931590968551908e-07, + "loss": 0.0036, + "step": 29150 + }, + { + "epoch": 4.44793701171875e-05, + "model_forward_time": 0.025337696075439453, + "step": 29150 + }, + { + "epoch": 4.44793701171875e-05, + "step": 29150, + "training_step_time": 0.10977888107299805 + }, + { + "epoch": 4.448089599609375e-05, + "model_forward_time": 0.02550983428955078, + "step": 29151 + }, + { + "epoch": 4.448089599609375e-05, + "step": 29151, + "training_step_time": 0.12189102172851562 + }, + { + "epoch": 4.4482421875e-05, + "model_forward_time": 0.02548670768737793, + "step": 29152 + }, + { + "epoch": 4.4482421875e-05, + "step": 29152, + "training_step_time": 0.12258553504943848 + }, + { + "epoch": 4.448394775390625e-05, + "model_forward_time": 0.025317668914794922, + "step": 29153 + }, + { + "epoch": 4.448394775390625e-05, + "step": 29153, + "training_step_time": 0.11857938766479492 + }, + { + "epoch": 4.44854736328125e-05, + "model_forward_time": 0.025322914123535156, + "step": 29154 + }, + { + "epoch": 4.44854736328125e-05, + "step": 29154, + "training_step_time": 0.18203043937683105 + }, + { + "epoch": 4.448699951171875e-05, + "model_forward_time": 0.024985074996948242, + "step": 29155 + }, + { + "epoch": 4.448699951171875e-05, + "step": 29155, + "training_step_time": 0.10677886009216309 + }, + { + "epoch": 4.4488525390625e-05, + "model_forward_time": 0.025141477584838867, + "step": 29156 + }, + { + "epoch": 4.4488525390625e-05, + "step": 29156, + "training_step_time": 0.10850119590759277 + }, + { + "epoch": 4.449005126953125e-05, + "model_forward_time": 0.025155305862426758, + "step": 29157 + }, + { + "epoch": 4.449005126953125e-05, + "step": 29157, + "training_step_time": 0.1083366870880127 + }, + { + "epoch": 4.44915771484375e-05, + "model_forward_time": 0.025179386138916016, + "step": 29158 + }, + { + "epoch": 4.44915771484375e-05, + "step": 29158, + "training_step_time": 0.10811495780944824 + }, + { + "epoch": 4.449310302734375e-05, + "model_forward_time": 0.025850296020507812, + "step": 29159 + }, + { + "epoch": 4.449310302734375e-05, + "step": 29159, + "training_step_time": 0.11030793190002441 + }, + { + "epoch": 4.449462890625e-05, + "grad_norm": 0.056449707597494125, + "learning_rate": 2.1418955631781202e-07, + "loss": 0.008, + "step": 29160 + }, + { + "epoch": 4.449462890625e-05, + "model_forward_time": 0.025135278701782227, + "step": 29160 + }, + { + "epoch": 4.449462890625e-05, + "step": 29160, + "training_step_time": 0.1101830005645752 + }, + { + "epoch": 4.449615478515625e-05, + "model_forward_time": 0.02523040771484375, + "step": 29161 + }, + { + "epoch": 4.449615478515625e-05, + "step": 29161, + "training_step_time": 0.10569095611572266 + }, + { + "epoch": 4.44976806640625e-05, + "model_forward_time": 0.025432109832763672, + "step": 29162 + }, + { + "epoch": 4.44976806640625e-05, + "step": 29162, + "training_step_time": 0.11112070083618164 + }, + { + "epoch": 4.449920654296875e-05, + "model_forward_time": 0.025574922561645508, + "step": 29163 + }, + { + "epoch": 4.449920654296875e-05, + "step": 29163, + "training_step_time": 0.1077265739440918 + }, + { + "epoch": 4.4500732421875e-05, + "model_forward_time": 0.025356054306030273, + "step": 29164 + }, + { + "epoch": 4.4500732421875e-05, + "step": 29164, + "training_step_time": 0.1809539794921875 + }, + { + "epoch": 4.450225830078125e-05, + "model_forward_time": 0.024548768997192383, + "step": 29165 + }, + { + "epoch": 4.450225830078125e-05, + "step": 29165, + "training_step_time": 0.20938801765441895 + }, + { + "epoch": 4.45037841796875e-05, + "model_forward_time": 0.024847745895385742, + "step": 29166 + }, + { + "epoch": 4.45037841796875e-05, + "step": 29166, + "training_step_time": 0.12308073043823242 + }, + { + "epoch": 4.450531005859375e-05, + "model_forward_time": 0.024587631225585938, + "step": 29167 + }, + { + "epoch": 4.450531005859375e-05, + "step": 29167, + "training_step_time": 0.14258837699890137 + }, + { + "epoch": 4.45068359375e-05, + "model_forward_time": 0.024997711181640625, + "step": 29168 + }, + { + "epoch": 4.45068359375e-05, + "step": 29168, + "training_step_time": 0.20114874839782715 + }, + { + "epoch": 4.450836181640625e-05, + "model_forward_time": 0.024848222732543945, + "step": 29169 + }, + { + "epoch": 4.450836181640625e-05, + "step": 29169, + "training_step_time": 0.15875792503356934 + }, + { + "epoch": 4.45098876953125e-05, + "grad_norm": 0.33454886078834534, + "learning_rate": 2.0912369740428983e-07, + "loss": 0.0097, + "step": 29170 + }, + { + "epoch": 4.45098876953125e-05, + "model_forward_time": 0.024692773818969727, + "step": 29170 + }, + { + "epoch": 4.45098876953125e-05, + "step": 29170, + "training_step_time": 0.10991859436035156 + }, + { + "epoch": 4.451141357421875e-05, + "model_forward_time": 0.024778127670288086, + "step": 29171 + }, + { + "epoch": 4.451141357421875e-05, + "step": 29171, + "training_step_time": 0.10628128051757812 + }, + { + "epoch": 4.4512939453125e-05, + "model_forward_time": 0.02541637420654297, + "step": 29172 + }, + { + "epoch": 4.4512939453125e-05, + "step": 29172, + "training_step_time": 0.11589574813842773 + }, + { + "epoch": 4.451446533203125e-05, + "model_forward_time": 0.02544879913330078, + "step": 29173 + }, + { + "epoch": 4.451446533203125e-05, + "step": 29173, + "training_step_time": 0.10677027702331543 + }, + { + "epoch": 4.45159912109375e-05, + "model_forward_time": 0.02527594566345215, + "step": 29174 + }, + { + "epoch": 4.45159912109375e-05, + "step": 29174, + "training_step_time": 0.10689544677734375 + }, + { + "epoch": 4.451751708984375e-05, + "model_forward_time": 0.02545166015625, + "step": 29175 + }, + { + "epoch": 4.451751708984375e-05, + "step": 29175, + "training_step_time": 0.10645008087158203 + }, + { + "epoch": 4.451904296875e-05, + "model_forward_time": 0.025403499603271484, + "step": 29176 + }, + { + "epoch": 4.451904296875e-05, + "step": 29176, + "training_step_time": 0.10621428489685059 + }, + { + "epoch": 4.452056884765625e-05, + "model_forward_time": 0.025614023208618164, + "step": 29177 + }, + { + "epoch": 4.452056884765625e-05, + "step": 29177, + "training_step_time": 0.10514426231384277 + }, + { + "epoch": 4.45220947265625e-05, + "model_forward_time": 0.02497076988220215, + "step": 29178 + }, + { + "epoch": 4.45220947265625e-05, + "step": 29178, + "training_step_time": 0.10712051391601562 + }, + { + "epoch": 4.452362060546875e-05, + "model_forward_time": 0.025425195693969727, + "step": 29179 + }, + { + "epoch": 4.452362060546875e-05, + "step": 29179, + "training_step_time": 0.10726785659790039 + }, + { + "epoch": 4.4525146484375e-05, + "grad_norm": 0.07406570762395859, + "learning_rate": 2.041183391004453e-07, + "loss": 0.0046, + "step": 29180 + }, + { + "epoch": 4.4525146484375e-05, + "model_forward_time": 0.02498030662536621, + "step": 29180 + }, + { + "epoch": 4.4525146484375e-05, + "step": 29180, + "training_step_time": 0.10393023490905762 + }, + { + "epoch": 4.452667236328125e-05, + "model_forward_time": 0.02504110336303711, + "step": 29181 + }, + { + "epoch": 4.452667236328125e-05, + "step": 29181, + "training_step_time": 0.1090540885925293 + }, + { + "epoch": 4.45281982421875e-05, + "model_forward_time": 0.025954484939575195, + "step": 29182 + }, + { + "epoch": 4.45281982421875e-05, + "step": 29182, + "training_step_time": 0.10615181922912598 + }, + { + "epoch": 4.452972412109375e-05, + "model_forward_time": 0.025450944900512695, + "step": 29183 + }, + { + "epoch": 4.452972412109375e-05, + "step": 29183, + "training_step_time": 0.10523056983947754 + }, + { + "epoch": 4.453125e-05, + "model_forward_time": 0.0277864933013916, + "step": 29184 + }, + { + "epoch": 4.453125e-05, + "step": 29184, + "training_step_time": 0.10680460929870605 + }, + { + "epoch": 4.453277587890625e-05, + "model_forward_time": 0.02624225616455078, + "step": 29185 + }, + { + "epoch": 4.453277587890625e-05, + "step": 29185, + "training_step_time": 0.10616755485534668 + }, + { + "epoch": 4.45343017578125e-05, + "model_forward_time": 0.025322437286376953, + "step": 29186 + }, + { + "epoch": 4.45343017578125e-05, + "step": 29186, + "training_step_time": 0.10539865493774414 + }, + { + "epoch": 4.453582763671875e-05, + "model_forward_time": 0.025300979614257812, + "step": 29187 + }, + { + "epoch": 4.453582763671875e-05, + "step": 29187, + "training_step_time": 0.10503077507019043 + }, + { + "epoch": 4.4537353515625e-05, + "model_forward_time": 0.02532029151916504, + "step": 29188 + }, + { + "epoch": 4.4537353515625e-05, + "step": 29188, + "training_step_time": 0.10577821731567383 + }, + { + "epoch": 4.453887939453125e-05, + "model_forward_time": 0.025055408477783203, + "step": 29189 + }, + { + "epoch": 4.453887939453125e-05, + "step": 29189, + "training_step_time": 0.12801909446716309 + }, + { + "epoch": 4.45404052734375e-05, + "grad_norm": 0.16133560240268707, + "learning_rate": 1.9917348748826335e-07, + "loss": 0.0054, + "step": 29190 + }, + { + "epoch": 4.45404052734375e-05, + "model_forward_time": 0.027102231979370117, + "step": 29190 + }, + { + "epoch": 4.45404052734375e-05, + "step": 29190, + "training_step_time": 0.11545085906982422 + }, + { + "epoch": 4.454193115234375e-05, + "model_forward_time": 0.025290489196777344, + "step": 29191 + }, + { + "epoch": 4.454193115234375e-05, + "step": 29191, + "training_step_time": 0.11910319328308105 + }, + { + "epoch": 4.454345703125e-05, + "model_forward_time": 0.02520918846130371, + "step": 29192 + }, + { + "epoch": 4.454345703125e-05, + "step": 29192, + "training_step_time": 0.13320589065551758 + }, + { + "epoch": 4.454498291015625e-05, + "model_forward_time": 0.0253756046295166, + "step": 29193 + }, + { + "epoch": 4.454498291015625e-05, + "step": 29193, + "training_step_time": 0.10840129852294922 + }, + { + "epoch": 4.45465087890625e-05, + "model_forward_time": 0.02539539337158203, + "step": 29194 + }, + { + "epoch": 4.45465087890625e-05, + "step": 29194, + "training_step_time": 0.13821911811828613 + }, + { + "epoch": 4.454803466796875e-05, + "model_forward_time": 0.025503873825073242, + "step": 29195 + }, + { + "epoch": 4.454803466796875e-05, + "step": 29195, + "training_step_time": 0.10474467277526855 + }, + { + "epoch": 4.4549560546875e-05, + "model_forward_time": 0.024520397186279297, + "step": 29196 + }, + { + "epoch": 4.4549560546875e-05, + "step": 29196, + "training_step_time": 0.14089322090148926 + }, + { + "epoch": 4.455108642578125e-05, + "model_forward_time": 0.02516460418701172, + "step": 29197 + }, + { + "epoch": 4.455108642578125e-05, + "step": 29197, + "training_step_time": 0.11853504180908203 + }, + { + "epoch": 4.45526123046875e-05, + "model_forward_time": 0.024899959564208984, + "step": 29198 + }, + { + "epoch": 4.45526123046875e-05, + "step": 29198, + "training_step_time": 0.19440293312072754 + }, + { + "epoch": 4.455413818359375e-05, + "model_forward_time": 0.024455547332763672, + "step": 29199 + }, + { + "epoch": 4.455413818359375e-05, + "step": 29199, + "training_step_time": 0.1593494415283203 + }, + { + "epoch": 4.45556640625e-05, + "grad_norm": 0.04935256764292717, + "learning_rate": 1.942891485762044e-07, + "loss": 0.0042, + "step": 29200 + }, + { + "epoch": 4.45556640625e-05, + "model_forward_time": 0.024286746978759766, + "step": 29200 + }, + { + "epoch": 4.45556640625e-05, + "step": 29200, + "training_step_time": 0.20178890228271484 + }, + { + "epoch": 4.455718994140625e-05, + "model_forward_time": 0.02457714080810547, + "step": 29201 + }, + { + "epoch": 4.455718994140625e-05, + "step": 29201, + "training_step_time": 0.14940643310546875 + }, + { + "epoch": 4.45587158203125e-05, + "model_forward_time": 0.024638652801513672, + "step": 29202 + }, + { + "epoch": 4.45587158203125e-05, + "step": 29202, + "training_step_time": 0.1255781650543213 + }, + { + "epoch": 4.456024169921875e-05, + "model_forward_time": 0.024190187454223633, + "step": 29203 + }, + { + "epoch": 4.456024169921875e-05, + "step": 29203, + "training_step_time": 0.12446022033691406 + }, + { + "epoch": 4.4561767578125e-05, + "model_forward_time": 0.024898529052734375, + "step": 29204 + }, + { + "epoch": 4.4561767578125e-05, + "step": 29204, + "training_step_time": 0.12291383743286133 + }, + { + "epoch": 4.456329345703125e-05, + "model_forward_time": 0.0251162052154541, + "step": 29205 + }, + { + "epoch": 4.456329345703125e-05, + "step": 29205, + "training_step_time": 0.11806607246398926 + }, + { + "epoch": 4.45648193359375e-05, + "model_forward_time": 0.025507211685180664, + "step": 29206 + }, + { + "epoch": 4.45648193359375e-05, + "step": 29206, + "training_step_time": 0.1146240234375 + }, + { + "epoch": 4.456634521484375e-05, + "model_forward_time": 0.025272130966186523, + "step": 29207 + }, + { + "epoch": 4.456634521484375e-05, + "step": 29207, + "training_step_time": 0.11224746704101562 + }, + { + "epoch": 4.456787109375e-05, + "model_forward_time": 0.02498340606689453, + "step": 29208 + }, + { + "epoch": 4.456787109375e-05, + "step": 29208, + "training_step_time": 0.10706496238708496 + }, + { + "epoch": 4.456939697265625e-05, + "model_forward_time": 0.024964570999145508, + "step": 29209 + }, + { + "epoch": 4.456939697265625e-05, + "step": 29209, + "training_step_time": 0.20986461639404297 + }, + { + "epoch": 4.45709228515625e-05, + "grad_norm": 0.30231958627700806, + "learning_rate": 1.8946532829920426e-07, + "loss": 0.0043, + "step": 29210 + }, + { + "epoch": 4.45709228515625e-05, + "model_forward_time": 0.024324417114257812, + "step": 29210 + }, + { + "epoch": 4.45709228515625e-05, + "step": 29210, + "training_step_time": 0.20853281021118164 + }, + { + "epoch": 4.457244873046875e-05, + "model_forward_time": 0.024558544158935547, + "step": 29211 + }, + { + "epoch": 4.457244873046875e-05, + "step": 29211, + "training_step_time": 0.21250414848327637 + }, + { + "epoch": 4.4573974609375e-05, + "model_forward_time": 0.02430438995361328, + "step": 29212 + }, + { + "epoch": 4.4573974609375e-05, + "step": 29212, + "training_step_time": 0.11509418487548828 + }, + { + "epoch": 4.457550048828125e-05, + "model_forward_time": 0.02454090118408203, + "step": 29213 + }, + { + "epoch": 4.457550048828125e-05, + "step": 29213, + "training_step_time": 0.11182641983032227 + }, + { + "epoch": 4.45770263671875e-05, + "model_forward_time": 0.025635242462158203, + "step": 29214 + }, + { + "epoch": 4.45770263671875e-05, + "step": 29214, + "training_step_time": 0.14321184158325195 + }, + { + "epoch": 4.457855224609375e-05, + "model_forward_time": 0.02536749839782715, + "step": 29215 + }, + { + "epoch": 4.457855224609375e-05, + "step": 29215, + "training_step_time": 0.15502190589904785 + }, + { + "epoch": 4.4580078125e-05, + "model_forward_time": 0.024321794509887695, + "step": 29216 + }, + { + "epoch": 4.4580078125e-05, + "step": 29216, + "training_step_time": 0.12062668800354004 + }, + { + "epoch": 4.458160400390625e-05, + "model_forward_time": 0.024708271026611328, + "step": 29217 + }, + { + "epoch": 4.458160400390625e-05, + "step": 29217, + "training_step_time": 0.10248684883117676 + }, + { + "epoch": 4.45831298828125e-05, + "model_forward_time": 0.025120019912719727, + "step": 29218 + }, + { + "epoch": 4.45831298828125e-05, + "step": 29218, + "training_step_time": 0.10790634155273438 + }, + { + "epoch": 4.458465576171875e-05, + "model_forward_time": 0.025448322296142578, + "step": 29219 + }, + { + "epoch": 4.458465576171875e-05, + "step": 29219, + "training_step_time": 0.10582756996154785 + }, + { + "epoch": 4.4586181640625e-05, + "grad_norm": 0.04679938778281212, + "learning_rate": 1.847020325186577e-07, + "loss": 0.0103, + "step": 29220 + }, + { + "epoch": 4.4586181640625e-05, + "model_forward_time": 0.02504873275756836, + "step": 29220 + }, + { + "epoch": 4.4586181640625e-05, + "step": 29220, + "training_step_time": 0.10419988632202148 + }, + { + "epoch": 4.458770751953125e-05, + "model_forward_time": 0.025397539138793945, + "step": 29221 + }, + { + "epoch": 4.458770751953125e-05, + "step": 29221, + "training_step_time": 0.10366654396057129 + }, + { + "epoch": 4.45892333984375e-05, + "model_forward_time": 0.024834156036376953, + "step": 29222 + }, + { + "epoch": 4.45892333984375e-05, + "step": 29222, + "training_step_time": 0.10530328750610352 + }, + { + "epoch": 4.459075927734375e-05, + "model_forward_time": 0.025173425674438477, + "step": 29223 + }, + { + "epoch": 4.459075927734375e-05, + "step": 29223, + "training_step_time": 0.10408830642700195 + }, + { + "epoch": 4.459228515625e-05, + "model_forward_time": 0.025326251983642578, + "step": 29224 + }, + { + "epoch": 4.459228515625e-05, + "step": 29224, + "training_step_time": 0.10784053802490234 + }, + { + "epoch": 4.459381103515625e-05, + "model_forward_time": 0.02575850486755371, + "step": 29225 + }, + { + "epoch": 4.459381103515625e-05, + "step": 29225, + "training_step_time": 0.105438232421875 + }, + { + "epoch": 4.45953369140625e-05, + "model_forward_time": 0.025055646896362305, + "step": 29226 + }, + { + "epoch": 4.45953369140625e-05, + "step": 29226, + "training_step_time": 0.10932159423828125 + }, + { + "epoch": 4.459686279296875e-05, + "model_forward_time": 0.024958133697509766, + "step": 29227 + }, + { + "epoch": 4.459686279296875e-05, + "step": 29227, + "training_step_time": 0.10515761375427246 + }, + { + "epoch": 4.4598388671875e-05, + "model_forward_time": 0.02505660057067871, + "step": 29228 + }, + { + "epoch": 4.4598388671875e-05, + "step": 29228, + "training_step_time": 0.10516762733459473 + }, + { + "epoch": 4.459991455078125e-05, + "model_forward_time": 0.025411367416381836, + "step": 29229 + }, + { + "epoch": 4.459991455078125e-05, + "step": 29229, + "training_step_time": 0.10290980339050293 + }, + { + "epoch": 4.46014404296875e-05, + "grad_norm": 0.047037262469530106, + "learning_rate": 1.799992670224182e-07, + "loss": 0.003, + "step": 29230 + }, + { + "epoch": 4.46014404296875e-05, + "model_forward_time": 0.02561664581298828, + "step": 29230 + }, + { + "epoch": 4.46014404296875e-05, + "step": 29230, + "training_step_time": 0.1042780876159668 + }, + { + "epoch": 4.460296630859375e-05, + "model_forward_time": 0.02519369125366211, + "step": 29231 + }, + { + "epoch": 4.460296630859375e-05, + "step": 29231, + "training_step_time": 0.10624241828918457 + }, + { + "epoch": 4.46044921875e-05, + "model_forward_time": 0.02541041374206543, + "step": 29232 + }, + { + "epoch": 4.46044921875e-05, + "step": 29232, + "training_step_time": 0.10304951667785645 + }, + { + "epoch": 4.460601806640625e-05, + "model_forward_time": 0.02568507194519043, + "step": 29233 + }, + { + "epoch": 4.460601806640625e-05, + "step": 29233, + "training_step_time": 0.22035741806030273 + }, + { + "epoch": 4.46075439453125e-05, + "model_forward_time": 0.02461099624633789, + "step": 29234 + }, + { + "epoch": 4.46075439453125e-05, + "step": 29234, + "training_step_time": 0.12034082412719727 + }, + { + "epoch": 4.460906982421875e-05, + "model_forward_time": 0.024698257446289062, + "step": 29235 + }, + { + "epoch": 4.460906982421875e-05, + "step": 29235, + "training_step_time": 0.1255486011505127 + }, + { + "epoch": 4.4610595703125e-05, + "model_forward_time": 0.025273799896240234, + "step": 29236 + }, + { + "epoch": 4.4610595703125e-05, + "step": 29236, + "training_step_time": 0.20974516868591309 + }, + { + "epoch": 4.461212158203125e-05, + "model_forward_time": 0.024103403091430664, + "step": 29237 + }, + { + "epoch": 4.461212158203125e-05, + "step": 29237, + "training_step_time": 0.22941160202026367 + }, + { + "epoch": 4.46136474609375e-05, + "model_forward_time": 0.02493143081665039, + "step": 29238 + }, + { + "epoch": 4.46136474609375e-05, + "step": 29238, + "training_step_time": 0.17444443702697754 + }, + { + "epoch": 4.461517333984375e-05, + "model_forward_time": 0.025133848190307617, + "step": 29239 + }, + { + "epoch": 4.461517333984375e-05, + "step": 29239, + "training_step_time": 0.18807530403137207 + }, + { + "epoch": 4.461669921875e-05, + "grad_norm": 0.06753551214933395, + "learning_rate": 1.753570375247815e-07, + "loss": 0.0103, + "step": 29240 + }, + { + "epoch": 4.461669921875e-05, + "model_forward_time": 0.024422645568847656, + "step": 29240 + }, + { + "epoch": 4.461669921875e-05, + "step": 29240, + "training_step_time": 0.17309999465942383 + }, + { + "epoch": 4.461822509765625e-05, + "model_forward_time": 0.024614572525024414, + "step": 29241 + }, + { + "epoch": 4.461822509765625e-05, + "step": 29241, + "training_step_time": 0.1656970977783203 + }, + { + "epoch": 4.46197509765625e-05, + "model_forward_time": 0.024381399154663086, + "step": 29242 + }, + { + "epoch": 4.46197509765625e-05, + "step": 29242, + "training_step_time": 0.13591575622558594 + }, + { + "epoch": 4.462127685546875e-05, + "model_forward_time": 0.024503707885742188, + "step": 29243 + }, + { + "epoch": 4.462127685546875e-05, + "step": 29243, + "training_step_time": 0.12748312950134277 + }, + { + "epoch": 4.4622802734375e-05, + "model_forward_time": 0.024832963943481445, + "step": 29244 + }, + { + "epoch": 4.4622802734375e-05, + "step": 29244, + "training_step_time": 0.11878538131713867 + }, + { + "epoch": 4.462432861328125e-05, + "model_forward_time": 0.0252227783203125, + "step": 29245 + }, + { + "epoch": 4.462432861328125e-05, + "step": 29245, + "training_step_time": 0.11771202087402344 + }, + { + "epoch": 4.46258544921875e-05, + "model_forward_time": 0.025090932846069336, + "step": 29246 + }, + { + "epoch": 4.46258544921875e-05, + "step": 29246, + "training_step_time": 0.11478972434997559 + }, + { + "epoch": 4.462738037109375e-05, + "model_forward_time": 0.025682449340820312, + "step": 29247 + }, + { + "epoch": 4.462738037109375e-05, + "step": 29247, + "training_step_time": 0.11609292030334473 + }, + { + "epoch": 4.462890625e-05, + "model_forward_time": 0.02508091926574707, + "step": 29248 + }, + { + "epoch": 4.462890625e-05, + "step": 29248, + "training_step_time": 0.10498404502868652 + }, + { + "epoch": 4.463043212890625e-05, + "model_forward_time": 0.02520895004272461, + "step": 29249 + }, + { + "epoch": 4.463043212890625e-05, + "step": 29249, + "training_step_time": 0.10349464416503906 + }, + { + "epoch": 4.46319580078125e-05, + "grad_norm": 0.08164948225021362, + "learning_rate": 1.7077534966650766e-07, + "loss": 0.0029, + "step": 29250 + }, + { + "epoch": 4.46319580078125e-05, + "model_forward_time": 0.025195598602294922, + "step": 29250 + }, + { + "epoch": 4.46319580078125e-05, + "step": 29250, + "training_step_time": 0.1051490306854248 + }, + { + "epoch": 4.463348388671875e-05, + "model_forward_time": 0.025407075881958008, + "step": 29251 + }, + { + "epoch": 4.463348388671875e-05, + "step": 29251, + "training_step_time": 0.10389518737792969 + }, + { + "epoch": 4.4635009765625e-05, + "model_forward_time": 0.02539229393005371, + "step": 29252 + }, + { + "epoch": 4.4635009765625e-05, + "step": 29252, + "training_step_time": 0.1061701774597168 + }, + { + "epoch": 4.463653564453125e-05, + "model_forward_time": 0.025336265563964844, + "step": 29253 + }, + { + "epoch": 4.463653564453125e-05, + "step": 29253, + "training_step_time": 0.13142704963684082 + }, + { + "epoch": 4.46380615234375e-05, + "model_forward_time": 0.024914026260375977, + "step": 29254 + }, + { + "epoch": 4.46380615234375e-05, + "step": 29254, + "training_step_time": 0.10991072654724121 + }, + { + "epoch": 4.463958740234375e-05, + "model_forward_time": 0.026196956634521484, + "step": 29255 + }, + { + "epoch": 4.463958740234375e-05, + "step": 29255, + "training_step_time": 0.11872339248657227 + }, + { + "epoch": 4.464111328125e-05, + "model_forward_time": 0.025101661682128906, + "step": 29256 + }, + { + "epoch": 4.464111328125e-05, + "step": 29256, + "training_step_time": 0.13953566551208496 + }, + { + "epoch": 4.464263916015625e-05, + "model_forward_time": 0.02496814727783203, + "step": 29257 + }, + { + "epoch": 4.464263916015625e-05, + "step": 29257, + "training_step_time": 0.15061140060424805 + }, + { + "epoch": 4.46441650390625e-05, + "model_forward_time": 0.024850130081176758, + "step": 29258 + }, + { + "epoch": 4.46441650390625e-05, + "step": 29258, + "training_step_time": 0.19939279556274414 + }, + { + "epoch": 4.464569091796875e-05, + "model_forward_time": 0.024552583694458008, + "step": 29259 + }, + { + "epoch": 4.464569091796875e-05, + "step": 29259, + "training_step_time": 0.17019081115722656 + }, + { + "epoch": 4.4647216796875e-05, + "grad_norm": 0.044074781239032745, + "learning_rate": 1.662542090147712e-07, + "loss": 0.0053, + "step": 29260 + }, + { + "epoch": 4.4647216796875e-05, + "model_forward_time": 0.025516986846923828, + "step": 29260 + }, + { + "epoch": 4.4647216796875e-05, + "step": 29260, + "training_step_time": 0.10477018356323242 + }, + { + "epoch": 4.464874267578125e-05, + "model_forward_time": 0.024930715560913086, + "step": 29261 + }, + { + "epoch": 4.464874267578125e-05, + "step": 29261, + "training_step_time": 0.10395383834838867 + }, + { + "epoch": 4.46502685546875e-05, + "model_forward_time": 0.0251162052154541, + "step": 29262 + }, + { + "epoch": 4.46502685546875e-05, + "step": 29262, + "training_step_time": 0.10530376434326172 + }, + { + "epoch": 4.465179443359375e-05, + "model_forward_time": 0.025175094604492188, + "step": 29263 + }, + { + "epoch": 4.465179443359375e-05, + "step": 29263, + "training_step_time": 0.10660362243652344 + }, + { + "epoch": 4.46533203125e-05, + "model_forward_time": 0.025259971618652344, + "step": 29264 + }, + { + "epoch": 4.46533203125e-05, + "step": 29264, + "training_step_time": 0.10516238212585449 + }, + { + "epoch": 4.465484619140625e-05, + "model_forward_time": 0.025018692016601562, + "step": 29265 + }, + { + "epoch": 4.465484619140625e-05, + "step": 29265, + "training_step_time": 0.107025146484375 + }, + { + "epoch": 4.46563720703125e-05, + "model_forward_time": 0.025271892547607422, + "step": 29266 + }, + { + "epoch": 4.46563720703125e-05, + "step": 29266, + "training_step_time": 0.10708355903625488 + }, + { + "epoch": 4.465789794921875e-05, + "model_forward_time": 0.025465965270996094, + "step": 29267 + }, + { + "epoch": 4.465789794921875e-05, + "step": 29267, + "training_step_time": 0.10401201248168945 + }, + { + "epoch": 4.4659423828125e-05, + "model_forward_time": 0.025221586227416992, + "step": 29268 + }, + { + "epoch": 4.4659423828125e-05, + "step": 29268, + "training_step_time": 0.10880565643310547 + }, + { + "epoch": 4.466094970703125e-05, + "model_forward_time": 0.02512955665588379, + "step": 29269 + }, + { + "epoch": 4.466094970703125e-05, + "step": 29269, + "training_step_time": 0.1041872501373291 + }, + { + "epoch": 4.46624755859375e-05, + "grad_norm": 0.04014989733695984, + "learning_rate": 1.6179362106318874e-07, + "loss": 0.0035, + "step": 29270 + }, + { + "epoch": 4.46624755859375e-05, + "model_forward_time": 0.025320053100585938, + "step": 29270 + }, + { + "epoch": 4.46624755859375e-05, + "step": 29270, + "training_step_time": 0.10244536399841309 + }, + { + "epoch": 4.466400146484375e-05, + "model_forward_time": 0.025294065475463867, + "step": 29271 + }, + { + "epoch": 4.466400146484375e-05, + "step": 29271, + "training_step_time": 0.10302543640136719 + }, + { + "epoch": 4.466552734375e-05, + "model_forward_time": 0.025515079498291016, + "step": 29272 + }, + { + "epoch": 4.466552734375e-05, + "step": 29272, + "training_step_time": 0.10659074783325195 + }, + { + "epoch": 4.466705322265625e-05, + "model_forward_time": 0.025066375732421875, + "step": 29273 + }, + { + "epoch": 4.466705322265625e-05, + "step": 29273, + "training_step_time": 0.10381865501403809 + }, + { + "epoch": 4.46685791015625e-05, + "model_forward_time": 0.02525162696838379, + "step": 29274 + }, + { + "epoch": 4.46685791015625e-05, + "step": 29274, + "training_step_time": 0.10404229164123535 + }, + { + "epoch": 4.467010498046875e-05, + "model_forward_time": 0.02462291717529297, + "step": 29275 + }, + { + "epoch": 4.467010498046875e-05, + "step": 29275, + "training_step_time": 0.10335850715637207 + }, + { + "epoch": 4.4671630859375e-05, + "model_forward_time": 0.02446913719177246, + "step": 29276 + }, + { + "epoch": 4.4671630859375e-05, + "step": 29276, + "training_step_time": 0.10891079902648926 + }, + { + "epoch": 4.467315673828125e-05, + "model_forward_time": 0.02479100227355957, + "step": 29277 + }, + { + "epoch": 4.467315673828125e-05, + "step": 29277, + "training_step_time": 0.20103740692138672 + }, + { + "epoch": 4.46746826171875e-05, + "model_forward_time": 0.0239255428314209, + "step": 29278 + }, + { + "epoch": 4.46746826171875e-05, + "step": 29278, + "training_step_time": 0.1177988052368164 + }, + { + "epoch": 4.467620849609375e-05, + "model_forward_time": 0.024171113967895508, + "step": 29279 + }, + { + "epoch": 4.467620849609375e-05, + "step": 29279, + "training_step_time": 0.1255195140838623 + }, + { + "epoch": 4.4677734375e-05, + "grad_norm": 0.13707120716571808, + "learning_rate": 1.5739359123178587e-07, + "loss": 0.0044, + "step": 29280 + }, + { + "epoch": 4.4677734375e-05, + "model_forward_time": 0.025776386260986328, + "step": 29280 + }, + { + "epoch": 4.4677734375e-05, + "step": 29280, + "training_step_time": 0.15343093872070312 + }, + { + "epoch": 4.467926025390625e-05, + "model_forward_time": 0.02449965476989746, + "step": 29281 + }, + { + "epoch": 4.467926025390625e-05, + "step": 29281, + "training_step_time": 0.21709203720092773 + }, + { + "epoch": 4.46807861328125e-05, + "model_forward_time": 0.02377486228942871, + "step": 29282 + }, + { + "epoch": 4.46807861328125e-05, + "step": 29282, + "training_step_time": 0.10887479782104492 + }, + { + "epoch": 4.468231201171875e-05, + "model_forward_time": 0.025783061981201172, + "step": 29283 + }, + { + "epoch": 4.468231201171875e-05, + "step": 29283, + "training_step_time": 0.10442757606506348 + }, + { + "epoch": 4.4683837890625e-05, + "model_forward_time": 0.024457454681396484, + "step": 29284 + }, + { + "epoch": 4.4683837890625e-05, + "step": 29284, + "training_step_time": 0.14114928245544434 + }, + { + "epoch": 4.468536376953125e-05, + "model_forward_time": 0.025577545166015625, + "step": 29285 + }, + { + "epoch": 4.468536376953125e-05, + "step": 29285, + "training_step_time": 0.10525345802307129 + }, + { + "epoch": 4.46868896484375e-05, + "model_forward_time": 0.02523207664489746, + "step": 29286 + }, + { + "epoch": 4.46868896484375e-05, + "step": 29286, + "training_step_time": 0.2073674201965332 + }, + { + "epoch": 4.468841552734375e-05, + "model_forward_time": 0.02429366111755371, + "step": 29287 + }, + { + "epoch": 4.468841552734375e-05, + "step": 29287, + "training_step_time": 0.1457967758178711 + }, + { + "epoch": 4.468994140625e-05, + "model_forward_time": 0.024573802947998047, + "step": 29288 + }, + { + "epoch": 4.468994140625e-05, + "step": 29288, + "training_step_time": 0.1308450698852539 + }, + { + "epoch": 4.469146728515625e-05, + "model_forward_time": 0.024330854415893555, + "step": 29289 + }, + { + "epoch": 4.469146728515625e-05, + "step": 29289, + "training_step_time": 0.13199377059936523 + }, + { + "epoch": 4.46929931640625e-05, + "grad_norm": 0.27505090832710266, + "learning_rate": 1.5305412486702474e-07, + "loss": 0.0052, + "step": 29290 + }, + { + "epoch": 4.46929931640625e-05, + "model_forward_time": 0.024813175201416016, + "step": 29290 + }, + { + "epoch": 4.46929931640625e-05, + "step": 29290, + "training_step_time": 0.12634062767028809 + }, + { + "epoch": 4.469451904296875e-05, + "model_forward_time": 0.024620532989501953, + "step": 29291 + }, + { + "epoch": 4.469451904296875e-05, + "step": 29291, + "training_step_time": 0.12334704399108887 + }, + { + "epoch": 4.4696044921875e-05, + "model_forward_time": 0.02477407455444336, + "step": 29292 + }, + { + "epoch": 4.4696044921875e-05, + "step": 29292, + "training_step_time": 0.12033224105834961 + }, + { + "epoch": 4.469757080078125e-05, + "model_forward_time": 0.024919986724853516, + "step": 29293 + }, + { + "epoch": 4.469757080078125e-05, + "step": 29293, + "training_step_time": 0.11629605293273926 + }, + { + "epoch": 4.46990966796875e-05, + "model_forward_time": 0.024993419647216797, + "step": 29294 + }, + { + "epoch": 4.46990966796875e-05, + "step": 29294, + "training_step_time": 0.1137688159942627 + }, + { + "epoch": 4.470062255859375e-05, + "model_forward_time": 0.02485346794128418, + "step": 29295 + }, + { + "epoch": 4.470062255859375e-05, + "step": 29295, + "training_step_time": 0.11148858070373535 + }, + { + "epoch": 4.47021484375e-05, + "model_forward_time": 0.025007963180541992, + "step": 29296 + }, + { + "epoch": 4.47021484375e-05, + "step": 29296, + "training_step_time": 0.10565757751464844 + }, + { + "epoch": 4.470367431640625e-05, + "model_forward_time": 0.02490401268005371, + "step": 29297 + }, + { + "epoch": 4.470367431640625e-05, + "step": 29297, + "training_step_time": 0.11085367202758789 + }, + { + "epoch": 4.47052001953125e-05, + "model_forward_time": 0.02491617202758789, + "step": 29298 + }, + { + "epoch": 4.47052001953125e-05, + "step": 29298, + "training_step_time": 0.10672163963317871 + }, + { + "epoch": 4.470672607421875e-05, + "model_forward_time": 0.025066614151000977, + "step": 29299 + }, + { + "epoch": 4.470672607421875e-05, + "step": 29299, + "training_step_time": 0.12346649169921875 + }, + { + "epoch": 4.4708251953125e-05, + "grad_norm": 0.22749656438827515, + "learning_rate": 1.4877522724175973e-07, + "loss": 0.0039, + "step": 29300 + }, + { + "epoch": 4.4708251953125e-05, + "model_forward_time": 0.025049448013305664, + "step": 29300 + }, + { + "epoch": 4.4708251953125e-05, + "step": 29300, + "training_step_time": 0.22768306732177734 + }, + { + "epoch": 4.470977783203125e-05, + "model_forward_time": 0.0242002010345459, + "step": 29301 + }, + { + "epoch": 4.470977783203125e-05, + "step": 29301, + "training_step_time": 0.1381394863128662 + }, + { + "epoch": 4.47113037109375e-05, + "model_forward_time": 0.02422642707824707, + "step": 29302 + }, + { + "epoch": 4.47113037109375e-05, + "step": 29302, + "training_step_time": 0.2046375274658203 + }, + { + "epoch": 4.471282958984375e-05, + "model_forward_time": 0.024184703826904297, + "step": 29303 + }, + { + "epoch": 4.471282958984375e-05, + "step": 29303, + "training_step_time": 0.1348421573638916 + }, + { + "epoch": 4.471435546875e-05, + "model_forward_time": 0.024637937545776367, + "step": 29304 + }, + { + "epoch": 4.471435546875e-05, + "step": 29304, + "training_step_time": 0.1162862777709961 + }, + { + "epoch": 4.471588134765625e-05, + "model_forward_time": 0.02466559410095215, + "step": 29305 + }, + { + "epoch": 4.471588134765625e-05, + "step": 29305, + "training_step_time": 0.10253214836120605 + }, + { + "epoch": 4.47174072265625e-05, + "model_forward_time": 0.024791955947875977, + "step": 29306 + }, + { + "epoch": 4.47174072265625e-05, + "step": 29306, + "training_step_time": 0.10318541526794434 + }, + { + "epoch": 4.471893310546875e-05, + "model_forward_time": 0.024979829788208008, + "step": 29307 + }, + { + "epoch": 4.471893310546875e-05, + "step": 29307, + "training_step_time": 0.10462594032287598 + }, + { + "epoch": 4.4720458984375e-05, + "model_forward_time": 0.024505615234375, + "step": 29308 + }, + { + "epoch": 4.4720458984375e-05, + "step": 29308, + "training_step_time": 0.1066582202911377 + }, + { + "epoch": 4.472198486328125e-05, + "model_forward_time": 0.02563643455505371, + "step": 29309 + }, + { + "epoch": 4.472198486328125e-05, + "step": 29309, + "training_step_time": 0.11033797264099121 + }, + { + "epoch": 4.47235107421875e-05, + "grad_norm": 0.04503900930285454, + "learning_rate": 1.4455690355525964e-07, + "loss": 0.0096, + "step": 29310 + }, + { + "epoch": 4.47235107421875e-05, + "model_forward_time": 0.024950265884399414, + "step": 29310 + }, + { + "epoch": 4.47235107421875e-05, + "step": 29310, + "training_step_time": 0.10849785804748535 + }, + { + "epoch": 4.472503662109375e-05, + "model_forward_time": 0.02501821517944336, + "step": 29311 + }, + { + "epoch": 4.472503662109375e-05, + "step": 29311, + "training_step_time": 0.12184810638427734 + }, + { + "epoch": 4.47265625e-05, + "model_forward_time": 0.025444746017456055, + "step": 29312 + }, + { + "epoch": 4.47265625e-05, + "step": 29312, + "training_step_time": 0.10687923431396484 + }, + { + "epoch": 4.472808837890625e-05, + "model_forward_time": 0.025232553482055664, + "step": 29313 + }, + { + "epoch": 4.472808837890625e-05, + "step": 29313, + "training_step_time": 0.10608410835266113 + }, + { + "epoch": 4.47296142578125e-05, + "model_forward_time": 0.025468111038208008, + "step": 29314 + }, + { + "epoch": 4.47296142578125e-05, + "step": 29314, + "training_step_time": 0.10686111450195312 + }, + { + "epoch": 4.473114013671875e-05, + "model_forward_time": 0.02501392364501953, + "step": 29315 + }, + { + "epoch": 4.473114013671875e-05, + "step": 29315, + "training_step_time": 0.10678863525390625 + }, + { + "epoch": 4.4732666015625e-05, + "model_forward_time": 0.02531909942626953, + "step": 29316 + }, + { + "epoch": 4.4732666015625e-05, + "step": 29316, + "training_step_time": 0.10640597343444824 + }, + { + "epoch": 4.473419189453125e-05, + "model_forward_time": 0.025478601455688477, + "step": 29317 + }, + { + "epoch": 4.473419189453125e-05, + "step": 29317, + "training_step_time": 0.10679912567138672 + }, + { + "epoch": 4.47357177734375e-05, + "model_forward_time": 0.0250089168548584, + "step": 29318 + }, + { + "epoch": 4.47357177734375e-05, + "step": 29318, + "training_step_time": 0.10621905326843262 + }, + { + "epoch": 4.473724365234375e-05, + "model_forward_time": 0.02537822723388672, + "step": 29319 + }, + { + "epoch": 4.473724365234375e-05, + "step": 29319, + "training_step_time": 0.10548281669616699 + }, + { + "epoch": 4.473876953125e-05, + "grad_norm": 0.056688860058784485, + "learning_rate": 1.4039915893318544e-07, + "loss": 0.0055, + "step": 29320 + }, + { + "epoch": 4.473876953125e-05, + "model_forward_time": 0.025515317916870117, + "step": 29320 + }, + { + "epoch": 4.473876953125e-05, + "step": 29320, + "training_step_time": 0.11110639572143555 + }, + { + "epoch": 4.474029541015625e-05, + "model_forward_time": 0.025371313095092773, + "step": 29321 + }, + { + "epoch": 4.474029541015625e-05, + "step": 29321, + "training_step_time": 0.211134672164917 + }, + { + "epoch": 4.47418212890625e-05, + "model_forward_time": 0.02447986602783203, + "step": 29322 + }, + { + "epoch": 4.47418212890625e-05, + "step": 29322, + "training_step_time": 0.1211540699005127 + }, + { + "epoch": 4.474334716796875e-05, + "model_forward_time": 0.023842573165893555, + "step": 29323 + }, + { + "epoch": 4.474334716796875e-05, + "step": 29323, + "training_step_time": 0.1299142837524414 + }, + { + "epoch": 4.4744873046875e-05, + "model_forward_time": 0.024712800979614258, + "step": 29324 + }, + { + "epoch": 4.4744873046875e-05, + "step": 29324, + "training_step_time": 0.15225958824157715 + }, + { + "epoch": 4.474639892578125e-05, + "model_forward_time": 0.024759769439697266, + "step": 29325 + }, + { + "epoch": 4.474639892578125e-05, + "step": 29325, + "training_step_time": 0.21504831314086914 + }, + { + "epoch": 4.47479248046875e-05, + "model_forward_time": 0.024817943572998047, + "step": 29326 + }, + { + "epoch": 4.47479248046875e-05, + "step": 29326, + "training_step_time": 0.11866593360900879 + }, + { + "epoch": 4.474945068359375e-05, + "model_forward_time": 0.024143457412719727, + "step": 29327 + }, + { + "epoch": 4.474945068359375e-05, + "step": 29327, + "training_step_time": 0.10236763954162598 + }, + { + "epoch": 4.47509765625e-05, + "model_forward_time": 0.025563955307006836, + "step": 29328 + }, + { + "epoch": 4.47509765625e-05, + "step": 29328, + "training_step_time": 0.10315775871276855 + }, + { + "epoch": 4.475250244140625e-05, + "model_forward_time": 0.024791240692138672, + "step": 29329 + }, + { + "epoch": 4.475250244140625e-05, + "step": 29329, + "training_step_time": 0.1415700912475586 + }, + { + "epoch": 4.47540283203125e-05, + "grad_norm": 0.07716532051563263, + "learning_rate": 1.3630199842758484e-07, + "loss": 0.0044, + "step": 29330 + }, + { + "epoch": 4.47540283203125e-05, + "model_forward_time": 0.024802207946777344, + "step": 29330 + }, + { + "epoch": 4.47540283203125e-05, + "step": 29330, + "training_step_time": 0.15965914726257324 + }, + { + "epoch": 4.475555419921875e-05, + "model_forward_time": 0.02447342872619629, + "step": 29331 + }, + { + "epoch": 4.475555419921875e-05, + "step": 29331, + "training_step_time": 0.12029671669006348 + }, + { + "epoch": 4.4757080078125e-05, + "model_forward_time": 0.024561166763305664, + "step": 29332 + }, + { + "epoch": 4.4757080078125e-05, + "step": 29332, + "training_step_time": 0.12207198143005371 + }, + { + "epoch": 4.475860595703125e-05, + "model_forward_time": 0.025473833084106445, + "step": 29333 + }, + { + "epoch": 4.475860595703125e-05, + "step": 29333, + "training_step_time": 0.10641264915466309 + }, + { + "epoch": 4.47601318359375e-05, + "model_forward_time": 0.025437355041503906, + "step": 29334 + }, + { + "epoch": 4.47601318359375e-05, + "step": 29334, + "training_step_time": 0.11220908164978027 + }, + { + "epoch": 4.476165771484375e-05, + "model_forward_time": 0.025599241256713867, + "step": 29335 + }, + { + "epoch": 4.476165771484375e-05, + "step": 29335, + "training_step_time": 0.11154818534851074 + }, + { + "epoch": 4.476318359375e-05, + "model_forward_time": 0.024888277053833008, + "step": 29336 + }, + { + "epoch": 4.476318359375e-05, + "step": 29336, + "training_step_time": 0.12497949600219727 + }, + { + "epoch": 4.476470947265625e-05, + "model_forward_time": 0.025096654891967773, + "step": 29337 + }, + { + "epoch": 4.476470947265625e-05, + "step": 29337, + "training_step_time": 0.12675237655639648 + }, + { + "epoch": 4.47662353515625e-05, + "model_forward_time": 0.0248262882232666, + "step": 29338 + }, + { + "epoch": 4.47662353515625e-05, + "step": 29338, + "training_step_time": 0.12268280982971191 + }, + { + "epoch": 4.476776123046875e-05, + "model_forward_time": 0.024593114852905273, + "step": 29339 + }, + { + "epoch": 4.476776123046875e-05, + "step": 29339, + "training_step_time": 0.12254953384399414 + }, + { + "epoch": 4.4769287109375e-05, + "grad_norm": 0.04653387889266014, + "learning_rate": 1.3226542701689215e-07, + "loss": 0.0027, + "step": 29340 + }, + { + "epoch": 4.4769287109375e-05, + "model_forward_time": 0.02493762969970703, + "step": 29340 + }, + { + "epoch": 4.4769287109375e-05, + "step": 29340, + "training_step_time": 0.1229238510131836 + }, + { + "epoch": 4.477081298828125e-05, + "model_forward_time": 0.024954557418823242, + "step": 29341 + }, + { + "epoch": 4.477081298828125e-05, + "step": 29341, + "training_step_time": 0.12147760391235352 + }, + { + "epoch": 4.47723388671875e-05, + "model_forward_time": 0.025064945220947266, + "step": 29342 + }, + { + "epoch": 4.47723388671875e-05, + "step": 29342, + "training_step_time": 0.11735248565673828 + }, + { + "epoch": 4.477386474609375e-05, + "model_forward_time": 0.02505636215209961, + "step": 29343 + }, + { + "epoch": 4.477386474609375e-05, + "step": 29343, + "training_step_time": 0.11221003532409668 + }, + { + "epoch": 4.4775390625e-05, + "model_forward_time": 0.025596141815185547, + "step": 29344 + }, + { + "epoch": 4.4775390625e-05, + "step": 29344, + "training_step_time": 0.15523958206176758 + }, + { + "epoch": 4.477691650390625e-05, + "model_forward_time": 0.024835824966430664, + "step": 29345 + }, + { + "epoch": 4.477691650390625e-05, + "step": 29345, + "training_step_time": 0.19050073623657227 + }, + { + "epoch": 4.47784423828125e-05, + "model_forward_time": 0.02443552017211914, + "step": 29346 + }, + { + "epoch": 4.47784423828125e-05, + "step": 29346, + "training_step_time": 0.1771383285522461 + }, + { + "epoch": 4.477996826171875e-05, + "model_forward_time": 0.02448296546936035, + "step": 29347 + }, + { + "epoch": 4.477996826171875e-05, + "step": 29347, + "training_step_time": 0.16420888900756836 + }, + { + "epoch": 4.4781494140625e-05, + "model_forward_time": 0.02460312843322754, + "step": 29348 + }, + { + "epoch": 4.4781494140625e-05, + "step": 29348, + "training_step_time": 0.10727739334106445 + }, + { + "epoch": 4.478302001953125e-05, + "model_forward_time": 0.024475574493408203, + "step": 29349 + }, + { + "epoch": 4.478302001953125e-05, + "step": 29349, + "training_step_time": 0.19325852394104004 + }, + { + "epoch": 4.47845458984375e-05, + "grad_norm": 0.19449864327907562, + "learning_rate": 1.2828944960592836e-07, + "loss": 0.0076, + "step": 29350 + }, + { + "epoch": 4.47845458984375e-05, + "model_forward_time": 0.02480149269104004, + "step": 29350 + }, + { + "epoch": 4.47845458984375e-05, + "step": 29350, + "training_step_time": 0.10398125648498535 + }, + { + "epoch": 4.478607177734375e-05, + "model_forward_time": 0.024389982223510742, + "step": 29351 + }, + { + "epoch": 4.478607177734375e-05, + "step": 29351, + "training_step_time": 0.10195040702819824 + }, + { + "epoch": 4.478759765625e-05, + "model_forward_time": 0.025147676467895508, + "step": 29352 + }, + { + "epoch": 4.478759765625e-05, + "step": 29352, + "training_step_time": 0.10646390914916992 + }, + { + "epoch": 4.478912353515625e-05, + "model_forward_time": 0.025377750396728516, + "step": 29353 + }, + { + "epoch": 4.478912353515625e-05, + "step": 29353, + "training_step_time": 0.10848593711853027 + }, + { + "epoch": 4.47906494140625e-05, + "model_forward_time": 0.025032997131347656, + "step": 29354 + }, + { + "epoch": 4.47906494140625e-05, + "step": 29354, + "training_step_time": 0.1073918342590332 + }, + { + "epoch": 4.479217529296875e-05, + "model_forward_time": 0.02521681785583496, + "step": 29355 + }, + { + "epoch": 4.479217529296875e-05, + "step": 29355, + "training_step_time": 0.1096649169921875 + }, + { + "epoch": 4.4793701171875e-05, + "model_forward_time": 0.025640249252319336, + "step": 29356 + }, + { + "epoch": 4.4793701171875e-05, + "step": 29356, + "training_step_time": 0.10517334938049316 + }, + { + "epoch": 4.479522705078125e-05, + "model_forward_time": 0.02477431297302246, + "step": 29357 + }, + { + "epoch": 4.479522705078125e-05, + "step": 29357, + "training_step_time": 0.10350608825683594 + }, + { + "epoch": 4.47967529296875e-05, + "model_forward_time": 0.025552749633789062, + "step": 29358 + }, + { + "epoch": 4.47967529296875e-05, + "step": 29358, + "training_step_time": 0.10907411575317383 + }, + { + "epoch": 4.479827880859375e-05, + "model_forward_time": 0.025232791900634766, + "step": 29359 + }, + { + "epoch": 4.479827880859375e-05, + "step": 29359, + "training_step_time": 0.105926513671875 + }, + { + "epoch": 4.47998046875e-05, + "grad_norm": 0.24660265445709229, + "learning_rate": 1.243740710258734e-07, + "loss": 0.004, + "step": 29360 + }, + { + "epoch": 4.47998046875e-05, + "model_forward_time": 0.02512073516845703, + "step": 29360 + }, + { + "epoch": 4.47998046875e-05, + "step": 29360, + "training_step_time": 0.10642004013061523 + }, + { + "epoch": 4.480133056640625e-05, + "model_forward_time": 0.025548934936523438, + "step": 29361 + }, + { + "epoch": 4.480133056640625e-05, + "step": 29361, + "training_step_time": 0.10434293746948242 + }, + { + "epoch": 4.48028564453125e-05, + "model_forward_time": 0.025104999542236328, + "step": 29362 + }, + { + "epoch": 4.48028564453125e-05, + "step": 29362, + "training_step_time": 0.10578060150146484 + }, + { + "epoch": 4.480438232421875e-05, + "model_forward_time": 0.025576353073120117, + "step": 29363 + }, + { + "epoch": 4.480438232421875e-05, + "step": 29363, + "training_step_time": 0.10851120948791504 + }, + { + "epoch": 4.4805908203125e-05, + "model_forward_time": 0.025470256805419922, + "step": 29364 + }, + { + "epoch": 4.4805908203125e-05, + "step": 29364, + "training_step_time": 0.10692811012268066 + }, + { + "epoch": 4.480743408203125e-05, + "model_forward_time": 0.025290489196777344, + "step": 29365 + }, + { + "epoch": 4.480743408203125e-05, + "step": 29365, + "training_step_time": 0.11270308494567871 + }, + { + "epoch": 4.48089599609375e-05, + "model_forward_time": 0.02540731430053711, + "step": 29366 + }, + { + "epoch": 4.48089599609375e-05, + "step": 29366, + "training_step_time": 0.14193296432495117 + }, + { + "epoch": 4.481048583984375e-05, + "model_forward_time": 0.025343656539916992, + "step": 29367 + }, + { + "epoch": 4.481048583984375e-05, + "step": 29367, + "training_step_time": 0.1177818775177002 + }, + { + "epoch": 4.481201171875e-05, + "model_forward_time": 0.025026321411132812, + "step": 29368 + }, + { + "epoch": 4.481201171875e-05, + "step": 29368, + "training_step_time": 0.12896466255187988 + }, + { + "epoch": 4.481353759765625e-05, + "model_forward_time": 0.025910139083862305, + "step": 29369 + }, + { + "epoch": 4.481353759765625e-05, + "step": 29369, + "training_step_time": 0.15584993362426758 + }, + { + "epoch": 4.48150634765625e-05, + "grad_norm": 0.13673478364944458, + "learning_rate": 1.2051929603428825e-07, + "loss": 0.0062, + "step": 29370 + }, + { + "epoch": 4.48150634765625e-05, + "model_forward_time": 0.024271249771118164, + "step": 29370 + }, + { + "epoch": 4.48150634765625e-05, + "step": 29370, + "training_step_time": 0.21133208274841309 + }, + { + "epoch": 4.481658935546875e-05, + "model_forward_time": 0.025204896926879883, + "step": 29371 + }, + { + "epoch": 4.481658935546875e-05, + "step": 29371, + "training_step_time": 0.10173773765563965 + }, + { + "epoch": 4.4818115234375e-05, + "model_forward_time": 0.024027585983276367, + "step": 29372 + }, + { + "epoch": 4.4818115234375e-05, + "step": 29372, + "training_step_time": 0.10189962387084961 + }, + { + "epoch": 4.481964111328125e-05, + "model_forward_time": 0.02635812759399414, + "step": 29373 + }, + { + "epoch": 4.481964111328125e-05, + "step": 29373, + "training_step_time": 0.10457754135131836 + }, + { + "epoch": 4.48211669921875e-05, + "model_forward_time": 0.02486419677734375, + "step": 29374 + }, + { + "epoch": 4.48211669921875e-05, + "step": 29374, + "training_step_time": 0.1481611728668213 + }, + { + "epoch": 4.482269287109375e-05, + "model_forward_time": 0.025624513626098633, + "step": 29375 + }, + { + "epoch": 4.482269287109375e-05, + "step": 29375, + "training_step_time": 0.16077733039855957 + }, + { + "epoch": 4.482421875e-05, + "model_forward_time": 0.02535104751586914, + "step": 29376 + }, + { + "epoch": 4.482421875e-05, + "step": 29376, + "training_step_time": 0.11684322357177734 + }, + { + "epoch": 4.482574462890625e-05, + "model_forward_time": 0.024559974670410156, + "step": 29377 + }, + { + "epoch": 4.482574462890625e-05, + "step": 29377, + "training_step_time": 0.13288187980651855 + }, + { + "epoch": 4.48272705078125e-05, + "model_forward_time": 0.026221513748168945, + "step": 29378 + }, + { + "epoch": 4.48272705078125e-05, + "step": 29378, + "training_step_time": 0.19694876670837402 + }, + { + "epoch": 4.482879638671875e-05, + "model_forward_time": 0.025516986846923828, + "step": 29379 + }, + { + "epoch": 4.482879638671875e-05, + "step": 29379, + "training_step_time": 0.10409426689147949 + }, + { + "epoch": 4.4830322265625e-05, + "grad_norm": 0.09571550041437149, + "learning_rate": 1.1672512931509283e-07, + "loss": 0.0033, + "step": 29380 + }, + { + "epoch": 4.4830322265625e-05, + "model_forward_time": 0.025953054428100586, + "step": 29380 + }, + { + "epoch": 4.4830322265625e-05, + "step": 29380, + "training_step_time": 0.10621023178100586 + }, + { + "epoch": 4.483184814453125e-05, + "model_forward_time": 0.025391340255737305, + "step": 29381 + }, + { + "epoch": 4.483184814453125e-05, + "step": 29381, + "training_step_time": 0.10582971572875977 + }, + { + "epoch": 4.48333740234375e-05, + "model_forward_time": 0.02524399757385254, + "step": 29382 + }, + { + "epoch": 4.48333740234375e-05, + "step": 29382, + "training_step_time": 0.13315868377685547 + }, + { + "epoch": 4.483489990234375e-05, + "model_forward_time": 0.025664091110229492, + "step": 29383 + }, + { + "epoch": 4.483489990234375e-05, + "step": 29383, + "training_step_time": 0.16475868225097656 + }, + { + "epoch": 4.483642578125e-05, + "model_forward_time": 0.02571725845336914, + "step": 29384 + }, + { + "epoch": 4.483642578125e-05, + "step": 29384, + "training_step_time": 0.1621706485748291 + }, + { + "epoch": 4.483795166015625e-05, + "model_forward_time": 0.023891925811767578, + "step": 29385 + }, + { + "epoch": 4.483795166015625e-05, + "step": 29385, + "training_step_time": 0.16046619415283203 + }, + { + "epoch": 4.48394775390625e-05, + "model_forward_time": 0.02487492561340332, + "step": 29386 + }, + { + "epoch": 4.48394775390625e-05, + "step": 29386, + "training_step_time": 0.14921927452087402 + }, + { + "epoch": 4.484100341796875e-05, + "model_forward_time": 0.025042295455932617, + "step": 29387 + }, + { + "epoch": 4.484100341796875e-05, + "step": 29387, + "training_step_time": 0.13836145401000977 + }, + { + "epoch": 4.4842529296875e-05, + "model_forward_time": 0.024645566940307617, + "step": 29388 + }, + { + "epoch": 4.4842529296875e-05, + "step": 29388, + "training_step_time": 0.22414875030517578 + }, + { + "epoch": 4.484405517578125e-05, + "model_forward_time": 0.02482128143310547, + "step": 29389 + }, + { + "epoch": 4.484405517578125e-05, + "step": 29389, + "training_step_time": 0.12362360954284668 + }, + { + "epoch": 4.48455810546875e-05, + "grad_norm": 0.06256138533353806, + "learning_rate": 1.1299157547854377e-07, + "loss": 0.0019, + "step": 29390 + }, + { + "epoch": 4.48455810546875e-05, + "model_forward_time": 0.023352622985839844, + "step": 29390 + }, + { + "epoch": 4.48455810546875e-05, + "step": 29390, + "training_step_time": 0.2050638198852539 + }, + { + "epoch": 4.484710693359375e-05, + "model_forward_time": 0.024926424026489258, + "step": 29391 + }, + { + "epoch": 4.484710693359375e-05, + "step": 29391, + "training_step_time": 0.1689913272857666 + }, + { + "epoch": 4.48486328125e-05, + "model_forward_time": 0.024693727493286133, + "step": 29392 + }, + { + "epoch": 4.48486328125e-05, + "step": 29392, + "training_step_time": 0.14019131660461426 + }, + { + "epoch": 4.485015869140625e-05, + "model_forward_time": 0.024831295013427734, + "step": 29393 + }, + { + "epoch": 4.485015869140625e-05, + "step": 29393, + "training_step_time": 0.10866999626159668 + }, + { + "epoch": 4.48516845703125e-05, + "model_forward_time": 0.02533411979675293, + "step": 29394 + }, + { + "epoch": 4.48516845703125e-05, + "step": 29394, + "training_step_time": 0.10558748245239258 + }, + { + "epoch": 4.485321044921875e-05, + "model_forward_time": 0.02636408805847168, + "step": 29395 + }, + { + "epoch": 4.485321044921875e-05, + "step": 29395, + "training_step_time": 0.1079709529876709 + }, + { + "epoch": 4.4854736328125e-05, + "model_forward_time": 0.025482177734375, + "step": 29396 + }, + { + "epoch": 4.4854736328125e-05, + "step": 29396, + "training_step_time": 0.10696840286254883 + }, + { + "epoch": 4.485626220703125e-05, + "model_forward_time": 0.02669525146484375, + "step": 29397 + }, + { + "epoch": 4.485626220703125e-05, + "step": 29397, + "training_step_time": 0.11041784286499023 + }, + { + "epoch": 4.48577880859375e-05, + "model_forward_time": 0.02595353126525879, + "step": 29398 + }, + { + "epoch": 4.48577880859375e-05, + "step": 29398, + "training_step_time": 0.10701441764831543 + }, + { + "epoch": 4.485931396484375e-05, + "model_forward_time": 0.025667905807495117, + "step": 29399 + }, + { + "epoch": 4.485931396484375e-05, + "step": 29399, + "training_step_time": 0.10563087463378906 + }, + { + "epoch": 4.486083984375e-05, + "grad_norm": 0.03418092057108879, + "learning_rate": 1.0931863906127327e-07, + "loss": 0.0051, + "step": 29400 + }, + { + "epoch": 4.486083984375e-05, + "model_forward_time": 0.02523040771484375, + "step": 29400 + }, + { + "epoch": 4.486083984375e-05, + "step": 29400, + "training_step_time": 0.1109151840209961 + }, + { + "epoch": 4.486236572265625e-05, + "model_forward_time": 0.025670528411865234, + "step": 29401 + }, + { + "epoch": 4.486236572265625e-05, + "step": 29401, + "training_step_time": 0.10586166381835938 + }, + { + "epoch": 4.48638916015625e-05, + "model_forward_time": 0.025612592697143555, + "step": 29402 + }, + { + "epoch": 4.48638916015625e-05, + "step": 29402, + "training_step_time": 0.10492444038391113 + }, + { + "epoch": 4.486541748046875e-05, + "model_forward_time": 0.026044368743896484, + "step": 29403 + }, + { + "epoch": 4.486541748046875e-05, + "step": 29403, + "training_step_time": 0.10693025588989258 + }, + { + "epoch": 4.4866943359375e-05, + "model_forward_time": 0.02565455436706543, + "step": 29404 + }, + { + "epoch": 4.4866943359375e-05, + "step": 29404, + "training_step_time": 0.10594749450683594 + }, + { + "epoch": 4.486846923828125e-05, + "model_forward_time": 0.026036977767944336, + "step": 29405 + }, + { + "epoch": 4.486846923828125e-05, + "step": 29405, + "training_step_time": 0.10925412178039551 + }, + { + "epoch": 4.48699951171875e-05, + "model_forward_time": 0.029282569885253906, + "step": 29406 + }, + { + "epoch": 4.48699951171875e-05, + "step": 29406, + "training_step_time": 0.11032700538635254 + }, + { + "epoch": 4.487152099609375e-05, + "model_forward_time": 0.025209665298461914, + "step": 29407 + }, + { + "epoch": 4.487152099609375e-05, + "step": 29407, + "training_step_time": 0.10767579078674316 + }, + { + "epoch": 4.4873046875e-05, + "model_forward_time": 0.02625274658203125, + "step": 29408 + }, + { + "epoch": 4.4873046875e-05, + "step": 29408, + "training_step_time": 0.10532498359680176 + }, + { + "epoch": 4.487457275390625e-05, + "model_forward_time": 0.02665114402770996, + "step": 29409 + }, + { + "epoch": 4.487457275390625e-05, + "step": 29409, + "training_step_time": 0.16915082931518555 + }, + { + "epoch": 4.48760986328125e-05, + "grad_norm": 0.17175669968128204, + "learning_rate": 1.0570632452623353e-07, + "loss": 0.0045, + "step": 29410 + }, + { + "epoch": 4.48760986328125e-05, + "model_forward_time": 0.025166749954223633, + "step": 29410 + }, + { + "epoch": 4.48760986328125e-05, + "step": 29410, + "training_step_time": 0.12027359008789062 + }, + { + "epoch": 4.487762451171875e-05, + "model_forward_time": 0.025444984436035156, + "step": 29411 + }, + { + "epoch": 4.487762451171875e-05, + "step": 29411, + "training_step_time": 0.11290574073791504 + }, + { + "epoch": 4.4879150390625e-05, + "model_forward_time": 0.027086496353149414, + "step": 29412 + }, + { + "epoch": 4.4879150390625e-05, + "step": 29412, + "training_step_time": 0.14971017837524414 + }, + { + "epoch": 4.488067626953125e-05, + "model_forward_time": 0.025290727615356445, + "step": 29413 + }, + { + "epoch": 4.488067626953125e-05, + "step": 29413, + "training_step_time": 0.21808147430419922 + }, + { + "epoch": 4.48822021484375e-05, + "model_forward_time": 0.024555206298828125, + "step": 29414 + }, + { + "epoch": 4.48822021484375e-05, + "step": 29414, + "training_step_time": 0.11126422882080078 + }, + { + "epoch": 4.488372802734375e-05, + "model_forward_time": 0.02469348907470703, + "step": 29415 + }, + { + "epoch": 4.488372802734375e-05, + "step": 29415, + "training_step_time": 0.10843229293823242 + }, + { + "epoch": 4.488525390625e-05, + "model_forward_time": 0.025799036026000977, + "step": 29416 + }, + { + "epoch": 4.488525390625e-05, + "step": 29416, + "training_step_time": 0.11892032623291016 + }, + { + "epoch": 4.488677978515625e-05, + "model_forward_time": 0.02448415756225586, + "step": 29417 + }, + { + "epoch": 4.488677978515625e-05, + "step": 29417, + "training_step_time": 0.14757204055786133 + }, + { + "epoch": 4.48883056640625e-05, + "model_forward_time": 0.025730371475219727, + "step": 29418 + }, + { + "epoch": 4.48883056640625e-05, + "step": 29418, + "training_step_time": 0.1570439338684082 + }, + { + "epoch": 4.488983154296875e-05, + "model_forward_time": 0.024774551391601562, + "step": 29419 + }, + { + "epoch": 4.488983154296875e-05, + "step": 29419, + "training_step_time": 0.12609076499938965 + }, + { + "epoch": 4.4891357421875e-05, + "grad_norm": 0.0336330346763134, + "learning_rate": 1.0215463626274125e-07, + "loss": 0.0022, + "step": 29420 + }, + { + "epoch": 4.4891357421875e-05, + "model_forward_time": 0.025694847106933594, + "step": 29420 + }, + { + "epoch": 4.4891357421875e-05, + "step": 29420, + "training_step_time": 0.11999082565307617 + }, + { + "epoch": 4.489288330078125e-05, + "model_forward_time": 0.02543473243713379, + "step": 29421 + }, + { + "epoch": 4.489288330078125e-05, + "step": 29421, + "training_step_time": 0.18630027770996094 + }, + { + "epoch": 4.48944091796875e-05, + "model_forward_time": 0.024593353271484375, + "step": 29422 + }, + { + "epoch": 4.48944091796875e-05, + "step": 29422, + "training_step_time": 0.10984396934509277 + }, + { + "epoch": 4.489593505859375e-05, + "model_forward_time": 0.025817155838012695, + "step": 29423 + }, + { + "epoch": 4.489593505859375e-05, + "step": 29423, + "training_step_time": 0.10954904556274414 + }, + { + "epoch": 4.48974609375e-05, + "model_forward_time": 0.026928424835205078, + "step": 29424 + }, + { + "epoch": 4.48974609375e-05, + "step": 29424, + "training_step_time": 0.11081457138061523 + }, + { + "epoch": 4.489898681640625e-05, + "model_forward_time": 0.02454543113708496, + "step": 29425 + }, + { + "epoch": 4.489898681640625e-05, + "step": 29425, + "training_step_time": 0.11007428169250488 + }, + { + "epoch": 4.49005126953125e-05, + "model_forward_time": 0.024797916412353516, + "step": 29426 + }, + { + "epoch": 4.49005126953125e-05, + "step": 29426, + "training_step_time": 0.10775136947631836 + }, + { + "epoch": 4.490203857421875e-05, + "model_forward_time": 0.02487802505493164, + "step": 29427 + }, + { + "epoch": 4.490203857421875e-05, + "step": 29427, + "training_step_time": 0.10765814781188965 + }, + { + "epoch": 4.4903564453125e-05, + "model_forward_time": 0.025678634643554688, + "step": 29428 + }, + { + "epoch": 4.4903564453125e-05, + "step": 29428, + "training_step_time": 0.10901188850402832 + }, + { + "epoch": 4.490509033203125e-05, + "model_forward_time": 0.02556896209716797, + "step": 29429 + }, + { + "epoch": 4.490509033203125e-05, + "step": 29429, + "training_step_time": 0.10471272468566895 + }, + { + "epoch": 4.49066162109375e-05, + "grad_norm": 0.05490518733859062, + "learning_rate": 9.866357858642205e-08, + "loss": 0.005, + "step": 29430 + }, + { + "epoch": 4.49066162109375e-05, + "model_forward_time": 0.02416539192199707, + "step": 29430 + }, + { + "epoch": 4.49066162109375e-05, + "step": 29430, + "training_step_time": 0.10748982429504395 + }, + { + "epoch": 4.490814208984375e-05, + "model_forward_time": 0.02450251579284668, + "step": 29431 + }, + { + "epoch": 4.490814208984375e-05, + "step": 29431, + "training_step_time": 0.10341930389404297 + }, + { + "epoch": 4.490966796875e-05, + "model_forward_time": 0.025054931640625, + "step": 29432 + }, + { + "epoch": 4.490966796875e-05, + "step": 29432, + "training_step_time": 0.2066783905029297 + }, + { + "epoch": 4.491119384765625e-05, + "model_forward_time": 0.024718523025512695, + "step": 29433 + }, + { + "epoch": 4.491119384765625e-05, + "step": 29433, + "training_step_time": 0.11582636833190918 + }, + { + "epoch": 4.49127197265625e-05, + "model_forward_time": 0.024502277374267578, + "step": 29434 + }, + { + "epoch": 4.49127197265625e-05, + "step": 29434, + "training_step_time": 0.22933101654052734 + }, + { + "epoch": 4.491424560546875e-05, + "model_forward_time": 0.025376319885253906, + "step": 29435 + }, + { + "epoch": 4.491424560546875e-05, + "step": 29435, + "training_step_time": 0.11725831031799316 + }, + { + "epoch": 4.4915771484375e-05, + "model_forward_time": 0.024591445922851562, + "step": 29436 + }, + { + "epoch": 4.4915771484375e-05, + "step": 29436, + "training_step_time": 0.11391377449035645 + }, + { + "epoch": 4.491729736328125e-05, + "model_forward_time": 0.025354862213134766, + "step": 29437 + }, + { + "epoch": 4.491729736328125e-05, + "step": 29437, + "training_step_time": 0.1900019645690918 + }, + { + "epoch": 4.49188232421875e-05, + "model_forward_time": 0.024646520614624023, + "step": 29438 + }, + { + "epoch": 4.49188232421875e-05, + "step": 29438, + "training_step_time": 0.20105743408203125 + }, + { + "epoch": 4.492034912109375e-05, + "model_forward_time": 0.0247344970703125, + "step": 29439 + }, + { + "epoch": 4.492034912109375e-05, + "step": 29439, + "training_step_time": 0.10267257690429688 + }, + { + "epoch": 4.4921875e-05, + "grad_norm": 0.036482006311416626, + "learning_rate": 9.523315573924385e-08, + "loss": 0.0064, + "step": 29440 + }, + { + "epoch": 4.4921875e-05, + "model_forward_time": 0.024335145950317383, + "step": 29440 + }, + { + "epoch": 4.4921875e-05, + "step": 29440, + "training_step_time": 0.10899829864501953 + }, + { + "epoch": 4.492340087890625e-05, + "model_forward_time": 0.026276588439941406, + "step": 29441 + }, + { + "epoch": 4.492340087890625e-05, + "step": 29441, + "training_step_time": 0.10861968994140625 + }, + { + "epoch": 4.49249267578125e-05, + "model_forward_time": 0.02548670768737793, + "step": 29442 + }, + { + "epoch": 4.49249267578125e-05, + "step": 29442, + "training_step_time": 0.10769820213317871 + }, + { + "epoch": 4.492645263671875e-05, + "model_forward_time": 0.025188922882080078, + "step": 29443 + }, + { + "epoch": 4.492645263671875e-05, + "step": 29443, + "training_step_time": 0.10910940170288086 + }, + { + "epoch": 4.4927978515625e-05, + "model_forward_time": 0.02538776397705078, + "step": 29444 + }, + { + "epoch": 4.4927978515625e-05, + "step": 29444, + "training_step_time": 0.10652279853820801 + }, + { + "epoch": 4.492950439453125e-05, + "model_forward_time": 0.025638103485107422, + "step": 29445 + }, + { + "epoch": 4.492950439453125e-05, + "step": 29445, + "training_step_time": 0.11357355117797852 + }, + { + "epoch": 4.49310302734375e-05, + "model_forward_time": 0.02532029151916504, + "step": 29446 + }, + { + "epoch": 4.49310302734375e-05, + "step": 29446, + "training_step_time": 0.10700798034667969 + }, + { + "epoch": 4.493255615234375e-05, + "model_forward_time": 0.025609254837036133, + "step": 29447 + }, + { + "epoch": 4.493255615234375e-05, + "step": 29447, + "training_step_time": 0.10924243927001953 + }, + { + "epoch": 4.493408203125e-05, + "model_forward_time": 0.026072263717651367, + "step": 29448 + }, + { + "epoch": 4.493408203125e-05, + "step": 29448, + "training_step_time": 0.10736083984375 + }, + { + "epoch": 4.493560791015625e-05, + "model_forward_time": 0.025032997131347656, + "step": 29449 + }, + { + "epoch": 4.493560791015625e-05, + "step": 29449, + "training_step_time": 0.1064748764038086 + }, + { + "epoch": 4.49371337890625e-05, + "grad_norm": 0.032262369990348816, + "learning_rate": 9.186337188949457e-08, + "loss": 0.0054, + "step": 29450 + }, + { + "epoch": 4.49371337890625e-05, + "model_forward_time": 0.02523970603942871, + "step": 29450 + }, + { + "epoch": 4.49371337890625e-05, + "step": 29450, + "training_step_time": 0.10555768013000488 + }, + { + "epoch": 4.493865966796875e-05, + "model_forward_time": 0.025784730911254883, + "step": 29451 + }, + { + "epoch": 4.493865966796875e-05, + "step": 29451, + "training_step_time": 0.10557794570922852 + }, + { + "epoch": 4.4940185546875e-05, + "model_forward_time": 0.025780439376831055, + "step": 29452 + }, + { + "epoch": 4.4940185546875e-05, + "step": 29452, + "training_step_time": 0.1052999496459961 + }, + { + "epoch": 4.494171142578125e-05, + "model_forward_time": 0.025603055953979492, + "step": 29453 + }, + { + "epoch": 4.494171142578125e-05, + "step": 29453, + "training_step_time": 0.1891028881072998 + }, + { + "epoch": 4.49432373046875e-05, + "model_forward_time": 0.024623394012451172, + "step": 29454 + }, + { + "epoch": 4.49432373046875e-05, + "step": 29454, + "training_step_time": 0.12005233764648438 + }, + { + "epoch": 4.494476318359375e-05, + "model_forward_time": 0.024707555770874023, + "step": 29455 + }, + { + "epoch": 4.494476318359375e-05, + "step": 29455, + "training_step_time": 0.1356675624847412 + }, + { + "epoch": 4.49462890625e-05, + "model_forward_time": 0.02519369125366211, + "step": 29456 + }, + { + "epoch": 4.49462890625e-05, + "step": 29456, + "training_step_time": 0.1589033603668213 + }, + { + "epoch": 4.494781494140625e-05, + "model_forward_time": 0.02423095703125, + "step": 29457 + }, + { + "epoch": 4.494781494140625e-05, + "step": 29457, + "training_step_time": 0.21025347709655762 + }, + { + "epoch": 4.49493408203125e-05, + "model_forward_time": 0.024885892868041992, + "step": 29458 + }, + { + "epoch": 4.49493408203125e-05, + "step": 29458, + "training_step_time": 0.11473727226257324 + }, + { + "epoch": 4.495086669921875e-05, + "model_forward_time": 0.02506732940673828, + "step": 29459 + }, + { + "epoch": 4.495086669921875e-05, + "step": 29459, + "training_step_time": 0.10505080223083496 + }, + { + "epoch": 4.4952392578125e-05, + "grad_norm": 0.03905640169978142, + "learning_rate": 8.855423113177664e-08, + "loss": 0.002, + "step": 29460 + }, + { + "epoch": 4.4952392578125e-05, + "model_forward_time": 0.025784730911254883, + "step": 29460 + }, + { + "epoch": 4.4952392578125e-05, + "step": 29460, + "training_step_time": 0.10622811317443848 + }, + { + "epoch": 4.495391845703125e-05, + "model_forward_time": 0.024966001510620117, + "step": 29461 + }, + { + "epoch": 4.495391845703125e-05, + "step": 29461, + "training_step_time": 0.15274357795715332 + }, + { + "epoch": 4.49554443359375e-05, + "model_forward_time": 0.024703502655029297, + "step": 29462 + }, + { + "epoch": 4.49554443359375e-05, + "step": 29462, + "training_step_time": 0.1620626449584961 + }, + { + "epoch": 4.495697021484375e-05, + "model_forward_time": 0.02467203140258789, + "step": 29463 + }, + { + "epoch": 4.495697021484375e-05, + "step": 29463, + "training_step_time": 0.10690450668334961 + }, + { + "epoch": 4.495849609375e-05, + "model_forward_time": 0.024972915649414062, + "step": 29464 + }, + { + "epoch": 4.495849609375e-05, + "step": 29464, + "training_step_time": 0.12940526008605957 + }, + { + "epoch": 4.496002197265625e-05, + "model_forward_time": 0.0255889892578125, + "step": 29465 + }, + { + "epoch": 4.496002197265625e-05, + "step": 29465, + "training_step_time": 0.16599678993225098 + }, + { + "epoch": 4.49615478515625e-05, + "model_forward_time": 0.024616479873657227, + "step": 29466 + }, + { + "epoch": 4.49615478515625e-05, + "step": 29466, + "training_step_time": 0.10270428657531738 + }, + { + "epoch": 4.496307373046875e-05, + "model_forward_time": 0.024338722229003906, + "step": 29467 + }, + { + "epoch": 4.496307373046875e-05, + "step": 29467, + "training_step_time": 0.10360097885131836 + }, + { + "epoch": 4.4964599609375e-05, + "model_forward_time": 0.025195598602294922, + "step": 29468 + }, + { + "epoch": 4.4964599609375e-05, + "step": 29468, + "training_step_time": 0.10978460311889648 + }, + { + "epoch": 4.496612548828125e-05, + "model_forward_time": 0.025525569915771484, + "step": 29469 + }, + { + "epoch": 4.496612548828125e-05, + "step": 29469, + "training_step_time": 0.18714070320129395 + }, + { + "epoch": 4.49676513671875e-05, + "grad_norm": 0.1374875009059906, + "learning_rate": 8.530573748701253e-08, + "loss": 0.0035, + "step": 29470 + }, + { + "epoch": 4.49676513671875e-05, + "model_forward_time": 0.02646160125732422, + "step": 29470 + }, + { + "epoch": 4.49676513671875e-05, + "step": 29470, + "training_step_time": 0.20954585075378418 + }, + { + "epoch": 4.496917724609375e-05, + "model_forward_time": 0.023836612701416016, + "step": 29471 + }, + { + "epoch": 4.496917724609375e-05, + "step": 29471, + "training_step_time": 0.21194839477539062 + }, + { + "epoch": 4.4970703125e-05, + "model_forward_time": 0.024641752243041992, + "step": 29472 + }, + { + "epoch": 4.4970703125e-05, + "step": 29472, + "training_step_time": 0.21080231666564941 + }, + { + "epoch": 4.497222900390625e-05, + "model_forward_time": 0.024664878845214844, + "step": 29473 + }, + { + "epoch": 4.497222900390625e-05, + "step": 29473, + "training_step_time": 0.22679853439331055 + }, + { + "epoch": 4.49737548828125e-05, + "model_forward_time": 0.024332046508789062, + "step": 29474 + }, + { + "epoch": 4.49737548828125e-05, + "step": 29474, + "training_step_time": 0.2137765884399414 + }, + { + "epoch": 4.497528076171875e-05, + "model_forward_time": 0.024348735809326172, + "step": 29475 + }, + { + "epoch": 4.497528076171875e-05, + "step": 29475, + "training_step_time": 0.22655749320983887 + }, + { + "epoch": 4.4976806640625e-05, + "model_forward_time": 0.02452254295349121, + "step": 29476 + }, + { + "epoch": 4.4976806640625e-05, + "step": 29476, + "training_step_time": 0.17042064666748047 + }, + { + "epoch": 4.497833251953125e-05, + "model_forward_time": 0.024749040603637695, + "step": 29477 + }, + { + "epoch": 4.497833251953125e-05, + "step": 29477, + "training_step_time": 0.15212535858154297 + }, + { + "epoch": 4.49798583984375e-05, + "model_forward_time": 0.024416208267211914, + "step": 29478 + }, + { + "epoch": 4.49798583984375e-05, + "step": 29478, + "training_step_time": 0.13606572151184082 + }, + { + "epoch": 4.498138427734375e-05, + "model_forward_time": 0.024572372436523438, + "step": 29479 + }, + { + "epoch": 4.498138427734375e-05, + "step": 29479, + "training_step_time": 0.10658812522888184 + }, + { + "epoch": 4.498291015625e-05, + "grad_norm": 0.03711557388305664, + "learning_rate": 8.211789490242261e-08, + "loss": 0.0077, + "step": 29480 + }, + { + "epoch": 4.498291015625e-05, + "model_forward_time": 0.025015592575073242, + "step": 29480 + }, + { + "epoch": 4.498291015625e-05, + "step": 29480, + "training_step_time": 0.10750842094421387 + }, + { + "epoch": 4.498443603515625e-05, + "model_forward_time": 0.02528238296508789, + "step": 29481 + }, + { + "epoch": 4.498443603515625e-05, + "step": 29481, + "training_step_time": 0.10809707641601562 + }, + { + "epoch": 4.49859619140625e-05, + "model_forward_time": 0.02579665184020996, + "step": 29482 + }, + { + "epoch": 4.49859619140625e-05, + "step": 29482, + "training_step_time": 0.10844707489013672 + }, + { + "epoch": 4.498748779296875e-05, + "model_forward_time": 0.025348424911499023, + "step": 29483 + }, + { + "epoch": 4.498748779296875e-05, + "step": 29483, + "training_step_time": 0.10738515853881836 + }, + { + "epoch": 4.4989013671875e-05, + "model_forward_time": 0.025777101516723633, + "step": 29484 + }, + { + "epoch": 4.4989013671875e-05, + "step": 29484, + "training_step_time": 0.10957574844360352 + }, + { + "epoch": 4.499053955078125e-05, + "model_forward_time": 0.02539372444152832, + "step": 29485 + }, + { + "epoch": 4.499053955078125e-05, + "step": 29485, + "training_step_time": 0.10946273803710938 + }, + { + "epoch": 4.49920654296875e-05, + "model_forward_time": 0.02535557746887207, + "step": 29486 + }, + { + "epoch": 4.49920654296875e-05, + "step": 29486, + "training_step_time": 0.10946774482727051 + }, + { + "epoch": 4.499359130859375e-05, + "model_forward_time": 0.025066137313842773, + "step": 29487 + }, + { + "epoch": 4.499359130859375e-05, + "step": 29487, + "training_step_time": 0.10408854484558105 + }, + { + "epoch": 4.49951171875e-05, + "model_forward_time": 0.025450468063354492, + "step": 29488 + }, + { + "epoch": 4.49951171875e-05, + "step": 29488, + "training_step_time": 0.10880017280578613 + }, + { + "epoch": 4.499664306640625e-05, + "model_forward_time": 0.027222633361816406, + "step": 29489 + }, + { + "epoch": 4.499664306640625e-05, + "step": 29489, + "training_step_time": 0.10672712326049805 + }, + { + "epoch": 4.49981689453125e-05, + "grad_norm": 0.07051877677440643, + "learning_rate": 7.899070725153613e-08, + "loss": 0.0041, + "step": 29490 + }, + { + "epoch": 4.49981689453125e-05, + "model_forward_time": 0.025384902954101562, + "step": 29490 + }, + { + "epoch": 4.49981689453125e-05, + "step": 29490, + "training_step_time": 0.10701346397399902 + }, + { + "epoch": 4.499969482421875e-05, + "model_forward_time": 0.028430700302124023, + "step": 29491 + }, + { + "epoch": 4.499969482421875e-05, + "step": 29491, + "training_step_time": 0.10626935958862305 + }, + { + "epoch": 4.5001220703125e-05, + "model_forward_time": 0.025125503540039062, + "step": 29492 + }, + { + "epoch": 4.5001220703125e-05, + "step": 29492, + "training_step_time": 0.1042487621307373 + }, + { + "epoch": 4.500274658203125e-05, + "model_forward_time": 0.02514815330505371, + "step": 29493 + }, + { + "epoch": 4.500274658203125e-05, + "step": 29493, + "training_step_time": 0.10399985313415527 + }, + { + "epoch": 4.50042724609375e-05, + "model_forward_time": 0.02517414093017578, + "step": 29494 + }, + { + "epoch": 4.50042724609375e-05, + "step": 29494, + "training_step_time": 0.1473388671875 + }, + { + "epoch": 4.500579833984375e-05, + "model_forward_time": 0.025235891342163086, + "step": 29495 + }, + { + "epoch": 4.500579833984375e-05, + "step": 29495, + "training_step_time": 0.1195220947265625 + }, + { + "epoch": 4.500732421875e-05, + "model_forward_time": 0.024762392044067383, + "step": 29496 + }, + { + "epoch": 4.500732421875e-05, + "step": 29496, + "training_step_time": 0.13084673881530762 + }, + { + "epoch": 4.500885009765625e-05, + "model_forward_time": 0.02756810188293457, + "step": 29497 + }, + { + "epoch": 4.500885009765625e-05, + "step": 29497, + "training_step_time": 0.21385765075683594 + }, + { + "epoch": 4.50103759765625e-05, + "model_forward_time": 0.0243072509765625, + "step": 29498 + }, + { + "epoch": 4.50103759765625e-05, + "step": 29498, + "training_step_time": 0.24418091773986816 + }, + { + "epoch": 4.501190185546875e-05, + "model_forward_time": 0.024507999420166016, + "step": 29499 + }, + { + "epoch": 4.501190185546875e-05, + "step": 29499, + "training_step_time": 0.20441913604736328 + }, + { + "epoch": 4.5013427734375e-05, + "grad_norm": 0.03889711946249008, + "learning_rate": 7.59241783341913e-08, + "loss": 0.0071, + "step": 29500 + }, + { + "epoch": 4.5013427734375e-05, + "model_forward_time": 0.024188995361328125, + "step": 29500 + }, + { + "epoch": 4.5013427734375e-05, + "step": 29500, + "training_step_time": 0.1918184757232666 + }, + { + "epoch": 4.501495361328125e-05, + "model_forward_time": 0.024157285690307617, + "step": 29501 + }, + { + "epoch": 4.501495361328125e-05, + "step": 29501, + "training_step_time": 0.20156264305114746 + }, + { + "epoch": 4.50164794921875e-05, + "model_forward_time": 0.024899959564208984, + "step": 29502 + }, + { + "epoch": 4.50164794921875e-05, + "step": 29502, + "training_step_time": 0.1621389389038086 + }, + { + "epoch": 4.501800537109375e-05, + "model_forward_time": 0.0247647762298584, + "step": 29503 + }, + { + "epoch": 4.501800537109375e-05, + "step": 29503, + "training_step_time": 0.11053681373596191 + }, + { + "epoch": 4.501953125e-05, + "model_forward_time": 0.024320602416992188, + "step": 29504 + }, + { + "epoch": 4.501953125e-05, + "step": 29504, + "training_step_time": 0.1356067657470703 + }, + { + "epoch": 4.502105712890625e-05, + "model_forward_time": 0.025597572326660156, + "step": 29505 + }, + { + "epoch": 4.502105712890625e-05, + "step": 29505, + "training_step_time": 0.19828391075134277 + }, + { + "epoch": 4.50225830078125e-05, + "model_forward_time": 0.024166584014892578, + "step": 29506 + }, + { + "epoch": 4.50225830078125e-05, + "step": 29506, + "training_step_time": 0.12172150611877441 + }, + { + "epoch": 4.502410888671875e-05, + "model_forward_time": 0.024943828582763672, + "step": 29507 + }, + { + "epoch": 4.502410888671875e-05, + "step": 29507, + "training_step_time": 0.14272189140319824 + }, + { + "epoch": 4.5025634765625e-05, + "model_forward_time": 0.02544379234313965, + "step": 29508 + }, + { + "epoch": 4.5025634765625e-05, + "step": 29508, + "training_step_time": 0.1332087516784668 + }, + { + "epoch": 4.502716064453125e-05, + "model_forward_time": 0.024133682250976562, + "step": 29509 + }, + { + "epoch": 4.502716064453125e-05, + "step": 29509, + "training_step_time": 0.12697267532348633 + }, + { + "epoch": 4.50286865234375e-05, + "grad_norm": 0.07993664592504501, + "learning_rate": 7.291831187649645e-08, + "loss": 0.0036, + "step": 29510 + }, + { + "epoch": 4.50286865234375e-05, + "model_forward_time": 0.024898529052734375, + "step": 29510 + }, + { + "epoch": 4.50286865234375e-05, + "step": 29510, + "training_step_time": 0.12034821510314941 + }, + { + "epoch": 4.503021240234375e-05, + "model_forward_time": 0.024936437606811523, + "step": 29511 + }, + { + "epoch": 4.503021240234375e-05, + "step": 29511, + "training_step_time": 0.11817789077758789 + }, + { + "epoch": 4.503173828125e-05, + "model_forward_time": 0.025554656982421875, + "step": 29512 + }, + { + "epoch": 4.503173828125e-05, + "step": 29512, + "training_step_time": 0.11660552024841309 + }, + { + "epoch": 4.503326416015625e-05, + "model_forward_time": 0.02495741844177246, + "step": 29513 + }, + { + "epoch": 4.503326416015625e-05, + "step": 29513, + "training_step_time": 0.17430377006530762 + }, + { + "epoch": 4.50347900390625e-05, + "model_forward_time": 0.024595022201538086, + "step": 29514 + }, + { + "epoch": 4.50347900390625e-05, + "step": 29514, + "training_step_time": 0.1102595329284668 + }, + { + "epoch": 4.503631591796875e-05, + "model_forward_time": 0.025981426239013672, + "step": 29515 + }, + { + "epoch": 4.503631591796875e-05, + "step": 29515, + "training_step_time": 0.1913909912109375 + }, + { + "epoch": 4.5037841796875e-05, + "model_forward_time": 0.025408506393432617, + "step": 29516 + }, + { + "epoch": 4.5037841796875e-05, + "step": 29516, + "training_step_time": 0.14170575141906738 + }, + { + "epoch": 4.503936767578125e-05, + "model_forward_time": 0.02487635612487793, + "step": 29517 + }, + { + "epoch": 4.503936767578125e-05, + "step": 29517, + "training_step_time": 0.20699381828308105 + }, + { + "epoch": 4.50408935546875e-05, + "model_forward_time": 0.024484872817993164, + "step": 29518 + }, + { + "epoch": 4.50408935546875e-05, + "step": 29518, + "training_step_time": 0.20782995223999023 + }, + { + "epoch": 4.504241943359375e-05, + "model_forward_time": 0.024587154388427734, + "step": 29519 + }, + { + "epoch": 4.504241943359375e-05, + "step": 29519, + "training_step_time": 0.1246800422668457 + }, + { + "epoch": 4.50439453125e-05, + "grad_norm": 0.05316556245088577, + "learning_rate": 6.997311153086883e-08, + "loss": 0.0031, + "step": 29520 + }, + { + "epoch": 4.50439453125e-05, + "model_forward_time": 0.02485966682434082, + "step": 29520 + }, + { + "epoch": 4.50439453125e-05, + "step": 29520, + "training_step_time": 0.12635016441345215 + }, + { + "epoch": 4.504547119140625e-05, + "model_forward_time": 0.025443553924560547, + "step": 29521 + }, + { + "epoch": 4.504547119140625e-05, + "step": 29521, + "training_step_time": 0.10416936874389648 + }, + { + "epoch": 4.50469970703125e-05, + "model_forward_time": 0.025396347045898438, + "step": 29522 + }, + { + "epoch": 4.50469970703125e-05, + "step": 29522, + "training_step_time": 0.10674047470092773 + }, + { + "epoch": 4.504852294921875e-05, + "model_forward_time": 0.025795936584472656, + "step": 29523 + }, + { + "epoch": 4.504852294921875e-05, + "step": 29523, + "training_step_time": 0.10595059394836426 + }, + { + "epoch": 4.5050048828125e-05, + "model_forward_time": 0.02550196647644043, + "step": 29524 + }, + { + "epoch": 4.5050048828125e-05, + "step": 29524, + "training_step_time": 0.10480141639709473 + }, + { + "epoch": 4.505157470703125e-05, + "model_forward_time": 0.02515101432800293, + "step": 29525 + }, + { + "epoch": 4.505157470703125e-05, + "step": 29525, + "training_step_time": 0.10499286651611328 + }, + { + "epoch": 4.50531005859375e-05, + "model_forward_time": 0.024863243103027344, + "step": 29526 + }, + { + "epoch": 4.50531005859375e-05, + "step": 29526, + "training_step_time": 0.10365819931030273 + }, + { + "epoch": 4.505462646484375e-05, + "model_forward_time": 0.0252988338470459, + "step": 29527 + }, + { + "epoch": 4.505462646484375e-05, + "step": 29527, + "training_step_time": 0.1051180362701416 + }, + { + "epoch": 4.505615234375e-05, + "model_forward_time": 0.025111675262451172, + "step": 29528 + }, + { + "epoch": 4.505615234375e-05, + "step": 29528, + "training_step_time": 0.11017584800720215 + }, + { + "epoch": 4.505767822265625e-05, + "model_forward_time": 0.025771379470825195, + "step": 29529 + }, + { + "epoch": 4.505767822265625e-05, + "step": 29529, + "training_step_time": 0.11001253128051758 + }, + { + "epoch": 4.50592041015625e-05, + "grad_norm": 0.03150056302547455, + "learning_rate": 6.708858087601244e-08, + "loss": 0.0058, + "step": 29530 + }, + { + "epoch": 4.50592041015625e-05, + "model_forward_time": 0.025542497634887695, + "step": 29530 + }, + { + "epoch": 4.50592041015625e-05, + "step": 29530, + "training_step_time": 0.10853767395019531 + }, + { + "epoch": 4.506072998046875e-05, + "model_forward_time": 0.025412797927856445, + "step": 29531 + }, + { + "epoch": 4.506072998046875e-05, + "step": 29531, + "training_step_time": 0.10640072822570801 + }, + { + "epoch": 4.5062255859375e-05, + "model_forward_time": 0.024363994598388672, + "step": 29532 + }, + { + "epoch": 4.5062255859375e-05, + "step": 29532, + "training_step_time": 0.10201001167297363 + }, + { + "epoch": 4.506378173828125e-05, + "model_forward_time": 0.02498936653137207, + "step": 29533 + }, + { + "epoch": 4.506378173828125e-05, + "step": 29533, + "training_step_time": 0.10501909255981445 + }, + { + "epoch": 4.50653076171875e-05, + "model_forward_time": 0.025330781936645508, + "step": 29534 + }, + { + "epoch": 4.50653076171875e-05, + "step": 29534, + "training_step_time": 0.10584712028503418 + }, + { + "epoch": 4.506683349609375e-05, + "model_forward_time": 0.025069475173950195, + "step": 29535 + }, + { + "epoch": 4.506683349609375e-05, + "step": 29535, + "training_step_time": 0.13206076622009277 + }, + { + "epoch": 4.5068359375e-05, + "model_forward_time": 0.02539682388305664, + "step": 29536 + }, + { + "epoch": 4.5068359375e-05, + "step": 29536, + "training_step_time": 0.11951661109924316 + }, + { + "epoch": 4.506988525390625e-05, + "model_forward_time": 0.024988651275634766, + "step": 29537 + }, + { + "epoch": 4.506988525390625e-05, + "step": 29537, + "training_step_time": 0.12917017936706543 + }, + { + "epoch": 4.50714111328125e-05, + "model_forward_time": 0.02502751350402832, + "step": 29538 + }, + { + "epoch": 4.50714111328125e-05, + "step": 29538, + "training_step_time": 0.15400075912475586 + }, + { + "epoch": 4.507293701171875e-05, + "model_forward_time": 0.02503514289855957, + "step": 29539 + }, + { + "epoch": 4.507293701171875e-05, + "step": 29539, + "training_step_time": 0.21957707405090332 + }, + { + "epoch": 4.5074462890625e-05, + "grad_norm": 0.022882062941789627, + "learning_rate": 6.426472341689027e-08, + "loss": 0.0123, + "step": 29540 + }, + { + "epoch": 4.5074462890625e-05, + "model_forward_time": 0.02451300621032715, + "step": 29540 + }, + { + "epoch": 4.5074462890625e-05, + "step": 29540, + "training_step_time": 0.10738420486450195 + }, + { + "epoch": 4.507598876953125e-05, + "model_forward_time": 0.0245819091796875, + "step": 29541 + }, + { + "epoch": 4.507598876953125e-05, + "step": 29541, + "training_step_time": 0.10121941566467285 + }, + { + "epoch": 4.50775146484375e-05, + "model_forward_time": 0.02523064613342285, + "step": 29542 + }, + { + "epoch": 4.50775146484375e-05, + "step": 29542, + "training_step_time": 0.10693168640136719 + }, + { + "epoch": 4.507904052734375e-05, + "model_forward_time": 0.025423049926757812, + "step": 29543 + }, + { + "epoch": 4.507904052734375e-05, + "step": 29543, + "training_step_time": 0.13923025131225586 + }, + { + "epoch": 4.508056640625e-05, + "model_forward_time": 0.024567842483520508, + "step": 29544 + }, + { + "epoch": 4.508056640625e-05, + "step": 29544, + "training_step_time": 0.2061014175415039 + }, + { + "epoch": 4.508209228515625e-05, + "model_forward_time": 0.024405717849731445, + "step": 29545 + }, + { + "epoch": 4.508209228515625e-05, + "step": 29545, + "training_step_time": 0.22521543502807617 + }, + { + "epoch": 4.50836181640625e-05, + "model_forward_time": 0.02455282211303711, + "step": 29546 + }, + { + "epoch": 4.50836181640625e-05, + "step": 29546, + "training_step_time": 0.19358158111572266 + }, + { + "epoch": 4.508514404296875e-05, + "model_forward_time": 0.02466416358947754, + "step": 29547 + }, + { + "epoch": 4.508514404296875e-05, + "step": 29547, + "training_step_time": 0.17590022087097168 + }, + { + "epoch": 4.5086669921875e-05, + "model_forward_time": 0.0241241455078125, + "step": 29548 + }, + { + "epoch": 4.5086669921875e-05, + "step": 29548, + "training_step_time": 0.16410207748413086 + }, + { + "epoch": 4.508819580078125e-05, + "model_forward_time": 0.02409958839416504, + "step": 29549 + }, + { + "epoch": 4.508819580078125e-05, + "step": 29549, + "training_step_time": 0.15457534790039062 + }, + { + "epoch": 4.50897216796875e-05, + "grad_norm": 0.027359770610928535, + "learning_rate": 6.150154258476315e-08, + "loss": 0.0042, + "step": 29550 + }, + { + "epoch": 4.50897216796875e-05, + "model_forward_time": 0.024699926376342773, + "step": 29550 + }, + { + "epoch": 4.50897216796875e-05, + "step": 29550, + "training_step_time": 0.10277676582336426 + }, + { + "epoch": 4.509124755859375e-05, + "model_forward_time": 0.024931669235229492, + "step": 29551 + }, + { + "epoch": 4.509124755859375e-05, + "step": 29551, + "training_step_time": 0.10227799415588379 + }, + { + "epoch": 4.50927734375e-05, + "model_forward_time": 0.025644302368164062, + "step": 29552 + }, + { + "epoch": 4.50927734375e-05, + "step": 29552, + "training_step_time": 0.10514211654663086 + }, + { + "epoch": 4.509429931640625e-05, + "model_forward_time": 0.025731801986694336, + "step": 29553 + }, + { + "epoch": 4.509429931640625e-05, + "step": 29553, + "training_step_time": 0.1040337085723877 + }, + { + "epoch": 4.50958251953125e-05, + "model_forward_time": 0.025430917739868164, + "step": 29554 + }, + { + "epoch": 4.50958251953125e-05, + "step": 29554, + "training_step_time": 0.10729837417602539 + }, + { + "epoch": 4.509735107421875e-05, + "model_forward_time": 0.02573871612548828, + "step": 29555 + }, + { + "epoch": 4.509735107421875e-05, + "step": 29555, + "training_step_time": 0.12199234962463379 + }, + { + "epoch": 4.5098876953125e-05, + "model_forward_time": 0.025090456008911133, + "step": 29556 + }, + { + "epoch": 4.5098876953125e-05, + "step": 29556, + "training_step_time": 0.10915231704711914 + }, + { + "epoch": 4.510040283203125e-05, + "model_forward_time": 0.025728940963745117, + "step": 29557 + }, + { + "epoch": 4.510040283203125e-05, + "step": 29557, + "training_step_time": 0.18514370918273926 + }, + { + "epoch": 4.51019287109375e-05, + "model_forward_time": 0.024450063705444336, + "step": 29558 + }, + { + "epoch": 4.51019287109375e-05, + "step": 29558, + "training_step_time": 0.20688199996948242 + }, + { + "epoch": 4.510345458984375e-05, + "model_forward_time": 0.024922847747802734, + "step": 29559 + }, + { + "epoch": 4.510345458984375e-05, + "step": 29559, + "training_step_time": 0.22192049026489258 + }, + { + "epoch": 4.510498046875e-05, + "grad_norm": 0.02034337818622589, + "learning_rate": 5.8799041737150896e-08, + "loss": 0.0025, + "step": 29560 + }, + { + "epoch": 4.510498046875e-05, + "model_forward_time": 0.02508687973022461, + "step": 29560 + }, + { + "epoch": 4.510498046875e-05, + "step": 29560, + "training_step_time": 0.13025856018066406 + }, + { + "epoch": 4.510650634765625e-05, + "model_forward_time": 0.02457284927368164, + "step": 29561 + }, + { + "epoch": 4.510650634765625e-05, + "step": 29561, + "training_step_time": 0.12818384170532227 + }, + { + "epoch": 4.51080322265625e-05, + "model_forward_time": 0.02541184425354004, + "step": 29562 + }, + { + "epoch": 4.51080322265625e-05, + "step": 29562, + "training_step_time": 0.12350058555603027 + }, + { + "epoch": 4.510955810546875e-05, + "model_forward_time": 0.024986982345581055, + "step": 29563 + }, + { + "epoch": 4.510955810546875e-05, + "step": 29563, + "training_step_time": 0.11614370346069336 + }, + { + "epoch": 4.5111083984375e-05, + "model_forward_time": 0.027311086654663086, + "step": 29564 + }, + { + "epoch": 4.5111083984375e-05, + "step": 29564, + "training_step_time": 0.1427912712097168 + }, + { + "epoch": 4.511260986328125e-05, + "model_forward_time": 0.02537822723388672, + "step": 29565 + }, + { + "epoch": 4.511260986328125e-05, + "step": 29565, + "training_step_time": 0.14483189582824707 + }, + { + "epoch": 4.51141357421875e-05, + "model_forward_time": 0.02461385726928711, + "step": 29566 + }, + { + "epoch": 4.51141357421875e-05, + "step": 29566, + "training_step_time": 0.1836996078491211 + }, + { + "epoch": 4.511566162109375e-05, + "model_forward_time": 0.02485799789428711, + "step": 29567 + }, + { + "epoch": 4.511566162109375e-05, + "step": 29567, + "training_step_time": 0.1797933578491211 + }, + { + "epoch": 4.51171875e-05, + "model_forward_time": 0.024726152420043945, + "step": 29568 + }, + { + "epoch": 4.51171875e-05, + "step": 29568, + "training_step_time": 0.18574166297912598 + }, + { + "epoch": 4.511871337890625e-05, + "model_forward_time": 0.025325536727905273, + "step": 29569 + }, + { + "epoch": 4.511871337890625e-05, + "step": 29569, + "training_step_time": 0.1748056411743164 + }, + { + "epoch": 4.51202392578125e-05, + "grad_norm": 0.03413934260606766, + "learning_rate": 5.615722415785451e-08, + "loss": 0.0064, + "step": 29570 + }, + { + "epoch": 4.51202392578125e-05, + "model_forward_time": 0.024906635284423828, + "step": 29570 + }, + { + "epoch": 4.51202392578125e-05, + "step": 29570, + "training_step_time": 0.15620708465576172 + }, + { + "epoch": 4.512176513671875e-05, + "model_forward_time": 0.024425268173217773, + "step": 29571 + }, + { + "epoch": 4.512176513671875e-05, + "step": 29571, + "training_step_time": 0.13498377799987793 + }, + { + "epoch": 4.5123291015625e-05, + "model_forward_time": 0.024925947189331055, + "step": 29572 + }, + { + "epoch": 4.5123291015625e-05, + "step": 29572, + "training_step_time": 0.13255739212036133 + }, + { + "epoch": 4.512481689453125e-05, + "model_forward_time": 0.024873971939086914, + "step": 29573 + }, + { + "epoch": 4.512481689453125e-05, + "step": 29573, + "training_step_time": 0.1482563018798828 + }, + { + "epoch": 4.51263427734375e-05, + "model_forward_time": 0.025078296661376953, + "step": 29574 + }, + { + "epoch": 4.51263427734375e-05, + "step": 29574, + "training_step_time": 0.11328339576721191 + }, + { + "epoch": 4.512786865234375e-05, + "model_forward_time": 0.025264263153076172, + "step": 29575 + }, + { + "epoch": 4.512786865234375e-05, + "step": 29575, + "training_step_time": 0.12916111946105957 + }, + { + "epoch": 4.512939453125e-05, + "model_forward_time": 0.025290966033935547, + "step": 29576 + }, + { + "epoch": 4.512939453125e-05, + "step": 29576, + "training_step_time": 0.15732574462890625 + }, + { + "epoch": 4.513092041015625e-05, + "model_forward_time": 0.024350643157958984, + "step": 29577 + }, + { + "epoch": 4.513092041015625e-05, + "step": 29577, + "training_step_time": 0.21616196632385254 + }, + { + "epoch": 4.51324462890625e-05, + "model_forward_time": 0.024686098098754883, + "step": 29578 + }, + { + "epoch": 4.51324462890625e-05, + "step": 29578, + "training_step_time": 0.11483907699584961 + }, + { + "epoch": 4.513397216796875e-05, + "model_forward_time": 0.025191783905029297, + "step": 29579 + }, + { + "epoch": 4.513397216796875e-05, + "step": 29579, + "training_step_time": 0.10467362403869629 + }, + { + "epoch": 4.5135498046875e-05, + "grad_norm": 0.03096199594438076, + "learning_rate": 5.3576093056922906e-08, + "loss": 0.0061, + "step": 29580 + }, + { + "epoch": 4.5135498046875e-05, + "model_forward_time": 0.024056673049926758, + "step": 29580 + }, + { + "epoch": 4.5135498046875e-05, + "step": 29580, + "training_step_time": 0.10682868957519531 + }, + { + "epoch": 4.513702392578125e-05, + "model_forward_time": 0.02612137794494629, + "step": 29581 + }, + { + "epoch": 4.513702392578125e-05, + "step": 29581, + "training_step_time": 0.10569047927856445 + }, + { + "epoch": 4.51385498046875e-05, + "model_forward_time": 0.026274442672729492, + "step": 29582 + }, + { + "epoch": 4.51385498046875e-05, + "step": 29582, + "training_step_time": 0.14847588539123535 + }, + { + "epoch": 4.514007568359375e-05, + "model_forward_time": 0.025204181671142578, + "step": 29583 + }, + { + "epoch": 4.514007568359375e-05, + "step": 29583, + "training_step_time": 0.16208243370056152 + }, + { + "epoch": 4.51416015625e-05, + "model_forward_time": 0.02490854263305664, + "step": 29584 + }, + { + "epoch": 4.51416015625e-05, + "step": 29584, + "training_step_time": 0.1108555793762207 + }, + { + "epoch": 4.514312744140625e-05, + "model_forward_time": 0.02445054054260254, + "step": 29585 + }, + { + "epoch": 4.514312744140625e-05, + "step": 29585, + "training_step_time": 0.14454984664916992 + }, + { + "epoch": 4.51446533203125e-05, + "model_forward_time": 0.025461196899414062, + "step": 29586 + }, + { + "epoch": 4.51446533203125e-05, + "step": 29586, + "training_step_time": 0.20086145401000977 + }, + { + "epoch": 4.514617919921875e-05, + "model_forward_time": 0.025015830993652344, + "step": 29587 + }, + { + "epoch": 4.514617919921875e-05, + "step": 29587, + "training_step_time": 0.10629844665527344 + }, + { + "epoch": 4.5147705078125e-05, + "model_forward_time": 0.025182247161865234, + "step": 29588 + }, + { + "epoch": 4.5147705078125e-05, + "step": 29588, + "training_step_time": 0.10264205932617188 + }, + { + "epoch": 4.514923095703125e-05, + "model_forward_time": 0.02554774284362793, + "step": 29589 + }, + { + "epoch": 4.514923095703125e-05, + "step": 29589, + "training_step_time": 0.1043248176574707 + }, + { + "epoch": 4.51507568359375e-05, + "grad_norm": 0.03093460574746132, + "learning_rate": 5.105565157068615e-08, + "loss": 0.0046, + "step": 29590 + }, + { + "epoch": 4.51507568359375e-05, + "model_forward_time": 0.025167226791381836, + "step": 29590 + }, + { + "epoch": 4.51507568359375e-05, + "step": 29590, + "training_step_time": 0.10372567176818848 + }, + { + "epoch": 4.515228271484375e-05, + "model_forward_time": 0.025574922561645508, + "step": 29591 + }, + { + "epoch": 4.515228271484375e-05, + "step": 29591, + "training_step_time": 0.1045234203338623 + }, + { + "epoch": 4.515380859375e-05, + "model_forward_time": 0.025262117385864258, + "step": 29592 + }, + { + "epoch": 4.515380859375e-05, + "step": 29592, + "training_step_time": 0.10402798652648926 + }, + { + "epoch": 4.515533447265625e-05, + "model_forward_time": 0.025628328323364258, + "step": 29593 + }, + { + "epoch": 4.515533447265625e-05, + "step": 29593, + "training_step_time": 0.10593795776367188 + }, + { + "epoch": 4.51568603515625e-05, + "model_forward_time": 0.025805950164794922, + "step": 29594 + }, + { + "epoch": 4.51568603515625e-05, + "step": 29594, + "training_step_time": 0.10753583908081055 + }, + { + "epoch": 4.515838623046875e-05, + "model_forward_time": 0.025567054748535156, + "step": 29595 + }, + { + "epoch": 4.515838623046875e-05, + "step": 29595, + "training_step_time": 0.10724234580993652 + }, + { + "epoch": 4.5159912109375e-05, + "model_forward_time": 0.025744915008544922, + "step": 29596 + }, + { + "epoch": 4.5159912109375e-05, + "step": 29596, + "training_step_time": 0.17777085304260254 + }, + { + "epoch": 4.516143798828125e-05, + "model_forward_time": 0.024701356887817383, + "step": 29597 + }, + { + "epoch": 4.516143798828125e-05, + "step": 29597, + "training_step_time": 0.15043044090270996 + }, + { + "epoch": 4.51629638671875e-05, + "model_forward_time": 0.024866342544555664, + "step": 29598 + }, + { + "epoch": 4.51629638671875e-05, + "step": 29598, + "training_step_time": 0.13985466957092285 + }, + { + "epoch": 4.516448974609375e-05, + "model_forward_time": 0.024860382080078125, + "step": 29599 + }, + { + "epoch": 4.516448974609375e-05, + "step": 29599, + "training_step_time": 0.21168065071105957 + }, + { + "epoch": 4.5166015625e-05, + "grad_norm": 0.10285698622465134, + "learning_rate": 4.859590276170556e-08, + "loss": 0.0025, + "step": 29600 + }, + { + "epoch": 4.5166015625e-05, + "model_forward_time": 0.024628877639770508, + "step": 29600 + }, + { + "epoch": 4.5166015625e-05, + "step": 29600, + "training_step_time": 0.20354580879211426 + }, + { + "epoch": 4.516754150390625e-05, + "model_forward_time": 0.0248110294342041, + "step": 29601 + }, + { + "epoch": 4.516754150390625e-05, + "step": 29601, + "training_step_time": 0.10490560531616211 + }, + { + "epoch": 4.51690673828125e-05, + "model_forward_time": 0.027085065841674805, + "step": 29602 + }, + { + "epoch": 4.51690673828125e-05, + "step": 29602, + "training_step_time": 0.1075284481048584 + }, + { + "epoch": 4.517059326171875e-05, + "model_forward_time": 0.025545835494995117, + "step": 29603 + }, + { + "epoch": 4.517059326171875e-05, + "step": 29603, + "training_step_time": 0.11116170883178711 + }, + { + "epoch": 4.5172119140625e-05, + "model_forward_time": 0.025629758834838867, + "step": 29604 + }, + { + "epoch": 4.5172119140625e-05, + "step": 29604, + "training_step_time": 0.11196136474609375 + }, + { + "epoch": 4.517364501953125e-05, + "model_forward_time": 0.025400876998901367, + "step": 29605 + }, + { + "epoch": 4.517364501953125e-05, + "step": 29605, + "training_step_time": 0.1323997974395752 + }, + { + "epoch": 4.51751708984375e-05, + "model_forward_time": 0.02507615089416504, + "step": 29606 + }, + { + "epoch": 4.51751708984375e-05, + "step": 29606, + "training_step_time": 0.10466551780700684 + }, + { + "epoch": 4.517669677734375e-05, + "model_forward_time": 0.025432348251342773, + "step": 29607 + }, + { + "epoch": 4.517669677734375e-05, + "step": 29607, + "training_step_time": 0.10953330993652344 + }, + { + "epoch": 4.517822265625e-05, + "model_forward_time": 0.02554011344909668, + "step": 29608 + }, + { + "epoch": 4.517822265625e-05, + "step": 29608, + "training_step_time": 0.10809111595153809 + }, + { + "epoch": 4.517974853515625e-05, + "model_forward_time": 0.025523900985717773, + "step": 29609 + }, + { + "epoch": 4.517974853515625e-05, + "step": 29609, + "training_step_time": 0.11200356483459473 + }, + { + "epoch": 4.51812744140625e-05, + "grad_norm": 0.03034748136997223, + "learning_rate": 4.619684961881254e-08, + "loss": 0.0018, + "step": 29610 + }, + { + "epoch": 4.51812744140625e-05, + "model_forward_time": 0.02568197250366211, + "step": 29610 + }, + { + "epoch": 4.51812744140625e-05, + "step": 29610, + "training_step_time": 0.10518407821655273 + }, + { + "epoch": 4.518280029296875e-05, + "model_forward_time": 0.025279521942138672, + "step": 29611 + }, + { + "epoch": 4.518280029296875e-05, + "step": 29611, + "training_step_time": 0.10691499710083008 + }, + { + "epoch": 4.5184326171875e-05, + "model_forward_time": 0.025506258010864258, + "step": 29612 + }, + { + "epoch": 4.5184326171875e-05, + "step": 29612, + "training_step_time": 0.10376715660095215 + }, + { + "epoch": 4.518585205078125e-05, + "model_forward_time": 0.025234460830688477, + "step": 29613 + }, + { + "epoch": 4.518585205078125e-05, + "step": 29613, + "training_step_time": 0.10405778884887695 + }, + { + "epoch": 4.51873779296875e-05, + "model_forward_time": 0.025545597076416016, + "step": 29614 + }, + { + "epoch": 4.51873779296875e-05, + "step": 29614, + "training_step_time": 0.10643482208251953 + }, + { + "epoch": 4.518890380859375e-05, + "model_forward_time": 0.026504993438720703, + "step": 29615 + }, + { + "epoch": 4.518890380859375e-05, + "step": 29615, + "training_step_time": 0.10744071006774902 + }, + { + "epoch": 4.51904296875e-05, + "model_forward_time": 0.025057554244995117, + "step": 29616 + }, + { + "epoch": 4.51904296875e-05, + "step": 29616, + "training_step_time": 0.10919523239135742 + }, + { + "epoch": 4.519195556640625e-05, + "model_forward_time": 0.025006771087646484, + "step": 29617 + }, + { + "epoch": 4.519195556640625e-05, + "step": 29617, + "training_step_time": 0.10374331474304199 + }, + { + "epoch": 4.51934814453125e-05, + "model_forward_time": 0.025271177291870117, + "step": 29618 + }, + { + "epoch": 4.51934814453125e-05, + "step": 29618, + "training_step_time": 0.15247654914855957 + }, + { + "epoch": 4.519500732421875e-05, + "model_forward_time": 0.025696992874145508, + "step": 29619 + }, + { + "epoch": 4.519500732421875e-05, + "step": 29619, + "training_step_time": 0.1170954704284668 + }, + { + "epoch": 4.5196533203125e-05, + "grad_norm": 0.082671158015728, + "learning_rate": 4.385849505708084e-08, + "loss": 0.0028, + "step": 29620 + }, + { + "epoch": 4.5196533203125e-05, + "model_forward_time": 0.024617433547973633, + "step": 29620 + }, + { + "epoch": 4.5196533203125e-05, + "step": 29620, + "training_step_time": 0.15830063819885254 + }, + { + "epoch": 4.519805908203125e-05, + "model_forward_time": 0.02474188804626465, + "step": 29621 + }, + { + "epoch": 4.519805908203125e-05, + "step": 29621, + "training_step_time": 0.18254399299621582 + }, + { + "epoch": 4.51995849609375e-05, + "model_forward_time": 0.024033784866333008, + "step": 29622 + }, + { + "epoch": 4.51995849609375e-05, + "step": 29622, + "training_step_time": 0.15720224380493164 + }, + { + "epoch": 4.520111083984375e-05, + "model_forward_time": 0.024799108505249023, + "step": 29623 + }, + { + "epoch": 4.520111083984375e-05, + "step": 29623, + "training_step_time": 0.11114501953125 + }, + { + "epoch": 4.520263671875e-05, + "model_forward_time": 0.025066375732421875, + "step": 29624 + }, + { + "epoch": 4.520263671875e-05, + "step": 29624, + "training_step_time": 0.10458064079284668 + }, + { + "epoch": 4.520416259765625e-05, + "model_forward_time": 0.025496482849121094, + "step": 29625 + }, + { + "epoch": 4.520416259765625e-05, + "step": 29625, + "training_step_time": 0.10793375968933105 + }, + { + "epoch": 4.52056884765625e-05, + "model_forward_time": 0.025423049926757812, + "step": 29626 + }, + { + "epoch": 4.52056884765625e-05, + "step": 29626, + "training_step_time": 0.10202431678771973 + }, + { + "epoch": 4.520721435546875e-05, + "model_forward_time": 0.025261640548706055, + "step": 29627 + }, + { + "epoch": 4.520721435546875e-05, + "step": 29627, + "training_step_time": 0.15013718605041504 + }, + { + "epoch": 4.5208740234375e-05, + "model_forward_time": 0.02523207664489746, + "step": 29628 + }, + { + "epoch": 4.5208740234375e-05, + "step": 29628, + "training_step_time": 0.15972375869750977 + }, + { + "epoch": 4.521026611328125e-05, + "model_forward_time": 0.02500176429748535, + "step": 29629 + }, + { + "epoch": 4.521026611328125e-05, + "step": 29629, + "training_step_time": 0.11524581909179688 + }, + { + "epoch": 4.52117919921875e-05, + "grad_norm": 0.06548045575618744, + "learning_rate": 4.158084191783762e-08, + "loss": 0.0072, + "step": 29630 + }, + { + "epoch": 4.52117919921875e-05, + "model_forward_time": 0.025054454803466797, + "step": 29630 + }, + { + "epoch": 4.52117919921875e-05, + "step": 29630, + "training_step_time": 0.12883996963500977 + }, + { + "epoch": 4.521331787109375e-05, + "model_forward_time": 0.02620863914489746, + "step": 29631 + }, + { + "epoch": 4.521331787109375e-05, + "step": 29631, + "training_step_time": 0.13492131233215332 + }, + { + "epoch": 4.521484375e-05, + "model_forward_time": 0.025491952896118164, + "step": 29632 + }, + { + "epoch": 4.521484375e-05, + "step": 29632, + "training_step_time": 0.13121461868286133 + }, + { + "epoch": 4.521636962890625e-05, + "model_forward_time": 0.025008440017700195, + "step": 29633 + }, + { + "epoch": 4.521636962890625e-05, + "step": 29633, + "training_step_time": 0.1312880516052246 + }, + { + "epoch": 4.52178955078125e-05, + "model_forward_time": 0.02568984031677246, + "step": 29634 + }, + { + "epoch": 4.52178955078125e-05, + "step": 29634, + "training_step_time": 0.12571215629577637 + }, + { + "epoch": 4.521942138671875e-05, + "model_forward_time": 0.0250089168548584, + "step": 29635 + }, + { + "epoch": 4.521942138671875e-05, + "step": 29635, + "training_step_time": 0.12392401695251465 + }, + { + "epoch": 4.5220947265625e-05, + "model_forward_time": 0.02521538734436035, + "step": 29636 + }, + { + "epoch": 4.5220947265625e-05, + "step": 29636, + "training_step_time": 0.12266349792480469 + }, + { + "epoch": 4.522247314453125e-05, + "model_forward_time": 0.025313615798950195, + "step": 29637 + }, + { + "epoch": 4.522247314453125e-05, + "step": 29637, + "training_step_time": 0.12054300308227539 + }, + { + "epoch": 4.52239990234375e-05, + "model_forward_time": 0.02552962303161621, + "step": 29638 + }, + { + "epoch": 4.52239990234375e-05, + "step": 29638, + "training_step_time": 0.11164546012878418 + }, + { + "epoch": 4.522552490234375e-05, + "model_forward_time": 0.025350093841552734, + "step": 29639 + }, + { + "epoch": 4.522552490234375e-05, + "step": 29639, + "training_step_time": 0.11586546897888184 + }, + { + "epoch": 4.522705078125e-05, + "grad_norm": 0.09248725324869156, + "learning_rate": 3.936389296864129e-08, + "loss": 0.0031, + "step": 29640 + }, + { + "epoch": 4.522705078125e-05, + "model_forward_time": 0.025119304656982422, + "step": 29640 + }, + { + "epoch": 4.522705078125e-05, + "step": 29640, + "training_step_time": 0.13777756690979004 + }, + { + "epoch": 4.522857666015625e-05, + "model_forward_time": 0.02499556541442871, + "step": 29641 + }, + { + "epoch": 4.522857666015625e-05, + "step": 29641, + "training_step_time": 0.13263726234436035 + }, + { + "epoch": 4.52301025390625e-05, + "model_forward_time": 0.024793386459350586, + "step": 29642 + }, + { + "epoch": 4.52301025390625e-05, + "step": 29642, + "training_step_time": 0.10691165924072266 + }, + { + "epoch": 4.523162841796875e-05, + "model_forward_time": 0.02570033073425293, + "step": 29643 + }, + { + "epoch": 4.523162841796875e-05, + "step": 29643, + "training_step_time": 0.11609125137329102 + }, + { + "epoch": 4.5233154296875e-05, + "model_forward_time": 0.02536320686340332, + "step": 29644 + }, + { + "epoch": 4.5233154296875e-05, + "step": 29644, + "training_step_time": 0.17969465255737305 + }, + { + "epoch": 4.523468017578125e-05, + "model_forward_time": 0.025518178939819336, + "step": 29645 + }, + { + "epoch": 4.523468017578125e-05, + "step": 29645, + "training_step_time": 0.21148180961608887 + }, + { + "epoch": 4.52362060546875e-05, + "model_forward_time": 0.02523660659790039, + "step": 29646 + }, + { + "epoch": 4.52362060546875e-05, + "step": 29646, + "training_step_time": 0.17023825645446777 + }, + { + "epoch": 4.523773193359375e-05, + "model_forward_time": 0.02474236488342285, + "step": 29647 + }, + { + "epoch": 4.523773193359375e-05, + "step": 29647, + "training_step_time": 0.15087556838989258 + }, + { + "epoch": 4.52392578125e-05, + "model_forward_time": 0.025370359420776367, + "step": 29648 + }, + { + "epoch": 4.52392578125e-05, + "step": 29648, + "training_step_time": 0.1161494255065918 + }, + { + "epoch": 4.524078369140625e-05, + "model_forward_time": 0.025065898895263672, + "step": 29649 + }, + { + "epoch": 4.524078369140625e-05, + "step": 29649, + "training_step_time": 0.13740086555480957 + }, + { + "epoch": 4.52423095703125e-05, + "grad_norm": 0.024560654535889626, + "learning_rate": 3.720765090329814e-08, + "loss": 0.0065, + "step": 29650 + }, + { + "epoch": 4.52423095703125e-05, + "model_forward_time": 0.025783538818359375, + "step": 29650 + }, + { + "epoch": 4.52423095703125e-05, + "step": 29650, + "training_step_time": 0.10371065139770508 + }, + { + "epoch": 4.524383544921875e-05, + "model_forward_time": 0.025590181350708008, + "step": 29651 + }, + { + "epoch": 4.524383544921875e-05, + "step": 29651, + "training_step_time": 0.11135268211364746 + }, + { + "epoch": 4.5245361328125e-05, + "model_forward_time": 0.025798559188842773, + "step": 29652 + }, + { + "epoch": 4.5245361328125e-05, + "step": 29652, + "training_step_time": 0.10965442657470703 + }, + { + "epoch": 4.524688720703125e-05, + "model_forward_time": 0.028910160064697266, + "step": 29653 + }, + { + "epoch": 4.524688720703125e-05, + "step": 29653, + "training_step_time": 0.10869574546813965 + }, + { + "epoch": 4.52484130859375e-05, + "model_forward_time": 0.025638580322265625, + "step": 29654 + }, + { + "epoch": 4.52484130859375e-05, + "step": 29654, + "training_step_time": 0.10786223411560059 + }, + { + "epoch": 4.524993896484375e-05, + "model_forward_time": 0.02565622329711914, + "step": 29655 + }, + { + "epoch": 4.524993896484375e-05, + "step": 29655, + "training_step_time": 0.10527157783508301 + }, + { + "epoch": 4.525146484375e-05, + "model_forward_time": 0.027047395706176758, + "step": 29656 + }, + { + "epoch": 4.525146484375e-05, + "step": 29656, + "training_step_time": 0.11253166198730469 + }, + { + "epoch": 4.525299072265625e-05, + "model_forward_time": 0.025848388671875, + "step": 29657 + }, + { + "epoch": 4.525299072265625e-05, + "step": 29657, + "training_step_time": 0.10574674606323242 + }, + { + "epoch": 4.52545166015625e-05, + "model_forward_time": 0.02568221092224121, + "step": 29658 + }, + { + "epoch": 4.52545166015625e-05, + "step": 29658, + "training_step_time": 0.1050422191619873 + }, + { + "epoch": 4.525604248046875e-05, + "model_forward_time": 0.025738239288330078, + "step": 29659 + }, + { + "epoch": 4.525604248046875e-05, + "step": 29659, + "training_step_time": 0.10905933380126953 + }, + { + "epoch": 4.5257568359375e-05, + "grad_norm": 0.02502184920012951, + "learning_rate": 3.511211834184014e-08, + "loss": 0.0081, + "step": 29660 + }, + { + "epoch": 4.5257568359375e-05, + "model_forward_time": 0.025752544403076172, + "step": 29660 + }, + { + "epoch": 4.5257568359375e-05, + "step": 29660, + "training_step_time": 0.1080021858215332 + }, + { + "epoch": 4.525909423828125e-05, + "model_forward_time": 0.025728464126586914, + "step": 29661 + }, + { + "epoch": 4.525909423828125e-05, + "step": 29661, + "training_step_time": 0.2097759246826172 + }, + { + "epoch": 4.52606201171875e-05, + "model_forward_time": 0.024498462677001953, + "step": 29662 + }, + { + "epoch": 4.52606201171875e-05, + "step": 29662, + "training_step_time": 0.12109589576721191 + }, + { + "epoch": 4.526214599609375e-05, + "model_forward_time": 0.024850845336914062, + "step": 29663 + }, + { + "epoch": 4.526214599609375e-05, + "step": 29663, + "training_step_time": 0.12487363815307617 + }, + { + "epoch": 4.5263671875e-05, + "model_forward_time": 0.026038646697998047, + "step": 29664 + }, + { + "epoch": 4.5263671875e-05, + "step": 29664, + "training_step_time": 0.1618044376373291 + }, + { + "epoch": 4.526519775390625e-05, + "model_forward_time": 0.025014877319335938, + "step": 29665 + }, + { + "epoch": 4.526519775390625e-05, + "step": 29665, + "training_step_time": 0.21860194206237793 + }, + { + "epoch": 4.52667236328125e-05, + "model_forward_time": 0.026047945022583008, + "step": 29666 + }, + { + "epoch": 4.52667236328125e-05, + "step": 29666, + "training_step_time": 0.10300517082214355 + }, + { + "epoch": 4.526824951171875e-05, + "model_forward_time": 0.024447202682495117, + "step": 29667 + }, + { + "epoch": 4.526824951171875e-05, + "step": 29667, + "training_step_time": 0.10590958595275879 + }, + { + "epoch": 4.5269775390625e-05, + "model_forward_time": 0.025720596313476562, + "step": 29668 + }, + { + "epoch": 4.5269775390625e-05, + "step": 29668, + "training_step_time": 0.10593175888061523 + }, + { + "epoch": 4.527130126953125e-05, + "model_forward_time": 0.025516748428344727, + "step": 29669 + }, + { + "epoch": 4.527130126953125e-05, + "step": 29669, + "training_step_time": 0.10470247268676758 + }, + { + "epoch": 4.52728271484375e-05, + "grad_norm": 0.11808640509843826, + "learning_rate": 3.3077297830541584e-08, + "loss": 0.0051, + "step": 29670 + }, + { + "epoch": 4.52728271484375e-05, + "model_forward_time": 0.025986194610595703, + "step": 29670 + }, + { + "epoch": 4.52728271484375e-05, + "step": 29670, + "training_step_time": 0.10517525672912598 + }, + { + "epoch": 4.527435302734375e-05, + "model_forward_time": 0.025160789489746094, + "step": 29671 + }, + { + "epoch": 4.527435302734375e-05, + "step": 29671, + "training_step_time": 0.13320612907409668 + }, + { + "epoch": 4.527587890625e-05, + "model_forward_time": 0.02528095245361328, + "step": 29672 + }, + { + "epoch": 4.527587890625e-05, + "step": 29672, + "training_step_time": 0.1121985912322998 + }, + { + "epoch": 4.527740478515625e-05, + "model_forward_time": 0.025724411010742188, + "step": 29673 + }, + { + "epoch": 4.527740478515625e-05, + "step": 29673, + "training_step_time": 0.11493659019470215 + }, + { + "epoch": 4.52789306640625e-05, + "model_forward_time": 0.025823593139648438, + "step": 29674 + }, + { + "epoch": 4.52789306640625e-05, + "step": 29674, + "training_step_time": 0.12138032913208008 + }, + { + "epoch": 4.528045654296875e-05, + "model_forward_time": 0.0260159969329834, + "step": 29675 + }, + { + "epoch": 4.528045654296875e-05, + "step": 29675, + "training_step_time": 0.10980749130249023 + }, + { + "epoch": 4.5281982421875e-05, + "model_forward_time": 0.025948762893676758, + "step": 29676 + }, + { + "epoch": 4.5281982421875e-05, + "step": 29676, + "training_step_time": 0.12473344802856445 + }, + { + "epoch": 4.528350830078125e-05, + "model_forward_time": 0.025831937789916992, + "step": 29677 + }, + { + "epoch": 4.528350830078125e-05, + "step": 29677, + "training_step_time": 0.1088247299194336 + }, + { + "epoch": 4.52850341796875e-05, + "model_forward_time": 0.025553464889526367, + "step": 29678 + }, + { + "epoch": 4.52850341796875e-05, + "step": 29678, + "training_step_time": 0.10664176940917969 + }, + { + "epoch": 4.528656005859375e-05, + "model_forward_time": 0.025921106338500977, + "step": 29679 + }, + { + "epoch": 4.528656005859375e-05, + "step": 29679, + "training_step_time": 0.10860466957092285 + }, + { + "epoch": 4.52880859375e-05, + "grad_norm": 0.05965844541788101, + "learning_rate": 3.110319184189692e-08, + "loss": 0.0047, + "step": 29680 + }, + { + "epoch": 4.52880859375e-05, + "model_forward_time": 0.02552032470703125, + "step": 29680 + }, + { + "epoch": 4.52880859375e-05, + "step": 29680, + "training_step_time": 0.10634803771972656 + }, + { + "epoch": 4.528961181640625e-05, + "model_forward_time": 0.02579021453857422, + "step": 29681 + }, + { + "epoch": 4.528961181640625e-05, + "step": 29681, + "training_step_time": 0.1104276180267334 + }, + { + "epoch": 4.52911376953125e-05, + "model_forward_time": 0.02559494972229004, + "step": 29682 + }, + { + "epoch": 4.52911376953125e-05, + "step": 29682, + "training_step_time": 0.10550880432128906 + }, + { + "epoch": 4.529266357421875e-05, + "model_forward_time": 0.025532007217407227, + "step": 29683 + }, + { + "epoch": 4.529266357421875e-05, + "step": 29683, + "training_step_time": 0.10835838317871094 + }, + { + "epoch": 4.5294189453125e-05, + "model_forward_time": 0.02557539939880371, + "step": 29684 + }, + { + "epoch": 4.5294189453125e-05, + "step": 29684, + "training_step_time": 0.10563278198242188 + }, + { + "epoch": 4.529571533203125e-05, + "model_forward_time": 0.026108980178833008, + "step": 29685 + }, + { + "epoch": 4.529571533203125e-05, + "step": 29685, + "training_step_time": 0.15536189079284668 + }, + { + "epoch": 4.52972412109375e-05, + "model_forward_time": 0.024858713150024414, + "step": 29686 + }, + { + "epoch": 4.52972412109375e-05, + "step": 29686, + "training_step_time": 0.1387336254119873 + }, + { + "epoch": 4.529876708984375e-05, + "model_forward_time": 0.024775028228759766, + "step": 29687 + }, + { + "epoch": 4.529876708984375e-05, + "step": 29687, + "training_step_time": 0.10977816581726074 + }, + { + "epoch": 4.530029296875e-05, + "model_forward_time": 0.0258333683013916, + "step": 29688 + }, + { + "epoch": 4.530029296875e-05, + "step": 29688, + "training_step_time": 0.18958187103271484 + }, + { + "epoch": 4.530181884765625e-05, + "model_forward_time": 0.024982690811157227, + "step": 29689 + }, + { + "epoch": 4.530181884765625e-05, + "step": 29689, + "training_step_time": 0.2292780876159668 + }, + { + "epoch": 4.53033447265625e-05, + "grad_norm": 0.09275636821985245, + "learning_rate": 2.9189802774631792e-08, + "loss": 0.0054, + "step": 29690 + }, + { + "epoch": 4.53033447265625e-05, + "model_forward_time": 0.025409936904907227, + "step": 29690 + }, + { + "epoch": 4.53033447265625e-05, + "step": 29690, + "training_step_time": 0.23215198516845703 + }, + { + "epoch": 4.530487060546875e-05, + "model_forward_time": 0.024776220321655273, + "step": 29691 + }, + { + "epoch": 4.530487060546875e-05, + "step": 29691, + "training_step_time": 0.20402002334594727 + }, + { + "epoch": 4.5306396484375e-05, + "model_forward_time": 0.0251009464263916, + "step": 29692 + }, + { + "epoch": 4.5306396484375e-05, + "step": 29692, + "training_step_time": 0.20563030242919922 + }, + { + "epoch": 4.530792236328125e-05, + "model_forward_time": 0.025626420974731445, + "step": 29693 + }, + { + "epoch": 4.530792236328125e-05, + "step": 29693, + "training_step_time": 0.1843571662902832 + }, + { + "epoch": 4.53094482421875e-05, + "model_forward_time": 0.025150060653686523, + "step": 29694 + }, + { + "epoch": 4.53094482421875e-05, + "step": 29694, + "training_step_time": 0.16259241104125977 + }, + { + "epoch": 4.531097412109375e-05, + "model_forward_time": 0.024699687957763672, + "step": 29695 + }, + { + "epoch": 4.531097412109375e-05, + "step": 29695, + "training_step_time": 0.1451706886291504 + }, + { + "epoch": 4.53125e-05, + "model_forward_time": 0.02496170997619629, + "step": 29696 + }, + { + "epoch": 4.53125e-05, + "step": 29696, + "training_step_time": 0.10522031784057617 + }, + { + "epoch": 4.531402587890625e-05, + "model_forward_time": 0.02544999122619629, + "step": 29697 + }, + { + "epoch": 4.531402587890625e-05, + "step": 29697, + "training_step_time": 0.10150575637817383 + }, + { + "epoch": 4.53155517578125e-05, + "model_forward_time": 0.025568008422851562, + "step": 29698 + }, + { + "epoch": 4.53155517578125e-05, + "step": 29698, + "training_step_time": 0.1040036678314209 + }, + { + "epoch": 4.531707763671875e-05, + "model_forward_time": 0.025693178176879883, + "step": 29699 + }, + { + "epoch": 4.531707763671875e-05, + "step": 29699, + "training_step_time": 0.10436415672302246 + }, + { + "epoch": 4.5318603515625e-05, + "grad_norm": 0.047349270433187485, + "learning_rate": 2.7337132953697554e-08, + "loss": 0.0024, + "step": 29700 + }, + { + "epoch": 4.5318603515625e-05, + "model_forward_time": 0.026383399963378906, + "step": 29700 + }, + { + "epoch": 4.5318603515625e-05, + "step": 29700, + "training_step_time": 0.1092383861541748 + }, + { + "epoch": 4.532012939453125e-05, + "model_forward_time": 0.025580883026123047, + "step": 29701 + }, + { + "epoch": 4.532012939453125e-05, + "step": 29701, + "training_step_time": 0.10425519943237305 + }, + { + "epoch": 4.53216552734375e-05, + "model_forward_time": 0.025387287139892578, + "step": 29702 + }, + { + "epoch": 4.53216552734375e-05, + "step": 29702, + "training_step_time": 0.10656929016113281 + }, + { + "epoch": 4.532318115234375e-05, + "model_forward_time": 0.025997161865234375, + "step": 29703 + }, + { + "epoch": 4.532318115234375e-05, + "step": 29703, + "training_step_time": 0.10522937774658203 + }, + { + "epoch": 4.532470703125e-05, + "model_forward_time": 0.025434255599975586, + "step": 29704 + }, + { + "epoch": 4.532470703125e-05, + "step": 29704, + "training_step_time": 0.16423559188842773 + }, + { + "epoch": 4.532623291015625e-05, + "model_forward_time": 0.02486443519592285, + "step": 29705 + }, + { + "epoch": 4.532623291015625e-05, + "step": 29705, + "training_step_time": 0.1191549301147461 + }, + { + "epoch": 4.53277587890625e-05, + "model_forward_time": 0.024611711502075195, + "step": 29706 + }, + { + "epoch": 4.53277587890625e-05, + "step": 29706, + "training_step_time": 0.1169428825378418 + }, + { + "epoch": 4.532928466796875e-05, + "model_forward_time": 0.025295734405517578, + "step": 29707 + }, + { + "epoch": 4.532928466796875e-05, + "step": 29707, + "training_step_time": 0.15642118453979492 + }, + { + "epoch": 4.5330810546875e-05, + "model_forward_time": 0.025137662887573242, + "step": 29708 + }, + { + "epoch": 4.5330810546875e-05, + "step": 29708, + "training_step_time": 0.2115638256072998 + }, + { + "epoch": 4.533233642578125e-05, + "model_forward_time": 0.024944543838500977, + "step": 29709 + }, + { + "epoch": 4.533233642578125e-05, + "step": 29709, + "training_step_time": 0.11227607727050781 + }, + { + "epoch": 4.53338623046875e-05, + "grad_norm": 0.2016042172908783, + "learning_rate": 2.5545184630265672e-08, + "loss": 0.0059, + "step": 29710 + }, + { + "epoch": 4.53338623046875e-05, + "model_forward_time": 0.024680137634277344, + "step": 29710 + }, + { + "epoch": 4.53338623046875e-05, + "step": 29710, + "training_step_time": 0.1050412654876709 + }, + { + "epoch": 4.533538818359375e-05, + "model_forward_time": 0.025360822677612305, + "step": 29711 + }, + { + "epoch": 4.533538818359375e-05, + "step": 29711, + "training_step_time": 0.10575556755065918 + }, + { + "epoch": 4.53369140625e-05, + "model_forward_time": 0.02600860595703125, + "step": 29712 + }, + { + "epoch": 4.53369140625e-05, + "step": 29712, + "training_step_time": 0.10495972633361816 + }, + { + "epoch": 4.533843994140625e-05, + "model_forward_time": 0.02580738067626953, + "step": 29713 + }, + { + "epoch": 4.533843994140625e-05, + "step": 29713, + "training_step_time": 0.10371923446655273 + }, + { + "epoch": 4.53399658203125e-05, + "model_forward_time": 0.025251388549804688, + "step": 29714 + }, + { + "epoch": 4.53399658203125e-05, + "step": 29714, + "training_step_time": 0.10804605484008789 + }, + { + "epoch": 4.534149169921875e-05, + "model_forward_time": 0.025064706802368164, + "step": 29715 + }, + { + "epoch": 4.534149169921875e-05, + "step": 29715, + "training_step_time": 0.11443710327148438 + }, + { + "epoch": 4.5343017578125e-05, + "model_forward_time": 0.0256807804107666, + "step": 29716 + }, + { + "epoch": 4.5343017578125e-05, + "step": 29716, + "training_step_time": 0.11960506439208984 + }, + { + "epoch": 4.534454345703125e-05, + "model_forward_time": 0.02819085121154785, + "step": 29717 + }, + { + "epoch": 4.534454345703125e-05, + "step": 29717, + "training_step_time": 0.10873699188232422 + }, + { + "epoch": 4.53460693359375e-05, + "model_forward_time": 0.026059389114379883, + "step": 29718 + }, + { + "epoch": 4.53460693359375e-05, + "step": 29718, + "training_step_time": 0.21387195587158203 + }, + { + "epoch": 4.534759521484375e-05, + "model_forward_time": 0.025065183639526367, + "step": 29719 + }, + { + "epoch": 4.534759521484375e-05, + "step": 29719, + "training_step_time": 0.1087651252746582 + }, + { + "epoch": 4.534912109375e-05, + "grad_norm": 0.04984891787171364, + "learning_rate": 2.3813959981711097e-08, + "loss": 0.0059, + "step": 29720 + }, + { + "epoch": 4.534912109375e-05, + "model_forward_time": 0.02567744255065918, + "step": 29720 + }, + { + "epoch": 4.534912109375e-05, + "step": 29720, + "training_step_time": 0.10394477844238281 + }, + { + "epoch": 4.535064697265625e-05, + "model_forward_time": 0.025539636611938477, + "step": 29721 + }, + { + "epoch": 4.535064697265625e-05, + "step": 29721, + "training_step_time": 0.10599923133850098 + }, + { + "epoch": 4.53521728515625e-05, + "model_forward_time": 0.02572345733642578, + "step": 29722 + }, + { + "epoch": 4.53521728515625e-05, + "step": 29722, + "training_step_time": 0.10699701309204102 + }, + { + "epoch": 4.535369873046875e-05, + "model_forward_time": 0.02574777603149414, + "step": 29723 + }, + { + "epoch": 4.535369873046875e-05, + "step": 29723, + "training_step_time": 0.10710597038269043 + }, + { + "epoch": 4.5355224609375e-05, + "model_forward_time": 0.025864124298095703, + "step": 29724 + }, + { + "epoch": 4.5355224609375e-05, + "step": 29724, + "training_step_time": 0.1065073013305664 + }, + { + "epoch": 4.535675048828125e-05, + "model_forward_time": 0.0259554386138916, + "step": 29725 + }, + { + "epoch": 4.535675048828125e-05, + "step": 29725, + "training_step_time": 0.10703587532043457 + }, + { + "epoch": 4.53582763671875e-05, + "model_forward_time": 0.025142431259155273, + "step": 29726 + }, + { + "epoch": 4.53582763671875e-05, + "step": 29726, + "training_step_time": 0.1086885929107666 + }, + { + "epoch": 4.535980224609375e-05, + "model_forward_time": 0.02535104751586914, + "step": 29727 + }, + { + "epoch": 4.535980224609375e-05, + "step": 29727, + "training_step_time": 0.10484886169433594 + }, + { + "epoch": 4.5361328125e-05, + "model_forward_time": 0.025358915328979492, + "step": 29728 + }, + { + "epoch": 4.5361328125e-05, + "step": 29728, + "training_step_time": 0.14243268966674805 + }, + { + "epoch": 4.536285400390625e-05, + "model_forward_time": 0.025272369384765625, + "step": 29729 + }, + { + "epoch": 4.536285400390625e-05, + "step": 29729, + "training_step_time": 0.11534452438354492 + }, + { + "epoch": 4.53643798828125e-05, + "grad_norm": 0.039226289838552475, + "learning_rate": 2.214346111164556e-08, + "loss": 0.0122, + "step": 29730 + }, + { + "epoch": 4.53643798828125e-05, + "model_forward_time": 0.02506089210510254, + "step": 29730 + }, + { + "epoch": 4.53643798828125e-05, + "step": 29730, + "training_step_time": 0.10279297828674316 + }, + { + "epoch": 4.536590576171875e-05, + "model_forward_time": 0.02518773078918457, + "step": 29731 + }, + { + "epoch": 4.536590576171875e-05, + "step": 29731, + "training_step_time": 0.10540199279785156 + }, + { + "epoch": 4.5367431640625e-05, + "model_forward_time": 0.025439977645874023, + "step": 29732 + }, + { + "epoch": 4.5367431640625e-05, + "step": 29732, + "training_step_time": 0.20416831970214844 + }, + { + "epoch": 4.536895751953125e-05, + "model_forward_time": 0.024324893951416016, + "step": 29733 + }, + { + "epoch": 4.536895751953125e-05, + "step": 29733, + "training_step_time": 0.14699244499206543 + }, + { + "epoch": 4.53704833984375e-05, + "model_forward_time": 0.024393558502197266, + "step": 29734 + }, + { + "epoch": 4.53704833984375e-05, + "step": 29734, + "training_step_time": 0.21265959739685059 + }, + { + "epoch": 4.537200927734375e-05, + "model_forward_time": 0.02414679527282715, + "step": 29735 + }, + { + "epoch": 4.537200927734375e-05, + "step": 29735, + "training_step_time": 0.149916410446167 + }, + { + "epoch": 4.537353515625e-05, + "model_forward_time": 0.023150205612182617, + "step": 29736 + }, + { + "epoch": 4.537353515625e-05, + "step": 29736, + "training_step_time": 0.12035989761352539 + }, + { + "epoch": 4.537506103515625e-05, + "model_forward_time": 0.024893522262573242, + "step": 29737 + }, + { + "epoch": 4.537506103515625e-05, + "step": 29737, + "training_step_time": 0.1407489776611328 + }, + { + "epoch": 4.53765869140625e-05, + "model_forward_time": 0.024173736572265625, + "step": 29738 + }, + { + "epoch": 4.53765869140625e-05, + "step": 29738, + "training_step_time": 0.1465003490447998 + }, + { + "epoch": 4.537811279296875e-05, + "model_forward_time": 0.024143457412719727, + "step": 29739 + }, + { + "epoch": 4.537811279296875e-05, + "step": 29739, + "training_step_time": 0.13616204261779785 + }, + { + "epoch": 4.5379638671875e-05, + "grad_norm": 0.04317305609583855, + "learning_rate": 2.0533690049878707e-08, + "loss": 0.0029, + "step": 29740 + }, + { + "epoch": 4.5379638671875e-05, + "model_forward_time": 0.02652430534362793, + "step": 29740 + }, + { + "epoch": 4.5379638671875e-05, + "step": 29740, + "training_step_time": 0.1352841854095459 + }, + { + "epoch": 4.538116455078125e-05, + "model_forward_time": 0.024184465408325195, + "step": 29741 + }, + { + "epoch": 4.538116455078125e-05, + "step": 29741, + "training_step_time": 0.12969541549682617 + }, + { + "epoch": 4.53826904296875e-05, + "model_forward_time": 0.024678945541381836, + "step": 29742 + }, + { + "epoch": 4.53826904296875e-05, + "step": 29742, + "training_step_time": 0.1267850399017334 + }, + { + "epoch": 4.538421630859375e-05, + "model_forward_time": 0.02490544319152832, + "step": 29743 + }, + { + "epoch": 4.538421630859375e-05, + "step": 29743, + "training_step_time": 0.12626957893371582 + }, + { + "epoch": 4.53857421875e-05, + "model_forward_time": 0.024782896041870117, + "step": 29744 + }, + { + "epoch": 4.53857421875e-05, + "step": 29744, + "training_step_time": 0.12142038345336914 + }, + { + "epoch": 4.538726806640625e-05, + "model_forward_time": 0.025026321411132812, + "step": 29745 + }, + { + "epoch": 4.538726806640625e-05, + "step": 29745, + "training_step_time": 0.11894798278808594 + }, + { + "epoch": 4.53887939453125e-05, + "model_forward_time": 0.024596452713012695, + "step": 29746 + }, + { + "epoch": 4.53887939453125e-05, + "step": 29746, + "training_step_time": 0.11337661743164062 + }, + { + "epoch": 4.539031982421875e-05, + "model_forward_time": 0.02541518211364746, + "step": 29747 + }, + { + "epoch": 4.539031982421875e-05, + "step": 29747, + "training_step_time": 0.10958600044250488 + }, + { + "epoch": 4.5391845703125e-05, + "model_forward_time": 0.025316715240478516, + "step": 29748 + }, + { + "epoch": 4.5391845703125e-05, + "step": 29748, + "training_step_time": 0.13230347633361816 + }, + { + "epoch": 4.539337158203125e-05, + "model_forward_time": 0.025249481201171875, + "step": 29749 + }, + { + "epoch": 4.539337158203125e-05, + "step": 29749, + "training_step_time": 0.11367607116699219 + }, + { + "epoch": 4.53948974609375e-05, + "grad_norm": 0.062019359320402145, + "learning_rate": 1.8984648752429225e-08, + "loss": 0.0065, + "step": 29750 + }, + { + "epoch": 4.53948974609375e-05, + "model_forward_time": 0.024881362915039062, + "step": 29750 + }, + { + "epoch": 4.53948974609375e-05, + "step": 29750, + "training_step_time": 0.1212923526763916 + }, + { + "epoch": 4.539642333984375e-05, + "model_forward_time": 0.025597572326660156, + "step": 29751 + }, + { + "epoch": 4.539642333984375e-05, + "step": 29751, + "training_step_time": 0.13914871215820312 + }, + { + "epoch": 4.539794921875e-05, + "model_forward_time": 0.02506875991821289, + "step": 29752 + }, + { + "epoch": 4.539794921875e-05, + "step": 29752, + "training_step_time": 0.10605287551879883 + }, + { + "epoch": 4.539947509765625e-05, + "model_forward_time": 0.02693486213684082, + "step": 29753 + }, + { + "epoch": 4.539947509765625e-05, + "step": 29753, + "training_step_time": 0.13351106643676758 + }, + { + "epoch": 4.54010009765625e-05, + "model_forward_time": 0.025110960006713867, + "step": 29754 + }, + { + "epoch": 4.54010009765625e-05, + "step": 29754, + "training_step_time": 0.11182737350463867 + }, + { + "epoch": 4.540252685546875e-05, + "model_forward_time": 0.024997711181640625, + "step": 29755 + }, + { + "epoch": 4.540252685546875e-05, + "step": 29755, + "training_step_time": 0.10388636589050293 + }, + { + "epoch": 4.5404052734375e-05, + "model_forward_time": 0.024947166442871094, + "step": 29756 + }, + { + "epoch": 4.5404052734375e-05, + "step": 29756, + "training_step_time": 0.10567069053649902 + }, + { + "epoch": 4.540557861328125e-05, + "model_forward_time": 0.025150537490844727, + "step": 29757 + }, + { + "epoch": 4.540557861328125e-05, + "step": 29757, + "training_step_time": 0.10843157768249512 + }, + { + "epoch": 4.54071044921875e-05, + "model_forward_time": 0.025532245635986328, + "step": 29758 + }, + { + "epoch": 4.54071044921875e-05, + "step": 29758, + "training_step_time": 0.10559701919555664 + }, + { + "epoch": 4.540863037109375e-05, + "model_forward_time": 0.0243380069732666, + "step": 29759 + }, + { + "epoch": 4.540863037109375e-05, + "step": 29759, + "training_step_time": 0.10448431968688965 + }, + { + "epoch": 4.541015625e-05, + "grad_norm": 0.03795764967799187, + "learning_rate": 1.749633910153592e-08, + "loss": 0.0036, + "step": 29760 + }, + { + "epoch": 4.541015625e-05, + "model_forward_time": 0.025029897689819336, + "step": 29760 + }, + { + "epoch": 4.541015625e-05, + "step": 29760, + "training_step_time": 0.11104607582092285 + }, + { + "epoch": 4.541168212890625e-05, + "model_forward_time": 0.025667667388916016, + "step": 29761 + }, + { + "epoch": 4.541168212890625e-05, + "step": 29761, + "training_step_time": 0.10853362083435059 + }, + { + "epoch": 4.54132080078125e-05, + "model_forward_time": 0.025260210037231445, + "step": 29762 + }, + { + "epoch": 4.54132080078125e-05, + "step": 29762, + "training_step_time": 0.10883784294128418 + }, + { + "epoch": 4.541473388671875e-05, + "model_forward_time": 0.02529621124267578, + "step": 29763 + }, + { + "epoch": 4.541473388671875e-05, + "step": 29763, + "training_step_time": 0.21544289588928223 + }, + { + "epoch": 4.5416259765625e-05, + "model_forward_time": 0.025112152099609375, + "step": 29764 + }, + { + "epoch": 4.5416259765625e-05, + "step": 29764, + "training_step_time": 0.11326026916503906 + }, + { + "epoch": 4.541778564453125e-05, + "model_forward_time": 0.025382518768310547, + "step": 29765 + }, + { + "epoch": 4.541778564453125e-05, + "step": 29765, + "training_step_time": 0.10379528999328613 + }, + { + "epoch": 4.54193115234375e-05, + "model_forward_time": 0.025300264358520508, + "step": 29766 + }, + { + "epoch": 4.54193115234375e-05, + "step": 29766, + "training_step_time": 0.10572576522827148 + }, + { + "epoch": 4.542083740234375e-05, + "model_forward_time": 0.024794578552246094, + "step": 29767 + }, + { + "epoch": 4.542083740234375e-05, + "step": 29767, + "training_step_time": 0.1032106876373291 + }, + { + "epoch": 4.542236328125e-05, + "model_forward_time": 0.025298118591308594, + "step": 29768 + }, + { + "epoch": 4.542236328125e-05, + "step": 29768, + "training_step_time": 0.10455775260925293 + }, + { + "epoch": 4.542388916015625e-05, + "model_forward_time": 0.025965452194213867, + "step": 29769 + }, + { + "epoch": 4.542388916015625e-05, + "step": 29769, + "training_step_time": 0.10513448715209961 + }, + { + "epoch": 4.54254150390625e-05, + "grad_norm": 0.027176687493920326, + "learning_rate": 1.6068762905635527e-08, + "loss": 0.0056, + "step": 29770 + }, + { + "epoch": 4.54254150390625e-05, + "model_forward_time": 0.02522420883178711, + "step": 29770 + }, + { + "epoch": 4.54254150390625e-05, + "step": 29770, + "training_step_time": 0.10556864738464355 + }, + { + "epoch": 4.542694091796875e-05, + "model_forward_time": 0.0240938663482666, + "step": 29771 + }, + { + "epoch": 4.542694091796875e-05, + "step": 29771, + "training_step_time": 0.10435605049133301 + }, + { + "epoch": 4.5428466796875e-05, + "model_forward_time": 0.025252819061279297, + "step": 29772 + }, + { + "epoch": 4.5428466796875e-05, + "step": 29772, + "training_step_time": 0.10599827766418457 + }, + { + "epoch": 4.542999267578125e-05, + "model_forward_time": 0.02544426918029785, + "step": 29773 + }, + { + "epoch": 4.542999267578125e-05, + "step": 29773, + "training_step_time": 0.1511244773864746 + }, + { + "epoch": 4.54315185546875e-05, + "model_forward_time": 0.02490544319152832, + "step": 29774 + }, + { + "epoch": 4.54315185546875e-05, + "step": 29774, + "training_step_time": 0.14748072624206543 + }, + { + "epoch": 4.543304443359375e-05, + "model_forward_time": 0.024646520614624023, + "step": 29775 + }, + { + "epoch": 4.543304443359375e-05, + "step": 29775, + "training_step_time": 0.10059785842895508 + }, + { + "epoch": 4.54345703125e-05, + "model_forward_time": 0.025406837463378906, + "step": 29776 + }, + { + "epoch": 4.54345703125e-05, + "step": 29776, + "training_step_time": 0.12291479110717773 + }, + { + "epoch": 4.543609619140625e-05, + "model_forward_time": 0.02568197250366211, + "step": 29777 + }, + { + "epoch": 4.543609619140625e-05, + "step": 29777, + "training_step_time": 0.12318825721740723 + }, + { + "epoch": 4.54376220703125e-05, + "model_forward_time": 0.025368213653564453, + "step": 29778 + }, + { + "epoch": 4.54376220703125e-05, + "step": 29778, + "training_step_time": 0.21992874145507812 + }, + { + "epoch": 4.543914794921875e-05, + "model_forward_time": 0.024649620056152344, + "step": 29779 + }, + { + "epoch": 4.543914794921875e-05, + "step": 29779, + "training_step_time": 0.22472476959228516 + }, + { + "epoch": 4.5440673828125e-05, + "grad_norm": 0.04830660670995712, + "learning_rate": 1.4701921899362703e-08, + "loss": 0.0038, + "step": 29780 + }, + { + "epoch": 4.5440673828125e-05, + "model_forward_time": 0.024690628051757812, + "step": 29780 + }, + { + "epoch": 4.5440673828125e-05, + "step": 29780, + "training_step_time": 0.10743284225463867 + }, + { + "epoch": 4.544219970703125e-05, + "model_forward_time": 0.02454090118408203, + "step": 29781 + }, + { + "epoch": 4.544219970703125e-05, + "step": 29781, + "training_step_time": 0.1349046230316162 + }, + { + "epoch": 4.54437255859375e-05, + "model_forward_time": 0.025215864181518555, + "step": 29782 + }, + { + "epoch": 4.54437255859375e-05, + "step": 29782, + "training_step_time": 0.15891623497009277 + }, + { + "epoch": 4.544525146484375e-05, + "model_forward_time": 0.025135517120361328, + "step": 29783 + }, + { + "epoch": 4.544525146484375e-05, + "step": 29783, + "training_step_time": 0.22401165962219238 + }, + { + "epoch": 4.544677734375e-05, + "model_forward_time": 0.024492263793945312, + "step": 29784 + }, + { + "epoch": 4.544677734375e-05, + "step": 29784, + "training_step_time": 0.13244152069091797 + }, + { + "epoch": 4.544830322265625e-05, + "model_forward_time": 0.02503037452697754, + "step": 29785 + }, + { + "epoch": 4.544830322265625e-05, + "step": 29785, + "training_step_time": 0.1214301586151123 + }, + { + "epoch": 4.54498291015625e-05, + "model_forward_time": 0.02538895606994629, + "step": 29786 + }, + { + "epoch": 4.54498291015625e-05, + "step": 29786, + "training_step_time": 0.12203431129455566 + }, + { + "epoch": 4.545135498046875e-05, + "model_forward_time": 0.025394678115844727, + "step": 29787 + }, + { + "epoch": 4.545135498046875e-05, + "step": 29787, + "training_step_time": 0.11734390258789062 + }, + { + "epoch": 4.5452880859375e-05, + "model_forward_time": 0.025045394897460938, + "step": 29788 + }, + { + "epoch": 4.5452880859375e-05, + "step": 29788, + "training_step_time": 0.11571145057678223 + }, + { + "epoch": 4.545440673828125e-05, + "model_forward_time": 0.025708675384521484, + "step": 29789 + }, + { + "epoch": 4.545440673828125e-05, + "step": 29789, + "training_step_time": 0.11198616027832031 + }, + { + "epoch": 4.54559326171875e-05, + "grad_norm": 0.03371327370405197, + "learning_rate": 1.3395817743561134e-08, + "loss": 0.0023, + "step": 29790 + }, + { + "epoch": 4.54559326171875e-05, + "model_forward_time": 0.0252230167388916, + "step": 29790 + }, + { + "epoch": 4.54559326171875e-05, + "step": 29790, + "training_step_time": 0.10803437232971191 + }, + { + "epoch": 4.545745849609375e-05, + "model_forward_time": 0.025005102157592773, + "step": 29791 + }, + { + "epoch": 4.545745849609375e-05, + "step": 29791, + "training_step_time": 0.10507869720458984 + }, + { + "epoch": 4.5458984375e-05, + "model_forward_time": 0.025203943252563477, + "step": 29792 + }, + { + "epoch": 4.5458984375e-05, + "step": 29792, + "training_step_time": 0.20252323150634766 + }, + { + "epoch": 4.546051025390625e-05, + "model_forward_time": 0.024335145950317383, + "step": 29793 + }, + { + "epoch": 4.546051025390625e-05, + "step": 29793, + "training_step_time": 0.1155710220336914 + }, + { + "epoch": 4.54620361328125e-05, + "model_forward_time": 0.02441883087158203, + "step": 29794 + }, + { + "epoch": 4.54620361328125e-05, + "step": 29794, + "training_step_time": 0.1305527687072754 + }, + { + "epoch": 4.546356201171875e-05, + "model_forward_time": 0.02538323402404785, + "step": 29795 + }, + { + "epoch": 4.546356201171875e-05, + "step": 29795, + "training_step_time": 0.15538716316223145 + }, + { + "epoch": 4.5465087890625e-05, + "model_forward_time": 0.0249025821685791, + "step": 29796 + }, + { + "epoch": 4.5465087890625e-05, + "step": 29796, + "training_step_time": 0.2151353359222412 + }, + { + "epoch": 4.546661376953125e-05, + "model_forward_time": 0.024242162704467773, + "step": 29797 + }, + { + "epoch": 4.546661376953125e-05, + "step": 29797, + "training_step_time": 0.10626959800720215 + }, + { + "epoch": 4.54681396484375e-05, + "model_forward_time": 0.024651050567626953, + "step": 29798 + }, + { + "epoch": 4.54681396484375e-05, + "step": 29798, + "training_step_time": 0.1042943000793457 + }, + { + "epoch": 4.546966552734375e-05, + "model_forward_time": 0.025501728057861328, + "step": 29799 + }, + { + "epoch": 4.546966552734375e-05, + "step": 29799, + "training_step_time": 0.10441112518310547 + }, + { + "epoch": 4.547119140625e-05, + "grad_norm": 0.07544019818305969, + "learning_rate": 1.215045202527243e-08, + "loss": 0.0039, + "step": 29800 + }, + { + "epoch": 4.547119140625e-05, + "model_forward_time": 0.02561187744140625, + "step": 29800 + }, + { + "epoch": 4.547119140625e-05, + "step": 29800, + "training_step_time": 0.10857152938842773 + }, + { + "epoch": 4.547271728515625e-05, + "model_forward_time": 0.025725603103637695, + "step": 29801 + }, + { + "epoch": 4.547271728515625e-05, + "step": 29801, + "training_step_time": 0.10497689247131348 + }, + { + "epoch": 4.54742431640625e-05, + "model_forward_time": 0.0261690616607666, + "step": 29802 + }, + { + "epoch": 4.54742431640625e-05, + "step": 29802, + "training_step_time": 0.1060631275177002 + }, + { + "epoch": 4.547576904296875e-05, + "model_forward_time": 0.025210142135620117, + "step": 29803 + }, + { + "epoch": 4.547576904296875e-05, + "step": 29803, + "training_step_time": 0.14506292343139648 + }, + { + "epoch": 4.5477294921875e-05, + "model_forward_time": 0.02478933334350586, + "step": 29804 + }, + { + "epoch": 4.5477294921875e-05, + "step": 29804, + "training_step_time": 0.1537313461303711 + }, + { + "epoch": 4.547882080078125e-05, + "model_forward_time": 0.024748802185058594, + "step": 29805 + }, + { + "epoch": 4.547882080078125e-05, + "step": 29805, + "training_step_time": 0.14660048484802246 + }, + { + "epoch": 4.54803466796875e-05, + "model_forward_time": 0.0249941349029541, + "step": 29806 + }, + { + "epoch": 4.54803466796875e-05, + "step": 29806, + "training_step_time": 0.13646578788757324 + }, + { + "epoch": 4.548187255859375e-05, + "model_forward_time": 0.0247342586517334, + "step": 29807 + }, + { + "epoch": 4.548187255859375e-05, + "step": 29807, + "training_step_time": 0.10711455345153809 + }, + { + "epoch": 4.54833984375e-05, + "model_forward_time": 0.025429725646972656, + "step": 29808 + }, + { + "epoch": 4.54833984375e-05, + "step": 29808, + "training_step_time": 0.11243414878845215 + }, + { + "epoch": 4.548492431640625e-05, + "model_forward_time": 0.02510547637939453, + "step": 29809 + }, + { + "epoch": 4.548492431640625e-05, + "step": 29809, + "training_step_time": 0.11042451858520508 + }, + { + "epoch": 4.54864501953125e-05, + "grad_norm": 0.029395155608654022, + "learning_rate": 1.096582625772502e-08, + "loss": 0.0026, + "step": 29810 + }, + { + "epoch": 4.54864501953125e-05, + "model_forward_time": 0.025696754455566406, + "step": 29810 + }, + { + "epoch": 4.54864501953125e-05, + "step": 29810, + "training_step_time": 0.10933256149291992 + }, + { + "epoch": 4.548797607421875e-05, + "model_forward_time": 0.02564263343811035, + "step": 29811 + }, + { + "epoch": 4.548797607421875e-05, + "step": 29811, + "training_step_time": 0.10943603515625 + }, + { + "epoch": 4.5489501953125e-05, + "model_forward_time": 0.026520967483520508, + "step": 29812 + }, + { + "epoch": 4.5489501953125e-05, + "step": 29812, + "training_step_time": 0.11083364486694336 + }, + { + "epoch": 4.549102783203125e-05, + "model_forward_time": 0.024502277374267578, + "step": 29813 + }, + { + "epoch": 4.549102783203125e-05, + "step": 29813, + "training_step_time": 0.1049489974975586 + }, + { + "epoch": 4.54925537109375e-05, + "model_forward_time": 0.024890422821044922, + "step": 29814 + }, + { + "epoch": 4.54925537109375e-05, + "step": 29814, + "training_step_time": 0.10541272163391113 + }, + { + "epoch": 4.549407958984375e-05, + "model_forward_time": 0.02503657341003418, + "step": 29815 + }, + { + "epoch": 4.549407958984375e-05, + "step": 29815, + "training_step_time": 0.1040651798248291 + }, + { + "epoch": 4.549560546875e-05, + "model_forward_time": 0.024611949920654297, + "step": 29816 + }, + { + "epoch": 4.549560546875e-05, + "step": 29816, + "training_step_time": 0.1051797866821289 + }, + { + "epoch": 4.549713134765625e-05, + "model_forward_time": 0.02532649040222168, + "step": 29817 + }, + { + "epoch": 4.549713134765625e-05, + "step": 29817, + "training_step_time": 0.10568785667419434 + }, + { + "epoch": 4.54986572265625e-05, + "model_forward_time": 0.025198936462402344, + "step": 29818 + }, + { + "epoch": 4.54986572265625e-05, + "step": 29818, + "training_step_time": 0.10795974731445312 + }, + { + "epoch": 4.550018310546875e-05, + "model_forward_time": 0.02524590492248535, + "step": 29819 + }, + { + "epoch": 4.550018310546875e-05, + "step": 29819, + "training_step_time": 0.10977602005004883 + }, + { + "epoch": 4.5501708984375e-05, + "grad_norm": 0.029612349346280098, + "learning_rate": 9.841941880361916e-09, + "loss": 0.0024, + "step": 29820 + }, + { + "epoch": 4.5501708984375e-05, + "model_forward_time": 0.025239229202270508, + "step": 29820 + }, + { + "epoch": 4.5501708984375e-05, + "step": 29820, + "training_step_time": 0.10514068603515625 + }, + { + "epoch": 4.550323486328125e-05, + "model_forward_time": 0.02533411979675293, + "step": 29821 + }, + { + "epoch": 4.550323486328125e-05, + "step": 29821, + "training_step_time": 0.11825037002563477 + }, + { + "epoch": 4.55047607421875e-05, + "model_forward_time": 0.025560855865478516, + "step": 29822 + }, + { + "epoch": 4.55047607421875e-05, + "step": 29822, + "training_step_time": 0.1090996265411377 + }, + { + "epoch": 4.550628662109375e-05, + "model_forward_time": 0.02523946762084961, + "step": 29823 + }, + { + "epoch": 4.550628662109375e-05, + "step": 29823, + "training_step_time": 0.1298837661743164 + }, + { + "epoch": 4.55078125e-05, + "model_forward_time": 0.025089502334594727, + "step": 29824 + }, + { + "epoch": 4.55078125e-05, + "step": 29824, + "training_step_time": 0.11962127685546875 + }, + { + "epoch": 4.550933837890625e-05, + "model_forward_time": 0.025135040283203125, + "step": 29825 + }, + { + "epoch": 4.550933837890625e-05, + "step": 29825, + "training_step_time": 0.20621418952941895 + }, + { + "epoch": 4.55108642578125e-05, + "model_forward_time": 0.024346351623535156, + "step": 29826 + }, + { + "epoch": 4.55108642578125e-05, + "step": 29826, + "training_step_time": 0.1054377555847168 + }, + { + "epoch": 4.551239013671875e-05, + "model_forward_time": 0.025043487548828125, + "step": 29827 + }, + { + "epoch": 4.551239013671875e-05, + "step": 29827, + "training_step_time": 0.1144716739654541 + }, + { + "epoch": 4.5513916015625e-05, + "model_forward_time": 0.025482892990112305, + "step": 29828 + }, + { + "epoch": 4.5513916015625e-05, + "step": 29828, + "training_step_time": 0.11162424087524414 + }, + { + "epoch": 4.551544189453125e-05, + "model_forward_time": 0.025158166885375977, + "step": 29829 + }, + { + "epoch": 4.551544189453125e-05, + "step": 29829, + "training_step_time": 0.10683345794677734 + }, + { + "epoch": 4.55169677734375e-05, + "grad_norm": 0.4647483825683594, + "learning_rate": 8.778800258801844e-09, + "loss": 0.0119, + "step": 29830 + }, + { + "epoch": 4.55169677734375e-05, + "model_forward_time": 0.025578975677490234, + "step": 29830 + }, + { + "epoch": 4.55169677734375e-05, + "step": 29830, + "training_step_time": 0.13682055473327637 + }, + { + "epoch": 4.551849365234375e-05, + "model_forward_time": 0.024731874465942383, + "step": 29831 + }, + { + "epoch": 4.551849365234375e-05, + "step": 29831, + "training_step_time": 0.10227799415588379 + }, + { + "epoch": 4.552001953125e-05, + "model_forward_time": 0.025095224380493164, + "step": 29832 + }, + { + "epoch": 4.552001953125e-05, + "step": 29832, + "training_step_time": 0.1206061840057373 + }, + { + "epoch": 4.552154541015625e-05, + "model_forward_time": 0.025343656539916992, + "step": 29833 + }, + { + "epoch": 4.552154541015625e-05, + "step": 29833, + "training_step_time": 0.13239383697509766 + }, + { + "epoch": 4.55230712890625e-05, + "model_forward_time": 0.025284290313720703, + "step": 29834 + }, + { + "epoch": 4.55230712890625e-05, + "step": 29834, + "training_step_time": 0.13172507286071777 + }, + { + "epoch": 4.552459716796875e-05, + "model_forward_time": 0.025725841522216797, + "step": 29835 + }, + { + "epoch": 4.552459716796875e-05, + "step": 29835, + "training_step_time": 0.12440061569213867 + }, + { + "epoch": 4.5526123046875e-05, + "model_forward_time": 0.025058984756469727, + "step": 29836 + }, + { + "epoch": 4.5526123046875e-05, + "step": 29836, + "training_step_time": 0.11696410179138184 + }, + { + "epoch": 4.552764892578125e-05, + "model_forward_time": 0.025155067443847656, + "step": 29837 + }, + { + "epoch": 4.552764892578125e-05, + "step": 29837, + "training_step_time": 0.11116313934326172 + }, + { + "epoch": 4.55291748046875e-05, + "model_forward_time": 0.024876117706298828, + "step": 29838 + }, + { + "epoch": 4.55291748046875e-05, + "step": 29838, + "training_step_time": 0.17532849311828613 + }, + { + "epoch": 4.553070068359375e-05, + "model_forward_time": 0.02466440200805664, + "step": 29839 + }, + { + "epoch": 4.553070068359375e-05, + "step": 29839, + "training_step_time": 0.12402939796447754 + }, + { + "epoch": 4.55322265625e-05, + "grad_norm": 0.032721392810344696, + "learning_rate": 7.77640268486146e-09, + "loss": 0.0091, + "step": 29840 + }, + { + "epoch": 4.55322265625e-05, + "model_forward_time": 0.024419546127319336, + "step": 29840 + }, + { + "epoch": 4.55322265625e-05, + "step": 29840, + "training_step_time": 0.10987544059753418 + }, + { + "epoch": 4.553375244140625e-05, + "model_forward_time": 0.025065898895263672, + "step": 29841 + }, + { + "epoch": 4.553375244140625e-05, + "step": 29841, + "training_step_time": 0.10781073570251465 + }, + { + "epoch": 4.55352783203125e-05, + "model_forward_time": 0.02496933937072754, + "step": 29842 + }, + { + "epoch": 4.55352783203125e-05, + "step": 29842, + "training_step_time": 0.1091301441192627 + }, + { + "epoch": 4.553680419921875e-05, + "model_forward_time": 0.025188684463500977, + "step": 29843 + }, + { + "epoch": 4.553680419921875e-05, + "step": 29843, + "training_step_time": 0.10757565498352051 + }, + { + "epoch": 4.5538330078125e-05, + "model_forward_time": 0.025395631790161133, + "step": 29844 + }, + { + "epoch": 4.5538330078125e-05, + "step": 29844, + "training_step_time": 0.11426615715026855 + }, + { + "epoch": 4.553985595703125e-05, + "model_forward_time": 0.025185346603393555, + "step": 29845 + }, + { + "epoch": 4.553985595703125e-05, + "step": 29845, + "training_step_time": 0.11373162269592285 + }, + { + "epoch": 4.55413818359375e-05, + "model_forward_time": 0.025087356567382812, + "step": 29846 + }, + { + "epoch": 4.55413818359375e-05, + "step": 29846, + "training_step_time": 0.10472798347473145 + }, + { + "epoch": 4.554290771484375e-05, + "model_forward_time": 0.024913787841796875, + "step": 29847 + }, + { + "epoch": 4.554290771484375e-05, + "step": 29847, + "training_step_time": 0.10458898544311523 + }, + { + "epoch": 4.554443359375e-05, + "model_forward_time": 0.024812936782836914, + "step": 29848 + }, + { + "epoch": 4.554443359375e-05, + "step": 29848, + "training_step_time": 0.1044011116027832 + }, + { + "epoch": 4.554595947265625e-05, + "model_forward_time": 0.024538516998291016, + "step": 29849 + }, + { + "epoch": 4.554595947265625e-05, + "step": 29849, + "training_step_time": 0.10287642478942871 + }, + { + "epoch": 4.55474853515625e-05, + "grad_norm": 0.15437576174736023, + "learning_rate": 6.834750376549792e-09, + "loss": 0.0063, + "step": 29850 + }, + { + "epoch": 4.55474853515625e-05, + "model_forward_time": 0.024811506271362305, + "step": 29850 + }, + { + "epoch": 4.55474853515625e-05, + "step": 29850, + "training_step_time": 0.1455671787261963 + }, + { + "epoch": 4.554901123046875e-05, + "model_forward_time": 0.025674104690551758, + "step": 29851 + }, + { + "epoch": 4.554901123046875e-05, + "step": 29851, + "training_step_time": 0.15994048118591309 + }, + { + "epoch": 4.5550537109375e-05, + "model_forward_time": 0.024443626403808594, + "step": 29852 + }, + { + "epoch": 4.5550537109375e-05, + "step": 29852, + "training_step_time": 0.11379861831665039 + }, + { + "epoch": 4.555206298828125e-05, + "model_forward_time": 0.024624347686767578, + "step": 29853 + }, + { + "epoch": 4.555206298828125e-05, + "step": 29853, + "training_step_time": 0.12799954414367676 + }, + { + "epoch": 4.55535888671875e-05, + "model_forward_time": 0.02533864974975586, + "step": 29854 + }, + { + "epoch": 4.55535888671875e-05, + "step": 29854, + "training_step_time": 0.2027287483215332 + }, + { + "epoch": 4.555511474609375e-05, + "model_forward_time": 0.024456024169921875, + "step": 29855 + }, + { + "epoch": 4.555511474609375e-05, + "step": 29855, + "training_step_time": 0.10149312019348145 + }, + { + "epoch": 4.5556640625e-05, + "model_forward_time": 0.02468419075012207, + "step": 29856 + }, + { + "epoch": 4.5556640625e-05, + "step": 29856, + "training_step_time": 0.10879015922546387 + }, + { + "epoch": 4.555816650390625e-05, + "model_forward_time": 0.025716066360473633, + "step": 29857 + }, + { + "epoch": 4.555816650390625e-05, + "step": 29857, + "training_step_time": 0.10439038276672363 + }, + { + "epoch": 4.55596923828125e-05, + "model_forward_time": 0.025579214096069336, + "step": 29858 + }, + { + "epoch": 4.55596923828125e-05, + "step": 29858, + "training_step_time": 0.10485124588012695 + }, + { + "epoch": 4.556121826171875e-05, + "model_forward_time": 0.02590799331665039, + "step": 29859 + }, + { + "epoch": 4.556121826171875e-05, + "step": 29859, + "training_step_time": 0.11098670959472656 + }, + { + "epoch": 4.5562744140625e-05, + "grad_norm": 0.11889815330505371, + "learning_rate": 5.953844478068238e-09, + "loss": 0.0048, + "step": 29860 + }, + { + "epoch": 4.5562744140625e-05, + "model_forward_time": 0.025787830352783203, + "step": 29860 + }, + { + "epoch": 4.5562744140625e-05, + "step": 29860, + "training_step_time": 0.11009430885314941 + }, + { + "epoch": 4.556427001953125e-05, + "model_forward_time": 0.02519512176513672, + "step": 29861 + }, + { + "epoch": 4.556427001953125e-05, + "step": 29861, + "training_step_time": 0.10363125801086426 + }, + { + "epoch": 4.55657958984375e-05, + "model_forward_time": 0.025161027908325195, + "step": 29862 + }, + { + "epoch": 4.55657958984375e-05, + "step": 29862, + "training_step_time": 0.10190534591674805 + }, + { + "epoch": 4.556732177734375e-05, + "model_forward_time": 0.02541971206665039, + "step": 29863 + }, + { + "epoch": 4.556732177734375e-05, + "step": 29863, + "training_step_time": 0.17733097076416016 + }, + { + "epoch": 4.556884765625e-05, + "model_forward_time": 0.024814367294311523, + "step": 29864 + }, + { + "epoch": 4.556884765625e-05, + "step": 29864, + "training_step_time": 0.15914678573608398 + }, + { + "epoch": 4.557037353515625e-05, + "model_forward_time": 0.024339675903320312, + "step": 29865 + }, + { + "epoch": 4.557037353515625e-05, + "step": 29865, + "training_step_time": 0.17061281204223633 + }, + { + "epoch": 4.55718994140625e-05, + "model_forward_time": 0.024552583694458008, + "step": 29866 + }, + { + "epoch": 4.55718994140625e-05, + "step": 29866, + "training_step_time": 0.19593214988708496 + }, + { + "epoch": 4.557342529296875e-05, + "model_forward_time": 0.024770259857177734, + "step": 29867 + }, + { + "epoch": 4.557342529296875e-05, + "step": 29867, + "training_step_time": 0.16433930397033691 + }, + { + "epoch": 4.5574951171875e-05, + "model_forward_time": 0.02445673942565918, + "step": 29868 + }, + { + "epoch": 4.5574951171875e-05, + "step": 29868, + "training_step_time": 0.14076566696166992 + }, + { + "epoch": 4.557647705078125e-05, + "model_forward_time": 0.024722576141357422, + "step": 29869 + }, + { + "epoch": 4.557647705078125e-05, + "step": 29869, + "training_step_time": 0.2085716724395752 + }, + { + "epoch": 4.55780029296875e-05, + "grad_norm": 0.061708930879831314, + "learning_rate": 5.133686059793918e-09, + "loss": 0.0026, + "step": 29870 + }, + { + "epoch": 4.55780029296875e-05, + "model_forward_time": 0.025552988052368164, + "step": 29870 + }, + { + "epoch": 4.55780029296875e-05, + "step": 29870, + "training_step_time": 0.21052312850952148 + }, + { + "epoch": 4.557952880859375e-05, + "model_forward_time": 0.025050640106201172, + "step": 29871 + }, + { + "epoch": 4.557952880859375e-05, + "step": 29871, + "training_step_time": 0.11556434631347656 + }, + { + "epoch": 4.55810546875e-05, + "model_forward_time": 0.024522781372070312, + "step": 29872 + }, + { + "epoch": 4.55810546875e-05, + "step": 29872, + "training_step_time": 0.11649179458618164 + }, + { + "epoch": 4.558258056640625e-05, + "model_forward_time": 0.0255277156829834, + "step": 29873 + }, + { + "epoch": 4.558258056640625e-05, + "step": 29873, + "training_step_time": 0.13058733940124512 + }, + { + "epoch": 4.55841064453125e-05, + "model_forward_time": 0.025236129760742188, + "step": 29874 + }, + { + "epoch": 4.55841064453125e-05, + "step": 29874, + "training_step_time": 0.1099398136138916 + }, + { + "epoch": 4.558563232421875e-05, + "model_forward_time": 0.025589704513549805, + "step": 29875 + }, + { + "epoch": 4.558563232421875e-05, + "step": 29875, + "training_step_time": 0.10824131965637207 + }, + { + "epoch": 4.5587158203125e-05, + "model_forward_time": 0.024853229522705078, + "step": 29876 + }, + { + "epoch": 4.5587158203125e-05, + "step": 29876, + "training_step_time": 0.1059272289276123 + }, + { + "epoch": 4.558868408203125e-05, + "model_forward_time": 0.02524709701538086, + "step": 29877 + }, + { + "epoch": 4.558868408203125e-05, + "step": 29877, + "training_step_time": 0.10860228538513184 + }, + { + "epoch": 4.55902099609375e-05, + "model_forward_time": 0.026285648345947266, + "step": 29878 + }, + { + "epoch": 4.55902099609375e-05, + "step": 29878, + "training_step_time": 0.10982656478881836 + }, + { + "epoch": 4.559173583984375e-05, + "model_forward_time": 0.02453017234802246, + "step": 29879 + }, + { + "epoch": 4.559173583984375e-05, + "step": 29879, + "training_step_time": 0.10590934753417969 + }, + { + "epoch": 4.559326171875e-05, + "grad_norm": 0.15741203725337982, + "learning_rate": 4.3742761183018784e-09, + "loss": 0.0053, + "step": 29880 + }, + { + "epoch": 4.559326171875e-05, + "model_forward_time": 0.02405261993408203, + "step": 29880 + }, + { + "epoch": 4.559326171875e-05, + "step": 29880, + "training_step_time": 0.10668325424194336 + }, + { + "epoch": 4.559478759765625e-05, + "model_forward_time": 0.024428606033325195, + "step": 29881 + }, + { + "epoch": 4.559478759765625e-05, + "step": 29881, + "training_step_time": 0.10592341423034668 + }, + { + "epoch": 4.55963134765625e-05, + "model_forward_time": 0.024007320404052734, + "step": 29882 + }, + { + "epoch": 4.55963134765625e-05, + "step": 29882, + "training_step_time": 0.17463970184326172 + }, + { + "epoch": 4.559783935546875e-05, + "model_forward_time": 0.024793624877929688, + "step": 29883 + }, + { + "epoch": 4.559783935546875e-05, + "step": 29883, + "training_step_time": 0.11826205253601074 + }, + { + "epoch": 4.5599365234375e-05, + "model_forward_time": 0.0247189998626709, + "step": 29884 + }, + { + "epoch": 4.5599365234375e-05, + "step": 29884, + "training_step_time": 0.13265275955200195 + }, + { + "epoch": 4.560089111328125e-05, + "model_forward_time": 0.025522232055664062, + "step": 29885 + }, + { + "epoch": 4.560089111328125e-05, + "step": 29885, + "training_step_time": 0.10564398765563965 + }, + { + "epoch": 4.56024169921875e-05, + "model_forward_time": 0.025332927703857422, + "step": 29886 + }, + { + "epoch": 4.56024169921875e-05, + "step": 29886, + "training_step_time": 0.1683807373046875 + }, + { + "epoch": 4.560394287109375e-05, + "model_forward_time": 0.024429798126220703, + "step": 29887 + }, + { + "epoch": 4.560394287109375e-05, + "step": 29887, + "training_step_time": 0.13535571098327637 + }, + { + "epoch": 4.560546875e-05, + "model_forward_time": 0.024456262588500977, + "step": 29888 + }, + { + "epoch": 4.560546875e-05, + "step": 29888, + "training_step_time": 0.10717988014221191 + }, + { + "epoch": 4.560699462890625e-05, + "model_forward_time": 0.025233983993530273, + "step": 29889 + }, + { + "epoch": 4.560699462890625e-05, + "step": 29889, + "training_step_time": 0.10336875915527344 + }, + { + "epoch": 4.56085205078125e-05, + "grad_norm": 0.03077687695622444, + "learning_rate": 3.6756155763373323e-09, + "loss": 0.0025, + "step": 29890 + }, + { + "epoch": 4.56085205078125e-05, + "model_forward_time": 0.0254213809967041, + "step": 29890 + }, + { + "epoch": 4.56085205078125e-05, + "step": 29890, + "training_step_time": 0.1055135726928711 + }, + { + "epoch": 4.561004638671875e-05, + "model_forward_time": 0.02512216567993164, + "step": 29891 + }, + { + "epoch": 4.561004638671875e-05, + "step": 29891, + "training_step_time": 0.10843396186828613 + }, + { + "epoch": 4.5611572265625e-05, + "model_forward_time": 0.025209665298461914, + "step": 29892 + }, + { + "epoch": 4.5611572265625e-05, + "step": 29892, + "training_step_time": 0.10658574104309082 + }, + { + "epoch": 4.561309814453125e-05, + "model_forward_time": 0.025867223739624023, + "step": 29893 + }, + { + "epoch": 4.561309814453125e-05, + "step": 29893, + "training_step_time": 0.10712766647338867 + }, + { + "epoch": 4.56146240234375e-05, + "model_forward_time": 0.02500176429748535, + "step": 29894 + }, + { + "epoch": 4.56146240234375e-05, + "step": 29894, + "training_step_time": 0.14104533195495605 + }, + { + "epoch": 4.561614990234375e-05, + "model_forward_time": 0.02465963363647461, + "step": 29895 + }, + { + "epoch": 4.561614990234375e-05, + "step": 29895, + "training_step_time": 0.15581536293029785 + }, + { + "epoch": 4.561767578125e-05, + "model_forward_time": 0.025221824645996094, + "step": 29896 + }, + { + "epoch": 4.561767578125e-05, + "step": 29896, + "training_step_time": 0.11303591728210449 + }, + { + "epoch": 4.561920166015625e-05, + "model_forward_time": 0.024544239044189453, + "step": 29897 + }, + { + "epoch": 4.561920166015625e-05, + "step": 29897, + "training_step_time": 0.1325218677520752 + }, + { + "epoch": 4.56207275390625e-05, + "model_forward_time": 0.025615692138671875, + "step": 29898 + }, + { + "epoch": 4.56207275390625e-05, + "step": 29898, + "training_step_time": 0.1951141357421875 + }, + { + "epoch": 4.562225341796875e-05, + "model_forward_time": 0.0241239070892334, + "step": 29899 + }, + { + "epoch": 4.562225341796875e-05, + "step": 29899, + "training_step_time": 0.10769081115722656 + }, + { + "epoch": 4.5623779296875e-05, + "grad_norm": 0.10912805050611496, + "learning_rate": 3.0377052828489683e-09, + "loss": 0.0129, + "step": 29900 + }, + { + "epoch": 4.5623779296875e-05, + "model_forward_time": 0.02468132972717285, + "step": 29900 + }, + { + "epoch": 4.5623779296875e-05, + "step": 29900, + "training_step_time": 0.10791420936584473 + }, + { + "epoch": 4.562530517578125e-05, + "model_forward_time": 0.025246858596801758, + "step": 29901 + }, + { + "epoch": 4.562530517578125e-05, + "step": 29901, + "training_step_time": 0.10767698287963867 + }, + { + "epoch": 4.56268310546875e-05, + "model_forward_time": 0.025555133819580078, + "step": 29902 + }, + { + "epoch": 4.56268310546875e-05, + "step": 29902, + "training_step_time": 0.10740232467651367 + }, + { + "epoch": 4.562835693359375e-05, + "model_forward_time": 0.0254056453704834, + "step": 29903 + }, + { + "epoch": 4.562835693359375e-05, + "step": 29903, + "training_step_time": 0.10549807548522949 + }, + { + "epoch": 4.56298828125e-05, + "model_forward_time": 0.025142431259155273, + "step": 29904 + }, + { + "epoch": 4.56298828125e-05, + "step": 29904, + "training_step_time": 0.105438232421875 + }, + { + "epoch": 4.563140869140625e-05, + "model_forward_time": 0.0251920223236084, + "step": 29905 + }, + { + "epoch": 4.563140869140625e-05, + "step": 29905, + "training_step_time": 0.10515737533569336 + }, + { + "epoch": 4.56329345703125e-05, + "model_forward_time": 0.024955034255981445, + "step": 29906 + }, + { + "epoch": 4.56329345703125e-05, + "step": 29906, + "training_step_time": 0.18176889419555664 + }, + { + "epoch": 4.563446044921875e-05, + "model_forward_time": 0.024888277053833008, + "step": 29907 + }, + { + "epoch": 4.563446044921875e-05, + "step": 29907, + "training_step_time": 0.11118674278259277 + }, + { + "epoch": 4.5635986328125e-05, + "model_forward_time": 0.024846792221069336, + "step": 29908 + }, + { + "epoch": 4.5635986328125e-05, + "step": 29908, + "training_step_time": 0.18247151374816895 + }, + { + "epoch": 4.563751220703125e-05, + "model_forward_time": 0.024693727493286133, + "step": 29909 + }, + { + "epoch": 4.563751220703125e-05, + "step": 29909, + "training_step_time": 0.12749028205871582 + }, + { + "epoch": 4.56390380859375e-05, + "grad_norm": 0.06053199619054794, + "learning_rate": 2.4605460129556445e-09, + "loss": 0.0034, + "step": 29910 + }, + { + "epoch": 4.56390380859375e-05, + "model_forward_time": 0.024350881576538086, + "step": 29910 + }, + { + "epoch": 4.56390380859375e-05, + "step": 29910, + "training_step_time": 0.10874342918395996 + }, + { + "epoch": 4.564056396484375e-05, + "model_forward_time": 0.025455236434936523, + "step": 29911 + }, + { + "epoch": 4.564056396484375e-05, + "step": 29911, + "training_step_time": 0.10852837562561035 + }, + { + "epoch": 4.564208984375e-05, + "model_forward_time": 0.025014400482177734, + "step": 29912 + }, + { + "epoch": 4.564208984375e-05, + "step": 29912, + "training_step_time": 0.12110638618469238 + }, + { + "epoch": 4.564361572265625e-05, + "model_forward_time": 0.025246858596801758, + "step": 29913 + }, + { + "epoch": 4.564361572265625e-05, + "step": 29913, + "training_step_time": 0.14872527122497559 + }, + { + "epoch": 4.56451416015625e-05, + "model_forward_time": 0.025379657745361328, + "step": 29914 + }, + { + "epoch": 4.56451416015625e-05, + "step": 29914, + "training_step_time": 0.21952438354492188 + }, + { + "epoch": 4.564666748046875e-05, + "model_forward_time": 0.024655818939208984, + "step": 29915 + }, + { + "epoch": 4.564666748046875e-05, + "step": 29915, + "training_step_time": 0.17305517196655273 + }, + { + "epoch": 4.5648193359375e-05, + "model_forward_time": 0.02428746223449707, + "step": 29916 + }, + { + "epoch": 4.5648193359375e-05, + "step": 29916, + "training_step_time": 0.14905071258544922 + }, + { + "epoch": 4.564971923828125e-05, + "model_forward_time": 0.0247652530670166, + "step": 29917 + }, + { + "epoch": 4.564971923828125e-05, + "step": 29917, + "training_step_time": 0.11669349670410156 + }, + { + "epoch": 4.56512451171875e-05, + "model_forward_time": 0.024723291397094727, + "step": 29918 + }, + { + "epoch": 4.56512451171875e-05, + "step": 29918, + "training_step_time": 0.14152979850769043 + }, + { + "epoch": 4.565277099609375e-05, + "model_forward_time": 0.02499675750732422, + "step": 29919 + }, + { + "epoch": 4.565277099609375e-05, + "step": 29919, + "training_step_time": 0.10398554801940918 + }, + { + "epoch": 4.5654296875e-05, + "grad_norm": 0.07553080469369888, + "learning_rate": 1.9441384679574903e-09, + "loss": 0.0047, + "step": 29920 + }, + { + "epoch": 4.5654296875e-05, + "model_forward_time": 0.025573253631591797, + "step": 29920 + }, + { + "epoch": 4.5654296875e-05, + "step": 29920, + "training_step_time": 0.10768938064575195 + }, + { + "epoch": 4.565582275390625e-05, + "model_forward_time": 0.025451183319091797, + "step": 29921 + }, + { + "epoch": 4.565582275390625e-05, + "step": 29921, + "training_step_time": 0.10346627235412598 + }, + { + "epoch": 4.56573486328125e-05, + "model_forward_time": 0.025549650192260742, + "step": 29922 + }, + { + "epoch": 4.56573486328125e-05, + "step": 29922, + "training_step_time": 0.10369133949279785 + }, + { + "epoch": 4.565887451171875e-05, + "model_forward_time": 0.025081157684326172, + "step": 29923 + }, + { + "epoch": 4.565887451171875e-05, + "step": 29923, + "training_step_time": 0.1023261547088623 + }, + { + "epoch": 4.5660400390625e-05, + "model_forward_time": 0.025149822235107422, + "step": 29924 + }, + { + "epoch": 4.5660400390625e-05, + "step": 29924, + "training_step_time": 0.10977435111999512 + }, + { + "epoch": 4.566192626953125e-05, + "model_forward_time": 0.02494192123413086, + "step": 29925 + }, + { + "epoch": 4.566192626953125e-05, + "step": 29925, + "training_step_time": 0.10996079444885254 + }, + { + "epoch": 4.56634521484375e-05, + "model_forward_time": 0.02515721321105957, + "step": 29926 + }, + { + "epoch": 4.56634521484375e-05, + "step": 29926, + "training_step_time": 0.18644475936889648 + }, + { + "epoch": 4.566497802734375e-05, + "model_forward_time": 0.023569822311401367, + "step": 29927 + }, + { + "epoch": 4.566497802734375e-05, + "step": 29927, + "training_step_time": 0.23588109016418457 + }, + { + "epoch": 4.566650390625e-05, + "model_forward_time": 0.0247647762298584, + "step": 29928 + }, + { + "epoch": 4.566650390625e-05, + "step": 29928, + "training_step_time": 0.2453474998474121 + }, + { + "epoch": 4.566802978515625e-05, + "model_forward_time": 0.024123430252075195, + "step": 29929 + }, + { + "epoch": 4.566802978515625e-05, + "step": 29929, + "training_step_time": 0.19994688034057617 + }, + { + "epoch": 4.56695556640625e-05, + "grad_norm": 0.1345215141773224, + "learning_rate": 1.4884832753414569e-09, + "loss": 0.0107, + "step": 29930 + }, + { + "epoch": 4.56695556640625e-05, + "model_forward_time": 0.02463221549987793, + "step": 29930 + }, + { + "epoch": 4.56695556640625e-05, + "step": 29930, + "training_step_time": 0.18257498741149902 + }, + { + "epoch": 4.567108154296875e-05, + "model_forward_time": 0.024220705032348633, + "step": 29931 + }, + { + "epoch": 4.567108154296875e-05, + "step": 29931, + "training_step_time": 0.1744384765625 + }, + { + "epoch": 4.5672607421875e-05, + "model_forward_time": 0.024404525756835938, + "step": 29932 + }, + { + "epoch": 4.5672607421875e-05, + "step": 29932, + "training_step_time": 0.16705536842346191 + }, + { + "epoch": 4.567413330078125e-05, + "model_forward_time": 0.024343490600585938, + "step": 29933 + }, + { + "epoch": 4.567413330078125e-05, + "step": 29933, + "training_step_time": 0.10091090202331543 + }, + { + "epoch": 4.56756591796875e-05, + "model_forward_time": 0.02545785903930664, + "step": 29934 + }, + { + "epoch": 4.56756591796875e-05, + "step": 29934, + "training_step_time": 0.10606265068054199 + }, + { + "epoch": 4.567718505859375e-05, + "model_forward_time": 0.025053977966308594, + "step": 29935 + }, + { + "epoch": 4.567718505859375e-05, + "step": 29935, + "training_step_time": 0.14166498184204102 + }, + { + "epoch": 4.56787109375e-05, + "model_forward_time": 0.024908065795898438, + "step": 29936 + }, + { + "epoch": 4.56787109375e-05, + "step": 29936, + "training_step_time": 0.16064214706420898 + }, + { + "epoch": 4.568023681640625e-05, + "model_forward_time": 0.024546146392822266, + "step": 29937 + }, + { + "epoch": 4.568023681640625e-05, + "step": 29937, + "training_step_time": 0.11368393898010254 + }, + { + "epoch": 4.56817626953125e-05, + "model_forward_time": 0.024523019790649414, + "step": 29938 + }, + { + "epoch": 4.56817626953125e-05, + "step": 29938, + "training_step_time": 0.13491010665893555 + }, + { + "epoch": 4.568328857421875e-05, + "model_forward_time": 0.02511906623840332, + "step": 29939 + }, + { + "epoch": 4.568328857421875e-05, + "step": 29939, + "training_step_time": 0.20428204536437988 + }, + { + "epoch": 4.5684814453125e-05, + "grad_norm": 0.05975399166345596, + "learning_rate": 1.0935809887702154e-09, + "loss": 0.004, + "step": 29940 + }, + { + "epoch": 4.5684814453125e-05, + "model_forward_time": 0.02454686164855957, + "step": 29940 + }, + { + "epoch": 4.5684814453125e-05, + "step": 29940, + "training_step_time": 0.11071300506591797 + }, + { + "epoch": 4.568634033203125e-05, + "model_forward_time": 0.02504730224609375, + "step": 29941 + }, + { + "epoch": 4.568634033203125e-05, + "step": 29941, + "training_step_time": 0.10204768180847168 + }, + { + "epoch": 4.56878662109375e-05, + "model_forward_time": 0.025545120239257812, + "step": 29942 + }, + { + "epoch": 4.56878662109375e-05, + "step": 29942, + "training_step_time": 0.10422396659851074 + }, + { + "epoch": 4.568939208984375e-05, + "model_forward_time": 0.025277137756347656, + "step": 29943 + }, + { + "epoch": 4.568939208984375e-05, + "step": 29943, + "training_step_time": 0.10412430763244629 + }, + { + "epoch": 4.569091796875e-05, + "model_forward_time": 0.025634050369262695, + "step": 29944 + }, + { + "epoch": 4.569091796875e-05, + "step": 29944, + "training_step_time": 0.10509872436523438 + }, + { + "epoch": 4.569244384765625e-05, + "model_forward_time": 0.02518010139465332, + "step": 29945 + }, + { + "epoch": 4.569244384765625e-05, + "step": 29945, + "training_step_time": 0.10468316078186035 + }, + { + "epoch": 4.56939697265625e-05, + "model_forward_time": 0.02538132667541504, + "step": 29946 + }, + { + "epoch": 4.56939697265625e-05, + "step": 29946, + "training_step_time": 0.10490942001342773 + }, + { + "epoch": 4.569549560546875e-05, + "model_forward_time": 0.02390432357788086, + "step": 29947 + }, + { + "epoch": 4.569549560546875e-05, + "step": 29947, + "training_step_time": 0.19042181968688965 + }, + { + "epoch": 4.5697021484375e-05, + "model_forward_time": 0.02464604377746582, + "step": 29948 + }, + { + "epoch": 4.5697021484375e-05, + "step": 29948, + "training_step_time": 0.13690543174743652 + }, + { + "epoch": 4.569854736328125e-05, + "model_forward_time": 0.024507999420166016, + "step": 29949 + }, + { + "epoch": 4.569854736328125e-05, + "step": 29949, + "training_step_time": 0.11126923561096191 + }, + { + "epoch": 4.57000732421875e-05, + "grad_norm": 0.0483989343047142, + "learning_rate": 7.594320880821571e-10, + "loss": 0.0038, + "step": 29950 + }, + { + "epoch": 4.57000732421875e-05, + "model_forward_time": 0.024791479110717773, + "step": 29950 + }, + { + "epoch": 4.57000732421875e-05, + "step": 29950, + "training_step_time": 0.11907720565795898 + }, + { + "epoch": 4.570159912109375e-05, + "model_forward_time": 0.025133609771728516, + "step": 29951 + }, + { + "epoch": 4.570159912109375e-05, + "step": 29951, + "training_step_time": 0.1067807674407959 + }, + { + "epoch": 4.5703125e-05, + "model_forward_time": 0.02519369125366211, + "step": 29952 + }, + { + "epoch": 4.5703125e-05, + "step": 29952, + "training_step_time": 0.10656595230102539 + }, + { + "epoch": 4.570465087890625e-05, + "model_forward_time": 0.025478363037109375, + "step": 29953 + }, + { + "epoch": 4.570465087890625e-05, + "step": 29953, + "training_step_time": 0.12312483787536621 + }, + { + "epoch": 4.57061767578125e-05, + "model_forward_time": 0.02511763572692871, + "step": 29954 + }, + { + "epoch": 4.57061767578125e-05, + "step": 29954, + "training_step_time": 0.11763262748718262 + }, + { + "epoch": 4.570770263671875e-05, + "model_forward_time": 0.026611804962158203, + "step": 29955 + }, + { + "epoch": 4.570770263671875e-05, + "step": 29955, + "training_step_time": 0.11596274375915527 + }, + { + "epoch": 4.5709228515625e-05, + "model_forward_time": 0.026400089263916016, + "step": 29956 + }, + { + "epoch": 4.5709228515625e-05, + "step": 29956, + "training_step_time": 0.21816802024841309 + }, + { + "epoch": 4.571075439453125e-05, + "model_forward_time": 0.02498316764831543, + "step": 29957 + }, + { + "epoch": 4.571075439453125e-05, + "step": 29957, + "training_step_time": 0.20394253730773926 + }, + { + "epoch": 4.57122802734375e-05, + "model_forward_time": 0.024318456649780273, + "step": 29958 + }, + { + "epoch": 4.57122802734375e-05, + "step": 29958, + "training_step_time": 0.1201925277709961 + }, + { + "epoch": 4.571380615234375e-05, + "model_forward_time": 0.02431201934814453, + "step": 29959 + }, + { + "epoch": 4.571380615234375e-05, + "step": 29959, + "training_step_time": 0.11854720115661621 + }, + { + "epoch": 4.571533203125e-05, + "grad_norm": 0.06495802104473114, + "learning_rate": 4.860369793080466e-10, + "loss": 0.0062, + "step": 29960 + }, + { + "epoch": 4.571533203125e-05, + "model_forward_time": 0.024856090545654297, + "step": 29960 + }, + { + "epoch": 4.571533203125e-05, + "step": 29960, + "training_step_time": 0.12964510917663574 + }, + { + "epoch": 4.571685791015625e-05, + "model_forward_time": 0.0247037410736084, + "step": 29961 + }, + { + "epoch": 4.571685791015625e-05, + "step": 29961, + "training_step_time": 0.10771536827087402 + }, + { + "epoch": 4.57183837890625e-05, + "model_forward_time": 0.025290727615356445, + "step": 29962 + }, + { + "epoch": 4.57183837890625e-05, + "step": 29962, + "training_step_time": 0.10528135299682617 + }, + { + "epoch": 4.571990966796875e-05, + "model_forward_time": 0.025136470794677734, + "step": 29963 + }, + { + "epoch": 4.571990966796875e-05, + "step": 29963, + "training_step_time": 0.1065676212310791 + }, + { + "epoch": 4.5721435546875e-05, + "model_forward_time": 0.025384187698364258, + "step": 29964 + }, + { + "epoch": 4.5721435546875e-05, + "step": 29964, + "training_step_time": 0.10642337799072266 + }, + { + "epoch": 4.572296142578125e-05, + "model_forward_time": 0.024346113204956055, + "step": 29965 + }, + { + "epoch": 4.572296142578125e-05, + "step": 29965, + "training_step_time": 0.1043846607208252 + }, + { + "epoch": 4.57244873046875e-05, + "model_forward_time": 0.024061918258666992, + "step": 29966 + }, + { + "epoch": 4.57244873046875e-05, + "step": 29966, + "training_step_time": 0.10479950904846191 + }, + { + "epoch": 4.572601318359375e-05, + "model_forward_time": 0.025409221649169922, + "step": 29967 + }, + { + "epoch": 4.572601318359375e-05, + "step": 29967, + "training_step_time": 0.10571026802062988 + }, + { + "epoch": 4.57275390625e-05, + "model_forward_time": 0.026334762573242188, + "step": 29968 + }, + { + "epoch": 4.57275390625e-05, + "step": 29968, + "training_step_time": 0.10742473602294922 + }, + { + "epoch": 4.572906494140625e-05, + "model_forward_time": 0.024933576583862305, + "step": 29969 + }, + { + "epoch": 4.572906494140625e-05, + "step": 29969, + "training_step_time": 0.14802932739257812 + }, + { + "epoch": 4.57305908203125e-05, + "grad_norm": 0.1793670952320099, + "learning_rate": 2.7339599464326627e-10, + "loss": 0.0034, + "step": 29970 + }, + { + "epoch": 4.57305908203125e-05, + "model_forward_time": 0.025174617767333984, + "step": 29970 + }, + { + "epoch": 4.57305908203125e-05, + "step": 29970, + "training_step_time": 0.10462617874145508 + }, + { + "epoch": 4.573211669921875e-05, + "model_forward_time": 0.0256345272064209, + "step": 29971 + }, + { + "epoch": 4.573211669921875e-05, + "step": 29971, + "training_step_time": 0.12344837188720703 + }, + { + "epoch": 4.5733642578125e-05, + "model_forward_time": 0.02504563331604004, + "step": 29972 + }, + { + "epoch": 4.5733642578125e-05, + "step": 29972, + "training_step_time": 0.15032577514648438 + }, + { + "epoch": 4.573516845703125e-05, + "model_forward_time": 0.024487972259521484, + "step": 29973 + }, + { + "epoch": 4.573516845703125e-05, + "step": 29973, + "training_step_time": 0.10456323623657227 + }, + { + "epoch": 4.57366943359375e-05, + "model_forward_time": 0.025249719619750977, + "step": 29974 + }, + { + "epoch": 4.57366943359375e-05, + "step": 29974, + "training_step_time": 0.19520306587219238 + }, + { + "epoch": 4.573822021484375e-05, + "model_forward_time": 0.024792909622192383, + "step": 29975 + }, + { + "epoch": 4.573822021484375e-05, + "step": 29975, + "training_step_time": 0.20862936973571777 + }, + { + "epoch": 4.573974609375e-05, + "model_forward_time": 0.028766632080078125, + "step": 29976 + }, + { + "epoch": 4.573974609375e-05, + "step": 29976, + "training_step_time": 0.20551609992980957 + }, + { + "epoch": 4.574127197265625e-05, + "model_forward_time": 0.024842500686645508, + "step": 29977 + }, + { + "epoch": 4.574127197265625e-05, + "step": 29977, + "training_step_time": 0.12756824493408203 + }, + { + "epoch": 4.57427978515625e-05, + "model_forward_time": 0.024707555770874023, + "step": 29978 + }, + { + "epoch": 4.57427978515625e-05, + "step": 29978, + "training_step_time": 0.17250943183898926 + }, + { + "epoch": 4.574432373046875e-05, + "model_forward_time": 0.02458667755126953, + "step": 29979 + }, + { + "epoch": 4.574432373046875e-05, + "step": 29979, + "training_step_time": 0.11186718940734863 + }, + { + "epoch": 4.5745849609375e-05, + "grad_norm": 0.2396877557039261, + "learning_rate": 1.2150939247002058e-10, + "loss": 0.0053, + "step": 29980 + }, + { + "epoch": 4.5745849609375e-05, + "model_forward_time": 0.025346040725708008, + "step": 29980 + }, + { + "epoch": 4.5745849609375e-05, + "step": 29980, + "training_step_time": 0.10394740104675293 + }, + { + "epoch": 4.574737548828125e-05, + "model_forward_time": 0.025341510772705078, + "step": 29981 + }, + { + "epoch": 4.574737548828125e-05, + "step": 29981, + "training_step_time": 0.1427762508392334 + }, + { + "epoch": 4.57489013671875e-05, + "model_forward_time": 0.025478124618530273, + "step": 29982 + }, + { + "epoch": 4.57489013671875e-05, + "step": 29982, + "training_step_time": 0.1967456340789795 + }, + { + "epoch": 4.575042724609375e-05, + "model_forward_time": 0.024437427520751953, + "step": 29983 + }, + { + "epoch": 4.575042724609375e-05, + "step": 29983, + "training_step_time": 0.10135197639465332 + }, + { + "epoch": 4.5751953125e-05, + "model_forward_time": 0.02461528778076172, + "step": 29984 + }, + { + "epoch": 4.5751953125e-05, + "step": 29984, + "training_step_time": 0.10251975059509277 + }, + { + "epoch": 4.575347900390625e-05, + "model_forward_time": 0.0253293514251709, + "step": 29985 + }, + { + "epoch": 4.575347900390625e-05, + "step": 29985, + "training_step_time": 0.1026310920715332 + }, + { + "epoch": 4.57550048828125e-05, + "model_forward_time": 0.025458097457885742, + "step": 29986 + }, + { + "epoch": 4.57550048828125e-05, + "step": 29986, + "training_step_time": 0.11490917205810547 + }, + { + "epoch": 4.575653076171875e-05, + "model_forward_time": 0.02565932273864746, + "step": 29987 + }, + { + "epoch": 4.575653076171875e-05, + "step": 29987, + "training_step_time": 0.10691595077514648 + }, + { + "epoch": 4.5758056640625e-05, + "model_forward_time": 0.02588939666748047, + "step": 29988 + }, + { + "epoch": 4.5758056640625e-05, + "step": 29988, + "training_step_time": 0.188765287399292 + }, + { + "epoch": 4.575958251953125e-05, + "model_forward_time": 0.02950453758239746, + "step": 29989 + }, + { + "epoch": 4.575958251953125e-05, + "step": 29989, + "training_step_time": 0.20821714401245117 + }, + { + "epoch": 4.57611083984375e-05, + "grad_norm": 0.030882326886057854, + "learning_rate": 3.037735734623404e-11, + "loss": 0.0043, + "step": 29990 + }, + { + "epoch": 4.57611083984375e-05, + "model_forward_time": 0.026105403900146484, + "step": 29990 + }, + { + "epoch": 4.57611083984375e-05, + "step": 29990, + "training_step_time": 0.2144639492034912 + }, + { + "epoch": 4.576263427734375e-05, + "model_forward_time": 0.026969194412231445, + "step": 29991 + }, + { + "epoch": 4.576263427734375e-05, + "step": 29991, + "training_step_time": 0.23193597793579102 + }, + { + "epoch": 4.576416015625e-05, + "model_forward_time": 0.029546737670898438, + "step": 29992 + }, + { + "epoch": 4.576416015625e-05, + "step": 29992, + "training_step_time": 0.3120415210723877 + }, + { + "epoch": 4.576568603515625e-05, + "model_forward_time": 0.029010772705078125, + "step": 29993 + }, + { + "epoch": 4.576568603515625e-05, + "step": 29993, + "training_step_time": 0.27482128143310547 + }, + { + "epoch": 4.57672119140625e-05, + "model_forward_time": 0.030508756637573242, + "step": 29994 + }, + { + "epoch": 4.57672119140625e-05, + "step": 29994, + "training_step_time": 0.3284180164337158 + }, + { + "epoch": 4.576873779296875e-05, + "model_forward_time": 0.028658628463745117, + "step": 29995 + }, + { + "epoch": 4.576873779296875e-05, + "step": 29995, + "training_step_time": 0.36992478370666504 + }, + { + "epoch": 4.5770263671875e-05, + "model_forward_time": 0.028682947158813477, + "step": 29996 + }, + { + "epoch": 4.5770263671875e-05, + "step": 29996, + "training_step_time": 0.3473165035247803 + }, + { + "epoch": 4.577178955078125e-05, + "model_forward_time": 0.030662059783935547, + "step": 29997 + }, + { + "epoch": 4.577178955078125e-05, + "step": 29997, + "training_step_time": 0.31621313095092773 + }, + { + "epoch": 4.57733154296875e-05, + "model_forward_time": 0.04142260551452637, + "step": 29998 + }, + { + "epoch": 4.57733154296875e-05, + "step": 29998, + "training_step_time": 0.28385281562805176 + }, + { + "epoch": 4.577484130859375e-05, + "model_forward_time": 0.03106999397277832, + "step": 29999 + }, + { + "epoch": 4.577484130859375e-05, + "step": 29999, + "training_step_time": 0.3634684085845947 + }, + { + "epoch": 4.57763671875e-05, + "grad_norm": 0.07076636701822281, + "learning_rate": 0.0, + "loss": 0.0029, + "step": 30000 + } + ], + "logging_steps": 10, + "max_steps": 30000, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/zero_to_fp32.py b/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..24cc342e78d1a006c782b3a4cd68d9ce786d8fd8 --- /dev/null +++ b/zero_to_fp32.py @@ -0,0 +1,604 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: python zero_to_fp32.py . pytorch_model.bin + +import argparse +import torch +import glob +import math +import os +import re +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + + total_files = len(files) + state_dicts = [] + for f in files: + state_dict = torch.load(f, map_location=device) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + if zero_stage <= 2: + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + elif zero_stage == 3: + # if there is more than one param group, there will be multiple flattened tensors - one + # flattened tensor per group - for simplicity merge them into a single tensor + # + # XXX: could make the script more memory efficient for when there are multiple groups - it + # will require matching the sub-lists of param_shapes for each param group flattened tensor + + fp32_flat_groups = [ + torch.cat(state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key], 0) for i in range(len(state_dicts)) + ] + + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = fp32_flat_groups[0].numel() * world_size + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + for name, shape in param_shapes.items(): + + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # XXX: memory usage doubles here + state_dict[name] = torch.cat( + tuple(fp32_flat_groups[i].narrow(0, offset, partitioned_numel) for i in range(world_size)), + 0).narrow(0, 0, unpartitioned_numel).view(shape) + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=None, exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + + Returns: + - pytorch ``state_dict`` + + Note: this approach may not work if your application doesn't have sufficient free CPU memory and + you may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + return _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, output_file, tag=None, exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_file``: path to the pytorch fp32 state_dict output file (e.g. path/pytorch_model.bin) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag, exclude_frozen_parameters) + print(f"Saving fp32 state dict to {output_file}") + torch.save(state_dict, output_file) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument( + "output_file", + type=str, + help="path to the pytorch fp32 state_dict output file (e.g. path/checkpoint-12/pytorch_model.bin)") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_file, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters)